diff --git a/contrib/jemalloc/ChangeLog b/contrib/jemalloc/ChangeLog index 532255d14884..587685d02971 100644 --- a/contrib/jemalloc/ChangeLog +++ b/contrib/jemalloc/ChangeLog @@ -4,6 +4,59 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.3.1 (November 7, 2016) + + Bug fixes: + - Fix a severe virtual memory leak. This regression was first released in + 4.3.0. (@interwq, @jasone) + - Refactor atomic and prng APIs to restore support for 32-bit platforms that + use pre-C11 toolchains, e.g. FreeBSD's mips. (@jasone) + +* 4.3.0 (November 4, 2016) + + This is the first release that passes the test suite for multiple Windows + configurations, thanks in large part to @glandium setting up continuous + integration via AppVeyor (and Travis CI for Linux and OS X). + + New features: + - Add "J" (JSON) support to malloc_stats_print(). (@jasone) + - Add Cray compiler support. (@ronawho) + + Optimizations: + - Add/use adaptive spinning for bootstrapping and radix tree node + initialization. (@jasone) + + Bug fixes: + - Fix large allocation to search starting in the optimal size class heap, + which can substantially reduce virtual memory churn and fragmentation. This + regression was first released in 4.0.0. (@mjp41, @jasone) + - Fix stats.arenas..nthreads accounting. (@interwq) + - Fix and simplify decay-based purging. (@jasone) + - Make DSS (sbrk(2)-related) operations lockless, which resolves potential + deadlocks during thread exit. (@jasone) + - Fix over-sized allocation of radix tree leaf nodes. (@mjp41, @ogaun, + @jasone) + - Fix over-sized allocation of arena_t (plus associated stats) data + structures. (@jasone, @interwq) + - Fix EXTRA_CFLAGS to not affect configuration. (@jasone) + - Fix a Valgrind integration bug. (@ronawho) + - Disallow 0x5a junk filling when running in Valgrind. (@jasone) + - Fix a file descriptor leak on Linux. This regression was first released in + 4.2.0. (@vsarunas, @jasone) + - Fix static linking of jemalloc with glibc. (@djwatson) + - Use syscall(2) rather than {open,read,close}(2) during boot on Linux. This + works around other libraries' system call wrappers performing reentrant + allocation. (@kspinka, @Whissi, @jasone) + - Fix OS X default zone replacement to work with OS X 10.12. (@glandium, + @jasone) + - Fix cached memory management to avoid needless commit/decommit operations + during purging, which resolves permanent virtual memory map fragmentation + issues on Windows. (@mjp41, @jasone) + - Fix TSD fetches to avoid (recursive) allocation. This is relevant to + non-TLS and Windows configurations. (@jasone) + - Fix malloc_conf overriding to work on Windows. (@jasone) + - Forcibly disable lazy-lock on Windows (was forcibly *enabled*). 
(@jasone) + * 4.2.1 (June 8, 2016) Bug fixes: diff --git a/contrib/jemalloc/FREEBSD-Xlist b/contrib/jemalloc/FREEBSD-Xlist index dff8d26408ac..5d1faad1a956 100644 --- a/contrib/jemalloc/FREEBSD-Xlist +++ b/contrib/jemalloc/FREEBSD-Xlist @@ -1,6 +1,8 @@ $FreeBSD$ +.appveyor.yml .autom4te.cfg .git* +.travis.yml FREEBSD-* INSTALL Makefile* diff --git a/contrib/jemalloc/FREEBSD-diffs b/contrib/jemalloc/FREEBSD-diffs index df0a538368c9..4b2a527b279d 100644 --- a/contrib/jemalloc/FREEBSD-diffs +++ b/contrib/jemalloc/FREEBSD-diffs @@ -1,11 +1,11 @@ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in -index c4a44e3..4626e9b 100644 +index 3d2e721..b361db2 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -53,11 +53,23 @@ This manual describes jemalloc @jemalloc_version@. More information can be found at the jemalloc website. + url="http://jemalloc.net/">jemalloc website. + + The following configuration options are enabled in libc's built-in + jemalloc: , @@ -27,30 +27,30 @@ index c4a44e3..4626e9b 100644 Standard API -@@ -2961,4 +2973,18 @@ malloc_conf = "lg_chunk:24";]]> - The posix_memalign function conforms - to IEEE Std 1003.1-2001 (“POSIX.1”). +@@ -2963,4 +2975,18 @@ malloc_conf = "lg_chunk:24";]]> + The posix_memalign() function conforms + to IEEE Std 1003.1-2001 (POSIX.1). + + HISTORY -+ The malloc_usable_size and -+ posix_memalign functions first appeared in -+ FreeBSD 7.0. ++ The malloc_usable_size() and ++ posix_memalign() functions first appeared in FreeBSD ++ 7.0. + -+ The aligned_alloc, -+ malloc_stats_print, and -+ mallctl* functions first appeared in -+ FreeBSD 10.0. ++ The aligned_alloc(), ++ malloc_stats_print(), and ++ mallctl*() functions first appeared in FreeBSD ++ 10.0. + -+ The *allocx functions first appeared -+ in FreeBSD 11.0. ++ The *allocx() functions first appeared in FreeBSD ++ 11.0. 
+ diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h -index b1de2b6..da6b6d2 100644 +index f39ce54..a3ba55d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h -@@ -718,8 +718,13 @@ arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind) +@@ -719,8 +719,13 @@ arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind) JEMALLOC_ALWAYS_INLINE const arena_chunk_map_misc_t * arena_miscelm_get_const(const arena_chunk_t *chunk, size_t pageind) { @@ -64,7 +64,7 @@ index b1de2b6..da6b6d2 100644 } JEMALLOC_ALWAYS_INLINE size_t -@@ -778,8 +783,13 @@ arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind) +@@ -779,8 +784,13 @@ arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind) JEMALLOC_ALWAYS_INLINE const size_t * arena_mapbitsp_get_const(const arena_chunk_t *chunk, size_t pageind) { @@ -79,7 +79,7 @@ index b1de2b6..da6b6d2 100644 JEMALLOC_ALWAYS_INLINE size_t diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in -index 8f82edd..78e2df2 100644 +index fdc8fef..56a35a4 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -8,6 +8,9 @@ @@ -108,7 +108,7 @@ index 8f82edd..78e2df2 100644 static const bool config_prof = #ifdef JEMALLOC_PROF diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h -index 2b8ca5d..42d97f2 100644 +index c907d91..4626632 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -1,6 +1,9 @@ @@ -122,10 +122,10 @@ index 2b8ca5d..42d97f2 100644 #ifdef _WIN32 # include diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h -index 5221799..60ab041 100644 +index b442d2d..76518db 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h -@@ -52,9 +52,6 @@ struct malloc_mutex_s { +@@ -57,9 +57,6 @@ struct malloc_mutex_s { #ifdef JEMALLOC_LAZY_LOCK extern bool isthreaded; @@ -135,7 +135,7 @@ index 5221799..60ab041 100644 #endif bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, -@@ -62,6 +59,7 @@ bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, +@@ -67,6 +64,7 @@ bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); @@ -144,10 +144,10 @@ index 5221799..60ab041 100644 #endif /* JEMALLOC_H_EXTERNS */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt -index f2b6a55..69369c9 100644 +index 87c8c9b..df576f6 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt -@@ -311,7 +311,6 @@ iralloct_realign +@@ -307,7 +307,6 @@ iralloct_realign isalloc isdalloct isqalloc @@ -335,7 +335,7 @@ index f943891..47d032c 100755 +#include "jemalloc_FreeBSD.h" EOF diff --git a/src/jemalloc.c b/src/jemalloc.c -index 5d1f493..46dd1d1 100644 +index 38650ff..f659b55 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4,6 +4,10 @@ @@ -347,9 +347,9 @@ index 5d1f493..46dd1d1 100644 +__sym_compat(_malloc_options, __malloc_options_1_0, FBSD_1.0); + /* Runtime configuration options. 
*/ - const char *je_malloc_conf JEMALLOC_ATTR(weak); - bool opt_abort = -@@ -2673,6 +2677,107 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) + const char *je_malloc_conf + #ifndef _WIN32 +@@ -2756,6 +2760,107 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) */ /******************************************************************************/ /* @@ -457,7 +457,7 @@ index 5d1f493..46dd1d1 100644 * The following functions are used by threading libraries for protection of * malloc during fork(). */ -@@ -2814,4 +2919,11 @@ jemalloc_postfork_child(void) +@@ -2894,4 +2999,11 @@ jemalloc_postfork_child(void) ctl_postfork_child(tsd_tsdn(tsd)); } @@ -470,7 +470,7 @@ index 5d1f493..46dd1d1 100644 + /******************************************************************************/ diff --git a/src/mutex.c b/src/mutex.c -index a1fac34..a24e420 100644 +index 6333e73..13f8d79 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -66,6 +66,17 @@ pthread_create(pthread_t *__restrict thread, @@ -491,7 +491,7 @@ index a1fac34..a24e420 100644 #endif bool -@@ -140,7 +151,7 @@ malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex) +@@ -142,7 +153,7 @@ malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex) } bool @@ -500,7 +500,7 @@ index a1fac34..a24e420 100644 { #ifdef JEMALLOC_MUTEX_INIT_CB -@@ -154,3 +165,14 @@ malloc_mutex_boot(void) +@@ -156,3 +167,14 @@ malloc_mutex_boot(void) #endif return (false); } @@ -516,7 +516,7 @@ index a1fac34..a24e420 100644 +#endif +} diff --git a/src/util.c b/src/util.c -index a1c4a2a..04f9153 100644 +index 7905267..bee1c77 100644 --- a/src/util.c +++ b/src/util.c @@ -67,6 +67,22 @@ wrtmessage(void *cbopaque, const char *s) diff --git a/contrib/jemalloc/VERSION b/contrib/jemalloc/VERSION index b17fa1dc6714..c108ecb84aad 100644 --- a/contrib/jemalloc/VERSION +++ b/contrib/jemalloc/VERSION @@ -1 +1 @@ -4.2.1-0-g3de035335255d553bdb344c32ffdb603816195d8 +4.3.1-0-g0110fa8451af905affd77c3bea0d545fee2251b2 diff --git a/contrib/jemalloc/doc/jemalloc.3 b/contrib/jemalloc/doc/jemalloc.3 index 26fb02a05105..5787e6ea446b 100644 --- a/contrib/jemalloc/doc/jemalloc.3 +++ b/contrib/jemalloc/doc/jemalloc.3 @@ -2,12 +2,12 @@ .\" Title: JEMALLOC .\" Author: Jason Evans .\" Generator: DocBook XSL Stylesheets v1.76.1 -.\" Date: 06/08/2016 +.\" Date: 11/08/2016 .\" Manual: User Manual -.\" Source: jemalloc 4.2.1-0-g3de035335255d553bdb344c32ffdb603816195d8 +.\" Source: jemalloc 4.3.1-0-g0110fa8451af905affd77c3bea0d545fee2251b2 .\" Language: English .\" -.TH "JEMALLOC" "3" "06/08/2016" "jemalloc 4.2.1-0-g3de035335255" "User Manual" +.TH "JEMALLOC" "3" "11/08/2016" "jemalloc 4.3.1-0-g0110fa8451af" "User Manual" .\" ----------------------------------------------------------------- .\" * Define some portability stuff .\" ----------------------------------------------------------------- @@ -31,7 +31,7 @@ jemalloc \- general purpose memory allocation functions .SH "LIBRARY" .PP -This manual describes jemalloc 4\&.2\&.1\-0\-g3de035335255d553bdb344c32ffdb603816195d8\&. More information can be found at the +This manual describes jemalloc 4\&.3\&.1\-0\-g0110fa8451af905affd77c3bea0d545fee2251b2\&. More information can be found at the \m[blue]\fBjemalloc website\fR\m[]\&\s-2\u[1]\d\s+2\&. .PP The following configuration options are enabled in libc\*(Aqs built\-in jemalloc: @@ -101,26 +101,26 @@ const char *\fImalloc_conf\fR; .SS "Standard API" .PP The -\fBmalloc\fR\fB\fR +malloc() function allocates \fIsize\fR bytes of uninitialized memory\&. 
The allocated space is suitably aligned (after possible pointer coercion) for storage of any type of object\&. .PP The -\fBcalloc\fR\fB\fR +calloc() function allocates space for \fInumber\fR objects, each \fIsize\fR bytes in length\&. The result is identical to calling -\fBmalloc\fR\fB\fR +malloc() with an argument of \fInumber\fR * \fIsize\fR, with the exception that the allocated memory is explicitly initialized to zero bytes\&. .PP The -\fBposix_memalign\fR\fB\fR +posix_memalign() function allocates \fIsize\fR bytes of memory such that the allocation\*(Aqs base address is a multiple of @@ -131,7 +131,7 @@ must be a power of 2 at least as large as sizeof(\fBvoid *\fR)\&. .PP The -\fBaligned_alloc\fR\fB\fR +aligned_alloc() function allocates \fIsize\fR bytes of memory such that the allocation\*(Aqs base address is a multiple of @@ -143,7 +143,7 @@ is not an integral multiple of \fIalignment\fR\&. .PP The -\fBrealloc\fR\fB\fR +realloc() function changes the size of the previously allocated memory referenced by \fIptr\fR to @@ -151,19 +151,19 @@ to bytes\&. The contents of the memory are unchanged up to the lesser of the new and old sizes\&. If the new size is larger, the contents of the newly allocated portion of the memory are undefined\&. Upon success, the memory referenced by \fIptr\fR is freed and a pointer to the newly allocated memory is returned\&. Note that -\fBrealloc\fR\fB\fR +realloc() may move the memory allocation, resulting in a different return value than \fIptr\fR\&. If \fIptr\fR is \fBNULL\fR, the -\fBrealloc\fR\fB\fR +realloc() function behaves identically to -\fBmalloc\fR\fB\fR +malloc() for the specified size\&. .PP The -\fBfree\fR\fB\fR +free() function causes the allocated memory referenced by \fIptr\fR to be made available for future allocations\&. If @@ -173,13 +173,13 @@ is .SS "Non\-standard API" .PP The -\fBmallocx\fR\fB\fR, -\fBrallocx\fR\fB\fR, -\fBxallocx\fR\fB\fR, -\fBsallocx\fR\fB\fR, -\fBdallocx\fR\fB\fR, -\fBsdallocx\fR\fB\fR, and -\fBnallocx\fR\fB\fR +mallocx(), +rallocx(), +xallocx(), +sallocx(), +dallocx(), +sdallocx(), and +nallocx() functions all have a \fIflags\fR argument that can be used to specify options\&. The functions only check the options that are contextually relevant\&. Use bitwise or (|) operations to specify one or more of the following: @@ -211,7 +211,7 @@ Initialize newly allocated memory to contain zero bytes\&. In the growing reallo .RS 4 Use the thread\-specific cache (tcache) specified by the identifier \fItc\fR, which must have been acquired via the -"tcache\&.create" +tcache\&.create mallctl\&. This macro does not validate that \fItc\fR specifies a valid identifier\&. @@ -238,7 +238,7 @@ specifies an arena index in the valid range\&. .RE .PP The -\fBmallocx\fR\fB\fR +mallocx() function allocates at least \fIsize\fR bytes of memory, and returns a pointer to the base address of the allocation\&. Behavior is undefined if @@ -247,7 +247,7 @@ is \fB0\fR\&. .PP The -\fBrallocx\fR\fB\fR +rallocx() function resizes the allocation at \fIptr\fR to be at least @@ -258,7 +258,7 @@ is \fB0\fR\&. .PP The -\fBxallocx\fR\fB\fR +xallocx() function resizes the allocation at \fIptr\fR in place to be at least @@ -274,33 +274,33 @@ is (\fIsize\fR + \fIextra\fR > \fBSIZE_T_MAX\fR)\&. .PP The -\fBsallocx\fR\fB\fR +sallocx() function returns the real size of the allocation at \fIptr\fR\&. .PP The -\fBdallocx\fR\fB\fR +dallocx() function causes the memory referenced by \fIptr\fR to be made available for future allocations\&. 
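(Editorial aside, not part of the patch: the *allocx() functions above share a single flags argument, and a minimal sketch of how they compose is shown below. It assumes the FreeBSD <malloc_np.h> declarations; upstream jemalloc exposes the same functions via <jemalloc/jemalloc.h>.)

/*
 * Illustrative sketch only -- not part of this diff. Basic use of the
 * non-standard *allocx() API described above, assuming <malloc_np.h>.
 */
#include <malloc_np.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	/* At least 4 KiB, zero-filled, aligned to a 64-byte cacheline. */
	void *p = mallocx(4096, MALLOCX_ZERO | MALLOCX_ALIGN(64));
	if (p == NULL)
		abort();

	/* sallocx() reports the real (usable) size of the allocation. */
	printf("real size: %zu\n", sallocx(p, 0));

	/*
	 * Try to grow in place; xallocx() never moves the allocation and
	 * returns the resulting real size whether or not it grew.
	 */
	printf("after xallocx: %zu\n", xallocx(p, 8192, 0, 0));

	dallocx(p, 0);
	return (0);
}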
.PP The -\fBsdallocx\fR\fB\fR +sdallocx() function is an extension of -\fBdallocx\fR\fB\fR +dallocx() with a \fIsize\fR parameter to allow the caller to pass in the allocation size as an optimization\&. The minimum valid input size is the original requested size of the allocation, and the maximum valid input size is the corresponding value returned by -\fBnallocx\fR\fB\fR +nallocx() or -\fBsallocx\fR\fB\fR\&. +sallocx()\&. .PP The -\fBnallocx\fR\fB\fR +nallocx() function allocates no memory, but it performs the same size computation as the -\fBmallocx\fR\fB\fR +mallocx() function, and returns the real size of the allocation that would result from the equivalent -\fBmallocx\fR\fB\fR +mallocx() function call, or \fB0\fR if the inputs exceed the maximum supported size class and/or alignment\&. Behavior is undefined if @@ -309,7 +309,7 @@ is \fB0\fR\&. .PP The -\fBmallctl\fR\fB\fR +mallctl() function provides a general interface for introspecting the memory allocator, as well as setting modifiable parameters and triggering actions\&. The period\-separated \fIname\fR argument specifies a location in a tree\-structured namespace; see the @@ -328,10 +328,12 @@ and \fB0\fR\&. .PP The -\fBmallctlnametomib\fR\fB\fR -function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name to a \(lqManagement Information Base\(rq (MIB) that can be passed repeatedly to -\fBmallctlbymib\fR\fB\fR\&. Upon successful return from -\fBmallctlnametomib\fR\fB\fR, +mallctlnametomib() +function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name to a +\(lqManagement Information Base\(rq +(MIB) that can be passed repeatedly to +mallctlbymib()\&. Upon successful return from +mallctlnametomib(), \fImibp\fR contains an array of \fI*miblenp\fR @@ -343,7 +345,7 @@ and the input value of \fI*miblenp\fR\&. Thus it is possible to pass a \fI*miblenp\fR that is smaller than the number of period\-separated name components, which results in a partial MIB that can be used as the basis for constructing a complete MIB\&. For name components that are integers (e\&.g\&. the 2 in -"arenas\&.bin\&.2\&.size"), the corresponding MIB component will always be that integer\&. Therefore, it is legitimate to construct code like the following: +arenas\&.bin\&.2\&.size), the corresponding MIB component will always be that integer\&. Therefore, it is legitimate to construct code like the following: .sp .if n \{\ .RS 4 @@ -372,35 +374,50 @@ for (i = 0; i < nbins; i++) { .\} .PP The -\fBmalloc_stats_print\fR\fB\fR -function writes human\-readable summary statistics via the +malloc_stats_print() +function writes summary statistics via the \fIwrite_cb\fR callback function pointer and \fIcbopaque\fR data passed to \fIwrite_cb\fR, or -\fBmalloc_message\fR\fB\fR +malloc_message() if \fIwrite_cb\fR is -\fBNULL\fR\&. This function can be called repeatedly\&. General information that never changes during execution can be omitted by specifying "g" as a character within the +\fBNULL\fR\&. The statistics are presented in human\-readable form unless +\(lqJ\(rq +is specified as a character within the +\fIopts\fR +string, in which case the statistics are presented in +\m[blue]\fBJSON format\fR\m[]\&\s-2\u[2]\d\s+2\&. This function can be called repeatedly\&. 
General information that never changes during execution can be omitted by specifying +\(lqg\(rq +as a character within the \fIopts\fR string\&. Note that -\fBmalloc_message\fR\fB\fR +malloc_message() uses the -\fBmallctl*\fR\fB\fR +mallctl*() functions internally, so inconsistent statistics can be reported if multiple threads use these functions simultaneously\&. If \fB\-\-enable\-stats\fR -is specified during configuration, \(lqm\(rq and \(lqa\(rq can be specified to omit merged arena and per arena statistics, respectively; \(lqb\(rq, \(lql\(rq, and \(lqh\(rq can be specified to omit per size class statistics for bins, large objects, and huge objects, respectively\&. Unrecognized characters are silently ignored\&. Note that thread caching may prevent some statistics from being completely up to date, since extra locking would be required to merge counters that track thread cache operations\&. +is specified during configuration, +\(lqm\(rq +and +\(lqa\(rq +can be specified to omit merged arena and per arena statistics, respectively; +\(lqb\(rq, +\(lql\(rq, and +\(lqh\(rq +can be specified to omit per size class statistics for bins, large objects, and huge objects, respectively\&. Unrecognized characters are silently ignored\&. Note that thread caching may prevent some statistics from being completely up to date, since extra locking would be required to merge counters that track thread cache operations\&. .PP The -\fBmalloc_usable_size\fR\fB\fR +malloc_usable_size() function returns the usable size of the allocation pointed to by \fIptr\fR\&. The return value may be larger than the size that was requested during allocation\&. The -\fBmalloc_usable_size\fR\fB\fR +malloc_usable_size() function is not a mechanism for in\-place -\fBrealloc\fR\fB\fR; rather it is provided solely as a tool for introspection purposes\&. Any discrepancy between the requested allocation size and the size reported by -\fBmalloc_usable_size\fR\fB\fR +realloc(); rather it is provided solely as a tool for introspection purposes\&. Any discrepancy between the requested allocation size and the size reported by +malloc_usable_size() should not be depended on, since such behavior is entirely implementation\-dependent\&. .SH "TUNING" .PP @@ -408,12 +425,14 @@ Once, when the first call is made to one of the memory allocation routines, the .PP The string specified via \fB\-\-with\-malloc\-conf\fR, the string pointed to by the global variable -\fImalloc_conf\fR, the \(lqname\(rq of the file referenced by the symbolic link named +\fImalloc_conf\fR, the +\(lqname\(rq +of the file referenced by the symbolic link named /etc/malloc\&.conf, and the value of the environment variable \fBMALLOC_CONF\fR, will be interpreted, in that order, from left to right as options\&. Note that \fImalloc_conf\fR may be read before -\fBmain\fR\fB\fR +main() is entered, so the declaration of \fImalloc_conf\fR should specify an initializer that contains the final value to be read by jemalloc\&. @@ -427,15 +446,15 @@ and can be safely set any time prior to program invocation\&. .PP An options string is a comma\-separated list of option:value pairs\&. There is one key corresponding to each -"opt\&.*" +opt\&.* mallctl (see the MALLCTL NAMESPACE section for options documentation)\&. For example, abort:true,narenas:1 sets the -"opt\&.abort" +opt\&.abort and -"opt\&.narenas" +opt\&.narenas options\&. Some options have boolean values (true/false), others have integer values (base 8, 10, or 16, depending on prefix), and yet others have raw string values\&. 
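(Editorial aside, not part of the patch: the malloc_stats_print() and tuning text above can be exercised with the short sketch below. It uses the documented malloc_conf global and the new "J" opts character; the option values are arbitrary examples, and <malloc_np.h> is assumed as on FreeBSD.)

/*
 * Illustrative sketch only -- not part of this diff. JSON statistics output
 * plus compile-time tuning via the malloc_conf global described above.
 */
#include <malloc_np.h>
#include <stdio.h>

/* malloc_conf may be read before main(), so use a constant initializer. */
const char *malloc_conf = "narenas:1,lg_tcache_max:13";

static void
write_cb(void *cbopaque, const char *s)
{
	fputs(s, (FILE *)cbopaque);
}

int
main(void)
{
	/*
	 * Emit JSON-formatted statistics ("J"), omitting the general
	 * information section that never changes during execution ("g").
	 */
	malloc_stats_print(write_cb, stderr, "Jg");
	return (0);
}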
.SH "IMPLEMENTATION NOTES" .PP @@ -460,22 +479,22 @@ Memory is conceptually broken into equal\-sized chunks, where the chunk size is .PP Small objects are managed in groups by page runs\&. Each run maintains a bitmap to track which regions are in use\&. Allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least sizeof(\fBdouble\fR)\&. All other object size classes are multiples of the quantum, spaced such that there are four size classes for each doubling in size, which limits internal fragmentation to approximately 20% for all but the smallest size classes\&. Small size classes are smaller than four times the page size, large size classes are smaller than the chunk size (see the -"opt\&.lg_chunk" +opt\&.lg_chunk option), and huge size classes extend from the chunk size up to the largest size class that does not exceed \fBPTRDIFF_MAX\fR\&. .PP Allocations are packed tightly together, which can be an issue for multi\-threaded applications\&. If you need to assure that allocations do not suffer from cacheline sharing, round your allocation requests up to the nearest multiple of the cacheline size, or specify cacheline alignment when allocating\&. .PP The -\fBrealloc\fR\fB\fR, -\fBrallocx\fR\fB\fR, and -\fBxallocx\fR\fB\fR +realloc(), +rallocx(), and +xallocx() functions may resize allocations without moving them under limited circumstances\&. Unlike the -\fB*allocx\fR\fB\fR +*allocx() API, the standard API does not officially round up the usable size of an allocation to the nearest size class, so technically it is necessary to call -\fBrealloc\fR\fB\fR +realloc() to grow e\&.g\&. a 9\-byte allocation to 16 bytes, or shrink a 16\-byte allocation to 9 bytes\&. Growth and shrinkage trivially succeeds in place as long as the pre\-size and post\-size both round up to the same size class\&. No other API guarantees are made regarding in\-place resizing, but the current implementation also tries to resize large and huge allocations in place, as long as the pre\-size and post\-size are both large or both huge\&. In such cases shrinkage always succeeds for large size classes, but for huge size classes the chunk allocator must support splitting (see -"arena\&.\&.chunk_hooks")\&. Growth only succeeds if the trailing memory is currently available, and additionally for huge size classes the chunk allocator must support merging\&. +arena\&.\&.chunk_hooks)\&. Growth only succeeds if the trailing memory is currently available, and additionally for huge size classes the chunk allocator must support merging\&. .PP Assuming 2 MiB chunks, 4 KiB pages, and a 16\-byte quantum on a 64\-bit system, the size classes in each category are as shown in Table 1\&. @@ -663,7 +682,7 @@ T} .SH "MALLCTL NAMESPACE" .PP The following names are defined in the namespace accessible via the -\fBmallctl*\fR\fB\fR +mallctl*() functions\&. Value types are specified in parentheses, their readable/writable statuses are encoded as rw, r\-, @@ -673,118 +692,118 @@ r\-, or indicates an integer component, where the integer varies from 0 to some upper value that must be determined via introspection\&. In the case of -"stats\&.arenas\&.\&.*", +stats\&.arenas\&.\&.*, equal to -"arenas\&.narenas" +arenas\&.narenas can be used to access the summation of statistics from all arenas\&. Take special note of the -"epoch" +epoch mallctl, which controls refreshing of cached dynamic statistics\&. 
.PP -"version" (\fBconst char *\fR) r\- +version (\fBconst char *\fR) r\- .RS 4 Return the jemalloc version string\&. .RE .PP -"epoch" (\fBuint64_t\fR) rw +epoch (\fBuint64_t\fR) rw .RS 4 If a value is passed in, refresh the data from which the -\fBmallctl*\fR\fB\fR +mallctl*() functions report values, and increment the epoch\&. Return the current epoch\&. This is useful for detecting whether another thread caused a refresh\&. .RE .PP -"config\&.cache_oblivious" (\fBbool\fR) r\- +config\&.cache_oblivious (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-cache\-oblivious\fR was specified during build configuration\&. .RE .PP -"config\&.debug" (\fBbool\fR) r\- +config\&.debug (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-debug\fR was specified during build configuration\&. .RE .PP -"config\&.fill" (\fBbool\fR) r\- +config\&.fill (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-fill\fR was specified during build configuration\&. .RE .PP -"config\&.lazy_lock" (\fBbool\fR) r\- +config\&.lazy_lock (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-lazy\-lock\fR was specified during build configuration\&. .RE .PP -"config\&.malloc_conf" (\fBconst char *\fR) r\- +config\&.malloc_conf (\fBconst char *\fR) r\- .RS 4 Embedded configure\-time\-specified run\-time options string, empty unless \fB\-\-with\-malloc\-conf\fR was specified during build configuration\&. .RE .PP -"config\&.munmap" (\fBbool\fR) r\- +config\&.munmap (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-munmap\fR was specified during build configuration\&. .RE .PP -"config\&.prof" (\fBbool\fR) r\- +config\&.prof (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-prof\fR was specified during build configuration\&. .RE .PP -"config\&.prof_libgcc" (\fBbool\fR) r\- +config\&.prof_libgcc (\fBbool\fR) r\- .RS 4 \fB\-\-disable\-prof\-libgcc\fR was not specified during build configuration\&. .RE .PP -"config\&.prof_libunwind" (\fBbool\fR) r\- +config\&.prof_libunwind (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-prof\-libunwind\fR was specified during build configuration\&. .RE .PP -"config\&.stats" (\fBbool\fR) r\- +config\&.stats (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-stats\fR was specified during build configuration\&. .RE .PP -"config\&.tcache" (\fBbool\fR) r\- +config\&.tcache (\fBbool\fR) r\- .RS 4 \fB\-\-disable\-tcache\fR was not specified during build configuration\&. .RE .PP -"config\&.tls" (\fBbool\fR) r\- +config\&.tls (\fBbool\fR) r\- .RS 4 \fB\-\-disable\-tls\fR was not specified during build configuration\&. .RE .PP -"config\&.utrace" (\fBbool\fR) r\- +config\&.utrace (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-utrace\fR was specified during build configuration\&. .RE .PP -"config\&.valgrind" (\fBbool\fR) r\- +config\&.valgrind (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-valgrind\fR was specified during build configuration\&. .RE .PP -"config\&.xmalloc" (\fBbool\fR) r\- +config\&.xmalloc (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-xmalloc\fR was specified during build configuration\&. .RE .PP -"opt\&.abort" (\fBbool\fR) r\- +opt\&.abort (\fBbool\fR) r\- .RS 4 Abort\-on\-warning enabled/disabled\&. If true, most warnings are fatal\&. The process will call \fBabort\fR(3) @@ -793,115 +812,132 @@ in these cases\&. This option is disabled by default unless is specified during configuration, in which case it is enabled by default\&. .RE .PP -"opt\&.dss" (\fBconst char *\fR) r\- +opt\&.dss (\fBconst char *\fR) r\- .RS 4 dss (\fBsbrk\fR(2)) allocation precedence as related to \fBmmap\fR(2) allocation\&. 
The following settings are supported if \fBsbrk\fR(2) -is supported by the operating system: \(lqdisabled\(rq, \(lqprimary\(rq, and \(lqsecondary\(rq; otherwise only \(lqdisabled\(rq is supported\&. The default is \(lqsecondary\(rq if +is supported by the operating system: +\(lqdisabled\(rq, +\(lqprimary\(rq, and +\(lqsecondary\(rq; otherwise only +\(lqdisabled\(rq +is supported\&. The default is +\(lqsecondary\(rq +if \fBsbrk\fR(2) -is supported by the operating system; \(lqdisabled\(rq otherwise\&. +is supported by the operating system; +\(lqdisabled\(rq +otherwise\&. .RE .PP -"opt\&.lg_chunk" (\fBsize_t\fR) r\- +opt\&.lg_chunk (\fBsize_t\fR) r\- .RS 4 Virtual memory chunk size (log base 2)\&. If a chunk size outside the supported size range is specified, the size is silently clipped to the minimum/maximum supported size\&. The default chunk size is 2 MiB (2^21)\&. .RE .PP -"opt\&.narenas" (\fBunsigned\fR) r\- +opt\&.narenas (\fBunsigned\fR) r\- .RS 4 Maximum number of arenas to use for automatic multiplexing of threads and arenas\&. The default is four times the number of CPUs, or one if there is a single CPU\&. .RE .PP -"opt\&.purge" (\fBconst char *\fR) r\- +opt\&.purge (\fBconst char *\fR) r\- .RS 4 Purge mode is \(lqratio\(rq (default) or \(lqdecay\(rq\&. See -"opt\&.lg_dirty_mult" +opt\&.lg_dirty_mult for details of the ratio mode\&. See -"opt\&.decay_time" +opt\&.decay_time for details of the decay mode\&. .RE .PP -"opt\&.lg_dirty_mult" (\fBssize_t\fR) r\- +opt\&.lg_dirty_mult (\fBssize_t\fR) r\- .RS 4 Per\-arena minimum ratio (log base 2) of active to dirty pages\&. Some dirty unused pages may be allowed to accumulate, within the limit set by the ratio (or one chunk worth of dirty pages, whichever is greater), before informing the kernel about some of those pages via \fBmadvise\fR(2) or a similar system call\&. This provides the kernel with sufficient information to recycle dirty pages if physical memory becomes scarce and the pages remain unused\&. The default minimum ratio is 8:1 (2^3:1); an option value of \-1 will disable dirty page purging\&. See -"arenas\&.lg_dirty_mult" +arenas\&.lg_dirty_mult and -"arena\&.\&.lg_dirty_mult" +arena\&.\&.lg_dirty_mult for related dynamic control options\&. .RE .PP -"opt\&.decay_time" (\fBssize_t\fR) r\- +opt\&.decay_time (\fBssize_t\fR) r\- .RS 4 Approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused\&. The pages are incrementally purged according to a sigmoidal decay curve that starts and ends with zero purge rate\&. A decay time of 0 causes all unused dirty pages to be purged immediately upon creation\&. A decay time of \-1 disables purging\&. The default decay time is 10 seconds\&. See -"arenas\&.decay_time" +arenas\&.decay_time and -"arena\&.\&.decay_time" +arena\&.\&.decay_time for related dynamic control options\&. .RE .PP -"opt\&.stats_print" (\fBbool\fR) r\- +opt\&.stats_print (\fBbool\fR) r\- .RS 4 Enable/disable statistics printing at exit\&. If enabled, the -\fBmalloc_stats_print\fR\fB\fR +malloc_stats_print() function is called at program exit via an \fBatexit\fR(3) function\&. If \fB\-\-enable\-stats\fR is specified during configuration, this has the potential to cause deadlock for a multi\-threaded process that exits while one or more threads are executing in the memory allocation functions\&. 
Furthermore, -\fBatexit\fR\fB\fR +atexit() may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls -\fBatexit\fR\fB\fR, so this option is not universally usable (though the application can register its own -\fBatexit\fR\fB\fR +atexit(), so this option is not universally usable (though the application can register its own +atexit() function with equivalent functionality)\&. Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application development\&. This option is disabled by default\&. .RE .PP -"opt\&.junk" (\fBconst char *\fR) r\- [\fB\-\-enable\-fill\fR] +opt\&.junk (\fBconst char *\fR) r\- [\fB\-\-enable\-fill\fR] .RS 4 -Junk filling\&. If set to "alloc", each byte of uninitialized allocated memory will be initialized to -0xa5\&. If set to "free", all deallocated memory will be initialized to -0x5a\&. If set to "true", both allocated and deallocated memory will be initialized, and if set to "false", junk filling be disabled entirely\&. This is intended for debugging and will impact performance negatively\&. This option is "false" by default unless +Junk filling\&. If set to +\(lqalloc\(rq, each byte of uninitialized allocated memory will be initialized to +0xa5\&. If set to +\(lqfree\(rq, all deallocated memory will be initialized to +0x5a\&. If set to +\(lqtrue\(rq, both allocated and deallocated memory will be initialized, and if set to +\(lqfalse\(rq, junk filling be disabled entirely\&. This is intended for debugging and will impact performance negatively\&. This option is +\(lqfalse\(rq +by default unless \fB\-\-enable\-debug\fR -is specified during configuration, in which case it is "true" by default unless running inside -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2\&. +is specified during configuration, in which case it is +\(lqtrue\(rq +by default unless running inside +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[3]\d\s+2\&. .RE .PP -"opt\&.quarantine" (\fBsize_t\fR) r\- [\fB\-\-enable\-fill\fR] +opt\&.quarantine (\fBsize_t\fR) r\- [\fB\-\-enable\-fill\fR] .RS 4 Per thread quarantine size in bytes\&. If non\-zero, each thread maintains a FIFO object quarantine that stores up to the specified number of bytes of memory\&. The quarantined memory is not freed until it is released from quarantine, though it is immediately junk\-filled if the -"opt\&.junk" +opt\&.junk option is enabled\&. This feature is of particular use in combination with -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, which can detect attempts to access quarantined objects\&. This is intended for debugging and will impact performance negatively\&. The default quarantine size is 0 unless running inside Valgrind, in which case the default is 16 MiB\&. +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[3]\d\s+2, which can detect attempts to access quarantined objects\&. This is intended for debugging and will impact performance negatively\&. The default quarantine size is 0 unless running inside Valgrind, in which case the default is 16 MiB\&. .RE .PP -"opt\&.redzone" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] +opt\&.redzone (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] .RS 4 Redzones enabled/disabled\&. If enabled, small allocations have redzones before and after them\&. Furthermore, if the -"opt\&.junk" +opt\&.junk option is enabled, the redzones are checked for corruption during deallocation\&. 
However, the primary intended purpose of this feature is to be used in combination with -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, which needs redzones in order to do effective buffer overflow/underflow detection\&. This option is intended for debugging and will impact performance negatively\&. This option is disabled by default unless running inside Valgrind\&. +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[3]\d\s+2, which needs redzones in order to do effective buffer overflow/underflow detection\&. This option is intended for debugging and will impact performance negatively\&. This option is disabled by default unless running inside Valgrind\&. .RE .PP -"opt\&.zero" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] +opt\&.zero (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] .RS 4 Zero filling enabled/disabled\&. If enabled, each byte of uninitialized allocated memory will be initialized to 0\&. Note that this initialization only happens once for each byte, so -\fBrealloc\fR\fB\fR +realloc() and -\fBrallocx\fR\fB\fR +rallocx() calls do not zero memory that was previously allocated\&. This is intended for debugging and will impact performance negatively\&. This option is disabled by default\&. .RE .PP -"opt\&.utrace" (\fBbool\fR) r\- [\fB\-\-enable\-utrace\fR] +opt\&.utrace (\fBbool\fR) r\- [\fB\-\-enable\-utrace\fR] .RS 4 Allocation tracing based on \fButrace\fR(2) enabled/disabled\&. This option is disabled by default\&. .RE .PP -"opt\&.xmalloc" (\fBbool\fR) r\- [\fB\-\-enable\-xmalloc\fR] +opt\&.xmalloc (\fBbool\fR) r\- [\fB\-\-enable\-xmalloc\fR] .RS 4 Abort\-on\-out\-of\-memory enabled/disabled\&. If enabled, rather than returning failure for any allocation function, display a diagnostic message on \fBSTDERR_FILENO\fR @@ -921,94 +957,94 @@ malloc_conf = "xmalloc:true"; This option is disabled by default\&. .RE .PP -"opt\&.tcache" (\fBbool\fR) r\- [\fB\-\-enable\-tcache\fR] +opt\&.tcache (\fBbool\fR) r\- [\fB\-\-enable\-tcache\fR] .RS 4 Thread\-specific caching (tcache) enabled/disabled\&. When there are multiple threads, each thread uses a tcache for objects up to a certain size\&. Thread\-specific caching allows many allocations to be satisfied without performing any thread synchronization, at the cost of increased memory use\&. See the -"opt\&.lg_tcache_max" +opt\&.lg_tcache_max option for related tuning information\&. This option is enabled by default unless running inside -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, in which case it is forcefully disabled\&. +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[3]\d\s+2, in which case it is forcefully disabled\&. .RE .PP -"opt\&.lg_tcache_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR] +opt\&.lg_tcache_max (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR] .RS 4 Maximum size class (log base 2) to cache in the thread\-specific cache (tcache)\&. At a minimum, all small size classes are cached, and at a maximum all large size classes are cached\&. The default maximum is 32 KiB (2^15)\&. .RE .PP -"opt\&.prof" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.prof (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Memory profiling enabled/disabled\&. If enabled, profile memory allocation activity\&. See the -"opt\&.prof_active" +opt\&.prof_active option for on\-the\-fly activation/deactivation\&. See the -"opt\&.lg_prof_sample" +opt\&.lg_prof_sample option for probabilistic sampling control\&. See the -"opt\&.prof_accum" +opt\&.prof_accum option for control of cumulative sample reporting\&. 
See the -"opt\&.lg_prof_interval" +opt\&.lg_prof_interval option for information on interval\-triggered profile dumping, the -"opt\&.prof_gdump" +opt\&.prof_gdump option for information on high\-water\-triggered profile dumping, and the -"opt\&.prof_final" +opt\&.prof_final option for final profile dumping\&. Profile output is compatible with the \fBjeprof\fR command, which is based on the \fBpprof\fR that is developed as part of the -\m[blue]\fBgperftools package\fR\m[]\&\s-2\u[3]\d\s+2\&. See +\m[blue]\fBgperftools package\fR\m[]\&\s-2\u[4]\d\s+2\&. See HEAP PROFILE FORMAT for heap profile format documentation\&. .RE .PP -"opt\&.prof_prefix" (\fBconst char *\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.prof_prefix (\fBconst char *\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Filename prefix for profile dumps\&. If the prefix is set to the empty string, no automatic dumps will occur; this is primarily useful for disabling the automatic final heap dump (which also disables leak reporting, if enabled)\&. The default prefix is jeprof\&. .RE .PP -"opt\&.prof_active" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.prof_active (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Profiling activated/deactivated\&. This is a secondary control mechanism that makes it possible to start the application with profiling enabled (see the -"opt\&.prof" +opt\&.prof option) but inactive, then toggle profiling at any time during program execution with the -"prof\&.active" +prof\&.active mallctl\&. This option is enabled by default\&. .RE .PP -"opt\&.prof_thread_active_init" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.prof_thread_active_init (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Initial setting for -"thread\&.prof\&.active" +thread\&.prof\&.active in newly created threads\&. The initial setting for newly created threads can also be changed during execution via the -"prof\&.thread_active_init" +prof\&.thread_active_init mallctl\&. This option is enabled by default\&. .RE .PP -"opt\&.lg_prof_sample" (\fBsize_t\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.lg_prof_sample (\fBsize_t\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Average interval (log base 2) between allocation samples, as measured in bytes of allocation activity\&. Increasing the sampling interval decreases profile fidelity, but also decreases the computational overhead\&. The default sample interval is 512 KiB (2^19 B)\&. .RE .PP -"opt\&.prof_accum" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.prof_accum (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Reporting of cumulative object/byte counts in profile dumps enabled/disabled\&. If this option is enabled, every unique backtrace must be stored for the duration of execution\&. Depending on the application, this can impose a large memory overhead, and the cumulative counts are not always of interest\&. This option is disabled by default\&. .RE .PP -"opt\&.lg_prof_interval" (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.lg_prof_interval (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Average interval (log base 2) between memory profile dumps, as measured in bytes of allocation activity\&. The actual interval between dumps may be sporadic because decentralized allocation counters are used to avoid synchronization bottlenecks\&. Profiles are dumped to files named according to the pattern \&.\&.\&.i\&.heap, where is controlled by the -"opt\&.prof_prefix" +opt\&.prof_prefix option\&. By default, interval\-triggered profile dumping is disabled (encoded as \-1)\&. 
.RE .PP -"opt\&.prof_gdump" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.prof_gdump (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Set the initial state of -"prof\&.gdump", which when enabled triggers a memory profile dump every time the total virtual memory exceeds the previous maximum\&. This option is disabled by default\&. +prof\&.gdump, which when enabled triggers a memory profile dump every time the total virtual memory exceeds the previous maximum\&. This option is disabled by default\&. .RE .PP -"opt\&.prof_final" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.prof_final (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Use an \fBatexit\fR(3) @@ -1016,71 +1052,71 @@ function to dump final memory usage to a file named according to the pattern \&.\&.\&.f\&.heap, where is controlled by the -"opt\&.prof_prefix" +opt\&.prof_prefix option\&. Note that -\fBatexit\fR\fB\fR +atexit() may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls -\fBatexit\fR\fB\fR, so this option is not universally usable (though the application can register its own -\fBatexit\fR\fB\fR +atexit(), so this option is not universally usable (though the application can register its own +atexit() function with equivalent functionality)\&. This option is disabled by default\&. .RE .PP -"opt\&.prof_leak" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +opt\&.prof_leak (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Leak reporting enabled/disabled\&. If enabled, use an \fBatexit\fR(3) function to report memory leaks detected by allocation sampling\&. See the -"opt\&.prof" +opt\&.prof option for information on analyzing heap profile output\&. This option is disabled by default\&. .RE .PP -"thread\&.arena" (\fBunsigned\fR) rw +thread\&.arena (\fBunsigned\fR) rw .RS 4 Get or set the arena associated with the calling thread\&. If the specified arena was not initialized beforehand (see the -"arenas\&.initialized" +arenas\&.initialized mallctl), it will be automatically initialized as a side effect of calling this interface\&. .RE .PP -"thread\&.allocated" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +thread\&.allocated (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Get the total number of bytes ever allocated by the calling thread\&. This counter has the potential to wrap around; it is up to the application to appropriately interpret the counter in such cases\&. .RE .PP -"thread\&.allocatedp" (\fBuint64_t *\fR) r\- [\fB\-\-enable\-stats\fR] +thread\&.allocatedp (\fBuint64_t *\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Get a pointer to the the value that is returned by the -"thread\&.allocated" +thread\&.allocated mallctl\&. This is useful for avoiding the overhead of repeated -\fBmallctl*\fR\fB\fR +mallctl*() calls\&. .RE .PP -"thread\&.deallocated" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +thread\&.deallocated (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Get the total number of bytes ever deallocated by the calling thread\&. This counter has the potential to wrap around; it is up to the application to appropriately interpret the counter in such cases\&. .RE .PP -"thread\&.deallocatedp" (\fBuint64_t *\fR) r\- [\fB\-\-enable\-stats\fR] +thread\&.deallocatedp (\fBuint64_t *\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Get a pointer to the the value that is returned by the -"thread\&.deallocated" +thread\&.deallocated mallctl\&. This is useful for avoiding the overhead of repeated -\fBmallctl*\fR\fB\fR +mallctl*() calls\&. 
.RE .PP -"thread\&.tcache\&.enabled" (\fBbool\fR) rw [\fB\-\-enable\-tcache\fR] +thread\&.tcache\&.enabled (\fBbool\fR) rw [\fB\-\-enable\-tcache\fR] .RS 4 Enable/disable calling thread\*(Aqs tcache\&. The tcache is implicitly flushed as a side effect of becoming disabled (see -"thread\&.tcache\&.flush")\&. +thread\&.tcache\&.flush)\&. .RE .PP -"thread\&.tcache\&.flush" (\fBvoid\fR) \-\- [\fB\-\-enable\-tcache\fR] +thread\&.tcache\&.flush (\fBvoid\fR) \-\- [\fB\-\-enable\-tcache\fR] .RS 4 Flush calling thread\*(Aqs thread\-specific cache (tcache)\&. This interface releases all cached objects and internal data structures associated with the calling thread\*(Aqs tcache\&. Ordinarily, this interface need not be called, since automatic periodic incremental garbage collection occurs, and the thread cache is automatically discarded when a thread exits\&. However, garbage collection is triggered by allocation activity, so it is possible for a thread that stops allocating/deallocating to retain its cache indefinitely, in which case the developer may find manual flushing useful\&. .RE .PP -"thread\&.prof\&.name" (\fBconst char *\fR) r\- or \-w [\fB\-\-enable\-prof\fR] +thread\&.prof\&.name (\fBconst char *\fR) r\- or \-w [\fB\-\-enable\-prof\fR] .RS 4 Get/set the descriptive name associated with the calling thread in memory profile dumps\&. An internal copy of the name string is created, so the input string need not be maintained after this interface completes execution\&. The output string of this interface should be copied for non\-ephemeral uses, because multiple implementation details can cause asynchronous string deallocation\&. Furthermore, each invocation of this interface can only read or write; simultaneous read/write is not supported due to string lifetime limitations\&. The name string must be nil\-terminated and comprised only of characters in the sets recognized by \fBisgraph\fR(3) @@ -1088,78 +1124,78 @@ and \fBisblank\fR(3)\&. .RE .PP -"thread\&.prof\&.active" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] +thread\&.prof\&.active (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] .RS 4 Control whether sampling is currently active for the calling thread\&. This is an activation mechanism in addition to -"prof\&.active"; both must be active for the calling thread to sample\&. This flag is enabled by default\&. +prof\&.active; both must be active for the calling thread to sample\&. This flag is enabled by default\&. .RE .PP -"tcache\&.create" (\fBunsigned\fR) r\- [\fB\-\-enable\-tcache\fR] +tcache\&.create (\fBunsigned\fR) r\- [\fB\-\-enable\-tcache\fR] .RS 4 Create an explicit thread\-specific cache (tcache) and return an identifier that can be passed to the \fBMALLOCX_TCACHE(\fR\fB\fItc\fR\fR\fB)\fR macro to explicitly use the specified cache rather than the automatically managed one that is used by default\&. Each explicit cache can be used by only one thread at a time; the application must assure that this constraint holds\&. .RE .PP -"tcache\&.flush" (\fBunsigned\fR) \-w [\fB\-\-enable\-tcache\fR] +tcache\&.flush (\fBunsigned\fR) \-w [\fB\-\-enable\-tcache\fR] .RS 4 Flush the specified thread\-specific cache (tcache)\&. The same considerations apply to this interface as to -"thread\&.tcache\&.flush", except that the tcache will never be automatically discarded\&. +thread\&.tcache\&.flush, except that the tcache will never be automatically discarded\&. 
.RE .PP -"tcache\&.destroy" (\fBunsigned\fR) \-w [\fB\-\-enable\-tcache\fR] +tcache\&.destroy (\fBunsigned\fR) \-w [\fB\-\-enable\-tcache\fR] .RS 4 Flush the specified thread\-specific cache (tcache) and make the identifier available for use during a future tcache creation\&. .RE .PP -"arena\&.\&.purge" (\fBvoid\fR) \-\- +arena\&.\&.purge (\fBvoid\fR) \-\- .RS 4 Purge all unused dirty pages for arena , or for all arenas if equals -"arenas\&.narenas"\&. +arenas\&.narenas\&. .RE .PP -"arena\&.\&.decay" (\fBvoid\fR) \-\- +arena\&.\&.decay (\fBvoid\fR) \-\- .RS 4 Trigger decay\-based purging of unused dirty pages for arena , or for all arenas if equals -"arenas\&.narenas"\&. The proportion of unused dirty pages to be purged depends on the current time; see -"opt\&.decay_time" +arenas\&.narenas\&. The proportion of unused dirty pages to be purged depends on the current time; see +opt\&.decay_time for details\&. .RE .PP -"arena\&.\&.reset" (\fBvoid\fR) \-\- +arena\&.\&.reset (\fBvoid\fR) \-\- .RS 4 Discard all of the arena\*(Aqs extant allocations\&. This interface can only be used with arenas created via -"arenas\&.extend"\&. None of the arena\*(Aqs discarded/cached allocations may accessed afterward\&. As part of this requirement, all thread caches which were used to allocate/deallocate in conjunction with the arena must be flushed beforehand\&. This interface cannot be used if running inside Valgrind, nor if the +arenas\&.extend\&. None of the arena\*(Aqs discarded/cached allocations may accessed afterward\&. As part of this requirement, all thread caches which were used to allocate/deallocate in conjunction with the arena must be flushed beforehand\&. This interface cannot be used if running inside Valgrind, nor if the quarantine size is non\-zero\&. .RE .PP -"arena\&.\&.dss" (\fBconst char *\fR) rw +arena\&.\&.dss (\fBconst char *\fR) rw .RS 4 Set the precedence of dss allocation as related to mmap allocation for arena , or for all arenas if equals -"arenas\&.narenas"\&. See -"opt\&.dss" +arenas\&.narenas\&. See +opt\&.dss for supported settings\&. .RE .PP -"arena\&.\&.lg_dirty_mult" (\fBssize_t\fR) rw +arena\&.\&.lg_dirty_mult (\fBssize_t\fR) rw .RS 4 Current per\-arena minimum ratio (log base 2) of active to dirty pages for arena \&. Each time this interface is set and the ratio is increased, pages are synchronously purged as necessary to impose the new ratio\&. See -"opt\&.lg_dirty_mult" +opt\&.lg_dirty_mult for additional information\&. .RE .PP -"arena\&.\&.decay_time" (\fBssize_t\fR) rw +arena\&.\&.decay_time (\fBssize_t\fR) rw .RS 4 Current per\-arena approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused\&. Each time this interface is set, all currently unused dirty pages are considered to have fully decayed, which causes immediate purging of all unused dirty pages unless the decay time is set to \-1 (i\&.e\&. purging disabled)\&. See -"opt\&.decay_time" +opt\&.decay_time for additional information\&. .RE .PP -"arena\&.\&.chunk_hooks" (\fBchunk_hooks_t\fR) rw +arena\&.\&.chunk_hooks (\fBchunk_hooks_t\fR) rw .RS 4 Get or set the chunk management hook functions for arena \&. The functions must be capable of operating on all extant chunks associated with arena , usually by passing unknown chunks to the replaced functions\&. 
In practice, it is feasible to control allocation for arenas created via -"arenas\&.extend" +arenas\&.extend such that all chunks originate from an application\-supplied chunk allocator (by setting custom chunk hook functions just after arena creation), but the automatically created arenas may have already created chunks prior to the application having an opportunity to take over chunk allocation\&. .sp .if n \{\ @@ -1228,7 +1264,7 @@ is not on success or \fBNULL\fR on error\&. Committed memory may be committed in absolute terms as on a system that does not overcommit, or in implicit terms as on a system that overcommits and satisfies physical memory needs on demand via soft page faults\&. Note that replacing the default chunk allocation function makes the arena\*(Aqs -"arena\&.\&.dss" +arena\&.\&.dss setting irrelevant\&. .HP \w'typedef\ bool\ (chunk_dalloc_t)('u .BI "typedef bool (chunk_dalloc_t)(void\ *" "chunk" ", size_t\ " "size" ", bool\ " "committed" ", unsigned\ " "arena_ind" ");" @@ -1375,436 +1411,436 @@ into one contiguous chunk, operating on \fIarena_ind\fR, returning false upon success\&. If the function returns true, this indicates that the chunks remain distinct mappings and therefore should continue to be operated on independently\&. .RE .PP -"arenas\&.narenas" (\fBunsigned\fR) r\- +arenas\&.narenas (\fBunsigned\fR) r\- .RS 4 Current limit on number of arenas\&. .RE .PP -"arenas\&.initialized" (\fBbool *\fR) r\- +arenas\&.initialized (\fBbool *\fR) r\- .RS 4 An array of -"arenas\&.narenas" +arenas\&.narenas booleans\&. Each boolean indicates whether the corresponding arena is initialized\&. .RE .PP -"arenas\&.lg_dirty_mult" (\fBssize_t\fR) rw +arenas\&.lg_dirty_mult (\fBssize_t\fR) rw .RS 4 Current default per\-arena minimum ratio (log base 2) of active to dirty pages, used to initialize -"arena\&.\&.lg_dirty_mult" +arena\&.\&.lg_dirty_mult during arena creation\&. See -"opt\&.lg_dirty_mult" +opt\&.lg_dirty_mult for additional information\&. .RE .PP -"arenas\&.decay_time" (\fBssize_t\fR) rw +arenas\&.decay_time (\fBssize_t\fR) rw .RS 4 Current default per\-arena approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused, used to initialize -"arena\&.\&.decay_time" +arena\&.\&.decay_time during arena creation\&. See -"opt\&.decay_time" +opt\&.decay_time for additional information\&. .RE .PP -"arenas\&.quantum" (\fBsize_t\fR) r\- +arenas\&.quantum (\fBsize_t\fR) r\- .RS 4 Quantum size\&. .RE .PP -"arenas\&.page" (\fBsize_t\fR) r\- +arenas\&.page (\fBsize_t\fR) r\- .RS 4 Page size\&. .RE .PP -"arenas\&.tcache_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR] +arenas\&.tcache_max (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR] .RS 4 Maximum thread\-cached size class\&. .RE .PP -"arenas\&.nbins" (\fBunsigned\fR) r\- +arenas\&.nbins (\fBunsigned\fR) r\- .RS 4 Number of bin size classes\&. .RE .PP -"arenas\&.nhbins" (\fBunsigned\fR) r\- [\fB\-\-enable\-tcache\fR] +arenas\&.nhbins (\fBunsigned\fR) r\- [\fB\-\-enable\-tcache\fR] .RS 4 Total number of thread cache bin size classes\&. .RE .PP -"arenas\&.bin\&.\&.size" (\fBsize_t\fR) r\- +arenas\&.bin\&.\&.size (\fBsize_t\fR) r\- .RS 4 Maximum size supported by size class\&. .RE .PP -"arenas\&.bin\&.\&.nregs" (\fBuint32_t\fR) r\- +arenas\&.bin\&.\&.nregs (\fBuint32_t\fR) r\- .RS 4 Number of regions per page run\&. 
.RE .PP -"arenas\&.bin\&.\&.run_size" (\fBsize_t\fR) r\- +arenas\&.bin\&.\&.run_size (\fBsize_t\fR) r\- .RS 4 Number of bytes per page run\&. .RE .PP -"arenas\&.nlruns" (\fBunsigned\fR) r\- +arenas\&.nlruns (\fBunsigned\fR) r\- .RS 4 Total number of large size classes\&. .RE .PP -"arenas\&.lrun\&.\&.size" (\fBsize_t\fR) r\- +arenas\&.lrun\&.\&.size (\fBsize_t\fR) r\- .RS 4 Maximum size supported by this large size class\&. .RE .PP -"arenas\&.nhchunks" (\fBunsigned\fR) r\- +arenas\&.nhchunks (\fBunsigned\fR) r\- .RS 4 Total number of huge size classes\&. .RE .PP -"arenas\&.hchunk\&.\&.size" (\fBsize_t\fR) r\- +arenas\&.hchunk\&.\&.size (\fBsize_t\fR) r\- .RS 4 Maximum size supported by this huge size class\&. .RE .PP -"arenas\&.extend" (\fBunsigned\fR) r\- +arenas\&.extend (\fBunsigned\fR) r\- .RS 4 Extend the array of arenas by appending a new arena, and returning the new arena index\&. .RE .PP -"prof\&.thread_active_init" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] +prof\&.thread_active_init (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] .RS 4 Control the initial setting for -"thread\&.prof\&.active" +thread\&.prof\&.active in newly created threads\&. See the -"opt\&.prof_thread_active_init" +opt\&.prof_thread_active_init option for additional information\&. .RE .PP -"prof\&.active" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] +prof\&.active (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] .RS 4 Control whether sampling is currently active\&. See the -"opt\&.prof_active" +opt\&.prof_active option for additional information, as well as the interrelated -"thread\&.prof\&.active" +thread\&.prof\&.active mallctl\&. .RE .PP -"prof\&.dump" (\fBconst char *\fR) \-w [\fB\-\-enable\-prof\fR] +prof\&.dump (\fBconst char *\fR) \-w [\fB\-\-enable\-prof\fR] .RS 4 Dump a memory profile to the specified file, or if NULL is specified, to a file according to the pattern \&.\&.\&.m\&.heap, where is controlled by the -"opt\&.prof_prefix" +opt\&.prof_prefix option\&. .RE .PP -"prof\&.gdump" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] +prof\&.gdump (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] .RS 4 When enabled, trigger a memory profile dump every time the total virtual memory exceeds the previous maximum\&. Profiles are dumped to files named according to the pattern \&.\&.\&.u\&.heap, where is controlled by the -"opt\&.prof_prefix" +opt\&.prof_prefix option\&. .RE .PP -"prof\&.reset" (\fBsize_t\fR) \-w [\fB\-\-enable\-prof\fR] +prof\&.reset (\fBsize_t\fR) \-w [\fB\-\-enable\-prof\fR] .RS 4 Reset all memory profile statistics, and optionally update the sample rate (see -"opt\&.lg_prof_sample" +opt\&.lg_prof_sample and -"prof\&.lg_sample")\&. +prof\&.lg_sample)\&. .RE .PP -"prof\&.lg_sample" (\fBsize_t\fR) r\- [\fB\-\-enable\-prof\fR] +prof\&.lg_sample (\fBsize_t\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Get the current sample rate (see -"opt\&.lg_prof_sample")\&. +opt\&.lg_prof_sample)\&. .RE .PP -"prof\&.interval" (\fBuint64_t\fR) r\- [\fB\-\-enable\-prof\fR] +prof\&.interval (\fBuint64_t\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Average number of bytes allocated between interval\-based profile dumps\&. See the -"opt\&.lg_prof_interval" +opt\&.lg_prof_interval option for additional information\&. .RE .PP -"stats\&.cactive" (\fBsize_t *\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.cactive (\fBsize_t *\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Pointer to a counter that contains an approximate count of the current number of bytes in active pages\&. 
The estimate may be high, but never low, because each arena rounds up when computing its contribution to the counter\&. Note that the -"epoch" +epoch mallctl has no bearing on this counter\&. Furthermore, counter consistency is maintained via atomic operations, so it is necessary to use an atomic operation in order to guarantee a consistent read when dereferencing the pointer\&. .RE .PP -"stats\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.allocated (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Total number of bytes allocated by the application\&. .RE .PP -"stats\&.active" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.active (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Total number of bytes in active pages allocated by the application\&. This is a multiple of the page size, and greater than or equal to -"stats\&.allocated"\&. This does not include -"stats\&.arenas\&.\&.pdirty", nor pages entirely devoted to allocator metadata\&. +stats\&.allocated\&. This does not include +stats\&.arenas\&.\&.pdirty, nor pages entirely devoted to allocator metadata\&. .RE .PP -"stats\&.metadata" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.metadata (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Total number of bytes dedicated to metadata, which comprise base allocations used for bootstrap\-sensitive internal allocator data structures, arena chunk headers (see -"stats\&.arenas\&.\&.metadata\&.mapped"), and internal allocations (see -"stats\&.arenas\&.\&.metadata\&.allocated")\&. +stats\&.arenas\&.\&.metadata\&.mapped), and internal allocations (see +stats\&.arenas\&.\&.metadata\&.allocated)\&. .RE .PP -"stats\&.resident" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.resident (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Maximum number of bytes in physically resident data pages mapped by the allocator, comprising all pages dedicated to allocator metadata, pages backing active allocations, and unused dirty pages\&. This is a maximum rather than precise because pages may not actually be physically resident if they correspond to demand\-zeroed virtual memory that has not yet been touched\&. This is a multiple of the page size, and is larger than -"stats\&.active"\&. +stats\&.active\&. .RE .PP -"stats\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.mapped (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Total number of bytes in active chunks mapped by the allocator\&. This is a multiple of the chunk size, and is larger than -"stats\&.active"\&. This does not include inactive chunks, even those that contain unused dirty pages, which means that there is no strict ordering between this and -"stats\&.resident"\&. +stats\&.active\&. This does not include inactive chunks, even those that contain unused dirty pages, which means that there is no strict ordering between this and +stats\&.resident\&. .RE .PP -"stats\&.retained" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.retained (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Total number of bytes in virtual memory mappings that were retained rather than being returned to the operating system via e\&.g\&. \fBmunmap\fR(2)\&. Retained virtual memory is typically untouched, decommitted, or purged, so it has no strongly associated physical memory (see chunk hooks for details)\&. Retained memory is excluded from mapped memory statistics, e\&.g\&. -"stats\&.mapped"\&. +stats\&.mapped\&. 
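
Usage note, outside the patch text: most stats.* values are snapshots that are only refreshed when the epoch mallctl (referenced in the stats.cactive description above) is written. A minimal sketch, assuming FreeBSD's <malloc_np.h> and a jemalloc built with --enable-stats; read_szstat is an illustrative helper, not an API:

    #include <stdio.h>
    #include <stdint.h>
    #include <malloc_np.h>

    static size_t
    read_szstat(const char *name)
    {
        size_t val = 0, sz = sizeof(val);

        (void)mallctl(name, &val, &sz, NULL, 0);
        return (val);
    }

    int
    main(void)
    {
        uint64_t epoch = 1;
        size_t sz = sizeof(epoch);

        /* stats.* values are cached; writing "epoch" refreshes them. */
        (void)mallctl("epoch", &epoch, &sz, &epoch, sz);

        printf("allocated: %zu\n", read_szstat("stats.allocated"));
        printf("active:    %zu\n", read_szstat("stats.active"));
        printf("resident:  %zu\n", read_szstat("stats.resident"));
        printf("mapped:    %zu\n", read_szstat("stats.mapped"));
        return (0);
    }
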
.RE .PP -"stats\&.arenas\&.\&.dss" (\fBconst char *\fR) r\- +stats\&.arenas\&.\&.dss (\fBconst char *\fR) r\- .RS 4 dss (\fBsbrk\fR(2)) allocation precedence as related to \fBmmap\fR(2) allocation\&. See -"opt\&.dss" +opt\&.dss for details\&. .RE .PP -"stats\&.arenas\&.\&.lg_dirty_mult" (\fBssize_t\fR) r\- +stats\&.arenas\&.\&.lg_dirty_mult (\fBssize_t\fR) r\- .RS 4 Minimum ratio (log base 2) of active to dirty pages\&. See -"opt\&.lg_dirty_mult" +opt\&.lg_dirty_mult for details\&. .RE .PP -"stats\&.arenas\&.\&.decay_time" (\fBssize_t\fR) r\- +stats\&.arenas\&.\&.decay_time (\fBssize_t\fR) r\- .RS 4 Approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused\&. See -"opt\&.decay_time" +opt\&.decay_time for details\&. .RE .PP -"stats\&.arenas\&.\&.nthreads" (\fBunsigned\fR) r\- +stats\&.arenas\&.\&.nthreads (\fBunsigned\fR) r\- .RS 4 Number of threads currently assigned to arena\&. .RE .PP -"stats\&.arenas\&.\&.pactive" (\fBsize_t\fR) r\- +stats\&.arenas\&.\&.pactive (\fBsize_t\fR) r\- .RS 4 Number of pages in active runs\&. .RE .PP -"stats\&.arenas\&.\&.pdirty" (\fBsize_t\fR) r\- +stats\&.arenas\&.\&.pdirty (\fBsize_t\fR) r\- .RS 4 Number of pages within unused runs that are potentially dirty, and for which -\fBmadvise\fR\fB\fI\&.\&.\&.\fR\fR\fB \fR\fB\fI\fBMADV_DONTNEED\fR\fR\fR +madvise\fI\&.\&.\&.\fR \fI\fBMADV_DONTNEED\fR\fR or similar has not been called\&. .RE .PP -"stats\&.arenas\&.\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.mapped (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of mapped bytes\&. .RE .PP -"stats\&.arenas\&.\&.retained" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.retained (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of retained bytes\&. See -"stats\&.retained" +stats\&.retained for details\&. .RE .PP -"stats\&.arenas\&.\&.metadata\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.metadata\&.mapped (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of mapped bytes in arena chunk headers, which track the states of the non\-metadata pages\&. .RE .PP -"stats\&.arenas\&.\&.metadata\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.metadata\&.allocated (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of bytes dedicated to internal allocations\&. Internal allocations differ from application\-originated allocations in that they are for internal use, and that they are omitted from heap profiles\&. This statistic is reported separately from -"stats\&.metadata" +stats\&.metadata and -"stats\&.arenas\&.\&.metadata\&.mapped" +stats\&.arenas\&.\&.metadata\&.mapped because it overlaps with e\&.g\&. the -"stats\&.allocated" +stats\&.allocated and -"stats\&.active" +stats\&.active statistics, whereas the other metadata statistics do not\&. .RE .PP -"stats\&.arenas\&.\&.npurge" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.npurge (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of dirty page purge sweeps performed\&. .RE .PP -"stats\&.arenas\&.\&.nmadvise" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.nmadvise (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of -\fBmadvise\fR\fB\fI\&.\&.\&.\fR\fR\fB \fR\fB\fI\fBMADV_DONTNEED\fR\fR\fR +madvise\fI\&.\&.\&.\fR \fI\fBMADV_DONTNEED\fR\fR or similar calls made to purge dirty pages\&. 
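
The per-arena statistics above are addressed by substituting an arena index for <i> in the mallctl name. The following sketch (illustrative helper, not from the patch; assumes <malloc_np.h>) reads the page counters for one arena by formatting the name directly; mallctlnametomib()/mallctlbymib() would avoid re-parsing the name on every call:

    #include <stdio.h>
    #include <malloc_np.h>

    /* Print active/dirty page counts for arena `ind`. */
    static void
    print_arena_pages(unsigned ind)
    {
        char name[64];
        size_t pactive, pdirty, sz;

        sz = sizeof(pactive);
        snprintf(name, sizeof(name), "stats.arenas.%u.pactive", ind);
        if (mallctl(name, &pactive, &sz, NULL, 0) != 0)
            return;

        sz = sizeof(pdirty);
        snprintf(name, sizeof(name), "stats.arenas.%u.pdirty", ind);
        if (mallctl(name, &pdirty, &sz, NULL, 0) != 0)
            return;

        printf("arena %u: %zu active pages, %zu dirty pages\n", ind, pactive,
            pdirty);
    }
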
.RE .PP -"stats\&.arenas\&.\&.purged" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.purged (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of pages purged\&. .RE .PP -"stats\&.arenas\&.\&.small\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.small\&.allocated (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of bytes currently allocated by small objects\&. .RE .PP -"stats\&.arenas\&.\&.small\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.small\&.nmalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocation requests served by small bins\&. .RE .PP -"stats\&.arenas\&.\&.small\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.small\&.ndalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of small objects returned to bins\&. .RE .PP -"stats\&.arenas\&.\&.small\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.small\&.nrequests (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of small allocation requests\&. .RE .PP -"stats\&.arenas\&.\&.large\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.large\&.allocated (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of bytes currently allocated by large objects\&. .RE .PP -"stats\&.arenas\&.\&.large\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.large\&.nmalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of large allocation requests served directly by the arena\&. .RE .PP -"stats\&.arenas\&.\&.large\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.large\&.ndalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of large deallocation requests served directly by the arena\&. .RE .PP -"stats\&.arenas\&.\&.large\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.large\&.nrequests (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of large allocation requests\&. .RE .PP -"stats\&.arenas\&.\&.huge\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.huge\&.allocated (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of bytes currently allocated by huge objects\&. .RE .PP -"stats\&.arenas\&.\&.huge\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.huge\&.nmalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of huge allocation requests served directly by the arena\&. .RE .PP -"stats\&.arenas\&.\&.huge\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.huge\&.ndalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of huge deallocation requests served directly by the arena\&. .RE .PP -"stats\&.arenas\&.\&.huge\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.huge\&.nrequests (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of huge allocation requests\&. .RE .PP -"stats\&.arenas\&.\&.bins\&.\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.bins\&.\&.nmalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocations served by bin\&. 
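
Hypothetical example, not part of the patch: the small/large/huge allocated counters above can be combined into a per-arena live-byte breakdown. Requires a jemalloc built with --enable-stats; the helper name is illustrative only.

    #include <stdio.h>
    #include <malloc_np.h>

    /* Print live bytes per allocation class for arena `ind`. */
    static void
    print_arena_breakdown(unsigned ind)
    {
        static const char *classes[] = { "small", "large", "huge" };
        char name[64];
        size_t allocated, sz;
        unsigned c;

        for (c = 0; c < 3; c++) {
            snprintf(name, sizeof(name), "stats.arenas.%u.%s.allocated", ind,
                classes[c]);
            sz = sizeof(allocated);
            if (mallctl(name, &allocated, &sz, NULL, 0) == 0)
                printf("arena %u %s: %zu bytes\n", ind, classes[c], allocated);
        }
    }
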
.RE .PP -"stats\&.arenas\&.\&.bins\&.\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.bins\&.\&.ndalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocations returned to bin\&. .RE .PP -"stats\&.arenas\&.\&.bins\&.\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.bins\&.\&.nrequests (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocation requests\&. .RE .PP -"stats\&.arenas\&.\&.bins\&.\&.curregs" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.bins\&.\&.curregs (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Current number of regions for this size class\&. .RE .PP -"stats\&.arenas\&.\&.bins\&.\&.nfills" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR] +stats\&.arenas\&.\&.bins\&.\&.nfills (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR] .RS 4 Cumulative number of tcache fills\&. .RE .PP -"stats\&.arenas\&.\&.bins\&.\&.nflushes" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR] +stats\&.arenas\&.\&.bins\&.\&.nflushes (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR] .RS 4 Cumulative number of tcache flushes\&. .RE .PP -"stats\&.arenas\&.\&.bins\&.\&.nruns" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.bins\&.\&.nruns (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of runs created\&. .RE .PP -"stats\&.arenas\&.\&.bins\&.\&.nreruns" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.bins\&.\&.nreruns (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of times the current run from which to allocate changed\&. .RE .PP -"stats\&.arenas\&.\&.bins\&.\&.curruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.bins\&.\&.curruns (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Current number of runs\&. .RE .PP -"stats\&.arenas\&.\&.lruns\&.\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.lruns\&.\&.nmalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocation requests for this size class served directly by the arena\&. .RE .PP -"stats\&.arenas\&.\&.lruns\&.\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.lruns\&.\&.ndalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of deallocation requests for this size class served directly by the arena\&. .RE .PP -"stats\&.arenas\&.\&.lruns\&.\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.lruns\&.\&.nrequests (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocation requests for this size class\&. .RE .PP -"stats\&.arenas\&.\&.lruns\&.\&.curruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.lruns\&.\&.curruns (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Current number of runs for this size class\&. .RE .PP -"stats\&.arenas\&.\&.hchunks\&.\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.hchunks\&.\&.nmalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocation requests for this size class served directly by the arena\&. 
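
For repeated per-bin queries such as the counters above, the mallctlnametomib()/mallctlbymib() interface avoids re-parsing the name for every bin. The sketch below (illustrative, not from the patch; assumes <malloc_np.h> and --enable-stats) translates stats.arenas.<i>.bins.<j>.curruns once and then patches the two index components of the MIB:

    #include <stdio.h>
    #include <malloc_np.h>

    /* Walk the bins of arena `ind`, printing the current run count per bin. */
    static void
    print_bin_runs(unsigned ind)
    {
        size_t mib[8], miblen, curruns, sz;
        unsigned nbins, j;

        sz = sizeof(nbins);
        if (mallctl("arenas.nbins", &nbins, &sz, NULL, 0) != 0)
            return;

        /* Translate the name once; indices are patched into the MIB below. */
        miblen = sizeof(mib) / sizeof(mib[0]);
        if (mallctlnametomib("stats.arenas.0.bins.0.curruns", mib, &miblen) != 0)
            return;

        for (j = 0; j < nbins; j++) {
            mib[2] = ind;   /* arena index <i> */
            mib[4] = j;     /* bin index <j> */
            sz = sizeof(curruns);
            if (mallctlbymib(mib, miblen, &curruns, &sz, NULL, 0) == 0)
                printf("arena %u bin %u: %zu current runs\n", ind, j, curruns);
        }
    }
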
.RE .PP -"stats\&.arenas\&.\&.hchunks\&.\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.hchunks\&.\&.ndalloc (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of deallocation requests for this size class served directly by the arena\&. .RE .PP -"stats\&.arenas\&.\&.hchunks\&.\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.hchunks\&.\&.nrequests (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Cumulative number of allocation requests for this size class\&. .RE .PP -"stats\&.arenas\&.\&.hchunks\&.\&.curhchunks" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +stats\&.arenas\&.\&.hchunks\&.\&.curhchunks (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Current number of huge allocations for this size class\&. .RE @@ -1813,7 +1849,7 @@ Current number of huge allocations for this size class\&. Although the heap profiling functionality was originally designed to be compatible with the \fBpprof\fR command that is developed as part of the -\m[blue]\fBgperftools package\fR\m[]\&\s-2\u[3]\d\s+2, the addition of per thread heap profiling functionality required a different heap profile format\&. The +\m[blue]\fBgperftools package\fR\m[]\&\s-2\u[4]\d\s+2, the addition of per thread heap profiling functionality required a different heap profile format\&. The \fBjeprof\fR command is derived from \fBpprof\fR, with enhancements to support the heap profile format described here\&. @@ -1881,14 +1917,16 @@ and \fB\-\-enable\-fill\fR options, and recompile the program with suitable options and symbols for debugger support\&. When so configured, jemalloc incorporates a wide variety of run\-time assertions that catch application errors such as double\-free, write\-after\-free, etc\&. .PP -Programs often accidentally depend on \(lquninitialized\(rq memory actually being filled with zero bytes\&. Junk filling (see the -"opt\&.junk" +Programs often accidentally depend on +\(lquninitialized\(rq +memory actually being filled with zero bytes\&. Junk filling (see the +opt\&.junk option) tends to expose such bugs in the form of obviously incorrect results and/or coredumps\&. Conversely, zero filling (see the -"opt\&.zero" +opt\&.zero option) eliminates the symptoms of such bugs\&. Between these two options, it is usually possible to quickly detect, diagnose, and eliminate such bugs\&. .PP This implementation does not provide much detail about the problems it detects, because the performance impact for storing such information would be prohibitive\&. However, jemalloc does integrate with the most excellent -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2 +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[3]\d\s+2 tool if the \fB\-\-enable\-valgrind\fR configuration option is enabled\&. @@ -1896,7 +1934,7 @@ configuration option is enabled\&. .PP If any of the memory allocation/deallocation functions detect an error or warning condition, a message will be printed to file descriptor \fBSTDERR_FILENO\fR\&. Errors will result in the process dumping core\&. If the -"opt\&.abort" +opt\&.abort option is set, most warnings are treated as errors\&. .PP The @@ -1904,22 +1942,23 @@ The variable allows the programmer to override the function which emits the text strings forming the errors and warnings if for some reason the \fBSTDERR_FILENO\fR file descriptor is not suitable for this\&. 
-\fBmalloc_message\fR\fB\fR +malloc_message() takes the \fIcbopaque\fR pointer argument that is \fBNULL\fR unless overridden by the arguments in a call to -\fBmalloc_stats_print\fR\fB\fR, followed by a string pointer\&. Please note that doing anything which tries to allocate memory in this function is likely to result in a crash or deadlock\&. +malloc_stats_print(), followed by a string pointer\&. Please note that doing anything which tries to allocate memory in this function is likely to result in a crash or deadlock\&. .PP -All messages are prefixed by \(lq:\(rq\&. +All messages are prefixed by +\(lq: \(rq\&. .SH "RETURN VALUES" .SS "Standard API" .PP The -\fBmalloc\fR\fB\fR +malloc() and -\fBcalloc\fR\fB\fR +calloc() functions return a pointer to the allocated memory if successful; otherwise a \fBNULL\fR pointer is returned and @@ -1928,9 +1967,9 @@ is set to ENOMEM\&. .PP The -\fBposix_memalign\fR\fB\fR +posix_memalign() function returns the value 0 if successful; otherwise it returns an error value\&. The -\fBposix_memalign\fR\fB\fR +posix_memalign() function will fail if: .PP EINVAL @@ -1947,13 +1986,13 @@ Memory allocation error\&. .RE .PP The -\fBaligned_alloc\fR\fB\fR +aligned_alloc() function returns a pointer to the allocated memory if successful; otherwise a \fBNULL\fR pointer is returned and \fIerrno\fR is set\&. The -\fBaligned_alloc\fR\fB\fR +aligned_alloc() function will fail if: .PP EINVAL @@ -1969,7 +2008,7 @@ Memory allocation error\&. .RE .PP The -\fBrealloc\fR\fB\fR +realloc() function returns a pointer, possibly identical to \fIptr\fR, to the allocated memory if successful; otherwise a \fBNULL\fR @@ -1978,44 +2017,44 @@ pointer is returned, and is set to ENOMEM if the error was the result of an allocation failure\&. The -\fBrealloc\fR\fB\fR +realloc() function always leaves the original buffer intact when an error occurs\&. .PP The -\fBfree\fR\fB\fR +free() function returns no value\&. .SS "Non\-standard API" .PP The -\fBmallocx\fR\fB\fR +mallocx() and -\fBrallocx\fR\fB\fR +rallocx() functions return a pointer to the allocated memory if successful; otherwise a \fBNULL\fR pointer is returned to indicate insufficient contiguous memory was available to service the allocation request\&. .PP The -\fBxallocx\fR\fB\fR +xallocx() function returns the real size of the resulting resized allocation pointed to by \fIptr\fR, which is a value less than \fIsize\fR if the allocation could not be adequately grown in place\&. .PP The -\fBsallocx\fR\fB\fR +sallocx() function returns the real size of the allocation pointed to by \fIptr\fR\&. .PP The -\fBnallocx\fR\fB\fR +nallocx() returns the real size that would result from a successful equivalent -\fBmallocx\fR\fB\fR +mallocx() function call, or zero if insufficient memory is available to perform the size computation\&. .PP The -\fBmallctl\fR\fB\fR, -\fBmallctlnametomib\fR\fB\fR, and -\fBmallctlbymib\fR\fB\fR +mallctl(), +mallctlnametomib(), and +mallctlbymib() functions return 0 on success; otherwise they return an error value\&. The functions will fail if: .PP EINVAL @@ -2050,12 +2089,12 @@ A memory allocation failure occurred\&. EFAULT .RS 4 An interface with side effects failed in some way not directly related to -\fBmallctl*\fR\fB\fR +mallctl*() read/write processing\&. .RE .PP The -\fBmalloc_usable_size\fR\fB\fR +malloc_usable_size() function returns the usable size of the allocation pointed to by \fIptr\fR\&. 
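
To make the cbopaque plumbing described above concrete: malloc_stats_print() passes its cbopaque argument, unchanged, to every invocation of the supplied write callback. A minimal sketch, not from the patch, assuming FreeBSD's <malloc_np.h>; the callback name and output path are illustrative. Passing NULL for the callback instead routes the report through malloc_message() to STDERR_FILENO.

    #include <stdio.h>
    #include <malloc_np.h>

    /* write_cb receives the cbopaque pointer given to malloc_stats_print(). */
    static void
    write_cb(void *cbopaque, const char *msg)
    {
        fputs(msg, (FILE *)cbopaque);
    }

    int
    main(void)
    {
        FILE *fp = fopen("/tmp/jemalloc-stats.txt", "w");

        if (fp == NULL)
            return (1);
        malloc_stats_print(write_cb, fp, NULL);
        fclose(fp);
        return (0);
    }
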
.SH "ENVIRONMENT" @@ -2105,31 +2144,31 @@ malloc_conf = "lg_chunk:24"; .SH "STANDARDS" .PP The -\fBmalloc\fR\fB\fR, -\fBcalloc\fR\fB\fR, -\fBrealloc\fR\fB\fR, and -\fBfree\fR\fB\fR +malloc(), +calloc(), +realloc(), and +free() functions conform to ISO/IEC 9899:1990 (\(lqISO C90\(rq)\&. .PP The -\fBposix_memalign\fR\fB\fR +posix_memalign() function conforms to IEEE Std 1003\&.1\-2001 (\(lqPOSIX\&.1\(rq)\&. .SH "HISTORY" .PP The -\fBmalloc_usable_size\fR\fB\fR +malloc_usable_size() and -\fBposix_memalign\fR\fB\fR +posix_memalign() functions first appeared in FreeBSD 7\&.0\&. .PP The -\fBaligned_alloc\fR\fB\fR, -\fBmalloc_stats_print\fR\fB\fR, and -\fBmallctl*\fR\fB\fR +aligned_alloc(), +malloc_stats_print(), and +mallctl*() functions first appeared in FreeBSD 10\&.0\&. .PP The -\fB*allocx\fR\fB\fR +*allocx() functions first appeared in FreeBSD 11\&.0\&. .SH "AUTHOR" .PP @@ -2140,14 +2179,19 @@ functions first appeared in FreeBSD 11\&.0\&. .IP " 1." 4 jemalloc website .RS 4 -\%http://www.canonware.com/jemalloc/ +\%http://jemalloc.net/ .RE .IP " 2." 4 +JSON format +.RS 4 +\%http://www.json.org/ +.RE +.IP " 3." 4 Valgrind .RS 4 \%http://valgrind.org/ .RE -.IP " 3." 4 +.IP " 4." 4 gperftools package .RS 4 \%http://code.google.com/p/gperftools/ diff --git a/contrib/jemalloc/include/jemalloc/internal/arena.h b/contrib/jemalloc/include/jemalloc/internal/arena.h index da6b6d2b6069..a3ba55d88552 100644 --- a/contrib/jemalloc/include/jemalloc/internal/arena.h +++ b/contrib/jemalloc/include/jemalloc/internal/arena.h @@ -42,6 +42,7 @@ typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t; typedef struct arena_chunk_s arena_chunk_t; typedef struct arena_bin_info_s arena_bin_info_t; +typedef struct arena_decay_s arena_decay_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; typedef struct arena_tdata_s arena_tdata_t; @@ -257,6 +258,49 @@ struct arena_bin_info_s { uint32_t reg0_offset; }; +struct arena_decay_s { + /* + * Approximate time in seconds from the creation of a set of unused + * dirty pages until an equivalent set of unused dirty pages is purged + * and/or reused. + */ + ssize_t time; + /* time / SMOOTHSTEP_NSTEPS. */ + nstime_t interval; + /* + * Time at which the current decay interval logically started. We do + * not actually advance to a new epoch until sometime after it starts + * because of scheduling and computation delays, and it is even possible + * to completely skip epochs. In all cases, during epoch advancement we + * merge all relevant activity into the most recently recorded epoch. + */ + nstime_t epoch; + /* Deadline randomness generator. */ + uint64_t jitter_state; + /* + * Deadline for current epoch. This is the sum of interval and per + * epoch jitter which is a uniform random variable in [0..interval). + * Epochs always advance by precise multiples of interval, but we + * randomize the deadline to reduce the likelihood of arenas purging in + * lockstep. + */ + nstime_t deadline; + /* + * Number of dirty pages at beginning of current epoch. During epoch + * advancement we use the delta between arena->decay.ndirty and + * arena->ndirty to determine how many dirty pages, if any, were + * generated. + */ + size_t ndirty; + /* + * Trailing log of how many unused dirty pages were generated during + * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last + * element is the most recent epoch. Corresponding epoch times are + * relative to epoch. 
+ */ + size_t backlog[SMOOTHSTEP_NSTEPS]; +}; + struct arena_bin_s { /* * All operations on runcur, runs, and stats require that lock be @@ -326,7 +370,7 @@ struct arena_s { * PRNG state for cache index randomization of large allocation base * pointers. */ - uint64_t offset_state; + size_t offset_state; dss_prec_t dss_prec; @@ -394,52 +438,8 @@ struct arena_s { arena_runs_dirty_link_t runs_dirty; extent_node_t chunks_cache; - /* - * Approximate time in seconds from the creation of a set of unused - * dirty pages until an equivalent set of unused dirty pages is purged - * and/or reused. - */ - ssize_t decay_time; - /* decay_time / SMOOTHSTEP_NSTEPS. */ - nstime_t decay_interval; - /* - * Time at which the current decay interval logically started. We do - * not actually advance to a new epoch until sometime after it starts - * because of scheduling and computation delays, and it is even possible - * to completely skip epochs. In all cases, during epoch advancement we - * merge all relevant activity into the most recently recorded epoch. - */ - nstime_t decay_epoch; - /* decay_deadline randomness generator. */ - uint64_t decay_jitter_state; - /* - * Deadline for current epoch. This is the sum of decay_interval and - * per epoch jitter which is a uniform random variable in - * [0..decay_interval). Epochs always advance by precise multiples of - * decay_interval, but we randomize the deadline to reduce the - * likelihood of arenas purging in lockstep. - */ - nstime_t decay_deadline; - /* - * Number of dirty pages at beginning of current epoch. During epoch - * advancement we use the delta between decay_ndirty and ndirty to - * determine how many dirty pages, if any, were generated, and record - * the result in decay_backlog. - */ - size_t decay_ndirty; - /* - * Memoized result of arena_decay_backlog_npages_limit() corresponding - * to the current contents of decay_backlog, i.e. the limit on how many - * pages are allowed to exist for the decay epochs. - */ - size_t decay_backlog_npages_limit; - /* - * Trailing log of how many unused dirty pages were generated during - * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last - * element is the most recent epoch. Corresponding epoch times are - * relative to decay_epoch. - */ - size_t decay_backlog[SMOOTHSTEP_NSTEPS]; + /* Decay-based purging state. */ + arena_decay_t decay; /* Extant huge allocations. */ ql_head(extent_node_t) huge; @@ -470,10 +470,12 @@ struct arena_s { arena_bin_t bins[NBINS]; /* - * Quantized address-ordered heaps of this arena's available runs. The - * heaps are used for first-best-fit run allocation. + * Size-segregated address-ordered heaps of this arena's available runs, + * used for first-best-fit run allocation. Runs are quantized, i.e. + * they reside in the last heap which corresponds to a size class less + * than or equal to the run size. */ - arena_run_heap_t runs_avail[1]; /* Dynamically sized. */ + arena_run_heap_t runs_avail[NPSIZES]; }; /* Used in conjunction with tsd for fast arena-related context lookup. */ @@ -505,7 +507,6 @@ extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t map_misc_offset; extern size_t arena_maxrun; /* Max run size for arenas. */ extern size_t large_maxclass; /* Max large size class. */ -extern size_t run_quantize_max; /* Max run_quantize_*() input. */ extern unsigned nlclasses; /* Number of large size classes. */ extern unsigned nhclasses; /* Number of huge size classes. 
*/ @@ -601,7 +602,7 @@ unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); arena_t *arena_new(tsdn_t *tsdn, unsigned ind); -bool arena_boot(void); +void arena_boot(void); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena); void arena_prefork2(tsdn_t *tsdn, arena_t *arena); diff --git a/contrib/jemalloc/include/jemalloc/internal/chunk.h b/contrib/jemalloc/include/jemalloc/internal/chunk.h index c9fd4ecbd059..38c9a012da11 100644 --- a/contrib/jemalloc/include/jemalloc/internal/chunk.h +++ b/contrib/jemalloc/include/jemalloc/internal/chunk.h @@ -58,7 +58,7 @@ void chunk_deregister(const void *chunk, const extent_node_t *node); void *chunk_alloc_base(size_t size); void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - bool *zero, bool dalloc_node); + bool *zero, bool *commit, bool dalloc_node); void *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); @@ -71,9 +71,6 @@ bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length); bool chunk_boot(void); -void chunk_prefork(tsdn_t *tsdn); -void chunk_postfork_parent(tsdn_t *tsdn); -void chunk_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/chunk_dss.h b/contrib/jemalloc/include/jemalloc/internal/chunk_dss.h index 724fa579a2cf..da8511ba06b0 100644 --- a/contrib/jemalloc/include/jemalloc/internal/chunk_dss.h +++ b/contrib/jemalloc/include/jemalloc/internal/chunk_dss.h @@ -21,15 +21,13 @@ extern const char *dss_prec_names[]; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -dss_prec_t chunk_dss_prec_get(tsdn_t *tsdn); -bool chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec); +dss_prec_t chunk_dss_prec_get(void); +bool chunk_dss_prec_set(dss_prec_t dss_prec); void *chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); -bool chunk_in_dss(tsdn_t *tsdn, void *chunk); -bool chunk_dss_boot(void); -void chunk_dss_prefork(tsdn_t *tsdn); -void chunk_dss_postfork_parent(tsdn_t *tsdn); -void chunk_dss_postfork_child(tsdn_t *tsdn); +bool chunk_in_dss(void *chunk); +bool chunk_dss_mergeable(void *chunk_a, void *chunk_b); +void chunk_dss_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/ckh.h b/contrib/jemalloc/include/jemalloc/internal/ckh.h index 46e151cdd7f9..f75ad90b73cc 100644 --- a/contrib/jemalloc/include/jemalloc/internal/ckh.h +++ b/contrib/jemalloc/include/jemalloc/internal/ckh.h @@ -64,13 +64,13 @@ struct ckh_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -bool ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp); -void ckh_delete(tsdn_t *tsdn, ckh_t *ckh); +void ckh_delete(tsd_t *tsd, ckh_t *ckh); size_t ckh_count(ckh_t *ckh); bool ckh_iter(ckh_t *ckh, size_t 
*tabind, void **key, void **data); -bool ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key, +bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data); bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); void ckh_string_hash(const void *key, size_t r_hash[2]); diff --git a/contrib/jemalloc/include/jemalloc/internal/huge.h b/contrib/jemalloc/include/jemalloc/internal/huge.h index b5fa9e63691c..22184d9bbd45 100644 --- a/contrib/jemalloc/include/jemalloc/internal/huge.h +++ b/contrib/jemalloc/include/jemalloc/internal/huge.h @@ -17,7 +17,7 @@ bool huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET -typedef void (huge_dalloc_junk_t)(tsdn_t *, void *, size_t); +typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif void huge_dalloc(tsdn_t *tsdn, void *ptr); diff --git a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h index 49f14a51ee13..15483e271dd6 100644 --- a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h +++ b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h @@ -159,7 +159,9 @@ static const bool config_cache_oblivious = #endif #include "jemalloc/internal/ph.h" +#ifndef __PGI #define RB_COMPACT +#endif #include "jemalloc/internal/rb.h" #include "jemalloc/internal/qr.h" #include "jemalloc/internal/ql.h" @@ -182,6 +184,9 @@ static const bool config_cache_oblivious = #include "jemalloc/internal/jemalloc_internal_macros.h" +/* Page size index type. */ +typedef unsigned pszind_t; + /* Size class index type. */ typedef unsigned szind_t; @@ -231,7 +236,7 @@ typedef unsigned szind_t; # ifdef __alpha__ # define LG_QUANTUM 4 # endif -# if (defined(__sparc64__) || defined(__sparcv9)) +# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) # define LG_QUANTUM 4 # endif # if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) @@ -361,6 +366,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" @@ -393,6 +399,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" @@ -452,11 +459,16 @@ extern unsigned narenas_auto; */ extern arena_t **arenas; +/* + * pind2sz_tab encodes the same information as could be computed by + * pind2sz_compute(). + */ +extern size_t const pind2sz_tab[NPSIZES]; /* * index2size_tab encodes the same information as could be computed (at * unacceptable cost in some code paths) by index2size_compute(). */ -extern size_t const index2size_tab[NSIZES+1]; +extern size_t const index2size_tab[NSIZES]; /* * size2index_tab is a compact lookup table that rounds request sizes up to * size classes. 
In order to reduce cache footprint, the table is compressed, @@ -464,6 +476,7 @@ extern size_t const index2size_tab[NSIZES+1]; */ extern uint8_t const size2index_tab[]; +arena_t *a0get(void); void *a0malloc(size_t size); void a0dalloc(void *ptr); void *bootstrap_malloc(size_t size); @@ -489,6 +502,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" @@ -521,6 +535,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" @@ -540,6 +555,11 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE +pszind_t psz2ind(size_t psz); +size_t pind2sz_compute(pszind_t pind); +size_t pind2sz_lookup(pszind_t pind); +size_t pind2sz(pszind_t pind); +size_t psz2u(size_t psz); szind_t size2index_compute(size_t size); szind_t size2index_lookup(size_t size); szind_t size2index(size_t size); @@ -552,7 +572,7 @@ size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); arena_t *arena_choose(tsd_t *tsd, arena_t *arena); -arena_t *arena_ichoose(tsdn_t *tsdn, arena_t *arena); +arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); @@ -560,10 +580,90 @@ ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_INLINE pszind_t +psz2ind(size_t psz) +{ + + if (unlikely(psz > HUGE_MAXCLASS)) + return (NPSIZES); + { + pszind_t x = lg_floor((psz<<1)-1); + pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - + (LG_SIZE_CLASS_GROUP + LG_PAGE); + pszind_t grp = shift << LG_SIZE_CLASS_GROUP; + + pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? + LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZI(-1) << lg_delta; + pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + pszind_t ind = grp + mod; + return (ind); + } +} + +JEMALLOC_INLINE size_t +pind2sz_compute(pszind_t pind) +{ + + { + size_t grp = pind >> LG_SIZE_CLASS_GROUP; + size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_PAGE + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 1 : grp; + size_t lg_delta = shift + (LG_PAGE-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t sz = grp_size + mod_size; + return (sz); + } +} + +JEMALLOC_INLINE size_t +pind2sz_lookup(pszind_t pind) +{ + size_t ret = (size_t)pind2sz_tab[pind]; + assert(ret == pind2sz_compute(pind)); + return (ret); +} + +JEMALLOC_INLINE size_t +pind2sz(pszind_t pind) +{ + + assert(pind < NPSIZES); + return (pind2sz_lookup(pind)); +} + +JEMALLOC_INLINE size_t +psz2u(size_t psz) +{ + + if (unlikely(psz > HUGE_MAXCLASS)) + return (0); + { + size_t x = lg_floor((psz<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? 
+ LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (psz + delta_mask) & ~delta_mask; + return (usize); + } +} + JEMALLOC_INLINE szind_t size2index_compute(size_t size) { + if (unlikely(size > HUGE_MAXCLASS)) + return (NSIZES); #if (NTBINS != 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; @@ -572,9 +672,7 @@ size2index_compute(size_t size) } #endif { - szind_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? - (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) - : lg_floor((size<<1)-1); + szind_t x = lg_floor((size<<1)-1); szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); szind_t grp = shift << LG_SIZE_CLASS_GROUP; @@ -660,6 +758,8 @@ JEMALLOC_ALWAYS_INLINE size_t s2u_compute(size_t size) { + if (unlikely(size > HUGE_MAXCLASS)) + return (0); #if (NTBINS > 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; @@ -669,9 +769,7 @@ s2u_compute(size_t size) } #endif { - size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? - (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) - : lg_floor((size<<1)-1); + size_t x = lg_floor((size<<1)-1); size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; size_t delta = ZU(1) << lg_delta; @@ -812,14 +910,10 @@ arena_choose(tsd_t *tsd, arena_t *arena) } JEMALLOC_INLINE arena_t * -arena_ichoose(tsdn_t *tsdn, arena_t *arena) +arena_ichoose(tsd_t *tsd, arena_t *arena) { - assert(!tsdn_null(tsdn) || arena != NULL); - - if (!tsdn_null(tsdn)) - return (arena_choose_impl(tsdn_tsd(tsdn), NULL, true)); - return (arena); + return (arena_choose_impl(tsd, arena, true)); } JEMALLOC_INLINE arena_tdata_t * diff --git a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h index 42d97f2a6459..46266325684c 100644 --- a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h @@ -20,8 +20,18 @@ # include # endif # include +# ifdef JEMALLOC_OS_UNFAIR_LOCK +# include +# endif +# ifdef JEMALLOC_GLIBC_MALLOC_HOOK +# include +# endif # include # include +# include +# ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +# include +# endif #endif #include diff --git a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h index 32f6f155d350..7636ac49d745 100644 --- a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h +++ b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h @@ -61,12 +61,20 @@ */ #define JEMALLOC_HAVE_MADVISE +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +/* #undef JEMALLOC_OS_UNFAIR_LOCK */ + /* * Defined if OSSpin*() functions are available, as provided by Darwin, and * documented in the spinlock(3) manual page. */ /* #undef JEMALLOC_OSSPIN */ +/* Defined if syscall(2) is available. */ +#define JEMALLOC_HAVE_SYSCALL + /* * Defined if secure_getenv(3) is available. */ @@ -77,6 +85,21 @@ */ #define JEMALLOC_HAVE_ISSETUGID +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. 
+ */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1 + +/* + * Defined if mach_absolute_time() is available. + */ +/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */ + /* * Defined if _malloc_thread_cleanup() exists. At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc @@ -189,6 +212,12 @@ /* TLS is used to map arenas and magazine caches to threads. */ #define JEMALLOC_TLS +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#define JEMALLOC_INTERNAL_UNREACHABLE abort + /* * ffs*() functions to use for bitmapping. Don't use these directly; instead, * use ffs_*() from util.h. diff --git a/contrib/jemalloc/include/jemalloc/internal/mb.h b/contrib/jemalloc/include/jemalloc/internal/mb.h index 437c86f72341..5384728fd50e 100644 --- a/contrib/jemalloc/include/jemalloc/internal/mb.h +++ b/contrib/jemalloc/include/jemalloc/internal/mb.h @@ -105,8 +105,8 @@ mb_write(void) malloc_mutex_t mtx; malloc_mutex_init(&mtx, "mb", WITNESS_RANK_OMIT); - malloc_mutex_lock(NULL, &mtx); - malloc_mutex_unlock(NULL, &mtx); + malloc_mutex_lock(TSDN_NULL, &mtx); + malloc_mutex_unlock(TSDN_NULL, &mtx); } #endif #endif diff --git a/contrib/jemalloc/include/jemalloc/internal/mutex.h b/contrib/jemalloc/include/jemalloc/internal/mutex.h index 60ab0416500c..76518db7575c 100644 --- a/contrib/jemalloc/include/jemalloc/internal/mutex.h +++ b/contrib/jemalloc/include/jemalloc/internal/mutex.h @@ -5,6 +5,9 @@ typedef struct malloc_mutex_s malloc_mutex_t; #ifdef _WIN32 # define MALLOC_MUTEX_INITIALIZER +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) +# define MALLOC_MUTEX_INITIALIZER \ + {OS_UNFAIR_LOCK_INIT, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_OSSPIN)) # define MALLOC_MUTEX_INITIALIZER {0, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_MUTEX_INIT_CB)) @@ -35,6 +38,8 @@ struct malloc_mutex_s { # else CRITICAL_SECTION lock; # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock lock; #elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) @@ -86,6 +91,8 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) # else EnterCriticalSection(&mutex->lock); # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_lock(&mutex->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mutex->lock); #else @@ -107,6 +114,8 @@ malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) # else LeaveCriticalSection(&mutex->lock); # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_unlock(&mutex->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mutex->lock); #else diff --git a/contrib/jemalloc/include/jemalloc/internal/nstime.h b/contrib/jemalloc/include/jemalloc/internal/nstime.h index dc293b73b9f3..93b27dc80a05 100644 --- a/contrib/jemalloc/include/jemalloc/internal/nstime.h +++ b/contrib/jemalloc/include/jemalloc/internal/nstime.h @@ -1,9 +1,6 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ - && _POSIX_MONOTONIC_CLOCK >= 0 - typedef struct nstime_s nstime_t; /* Maximum supported number of seconds (~584 years). 
*/ @@ -34,9 +31,12 @@ void nstime_imultiply(nstime_t *time, uint64_t multiplier); void nstime_idivide(nstime_t *time, uint64_t divisor); uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); #ifdef JEMALLOC_JET +typedef bool (nstime_monotonic_t)(void); +extern nstime_monotonic_t *nstime_monotonic; typedef bool (nstime_update_t)(nstime_t *); extern nstime_update_t *nstime_update; #else +bool nstime_monotonic(void); bool nstime_update(nstime_t *time); #endif diff --git a/contrib/jemalloc/include/jemalloc/internal/private_namespace.h b/contrib/jemalloc/include/jemalloc/internal/private_namespace.h index 00bedbabec26..350835cbe641 100644 --- a/contrib/jemalloc/include/jemalloc/internal/private_namespace.h +++ b/contrib/jemalloc/include/jemalloc/internal/private_namespace.h @@ -1,4 +1,5 @@ #define a0dalloc JEMALLOC_N(a0dalloc) +#define a0get JEMALLOC_N(a0get) #define a0malloc JEMALLOC_N(a0malloc) #define arena_aalloc JEMALLOC_N(arena_aalloc) #define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small) @@ -167,20 +168,15 @@ #define chunk_dalloc_wrapper JEMALLOC_N(chunk_dalloc_wrapper) #define chunk_deregister JEMALLOC_N(chunk_deregister) #define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) -#define chunk_dss_postfork_child JEMALLOC_N(chunk_dss_postfork_child) -#define chunk_dss_postfork_parent JEMALLOC_N(chunk_dss_postfork_parent) +#define chunk_dss_mergeable JEMALLOC_N(chunk_dss_mergeable) #define chunk_dss_prec_get JEMALLOC_N(chunk_dss_prec_get) #define chunk_dss_prec_set JEMALLOC_N(chunk_dss_prec_set) -#define chunk_dss_prefork JEMALLOC_N(chunk_dss_prefork) #define chunk_hooks_default JEMALLOC_N(chunk_hooks_default) #define chunk_hooks_get JEMALLOC_N(chunk_hooks_get) #define chunk_hooks_set JEMALLOC_N(chunk_hooks_set) #define chunk_in_dss JEMALLOC_N(chunk_in_dss) #define chunk_lookup JEMALLOC_N(chunk_lookup) #define chunk_npages JEMALLOC_N(chunk_npages) -#define chunk_postfork_child JEMALLOC_N(chunk_postfork_child) -#define chunk_postfork_parent JEMALLOC_N(chunk_postfork_parent) -#define chunk_prefork JEMALLOC_N(chunk_prefork) #define chunk_purge_wrapper JEMALLOC_N(chunk_purge_wrapper) #define chunk_register JEMALLOC_N(chunk_register) #define chunks_rtree JEMALLOC_N(chunks_rtree) @@ -359,6 +355,7 @@ #define nstime_imultiply JEMALLOC_N(nstime_imultiply) #define nstime_init JEMALLOC_N(nstime_init) #define nstime_init2 JEMALLOC_N(nstime_init2) +#define nstime_monotonic JEMALLOC_N(nstime_monotonic) #define nstime_ns JEMALLOC_N(nstime_ns) #define nstime_nsec JEMALLOC_N(nstime_nsec) #define nstime_sec JEMALLOC_N(nstime_sec) @@ -400,11 +397,22 @@ #define pages_purge JEMALLOC_N(pages_purge) #define pages_trim JEMALLOC_N(pages_trim) #define pages_unmap JEMALLOC_N(pages_unmap) +#define pind2sz JEMALLOC_N(pind2sz) +#define pind2sz_compute JEMALLOC_N(pind2sz_compute) +#define pind2sz_lookup JEMALLOC_N(pind2sz_lookup) +#define pind2sz_tab JEMALLOC_N(pind2sz_tab) #define pow2_ceil_u32 JEMALLOC_N(pow2_ceil_u32) #define pow2_ceil_u64 JEMALLOC_N(pow2_ceil_u64) #define pow2_ceil_zu JEMALLOC_N(pow2_ceil_zu) -#define prng_lg_range JEMALLOC_N(prng_lg_range) -#define prng_range JEMALLOC_N(prng_range) +#define prng_lg_range_u32 JEMALLOC_N(prng_lg_range_u32) +#define prng_lg_range_u64 JEMALLOC_N(prng_lg_range_u64) +#define prng_lg_range_zu JEMALLOC_N(prng_lg_range_zu) +#define prng_range_u32 JEMALLOC_N(prng_range_u32) +#define prng_range_u64 JEMALLOC_N(prng_range_u64) +#define prng_range_zu JEMALLOC_N(prng_range_zu) +#define prng_state_next_u32 JEMALLOC_N(prng_state_next_u32) +#define 
prng_state_next_u64 JEMALLOC_N(prng_state_next_u64) +#define prng_state_next_zu JEMALLOC_N(prng_state_next_zu) #define prof_active JEMALLOC_N(prof_active) #define prof_active_get JEMALLOC_N(prof_active_get) #define prof_active_get_unlocked JEMALLOC_N(prof_active_get_unlocked) @@ -453,12 +461,13 @@ #define prof_thread_active_set JEMALLOC_N(prof_thread_active_set) #define prof_thread_name_get JEMALLOC_N(prof_thread_name_get) #define prof_thread_name_set JEMALLOC_N(prof_thread_name_set) +#define psz2ind JEMALLOC_N(psz2ind) +#define psz2u JEMALLOC_N(psz2u) #define purge_mode_names JEMALLOC_N(purge_mode_names) #define quarantine JEMALLOC_N(quarantine) #define quarantine_alloc_hook JEMALLOC_N(quarantine_alloc_hook) #define quarantine_alloc_hook_work JEMALLOC_N(quarantine_alloc_hook_work) #define quarantine_cleanup JEMALLOC_N(quarantine_cleanup) -#define register_zone JEMALLOC_N(register_zone) #define rtree_child_read JEMALLOC_N(rtree_child_read) #define rtree_child_read_hard JEMALLOC_N(rtree_child_read_hard) #define rtree_child_tryread JEMALLOC_N(rtree_child_tryread) @@ -476,7 +485,6 @@ #define rtree_val_write JEMALLOC_N(rtree_val_write) #define run_quantize_ceil JEMALLOC_N(run_quantize_ceil) #define run_quantize_floor JEMALLOC_N(run_quantize_floor) -#define run_quantize_max JEMALLOC_N(run_quantize_max) #define s2u JEMALLOC_N(s2u) #define s2u_compute JEMALLOC_N(s2u_compute) #define s2u_lookup JEMALLOC_N(s2u_lookup) @@ -486,6 +494,8 @@ #define size2index_compute JEMALLOC_N(size2index_compute) #define size2index_lookup JEMALLOC_N(size2index_lookup) #define size2index_tab JEMALLOC_N(size2index_tab) +#define spin_adaptive JEMALLOC_N(spin_adaptive) +#define spin_init JEMALLOC_N(spin_init) #define stats_cactive JEMALLOC_N(stats_cactive) #define stats_cactive_add JEMALLOC_N(stats_cactive_add) #define stats_cactive_get JEMALLOC_N(stats_cactive_get) @@ -544,7 +554,9 @@ #define tsd_cleanup JEMALLOC_N(tsd_cleanup) #define tsd_cleanup_wrapper JEMALLOC_N(tsd_cleanup_wrapper) #define tsd_fetch JEMALLOC_N(tsd_fetch) +#define tsd_fetch_impl JEMALLOC_N(tsd_fetch_impl) #define tsd_get JEMALLOC_N(tsd_get) +#define tsd_get_allocates JEMALLOC_N(tsd_get_allocates) #define tsd_iarena_get JEMALLOC_N(tsd_iarena_get) #define tsd_iarena_set JEMALLOC_N(tsd_iarena_set) #define tsd_iarenap_get JEMALLOC_N(tsd_iarenap_get) @@ -603,9 +615,11 @@ #define witness_lock_error JEMALLOC_N(witness_lock_error) #define witness_lockless_error JEMALLOC_N(witness_lockless_error) #define witness_not_owner_error JEMALLOC_N(witness_not_owner_error) +#define witness_owner JEMALLOC_N(witness_owner) #define witness_owner_error JEMALLOC_N(witness_owner_error) #define witness_postfork_child JEMALLOC_N(witness_postfork_child) #define witness_postfork_parent JEMALLOC_N(witness_postfork_parent) #define witness_prefork JEMALLOC_N(witness_prefork) #define witness_unlock JEMALLOC_N(witness_unlock) #define witnesses_cleanup JEMALLOC_N(witnesses_cleanup) +#define zone_register JEMALLOC_N(zone_register) diff --git a/contrib/jemalloc/include/jemalloc/internal/prng.h b/contrib/jemalloc/include/jemalloc/internal/prng.h index 5830f8b7b327..c2bda19c6b09 100644 --- a/contrib/jemalloc/include/jemalloc/internal/prng.h +++ b/contrib/jemalloc/include/jemalloc/internal/prng.h @@ -19,8 +19,12 @@ * the next has a cycle of 4, etc. For this reason, we prefer to use the upper * bits. 
*/ -#define PRNG_A UINT64_C(6364136223846793005) -#define PRNG_C UINT64_C(1442695040888963407) + +#define PRNG_A_32 UINT32_C(1103515241) +#define PRNG_C_32 UINT32_C(12347) + +#define PRNG_A_64 UINT64_C(6364136223846793005) +#define PRNG_C_64 UINT64_C(1442695040888963407) #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -35,28 +39,133 @@ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -uint64_t prng_lg_range(uint64_t *state, unsigned lg_range); -uint64_t prng_range(uint64_t *state, uint64_t range); +uint32_t prng_state_next_u32(uint32_t state); +uint64_t prng_state_next_u64(uint64_t state); +size_t prng_state_next_zu(size_t state); + +uint32_t prng_lg_range_u32(uint32_t *state, unsigned lg_range, + bool atomic); +uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range); +size_t prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic); + +uint32_t prng_range_u32(uint32_t *state, uint32_t range, bool atomic); +uint64_t prng_range_u64(uint64_t *state, uint64_t range); +size_t prng_range_zu(size_t *state, size_t range, bool atomic); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_)) -JEMALLOC_ALWAYS_INLINE uint64_t -prng_lg_range(uint64_t *state, unsigned lg_range) +JEMALLOC_ALWAYS_INLINE uint32_t +prng_state_next_u32(uint32_t state) { - uint64_t ret; + + return ((state * PRNG_A_32) + PRNG_C_32); +} + +JEMALLOC_ALWAYS_INLINE uint64_t +prng_state_next_u64(uint64_t state) +{ + + return ((state * PRNG_A_64) + PRNG_C_64); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_state_next_zu(size_t state) +{ + +#if LG_SIZEOF_PTR == 2 + return ((state * PRNG_A_32) + PRNG_C_32); +#elif LG_SIZEOF_PTR == 3 + return ((state * PRNG_A_64) + PRNG_C_64); +#else +#error Unsupported pointer size +#endif +} + +JEMALLOC_ALWAYS_INLINE uint32_t +prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic) +{ + uint32_t ret, state1; + + assert(lg_range > 0); + assert(lg_range <= 32); + + if (atomic) { + uint32_t state0; + + do { + state0 = atomic_read_uint32(state); + state1 = prng_state_next_u32(state0); + } while (atomic_cas_uint32(state, state0, state1)); + } else { + state1 = prng_state_next_u32(*state); + *state = state1; + } + ret = state1 >> (32 - lg_range); + + return (ret); +} + +/* 64-bit atomic operations cannot be supported on all relevant platforms. */ +JEMALLOC_ALWAYS_INLINE uint64_t +prng_lg_range_u64(uint64_t *state, unsigned lg_range) +{ + uint64_t ret, state1; assert(lg_range > 0); assert(lg_range <= 64); - ret = (*state * PRNG_A) + PRNG_C; - *state = ret; - ret >>= (64 - lg_range); + state1 = prng_state_next_u64(*state); + *state = state1; + ret = state1 >> (64 - lg_range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic) +{ + size_t ret, state1; + + assert(lg_range > 0); + assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR)); + + if (atomic) { + size_t state0; + + do { + state0 = atomic_read_z(state); + state1 = prng_state_next_zu(state0); + } while (atomic_cas_z(state, state0, state1)); + } else { + state1 = prng_state_next_zu(*state); + *state = state1; + } + ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE uint32_t +prng_range_u32(uint32_t *state, uint32_t range, bool atomic) +{ + uint32_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). 
*/ + lg_range = ffs_u32(pow2_ceil_u32(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range_u32(state, lg_range, atomic); + } while (ret >= range); return (ret); } JEMALLOC_ALWAYS_INLINE uint64_t -prng_range(uint64_t *state, uint64_t range) +prng_range_u64(uint64_t *state, uint64_t range) { uint64_t ret; unsigned lg_range; @@ -68,7 +177,26 @@ prng_range(uint64_t *state, uint64_t range) /* Generate a result in [0..range) via repeated trial. */ do { - ret = prng_lg_range(state, lg_range); + ret = prng_lg_range_u64(state, lg_range); + } while (ret >= range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_range_zu(size_t *state, size_t range, bool atomic) +{ + size_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range_zu(state, lg_range, atomic); } while (ret >= range); return (ret); diff --git a/contrib/jemalloc/include/jemalloc/internal/prof.h b/contrib/jemalloc/include/jemalloc/internal/prof.h index 21dff5fbcd8c..8293b71edc65 100644 --- a/contrib/jemalloc/include/jemalloc/internal/prof.h +++ b/contrib/jemalloc/include/jemalloc/internal/prof.h @@ -299,9 +299,9 @@ extern prof_dump_header_t *prof_dump_header; void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); -prof_tdata_t *prof_tdata_init(tsdn_t *tsdn); +prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); -void prof_reset(tsdn_t *tsdn, size_t lg_sample); +void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); bool prof_active_get(tsdn_t *tsdn); bool prof_active_set(tsdn_t *tsdn, bool active); @@ -315,7 +315,7 @@ bool prof_gdump_get(tsdn_t *tsdn); bool prof_gdump_set(tsdn_t *tsdn, bool active); void prof_boot0(void); void prof_boot1(void); -bool prof_boot2(tsdn_t *tsdn); +bool prof_boot2(tsd_t *tsd); void prof_prefork0(tsdn_t *tsdn); void prof_prefork1(tsdn_t *tsdn); void prof_postfork_parent(tsdn_t *tsdn); @@ -384,7 +384,7 @@ prof_tdata_get(tsd_t *tsd, bool create) if (create) { if (unlikely(tdata == NULL)) { if (tsd_nominal(tsd)) { - tdata = prof_tdata_init(tsd_tsdn(tsd)); + tdata = prof_tdata_init(tsd); tsd_prof_tdata_set(tsd, tdata); } } else if (unlikely(tdata->expired)) { diff --git a/contrib/jemalloc/include/jemalloc/internal/size_classes.h b/contrib/jemalloc/include/jemalloc/internal/size_classes.h index 615586d29eb9..b12d262bb06b 100644 --- a/contrib/jemalloc/include/jemalloc/internal/size_classes.h +++ b/contrib/jemalloc/include/jemalloc/internal/size_classes.h @@ -7,13 +7,13 @@ * be defined prior to inclusion, and it in turn defines: * * LG_SIZE_CLASS_GROUP: Lg of size class count for each size doubling. - * SIZE_CLASSES: Complete table of - * SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) - * tuples. + * SIZE_CLASSES: Complete table of SC(index, lg_grp, lg_delta, ndelta, psz, + * bin, lg_delta_lookup) tuples. * index: Size class index. * lg_grp: Lg group base size (no deltas added). * lg_delta: Lg delta to previous size class. * ndelta: Delta multiplier. 
size == 1<iteration = 0; +} + +JEMALLOC_INLINE void +spin_adaptive(spin_t *spin) +{ + volatile uint64_t i; + + for (i = 0; i < (KQU(1) << spin->iteration); i++) + CPU_SPINWAIT; + + if (spin->iteration < 63) + spin->iteration++; +} + +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/contrib/jemalloc/include/jemalloc/internal/tcache.h b/contrib/jemalloc/include/jemalloc/internal/tcache.h index 70883b1a7a62..01ba062dea6f 100644 --- a/contrib/jemalloc/include/jemalloc/internal/tcache.h +++ b/contrib/jemalloc/include/jemalloc/internal/tcache.h @@ -145,7 +145,7 @@ tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena); void tcache_cleanup(tsd_t *tsd); void tcache_enabled_cleanup(tsd_t *tsd); void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsdn_t *tsdn, unsigned *r_ind); +bool tcaches_create(tsd_t *tsd, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); bool tcache_boot(tsdn_t *tsdn); diff --git a/contrib/jemalloc/include/jemalloc/internal/tsd.h b/contrib/jemalloc/include/jemalloc/internal/tsd.h index bf1134110c19..9055acafd2e7 100644 --- a/contrib/jemalloc/include/jemalloc/internal/tsd.h +++ b/contrib/jemalloc/include/jemalloc/internal/tsd.h @@ -48,7 +48,7 @@ typedef enum { * * bool example_tsd_boot(void) {...} * bool example_tsd_booted_get(void) {...} - * example_t *example_tsd_get() {...} + * example_t *example_tsd_get(bool init) {...} * void example_tsd_set(example_t *val) {...} * * Note that all of the functions deal in terms of (a_type *) rather than @@ -105,7 +105,7 @@ a_name##tsd_boot(void); \ a_attr bool \ a_name##tsd_booted_get(void); \ a_attr a_type * \ -a_name##tsd_get(void); \ +a_name##tsd_get(bool init); \ a_attr void \ a_name##tsd_set(a_type *val); @@ -213,9 +213,15 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (false); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ \ assert(a_name##tsd_booted); \ @@ -264,9 +270,15 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (false); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ \ assert(a_name##tsd_booted); \ @@ -325,14 +337,14 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ } \ } \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(void) \ +a_name##tsd_wrapper_get(bool init) \ { \ DWORD error = GetLastError(); \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ TlsGetValue(a_name##tsd_tsd); \ SetLastError(error); \ \ - if (unlikely(wrapper == NULL)) { \ + if (init && unlikely(wrapper == NULL)) { \ wrapper = (a_name##tsd_wrapper_t *) \ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ if (wrapper == NULL) { \ @@ -392,14 +404,22 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (true); \ +} \ /* Get/set. 
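The spin_adaptive() helper added in spin.h above implements exponential backoff: each call busy-waits roughly twice as long as the previous one, so the first few retries stay cheap while a long wait stops hammering the contended cache line. A rough standalone equivalent is sketched below; the my_ names are invented and the x86 pause intrinsic is only a stand-in for whatever CPU_SPINWAIT expands to on a given platform.

#include <stdint.h>

typedef struct {
	unsigned iteration;	/* Wait doubles on every call. */
} my_spin_t;

static inline void
my_spin_init(my_spin_t *spin)
{

	spin->iteration = 0;
}

static inline void
my_spin_adaptive(my_spin_t *spin)
{
	volatile uint64_t i;

	for (i = 0; i < (UINT64_C(1) << spin->iteration); i++) {
#if defined(__x86_64__) || defined(__i386__)
		__builtin_ia32_pause();	/* Stand-in for CPU_SPINWAIT. */
#endif
	}
	if (spin->iteration < 63)	/* Cap so the shift stays defined. */
		spin->iteration++;
}

chunk_dss_max_update() and malloc_init_hard_needed() below use the real spin_t in exactly this way: initialize once, then spin adaptively inside the retry loop.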
*/ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(init); \ + if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ + return (NULL); \ return (&wrapper->val); \ } \ a_attr void \ @@ -408,7 +428,7 @@ a_name##tsd_set(a_type *val) \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(true); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -452,12 +472,12 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ } \ } \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(void) \ +a_name##tsd_wrapper_get(bool init) \ { \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ pthread_getspecific(a_name##tsd_tsd); \ \ - if (unlikely(wrapper == NULL)) { \ + if (init && unlikely(wrapper == NULL)) { \ tsd_init_block_t block; \ wrapper = tsd_init_check_recursion( \ &a_name##tsd_init_head, &block); \ @@ -520,14 +540,22 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (true); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(init); \ + if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ + return (NULL); \ return (&wrapper->val); \ } \ a_attr void \ @@ -536,7 +564,7 @@ a_name##tsd_set(a_type *val) \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(true); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -639,6 +667,7 @@ void tsd_cleanup(void *arg); #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) +tsd_t *tsd_fetch_impl(bool init); tsd_t *tsd_fetch(void); tsdn_t *tsd_tsdn(tsd_t *tsd); bool tsd_nominal(tsd_t *tsd); @@ -658,9 +687,13 @@ malloc_tsd_externs(, tsd_t) malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch(void) +tsd_fetch_impl(bool init) { - tsd_t *tsd = tsd_get(); + tsd_t *tsd = tsd_get(init); + + if (!init && tsd_get_allocates() && tsd == NULL) + return (NULL); + assert(tsd != NULL); if (unlikely(tsd->state != tsd_state_nominal)) { if (tsd->state == tsd_state_uninitialized) { @@ -677,6 +710,13 @@ tsd_fetch(void) return (tsd); } +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch(void) +{ + + return (tsd_fetch_impl(true)); +} + JEMALLOC_ALWAYS_INLINE tsdn_t * tsd_tsdn(tsd_t *tsd) { @@ -723,7 +763,7 @@ tsdn_fetch(void) if (!tsd_booted_get()) return (NULL); - return (tsd_tsdn(tsd_fetch())); + return (tsd_tsdn(tsd_fetch_impl(false))); } JEMALLOC_ALWAYS_INLINE bool diff --git a/contrib/jemalloc/include/jemalloc/internal/util.h b/contrib/jemalloc/include/jemalloc/internal/util.h index a0c2203dfe1a..aee00d6d9a47 100644 --- a/contrib/jemalloc/include/jemalloc/internal/util.h +++ b/contrib/jemalloc/include/jemalloc/internal/util.h @@ -61,30 +61,20 @@ # define JEMALLOC_CC_SILENCE_INIT(v) #endif -#define JEMALLOC_GNUC_PREREQ(major, minor) \ - (!defined(__clang__) && \ - (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))) 
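The tsd_get(bool init) signature change above lets a caller such as tsdn_fetch() look at thread-specific data without creating it, which matters on configurations where tsd_get_allocates() is true and the wrapper itself would have to be allocated. A simplified pthreads sketch of the pattern follows; my_tsd_key is assumed to have been created elsewhere with pthread_key_create(), and the wrapper-free layout is a deliberate simplification.

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

typedef struct {
	int val;
} my_tsd_t;

static pthread_key_t my_tsd_key;	/* Assumed initialized elsewhere. */

static my_tsd_t *
my_tsd_get(bool init)
{
	my_tsd_t *tsd = pthread_getspecific(my_tsd_key);

	if (tsd == NULL && init) {
		/* Allocate only when the caller explicitly permits it. */
		tsd = calloc(1, sizeof(*tsd));
		if (tsd != NULL)
			pthread_setspecific(my_tsd_key, tsd);
	}
	/* With init=false this can return NULL; callers must tolerate it. */
	return (tsd);
}

tsd_fetch_impl(false), added above, is exactly such a tolerant caller; it is what tsdn_fetch() uses so that a pure reader never triggers reentrant allocation.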
-#ifndef __has_builtin -# define __has_builtin(builtin) (0) -#endif -#define JEMALLOC_CLANG_HAS_BUILTIN(builtin) \ - (defined(__clang__) && __has_builtin(builtin)) - #ifdef __GNUC__ # define likely(x) __builtin_expect(!!(x), 1) # define unlikely(x) __builtin_expect(!!(x), 0) -# if JEMALLOC_GNUC_PREREQ(4, 6) || \ - JEMALLOC_CLANG_HAS_BUILTIN(__builtin_unreachable) -# define unreachable() __builtin_unreachable() -# else -# define unreachable() abort() -# endif #else # define likely(x) !!(x) # define unlikely(x) !!(x) -# define unreachable() abort() #endif +#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) +# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure +#endif + +#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() + #include "jemalloc/internal/assert.h" /* Use to assert a particular configuration, e.g., cassert(config_debug). */ diff --git a/contrib/jemalloc/include/jemalloc/internal/witness.h b/contrib/jemalloc/include/jemalloc/internal/witness.h index d78dca2d6c7e..cdf15d797d0c 100644 --- a/contrib/jemalloc/include/jemalloc/internal/witness.h +++ b/contrib/jemalloc/include/jemalloc/internal/witness.h @@ -108,6 +108,7 @@ void witness_postfork_child(tsd_t *tsd); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +bool witness_owner(tsd_t *tsd, const witness_t *witness); void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); void witness_assert_lockless(tsdn_t *tsdn); @@ -116,12 +117,25 @@ void witness_unlock(tsdn_t *tsdn, witness_t *witness); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +JEMALLOC_INLINE bool +witness_owner(tsd_t *tsd, const witness_t *witness) +{ + witness_list_t *witnesses; + witness_t *w; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + return (true); + } + + return (false); +} + JEMALLOC_INLINE void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) { tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; if (!config_debug) return; @@ -132,11 +146,8 @@ witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) if (witness->rank == WITNESS_RANK_OMIT) return; - witnesses = tsd_witnessesp_get(tsd); - ql_foreach(w, witnesses, link) { - if (w == witness) - return; - } + if (witness_owner(tsd, witness)) + return; witness_owner_error(witness); } @@ -238,10 +249,16 @@ witness_unlock(tsdn_t *tsdn, witness_t *witness) if (witness->rank == WITNESS_RANK_OMIT) return; - witness_assert_owner(tsdn, witness); - - witnesses = tsd_witnessesp_get(tsd); - ql_remove(witnesses, witness, link); + /* + * Check whether owner before removal, rather than relying on + * witness_assert_owner() to abort, so that unit tests can test this + * function's failure mode without causing undefined behavior. 
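Factoring witness_owner() out above lets witness_unlock() test ownership directly and only fall back to the asserting path on failure. The helper is a linear scan of the thread's list of held witnesses; the sketch below models it with a plain singly linked list in place of jemalloc's ql_ macros, and the my_ names are invented.

#include <stdbool.h>
#include <stddef.h>

typedef struct my_witness_s my_witness_t;
struct my_witness_s {
	const char	*name;
	my_witness_t	*next;	/* Next lock currently held by this thread. */
};

/* Return true iff witness is on the thread's held-lock list. */
static bool
my_witness_owner(const my_witness_t *held, const my_witness_t *witness)
{
	const my_witness_t *w;

	for (w = held; w != NULL; w = w->next) {
		if (w == witness)
			return (true);
	}
	return (false);
}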
+ */ + if (witness_owner(tsd, witness)) { + witnesses = tsd_witnessesp_get(tsd); + ql_remove(witnesses, witness, link); + } else + witness_assert_owner(tsdn, witness); } #endif diff --git a/contrib/jemalloc/include/jemalloc/jemalloc.h b/contrib/jemalloc/include/jemalloc/jemalloc.h index d2c876e91841..a8b2a6c33998 100644 --- a/contrib/jemalloc/include/jemalloc/jemalloc.h +++ b/contrib/jemalloc/include/jemalloc/jemalloc.h @@ -87,12 +87,12 @@ extern "C" { #include #include -#define JEMALLOC_VERSION "4.2.1-0-g3de035335255d553bdb344c32ffdb603816195d8" +#define JEMALLOC_VERSION "4.3.1-0-g0110fa8451af905affd77c3bea0d545fee2251b2" #define JEMALLOC_VERSION_MAJOR 4 -#define JEMALLOC_VERSION_MINOR 2 +#define JEMALLOC_VERSION_MINOR 3 #define JEMALLOC_VERSION_BUGFIX 1 #define JEMALLOC_VERSION_NREV 0 -#define JEMALLOC_VERSION_GID "3de035335255d553bdb344c32ffdb603816195d8" +#define JEMALLOC_VERSION_GID "0110fa8451af905affd77c3bea0d545fee2251b2" # define MALLOCX_LG_ALIGN(la) ((int)(la)) # if LG_SIZEOF_PTR == 2 diff --git a/contrib/jemalloc/src/arena.c b/contrib/jemalloc/src/arena.c index ce62590b9200..e196b13378e6 100644 --- a/contrib/jemalloc/src/arena.c +++ b/contrib/jemalloc/src/arena.c @@ -21,15 +21,8 @@ size_t map_bias; size_t map_misc_offset; size_t arena_maxrun; /* Max run size for arenas. */ size_t large_maxclass; /* Max large size class. */ -size_t run_quantize_max; /* Max run_quantize_*() input. */ -static size_t small_maxrun; /* Max run size for small size classes. */ -static bool *small_run_tab; /* Valid small run page multiples. */ -static size_t *run_quantize_floor_tab; /* run_quantize_floor() memoization. */ -static size_t *run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */ unsigned nlclasses; /* Number of large size classes. */ unsigned nhclasses; /* Number of huge size classes. */ -static szind_t runs_avail_bias; /* Size index for first runs_avail tree. */ -static szind_t runs_avail_nclasses; /* Number of runs_avail trees. */ /******************************************************************************/ /* @@ -37,6 +30,8 @@ static szind_t runs_avail_nclasses; /* Number of runs_avail trees. */ * definition. */ +static void arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, + arena_chunk_t *chunk); static void arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit); static void arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, @@ -77,83 +72,6 @@ arena_run_addr_comp(const arena_chunk_map_misc_t *a, ph_gen(static UNUSED, arena_run_heap_, arena_run_heap_t, arena_chunk_map_misc_t, ph_link, arena_run_addr_comp) -static size_t -run_quantize_floor_compute(size_t size) -{ - size_t qsize; - - assert(size != 0); - assert(size == PAGE_CEILING(size)); - - /* Don't change sizes that are valid small run sizes. */ - if (size <= small_maxrun && small_run_tab[size >> LG_PAGE]) - return (size); - - /* - * Round down to the nearest run size that can actually be requested - * during normal large allocation. Add large_pad so that cache index - * randomization can offset the allocation from the page boundary. - */ - qsize = index2size(size2index(size - large_pad + 1) - 1) + large_pad; - if (qsize <= SMALL_MAXCLASS + large_pad) - return (run_quantize_floor_compute(size - large_pad)); - assert(qsize <= size); - return (qsize); -} - -static size_t -run_quantize_ceil_compute_hard(size_t size) -{ - size_t large_run_size_next; - - assert(size != 0); - assert(size == PAGE_CEILING(size)); - - /* - * Return the next quantized size greater than the input size. 
- * Quantized sizes comprise the union of run sizes that back small - * region runs, and run sizes that back large regions with no explicit - * alignment constraints. - */ - - if (size > SMALL_MAXCLASS) { - large_run_size_next = PAGE_CEILING(index2size(size2index(size - - large_pad) + 1) + large_pad); - } else - large_run_size_next = SIZE_T_MAX; - if (size >= small_maxrun) - return (large_run_size_next); - - while (true) { - size += PAGE; - assert(size <= small_maxrun); - if (small_run_tab[size >> LG_PAGE]) { - if (large_run_size_next < size) - return (large_run_size_next); - return (size); - } - } -} - -static size_t -run_quantize_ceil_compute(size_t size) -{ - size_t qsize = run_quantize_floor_compute(size); - - if (qsize < size) { - /* - * Skip a quantization that may have an adequately large run, - * because under-sized runs may be mixed in. This only happens - * when an unusual size is requested, i.e. for aligned - * allocation, and is just one of several places where linear - * search would potentially find sufficiently aligned available - * memory somewhere lower. - */ - qsize = run_quantize_ceil_compute_hard(qsize); - } - return (qsize); -} - #ifdef JEMALLOC_JET #undef run_quantize_floor #define run_quantize_floor JEMALLOC_N(n_run_quantize_floor) @@ -162,13 +80,27 @@ static size_t run_quantize_floor(size_t size) { size_t ret; + pszind_t pind; assert(size > 0); - assert(size <= run_quantize_max); + assert(size <= HUGE_MAXCLASS); assert((size & PAGE_MASK) == 0); - ret = run_quantize_floor_tab[(size >> LG_PAGE) - 1]; - assert(ret == run_quantize_floor_compute(size)); + assert(size != 0); + assert(size == PAGE_CEILING(size)); + + pind = psz2ind(size - large_pad + 1); + if (pind == 0) { + /* + * Avoid underflow. This short-circuit would also do the right + * thing for all sizes in the range for which there are + * PAGE-spaced size classes, but it's simplest to just handle + * the one case that would cause erroneous results. + */ + return (size); + } + ret = pind2sz(pind - 1) + large_pad; + assert(ret <= size); return (ret); } #ifdef JEMALLOC_JET @@ -187,11 +119,21 @@ run_quantize_ceil(size_t size) size_t ret; assert(size > 0); - assert(size <= run_quantize_max); + assert(size <= HUGE_MAXCLASS); assert((size & PAGE_MASK) == 0); - ret = run_quantize_ceil_tab[(size >> LG_PAGE) - 1]; - assert(ret == run_quantize_ceil_compute(size)); + ret = run_quantize_floor(size); + if (ret < size) { + /* + * Skip a quantization that may have an adequately large run, + * because under-sized runs may be mixed in. This only happens + * when an unusual size is requested, i.e. for aligned + * allocation, and is just one of several places where linear + * search would potentially find sufficiently aligned available + * memory somewhere lower. 
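The rewritten run_quantize_floor()/run_quantize_ceil() above derive their results from the page-size-class index functions (psz2ind()/pind2sz()) instead of memoization tables. Stripped of the large_pad handling, the operation is simply snapping a size to the nearest member of a sorted set of classes. The generic sketch below uses an explicit, invented table rather than jemalloc's generated size classes.

#include <assert.h>
#include <stddef.h>

/* Hypothetical ascending size classes, in bytes. */
static const size_t my_classes[] = {4096, 8192, 12288, 16384, 24576, 32768};
#define MY_NCLASSES	(sizeof(my_classes) / sizeof(my_classes[0]))

/* Largest class <= size (floor). */
static size_t
my_quantize_floor(size_t size)
{
	size_t i, ret;

	assert(size >= my_classes[0]);
	ret = my_classes[0];
	for (i = 0; i < MY_NCLASSES && my_classes[i] <= size; i++)
		ret = my_classes[i];
	return (ret);
}

/* Smallest class >= size (ceil). */
static size_t
my_quantize_ceil(size_t size)
{
	size_t i;

	assert(size <= my_classes[MY_NCLASSES - 1]);
	for (i = 0; my_classes[i] < size; i++)
		;
	return (my_classes[i]);
}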
+ */ + ret = pind2sz(psz2ind(ret - large_pad + 1)) + large_pad; + } return (ret); } #ifdef JEMALLOC_JET @@ -200,25 +142,17 @@ run_quantize_ceil(size_t size) run_quantize_t *run_quantize_ceil = JEMALLOC_N(n_run_quantize_ceil); #endif -static arena_run_heap_t * -arena_runs_avail_get(arena_t *arena, szind_t ind) -{ - - assert(ind >= runs_avail_bias); - assert(ind - runs_avail_bias < runs_avail_nclasses); - - return (&arena->runs_avail[ind - runs_avail_bias]); -} - static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( + pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_run_heap_insert(arena_runs_avail_get(arena, ind), + assert((npages << LG_PAGE) < chunksize); + assert(pind2sz(pind) <= chunksize); + arena_run_heap_insert(&arena->runs_avail[pind], arena_miscelm_get_mutable(chunk, pageind)); } @@ -226,11 +160,13 @@ static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( + pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_run_heap_remove(arena_runs_avail_get(arena, ind), + assert((npages << LG_PAGE) < chunksize); + assert(pind2sz(pind) <= chunksize); + arena_run_heap_remove(&arena->runs_avail[pind], arena_miscelm_get_mutable(chunk, pageind)); } @@ -649,14 +585,13 @@ arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize, - chunksize, zero, true); + chunksize, zero, commit, true); if (chunk != NULL) { if (arena_chunk_register(tsdn, arena, chunk, *zero)) { chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, chunksize, true); return (NULL); } - *commit = true; } if (chunk == NULL) { chunk = arena_chunk_alloc_internal_hard(tsdn, arena, @@ -953,6 +888,7 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, void *ret; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t csize = CHUNK_CEILING(usize); + bool commit = true; malloc_mutex_lock(tsdn, &arena->lock); @@ -964,7 +900,7 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, arena_nactive_add(arena, usize >> LG_PAGE); ret = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, csize, - alignment, zero, true); + alignment, zero, &commit, true); malloc_mutex_unlock(tsdn, &arena->lock); if (ret == NULL) { ret = arena_chunk_alloc_huge_hard(tsdn, arena, &chunk_hooks, @@ -1074,6 +1010,7 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize)); size_t udiff = usize - oldsize; size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); + bool commit = true; malloc_mutex_lock(tsdn, &arena->lock); @@ -1085,7 +1022,7 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, arena_nactive_add(arena, udiff >> LG_PAGE); err = (chunk_alloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff, - chunksize, zero, true) == NULL); + chunksize, zero, &commit, true) == NULL); malloc_mutex_unlock(tsdn, &arena->lock); if (err) { err = 
arena_chunk_ralloc_huge_expand_hard(tsdn, arena, @@ -1109,12 +1046,13 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, static arena_run_t * arena_run_first_best_fit(arena_t *arena, size_t size) { - szind_t ind, i; + pszind_t pind, i; - ind = size2index(run_quantize_ceil(size)); - for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) { + pind = psz2ind(run_quantize_ceil(size)); + + for (i = pind; pind2sz(i) <= chunksize; i++) { arena_chunk_map_misc_t *miscelm = arena_run_heap_first( - arena_runs_avail_get(arena, i)); + &arena->runs_avail[i]); if (miscelm != NULL) return (&miscelm->run); } @@ -1125,7 +1063,7 @@ arena_run_first_best_fit(arena_t *arena, size_t size) static arena_run_t * arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) { - arena_run_t *run = arena_run_first_best_fit(arena, s2u(size)); + arena_run_t *run = arena_run_first_best_fit(arena, size); if (run != NULL) { if (arena_run_split_large(arena, run, size, zero)) run = NULL; @@ -1256,14 +1194,14 @@ arena_decay_deadline_init(arena_t *arena) * Generate a new deadline that is uniformly random within the next * epoch after the current one. */ - nstime_copy(&arena->decay_deadline, &arena->decay_epoch); - nstime_add(&arena->decay_deadline, &arena->decay_interval); - if (arena->decay_time > 0) { + nstime_copy(&arena->decay.deadline, &arena->decay.epoch); + nstime_add(&arena->decay.deadline, &arena->decay.interval); + if (arena->decay.time > 0) { nstime_t jitter; - nstime_init(&jitter, prng_range(&arena->decay_jitter_state, - nstime_ns(&arena->decay_interval))); - nstime_add(&arena->decay_deadline, &jitter); + nstime_init(&jitter, prng_range_u64(&arena->decay.jitter_state, + nstime_ns(&arena->decay.interval))); + nstime_add(&arena->decay.deadline, &jitter); } } @@ -1273,7 +1211,7 @@ arena_decay_deadline_reached(const arena_t *arena, const nstime_t *time) assert(opt_purge == purge_mode_decay); - return (nstime_compare(&arena->decay_deadline, time) <= 0); + return (nstime_compare(&arena->decay.deadline, time) <= 0); } static size_t @@ -1298,92 +1236,103 @@ arena_decay_backlog_npages_limit(const arena_t *arena) */ sum = 0; for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) - sum += arena->decay_backlog[i] * h_steps[i]; + sum += arena->decay.backlog[i] * h_steps[i]; npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP); return (npages_limit_backlog); } static void -arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) +arena_decay_backlog_update_last(arena_t *arena) { - uint64_t nadvance_u64; - nstime_t delta; - size_t ndirty_delta; + size_t ndirty_delta = (arena->ndirty > arena->decay.ndirty) ? + arena->ndirty - arena->decay.ndirty : 0; + arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; +} - assert(opt_purge == purge_mode_decay); - assert(arena_decay_deadline_reached(arena, time)); +static void +arena_decay_backlog_update(arena_t *arena, uint64_t nadvance_u64) +{ - nstime_copy(&delta, time); - nstime_subtract(&delta, &arena->decay_epoch); - nadvance_u64 = nstime_divide(&delta, &arena->decay_interval); - assert(nadvance_u64 > 0); - - /* Add nadvance_u64 decay intervals to epoch. */ - nstime_copy(&delta, &arena->decay_interval); - nstime_imultiply(&delta, nadvance_u64); - nstime_add(&arena->decay_epoch, &delta); - - /* Set a new deadline. */ - arena_decay_deadline_init(arena); - - /* Update the backlog. 
*/ if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { - memset(arena->decay_backlog, 0, (SMOOTHSTEP_NSTEPS-1) * + memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) * sizeof(size_t)); } else { size_t nadvance_z = (size_t)nadvance_u64; assert((uint64_t)nadvance_z == nadvance_u64); - memmove(arena->decay_backlog, &arena->decay_backlog[nadvance_z], + memmove(arena->decay.backlog, &arena->decay.backlog[nadvance_z], (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); if (nadvance_z > 1) { - memset(&arena->decay_backlog[SMOOTHSTEP_NSTEPS - + memset(&arena->decay.backlog[SMOOTHSTEP_NSTEPS - nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); } } - ndirty_delta = (arena->ndirty > arena->decay_ndirty) ? arena->ndirty - - arena->decay_ndirty : 0; - arena->decay_ndirty = arena->ndirty; - arena->decay_backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; - arena->decay_backlog_npages_limit = - arena_decay_backlog_npages_limit(arena); + + arena_decay_backlog_update_last(arena); } -static size_t -arena_decay_npages_limit(arena_t *arena) +static void +arena_decay_epoch_advance_helper(arena_t *arena, const nstime_t *time) { - size_t npages_limit; + uint64_t nadvance_u64; + nstime_t delta; assert(opt_purge == purge_mode_decay); + assert(arena_decay_deadline_reached(arena, time)); - npages_limit = arena->decay_backlog_npages_limit; + nstime_copy(&delta, time); + nstime_subtract(&delta, &arena->decay.epoch); + nadvance_u64 = nstime_divide(&delta, &arena->decay.interval); + assert(nadvance_u64 > 0); - /* Add in any dirty pages created during the current epoch. */ - if (arena->ndirty > arena->decay_ndirty) - npages_limit += arena->ndirty - arena->decay_ndirty; + /* Add nadvance_u64 decay intervals to epoch. */ + nstime_copy(&delta, &arena->decay.interval); + nstime_imultiply(&delta, nadvance_u64); + nstime_add(&arena->decay.epoch, &delta); - return (npages_limit); + /* Set a new deadline. */ + arena_decay_deadline_init(arena); + + /* Update the backlog. 
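The refactoring above separates epoch advancement into backlog maintenance (arena_decay_backlog_update()) and purging (arena_decay_epoch_advance_purge()). The backlog records how many pages were dirtied in each of the last SMOOTHSTEP_NSTEPS epochs; the number of dirty pages allowed to remain is a fixed-point weighted sum of those entries, with recent epochs weighted near one and old epochs weighted near zero. The toy model below uses a shorter window and invented weights; jemalloc's real h_steps table and SMOOTHSTEP_BFP come from smoothstep.h.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define MY_NSTEPS	4
#define MY_BFP		16	/* Fixed-point fractional bits (assumed). */

/* Invented decay weights in 16-bit fixed point, oldest epoch first. */
static const uint64_t my_h_steps[MY_NSTEPS] = {0x1999, 0x4ccc, 0xb333, 0xe666};

typedef struct {
	size_t backlog[MY_NSTEPS];	/* Dirty-page deltas, oldest first. */
} my_decay_t;

/* Advance nadvance epochs: slide the window and record the newest delta. */
static void
my_backlog_advance(my_decay_t *d, size_t nadvance, size_t new_dirty)
{

	if (nadvance >= MY_NSTEPS)
		memset(d->backlog, 0, sizeof(d->backlog));
	else {
		memmove(d->backlog, &d->backlog[nadvance],
		    (MY_NSTEPS - nadvance) * sizeof(size_t));
		memset(&d->backlog[MY_NSTEPS - nadvance], 0,
		    nadvance * sizeof(size_t));
	}
	d->backlog[MY_NSTEPS - 1] = new_dirty;
}

/* Current limit on unpurged dirty pages: weighted sum of the backlog. */
static size_t
my_backlog_limit(const my_decay_t *d)
{
	uint64_t sum = 0;
	size_t i;

	for (i = 0; i < MY_NSTEPS; i++)
		sum += (uint64_t)d->backlog[i] * my_h_steps[i];
	return ((size_t)(sum >> MY_BFP));
}

arena_decay_epoch_advance_purge() below then purges down to that limit, so purging only happens when an epoch boundary is crossed.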
*/ + arena_decay_backlog_update(arena, nadvance_u64); +} + +static void +arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena) +{ + size_t ndirty_limit = arena_decay_backlog_npages_limit(arena); + + if (arena->ndirty > ndirty_limit) + arena_purge_to_limit(tsdn, arena, ndirty_limit); + arena->decay.ndirty = arena->ndirty; +} + +static void +arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, const nstime_t *time) +{ + + arena_decay_epoch_advance_helper(arena, time); + arena_decay_epoch_advance_purge(tsdn, arena); } static void arena_decay_init(arena_t *arena, ssize_t decay_time) { - arena->decay_time = decay_time; + arena->decay.time = decay_time; if (decay_time > 0) { - nstime_init2(&arena->decay_interval, decay_time, 0); - nstime_idivide(&arena->decay_interval, SMOOTHSTEP_NSTEPS); + nstime_init2(&arena->decay.interval, decay_time, 0); + nstime_idivide(&arena->decay.interval, SMOOTHSTEP_NSTEPS); } - nstime_init(&arena->decay_epoch, 0); - nstime_update(&arena->decay_epoch); - arena->decay_jitter_state = (uint64_t)(uintptr_t)arena; + nstime_init(&arena->decay.epoch, 0); + nstime_update(&arena->decay.epoch); + arena->decay.jitter_state = (uint64_t)(uintptr_t)arena; arena_decay_deadline_init(arena); - arena->decay_ndirty = arena->ndirty; - arena->decay_backlog_npages_limit = 0; - memset(arena->decay_backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); + arena->decay.ndirty = arena->ndirty; + memset(arena->decay.backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); } static bool @@ -1403,7 +1352,7 @@ arena_decay_time_get(tsdn_t *tsdn, arena_t *arena) ssize_t decay_time; malloc_mutex_lock(tsdn, &arena->lock); - decay_time = arena->decay_time; + decay_time = arena->decay.time; malloc_mutex_unlock(tsdn, &arena->lock); return (decay_time); @@ -1464,35 +1413,44 @@ static void arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) { nstime_t time; - size_t ndirty_limit; assert(opt_purge == purge_mode_decay); /* Purge all or nothing if the option is disabled. */ - if (arena->decay_time <= 0) { - if (arena->decay_time == 0) + if (arena->decay.time <= 0) { + if (arena->decay.time == 0) arena_purge_to_limit(tsdn, arena, 0); return; } - nstime_copy(&time, &arena->decay_epoch); - if (unlikely(nstime_update(&time))) { - /* Time went backwards. Force an epoch advance. */ - nstime_copy(&time, &arena->decay_deadline); + nstime_init(&time, 0); + nstime_update(&time); + if (unlikely(!nstime_monotonic() && nstime_compare(&arena->decay.epoch, + &time) > 0)) { + /* + * Time went backwards. Move the epoch back in time and + * generate a new deadline, with the expectation that time + * typically flows forward for long enough periods of time that + * epochs complete. Unfortunately, this strategy is susceptible + * to clock jitter triggering premature epoch advances, but + * clock jitter estimation and compensation isn't feasible here + * because calls into this code are event-driven. + */ + nstime_copy(&arena->decay.epoch, &time); + arena_decay_deadline_init(arena); + } else { + /* Verify that time does not go backwards. */ + assert(nstime_compare(&arena->decay.epoch, &time) <= 0); } - if (arena_decay_deadline_reached(arena, &time)) - arena_decay_epoch_advance(arena, &time); - - ndirty_limit = arena_decay_npages_limit(arena); - /* - * Don't try to purge unless the number of purgeable pages exceeds the - * current limit. + * If the deadline has been reached, advance to the current epoch and + * purge to the new limit if necessary. 
Note that dirty pages created + * during the current epoch are not subject to purge until a future + * epoch, so as a result purging only happens during epoch advances. */ - if (arena->ndirty <= ndirty_limit) - return; - arena_purge_to_limit(tsdn, arena, ndirty_limit); + if (arena_decay_deadline_reached(arena, &time)) + arena_decay_epoch_advance(tsdn, arena, &time); } void @@ -1561,7 +1519,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (rdelm == &chunkselm->rd) { extent_node_t *chunkselm_next; - bool zero; + bool zero, commit; UNUSED void *chunk; npages = extent_node_size_get(chunkselm) >> LG_PAGE; @@ -1575,10 +1533,11 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, * dalloc_node=false argument to chunk_alloc_cache(). */ zero = false; + commit = false; chunk = chunk_alloc_cache(tsdn, arena, chunk_hooks, extent_node_addr_get(chunkselm), extent_node_size_get(chunkselm), chunksize, &zero, - false); + &commit, false); assert(chunk == extent_node_addr_get(chunkselm)); assert(zero == extent_node_zeroed_get(chunkselm)); extent_node_dirty_insert(chunkselm, purge_runs_sentinel, @@ -1967,7 +1926,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) assert(!arena->purging); arena->nactive = 0; - for(i = 0; i < runs_avail_nclasses; i++) + for (i = 0; i < NPSIZES; i++) arena_run_heap_new(&arena->runs_avail[i]); malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); @@ -2606,7 +2565,8 @@ arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) * that is a multiple of the cacheline size, e.g. [0 .. 63) * 64 * for 4 KiB pages and 64-byte cachelines. */ - r = prng_lg_range(&arena->offset_state, LG_PAGE - LG_CACHELINE); + r = prng_lg_range_zu(&arena->offset_state, LG_PAGE - + LG_CACHELINE, false); random_offset = ((uintptr_t)r) << LG_CACHELINE; } else random_offset = 0; @@ -3391,7 +3351,7 @@ arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena->dss_prec]; *lg_dirty_mult = arena->lg_dirty_mult; - *decay_time = arena->decay_time; + *decay_time = arena->decay.time; *nactive += arena->nactive; *ndirty += arena->ndirty; } @@ -3496,23 +3456,19 @@ arena_t * arena_new(tsdn_t *tsdn, unsigned ind) { arena_t *arena; - size_t arena_size; unsigned i; - /* Compute arena size to incorporate sufficient runs_avail elements. */ - arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_heap_t) * - runs_avail_nclasses); /* * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly * because there is no way to clean up if base_alloc() OOMs. 
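As the comment here notes, arena_new() allocates the arena_t and, when stats are enabled, its large/huge stats arrays from a single base_alloc() call, because a partial failure could not be rolled back; the hunk that follows computes the region offsets with CACHELINE_CEILING()/QUANTUM_CEILING(). The sketch below shows the same carve-one-block-into-aligned-regions idea with invented types, malloc() in place of base_alloc(), and made-up alignment values.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define MY_CACHELINE	64
#define MY_QUANTUM	16
#define MY_CEIL(s, a)	(((s) + ((a) - 1)) & ~((size_t)(a) - 1))

typedef struct { int placeholder; } my_arena_t;		/* Stand-ins. */
typedef struct { uint64_t nmalloc; } my_lstats_t;
typedef struct { uint64_t nmalloc; } my_hstats_t;

/* One allocation, three contiguous regions. */
static my_arena_t *
my_arena_new(size_t nlclasses, size_t nhclasses)
{
	size_t loff = MY_CEIL(sizeof(my_arena_t), MY_CACHELINE);
	size_t hoff = loff + MY_CEIL(nlclasses * sizeof(my_lstats_t),
	    MY_QUANTUM);
	size_t total = hoff + nhclasses * sizeof(my_hstats_t);
	char *base = malloc(total);
	my_lstats_t *lstats;
	my_hstats_t *hstats;

	if (base == NULL)
		return (NULL);
	memset(base, 0, total);
	lstats = (my_lstats_t *)(base + loff);
	hstats = (my_hstats_t *)(base + hoff);
	(void)lstats; (void)hstats;	/* The real code stores these. */
	return ((my_arena_t *)base);
}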
*/ if (config_stats) { arena = (arena_t *)base_alloc(tsdn, - CACHELINE_CEILING(arena_size) + QUANTUM_CEILING(nlclasses * - sizeof(malloc_large_stats_t) + nhclasses) * - sizeof(malloc_huge_stats_t)); + CACHELINE_CEILING(sizeof(arena_t)) + + QUANTUM_CEILING((nlclasses * sizeof(malloc_large_stats_t))) + + (nhclasses * sizeof(malloc_huge_stats_t))); } else - arena = (arena_t *)base_alloc(tsdn, arena_size); + arena = (arena_t *)base_alloc(tsdn, sizeof(arena_t)); if (arena == NULL) return (NULL); @@ -3524,11 +3480,11 @@ arena_new(tsdn_t *tsdn, unsigned ind) if (config_stats) { memset(&arena->stats, 0, sizeof(arena_stats_t)); arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena - + CACHELINE_CEILING(arena_size)); + + CACHELINE_CEILING(sizeof(arena_t))); memset(arena->stats.lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena - + CACHELINE_CEILING(arena_size) + + + CACHELINE_CEILING(sizeof(arena_t)) + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t))); memset(arena->stats.hstats, 0, nhclasses * sizeof(malloc_huge_stats_t)); @@ -3548,10 +3504,10 @@ arena_new(tsdn_t *tsdn, unsigned ind) * deterministic seed. */ arena->offset_state = config_debug ? ind : - (uint64_t)(uintptr_t)arena; + (size_t)(uintptr_t)arena; } - arena->dss_prec = chunk_dss_prec_get(tsdn); + arena->dss_prec = chunk_dss_prec_get(); ql_new(&arena->achunks); @@ -3562,8 +3518,9 @@ arena_new(tsdn_t *tsdn, unsigned ind) arena->nactive = 0; arena->ndirty = 0; - for(i = 0; i < runs_avail_nclasses; i++) + for (i = 0; i < NPSIZES; i++) arena_run_heap_new(&arena->runs_avail[i]); + qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); @@ -3693,9 +3650,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) bin_info->reg0_offset = (uint32_t)(actual_run_size - (actual_nregs * bin_info->reg_interval) - pad_size + bin_info->redzone_size); - if (actual_run_size > small_maxrun) - small_maxrun = actual_run_size; - assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs * bin_info->reg_interval) + pad_size == bin_info->run_size); } @@ -3711,7 +3665,7 @@ bin_info_init(void) bin_info_run_size_calc(bin_info); \ bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); #define BIN_INFO_INIT_bin_no(index, size) -#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ +#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup) \ BIN_INFO_INIT_bin_##bin(index, (ZU(1)<> - LG_PAGE)); - if (small_run_tab == NULL) - return (true); - -#define TAB_INIT_bin_yes(index, size) { \ - arena_bin_info_t *bin_info = &arena_bin_info[index]; \ - small_run_tab[bin_info->run_size >> LG_PAGE] = true; \ - } -#define TAB_INIT_bin_no(index, size) -#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ - TAB_INIT_bin_##bin(index, (ZU(1)<> LG_PAGE)); - if (run_quantize_floor_tab == NULL) - return (true); - - run_quantize_ceil_tab = (size_t *)base_alloc(NULL, sizeof(size_t) * - (run_quantize_max >> LG_PAGE)); - if (run_quantize_ceil_tab == NULL) - return (true); - - for (i = 1; i <= run_quantize_max >> LG_PAGE; i++) { - size_t run_size = i << LG_PAGE; - - run_quantize_floor_tab[i-1] = - run_quantize_floor_compute(run_size); - run_quantize_ceil_tab[i-1] = - run_quantize_ceil_compute(run_size); - } - - return (false); -} - -bool +void arena_boot(void) { unsigned i; @@ -3822,15 +3721,6 @@ arena_boot(void) nhclasses = NSIZES - nlclasses - NBINS; bin_info_init(); - if (small_run_size_init()) - return (true); - if 
(run_quantize_init()) - return (true); - - runs_avail_bias = size2index(PAGE); - runs_avail_nclasses = size2index(run_quantize_max)+1 - runs_avail_bias; - - return (false); } void diff --git a/contrib/jemalloc/src/chunk.c b/contrib/jemalloc/src/chunk.c index f292c980c51e..07e26f77c9f3 100644 --- a/contrib/jemalloc/src/chunk.c +++ b/contrib/jemalloc/src/chunk.c @@ -316,10 +316,11 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, size_t i; size_t *p = (size_t *)(uintptr_t)ret; - JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size); for (i = 0; i < size / sizeof(size_t); i++) assert(p[i] == 0); } + if (config_valgrind) + JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size); } return (ret); } @@ -384,23 +385,21 @@ chunk_alloc_base(size_t size) void * chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, bool dalloc_node) + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, + bool dalloc_node) { void *ret; - bool commit; assert(size != 0); assert((size & chunksize_mask) == 0); assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - commit = true; ret = chunk_recycle(tsdn, arena, chunk_hooks, &arena->chunks_szad_cached, &arena->chunks_ad_cached, true, - new_addr, size, alignment, zero, &commit, dalloc_node); + new_addr, size, alignment, zero, commit, dalloc_node); if (ret == NULL) return (NULL); - assert(commit); if (config_valgrind) JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); return (ret); @@ -610,10 +609,10 @@ chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } static bool -chunk_dalloc_default_impl(tsdn_t *tsdn, void *chunk, size_t size) +chunk_dalloc_default_impl(void *chunk, size_t size) { - if (!have_dss || !chunk_in_dss(tsdn, chunk)) + if (!have_dss || !chunk_in_dss(chunk)) return (chunk_dalloc_mmap(chunk, size)); return (true); } @@ -622,11 +621,8 @@ static bool chunk_dalloc_default(void *chunk, size_t size, bool committed, unsigned arena_ind) { - tsdn_t *tsdn; - tsdn = tsdn_fetch(); - - return (chunk_dalloc_default_impl(tsdn, chunk, size)); + return (chunk_dalloc_default_impl(chunk, size)); } void @@ -644,7 +640,7 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* Try to deallocate. */ if (chunk_hooks->dalloc == chunk_dalloc_default) { /* Call directly to propagate tsdn. 
*/ - err = chunk_dalloc_default_impl(tsdn, chunk, size); + err = chunk_dalloc_default_impl(chunk, size); } else err = chunk_hooks->dalloc(chunk, size, committed, arena->ind); @@ -717,13 +713,12 @@ chunk_split_default(void *chunk, size_t size, size_t size_a, size_t size_b, } static bool -chunk_merge_default_impl(tsdn_t *tsdn, void *chunk_a, void *chunk_b) +chunk_merge_default_impl(void *chunk_a, void *chunk_b) { if (!maps_coalesce) return (true); - if (have_dss && chunk_in_dss(tsdn, chunk_a) != chunk_in_dss(tsdn, - chunk_b)) + if (have_dss && !chunk_dss_mergeable(chunk_a, chunk_b)) return (true); return (false); @@ -733,11 +728,8 @@ static bool chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, bool committed, unsigned arena_ind) { - tsdn_t *tsdn; - tsdn = tsdn_fetch(); - - return (chunk_merge_default_impl(tsdn, chunk_a, chunk_b)); + return (chunk_merge_default_impl(chunk_a, chunk_b)); } static rtree_node_elm_t * @@ -781,32 +773,11 @@ chunk_boot(void) chunksize_mask = chunksize - 1; chunk_npages = (chunksize >> LG_PAGE); - if (have_dss && chunk_dss_boot()) - return (true); + if (have_dss) + chunk_dss_boot(); if (rtree_new(&chunks_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk), chunks_rtree_node_alloc, NULL)) return (true); return (false); } - -void -chunk_prefork(tsdn_t *tsdn) -{ - - chunk_dss_prefork(tsdn); -} - -void -chunk_postfork_parent(tsdn_t *tsdn) -{ - - chunk_dss_postfork_parent(tsdn); -} - -void -chunk_postfork_child(tsdn_t *tsdn) -{ - - chunk_dss_postfork_child(tsdn); -} diff --git a/contrib/jemalloc/src/chunk_dss.c b/contrib/jemalloc/src/chunk_dss.c index 0b1f82bde8b8..85a13548f403 100644 --- a/contrib/jemalloc/src/chunk_dss.c +++ b/contrib/jemalloc/src/chunk_dss.c @@ -10,20 +10,19 @@ const char *dss_prec_names[] = { "N/A" }; -/* Current dss precedence default, used when creating new arenas. */ -static dss_prec_t dss_prec_default = DSS_PREC_DEFAULT; - /* - * Protects sbrk() calls. This avoids malloc races among threads, though it - * does not protect against races with threads that call sbrk() directly. + * Current dss precedence default, used when creating new arenas. NB: This is + * stored as unsigned rather than dss_prec_t because in principle there's no + * guarantee that sizeof(dss_prec_t) is the same as sizeof(unsigned), and we use + * atomic operations to synchronize the setting. */ -static malloc_mutex_t dss_mtx; +static unsigned dss_prec_default = (unsigned)DSS_PREC_DEFAULT; /* Base address of the DSS. */ static void *dss_base; -/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */ -static void *dss_prev; -/* Current upper limit on DSS addresses. */ +/* Atomic boolean indicating whether the DSS is exhausted. */ +static unsigned dss_exhausted; +/* Atomic current upper limit on DSS addresses. 
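The declarations above drop the dss_mtx-protected globals in favor of atomically accessed ones; as the comment says, the precedence default is stored as unsigned because an enum's width is not guaranteed to match what the atomic helpers operate on. A small C11 sketch of that store-an-enum-as-atomic-unsigned idea, with invented names:

#include <stdatomic.h>

typedef enum {
	MY_PREC_DISABLED,
	MY_PREC_PRIMARY,
	MY_PREC_SECONDARY
} my_prec_t;

/* Stored as unsigned so plain atomic loads/stores are well defined. */
static _Atomic unsigned my_prec_default = MY_PREC_SECONDARY;

static my_prec_t
my_prec_get(void)
{

	return ((my_prec_t)atomic_load_explicit(&my_prec_default,
	    memory_order_relaxed));
}

static void
my_prec_set(my_prec_t prec)
{

	atomic_store_explicit(&my_prec_default, (unsigned)prec,
	    memory_order_relaxed);
}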
*/ static void *dss_max; /******************************************************************************/ @@ -41,30 +40,59 @@ chunk_dss_sbrk(intptr_t increment) } dss_prec_t -chunk_dss_prec_get(tsdn_t *tsdn) +chunk_dss_prec_get(void) { dss_prec_t ret; if (!have_dss) return (dss_prec_disabled); - malloc_mutex_lock(tsdn, &dss_mtx); - ret = dss_prec_default; - malloc_mutex_unlock(tsdn, &dss_mtx); + ret = (dss_prec_t)atomic_read_u(&dss_prec_default); return (ret); } bool -chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec) +chunk_dss_prec_set(dss_prec_t dss_prec) { if (!have_dss) return (dss_prec != dss_prec_disabled); - malloc_mutex_lock(tsdn, &dss_mtx); - dss_prec_default = dss_prec; - malloc_mutex_unlock(tsdn, &dss_mtx); + atomic_write_u(&dss_prec_default, (unsigned)dss_prec); return (false); } +static void * +chunk_dss_max_update(void *new_addr) +{ + void *max_cur; + spin_t spinner; + + /* + * Get the current end of the DSS as max_cur and assure that dss_max is + * up to date. + */ + spin_init(&spinner); + while (true) { + void *max_prev = atomic_read_p(&dss_max); + + max_cur = chunk_dss_sbrk(0); + if ((uintptr_t)max_prev > (uintptr_t)max_cur) { + /* + * Another thread optimistically updated dss_max. Wait + * for it to finish. + */ + spin_adaptive(&spinner); + continue; + } + if (!atomic_cas_p(&dss_max, max_prev, max_cur)) + break; + } + /* Fixed new_addr can only be supported if it is at the edge of DSS. */ + if (new_addr != NULL && max_cur != new_addr) + return (NULL); + + return (max_cur); +} + void * chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) @@ -80,28 +108,20 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if ((intptr_t)size < 0) return (NULL); - malloc_mutex_lock(tsdn, &dss_mtx); - if (dss_prev != (void *)-1) { - + if (!atomic_read_u(&dss_exhausted)) { /* * The loop is necessary to recover from races with other * threads that are using the DSS for something other than * malloc. */ - do { - void *ret, *cpad, *dss_next; + while (true) { + void *ret, *cpad, *max_cur, *dss_next, *dss_prev; size_t gap_size, cpad_size; intptr_t incr; - /* Avoid an unnecessary system call. */ - if (new_addr != NULL && dss_max != new_addr) - break; - /* Get the current end of the DSS. */ - dss_max = chunk_dss_sbrk(0); - - /* Make sure the earlier condition still holds. */ - if (new_addr != NULL && dss_max != new_addr) - break; + max_cur = chunk_dss_max_update(new_addr); + if (max_cur == NULL) + goto label_oom; /* * Calculate how much padding is necessary to @@ -120,17 +140,23 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, cpad_size = (uintptr_t)ret - (uintptr_t)cpad; dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)dss_max || - (uintptr_t)dss_next < (uintptr_t)dss_max) { - /* Wrap-around. */ - malloc_mutex_unlock(tsdn, &dss_mtx); - return (NULL); - } + (uintptr_t)dss_next < (uintptr_t)dss_max) + goto label_oom; /* Wrap-around. */ incr = gap_size + cpad_size + size; + + /* + * Optimistically update dss_max, and roll back below if + * sbrk() fails. No other thread will try to extend the + * DSS while dss_max is greater than the current DSS + * max reported by sbrk(0). + */ + if (atomic_cas_p(&dss_max, max_cur, dss_next)) + continue; + + /* Try to allocate. */ dss_prev = chunk_dss_sbrk(incr); - if (dss_prev == dss_max) { + if (dss_prev == max_cur) { /* Success. 
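The loop above is the heart of the lockless DSS support: a thread reads the current break, optimistically CASes dss_max forward to claim its region, performs the real sbrk(), and rolls the CAS back if sbrk() fails or an outside caller moved the break. The standalone model below keeps only the reserve/commit shape, substituting a static buffer for the DSS so there is no sbrk(), no gap or alignment handling, and no rollback path to exercise.

#include <stdatomic.h>
#include <stddef.h>

static char my_heap[1 << 20];			/* Pretend DSS. */
static char *_Atomic my_max = my_heap;		/* Current end of the "DSS". */

static void *
my_alloc_dss(size_t size)
{

	while (1) {
		char *cur = atomic_load(&my_max);
		char *next = cur + size;

		if (next > my_heap + sizeof(my_heap))
			return (NULL);			/* Exhausted. */
		/* Optimistically publish the new end before extending. */
		if (!atomic_compare_exchange_weak(&my_max, &cur, next))
			continue;			/* Lost a race. */
		/*
		 * jemalloc calls sbrk(incr) here and, on failure, CASes
		 * the maximum back from next to cur. A static buffer
		 * cannot fail to "extend", so the reservation is the
		 * result.
		 */
		return (cur);
	}
}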
*/ - dss_max = dss_next; - malloc_mutex_unlock(tsdn, &dss_mtx); if (cpad_size != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; @@ -147,68 +173,65 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, *commit = pages_decommit(ret, size); return (ret); } - } while (dss_prev != (void *)-1); - } - malloc_mutex_unlock(tsdn, &dss_mtx); + /* + * Failure, whether due to OOM or a race with a raw + * sbrk() call from outside the allocator. Try to roll + * back optimistic dss_max update; if rollback fails, + * it's due to another caller of this function having + * succeeded since this invocation started, in which + * case rollback is not necessary. + */ + atomic_cas_p(&dss_max, dss_next, max_cur); + if (dss_prev == (void *)-1) { + /* OOM. */ + atomic_write_u(&dss_exhausted, (unsigned)true); + goto label_oom; + } + } + } +label_oom: return (NULL); } -bool -chunk_in_dss(tsdn_t *tsdn, void *chunk) +static bool +chunk_in_dss_helper(void *chunk, void *max) { - bool ret; - cassert(have_dss); - - malloc_mutex_lock(tsdn, &dss_mtx); - if ((uintptr_t)chunk >= (uintptr_t)dss_base - && (uintptr_t)chunk < (uintptr_t)dss_max) - ret = true; - else - ret = false; - malloc_mutex_unlock(tsdn, &dss_mtx); - - return (ret); + return ((uintptr_t)chunk >= (uintptr_t)dss_base && (uintptr_t)chunk < + (uintptr_t)max); } bool +chunk_in_dss(void *chunk) +{ + + cassert(have_dss); + + return (chunk_in_dss_helper(chunk, atomic_read_p(&dss_max))); +} + +bool +chunk_dss_mergeable(void *chunk_a, void *chunk_b) +{ + void *max; + + cassert(have_dss); + + max = atomic_read_p(&dss_max); + return (chunk_in_dss_helper(chunk_a, max) == + chunk_in_dss_helper(chunk_b, max)); +} + +void chunk_dss_boot(void) { cassert(have_dss); - if (malloc_mutex_init(&dss_mtx, "dss", WITNESS_RANK_DSS)) - return (true); dss_base = chunk_dss_sbrk(0); - dss_prev = dss_base; + dss_exhausted = (unsigned)(dss_base == (void *)-1); dss_max = dss_base; - - return (false); -} - -void -chunk_dss_prefork(tsdn_t *tsdn) -{ - - if (have_dss) - malloc_mutex_prefork(tsdn, &dss_mtx); -} - -void -chunk_dss_postfork_parent(tsdn_t *tsdn) -{ - - if (have_dss) - malloc_mutex_postfork_parent(tsdn, &dss_mtx); -} - -void -chunk_dss_postfork_child(tsdn_t *tsdn) -{ - - if (have_dss) - malloc_mutex_postfork_child(tsdn, &dss_mtx); } /******************************************************************************/ diff --git a/contrib/jemalloc/src/ckh.c b/contrib/jemalloc/src/ckh.c index 747c1c865fa3..159bd8ae1618 100644 --- a/contrib/jemalloc/src/ckh.c +++ b/contrib/jemalloc/src/ckh.c @@ -40,8 +40,8 @@ /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static bool ckh_grow(tsdn_t *tsdn, ckh_t *ckh); -static void ckh_shrink(tsdn_t *tsdn, ckh_t *ckh); +static bool ckh_grow(tsd_t *tsd, ckh_t *ckh); +static void ckh_shrink(tsd_t *tsd, ckh_t *ckh); /******************************************************************************/ @@ -99,7 +99,8 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * Cycle through the cells in the bucket, starting at a random position. * The randomness avoids worst-case search overhead as buckets fill up. 
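The comment above explains why ckh_try_bucket_insert() starts probing at a random cell within the bucket; the call that follows merely switches to the renamed prng_lg_range_u64(). The idiom itself, walking all 2^LG_CKH_BUCKET_CELLS cells starting from a random offset, looks like the sketch below, where the bucket geometry and names are invented and the random offset is assumed to come from a PRNG such as the one above.

#include <stddef.h>
#include <stdint.h>

#define MY_LG_CELLS	2			/* 4 cells per bucket. */
#define MY_CELLS	(1U << MY_LG_CELLS)

/*
 * Probe the bucket's cells starting at a random offset; return the index of
 * the first empty cell, or -1 if the bucket is full.
 */
static ptrdiff_t
my_bucket_probe(void **cells, size_t bucket, uint32_t random_offset)
{
	unsigned i;

	for (i = 0; i < MY_CELLS; i++) {
		size_t idx = (bucket << MY_LG_CELLS) +
		    ((i + random_offset) & (MY_CELLS - 1));

		if (cells[idx] == NULL)
			return ((ptrdiff_t)idx);
	}
	return (-1);
}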
*/ - offset = (unsigned)prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS); + offset = (unsigned)prng_lg_range_u64(&ckh->prng_state, + LG_CKH_BUCKET_CELLS); for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; @@ -141,7 +142,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, * were an item for which both hashes indicated the same * bucket. */ - i = (unsigned)prng_lg_range(&ckh->prng_state, + i = (unsigned)prng_lg_range_u64(&ckh->prng_state, LG_CKH_BUCKET_CELLS); cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; assert(cell->key != NULL); @@ -244,7 +245,7 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) } static bool -ckh_grow(tsdn_t *tsdn, ckh_t *ckh) +ckh_grow(tsd_t *tsd, ckh_t *ckh) { bool ret; ckhc_t *tab, *ttab; @@ -270,8 +271,8 @@ ckh_grow(tsdn_t *tsdn, ckh_t *ckh) ret = true; goto label_return; } - tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, - true, arena_ichoose(tsdn, NULL)); + tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, + true, NULL, true, arena_ichoose(tsd, NULL)); if (tab == NULL) { ret = true; goto label_return; @@ -283,12 +284,12 @@ ckh_grow(tsdn_t *tsdn, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsdn, tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tab, NULL, true, true); break; } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloctm(tsdn, ckh->tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -299,7 +300,7 @@ ckh_grow(tsdn_t *tsdn, ckh_t *ckh) } static void -ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) +ckh_shrink(tsd_t *tsd, ckh_t *ckh) { ckhc_t *tab, *ttab; size_t usize; @@ -314,8 +315,8 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return; - tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, true, - arena_ichoose(tsdn, NULL)); + tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, NULL, + true, arena_ichoose(tsd, NULL)); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -330,7 +331,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsdn, tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tab, NULL, true, true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -338,7 +339,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsdn, ckh->tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -347,7 +348,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) } bool -ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) { bool ret; @@ -391,8 +392,8 @@ ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ret = true; goto label_return; } - ckh->tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, - true, arena_ichoose(tsdn, NULL)); + ckh->tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, + NULL, true, arena_ichoose(tsd, NULL)); if (ckh->tab == NULL) { ret = true; goto label_return; @@ -404,7 +405,7 @@ ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, } void -ckh_delete(tsdn_t *tsdn, ckh_t *ckh) +ckh_delete(tsd_t *tsd, ckh_t *ckh) { assert(ckh != NULL); @@ -421,7 +422,7 @@ ckh_delete(tsdn_t *tsdn, ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsdn, ckh->tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); if (config_debug) memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); } @@ -456,7 +457,7 @@ ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) } bool -ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data) +ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data) { bool ret; @@ -468,7 +469,7 @@ ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data) #endif while (ckh_try_insert(ckh, &key, &data)) { - if (ckh_grow(tsdn, ckh)) { + if (ckh_grow(tsd, ckh)) { ret = true; goto label_return; } @@ -480,7 +481,7 @@ ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data) } bool -ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key, +ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data) { size_t cell; @@ -502,7 +503,7 @@ ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key, + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets > ckh->lg_minbuckets) { /* Ignore error due to OOM. 
*/ - ckh_shrink(tsdn, ckh); + ckh_shrink(tsd, ckh); } return (false); diff --git a/contrib/jemalloc/src/ctl.c b/contrib/jemalloc/src/ctl.c index dad800861679..bc78b20558a5 100644 --- a/contrib/jemalloc/src/ctl.c +++ b/contrib/jemalloc/src/ctl.c @@ -1478,7 +1478,7 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); - if (tcaches_create(tsd_tsdn(tsd), &tcache_ind)) { + if (tcaches_create(tsd, &tcache_ind)) { ret = EFAULT; goto label_return; } @@ -1685,11 +1685,11 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, dss_prec_old = arena_dss_prec_get(tsd_tsdn(tsd), arena); } else { if (dss_prec != dss_prec_limit && - chunk_dss_prec_set(tsd_tsdn(tsd), dss_prec)) { + chunk_dss_prec_set(dss_prec)) { ret = EFAULT; goto label_return; } - dss_prec_old = chunk_dss_prec_get(tsd_tsdn(tsd)); + dss_prec_old = chunk_dss_prec_get(); } dss = dss_prec_names[dss_prec_old]; @@ -2100,7 +2100,7 @@ prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (lg_sample >= (sizeof(uint64_t) << 3)) lg_sample = (sizeof(uint64_t) << 3) - 1; - prof_reset(tsd_tsdn(tsd), lg_sample); + prof_reset(tsd, lg_sample); ret = 0; label_return: diff --git a/contrib/jemalloc/src/huge.c b/contrib/jemalloc/src/huge.c index 3a2877ca8d6e..62e6932b7f00 100644 --- a/contrib/jemalloc/src/huge.c +++ b/contrib/jemalloc/src/huge.c @@ -54,6 +54,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, { void *ret; size_t ausize; + arena_t *iarena; extent_node_t *node; bool is_zeroed; @@ -67,8 +68,9 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, assert(ausize >= chunksize); /* Allocate an extent node with which to track the chunk. */ + iarena = (!tsdn_null(tsdn)) ? arena_ichoose(tsdn_tsd(tsdn), NULL) : a0get(); node = ipallocztm(tsdn, CACHELINE_CEILING(sizeof(extent_node_t)), - CACHELINE, false, NULL, true, arena_ichoose(tsdn, arena)); + CACHELINE, false, NULL, true, iarena); if (node == NULL) return (NULL); @@ -114,7 +116,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, #define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl) #endif static void -huge_dalloc_junk(tsdn_t *tsdn, void *ptr, size_t usize) +huge_dalloc_junk(void *ptr, size_t usize) { if (config_fill && have_dss && unlikely(opt_junk_free)) { @@ -122,7 +124,7 @@ huge_dalloc_junk(tsdn_t *tsdn, void *ptr, size_t usize) * Only bother junk filling if the chunk isn't about to be * unmapped. 
*/ - if (!config_munmap || (have_dss && chunk_in_dss(tsdn, ptr))) + if (!config_munmap || (have_dss && chunk_in_dss(ptr))) memset(ptr, JEMALLOC_FREE_JUNK, usize); } } @@ -221,7 +223,7 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, if (oldsize > usize) { size_t sdiff = oldsize - usize; if (config_fill && unlikely(opt_junk_free)) { - huge_dalloc_junk(tsdn, (void *)((uintptr_t)ptr + usize), + huge_dalloc_junk((void *)((uintptr_t)ptr + usize), sdiff); post_zeroed = false; } else { @@ -402,7 +404,7 @@ huge_dalloc(tsdn_t *tsdn, void *ptr) ql_remove(&arena->huge, node, ql_link); malloc_mutex_unlock(tsdn, &arena->huge_mtx); - huge_dalloc_junk(tsdn, extent_node_addr_get(node), + huge_dalloc_junk(extent_node_addr_get(node), extent_node_size_get(node)); arena_chunk_dalloc_huge(tsdn, extent_node_arena_get(node), extent_node_addr_get(node), extent_node_size_get(node)); diff --git a/contrib/jemalloc/src/jemalloc.c b/contrib/jemalloc/src/jemalloc.c index 46dd1d12110e..f659b55c395a 100644 --- a/contrib/jemalloc/src/jemalloc.c +++ b/contrib/jemalloc/src/jemalloc.c @@ -9,7 +9,11 @@ const char *__malloc_options_1_0 = NULL; __sym_compat(_malloc_options, __malloc_options_1_0, FBSD_1.0); /* Runtime configuration options. */ -const char *je_malloc_conf JEMALLOC_ATTR(weak); +const char *je_malloc_conf +#ifndef _WIN32 + JEMALLOC_ATTR(weak) +#endif + ; bool opt_abort = #ifdef JEMALLOC_DEBUG true @@ -89,14 +93,25 @@ enum { }; static uint8_t malloc_slow_flags; -/* Last entry for overflow detection only. */ JEMALLOC_ALIGNED(CACHELINE) -const size_t index2size_tab[NSIZES+1] = { -#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ +const size_t pind2sz_tab[NPSIZES] = { +#define PSZ_yes(lg_grp, ndelta, lg_delta) \ + (((ZU(1)<= 2.6 has the CPU_COUNT macro. + * + * glibc's sysconf() uses isspace(). glibc allocates for the first time + * *before* setting up the isspace tables. Therefore we need a + * different method to get the number of CPUs. 
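The malloc_ncpus() change here switches Linux over to pthread_getaffinity_np() plus CPU_COUNT() because, as the comment notes, glibc's sysconf() path can allocate before the isspace tables are set up; the replacement code follows in the next hunk. A standalone version of the same probe (glibc-specific, needs _GNU_SOURCE; the fallback value is an assumption):

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>

static unsigned
my_ncpus(void)
{
	cpu_set_t set;

	/* Count the CPUs this thread may run on; no allocation involved. */
	if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) != 0)
		return (1);	/* Conservative fallback on error. */
	return ((unsigned)CPU_COUNT(&set));
}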
+ */ + { + cpu_set_t set; + + pthread_getaffinity_np(pthread_self(), sizeof(set), &set); + result = CPU_COUNT(&set); + } #else result = sysconf(_SC_NPROCESSORS_ONLN); #endif @@ -1107,8 +1144,7 @@ malloc_conf_init(void) for (i = 0; i < dss_prec_limit; i++) { if (strncmp(dss_prec_names[i], v, vlen) == 0) { - if (chunk_dss_prec_set(NULL, - i)) { + if (chunk_dss_prec_set(i)) { malloc_conf_error( "Error setting dss", k, klen, v, vlen); @@ -1153,9 +1189,20 @@ malloc_conf_init(void) if (config_fill) { if (CONF_MATCH("junk")) { if (CONF_MATCH_VALUE("true")) { - opt_junk = "true"; - opt_junk_alloc = opt_junk_free = - true; + if (config_valgrind && + unlikely(in_valgrind)) { + malloc_conf_error( + "Deallocation-time " + "junk filling cannot " + "be enabled while " + "running inside " + "Valgrind", k, klen, v, + vlen); + } else { + opt_junk = "true"; + opt_junk_alloc = true; + opt_junk_free = true; + } } else if (CONF_MATCH_VALUE("false")) { opt_junk = "false"; opt_junk_alloc = opt_junk_free = @@ -1165,9 +1212,20 @@ malloc_conf_init(void) opt_junk_alloc = true; opt_junk_free = false; } else if (CONF_MATCH_VALUE("free")) { - opt_junk = "free"; - opt_junk_alloc = false; - opt_junk_free = true; + if (config_valgrind && + unlikely(in_valgrind)) { + malloc_conf_error( + "Deallocation-time " + "junk filling cannot " + "be enabled while " + "running inside " + "Valgrind", k, klen, v, + vlen); + } else { + opt_junk = "free"; + opt_junk_alloc = false; + opt_junk_free = true; + } } else { malloc_conf_error( "Invalid conf value", k, @@ -1253,11 +1311,14 @@ malloc_init_hard_needed(void) } #ifdef JEMALLOC_THREADED_INIT if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) { + spin_t spinner; + /* Busy-wait until the initializing thread completes. */ + spin_init(&spinner); do { - malloc_mutex_unlock(NULL, &init_lock); - CPU_SPINWAIT; - malloc_mutex_lock(NULL, &init_lock); + malloc_mutex_unlock(TSDN_NULL, &init_lock); + spin_adaptive(&spinner); + malloc_mutex_lock(TSDN_NULL, &init_lock); } while (!malloc_initialized()); return (false); } @@ -1291,8 +1352,7 @@ malloc_init_hard_a0_locked() return (true); if (config_prof) prof_boot1(); - if (arena_boot()) - return (true); + arena_boot(); if (config_tcache && tcache_boot(TSDN_NULL)) return (true); if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS)) @@ -1423,7 +1483,7 @@ malloc_init_hard(void) return (true); malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); - if (config_prof && prof_boot2(tsd_tsdn(tsd))) { + if (config_prof && prof_boot2(tsd)) { malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); return (true); } @@ -1998,6 +2058,29 @@ JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc; JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = je_memalign; # endif + +#ifdef CPU_COUNT +/* + * To enable static linking with glibc, the libc specific malloc interface must + * be implemented also, so none of glibc's malloc.o functions are added to the + * link. + */ +#define ALIAS(je_fn) __attribute__((alias (#je_fn), used)) +/* To force macro expansion of je_ prefix before stringification. 
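The __libc_* definitions that follow alias glibc's internal allocator entry points to the je_* implementations, so a static link resolves them to jemalloc and pulls in none of glibc's malloc.o objects. The mechanism is GCC's alias attribute on ELF targets; a minimal generic example with toy names:

#include <stdio.h>

/* The real implementation. */
void
my_impl(void)
{

	puts("impl");
}

/* A second symbol that resolves to the same definition at link time. */
void my_alias(void) __attribute__((alias("my_impl"), used));

int
main(void)
{

	my_alias();	/* Calls my_impl() through the alias. */
	return (0);
}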
*/ +#define PREALIAS(je_fn) ALIAS(je_fn) +void *__libc_malloc(size_t size) PREALIAS(je_malloc); +void __libc_free(void* ptr) PREALIAS(je_free); +void *__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc); +void *__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc); +void *__libc_memalign(size_t align, size_t s) PREALIAS(je_memalign); +void *__libc_valloc(size_t size) PREALIAS(je_valloc); +int __posix_memalign(void** r, size_t a, size_t s) + PREALIAS(je_posix_memalign); +#undef PREALIAS +#undef ALIAS + +#endif + #endif /* @@ -2852,7 +2935,6 @@ _malloc_prefork(void) } } base_prefork(tsd_tsdn(tsd)); - chunk_prefork(tsd_tsdn(tsd)); for (i = 0; i < narenas; i++) { if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) arena_prefork3(tsd_tsdn(tsd), arena); @@ -2881,7 +2963,6 @@ _malloc_postfork(void) witness_postfork_parent(tsd); /* Release all mutexes, now that fork() has completed. */ - chunk_postfork_parent(tsd_tsdn(tsd)); base_postfork_parent(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; @@ -2906,7 +2987,6 @@ jemalloc_postfork_child(void) witness_postfork_child(tsd); /* Release all mutexes, now that fork() has completed. */ - chunk_postfork_child(tsd_tsdn(tsd)); base_postfork_child(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; diff --git a/contrib/jemalloc/src/mutex.c b/contrib/jemalloc/src/mutex.c index a24e42012d6c..13f8d799de1e 100644 --- a/contrib/jemalloc/src/mutex.c +++ b/contrib/jemalloc/src/mutex.c @@ -91,6 +91,8 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) _CRT_SPINCOUNT)) return (true); # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + mutex->lock = OS_UNFAIR_LOCK_INIT; #elif (defined(JEMALLOC_OSSPIN)) mutex->lock = 0; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) diff --git a/contrib/jemalloc/src/nstime.c b/contrib/jemalloc/src/nstime.c index aad2c260e66c..0948e29faffa 100644 --- a/contrib/jemalloc/src/nstime.c +++ b/contrib/jemalloc/src/nstime.c @@ -97,6 +97,76 @@ nstime_divide(const nstime_t *time, const nstime_t *divisor) return (time->ns / divisor->ns); } +#ifdef _WIN32 +# define NSTIME_MONOTONIC true +static void +nstime_get(nstime_t *time) +{ + FILETIME ft; + uint64_t ticks_100ns; + + GetSystemTimeAsFileTime(&ft); + ticks_100ns = (((uint64_t)ft.dwHighDateTime) << 32) | ft.dwLowDateTime; + + nstime_init(time, ticks_100ns * 100); +} +#elif JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +# define NSTIME_MONOTONIC true +static void +nstime_get(nstime_t *time) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); + nstime_init2(time, ts.tv_sec, ts.tv_nsec); +} +#elif JEMALLOC_HAVE_CLOCK_MONOTONIC +# define NSTIME_MONOTONIC true +static void +nstime_get(nstime_t *time) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + nstime_init2(time, ts.tv_sec, ts.tv_nsec); +} +#elif JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +# define NSTIME_MONOTONIC true +static void +nstime_get(nstime_t *time) +{ + + nstime_init(time, mach_absolute_time()); +} +#else +# define NSTIME_MONOTONIC false +static void +nstime_get(nstime_t *time) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + nstime_init2(time, tv.tv_sec, tv.tv_usec * 1000); +} +#endif + +#ifdef JEMALLOC_JET +#undef nstime_monotonic +#define nstime_monotonic JEMALLOC_N(n_nstime_monotonic) +#endif +bool +nstime_monotonic(void) +{ + + return (NSTIME_MONOTONIC); +#undef NSTIME_MONOTONIC +} +#ifdef JEMALLOC_JET +#undef nstime_monotonic +#define nstime_monotonic 
JEMALLOC_N(nstime_monotonic) +nstime_monotonic_t *nstime_monotonic = JEMALLOC_N(n_nstime_monotonic); +#endif + #ifdef JEMALLOC_JET #undef nstime_update #define nstime_update JEMALLOC_N(n_nstime_update) @@ -107,33 +177,7 @@ nstime_update(nstime_t *time) nstime_t old_time; nstime_copy(&old_time, time); - -#ifdef _WIN32 - { - FILETIME ft; - uint64_t ticks; - GetSystemTimeAsFileTime(&ft); - ticks = (((uint64_t)ft.dwHighDateTime) << 32) | - ft.dwLowDateTime; - time->ns = ticks * 100; - } -#elif JEMALLOC_CLOCK_GETTIME - { - struct timespec ts; - - if (sysconf(_SC_MONOTONIC_CLOCK) > 0) - clock_gettime(CLOCK_MONOTONIC, &ts); - else - clock_gettime(CLOCK_REALTIME, &ts); - time->ns = ts.tv_sec * BILLION + ts.tv_nsec; - } -#else - { - struct timeval tv; - gettimeofday(&tv, NULL); - time->ns = tv.tv_sec * BILLION + tv.tv_usec * 1000; - } -#endif + nstime_get(time); /* Handle non-monotonic clocks. */ if (unlikely(nstime_compare(&old_time, time) > 0)) { diff --git a/contrib/jemalloc/src/pages.c b/contrib/jemalloc/src/pages.c index 2a9b7e374cd0..647952ac3ef6 100644 --- a/contrib/jemalloc/src/pages.c +++ b/contrib/jemalloc/src/pages.c @@ -207,6 +207,11 @@ os_overcommits_sysctl(void) #endif #ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY +/* + * Use syscall(2) rather than {open,read,close}(2) when possible to avoid + * reentry during bootstrapping if another library has interposed system call + * wrappers. + */ static bool os_overcommits_proc(void) { @@ -214,11 +219,26 @@ os_overcommits_proc(void) char buf[1]; ssize_t nread; +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_open) + fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); +#else fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); +#endif if (fd == -1) return (false); /* Error. */ +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_read) + nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); +#else nread = read(fd, &buf, sizeof(buf)); +#endif + +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_close) + syscall(SYS_close, fd); +#else + close(fd); +#endif + if (nread < 1) return (false); /* Error. */ /* diff --git a/contrib/jemalloc/src/prof.c b/contrib/jemalloc/src/prof.c index c1f58d462bec..c89dade1f192 100644 --- a/contrib/jemalloc/src/prof.c +++ b/contrib/jemalloc/src/prof.c @@ -125,7 +125,7 @@ static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx); static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached); -static void prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, +static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached); static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); @@ -591,7 +591,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, assert(gctx->nlimbo != 0); if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { /* Remove gctx from bt2gctx. */ - if (ckh_remove(tsd_tsdn(tsd), &bt2gctx, &gctx->bt, NULL, NULL)) + if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) not_reached(); prof_leave(tsd, tdata_self); /* Destroy gctx. 
*/ @@ -651,7 +651,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) assert(tctx->cnts.accumobjs == 0); assert(tctx->cnts.accumbytes == 0); - ckh_remove(tsd_tsdn(tsd), &tdata->bt2tctx, &gctx->bt, NULL, NULL); + ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); @@ -704,7 +704,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); if (destroy_tdata) - prof_tdata_destroy(tsd_tsdn(tsd), tdata, false); + prof_tdata_destroy(tsd, tdata, false); if (destroy_tctx) idalloctm(tsd_tsdn(tsd), tctx, NULL, true, true); @@ -733,7 +733,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, return (true); } btkey.p = &gctx.p->bt; - if (ckh_insert(tsd_tsdn(tsd), &bt2gctx, btkey.v, gctx.v)) { + if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); idalloctm(tsd_tsdn(tsd), gctx.v, NULL, true, true); @@ -795,7 +795,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) /* Link a prof_tctx_t into gctx for this thread. */ ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), size2index(sizeof(prof_tctx_t)), false, NULL, true, - arena_ichoose(tsd_tsdn(tsd), NULL), true); + arena_ichoose(tsd, NULL), true); if (ret.p == NULL) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); @@ -810,8 +810,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) ret.p->prepared = true; ret.p->state = prof_tctx_state_initializing; malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); - error = ckh_insert(tsd_tsdn(tsd), &tdata->bt2tctx, btkey, - ret.v); + error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (error) { if (new_gctx) @@ -875,7 +874,7 @@ prof_sample_threshold_update(prof_tdata_t *tdata) * pp 500 * (http://luc.devroye.org/rnbookindex.html) */ - r = prng_lg_range(&tdata->prng_state, 53); + r = prng_lg_range_u64(&tdata->prng_state, 53); u = (double)r * (1.0/9007199254740992.0L); tdata->bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) @@ -1791,7 +1790,7 @@ prof_thr_uid_alloc(tsdn_t *tsdn) } static prof_tdata_t * -prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, +prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active) { prof_tdata_t *tdata; @@ -1799,7 +1798,7 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, cassert(config_prof); /* Initialize an empty cache for this thread. 
*/ - tdata = (prof_tdata_t *)iallocztm(tsdn, sizeof(prof_tdata_t), + tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), size2index(sizeof(prof_tdata_t)), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (tdata == NULL) @@ -1813,9 +1812,9 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, tdata->expired = false; tdata->tctx_uid_next = 0; - if (ckh_new(tsdn, &tdata->bt2tctx, PROF_CKH_MINITEMS, - prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsdn, tdata, NULL, true, true); + if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, + prof_bt_keycomp)) { + idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true); return (NULL); } @@ -1829,19 +1828,19 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, tdata->dumping = false; tdata->active = active; - malloc_mutex_lock(tsdn, &tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_insert(&tdatas, tdata); - malloc_mutex_unlock(tsdn, &tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); return (tdata); } prof_tdata_t * -prof_tdata_init(tsdn_t *tsdn) +prof_tdata_init(tsd_t *tsd) { - return (prof_tdata_init_impl(tsdn, prof_thr_uid_alloc(tsdn), 0, NULL, - prof_thread_active_init_get(tsdn))); + return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, + NULL, prof_thread_active_init_get(tsd_tsdn(tsd)))); } static bool @@ -1866,31 +1865,29 @@ prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, } static void -prof_tdata_destroy_locked(tsdn_t *tsdn, prof_tdata_t *tdata, +prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_assert_owner(tsdn, &tdatas_mtx); - - assert(tsdn_null(tsdn) || tsd_prof_tdata_get(tsdn_tsd(tsdn)) != tdata); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_remove(&tdatas, tdata); assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); if (tdata->thread_name != NULL) - idalloctm(tsdn, tdata->thread_name, NULL, true, true); - ckh_delete(tsdn, &tdata->bt2tctx); - idalloctm(tsdn, tdata, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true); + ckh_delete(tsd, &tdata->bt2tctx); + idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true); } static void -prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached) +prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_lock(tsdn, &tdatas_mtx); - prof_tdata_destroy_locked(tsdn, tdata, even_if_attached); - malloc_mutex_unlock(tsdn, &tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + prof_tdata_destroy_locked(tsd, tdata, even_if_attached); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); } static void @@ -1913,7 +1910,7 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) destroy_tdata = false; malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (destroy_tdata) - prof_tdata_destroy(tsd_tsdn(tsd), tdata, true); + prof_tdata_destroy(tsd, tdata, true); } prof_tdata_t * @@ -1926,8 +1923,8 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) bool active = tdata->active; prof_tdata_detach(tsd, tdata); - return (prof_tdata_init_impl(tsd_tsdn(tsd), thr_uid, thr_discrim, - thread_name, active)); + return (prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, + active)); } static bool @@ -1956,30 +1953,30 @@ prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) } void -prof_reset(tsdn_t *tsdn, size_t lg_sample) +prof_reset(tsd_t *tsd, size_t lg_sample) { prof_tdata_t 
*next; assert(lg_sample < (sizeof(uint64_t) << 3)); - malloc_mutex_lock(tsdn, &prof_dump_mtx); - malloc_mutex_lock(tsdn, &tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); lg_prof_sample = lg_sample; next = NULL; do { prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, - prof_tdata_reset_iter, (void *)tsdn); + prof_tdata_reset_iter, (void *)tsd); if (to_destroy != NULL) { next = tdata_tree_next(&tdatas, to_destroy); - prof_tdata_destroy_locked(tsdn, to_destroy, false); + prof_tdata_destroy_locked(tsd, to_destroy, false); } else next = NULL; } while (next != NULL); - malloc_mutex_unlock(tsdn, &tdatas_mtx); - malloc_mutex_unlock(tsdn, &prof_dump_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); } void @@ -2189,7 +2186,7 @@ prof_boot1(void) } bool -prof_boot2(tsdn_t *tsdn) +prof_boot2(tsd_t *tsd) { cassert(config_prof); @@ -2215,7 +2212,7 @@ prof_boot2(tsdn_t *tsdn) WITNESS_RANK_PROF_THREAD_ACTIVE_INIT)) return (true); - if (ckh_new(tsdn, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, + if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) return (true); if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", @@ -2246,8 +2243,8 @@ prof_boot2(tsdn_t *tsdn) abort(); } - gctx_locks = (malloc_mutex_t *)base_alloc(tsdn, PROF_NCTX_LOCKS - * sizeof(malloc_mutex_t)); + gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), + PROF_NCTX_LOCKS * sizeof(malloc_mutex_t)); if (gctx_locks == NULL) return (true); for (i = 0; i < PROF_NCTX_LOCKS; i++) { @@ -2256,7 +2253,7 @@ prof_boot2(tsdn_t *tsdn) return (true); } - tdata_locks = (malloc_mutex_t *)base_alloc(tsdn, + tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t)); if (tdata_locks == NULL) return (true); diff --git a/contrib/jemalloc/src/rtree.c b/contrib/jemalloc/src/rtree.c index 3166b45faeed..f2e2997d5535 100644 --- a/contrib/jemalloc/src/rtree.c +++ b/contrib/jemalloc/src/rtree.c @@ -96,12 +96,15 @@ rtree_node_init(rtree_t *rtree, unsigned level, rtree_node_elm_t **elmp) rtree_node_elm_t *node; if (atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) { + spin_t spinner; + /* * Another thread is already in the process of initializing. * Spin-wait until initialization is complete. */ + spin_init(&spinner); do { - CPU_SPINWAIT; + spin_adaptive(&spinner); node = atomic_read_p((void **)elmp); } while (node == RTREE_NODE_INITIALIZING); } else { @@ -125,5 +128,5 @@ rtree_node_elm_t * rtree_child_read_hard(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) { - return (rtree_node_init(rtree, level, &elm->child)); + return (rtree_node_init(rtree, level+1, &elm->child)); } diff --git a/contrib/jemalloc/src/spin.c b/contrib/jemalloc/src/spin.c new file mode 100644 index 000000000000..5242d95aa10f --- /dev/null +++ b/contrib/jemalloc/src/spin.c @@ -0,0 +1,2 @@ +#define JEMALLOC_SPIN_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/contrib/jemalloc/src/stats.c b/contrib/jemalloc/src/stats.c index 073be4fe1539..bd8af3999ba9 100644 --- a/contrib/jemalloc/src/stats.c +++ b/contrib/jemalloc/src/stats.c @@ -32,86 +32,107 @@ bool opt_stats_print = false; size_t stats_cactive = 0; -/******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -static void stats_arena_bins_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); -static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); -static void stats_arena_hchunks_print( - void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); -static void stats_arena_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i, bool bins, bool large, bool huge); - /******************************************************************************/ static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) + bool json, bool large, bool huge, unsigned i) { size_t page; - bool config_tcache, in_gap; + bool config_tcache, in_gap, in_gap_prev; unsigned nbins, j; CTL_GET("arenas.page", &page, size_t); - CTL_GET("config.tcache", &config_tcache, bool); - if (config_tcache) { - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs curruns regs" - " pgs util nfills nflushes newruns" - " reruns\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs curruns regs" - " pgs util newruns reruns\n"); - } CTL_GET("arenas.nbins", &nbins, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"bins\": [\n"); + } else { + CTL_GET("config.tcache", &config_tcache, bool); + if (config_tcache) { + malloc_cprintf(write_cb, cbopaque, + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs" + " curruns regs pgs util nfills" + " nflushes newruns reruns\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs" + " curruns regs pgs util newruns" + " reruns\n"); + } + } for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nruns; + size_t reg_size, run_size, curregs; + size_t curruns; + uint32_t nregs; + uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; + uint64_t nreruns; CTL_M2_M4_GET("stats.arenas.0.bins.0.nruns", i, j, &nruns, uint64_t); - if (nruns == 0) - in_gap = true; - else { - size_t reg_size, run_size, curregs, availregs, milli; - size_t curruns; - uint32_t nregs; - uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; - uint64_t reruns; - char util[6]; /* "x.yyy". 
*/ + in_gap_prev = in_gap; + in_gap = (nruns == 0); - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - in_gap = false; - } - CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); - CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); - CTL_M2_GET("arenas.bin.0.run_size", j, &run_size, - size_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, - &nmalloc, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, - &ndalloc, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, - &curregs, size_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, - &nrequests, uint64_t); + if (!json && in_gap_prev && !in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } + + CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); + CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); + CTL_M2_GET("arenas.bin.0.run_size", j, &run_size, size_t); + + CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, &nmalloc, + uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, &ndalloc, + uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, &curregs, + size_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, + &nrequests, uint64_t); + if (config_tcache) { + CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, j, + &nfills, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", i, j, + &nflushes, uint64_t); + } + CTL_M2_M4_GET("stats.arenas.0.bins.0.nreruns", i, j, &nreruns, + uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.curruns", i, j, &curruns, + size_t); + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t{\n" + "\t\t\t\t\t\t\"nmalloc\": %"FMTu64",\n" + "\t\t\t\t\t\t\"ndalloc\": %"FMTu64",\n" + "\t\t\t\t\t\t\"curregs\": %zu,\n" + "\t\t\t\t\t\t\"nrequests\": %"FMTu64",\n", + nmalloc, + ndalloc, + curregs, + nrequests); if (config_tcache) { - CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, - j, &nfills, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", - i, j, &nflushes, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\t\"nfills\": %"FMTu64",\n" + "\t\t\t\t\t\t\"nflushes\": %"FMTu64",\n", + nfills, + nflushes); } - CTL_M2_M4_GET("stats.arenas.0.bins.0.nreruns", i, j, - &reruns, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curruns", i, j, - &curruns, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\t\"nreruns\": %"FMTu64",\n" + "\t\t\t\t\t\t\"curruns\": %zu\n" + "\t\t\t\t\t}%s\n", + nreruns, + curruns, + (j + 1 < nbins) ? "," : ""); + } else if (!in_gap) { + size_t availregs, milli; + char util[6]; /* "x.yyy". */ availregs = nregs * curruns; milli = (availregs != 0) ? (1000 * curregs) / availregs @@ -138,7 +159,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curruns, nregs, run_size / page, util, nfills, nflushes, - nruns, reruns); + nruns, nreruns); } else { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64 @@ -147,28 +168,38 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, " %12"FMTu64"\n", reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curruns, nregs, - run_size / page, util, nruns, reruns); + run_size / page, util, nruns, nreruns); } } } - if (in_gap) { + if (json) { malloc_cprintf(write_cb, cbopaque, - " ---\n"); + "\t\t\t\t]%s\n", (large || huge) ? 
"," : ""); + } else { + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } } static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) + bool json, bool huge, unsigned i) { unsigned nbins, nlruns, j; - bool in_gap; + bool in_gap, in_gap_prev; - malloc_cprintf(write_cb, cbopaque, - "large: size ind allocated nmalloc ndalloc" - " nrequests curruns\n"); CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlruns", &nlruns, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"lruns\": [\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "large: size ind allocated nmalloc" + " ndalloc nrequests curruns\n"); + } for (j = 0, in_gap = false; j < nlruns; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t run_size, curruns; @@ -179,17 +210,25 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, uint64_t); CTL_M2_M4_GET("stats.arenas.0.lruns.0.nrequests", i, j, &nrequests, uint64_t); - if (nrequests == 0) - in_gap = true; - else { - CTL_M2_GET("arenas.lrun.0.size", j, &run_size, size_t); - CTL_M2_M4_GET("stats.arenas.0.lruns.0.curruns", i, j, - &curruns, size_t); - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - in_gap = false; - } + in_gap_prev = in_gap; + in_gap = (nrequests == 0); + + if (!json && in_gap_prev && !in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } + + CTL_M2_GET("arenas.lrun.0.size", j, &run_size, size_t); + CTL_M2_M4_GET("stats.arenas.0.lruns.0.curruns", i, j, &curruns, + size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t{\n" + "\t\t\t\t\t\t\"curruns\": %zu\n" + "\t\t\t\t\t}%s\n", + curruns, + (j + 1 < nlruns) ? "," : ""); + } else if (!in_gap) { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64" %12zu\n", @@ -197,25 +236,35 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, ndalloc, nrequests, curruns); } } - if (in_gap) { + if (json) { malloc_cprintf(write_cb, cbopaque, - " ---\n"); + "\t\t\t\t]%s\n", huge ? 
"," : ""); + } else { + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } } static void stats_arena_hchunks_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i) + void *cbopaque, bool json, unsigned i) { unsigned nbins, nlruns, nhchunks, j; - bool in_gap; + bool in_gap, in_gap_prev; - malloc_cprintf(write_cb, cbopaque, - "huge: size ind allocated nmalloc ndalloc" - " nrequests curhchunks\n"); CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlruns", &nlruns, unsigned); CTL_GET("arenas.nhchunks", &nhchunks, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"hchunks\": [\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "huge: size ind allocated nmalloc" + " ndalloc nrequests curhchunks\n"); + } for (j = 0, in_gap = false; j < nhchunks; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t hchunk_size, curhchunks; @@ -226,18 +275,25 @@ stats_arena_hchunks_print(void (*write_cb)(void *, const char *), &ndalloc, uint64_t); CTL_M2_M4_GET("stats.arenas.0.hchunks.0.nrequests", i, j, &nrequests, uint64_t); - if (nrequests == 0) - in_gap = true; - else { - CTL_M2_GET("arenas.hchunk.0.size", j, &hchunk_size, - size_t); - CTL_M2_M4_GET("stats.arenas.0.hchunks.0.curhchunks", i, - j, &curhchunks, size_t); - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - in_gap = false; - } + in_gap_prev = in_gap; + in_gap = (nrequests == 0); + + if (!json && in_gap_prev && !in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } + + CTL_M2_GET("arenas.hchunk.0.size", j, &hchunk_size, size_t); + CTL_M2_M4_GET("stats.arenas.0.hchunks.0.curhchunks", i, j, + &curhchunks, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t{\n" + "\t\t\t\t\t\t\"curhchunks\": %zu\n" + "\t\t\t\t\t}%s\n", + curhchunks, + (j + 1 < nhchunks) ? 
"," : ""); + } else if (!in_gap) { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64" %12zu\n", @@ -246,15 +302,20 @@ stats_arena_hchunks_print(void (*write_cb)(void *, const char *), nrequests, curhchunks); } } - if (in_gap) { + if (json) { malloc_cprintf(write_cb, cbopaque, - " ---\n"); + "\t\t\t\t]\n"); + } else { + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } } static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i, bool bins, bool large, bool huge) + bool json, unsigned i, bool bins, bool large, bool huge) { unsigned nthreads; const char *dss; @@ -272,100 +333,731 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.page", &page, size_t); CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); - malloc_cprintf(write_cb, cbopaque, - "assigned threads: %u\n", nthreads); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"nthreads\": %u,\n", nthreads); + } else { + malloc_cprintf(write_cb, cbopaque, + "assigned threads: %u\n", nthreads); + } + CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); - malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", - dss); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"dss\": \"%s\",\n", dss); + } else { + malloc_cprintf(write_cb, cbopaque, + "dss allocation precedence: %s\n", dss); + } + CTL_M2_GET("stats.arenas.0.lg_dirty_mult", i, &lg_dirty_mult, ssize_t); - if (opt_purge == purge_mode_ratio) { - if (lg_dirty_mult >= 0) { - malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: %u:1\n", - (1U << lg_dirty_mult)); - } else { - malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: N/A\n"); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"lg_dirty_mult\": %zd,\n", lg_dirty_mult); + } else { + if (opt_purge == purge_mode_ratio) { + if (lg_dirty_mult >= 0) { + malloc_cprintf(write_cb, cbopaque, + "min active:dirty page ratio: %u:1\n", + (1U << lg_dirty_mult)); + } else { + malloc_cprintf(write_cb, cbopaque, + "min active:dirty page ratio: N/A\n"); + } } } + CTL_M2_GET("stats.arenas.0.decay_time", i, &decay_time, ssize_t); - if (opt_purge == purge_mode_decay) { - if (decay_time >= 0) { - malloc_cprintf(write_cb, cbopaque, "decay time: %zd\n", - decay_time); - } else - malloc_cprintf(write_cb, cbopaque, "decay time: N/A\n"); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"decay_time\": %zd,\n", decay_time); + } else { + if (opt_purge == purge_mode_decay) { + if (decay_time >= 0) { + malloc_cprintf(write_cb, cbopaque, + "decay time: %zd\n", decay_time); + } else { + malloc_cprintf(write_cb, cbopaque, + "decay time: N/A\n"); + } + } } + CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t); CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t); CTL_M2_GET("stats.arenas.0.npurge", i, &npurge, uint64_t); CTL_M2_GET("stats.arenas.0.nmadvise", i, &nmadvise, uint64_t); CTL_M2_GET("stats.arenas.0.purged", i, &purged, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "purging: dirty: %zu, sweeps: %"FMTu64", madvises: %"FMTu64", " - "purged: %"FMTu64"\n", pdirty, npurge, nmadvise, purged); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"pactive\": %zu,\n", pactive); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"pdirty\": %zu,\n", pdirty); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"npurge\": %"FMTu64",\n", npurge); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"nmadvise\": 
%"FMTu64",\n", nmadvise); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"purged\": %"FMTu64",\n", purged); + } else { + malloc_cprintf(write_cb, cbopaque, + "purging: dirty: %zu, sweeps: %"FMTu64", madvises: %"FMTu64 + ", purged: %"FMTu64"\n", pdirty, npurge, nmadvise, purged); + } - malloc_cprintf(write_cb, cbopaque, - " allocated nmalloc ndalloc" - " nrequests\n"); CTL_M2_GET("stats.arenas.0.small.allocated", i, &small_allocated, size_t); CTL_M2_GET("stats.arenas.0.small.nmalloc", i, &small_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.small.ndalloc", i, &small_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.small.nrequests", i, &small_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "small: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated, small_nmalloc, small_ndalloc, small_nrequests); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"small\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu,\n", small_allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", small_nmalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", small_ndalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", small_nrequests); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + " allocated nmalloc" + " ndalloc nrequests\n"); + malloc_cprintf(write_cb, cbopaque, + "small: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + small_allocated, small_nmalloc, small_ndalloc, + small_nrequests); + } + CTL_M2_GET("stats.arenas.0.large.allocated", i, &large_allocated, size_t); CTL_M2_GET("stats.arenas.0.large.nmalloc", i, &large_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.large.ndalloc", i, &large_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.large.nrequests", i, &large_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "large: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - large_allocated, large_nmalloc, large_ndalloc, large_nrequests); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"large\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu,\n", large_allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", large_nmalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", large_ndalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", large_nrequests); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "large: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + large_allocated, large_nmalloc, large_ndalloc, + large_nrequests); + } + CTL_M2_GET("stats.arenas.0.huge.allocated", i, &huge_allocated, size_t); CTL_M2_GET("stats.arenas.0.huge.nmalloc", i, &huge_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.huge.ndalloc", i, &huge_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.huge.nrequests", i, &huge_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "huge: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests); - malloc_cprintf(write_cb, cbopaque, - "total: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated + large_allocated + huge_allocated, - small_nmalloc + large_nmalloc + huge_nmalloc, - small_ndalloc + large_ndalloc + huge_ndalloc, - small_nrequests + large_nrequests + huge_nrequests); - 
malloc_cprintf(write_cb, cbopaque, - "active: %12zu\n", pactive * page); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"huge\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu,\n", huge_allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", huge_nmalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", huge_ndalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", huge_nrequests); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "huge: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests); + malloc_cprintf(write_cb, cbopaque, + "total: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + small_allocated + large_allocated + huge_allocated, + small_nmalloc + large_nmalloc + huge_nmalloc, + small_ndalloc + large_ndalloc + huge_ndalloc, + small_nrequests + large_nrequests + huge_nrequests); + } + if (!json) { + malloc_cprintf(write_cb, cbopaque, + "active: %12zu\n", pactive * page); + } + CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t); - malloc_cprintf(write_cb, cbopaque, - "mapped: %12zu\n", mapped); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"mapped\": %zu,\n", mapped); + } else { + malloc_cprintf(write_cb, cbopaque, + "mapped: %12zu\n", mapped); + } + CTL_M2_GET("stats.arenas.0.retained", i, &retained, size_t); - malloc_cprintf(write_cb, cbopaque, - "retained: %12zu\n", retained); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"retained\": %zu,\n", retained); + } else { + malloc_cprintf(write_cb, cbopaque, + "retained: %12zu\n", retained); + } + CTL_M2_GET("stats.arenas.0.metadata.mapped", i, &metadata_mapped, size_t); CTL_M2_GET("stats.arenas.0.metadata.allocated", i, &metadata_allocated, size_t); - malloc_cprintf(write_cb, cbopaque, - "metadata: mapped: %zu, allocated: %zu\n", - metadata_mapped, metadata_allocated); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"metadata\": {\n"); - if (bins) - stats_arena_bins_print(write_cb, cbopaque, i); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"mapped\": %zu,\n", metadata_mapped); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu\n", metadata_allocated); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "metadata: mapped: %zu, allocated: %zu\n", + metadata_mapped, metadata_allocated); + } + + if (bins) { + stats_arena_bins_print(write_cb, cbopaque, json, large, huge, + i); + } if (large) - stats_arena_lruns_print(write_cb, cbopaque, i); + stats_arena_lruns_print(write_cb, cbopaque, json, huge, i); if (huge) - stats_arena_hchunks_print(write_cb, cbopaque, i); + stats_arena_hchunks_print(write_cb, cbopaque, json, i); +} + +static void +stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, + bool json, bool merged, bool unmerged) +{ + const char *cpv; + bool bv; + unsigned uv; + uint32_t u32v; + uint64_t u64v; + ssize_t ssv; + size_t sv, bsz, usz, ssz, sssz, cpsz; + + bsz = sizeof(bool); + usz = sizeof(unsigned); + ssz = sizeof(size_t); + sssz = sizeof(ssize_t); + cpsz = sizeof(const char *); + + CTL_GET("version", &cpv, const char *); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"version\": \"%s\",\n", cpv); + } else + malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); + + /* config. 
*/ +#define CONFIG_WRITE_BOOL_JSON(n, c) \ + if (json) { \ + CTL_GET("config."#n, &bv, bool); \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %s%s\n", bv ? "true" : "false", \ + (c)); \ + } + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"config\": {\n"); + } + + CONFIG_WRITE_BOOL_JSON(cache_oblivious, ",") + + CTL_GET("config.debug", &bv, bool); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"debug\": %s,\n", bv ? "true" : "false"); + } else { + malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", + bv ? "enabled" : "disabled"); + } + + CONFIG_WRITE_BOOL_JSON(fill, ",") + CONFIG_WRITE_BOOL_JSON(lazy_lock, ",") + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"malloc_conf\": \"%s\",\n", + config_malloc_conf); + } else { + malloc_cprintf(write_cb, cbopaque, + "config.malloc_conf: \"%s\"\n", config_malloc_conf); + } + + CONFIG_WRITE_BOOL_JSON(munmap, ",") + CONFIG_WRITE_BOOL_JSON(prof, ",") + CONFIG_WRITE_BOOL_JSON(prof_libgcc, ",") + CONFIG_WRITE_BOOL_JSON(prof_libunwind, ",") + CONFIG_WRITE_BOOL_JSON(stats, ",") + CONFIG_WRITE_BOOL_JSON(tcache, ",") + CONFIG_WRITE_BOOL_JSON(tls, ",") + CONFIG_WRITE_BOOL_JSON(utrace, ",") + CONFIG_WRITE_BOOL_JSON(valgrind, ",") + CONFIG_WRITE_BOOL_JSON(xmalloc, "") + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t},\n"); + } +#undef CONFIG_WRITE_BOOL_JSON + + /* opt. */ +#define OPT_WRITE_BOOL(n, c) \ + if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ + "false", (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %s\n", bv ? "true" : "false"); \ + } \ + } +#define OPT_WRITE_BOOL_MUTABLE(n, m, c) { \ + bool bv2; \ + if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0 && \ + je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ + "false", (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %s ("#m": %s)\n", bv ? "true" \ + : "false", bv2 ? 
"true" : "false"); \ + } \ + } \ +} +#define OPT_WRITE_UNSIGNED(n, c) \ + if (je_mallctl("opt."#n, (void *)&uv, &usz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %u%s\n", uv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %u\n", uv); \ + } \ + } +#define OPT_WRITE_SIZE_T(n, c) \ + if (je_mallctl("opt."#n, (void *)&sv, &ssz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %zu%s\n", sv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zu\n", sv); \ + } \ + } +#define OPT_WRITE_SSIZE_T(n, c) \ + if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zd\n", ssv); \ + } \ + } +#define OPT_WRITE_SSIZE_T_MUTABLE(n, m, c) { \ + ssize_t ssv2; \ + if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0 && \ + je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zd ("#m": %zd)\n", \ + ssv, ssv2); \ + } \ + } \ +} +#define OPT_WRITE_CHAR_P(n, c) \ + if (je_mallctl("opt."#n, (void *)&cpv, &cpsz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": \"%s\"%s\n", cpv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": \"%s\"\n", cpv); \ + } \ + } + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"opt\": {\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "Run-time option settings:\n"); + } + OPT_WRITE_BOOL(abort, ",") + OPT_WRITE_SIZE_T(lg_chunk, ",") + OPT_WRITE_CHAR_P(dss, ",") + OPT_WRITE_UNSIGNED(narenas, ",") + OPT_WRITE_CHAR_P(purge, ",") + if (json || opt_purge == purge_mode_ratio) { + OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, + arenas.lg_dirty_mult, ",") + } + if (json || opt_purge == purge_mode_decay) { + OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time, ",") + } + OPT_WRITE_CHAR_P(junk, ",") + OPT_WRITE_SIZE_T(quarantine, ",") + OPT_WRITE_BOOL(redzone, ",") + OPT_WRITE_BOOL(zero, ",") + OPT_WRITE_BOOL(utrace, ",") + OPT_WRITE_BOOL(xmalloc, ",") + OPT_WRITE_BOOL(tcache, ",") + OPT_WRITE_SSIZE_T(lg_tcache_max, ",") + OPT_WRITE_BOOL(prof, ",") + OPT_WRITE_CHAR_P(prof_prefix, ",") + OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",") + OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, prof.thread_active_init, + ",") + OPT_WRITE_SSIZE_T_MUTABLE(lg_prof_sample, prof.lg_sample, ",") + OPT_WRITE_BOOL(prof_accum, ",") + OPT_WRITE_SSIZE_T(lg_prof_interval, ",") + OPT_WRITE_BOOL(prof_gdump, ",") + OPT_WRITE_BOOL(prof_final, ",") + OPT_WRITE_BOOL(prof_leak, ",") + /* + * stats_print is always emitted, so as long as stats_print comes last + * it's safe to unconditionally omit the comma here (rather than having + * to conditionally omit it elsewhere depending on configuration). + */ + OPT_WRITE_BOOL(stats_print, "") + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t},\n"); + } + +#undef OPT_WRITE_BOOL +#undef OPT_WRITE_BOOL_MUTABLE +#undef OPT_WRITE_SIZE_T +#undef OPT_WRITE_SSIZE_T +#undef OPT_WRITE_CHAR_P + + /* arenas. 
*/ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"arenas\": {\n"); + } + + CTL_GET("arenas.narenas", &uv, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"narenas\": %u,\n", uv); + } else + malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); + + CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"lg_dirty_mult\": %zd,\n", ssv); + } else if (opt_purge == purge_mode_ratio) { + if (ssv >= 0) { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: " + "%u:1\n", (1U << ssv)); + } else { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: " + "N/A\n"); + } + } + CTL_GET("arenas.decay_time", &ssv, ssize_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"decay_time\": %zd,\n", ssv); + } else if (opt_purge == purge_mode_decay) { + malloc_cprintf(write_cb, cbopaque, + "Unused dirty page decay time: %zd%s\n", + ssv, (ssv < 0) ? " (no decay)" : ""); + } + + CTL_GET("arenas.quantum", &sv, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"quantum\": %zu,\n", sv); + } else + malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); + + CTL_GET("arenas.page", &sv, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"page\": %zu,\n", sv); + } else + malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); + + if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) { + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"tcache_max\": %zu,\n", sv); + } else { + malloc_cprintf(write_cb, cbopaque, + "Maximum thread-cached size class: %zu\n", sv); + } + } + + if (json) { + unsigned nbins, nlruns, nhchunks, i; + + CTL_GET("arenas.nbins", &nbins, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nbins\": %u,\n", nbins); + + CTL_GET("arenas.nhbins", &uv, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nhbins\": %u,\n", uv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"bin\": [\n"); + for (i = 0; i < nbins; i++) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t{\n"); + + CTL_M2_GET("arenas.bin.0.size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"size\": %zu,\n", sv); + + CTL_M2_GET("arenas.bin.0.nregs", i, &u32v, uint32_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nregs\": %"FMTu32",\n", u32v); + + CTL_M2_GET("arenas.bin.0.run_size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"run_size\": %zu\n", sv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t}%s\n", (i + 1 < nbins) ? "," : ""); + } + malloc_cprintf(write_cb, cbopaque, + "\t\t\t],\n"); + + CTL_GET("arenas.nlruns", &nlruns, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nlruns\": %u,\n", nlruns); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"lrun\": [\n"); + for (i = 0; i < nlruns; i++) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t{\n"); + + CTL_M2_GET("arenas.lrun.0.size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"size\": %zu\n", sv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t}%s\n", (i + 1 < nlruns) ? 
"," : ""); + } + malloc_cprintf(write_cb, cbopaque, + "\t\t\t],\n"); + + CTL_GET("arenas.nhchunks", &nhchunks, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nhchunks\": %u,\n", nhchunks); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"hchunk\": [\n"); + for (i = 0; i < nhchunks; i++) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t{\n"); + + CTL_M2_GET("arenas.hchunk.0.size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"size\": %zu\n", sv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t}%s\n", (i + 1 < nhchunks) ? "," : ""); + } + malloc_cprintf(write_cb, cbopaque, + "\t\t\t]\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t},\n"); + } + + /* prof. */ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"prof\": {\n"); + + CTL_GET("prof.thread_active_init", &bv, bool); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"thread_active_init\": %s,\n", bv ? "true" : + "false"); + + CTL_GET("prof.active", &bv, bool); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"active\": %s,\n", bv ? "true" : "false"); + + CTL_GET("prof.gdump", &bv, bool); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"gdump\": %s,\n", bv ? "true" : "false"); + + CTL_GET("prof.interval", &u64v, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"interval\": %"FMTu64",\n", u64v); + + CTL_GET("prof.lg_sample", &ssv, ssize_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"lg_sample\": %zd\n", ssv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t}%s\n", (config_stats || merged || unmerged) ? "," : + ""); + } +} + +static void +stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, + bool json, bool merged, bool unmerged, bool bins, bool large, bool huge) +{ + size_t *cactive; + size_t allocated, active, metadata, resident, mapped, retained; + + CTL_GET("stats.cactive", &cactive, size_t *); + CTL_GET("stats.allocated", &allocated, size_t); + CTL_GET("stats.active", &active, size_t); + CTL_GET("stats.metadata", &metadata, size_t); + CTL_GET("stats.resident", &resident, size_t); + CTL_GET("stats.mapped", &mapped, size_t); + CTL_GET("stats.retained", &retained, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"stats\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"cactive\": %zu,\n", atomic_read_z(cactive)); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"allocated\": %zu,\n", allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"active\": %zu,\n", active); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"metadata\": %zu,\n", metadata); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"resident\": %zu,\n", resident); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"mapped\": %zu,\n", mapped); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"retained\": %zu\n", retained); + + malloc_cprintf(write_cb, cbopaque, + "\t\t}%s\n", (merged || unmerged) ? 
"," : ""); + } else { + malloc_cprintf(write_cb, cbopaque, + "Allocated: %zu, active: %zu, metadata: %zu," + " resident: %zu, mapped: %zu, retained: %zu\n", + allocated, active, metadata, resident, mapped, retained); + malloc_cprintf(write_cb, cbopaque, + "Current active ceiling: %zu\n", + atomic_read_z(cactive)); + } + + if (merged || unmerged) { + unsigned narenas; + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"stats.arenas\": {\n"); + } + + CTL_GET("arenas.narenas", &narenas, unsigned); + { + VARIABLE_ARRAY(bool, initialized, narenas); + size_t isz; + unsigned i, j, ninitialized; + + isz = sizeof(bool) * narenas; + xmallctl("arenas.initialized", (void *)initialized, + &isz, NULL, 0); + for (i = ninitialized = 0; i < narenas; i++) { + if (initialized[i]) + ninitialized++; + } + + /* Merged stats. */ + if (merged && (ninitialized > 1 || !unmerged)) { + /* Print merged arena stats. */ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"merged\": {\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "\nMerged arenas stats:\n"); + } + stats_arena_print(write_cb, cbopaque, json, + narenas, bins, large, huge); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t}%s\n", (ninitialized > 1) ? + "," : ""); + } + } + + /* Unmerged stats. */ + for (i = j = 0; i < narenas; i++) { + if (initialized[i]) { + if (json) { + j++; + malloc_cprintf(write_cb, + cbopaque, + "\t\t\t\"%u\": {\n", i); + } else { + malloc_cprintf(write_cb, + cbopaque, "\narenas[%u]:\n", + i); + } + stats_arena_print(write_cb, cbopaque, + json, i, bins, large, huge); + if (json) { + malloc_cprintf(write_cb, + cbopaque, + "\t\t\t}%s\n", (j < + ninitialized) ? "," : ""); + } + } + } + } + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t}\n"); + } + } } void @@ -375,6 +1067,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, int err; uint64_t epoch; size_t u64sz; + bool json = false; bool general = true; bool merged = true; bool unmerged = true; @@ -408,6 +1101,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, for (i = 0; opts[i] != '\0'; i++) { switch (opts[i]) { + case 'J': + json = true; + break; case 'g': general = false; break; @@ -431,246 +1127,27 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } - malloc_cprintf(write_cb, cbopaque, - "___ Begin jemalloc statistics ___\n"); - if (general) { - const char *cpv; - bool bv; - unsigned uv; - ssize_t ssv; - size_t sv, bsz, usz, ssz, sssz, cpsz; - - bsz = sizeof(bool); - usz = sizeof(unsigned); - ssz = sizeof(size_t); - sssz = sizeof(ssize_t); - cpsz = sizeof(const char *); - - CTL_GET("version", &cpv, const char *); - malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); - CTL_GET("config.debug", &bv, bool); - malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", - bv ? "enabled" : "disabled"); + if (json) { malloc_cprintf(write_cb, cbopaque, - "config.malloc_conf: \"%s\"\n", config_malloc_conf); - -#define OPT_WRITE_BOOL(n) \ - if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s\n", bv ? "true" : "false"); \ - } -#define OPT_WRITE_BOOL_MUTABLE(n, m) { \ - bool bv2; \ - if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0 && \ - je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s ("#m": %s)\n", bv ? "true" \ - : "false", bv2 ? 
"true" : "false"); \ - } \ -} -#define OPT_WRITE_UNSIGNED(n) \ - if (je_mallctl("opt."#n, &uv, &usz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %u\n", uv); \ - } -#define OPT_WRITE_SIZE_T(n) \ - if (je_mallctl("opt."#n, &sv, &ssz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zu\n", sv); \ - } -#define OPT_WRITE_SSIZE_T(n) \ - if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd\n", ssv); \ - } -#define OPT_WRITE_SSIZE_T_MUTABLE(n, m) { \ - ssize_t ssv2; \ - if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0 && \ - je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd ("#m": %zd)\n", \ - ssv, ssv2); \ - } \ -} -#define OPT_WRITE_CHAR_P(n) \ - if (je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": \"%s\"\n", cpv); \ - } - + "{\n" + "\t\"jemalloc\": {\n"); + } else { malloc_cprintf(write_cb, cbopaque, - "Run-time option settings:\n"); - OPT_WRITE_BOOL(abort) - OPT_WRITE_SIZE_T(lg_chunk) - OPT_WRITE_CHAR_P(dss) - OPT_WRITE_UNSIGNED(narenas) - OPT_WRITE_CHAR_P(purge) - if (opt_purge == purge_mode_ratio) { - OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, - arenas.lg_dirty_mult) - } - if (opt_purge == purge_mode_decay) - OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time) - OPT_WRITE_BOOL(stats_print) - OPT_WRITE_CHAR_P(junk) - OPT_WRITE_SIZE_T(quarantine) - OPT_WRITE_BOOL(redzone) - OPT_WRITE_BOOL(zero) - OPT_WRITE_BOOL(utrace) - OPT_WRITE_BOOL(valgrind) - OPT_WRITE_BOOL(xmalloc) - OPT_WRITE_BOOL(tcache) - OPT_WRITE_SSIZE_T(lg_tcache_max) - OPT_WRITE_BOOL(prof) - OPT_WRITE_CHAR_P(prof_prefix) - OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active) - OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, - prof.thread_active_init) - OPT_WRITE_SSIZE_T(lg_prof_sample) - OPT_WRITE_BOOL(prof_accum) - OPT_WRITE_SSIZE_T(lg_prof_interval) - OPT_WRITE_BOOL(prof_gdump) - OPT_WRITE_BOOL(prof_final) - OPT_WRITE_BOOL(prof_leak) - -#undef OPT_WRITE_BOOL -#undef OPT_WRITE_BOOL_MUTABLE -#undef OPT_WRITE_SIZE_T -#undef OPT_WRITE_SSIZE_T -#undef OPT_WRITE_CHAR_P - - malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus); - - CTL_GET("arenas.narenas", &uv, unsigned); - malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); - - malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n", - sizeof(void *)); - - CTL_GET("arenas.quantum", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", - sv); - - CTL_GET("arenas.page", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); - - CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t); - if (opt_purge == purge_mode_ratio) { - if (ssv >= 0) { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: " - "%u:1\n", (1U << ssv)); - } else { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: " - "N/A\n"); - } - } - CTL_GET("arenas.decay_time", &ssv, ssize_t); - if (opt_purge == purge_mode_decay) { - malloc_cprintf(write_cb, cbopaque, - "Unused dirty page decay time: %zd%s\n", - ssv, (ssv < 0) ? 
" (no decay)" : ""); - } - if (je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0) == 0) { - malloc_cprintf(write_cb, cbopaque, - "Maximum thread-cached size class: %zu\n", sv); - } - if (je_mallctl("opt.prof", &bv, &bsz, NULL, 0) == 0 && bv) { - CTL_GET("prof.lg_sample", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "Average profile sample interval: %"FMTu64 - " (2^%zu)\n", (((uint64_t)1U) << sv), sv); - - CTL_GET("opt.lg_prof_interval", &ssv, ssize_t); - if (ssv >= 0) { - malloc_cprintf(write_cb, cbopaque, - "Average profile dump interval: %"FMTu64 - " (2^%zd)\n", - (((uint64_t)1U) << ssv), ssv); - } else { - malloc_cprintf(write_cb, cbopaque, - "Average profile dump interval: N/A\n"); - } - } - CTL_GET("opt.lg_chunk", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "Chunk size: %zu (2^%zu)\n", (ZU(1) << sv), sv); + "___ Begin jemalloc statistics ___\n"); } + if (general) + stats_general_print(write_cb, cbopaque, json, merged, unmerged); if (config_stats) { - size_t *cactive; - size_t allocated, active, metadata, resident, mapped, retained; - - CTL_GET("stats.cactive", &cactive, size_t *); - CTL_GET("stats.allocated", &allocated, size_t); - CTL_GET("stats.active", &active, size_t); - CTL_GET("stats.metadata", &metadata, size_t); - CTL_GET("stats.resident", &resident, size_t); - CTL_GET("stats.mapped", &mapped, size_t); - CTL_GET("stats.retained", &retained, size_t); - malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, metadata: %zu," - " resident: %zu, mapped: %zu, retained: %zu\n", - allocated, active, metadata, resident, mapped, retained); - malloc_cprintf(write_cb, cbopaque, - "Current active ceiling: %zu\n", - atomic_read_z(cactive)); - - if (merged) { - unsigned narenas; - - CTL_GET("arenas.narenas", &narenas, unsigned); - { - VARIABLE_ARRAY(bool, initialized, narenas); - size_t isz; - unsigned i, ninitialized; - - isz = sizeof(bool) * narenas; - xmallctl("arenas.initialized", initialized, - &isz, NULL, 0); - for (i = ninitialized = 0; i < narenas; i++) { - if (initialized[i]) - ninitialized++; - } - - if (ninitialized > 1 || !unmerged) { - /* Print merged arena stats. */ - malloc_cprintf(write_cb, cbopaque, - "\nMerged arenas stats:\n"); - stats_arena_print(write_cb, cbopaque, - narenas, bins, large, huge); - } - } - } - - if (unmerged) { - unsigned narenas; - - /* Print stats for each arena. 
*/ - - CTL_GET("arenas.narenas", &narenas, unsigned); - { - VARIABLE_ARRAY(bool, initialized, narenas); - size_t isz; - unsigned i; - - isz = sizeof(bool) * narenas; - xmallctl("arenas.initialized", initialized, - &isz, NULL, 0); - - for (i = 0; i < narenas; i++) { - if (initialized[i]) { - malloc_cprintf(write_cb, - cbopaque, - "\narenas[%u]:\n", i); - stats_arena_print(write_cb, - cbopaque, i, bins, large, - huge); - } - } - } - } + stats_print_helper(write_cb, cbopaque, json, merged, unmerged, + bins, large, huge); + } + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t}\n" + "}\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "--- End jemalloc statistics ---\n"); } - malloc_cprintf(write_cb, cbopaque, "--- End jemalloc statistics ---\n"); } diff --git a/contrib/jemalloc/src/tcache.c b/contrib/jemalloc/src/tcache.c index 175759c7c51d..f97aa420ca49 100644 --- a/contrib/jemalloc/src/tcache.c +++ b/contrib/jemalloc/src/tcache.c @@ -445,14 +445,14 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) } bool -tcaches_create(tsdn_t *tsdn, unsigned *r_ind) +tcaches_create(tsd_t *tsd, unsigned *r_ind) { arena_t *arena; tcache_t *tcache; tcaches_t *elm; if (tcaches == NULL) { - tcaches = base_alloc(tsdn, sizeof(tcache_t *) * + tcaches = base_alloc(tsd_tsdn(tsd), sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1)); if (tcaches == NULL) return (true); @@ -460,10 +460,10 @@ tcaches_create(tsdn_t *tsdn, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - arena = arena_ichoose(tsdn, NULL); + arena = arena_ichoose(tsd, NULL); if (unlikely(arena == NULL)) return (true); - tcache = tcache_create(tsdn, arena); + tcache = tcache_create(tsd_tsdn(tsd), arena); if (tcache == NULL) return (true); diff --git a/contrib/jemalloc/src/tsd.c b/contrib/jemalloc/src/tsd.c index aeaa5e18ffe7..ec69a51c3d36 100644 --- a/contrib/jemalloc/src/tsd.c +++ b/contrib/jemalloc/src/tsd.c @@ -171,10 +171,10 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) tsd_init_block_t *iter; /* Check whether this thread has already inserted into the list. */ - malloc_mutex_lock(NULL, &head->lock); + malloc_mutex_lock(TSDN_NULL, &head->lock); ql_foreach(iter, &head->blocks, link) { if (iter->thread == self) { - malloc_mutex_unlock(NULL, &head->lock); + malloc_mutex_unlock(TSDN_NULL, &head->lock); return (iter->data); } } @@ -182,7 +182,7 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) ql_elm_new(block, link); block->thread = self; ql_tail_insert(&head->blocks, block, link); - malloc_mutex_unlock(NULL, &head->lock); + malloc_mutex_unlock(TSDN_NULL, &head->lock); return (NULL); } @@ -190,8 +190,8 @@ void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) { - malloc_mutex_lock(NULL, &head->lock); + malloc_mutex_lock(TSDN_NULL, &head->lock); ql_remove(&head->blocks, block, link); - malloc_mutex_unlock(NULL, &head->lock); + malloc_mutex_unlock(TSDN_NULL, &head->lock); } #endif diff --git a/contrib/jemalloc/src/util.c b/contrib/jemalloc/src/util.c index 04f9153dc5c4..bee1c77e7ece 100644 --- a/contrib/jemalloc/src/util.c +++ b/contrib/jemalloc/src/util.c @@ -49,7 +49,7 @@ static void wrtmessage(void *cbopaque, const char *s) { -#ifdef SYS_write +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_write) /* * Use syscall(2) rather than write(2) when possible in order to avoid * the possibility of memory allocation within libc. 
This is necessary
diff --git a/lib/libc/stdlib/jemalloc/Makefile.inc b/lib/libc/stdlib/jemalloc/Makefile.inc
index 8b22fdac2815..f507e00d2ae0 100644
--- a/lib/libc/stdlib/jemalloc/Makefile.inc
+++ b/lib/libc/stdlib/jemalloc/Makefile.inc
@@ -4,8 +4,8 @@
 JEMALLOCSRCS:=	jemalloc.c arena.c atomic.c base.c bitmap.c chunk.c \
 	chunk_dss.c chunk_mmap.c ckh.c ctl.c extent.c hash.c huge.c mb.c \
-	mutex.c nstime.c pages.c prng.c prof.c quarantine.c rtree.c stats.c \
-	tcache.c ticker.c tsd.c util.c witness.c
+	mutex.c nstime.c pages.c prng.c prof.c quarantine.c rtree.c spin.c \
+	stats.c tcache.c ticker.c tsd.c util.c witness.c
 SYM_MAPS+=${LIBC_SRCTOP}/stdlib/jemalloc/Symbol.map
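
Note on the stats.c hunks above: stats_print() is split into stats_general_print() and
stats_print_helper(), and a json flag selects between the classic
"___ Begin jemalloc statistics ___" banner and a "jemalloc" JSON object. The sketch below
is not part of the imported sources; it only shows how an application might drive the new
output path through the public malloc_stats_print() interface, assuming FreeBSD's
<malloc_np.h> declares that function and that the "J" opts character selects the JSON
formatter introduced by this change.

/*
 * Minimal sketch (not from the diff): route malloc_stats_print() output
 * through a caller-supplied write callback and request the JSON form.
 */
#include <malloc_np.h>
#include <stdio.h>
#include <stdlib.h>

static void
write_cb(void *cbopaque, const char *s)
{

	/* jemalloc calls this repeatedly with chunks of formatted output. */
	fputs(s, (FILE *)cbopaque);
}

int
main(void)
{

	free(malloc(1));	/* Ensure the allocator has bootstrapped. */
	/* malloc_stats_print(NULL, NULL, NULL) would print plain text to stderr. */
	malloc_stats_print(write_cb, stdout, "J");
	return (0);
}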
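
The util.c hunk tightens the guard around the raw-syscall write path: syscall(2) is used
only when both JEMALLOC_HAVE_SYSCALL and SYS_write are defined, and wrtmessage() otherwise
falls back to write(2). Below is a standalone sketch of that pattern, with
JEMALLOC_HAVE_SYSCALL standing in for the configure-time macro assumed by the hunk; it is
an illustration, not the imported code.

/*
 * Sketch of the guarded write path: prefer syscall(2) so no interposed
 * libc wrapper can allocate behind the caller's back; fall back to
 * write(2) when the syscall interface or SYS_write is unavailable.
 */
#include <string.h>
#include <unistd.h>
#ifdef JEMALLOC_HAVE_SYSCALL
#include <sys/syscall.h>
#endif

static void
write_message(const char *s)
{

#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_write)
	/* Raw syscall: bypasses any wrapper around write(2). */
	(void)syscall(SYS_write, STDERR_FILENO, s, strlen(s));
#else
	/* Plain write(2); it performs no allocation either. */
	(void)write(STDERR_FILENO, s, strlen(s));
#endif
}

int
main(void)
{

	write_message("hello from the fallback-aware writer\n");
	return (0);
}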