diff --git a/contrib/jemalloc/ChangeLog b/contrib/jemalloc/ChangeLog index 0cf887c2344a..58e4462be07a 100644 --- a/contrib/jemalloc/ChangeLog +++ b/contrib/jemalloc/ChangeLog @@ -4,6 +4,76 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.0.2 (September 21, 2015) + + This bugfix release addresses a few bugs specific to heap profiling. + + Bug fixes: + - Fix ixallocx_prof_sample() to never modify nor create sampled small + allocations. xallocx() is in general incapable of moving small allocations, + so this fix removes buggy code without loss of generality. + - Fix irallocx_prof_sample() to always allocate large regions, even when + alignment is non-zero. + - Fix prof_alloc_rollback() to read tdata from thread-specific data rather + than dereferencing a potentially invalid tctx. + +* 4.0.1 (September 15, 2015) + + This is a bugfix release that is somewhat high risk due to the amount of + refactoring required to address deep xallocx() problems. As a side effect of + these fixes, xallocx() now tries harder to partially fulfill requests for + optional extra space. Note that a couple of minor heap profiling + optimizations are included, but these are better thought of as performance + fixes that were integral to disovering most of the other bugs. + + Optimizations: + - Avoid a chunk metadata read in arena_prof_tctx_set(), since it is in the + fast path when heap profiling is enabled. Additionally, split a special + case out into arena_prof_tctx_reset(), which also avoids chunk metadata + reads. + - Optimize irallocx_prof() to optimistically update the sampler state. The + prior implementation appears to have been a holdover from when + rallocx()/xallocx() functionality was combined as rallocm(). + + Bug fixes: + - Fix TLS configuration such that it is enabled by default for platforms on + which it works correctly. + - Fix arenas_cache_cleanup() and arena_get_hard() to handle + allocation/deallocation within the application's thread-specific data + cleanup functions even after arenas_cache is torn down. + - Fix xallocx() bugs related to size+extra exceeding HUGE_MAXCLASS. + - Fix chunk purge hook calls for in-place huge shrinking reallocation to + specify the old chunk size rather than the new chunk size. This bug caused + no correctness issues for the default chunk purge function, but was + visible to custom functions set via the "arena..chunk_hooks" mallctl. + - Fix heap profiling bugs: + + Fix heap profiling to distinguish among otherwise identical sample sites + with interposed resets (triggered via the "prof.reset" mallctl). This bug + could cause data structure corruption that would most likely result in a + segfault. + + Fix irealloc_prof() to prof_alloc_rollback() on OOM. + + Make one call to prof_active_get_unlocked() per allocation event, and use + the result throughout the relevant functions that handle an allocation + event. Also add a missing check in prof_realloc(). These fixes protect + allocation events against concurrent prof_active changes. + + Fix ixallocx_prof() to pass usize_max and zero to ixallocx_prof_sample() + in the correct order. + + Fix prof_realloc() to call prof_free_sampled_object() after calling + prof_malloc_sample_object(). Prior to this fix, if tctx and old_tctx were + the same, the tctx could have been prematurely destroyed. + - Fix portability bugs: + + Don't bitshift by negative amounts when encoding/decoding run sizes in + chunk header maps. This affected systems with page sizes greater than 8 + KiB. + + Rename index_t to szind_t to avoid an existing type on Solaris. + + Add JEMALLOC_CXX_THROW to the memalign() function prototype, in order to + match glibc and avoid compilation errors when including both + jemalloc/jemalloc.h and malloc.h in C++ code. + + Don't assume that /bin/sh is appropriate when running size_classes.sh + during configuration. + + Consider __sparcv9 a synonym for __sparc64__ when defining LG_QUANTUM. + + Link tests to librt if it contains clock_gettime(2). + * 4.0.0 (August 17, 2015) This version contains many speed and space optimizations, both minor and diff --git a/contrib/jemalloc/FREEBSD-diffs b/contrib/jemalloc/FREEBSD-diffs index 7d6955d773b2..25e823c60dfe 100644 --- a/contrib/jemalloc/FREEBSD-diffs +++ b/contrib/jemalloc/FREEBSD-diffs @@ -47,7 +47,7 @@ index 8fc774b..fdbef95 100644 + diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in -index 7a137b6..b0001e9 100644 +index 8536a3e..0c2a81f 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -8,6 +8,9 @@ @@ -111,10 +111,10 @@ index f051f29..561378f 100644 #endif /* JEMALLOC_H_EXTERNS */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt -index dbf6aa7..f87dba8 100644 +index a90021a..34904bf 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt -@@ -277,7 +277,6 @@ iralloct_realign +@@ -280,7 +280,6 @@ iralloct_realign isalloc isdalloct isqalloc @@ -282,7 +282,7 @@ index f943891..47d032c 100755 +#include "jemalloc_FreeBSD.h" EOF diff --git a/src/jemalloc.c b/src/jemalloc.c -index ed7863b..d078a1f 100644 +index 5a2d324..b6cbb79 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4,6 +4,10 @@ @@ -296,7 +296,7 @@ index ed7863b..d078a1f 100644 /* Runtime configuration options. */ const char *je_malloc_conf JEMALLOC_ATTR(weak); bool opt_abort = -@@ -2475,6 +2479,107 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) +@@ -2490,6 +2494,107 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) */ /******************************************************************************/ /* @@ -404,7 +404,7 @@ index ed7863b..d078a1f 100644 * The following functions are used by threading libraries for protection of * malloc during fork(). */ -@@ -2575,4 +2680,11 @@ jemalloc_postfork_child(void) +@@ -2590,4 +2695,11 @@ jemalloc_postfork_child(void) ctl_postfork_child(); } diff --git a/contrib/jemalloc/VERSION b/contrib/jemalloc/VERSION index 6802c56546d6..210a4fef248a 100644 --- a/contrib/jemalloc/VERSION +++ b/contrib/jemalloc/VERSION @@ -1 +1 @@ -4.0.0-0-g6e98caf8f064482b9ab292ef3638dea67420bbc2 +4.0.2-0-g486d249fb4715fd3de679b6c2a04f7e657883111 diff --git a/contrib/jemalloc/doc/jemalloc.3 b/contrib/jemalloc/doc/jemalloc.3 index 2afb8628c65a..258206212381 100644 --- a/contrib/jemalloc/doc/jemalloc.3 +++ b/contrib/jemalloc/doc/jemalloc.3 @@ -2,12 +2,12 @@ .\" Title: JEMALLOC .\" Author: Jason Evans .\" Generator: DocBook XSL Stylesheets v1.76.1 -.\" Date: 08/18/2015 +.\" Date: 09/21/2015 .\" Manual: User Manual -.\" Source: jemalloc 4.0.0-0-g6e98caf8f064482b9ab292ef3638dea67420bbc2 +.\" Source: jemalloc 4.0.2-0-g486d249fb4715fd3de679b6c2a04f7e657883111 .\" Language: English .\" -.TH "JEMALLOC" "3" "08/18/2015" "jemalloc 4.0.0-0-g6e98caf8f064" "User Manual" +.TH "JEMALLOC" "3" "09/21/2015" "jemalloc 4.0.2-0-g486d249fb471" "User Manual" .\" ----------------------------------------------------------------- .\" * Define some portability stuff .\" ----------------------------------------------------------------- @@ -31,7 +31,7 @@ jemalloc \- general purpose memory allocation functions .SH "LIBRARY" .PP -This manual describes jemalloc 4\&.0\&.0\-0\-g6e98caf8f064482b9ab292ef3638dea67420bbc2\&. More information can be found at the +This manual describes jemalloc 4\&.0\&.2\-0\-g486d249fb4715fd3de679b6c2a04f7e657883111\&. More information can be found at the \m[blue]\fBjemalloc website\fR\m[]\&\s-2\u[1]\d\s+2\&. .PP The following configuration options are enabled in libc\*(Aqs built\-in jemalloc: diff --git a/contrib/jemalloc/include/jemalloc/internal/arena.h b/contrib/jemalloc/include/jemalloc/internal/arena.h index cb015eedc601..12c617979f9a 100644 --- a/contrib/jemalloc/include/jemalloc/internal/arena.h +++ b/contrib/jemalloc/include/jemalloc/internal/arena.h @@ -39,7 +39,7 @@ typedef struct arena_s arena_t; #ifdef JEMALLOC_ARENA_STRUCTS_A struct arena_run_s { /* Index of bin this run is associated with. */ - index_t binind; + szind_t binind; /* Number of free regions in run. */ unsigned nfree; @@ -424,7 +424,7 @@ extern arena_bin_info_t arena_bin_info[NBINS]; extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t map_misc_offset; extern size_t arena_maxrun; /* Max run size for arenas. */ -extern size_t arena_maxclass; /* Max size class for arenas. */ +extern size_t large_maxclass; /* Max large size class. */ extern unsigned nlclasses; /* Number of large size classes. */ extern unsigned nhclasses; /* Number of huge size classes. */ @@ -448,7 +448,7 @@ bool arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult); void arena_maybe_purge(arena_t *arena); void arena_purge_all(arena_t *arena); void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, - index_t binind, uint64_t prof_accumbytes); + szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero); #ifdef JEMALLOC_JET @@ -488,7 +488,7 @@ extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache); + size_t size, size_t alignment, bool zero, tcache_t *tcache); dss_prec_t arena_dss_prec_get(arena_t *arena); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); ssize_t arena_lg_dirty_mult_default_get(void); @@ -519,17 +519,19 @@ arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbitsp_read(size_t *mapbitsp); size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_size_decode(size_t mapbits); size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); -index_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); +szind_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind); void arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits); +size_t arena_mapbits_size_encode(size_t size); void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags); void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, @@ -539,21 +541,23 @@ void arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags); void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - index_t binind); + szind_t binind); void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, - size_t runind, index_t binind, size_t flags); + size_t runind, szind_t binind, size_t flags); void arena_metadata_allocated_add(arena_t *arena, size_t size); void arena_metadata_allocated_sub(arena_t *arena, size_t size); size_t arena_metadata_allocated_get(arena_t *arena); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); -index_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); -index_t arena_bin_index(arena_t *arena, arena_bin_t *bin); +szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); +szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_tctx_t *arena_prof_tctx_get(const void *ptr); -void arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); +void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); +void arena_prof_tctx_reset(const void *ptr, size_t usize, + const void *old_ptr, prof_tctx_t *old_tctx); void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, tcache_t *tcache); arena_t *arena_aalloc(const void *ptr); @@ -652,6 +656,22 @@ arena_mapbits_get(arena_chunk_t *chunk, size_t pageind) return (arena_mapbitsp_read(arena_mapbitsp_get(chunk, pageind))); } +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_size_decode(size_t mapbits) +{ + size_t size; + +#if CHUNK_MAP_SIZE_SHIFT > 0 + size = (mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT; +#elif CHUNK_MAP_SIZE_SHIFT == 0 + size = mapbits & CHUNK_MAP_SIZE_MASK; +#else + size = (mapbits & CHUNK_MAP_SIZE_MASK) << -CHUNK_MAP_SIZE_SHIFT; +#endif + + return (size); +} + JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) { @@ -659,7 +679,7 @@ arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); - return ((mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT); + return (arena_mapbits_size_decode(mapbits)); } JEMALLOC_ALWAYS_INLINE size_t @@ -670,7 +690,7 @@ arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)); - return ((mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT); + return (arena_mapbits_size_decode(mapbits)); } JEMALLOC_ALWAYS_INLINE size_t @@ -684,11 +704,11 @@ arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) return (mapbits >> CHUNK_MAP_RUNIND_SHIFT); } -JEMALLOC_ALWAYS_INLINE index_t +JEMALLOC_ALWAYS_INLINE szind_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; - index_t binind; + szind_t binind; mapbits = arena_mapbits_get(chunk, pageind); binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; @@ -754,6 +774,23 @@ arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits) *mapbitsp = mapbits; } +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_size_encode(size_t size) +{ + size_t mapbits; + +#if CHUNK_MAP_SIZE_SHIFT > 0 + mapbits = size << CHUNK_MAP_SIZE_SHIFT; +#elif CHUNK_MAP_SIZE_SHIFT == 0 + mapbits = size; +#else + mapbits = size >> -CHUNK_MAP_SIZE_SHIFT; +#endif + + assert((mapbits & ~CHUNK_MAP_SIZE_MASK) == 0); + return (mapbits); +} + JEMALLOC_ALWAYS_INLINE void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) @@ -761,11 +798,10 @@ arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); assert((size & PAGE_MASK) == 0); - assert(((size << CHUNK_MAP_SIZE_SHIFT) & ~CHUNK_MAP_SIZE_MASK) == 0); assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - arena_mapbitsp_write(mapbitsp, (size << CHUNK_MAP_SIZE_SHIFT) | + arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | CHUNK_MAP_BININD_INVALID | flags); } @@ -777,10 +813,9 @@ arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, size_t mapbits = arena_mapbitsp_read(mapbitsp); assert((size & PAGE_MASK) == 0); - assert(((size << CHUNK_MAP_SIZE_SHIFT) & ~CHUNK_MAP_SIZE_MASK) == 0); assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); - arena_mapbitsp_write(mapbitsp, (size << CHUNK_MAP_SIZE_SHIFT) | (mapbits - & ~CHUNK_MAP_SIZE_MASK)); + arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | + (mapbits & ~CHUNK_MAP_SIZE_MASK)); } JEMALLOC_ALWAYS_INLINE void @@ -799,18 +834,17 @@ arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); assert((size & PAGE_MASK) == 0); - assert(((size << CHUNK_MAP_SIZE_SHIFT) & ~CHUNK_MAP_SIZE_MASK) == 0); assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - arena_mapbitsp_write(mapbitsp, (size << CHUNK_MAP_SIZE_SHIFT) | + arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | CHUNK_MAP_BININD_INVALID | flags | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED); } JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - index_t binind) + szind_t binind) { size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); @@ -824,7 +858,7 @@ arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, JEMALLOC_ALWAYS_INLINE void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, - index_t binind, size_t flags) + szind_t binind, size_t flags) { size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); @@ -901,10 +935,10 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes) } } -JEMALLOC_ALWAYS_INLINE index_t +JEMALLOC_ALWAYS_INLINE szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits) { - index_t binind; + szind_t binind; binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; @@ -916,7 +950,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) size_t rpages_ind; arena_run_t *run; arena_bin_t *bin; - index_t run_binind, actual_binind; + szind_t run_binind, actual_binind; arena_bin_info_t *bin_info; arena_chunk_map_misc_t *miscelm; void *rpages; @@ -950,10 +984,10 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) # endif /* JEMALLOC_ARENA_INLINE_A */ # ifdef JEMALLOC_ARENA_INLINE_B -JEMALLOC_INLINE index_t +JEMALLOC_INLINE szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { - index_t binind = bin - arena->bins; + szind_t binind = bin - arena->bins; assert(binind < NBINS); return (binind); } @@ -1060,7 +1094,7 @@ arena_prof_tctx_get(const void *ptr) } JEMALLOC_INLINE void -arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) +arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) { arena_chunk_t *chunk; @@ -1070,17 +1104,59 @@ arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (likely(chunk != ptr)) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - if (unlikely(arena_mapbits_large_get(chunk, pageind) != 0)) { - arena_chunk_map_misc_t *elm = arena_miscelm_get(chunk, - pageind); + if (unlikely(usize > SMALL_MAXCLASS || (uintptr_t)tctx > + (uintptr_t)1U)) { + arena_chunk_map_misc_t *elm; + + assert(arena_mapbits_large_get(chunk, pageind) != 0); + + elm = arena_miscelm_get(chunk, pageind); atomic_write_p(&elm->prof_tctx_pun, tctx); + } else { + /* + * tctx must always be initialized for large runs. + * Assert that the surrounding conditional logic is + * equivalent to checking whether ptr refers to a large + * run. + */ + assert(arena_mapbits_large_get(chunk, pageind) == 0); } } else huge_prof_tctx_set(ptr, tctx); } +JEMALLOC_INLINE void +arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, + prof_tctx_t *old_tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + + if (unlikely(usize > SMALL_MAXCLASS || (ptr == old_ptr && + (uintptr_t)old_tctx > (uintptr_t)1U))) { + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) { + size_t pageind; + arena_chunk_map_misc_t *elm; + + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> + LG_PAGE; + assert(arena_mapbits_allocated_get(chunk, pageind) != + 0); + assert(arena_mapbits_large_get(chunk, pageind) != 0); + + elm = arena_miscelm_get(chunk, pageind); + atomic_write_p(&elm->prof_tctx_pun, + (prof_tctx_t *)(uintptr_t)1U); + } else + huge_prof_tctx_reset(ptr); + } +} + JEMALLOC_ALWAYS_INLINE void * arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, tcache_t *tcache) @@ -1098,7 +1174,7 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, zero)); } else return (arena_malloc_small(arena, size, zero)); - } else if (likely(size <= arena_maxclass)) { + } else if (likely(size <= large_maxclass)) { /* * Initialize tcache after checking size in order to avoid * infinite recursion during tcache initialization. @@ -1131,7 +1207,7 @@ arena_salloc(const void *ptr, bool demote) size_t ret; arena_chunk_t *chunk; size_t pageind; - index_t binind; + szind_t binind; assert(ptr != NULL); @@ -1190,7 +1266,7 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { /* Small allocation. */ if (likely(tcache != NULL)) { - index_t binind = arena_ptr_small_binind_get(ptr, + szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); tcache_dalloc_small(tsd, tcache, ptr, binind); } else { @@ -1242,7 +1318,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) if (likely(size <= SMALL_MAXCLASS)) { /* Small allocation. */ if (likely(tcache != NULL)) { - index_t binind = size2index(size); + szind_t binind = size2index(size); tcache_dalloc_small(tsd, tcache, ptr, binind); } else { size_t pageind = ((uintptr_t)ptr - diff --git a/contrib/jemalloc/include/jemalloc/internal/huge.h b/contrib/jemalloc/include/jemalloc/internal/huge.h index 8b6c6cec2ae9..ece7af980aa9 100644 --- a/contrib/jemalloc/include/jemalloc/internal/huge.h +++ b/contrib/jemalloc/include/jemalloc/internal/huge.h @@ -13,11 +13,10 @@ void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, tcache_t *tcache); void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, bool zero, tcache_t *tcache); -bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero); +bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, + size_t usize_max, bool zero); void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t extra, size_t alignment, bool zero, - tcache_t *tcache); + size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; @@ -27,6 +26,7 @@ arena_t *huge_aalloc(const void *ptr); size_t huge_salloc(const void *ptr); prof_tctx_t *huge_prof_tctx_get(const void *ptr); void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); +void huge_prof_tctx_reset(const void *ptr); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h index d9d8b5bbb6ad..6c3cc181c64e 100644 --- a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h +++ b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h @@ -181,7 +181,7 @@ static const bool config_cache_oblivious = #include "jemalloc/internal/jemalloc_internal_macros.h" /* Size class index type. */ -typedef unsigned index_t; +typedef unsigned szind_t; /* * Flags bits: @@ -229,7 +229,7 @@ typedef unsigned index_t; # ifdef __alpha__ # define LG_QUANTUM 4 # endif -# ifdef __sparc64__ +# if (defined(__sparc64__) || defined(__sparcv9)) # define LG_QUANTUM 4 # endif # if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) @@ -508,12 +508,12 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE -index_t size2index_compute(size_t size); -index_t size2index_lookup(size_t size); -index_t size2index(size_t size); -size_t index2size_compute(index_t index); -size_t index2size_lookup(index_t index); -size_t index2size(index_t index); +szind_t size2index_compute(size_t size); +szind_t size2index_lookup(size_t size); +szind_t size2index(size_t size); +size_t index2size_compute(szind_t index); +size_t index2size_lookup(szind_t index); +size_t index2size(szind_t index); size_t s2u_compute(size_t size); size_t s2u_lookup(size_t size); size_t s2u(size_t size); @@ -524,7 +524,7 @@ arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -JEMALLOC_INLINE index_t +JEMALLOC_INLINE szind_t size2index_compute(size_t size) { @@ -555,7 +555,7 @@ size2index_compute(size_t size) } } -JEMALLOC_ALWAYS_INLINE index_t +JEMALLOC_ALWAYS_INLINE szind_t size2index_lookup(size_t size) { @@ -568,7 +568,7 @@ size2index_lookup(size_t size) } } -JEMALLOC_ALWAYS_INLINE index_t +JEMALLOC_ALWAYS_INLINE szind_t size2index(size_t size) { @@ -579,7 +579,7 @@ size2index(size_t size) } JEMALLOC_INLINE size_t -index2size_compute(index_t index) +index2size_compute(szind_t index) { #if (NTBINS > 0) @@ -606,7 +606,7 @@ index2size_compute(index_t index) } JEMALLOC_ALWAYS_INLINE size_t -index2size_lookup(index_t index) +index2size_lookup(szind_t index) { size_t ret = (size_t)index2size_tab[index]; assert(ret == index2size_compute(index)); @@ -614,7 +614,7 @@ index2size_lookup(index_t index) } JEMALLOC_ALWAYS_INLINE size_t -index2size(index_t index) +index2size(szind_t index) { assert(index < NSIZES); @@ -702,7 +702,7 @@ sa2u(size_t size, size_t alignment) } /* Try for a large size class. */ - if (likely(size <= arena_maxclass) && likely(alignment < chunksize)) { + if (likely(size <= large_maxclass) && likely(alignment < chunksize)) { /* * We can't achieve subpage alignment, so round up alignment * to the minimum that can actually be supported. @@ -973,7 +973,7 @@ u2rz(size_t usize) size_t ret; if (usize <= SMALL_MAXCLASS) { - index_t binind = size2index(usize); + szind_t binind = size2index(usize); ret = arena_bin_info[binind].redzone_size; } else ret = 0; @@ -1093,7 +1093,7 @@ iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, zero, tcache, arena)); } - return (arena_ralloc(tsd, arena, ptr, oldsize, size, 0, alignment, zero, + return (arena_ralloc(tsd, arena, ptr, oldsize, size, alignment, zero, tcache)); } diff --git a/contrib/jemalloc/include/jemalloc/internal/private_namespace.h b/contrib/jemalloc/include/jemalloc/internal/private_namespace.h index 10fab6583c25..ffc4ea253c61 100644 --- a/contrib/jemalloc/include/jemalloc/internal/private_namespace.h +++ b/contrib/jemalloc/include/jemalloc/internal/private_namespace.h @@ -50,13 +50,14 @@ #define arena_mapbitsp_get JEMALLOC_N(arena_mapbitsp_get) #define arena_mapbitsp_read JEMALLOC_N(arena_mapbitsp_read) #define arena_mapbitsp_write JEMALLOC_N(arena_mapbitsp_write) +#define arena_mapbits_size_decode JEMALLOC_N(arena_mapbits_size_decode) +#define arena_mapbits_size_encode JEMALLOC_N(arena_mapbits_size_encode) #define arena_mapbits_small_runind_get JEMALLOC_N(arena_mapbits_small_runind_get) #define arena_mapbits_small_set JEMALLOC_N(arena_mapbits_small_set) #define arena_mapbits_unallocated_set JEMALLOC_N(arena_mapbits_unallocated_set) #define arena_mapbits_unallocated_size_get JEMALLOC_N(arena_mapbits_unallocated_size_get) #define arena_mapbits_unallocated_size_set JEMALLOC_N(arena_mapbits_unallocated_size_set) #define arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get) -#define arena_maxclass JEMALLOC_N(arena_maxclass) #define arena_maxrun JEMALLOC_N(arena_maxrun) #define arena_maybe_purge JEMALLOC_N(arena_maybe_purge) #define arena_metadata_allocated_add JEMALLOC_N(arena_metadata_allocated_add) @@ -79,6 +80,7 @@ #define arena_prof_accum_locked JEMALLOC_N(arena_prof_accum_locked) #define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) #define arena_prof_tctx_get JEMALLOC_N(arena_prof_tctx_get) +#define arena_prof_tctx_reset JEMALLOC_N(arena_prof_tctx_reset) #define arena_prof_tctx_set JEMALLOC_N(arena_prof_tctx_set) #define arena_ptr_small_binind_get JEMALLOC_N(arena_ptr_small_binind_get) #define arena_purge_all JEMALLOC_N(arena_purge_all) @@ -249,6 +251,7 @@ #define huge_malloc JEMALLOC_N(huge_malloc) #define huge_palloc JEMALLOC_N(huge_palloc) #define huge_prof_tctx_get JEMALLOC_N(huge_prof_tctx_get) +#define huge_prof_tctx_reset JEMALLOC_N(huge_prof_tctx_reset) #define huge_prof_tctx_set JEMALLOC_N(huge_prof_tctx_set) #define huge_ralloc JEMALLOC_N(huge_ralloc) #define huge_ralloc_no_move JEMALLOC_N(huge_ralloc_no_move) @@ -282,6 +285,7 @@ #define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) #define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) #define jemalloc_prefork JEMALLOC_N(jemalloc_prefork) +#define large_maxclass JEMALLOC_N(large_maxclass) #define lg_floor JEMALLOC_N(lg_floor) #define malloc_cprintf JEMALLOC_N(malloc_cprintf) #define malloc_mutex_init JEMALLOC_N(malloc_mutex_init) @@ -376,6 +380,7 @@ #define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) #define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) #define prof_tctx_get JEMALLOC_N(prof_tctx_get) +#define prof_tctx_reset JEMALLOC_N(prof_tctx_reset) #define prof_tctx_set JEMALLOC_N(prof_tctx_set) #define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup) #define prof_tdata_get JEMALLOC_N(prof_tdata_get) diff --git a/contrib/jemalloc/include/jemalloc/internal/prof.h b/contrib/jemalloc/include/jemalloc/internal/prof.h index 2e2271168d45..e5198c3e8153 100644 --- a/contrib/jemalloc/include/jemalloc/internal/prof.h +++ b/contrib/jemalloc/include/jemalloc/internal/prof.h @@ -90,10 +90,11 @@ struct prof_tctx_s { prof_tdata_t *tdata; /* - * Copy of tdata->thr_uid, necessary because tdata may be defunct during - * teardown. + * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be + * defunct during teardown. */ uint64_t thr_uid; + uint64_t thr_discrim; /* Profiling counters, protected by tdata->lock. */ prof_cnt_t cnts; @@ -330,14 +331,18 @@ bool prof_gdump_get_unlocked(void); prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, prof_tdata_t **tdata_out); -prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool update); +prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, + bool update); prof_tctx_t *prof_tctx_get(const void *ptr); -void prof_tctx_set(const void *ptr, prof_tctx_t *tctx); +void prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); +void prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, + prof_tctx_t *tctx); void prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, - prof_tctx_t *tctx, bool updated, size_t old_usize, prof_tctx_t *old_tctx); + prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, + size_t old_usize, prof_tctx_t *old_tctx); void prof_free(tsd_t *tsd, const void *ptr, size_t usize); #endif @@ -402,13 +407,24 @@ prof_tctx_get(const void *ptr) } JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(const void *ptr, prof_tctx_t *tctx) +prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_set(ptr, tctx); + arena_prof_tctx_set(ptr, usize, tctx); +} + +JEMALLOC_ALWAYS_INLINE void +prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, + prof_tctx_t *old_tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + + arena_prof_tctx_reset(ptr, usize, old_ptr, old_tctx); } JEMALLOC_ALWAYS_INLINE bool @@ -442,7 +458,7 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, } JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_alloc_prep(tsd_t *tsd, size_t usize, bool update) +prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { prof_tctx_t *ret; prof_tdata_t *tdata; @@ -450,8 +466,8 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool update) assert(usize == s2u(usize)); - if (!prof_active_get_unlocked() || likely(prof_sample_accum_update(tsd, - usize, update, &tdata))) + if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update, + &tdata))) ret = (prof_tctx_t *)(uintptr_t)1U; else { bt_init(&bt, tdata->vec); @@ -473,22 +489,24 @@ prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) prof_malloc_sample_object(ptr, usize, tctx); else - prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U); } JEMALLOC_ALWAYS_INLINE void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, - bool updated, size_t old_usize, prof_tctx_t *old_tctx) + bool prof_active, bool updated, const void *old_ptr, size_t old_usize, + prof_tctx_t *old_tctx) { + bool sampled, old_sampled; cassert(config_prof); assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); - if (!updated && ptr != NULL) { + if (prof_active && !updated && ptr != NULL) { assert(usize == isalloc(ptr, true)); if (prof_sample_accum_update(tsd, usize, true, NULL)) { /* - * Don't sample. The usize passed to PROF_ALLOC_PREP() + * Don't sample. The usize passed to prof_alloc_prep() * was larger than what actually got allocated, so a * backtrace was captured for this allocation, even * though its actual usize was insufficient to cross the @@ -498,12 +516,16 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, } } - if (unlikely((uintptr_t)old_tctx > (uintptr_t)1U)) - prof_free_sampled_object(tsd, old_usize, old_tctx); - if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) + sampled = ((uintptr_t)tctx > (uintptr_t)1U); + old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); + + if (unlikely(sampled)) prof_malloc_sample_object(ptr, usize, tctx); else - prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_reset(ptr, usize, old_ptr, old_tctx); + + if (unlikely(old_sampled)) + prof_free_sampled_object(tsd, old_usize, old_tctx); } JEMALLOC_ALWAYS_INLINE void diff --git a/contrib/jemalloc/include/jemalloc/internal/size_classes.h b/contrib/jemalloc/include/jemalloc/internal/size_classes.h index eed6fbdf342a..4ca58a106d0e 100644 --- a/contrib/jemalloc/include/jemalloc/internal/size_classes.h +++ b/contrib/jemalloc/include/jemalloc/internal/size_classes.h @@ -25,6 +25,7 @@ * LOOKUP_MAXCLASS: Maximum size class included in lookup table. * SMALL_MAXCLASS: Maximum small size class. * LG_LARGE_MINCLASS: Lg of minimum large size class. + * HUGE_MAXCLASS: Maximum (huge) size class. */ #define LG_SIZE_CLASS_GROUP 2 @@ -180,6 +181,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 13) + (((size_t)3) << 11)) #define LG_LARGE_MINCLASS 14 +#define HUGE_MAXCLASS ((((size_t)1) << 31) + (((size_t)3) << 29)) #endif #if (LG_SIZEOF_PTR == 2 && LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 13) @@ -333,6 +335,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 14) + (((size_t)3) << 12)) #define LG_LARGE_MINCLASS 15 +#define HUGE_MAXCLASS ((((size_t)1) << 31) + (((size_t)3) << 29)) #endif #if (LG_SIZEOF_PTR == 2 && LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 14) @@ -486,6 +489,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 15) + (((size_t)3) << 13)) #define LG_LARGE_MINCLASS 16 +#define HUGE_MAXCLASS ((((size_t)1) << 31) + (((size_t)3) << 29)) #endif #if (LG_SIZEOF_PTR == 2 && LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 16) @@ -639,6 +643,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 17) + (((size_t)3) << 15)) #define LG_LARGE_MINCLASS 18 +#define HUGE_MAXCLASS ((((size_t)1) << 31) + (((size_t)3) << 29)) #endif #if (LG_SIZEOF_PTR == 2 && LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 12) @@ -789,6 +794,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 13) + (((size_t)3) << 11)) #define LG_LARGE_MINCLASS 14 +#define HUGE_MAXCLASS ((((size_t)1) << 31) + (((size_t)3) << 29)) #endif #if (LG_SIZEOF_PTR == 2 && LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 13) @@ -939,6 +945,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 14) + (((size_t)3) << 12)) #define LG_LARGE_MINCLASS 15 +#define HUGE_MAXCLASS ((((size_t)1) << 31) + (((size_t)3) << 29)) #endif #if (LG_SIZEOF_PTR == 2 && LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 14) @@ -1089,6 +1096,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 15) + (((size_t)3) << 13)) #define LG_LARGE_MINCLASS 16 +#define HUGE_MAXCLASS ((((size_t)1) << 31) + (((size_t)3) << 29)) #endif #if (LG_SIZEOF_PTR == 2 && LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 16) @@ -1239,6 +1247,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 17) + (((size_t)3) << 15)) #define LG_LARGE_MINCLASS 18 +#define HUGE_MAXCLASS ((((size_t)1) << 31) + (((size_t)3) << 29)) #endif #if (LG_SIZEOF_PTR == 2 && LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 12) @@ -1387,6 +1396,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 13) + (((size_t)3) << 11)) #define LG_LARGE_MINCLASS 14 +#define HUGE_MAXCLASS ((((size_t)1) << 31) + (((size_t)3) << 29)) #endif #if (LG_SIZEOF_PTR == 2 && LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 13) @@ -1535,6 +1545,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 14) + (((size_t)3) << 12)) #define LG_LARGE_MINCLASS 15 +#define HUGE_MAXCLASS ((((size_t)1) << 31) + (((size_t)3) << 29)) #endif #if (LG_SIZEOF_PTR == 2 && LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 14) @@ -1683,6 +1694,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 15) + (((size_t)3) << 13)) #define LG_LARGE_MINCLASS 16 +#define HUGE_MAXCLASS ((((size_t)1) << 31) + (((size_t)3) << 29)) #endif #if (LG_SIZEOF_PTR == 2 && LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 16) @@ -1831,6 +1843,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 17) + (((size_t)3) << 15)) #define LG_LARGE_MINCLASS 18 +#define HUGE_MAXCLASS ((((size_t)1) << 31) + (((size_t)3) << 29)) #endif #if (LG_SIZEOF_PTR == 3 && LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 12) @@ -2144,6 +2157,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 13) + (((size_t)3) << 11)) #define LG_LARGE_MINCLASS 14 +#define HUGE_MAXCLASS ((((size_t)1) << 63) + (((size_t)3) << 61)) #endif #if (LG_SIZEOF_PTR == 3 && LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 13) @@ -2457,6 +2471,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 14) + (((size_t)3) << 12)) #define LG_LARGE_MINCLASS 15 +#define HUGE_MAXCLASS ((((size_t)1) << 63) + (((size_t)3) << 61)) #endif #if (LG_SIZEOF_PTR == 3 && LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 14) @@ -2770,6 +2785,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 15) + (((size_t)3) << 13)) #define LG_LARGE_MINCLASS 16 +#define HUGE_MAXCLASS ((((size_t)1) << 63) + (((size_t)3) << 61)) #endif #if (LG_SIZEOF_PTR == 3 && LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 16) @@ -3083,6 +3099,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 17) + (((size_t)3) << 15)) #define LG_LARGE_MINCLASS 18 +#define HUGE_MAXCLASS ((((size_t)1) << 63) + (((size_t)3) << 61)) #endif #if (LG_SIZEOF_PTR == 3 && LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 12) @@ -3393,6 +3410,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 13) + (((size_t)3) << 11)) #define LG_LARGE_MINCLASS 14 +#define HUGE_MAXCLASS ((((size_t)1) << 63) + (((size_t)3) << 61)) #endif #if (LG_SIZEOF_PTR == 3 && LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 13) @@ -3703,6 +3721,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 14) + (((size_t)3) << 12)) #define LG_LARGE_MINCLASS 15 +#define HUGE_MAXCLASS ((((size_t)1) << 63) + (((size_t)3) << 61)) #endif #if (LG_SIZEOF_PTR == 3 && LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 14) @@ -4013,6 +4032,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 15) + (((size_t)3) << 13)) #define LG_LARGE_MINCLASS 16 +#define HUGE_MAXCLASS ((((size_t)1) << 63) + (((size_t)3) << 61)) #endif #if (LG_SIZEOF_PTR == 3 && LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 16) @@ -4323,6 +4343,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 17) + (((size_t)3) << 15)) #define LG_LARGE_MINCLASS 18 +#define HUGE_MAXCLASS ((((size_t)1) << 63) + (((size_t)3) << 61)) #endif #if (LG_SIZEOF_PTR == 3 && LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 12) @@ -4631,6 +4652,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 13) + (((size_t)3) << 11)) #define LG_LARGE_MINCLASS 14 +#define HUGE_MAXCLASS ((((size_t)1) << 63) + (((size_t)3) << 61)) #endif #if (LG_SIZEOF_PTR == 3 && LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 13) @@ -4939,6 +4961,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 14) + (((size_t)3) << 12)) #define LG_LARGE_MINCLASS 15 +#define HUGE_MAXCLASS ((((size_t)1) << 63) + (((size_t)3) << 61)) #endif #if (LG_SIZEOF_PTR == 3 && LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 14) @@ -5247,6 +5270,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 15) + (((size_t)3) << 13)) #define LG_LARGE_MINCLASS 16 +#define HUGE_MAXCLASS ((((size_t)1) << 63) + (((size_t)3) << 61)) #endif #if (LG_SIZEOF_PTR == 3 && LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 16) @@ -5555,6 +5579,7 @@ #define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) #define SMALL_MAXCLASS ((((size_t)1) << 17) + (((size_t)3) << 15)) #define LG_LARGE_MINCLASS 18 +#define HUGE_MAXCLASS ((((size_t)1) << 63) + (((size_t)3) << 61)) #endif #ifndef SIZE_CLASSES_DEFINED diff --git a/contrib/jemalloc/include/jemalloc/internal/tcache.h b/contrib/jemalloc/include/jemalloc/internal/tcache.h index 493f4575b61f..5079cd266888 100644 --- a/contrib/jemalloc/include/jemalloc/internal/tcache.h +++ b/contrib/jemalloc/include/jemalloc/internal/tcache.h @@ -77,7 +77,7 @@ struct tcache_s { ql_elm(tcache_t) link; /* Used for aggregating stats. */ uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ unsigned ev_cnt; /* Event count since incremental GC. */ - index_t next_gc_bin; /* Next bin to GC. */ + szind_t next_gc_bin; /* Next bin to GC. */ tcache_bin_t tbins[1]; /* Dynamically sized. */ /* * The pointer stacks associated with tbins follow as a contiguous @@ -126,10 +126,10 @@ extern tcaches_t *tcaches; size_t tcache_salloc(const void *ptr); void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); void *tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, index_t binind); + tcache_bin_t *tbin, szind_t binind); void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, - index_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, + szind_t binind, unsigned rem); +void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache); void tcache_arena_associate(tcache_t *tcache, arena_t *arena); void tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, @@ -161,7 +161,7 @@ void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, bool zero); void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, - index_t binind); + szind_t binind); void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size); tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); @@ -267,7 +267,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, bool zero) { void *ret; - index_t binind; + szind_t binind; size_t usize; tcache_bin_t *tbin; @@ -312,7 +312,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, bool zero) { void *ret; - index_t binind; + szind_t binind; size_t usize; tcache_bin_t *tbin; @@ -360,7 +360,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, index_t binind) +tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind) { tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; @@ -386,7 +386,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, index_t binind) JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size) { - index_t binind; + szind_t binind; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; diff --git a/contrib/jemalloc/include/jemalloc/internal/tsd.h b/contrib/jemalloc/include/jemalloc/internal/tsd.h index 62a887e617ac..eed7aa013470 100644 --- a/contrib/jemalloc/include/jemalloc/internal/tsd.h +++ b/contrib/jemalloc/include/jemalloc/internal/tsd.h @@ -190,7 +190,7 @@ a_name##tsd_boot0(void) \ return (false); \ } \ a_attr void \ -a_name##tsd_boot1() \ +a_name##tsd_boot1(void) \ { \ \ /* Do nothing. */ \ @@ -235,7 +235,7 @@ a_name##tsd_boot0(void) \ return (false); \ } \ a_attr void \ -a_name##tsd_boot1() \ +a_name##tsd_boot1(void) \ { \ \ /* Do nothing. */ \ @@ -345,7 +345,7 @@ a_name##tsd_boot0(void) \ return (false); \ } \ a_attr void \ -a_name##tsd_boot1() \ +a_name##tsd_boot1(void) \ { \ a_name##tsd_wrapper_t *wrapper; \ wrapper = (a_name##tsd_wrapper_t *) \ @@ -467,7 +467,7 @@ a_name##tsd_boot0(void) \ return (false); \ } \ a_attr void \ -a_name##tsd_boot1() \ +a_name##tsd_boot1(void) \ { \ a_name##tsd_wrapper_t *wrapper; \ wrapper = (a_name##tsd_wrapper_t *) \ diff --git a/contrib/jemalloc/include/jemalloc/jemalloc.h b/contrib/jemalloc/include/jemalloc/jemalloc.h index cd8f7da8bfe4..95a3a51f5735 100644 --- a/contrib/jemalloc/include/jemalloc/jemalloc.h +++ b/contrib/jemalloc/include/jemalloc/jemalloc.h @@ -79,12 +79,12 @@ extern "C" { #include #include -#define JEMALLOC_VERSION "4.0.0-0-g6e98caf8f064482b9ab292ef3638dea67420bbc2" +#define JEMALLOC_VERSION "4.0.2-0-g486d249fb4715fd3de679b6c2a04f7e657883111" #define JEMALLOC_VERSION_MAJOR 4 #define JEMALLOC_VERSION_MINOR 0 -#define JEMALLOC_VERSION_BUGFIX 0 +#define JEMALLOC_VERSION_BUGFIX 2 #define JEMALLOC_VERSION_NREV 0 -#define JEMALLOC_VERSION_GID "6e98caf8f064482b9ab292ef3638dea67420bbc2" +#define JEMALLOC_VERSION_GID "486d249fb4715fd3de679b6c2a04f7e657883111" # define MALLOCX_LG_ALIGN(la) (la) # if LG_SIZEOF_PTR == 2 @@ -234,7 +234,7 @@ JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size( #ifdef JEMALLOC_OVERRIDE_MEMALIGN JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW *je_memalign(size_t alignment, size_t size) - JEMALLOC_ATTR(malloc); + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); #endif #ifdef JEMALLOC_OVERRIDE_VALLOC diff --git a/contrib/jemalloc/src/arena.c b/contrib/jemalloc/src/arena.c index af48b39d5955..7f4a6caea1b2 100644 --- a/contrib/jemalloc/src/arena.c +++ b/contrib/jemalloc/src/arena.c @@ -11,7 +11,7 @@ arena_bin_info_t arena_bin_info[NBINS]; size_t map_bias; size_t map_misc_offset; size_t arena_maxrun; /* Max run size for arenas. */ -size_t arena_maxclass; /* Max size class for arenas. */ +size_t large_maxclass; /* Max large size class. */ static size_t small_maxrun; /* Max run size used for small size classes. */ static bool *small_run_tab; /* Valid small run page multiples. */ unsigned nlclasses; /* Number of large size classes. */ @@ -39,7 +39,7 @@ JEMALLOC_INLINE_C arena_chunk_map_misc_t * arena_miscelm_key_create(size_t size) { - return ((arena_chunk_map_misc_t *)((size << CHUNK_MAP_SIZE_SHIFT) | + return ((arena_chunk_map_misc_t *)(arena_mapbits_size_encode(size) | CHUNK_MAP_KEY)); } @@ -58,8 +58,7 @@ arena_miscelm_key_size_get(const arena_chunk_map_misc_t *miscelm) assert(arena_miscelm_is_key(miscelm)); - return (((uintptr_t)miscelm & CHUNK_MAP_SIZE_MASK) >> - CHUNK_MAP_SIZE_SHIFT); + return (arena_mapbits_size_decode((uintptr_t)miscelm)); } JEMALLOC_INLINE_C size_t @@ -73,7 +72,7 @@ arena_miscelm_size_get(arena_chunk_map_misc_t *miscelm) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); pageind = arena_miscelm_to_pageind(miscelm); mapbits = arena_mapbits_get(chunk, pageind); - return ((mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT); + return (arena_mapbits_size_decode(mapbits)); } JEMALLOC_INLINE_C int @@ -315,7 +314,7 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t mapbits = arena_mapbits_get(chunk, pageind); - index_t binind = arena_ptr_small_binind_get(ptr, mapbits); + szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind = arena_run_regind(run, bin_info, ptr); @@ -426,7 +425,7 @@ arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size, { arena_chunk_t *chunk; arena_chunk_map_misc_t *miscelm; - size_t flag_dirty, flag_decommitted, run_ind, need_pages, i; + size_t flag_dirty, flag_decommitted, run_ind, need_pages; size_t flag_unzeroed_mask; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); @@ -460,6 +459,7 @@ arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size, * The run is clean, so some pages may be zeroed (i.e. * never before touched). */ + size_t i; for (i = 0; i < need_pages; i++) { if (arena_mapbits_unzeroed_get(chunk, run_ind+i) != 0) @@ -508,7 +508,7 @@ arena_run_init_large(arena_t *arena, arena_run_t *run, size_t size, bool zero) static bool arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size, - index_t binind) + szind_t binind) { arena_chunk_t *chunk; arena_chunk_map_misc_t *miscelm; @@ -780,7 +780,7 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) static void arena_huge_malloc_stats_update(arena_t *arena, size_t usize) { - index_t index = size2index(usize) - nlclasses - NBINS; + szind_t index = size2index(usize) - nlclasses - NBINS; cassert(config_stats); @@ -793,7 +793,7 @@ arena_huge_malloc_stats_update(arena_t *arena, size_t usize) static void arena_huge_malloc_stats_update_undo(arena_t *arena, size_t usize) { - index_t index = size2index(usize) - nlclasses - NBINS; + szind_t index = size2index(usize) - nlclasses - NBINS; cassert(config_stats); @@ -806,7 +806,7 @@ arena_huge_malloc_stats_update_undo(arena_t *arena, size_t usize) static void arena_huge_dalloc_stats_update(arena_t *arena, size_t usize) { - index_t index = size2index(usize) - nlclasses - NBINS; + szind_t index = size2index(usize) - nlclasses - NBINS; cassert(config_stats); @@ -819,7 +819,7 @@ arena_huge_dalloc_stats_update(arena_t *arena, size_t usize) static void arena_huge_dalloc_stats_update_undo(arena_t *arena, size_t usize) { - index_t index = size2index(usize) - nlclasses - NBINS; + szind_t index = size2index(usize) - nlclasses - NBINS; cassert(config_stats); @@ -1125,7 +1125,7 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero) } static arena_run_t * -arena_run_alloc_small_helper(arena_t *arena, size_t size, index_t binind) +arena_run_alloc_small_helper(arena_t *arena, size_t size, szind_t binind) { arena_run_t *run = arena_run_first_best_fit(arena, size); if (run != NULL) { @@ -1136,7 +1136,7 @@ arena_run_alloc_small_helper(arena_t *arena, size_t size, index_t binind) } static arena_run_t * -arena_run_alloc_small(arena_t *arena, size_t size, index_t binind) +arena_run_alloc_small(arena_t *arena, size_t size, szind_t binind) { arena_chunk_t *chunk; arena_run_t *run; @@ -1889,7 +1889,7 @@ static arena_run_t * arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) { arena_run_t *run; - index_t binind; + szind_t binind; arena_bin_info_t *bin_info; /* Look for a usable run. */ @@ -1939,8 +1939,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) static void * arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) { - void *ret; - index_t binind; + szind_t binind; arena_bin_info_t *bin_info; arena_run_t *run; @@ -1953,6 +1952,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) * Another thread updated runcur while this one ran without the * bin lock in arena_bin_nonfull_run_get(). */ + void *ret; assert(bin->runcur->nfree > 0); ret = arena_run_reg_alloc(bin->runcur, bin_info); if (run != NULL) { @@ -1986,13 +1986,11 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) } void -arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, index_t binind, +arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; arena_bin_t *bin; - arena_run_t *run; - void *ptr; assert(tbin->ncached == 0); @@ -2002,6 +2000,8 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, index_t binind, malloc_mutex_lock(&bin->lock); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> tbin->lg_fill_div); i < nfill; i++) { + arena_run_t *run; + void *ptr; if ((run = bin->runcur) != NULL && run->nfree > 0) ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); else @@ -2076,12 +2076,13 @@ arena_redzone_corruption_t *arena_redzone_corruption = static void arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset) { - size_t size = bin_info->reg_size; - size_t redzone_size = bin_info->redzone_size; - size_t i; bool error = false; if (opt_junk_alloc) { + size_t size = bin_info->reg_size; + size_t redzone_size = bin_info->redzone_size; + size_t i; + for (i = 1; i <= redzone_size; i++) { uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i); if (*byte != 0xa5) { @@ -2131,7 +2132,7 @@ arena_dalloc_junk_small_t *arena_dalloc_junk_small = void arena_quarantine_junk_small(void *ptr, size_t usize) { - index_t binind; + szind_t binind; arena_bin_info_t *bin_info; cassert(config_fill); assert(opt_junk_free); @@ -2149,7 +2150,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) void *ret; arena_bin_t *bin; arena_run_t *run; - index_t binind; + szind_t binind; binind = size2index(size); assert(binind < NBINS); @@ -2233,7 +2234,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) ret = (void *)((uintptr_t)arena_miscelm_to_rpages(miscelm) + random_offset); if (config_stats) { - index_t index = size2index(usize) - NBINS; + szind_t index = size2index(usize) - NBINS; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; @@ -2326,7 +2327,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, ret = arena_miscelm_to_rpages(miscelm); if (config_stats) { - index_t index = size2index(usize) - NBINS; + szind_t index = size2index(usize) - NBINS; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; @@ -2356,7 +2357,7 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, && (usize & PAGE_MASK) == 0))) { /* Small; alignment doesn't require special run placement. */ ret = arena_malloc(tsd, arena, usize, zero, tcache); - } else if (usize <= arena_maxclass && alignment <= PAGE) { + } else if (usize <= large_maxclass && alignment <= PAGE) { /* * Large; alignment doesn't require special run placement. * However, the cached pointer may be at a random offset from @@ -2367,7 +2368,7 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, if (config_cache_oblivious) ret = (void *)((uintptr_t)ret & ~PAGE_MASK); } else { - if (likely(usize <= arena_maxclass)) { + if (likely(usize <= large_maxclass)) { ret = arena_palloc_large(tsd, arena, usize, alignment, zero); } else if (likely(alignment <= chunksize)) @@ -2385,7 +2386,7 @@ arena_prof_promoted(const void *ptr, size_t size) { arena_chunk_t *chunk; size_t pageind; - index_t binind; + szind_t binind; cassert(config_prof); assert(ptr != NULL); @@ -2413,7 +2414,7 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, if (run == bin->runcur) bin->runcur = NULL; else { - index_t binind = arena_bin_index(extent_node_arena_get( + szind_t binind = arena_bin_index(extent_node_arena_get( &chunk->node), bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; @@ -2477,7 +2478,7 @@ arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_run_t *run; arena_bin_t *bin; arena_bin_info_t *bin_info; - index_t binind; + szind_t binind; pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); @@ -2559,7 +2560,7 @@ arena_dalloc_junk_large_t *arena_dalloc_junk_large = JEMALLOC_N(arena_dalloc_junk_large_impl); #endif -void +static void arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool junked) { @@ -2574,7 +2575,7 @@ arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, if (!junked) arena_dalloc_junk_large(ptr, usize); if (config_stats) { - index_t index = size2index(usize) - NBINS; + szind_t index = size2index(usize) - NBINS; arena->stats.ndalloc_large++; arena->stats.allocated_large -= usize; @@ -2621,8 +2622,8 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_run_trim_tail(arena, chunk, run, oldsize + large_pad, size + large_pad, true); if (config_stats) { - index_t oldindex = size2index(oldsize) - NBINS; - index_t index = size2index(size) - NBINS; + szind_t oldindex = size2index(oldsize) - NBINS; + szind_t index = size2index(size) - NBINS; arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; @@ -2641,42 +2642,42 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t oldsize, size_t size, size_t extra, bool zero) + size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t npages = (oldsize + large_pad) >> LG_PAGE; size_t followsize; - size_t usize_min = s2u(size); assert(oldsize == arena_mapbits_large_size_get(chunk, pageind) - large_pad); /* Try to extend the run. */ - assert(usize_min > oldsize); malloc_mutex_lock(&arena->lock); - if (pageind+npages < chunk_npages && - arena_mapbits_allocated_get(chunk, pageind+npages) == 0 && - (followsize = arena_mapbits_unallocated_size_get(chunk, - pageind+npages)) >= usize_min - oldsize) { + if (pageind+npages >= chunk_npages || arena_mapbits_allocated_get(chunk, + pageind+npages) != 0) + goto label_fail; + followsize = arena_mapbits_unallocated_size_get(chunk, pageind+npages); + if (oldsize + followsize >= usize_min) { /* * The next run is available and sufficiently large. Split the * following run, then merge the first part with the existing * allocation. */ arena_run_t *run; - size_t flag_dirty, flag_unzeroed_mask, splitsize, usize; + size_t usize, splitsize, size, flag_dirty, flag_unzeroed_mask; - usize = s2u(size + extra); + usize = usize_max; while (oldsize + followsize < usize) usize = index2size(size2index(usize)-1); assert(usize >= usize_min); + assert(usize >= oldsize); splitsize = usize - oldsize; + if (splitsize == 0) + goto label_fail; run = &arena_miscelm_get(chunk, pageind+npages)->run; - if (arena_run_split_large(arena, run, splitsize, zero)) { - malloc_mutex_unlock(&arena->lock); - return (true); - } + if (arena_run_split_large(arena, run, splitsize, zero)) + goto label_fail; size = oldsize + splitsize; npages = (size + large_pad) >> LG_PAGE; @@ -2700,8 +2701,8 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind+npages-1))); if (config_stats) { - index_t oldindex = size2index(oldsize) - NBINS; - index_t index = size2index(size) - NBINS; + szind_t oldindex = size2index(oldsize) - NBINS; + szind_t index = size2index(size) - NBINS; arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; @@ -2718,8 +2719,8 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, malloc_mutex_unlock(&arena->lock); return (false); } +label_fail: malloc_mutex_unlock(&arena->lock); - return (true); } @@ -2748,98 +2749,107 @@ arena_ralloc_junk_large_t *arena_ralloc_junk_large = * always fail if growing an object, and the following run is already in use. */ static bool -arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, - bool zero) +arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min, + size_t usize_max, bool zero) { - size_t usize; + arena_chunk_t *chunk; + arena_t *arena; - /* Make sure extra can't cause size_t overflow. */ - if (unlikely(extra >= arena_maxclass)) - return (true); - - usize = s2u(size + extra); - if (usize == oldsize) { - /* Same size class. */ + if (oldsize == usize_max) { + /* Current size class is compatible and maximal. */ return (false); - } else { - arena_chunk_t *chunk; - arena_t *arena; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = extent_node_arena_get(&chunk->node); - - if (usize < oldsize) { - /* Fill before shrinking in order avoid a race. */ - arena_ralloc_junk_large(ptr, oldsize, usize); - arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, - usize); - return (false); - } else { - bool ret = arena_ralloc_large_grow(arena, chunk, ptr, - oldsize, size, extra, zero); - if (config_fill && !ret && !zero) { - if (unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)ptr + - oldsize), 0xa5, isalloc(ptr, - config_prof) - oldsize); - } else if (unlikely(opt_zero)) { - memset((void *)((uintptr_t)ptr + - oldsize), 0, isalloc(ptr, - config_prof) - oldsize); - } - } - return (ret); - } } + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena = extent_node_arena_get(&chunk->node); + + if (oldsize < usize_max) { + bool ret = arena_ralloc_large_grow(arena, chunk, ptr, oldsize, + usize_min, usize_max, zero); + if (config_fill && !ret && !zero) { + if (unlikely(opt_junk_alloc)) { + memset((void *)((uintptr_t)ptr + oldsize), 0xa5, + isalloc(ptr, config_prof) - oldsize); + } else if (unlikely(opt_zero)) { + memset((void *)((uintptr_t)ptr + oldsize), 0, + isalloc(ptr, config_prof) - oldsize); + } + } + return (ret); + } + + assert(oldsize > usize_max); + /* Fill before shrinking in order avoid a race. */ + arena_ralloc_junk_large(ptr, oldsize, usize_max); + arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, usize_max); + return (false); } bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero) { + size_t usize_min, usize_max; - if (likely(size <= arena_maxclass)) { + usize_min = s2u(size); + usize_max = s2u(size + extra); + if (likely(oldsize <= large_maxclass && usize_min <= large_maxclass)) { /* * Avoid moving the allocation if the size class can be left the * same. */ - if (likely(oldsize <= arena_maxclass)) { - if (oldsize <= SMALL_MAXCLASS) { - assert( - arena_bin_info[size2index(oldsize)].reg_size - == oldsize); - if ((size + extra <= SMALL_MAXCLASS && - size2index(size + extra) == - size2index(oldsize)) || (size <= oldsize && - size + extra >= oldsize)) + if (oldsize <= SMALL_MAXCLASS) { + assert(arena_bin_info[size2index(oldsize)].reg_size == + oldsize); + if ((usize_max <= SMALL_MAXCLASS && + size2index(usize_max) == size2index(oldsize)) || + (size <= oldsize && usize_max >= oldsize)) + return (false); + } else { + if (usize_max > SMALL_MAXCLASS) { + if (!arena_ralloc_large(ptr, oldsize, usize_min, + usize_max, zero)) return (false); - } else { - assert(size <= arena_maxclass); - if (size + extra > SMALL_MAXCLASS) { - if (!arena_ralloc_large(ptr, oldsize, - size, extra, zero)) - return (false); - } } } /* Reallocation would require a move. */ return (true); - } else - return (huge_ralloc_no_move(ptr, oldsize, size, extra, zero)); + } else { + return (huge_ralloc_no_move(ptr, oldsize, usize_min, usize_max, + zero)); + } +} + +static void * +arena_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, + size_t alignment, bool zero, tcache_t *tcache) +{ + + if (alignment == 0) + return (arena_malloc(tsd, arena, usize, zero, tcache)); + usize = sa2u(usize, alignment); + if (usize == 0) + return (NULL); + return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); } void * arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, tcache_t *tcache) + size_t alignment, bool zero, tcache_t *tcache) { void *ret; + size_t usize; - if (likely(size <= arena_maxclass)) { + usize = s2u(size); + if (usize == 0) + return (NULL); + + if (likely(usize <= large_maxclass)) { size_t copysize; /* Try to avoid moving the allocation. */ - if (!arena_ralloc_no_move(ptr, oldsize, size, extra, zero)) + if (!arena_ralloc_no_move(ptr, oldsize, usize, 0, zero)) return (ptr); /* @@ -2847,53 +2857,23 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, * the object. In that case, fall back to allocating new space * and copying. */ - if (alignment != 0) { - size_t usize = sa2u(size + extra, alignment); - if (usize == 0) - return (NULL); - ret = ipalloct(tsd, usize, alignment, zero, tcache, - arena); - } else { - ret = arena_malloc(tsd, arena, size + extra, zero, - tcache); - } - - if (ret == NULL) { - if (extra == 0) - return (NULL); - /* Try again, this time without extra. */ - if (alignment != 0) { - size_t usize = sa2u(size, alignment); - if (usize == 0) - return (NULL); - ret = ipalloct(tsd, usize, alignment, zero, - tcache, arena); - } else { - ret = arena_malloc(tsd, arena, size, zero, - tcache); - } - - if (ret == NULL) - return (NULL); - } + ret = arena_ralloc_move_helper(tsd, arena, usize, alignment, + zero, tcache); + if (ret == NULL) + return (NULL); /* * Junk/zero-filling were already done by * ipalloc()/arena_malloc(). */ - /* - * Copy at most size bytes (not size+extra), since the caller - * has no expectation that the extra bytes will be reliably - * preserved. - */ - copysize = (size < oldsize) ? size : oldsize; + copysize = (usize < oldsize) ? usize : oldsize; JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); isqalloc(tsd, ptr, oldsize, tcache); } else { - ret = huge_ralloc(tsd, arena, ptr, oldsize, size, extra, - alignment, zero, tcache); + ret = huge_ralloc(tsd, arena, ptr, oldsize, usize, alignment, + zero, tcache); } return (ret); } @@ -3241,7 +3221,6 @@ small_run_size_init(void) bool arena_boot(void) { - size_t header_size; unsigned i; arena_lg_dirty_mult_default_set(opt_lg_dirty_mult); @@ -3260,7 +3239,7 @@ arena_boot(void) */ map_bias = 0; for (i = 0; i < 3; i++) { - header_size = offsetof(arena_chunk_t, map_bits) + + size_t header_size = offsetof(arena_chunk_t, map_bits) + ((sizeof(arena_chunk_map_bits_t) + sizeof(arena_chunk_map_misc_t)) * (chunk_npages-map_bias)); map_bias = (header_size + PAGE_MASK) >> LG_PAGE; @@ -3272,17 +3251,17 @@ arena_boot(void) arena_maxrun = chunksize - (map_bias << LG_PAGE); assert(arena_maxrun > 0); - arena_maxclass = index2size(size2index(chunksize)-1); - if (arena_maxclass > arena_maxrun) { + large_maxclass = index2size(size2index(chunksize)-1); + if (large_maxclass > arena_maxrun) { /* * For small chunk sizes it's possible for there to be fewer * non-header pages available than are necessary to serve the * size classes just below chunksize. */ - arena_maxclass = arena_maxrun; + large_maxclass = arena_maxrun; } - assert(arena_maxclass > 0); - nlclasses = size2index(arena_maxclass) - size2index(SMALL_MAXCLASS); + assert(large_maxclass > 0); + nlclasses = size2index(large_maxclass) - size2index(SMALL_MAXCLASS); nhclasses = NSIZES - nlclasses - NBINS; bin_info_init(); diff --git a/contrib/jemalloc/src/chunk_dss.c b/contrib/jemalloc/src/chunk_dss.c index de0546d00226..61fc91696192 100644 --- a/contrib/jemalloc/src/chunk_dss.c +++ b/contrib/jemalloc/src/chunk_dss.c @@ -69,8 +69,6 @@ void * chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret; - cassert(have_dss); assert(size > 0 && (size & chunksize_mask) == 0); assert(alignment > 0 && (alignment & chunksize_mask) == 0); @@ -84,9 +82,6 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, malloc_mutex_lock(&dss_mtx); if (dss_prev != (void *)-1) { - size_t gap_size, cpad_size; - void *cpad, *dss_next; - intptr_t incr; /* * The loop is necessary to recover from races with other @@ -94,6 +89,9 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, * malloc. */ do { + void *ret, *cpad, *dss_next; + size_t gap_size, cpad_size; + intptr_t incr; /* Avoid an unnecessary system call. */ if (new_addr != NULL && dss_max != new_addr) break; diff --git a/contrib/jemalloc/src/chunk_mmap.c b/contrib/jemalloc/src/chunk_mmap.c index 36eb07548fc9..b9ba74191a41 100644 --- a/contrib/jemalloc/src/chunk_mmap.c +++ b/contrib/jemalloc/src/chunk_mmap.c @@ -6,14 +6,16 @@ static void * chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret, *pages; - size_t alloc_size, leadsize; + void *ret; + size_t alloc_size; alloc_size = size + alignment - PAGE; /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); do { + void *pages; + size_t leadsize; pages = pages_map(NULL, alloc_size); if (pages == NULL) return (NULL); diff --git a/contrib/jemalloc/src/huge.c b/contrib/jemalloc/src/huge.c index 54c2114ce2ba..f8778db28fc5 100644 --- a/contrib/jemalloc/src/huge.c +++ b/contrib/jemalloc/src/huge.c @@ -126,18 +126,19 @@ huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); #endif static void -huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, - size_t size, size_t extra, bool zero) +huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, + size_t usize_max, bool zero) { - size_t usize_next; + size_t usize, usize_next; extent_node_t *node; arena_t *arena; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; bool zeroed; /* Increase usize to incorporate extra. */ - while (usize < s2u(size+extra) && (usize_next = s2u(usize+1)) < oldsize) - usize = usize_next; + for (usize = usize_min; usize < usize_max && (usize_next = s2u(usize+1)) + <= oldsize; usize = usize_next) + ; /* Do nothing. */ if (oldsize == usize) return; @@ -148,11 +149,12 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, /* Fill if necessary (shrinking). */ if (oldsize > usize) { size_t sdiff = oldsize - usize; - zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, ptr, - CHUNK_CEILING(usize), usize, sdiff); if (config_fill && unlikely(opt_junk_free)) { memset((void *)((uintptr_t)ptr + usize), 0x5a, sdiff); zeroed = false; + } else { + zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, ptr, + CHUNK_CEILING(oldsize), usize, sdiff); } } else zeroed = true; @@ -194,6 +196,8 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) arena = extent_node_arena_get(node); chunk_hooks = chunk_hooks_get(arena); + assert(oldsize > usize); + /* Split excess chunks. */ cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); if (cdiff != 0 && chunk_hooks.split(ptr, CHUNK_CEILING(oldsize), @@ -202,14 +206,15 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) if (oldsize > usize) { size_t sdiff = oldsize - usize; - zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, - CHUNK_ADDR2BASE((uintptr_t)ptr + usize), - CHUNK_CEILING(usize), CHUNK_ADDR2OFFSET((uintptr_t)ptr + - usize), sdiff); if (config_fill && unlikely(opt_junk_free)) { huge_dalloc_junk((void *)((uintptr_t)ptr + usize), sdiff); zeroed = false; + } else { + zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, + CHUNK_ADDR2BASE((uintptr_t)ptr + usize), + CHUNK_CEILING(oldsize), + CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff); } } else zeroed = true; @@ -228,18 +233,11 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) } static bool -huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { - size_t usize; +huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t usize, bool zero) { extent_node_t *node; arena_t *arena; bool is_zeroed_subchunk, is_zeroed_chunk; - usize = s2u(size); - if (usize == 0) { - /* size_t overflow. */ - return (true); - } - node = huge_node_get(ptr); arena = extent_node_arena_get(node); malloc_mutex_lock(&arena->huge_mtx); @@ -280,89 +278,76 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { } bool -huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, - bool zero) +huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, + size_t usize_max, bool zero) { - size_t usize; - - /* Both allocations must be huge to avoid a move. */ - if (oldsize < chunksize) - return (true); assert(s2u(oldsize) == oldsize); - usize = s2u(size); - if (usize == 0) { - /* size_t overflow. */ + + /* Both allocations must be huge to avoid a move. */ + if (oldsize < chunksize || usize_max < chunksize) return (true); + + if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(oldsize)) { + /* Attempt to expand the allocation in-place. */ + if (!huge_ralloc_no_move_expand(ptr, oldsize, usize_max, zero)) + return (false); + /* Try again, this time with usize_min. */ + if (usize_min < usize_max && CHUNK_CEILING(usize_min) > + CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(ptr, + oldsize, usize_min, zero)) + return (false); } /* * Avoid moving the allocation if the existing chunk size accommodates * the new size. */ - if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize) - && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(s2u(size+extra))) { - huge_ralloc_no_move_similar(ptr, oldsize, usize, size, extra, + if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize_min) + && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(usize_max)) { + huge_ralloc_no_move_similar(ptr, oldsize, usize_min, usize_max, zero); return (false); } /* Attempt to shrink the allocation in-place. */ - if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize)) - return (huge_ralloc_no_move_shrink(ptr, oldsize, usize)); + if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) + return (huge_ralloc_no_move_shrink(ptr, oldsize, usize_max)); + return (true); +} - /* Attempt to expand the allocation in-place. */ - if (huge_ralloc_no_move_expand(ptr, oldsize, size + extra, zero)) { - if (extra == 0) - return (true); +static void * +huge_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, + size_t alignment, bool zero, tcache_t *tcache) +{ - /* Try again, this time without extra. */ - return (huge_ralloc_no_move_expand(ptr, oldsize, size, zero)); - } - return (false); + if (alignment <= chunksize) + return (huge_malloc(tsd, arena, usize, zero, tcache)); + return (huge_palloc(tsd, arena, usize, alignment, zero, tcache)); } void * -huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, tcache_t *tcache) +huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, + size_t alignment, bool zero, tcache_t *tcache) { void *ret; size_t copysize; /* Try to avoid moving the allocation. */ - if (!huge_ralloc_no_move(ptr, oldsize, size, extra, zero)) + if (!huge_ralloc_no_move(ptr, oldsize, usize, usize, zero)) return (ptr); /* - * size and oldsize are different enough that we need to use a + * usize and oldsize are different enough that we need to use a * different size class. In that case, fall back to allocating new * space and copying. */ - if (alignment > chunksize) { - ret = huge_palloc(tsd, arena, size + extra, alignment, zero, - tcache); - } else - ret = huge_malloc(tsd, arena, size + extra, zero, tcache); + ret = huge_ralloc_move_helper(tsd, arena, usize, alignment, zero, + tcache); + if (ret == NULL) + return (NULL); - if (ret == NULL) { - if (extra == 0) - return (NULL); - /* Try again, this time without extra. */ - if (alignment > chunksize) { - ret = huge_palloc(tsd, arena, size, alignment, zero, - tcache); - } else - ret = huge_malloc(tsd, arena, size, zero, tcache); - - if (ret == NULL) - return (NULL); - } - - /* - * Copy at most size bytes (not size+extra), since the caller has no - * expectation that the extra bytes will be reliably preserved. - */ - copysize = (size < oldsize) ? size : oldsize; + copysize = (usize < oldsize) ? usize : oldsize; memcpy(ret, ptr, copysize); isqalloc(tsd, ptr, oldsize, tcache); return (ret); @@ -439,3 +424,10 @@ huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) extent_node_prof_tctx_set(node, tctx); malloc_mutex_unlock(&arena->huge_mtx); } + +void +huge_prof_tctx_reset(const void *ptr) +{ + + huge_prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); +} diff --git a/contrib/jemalloc/src/jemalloc.c b/contrib/jemalloc/src/jemalloc.c index d078a1fb8f92..b6cbb79d6369 100644 --- a/contrib/jemalloc/src/jemalloc.c +++ b/contrib/jemalloc/src/jemalloc.c @@ -183,13 +183,24 @@ static bool malloc_initializer = NO_INITIALIZER; static malloc_mutex_t init_lock = SRWLOCK_INIT; #else static malloc_mutex_t init_lock; +static bool init_lock_initialized = false; JEMALLOC_ATTR(constructor) static void WINAPI _init_init_lock(void) { - malloc_mutex_init(&init_lock); + /* If another constructor in the same binary is using mallctl to + * e.g. setup chunk hooks, it may end up running before this one, + * and malloc_init_hard will crash trying to lock the uninitialized + * lock. So we force an initialization of the lock in + * malloc_init_hard as well. We don't try to care about atomicity + * of the accessed to the init_lock_initialized boolean, since it + * really only matters early in the process creation, before any + * separate thread normally starts doing anything. */ + if (!init_lock_initialized) + malloc_mutex_init(&init_lock); + init_lock_initialized = true; } #ifdef _MSC_VER @@ -514,17 +525,17 @@ arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing) assert(ind < narenas_actual || !init_if_missing); narenas_cache = (ind < narenas_actual) ? narenas_actual : ind+1; - if (!*arenas_cache_bypassp) { + if (tsd_nominal(tsd) && !*arenas_cache_bypassp) { *arenas_cache_bypassp = true; arenas_cache = (arena_t **)a0malloc(sizeof(arena_t *) * narenas_cache); *arenas_cache_bypassp = false; - } else - arenas_cache = NULL; + } if (arenas_cache == NULL) { /* * This function must always tell the truth, even if - * it's slow, so don't let OOM or recursive allocation + * it's slow, so don't let OOM, thread cleanup (note + * tsd_nominal check), nor recursive allocation * avoidance (note arenas_cache_bypass check) get in the * way. */ @@ -535,6 +546,7 @@ arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing) malloc_mutex_unlock(&arenas_lock); return (arena); } + assert(tsd_nominal(tsd) && !*arenas_cache_bypassp); tsd_arenas_cache_set(tsd, arenas_cache); tsd_narenas_cache_set(tsd, narenas_cache); } @@ -653,8 +665,10 @@ arenas_cache_cleanup(tsd_t *tsd) arena_t **arenas_cache; arenas_cache = tsd_arenas_cache_get(tsd); - if (arenas_cache != NULL) + if (arenas_cache != NULL) { + tsd_arenas_cache_set(tsd, NULL); a0dalloc(arenas_cache); + } } void @@ -1301,6 +1315,9 @@ static bool malloc_init_hard(void) { +#if defined(_WIN32) && _WIN32_WINNT < 0x0600 + _init_init_lock(); +#endif malloc_mutex_lock(&init_lock); if (!malloc_init_hard_needed()) { malloc_mutex_unlock(&init_lock); @@ -1365,7 +1382,7 @@ imalloc_prof(tsd_t *tsd, size_t usize) void *p; prof_tctx_t *tctx; - tctx = prof_alloc_prep(tsd, usize, true); + tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) p = imalloc_prof_sample(tsd, usize, tctx); else @@ -1455,7 +1472,7 @@ imemalign_prof(tsd_t *tsd, size_t alignment, size_t usize) void *p; prof_tctx_t *tctx; - tctx = prof_alloc_prep(tsd, usize, true); + tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) p = imemalign_prof_sample(tsd, alignment, usize, tctx); else @@ -1586,7 +1603,7 @@ icalloc_prof(tsd_t *tsd, size_t usize) void *p; prof_tctx_t *tctx; - tctx = prof_alloc_prep(tsd, usize, true); + tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) p = icalloc_prof_sample(tsd, usize, tctx); else @@ -1669,7 +1686,7 @@ label_return: } static void * -irealloc_prof_sample(tsd_t *tsd, void *oldptr, size_t old_usize, size_t usize, +irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, prof_tctx_t *tctx) { void *p; @@ -1677,31 +1694,36 @@ irealloc_prof_sample(tsd_t *tsd, void *oldptr, size_t old_usize, size_t usize, if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = iralloc(tsd, oldptr, old_usize, LARGE_MINCLASS, 0, false); + p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else - p = iralloc(tsd, oldptr, old_usize, usize, 0, false); + p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -irealloc_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t usize) +irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) { void *p; + bool prof_active; prof_tctx_t *old_tctx, *tctx; - old_tctx = prof_tctx_get(oldptr); - tctx = prof_alloc_prep(tsd, usize, true); + prof_active = prof_active_get_unlocked(); + old_tctx = prof_tctx_get(old_ptr); + tctx = prof_alloc_prep(tsd, usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = irealloc_prof_sample(tsd, oldptr, old_usize, usize, tctx); + p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx); else - p = iralloc(tsd, oldptr, old_usize, usize, 0, false); - if (p == NULL) + p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); + if (unlikely(p == NULL)) { + prof_alloc_rollback(tsd, tctx, true); return (NULL); - prof_realloc(tsd, p, usize, tctx, true, old_usize, old_tctx); + } + prof_realloc(tsd, p, usize, tctx, prof_active, true, old_ptr, old_usize, + old_tctx); return (p); } @@ -1905,6 +1927,7 @@ imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, *alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); *usize = sa2u(size, *alignment); } + assert(*usize != 0); *zero = MALLOCX_ZERO_GET(flags); if ((flags & MALLOCX_TCACHE_MASK) != 0) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) @@ -1947,41 +1970,29 @@ imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { - if (alignment != 0) + if (unlikely(alignment != 0)) return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); - if (zero) + if (unlikely(zero)) return (icalloct(tsd, usize, tcache, arena)); return (imalloct(tsd, usize, tcache, arena)); } -JEMALLOC_ALWAYS_INLINE_C void * -imallocx_maybe_flags(tsd_t *tsd, size_t size, int flags, size_t usize, - size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) -{ - - if (likely(flags == 0)) - return (imalloc(tsd, size)); - return (imallocx_flags(tsd, usize, alignment, zero, tcache, arena)); -} - static void * -imallocx_prof_sample(tsd_t *tsd, size_t size, int flags, size_t usize, - size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) +imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena) { void *p; if (usize <= SMALL_MAXCLASS) { assert(((alignment == 0) ? s2u(LARGE_MINCLASS) : sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS); - p = imallocx_maybe_flags(tsd, LARGE_MINCLASS, flags, - LARGE_MINCLASS, alignment, zero, tcache, arena); + p = imallocx_flags(tsd, LARGE_MINCLASS, alignment, zero, tcache, + arena); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); - } else { - p = imallocx_maybe_flags(tsd, size, flags, usize, alignment, - zero, tcache, arena); - } + } else + p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena); return (p); } @@ -1999,13 +2010,12 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment, &zero, &tcache, &arena))) return (NULL); - tctx = prof_alloc_prep(tsd, *usize, true); - if (likely((uintptr_t)tctx == (uintptr_t)1U)) { - p = imallocx_maybe_flags(tsd, size, flags, *usize, alignment, - zero, tcache, arena); - } else if ((uintptr_t)tctx > (uintptr_t)1U) { - p = imallocx_prof_sample(tsd, size, flags, *usize, alignment, - zero, tcache, arena); + tctx = prof_alloc_prep(tsd, *usize, prof_active_get_unlocked(), true); + if (likely((uintptr_t)tctx == (uintptr_t)1U)) + p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena); + else if ((uintptr_t)tctx > (uintptr_t)1U) { + p = imallocx_prof_sample(tsd, *usize, alignment, zero, tcache, + arena); } else p = NULL; if (unlikely(p == NULL)) { @@ -2080,8 +2090,8 @@ label_oom: } static void * -irallocx_prof_sample(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, - size_t alignment, size_t usize, bool zero, tcache_t *tcache, arena_t *arena, +irallocx_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, + size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena, prof_tctx_t *tctx) { void *p; @@ -2089,13 +2099,13 @@ irallocx_prof_sample(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = iralloct(tsd, oldptr, old_usize, LARGE_MINCLASS, alignment, + p = iralloct(tsd, old_ptr, old_usize, LARGE_MINCLASS, alignment, zero, tcache, arena); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else { - p = iralloct(tsd, oldptr, old_usize, size, alignment, zero, + p = iralloct(tsd, old_ptr, old_usize, usize, alignment, zero, tcache, arena); } @@ -2103,28 +2113,30 @@ irallocx_prof_sample(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, } JEMALLOC_ALWAYS_INLINE_C void * -irallocx_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, +irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, arena_t *arena) { void *p; + bool prof_active; prof_tctx_t *old_tctx, *tctx; - old_tctx = prof_tctx_get(oldptr); - tctx = prof_alloc_prep(tsd, *usize, false); + prof_active = prof_active_get_unlocked(); + old_tctx = prof_tctx_get(old_ptr); + tctx = prof_alloc_prep(tsd, *usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - p = irallocx_prof_sample(tsd, oldptr, old_usize, size, - alignment, *usize, zero, tcache, arena, tctx); + p = irallocx_prof_sample(tsd, old_ptr, old_usize, *usize, + alignment, zero, tcache, arena, tctx); } else { - p = iralloct(tsd, oldptr, old_usize, size, alignment, zero, + p = iralloct(tsd, old_ptr, old_usize, size, alignment, zero, tcache, arena); } if (unlikely(p == NULL)) { - prof_alloc_rollback(tsd, tctx, false); + prof_alloc_rollback(tsd, tctx, true); return (NULL); } - if (p == oldptr && alignment != 0) { + if (p == old_ptr && alignment != 0) { /* * The allocation did not move, so it is possible that the size * class is smaller than would guarantee the requested @@ -2135,7 +2147,8 @@ irallocx_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, */ *usize = isalloc(p, config_prof); } - prof_realloc(tsd, p, *usize, tctx, false, old_usize, old_tctx); + prof_realloc(tsd, p, *usize, tctx, prof_active, true, old_ptr, + old_usize, old_tctx); return (p); } @@ -2230,26 +2243,13 @@ ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, static size_t ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, size_t max_usize, bool zero, prof_tctx_t *tctx) + size_t alignment, bool zero, prof_tctx_t *tctx) { size_t usize; if (tctx == NULL) return (old_usize); - /* Use minimum usize to determine whether promotion may happen. */ - if (((alignment == 0) ? s2u(size) : sa2u(size, alignment)) <= - SMALL_MAXCLASS) { - if (ixalloc(ptr, old_usize, SMALL_MAXCLASS+1, - (SMALL_MAXCLASS+1 >= size+extra) ? 0 : size+extra - - (SMALL_MAXCLASS+1), alignment, zero)) - return (old_usize); - usize = isalloc(ptr, config_prof); - if (max_usize < LARGE_MINCLASS) - arena_prof_promoted(ptr, usize); - } else { - usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, - zero); - } + usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero); return (usize); } @@ -2258,9 +2258,11 @@ JEMALLOC_ALWAYS_INLINE_C size_t ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero) { - size_t max_usize, usize; + size_t usize_max, usize; + bool prof_active; prof_tctx_t *old_tctx, *tctx; + prof_active = prof_active_get_unlocked(); old_tctx = prof_tctx_get(ptr); /* * usize isn't knowable before ixalloc() returns when extra is non-zero. @@ -2268,21 +2270,23 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, * prof_alloc_prep() to decide whether to capture a backtrace. * prof_realloc() will use the actual usize to decide whether to sample. */ - max_usize = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, + usize_max = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, alignment); - tctx = prof_alloc_prep(tsd, max_usize, false); + assert(usize_max != 0); + tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { usize = ixallocx_prof_sample(ptr, old_usize, size, extra, - alignment, zero, max_usize, tctx); + alignment, zero, tctx); } else { usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero); } - if (unlikely(usize == old_usize)) { + if (usize == old_usize) { prof_alloc_rollback(tsd, tctx, false); return (usize); } - prof_realloc(tsd, ptr, usize, tctx, false, old_usize, old_tctx); + prof_realloc(tsd, ptr, usize, tctx, prof_active, false, ptr, old_usize, + old_tctx); return (usize); } @@ -2304,6 +2308,17 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) tsd = tsd_fetch(); old_usize = isalloc(ptr, config_prof); + + /* Clamp extra if necessary to avoid (size + extra) overflow. */ + if (unlikely(size + extra > HUGE_MAXCLASS)) { + /* Check for size overflow. */ + if (unlikely(size > HUGE_MAXCLASS)) { + usize = old_usize; + goto label_not_resized; + } + extra = HUGE_MAXCLASS - size; + } + if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); diff --git a/contrib/jemalloc/src/prof.c b/contrib/jemalloc/src/prof.c index a05792fd6b5b..0a08062cc392 100644 --- a/contrib/jemalloc/src/prof.c +++ b/contrib/jemalloc/src/prof.c @@ -139,9 +139,16 @@ prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) uint64_t b_thr_uid = b->thr_uid; int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); if (ret == 0) { - uint64_t a_tctx_uid = a->tctx_uid; - uint64_t b_tctx_uid = b->tctx_uid; - ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid < b_tctx_uid); + uint64_t a_thr_discrim = a->thr_discrim; + uint64_t b_thr_discrim = b->thr_discrim; + ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim < + b_thr_discrim); + if (ret == 0) { + uint64_t a_tctx_uid = a->tctx_uid; + uint64_t b_tctx_uid = b->tctx_uid; + ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid < + b_tctx_uid); + } } return (ret); } @@ -202,7 +209,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) */ tdata = prof_tdata_get(tsd, true); if (tdata != NULL) - prof_sample_threshold_update(tctx->tdata); + prof_sample_threshold_update(tdata); } if ((uintptr_t)tctx > (uintptr_t)1U) { @@ -219,7 +226,7 @@ void prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) { - prof_tctx_set(ptr, tctx); + prof_tctx_set(ptr, usize, tctx); malloc_mutex_lock(tctx->tdata->lock); tctx->cnts.curobjs++; @@ -791,6 +798,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) } ret.p->tdata = tdata; ret.p->thr_uid = tdata->thr_uid; + ret.p->thr_discrim = tdata->thr_discrim; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); ret.p->gctx = gctx; ret.p->tctx_uid = tdata->tctx_uid_next++; @@ -1569,7 +1577,6 @@ prof_idump(void) { tsd_t *tsd; prof_tdata_t *tdata; - char filename[PATH_MAX + 1]; cassert(config_prof); @@ -1585,6 +1592,7 @@ prof_idump(void) } if (opt_prof_prefix[0] != '\0') { + char filename[PATH_MAX + 1]; malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename, 'i', prof_dump_iseq); prof_dump_iseq++; @@ -1623,7 +1631,6 @@ prof_gdump(void) { tsd_t *tsd; prof_tdata_t *tdata; - char filename[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); @@ -1639,6 +1646,7 @@ prof_gdump(void) } if (opt_prof_prefix[0] != '\0') { + char filename[DUMP_FILENAME_BUFSIZE]; malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename, 'u', prof_dump_useq); prof_dump_useq++; diff --git a/contrib/jemalloc/src/tcache.c b/contrib/jemalloc/src/tcache.c index 3814365ce4f9..fdafd0c620a6 100644 --- a/contrib/jemalloc/src/tcache.c +++ b/contrib/jemalloc/src/tcache.c @@ -32,7 +32,7 @@ size_t tcache_salloc(const void *ptr) void tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { - index_t binind = tcache->next_gc_bin; + szind_t binind = tcache->next_gc_bin; tcache_bin_t *tbin = &tcache->tbins[binind]; tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; @@ -72,7 +72,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) void * tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, index_t binind) + tcache_bin_t *tbin, szind_t binind) { void *ret; @@ -87,7 +87,7 @@ tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, - index_t binind, unsigned rem) + szind_t binind, unsigned rem) { arena_t *arena; void *ptr; @@ -166,7 +166,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, } void -tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, +tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache) { arena_t *arena; @@ -496,13 +496,13 @@ tcache_boot(void) unsigned i; /* - * If necessary, clamp opt_lg_tcache_max, now that arena_maxclass is + * If necessary, clamp opt_lg_tcache_max, now that large_maxclass is * known. */ if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS) tcache_maxclass = SMALL_MAXCLASS; - else if ((1U << opt_lg_tcache_max) > arena_maxclass) - tcache_maxclass = arena_maxclass; + else if ((1U << opt_lg_tcache_max) > large_maxclass) + tcache_maxclass = large_maxclass; else tcache_maxclass = (1U << opt_lg_tcache_max);