From c5deaf04527232761dc96f4b4e97cb9e81bdd3f4 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Fri, 8 Jun 2018 00:15:08 +0000 Subject: [PATCH] UMA memory debugging enabled with INVARIANTS consists of two things: trashing freed memory and checking that allocated memory is properly trashed, and keeping a bitset of freed items. Trashing/checking causes a lot of CPU cache pollution, while keeping the debugging bitsets consistent creates a lot of contention on the UMA zone lock(s). The performance difference between an INVARIANTS kernel and a normal one is mostly attributable to UMA debugging, rather than to all the KASSERT checks in the kernel. Add a loader tunable, vm.debug.divisor, that allows UMA debugging either to be turned off completely or to be enabled only for a fraction of allocations, while still running all KASSERTs in the kernel. That allows running INVARIANTS kernels in production environments without reducing load by orders of magnitude, while still doing useful extra checks. The default value is 1, meaning every allocation is debugged. A value of 0 disables UMA debugging completely. Values above 1 enable debugging only for every N-th item. It isn't possible to follow the number strictly, but the amount of debugging is still reduced by a fraction of roughly (N-1)/N.
Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D15199 --- sys/vm/uma_core.c | 144 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 124 insertions(+), 20 deletions(-) diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index a0d51124e1f5..3714fa98cae8 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -273,8 +273,25 @@ static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS); static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS); #ifdef INVARIANTS +static bool uma_dbg_kskip(uma_keg_t keg, void *mem); +static bool uma_dbg_zskip(uma_zone_t zone, void *mem); static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item); static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item); + +static SYSCTL_NODE(_vm, OID_AUTO, debug, CTLFLAG_RD, 0, + "Memory allocation debugging"); + +static u_int dbg_divisor = 1; +SYSCTL_UINT(_vm_debug, OID_AUTO, divisor, + CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &dbg_divisor, 0, + "Debug & thrash every this item in memory allocator"); + +static counter_u64_t uma_dbg_cnt = EARLY_COUNTER; +static counter_u64_t uma_skip_cnt = EARLY_COUNTER; +SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, trashed, CTLFLAG_RD, + &uma_dbg_cnt, "memory items debugged"); +SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, skipped, CTLFLAG_RD, + &uma_skip_cnt, "memory items skipped, not debugged"); #endif SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL); @@ -854,6 +871,18 @@ keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start) i = start; if (keg->uk_fini != NULL) { for (i--; i > -1; i--) +#ifdef INVARIANTS + /* + * trash_fini implies that dtor was trash_dtor. trash_fini + * would check that memory hasn't been modified since free, + * which executed trash_dtor. + * That's why we need to run uma_dbg_kskip() check here, + * albeit we don't make skip check for other init/fini + * invocations. 
+ */ + if (!uma_dbg_kskip(keg, slab->us_data + (keg->uk_rsize * i)) || + keg->uk_fini != trash_fini) +#endif keg->uk_fini(slab->us_data + (keg->uk_rsize * i), keg->uk_size); } @@ -1958,9 +1987,14 @@ static void uma_startup3(void) { - booted = BOOT_RUNNING; +#ifdef INVARIANTS + TUNABLE_INT_FETCH("vm.debug.divisor", &dbg_divisor); + uma_dbg_cnt = counter_u64_alloc(M_WAITOK); + uma_skip_cnt = counter_u64_alloc(M_WAITOK); +#endif callout_init(&uma_callout, 1); callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL); + booted = BOOT_RUNNING; } static uma_keg_t @@ -2205,6 +2239,9 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) uma_cache_t cache; void *item; int cpu, domain, lockfail; +#ifdef INVARIANTS + bool skipdbg; +#endif /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */ random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA); @@ -2264,14 +2301,22 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled.")); cache->uc_allocs++; critical_exit(); +#ifdef INVARIANTS + skipdbg = uma_dbg_zskip(zone, item); +#endif if (zone->uz_ctor != NULL && +#ifdef INVARIANTS + (!skipdbg || zone->uz_ctor != trash_ctor || + zone->uz_dtor != trash_dtor) && +#endif zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) { atomic_add_long(&zone->uz_fails, 1); zone_free_item(zone, item, udata, SKIP_DTOR); return (NULL); } #ifdef INVARIANTS - uma_dbg_alloc(zone, NULL, item); + if (!skipdbg) + uma_dbg_alloc(zone, NULL, item); #endif if (flags & M_ZERO) uma_zero_item(item, zone); @@ -2794,6 +2839,9 @@ static void * zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags) { void *item; +#ifdef INVARIANTS + bool skipdbg; +#endif item = NULL; @@ -2801,6 +2849,9 @@ zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags) goto fail; atomic_add_long(&zone->uz_allocs, 1); +#ifdef INVARIANTS + skipdbg = uma_dbg_zskip(zone, item); +#endif /* * We have to call both the zone's init 
(not the keg's init) * and the zone's ctor. This is because the item is going from @@ -2813,14 +2864,18 @@ zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags) goto fail; } } - if (zone->uz_ctor != NULL) { - if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) { - zone_free_item(zone, item, udata, SKIP_DTOR); - goto fail; - } + if (zone->uz_ctor != NULL && +#ifdef INVARIANTS + (!skipdbg || zone->uz_ctor != trash_ctor || + zone->uz_dtor != trash_dtor) && +#endif + zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) { + zone_free_item(zone, item, udata, SKIP_DTOR); + goto fail; } #ifdef INVARIANTS - uma_dbg_alloc(zone, NULL, item); + if (!skipdbg) + uma_dbg_alloc(zone, NULL, item); #endif if (flags & M_ZERO) uma_zero_item(item, zone); @@ -2845,6 +2900,9 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata) uma_bucket_t bucket; uma_zone_domain_t zdom; int cpu, domain, lockfail; +#ifdef INVARIANTS + bool skipdbg; +#endif /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */ random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA); @@ -2869,12 +2927,18 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata) } #endif #ifdef INVARIANTS - if (zone->uz_flags & UMA_ZONE_MALLOC) - uma_dbg_free(zone, udata, item); - else - uma_dbg_free(zone, NULL, item); -#endif + skipdbg = uma_dbg_zskip(zone, item); + if (skipdbg == false) { + if (zone->uz_flags & UMA_ZONE_MALLOC) + uma_dbg_free(zone, udata, item); + else + uma_dbg_free(zone, NULL, item); + } + if (zone->uz_dtor != NULL && (!skipdbg || + zone->uz_dtor != trash_dtor || zone->uz_ctor != trash_ctor)) +#else if (zone->uz_dtor != NULL) +#endif zone->uz_dtor(item, zone->uz_size, udata); /* @@ -3138,16 +3202,23 @@ zone_release(uma_zone_t zone, void **bucket, int cnt) static void zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip) { - #ifdef INVARIANTS - if (skip == SKIP_NONE) { + bool skipdbg; + + skipdbg = uma_dbg_zskip(zone, item); + if (skip == SKIP_NONE 
&& !skipdbg) { if (zone->uz_flags & UMA_ZONE_MALLOC) uma_dbg_free(zone, udata, item); else uma_dbg_free(zone, NULL, item); } + + if (skip < SKIP_DTOR && zone->uz_dtor != NULL && + (!skipdbg || zone->uz_dtor != trash_dtor || + zone->uz_ctor != trash_ctor)) +#else + if (skip < SKIP_DTOR && zone->uz_dtor != NULL) #endif - if (skip < SKIP_DTOR && zone->uz_dtor) zone->uz_dtor(item, zone->uz_size, udata); if (skip < SKIP_FINI && zone->uz_fini) @@ -3865,6 +3936,43 @@ uma_dbg_getslab(uma_zone_t zone, void *item) return (slab); } +static bool +uma_dbg_zskip(uma_zone_t zone, void *mem) +{ + uma_keg_t keg; + + if ((keg = zone_first_keg(zone)) == NULL) + return (true); + + return (uma_dbg_kskip(keg, mem)); +} + +static bool +uma_dbg_kskip(uma_keg_t keg, void *mem) +{ + uintptr_t idx; + + if (dbg_divisor == 0) + return (true); + + if (dbg_divisor == 1) + return (false); + + idx = (uintptr_t)mem >> PAGE_SHIFT; + if (keg->uk_ipers > 1) { + idx *= keg->uk_ipers; + idx += ((uintptr_t)mem & PAGE_MASK) / keg->uk_rsize; + } + + if ((idx / dbg_divisor) * dbg_divisor != idx) { + counter_u64_add(uma_skip_cnt, 1); + return (true); + } + counter_u64_add(uma_dbg_cnt, 1); + + return (false); +} + /* * Set up the slab's freei data such that uma_dbg_free can function. * @@ -3875,8 +3983,6 @@ uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item) uma_keg_t keg; int freei; - if (zone_first_keg(zone) == NULL) - return; if (slab == NULL) { slab = uma_dbg_getslab(zone, item); if (slab == NULL) @@ -3905,8 +4011,6 @@ uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item) uma_keg_t keg; int freei; - if (zone_first_keg(zone) == NULL) - return; if (slab == NULL) { slab = uma_dbg_getslab(zone, item); if (slab == NULL)