UMA memory debugging, enabled with INVARIANTS, consists of two things:

trashing freed memory and checking that allocated memory is still properly
trashed, and keeping a bitset of freed items. Trashing/checking
causes a lot of CPU cache pollution, while keeping the debugging bitsets
consistent creates a lot of contention on the UMA zone lock(s). The performance
difference between an INVARIANTS kernel and a normal one is mostly attributable
to UMA debugging, rather than to all the KASSERT checks in the kernel.

Add the loader tunable vm.debug.divisor, which allows UMA debugging to be
either turned off completely or enabled for only a fraction of allocations,
while still running all KASSERTs in the kernel. This makes it possible to run
INVARIANTS kernels in production environments without reducing load by orders
of magnitude, while still performing useful extra checks.

The default value is 1, meaning debug every allocation. A value of 0
disables UMA debugging completely. Values above 1 enable debugging only
for every N-th item. It isn't possible to follow the number strictly,
but the amount of debugging is still reduced by a fraction of roughly
(N-1)/N, i.e. to about 1/N of what it was.

Sponsored by:		Netflix
Differential Revision:	https://reviews.freebsd.org/D15199
This commit is contained in:
Gleb Smirnoff 2018-06-08 00:15:08 +00:00
parent a08d12d3f2
commit c5deaf0452

View File

@ -273,8 +273,25 @@ static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
#ifdef INVARIANTS
static bool uma_dbg_kskip(uma_keg_t keg, void *mem);
static bool uma_dbg_zskip(uma_zone_t zone, void *mem);
static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);
static SYSCTL_NODE(_vm, OID_AUTO, debug, CTLFLAG_RD, 0,
"Memory allocation debugging");
static u_int dbg_divisor = 1;
SYSCTL_UINT(_vm_debug, OID_AUTO, divisor,
CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &dbg_divisor, 0,
"Debug & thrash every this item in memory allocator");
static counter_u64_t uma_dbg_cnt = EARLY_COUNTER;
static counter_u64_t uma_skip_cnt = EARLY_COUNTER;
SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, trashed, CTLFLAG_RD,
&uma_dbg_cnt, "memory items debugged");
SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, skipped, CTLFLAG_RD,
&uma_skip_cnt, "memory items skipped, not debugged");
#endif
SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
@ -854,6 +871,18 @@ keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
i = start;
if (keg->uk_fini != NULL) {
for (i--; i > -1; i--)
#ifdef INVARIANTS
/*
* trash_fini implies that dtor was trash_dtor. trash_fini
* would check that memory hasn't been modified since free,
* which executed trash_dtor.
* That's why we need to run uma_dbg_kskip() check here,
* albeit we don't make skip check for other init/fini
* invocations.
*/
if (!uma_dbg_kskip(keg, slab->us_data + (keg->uk_rsize * i)) ||
keg->uk_fini != trash_fini)
#endif
keg->uk_fini(slab->us_data + (keg->uk_rsize * i),
keg->uk_size);
}
@ -1958,9 +1987,14 @@ static void
uma_startup3(void)
{
/*
 * Startup step registered through SYSINIT(uma_startup3, SI_SUB_VM_CONF,
 * SI_ORDER_SECOND, ...).
 *
 * NOTE(review): "booted = BOOT_RUNNING" appears both here and as the last
 * statement of the function.  In this diff-style listing one of the two is
 * presumably the removed line and the other the added one - confirm
 * against the real file before relying on the ordering.
 */
booted = BOOT_RUNNING;
#ifdef INVARIANTS
/*
 * Read the vm.debug.divisor tunable into dbg_divisor and replace the
 * EARLY_COUNTER placeholders with allocated counters.
 */
TUNABLE_INT_FETCH("vm.debug.divisor", &dbg_divisor);
uma_dbg_cnt = counter_u64_alloc(M_WAITOK);
uma_skip_cnt = counter_u64_alloc(M_WAITOK);
#endif
/* Initialize uma_callout and schedule uma_timeout() through it. */
callout_init(&uma_callout, 1);
callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
booted = BOOT_RUNNING;
}
static uma_keg_t
@ -2205,6 +2239,9 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
uma_cache_t cache;
void *item;
int cpu, domain, lockfail;
#ifdef INVARIANTS
bool skipdbg;
#endif
/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
@ -2264,14 +2301,22 @@ zalloc_start:
KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
cache->uc_allocs++;
critical_exit();
#ifdef INVARIANTS
skipdbg = uma_dbg_zskip(zone, item);
#endif
if (zone->uz_ctor != NULL &&
#ifdef INVARIANTS
(!skipdbg || zone->uz_ctor != trash_ctor ||
zone->uz_dtor != trash_dtor) &&
#endif
zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
atomic_add_long(&zone->uz_fails, 1);
zone_free_item(zone, item, udata, SKIP_DTOR);
return (NULL);
}
#ifdef INVARIANTS
uma_dbg_alloc(zone, NULL, item);
if (!skipdbg)
uma_dbg_alloc(zone, NULL, item);
#endif
if (flags & M_ZERO)
uma_zero_item(item, zone);
@ -2794,6 +2839,9 @@ static void *
zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
{
void *item;
#ifdef INVARIANTS
bool skipdbg;
#endif
item = NULL;
@ -2801,6 +2849,9 @@ zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
goto fail;
atomic_add_long(&zone->uz_allocs, 1);
#ifdef INVARIANTS
skipdbg = uma_dbg_zskip(zone, item);
#endif
/*
* We have to call both the zone's init (not the keg's init)
* and the zone's ctor. This is because the item is going from
@ -2813,14 +2864,18 @@ zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
goto fail;
}
}
if (zone->uz_ctor != NULL) {
if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
zone_free_item(zone, item, udata, SKIP_DTOR);
goto fail;
}
if (zone->uz_ctor != NULL &&
#ifdef INVARIANTS
(!skipdbg || zone->uz_ctor != trash_ctor ||
zone->uz_dtor != trash_dtor) &&
#endif
zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
zone_free_item(zone, item, udata, SKIP_DTOR);
goto fail;
}
#ifdef INVARIANTS
uma_dbg_alloc(zone, NULL, item);
if (!skipdbg)
uma_dbg_alloc(zone, NULL, item);
#endif
if (flags & M_ZERO)
uma_zero_item(item, zone);
@ -2845,6 +2900,9 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
uma_bucket_t bucket;
uma_zone_domain_t zdom;
int cpu, domain, lockfail;
#ifdef INVARIANTS
bool skipdbg;
#endif
/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
@ -2869,12 +2927,18 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
}
#endif
#ifdef INVARIANTS
if (zone->uz_flags & UMA_ZONE_MALLOC)
uma_dbg_free(zone, udata, item);
else
uma_dbg_free(zone, NULL, item);
#endif
skipdbg = uma_dbg_zskip(zone, item);
if (skipdbg == false) {
if (zone->uz_flags & UMA_ZONE_MALLOC)
uma_dbg_free(zone, udata, item);
else
uma_dbg_free(zone, NULL, item);
}
if (zone->uz_dtor != NULL && (!skipdbg ||
zone->uz_dtor != trash_dtor || zone->uz_ctor != trash_ctor))
#else
if (zone->uz_dtor != NULL)
#endif
zone->uz_dtor(item, zone->uz_size, udata);
/*
@ -3138,16 +3202,23 @@ zone_release(uma_zone_t zone, void **bucket, int cnt)
static void
zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
{
#ifdef INVARIANTS
if (skip == SKIP_NONE) {
bool skipdbg;
skipdbg = uma_dbg_zskip(zone, item);
if (skip == SKIP_NONE && !skipdbg) {
if (zone->uz_flags & UMA_ZONE_MALLOC)
uma_dbg_free(zone, udata, item);
else
uma_dbg_free(zone, NULL, item);
}
if (skip < SKIP_DTOR && zone->uz_dtor != NULL &&
(!skipdbg || zone->uz_dtor != trash_dtor ||
zone->uz_ctor != trash_ctor))
#else
if (skip < SKIP_DTOR && zone->uz_dtor != NULL)
#endif
if (skip < SKIP_DTOR && zone->uz_dtor)
zone->uz_dtor(item, zone->uz_size, udata);
if (skip < SKIP_FINI && zone->uz_fini)
@ -3865,6 +3936,43 @@ uma_dbg_getslab(uma_zone_t zone, void *item)
return (slab);
}
/*
 * Zone-level wrapper around uma_dbg_kskip().
 *
 * Returns true (skip debugging for this item) when the zone has no first
 * keg; otherwise returns whatever uma_dbg_kskip() decides for the zone's
 * first keg and the given item address.
 */
static bool
uma_dbg_zskip(uma_zone_t zone, void *mem)
{
	uma_keg_t first;

	first = zone_first_keg(zone);

	/* Short-circuit: uma_dbg_kskip() is only consulted for a real keg. */
	return (first == NULL || uma_dbg_kskip(first, mem));
}
/*
 * Decide whether debugging should be skipped for the item at 'mem'.
 *
 * Returns true to skip, false to debug:
 *   - dbg_divisor == 0: always skip;
 *   - dbg_divisor == 1: never skip;
 *   - otherwise an index is derived from the item address and the item is
 *     debugged only when that index is a multiple of dbg_divisor.
 *
 * The uma_dbg_cnt/uma_skip_cnt counters are updated only in the last case;
 * the two trivial divisor values return without touching them.
 */
static bool
uma_dbg_kskip(uma_keg_t keg, void *mem)
{
	uintptr_t addr, item_no;

	switch (dbg_divisor) {
	case 0:
		return (true);
	case 1:
		return (false);
	default:
		break;
	}

	/*
	 * Start from the page number of the address.  When the keg holds more
	 * than one item (uk_ipers > 1), scale by uk_ipers and add the item's
	 * position within the page, computed from uk_rsize.
	 */
	addr = (uintptr_t)mem;
	item_no = addr >> PAGE_SHIFT;
	if (keg->uk_ipers > 1)
		item_no = item_no * keg->uk_ipers +
		    (addr & PAGE_MASK) / keg->uk_rsize;

	if (item_no % dbg_divisor == 0) {
		counter_u64_add(uma_dbg_cnt, 1);
		return (false);
	}

	counter_u64_add(uma_skip_cnt, 1);
	return (true);
}
/*
* Set up the slab's freei data such that uma_dbg_free can function.
*
@ -3875,8 +3983,6 @@ uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
uma_keg_t keg;
int freei;
if (zone_first_keg(zone) == NULL)
return;
if (slab == NULL) {
slab = uma_dbg_getslab(zone, item);
if (slab == NULL)
@ -3905,8 +4011,6 @@ uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
uma_keg_t keg;
int freei;
if (zone_first_keg(zone) == NULL)
return;
if (slab == NULL) {
slab = uma_dbg_getslab(zone, item);
if (slab == NULL)