Add accounting to per-domain UMA full bucket caches.

In particular, track the current size of the cache and maintain an
estimate of its working set size.  This will be used to decide how
much to shrink various caches when the kernel attempts to reclaim
pages.  As a secondary effect, it makes statistics aggregation (done
by, e.g., vmstat -z) cheaper since sysctl_vm_zone_stats() no longer
needs to iterate over lists of cached buckets.

Discussed with:	alc, glebius, jeff
Tested by:	pho (previous version)
MFC after:	1 month
Differential Revision:	https://reviews.freebsd.org/D16666
This commit is contained in:
Mark Johnston 2018-11-13 19:44:40 +00:00
parent a82296c2df
commit 0f9b7bf37a
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=340405
2 changed files with 84 additions and 36 deletions

View File

@ -459,6 +459,36 @@ bucket_zone_drain(void)
zone_drain(ubz->ubz_zone);
}
static uma_bucket_t
zone_try_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom, const bool ws)
{
uma_bucket_t bucket;
ZONE_LOCK_ASSERT(zone);
if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
MPASS(zdom->uzd_nitems >= bucket->ub_cnt);
LIST_REMOVE(bucket, ub_link);
zdom->uzd_nitems -= bucket->ub_cnt;
if (ws && zdom->uzd_imin > zdom->uzd_nitems)
zdom->uzd_imin = zdom->uzd_nitems;
}
return (bucket);
}
static void
zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdom, uma_bucket_t bucket,
const bool ws)
{
ZONE_LOCK_ASSERT(zone);
LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
zdom->uzd_nitems += bucket->ub_cnt;
if (ws && zdom->uzd_imax < zdom->uzd_nitems)
zdom->uzd_imax = zdom->uzd_nitems;
}
static void
zone_log_warning(uma_zone_t zone)
{
@ -508,6 +538,23 @@ uma_timeout(void *unused)
callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
}
/*
* Update the working set size estimate for the zone's bucket cache.
* The constants chosen here are somewhat arbitrary. With an update period of
* 20s (UMA_TIMEOUT), this estimate is dominated by zone activity over the
* last 100s.
*/
static void
zone_domain_update_wss(uma_zone_domain_t zdom)
{
long wss;
MPASS(zdom->uzd_imax >= zdom->uzd_imin);
wss = zdom->uzd_imax - zdom->uzd_imin;
zdom->uzd_imax = zdom->uzd_imin = zdom->uzd_nitems;
zdom->uzd_wss = (3 * wss + 2 * zdom->uzd_wss) / 5;
}
/*
* Routine to perform timeout driven calculations. This expands the
* hashes and does per cpu statistics aggregation.
@ -560,8 +607,14 @@ keg_timeout(uma_keg_t keg)
static void
zone_timeout(uma_zone_t zone)
{
int i;
zone_foreach_keg(zone, &keg_timeout);
ZONE_LOCK(zone);
for (i = 0; i < vm_ndomains; i++)
zone_domain_update_wss(&zone->uz_domain[i]);
ZONE_UNLOCK(zone);
}
/*
@ -772,16 +825,16 @@ cache_drain_safe_cpu(uma_zone_t zone)
cache = &zone->uz_cpu[curcpu];
if (cache->uc_allocbucket) {
if (cache->uc_allocbucket->ub_cnt != 0)
LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets,
cache->uc_allocbucket, ub_link);
zone_put_bucket(zone, &zone->uz_domain[domain],
cache->uc_allocbucket, false);
else
b1 = cache->uc_allocbucket;
cache->uc_allocbucket = NULL;
}
if (cache->uc_freebucket) {
if (cache->uc_freebucket->ub_cnt != 0)
LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets,
cache->uc_freebucket, ub_link);
zone_put_bucket(zone, &zone->uz_domain[domain],
cache->uc_freebucket, false);
else
b2 = cache->uc_freebucket;
cache->uc_freebucket = NULL;
@ -844,8 +897,8 @@ bucket_cache_drain(uma_zone_t zone)
*/
for (i = 0; i < vm_ndomains; i++) {
zdom = &zone->uz_domain[i];
while ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
LIST_REMOVE(bucket, ub_link);
while ((bucket = zone_try_fetch_bucket(zone, zdom, false)) !=
NULL) {
ZONE_UNLOCK(zone);
bucket_drain(zone, bucket);
bucket_free(zone, bucket, NULL);
@ -2523,11 +2576,9 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
zdom = &zone->uz_domain[0];
else
zdom = &zone->uz_domain[domain];
if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
if ((bucket = zone_try_fetch_bucket(zone, zdom, true)) != NULL) {
KASSERT(bucket->ub_cnt != 0,
("uma_zalloc_arg: Returning an empty bucket."));
LIST_REMOVE(bucket, ub_link);
cache->uc_allocbucket = bucket;
ZONE_UNLOCK(zone);
goto zalloc_start;
@ -2556,6 +2607,7 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
/*
* See if we lost the race or were migrated. Cache the
* initialized bucket to make this less likely or claim
@ -2565,6 +2617,7 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
((zone->uz_flags & UMA_ZONE_NUMA) == 0 ||
domain == PCPU_GET(domain))) {
cache->uc_allocbucket = bucket;
zdom->uzd_imax += bucket->ub_cnt;
} else if ((zone->uz_flags & UMA_ZONE_NOBUCKETCACHE) != 0) {
critical_exit();
ZONE_UNLOCK(zone);
@ -2572,7 +2625,7 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
bucket_free(zone, bucket, udata);
goto zalloc_restart;
} else
LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
zone_put_bucket(zone, zdom, bucket, false);
ZONE_UNLOCK(zone);
goto zalloc_start;
}
@ -3200,7 +3253,7 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
bucket_free(zone, bucket, udata);
goto zfree_restart;
} else
LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
zone_put_bucket(zone, zdom, bucket, true);
}
/*
@ -3649,6 +3702,7 @@ uma_reclaim_locked(bool kmem_danger)
cache_drain_safe(NULL);
zone_foreach(zone_drain);
}
/*
* Some slabs may have been freed but this zone will be visited early
* we visit again so that we can free pages that are empty once other
@ -3882,7 +3936,7 @@ uma_print_zone(uma_zone_t zone)
* directly so that we don't have to.
*/
static void
uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
uma_zone_sumstat(uma_zone_t z, long *cachefreep, uint64_t *allocsp,
uint64_t *freesp, uint64_t *sleepsp)
{
uma_cache_t cache;
@ -3937,7 +3991,6 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
struct uma_stream_header ush;
struct uma_type_header uth;
struct uma_percpu_stat *ups;
uma_bucket_t bucket;
uma_zone_domain_t zdom;
struct sbuf sbuf;
uma_cache_t cache;
@ -3997,9 +4050,7 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
for (i = 0; i < vm_ndomains; i++) {
zdom = &z->uz_domain[i];
LIST_FOREACH(bucket, &zdom->uzd_buckets,
ub_link)
uth.uth_zone_free += bucket->ub_cnt;
uth.uth_zone_free += zdom->uzd_nitems;
}
uth.uth_allocs = z->uz_allocs;
uth.uth_frees = z->uz_frees;
@ -4199,12 +4250,11 @@ uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
#ifdef DDB
DB_SHOW_COMMAND(uma, db_show_uma)
{
uma_bucket_t bucket;
uma_keg_t kz;
uma_zone_t z;
uma_zone_domain_t zdom;
uint64_t allocs, frees, sleeps;
int cachefree, i;
long cachefree;
int i;
db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used",
"Free", "Requests", "Sleeps", "Bucket");
@ -4221,13 +4271,10 @@ DB_SHOW_COMMAND(uma, db_show_uma)
if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
(LIST_FIRST(&kz->uk_zones) != z)))
cachefree += kz->uk_free;
for (i = 0; i < vm_ndomains; i++) {
zdom = &z->uz_domain[i];
LIST_FOREACH(bucket, &zdom->uzd_buckets,
ub_link)
cachefree += bucket->ub_cnt;
}
db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n",
for (i = 0; i < vm_ndomains; i++)
cachefree += z->uz_domain[i].uzd_nitems;
db_printf("%18s %8ju %8jd %8ld %12ju %8ju %8u\n",
z->uz_name, (uintmax_t)kz->uk_size,
(intmax_t)(allocs - frees), cachefree,
(uintmax_t)allocs, sleeps, z->uz_count);
@ -4239,22 +4286,18 @@ DB_SHOW_COMMAND(uma, db_show_uma)
DB_SHOW_COMMAND(umacache, db_show_umacache)
{
uma_bucket_t bucket;
uma_zone_t z;
uma_zone_domain_t zdom;
uint64_t allocs, frees;
int cachefree, i;
long cachefree;
int i;
db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
"Requests", "Bucket");
LIST_FOREACH(z, &uma_cachezones, uz_link) {
uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL);
for (i = 0; i < vm_ndomains; i++) {
zdom = &z->uz_domain[i];
LIST_FOREACH(bucket, &zdom->uzd_buckets, ub_link)
cachefree += bucket->ub_cnt;
}
db_printf("%18s %8ju %8jd %8d %12ju %8u\n",
for (i = 0; i < vm_ndomains; i++)
cachefree += z->uz_domain[i].uzd_nitems;
db_printf("%18s %8ju %8jd %8ld %12ju %8u\n",
z->uz_name, (uintmax_t)z->uz_size,
(intmax_t)(allocs - frees), cachefree,
(uintmax_t)allocs, z->uz_count);

View File

@ -304,6 +304,10 @@ typedef struct uma_klink *uma_klink_t;
struct uma_zone_domain {
LIST_HEAD(,uma_bucket) uzd_buckets; /* full buckets */
long uzd_nitems; /* total item count */
long uzd_imax; /* maximum item count this period */
long uzd_imin; /* minimum item count this period */
long uzd_wss; /* working set size estimate */
};
typedef struct uma_zone_domain * uma_zone_domain_t;
@ -423,11 +427,12 @@ void uma_large_free(uma_slab_t slab);
mtx_init(&(z)->uz_lock, (z)->uz_name, \
"UMA zone", MTX_DEF | MTX_DUPOK); \
} while (0)
#define ZONE_LOCK(z) mtx_lock((z)->uz_lockptr)
#define ZONE_TRYLOCK(z) mtx_trylock((z)->uz_lockptr)
#define ZONE_UNLOCK(z) mtx_unlock((z)->uz_lockptr)
#define ZONE_LOCK_FINI(z) mtx_destroy(&(z)->uz_lock)
#define ZONE_LOCK_ASSERT(z) mtx_assert((z)->uz_lockptr, MA_OWNED)
/*
* Find a slab within a hash table. This is used for OFFPAGE zones to lookup