Introduce a new sysctl, vm.zone_stats, which exports UMA(9) allocator

statistics via a binary structure stream:

- Add structure 'uma_stream_header', which defines a stream version,
  definition of MAXCPUs used in the stream, and the number of zone
  records in the stream.

- Add structure 'uma_type_header', which defines the name, alignment,
  size, resource allocation limits, current pages allocated, preferred
  bucket size, and central zone + keg statistics.

- Add structure 'uma_percpu_stat', which, for each per-CPU cache,
  includes the number of allocations and frees, as well as the number
  of free items in the cache.

- When the sysctl is queried, return a stream header, followed by a
  series of type descriptions, each consisting of a type header
  followed by a series of MAXCPUs uma_percpu_stat structures holding
  per-CPU allocation information.  Typical values of MAXCPU will be
  1 (UP compiled kernel) and 16 (SMP compiled kernel).

This query mechanism allows user space monitoring tools to extract
memory allocation statistics in a machine-readable form, and to do so
at a per-CPU granularity, allowing monitoring of allocation patterns
across CPUs in order to better understand the distribution of work and
memory flow over multiple CPUs.

While here, also export the number of UMA zones as a sysctl
vm.uma_count, in order to assist in sizing user swpace buffers to
receive the stream.

A follow-up commit of libmemstat(3), a library to monitor kernel memory
allocation, will occur in the next few days.  This change directly
supports converting netstat(1)'s "-mb" mode to using UMA-sourced stats
rather than separately maintained mbuf allocator statistics.

MFC after:	1 week
This commit is contained in:
Robert Watson 2005-07-14 16:35:13 +00:00
parent 773df9ab16
commit 7a52a97eb3
3 changed files with 240 additions and 17 deletions

View File

@ -509,4 +509,51 @@ void uma_prealloc(uma_zone_t zone, int itemcnt);
*/
u_int32_t *uma_find_refcnt(uma_zone_t zone, void *item);
/*
* Exported statistics structures to be used by user space monitoring tools.
* Statistics stream consusts of a uma_stream_header, followed by a series of
* alternative uma_type_header and uma_type_stat structures. Statistics
* structures
*/
#define UMA_STREAM_VERSION 0x00000001
struct uma_stream_header {
u_int32_t ush_version; /* Stream format version. */
u_int32_t ush_maxcpus; /* Value of MAXCPU for stream. */
u_int32_t ush_count; /* Number of records. */
u_int32_t _ush_pad; /* Pad/reserved field. */
};
#define UMA_MAX_NAME 32
struct uma_type_header {
/*
* Static per-zone data, some extracted from the supporting keg.
*/
char uth_name[UMA_MAX_NAME];
u_int32_t uth_align; /* Keg: alignment. */
u_int32_t uth_size; /* Keg: requested size of item. */
u_int32_t uth_rsize; /* Keg: real size of item. */
u_int32_t uth_maxpages; /* Keg: maximum number of pages. */
u_int32_t uth_limit; /* Keg: max items to allocate. */
/*
* Current dynamic zone/keg-derived statistics.
*/
u_int32_t uth_pages; /* Keg: pages allocated. */
u_int32_t uth_keg_free; /* Keg: items free. */
u_int32_t uth_zone_free; /* Zone: items free. */
u_int32_t uth_bucketsize; /* Zone: desired bucket size. */
u_int32_t _uth_reserved0; /* Reserved. */
u_int64_t uth_allocs; /* Zone: number of allocations. */
u_int64_t uth_frees; /* Zone: number of frees. */
u_int64_t _uth_reserved1[4]; /* Reserved. */
};
struct uma_percpu_stat {
u_int64_t ups_allocs; /* Cache: number of alloctions. */
u_int64_t ups_frees; /* Cache: number of frees. */
u_int64_t ups_cache_free; /* Cache: free items in cache. */
u_int64_t _ups_reserved[5]; /* Reserved. */
};
#endif

View File

@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>
@ -234,6 +235,8 @@ static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
void uma_print_zone(uma_zone_t);
void uma_print_stats(void);
static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
#ifdef WITNESS
static int nosleepwithlocks = 1;
@ -248,6 +251,12 @@ SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
NULL, 0, sysctl_vm_zone, "A", "Zone Info");
SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
/*
* This routine checks to see whether or not it's safe to enable buckets.
*/
@ -2750,6 +2759,48 @@ uma_print_zone(uma_zone_t zone)
}
}
/*
* Generate statistics across both the zone and its per-cpu cache's. Return
* desired statistics if the pointer is non-NULL for that statistic.
*
* Note: does not update the zone statistics, as it can't safely clear the
* per-CPU cache statistic.
*
* XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
* safe from off-CPU; we should modify the caches to track this information
* directly so that we don't have to.
*/
static void
uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp,
u_int64_t *freesp)
{
uma_cache_t cache;
u_int64_t allocs, frees;
int cachefree, cpu;
allocs = frees = 0;
cachefree = 0;
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (CPU_ABSENT(cpu))
continue;
cache = &z->uz_cpu[cpu];
if (cache->uc_allocbucket != NULL)
cachefree += cache->uc_allocbucket->ub_cnt;
if (cache->uc_freebucket != NULL)
cachefree += cache->uc_freebucket->ub_cnt;
allocs += cache->uc_allocs;
frees += cache->uc_frees;
}
allocs += z->uz_allocs;
frees += z->uz_frees;
if (cachefreep != NULL)
*cachefreep = cachefree;
if (allocsp != NULL)
*allocsp = allocs;
if (freesp != NULL)
*freesp = frees;
}
/*
* Sysctl handler for vm.zone
*
@ -2765,11 +2816,9 @@ sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
uma_zone_t z;
uma_keg_t zk;
char *p;
int cpu;
int cachefree;
uma_bucket_t bucket;
uma_cache_t cache;
u_int64_t alloc;
u_int64_t allocs, frees;
cnt = 0;
mtx_lock(&uma_mtx);
@ -2795,20 +2844,12 @@ sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
break;
ZONE_LOCK(z);
cachefree = 0;
alloc = 0;
if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (CPU_ABSENT(cpu))
continue;
cache = &z->uz_cpu[cpu];
if (cache->uc_allocbucket != NULL)
cachefree += cache->uc_allocbucket->ub_cnt;
if (cache->uc_freebucket != NULL)
cachefree += cache->uc_freebucket->ub_cnt;
alloc += cache->uc_allocs;
}
uma_zone_sumstat(z, &cachefree, &allocs, &frees);
} else {
allocs = z->uz_allocs;
frees = z->uz_frees;
}
alloc += z->uz_allocs;
LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
cachefree += bucket->ub_cnt;
@ -2820,7 +2861,7 @@ sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
zk->uk_maxpages * zk->uk_ipers,
(zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree,
totalfree,
(unsigned long long)alloc);
(unsigned long long)allocs);
ZONE_UNLOCK(z);
for (p = offset + 12; p > offset && *p == ' '; --p)
/* nothing */ ;
@ -2836,3 +2877,138 @@ sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
FREE(tmpbuf, M_TEMP);
return (error);
}
static int
sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
{
uma_keg_t kz;
uma_zone_t z;
int count;
count = 0;
mtx_lock(&uma_mtx);
LIST_FOREACH(kz, &uma_kegs, uk_link) {
LIST_FOREACH(z, &kz->uk_zones, uz_link)
count++;
}
mtx_unlock(&uma_mtx);
return (sysctl_handle_int(oidp, &count, 0, req));
}
static int
sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
{
struct uma_stream_header ush;
struct uma_type_header uth;
struct uma_percpu_stat ups;
uma_bucket_t bucket;
struct sbuf sbuf;
uma_cache_t cache;
uma_keg_t kz;
uma_zone_t z;
char *buffer;
int buflen, count, error, i;
mtx_lock(&uma_mtx);
restart:
mtx_assert(&uma_mtx, MA_OWNED);
count = 0;
LIST_FOREACH(kz, &uma_kegs, uk_link) {
LIST_FOREACH(z, &kz->uk_zones, uz_link)
count++;
}
mtx_unlock(&uma_mtx);
buflen = sizeof(ush) + count * (sizeof(uth) + sizeof(ups) *
MAXCPU) + 1;
buffer = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
mtx_lock(&uma_mtx);
i = 0;
LIST_FOREACH(kz, &uma_kegs, uk_link) {
LIST_FOREACH(z, &kz->uk_zones, uz_link)
i++;
}
if (i > count) {
free(buffer, M_TEMP);
goto restart;
}
count = i;
sbuf_new(&sbuf, buffer, buflen, SBUF_FIXEDLEN);
/*
* Insert stream header.
*/
bzero(&ush, sizeof(ush));
ush.ush_version = UMA_STREAM_VERSION;
ush.ush_maxcpus = MAXCPU;
ush.ush_count = count;
if (sbuf_bcat(&sbuf, &ush, sizeof(ush)) < 0) {
mtx_unlock(&uma_mtx);
error = ENOMEM;
goto out;
}
LIST_FOREACH(kz, &uma_kegs, uk_link) {
LIST_FOREACH(z, &kz->uk_zones, uz_link) {
bzero(&uth, sizeof(uth));
ZONE_LOCK(z);
strlcpy(uth.uth_name, z->uz_name, UMA_MAX_NAME);
uth.uth_align = kz->uk_align;
uth.uth_pages = kz->uk_pages;
uth.uth_keg_free = kz->uk_free;
uth.uth_size = kz->uk_size;
uth.uth_rsize = kz->uk_rsize;
uth.uth_maxpages = kz->uk_maxpages;
if (kz->uk_ppera > 1)
uth.uth_limit = kz->uk_maxpages /
kz->uk_ppera;
else
uth.uth_limit = kz->uk_maxpages *
kz->uk_ipers;
LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
uth.uth_zone_free += bucket->ub_cnt;
uth.uth_allocs = z->uz_allocs;
uth.uth_frees = z->uz_frees;
ZONE_UNLOCK(z);
if (sbuf_bcat(&sbuf, &uth, sizeof(uth)) < 0) {
mtx_unlock(&uma_mtx);
error = ENOMEM;
goto out;
}
/*
* XXXRW: Should not access bucket fields from
* non-local CPU. Instead need to modify the caches
* to directly maintain these statistics so we don't
* have to.
*/
for (i = 0; i < MAXCPU; i++) {
bzero(&ups, sizeof(ups));
if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
goto skip;
cache = &z->uz_cpu[i];
if (cache->uc_allocbucket != NULL)
ups.ups_cache_free +=
cache->uc_allocbucket->ub_cnt;
if (cache->uc_freebucket != NULL)
ups.ups_cache_free +=
cache->uc_freebucket->ub_cnt;
ups.ups_allocs = cache->uc_allocs;
ups.ups_frees = cache->uc_frees;
skip:
if (sbuf_bcat(&sbuf, &ups, sizeof(ups)) < 0) {
mtx_unlock(&uma_mtx);
error = ENOMEM;
goto out;
}
}
}
}
mtx_unlock(&uma_mtx);
sbuf_finish(&sbuf);
error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
out:
free(buffer, M_TEMP);
return (error);
}

View File

@ -304,7 +304,7 @@ struct uma_zone {
uma_fini uz_fini; /* Discards memory */
u_int64_t uz_allocs; /* Total number of allocations */
u_int64_t uz_frees; /* total number of frees */
u_int64_t uz_frees; /* Total number of frees */
uint16_t uz_fills; /* Outstanding bucket fills */
uint16_t uz_count; /* Highest value ub_ptr can have */