Modify UMA to use critical sections to protect per-CPU caches, rather than

mutexes, which offers lower overhead on both UP and SMP.  When allocating
from or freeing to the per-cpu cache, without INVARIANTS enabled, we now
no longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks.  We rely on critical
sections to prevent (a) preemption resulting in reentrant access to UMA on
a single CPU, and (b) migration of the thread during access.  In the event
we need to go back to the zone for a new bucket, we release the critical
section to acquire the global zone mutex, and must re-acquire the critical
section and re-evaluate which cache we are accessing in case migration has
occurred, or circumstances have changed in the current cache.

Per-CPU cache statistics are now gathered lock-free by the sysctl, which
can result in small races in statistics reporting for caches.

Reviewed by:	bmilekic, jeff (somewhat)
Tested by:	rwatson, kris, gnn, scottl, mike at sentex dot net, others
This commit is contained in:
Robert Watson 2005-04-29 18:56:36 +00:00
parent 97c1144eb2
commit 5d1ae027f0
2 changed files with 120 additions and 113 deletions

View File

@ -1,4 +1,5 @@
/*-
* Copyright (c) 2004-2005 Robert N. M. Watson
* Copyright (c) 2004, 2005,
* Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved.
* Copyright (c) 2002, 2003, 2004, 2005,
@ -119,9 +120,6 @@ static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs);
/* This mutex protects the keg list */
static struct mtx uma_mtx;
/* These are the pcpu cache locks */
static struct mtx uma_pcpu_mtx[MAXCPU];
/* Linked list of boot time pages */
static LIST_HEAD(,uma_slab) uma_boot_pages =
LIST_HEAD_INITIALIZER(&uma_boot_pages);
@ -384,40 +382,11 @@ static void
zone_timeout(uma_zone_t zone)
{
uma_keg_t keg;
uma_cache_t cache;
u_int64_t alloc;
int cpu;
keg = zone->uz_keg;
alloc = 0;
/*
* Aggregate per cpu cache statistics back to the zone.
*
* XXX This should be done in the sysctl handler.
*
* I may rewrite this to set a flag in the per cpu cache instead of
* locking. If the flag is not cleared on the next round I will have
* to lock and do it here instead so that the statistics don't get too
* far out of sync.
*/
if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL)) {
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (CPU_ABSENT(cpu))
continue;
CPU_LOCK(cpu);
cache = &zone->uz_cpu[cpu];
/* Add them up, and reset */
alloc += cache->uc_allocs;
cache->uc_allocs = 0;
CPU_UNLOCK(cpu);
}
}
/* Now push these stats back into the zone.. */
ZONE_LOCK(zone);
zone->uz_allocs += alloc;
/*
* Expand the zone hash table.
*
@ -425,7 +394,7 @@ zone_timeout(uma_zone_t zone)
* What I'm trying to do here is completely reduce collisions. This
* may be a little aggressive. Should I allow for two collisions max?
*/
ZONE_LOCK(zone);
if (keg->uk_flags & UMA_ZONE_HASH &&
keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
struct uma_hash newhash;
@ -613,6 +582,10 @@ bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
/*
* Drains the per cpu caches for a zone.
*
* NOTE: This may only be called while the zone is being torn down, and not
* during normal operation. This is necessary in order that we do not have
* to migrate CPUs to drain the per-CPU caches.
*
* Arguments:
* zone The zone to drain, must be unlocked.
*
@ -626,12 +599,20 @@ cache_drain(uma_zone_t zone)
int cpu;
/*
* We have to lock each cpu cache before locking the zone
* XXX: It is safe to not lock the per-CPU caches, because we're
* tearing down the zone anyway. I.e., there will be no further use
* of the caches at this point.
*
* XXX: It would be good to be able to assert that the zone is being
* torn down to prevent improper use of cache_drain().
*
* XXX: We lock the zone before passing into bucket_cache_drain() as
* it is used elsewhere. Should the tear-down path be made special
* there in some form?
*/
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (CPU_ABSENT(cpu))
continue;
CPU_LOCK(cpu);
cache = &zone->uz_cpu[cpu];
bucket_drain(zone, cache->uc_allocbucket);
bucket_drain(zone, cache->uc_freebucket);
@ -644,11 +625,6 @@ cache_drain(uma_zone_t zone)
ZONE_LOCK(zone);
bucket_cache_drain(zone);
ZONE_UNLOCK(zone);
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (CPU_ABSENT(cpu))
continue;
CPU_UNLOCK(cpu);
}
}
/*
@ -828,7 +804,8 @@ slab_zalloc(uma_zone_t zone, int wait)
&flags, wait);
if (mem == NULL) {
if (keg->uk_flags & UMA_ZONE_OFFPAGE)
uma_zfree_internal(keg->uk_slabzone, slab, NULL, 0);
uma_zfree_internal(keg->uk_slabzone, slab, NULL,
SKIP_NONE);
ZONE_LOCK(zone);
return (NULL);
}
@ -1643,10 +1620,6 @@ uma_startup(void *bootmem)
#ifdef UMA_DEBUG
printf("Initializing pcpu cache locks.\n");
#endif
/* Initialize the pcpu cache lock set once and for all */
for (i = 0; i <= mp_maxid; i++)
CPU_LOCK_INIT(i);
#ifdef UMA_DEBUG
printf("Creating slab and hash zones.\n");
#endif
@ -1827,9 +1800,20 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
}
}
/*
* If possible, allocate from the per-CPU cache. There are two
* requirements for safe access to the per-CPU cache: (1) the thread
* accessing the cache must not be preempted or yield during access,
* and (2) the thread must not migrate CPUs without switching which
* cache it accesses. We rely on a critical section to prevent
* preemption and migration. We release the critical section in
* order to acquire the zone mutex if we are unable to allocate from
* the current cache; when we re-acquire the critical section, we
* must detect and handle migration if it has occurred.
*/
zalloc_restart:
cpu = PCPU_GET(cpuid);
CPU_LOCK(cpu);
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
zalloc_start:
@ -1845,12 +1829,12 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
KASSERT(item != NULL,
("uma_zalloc: Bucket pointer mangled."));
cache->uc_allocs++;
critical_exit();
#ifdef INVARIANTS
ZONE_LOCK(zone);
uma_dbg_alloc(zone, NULL, item);
ZONE_UNLOCK(zone);
#endif
CPU_UNLOCK(cpu);
if (zone->uz_ctor != NULL) {
if (zone->uz_ctor(item, zone->uz_keg->uk_size,
udata, flags) != 0) {
@ -1880,7 +1864,33 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
}
}
}
/*
* Attempt to retrieve the item from the per-CPU cache has failed, so
* we must go back to the zone. This requires the zone lock, so we
* must drop the critical section, then re-acquire it when we go back
* to the cache. Since the critical section is released, we may be
* preempted or migrate. As such, make sure not to maintain any
* thread-local state specific to the cache from prior to releasing
* the critical section.
*/
critical_exit();
ZONE_LOCK(zone);
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
bucket = cache->uc_allocbucket;
if (bucket != NULL) {
if (bucket->ub_cnt > 0) {
ZONE_UNLOCK(zone);
goto zalloc_start;
}
bucket = cache->uc_freebucket;
if (bucket != NULL && bucket->ub_cnt > 0) {
ZONE_UNLOCK(zone);
goto zalloc_start;
}
}
/* Since we have locked the zone we may as well send back our stats */
zone->uz_allocs += cache->uc_allocs;
cache->uc_allocs = 0;
@ -1904,8 +1914,8 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
ZONE_UNLOCK(zone);
goto zalloc_start;
}
/* We are no longer associated with this cpu!!! */
CPU_UNLOCK(cpu);
/* We are no longer associated with this CPU. */
critical_exit();
/* Bump up our uz_count so we get here less */
if (zone->uz_count < BUCKET_MAX)
@ -2228,10 +2238,7 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
uma_bucket_t bucket;
int bflags;
int cpu;
enum zfreeskip skip;
/* This is the fast path free */
skip = SKIP_NONE;
keg = zone->uz_keg;
#ifdef UMA_DEBUG_ALLOC_1
@ -2240,22 +2247,37 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
zone->uz_name);
if (zone->uz_dtor)
zone->uz_dtor(item, keg->uk_size, udata);
#ifdef INVARIANTS
ZONE_LOCK(zone);
if (keg->uk_flags & UMA_ZONE_MALLOC)
uma_dbg_free(zone, udata, item);
else
uma_dbg_free(zone, NULL, item);
ZONE_UNLOCK(zone);
#endif
/*
* The race here is acceptable. If we miss it we'll just have to wait
* a little longer for the limits to be reset.
*/
if (keg->uk_flags & UMA_ZFLAG_FULL)
goto zfree_internal;
if (zone->uz_dtor) {
zone->uz_dtor(item, keg->uk_size, udata);
skip = SKIP_DTOR;
}
/*
* If possible, free to the per-CPU cache. There are two
* requirements for safe access to the per-CPU cache: (1) the thread
* accessing the cache must not be preempted or yield during access,
* and (2) the thread must not migrate CPUs without switching which
* cache it accesses. We rely on a critical section to prevent
* preemption and migration. We release the critical section in
* order to acquire the zone mutex if we are unable to free to the
* current cache; when we re-acquire the critical section, we must
* detect and handle migration if it has occurred.
*/
zfree_restart:
cpu = PCPU_GET(cpuid);
CPU_LOCK(cpu);
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
zfree_start:
@ -2272,15 +2294,7 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
("uma_zfree: Freeing to non free bucket index."));
bucket->ub_bucket[bucket->ub_cnt] = item;
bucket->ub_cnt++;
#ifdef INVARIANTS
ZONE_LOCK(zone);
if (keg->uk_flags & UMA_ZONE_MALLOC)
uma_dbg_free(zone, udata, item);
else
uma_dbg_free(zone, NULL, item);
ZONE_UNLOCK(zone);
#endif
CPU_UNLOCK(cpu);
critical_exit();
return;
} else if (cache->uc_allocbucket) {
#ifdef UMA_DEBUG_ALLOC
@ -2304,9 +2318,32 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
*
* 1) The buckets are NULL
* 2) The alloc and free buckets are both somewhat full.
*
* We must go back to the zone, which requires acquiring the zone lock,
* which in turn means we must release and re-acquire the critical
* section. Since the critical section is released, we may be
* preempted or migrate. As such, make sure not to maintain any
* thread-local state specific to the cache from prior to releasing
* the critical section.
*/
critical_exit();
ZONE_LOCK(zone);
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
if (cache->uc_freebucket != NULL) {
if (cache->uc_freebucket->ub_cnt <
cache->uc_freebucket->ub_entries) {
ZONE_UNLOCK(zone);
goto zfree_start;
}
if (cache->uc_allocbucket != NULL &&
(cache->uc_allocbucket->ub_cnt <
cache->uc_freebucket->ub_cnt)) {
ZONE_UNLOCK(zone);
goto zfree_start;
}
}
bucket = cache->uc_freebucket;
cache->uc_freebucket = NULL;
@ -2328,8 +2365,8 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
cache->uc_freebucket = bucket;
goto zfree_start;
}
/* We're done with this CPU now */
CPU_UNLOCK(cpu);
/* We are no longer associated with this CPU. */
critical_exit();
/* And the zone.. */
ZONE_UNLOCK(zone);
@ -2353,26 +2390,8 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
/*
* If nothing else caught this, we'll just do an internal free.
*/
zfree_internal:
#ifdef INVARIANTS
/*
* If we need to skip the dtor and the uma_dbg_free in
* uma_zfree_internal because we've already called the dtor
* above, but we ended up here, then we need to make sure
* that we take care of the uma_dbg_free immediately.
*/
if (skip) {
ZONE_LOCK(zone);
if (keg->uk_flags & UMA_ZONE_MALLOC)
uma_dbg_free(zone, udata, item);
else
uma_dbg_free(zone, NULL, item);
ZONE_UNLOCK(zone);
}
#endif
uma_zfree_internal(zone, item, udata, skip);
uma_zfree_internal(zone, item, udata, SKIP_DTOR);
return;
}
@ -2655,7 +2674,7 @@ uma_large_malloc(int size, int wait)
slab->us_flags = flags | UMA_SLAB_MALLOC;
slab->us_size = size;
} else {
uma_zfree_internal(slabzone, slab, NULL, 0);
uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE);
}
return (mem);
@ -2666,7 +2685,7 @@ uma_large_free(uma_slab_t slab)
{
vsetobj((vm_offset_t)slab->us_data, kmem_object);
page_free(slab->us_data, slab->us_size, slab->us_flags);
uma_zfree_internal(slabzone, slab, NULL, 0);
uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE);
}
void
@ -2743,6 +2762,7 @@ sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
int cachefree;
uma_bucket_t bucket;
uma_cache_t cache;
u_int64_t alloc;
cnt = 0;
mtx_lock(&uma_mtx);
@ -2766,15 +2786,9 @@ sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
LIST_FOREACH(z, &zk->uk_zones, uz_link) {
if (cnt == 0) /* list may have changed size */
break;
if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (CPU_ABSENT(cpu))
continue;
CPU_LOCK(cpu);
}
}
ZONE_LOCK(z);
cachefree = 0;
alloc = 0;
if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (CPU_ABSENT(cpu))
@ -2784,9 +2798,12 @@ sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
cachefree += cache->uc_allocbucket->ub_cnt;
if (cache->uc_freebucket != NULL)
cachefree += cache->uc_freebucket->ub_cnt;
CPU_UNLOCK(cpu);
alloc += cache->uc_allocs;
cache->uc_allocs = 0;
}
}
alloc += z->uz_allocs;
LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
cachefree += bucket->ub_cnt;
}
@ -2797,7 +2814,7 @@ sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
zk->uk_maxpages * zk->uk_ipers,
(zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree,
totalfree,
(unsigned long long)z->uz_allocs);
(unsigned long long)alloc);
ZONE_UNLOCK(z);
for (p = offset + 12; p > offset && *p == ' '; --p)
/* nothing */ ;

View File

@ -342,16 +342,6 @@ void uma_large_free(uma_slab_t slab);
#define ZONE_LOCK(z) mtx_lock((z)->uz_lock)
#define ZONE_UNLOCK(z) mtx_unlock((z)->uz_lock)
#define CPU_LOCK_INIT(cpu) \
mtx_init(&uma_pcpu_mtx[(cpu)], "UMA pcpu", "UMA pcpu", \
MTX_DEF | MTX_DUPOK)
#define CPU_LOCK(cpu) \
mtx_lock(&uma_pcpu_mtx[(cpu)])
#define CPU_UNLOCK(cpu) \
mtx_unlock(&uma_pcpu_mtx[(cpu)])
/*
* Find a slab within a hash table. This is used for OFFPAGE zones to lookup
* the slab structure.