Modify UMA to use critical sections to protect per-CPU caches, rather than
mutexes, which offers lower overhead on both UP and SMP.  When allocating
from or freeing to the per-CPU cache, without INVARIANTS enabled, we no
longer perform any mutex operations, which offers a 1%-3% performance
improvement in a variety of micro-benchmarks.  We rely on critical sections
to prevent (a) preemption resulting in reentrant access to UMA on a single
CPU, and (b) migration of the thread during access.  In the event we need
to go back to the zone for a new bucket, we release the critical section to
acquire the global zone mutex, and must re-acquire the critical section and
re-evaluate which cache we are accessing, in case migration has occurred or
circumstances have changed in the current cache.  Per-CPU cache statistics
are now gathered lock-free by the sysctl handler, which can result in small
races in statistics reporting for caches.

Reviewed by:	bmilekic, jeff (somewhat)
Tested by:	rwatson, kris, gnn, scottl, mike at sentex dot net, others
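
The new fast path can be summarized by the sketch below. It is a condensed,
illustrative rendering of the pattern visible in the uma_zalloc_arg() hunks of
this diff (kernel context; the helper name cache_alloc_sketch and the elided
refill step are placeholders, not code from the commit): the per-CPU cache is
touched only inside a critical section, curcpu is read only while the section
is held, and after the section is dropped to take the zone mutex both the CPU
number and the cache pointer must be re-read because the thread may have
migrated.

	/*
	 * Illustrative sketch only (kernel context), condensed from the
	 * uma_zalloc_arg() hunks below; ctors, flags, statistics export and
	 * the actual bucket-refill logic are omitted.
	 */
	static void *
	cache_alloc_sketch(uma_zone_t zone)
	{
		uma_cache_t cache;
		uma_bucket_t bucket;
		void *item;
		int cpu;

		for (;;) {
			critical_enter();	/* blocks preemption and migration */
			cpu = curcpu;		/* only stable inside the section */
			cache = &zone->uz_cpu[cpu];
			bucket = cache->uc_allocbucket;
			if (bucket != NULL && bucket->ub_cnt > 0) {
				bucket->ub_cnt--;
				item = bucket->ub_bucket[bucket->ub_cnt];
				bucket->ub_bucket[bucket->ub_cnt] = NULL;
				cache->uc_allocs++;
				critical_exit();	/* fast path: no mutex was taken */
				return (item);
			}
			/*
			 * Cache is empty: the zone mutex is needed, and it may not
			 * be acquired inside a critical section, so drop the
			 * section first.  The real code re-enters the critical
			 * section while still holding the zone lock, re-reads
			 * curcpu, and installs a full bucket in whatever cache the
			 * thread now belongs to; the loop here models the same
			 * "re-evaluate after migration" rule.
			 */
			critical_exit();
			ZONE_LOCK(zone);
			/* ... refill a bucket from the zone (see diff below) ... */
			ZONE_UNLOCK(zone);
		}
	}

The same discipline appears in uma_zfree_arg(). The sysctl handler now reads
uc_allocs without taking any per-CPU lock, so a counter may be sampled while a
CPU is concurrently updating it; the commit accepts that small statistics race
in exchange for a mutex-free fast path.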
commit bb1e0b257a (parent 76b17ed5dc)
@@ -1,4 +1,5 @@
/*-
* Copyright (c) 2004-2005 Robert N. M. Watson
* Copyright (c) 2004, 2005,
* Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved.
* Copyright (c) 2002, 2003, 2004, 2005,
@@ -119,9 +120,6 @@ static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs);
/* This mutex protects the keg list */
static struct mtx uma_mtx;

/* These are the pcpu cache locks */
static struct mtx uma_pcpu_mtx[MAXCPU];

/* Linked list of boot time pages */
static LIST_HEAD(,uma_slab) uma_boot_pages =
LIST_HEAD_INITIALIZER(&uma_boot_pages);
@@ -384,40 +382,11 @@ static void
zone_timeout(uma_zone_t zone)
{
uma_keg_t keg;
uma_cache_t cache;
u_int64_t alloc;
int cpu;

keg = zone->uz_keg;
alloc = 0;

/*
* Aggregate per cpu cache statistics back to the zone.
*
* XXX This should be done in the sysctl handler.
*
* I may rewrite this to set a flag in the per cpu cache instead of
* locking. If the flag is not cleared on the next round I will have
* to lock and do it here instead so that the statistics don't get too
* far out of sync.
*/
if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL)) {
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (CPU_ABSENT(cpu))
continue;
CPU_LOCK(cpu);
cache = &zone->uz_cpu[cpu];
/* Add them up, and reset */
alloc += cache->uc_allocs;
cache->uc_allocs = 0;
CPU_UNLOCK(cpu);
}
}

/* Now push these stats back into the zone.. */
ZONE_LOCK(zone);
zone->uz_allocs += alloc;

/*
* Expand the zone hash table.
*
@@ -425,7 +394,7 @@ zone_timeout(uma_zone_t zone)
* What I'm trying to do here is completely reduce collisions. This
* may be a little aggressive. Should I allow for two collisions max?
*/

ZONE_LOCK(zone);
if (keg->uk_flags & UMA_ZONE_HASH &&
keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
struct uma_hash newhash;
@@ -613,6 +582,10 @@ bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
/*
* Drains the per cpu caches for a zone.
*
* NOTE: This may only be called while the zone is being turn down, and not
* during normal operation. This is necessary in order that we do not have
* to migrate CPUs to drain the per-CPU caches.
*
* Arguments:
* zone The zone to drain, must be unlocked.
*
@@ -626,12 +599,20 @@ cache_drain(uma_zone_t zone)
int cpu;

/*
* We have to lock each cpu cache before locking the zone
* XXX: It is safe to not lock the per-CPU caches, because we're
* tearing down the zone anyway. I.e., there will be no further use
* of the caches at this point.
*
* XXX: It would good to be able to assert that the zone is being
* torn down to prevent improper use of cache_drain().
*
* XXX: We lock the zone before passing into bucket_cache_drain() as
* it is used elsewhere. Should the tear-down path be made special
* there in some form?
*/
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (CPU_ABSENT(cpu))
continue;
CPU_LOCK(cpu);
cache = &zone->uz_cpu[cpu];
bucket_drain(zone, cache->uc_allocbucket);
bucket_drain(zone, cache->uc_freebucket);
@@ -644,11 +625,6 @@ cache_drain(uma_zone_t zone)
ZONE_LOCK(zone);
bucket_cache_drain(zone);
ZONE_UNLOCK(zone);
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (CPU_ABSENT(cpu))
continue;
CPU_UNLOCK(cpu);
}
}

/*
@@ -828,7 +804,8 @@ slab_zalloc(uma_zone_t zone, int wait)
&flags, wait);
if (mem == NULL) {
if (keg->uk_flags & UMA_ZONE_OFFPAGE)
uma_zfree_internal(keg->uk_slabzone, slab, NULL, 0);
uma_zfree_internal(keg->uk_slabzone, slab, NULL,
SKIP_NONE);
ZONE_LOCK(zone);
return (NULL);
}
@@ -1643,10 +1620,6 @@ uma_startup(void *bootmem)
#ifdef UMA_DEBUG
printf("Initializing pcpu cache locks.\n");
#endif
/* Initialize the pcpu cache lock set once and for all */
for (i = 0; i <= mp_maxid; i++)
CPU_LOCK_INIT(i);

#ifdef UMA_DEBUG
printf("Creating slab and hash zones.\n");
#endif
@@ -1827,9 +1800,20 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
}
}

/*
* If possible, allocate from the per-CPU cache. There are two
* requirements for safe access to the per-CPU cache: (1) the thread
* accessing the cache must not be preempted or yield during access,
* and (2) the thread must not migrate CPUs without switching which
* cache it accesses. We rely on a critical section to prevent
* preemption and migration. We release the critical section in
* order to acquire the zone mutex if we are unable to allocate from
* the current cache; when we re-acquire the critical section, we
* must detect and handle migration if it has occurred.
*/
zalloc_restart:
cpu = PCPU_GET(cpuid);
CPU_LOCK(cpu);
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];

zalloc_start:
@@ -1845,12 +1829,12 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
KASSERT(item != NULL,
("uma_zalloc: Bucket pointer mangled."));
cache->uc_allocs++;
critical_exit();
#ifdef INVARIANTS
ZONE_LOCK(zone);
uma_dbg_alloc(zone, NULL, item);
ZONE_UNLOCK(zone);
#endif
CPU_UNLOCK(cpu);
if (zone->uz_ctor != NULL) {
if (zone->uz_ctor(item, zone->uz_keg->uk_size,
udata, flags) != 0) {
@@ -1880,7 +1864,33 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
}
}
}
/*
* Attempt to retrieve the item from the per-CPU cache has failed, so
* we must go back to the zone. This requires the zone lock, so we
* must drop the critical section, then re-acquire it when we go back
* to the cache. Since the critical section is released, we may be
* preempted or migrate. As such, make sure not to maintain any
* thread-local state specific to the cache from prior to releasing
* the critical section.
*/
critical_exit();
ZONE_LOCK(zone);
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
bucket = cache->uc_allocbucket;
if (bucket != NULL) {
if (bucket->ub_cnt > 0) {
ZONE_UNLOCK(zone);
goto zalloc_start;
}
bucket = cache->uc_freebucket;
if (bucket != NULL && bucket->ub_cnt > 0) {
ZONE_UNLOCK(zone);
goto zalloc_start;
}
}

/* Since we have locked the zone we may as well send back our stats */
zone->uz_allocs += cache->uc_allocs;
cache->uc_allocs = 0;
@@ -1904,8 +1914,8 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
ZONE_UNLOCK(zone);
goto zalloc_start;
}
/* We are no longer associated with this cpu!!! */
CPU_UNLOCK(cpu);
/* We are no longer associated with this CPU. */
critical_exit();

/* Bump up our uz_count so we get here less */
if (zone->uz_count < BUCKET_MAX)
@@ -2228,10 +2238,7 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
uma_bucket_t bucket;
int bflags;
int cpu;
enum zfreeskip skip;

/* This is the fast path free */
skip = SKIP_NONE;
keg = zone->uz_keg;

#ifdef UMA_DEBUG_ALLOC_1
@@ -2240,22 +2247,37 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
zone->uz_name);

if (zone->uz_dtor)
zone->uz_dtor(item, keg->uk_size, udata);
#ifdef INVARIANTS
ZONE_LOCK(zone);
if (keg->uk_flags & UMA_ZONE_MALLOC)
uma_dbg_free(zone, udata, item);
else
uma_dbg_free(zone, NULL, item);
ZONE_UNLOCK(zone);
#endif
/*
* The race here is acceptable. If we miss it we'll just have to wait
* a little longer for the limits to be reset.
*/

if (keg->uk_flags & UMA_ZFLAG_FULL)
goto zfree_internal;

if (zone->uz_dtor) {
zone->uz_dtor(item, keg->uk_size, udata);
skip = SKIP_DTOR;
}

/*
* If possible, free to the per-CPU cache. There are two
* requirements for safe access to the per-CPU cache: (1) the thread
* accessing the cache must not be preempted or yield during access,
* and (2) the thread must not migrate CPUs without switching which
* cache it accesses. We rely on a critical section to prevent
* preemption and migration. We release the critical section in
* order to acquire the zone mutex if we are unable to free to the
* current cache; when we re-acquire the critical section, we must
* detect and handle migration if it has occurred.
*/
zfree_restart:
cpu = PCPU_GET(cpuid);
CPU_LOCK(cpu);
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];

zfree_start:
@@ -2272,15 +2294,7 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
("uma_zfree: Freeing to non free bucket index."));
bucket->ub_bucket[bucket->ub_cnt] = item;
bucket->ub_cnt++;
#ifdef INVARIANTS
ZONE_LOCK(zone);
if (keg->uk_flags & UMA_ZONE_MALLOC)
uma_dbg_free(zone, udata, item);
else
uma_dbg_free(zone, NULL, item);
ZONE_UNLOCK(zone);
#endif
CPU_UNLOCK(cpu);
critical_exit();
return;
} else if (cache->uc_allocbucket) {
#ifdef UMA_DEBUG_ALLOC
@@ -2304,9 +2318,32 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
*
* 1) The buckets are NULL
* 2) The alloc and free buckets are both somewhat full.
*
* We must go back the zone, which requires acquiring the zone lock,
* which in turn means we must release and re-acquire the critical
* section. Since the critical section is released, we may be
* preempted or migrate. As such, make sure not to maintain any
* thread-local state specific to the cache from prior to releasing
* the critical section.
*/

critical_exit();
ZONE_LOCK(zone);
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
if (cache->uc_freebucket != NULL) {
if (cache->uc_freebucket->ub_cnt <
cache->uc_freebucket->ub_entries) {
ZONE_UNLOCK(zone);
goto zfree_start;
}
if (cache->uc_allocbucket != NULL &&
(cache->uc_allocbucket->ub_cnt <
cache->uc_freebucket->ub_cnt)) {
ZONE_UNLOCK(zone);
goto zfree_start;
}
}

bucket = cache->uc_freebucket;
cache->uc_freebucket = NULL;
@@ -2328,8 +2365,8 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
cache->uc_freebucket = bucket;
goto zfree_start;
}
/* We're done with this CPU now */
CPU_UNLOCK(cpu);
/* We are no longer associated with this CPU. */
critical_exit();

/* And the zone.. */
ZONE_UNLOCK(zone);
@@ -2353,26 +2390,8 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
/*
* If nothing else caught this, we'll just do an internal free.
*/

zfree_internal:

#ifdef INVARIANTS
/*
* If we need to skip the dtor and the uma_dbg_free in
* uma_zfree_internal because we've already called the dtor
* above, but we ended up here, then we need to make sure
* that we take care of the uma_dbg_free immediately.
*/
if (skip) {
ZONE_LOCK(zone);
if (keg->uk_flags & UMA_ZONE_MALLOC)
uma_dbg_free(zone, udata, item);
else
uma_dbg_free(zone, NULL, item);
ZONE_UNLOCK(zone);
}
#endif
uma_zfree_internal(zone, item, udata, skip);
uma_zfree_internal(zone, item, udata, SKIP_DTOR);

return;
}
@@ -2655,7 +2674,7 @@ uma_large_malloc(int size, int wait)
slab->us_flags = flags | UMA_SLAB_MALLOC;
slab->us_size = size;
} else {
uma_zfree_internal(slabzone, slab, NULL, 0);
uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE);
}

return (mem);
@@ -2666,7 +2685,7 @@ uma_large_free(uma_slab_t slab)
{
vsetobj((vm_offset_t)slab->us_data, kmem_object);
page_free(slab->us_data, slab->us_size, slab->us_flags);
uma_zfree_internal(slabzone, slab, NULL, 0);
uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE);
}

void
@@ -2743,6 +2762,7 @@ sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
int cachefree;
uma_bucket_t bucket;
uma_cache_t cache;
u_int64_t alloc;

cnt = 0;
mtx_lock(&uma_mtx);
@@ -2766,15 +2786,9 @@ sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
LIST_FOREACH(z, &zk->uk_zones, uz_link) {
if (cnt == 0) /* list may have changed size */
break;
if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (CPU_ABSENT(cpu))
continue;
CPU_LOCK(cpu);
}
}
ZONE_LOCK(z);
cachefree = 0;
alloc = 0;
if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (CPU_ABSENT(cpu))
@@ -2784,9 +2798,12 @@ sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
cachefree += cache->uc_allocbucket->ub_cnt;
if (cache->uc_freebucket != NULL)
cachefree += cache->uc_freebucket->ub_cnt;
CPU_UNLOCK(cpu);
alloc += cache->uc_allocs;
cache->uc_allocs = 0;
}
}
alloc += z->uz_allocs;

LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
cachefree += bucket->ub_cnt;
}
@@ -2797,7 +2814,7 @@ sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
zk->uk_maxpages * zk->uk_ipers,
(zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree,
totalfree,
(unsigned long long)z->uz_allocs);
(unsigned long long)alloc);
ZONE_UNLOCK(z);
for (p = offset + 12; p > offset && *p == ' '; --p)
/* nothing */ ;
@@ -342,16 +342,6 @@ void uma_large_free(uma_slab_t slab);
#define ZONE_LOCK(z) mtx_lock((z)->uz_lock)
#define ZONE_UNLOCK(z) mtx_unlock((z)->uz_lock)

#define CPU_LOCK_INIT(cpu) \
mtx_init(&uma_pcpu_mtx[(cpu)], "UMA pcpu", "UMA pcpu", \
MTX_DEF | MTX_DUPOK)

#define CPU_LOCK(cpu) \
mtx_lock(&uma_pcpu_mtx[(cpu)])

#define CPU_UNLOCK(cpu) \
mtx_unlock(&uma_pcpu_mtx[(cpu)])

/*
* Find a slab within a hash table. This is used for OFFPAGE zones to lookup
* the slab structure.