UMA NUMA flag day.  UMA_ZONE_NUMA was a source of confusion.  Make the names
more consistent with other NUMA features as UMA_ZONE_FIRSTTOUCH and
UMA_ZONE_ROUNDROBIN.  The system will now select a default depending
on kernel configuration.  API users need only specify one if they want to
override the default.
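
For callers, a zone that previously passed UMA_ZONE_NUMA now passes
UMA_ZONE_FIRSTTOUCH; a zone that passes neither flag gets the
configuration-dependent default.  A minimal sketch (illustrative only;
the "foo" zone and its structure are hypothetical, not part of this
change):

#include <sys/param.h>
#include <vm/uma.h>

struct foo {
	uint64_t f_val;
};

static uma_zone_t foo_zone;

static void
foo_zone_init(void)
{
	/*
	 * Explicitly request round-robin placement; omitting both
	 * policy flags would accept the kernel-configured default.
	 */
	foo_zone = uma_zcreate("foo", sizeof(struct foo),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
	    UMA_ZONE_ROUNDROBIN);
}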

Remove the UMA_XDOMAIN and UMA_FIRSTTOUCH kernel options and key only off
of NUMA.  XDOMAIN is now fast enough in all cases to enable whenever NUMA
is.
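
For kernel configurations, the practical effect is that a config which
formerly carried the UMA options alongside NUMA (hypothetical fragment,
shown only to illustrate the change):

options 	NUMA
options 	UMA_XDOMAIN		# cross-domain frees
options 	UMA_FIRSTTOUCH		# first-touch keg default

now needs only:

options 	NUMA

which enables both behaviors.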

Reviewed by:	markj
Discussed with:	rlibby
Differential Revision:	https://reviews.freebsd.org/D22831
Author:	Jeff Roberson
Date:	2020-01-04 18:48:13 +00:00
Commit:	dfe13344f5 (parent 91d947bfbe)
5 changed files with 72 additions and 68 deletions

sys/conf/options

@@ -621,8 +621,6 @@ NO_SWAPPING	opt_vm.h
 MALLOC_MAKE_FAILURES	opt_vm.h
 MALLOC_PROFILE	opt_vm.h
 MALLOC_DEBUG_MAXZONES	opt_vm.h
-UMA_XDOMAIN	opt_vm.h
-UMA_FIRSTTOUCH	opt_vm.h
 
 # The MemGuard replacement allocator used for tamper-after-free detection
 DEBUG_MEMGUARD	opt_vm.h

sys/vm/uma.h

@@ -268,11 +268,9 @@ uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
 #define UMA_ZONE_PCPU 0x8000		/*
 					 * Allocates mp_maxid + 1 slabs of PAGE_SIZE
 					 */
-#define UMA_ZONE_NUMA 0x10000		/*
-					 * NUMA aware Zone.  Implements a best
-					 * effort first-touch policy.
-					 */
-#define UMA_ZONE_MINBUCKET 0x20000	/* Use smallest buckets. */
+#define UMA_ZONE_MINBUCKET 0x10000	/* Use smallest buckets. */
+#define UMA_ZONE_FIRSTTOUCH 0x20000	/* First touch NUMA policy */
+#define UMA_ZONE_ROUNDROBIN 0x40000	/* Round-robin NUMA policy. */
 
 /*
  * These flags are shared between the keg and zone.  In zones wishing to add
@@ -281,7 +279,8 @@ uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
  */
 #define UMA_ZONE_INHERIT \
 	(UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_NOFREE | \
-	UMA_ZONE_HASH | UMA_ZONE_VTOSLAB | UMA_ZONE_PCPU | UMA_ZONE_NUMA)
+	UMA_ZONE_HASH | UMA_ZONE_VTOSLAB | UMA_ZONE_PCPU | \
+	UMA_ZONE_FIRSTTOUCH | UMA_ZONE_ROUNDROBIN)
 
 /* Definitions for align */
 #define UMA_ALIGN_PTR (sizeof(void *) - 1)	/* Alignment fit for ptr */

sys/vm/uma_core.c

@@ -360,7 +360,8 @@ bucket_init(void)
 		size += sizeof(void *) * ubz->ubz_entries;
 		ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
 		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
-		    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET | UMA_ZONE_NUMA);
+		    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET |
+		    UMA_ZONE_FIRSTTOUCH);
 	}
 }
@@ -387,11 +388,9 @@ bucket_zone_max(uma_zone_t zone, int nitems)
 	int bpcpu;
 
 	bpcpu = 2;
-#ifdef UMA_XDOMAIN
-	if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
+	if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0)
 		/* Count the cross-domain bucket. */
 		bpcpu++;
-#endif
 
 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
 		if (ubz->ubz_entries * bpcpu * mp_ncpus > nitems)
@@ -637,7 +636,7 @@ cache_bucket_load_free(uma_cache_t cache, uma_bucket_t b)
 	cache_bucket_load(&cache->uc_freebucket, b);
 }
 
-#ifdef UMA_XDOMAIN
+#ifdef NUMA
 static inline void
 cache_bucket_load_cross(uma_cache_t cache, uma_bucket_t b)
 {
@@ -999,7 +998,7 @@ cache_drain_safe_cpu(uma_zone_t zone, void *unused)
 	b1 = b2 = b3 = NULL;
 	ZONE_LOCK(zone);
 	critical_enter();
-	if (zone->uz_flags & UMA_ZONE_NUMA)
+	if (zone->uz_flags & UMA_ZONE_FIRSTTOUCH)
 		domain = PCPU_GET(domain);
 	else
 		domain = 0;
@@ -1905,8 +1904,8 @@ keg_ctor(void *mem, int size, void *udata, int flags)
 	/*
 	 * We use a global round-robin policy by default.  Zones with
-	 * UMA_ZONE_NUMA set will use first-touch instead, in which case the
-	 * iterator is never run.
+	 * UMA_ZONE_FIRSTTOUCH set will use first-touch instead, in which
+	 * case the iterator is never run.
 	 */
 	keg->uk_dr.dr_policy = DOMAINSET_RR();
 	keg->uk_dr.dr_iter = 0;
@@ -1943,12 +1942,18 @@ keg_ctor(void *mem, int size, void *udata, int flags)
 	}
 
 	/*
-	 * Sets all kegs with memory that comes from the page array to a
-	 * first-touch domain policy.
+	 * Use a first-touch NUMA policy for all kegs that pmap_extract()
+	 * will work on with the exception of critical VM structures
+	 * necessary for paging.
+	 *
+	 * Zones may override the default by specifying either.
 	 */
-#ifdef UMA_FIRSTTOUCH
-	if ((keg->uk_flags & UMA_ZONE_HASH) == 0)
-		keg->uk_flags |= UMA_ZONE_NUMA;
+#ifdef NUMA
+	if ((keg->uk_flags &
+	    (UMA_ZONE_HASH | UMA_ZONE_VM | UMA_ZONE_ROUNDROBIN)) == 0)
+		keg->uk_flags |= UMA_ZONE_FIRSTTOUCH;
+	else if ((keg->uk_flags & UMA_ZONE_FIRSTTOUCH) == 0)
+		keg->uk_flags |= UMA_ZONE_ROUNDROBIN;
 #endif
 
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
@@ -2154,7 +2159,7 @@ zone_alloc_sysctl(uma_zone_t zone, void *unused)
 	 */
 	domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid),
 	    OID_AUTO, "domain", CTLFLAG_RD, NULL, "");
-	if ((zone->uz_flags & UMA_ZONE_NUMA) == 0)
+	if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) == 0)
 		domains = 1;
 	for (i = 0; i < domains; i++) {
 		zdom = &zone->uz_domain[i];
@@ -2994,7 +2999,7 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
 	/*
 	 * We can not get a bucket so try to return a single item.
 	 */
-	if (uz_flags & UMA_ZONE_NUMA)
+	if (uz_flags & UMA_ZONE_FIRSTTOUCH)
 		domain = PCPU_GET(domain);
 	else
 		domain = UMA_ANYDOMAIN;
@@ -3068,7 +3073,7 @@ cache_alloc(uma_zone_t zone, uma_cache_t cache, void *udata, int flags)
 	/*
 	 * Check the zone's cache of buckets.
 	 */
-	if (zone->uz_flags & UMA_ZONE_NUMA) {
+	if (zone->uz_flags & UMA_ZONE_FIRSTTOUCH) {
 		domain = PCPU_GET(domain);
 		zdom = &zone->uz_domain[domain];
 	} else {
@@ -3114,7 +3119,7 @@ cache_alloc(uma_zone_t zone, uma_cache_t cache, void *udata, int flags)
 	critical_enter();
 	cache = &zone->uz_cpu[curcpu];
 	if (cache->uc_allocbucket.ucb_bucket == NULL &&
-	    ((zone->uz_flags & UMA_ZONE_NUMA) == 0 ||
+	    ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) == 0 ||
 	    domain == PCPU_GET(domain))) {
 		cache_bucket_load_alloc(cache, bucket);
 		zdom->uzd_imax += bucket->ub_cnt;
@@ -3338,7 +3343,7 @@ zone_import(void *arg, void **bucket, int max, int domain, int flags)
 		 * produces more fragmentation and requires more cpu
 		 * time but yields better distribution.
 		 */
-		if ((zone->uz_flags & UMA_ZONE_NUMA) == 0 &&
+		if ((zone->uz_flags & UMA_ZONE_ROUNDROBIN) != 0 &&
 		    vm_ndomains > 1 && --stripe == 0)
 			break;
 #endif
@@ -3698,27 +3703,32 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
 	 * detect and handle migration if it has occurred.
 	 */
 	domain = itemdomain = 0;
+#ifdef NUMA
+	if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0)
+		itemdomain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
+#endif
 	critical_enter();
 	do {
 		cache = &zone->uz_cpu[curcpu];
-		bucket = &cache->uc_allocbucket;
-#ifdef UMA_XDOMAIN
-		if ((uz_flags & UMA_ZONE_NUMA) != 0) {
-			itemdomain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
-			domain = PCPU_GET(domain);
-		}
-		if ((uz_flags & UMA_ZONE_NUMA) != 0 && domain != itemdomain) {
+#ifdef NUMA
+		domain = PCPU_GET(domain);
+		if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0 &&
+		    domain != itemdomain) {
 			bucket = &cache->uc_crossbucket;
 		} else
 #endif
-		/*
-		 * Try to free into the allocbucket first to give LIFO ordering
-		 * for cache-hot datastructures.  Spill over into the freebucket
-		 * if necessary.  Alloc will swap them if one runs dry.
-		 */
-		if (__predict_false(bucket->ucb_cnt >= bucket->ucb_entries))
-			bucket = &cache->uc_freebucket;
+		{
+			/*
+			 * Try to free into the allocbucket first to give LIFO
+			 * ordering for cache-hot datastructures.  Spill over
+			 * into the freebucket if necessary.  Alloc will swap
+			 * them if one runs dry.
+			 */
+			bucket = &cache->uc_allocbucket;
+			if (__predict_false(bucket->ucb_cnt >=
+			    bucket->ucb_entries))
+				bucket = &cache->uc_freebucket;
+		}
 		if (__predict_true(bucket->ucb_cnt < bucket->ucb_entries)) {
 			cache_bucket_push(cache, bucket, item);
 			critical_exit();
@@ -3734,7 +3744,7 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
 		zone_free_item(zone, item, udata, SKIP_DTOR);
 }
 
-#ifdef UMA_XDOMAIN
+#ifdef NUMA
 /*
  * sort crossdomain free buckets to domain correct buckets and cache
  * them.
@@ -3809,7 +3819,7 @@ zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket, void *udata,
 {
 	uma_zone_domain_t zdom;
 
-#ifdef UMA_XDOMAIN
+#ifdef NUMA
 	/*
 	 * Buckets coming from the wrong domain will be entirely for the
 	 * only other domain on two domain systems.  In this case we can
@@ -3864,6 +3874,7 @@ static __noinline bool
 cache_free(uma_zone_t zone, uma_cache_t cache, void *udata, void *item,
     int itemdomain)
 {
+	uma_cache_bucket_t cbucket;
 	uma_bucket_t bucket;
 	int domain;
@@ -3875,27 +3886,24 @@ cache_free(uma_zone_t zone, uma_cache_t cache, void *udata, void *item,
 	cache = &zone->uz_cpu[curcpu];
 
 	/*
-	 * NUMA domains need to free to the correct zdom.  When XDOMAIN
-	 * is enabled this is the zdom of the item and the bucket may be
-	 * the cross bucket if they do not match.
+	 * FIRSTTOUCH domains need to free to the correct zdom.  When
+	 * enabled this is the zdom of the item.  The bucket is the
+	 * cross bucket if the current domain and itemdomain do not match.
 	 */
-	if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
-#ifdef UMA_XDOMAIN
+	cbucket = &cache->uc_freebucket;
+#ifdef NUMA
+	if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0) {
 		domain = PCPU_GET(domain);
-#else
-		itemdomain = domain = PCPU_GET(domain);
-#endif
-	else
-		itemdomain = domain = 0;
-#ifdef UMA_XDOMAIN
-	if (domain != itemdomain) {
-		bucket = cache_bucket_unload_cross(cache);
-		if (bucket != NULL)
-			atomic_add_64(&zone->uz_xdomain, bucket->ub_cnt);
+		if (domain != itemdomain) {
+			cbucket = &cache->uc_crossbucket;
+			if (cbucket->ucb_cnt != 0)
+				atomic_add_64(&zone->uz_xdomain,
+				    cbucket->ucb_cnt);
+		}
 	} else
 #endif
-		bucket = cache_bucket_unload_free(cache);
+		itemdomain = domain = 0;
+	bucket = cache_bucket_unload(cbucket);
 
 	/* We are no longer associated with this CPU. */
 	critical_exit();
@@ -3910,13 +3918,13 @@ cache_free(uma_zone_t zone, uma_cache_t cache, void *udata, void *item,
 	if (bucket == NULL)
 		return (false);
 	cache = &zone->uz_cpu[curcpu];
-#ifdef UMA_XDOMAIN
+#ifdef NUMA
 	/*
 	 * Check to see if we should be populating the cross bucket.  If it
 	 * is already populated we will fall through and attempt to populate
 	 * the free bucket.
 	 */
-	if ((zone->uz_flags & UMA_ZONE_NUMA) != 0) {
+	if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0) {
 		domain = PCPU_GET(domain);
 		if (domain != itemdomain &&
 		    cache->uc_crossbucket.ucb_bucket == NULL) {
@@ -4092,11 +4100,9 @@ uma_zone_set_maxcache(uma_zone_t zone, int nitems)
 	ubz = bucket_zone_max(zone, nitems);
 	if (ubz != NULL) {
 		bpcpu = 2;
-#ifdef UMA_XDOMAIN
-		if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
+		if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0)
 			/* Count the cross-domain bucket. */
 			bpcpu++;
-#endif
 		nitems -= ubz->ubz_entries * bpcpu * mp_ncpus;
 		zone->uz_bucket_size_max = ubz->ubz_entries;
 	} else {

sys/vm/uma_int.h

@@ -497,8 +497,9 @@ struct uma_zone {
     "\33CACHE"		\
     "\32LIMIT"		\
     "\31CTORDTOR"	\
-    "\22MINBUCKET"	\
-    "\21NUMA"		\
+    "\23ROUNDROBIN"	\
+    "\22FIRSTTOUCH"	\
+    "\21MINBUCKET"	\
     "\20PCPU"		\
     "\17NODUMP"		\
     "\16VTOSLAB"	\

sys/vm/vm_glue.c

@@ -474,7 +474,7 @@ kstack_cache_init(void *null)
 	kstack_cache = uma_zcache_create("kstack_cache",
 	    kstack_pages * PAGE_SIZE, NULL, NULL, NULL, NULL,
 	    kstack_import, kstack_release, NULL,
-	    UMA_ZONE_NUMA);
+	    UMA_ZONE_FIRSTTOUCH);
 	kstack_cache_size = imax(128, mp_ncpus * 4);
 	uma_zone_set_maxcache(kstack_cache, kstack_cache_size);
 }