Fix some problems that manifest when NUMA domain 0 is empty.

- In uma_prealloc(), we need to check for an empty domain before the
  first allocation attempt, not after.  Fix this by switching
  uma_prealloc() to use a vm_domainset iterator, which also addresses
  the secondary issue of using a signed domain identifier in
  round-robin iteration.
- Don't automatically create a page daemon for domain 0.
- In domainset_empty_vm(), recompute ds_cnt and ds_order after
  excluding empty domains; otherwise we may frequently specify an empty
  domain when calling into the page allocator, wasting CPU time.
  Convert DOMAINSET_PREF() policies whose preferred domain is empty to
  round-robin.
- When freeing bootstrap pages, don't count them towards the per-domain
  total page counts for now: some vm_phys segments are created before
  the SRAT is parsed and are thus always identified as being in domain 0
  even when they are not.  Then, when bootstrap pages are freed, they
  are added to a domain that we had previously thought was empty.  Until
  this is corrected, we simply exclude them from the per-domain page
  count.

Reported and tested by:	Rajesh Kumar <rajfbsd@gmail.com>
Reviewed by:	gallatin
MFC after:	2 weeks
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D17704
This commit is contained in:
Mark Johnston 2018-10-30 17:57:40 +00:00
parent e35079db73
commit 920239efde
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=339925
6 changed files with 56 additions and 45 deletions

View File

@ -492,20 +492,29 @@ _domainset_create(struct domainset *domain, struct domainlist *freelist)
}
/*
* Are any of the domains in the mask empty? If so, silently
* remove them. If only empty domains are present, we must
* return failure.
* Are any of the domains in the mask empty? If so, silently
* remove them and update the domainset accordingly. If only empty
* domains are present, we must return failure.
*/
static bool
domainset_empty_vm(struct domainset *domain)
{
int i, max;
int i, j, max;
max = DOMAINSET_FLS(&domain->ds_mask) + 1;
for (i = 0; i < max; i++) {
if (DOMAINSET_ISSET(i, &domain->ds_mask) &&
VM_DOMAIN_EMPTY(i))
for (i = 0; i < max; i++)
if (DOMAINSET_ISSET(i, &domain->ds_mask) && VM_DOMAIN_EMPTY(i))
DOMAINSET_CLR(i, &domain->ds_mask);
domain->ds_cnt = DOMAINSET_COUNT(&domain->ds_mask);
max = DOMAINSET_FLS(&domain->ds_mask) + 1;
for (i = j = 0; i < max; i++) {
if (DOMAINSET_ISSET(i, &domain->ds_mask))
domain->ds_order[j++] = i;
else if (domain->ds_policy == DOMAINSET_POLICY_PREFER &&
domain->ds_prefer == i && domain->ds_cnt > 1) {
domain->ds_policy = DOMAINSET_POLICY_ROUNDROBIN;
domain->ds_prefer = -1;
}
}
return (DOMAINSET_EMPTY(&domain->ds_mask));
@ -1378,7 +1387,7 @@ cpuset_setithread(lwpid_t id, int cpu)
/*
* Initialize static domainsets after NUMA information is available. This is
* called very early during boot.
* called before memory allocators are initialized.
*/
void
domainset_init(void)
@ -1407,7 +1416,7 @@ domainset_init(void)
void
domainset_zero(void)
{
struct domainset *dset;
struct domainset *dset, *tmp;
mtx_init(&cpuset_lock, "cpuset", NULL, MTX_SPIN | MTX_RECURSE);
@ -1422,8 +1431,9 @@ domainset_zero(void)
kernel_object->domain.dr_policy = _domainset_create(&domainset2, NULL);
/* Remove empty domains from the global policies. */
LIST_FOREACH(dset, &cpuset_domains, ds_link)
(void)domainset_empty_vm(dset);
LIST_FOREACH_SAFE(dset, &cpuset_domains, ds_link, tmp)
if (domainset_empty_vm(dset))
LIST_REMOVE(dset, ds_link);
}
/*

View File

@ -3608,29 +3608,30 @@ uma_zone_reserve_kva(uma_zone_t zone, int count)
void
uma_prealloc(uma_zone_t zone, int items)
{
struct vm_domainset_iter di;
uma_domain_t dom;
uma_slab_t slab;
uma_keg_t keg;
int domain, slabs;
int domain, flags, slabs;
keg = zone_first_keg(zone);
if (keg == NULL)
return;
KEG_LOCK(keg);
slabs = items / keg->uk_ipers;
domain = 0;
if (slabs * keg->uk_ipers < items)
slabs++;
flags = M_WAITOK;
vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, &flags);
while (slabs-- > 0) {
slab = keg_alloc_slab(keg, zone, domain, M_WAITOK);
slab = keg_alloc_slab(keg, zone, domain, flags);
if (slab == NULL)
return;
MPASS(slab->us_keg == keg);
dom = &keg->uk_domain[slab->us_domain];
LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link);
do {
domain = (domain + 1) % vm_ndomains;
} while (VM_DOMAIN_EMPTY(domain));
if (vm_domainset_iter_policy(&di, &domain) != 0)
break;
}
KEG_UNLOCK(keg);
}

View File

@ -123,12 +123,18 @@ vm_mem_init(void *dummy)
domainset_init();
/*
* Initializes resident memory structures. From here on, all physical
* Initialize resident memory structures. From here on, all physical
* memory is accounted for, and we use only virtual addresses.
*/
vm_set_page_size();
virtual_avail = vm_page_startup(virtual_avail);
/*
* Set an initial domain policy for thread0 so that allocations
* can work.
*/
domainset_zero();
#ifdef UMA_MD_SMALL_ALLOC
/* Announce page availability to UMA. */
uma_startup1();

View File

@ -800,7 +800,6 @@ kmem_bootstrap_free(vm_offset_t start, vm_size_t size)
vmd = vm_pagequeue_domain(m);
vm_domain_free_lock(vmd);
vm_phys_free_pages(m, 0);
vmd->vmd_page_count++;
vm_domain_free_unlock(vmd);
vm_domain_freecnt_inc(vmd, 1);

View File

@ -855,11 +855,6 @@ vm_page_startup(vm_offset_t vaddr)
*/
vm_reserv_init();
#endif
/*
* Set an initial domain policy for thread0 so that allocations
* can work.
*/
domainset_zero();
return (vaddr);
}

View File

@ -2072,41 +2072,41 @@ vm_pageout_init(void)
static void
vm_pageout(void)
{
int error;
int i;
struct proc *p;
struct thread *td;
int error, first, i;
p = curproc;
td = curthread;
swap_pager_swap_init();
snprintf(curthread->td_name, sizeof(curthread->td_name), "dom0");
error = kthread_add(vm_pageout_laundry_worker, NULL, curproc, NULL,
0, 0, "laundry: dom0");
if (error != 0)
panic("starting laundry for domain 0, error %d", error);
for (i = 1; i < vm_ndomains; i++) {
for (first = -1, i = 0; i < vm_ndomains; i++) {
if (VM_DOMAIN_EMPTY(i)) {
if (bootverbose)
printf("domain %d empty; skipping pageout\n",
i);
continue;
}
error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i,
curproc, NULL, 0, 0, "dom%d", i);
if (error != 0) {
panic("starting pageout for domain %d, error %d\n",
i, error);
if (first == -1)
first = i;
else {
error = kthread_add(vm_pageout_worker,
(void *)(uintptr_t)i, p, NULL, 0, 0, "dom%d", i);
if (error != 0)
panic("starting pageout for domain %d: %d\n",
i, error);
}
error = kthread_add(vm_pageout_laundry_worker,
(void *)(uintptr_t)i, curproc, NULL, 0, 0,
"laundry: dom%d", i);
(void *)(uintptr_t)i, p, NULL, 0, 0, "laundry: dom%d", i);
if (error != 0)
panic("starting laundry for domain %d, error %d",
i, error);
panic("starting laundry for domain %d: %d", i, error);
}
error = kthread_add(uma_reclaim_worker, NULL, curproc, NULL,
0, 0, "uma");
error = kthread_add(uma_reclaim_worker, NULL, p, NULL, 0, 0, "uma");
if (error != 0)
panic("starting uma_reclaim helper, error %d\n", error);
vm_pageout_worker((void *)(uintptr_t)0);
snprintf(td->td_name, sizeof(td->td_name), "dom%d", first);
vm_pageout_worker((void *)(uintptr_t)first);
}
/*