Refactor domainset iterators for use by malloc(9) and UMA.

Before this change we had two flavours of vm_domainset iterators: "page"
and "malloc".  The latter was only used for kmem_*() and hard-coded its
behaviour based on kernel_object's policy.  Moreover, its use contained
a race similar to that fixed by r338755 since the kernel_object's
iterator was being run without the object lock.

In some cases it is useful to be able to explicitly specify a policy
(domainset) or policy+iterator (domainset_ref) when performing memory
allocations.  To that end, refactor the vm_domainset_* KPI to permit
this, and get rid of the "malloc" domainset_iter KPI in the process.

Reviewed by:	jeff (previous version)
Tested by:	pho (part of a larger patch)
MFC after:	2 weeks
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D17417
This commit is contained in:
Mark Johnston 2018-10-23 16:35:58 +00:00
parent 1a7d3c055b
commit 4c29d2de67
7 changed files with 88 additions and 70 deletions

View File

@ -54,7 +54,7 @@ typedef struct _domainset domainset_t;
struct domainset; struct domainset;
struct domainset_ref { struct domainset_ref {
struct domainset * volatile dr_policy; struct domainset * volatile dr_policy;
unsigned int dr_iterator; unsigned int dr_iter;
}; };
#endif /* !_SYS__DOMAINSET_H_ */ #endif /* !_SYS__DOMAINSET_H_ */

View File

@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h> #include <sys/lock.h>
#include <sys/mutex.h> #include <sys/mutex.h>
#include <sys/malloc.h> #include <sys/malloc.h>
#include <sys/rwlock.h>
#include <sys/vmmeter.h> #include <sys/vmmeter.h>
#include <vm/vm.h> #include <vm/vm.h>
@ -62,26 +63,13 @@ static int vm_domainset_default_stride = 64;
* Determine which policy is to be used for this allocation. * Determine which policy is to be used for this allocation.
*/ */
static void static void
vm_domainset_iter_init(struct vm_domainset_iter *di, struct vm_object *obj, vm_domainset_iter_init(struct vm_domainset_iter *di, struct domainset *ds,
vm_pindex_t pindex) int *iter, struct vm_object *obj, vm_pindex_t pindex)
{ {
struct domainset *domain;
struct thread *td;
/* di->di_domain = ds;
* object policy takes precedence over thread policy. The policies di->di_iter = iter;
* are immutable and unsynchronized. Updates can race but pointer di->di_policy = ds->ds_policy;
* loads are assumed to be atomic.
*/
if (obj != NULL && (domain = obj->domain.dr_policy) != NULL) {
di->di_domain = domain;
di->di_iter = &obj->domain.dr_iterator;
} else {
td = curthread;
di->di_domain = td->td_domain.dr_policy;
di->di_iter = &td->td_domain.dr_iterator;
}
di->di_policy = di->di_domain->ds_policy;
if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) { if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) {
#if VM_NRESERVLEVEL > 0 #if VM_NRESERVLEVEL > 0
if (vm_object_reserv(obj)) { if (vm_object_reserv(obj)) {
@ -211,33 +199,39 @@ void
vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
vm_pindex_t pindex, int *domain, int *req) vm_pindex_t pindex, int *domain, int *req)
{ {
struct domainset_ref *dr;
vm_domainset_iter_init(di, obj, pindex); /*
* Object policy takes precedence over thread policy. The policies
* are immutable and unsynchronized. Updates can race but pointer
* loads are assumed to be atomic.
*/
if (obj != NULL && obj->domain.dr_policy != NULL)
dr = &obj->domain;
else
dr = &curthread->td_domain;
vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, obj, pindex);
di->di_flags = *req; di->di_flags = *req;
*req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) | *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) |
VM_ALLOC_NOWAIT; VM_ALLOC_NOWAIT;
vm_domainset_iter_first(di, domain); vm_domainset_iter_first(di, domain);
if (vm_page_count_min_domain(*domain)) if (vm_page_count_min_domain(*domain))
vm_domainset_iter_page(di, domain, req); vm_domainset_iter_page(di, obj, domain);
} }
int int
vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req) vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
int *domain)
{ {
/*
* If we exhausted all options with NOWAIT and did a WAITFAIL it
* is time to return an error to the caller.
*/
if ((*req & VM_ALLOC_WAITFAIL) != 0)
return (ENOMEM);
/* If there are more domains to visit we run the iterator. */ /* If there are more domains to visit we run the iterator. */
while (--di->di_n != 0) { while (--di->di_n != 0) {
vm_domainset_iter_next(di, domain); vm_domainset_iter_next(di, domain);
if (!di->di_minskip || !vm_page_count_min_domain(*domain)) if (!di->di_minskip || !vm_page_count_min_domain(*domain))
return (0); return (0);
} }
/* If we skipped domains below min restart the search. */
if (di->di_minskip) { if (di->di_minskip) {
di->di_minskip = false; di->di_minskip = false;
vm_domainset_iter_first(di, domain); vm_domainset_iter_first(di, domain);
@ -248,34 +242,53 @@ vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req)
if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0) if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0)
return (ENOMEM); return (ENOMEM);
/* /* Wait for one of the domains to accumulate some free pages. */
* We have visited all domains with non-blocking allocations, try if (obj != NULL)
* from the beginning with a blocking allocation. VM_OBJECT_WUNLOCK(obj);
*/ vm_wait_doms(&di->di_domain->ds_mask);
if (obj != NULL)
VM_OBJECT_WLOCK(obj);
if ((di->di_flags & VM_ALLOC_WAITFAIL) != 0)
return (ENOMEM);
/* Restart the search. */
vm_domainset_iter_first(di, domain); vm_domainset_iter_first(di, domain);
*req = di->di_flags;
return (0); return (0);
} }
static void
void _vm_domainset_iter_policy_init(struct vm_domainset_iter *di, int *domain,
vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, int *flags)
struct vm_object *obj, int *domain, int *flags)
{ {
vm_domainset_iter_init(di, obj, 0);
if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE)
di->di_policy = DOMAINSET_POLICY_ROUNDROBIN;
di->di_flags = *flags; di->di_flags = *flags;
*flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT; *flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT;
vm_domainset_iter_first(di, domain); vm_domainset_iter_first(di, domain);
if (vm_page_count_min_domain(*domain)) if (vm_page_count_min_domain(*domain))
vm_domainset_iter_malloc(di, domain, flags); vm_domainset_iter_policy(di, domain);
}
void
vm_domainset_iter_policy_init(struct vm_domainset_iter *di,
struct domainset *ds, int *domain, int *flags)
{
vm_domainset_iter_init(di, ds, &curthread->td_domain.dr_iter, NULL, 0);
_vm_domainset_iter_policy_init(di, domain, flags);
}
void
vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di,
struct domainset_ref *dr, int *domain, int *flags)
{
vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, NULL, 0);
_vm_domainset_iter_policy_init(di, domain, flags);
} }
int int
vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
{ {
/* If there are more domains to visit we run the iterator. */ /* If there are more domains to visit we run the iterator. */
@ -296,45 +309,46 @@ vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags)
if ((di->di_flags & M_WAITOK) == 0) if ((di->di_flags & M_WAITOK) == 0)
return (ENOMEM); return (ENOMEM);
/* /* Wait for one of the domains to accumulate some free pages. */
* We have visited all domains with non-blocking allocations, try vm_wait_doms(&di->di_domain->ds_mask);
* from the beginning with a blocking allocation.
*/ /* Restart the search. */
vm_domainset_iter_first(di, domain); vm_domainset_iter_first(di, domain);
*flags = di->di_flags;
return (0); return (0);
} }
#else /* !NUMA */ #else /* !NUMA */
int int
vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *flags) vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
int *domain)
{ {
return (EJUSTRETURN); return (EJUSTRETURN);
} }
void void
vm_domainset_iter_page_init(struct vm_domainset_iter *di, vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
struct vm_object *obj, vm_pindex_t pindex, int *domain, int *flags) vm_pindex_t pindex, int *domain, int *flags)
{ {
*domain = 0; *domain = 0;
} }
int int
vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
{ {
return (EJUSTRETURN); return (EJUSTRETURN);
} }
void void
vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, vm_domainset_iter_policy_init(struct vm_domainset_iter *di,
struct vm_object *obj, int *domain, int *flags) struct domainset *ds, int *domain, int *flags)
{ {
*domain = 0; *domain = 0;
} }
#endif #endif /* NUMA */

View File

@ -40,12 +40,15 @@ struct vm_domainset_iter {
bool di_minskip; bool di_minskip;
}; };
int vm_domainset_iter_page(struct vm_domainset_iter *, int *, int *); int vm_domainset_iter_page(struct vm_domainset_iter *, struct vm_object *,
int *);
void vm_domainset_iter_page_init(struct vm_domainset_iter *, void vm_domainset_iter_page_init(struct vm_domainset_iter *,
struct vm_object *, vm_pindex_t, int *, int *); struct vm_object *, vm_pindex_t, int *, int *);
int vm_domainset_iter_malloc(struct vm_domainset_iter *, int *, int *); int vm_domainset_iter_policy(struct vm_domainset_iter *, int *);
void vm_domainset_iter_malloc_init(struct vm_domainset_iter *, void vm_domainset_iter_policy_init(struct vm_domainset_iter *,
struct vm_object *, int *, int *); struct domainset *, int *, int *);
void vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *,
struct domainset_ref *, int *, int *);
void vm_wait_doms(const domainset_t *); void vm_wait_doms(const domainset_t *);

View File

@ -377,7 +377,7 @@ vm_thread_new(struct thread *td, int pages)
*/ */
if (vm_ndomains > 1) { if (vm_ndomains > 1) {
ksobj->domain.dr_policy = DOMAINSET_RR(); ksobj->domain.dr_policy = DOMAINSET_RR();
ksobj->domain.dr_iterator = ksobj->domain.dr_iter =
atomic_fetchadd_int(&kstack_domain_iter, 1); atomic_fetchadd_int(&kstack_domain_iter, 1);
} }

View File

@ -235,13 +235,13 @@ kmem_alloc_attr(vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high,
vm_offset_t addr; vm_offset_t addr;
int domain; int domain;
vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags); vm_domainset_iter_policy_init(&di, DOMAINSET_RR(), &domain, &flags);
do { do {
addr = kmem_alloc_attr_domain(domain, size, flags, low, high, addr = kmem_alloc_attr_domain(domain, size, flags, low, high,
memattr); memattr);
if (addr != 0) if (addr != 0)
break; break;
} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0); } while (vm_domainset_iter_policy(&di, &domain) == 0);
return (addr); return (addr);
} }
@ -319,13 +319,13 @@ kmem_alloc_contig(vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high,
vm_offset_t addr; vm_offset_t addr;
int domain; int domain;
vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags); vm_domainset_iter_policy_init(&di, DOMAINSET_RR(), &domain, &flags);
do { do {
addr = kmem_alloc_contig_domain(domain, size, flags, low, high, addr = kmem_alloc_contig_domain(domain, size, flags, low, high,
alignment, boundary, memattr); alignment, boundary, memattr);
if (addr != 0) if (addr != 0)
break; break;
} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0); } while (vm_domainset_iter_policy(&di, &domain) == 0);
return (addr); return (addr);
} }
@ -406,12 +406,12 @@ kmem_malloc(vm_size_t size, int flags)
vm_offset_t addr; vm_offset_t addr;
int domain; int domain;
vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags); vm_domainset_iter_policy_init(&di, DOMAINSET_RR(), &domain, &flags);
do { do {
addr = kmem_malloc_domain(domain, size, flags); addr = kmem_malloc_domain(domain, size, flags);
if (addr != 0) if (addr != 0)
break; break;
} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0); } while (vm_domainset_iter_policy(&di, &domain) == 0);
return (addr); return (addr);
} }

View File

@ -274,6 +274,7 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
panic("_vm_object_allocate: type %d is undefined", type); panic("_vm_object_allocate: type %d is undefined", type);
} }
object->size = size; object->size = size;
object->domain.dr_policy = NULL;
object->generation = 1; object->generation = 1;
object->ref_count = 1; object->ref_count = 1;
object->memattr = VM_MEMATTR_DEFAULT; object->memattr = VM_MEMATTR_DEFAULT;

View File

@ -1753,7 +1753,7 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex,
mpred); mpred);
if (m != NULL) if (m != NULL)
break; break;
} while (vm_domainset_iter_page(&di, &domain, &req) == 0); } while (vm_domainset_iter_page(&di, object, &domain) == 0);
return (m); return (m);
} }
@ -1990,7 +1990,7 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
npages, low, high, alignment, boundary, memattr); npages, low, high, alignment, boundary, memattr);
if (m != NULL) if (m != NULL)
break; break;
} while (vm_domainset_iter_page(&di, &domain, &req) == 0); } while (vm_domainset_iter_page(&di, object, &domain) == 0);
return (m); return (m);
} }
@ -2191,7 +2191,7 @@ vm_page_alloc_freelist(int freelist, int req)
m = vm_page_alloc_freelist_domain(domain, freelist, req); m = vm_page_alloc_freelist_domain(domain, freelist, req);
if (m != NULL) if (m != NULL)
break; break;
} while (vm_domainset_iter_page(&di, &domain, &req) == 0); } while (vm_domainset_iter_page(&di, NULL, &domain) == 0);
return (m); return (m);
} }
@ -2830,7 +2830,7 @@ vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high,
high, alignment, boundary); high, alignment, boundary);
if (ret) if (ret)
break; break;
} while (vm_domainset_iter_page(&di, &domain, &req) == 0); } while (vm_domainset_iter_page(&di, NULL, &domain) == 0);
return (ret); return (ret);
} }