Refactor domainset iterators for use by malloc(9) and UMA.

Before this change we had two flavours of vm_domainset iterators: "page"
and "malloc".  The latter was only used for kmem_*() and hard-coded its
behaviour based on kernel_object's policy.  Moreover, its use contained
a race similar to that fixed by r338755 since the kernel_object's
iterator was being run without the object lock.

In some cases it is useful to be able to explicitly specify a policy
(domainset) or policy+iterator (domainset_ref) when performing memory
allocations.  To that end, refactor the vm_domainset_* KPI to permit
this, and get rid of the "malloc" domainset_iter KPI in the process.

Reviewed by:	jeff (previous version)
Tested by:	pho (part of a larger patch)
MFC after:	2 weeks
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D17417
This commit is contained in:
markj 2018-10-23 16:35:58 +00:00
parent c41c1dd360
commit 6c262608dd
7 changed files with 88 additions and 70 deletions

View File

@ -54,7 +54,7 @@ typedef struct _domainset domainset_t;
struct domainset;
struct domainset_ref {
struct domainset * volatile dr_policy;
unsigned int dr_iterator;
unsigned int dr_iter;
};
#endif /* !_SYS__DOMAINSET_H_ */

View File

@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/rwlock.h>
#include <sys/vmmeter.h>
#include <vm/vm.h>
@ -62,26 +63,13 @@ static int vm_domainset_default_stride = 64;
* Determine which policy is to be used for this allocation.
*/
static void
vm_domainset_iter_init(struct vm_domainset_iter *di, struct vm_object *obj,
vm_pindex_t pindex)
vm_domainset_iter_init(struct vm_domainset_iter *di, struct domainset *ds,
int *iter, struct vm_object *obj, vm_pindex_t pindex)
{
struct domainset *domain;
struct thread *td;
/*
* object policy takes precedence over thread policy. The policies
* are immutable and unsynchronized. Updates can race but pointer
* loads are assumed to be atomic.
*/
if (obj != NULL && (domain = obj->domain.dr_policy) != NULL) {
di->di_domain = domain;
di->di_iter = &obj->domain.dr_iterator;
} else {
td = curthread;
di->di_domain = td->td_domain.dr_policy;
di->di_iter = &td->td_domain.dr_iterator;
}
di->di_policy = di->di_domain->ds_policy;
di->di_domain = ds;
di->di_iter = iter;
di->di_policy = ds->ds_policy;
if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) {
#if VM_NRESERVLEVEL > 0
if (vm_object_reserv(obj)) {
@ -211,33 +199,39 @@ void
vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
vm_pindex_t pindex, int *domain, int *req)
{
struct domainset_ref *dr;
vm_domainset_iter_init(di, obj, pindex);
/*
* Object policy takes precedence over thread policy. The policies
* are immutable and unsynchronized. Updates can race but pointer
* loads are assumed to be atomic.
*/
if (obj != NULL && obj->domain.dr_policy != NULL)
dr = &obj->domain;
else
dr = &curthread->td_domain;
vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, obj, pindex);
di->di_flags = *req;
*req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) |
VM_ALLOC_NOWAIT;
vm_domainset_iter_first(di, domain);
if (vm_page_count_min_domain(*domain))
vm_domainset_iter_page(di, domain, req);
vm_domainset_iter_page(di, obj, domain);
}
int
vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req)
vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
int *domain)
{
/*
* If we exhausted all options with NOWAIT and did a WAITFAIL it
* is time to return an error to the caller.
*/
if ((*req & VM_ALLOC_WAITFAIL) != 0)
return (ENOMEM);
/* If there are more domains to visit we run the iterator. */
while (--di->di_n != 0) {
vm_domainset_iter_next(di, domain);
if (!di->di_minskip || !vm_page_count_min_domain(*domain))
return (0);
}
/* If we skipped domains below min restart the search. */
if (di->di_minskip) {
di->di_minskip = false;
vm_domainset_iter_first(di, domain);
@ -248,34 +242,53 @@ vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req)
if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0)
return (ENOMEM);
/*
* We have visited all domains with non-blocking allocations, try
* from the beginning with a blocking allocation.
*/
/* Wait for one of the domains to accumulate some free pages. */
if (obj != NULL)
VM_OBJECT_WUNLOCK(obj);
vm_wait_doms(&di->di_domain->ds_mask);
if (obj != NULL)
VM_OBJECT_WLOCK(obj);
if ((di->di_flags & VM_ALLOC_WAITFAIL) != 0)
return (ENOMEM);
/* Restart the search. */
vm_domainset_iter_first(di, domain);
*req = di->di_flags;
return (0);
}
void
vm_domainset_iter_malloc_init(struct vm_domainset_iter *di,
struct vm_object *obj, int *domain, int *flags)
static void
_vm_domainset_iter_policy_init(struct vm_domainset_iter *di, int *domain,
int *flags)
{
vm_domainset_iter_init(di, obj, 0);
if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE)
di->di_policy = DOMAINSET_POLICY_ROUNDROBIN;
di->di_flags = *flags;
*flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT;
vm_domainset_iter_first(di, domain);
if (vm_page_count_min_domain(*domain))
vm_domainset_iter_malloc(di, domain, flags);
vm_domainset_iter_policy(di, domain);
}
/*
 * Initialize a policy iterator for an explicitly supplied domainset.
 *
 * The iterator is set up with "ds" as its policy and the current thread's
 * td_domain.dr_iter as its iteration cursor; no VM object is passed (NULL)
 * and the page index is 0.  The remaining setup, including use of "domain"
 * and "flags", is delegated to _vm_domainset_iter_policy_init().
 */
void
vm_domainset_iter_policy_init(struct vm_domainset_iter *di,
struct domainset *ds, int *domain, int *flags)
{
vm_domainset_iter_init(di, ds, &curthread->td_domain.dr_iter, NULL, 0);
_vm_domainset_iter_policy_init(di, domain, flags);
}
/*
 * Initialize a policy iterator from a domainset reference.
 *
 * Both the policy (dr->dr_policy) and the iteration cursor (dr->dr_iter)
 * are taken from "dr"; no VM object is passed (NULL) and the page index
 * is 0.  The remaining setup, including use of "domain" and "flags", is
 * delegated to _vm_domainset_iter_policy_init().
 */
void
vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di,
struct domainset_ref *dr, int *domain, int *flags)
{
vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, NULL, 0);
_vm_domainset_iter_policy_init(di, domain, flags);
}
int
vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags)
vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
{
/* If there are more domains to visit we run the iterator. */
@ -296,45 +309,46 @@ vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags)
if ((di->di_flags & M_WAITOK) == 0)
return (ENOMEM);
/*
* We have visited all domains with non-blocking allocations, try
* from the beginning with a blocking allocation.
*/
/* Wait for one of the domains to accumulate some free pages. */
vm_wait_doms(&di->di_domain->ds_mask);
/* Restart the search. */
vm_domainset_iter_first(di, domain);
*flags = di->di_flags;
return (0);
}
#else /* !NUMA */
int
vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *flags)
vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
int *domain)
{
return (EJUSTRETURN);
}
void
vm_domainset_iter_page_init(struct vm_domainset_iter *di,
struct vm_object *obj, vm_pindex_t pindex, int *domain, int *flags)
vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
vm_pindex_t pindex, int *domain, int *flags)
{
*domain = 0;
}
int
vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags)
vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
{
return (EJUSTRETURN);
}
void
vm_domainset_iter_malloc_init(struct vm_domainset_iter *di,
struct vm_object *obj, int *domain, int *flags)
vm_domainset_iter_policy_init(struct vm_domainset_iter *di,
struct domainset *ds, int *domain, int *flags)
{
*domain = 0;
}
#endif
#endif /* NUMA */

View File

@ -40,12 +40,15 @@ struct vm_domainset_iter {
bool di_minskip;
};
int vm_domainset_iter_page(struct vm_domainset_iter *, int *, int *);
int vm_domainset_iter_page(struct vm_domainset_iter *, struct vm_object *,
int *);
void vm_domainset_iter_page_init(struct vm_domainset_iter *,
struct vm_object *, vm_pindex_t, int *, int *);
int vm_domainset_iter_malloc(struct vm_domainset_iter *, int *, int *);
void vm_domainset_iter_malloc_init(struct vm_domainset_iter *,
struct vm_object *, int *, int *);
int vm_domainset_iter_policy(struct vm_domainset_iter *, int *);
void vm_domainset_iter_policy_init(struct vm_domainset_iter *,
struct domainset *, int *, int *);
void vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *,
struct domainset_ref *, int *, int *);
void vm_wait_doms(const domainset_t *);

View File

@ -377,7 +377,7 @@ vm_thread_new(struct thread *td, int pages)
*/
if (vm_ndomains > 1) {
ksobj->domain.dr_policy = DOMAINSET_RR();
ksobj->domain.dr_iterator =
ksobj->domain.dr_iter =
atomic_fetchadd_int(&kstack_domain_iter, 1);
}

View File

@ -235,13 +235,13 @@ kmem_alloc_attr(vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high,
vm_offset_t addr;
int domain;
vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags);
vm_domainset_iter_policy_init(&di, DOMAINSET_RR(), &domain, &flags);
do {
addr = kmem_alloc_attr_domain(domain, size, flags, low, high,
memattr);
if (addr != 0)
break;
} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0);
} while (vm_domainset_iter_policy(&di, &domain) == 0);
return (addr);
}
@ -319,13 +319,13 @@ kmem_alloc_contig(vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high,
vm_offset_t addr;
int domain;
vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags);
vm_domainset_iter_policy_init(&di, DOMAINSET_RR(), &domain, &flags);
do {
addr = kmem_alloc_contig_domain(domain, size, flags, low, high,
alignment, boundary, memattr);
if (addr != 0)
break;
} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0);
} while (vm_domainset_iter_policy(&di, &domain) == 0);
return (addr);
}
@ -406,12 +406,12 @@ kmem_malloc(vm_size_t size, int flags)
vm_offset_t addr;
int domain;
vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags);
vm_domainset_iter_policy_init(&di, DOMAINSET_RR(), &domain, &flags);
do {
addr = kmem_malloc_domain(domain, size, flags);
if (addr != 0)
break;
} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0);
} while (vm_domainset_iter_policy(&di, &domain) == 0);
return (addr);
}

View File

@ -274,6 +274,7 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
panic("_vm_object_allocate: type %d is undefined", type);
}
object->size = size;
object->domain.dr_policy = NULL;
object->generation = 1;
object->ref_count = 1;
object->memattr = VM_MEMATTR_DEFAULT;

View File

@ -1753,7 +1753,7 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex,
mpred);
if (m != NULL)
break;
} while (vm_domainset_iter_page(&di, &domain, &req) == 0);
} while (vm_domainset_iter_page(&di, object, &domain) == 0);
return (m);
}
@ -1990,7 +1990,7 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
npages, low, high, alignment, boundary, memattr);
if (m != NULL)
break;
} while (vm_domainset_iter_page(&di, &domain, &req) == 0);
} while (vm_domainset_iter_page(&di, object, &domain) == 0);
return (m);
}
@ -2191,7 +2191,7 @@ vm_page_alloc_freelist(int freelist, int req)
m = vm_page_alloc_freelist_domain(domain, freelist, req);
if (m != NULL)
break;
} while (vm_domainset_iter_page(&di, &domain, &req) == 0);
} while (vm_domainset_iter_page(&di, NULL, &domain) == 0);
return (m);
}
@ -2830,7 +2830,7 @@ vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high,
high, alignment, boundary);
if (ret)
break;
} while (vm_domainset_iter_page(&di, &domain, &req) == 0);
} while (vm_domainset_iter_page(&di, NULL, &domain) == 0);
return (ret);
}