Move domain iterators into the page layer, where domain selection should take
place.  This makes the majority of the phys layer explicitly domain-specific.

Reviewed by:	markj, kib (some objections)
Discussed with:	alc
Tested by:	pho
Sponsored by:	Netflix & Dell EMC Isilon
Differential Revision:	https://reviews.freebsd.org/D13014
This commit is contained in:
Jeff Roberson 2017-11-28 23:18:35 +00:00
parent cc58910608
commit ef435ae7de
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=326346
8 changed files with 336 additions and 271 deletions

View File

@ -61,6 +61,118 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_domain.h>
/*
* System-wide default VM domain policy.
*
* Default to first-touch + round-robin when VM_NUMA_ALLOC is configured.
* vm_default_policy_mtx is taken by the default_policy sysctl handler around
* its reads and updates of vm_default_policy.
*/
static struct mtx vm_default_policy_mtx;
MTX_SYSINIT(vm_default_policy, &vm_default_policy_mtx, "default policy mutex",
MTX_DEF);
#ifdef VM_NUMA_ALLOC
static struct vm_domain_policy vm_default_policy =
VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
#else
/* Use round-robin so the domain policy code will only try once per allocation */
static struct vm_domain_policy vm_default_policy =
VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_ROUND_ROBIN, 0);
#endif
static int
sysctl_vm_default_policy(SYSCTL_HANDLER_ARGS)
{
char policy_name[32];
int error;
mtx_lock(&vm_default_policy_mtx);
/* Map policy to output string */
switch (vm_default_policy.p.policy) {
case VM_POLICY_FIRST_TOUCH:
strcpy(policy_name, "first-touch");
break;
case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
strcpy(policy_name, "first-touch-rr");
break;
case VM_POLICY_ROUND_ROBIN:
default:
strcpy(policy_name, "rr");
break;
}
mtx_unlock(&vm_default_policy_mtx);
error = sysctl_handle_string(oidp, &policy_name[0],
sizeof(policy_name), req);
if (error != 0 || req->newptr == NULL)
return (error);
mtx_lock(&vm_default_policy_mtx);
/* Set: match on the subset of policies that make sense as a default */
if (strcmp("first-touch-rr", policy_name) == 0) {
vm_domain_policy_set(&vm_default_policy,
VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
} else if (strcmp("first-touch", policy_name) == 0) {
vm_domain_policy_set(&vm_default_policy,
VM_POLICY_FIRST_TOUCH, 0);
} else if (strcmp("rr", policy_name) == 0) {
vm_domain_policy_set(&vm_default_policy,
VM_POLICY_ROUND_ROBIN, 0);
} else {
error = EINVAL;
goto finish;
}
error = 0;
finish:
mtx_unlock(&vm_default_policy_mtx);
return (error);
}
/*
 * default_policy under the _vm sysctl node: read/write string served by
 * sysctl_vm_default_policy().  The description lists the accepted values;
 * its closing parenthesis was missing and is restored here.
 */
SYSCTL_PROC(_vm, OID_AUTO, default_policy, CTLTYPE_STRING | CTLFLAG_RW,
    0, 0, sysctl_vm_default_policy, "A",
    "Default policy (rr, first-touch, first-touch-rr)");
/*
 * Set up a VM domain iterator for the current thread.
 *
 * With VM_NUMA_ALLOC, the policy is chosen in order of precedence:
 * the thread's own policy, then its process's policy, and finally the
 * system default.  A policy of VM_POLICY_NONE means "not set" at that
 * level.  Without VM_NUMA_ALLOC the system default is always used.
 */
void
vm_policy_iterator_init(struct vm_domain_iterator *vi)
{
#ifdef VM_NUMA_ALLOC
	struct vm_domain_policy snap;
#endif

	vm_domain_iterator_init(vi);

#ifdef VM_NUMA_ALLOC
	/* Take a local copy of the thread policy first. */
	vm_domain_policy_localcopy(&snap, &curthread->td_vm_dom_policy);
	if (snap.p.policy == VM_POLICY_NONE) {
		/* No thread policy; fall back to the process policy. */
		vm_domain_policy_localcopy(&snap,
		    &curthread->td_proc->p_vm_dom_policy);
	}
	if (snap.p.policy != VM_POLICY_NONE) {
		/* A thread or process policy is present; use it. */
		vm_domain_iterator_set_policy(vi, &snap);
		return;
	}
#endif

	/* Nothing more specific was found: use the system default policy. */
	vm_domain_iterator_set_policy(vi, &vm_default_policy);
}
/*
 * Release an iterator that was set up with vm_policy_iterator_init().
 * The result of vm_domain_iterator_cleanup() is not propagated.
 */
void
vm_policy_iterator_finish(struct vm_domain_iterator *vi)
{

	(void)vm_domain_iterator_cleanup(vi);
}
#ifdef VM_NUMA_ALLOC
static __inline int
vm_domain_rr_selectdomain(int skip_domain)

View File

@ -63,4 +63,7 @@ extern int vm_domain_iterator_run(struct vm_domain_iterator *vi,
extern int vm_domain_iterator_isdone(struct vm_domain_iterator *vi);
extern int vm_domain_iterator_cleanup(struct vm_domain_iterator *vi);
/*
* Policy-aware iterator setup and teardown.  vm_policy_iterator_init()
* initialises the iterator and selects the policy it will follow;
* vm_policy_iterator_finish() cleans the iterator up again.
*/
extern void vm_policy_iterator_init(struct vm_domain_iterator *vi);
extern void vm_policy_iterator_finish(struct vm_domain_iterator *vi);
#endif /* __VM_DOMAIN_H__ */

View File

@ -109,6 +109,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_param.h>
#include <vm/vm_domain.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
@ -1603,6 +1604,16 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
vm_radix_lookup_le(&object->rtree, pindex) : NULL));
}
/*
 * Allocate a page for (object, pindex) from the given domain.
 *
 * Convenience wrapper around vm_page_alloc_domain_after(): when an object
 * is supplied, the predecessor page is looked up in the object's radix
 * tree; with no object, NULL is passed as the predecessor.
 */
vm_page_t
vm_page_alloc_domain(vm_object_t object, vm_pindex_t pindex, int domain,
    int req)
{
	vm_page_t mpred;

	if (object == NULL)
		mpred = NULL;
	else
		mpred = vm_radix_lookup_le(&object->rtree, pindex);
	return (vm_page_alloc_domain_after(object, pindex, domain, req,
	    mpred));
}
/*
* Allocate a page in the specified object with the given page index. To
* optimize insertion of the page into the object, the caller must also specifiy
@ -1610,8 +1621,33 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
* page index, or NULL if no such page exists.
*/
vm_page_t
vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, int req,
vm_page_t mpred)
vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex,
int req, vm_page_t mpred)
{
struct vm_domain_iterator vi;
vm_page_t m;
int domain, wait;
m = NULL;
vm_policy_iterator_init(&vi);
/*
* Strip the wait flags from the request and remember them in "wait", so
* that per-domain attempts are made without VM_ALLOC_WAITOK/WAITFAIL.
*/
wait = req & (VM_ALLOC_WAITFAIL | VM_ALLOC_WAITOK);
req &= ~wait;
/* Try each domain the iterator yields until one produces a page. */
while (vm_domain_iterator_run(&vi, &domain) == 0) {
/*
* Restore the wait flags once the iterator reports it is done;
* presumably this is the final domain -- confirm against
* vm_domain_iterator_isdone().
*/
if (vm_domain_iterator_isdone(&vi))
req |= wait;
m = vm_page_alloc_domain_after(object, pindex, domain, req,
mpred);
if (m != NULL)
break;
}
vm_policy_iterator_finish(&vi);
/* NULL if no domain could satisfy the request. */
return (m);
}
vm_page_t
vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain,
int req, vm_page_t mpred)
{
vm_page_t m;
int flags, req_class;
@ -1643,6 +1679,7 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, int req,
* for the request class.
*/
again:
m = NULL;
mtx_lock(&vm_page_queue_free_mtx);
if (vm_cnt.v_free_count > vm_cnt.v_free_reserved ||
(req_class == VM_ALLOC_SYSTEM &&
@ -1655,23 +1692,26 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, int req,
#if VM_NRESERVLEVEL > 0
if (object == NULL || (object->flags & (OBJ_COLORED |
OBJ_FICTITIOUS)) != OBJ_COLORED || (m =
vm_reserv_alloc_page(object, pindex, mpred)) == NULL)
vm_reserv_alloc_page(object, pindex, domain,
mpred)) == NULL)
#endif
{
/*
* If not, allocate it from the free page queues.
*/
m = vm_phys_alloc_pages(object != NULL ?
m = vm_phys_alloc_pages(domain, object != NULL ?
VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
#if VM_NRESERVLEVEL > 0
if (m == NULL && vm_reserv_reclaim_inactive()) {
m = vm_phys_alloc_pages(object != NULL ?
if (m == NULL && vm_reserv_reclaim_inactive(domain)) {
m = vm_phys_alloc_pages(domain,
object != NULL ?
VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT,
0);
}
#endif
}
} else {
}
if (m == NULL) {
/*
* Not allocatable, give up.
*/
@ -1798,6 +1838,32 @@ vm_page_t
vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_memattr_t memattr)
{
struct vm_domain_iterator vi;
vm_page_t m;
int domain, wait;
m = NULL;
vm_policy_iterator_init(&vi);
/*
* Strip the wait flags from the request and remember them in "wait", so
* that per-domain attempts are made without VM_ALLOC_WAITOK/WAITFAIL.
*/
wait = req & (VM_ALLOC_WAITFAIL | VM_ALLOC_WAITOK);
req &= ~wait;
/* Try each domain the iterator yields until one produces a run. */
while (vm_domain_iterator_run(&vi, &domain) == 0) {
/*
* Restore the wait flags once the iterator reports it is done;
* presumably this is the final domain -- confirm against
* vm_domain_iterator_isdone().
*/
if (vm_domain_iterator_isdone(&vi))
req |= wait;
m = vm_page_alloc_contig_domain(object, pindex, domain, req,
npages, low, high, alignment, boundary, memattr);
if (m != NULL)
break;
}
vm_policy_iterator_finish(&vi);
/* NULL if no domain could satisfy the request. */
return (m);
}
vm_page_t
vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_memattr_t memattr)
{
vm_page_t m, m_ret, mpred;
u_int busy_lock, flags, oflags;
@ -1838,6 +1904,7 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
* below the lower bound for the allocation class?
*/
again:
m_ret = NULL;
mtx_lock(&vm_page_queue_free_mtx);
if (vm_cnt.v_free_count >= npages + vm_cnt.v_free_reserved ||
(req_class == VM_ALLOC_SYSTEM &&
@ -1850,31 +1917,27 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
#if VM_NRESERVLEVEL > 0
retry:
if (object == NULL || (object->flags & OBJ_COLORED) == 0 ||
(m_ret = vm_reserv_alloc_contig(object, pindex, npages,
low, high, alignment, boundary, mpred)) == NULL)
(m_ret = vm_reserv_alloc_contig(object, pindex, domain,
npages, low, high, alignment, boundary, mpred)) == NULL)
#endif
/*
* If not, allocate them from the free page queues.
*/
m_ret = vm_phys_alloc_contig(npages, low, high,
m_ret = vm_phys_alloc_contig(domain, npages, low, high,
alignment, boundary);
} else {
#if VM_NRESERVLEVEL > 0
if (m_ret == NULL && vm_reserv_reclaim_contig(
domain, npages, low, high, alignment, boundary))
goto retry;
#endif
}
if (m_ret == NULL) {
if (vm_page_alloc_fail(object, req))
goto again;
return (NULL);
}
if (m_ret != NULL)
vm_phys_freecnt_adj(m_ret, -npages);
else {
#if VM_NRESERVLEVEL > 0
if (vm_reserv_reclaim_contig(npages, low, high, alignment,
boundary))
goto retry;
#endif
}
vm_phys_freecnt_adj(m_ret, -npages);
mtx_unlock(&vm_page_queue_free_mtx);
if (m_ret == NULL)
return (NULL);
for (m = m_ret; m < &m_ret[npages]; m++)
vm_page_alloc_check(m);
@ -1987,6 +2050,29 @@ vm_page_alloc_check(vm_page_t m)
*/
vm_page_t
vm_page_alloc_freelist(int flind, int req)
{
struct vm_domain_iterator vi;
vm_page_t m;
int domain, wait;
m = NULL;
vm_policy_iterator_init(&vi);
wait = req & (VM_ALLOC_WAITFAIL | VM_ALLOC_WAITOK);
req &= ~wait;
while (vm_domain_iterator_run(&vi, &domain) == 0) {
if (vm_domain_iterator_isdone(&vi))
req |= wait;
m = vm_page_alloc_freelist_domain(domain, flind, req);
if (m != NULL)
break;
}
vm_policy_iterator_finish(&vi);
return (m);
}
vm_page_t
vm_page_alloc_freelist_domain(int domain, int flind, int req)
{
vm_page_t m;
u_int flags, free_count;
@ -2009,17 +2095,14 @@ vm_page_alloc_freelist(int flind, int req)
(req_class == VM_ALLOC_SYSTEM &&
vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) ||
(req_class == VM_ALLOC_INTERRUPT &&
vm_cnt.v_free_count > 0)) {
m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0);
} else {
vm_cnt.v_free_count > 0))
m = vm_phys_alloc_freelist_pages(domain, flind,
VM_FREEPOOL_DIRECT, 0);
if (m == NULL) {
if (vm_page_alloc_fail(NULL, req))
goto again;
return (NULL);
}
if (m == NULL) {
mtx_unlock(&vm_page_queue_free_mtx);
return (NULL);
}
free_count = vm_phys_freecnt_adj(m, -1);
mtx_unlock(&vm_page_queue_free_mtx);
vm_page_alloc_check(m);

View File

@ -476,16 +476,24 @@ void vm_page_free_zero(vm_page_t m);
void vm_page_activate (vm_page_t);
void vm_page_advise(vm_page_t m, int advice);
/*
* Page allocators.  Each *_domain variant takes an explicit domain index
* as an extra int argument and allocates from that domain only.
*/
vm_page_t vm_page_alloc(vm_object_t, vm_pindex_t, int);
/* Arguments: object, pindex, domain, req. */
vm_page_t vm_page_alloc_domain(vm_object_t, vm_pindex_t, int, int);
vm_page_t vm_page_alloc_after(vm_object_t, vm_pindex_t, int, vm_page_t);
/* Arguments: object, pindex, domain, req, mpred. */
vm_page_t vm_page_alloc_domain_after(vm_object_t, vm_pindex_t, int, int,
vm_page_t);
vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_memattr_t memattr);
vm_page_t vm_page_alloc_contig_domain(vm_object_t object,
vm_pindex_t pindex, int domain, int req, u_long npages, vm_paddr_t low,
vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
vm_memattr_t memattr);
/* Arguments: flind, req. */
vm_page_t vm_page_alloc_freelist(int, int);
/* Arguments: domain, flind, req. */
vm_page_t vm_page_alloc_freelist_domain(int, int, int);
void vm_page_change_lock(vm_page_t m, struct mtx **mtx);
vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
int vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
vm_page_t *ma, int count);
void vm_page_deactivate (vm_page_t);
void vm_page_deactivate(vm_page_t);
void vm_page_deactivate_noreuse(vm_page_t);
void vm_page_dequeue(vm_page_t m);
void vm_page_dequeue_locked(vm_page_t m);
@ -506,6 +514,8 @@ void vm_page_putfake(vm_page_t m);
void vm_page_readahead_finish(vm_page_t m);
bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low,
vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
bool vm_page_reclaim_contig_domain(int req, u_long npages, int domain,
vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
void vm_page_reference(vm_page_t m);
void vm_page_remove (vm_page_t);
int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);

View File

@ -151,23 +151,6 @@ SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
&vm_ndomains, 0, "Number of physical memory domains available.");
/*
* Default to first-touch + round-robin.
*/
static struct mtx vm_default_policy_mtx;
MTX_SYSINIT(vm_default_policy, &vm_default_policy_mtx, "default policy mutex",
MTX_DEF);
#ifdef VM_NUMA_ALLOC
static struct vm_domain_policy vm_default_policy =
VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
#else
/* Use round-robin so the domain policy code will only try once per allocation */
static struct vm_domain_policy vm_default_policy =
VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_ROUND_ROBIN, 0);
#endif
static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
int order);
static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary);
@ -176,60 +159,6 @@ static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
int order);
static int
sysctl_vm_default_policy(SYSCTL_HANDLER_ARGS)
{
char policy_name[32];
int error;
mtx_lock(&vm_default_policy_mtx);
/* Map policy to output string */
switch (vm_default_policy.p.policy) {
case VM_POLICY_FIRST_TOUCH:
strcpy(policy_name, "first-touch");
break;
case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
strcpy(policy_name, "first-touch-rr");
break;
case VM_POLICY_ROUND_ROBIN:
default:
strcpy(policy_name, "rr");
break;
}
mtx_unlock(&vm_default_policy_mtx);
error = sysctl_handle_string(oidp, &policy_name[0],
sizeof(policy_name), req);
if (error != 0 || req->newptr == NULL)
return (error);
mtx_lock(&vm_default_policy_mtx);
/* Set: match on the subset of policies that make sense as a default */
if (strcmp("first-touch-rr", policy_name) == 0) {
vm_domain_policy_set(&vm_default_policy,
VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
} else if (strcmp("first-touch", policy_name) == 0) {
vm_domain_policy_set(&vm_default_policy,
VM_POLICY_FIRST_TOUCH, 0);
} else if (strcmp("rr", policy_name) == 0) {
vm_domain_policy_set(&vm_default_policy,
VM_POLICY_ROUND_ROBIN, 0);
} else {
error = EINVAL;
goto finish;
}
error = 0;
finish:
mtx_unlock(&vm_default_policy_mtx);
return (error);
}
/*
 * default_policy under the _vm sysctl node: read/write string served by
 * sysctl_vm_default_policy().  The description lists the accepted values;
 * its closing parenthesis was missing and is restored here.
 */
SYSCTL_PROC(_vm, OID_AUTO, default_policy, CTLTYPE_STRING | CTLFLAG_RW,
    0, 0, sysctl_vm_default_policy, "A",
    "Default policy (rr, first-touch, first-touch-rr)");
/*
* Red-black tree helpers for vm fictitious range management.
*/
@ -271,71 +200,6 @@ vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
(uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}
#ifdef notyet
/*
 * Advance the current thread's round-robin domain index, wrapping at
 * vm_ndomains, and return the new index.  Without VM_NUMA_ALLOC the
 * result is always 0.
 */
static __inline int
vm_rr_selectdomain(void)
{
#ifdef VM_NUMA_ALLOC
	struct thread *td = curthread;

	++td->td_dom_rr_idx;
	td->td_dom_rr_idx %= vm_ndomains;
	return (td->td_dom_rr_idx);
#else
	return (0);
#endif
}
#endif /* notyet */
/*
 * Set up a VM domain iterator for the current thread.
 *
 * With VM_NUMA_ALLOC, the policy is chosen in order of precedence:
 * the thread's own policy, then its process's policy, and finally the
 * system default.  A policy of VM_POLICY_NONE means "not set" at that
 * level.  Without VM_NUMA_ALLOC the system default is always used.
 *
 * Eventually the upper layers are expected to carry this logic
 * themselves and hand the phys code an explicit VM domain policy.
 */
static void
vm_policy_iterator_init(struct vm_domain_iterator *vi)
{
#ifdef VM_NUMA_ALLOC
	struct vm_domain_policy snap;
#endif

	vm_domain_iterator_init(vi);

#ifdef VM_NUMA_ALLOC
	/* Take a local copy of the thread policy first. */
	vm_domain_policy_localcopy(&snap, &curthread->td_vm_dom_policy);
	if (snap.p.policy == VM_POLICY_NONE) {
		/* No thread policy; fall back to the process policy. */
		vm_domain_policy_localcopy(&snap,
		    &curthread->td_proc->p_vm_dom_policy);
	}
	if (snap.p.policy != VM_POLICY_NONE) {
		/* A thread or process policy is present; use it. */
		vm_domain_iterator_set_policy(vi, &snap);
		return;
	}
#endif

	/* Nothing more specific was found: use the system default policy. */
	vm_domain_iterator_set_policy(vi, &vm_default_policy);
}
/*
 * Release an iterator that was set up with vm_policy_iterator_init().
 * The result of vm_domain_iterator_cleanup() is not propagated.
 */
static void
vm_policy_iterator_finish(struct vm_domain_iterator *vi)
{

	(void)vm_domain_iterator_cleanup(vi);
}
boolean_t
vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high)
{
@ -504,7 +368,7 @@ _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
KASSERT(domain < vm_ndomains,
KASSERT(domain >= 0 && domain < vm_ndomains,
("vm_phys_create_seg: invalid domain provided"));
seg = &vm_phys_segs[vm_phys_nsegs++];
while (seg > vm_phys_segs && (seg - 1)->start >= end) {
@ -736,29 +600,16 @@ vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
* The free page queues must be locked.
*/
vm_page_t
vm_phys_alloc_pages(int pool, int order)
vm_phys_alloc_pages(int domain, int pool, int order)
{
vm_page_t m;
int domain, flind;
struct vm_domain_iterator vi;
int flind;
KASSERT(pool < VM_NFREEPOOL,
("vm_phys_alloc_pages: pool %d is out of range", pool));
KASSERT(order < VM_NFREEORDER,
("vm_phys_alloc_pages: order %d is out of range", order));
vm_policy_iterator_init(&vi);
while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
for (flind = 0; flind < vm_nfreelists; flind++) {
m = vm_phys_alloc_domain_pages(domain, flind, pool,
order);
if (m != NULL)
return (m);
}
for (flind = 0; flind < vm_nfreelists; flind++) {
m = vm_phys_alloc_freelist_pages(domain, flind, pool, order);
if (m != NULL)
return (m);
}
vm_policy_iterator_finish(&vi);
return (NULL);
}
@ -770,41 +621,23 @@ vm_phys_alloc_pages(int pool, int order)
* The free page queues must be locked.
*/
vm_page_t
vm_phys_alloc_freelist_pages(int freelist, int pool, int order)
vm_phys_alloc_freelist_pages(int domain, int flind, int pool, int order)
{
struct vm_freelist *alt, *fl;
vm_page_t m;
struct vm_domain_iterator vi;
int domain;
int oind, pind;
KASSERT(freelist < VM_NFREELIST,
KASSERT(domain >= 0 && domain < vm_ndomains,
("vm_phys_alloc_freelist_pages: domain %d is out of range",
domain));
KASSERT(flind < VM_NFREELIST,
("vm_phys_alloc_freelist_pages: freelist %d is out of range",
freelist));
flind));
KASSERT(pool < VM_NFREEPOOL,
("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
KASSERT(order < VM_NFREEORDER,
("vm_phys_alloc_freelist_pages: order %d is out of range", order));
vm_policy_iterator_init(&vi);
while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
m = vm_phys_alloc_domain_pages(domain,
vm_freelist_to_flind[freelist], pool, order);
if (m != NULL)
return (m);
}
vm_policy_iterator_finish(&vi);
return (NULL);
}
static vm_page_t
vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
{
struct vm_freelist *fl;
struct vm_freelist *alt;
int oind, pind;
vm_page_t m;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
fl = &vm_phys_free_queues[domain][flind][pool][0];
for (oind = order; oind < VM_NFREEORDER; oind++) {
@ -1261,14 +1094,13 @@ vm_phys_unfree_page(vm_page_t m)
* "alignment" and "boundary" must be a power of two.
*/
vm_page_t
vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
u_long alignment, vm_paddr_t boundary)
{
vm_paddr_t pa_end, pa_start;
vm_page_t m_run;
struct vm_domain_iterator vi;
struct vm_phys_seg *seg;
int domain, segind;
int segind;
KASSERT(npages > 0, ("npages is 0"));
KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
@ -1276,12 +1108,6 @@ vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
if (low >= high)
return (NULL);
vm_policy_iterator_init(&vi);
restartdom:
if (vm_domain_iterator_run(&vi, &domain) != 0) {
vm_policy_iterator_finish(&vi);
return (NULL);
}
m_run = NULL;
for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
seg = &vm_phys_segs[segind];
@ -1304,9 +1130,6 @@ vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
if (m_run != NULL)
break;
}
if (m_run == NULL && !vm_domain_iterator_isdone(&vi))
goto restartdom;
vm_policy_iterator_finish(&vi);
return (m_run);
}

View File

@ -72,10 +72,11 @@ extern int vm_phys_nsegs;
* The following functions are only to be used by the virtual memory system.
*/
void vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end);
vm_page_t vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
u_long alignment, vm_paddr_t boundary);
vm_page_t vm_phys_alloc_freelist_pages(int freelist, int pool, int order);
vm_page_t vm_phys_alloc_pages(int pool, int order);
vm_page_t vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low,
vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
vm_page_t vm_phys_alloc_freelist_pages(int domain, int freelist, int pool,
int order);
vm_page_t vm_phys_alloc_pages(int domain, int pool, int order);
boolean_t vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high);
int vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
vm_memattr_t memattr);
@ -92,12 +93,13 @@ boolean_t vm_phys_unfree_page(vm_page_t m);
int vm_phys_mem_affinity(int f, int t);
/*
* vm_phys_domain:
*
* Return the memory domain the page belongs to.
* vm_phys_domidx:
*
* Return the index of the domain the page belongs to.
*/
static inline struct vm_domain *
vm_phys_domain(vm_page_t m)
static inline int
vm_phys_domidx(vm_page_t m)
{
#ifdef VM_NUMA_ALLOC
int domn, segind;
@ -107,12 +109,24 @@ vm_phys_domain(vm_page_t m)
KASSERT(segind < vm_phys_nsegs, ("segind %d m %p", segind, m));
domn = vm_phys_segs[segind].domain;
KASSERT(domn < vm_ndomains, ("domain %d m %p", domn, m));
return (&vm_dom[domn]);
return (domn);
#else
return (&vm_dom[0]);
return (0);
#endif
}
/*
 *	vm_phys_domain:
 *
 *	Return a pointer to the vm_dom[] entry for the memory domain that
 *	page "m" belongs to, as determined by vm_phys_domidx().
 */
static inline struct vm_domain *
vm_phys_domain(vm_page_t m)
{
	int domn;

	domn = vm_phys_domidx(m);
	return (&vm_dom[domn]);
}
static inline u_int
vm_phys_freecnt_adj(vm_page_t m, int adj)
{

View File

@ -170,6 +170,7 @@ struct vm_reserv {
vm_object_t object; /* containing object */
vm_pindex_t pindex; /* offset within object */
vm_page_t pages; /* first page of a superpage */
int domain; /* NUMA domain */
int popcnt; /* # of pages in use */
char inpartpopq;
popmap_t popmap[NPOPMAP]; /* bit vector of used pages */
@ -207,8 +208,7 @@ static vm_reserv_t vm_reserv_array;
*
* Access to this queue is synchronized by the free page queue lock.
*/
static TAILQ_HEAD(, vm_reserv) vm_rvq_partpop =
TAILQ_HEAD_INITIALIZER(vm_rvq_partpop);
static TAILQ_HEAD(, vm_reserv) vm_rvq_partpop[MAXMEMDOM];
static SYSCTL_NODE(_vm, OID_AUTO, reserv, CTLFLAG_RD, 0, "Reservation Info");
@ -277,24 +277,27 @@ sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
{
struct sbuf sbuf;
vm_reserv_t rv;
int counter, error, level, unused_pages;
int counter, error, domain, level, unused_pages;
error = sysctl_wire_old_buffer(req, 0);
if (error != 0)
return (error);
sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
sbuf_printf(&sbuf, "\nLEVEL SIZE NUMBER\n\n");
for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) {
counter = 0;
unused_pages = 0;
mtx_lock(&vm_page_queue_free_mtx);
TAILQ_FOREACH(rv, &vm_rvq_partpop/*[level]*/, partpopq) {
counter++;
unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
sbuf_printf(&sbuf, "\nDOMAIN LEVEL SIZE NUMBER\n\n");
for (domain = 0; domain < vm_ndomains; domain++) {
for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) {
counter = 0;
unused_pages = 0;
mtx_lock(&vm_page_queue_free_mtx);
TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) {
counter++;
unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
}
mtx_unlock(&vm_page_queue_free_mtx);
sbuf_printf(&sbuf, "%6d, %7d, %6dK, %6d\n",
domain, level,
unused_pages * ((int)PAGE_SIZE / 1024), counter);
}
mtx_unlock(&vm_page_queue_free_mtx);
sbuf_printf(&sbuf, "%5d: %6dK, %6d\n", level,
unused_pages * ((int)PAGE_SIZE / 1024), counter);
}
error = sbuf_finish(&sbuf);
sbuf_delete(&sbuf);
@ -321,8 +324,11 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
index));
KASSERT(rv->popcnt > 0,
("vm_reserv_depopulate: reserv %p's popcnt is corrupted", rv));
KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
("vm_reserv_depopulate: reserv %p's domain is corrupted %d",
rv, rv->domain));
if (rv->inpartpopq) {
TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
rv->inpartpopq = FALSE;
} else {
KASSERT(rv->pages->psind == 1,
@ -335,11 +341,12 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
if (rv->popcnt == 0) {
LIST_REMOVE(rv, objq);
rv->object = NULL;
rv->domain = -1;
vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER);
vm_reserv_freed++;
} else {
rv->inpartpopq = TRUE;
TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq);
TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
}
}
@ -384,15 +391,18 @@ vm_reserv_populate(vm_reserv_t rv, int index)
("vm_reserv_populate: reserv %p is already full", rv));
KASSERT(rv->pages->psind == 0,
("vm_reserv_populate: reserv %p is already promoted", rv));
KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
("vm_reserv_populate: reserv %p's domain is corrupted %d",
rv, rv->domain));
if (rv->inpartpopq) {
TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
rv->inpartpopq = FALSE;
}
popmap_set(rv->popmap, index);
rv->popcnt++;
if (rv->popcnt < VM_LEVEL_0_NPAGES) {
rv->inpartpopq = TRUE;
TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq);
TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
} else
rv->pages->psind = 1;
}
@ -413,9 +423,9 @@ vm_reserv_populate(vm_reserv_t rv, int index)
* The object and free page queue must be locked.
*/
vm_page_t
vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages,
vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
vm_page_t mpred)
vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_page_t mpred)
{
vm_paddr_t pa, size;
vm_page_t m, m_ret, msucc;
@ -535,7 +545,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages,
* specified index may not be the first page within the first new
* reservation.
*/
m = vm_phys_alloc_contig(allocpages, low, high, ulmax(alignment,
m = vm_phys_alloc_contig(domain, allocpages, low, high, ulmax(alignment,
VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? boundary : 0);
if (m == NULL)
return (NULL);
@ -558,6 +568,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages,
LIST_INSERT_HEAD(&object->rvq, rv, objq);
rv->object = object;
rv->pindex = first;
rv->domain = vm_phys_domidx(m);
KASSERT(rv->popcnt == 0,
("vm_reserv_alloc_contig: reserv %p's popcnt is corrupted",
rv));
@ -613,7 +624,8 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages,
* The object and free page queue must be locked.
*/
vm_page_t
vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, vm_page_t mpred)
vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain,
vm_page_t mpred)
{
vm_page_t m, msucc;
vm_pindex_t first, leftcap, rightcap;
@ -692,7 +704,7 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, vm_page_t mpred)
/*
* Allocate and populate the new reservation.
*/
m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER);
m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER);
if (m == NULL)
return (NULL);
rv = vm_reserv_from_page(m);
@ -703,6 +715,7 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, vm_page_t mpred)
LIST_INSERT_HEAD(&object->rvq, rv, objq);
rv->object = object;
rv->pindex = first;
rv->domain = vm_phys_domidx(m);
KASSERT(rv->popcnt == 0,
("vm_reserv_alloc_page: reserv %p's popcnt is corrupted", rv));
KASSERT(!rv->inpartpopq,
@ -749,6 +762,7 @@ vm_reserv_break(vm_reserv_t rv, vm_page_t m)
("vm_reserv_break: reserv %p's inpartpopq is TRUE", rv));
LIST_REMOVE(rv, objq);
rv->object = NULL;
rv->domain = -1;
if (m != NULL) {
/*
* Since the reservation is being broken, there is no harm in
@ -818,7 +832,7 @@ vm_reserv_break_all(vm_object_t object)
KASSERT(rv->object == object,
("vm_reserv_break_all: reserv %p is corrupted", rv));
if (rv->inpartpopq) {
TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
rv->inpartpopq = FALSE;
}
vm_reserv_break(rv, NULL);
@ -856,7 +870,7 @@ vm_reserv_init(void)
{
vm_paddr_t paddr;
struct vm_phys_seg *seg;
int segind;
int i, segind;
/*
* Initialize the reservation array. Specifically, initialize the
@ -871,6 +885,8 @@ vm_reserv_init(void)
paddr += VM_LEVEL_0_SIZE;
}
}
for (i = 0; i < MAXMEMDOM; i++)
TAILQ_INIT(&vm_rvq_partpop[i]);
}
/*
@ -928,7 +944,10 @@ vm_reserv_reclaim(vm_reserv_t rv)
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
KASSERT(rv->inpartpopq,
("vm_reserv_reclaim: reserv %p's inpartpopq is FALSE", rv));
TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
("vm_reserv_reclaim: reserv %p's domain is corrupted %d",
rv, rv->domain));
TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
rv->inpartpopq = FALSE;
vm_reserv_break(rv, NULL);
vm_reserv_reclaimed++;
@ -942,12 +961,12 @@ vm_reserv_reclaim(vm_reserv_t rv)
* The free page queue lock must be held.
*/
boolean_t
vm_reserv_reclaim_inactive(void)
vm_reserv_reclaim_inactive(int domain)
{
vm_reserv_t rv;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
if ((rv = TAILQ_FIRST(&vm_rvq_partpop)) != NULL) {
if ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) {
vm_reserv_reclaim(rv);
return (TRUE);
}
@ -963,8 +982,8 @@ vm_reserv_reclaim_inactive(void)
* The free page queue lock must be held.
*/
boolean_t
vm_reserv_reclaim_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
u_long alignment, vm_paddr_t boundary)
vm_reserv_reclaim_contig(int domain, u_long npages, vm_paddr_t low,
vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
vm_paddr_t pa, size;
vm_reserv_t rv;
@ -974,7 +993,7 @@ vm_reserv_reclaim_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
if (npages > VM_LEVEL_0_NPAGES - 1)
return (FALSE);
size = npages << PAGE_SHIFT;
TAILQ_FOREACH(rv, &vm_rvq_partpop, partpopq) {
TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) {
pa = VM_PAGE_TO_PHYS(&rv->pages[VM_LEVEL_0_NPAGES - 1]);
if (pa + PAGE_SIZE - size < low) {
/* This entire reservation is too low; go to next. */

View File

@ -48,19 +48,20 @@
* The following functions are only to be used by the virtual memory system.
*/
vm_page_t vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex,
u_long npages, vm_paddr_t low, vm_paddr_t high,
int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
u_long alignment, vm_paddr_t boundary, vm_page_t mpred);
vm_page_t vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex,
vm_page_t mpred);
int domain, vm_page_t mpred);
void vm_reserv_break_all(vm_object_t object);
boolean_t vm_reserv_free_page(vm_page_t m);
void vm_reserv_init(void);
bool vm_reserv_is_page_free(vm_page_t m);
int vm_reserv_level(vm_page_t m);
int vm_reserv_level_iffullpop(vm_page_t m);
boolean_t vm_reserv_reclaim_contig(u_long npages, vm_paddr_t low,
vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
boolean_t vm_reserv_reclaim_inactive(void);
boolean_t vm_reserv_reclaim_contig(int domain, u_long npages,
vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary);
boolean_t vm_reserv_reclaim_inactive(int domain);
void vm_reserv_rename(vm_page_t m, vm_object_t new_object,
vm_object_t old_object, vm_pindex_t old_object_offset);
int vm_reserv_size(int level);