Use per-domain locks for vm page queue free.  Move paging control from
global to per-domain state.  Protect reservations with the free lock
from the domain that they belong to.  Refactor to make vm domains more
of a first class object.

Reviewed by:    markj, kib, gallatin
Tested by:      pho
Sponsored by:   Netflix, Dell/EMC Isilon
Differential Revision:  https://reviews.freebsd.org/D14000
Jeff Roberson 2018-02-06 22:10:07 +00:00
parent 1616767dfc
commit e2068d0bcd
37 changed files with 1268 additions and 631 deletions
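
The sketch below is not part of the diff. It illustrates the per-domain pattern the commit message describes and that the hunks below convert callers to: take one domain's free lock, check that domain's thresholds, and adjust that domain's free count, instead of serializing on the global vm_page_queue_free_mtx. The helpers (VM_DOMAIN(), vm_domain_free_lock()/vm_domain_free_unlock(), vm_domain_available(), vm_domain_freecnt_adj()) are the ones introduced in this change; the function name example_reserve_page() and the assumption that <vm/vm_pagequeue.h> is the header exposing them are illustrative only.

/*
 * Illustrative sketch only (not part of the commit): reserve one free page
 * from a specific domain using the per-domain free lock and counters.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>	/* assumed home of the vm_domain helpers */

static int
example_reserve_page(int domain, int req)	/* hypothetical helper */
{
	struct vm_domain *vmd;
	int ok;

	vmd = VM_DOMAIN(domain);
	vm_domain_free_lock(vmd);
	/* Same per-domain policy check the allocators in this diff use. */
	ok = vm_domain_available(vmd, req, 1);
	if (ok)
		vm_domain_freecnt_adj(vmd, -1);	/* claim one page */
	vm_domain_free_unlock(vmd);
	return (ok);
}

Because each domain has its own free lock and counters, allocations and frees in different domains no longer contend on a single global mutex, which is the point of the change.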


@@ -282,7 +282,7 @@ cpu_startup(dummy)
 memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10;
 freeenv(sysenv);
 }
-if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count))
+if (memsize < ptoa((uintmax_t)vm_free_count()))
 memsize = ptoa((uintmax_t)Maxmem);
 printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20);
 realmem = atop(memsize);
@@ -309,8 +309,8 @@ cpu_startup(dummy)
 vm_ksubmap_init(&kmi);
 printf("avail memory = %ju (%ju MB)\n",
-ptoa((uintmax_t)vm_cnt.v_free_count),
-ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576);
+ptoa((uintmax_t)vm_free_count()),
+ptoa((uintmax_t)vm_free_count()) / 1048576);
 /*
 * Set up buffers, so they can be used to read disk labels.


@@ -228,8 +228,8 @@ cpu_startup(void *dummy)
 (uintmax_t)arm32_ptob(realmem),
 (uintmax_t)arm32_ptob(realmem) / mbyte);
 printf("avail memory = %ju (%ju MB)\n",
-(uintmax_t)arm32_ptob(vm_cnt.v_free_count),
-(uintmax_t)arm32_ptob(vm_cnt.v_free_count) / mbyte);
+(uintmax_t)arm32_ptob(vm_free_count()),
+(uintmax_t)arm32_ptob(vm_free_count()) / mbyte);
 if (bootverbose) {
 arm_physmem_print_tables();
 devmap_print_table();


@@ -3817,7 +3817,7 @@ pmap_get_pv_entry(void)
 pv_entry_count++;
 if (pv_entry_count > pv_entry_high_water)
-pagedaemon_wakeup();
+pagedaemon_wakeup(0); /* XXX ARM NUMA */
 ret_value = uma_zalloc(pvzone, M_NOWAIT);
 return ret_value;
 }


@@ -78,7 +78,7 @@ void kmem_reap(void);
 int kmem_debugging(void);
 void *calloc(size_t n, size_t s);
-#define freemem vm_cnt.v_free_count
+#define freemem vm_free_count()
 #define minfree vm_cnt.v_free_min
 #define heap_arena kernel_arena
 #define zio_arena NULL


@@ -379,7 +379,7 @@ static void
 arc_free_target_init(void *unused __unused)
 {
-zfs_arc_free_target = vm_pageout_wakeup_thresh;
+zfs_arc_free_target = (vm_cnt.v_free_min / 10) * 11;
 }
 SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
 arc_free_target_init, NULL);


@@ -156,7 +156,7 @@ linprocfs_domeminfo(PFS_FILL_ARGS)
 /*
 * The correct thing here would be:
 *
-memfree = vm_cnt.v_free_count * PAGE_SIZE;
+memfree = vm_free_count() * PAGE_SIZE;
 memused = memtotal - memfree;
 *
 * but it might mislead linux binaries into thinking there
@@ -178,7 +178,7 @@ linprocfs_domeminfo(PFS_FILL_ARGS)
 * like unstaticizing it just for linprocfs's sake.
 */
 buffers = 0;
-cached = vm_cnt.v_inactive_count * PAGE_SIZE;
+cached = vm_inactive_count() * PAGE_SIZE;
 sbuf_printf(sb,
 "MemTotal: %9lu kB\n"


@@ -106,7 +106,7 @@ tmpfs_mem_avail(void)
 {
 vm_ooffset_t avail;
-avail = swap_pager_avail + vm_cnt.v_free_count - tmpfs_pages_reserved;
+avail = swap_pager_avail + vm_free_count() - tmpfs_pages_reserved;
 if (__predict_false(avail < 0))
 avail = 0;
 return (avail);


@@ -271,7 +271,7 @@ cpu_startup(dummy)
 memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10;
 freeenv(sysenv);
 }
-if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count))
+if (memsize < ptoa((uintmax_t)vm_free_count()))
 memsize = ptoa((uintmax_t)Maxmem);
 printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20);
 realmem = atop(memsize);
@@ -298,8 +298,8 @@ cpu_startup(dummy)
 vm_ksubmap_init(&kmi);
 printf("avail memory = %ju (%ju MB)\n",
-ptoa((uintmax_t)vm_cnt.v_free_count),
-ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576);
+ptoa((uintmax_t)vm_free_count()),
+ptoa((uintmax_t)vm_free_count()) / 1048576);
 /*
 * Set up buffers, so they can be used to read disk labels.


@@ -87,6 +87,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm.h>
 #include <vm/vm_param.h>
+#include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <sys/copyright.h>
@@ -555,7 +556,7 @@ proc0_init(void *dummy __unused)
 p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz;
 p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz;
 /* Cast to avoid overflow on i386/PAE. */
-pageablemem = ptoa((vm_paddr_t)vm_cnt.v_free_count);
+pageablemem = ptoa((vm_paddr_t)vm_free_count());
 p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur =
 p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem;
 p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3;


@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <sys/vmem.h>
+#include <sys/vmmeter.h>
 #include "opt_vm.h"
@@ -72,6 +73,8 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_param.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
 #include <vm/uma_int.h>
 int vmem_startup_count(void);
@@ -644,7 +647,7 @@ vmem_bt_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
 * possible due to M_USE_RESERVE page allocation.
 */
 if (wait & M_WAITOK)
-VM_WAIT;
+vm_wait_domain(domain);
 return (NULL);
 }
 mtx_unlock(&vmem_bt_lock);


@@ -139,7 +139,7 @@ __FBSDID("$FreeBSD$");
 #define WITNESS_COUNT 1536
 #endif
 #define WITNESS_HASH_SIZE 251 /* Prime, gives load factor < 2 */
-#define WITNESS_PENDLIST (2048 + MAXCPU)
+#define WITNESS_PENDLIST (2048 + (MAXCPU * 4))
 /* Allocate 256 KB of stack data space */
 #define WITNESS_LO_DATA_COUNT 2048


@@ -210,8 +210,8 @@ cpu_startup(void *dummy)
 vm_ksubmap_init(&kmi);
 printf("avail memory = %ju (%juMB)\n",
-ptoa((uintmax_t)vm_cnt.v_free_count),
-ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576);
+ptoa((uintmax_t)vm_free_count()),
+ptoa((uintmax_t)vm_free_count()) / 1048576);
 cpu_init_interrupts();
 /*


@@ -1190,7 +1190,7 @@ pv_alloc(void)
 pv_entry_count++;
 if (pv_entry_count > pv_entry_high_water)
-pagedaemon_wakeup();
+pagedaemon_wakeup(0); /* XXX powerpc NUMA */
 pv = uma_zalloc(pvzone, M_NOWAIT);
 return (pv);


@@ -221,8 +221,8 @@ cpu_startup(void *dummy)
 vm_ksubmap_init(&kmi);
 printf("avail memory = %ju (%ju MB)\n",
-ptoa((uintmax_t)vm_cnt.v_free_count),
-ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576);
+ptoa((uintmax_t)vm_free_count()),
+ptoa((uintmax_t)vm_free_count()) / 1048576);
 /*
 * Set up buffers, so they can be used to read disk labels.


@@ -190,8 +190,8 @@ cpu_startup(void *arg)
 EVENTHANDLER_REGISTER(shutdown_final, sparc64_shutdown_final, NULL,
 SHUTDOWN_PRI_LAST);
-printf("avail memory = %lu (%lu MB)\n", vm_cnt.v_free_count * PAGE_SIZE,
-vm_cnt.v_free_count / ((1024 * 1024) / PAGE_SIZE));
+printf("avail memory = %lu (%lu MB)\n", vm_free_count() * PAGE_SIZE,
+vm_free_count() / ((1024 * 1024) / PAGE_SIZE));
 if (bootverbose)
 printf("machine: %s\n", sparc64_model);


@@ -140,23 +140,23 @@ struct vmmeter {
 u_int v_interrupt_free_min; /* (c) reserved pages for int code */
 u_int v_free_severe; /* (c) severe page depletion point */
 u_int v_wire_count VMMETER_ALIGNED; /* (a) pages wired down */
-u_int v_active_count VMMETER_ALIGNED; /* (a) pages active */
-u_int v_inactive_count VMMETER_ALIGNED; /* (a) pages inactive */
-u_int v_laundry_count VMMETER_ALIGNED; /* (a) pages eligible for
-laundering */
-u_int v_free_count VMMETER_ALIGNED; /* (f) pages free */
 };
 #endif /* _KERNEL || _WANT_VMMETER */
 #ifdef _KERNEL
+#include <sys/domainset.h>
 extern struct vmmeter vm_cnt;
-extern u_int vm_pageout_wakeup_thresh;
+extern domainset_t vm_min_domains;
+extern domainset_t vm_severe_domains;
 #define VM_CNT_ADD(var, x) counter_u64_add(vm_cnt.var, x)
 #define VM_CNT_INC(var) VM_CNT_ADD(var, 1)
 #define VM_CNT_FETCH(var) counter_u64_fetch(vm_cnt.var)
+u_int vm_free_count(void);
 /*
 * Return TRUE if we are under our severe low-free-pages threshold
 *
@@ -167,7 +167,7 @@ static inline int
 vm_page_count_severe(void)
 {
-return (vm_cnt.v_free_severe > vm_cnt.v_free_count);
+return (!DOMAINSET_EMPTY(&vm_severe_domains));
 }
 /*
@@ -183,50 +183,8 @@ static inline int
 vm_page_count_min(void)
 {
-return (vm_cnt.v_free_min > vm_cnt.v_free_count);
+return (!DOMAINSET_EMPTY(&vm_min_domains));
 }
-/*
-* Return TRUE if we have not reached our free page target during
-* free page recovery operations.
-*/
-static inline int
-vm_page_count_target(void)
-{
-return (vm_cnt.v_free_target > vm_cnt.v_free_count);
-}
-/*
-* Return the number of pages we need to free-up or cache
-* A positive number indicates that we do not have enough free pages.
-*/
-static inline int
-vm_paging_target(void)
-{
-return (vm_cnt.v_free_target - vm_cnt.v_free_count);
-}
-/*
-* Returns TRUE if the pagedaemon needs to be woken up.
-*/
-static inline int
-vm_paging_needed(u_int free_count)
-{
-return (free_count < vm_pageout_wakeup_thresh);
-}
-/*
-* Return the number of pages we need to launder.
-* A positive number indicates that we have a shortfall of clean pages.
-*/
-static inline int
-vm_laundry_target(void)
-{
-return (vm_paging_target());
-}
 #endif /* _KERNEL */
 #endif /* _SYS_VMMETER_H_ */
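
Not part of the diff: a consumer-side sketch of the predicates above, which now test membership in a domainset instead of comparing global counters. vm_page_count_min() and vm_page_count_severe() come from this header; vm_wait_severe() is added in vm_page.c later in this commit, and the include that declares it is assumed here. example_throttle() is a hypothetical function.

/*
 * Hypothetical consumer: back off while any domain is below its severe
 * free-page threshold, then just report if any domain is still below its
 * minimum threshold.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>	/* assumed to declare vm_wait_severe() */

static void
example_throttle(void)
{
	while (vm_page_count_severe())
		vm_wait_severe();

	if (vm_page_count_min())
		printf("at least one domain is low on free pages\n");
}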


@@ -2327,7 +2327,7 @@ swapoff_one(struct swdevt *sp, struct ucred *cred)
 * of data we will have to page back in, plus an epsilon so
 * the system doesn't become critically low on swap space.
 */
-if (vm_cnt.v_free_count + swap_pager_avail < nblks + nswap_lowat)
+if (vm_free_count() + swap_pager_avail < nblks + nswap_lowat)
 return (ENOMEM);
 /*


@@ -3464,7 +3464,7 @@ uma_large_malloc_domain(vm_size_t size, int domain, int wait)
 slab->us_data = (void *)addr;
 slab->us_flags = UMA_SLAB_KERNEL | UMA_SLAB_MALLOC;
 slab->us_size = size;
-slab->us_domain = vm_phys_domidx(PHYS_TO_VM_PAGE(
+slab->us_domain = vm_phys_domain(PHYS_TO_VM_PAGE(
 pmap_kextract(addr)));
 uma_total_inc(size);
 } else {


@@ -122,5 +122,9 @@ struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset);
 void vm_imgact_unmap_page(struct sf_buf *sf);
 void vm_thread_dispose(struct thread *td);
 int vm_thread_new(struct thread *td, int pages);
+u_int vm_active_count(void);
+u_int vm_inactive_count(void);
+u_int vm_laundry_count(void);
+u_int vm_wait_count(void);
 #endif /* _KERNEL */
 #endif /* !_VM_EXTERN_H_ */


@@ -552,7 +552,7 @@ vm_forkproc(struct thread *td, struct proc *p2, struct thread *td2,
 }
 while (vm_page_count_severe()) {
-VM_WAIT;
+vm_wait_severe();
 }
 if ((flags & RFMEM) == 0) {


@@ -89,6 +89,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
 #include <vm/vm_map.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_extern.h>


@@ -92,6 +92,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
 #include <vm/vm_radix.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
@@ -196,7 +197,7 @@ kmem_alloc_attr_domain(int domain, vm_size_t size, int flags, vm_paddr_t low,
 if (!vm_page_reclaim_contig_domain(domain,
 pflags, 1, low, high, PAGE_SIZE, 0) &&
 (flags & M_WAITOK) != 0)
-VM_WAIT;
+vm_wait_domain(domain);
 VM_OBJECT_WLOCK(object);
 tries++;
 goto retry;
@@ -205,9 +206,9 @@ kmem_alloc_attr_domain(int domain, vm_size_t size, int flags, vm_paddr_t low,
 vmem_free(vmem, addr, size);
 return (0);
 }
-KASSERT(vm_phys_domidx(m) == domain,
+KASSERT(vm_phys_domain(m) == domain,
 ("kmem_alloc_attr_domain: Domain mismatch %d != %d",
-vm_phys_domidx(m), domain));
+vm_phys_domain(m), domain));
 if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
 pmap_zero_page(m);
 m->valid = VM_PAGE_BITS_ALL;
@@ -280,7 +281,7 @@ kmem_alloc_contig_domain(int domain, vm_size_t size, int flags, vm_paddr_t low,
 if (!vm_page_reclaim_contig_domain(domain, pflags,
 npages, low, high, alignment, boundary) &&
 (flags & M_WAITOK) != 0)
-VM_WAIT;
+vm_wait_domain(domain);
 VM_OBJECT_WLOCK(object);
 tries++;
 goto retry;
@@ -288,9 +289,9 @@ kmem_alloc_contig_domain(int domain, vm_size_t size, int flags, vm_paddr_t low,
 vmem_free(vmem, addr, size);
 return (0);
 }
-KASSERT(vm_phys_domidx(m) == domain,
+KASSERT(vm_phys_domain(m) == domain,
 ("kmem_alloc_contig_domain: Domain mismatch %d != %d",
-vm_phys_domidx(m), domain));
+vm_phys_domain(m), domain));
 end_m = m + npages;
 tmp = addr;
 for (; m < end_m; m++) {
@@ -452,9 +453,9 @@ kmem_back_domain(int domain, vm_object_t object, vm_offset_t addr,
 kmem_unback(object, addr, i);
 return (KERN_NO_SPACE);
 }
-KASSERT(vm_phys_domidx(m) == domain,
+KASSERT(vm_phys_domain(m) == domain,
 ("kmem_back_domain: Domain mismatch %d != %d",
-vm_phys_domidx(m), domain));
+vm_phys_domain(m), domain));
 if (flags & M_ZERO && (m->flags & PG_ZERO) == 0)
 pmap_zero_page(m);
 KASSERT((m->oflags & VPO_UNMANAGED) != 0,
@@ -514,7 +515,7 @@ _kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
 end = offset + size;
 VM_OBJECT_WLOCK(object);
 m = vm_page_lookup(object, atop(offset));
-domain = vm_phys_domidx(m);
+domain = vm_phys_domain(m);
 for (; offset < end; offset += PAGE_SIZE, m = next) {
 next = vm_page_next(m);
 vm_page_unwire(m, PQ_NONE);


@@ -2011,7 +2011,7 @@ vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
 * free pages allocating pv entries.
 */
 if (((flags & MAP_PREFAULT_MADVISE) != 0 &&
-vm_cnt.v_free_count < vm_cnt.v_free_reserved) ||
+vm_page_count_severe()) ||
 ((flags & MAP_PREFAULT_PARTIAL) != 0 &&
 tmpidx >= threshold)) {
 psize = tmpidx;


@@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_page.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_param.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
@@ -213,9 +215,6 @@ vmtotal(SYSCTL_HANDLER_ARGS)
 total.t_dw++;
 else
 total.t_sl++;
-if (td->td_wchan ==
-&vm_cnt.v_free_count)
-total.t_pw++;
 }
 break;
 case TDS_CAN_RUN:
@@ -283,7 +282,8 @@ vmtotal(SYSCTL_HANDLER_ARGS)
 }
 }
 mtx_unlock(&vm_object_list_mtx);
-total.t_free = vm_cnt.v_free_count;
+total.t_pw = vm_wait_count();
+total.t_free = vm_free_count();
 #if defined(COMPAT_FREEBSD11)
 /* sysctl(8) allocates twice as much memory as reported by sysctl(3) */
 if (curproc->p_osrel < P_OSREL_VMTOTAL64 && (req->oldlen ==
@@ -339,7 +339,7 @@ sysctl_handle_vmstat(SYSCTL_HANDLER_ARGS)
 #define VM_STATS(parent, var, descr) \
 SYSCTL_OID(parent, OID_AUTO, var, CTLTYPE_U64 | CTLFLAG_MPSAFE | \
-CTLFLAG_RD, &vm_cnt.var, 0, sysctl_handle_vmstat, "QU", descr);
+CTLFLAG_RD, &vm_cnt.var, 0, sysctl_handle_vmstat, "QU", descr)
 #define VM_STATS_VM(var, descr) VM_STATS(_vm_stats_vm, var, descr)
 #define VM_STATS_SYS(var, descr) VM_STATS(_vm_stats_sys, var, descr)
@@ -379,19 +379,36 @@ VM_STATS_VM(v_vforkpages, "VM pages affected by vfork()");
 VM_STATS_VM(v_rforkpages, "VM pages affected by rfork()");
 VM_STATS_VM(v_kthreadpages, "VM pages affected by fork() by kernel");
+static int
+sysctl_handle_vmstat_proc(SYSCTL_HANDLER_ARGS)
+{
+u_int (*fn)(void);
+uint32_t val;
+fn = arg1;
+val = fn();
+return (SYSCTL_OUT(req, &val, sizeof(val)));
+}
+#define VM_STATS_PROC(var, descr, fn) \
+SYSCTL_OID(_vm_stats_vm, OID_AUTO, var, CTLTYPE_U32 | CTLFLAG_MPSAFE | \
+CTLFLAG_RD, fn, 0, sysctl_handle_vmstat_proc, "IU", descr)
 #define VM_STATS_UINT(var, descr) \
 SYSCTL_UINT(_vm_stats_vm, OID_AUTO, var, CTLFLAG_RD, &vm_cnt.var, 0, descr)
 VM_STATS_UINT(v_page_size, "Page size in bytes");
 VM_STATS_UINT(v_page_count, "Total number of pages in system");
 VM_STATS_UINT(v_free_reserved, "Pages reserved for deadlock");
 VM_STATS_UINT(v_free_target, "Pages desired free");
 VM_STATS_UINT(v_free_min, "Minimum low-free-pages threshold");
-VM_STATS_UINT(v_free_count, "Free pages");
+VM_STATS_PROC(v_free_count, "Free pages", vm_free_count);
 VM_STATS_UINT(v_wire_count, "Wired pages");
-VM_STATS_UINT(v_active_count, "Active pages");
+VM_STATS_PROC(v_active_count, "Active pages", vm_active_count);
 VM_STATS_UINT(v_inactive_target, "Desired inactive pages");
-VM_STATS_UINT(v_inactive_count, "Inactive pages");
+VM_STATS_PROC(v_inactive_count, "Inactive pages", vm_inactive_count);
-VM_STATS_UINT(v_laundry_count, "Pages eligible for laundering");
+VM_STATS_PROC(v_laundry_count, "Pages eligible for laundering",
+vm_laundry_count);
 VM_STATS_UINT(v_pageout_free_min, "Min pages reserved for kernel");
 VM_STATS_UINT(v_interrupt_free_min, "Reserved pages for interrupt code");
 VM_STATS_UINT(v_free_severe, "Severe page depletion point");
@@ -406,3 +423,52 @@ SYSCTL_UINT(_vm_stats_vm, OID_AUTO, v_cache_count, CTLFLAG_RD,
 SYSCTL_UINT(_vm_stats_vm, OID_AUTO, v_tcached, CTLFLAG_RD,
 SYSCTL_NULL_UINT_PTR, 0, "Dummy for compatibility");
 #endif
+u_int
+vm_free_count(void)
+{
+u_int v;
+int i;
+v = 0;
+for (i = 0; i < vm_ndomains; i++)
+v += vm_dom[i].vmd_free_count;
+return (v);
+}
+static
+u_int
+vm_pagequeue_count(int pq)
+{
+u_int v;
+int i;
+v = 0;
+for (i = 0; i < vm_ndomains; i++)
+v += vm_dom[i].vmd_pagequeues[pq].pq_cnt;
+return (v);
+}
+u_int
+vm_active_count(void)
+{
+return vm_pagequeue_count(PQ_ACTIVE);
+}
+u_int
+vm_inactive_count(void)
+{
+return vm_pagequeue_count(PQ_INACTIVE);
+}
+u_int
+vm_laundry_count(void)
+{
+return vm_pagequeue_count(PQ_LAUNDRY);
+}
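
Not part of the commit: a userland sketch that reads one of the counters now computed on demand by sysctl_handle_vmstat_proc(), which sums the per-domain state above. The OID name follows the VM_STATS_PROC(v_free_count, ...) declaration; error handling is minimal.

/* Userland sketch: read the aggregated free-page count via sysctl(3). */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	unsigned int free_pages;
	size_t len = sizeof(free_pages);

	if (sysctlbyname("vm.stats.vm.v_free_count", &free_pages, &len,
	    NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("free pages: %u\n", free_pages);
	return (0);
}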


@@ -96,6 +96,8 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
 #include <vm/swap_pager.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>


@@ -297,6 +297,17 @@ vm_object_color(vm_object_t object, u_short color)
 }
 }
+static __inline bool
+vm_object_reserv(vm_object_t object)
+{
+if (object != NULL &&
+(object->flags & (OBJ_COLORED | OBJ_FICTITIOUS)) == OBJ_COLORED) {
+return (true);
+}
+return (false);
+}
 void vm_object_clear_flag(vm_object_t object, u_short bits);
 void vm_object_pip_add(vm_object_t object, short i);
 void vm_object_pip_subtract(vm_object_t object, short i);


@@ -116,8 +116,9 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
-#include <vm/vm_pager.h>
 #include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
+#include <vm/vm_pager.h>
 #include <vm/vm_radix.h>
 #include <vm/vm_reserv.h>
 #include <vm/vm_extern.h>
@@ -136,9 +137,15 @@ extern int vmem_startup_count(void);
 */
 struct vm_domain vm_dom[MAXMEMDOM];
-struct mtx_padalign __exclusive_cache_line vm_page_queue_free_mtx;
 struct mtx_padalign __exclusive_cache_line pa_lock[PA_LOCK_COUNT];
+struct mtx_padalign __exclusive_cache_line vm_domainset_lock;
+domainset_t __exclusive_cache_line vm_min_domains;
+domainset_t __exclusive_cache_line vm_severe_domains;
+static int vm_min_waiters;
+static int vm_severe_waiters;
+static int vm_pageproc_waiters;
 /*
 * bogus page -- for I/O to/from partially complete buffers,
@@ -164,24 +171,22 @@ static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS);
 SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD |
 CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_page_blacklist, "A", "Blacklist pages");
-/* Is the page daemon waiting for free pages? */
-static int vm_pageout_pages_needed;
 static uma_zone_t fakepg_zone;
 static void vm_page_alloc_check(vm_page_t m);
 static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
 static void vm_page_enqueue(uint8_t queue, vm_page_t m);
-static void vm_page_free_phys(vm_page_t m);
+static void vm_page_free_phys(struct vm_domain *vmd, vm_page_t m);
-static void vm_page_free_wakeup(void);
 static void vm_page_init(void *dummy);
 static int vm_page_insert_after(vm_page_t m, vm_object_t object,
 vm_pindex_t pindex, vm_page_t mpred);
 static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object,
 vm_page_t mpred);
-static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run,
-vm_paddr_t high);
+static int vm_page_reclaim_run(int req_class, int domain, u_long npages,
+vm_page_t m_run, vm_paddr_t high);
-static int vm_page_alloc_fail(vm_object_t object, int req);
+static void vm_domain_free_wakeup(struct vm_domain *);
+static int vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object,
+int req);
 SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init, NULL);
@@ -318,6 +323,7 @@ vm_page_blacklist_next(char **list, char *end)
 static void
 vm_page_blacklist_check(char *list, char *end)
 {
+struct vm_domain *vmd;
 vm_paddr_t pa;
 vm_page_t m;
 char *next;
@@ -330,9 +336,10 @@ vm_page_blacklist_check(char *list, char *end)
 m = vm_phys_paddr_to_vm_page(pa);
 if (m == NULL)
 continue;
-mtx_lock(&vm_page_queue_free_mtx);
+vmd = vm_pagequeue_domain(m);
+vm_domain_free_lock(vmd);
 ret = vm_phys_unfree_page(m);
-mtx_unlock(&vm_page_queue_free_mtx);
+vm_domain_free_unlock(vmd);
 if (ret == TRUE) {
 TAILQ_INSERT_TAIL(&blacklist_head, m, listq);
 if (bootverbose)
@@ -395,28 +402,23 @@ sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS)
 }
 static void
-vm_page_domain_init(struct vm_domain *vmd)
+vm_page_domain_init(int domain)
 {
+struct vm_domain *vmd;
 struct vm_pagequeue *pq;
 int i;
+vmd = VM_DOMAIN(domain);
+bzero(vmd, sizeof(*vmd));
 *__DECONST(char **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_name) =
 "vm inactive pagequeue";
-*__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_vcnt) =
-&vm_cnt.v_inactive_count;
 *__DECONST(char **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_name) =
 "vm active pagequeue";
-*__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) =
-&vm_cnt.v_active_count;
 *__DECONST(char **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_name) =
 "vm laundry pagequeue";
-*__DECONST(int **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_vcnt) =
-&vm_cnt.v_laundry_count;
 *__DECONST(char **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_name) =
 "vm unswappable pagequeue";
-/* Unswappable dirty pages are counted as being in the laundry. */
-*__DECONST(int **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_vcnt) =
-&vm_cnt.v_laundry_count;
+vmd->vmd_domain = domain;
 vmd->vmd_page_count = 0;
 vmd->vmd_free_count = 0;
 vmd->vmd_segs = 0;
@@ -427,6 +429,7 @@ vm_page_domain_init(struct vm_domain *vmd)
 mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue",
 MTX_DEF | MTX_DUPOK);
 }
+mtx_init(&vmd->vmd_free_mtx, "vm page free queue", NULL, MTX_DEF);
 }
 /*
@@ -463,7 +466,6 @@ vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segind)
 vm_offset_t
 vm_page_startup(vm_offset_t vaddr)
 {
-struct vm_domain *vmd;
 struct vm_phys_seg *seg;
 vm_page_t m;
 char *list, *listend;
@@ -494,11 +496,11 @@ vm_page_startup(vm_offset_t vaddr)
 /*
 * Initialize the page and queue locks.
 */
-mtx_init(&vm_page_queue_free_mtx, "vm page free queue", NULL, MTX_DEF);
+mtx_init(&vm_domainset_lock, "vm domainset lock", NULL, MTX_DEF);
 for (i = 0; i < PA_LOCK_COUNT; i++)
 mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF);
 for (i = 0; i < vm_ndomains; i++)
-vm_page_domain_init(&vm_dom[i]);
+vm_page_domain_init(i);
 /*
 * Allocate memory for use when boot strapping the kernel memory
@@ -704,7 +706,6 @@ vm_page_startup(vm_offset_t vaddr)
 * physical memory allocator's free lists.
 */
 vm_cnt.v_page_count = 0;
-vm_cnt.v_free_count = 0;
 for (segind = 0; segind < vm_phys_nsegs; segind++) {
 seg = &vm_phys_segs[segind];
 for (m = seg->first_page, pa = seg->start; pa < seg->end;
@@ -719,6 +720,8 @@ vm_page_startup(vm_offset_t vaddr)
 * or doesn't overlap any of them.
 */
 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
+struct vm_domain *vmd;
 if (seg->start < phys_avail[i] ||
 seg->end > phys_avail[i + 1])
 continue;
@@ -726,13 +729,14 @@ vm_page_startup(vm_offset_t vaddr)
 m = seg->first_page;
 pagecount = (u_long)atop(seg->end - seg->start);
-mtx_lock(&vm_page_queue_free_mtx);
+vmd = VM_DOMAIN(seg->domain);
+vm_domain_free_lock(vmd);
 vm_phys_free_contig(m, pagecount);
-vm_phys_freecnt_adj(m, (int)pagecount);
+vm_domain_freecnt_adj(vmd, (int)pagecount);
-mtx_unlock(&vm_page_queue_free_mtx);
+vm_domain_free_unlock(vmd);
 vm_cnt.v_page_count += (u_int)pagecount;
-vmd = &vm_dom[seg->domain];
+vmd = VM_DOMAIN(seg->domain);
 vmd->vmd_page_count += (u_int)pagecount;
 vmd->vmd_segs |= 1UL << m->segind;
 break;
@@ -1657,12 +1661,40 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex,
 return (m);
 }
+/*
+* Returns true if the number of free pages exceeds the minimum
+* for the request class and false otherwise.
+*/
+int
+vm_domain_available(struct vm_domain *vmd, int req, int npages)
+{
+vm_domain_free_assert_locked(vmd);
+req = req & VM_ALLOC_CLASS_MASK;
+/*
+* The page daemon is allowed to dig deeper into the free page list.
+*/
+if (curproc == pageproc && req != VM_ALLOC_INTERRUPT)
+req = VM_ALLOC_SYSTEM;
+if (vmd->vmd_free_count >= npages + vmd->vmd_free_reserved ||
+(req == VM_ALLOC_SYSTEM &&
+vmd->vmd_free_count >= npages + vmd->vmd_interrupt_free_min) ||
+(req == VM_ALLOC_INTERRUPT &&
+vmd->vmd_free_count >= npages))
+return (1);
+return (0);
+}
 vm_page_t
 vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain,
 int req, vm_page_t mpred)
 {
+struct vm_domain *vmd;
 vm_page_t m;
-int flags, req_class;
+int flags;
 u_int free_count;
 KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) &&
@@ -1678,34 +1710,27 @@ vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain,
 if (object != NULL)
 VM_OBJECT_ASSERT_WLOCKED(object);
-req_class = req & VM_ALLOC_CLASS_MASK;
-/*
-* The page daemon is allowed to dig deeper into the free page list.
-*/
-if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
-req_class = VM_ALLOC_SYSTEM;
-/*
-* Allocate a page if the number of free pages exceeds the minimum
-* for the request class.
-*/
 again:
 m = NULL;
-mtx_lock(&vm_page_queue_free_mtx);
-if (vm_cnt.v_free_count > vm_cnt.v_free_reserved ||
-(req_class == VM_ALLOC_SYSTEM &&
-vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) ||
-(req_class == VM_ALLOC_INTERRUPT &&
-vm_cnt.v_free_count > 0)) {
+#if VM_NRESERVLEVEL > 0
+if (vm_object_reserv(object) &&
+(m = vm_reserv_extend(req, object, pindex, domain, mpred))
+!= NULL) {
+domain = vm_phys_domain(m);
+vmd = VM_DOMAIN(domain);
+goto found;
+}
+#endif
+vmd = VM_DOMAIN(domain);
+vm_domain_free_lock(vmd);
+if (vm_domain_available(vmd, req, 1)) {
 /*
 * Can we allocate the page from a reservation?
 */
 #if VM_NRESERVLEVEL > 0
-if (object == NULL || (object->flags & (OBJ_COLORED |
-OBJ_FICTITIOUS)) != OBJ_COLORED || (m =
-vm_reserv_alloc_page(object, pindex, domain,
-mpred)) == NULL)
+if (!vm_object_reserv(object) ||
+(m = vm_reserv_alloc_page(object, pindex,
+domain, mpred)) == NULL)
 #endif
 {
 /*
@@ -1727,7 +1752,7 @@ vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain,
 /*
 * Not allocatable, give up.
 */
-if (vm_page_alloc_fail(object, req))
+if (vm_domain_alloc_fail(vmd, object, req))
 goto again;
 return (NULL);
 }
@@ -1736,8 +1761,18 @@ vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain,
 * At this point we had better have found a good page.
 */
 KASSERT(m != NULL, ("missing page"));
-free_count = vm_phys_freecnt_adj(m, -1);
-mtx_unlock(&vm_page_queue_free_mtx);
+free_count = vm_domain_freecnt_adj(vmd, -1);
+vm_domain_free_unlock(vmd);
+/*
+* Don't wakeup too often - wakeup the pageout daemon when
+* we would be nearly out of memory.
+*/
+if (vm_paging_needed(vmd, free_count))
+pagedaemon_wakeup(vmd->vmd_domain);
+#if VM_NRESERVLEVEL > 0
+found:
+#endif
 vm_page_alloc_check(m);
 /*
@@ -1770,7 +1805,7 @@ vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain,
 if (object != NULL) {
 if (vm_page_insert_after(m, object, pindex, mpred)) {
-pagedaemon_wakeup();
+pagedaemon_wakeup(domain);
 if (req & VM_ALLOC_WIRED) {
 atomic_subtract_int(&vm_cnt.v_wire_count, 1);
 m->wire_count = 0;
@@ -1795,13 +1830,6 @@ vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain,
 } else
 m->pindex = pindex;
-/*
-* Don't wakeup too often - wakeup the pageout daemon when
-* we would be nearly out of memory.
-*/
-if (vm_paging_needed(free_count))
-pagedaemon_wakeup();
 return (m);
 }
@@ -1869,9 +1897,9 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
 int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
 vm_paddr_t boundary, vm_memattr_t memattr)
 {
+struct vm_domain *vmd;
 vm_page_t m, m_ret, mpred;
 u_int busy_lock, flags, oflags;
-int req_class;
 mpred = NULL; /* XXX: pacify gcc */
 KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) &&
@@ -1889,13 +1917,6 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
 object));
 }
 KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero"));
-req_class = req & VM_ALLOC_CLASS_MASK;
-/*
-* The page daemon is allowed to dig deeper into the free page list.
-*/
-if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
-req_class = VM_ALLOC_SYSTEM;
 if (object != NULL) {
 mpred = vm_radix_lookup_le(&object->rtree, pindex);
@@ -1908,19 +1929,25 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
 * below the lower bound for the allocation class?
 */
 again:
+#if VM_NRESERVLEVEL > 0
+if (vm_object_reserv(object) &&
+(m_ret = vm_reserv_extend_contig(req, object, pindex, domain,
+npages, low, high, alignment, boundary, mpred)) != NULL) {
+domain = vm_phys_domain(m_ret);
+vmd = VM_DOMAIN(domain);
+goto found;
+}
+#endif
 m_ret = NULL;
-mtx_lock(&vm_page_queue_free_mtx);
-if (vm_cnt.v_free_count >= npages + vm_cnt.v_free_reserved ||
-(req_class == VM_ALLOC_SYSTEM &&
-vm_cnt.v_free_count >= npages + vm_cnt.v_interrupt_free_min) ||
-(req_class == VM_ALLOC_INTERRUPT &&
-vm_cnt.v_free_count >= npages)) {
+vmd = VM_DOMAIN(domain);
+vm_domain_free_lock(vmd);
+if (vm_domain_available(vmd, req, npages)) {
 /*
 * Can we allocate the pages from a reservation?
 */
 #if VM_NRESERVLEVEL > 0
 retry:
-if (object == NULL || (object->flags & OBJ_COLORED) == 0 ||
+if (!vm_object_reserv(object) ||
 (m_ret = vm_reserv_alloc_contig(object, pindex, domain,
 npages, low, high, alignment, boundary, mpred)) == NULL)
 #endif
@@ -1936,12 +1963,15 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
 #endif
 }
 if (m_ret == NULL) {
-if (vm_page_alloc_fail(object, req))
+if (vm_domain_alloc_fail(vmd, object, req))
 goto again;
 return (NULL);
 }
-vm_phys_freecnt_adj(m_ret, -npages);
-mtx_unlock(&vm_page_queue_free_mtx);
+vm_domain_freecnt_adj(vmd, -npages);
+vm_domain_free_unlock(vmd);
+#if VM_NRESERVLEVEL > 0
+found:
+#endif
 for (m = m_ret; m < &m_ret[npages]; m++)
 vm_page_alloc_check(m);
@@ -1977,7 +2007,7 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
 m->oflags = oflags;
 if (object != NULL) {
 if (vm_page_insert_after(m, object, pindex, mpred)) {
-pagedaemon_wakeup();
+pagedaemon_wakeup(domain);
 if ((req & VM_ALLOC_WIRED) != 0)
 atomic_subtract_int(
 &vm_cnt.v_wire_count, npages);
@@ -2007,8 +2037,9 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
 pmap_page_set_memattr(m, memattr);
 pindex++;
 }
-if (vm_paging_needed(vm_cnt.v_free_count))
-pagedaemon_wakeup();
+vmd = VM_DOMAIN(domain);
+if (vm_paging_needed(vmd, vmd->vmd_free_count))
+pagedaemon_wakeup(domain);
 return (m_ret);
 }
@@ -2070,37 +2101,26 @@ vm_page_alloc_freelist(int freelist, int req)
 vm_page_t
 vm_page_alloc_freelist_domain(int domain, int freelist, int req)
 {
+struct vm_domain *vmd;
 vm_page_t m;
 u_int flags, free_count;
-int req_class;
-req_class = req & VM_ALLOC_CLASS_MASK;
-/*
-* The page daemon is allowed to dig deeper into the free page list.
-*/
-if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
-req_class = VM_ALLOC_SYSTEM;
 /*
 * Do not allocate reserved pages unless the req has asked for it.
 */
+vmd = VM_DOMAIN(domain);
 again:
-mtx_lock(&vm_page_queue_free_mtx);
-if (vm_cnt.v_free_count > vm_cnt.v_free_reserved ||
-(req_class == VM_ALLOC_SYSTEM &&
-vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) ||
-(req_class == VM_ALLOC_INTERRUPT &&
-vm_cnt.v_free_count > 0))
+vm_domain_free_lock(vmd);
+if (vm_domain_available(vmd, req, 1))
 m = vm_phys_alloc_freelist_pages(domain, freelist,
 VM_FREEPOOL_DIRECT, 0);
 if (m == NULL) {
-if (vm_page_alloc_fail(NULL, req))
+if (vm_domain_alloc_fail(vmd, NULL, req))
 goto again;
 return (NULL);
 }
-free_count = vm_phys_freecnt_adj(m, -1);
-mtx_unlock(&vm_page_queue_free_mtx);
+free_count = vm_domain_freecnt_adj(vmd, -1);
+vm_domain_free_unlock(vmd);
 vm_page_alloc_check(m);
 /*
@@ -2121,8 +2141,8 @@ vm_page_alloc_freelist_domain(int domain, int freelist, int req)
 }
 /* Unmanaged pages don't use "act_count". */
 m->oflags = VPO_UNMANAGED;
-if (vm_paging_needed(free_count))
-pagedaemon_wakeup();
+if (vm_paging_needed(vmd, free_count))
+pagedaemon_wakeup(domain);
 return (m);
 }
@@ -2344,9 +2364,10 @@ vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end,
 * "req_class" must be an allocation class.
 */
 static int
-vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run,
+vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run,
 vm_paddr_t high)
 {
+struct vm_domain *vmd;
 struct mtx *m_mtx;
 struct spglist free;
 vm_object_t object;
@@ -2496,7 +2517,9 @@ vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run,
 unlock:
 VM_OBJECT_WUNLOCK(object);
 } else {
-mtx_lock(&vm_page_queue_free_mtx);
+MPASS(vm_phys_domain(m) == domain);
+vmd = VM_DOMAIN(domain);
+vm_domain_free_lock(vmd);
 order = m->order;
 if (order < VM_NFREEORDER) {
 /*
@@ -2513,7 +2536,7 @@ vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run,
 else if (vm_reserv_is_page_free(m))
 order = 0;
 #endif
-mtx_unlock(&vm_page_queue_free_mtx);
+vm_domain_free_unlock(vmd);
 if (order == VM_NFREEORDER)
 error = EINVAL;
 }
@@ -2521,13 +2544,15 @@ vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run,
 if (m_mtx != NULL)
 mtx_unlock(m_mtx);
 if ((m = SLIST_FIRST(&free)) != NULL) {
-mtx_lock(&vm_page_queue_free_mtx);
+vmd = VM_DOMAIN(domain);
+vm_domain_free_lock(vmd);
 do {
+MPASS(vm_phys_domain(m) == domain);
 SLIST_REMOVE_HEAD(&free, plinks.s.ss);
-vm_page_free_phys(m);
+vm_page_free_phys(vmd, m);
 } while ((m = SLIST_FIRST(&free)) != NULL);
-vm_page_free_wakeup();
-mtx_unlock(&vm_page_queue_free_mtx);
+vm_domain_free_wakeup(vmd);
+vm_domain_free_unlock(vmd);
 }
 return (error);
 }
@@ -2567,6 +2592,7 @@ bool
 vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
 vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
 {
+struct vm_domain *vmd;
 vm_paddr_t curr_low;
 vm_page_t m_run, m_runs[NRUNS];
 u_long count, reclaimed;
@@ -2587,9 +2613,10 @@ vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
 * Return if the number of free pages cannot satisfy the requested
 * allocation.
 */
-count = vm_cnt.v_free_count;
-if (count < npages + vm_cnt.v_free_reserved || (count < npages +
-vm_cnt.v_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) ||
+vmd = VM_DOMAIN(domain);
+count = vmd->vmd_free_count;
+if (count < npages + vmd->vmd_free_reserved || (count < npages +
+vmd->vmd_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) ||
 (count < npages && req_class == VM_ALLOC_INTERRUPT))
 return (false);
@@ -2625,8 +2652,8 @@ vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
 for (i = 0; count > 0 && i < NRUNS; i++) {
 count--;
 m_run = m_runs[RUN_INDEX(count)];
-error = vm_page_reclaim_run(req_class, npages, m_run,
-high);
+error = vm_page_reclaim_run(req_class, domain, npages,
+m_run, high);
 if (error == 0) {
 reclaimed += npages;
 if (reclaimed >= MIN_RECLAIM)
@@ -2666,66 +2693,191 @@ vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high,
 return (ret);
 }
+/*
+* Set the domain in the appropriate page level domainset.
+*/
+void
+vm_domain_set(struct vm_domain *vmd)
+{
+mtx_lock(&vm_domainset_lock);
+if (!vmd->vmd_minset && vm_paging_min(vmd)) {
+vmd->vmd_minset = 1;
+DOMAINSET_SET(vmd->vmd_domain, &vm_min_domains);
+}
+if (!vmd->vmd_severeset && vm_paging_severe(vmd)) {
+vmd->vmd_severeset = 1;
+DOMAINSET_CLR(vmd->vmd_domain, &vm_severe_domains);
+}
+mtx_unlock(&vm_domainset_lock);
+}
+/*
+* Clear the domain from the appropriate page level domainset.
+*/
+static void
+vm_domain_clear(struct vm_domain *vmd)
+{
+mtx_lock(&vm_domainset_lock);
+if (vmd->vmd_minset && !vm_paging_min(vmd)) {
+vmd->vmd_minset = 0;
+DOMAINSET_CLR(vmd->vmd_domain, &vm_min_domains);
+if (vm_min_waiters != 0) {
+vm_min_waiters = 0;
+wakeup(&vm_min_domains);
+}
+}
+if (vmd->vmd_severeset && !vm_paging_severe(vmd)) {
+vmd->vmd_severeset = 0;
+DOMAINSET_CLR(vmd->vmd_domain, &vm_severe_domains);
+if (vm_severe_waiters != 0) {
+vm_severe_waiters = 0;
+wakeup(&vm_severe_domains);
+}
+}
+mtx_unlock(&vm_domainset_lock);
+}
+/*
+* Wait for free pages to exceed the min threshold globally.
+*/
+void
+vm_wait_min(void)
+{
+mtx_lock(&vm_domainset_lock);
+while (vm_page_count_min()) {
+vm_min_waiters++;
+msleep(&vm_min_domains, &vm_domainset_lock, PVM, "vmwait", 0);
+}
+mtx_unlock(&vm_domainset_lock);
+}
+/*
+* Wait for free pages to exceed the severe threshold globally.
+*/
+void
+vm_wait_severe(void)
+{
+mtx_lock(&vm_domainset_lock);
+while (vm_page_count_severe()) {
+vm_severe_waiters++;
+msleep(&vm_severe_domains, &vm_domainset_lock, PVM,
+"vmwait", 0);
+}
+mtx_unlock(&vm_domainset_lock);
+}
+u_int
+vm_wait_count(void)
+{
+u_int cnt;
+int i;
+cnt = 0;
+for (i = 0; i < vm_ndomains; i++)
+cnt += VM_DOMAIN(i)->vmd_waiters;
+cnt += vm_severe_waiters + vm_min_waiters;
+return (cnt);
+}
+/*
+* vm_wait_domain:
+*
+* Sleep until free pages are available for allocation.
+* - Called in various places after failed memory allocations.
+*/
+void
+vm_wait_domain(int domain)
+{
+struct vm_domain *vmd;
+vmd = VM_DOMAIN(domain);
+vm_domain_free_assert_locked(vmd);
+if (curproc == pageproc) {
+vmd->vmd_pageout_pages_needed = 1;
+msleep(&vmd->vmd_pageout_pages_needed,
+vm_domain_free_lockptr(vmd), PDROP | PSWP, "VMWait", 0);
+} else {
+if (pageproc == NULL)
+panic("vm_wait in early boot");
+pagedaemon_wait(domain, PVM, "vmwait");
+}
+}
 /*
 * vm_wait: (also see VM_WAIT macro)
 *
 * Sleep until free pages are available for allocation.
-* - Called in various places before memory allocations.
+* - Called in various places after failed memory allocations.
 */
-static void
-_vm_wait(void)
-{
-mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
-if (curproc == pageproc) {
-vm_pageout_pages_needed = 1;
-msleep(&vm_pageout_pages_needed, &vm_page_queue_free_mtx,
-PDROP | PSWP, "VMWait", 0);
-} else {
-if (pageproc == NULL)
-panic("vm_wait in early boot");
-pagedaemon_wait(PVM, "vmwait");
-}
-}
 void
 vm_wait(void)
 {
-mtx_lock(&vm_page_queue_free_mtx);
-_vm_wait();
+/*
+* We use racey wakeup synchronization to avoid expensive global
+* locking for the pageproc when sleeping with a non-specific vm_wait.
+* To handle this, we only sleep for one tick in this instance. It
+* is expected that most allocations for the pageproc will come from
+* kmem or vm_page_grab* which will use the more specific and
+* race-free vm_wait_domain().
+*/
+if (curproc == pageproc) {
+mtx_lock(&vm_domainset_lock);
+vm_pageproc_waiters++;
+msleep(&vm_pageproc_waiters, &vm_domainset_lock, PVM,
+"pageprocwait", 1);
+mtx_unlock(&vm_domainset_lock);
+} else {
+/*
+* XXX Ideally we would wait only until the allocation could
+* be satisfied. This condition can cause new allocators to
+* consume all freed pages while old allocators wait.
+*/
+mtx_lock(&vm_domainset_lock);
+if (vm_page_count_min()) {
+vm_min_waiters++;
+msleep(&vm_min_domains, &vm_domainset_lock, PVM,
+"vmwait", 0);
+}
+mtx_unlock(&vm_domainset_lock);
+}
 }
 /*
-* vm_page_alloc_fail:
+* vm_domain_alloc_fail:
 *
 * Called when a page allocation function fails. Informs the
 * pagedaemon and performs the requested wait. Requires the
-* page_queue_free and object lock on entry. Returns with the
+* domain_free and object lock on entry. Returns with the
 * object lock held and free lock released. Returns an error when
 * retry is necessary.
 *
 */
 static int
-vm_page_alloc_fail(vm_object_t object, int req)
+vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, int req)
 {
-mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+vm_domain_free_assert_locked(vmd);
-atomic_add_int(&vm_pageout_deficit,
+atomic_add_int(&vmd->vmd_pageout_deficit,
 max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
 if (req & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) {
 if (object != NULL)
 VM_OBJECT_WUNLOCK(object);
-_vm_wait();
+vm_wait_domain(vmd->vmd_domain);
 if (object != NULL)
 VM_OBJECT_WLOCK(object);
 if (req & VM_ALLOC_WAITOK)
 return (EAGAIN);
 } else {
-mtx_unlock(&vm_page_queue_free_mtx);
-pagedaemon_wakeup();
+vm_domain_free_unlock(vmd);
+pagedaemon_wakeup(vmd->vmd_domain);
 }
 return (0);
 }
@@ -2744,18 +2896,19 @@ void
 vm_waitpfault(void)
 {
-mtx_lock(&vm_page_queue_free_mtx);
-pagedaemon_wait(PUSER, "pfault");
+mtx_lock(&vm_domainset_lock);
+if (vm_page_count_min()) {
+vm_min_waiters++;
+msleep(&vm_min_domains, &vm_domainset_lock, PUSER, "pfault", 0);
+}
+mtx_unlock(&vm_domainset_lock);
 }
 struct vm_pagequeue *
 vm_page_pagequeue(vm_page_t m)
 {
-if (vm_page_in_laundry(m))
-return (&vm_dom[0].vmd_pagequeues[m->queue]);
-else
-return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]);
+return (&vm_pagequeue_domain(m)->vmd_pagequeues[m->queue]);
 }
 /*
@@ -2817,10 +2970,7 @@ vm_page_enqueue(uint8_t queue, vm_page_t m)
 KASSERT(queue < PQ_COUNT,
 ("vm_page_enqueue: invalid queue %u request for page %p",
 queue, m));
-if (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE)
-pq = &vm_dom[0].vmd_pagequeues[queue];
-else
-pq = &vm_phys_domain(m)->vmd_pagequeues[queue];
+pq = &vm_pagequeue_domain(m)->vmd_pagequeues[queue];
 vm_pagequeue_lock(pq);
 m->queue = queue;
 TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
@@ -2902,7 +3052,7 @@ vm_page_activate(vm_page_t m)
 }
 /*
-* vm_page_free_wakeup:
+* vm_domain_free_wakeup:
 *
 * Helper routine for vm_page_free_toq(). This routine is called
 * when a page is added to the free queues.
@@ -2910,28 +3060,39 @@ vm_page_activate(vm_page_t m)
 * The page queues must be locked.
 */
 static void
-vm_page_free_wakeup(void)
+vm_domain_free_wakeup(struct vm_domain *vmd)
 {
-mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+vm_domain_free_assert_locked(vmd);
 /*
 * if pageout daemon needs pages, then tell it that there are
 * some free.
 */
-if (vm_pageout_pages_needed &&
-vm_cnt.v_free_count >= vm_cnt.v_pageout_free_min) {
-wakeup(&vm_pageout_pages_needed);
-vm_pageout_pages_needed = 0;
+if (vmd->vmd_pageout_pages_needed &&
+vmd->vmd_free_count >= vmd->vmd_pageout_free_min) {
+wakeup(&vmd->vmd_pageout_pages_needed);
+vmd->vmd_pageout_pages_needed = 0;
 }
 /*
 * wakeup processes that are waiting on memory if we hit a
 * high water mark. And wakeup scheduler process if we have
 * lots of memory. this process will swapin processes.
 */
-if (vm_pages_needed && !vm_page_count_min()) {
-vm_pages_needed = false;
-wakeup(&vm_cnt.v_free_count);
+if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) {
+vmd->vmd_pages_needed = false;
+wakeup(&vmd->vmd_free_count);
 }
+if ((vmd->vmd_minset && !vm_paging_min(vmd)) ||
+(vmd->vmd_severeset && !vm_paging_severe(vmd)))
+vm_domain_clear(vmd);
+/* See comments in vm_wait(); */
+if (vm_pageproc_waiters) {
+vm_pageproc_waiters = 0;
+wakeup(&vm_pageproc_waiters);
+}
 }
 /*
@ -3018,12 +3179,12 @@ vm_page_free_prep(vm_page_t m, bool pagequeue_locked)
* queues. This is the last step to free a page. * queues. This is the last step to free a page.
*/ */
static void static void
vm_page_free_phys(vm_page_t m) vm_page_free_phys(struct vm_domain *vmd, vm_page_t m)
{ {
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); vm_domain_free_assert_locked(vmd);
vm_phys_freecnt_adj(m, 1); vm_domain_freecnt_adj(vmd, 1);
#if VM_NRESERVLEVEL > 0 #if VM_NRESERVLEVEL > 0
if (!vm_reserv_free_page(m)) if (!vm_reserv_free_page(m))
#endif #endif
@ -3033,15 +3194,27 @@ vm_page_free_phys(vm_page_t m)
void void
vm_page_free_phys_pglist(struct pglist *tq) vm_page_free_phys_pglist(struct pglist *tq)
{ {
struct vm_domain *vmd;
vm_page_t m; vm_page_t m;
if (TAILQ_EMPTY(tq)) if (TAILQ_EMPTY(tq))
return; return;
mtx_lock(&vm_page_queue_free_mtx); vmd = NULL;
TAILQ_FOREACH(m, tq, listq) TAILQ_FOREACH(m, tq, listq) {
vm_page_free_phys(m); if (vmd != vm_pagequeue_domain(m)) {
vm_page_free_wakeup(); if (vmd != NULL) {
mtx_unlock(&vm_page_queue_free_mtx); vm_domain_free_wakeup(vmd);
vm_domain_free_unlock(vmd);
}
vmd = vm_pagequeue_domain(m);
vm_domain_free_lock(vmd);
}
vm_page_free_phys(vmd, m);
}
if (vmd != NULL) {
vm_domain_free_wakeup(vmd);
vm_domain_free_unlock(vmd);
}
} }
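
The rewritten vm_page_free_phys_pglist() above batches work under each domain's free lock, cycling locks only when consecutive pages belong to different domains. A self-contained sketch of that idiom, with hypothetical item/domain types standing in for vm_page and vm_domain:

/*
 * Sketch of per-domain lock batching: the lock is dropped and re-acquired
 * only when consecutive items map to different domains.
 */
#include <pthread.h>
#include <stddef.h>

struct dom {
        pthread_mutex_t lock;
        unsigned long   freed;
};

struct item {
        struct item     *next;
        struct dom      *dom;
};

static struct dom *
item_domain(struct item *it)
{
        return (it->dom);
}

static void
free_item_locked(struct dom *d, struct item *it)
{
        (void)it;
        d->freed++;             /* stands in for vm_page_free_phys() */
}

static void
free_item_list(struct item *head)
{
        struct dom *d = NULL;
        struct item *it;

        for (it = head; it != NULL; it = it->next) {
                if (d != item_domain(it)) {
                        if (d != NULL)
                                pthread_mutex_unlock(&d->lock);
                        d = item_domain(it);
                        pthread_mutex_lock(&d->lock);
                }
                free_item_locked(d, it);
        }
        if (d != NULL)
                pthread_mutex_unlock(&d->lock);
}
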
/* /*
@ -3056,13 +3229,15 @@ vm_page_free_phys_pglist(struct pglist *tq)
void void
vm_page_free_toq(vm_page_t m) vm_page_free_toq(vm_page_t m)
{ {
struct vm_domain *vmd;
if (!vm_page_free_prep(m, false)) if (!vm_page_free_prep(m, false))
return; return;
mtx_lock(&vm_page_queue_free_mtx); vmd = vm_pagequeue_domain(m);
vm_page_free_phys(m); vm_domain_free_lock(vmd);
vm_page_free_wakeup(); vm_page_free_phys(vmd, m);
mtx_unlock(&vm_page_queue_free_mtx); vm_domain_free_wakeup(vmd);
vm_domain_free_unlock(vmd);
} }
/* /*
@ -3173,7 +3348,7 @@ _vm_page_deactivate(vm_page_t m, boolean_t noreuse)
if ((queue = m->queue) == PQ_INACTIVE && !noreuse) if ((queue = m->queue) == PQ_INACTIVE && !noreuse)
return; return;
if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_INACTIVE]; pq = &vm_pagequeue_domain(m)->vmd_pagequeues[PQ_INACTIVE];
/* Avoid multiple acquisitions of the inactive queue lock. */ /* Avoid multiple acquisitions of the inactive queue lock. */
if (queue == PQ_INACTIVE) { if (queue == PQ_INACTIVE) {
vm_pagequeue_lock(pq); vm_pagequeue_lock(pq);
@ -3185,8 +3360,9 @@ _vm_page_deactivate(vm_page_t m, boolean_t noreuse)
} }
m->queue = PQ_INACTIVE; m->queue = PQ_INACTIVE;
if (noreuse) if (noreuse)
TAILQ_INSERT_BEFORE(&vm_phys_domain(m)->vmd_inacthead, TAILQ_INSERT_BEFORE(
m, plinks.q); &vm_pagequeue_domain(m)->vmd_inacthead, m,
plinks.q);
else else
TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
vm_pagequeue_cnt_inc(pq); vm_pagequeue_cnt_inc(pq);
@ -3963,10 +4139,10 @@ vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits)
DB_SHOW_COMMAND(page, vm_page_print_page_info) DB_SHOW_COMMAND(page, vm_page_print_page_info)
{ {
db_printf("vm_cnt.v_free_count: %d\n", vm_cnt.v_free_count); db_printf("vm_cnt.v_free_count: %d\n", vm_free_count());
db_printf("vm_cnt.v_inactive_count: %d\n", vm_cnt.v_inactive_count); db_printf("vm_cnt.v_inactive_count: %d\n", vm_inactive_count());
db_printf("vm_cnt.v_active_count: %d\n", vm_cnt.v_active_count); db_printf("vm_cnt.v_active_count: %d\n", vm_active_count());
db_printf("vm_cnt.v_laundry_count: %d\n", vm_cnt.v_laundry_count); db_printf("vm_cnt.v_laundry_count: %d\n", vm_laundry_count());
db_printf("vm_cnt.v_wire_count: %d\n", vm_cnt.v_wire_count); db_printf("vm_cnt.v_wire_count: %d\n", vm_cnt.v_wire_count);
db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved); db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved);
db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min); db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min);
@ -3978,7 +4154,7 @@ DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
{ {
int dom; int dom;
db_printf("pq_free %d\n", vm_cnt.v_free_count); db_printf("pq_free %d\n", vm_free_count());
for (dom = 0; dom < vm_ndomains; dom++) { for (dom = 0; dom < vm_ndomains; dom++) {
db_printf( db_printf(
"dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d pq_unsw %d\n", "dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d pq_unsw %d\n",

sys/vm/vm_page.h

@ -218,54 +218,10 @@ TAILQ_HEAD(pglist, vm_page);
#endif #endif
SLIST_HEAD(spglist, vm_page); SLIST_HEAD(spglist, vm_page);
struct vm_pagequeue {
struct mtx pq_mutex;
struct pglist pq_pl;
int pq_cnt;
u_int * const pq_vcnt;
const char * const pq_name;
} __aligned(CACHE_LINE_SIZE);
struct vm_domain {
struct vm_pagequeue vmd_pagequeues[PQ_COUNT];
struct vmem *vmd_kernel_arena;
u_int vmd_page_count;
u_int vmd_free_count;
long vmd_segs; /* bitmask of the segments */
boolean_t vmd_oom;
int vmd_oom_seq;
int vmd_last_active_scan;
struct vm_page vmd_laundry_marker;
struct vm_page vmd_marker; /* marker for pagedaemon private use */
struct vm_page vmd_inacthead; /* marker for LRU-defeating insertions */
};
extern struct vm_domain vm_dom[MAXMEMDOM];
#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED)
#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex)
#define vm_pagequeue_lockptr(pq) (&(pq)->pq_mutex)
#define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex)
#ifdef _KERNEL #ifdef _KERNEL
extern vm_page_t bogus_page; extern vm_page_t bogus_page;
static __inline void
vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend)
{
#ifdef notyet
vm_pagequeue_assert_locked(pq);
#endif
pq->pq_cnt += addend;
atomic_add_int(pq->pq_vcnt, addend);
}
#define vm_pagequeue_cnt_inc(pq) vm_pagequeue_cnt_add((pq), 1)
#define vm_pagequeue_cnt_dec(pq) vm_pagequeue_cnt_add((pq), -1)
#endif /* _KERNEL */ #endif /* _KERNEL */
extern struct mtx_padalign vm_page_queue_free_mtx;
extern struct mtx_padalign pa_lock[]; extern struct mtx_padalign pa_lock[];
#if defined(__arm__) #if defined(__arm__)

sys/vm/vm_pageout.c

@ -110,6 +110,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_pageout.h> #include <vm/vm_pageout.h>
#include <vm/vm_pager.h> #include <vm/vm_pager.h>
#include <vm/vm_phys.h> #include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>
#include <vm/swap_pager.h> #include <vm/swap_pager.h>
#include <vm/vm_extern.h> #include <vm/vm_extern.h>
#include <vm/uma.h> #include <vm/uma.h>
@ -147,19 +148,7 @@ SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan);
#define VM_LAUNDER_RATE 10 #define VM_LAUNDER_RATE 10
#define VM_INACT_SCAN_RATE 2 #define VM_INACT_SCAN_RATE 2
int vm_pageout_deficit; /* Estimated number of pages deficit */
u_int vm_pageout_wakeup_thresh;
static int vm_pageout_oom_seq = 12; static int vm_pageout_oom_seq = 12;
static bool vm_pageout_wanted; /* Event on which pageout daemon sleeps */
bool vm_pages_needed; /* Are threads waiting for free pages? */
/* Pending request for dirty page laundering. */
static enum {
VM_LAUNDRY_IDLE,
VM_LAUNDRY_BACKGROUND,
VM_LAUNDRY_SHORTFALL
} vm_laundry_request = VM_LAUNDRY_IDLE;
static int vm_inactq_scans;
static int vm_pageout_update_period; static int vm_pageout_update_period;
static int disable_swap_pageouts; static int disable_swap_pageouts;
@ -173,10 +162,6 @@ SYSCTL_INT(_vm, OID_AUTO, panic_on_oom,
CTLFLAG_RWTUN, &vm_panic_on_oom, 0, CTLFLAG_RWTUN, &vm_panic_on_oom, 0,
"panic on out of memory instead of killing the largest process"); "panic on out of memory instead of killing the largest process");
SYSCTL_INT(_vm, OID_AUTO, pageout_wakeup_thresh,
CTLFLAG_RWTUN, &vm_pageout_wakeup_thresh, 0,
"free page threshold for waking up the pageout daemon");
SYSCTL_INT(_vm, OID_AUTO, pageout_update_period, SYSCTL_INT(_vm, OID_AUTO, pageout_update_period,
CTLFLAG_RWTUN, &vm_pageout_update_period, 0, CTLFLAG_RWTUN, &vm_pageout_update_period, 0,
"Maximum active LRU update period"); "Maximum active LRU update period");
@ -200,11 +185,6 @@ SYSCTL_INT(_vm, OID_AUTO, act_scan_laundry_weight, CTLFLAG_RWTUN,
&act_scan_laundry_weight, 0, &act_scan_laundry_weight, 0,
"weight given to clean vs. dirty pages in active queue scans"); "weight given to clean vs. dirty pages in active queue scans");
static u_int vm_background_launder_target;
SYSCTL_UINT(_vm, OID_AUTO, background_launder_target, CTLFLAG_RWTUN,
&vm_background_launder_target, 0,
"background laundering target, in pages");
static u_int vm_background_launder_rate = 4096; static u_int vm_background_launder_rate = 4096;
SYSCTL_UINT(_vm, OID_AUTO, background_launder_rate, CTLFLAG_RWTUN, SYSCTL_UINT(_vm, OID_AUTO, background_launder_rate, CTLFLAG_RWTUN,
&vm_background_launder_rate, 0, &vm_background_launder_rate, 0,
@ -959,18 +939,18 @@ isqrt(u_int num)
static void static void
vm_pageout_laundry_worker(void *arg) vm_pageout_laundry_worker(void *arg)
{ {
struct vm_domain *domain; struct vm_domain *vmd;
struct vm_pagequeue *pq; struct vm_pagequeue *pq;
uint64_t nclean, ndirty; uint64_t nclean, ndirty;
u_int inactq_scans, last_launder; u_int inactq_scans, last_launder;
int domidx, last_target, launder, shortfall, shortfall_cycle, target; int domain, last_target, launder, shortfall, shortfall_cycle, target;
bool in_shortfall; bool in_shortfall;
domidx = (uintptr_t)arg; domain = (uintptr_t)arg;
domain = &vm_dom[domidx]; vmd = VM_DOMAIN(domain);
pq = &domain->vmd_pagequeues[PQ_LAUNDRY]; pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
KASSERT(domain->vmd_segs != 0, ("domain without segments")); KASSERT(vmd->vmd_segs != 0, ("domain without segments"));
vm_pageout_init_marker(&domain->vmd_laundry_marker, PQ_LAUNDRY); vm_pageout_init_marker(&vmd->vmd_laundry_marker, PQ_LAUNDRY);
shortfall = 0; shortfall = 0;
in_shortfall = false; in_shortfall = false;
@ -982,9 +962,9 @@ vm_pageout_laundry_worker(void *arg)
/* /*
* Calls to these handlers are serialized by the swap syscall lock. * Calls to these handlers are serialized by the swap syscall lock.
*/ */
(void)EVENTHANDLER_REGISTER(swapon, vm_pageout_swapon, domain, (void)EVENTHANDLER_REGISTER(swapon, vm_pageout_swapon, vmd,
EVENTHANDLER_PRI_ANY); EVENTHANDLER_PRI_ANY);
(void)EVENTHANDLER_REGISTER(swapoff, vm_pageout_swapoff, domain, (void)EVENTHANDLER_REGISTER(swapoff, vm_pageout_swapoff, vmd,
EVENTHANDLER_PRI_ANY); EVENTHANDLER_PRI_ANY);
/* /*
@ -1006,7 +986,7 @@ vm_pageout_laundry_worker(void *arg)
target = shortfall; target = shortfall;
} else if (!in_shortfall) } else if (!in_shortfall)
goto trybackground; goto trybackground;
else if (shortfall_cycle == 0 || vm_laundry_target() <= 0) { else if (shortfall_cycle == 0 || vm_laundry_target(vmd) <= 0) {
/* /*
* We recently entered shortfall and began laundering * We recently entered shortfall and began laundering
* pages. If we have completed that laundering run * pages. If we have completed that laundering run
@ -1040,11 +1020,12 @@ vm_pageout_laundry_worker(void *arg)
* memory pressure required to trigger laundering decreases. * memory pressure required to trigger laundering decreases.
*/ */
trybackground: trybackground:
nclean = vm_cnt.v_inactive_count + vm_cnt.v_free_count; nclean = vmd->vmd_free_count +
ndirty = vm_cnt.v_laundry_count; vmd->vmd_pagequeues[PQ_INACTIVE].pq_cnt;
ndirty = vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt;
if (target == 0 && inactq_scans != last_launder && if (target == 0 && inactq_scans != last_launder &&
ndirty * isqrt(inactq_scans - last_launder) >= nclean) { ndirty * isqrt(inactq_scans - last_launder) >= nclean) {
target = vm_background_launder_target; target = vmd->vmd_background_launder_target;
} }
/* /*
@ -1076,7 +1057,7 @@ vm_pageout_laundry_worker(void *arg)
* pages could exceed "target" by the maximum size of * pages could exceed "target" by the maximum size of
* a cluster minus one. * a cluster minus one.
*/ */
target -= min(vm_pageout_launder(domain, launder, target -= min(vm_pageout_launder(vmd, launder,
in_shortfall), target); in_shortfall), target);
pause("laundp", hz / VM_LAUNDER_RATE); pause("laundp", hz / VM_LAUNDER_RATE);
} }
@ -1087,8 +1068,8 @@ vm_pageout_laundry_worker(void *arg)
* kicks us. * kicks us.
*/ */
vm_pagequeue_lock(pq); vm_pagequeue_lock(pq);
if (target == 0 && vm_laundry_request == VM_LAUNDRY_IDLE) if (target == 0 && vmd->vmd_laundry_request == VM_LAUNDRY_IDLE)
(void)mtx_sleep(&vm_laundry_request, (void)mtx_sleep(&vmd->vmd_laundry_request,
vm_pagequeue_lockptr(pq), PVM, "launds", 0); vm_pagequeue_lockptr(pq), PVM, "launds", 0);
/* /*
@ -1096,16 +1077,17 @@ vm_pageout_laundry_worker(void *arg)
* a shortfall laundering unless we're already in the middle of * a shortfall laundering unless we're already in the middle of
* one. This may preempt a background laundering. * one. This may preempt a background laundering.
*/ */
if (vm_laundry_request == VM_LAUNDRY_SHORTFALL && if (vmd->vmd_laundry_request == VM_LAUNDRY_SHORTFALL &&
(!in_shortfall || shortfall_cycle == 0)) { (!in_shortfall || shortfall_cycle == 0)) {
shortfall = vm_laundry_target() + vm_pageout_deficit; shortfall = vm_laundry_target(vmd) +
vmd->vmd_pageout_deficit;
target = 0; target = 0;
} else } else
shortfall = 0; shortfall = 0;
if (target == 0) if (target == 0)
vm_laundry_request = VM_LAUNDRY_IDLE; vmd->vmd_laundry_request = VM_LAUNDRY_IDLE;
inactq_scans = vm_inactq_scans; inactq_scans = vmd->vmd_inactq_scans;
vm_pagequeue_unlock(pq); vm_pagequeue_unlock(pq);
} }
} }
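
For reference, the background-laundering trigger used by the laundry worker above fires once ndirty * isqrt(scans since the last laundering) reaches nclean, so a small laundry queue needs many idle inactive-queue scans before background laundering starts. A worked example with made-up per-domain counts (the isqrt here is a naive stand-in for the kernel's version):

#include <stdio.h>

static unsigned
isqrt(unsigned num)
{
        unsigned r = 0;

        while ((r + 1) * (r + 1) <= num)
                r++;
        return (r);
}

int
main(void)
{
        unsigned nclean = 100000;       /* free + inactive pages */
        unsigned ndirty = 20000;        /* laundry queue length */
        unsigned scans;

        for (scans = 1; (unsigned long long)ndirty * isqrt(scans) < nclean;
            scans++)
                ;
        /* With these counts, isqrt(scans) must reach 5, so scans == 25. */
        printf("background laundering kicks in after %u scans\n", scans);
        return (0);
}
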
@ -1134,7 +1116,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
* If we need to reclaim memory ask kernel caches to return * If we need to reclaim memory ask kernel caches to return
* some. We rate limit to avoid thrashing. * some. We rate limit to avoid thrashing.
*/ */
if (vmd == &vm_dom[0] && pass > 0 && if (vmd == VM_DOMAIN(0) && pass > 0 &&
(time_uptime - lowmem_uptime) >= lowmem_period) { (time_uptime - lowmem_uptime) >= lowmem_period) {
/* /*
* Decrease registered cache sizes. * Decrease registered cache sizes.
@ -1163,8 +1145,8 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
* the page daemon and this calculation. * the page daemon and this calculation.
*/ */
if (pass > 0) { if (pass > 0) {
deficit = atomic_readandclear_int(&vm_pageout_deficit); deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit);
page_shortage = vm_paging_target() + deficit; page_shortage = vm_paging_target(vmd) + deficit;
} else } else
page_shortage = deficit = 0; page_shortage = deficit = 0;
starting_page_shortage = page_shortage; starting_page_shortage = page_shortage;
@ -1357,18 +1339,20 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
* keep count. * keep count.
*/ */
if (starting_page_shortage > 0) { if (starting_page_shortage > 0) {
pq = &vm_dom[0].vmd_pagequeues[PQ_LAUNDRY]; pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
vm_pagequeue_lock(pq); vm_pagequeue_lock(pq);
if (vm_laundry_request == VM_LAUNDRY_IDLE && if (vmd->vmd_laundry_request == VM_LAUNDRY_IDLE &&
(pq->pq_cnt > 0 || atomic_load_acq_int(&swapdev_enabled))) { (pq->pq_cnt > 0 || atomic_load_acq_int(&swapdev_enabled))) {
if (page_shortage > 0) { if (page_shortage > 0) {
vm_laundry_request = VM_LAUNDRY_SHORTFALL; vmd->vmd_laundry_request = VM_LAUNDRY_SHORTFALL;
VM_CNT_INC(v_pdshortfalls); VM_CNT_INC(v_pdshortfalls);
} else if (vm_laundry_request != VM_LAUNDRY_SHORTFALL) } else if (vmd->vmd_laundry_request !=
vm_laundry_request = VM_LAUNDRY_BACKGROUND; VM_LAUNDRY_SHORTFALL)
wakeup(&vm_laundry_request); vmd->vmd_laundry_request =
VM_LAUNDRY_BACKGROUND;
wakeup(&vmd->vmd_laundry_request);
} }
vm_inactq_scans++; vmd->vmd_inactq_scans++;
vm_pagequeue_unlock(pq); vm_pagequeue_unlock(pq);
} }
@ -1397,9 +1381,9 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
* more aggressively, improving the effectiveness of clustering and * more aggressively, improving the effectiveness of clustering and
* ensuring that they can eventually be reused. * ensuring that they can eventually be reused.
*/ */
inactq_shortage = vm_cnt.v_inactive_target - (vm_cnt.v_inactive_count + inactq_shortage = vmd->vmd_inactive_target - (pq->pq_cnt +
vm_cnt.v_laundry_count / act_scan_laundry_weight) + vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt / act_scan_laundry_weight) +
vm_paging_target() + deficit + addl_page_shortage; vm_paging_target(vmd) + deficit + addl_page_shortage;
inactq_shortage *= act_scan_laundry_weight; inactq_shortage *= act_scan_laundry_weight;
pq = &vmd->vmd_pagequeues[PQ_ACTIVE]; pq = &vmd->vmd_pagequeues[PQ_ACTIVE];
@ -1742,6 +1726,8 @@ vm_pageout_oom(int shortage)
} }
sx_sunlock(&allproc_lock); sx_sunlock(&allproc_lock);
if (bigproc != NULL) { if (bigproc != NULL) {
int i;
if (vm_panic_on_oom != 0) if (vm_panic_on_oom != 0)
panic("out of swap space"); panic("out of swap space");
PROC_LOCK(bigproc); PROC_LOCK(bigproc);
@ -1749,19 +1735,20 @@ vm_pageout_oom(int shortage)
sched_nice(bigproc, PRIO_MIN); sched_nice(bigproc, PRIO_MIN);
_PRELE(bigproc); _PRELE(bigproc);
PROC_UNLOCK(bigproc); PROC_UNLOCK(bigproc);
wakeup(&vm_cnt.v_free_count); for (i = 0; i < vm_ndomains; i++)
wakeup(&VM_DOMAIN(i)->vmd_free_count);
} }
} }
static void static void
vm_pageout_worker(void *arg) vm_pageout_worker(void *arg)
{ {
struct vm_domain *domain; struct vm_domain *vmd;
int domidx, pass; int domain, pass;
bool target_met; bool target_met;
domidx = (uintptr_t)arg; domain = (uintptr_t)arg;
domain = &vm_dom[domidx]; vmd = VM_DOMAIN(domain);
pass = 0; pass = 0;
target_met = true; target_met = true;
@ -1771,18 +1758,18 @@ vm_pageout_worker(void *arg)
* is allocated. * is allocated.
*/ */
KASSERT(domain->vmd_segs != 0, ("domain without segments")); KASSERT(vmd->vmd_segs != 0, ("domain without segments"));
domain->vmd_last_active_scan = ticks; vmd->vmd_last_active_scan = ticks;
vm_pageout_init_marker(&domain->vmd_marker, PQ_INACTIVE); vm_pageout_init_marker(&vmd->vmd_marker, PQ_INACTIVE);
vm_pageout_init_marker(&domain->vmd_inacthead, PQ_INACTIVE); vm_pageout_init_marker(&vmd->vmd_inacthead, PQ_INACTIVE);
TAILQ_INSERT_HEAD(&domain->vmd_pagequeues[PQ_INACTIVE].pq_pl, TAILQ_INSERT_HEAD(&vmd->vmd_pagequeues[PQ_INACTIVE].pq_pl,
&domain->vmd_inacthead, plinks.q); &vmd->vmd_inacthead, plinks.q);
/* /*
* The pageout daemon worker is never done, so loop forever. * The pageout daemon worker is never done, so loop forever.
*/ */
while (TRUE) { while (TRUE) {
mtx_lock(&vm_page_queue_free_mtx); vm_domain_free_lock(vmd);
/* /*
* Generally, after a level >= 1 scan, if there are enough * Generally, after a level >= 1 scan, if there are enough
@ -1796,34 +1783,34 @@ vm_pageout_worker(void *arg)
* thread will, nonetheless, wait until another page is freed * thread will, nonetheless, wait until another page is freed
* or this wakeup is performed. * or this wakeup is performed.
*/ */
if (vm_pages_needed && !vm_page_count_min()) { if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) {
vm_pages_needed = false; vmd->vmd_pages_needed = false;
wakeup(&vm_cnt.v_free_count); wakeup(&vmd->vmd_free_count);
} }
/* /*
* Do not clear vm_pageout_wanted until we reach our free page * Do not clear vmd_pageout_wanted until we reach our free page
* target. Otherwise, we may be awakened over and over again, * target. Otherwise, we may be awakened over and over again,
* wasting CPU time. * wasting CPU time.
*/ */
if (vm_pageout_wanted && target_met) if (vmd->vmd_pageout_wanted && target_met)
vm_pageout_wanted = false; vmd->vmd_pageout_wanted = false;
/* /*
* Might the page daemon receive a wakeup call? * Might the page daemon receive a wakeup call?
*/ */
if (vm_pageout_wanted) { if (vmd->vmd_pageout_wanted) {
/* /*
* No. Either vm_pageout_wanted was set by another * No. Either vmd_pageout_wanted was set by another
* thread during the previous scan, which must have * thread during the previous scan, which must have
* been a level 0 scan, or vm_pageout_wanted was * been a level 0 scan, or vmd_pageout_wanted was
* already set and the scan failed to free enough * already set and the scan failed to free enough
* pages. If we haven't yet performed a level >= 1 * pages. If we haven't yet performed a level >= 1
* (page reclamation) scan, then increase the level * (page reclamation) scan, then increase the level
* and scan again now. Otherwise, sleep a bit and * and scan again now. Otherwise, sleep a bit and
* try again later. * try again later.
*/ */
mtx_unlock(&vm_page_queue_free_mtx); vm_domain_free_unlock(vmd);
if (pass >= 1) if (pass >= 1)
pause("pwait", hz / VM_INACT_SCAN_RATE); pause("pwait", hz / VM_INACT_SCAN_RATE);
pass++; pass++;
@ -1834,20 +1821,20 @@ vm_pageout_worker(void *arg)
* sleep until the next wakeup or until pages need to * sleep until the next wakeup or until pages need to
* have their reference stats updated. * have their reference stats updated.
*/ */
if (vm_pages_needed) { if (vmd->vmd_pages_needed) {
mtx_unlock(&vm_page_queue_free_mtx); vm_domain_free_unlock(vmd);
if (pass == 0) if (pass == 0)
pass++; pass++;
} else if (mtx_sleep(&vm_pageout_wanted, } else if (mtx_sleep(&vmd->vmd_pageout_wanted,
&vm_page_queue_free_mtx, PDROP | PVM, "psleep", vm_domain_free_lockptr(vmd), PDROP | PVM,
hz) == 0) { "psleep", hz) == 0) {
VM_CNT_INC(v_pdwakeups); VM_CNT_INC(v_pdwakeups);
pass = 1; pass = 1;
} else } else
pass = 0; pass = 0;
} }
target_met = vm_pageout_scan(domain, pass); target_met = vm_pageout_scan(vmd, pass);
} }
} }
@ -1855,41 +1842,76 @@ vm_pageout_worker(void *arg)
* vm_pageout_init initialises basic pageout daemon settings. * vm_pageout_init initialises basic pageout daemon settings.
*/ */
static void static void
vm_pageout_init(void) vm_pageout_init_domain(int domain)
{ {
/* struct vm_domain *vmd;
* Initialize some paging parameters.
*/ vmd = VM_DOMAIN(domain);
vm_cnt.v_interrupt_free_min = 2; vmd->vmd_interrupt_free_min = 2;
if (vm_cnt.v_page_count < 2000)
vm_pageout_page_count = 8;
/* /*
* v_free_reserved needs to include enough for the largest * v_free_reserved needs to include enough for the largest
* swap pager structures plus enough for any pv_entry structs * swap pager structures plus enough for any pv_entry structs
* when paging. * when paging.
*/ */
if (vm_cnt.v_page_count > 1024) if (vmd->vmd_page_count > 1024)
vm_cnt.v_free_min = 4 + (vm_cnt.v_page_count - 1024) / 200; vmd->vmd_free_min = 4 + (vmd->vmd_page_count - 1024) / 200;
else else
vm_cnt.v_free_min = 4; vmd->vmd_free_min = 4;
vm_cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE + vmd->vmd_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
vm_cnt.v_interrupt_free_min; vmd->vmd_interrupt_free_min;
vm_cnt.v_free_reserved = vm_pageout_page_count + vmd->vmd_free_reserved = vm_pageout_page_count +
vm_cnt.v_pageout_free_min + (vm_cnt.v_page_count / 768); vmd->vmd_pageout_free_min + (vmd->vmd_page_count / 768);
vm_cnt.v_free_severe = vm_cnt.v_free_min / 2; vmd->vmd_free_severe = vmd->vmd_free_min / 2;
vm_cnt.v_free_target = 4 * vm_cnt.v_free_min + vm_cnt.v_free_reserved; vmd->vmd_free_target = 4 * vmd->vmd_free_min + vmd->vmd_free_reserved;
vm_cnt.v_free_min += vm_cnt.v_free_reserved; vmd->vmd_free_min += vmd->vmd_free_reserved;
vm_cnt.v_free_severe += vm_cnt.v_free_reserved; vmd->vmd_free_severe += vmd->vmd_free_reserved;
vm_cnt.v_inactive_target = (3 * vm_cnt.v_free_target) / 2; vmd->vmd_inactive_target = (3 * vmd->vmd_free_target) / 2;
if (vm_cnt.v_inactive_target > vm_cnt.v_free_count / 3) if (vmd->vmd_inactive_target > vmd->vmd_free_count / 3)
vm_cnt.v_inactive_target = vm_cnt.v_free_count / 3; vmd->vmd_inactive_target = vmd->vmd_free_count / 3;
/* /*
* Set the default wakeup threshold to be 10% above the minimum * Set the default wakeup threshold to be 10% above the minimum
* page limit. This keeps the steady state out of shortfall. * page limit. This keeps the steady state out of shortfall.
*/ */
vm_pageout_wakeup_thresh = (vm_cnt.v_free_min / 10) * 11; vmd->vmd_pageout_wakeup_thresh = (vmd->vmd_free_min / 10) * 11;
/*
* Target amount of memory to move out of the laundry queue during a
* background laundering. This is proportional to the amount of system
* memory.
*/
vmd->vmd_background_launder_target = (vmd->vmd_free_target -
vmd->vmd_free_min) / 10;
}
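
To make the per-domain threshold arithmetic in vm_pageout_init_domain() concrete, here is the same computation spelled out for a hypothetical 1 GiB domain (262144 pages of 4 KiB), assuming vm_pageout_page_count == 32 and MAXBSIZE == 64 KiB; the resulting numbers are illustrative only.

#include <assert.h>

int
main(void)
{
        unsigned page_count = 262144, pageout_page_count = 32;
        unsigned interrupt_free_min, pageout_free_min, free_reserved;
        unsigned free_min, free_severe, free_target, wakeup_thresh;
        unsigned background_launder_target;

        interrupt_free_min = 2;
        free_min = 4 + (page_count - 1024) / 200;                   /* 1309 */
        pageout_free_min = (2 * 65536) / 4096 + interrupt_free_min; /* 34 */
        free_reserved = pageout_page_count + pageout_free_min +
            page_count / 768;                                       /* 407 */
        free_severe = free_min / 2 + free_reserved;                 /* 1061 */
        free_target = 4 * free_min + free_reserved;                 /* 5643 */
        free_min += free_reserved;                                  /* 1716 */
        wakeup_thresh = (free_min / 10) * 11;                       /* 1881 */
        background_launder_target = (free_target - free_min) / 10;  /* 392 */

        assert(wakeup_thresh > free_min);   /* steady state sits above min */
        assert(free_severe < free_min);
        assert(background_launder_target == 392);
        return (0);
}
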
static void
vm_pageout_init(void)
{
u_int freecount;
int i;
/*
* Initialize some paging parameters.
*/
if (vm_cnt.v_page_count < 2000)
vm_pageout_page_count = 8;
freecount = 0;
for (i = 0; i < vm_ndomains; i++) {
struct vm_domain *vmd;
vm_pageout_init_domain(i);
vmd = VM_DOMAIN(i);
vm_cnt.v_free_reserved += vmd->vmd_free_reserved;
vm_cnt.v_free_target += vmd->vmd_free_target;
vm_cnt.v_free_min += vmd->vmd_free_min;
vm_cnt.v_inactive_target += vmd->vmd_inactive_target;
vm_cnt.v_pageout_free_min += vmd->vmd_pageout_free_min;
vm_cnt.v_interrupt_free_min += vmd->vmd_interrupt_free_min;
vm_cnt.v_free_severe += vmd->vmd_free_severe;
freecount += vmd->vmd_free_count;
}
/* /*
* Set interval in seconds for active scan. We want to visit each * Set interval in seconds for active scan. We want to visit each
@ -1899,17 +1921,8 @@ vm_pageout_init(void)
if (vm_pageout_update_period == 0) if (vm_pageout_update_period == 0)
vm_pageout_update_period = 600; vm_pageout_update_period = 600;
/* XXX does not really belong here */
if (vm_page_max_wired == 0) if (vm_page_max_wired == 0)
vm_page_max_wired = vm_cnt.v_free_count / 3; vm_page_max_wired = freecount / 3;
/*
* Target amount of memory to move out of the laundry queue during a
* background laundering. This is proportional to the amount of system
* memory.
*/
vm_background_launder_target = (vm_cnt.v_free_target -
vm_cnt.v_free_min) / 10;
} }
/* /*
@ -1933,6 +1946,12 @@ vm_pageout(void)
panic("starting pageout for domain %d, error %d\n", panic("starting pageout for domain %d, error %d\n",
i, error); i, error);
} }
error = kthread_add(vm_pageout_laundry_worker,
(void *)(uintptr_t)i, curproc, NULL, 0, 0,
"laundry: dom%d", i);
if (error != 0)
panic("starting laundry for domain %d, error %d",
i, error);
} }
error = kthread_add(uma_reclaim_worker, NULL, curproc, NULL, error = kthread_add(uma_reclaim_worker, NULL, curproc, NULL,
0, 0, "uma"); 0, 0, "uma");
@ -1945,14 +1964,16 @@ vm_pageout(void)
* Perform an advisory wakeup of the page daemon. * Perform an advisory wakeup of the page daemon.
*/ */
void void
pagedaemon_wakeup(void) pagedaemon_wakeup(int domain)
{ {
struct vm_domain *vmd;
mtx_assert(&vm_page_queue_free_mtx, MA_NOTOWNED); vmd = VM_DOMAIN(domain);
vm_domain_free_assert_unlocked(vmd);
if (!vm_pageout_wanted && curthread->td_proc != pageproc) { if (!vmd->vmd_pageout_wanted && curthread->td_proc != pageproc) {
vm_pageout_wanted = true; vmd->vmd_pageout_wanted = true;
wakeup(&vm_pageout_wanted); wakeup(&vmd->vmd_pageout_wanted);
} }
} }
@ -1962,22 +1983,26 @@ pagedaemon_wakeup(void)
* This function returns with the free queues mutex unlocked. * This function returns with the free queues mutex unlocked.
*/ */
void void
pagedaemon_wait(int pri, const char *wmesg) pagedaemon_wait(int domain, int pri, const char *wmesg)
{ {
struct vm_domain *vmd;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); vmd = VM_DOMAIN(domain);
vm_domain_free_assert_locked(vmd);
/* /*
* vm_pageout_wanted may have been set by an advisory wakeup, but if the * vmd_pageout_wanted may have been set by an advisory wakeup, but if
* page daemon is running on a CPU, the wakeup will have been lost. * the page daemon is running on a CPU, the wakeup will have been lost.
* Thus, deliver a potentially spurious wakeup to ensure that the page * Thus, deliver a potentially spurious wakeup to ensure that the page
* daemon has been notified of the shortage. * daemon has been notified of the shortage.
*/ */
if (!vm_pageout_wanted || !vm_pages_needed) { if (!vmd->vmd_pageout_wanted || !vmd->vmd_pages_needed) {
vm_pageout_wanted = true; vmd->vmd_pageout_wanted = true;
wakeup(&vm_pageout_wanted); wakeup(&vmd->vmd_pageout_wanted);
} }
vm_pages_needed = true; vmd->vmd_pages_needed = true;
msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | pri, vmd->vmd_waiters++;
msleep(&vmd->vmd_free_count, vm_domain_free_lockptr(vmd), PDROP | pri,
wmesg, 0); wmesg, 0);
vmd->vmd_waiters--;
} }

sys/vm/vm_pageout.h

@ -74,9 +74,7 @@
*/ */
extern int vm_page_max_wired; extern int vm_page_max_wired;
extern int vm_pageout_deficit;
extern int vm_pageout_page_count; extern int vm_pageout_page_count;
extern bool vm_pages_needed;
#define VM_OOM_MEM 1 #define VM_OOM_MEM 1
#define VM_OOM_SWAPZ 2 #define VM_OOM_SWAPZ 2
@ -95,12 +93,15 @@ extern bool vm_pages_needed;
* Signal pageout-daemon and wait for it. * Signal pageout-daemon and wait for it.
*/ */
void pagedaemon_wait(int pri, const char *wmesg); void pagedaemon_wait(int domain, int pri, const char *wmesg);
void pagedaemon_wakeup(void); void pagedaemon_wakeup(int domain);
#define VM_WAIT vm_wait() #define VM_WAIT vm_wait()
#define VM_WAITPFAULT vm_waitpfault() #define VM_WAITPFAULT vm_waitpfault()
void vm_wait(void); void vm_wait(void);
void vm_waitpfault(void); void vm_waitpfault(void);
void vm_wait_domain(int domain);
void vm_wait_min(void);
void vm_wait_severe(void);
#ifdef _KERNEL #ifdef _KERNEL
int vm_pageout_flush(vm_page_t *, int, int, int, int *, boolean_t *); int vm_pageout_flush(vm_page_t *, int, int, int, int *, boolean_t *);

sys/vm/vm_pagequeue.h (new file, 235 lines)

@ -0,0 +1,235 @@
/*-
* SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: @(#)vm_page.h 8.2 (Berkeley) 12/13/93
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
* Authors: Avadis Tevanian, Jr., Michael Wayne Young
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
* $FreeBSD$
*/
#ifndef _VM_PAGEQUEUE_
#define _VM_PAGEQUEUE_
#ifdef _KERNEL
struct vm_pagequeue {
struct mtx pq_mutex;
struct pglist pq_pl;
int pq_cnt;
const char * const pq_name;
} __aligned(CACHE_LINE_SIZE);
struct vm_domain {
struct vm_pagequeue vmd_pagequeues[PQ_COUNT];
struct mtx_padalign vmd_free_mtx;
struct vmem *vmd_kernel_arena;
u_int vmd_domain; /* Domain number. */
u_int vmd_page_count;
long vmd_segs; /* bitmask of the segments */
/* Paging control variables, locked by domain_free_mtx. */
u_int vmd_free_count;
boolean_t vmd_oom;
int vmd_oom_seq;
int vmd_last_active_scan;
struct vm_page vmd_laundry_marker;
struct vm_page vmd_marker; /* marker for pagedaemon private use */
struct vm_page vmd_inacthead; /* marker for LRU-defeating insertions */
int vmd_pageout_pages_needed; /* page daemon waiting for pages? */
int vmd_pageout_deficit; /* Estimated number of pages deficit */
int vmd_waiters; /* Pageout waiters. */
bool vmd_pages_needed; /* Are threads waiting for free pages? */
bool vmd_pageout_wanted; /* pageout daemon wait channel */
bool vmd_minset; /* Are we in vm_min_domains? */
bool vmd_severeset; /* Are we in vm_severe_domains? */
int vmd_inactq_scans;
enum {
VM_LAUNDRY_IDLE = 0,
VM_LAUNDRY_BACKGROUND,
VM_LAUNDRY_SHORTFALL
} vmd_laundry_request;
/* Paging thresholds. */
u_int vmd_background_launder_target;
u_int vmd_free_reserved; /* (c) pages reserved for deadlock */
u_int vmd_free_target; /* (c) pages desired free */
u_int vmd_free_min; /* (c) pages desired free */
u_int vmd_inactive_target; /* (c) pages desired inactive */
u_int vmd_pageout_free_min; /* (c) min pages reserved for kernel */
u_int vmd_pageout_wakeup_thresh;/* (c) min pages to wake pagedaemon */
u_int vmd_interrupt_free_min; /* (c) reserved pages for int code */
u_int vmd_free_severe; /* (c) severe page depletion point */
} __aligned(CACHE_LINE_SIZE);
extern struct vm_domain vm_dom[MAXMEMDOM];
#define VM_DOMAIN(n) (&vm_dom[(n)])
#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED)
#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex)
#define vm_pagequeue_lockptr(pq) (&(pq)->pq_mutex)
#define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex)
#define vm_domain_free_assert_locked(n) \
mtx_assert(vm_domain_free_lockptr((n)), MA_OWNED)
#define vm_domain_free_assert_unlocked(n) \
mtx_assert(vm_domain_free_lockptr((n)), MA_NOTOWNED)
#define vm_domain_free_lock(d) \
mtx_lock(vm_domain_free_lockptr((d)))
#define vm_domain_free_lockptr(d) \
(&(d)->vmd_free_mtx)
#define vm_domain_free_unlock(d) \
mtx_unlock(vm_domain_free_lockptr((d)))
static __inline void
vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend)
{
#ifdef notyet
vm_pagequeue_assert_locked(pq);
#endif
pq->pq_cnt += addend;
}
#define vm_pagequeue_cnt_inc(pq) vm_pagequeue_cnt_add((pq), 1)
#define vm_pagequeue_cnt_dec(pq) vm_pagequeue_cnt_add((pq), -1)
void vm_domain_set(struct vm_domain *vmd);
int vm_domain_available(struct vm_domain *vmd, int req, int npages);
/*
* vm_pagequeue_domain:
*
* Return the memory domain the page belongs to.
*/
static inline struct vm_domain *
vm_pagequeue_domain(vm_page_t m)
{
return (VM_DOMAIN(vm_phys_domain(m)));
}
/*
* Return the number of pages we need to free-up or cache
* A positive number indicates that we do not have enough free pages.
*/
static inline int
vm_paging_target(struct vm_domain *vmd)
{
return (vmd->vmd_free_target - vmd->vmd_free_count);
}
/*
* Returns TRUE if the pagedaemon needs to be woken up.
*/
static inline int
vm_paging_needed(struct vm_domain *vmd, u_int free_count)
{
return (free_count < vmd->vmd_pageout_wakeup_thresh);
}
/*
* Returns TRUE if the domain is below the min paging target.
*/
static inline int
vm_paging_min(struct vm_domain *vmd)
{
return (vmd->vmd_free_min > vmd->vmd_free_count);
}
/*
* Returns TRUE if the domain is below the severe paging target.
*/
static inline int
vm_paging_severe(struct vm_domain *vmd)
{
return (vmd->vmd_free_severe > vmd->vmd_free_count);
}
/*
* Return the number of pages we need to launder.
* A positive number indicates that we have a shortfall of clean pages.
*/
static inline int
vm_laundry_target(struct vm_domain *vmd)
{
return (vm_paging_target(vmd));
}
static inline u_int
vm_domain_freecnt_adj(struct vm_domain *vmd, int adj)
{
u_int ret;
vm_domain_free_assert_locked(vmd);
ret = vmd->vmd_free_count += adj;
if ((!vmd->vmd_minset && vm_paging_min(vmd)) ||
(!vmd->vmd_severeset && vm_paging_severe(vmd)))
vm_domain_set(vmd);
return (ret);
}
#endif /* _KERNEL */
#endif /* !_VM_PAGEQUEUE_ */
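
A compact model of the threshold bookkeeping done by vm_domain_freecnt_adj() and vm_domain_free_wakeup() above: the global "domain in shortage" sets are only touched when the free count actually crosses a threshold, not on every adjustment. All names below are illustrative stand-ins, not kernel interfaces.

#include <stdbool.h>
#include <stdio.h>

struct dom {
        unsigned free_count, free_min, free_severe;
        bool     minset, severeset;
};

static bool paging_min(struct dom *d)    { return (d->free_min > d->free_count); }
static bool paging_severe(struct dom *d) { return (d->free_severe > d->free_count); }

/* Stand-in for vm_domain_set()/vm_domain_clear(): recompute membership. */
static void
domain_update(struct dom *d)
{
        d->minset = paging_min(d);
        d->severeset = paging_severe(d);
}

static unsigned
freecnt_adj(struct dom *d, int adj)     /* cf. vm_domain_freecnt_adj() */
{
        unsigned ret;

        ret = d->free_count += adj;
        if ((!d->minset && paging_min(d)) ||
            (!d->severeset && paging_severe(d)))
                domain_update(d);       /* dropped below a threshold */
        return (ret);
}

static void
free_wakeup(struct dom *d)              /* cf. vm_domain_free_wakeup() */
{
        if ((d->minset && !paging_min(d)) ||
            (d->severeset && !paging_severe(d)))
                domain_update(d);       /* climbed back above a threshold */
}

int
main(void)
{
        struct dom d = { .free_count = 2000, .free_min = 1716,
            .free_severe = 1061, .minset = false, .severeset = false };

        freecnt_adj(&d, -500);  /* 1500 < free_min: domain enters min set */
        printf("minset=%d severeset=%d\n", d.minset, d.severeset);
        freecnt_adj(&d, 400);   /* 1900: no threshold crossed on the way up */
        free_wakeup(&d);        /* above free_min again: domain leaves set */
        printf("minset=%d severeset=%d\n", d.minset, d.severeset);
        return (0);
}
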

sys/vm/vm_phys.c

@ -67,6 +67,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_object.h> #include <vm/vm_object.h>
#include <vm/vm_page.h> #include <vm/vm_page.h>
#include <vm/vm_phys.h> #include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>
_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX, _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
"Too many physsegs."); "Too many physsegs.");
@ -653,7 +654,7 @@ vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
if (flind < 0) if (flind < 0)
return (NULL); return (NULL);
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); vm_domain_free_assert_locked(VM_DOMAIN(domain));
fl = &vm_phys_free_queues[domain][flind][pool][0]; fl = &vm_phys_free_queues[domain][flind][pool][0];
for (oind = order; oind < VM_NFREEORDER; oind++) { for (oind = order; oind < VM_NFREEORDER; oind++) {
m = TAILQ_FIRST(&fl[oind].pl); m = TAILQ_FIRST(&fl[oind].pl);
@ -906,8 +907,8 @@ vm_phys_free_pages(vm_page_t m, int order)
m, m->pool)); m, m->pool));
KASSERT(order < VM_NFREEORDER, KASSERT(order < VM_NFREEORDER,
("vm_phys_free_pages: order %d is out of range", order)); ("vm_phys_free_pages: order %d is out of range", order));
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
seg = &vm_phys_segs[m->segind]; seg = &vm_phys_segs[m->segind];
vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
if (order < VM_NFREEORDER - 1) { if (order < VM_NFREEORDER - 1) {
pa = VM_PAGE_TO_PHYS(m); pa = VM_PAGE_TO_PHYS(m);
do { do {
@ -945,7 +946,7 @@ vm_phys_free_contig(vm_page_t m, u_long npages)
* Avoid unnecessary coalescing by freeing the pages in the largest * Avoid unnecessary coalescing by freeing the pages in the largest
* possible power-of-two-sized subsets. * possible power-of-two-sized subsets.
*/ */
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); vm_domain_free_assert_locked(vm_pagequeue_domain(m));
for (;; npages -= n) { for (;; npages -= n) {
/* /*
* Unsigned "min" is used here so that "order" is assigned * Unsigned "min" is used here so that "order" is assigned
@ -1051,14 +1052,13 @@ vm_phys_unfree_page(vm_page_t m)
vm_page_t m_set, m_tmp; vm_page_t m_set, m_tmp;
int order; int order;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
/* /*
* First, find the contiguous, power of two-sized set of free * First, find the contiguous, power of two-sized set of free
* physical pages containing the given physical page "m" and * physical pages containing the given physical page "m" and
* assign it to "m_set". * assign it to "m_set".
*/ */
seg = &vm_phys_segs[m->segind]; seg = &vm_phys_segs[m->segind];
vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
for (m_set = m, order = 0; m_set->order == VM_NFREEORDER && for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
order < VM_NFREEORDER - 1; ) { order < VM_NFREEORDER - 1; ) {
order++; order++;
@ -1122,7 +1122,7 @@ vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
KASSERT(npages > 0, ("npages is 0")); KASSERT(npages > 0, ("npages is 0"));
KASSERT(powerof2(alignment), ("alignment is not a power of 2")); KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
KASSERT(powerof2(boundary), ("boundary is not a power of 2")); KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); vm_domain_free_assert_locked(VM_DOMAIN(domain));
if (low >= high) if (low >= high)
return (NULL); return (NULL);
m_run = NULL; m_run = NULL;
@ -1167,7 +1167,7 @@ vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
KASSERT(npages > 0, ("npages is 0")); KASSERT(npages > 0, ("npages is 0"));
KASSERT(powerof2(alignment), ("alignment is not a power of 2")); KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
KASSERT(powerof2(boundary), ("boundary is not a power of 2")); KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
/* Compute the queue that is the best fit for npages. */ /* Compute the queue that is the best fit for npages. */
for (order = 0; (1 << order) < npages; order++); for (order = 0; (1 << order) < npages; order++);
/* Search for a run satisfying the specified conditions. */ /* Search for a run satisfying the specified conditions. */

sys/vm/vm_phys.h

@ -96,12 +96,12 @@ int vm_phys_mem_affinity(int f, int t);
/* /*
* *
* vm_phys_domidx: * vm_phys_domain:
* *
* Return the index of the domain the page belongs to. * Return the index of the domain the page belongs to.
*/ */
static inline int static inline int
vm_phys_domidx(vm_page_t m) vm_phys_domain(vm_page_t m)
{ {
#ifdef NUMA #ifdef NUMA
int domn, segind; int domn, segind;
@ -117,26 +117,5 @@ vm_phys_domidx(vm_page_t m)
#endif #endif
} }
/*
* vm_phys_domain:
*
* Return the memory domain the page belongs to.
*/
static inline struct vm_domain *
vm_phys_domain(vm_page_t m)
{
return (&vm_dom[vm_phys_domidx(m)]);
}
static inline u_int
vm_phys_freecnt_adj(vm_page_t m, int adj)
{
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
vm_phys_domain(m)->vmd_free_count += adj;
return (vm_cnt.v_free_count += adj);
}
#endif /* _KERNEL */ #endif /* _KERNEL */
#endif /* !_VM_PHYS_H_ */ #endif /* !_VM_PHYS_H_ */

sys/vm/vm_reserv.c

@ -59,7 +59,9 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_param.h> #include <vm/vm_param.h>
#include <vm/vm_object.h> #include <vm/vm_object.h>
#include <vm/vm_page.h> #include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_phys.h> #include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>
#include <vm/vm_radix.h> #include <vm/vm_radix.h>
#include <vm/vm_reserv.h> #include <vm/vm_reserv.h>
@ -163,17 +165,21 @@ popmap_is_set(popmap_t popmap[], int i)
* object's list of reservations. * object's list of reservations.
* *
* A partially populated reservation can be broken and reclaimed at any time. * A partially populated reservation can be broken and reclaimed at any time.
*
* f - vm_domain_free_lock
* o - vm_reserv_object_lock
* c - constant after boot
*/ */
struct vm_reserv { struct vm_reserv {
TAILQ_ENTRY(vm_reserv) partpopq; TAILQ_ENTRY(vm_reserv) partpopq; /* (f) per-domain queue. */
LIST_ENTRY(vm_reserv) objq; LIST_ENTRY(vm_reserv) objq; /* (o, f) object queue */
vm_object_t object; /* containing object */ vm_object_t object; /* (o, f) containing object */
vm_pindex_t pindex; /* offset within object */ vm_pindex_t pindex; /* (o, f) offset in object */
vm_page_t pages; /* first page of a superpage */ vm_page_t pages; /* (c) first page */
int domain; /* NUMA domain */ int domain; /* (c) NUMA domain. */
int popcnt; /* # of pages in use */ int popcnt; /* (f) # of pages in use */
char inpartpopq; char inpartpopq; /* (f) */
popmap_t popmap[NPOPMAP]; /* bit vector of used pages */ popmap_t popmap[NPOPMAP]; /* (f) bit vector, used pages */
}; };
/* /*
@ -234,6 +240,25 @@ static long vm_reserv_reclaimed;
SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD, SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD,
&vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations"); &vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations");
/*
* The object lock pool is used to synchronize the rvq. We can not use a
* pool mutex because it is required before malloc works.
*
* The "hash" function could be made faster without divide and modulo.
*/
#define VM_RESERV_OBJ_LOCK_COUNT MAXCPU
struct mtx_padalign vm_reserv_object_mtx[VM_RESERV_OBJ_LOCK_COUNT];
#define vm_reserv_object_lock_idx(object) \
(((uintptr_t)object / sizeof(*object)) % VM_RESERV_OBJ_LOCK_COUNT)
#define vm_reserv_object_lock_ptr(object) \
&vm_reserv_object_mtx[vm_reserv_object_lock_idx((object))]
#define vm_reserv_object_lock(object) \
mtx_lock(vm_reserv_object_lock_ptr((object)))
#define vm_reserv_object_unlock(object) \
mtx_unlock(vm_reserv_object_lock_ptr((object)))
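
The object-lock pool above hashes a reservation's object pointer to one of a fixed set of mutexes. A userspace sketch of the same idea follows, including the kind of divide/modulo-free index the comment alludes to; the shift-and-mask variant is an assumed possible speed-up, not the kernel's code.

#include <pthread.h>
#include <stddef.h>
#include <stdint.h>

#define OBJ_LOCK_COUNT  64              /* stands in for MAXCPU */

static pthread_mutex_t obj_lock_pool[OBJ_LOCK_COUNT];

static void
obj_lock_pool_init(void)
{
        int i;

        for (i = 0; i < OBJ_LOCK_COUNT; i++)
                pthread_mutex_init(&obj_lock_pool[i], NULL);
}

/* Mirror of vm_reserv_object_lock_idx(): divide by the object size, then mod. */
static unsigned
obj_lock_idx(const void *obj, size_t objsize)
{
        return (((uintptr_t)obj / objsize) % OBJ_LOCK_COUNT);
}

/*
 * Divide/modulo-free variant of the same idea: drop the low alignment bits
 * and mask with a power-of-two pool size.
 */
static unsigned
obj_lock_idx_fast(const void *obj)
{
        return (((uintptr_t)obj >> 6) & (OBJ_LOCK_COUNT - 1));
}

static void
obj_lock(const void *obj, size_t objsize)
{
        pthread_mutex_lock(&obj_lock_pool[obj_lock_idx(obj, objsize)]);
}

static void
obj_unlock(const void *obj, size_t objsize)
{
        pthread_mutex_unlock(&obj_lock_pool[obj_lock_idx(obj, objsize)]);
}
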
static void vm_reserv_break(vm_reserv_t rv, vm_page_t m); static void vm_reserv_break(vm_reserv_t rv, vm_page_t m);
static void vm_reserv_depopulate(vm_reserv_t rv, int index); static void vm_reserv_depopulate(vm_reserv_t rv, int index);
static vm_reserv_t vm_reserv_from_page(vm_page_t m); static vm_reserv_t vm_reserv_from_page(vm_page_t m);
@ -288,12 +313,12 @@ sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) { for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) {
counter = 0; counter = 0;
unused_pages = 0; unused_pages = 0;
mtx_lock(&vm_page_queue_free_mtx); vm_domain_free_lock(VM_DOMAIN(domain));
TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) { TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) {
counter++; counter++;
unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt; unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
} }
mtx_unlock(&vm_page_queue_free_mtx); vm_domain_free_unlock(VM_DOMAIN(domain));
sbuf_printf(&sbuf, "%6d, %7d, %6dK, %6d\n", sbuf_printf(&sbuf, "%6d, %7d, %6dK, %6d\n",
domain, level, domain, level,
unused_pages * ((int)PAGE_SIZE / 1024), counter); unused_pages * ((int)PAGE_SIZE / 1024), counter);
@ -304,6 +329,49 @@ sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
return (error); return (error);
} }
/*
* Remove a reservation from the object's objq.
*/
static void
vm_reserv_remove(vm_reserv_t rv)
{
vm_object_t object;
KASSERT(rv->object != NULL,
("vm_reserv_remove: reserv %p is free", rv));
KASSERT(!rv->inpartpopq,
("vm_reserv_remove: reserv %p's inpartpopq is TRUE", rv));
object = rv->object;
vm_reserv_object_lock(object);
LIST_REMOVE(rv, objq);
rv->object = NULL;
vm_reserv_object_unlock(object);
}
/*
* Insert a new reservation into the object's objq.
*/
static void
vm_reserv_insert(vm_reserv_t rv, vm_object_t object, vm_pindex_t pindex)
{
int i;
KASSERT(rv->object == NULL,
("vm_reserv_insert: reserv %p isn't free", rv));
KASSERT(rv->popcnt == 0,
("vm_reserv_insert: reserv %p's popcnt is corrupted", rv));
KASSERT(!rv->inpartpopq,
("vm_reserv_insert: reserv %p's inpartpopq is TRUE", rv));
for (i = 0; i < NPOPMAP; i++)
KASSERT(rv->popmap[i] == 0,
("vm_reserv_insert: reserv %p's popmap is corrupted", rv));
vm_reserv_object_lock(object);
rv->pindex = pindex;
rv->object = object;
LIST_INSERT_HEAD(&object->rvq, rv, objq);
vm_reserv_object_unlock(object);
}
/* /*
* Reduces the given reservation's population count. If the population count * Reduces the given reservation's population count. If the population count
* becomes zero, the reservation is destroyed. Additionally, moves the * becomes zero, the reservation is destroyed. Additionally, moves the
@ -316,7 +384,7 @@ static void
vm_reserv_depopulate(vm_reserv_t rv, int index) vm_reserv_depopulate(vm_reserv_t rv, int index)
{ {
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
KASSERT(rv->object != NULL, KASSERT(rv->object != NULL,
("vm_reserv_depopulate: reserv %p is free", rv)); ("vm_reserv_depopulate: reserv %p is free", rv));
KASSERT(popmap_is_set(rv->popmap, index), KASSERT(popmap_is_set(rv->popmap, index),
@ -339,9 +407,7 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
popmap_clear(rv->popmap, index); popmap_clear(rv->popmap, index);
rv->popcnt--; rv->popcnt--;
if (rv->popcnt == 0) { if (rv->popcnt == 0) {
LIST_REMOVE(rv, objq); vm_reserv_remove(rv);
rv->object = NULL;
rv->domain = -1;
vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER); vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER);
vm_reserv_freed++; vm_reserv_freed++;
} else { } else {
@ -360,6 +426,43 @@ vm_reserv_from_page(vm_page_t m)
return (&vm_reserv_array[VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT]); return (&vm_reserv_array[VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT]);
} }
/*
* Returns an existing reservation or NULL and initialized successor pointer.
*/
static vm_reserv_t
vm_reserv_from_object(vm_object_t object, vm_pindex_t pindex,
vm_page_t mpred, vm_page_t *msuccp)
{
vm_reserv_t rv;
vm_page_t msucc;
msucc = NULL;
if (mpred != NULL) {
KASSERT(mpred->object == object,
("vm_reserv_from_object: object doesn't contain mpred"));
KASSERT(mpred->pindex < pindex,
("vm_reserv_from_object: mpred doesn't precede pindex"));
rv = vm_reserv_from_page(mpred);
if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
goto found;
msucc = TAILQ_NEXT(mpred, listq);
} else
msucc = TAILQ_FIRST(&object->memq);
if (msucc != NULL) {
KASSERT(msucc->pindex > pindex,
("vm_reserv_from_object: msucc doesn't succeed pindex"));
rv = vm_reserv_from_page(msucc);
if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
goto found;
}
rv = NULL;
found:
*msuccp = msucc;
return (rv);
}
/* /*
* Returns TRUE if the given reservation contains the given page index and * Returns TRUE if the given reservation contains the given page index and
* FALSE otherwise. * FALSE otherwise.
@ -381,7 +484,7 @@ static void
vm_reserv_populate(vm_reserv_t rv, int index) vm_reserv_populate(vm_reserv_t rv, int index)
{ {
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
KASSERT(rv->object != NULL, KASSERT(rv->object != NULL,
("vm_reserv_populate: reserv %p is free", rv)); ("vm_reserv_populate: reserv %p is free", rv));
KASSERT(popmap_is_clear(rv->popmap, index), KASSERT(popmap_is_clear(rv->popmap, index),
@ -407,6 +510,100 @@ vm_reserv_populate(vm_reserv_t rv, int index)
rv->pages->psind = 1; rv->pages->psind = 1;
} }
/*
* Allocates a contiguous set of physical pages of the given size "npages"
* from existing or newly created reservations. All of the physical pages
* must be at or above the given physical address "low" and below the given
* physical address "high". The given value "alignment" determines the
* alignment of the first physical page in the set. If the given value
* "boundary" is non-zero, then the set of physical pages cannot cross any
* physical address boundary that is a multiple of that value. Both
* "alignment" and "boundary" must be a power of two.
*
* The page "mpred" must immediately precede the offset "pindex" within the
* specified object.
*
* The object and free page queue must be locked.
*/
vm_page_t
vm_reserv_extend_contig(int req, vm_object_t object, vm_pindex_t pindex,
int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
u_long alignment, vm_paddr_t boundary, vm_page_t mpred)
{
struct vm_domain *vmd;
vm_paddr_t pa, size;
vm_page_t m, msucc;
vm_reserv_t rv;
int i, index;
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0"));
/*
* Is a reservation fundamentally impossible?
*/
if (pindex < VM_RESERV_INDEX(object, pindex) ||
pindex + npages > object->size || object->resident_page_count == 0)
return (NULL);
/*
* All reservations of a particular size have the same alignment.
* Assuming that the first page is allocated from a reservation, the
* least significant bits of its physical address can be determined
* from its offset from the beginning of the reservation and the size
* of the reservation.
*
* Could the specified index within a reservation of the smallest
* possible size satisfy the alignment and boundary requirements?
*/
pa = VM_RESERV_INDEX(object, pindex) << PAGE_SHIFT;
if ((pa & (alignment - 1)) != 0)
return (NULL);
size = npages << PAGE_SHIFT;
if (((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0)
return (NULL);
/*
* Look for an existing reservation.
*/
rv = vm_reserv_from_object(object, pindex, mpred, &msucc);
if (rv == NULL)
return (NULL);
KASSERT(object != kernel_object || rv->domain == domain,
("vm_reserv_extend_contig: Domain mismatch from reservation."));
index = VM_RESERV_INDEX(object, pindex);
/* Does the allocation fit within the reservation? */
if (index + npages > VM_LEVEL_0_NPAGES)
return (NULL);
domain = rv->domain;
vmd = VM_DOMAIN(domain);
vm_domain_free_lock(vmd);
if (rv->object != object || !vm_domain_available(vmd, req, npages)) {
m = NULL;
goto out;
}
m = &rv->pages[index];
pa = VM_PAGE_TO_PHYS(m);
if (pa < low || pa + size > high || (pa & (alignment - 1)) != 0 ||
((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) {
m = NULL;
goto out;
}
/* Handle vm_page_rename(m, new_object, ...). */
for (i = 0; i < npages; i++) {
if (popmap_is_set(rv->popmap, index + i)) {
m = NULL;
goto out;
}
}
for (i = 0; i < npages; i++)
vm_reserv_populate(rv, index + i);
vm_domain_freecnt_adj(vmd, -npages);
out:
vm_domain_free_unlock(vmd);
return (m);
}
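
The two physical-address tests in vm_reserv_extend_contig() above can be read as "is pa suitably aligned?" and "do the first and last byte of the run fall in different boundary-sized windows?". A standalone illustration of those bit tests with made-up addresses, assuming both alignment and boundary are non-zero powers of two:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool
range_ok(uint64_t pa, uint64_t size, uint64_t alignment, uint64_t boundary)
{
        if ((pa & (alignment - 1)) != 0)
                return (false);         /* start not suitably aligned */
        if (((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0)
                return (false);         /* run crosses a boundary multiple */
        return (true);
}

int
main(void)
{
        /* 16 KiB run at 0x10000, 4 KiB alignment, 64 KiB boundary: fits. */
        assert(range_ok(0x10000, 0x4000, 0x1000, 0x10000));
        /* Same run at 0x1f000 crosses the 64 KiB multiple at 0x20000. */
        assert(!range_ok(0x1f000, 0x4000, 0x1000, 0x10000));
        /* Misaligned start. */
        assert(!range_ok(0x10200, 0x4000, 0x1000, 0x10000));
        return (0);
}
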
/* /*
* Allocates a contiguous set of physical pages of the given size "npages" * Allocates a contiguous set of physical pages of the given size "npages"
* from existing or newly created reservations. All of the physical pages * from existing or newly created reservations. All of the physical pages
@ -434,7 +631,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
u_long allocpages, maxpages, minpages; u_long allocpages, maxpages, minpages;
int i, index, n; int i, index, n;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); vm_domain_free_assert_locked(VM_DOMAIN(domain));
VM_OBJECT_ASSERT_WLOCKED(object); VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0")); KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0"));
@ -463,52 +660,48 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
return (NULL); return (NULL);
/* /*
* Look for an existing reservation. * Callers should've extended an existing reservation prior to
* calling this function. If a reservation exists it is
* incompatible with the allocation.
*/ */
if (mpred != NULL) { rv = vm_reserv_from_object(object, pindex, mpred, &msucc);
KASSERT(mpred->object == object, if (rv != NULL)
("vm_reserv_alloc_contig: object doesn't contain mpred")); return (NULL);
KASSERT(mpred->pindex < pindex,
("vm_reserv_alloc_contig: mpred doesn't precede pindex"));
rv = vm_reserv_from_page(mpred);
if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
goto found;
msucc = TAILQ_NEXT(mpred, listq);
} else
msucc = TAILQ_FIRST(&object->memq);
if (msucc != NULL) {
KASSERT(msucc->pindex > pindex,
("vm_reserv_alloc_contig: msucc doesn't succeed pindex"));
rv = vm_reserv_from_page(msucc);
if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
goto found;
}
/* /*
* Could at least one reservation fit between the first index to the * Could at least one reservation fit between the first index to the
* left that can be used ("leftcap") and the first index to the right * left that can be used ("leftcap") and the first index to the right
* that cannot be used ("rightcap")? * that cannot be used ("rightcap")?
*
* We must synchronize with the reserv object lock to protect the
* pindex/object of the resulting reservations against rename while
* we are inspecting.
*/ */
first = pindex - VM_RESERV_INDEX(object, pindex); first = pindex - VM_RESERV_INDEX(object, pindex);
minpages = VM_RESERV_INDEX(object, pindex) + npages;
maxpages = roundup2(minpages, VM_LEVEL_0_NPAGES);
allocpages = maxpages;
vm_reserv_object_lock(object);
if (mpred != NULL) { if (mpred != NULL) {
if ((rv = vm_reserv_from_page(mpred))->object != object) if ((rv = vm_reserv_from_page(mpred))->object != object)
leftcap = mpred->pindex + 1; leftcap = mpred->pindex + 1;
else else
leftcap = rv->pindex + VM_LEVEL_0_NPAGES; leftcap = rv->pindex + VM_LEVEL_0_NPAGES;
if (leftcap > first) if (leftcap > first) {
vm_reserv_object_unlock(object);
return (NULL); return (NULL);
}
} }
minpages = VM_RESERV_INDEX(object, pindex) + npages;
maxpages = roundup2(minpages, VM_LEVEL_0_NPAGES);
allocpages = maxpages;
if (msucc != NULL) { if (msucc != NULL) {
if ((rv = vm_reserv_from_page(msucc))->object != object) if ((rv = vm_reserv_from_page(msucc))->object != object)
rightcap = msucc->pindex; rightcap = msucc->pindex;
else else
rightcap = rv->pindex; rightcap = rv->pindex;
if (first + maxpages > rightcap) { if (first + maxpages > rightcap) {
if (maxpages == VM_LEVEL_0_NPAGES) if (maxpages == VM_LEVEL_0_NPAGES) {
vm_reserv_object_unlock(object);
return (NULL); return (NULL);
}
/* /*
* At least one reservation will fit between "leftcap" * At least one reservation will fit between "leftcap"
@ -519,6 +712,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
allocpages = minpages; allocpages = minpages;
} }
} }
vm_reserv_object_unlock(object);
/* /*
* Would the last new reservation extend past the end of the object? * Would the last new reservation extend past the end of the object?
@ -549,7 +743,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? boundary : 0); VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? boundary : 0);
if (m == NULL) if (m == NULL)
return (NULL); return (NULL);
KASSERT(vm_phys_domidx(m) == domain, KASSERT(vm_phys_domain(m) == domain,
("vm_reserv_alloc_contig: Page domain does not match requested.")); ("vm_reserv_alloc_contig: Page domain does not match requested."));
/* /*
@ -565,22 +759,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
KASSERT(rv->pages == m, KASSERT(rv->pages == m,
("vm_reserv_alloc_contig: reserv %p's pages is corrupted", ("vm_reserv_alloc_contig: reserv %p's pages is corrupted",
rv)); rv));
KASSERT(rv->object == NULL, vm_reserv_insert(rv, object, first);
("vm_reserv_alloc_contig: reserv %p isn't free", rv));
LIST_INSERT_HEAD(&object->rvq, rv, objq);
rv->object = object;
rv->pindex = first;
rv->domain = domain;
KASSERT(rv->popcnt == 0,
("vm_reserv_alloc_contig: reserv %p's popcnt is corrupted",
rv));
KASSERT(!rv->inpartpopq,
("vm_reserv_alloc_contig: reserv %p's inpartpopq is TRUE",
rv));
for (i = 0; i < NPOPMAP; i++)
KASSERT(rv->popmap[i] == 0,
("vm_reserv_alloc_contig: reserv %p's popmap is corrupted",
rv));
n = ulmin(VM_LEVEL_0_NPAGES - index, npages); n = ulmin(VM_LEVEL_0_NPAGES - index, npages);
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
vm_reserv_populate(rv, index + i); vm_reserv_populate(rv, index + i);
@ -594,31 +773,70 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
allocpages -= VM_LEVEL_0_NPAGES; allocpages -= VM_LEVEL_0_NPAGES;
} while (allocpages >= VM_LEVEL_0_NPAGES); } while (allocpages >= VM_LEVEL_0_NPAGES);
return (m_ret); return (m_ret);
}
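Both allocation paths in this file now call vm_reserv_from_object() instead of open-coding the mpred/msucc lookup. That helper is defined earlier in vm_reserv.c and is not part of this excerpt; the sketch below is reconstructed from the open-coded logic it replaces, so the exact assertions and return convention are assumptions rather than the committed text.

static vm_reserv_t
vm_reserv_from_object(vm_object_t object, vm_pindex_t pindex,
    vm_page_t mpred, vm_page_t *msuccp)
{
    vm_reserv_t rv;
    vm_page_t msucc;

    msucc = NULL;
    if (mpred != NULL) {
        KASSERT(mpred->object == object,
            ("vm_reserv_from_object: object doesn't contain mpred"));
        KASSERT(mpred->pindex < pindex,
            ("vm_reserv_from_object: mpred doesn't precede pindex"));
        rv = vm_reserv_from_page(mpred);
        if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
            goto found;
        msucc = TAILQ_NEXT(mpred, listq);
    } else
        msucc = TAILQ_FIRST(&object->memq);
    if (msucc != NULL) {
        KASSERT(msucc->pindex > pindex,
            ("vm_reserv_from_object: msucc doesn't succeed pindex"));
        rv = vm_reserv_from_page(msucc);
        if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
            goto found;
    }
    /* No existing reservation covers pindex. */
    rv = NULL;

found:
    if (msuccp != NULL)
        *msuccp = msucc;
    return (rv);
}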
/*
* Attempts to extend an existing reservation and allocate the page to the
* object.
*
* The page "mpred" must immediately precede the offset "pindex" within the
* specified object.
*
* The object must be locked.
*/
vm_page_t
vm_reserv_extend(int req, vm_object_t object, vm_pindex_t pindex, int domain,
vm_page_t mpred)
{
struct vm_domain *vmd;
vm_page_t m, msucc;
vm_reserv_t rv;
int index, free_count;
VM_OBJECT_ASSERT_WLOCKED(object);
/* /*
* Found a matching reservation. * Could a reservation currently exist?
*/ */
found: if (pindex < VM_RESERV_INDEX(object, pindex) ||
pindex >= object->size || object->resident_page_count == 0)
return (NULL);
/*
* Look for an existing reservation.
*/
rv = vm_reserv_from_object(object, pindex, mpred, &msucc);
if (rv == NULL)
return (NULL);
KASSERT(object != kernel_object || rv->domain == domain,
("vm_reserv_extend: Domain mismatch from reservation."));
domain = rv->domain;
vmd = VM_DOMAIN(domain);
index = VM_RESERV_INDEX(object, pindex); index = VM_RESERV_INDEX(object, pindex);
/* Does the allocation fit within the reservation? */
if (index + npages > VM_LEVEL_0_NPAGES)
return (NULL);
m = &rv->pages[index]; m = &rv->pages[index];
pa = VM_PAGE_TO_PHYS(m); vm_domain_free_lock(vmd);
if (pa < low || pa + size > high || (pa & (alignment - 1)) != 0 || if (vm_domain_available(vmd, req, 1) == 0 ||
((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) /* Handle reclaim race. */
return (NULL); rv->object != object ||
/* Handle vm_page_rename(m, new_object, ...). */ /* Handle vm_page_rename(m, new_object, ...). */
for (i = 0; i < npages; i++) popmap_is_set(rv->popmap, index))
if (popmap_is_set(rv->popmap, index + i)) m = NULL;
return (NULL); if (m != NULL) {
for (i = 0; i < npages; i++) vm_reserv_populate(rv, index);
vm_reserv_populate(rv, index + i); free_count = vm_domain_freecnt_adj(vmd, -1);
} else
free_count = vmd->vmd_free_count;
vm_domain_free_unlock(vmd);
if (vm_paging_needed(vmd, free_count))
pagedaemon_wakeup(domain);
return (m); return (m);
} }
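vm_reserv_extend() leans on a small set of per-domain helpers (VM_DOMAIN(), vm_domain_free_lock()/unlock()/assert_locked(), vm_domain_freecnt_adj()) that live in the vm_page/vm_pagequeue headers rather than in this file. A minimal sketch of what they amount to, assuming the per-domain free mutex is a vmd_free_mtx field of struct vm_domain; the real vm_domain_freecnt_adj() may also update paging-threshold state:

#define VM_DOMAIN(n)    (&vm_dom[(n)])

#define vm_domain_free_lockptr(d)   (&(d)->vmd_free_mtx)
#define vm_domain_free_lock(d)      mtx_lock(vm_domain_free_lockptr((d)))
#define vm_domain_free_unlock(d)    mtx_unlock(vm_domain_free_lockptr((d)))
#define vm_domain_free_assert_locked(d) \
    mtx_assert(vm_domain_free_lockptr((d)), MA_OWNED)

/* Adjust the domain's free page count; the caller holds the free lock. */
static inline u_int
vm_domain_freecnt_adj(struct vm_domain *vmd, int adj)
{

    vm_domain_free_assert_locked(vmd);
    return (vmd->vmd_free_count += adj);
}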
/* /*
* Allocates a page from an existing or newly created reservation. * Allocates a page from an existing reservation.
* *
* The page "mpred" must immediately precede the offset "pindex" within the * The page "mpred" must immediately precede the offset "pindex" within the
* specified object. * specified object.
@ -632,9 +850,9 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain,
vm_page_t m, msucc; vm_page_t m, msucc;
vm_pindex_t first, leftcap, rightcap; vm_pindex_t first, leftcap, rightcap;
vm_reserv_t rv; vm_reserv_t rv;
int i, index; int index;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); vm_domain_free_assert_locked(VM_DOMAIN(domain));
VM_OBJECT_ASSERT_WLOCKED(object); VM_OBJECT_ASSERT_WLOCKED(object);
/* /*
@ -645,48 +863,45 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain,
return (NULL); return (NULL);
/* /*
* Look for an existing reservation. * Callers should've extended an existing reservation prior to
* calling this function. If a reservation exists it is
* incompatible with the allocation.
*/ */
if (mpred != NULL) { rv = vm_reserv_from_object(object, pindex, mpred, &msucc);
KASSERT(mpred->object == object, if (rv != NULL)
("vm_reserv_alloc_page: object doesn't contain mpred")); return (NULL);
KASSERT(mpred->pindex < pindex,
("vm_reserv_alloc_page: mpred doesn't precede pindex"));
rv = vm_reserv_from_page(mpred);
if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
goto found;
msucc = TAILQ_NEXT(mpred, listq);
} else
msucc = TAILQ_FIRST(&object->memq);
if (msucc != NULL) {
KASSERT(msucc->pindex > pindex,
("vm_reserv_alloc_page: msucc doesn't succeed pindex"));
rv = vm_reserv_from_page(msucc);
if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
goto found;
}
/* /*
* Could a reservation fit between the first index to the left that * Could a reservation fit between the first index to the left that
* can be used and the first index to the right that cannot be used? * can be used and the first index to the right that cannot be used?
*
* We must synchronize with the reserv object lock to protect the
* pindex/object of the resulting reservations against rename while
* we are inspecting.
*/ */
first = pindex - VM_RESERV_INDEX(object, pindex); first = pindex - VM_RESERV_INDEX(object, pindex);
vm_reserv_object_lock(object);
if (mpred != NULL) { if (mpred != NULL) {
if ((rv = vm_reserv_from_page(mpred))->object != object) if ((rv = vm_reserv_from_page(mpred))->object != object)
leftcap = mpred->pindex + 1; leftcap = mpred->pindex + 1;
else else
leftcap = rv->pindex + VM_LEVEL_0_NPAGES; leftcap = rv->pindex + VM_LEVEL_0_NPAGES;
if (leftcap > first) if (leftcap > first) {
vm_reserv_object_unlock(object);
return (NULL); return (NULL);
}
} }
if (msucc != NULL) { if (msucc != NULL) {
if ((rv = vm_reserv_from_page(msucc))->object != object) if ((rv = vm_reserv_from_page(msucc))->object != object)
rightcap = msucc->pindex; rightcap = msucc->pindex;
else else
rightcap = rv->pindex; rightcap = rv->pindex;
if (first + VM_LEVEL_0_NPAGES > rightcap) if (first + VM_LEVEL_0_NPAGES > rightcap) {
vm_reserv_object_unlock(object);
return (NULL); return (NULL);
}
} }
vm_reserv_object_unlock(object);
/* /*
* Would a new reservation extend past the end of the object? * Would a new reservation extend past the end of the object?
@ -712,37 +927,10 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain,
rv = vm_reserv_from_page(m); rv = vm_reserv_from_page(m);
KASSERT(rv->pages == m, KASSERT(rv->pages == m,
("vm_reserv_alloc_page: reserv %p's pages is corrupted", rv)); ("vm_reserv_alloc_page: reserv %p's pages is corrupted", rv));
KASSERT(rv->object == NULL, vm_reserv_insert(rv, object, first);
("vm_reserv_alloc_page: reserv %p isn't free", rv));
LIST_INSERT_HEAD(&object->rvq, rv, objq);
rv->object = object;
rv->pindex = first;
rv->domain = domain;
KASSERT(rv->popcnt == 0,
("vm_reserv_alloc_page: reserv %p's popcnt is corrupted", rv));
KASSERT(!rv->inpartpopq,
("vm_reserv_alloc_page: reserv %p's inpartpopq is TRUE", rv));
for (i = 0; i < NPOPMAP; i++)
KASSERT(rv->popmap[i] == 0,
("vm_reserv_alloc_page: reserv %p's popmap is corrupted",
rv));
index = VM_RESERV_INDEX(object, pindex); index = VM_RESERV_INDEX(object, pindex);
vm_reserv_populate(rv, index); vm_reserv_populate(rv, index);
return (&rv->pages[index]); return (&rv->pages[index]);
/*
* Found a matching reservation.
*/
found:
index = VM_RESERV_INDEX(object, pindex);
m = &rv->pages[index];
KASSERT(object != kernel_object || vm_phys_domidx(m) == domain,
("vm_reserv_alloc_page: Domain mismatch from reservation."));
/* Handle vm_page_rename(m, new_object, ...). */
if (popmap_is_set(rv->popmap, index))
return (NULL);
vm_reserv_populate(rv, index);
return (m);
} }
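The list manipulation and bookkeeping that used to be open-coded in the allocators is now centralized in vm_reserv_insert() and vm_reserv_remove() (the latter is used by vm_reserv_break() just below). Neither helper appears in this excerpt; the sketch below is pieced together from the deleted inline code, with the reserv-object lock placement an assumption. Note that rv->domain is no longer set here at all, since vm_reserv_init() now records it per segment.

static void
vm_reserv_insert(vm_reserv_t rv, vm_object_t object, vm_pindex_t pindex)
{
    int i;

    vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
    KASSERT(rv->object == NULL,
        ("vm_reserv_insert: reserv %p isn't free", rv));
    KASSERT(rv->popcnt == 0 && !rv->inpartpopq,
        ("vm_reserv_insert: reserv %p is corrupted", rv));
    for (i = 0; i < NPOPMAP; i++)
        KASSERT(rv->popmap[i] == 0,
            ("vm_reserv_insert: reserv %p's popmap is corrupted", rv));
    vm_reserv_object_lock(object);
    rv->pindex = pindex;
    rv->object = object;
    LIST_INSERT_HEAD(&object->rvq, rv, objq);
    vm_reserv_object_unlock(object);
}

static void
vm_reserv_remove(vm_reserv_t rv)
{
    vm_object_t object;

    vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
    KASSERT(rv->object != NULL,
        ("vm_reserv_remove: reserv %p is free", rv));
    KASSERT(!rv->inpartpopq,
        ("vm_reserv_remove: reserv %p's inpartpopq is TRUE", rv));
    object = rv->object;
    vm_reserv_object_lock(object);
    LIST_REMOVE(rv, objq);
    rv->object = NULL;
    vm_reserv_object_unlock(object);
}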
/* /*
@ -759,14 +947,8 @@ vm_reserv_break(vm_reserv_t rv, vm_page_t m)
{ {
int begin_zeroes, hi, i, lo; int begin_zeroes, hi, i, lo;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
KASSERT(rv->object != NULL, vm_reserv_remove(rv);
("vm_reserv_break: reserv %p is free", rv));
KASSERT(!rv->inpartpopq,
("vm_reserv_break: reserv %p's inpartpopq is TRUE", rv));
LIST_REMOVE(rv, objq);
rv->object = NULL;
rv->domain = -1;
if (m != NULL) { if (m != NULL) {
/* /*
* Since the reservation is being broken, there is no harm in * Since the reservation is being broken, there is no harm in
@ -830,9 +1012,26 @@ void
vm_reserv_break_all(vm_object_t object) vm_reserv_break_all(vm_object_t object)
{ {
vm_reserv_t rv; vm_reserv_t rv;
struct vm_domain *vmd;
mtx_lock(&vm_page_queue_free_mtx); /*
* This access of object->rvq is unsynchronized so that the
* object rvq lock can nest after the domain_free lock. We
* must check for races in the results. However, the object
* lock prevents new additions, so we are guaranteed that when
* it returns NULL the object is properly empty.
*/
vmd = NULL;
while ((rv = LIST_FIRST(&object->rvq)) != NULL) { while ((rv = LIST_FIRST(&object->rvq)) != NULL) {
if (vmd != VM_DOMAIN(rv->domain)) {
if (vmd != NULL)
vm_domain_free_unlock(vmd);
vmd = VM_DOMAIN(rv->domain);
vm_domain_free_lock(vmd);
}
/* Reclaim race. */
if (rv->object != object)
continue;
KASSERT(rv->object == object, KASSERT(rv->object == object,
("vm_reserv_break_all: reserv %p is corrupted", rv)); ("vm_reserv_break_all: reserv %p is corrupted", rv));
if (rv->inpartpopq) { if (rv->inpartpopq) {
@ -841,7 +1040,8 @@ vm_reserv_break_all(vm_object_t object)
} }
vm_reserv_break(rv, NULL); vm_reserv_break(rv, NULL);
} }
mtx_unlock(&vm_page_queue_free_mtx); if (vmd != NULL)
vm_domain_free_unlock(vmd);
} }
/* /*
@ -855,8 +1055,8 @@ vm_reserv_free_page(vm_page_t m)
{ {
vm_reserv_t rv; vm_reserv_t rv;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
rv = vm_reserv_from_page(m); rv = vm_reserv_from_page(m);
vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
if (rv->object == NULL) if (rv->object == NULL)
return (FALSE); return (FALSE);
vm_reserv_depopulate(rv, m - rv->pages); vm_reserv_depopulate(rv, m - rv->pages);
@ -886,6 +1086,8 @@ vm_reserv_init(void)
while (paddr + VM_LEVEL_0_SIZE <= seg->end) { while (paddr + VM_LEVEL_0_SIZE <= seg->end) {
vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].pages = vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].pages =
PHYS_TO_VM_PAGE(paddr); PHYS_TO_VM_PAGE(paddr);
vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].domain =
seg->domain;
paddr += VM_LEVEL_0_SIZE; paddr += VM_LEVEL_0_SIZE;
} }
} }
@ -902,8 +1104,8 @@ vm_reserv_is_page_free(vm_page_t m)
{ {
vm_reserv_t rv; vm_reserv_t rv;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
rv = vm_reserv_from_page(m); rv = vm_reserv_from_page(m);
vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
if (rv->object == NULL) if (rv->object == NULL)
return (false); return (false);
return (popmap_is_clear(rv->popmap, m - rv->pages)); return (popmap_is_clear(rv->popmap, m - rv->pages));
@ -945,7 +1147,7 @@ static void
vm_reserv_reclaim(vm_reserv_t rv) vm_reserv_reclaim(vm_reserv_t rv)
{ {
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
KASSERT(rv->inpartpopq, KASSERT(rv->inpartpopq,
("vm_reserv_reclaim: reserv %p's inpartpopq is FALSE", rv)); ("vm_reserv_reclaim: reserv %p's inpartpopq is FALSE", rv));
KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains, KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
@ -969,7 +1171,7 @@ vm_reserv_reclaim_inactive(int domain)
{ {
vm_reserv_t rv; vm_reserv_t rv;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); vm_domain_free_assert_locked(VM_DOMAIN(domain));
if ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) { if ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) {
vm_reserv_reclaim(rv); vm_reserv_reclaim(rv);
return (TRUE); return (TRUE);
@ -993,7 +1195,7 @@ vm_reserv_reclaim_contig(int domain, u_long npages, vm_paddr_t low,
vm_reserv_t rv; vm_reserv_t rv;
int hi, i, lo, low_index, next_free; int hi, i, lo, low_index, next_free;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); vm_domain_free_assert_locked(VM_DOMAIN(domain));
if (npages > VM_LEVEL_0_NPAGES - 1) if (npages > VM_LEVEL_0_NPAGES - 1)
return (FALSE); return (FALSE);
size = npages << PAGE_SHIFT; size = npages << PAGE_SHIFT;
@ -1084,14 +1286,19 @@ vm_reserv_rename(vm_page_t m, vm_object_t new_object, vm_object_t old_object,
VM_OBJECT_ASSERT_WLOCKED(new_object); VM_OBJECT_ASSERT_WLOCKED(new_object);
rv = vm_reserv_from_page(m); rv = vm_reserv_from_page(m);
if (rv->object == old_object) { if (rv->object == old_object) {
mtx_lock(&vm_page_queue_free_mtx); vm_domain_free_lock(VM_DOMAIN(rv->domain));
if (rv->object == old_object) { if (rv->object == old_object) {
vm_reserv_object_lock(old_object);
rv->object = NULL;
LIST_REMOVE(rv, objq); LIST_REMOVE(rv, objq);
LIST_INSERT_HEAD(&new_object->rvq, rv, objq); vm_reserv_object_unlock(old_object);
vm_reserv_object_lock(new_object);
rv->object = new_object; rv->object = new_object;
rv->pindex -= old_object_offset; rv->pindex -= old_object_offset;
LIST_INSERT_HEAD(&new_object->rvq, rv, objq);
vm_reserv_object_unlock(new_object);
} }
mtx_unlock(&vm_page_queue_free_mtx); vm_domain_free_unlock(VM_DOMAIN(rv->domain));
} }
} }
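vm_reserv_rename() above, like the leftcap/rightcap probing earlier, relies on the new "reserv object" locks: a small hashed array of mutexes initialized in vm_reserv_startup() below. The macros themselves are not shown in this excerpt; a plausible formulation follows, in which the element type, the lock count, and the hash are all assumptions.

#define VM_RESERV_OBJ_LOCK_COUNT    MAXCPU

struct mtx vm_reserv_object_mtx[VM_RESERV_OBJ_LOCK_COUNT];

/* Hash an object pointer onto one of the mutexes (assumed hash). */
#define vm_reserv_object_lock_idx(object) \
    (((uintptr_t)(object) >> 7) % VM_RESERV_OBJ_LOCK_COUNT)
#define vm_reserv_object_lock(object) \
    mtx_lock(&vm_reserv_object_mtx[vm_reserv_object_lock_idx(object)])
#define vm_reserv_object_unlock(object) \
    mtx_unlock(&vm_reserv_object_mtx[vm_reserv_object_lock_idx(object)])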
@ -1121,6 +1328,7 @@ vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water)
{ {
vm_paddr_t new_end; vm_paddr_t new_end;
size_t size; size_t size;
int i;
/* /*
* Calculate the size (in bytes) of the reservation array. Round up * Calculate the size (in bytes) of the reservation array. Round up
@ -1140,6 +1348,10 @@ vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water)
VM_PROT_READ | VM_PROT_WRITE); VM_PROT_READ | VM_PROT_WRITE);
bzero(vm_reserv_array, size); bzero(vm_reserv_array, size);
for (i = 0; i < VM_RESERV_OBJ_LOCK_COUNT; i++)
mtx_init(&vm_reserv_object_mtx[i], "resv obj lock", NULL,
MTX_DEF);
/* /*
* Return the next available physical address. * Return the next available physical address.
*/ */
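With paging control moved into struct vm_domain, pagedaemon_wakeup() now takes the domain to poke (see vm_reserv_extend() above). Its implementation lives in vm_pageout.c and is not part of this excerpt; roughly, and assuming the per-domain flag is named vmd_pageout_wanted:

void
pagedaemon_wakeup(int domain)
{
    struct vm_domain *vmd;

    vmd = VM_DOMAIN(domain);
    if (!vmd->vmd_pageout_wanted && curthread->td_proc != pageproc) {
        /* Wake only this domain's pagedaemon thread. */
        vmd->vmd_pageout_wanted = true;
        wakeup(&vmd->vmd_pageout_wanted);
    }
}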

View File

@ -50,8 +50,14 @@
vm_page_t vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, vm_page_t vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex,
int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
u_long alignment, vm_paddr_t boundary, vm_page_t mpred); u_long alignment, vm_paddr_t boundary, vm_page_t mpred);
vm_page_t vm_reserv_extend_contig(int req, vm_object_t object,
vm_pindex_t pindex, int domain, u_long npages,
vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_page_t mpred);
vm_page_t vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, vm_page_t vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex,
int domain, vm_page_t mpred); int domain, vm_page_t mpred);
vm_page_t vm_reserv_extend(int req, vm_object_t object,
vm_pindex_t pindex, int domain, vm_page_t mpred);
void vm_reserv_break_all(vm_object_t object); void vm_reserv_break_all(vm_object_t object);
boolean_t vm_reserv_free_page(vm_page_t m); boolean_t vm_reserv_free_page(vm_page_t m);
void vm_reserv_init(void); void vm_reserv_init(void);
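The header now pairs each reservation allocator with an "extend" variant. The intended calling convention is to try the extend path first with only the object lock held and let it take the domain free lock briefly on its own, falling back to the allocator with the free lock held. The caller below is hypothetical (vm_page_alloc()'s actual code is not part of this excerpt, and it additionally checks vm_domain_available() and adjusts the free count before unlocking):

static vm_page_t
example_alloc_page(int req, vm_object_t object, vm_pindex_t pindex,
    int domain, vm_page_t mpred)
{
    struct vm_domain *vmd;
    vm_page_t m;

    /* Fast path: extend an existing reservation; the domain free lock
     * is taken inside vm_reserv_extend() only long enough to claim
     * one page. */
    m = vm_reserv_extend(req, object, pindex, domain, mpred);
    if (m != NULL)
        return (m);

    /* Slow path: create a new reservation (or fall through to the
     * physical allocator) with the per-domain free lock held. */
    vmd = VM_DOMAIN(domain);
    vm_domain_free_lock(vmd);
    m = vm_reserv_alloc_page(object, pindex, domain, mpred);
    vm_domain_free_unlock(vmd);
    return (m);
}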

View File

@ -650,7 +650,7 @@ swapper(void)
loop: loop:
if (vm_page_count_min()) { if (vm_page_count_min()) {
VM_WAIT; vm_wait_min();
goto loop; goto loop;
} }

View File

@ -1167,7 +1167,7 @@ vnode_pager_putpages(vm_object_t object, vm_page_t *m, int count,
* daemon up. This should probably be addressed XXX. * daemon up. This should probably be addressed XXX.
*/ */
if (vm_cnt.v_free_count < vm_cnt.v_pageout_free_min) if (vm_page_count_min())
flags |= VM_PAGER_PUT_SYNC; flags |= VM_PAGER_PUT_SYNC;
/* /*