The flag "vm_pages_needed" has long served two distinct purposes: (1) to
indicate that threads are waiting for free pages to become available and
(2) to indicate whether a wakeup call has been sent to the page daemon.
The trouble is that a single flag cannot really serve both purposes, because
we have two distinct targets for when to wake up threads waiting for free
pages versus when the page daemon has completed its work.  In particular,
the flag will be cleared by vm_page_free() before the page daemon has met
its target, and this can lead to the OOM killer being invoked prematurely.
To address this problem, a new flag "vm_pageout_wanted" is introduced.

Discussed with:	jeff
Reviewed by:	kib, markj
Tested by:	markj
Sponsored by:	EMC / Isilon Storage Division
This commit is contained in:
Alan Cox 2016-05-27 19:15:45 +00:00
parent 23413065d4
commit 56ce06907c
4 changed files with 65 additions and 44 deletions

View File

@ -76,7 +76,7 @@ struct vmmeter {
u_int v_vnodepgsout; /* (p) vnode pager pages paged out */
u_int v_intrans; /* (p) intransit blocking page faults */
u_int v_reactivated; /* (f) pages reactivated from free list */
u_int v_pdwakeups; /* (f) times daemon has awaken from sleep */
u_int v_pdwakeups; /* (p) times daemon has awaken from sleep */
u_int v_pdpages; /* (p) pages analyzed by daemon */
u_int v_tcached; /* (p) total pages cached */

View File

@ -2700,10 +2700,11 @@ vm_wait(void)
msleep(&vm_pageout_pages_needed, &vm_page_queue_free_mtx,
PDROP | PSWP, "VMWait", 0);
} else {
if (!vm_pages_needed) {
vm_pages_needed = 1;
wakeup(&vm_pages_needed);
if (!vm_pageout_wanted) {
vm_pageout_wanted = true;
wakeup(&vm_pageout_wanted);
}
vm_pages_needed = true;
msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PVM,
"vmwait", 0);
}
@ -2724,10 +2725,11 @@ vm_waitpfault(void)
{
mtx_lock(&vm_page_queue_free_mtx);
if (!vm_pages_needed) {
vm_pages_needed = 1;
wakeup(&vm_pages_needed);
if (!vm_pageout_wanted) {
vm_pageout_wanted = true;
wakeup(&vm_pageout_wanted);
}
vm_pages_needed = true;
msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PUSER,
"pfault", 0);
}
@ -2908,7 +2910,7 @@ vm_page_free_wakeup(void)
* lots of memory. this process will swapin processes.
*/
if (vm_pages_needed && !vm_page_count_min()) {
vm_pages_needed = 0;
vm_pages_needed = false;
wakeup(&vm_cnt.v_free_count);
}
}

View File

@ -156,10 +156,11 @@ SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp);
#endif
int vm_pages_needed; /* Event on which pageout daemon sleeps */
int vm_pageout_deficit; /* Estimated number of pages deficit */
int vm_pageout_wakeup_thresh;
static int vm_pageout_oom_seq = 12;
bool vm_pageout_wanted; /* Event on which pageout daemon sleeps */
bool vm_pages_needed; /* Are threads waiting for free pages? */
#if !defined(NO_SWAPPING)
static int vm_pageout_req_swapout; /* XXX */
@ -1550,48 +1551,65 @@ vm_pageout_worker(void *arg)
* The pageout daemon worker is never done, so loop forever.
*/
while (TRUE) {
/*
* If we have enough free memory, wakeup waiters. Do
* not clear vm_pages_needed until we reach our target,
* otherwise we may be woken up over and over again and
* waste a lot of cpu.
*/
mtx_lock(&vm_page_queue_free_mtx);
/*
* Generally, after a level >= 1 scan, if there are enough
* free pages to wakeup the waiters, then they are already
* awake. A call to vm_page_free() during the scan awakened
* them. However, in the following case, this wakeup serves
* to bound the amount of time that a thread might wait.
* Suppose a thread's call to vm_page_alloc() fails, but
* before that thread calls VM_WAIT, enough pages are freed by
* other threads to alleviate the free page shortage. The
* thread will, nonetheless, wait until another page is freed
* or this wakeup is performed.
*/
if (vm_pages_needed && !vm_page_count_min()) {
if (!vm_paging_needed())
vm_pages_needed = 0;
vm_pages_needed = false;
wakeup(&vm_cnt.v_free_count);
}
if (vm_pages_needed) {
/*
* Do not clear vm_pageout_wanted until we reach our target.
* Otherwise, we may be awakened over and over again, wasting
* CPU time.
*/
if (vm_pageout_wanted && !vm_paging_needed())
vm_pageout_wanted = false;
/*
* Might the page daemon receive a wakeup call?
*/
if (vm_pageout_wanted) {
/*
* We're still not done. Either vm_pages_needed was
* set by another thread during the previous scan
* (typically, this happens during a level 0 scan) or
* vm_pages_needed was already set and the scan failed
* to free enough pages. If we haven't yet performed
* a level >= 2 scan (unlimited dirty cleaning), then
* upgrade the level and scan again now. Otherwise,
* sleep a bit and try again later. While sleeping,
* vm_pages_needed can be cleared.
* No. Either vm_pageout_wanted was set by another
* thread during the previous scan, which must have
* been a level 0 scan, or vm_pageout_wanted was
* already set and the scan failed to free enough
* pages. If we haven't yet performed a level >= 2
* scan (unlimited dirty cleaning), then upgrade the
* level and scan again now. Otherwise, sleep a bit
* and try again later.
*/
mtx_unlock(&vm_page_queue_free_mtx);
if (domain->vmd_pass > 1)
msleep(&vm_pages_needed,
&vm_page_queue_free_mtx, PVM, "psleep",
hz / 2);
pause("psleep", hz / 2);
domain->vmd_pass++;
} else {
/*
* Good enough, sleep until required to refresh
* stats.
* Yes. Sleep until pages need to be reclaimed or
* have their reference stats updated.
*/
msleep(&vm_pages_needed, &vm_page_queue_free_mtx,
PVM, "psleep", hz);
if (mtx_sleep(&vm_pageout_wanted,
&vm_page_queue_free_mtx, PDROP | PVM, "psleep",
hz) == 0) {
PCPU_INC(cnt.v_pdwakeups);
domain->vmd_pass = 1;
} else
domain->vmd_pass = 0;
}
if (vm_pages_needed) {
vm_cnt.v_pdwakeups++;
domain->vmd_pass++;
} else
domain->vmd_pass = 0;
mtx_unlock(&vm_page_queue_free_mtx);
vm_pageout_scan(domain, domain->vmd_pass);
}
}
@ -1688,9 +1706,9 @@ void
pagedaemon_wakeup(void)
{
if (!vm_pages_needed && curthread->td_proc != pageproc) {
vm_pages_needed = 1;
wakeup(&vm_pages_needed);
if (!vm_pageout_wanted && curthread->td_proc != pageproc) {
vm_pageout_wanted = true;
wakeup(&vm_pageout_wanted);
}
}

View File

@ -72,9 +72,10 @@
*/
extern int vm_page_max_wired;
extern int vm_pages_needed; /* should be some "event" structure */
extern int vm_pageout_deficit;
extern int vm_pageout_page_count;
extern bool vm_pageout_wanted;
extern bool vm_pages_needed;
/*
* Swap out requests