Fix OOM handling of some corner cases.
In addition to the pagedaemon initiating OOM, also do it from the vm_fault() internals. Namely, if a thread waits for a free page to satisfy a page fault for more than a preconfigured amount of time, trigger OOM. These triggers are rate-limited, because it is common for several threads of the same multi-threaded process to enter the fault handler simultaneously. The faults from pagedaemon threads participate in the calculation of the OOM rate, but are not subject to the limit. Reviewed by: markj (previous version) Tested by: pho Discussed with: alc Sponsored by: The FreeBSD Foundation MFC after: 2 weeks Differential revision: https://reviews.freebsd.org/D13671
This commit is contained in:
parent
3a79e8e772
commit
245139c69d
@ -135,6 +135,18 @@ static void vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr,
|
||||
static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra,
|
||||
int backward, int forward, bool obj_locked);
|
||||
|
||||
static int vm_pfault_oom_attempts = 3;
|
||||
SYSCTL_INT(_vm, OID_AUTO, pfault_oom_attempts, CTLFLAG_RWTUN,
|
||||
&vm_pfault_oom_attempts, 0,
|
||||
"Number of page allocation attempts in page fault handler before it "
|
||||
"triggers OOM handling");
|
||||
|
||||
static int vm_pfault_oom_wait = 10;
|
||||
SYSCTL_INT(_vm, OID_AUTO, pfault_oom_wait, CTLFLAG_RWTUN,
|
||||
&vm_pfault_oom_wait, 0,
|
||||
"Number of seconds to wait for free pages before retrying "
|
||||
"the page fault handler");
|
||||
|
||||
static inline void
|
||||
release_page(struct faultstate *fs)
|
||||
{
|
||||
@ -570,7 +582,7 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
|
||||
vm_pindex_t retry_pindex;
|
||||
vm_prot_t prot, retry_prot;
|
||||
int ahead, alloc_req, behind, cluster_offset, error, era, faultcount;
|
||||
int locked, nera, result, rv;
|
||||
int locked, nera, oom, result, rv;
|
||||
u_char behavior;
|
||||
boolean_t wired; /* Passed by reference. */
|
||||
bool dead, hardfault, is_first_object_locked;
|
||||
@ -581,7 +593,9 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
|
||||
nera = -1;
|
||||
hardfault = false;
|
||||
|
||||
RetryFault:;
|
||||
RetryFault:
|
||||
oom = 0;
|
||||
RetryFault_oom:
|
||||
|
||||
/*
|
||||
* Find the backing store object and offset into it to begin the
|
||||
@ -827,7 +841,18 @@ RetryFault:;
|
||||
}
|
||||
if (fs.m == NULL) {
|
||||
unlock_and_deallocate(&fs);
|
||||
vm_waitpfault(dset);
|
||||
if (vm_pfault_oom_attempts < 0 ||
|
||||
oom < vm_pfault_oom_attempts) {
|
||||
oom++;
|
||||
vm_waitpfault(dset,
|
||||
vm_pfault_oom_wait * hz);
|
||||
goto RetryFault_oom;
|
||||
}
|
||||
if (bootverbose)
|
||||
printf(
|
||||
"proc %d (%s) failed to alloc page on fault, starting OOM\n",
|
||||
curproc->p_pid, curproc->p_comm);
|
||||
vm_pageout_oom(VM_OOM_MEM_PF);
|
||||
goto RetryFault;
|
||||
}
|
||||
}
|
||||
|
@ -3032,7 +3032,7 @@ vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, int req)
|
||||
* this balance without careful testing first.
|
||||
*/
|
||||
void
|
||||
vm_waitpfault(struct domainset *dset)
|
||||
vm_waitpfault(struct domainset *dset, int timo)
|
||||
{
|
||||
|
||||
/*
|
||||
@ -3044,7 +3044,7 @@ vm_waitpfault(struct domainset *dset)
|
||||
if (vm_page_count_min_set(&dset->ds_mask)) {
|
||||
vm_min_waiters++;
|
||||
msleep(&vm_min_domains, &vm_domainset_lock, PUSER | PDROP,
|
||||
"pfault", 0);
|
||||
"pfault", timo);
|
||||
} else
|
||||
mtx_unlock(&vm_domainset_lock);
|
||||
}
|
||||
|
@ -1720,6 +1720,12 @@ vm_pageout_oom_pagecount(struct vmspace *vmspace)
|
||||
return (res);
|
||||
}
|
||||
|
||||
static int vm_oom_ratelim_last;
|
||||
static int vm_oom_pf_secs = 10;
|
||||
SYSCTL_INT(_vm, OID_AUTO, oom_pf_secs, CTLFLAG_RWTUN, &vm_oom_pf_secs, 0,
|
||||
"");
|
||||
static struct mtx vm_oom_ratelim_mtx;
|
||||
|
||||
void
|
||||
vm_pageout_oom(int shortage)
|
||||
{
|
||||
@ -1727,8 +1733,30 @@ vm_pageout_oom(int shortage)
|
||||
vm_offset_t size, bigsize;
|
||||
struct thread *td;
|
||||
struct vmspace *vm;
|
||||
int now;
|
||||
bool breakout;
|
||||
|
||||
/*
|
||||
* For OOM requests originating from vm_fault(), there is a high
|
||||
* chance that a single large process faults simultaneously in
|
||||
* several threads. Also, on an active system running many
|
||||
* processes of middle-size, like buildworld, all of them
|
||||
* could fault almost simultaneously as well.
|
||||
*
|
||||
* To avoid killing too many processes, rate-limit OOMs
|
||||
* initiated by vm_fault() time-outs on the waits for free
|
||||
* pages.
|
||||
*/
|
||||
mtx_lock(&vm_oom_ratelim_mtx);
|
||||
now = ticks;
|
||||
if (shortage == VM_OOM_MEM_PF &&
|
||||
(u_int)(now - vm_oom_ratelim_last) < hz * vm_oom_pf_secs) {
|
||||
mtx_unlock(&vm_oom_ratelim_mtx);
|
||||
return;
|
||||
}
|
||||
vm_oom_ratelim_last = now;
|
||||
mtx_unlock(&vm_oom_ratelim_mtx);
|
||||
|
||||
/*
|
||||
* We keep the process bigproc locked once we find it to keep anyone
|
||||
* from messing with it; however, there is a possibility of
|
||||
@ -1793,7 +1821,7 @@ vm_pageout_oom(int shortage)
|
||||
continue;
|
||||
}
|
||||
size = vmspace_swap_count(vm);
|
||||
if (shortage == VM_OOM_MEM)
|
||||
if (shortage == VM_OOM_MEM || shortage == VM_OOM_MEM_PF)
|
||||
size += vm_pageout_oom_pagecount(vm);
|
||||
vm_map_unlock_read(&vm->vm_map);
|
||||
vmspace_free(vm);
|
||||
@ -2048,6 +2076,7 @@ vm_pageout(void)
|
||||
p = curproc;
|
||||
td = curthread;
|
||||
|
||||
mtx_init(&vm_oom_ratelim_mtx, "vmoomr", NULL, MTX_DEF);
|
||||
swap_pager_swap_init();
|
||||
for (first = -1, i = 0; i < vm_ndomains; i++) {
|
||||
if (VM_DOMAIN_EMPTY(i)) {
|
||||
|
@ -79,7 +79,8 @@ extern u_long vm_page_max_user_wired;
|
||||
extern int vm_pageout_page_count;
|
||||
|
||||
#define VM_OOM_MEM 1
|
||||
#define VM_OOM_SWAPZ 2
|
||||
#define VM_OOM_MEM_PF 2
|
||||
#define VM_OOM_SWAPZ 3
|
||||
|
||||
/*
|
||||
* vm_lowmem flags.
|
||||
@ -96,7 +97,7 @@ extern int vm_pageout_page_count;
|
||||
*/
|
||||
|
||||
void vm_wait(vm_object_t obj);
|
||||
void vm_waitpfault(struct domainset *);
|
||||
void vm_waitpfault(struct domainset *, int timo);
|
||||
void vm_wait_domain(int domain);
|
||||
void vm_wait_min(void);
|
||||
void vm_wait_severe(void);
|
||||
|
Loading…
Reference in New Issue
Block a user