diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h
index 3c570b0a6b7f..c41b151fa502 100644
--- a/sys/sys/vmmeter.h
+++ b/sys/sys/vmmeter.h
@@ -187,6 +187,13 @@ vm_page_count_severe(void)
 	return (!DOMAINSET_EMPTY(&vm_severe_domains));
 }
 
+static inline int
+vm_page_count_severe_set(domainset_t *mask)
+{
+
+	return (DOMAINSET_SUBSET(&vm_severe_domains, mask));
+}
+
 /*
  * Return TRUE if we are under our minimum low-free-pages threshold.
  *
diff --git a/sys/vm/vm_domainset.c b/sys/vm/vm_domainset.c
index eae083adade6..93f7c2596de2 100644
--- a/sys/vm/vm_domainset.c
+++ b/sys/vm/vm_domainset.c
@@ -100,6 +100,8 @@ vm_domainset_iter_init(struct vm_domainset_iter *di, struct vm_object *obj,
 		pindex += (((uintptr_t)obj) / sizeof(*obj));
 		di->di_offset = pindex;
 	}
+	/* Skip zones below min on the first pass. */
+	di->di_minskip = true;
 }
 
 static void
@@ -213,6 +215,8 @@ vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
 	*req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) |
 	    VM_ALLOC_NOWAIT;
 	vm_domainset_iter_first(di, domain);
+	if (DOMAINSET_ISSET(*domain, &vm_min_domains))
+		vm_domainset_iter_page(di, domain, req);
 }
 
 int
@@ -227,8 +231,15 @@ vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req)
 		return (ENOMEM);
 
 	/* If there are more domains to visit we run the iterator. */
-	if (--di->di_n != 0) {
+	while (--di->di_n != 0) {
 		vm_domainset_iter_next(di, domain);
+		if (!di->di_minskip ||
+		    !DOMAINSET_ISSET(*domain, &vm_min_domains))
+			return (0);
+	}
+	if (di->di_minskip) {
+		di->di_minskip = false;
+		vm_domainset_iter_first(di, domain);
 		return (0);
 	}
 
@@ -258,6 +269,8 @@ vm_domainset_iter_malloc_init(struct vm_domainset_iter *di,
 	di->di_flags = *flags;
 	*flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT;
 	vm_domainset_iter_first(di, domain);
+	if (DOMAINSET_ISSET(*domain, &vm_min_domains))
+		vm_domainset_iter_malloc(di, domain, flags);
 }
 
 int
@@ -265,8 +278,17 @@ vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags)
 {
 
 	/* If there are more domains to visit we run the iterator. */
-	if (--di->di_n != 0) {
+	while (--di->di_n != 0) {
 		vm_domainset_iter_next(di, domain);
+		if (!di->di_minskip ||
+		    !DOMAINSET_ISSET(*domain, &vm_min_domains))
+			return (0);
+	}
+
+	/* If we skipped zones below min start the search from the beginning. */
+	if (di->di_minskip) {
+		di->di_minskip = false;
+		vm_domainset_iter_first(di, domain);
 		return (0);
 	}
 
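The vm_domainset.c hunks above amount to a two-pass search: the round-robin walk first skips any domain currently in vm_min_domains, and only if every candidate was skipped does it restart from the beginning and accept depleted domains. A minimal userspace sketch of that control flow (struct iter, min_mask, and NDOMAINS are illustrative stand-ins, not kernel interfaces):

#include <stdbool.h>
#include <stdio.h>

#define NDOMAINS	4

/* Stand-ins for struct vm_domainset_iter and vm_min_domains. */
struct iter {
	int	n;		/* candidates left in this pass */
	int	cur;		/* current domain */
	bool	minskip;	/* skip depleted domains on the first pass */
};

static void
iter_first(struct iter *it)
{
	it->n = NDOMAINS;
	it->cur = 0;
}

static void
iter_next(struct iter *it)
{
	it->cur = (it->cur + 1) % NDOMAINS;
}

/* Advance to the next candidate; -1 once the search is exhausted. */
static int
iter_page(struct iter *it, unsigned min_mask)
{
	while (--it->n != 0) {
		iter_next(it);
		if (!it->minskip || (min_mask & (1u << it->cur)) == 0)
			return (0);
	}
	/* Every remaining domain was depleted: retry without skipping. */
	if (it->minskip) {
		it->minskip = false;
		iter_first(it);
		return (0);
	}
	return (-1);
}

int
main(void)
{
	struct iter it = { .minskip = true };
	unsigned min_mask = 0x5;	/* domains 0 and 2 below min */

	iter_first(&it);
	if (min_mask & (1u << it.cur))	/* mirrors the *_init() hunks */
		iter_page(&it, min_mask);
	do
		printf("try domain %d\n", it.cur);
	while (iter_page(&it, min_mask) == 0);
	return (0);
}

With domains 0 and 2 marked depleted, this tries domains 1 and 3 first and only then walks the full set, which is the ordering the iterator changes aim for.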
diff --git a/sys/vm/vm_domainset.h b/sys/vm/vm_domainset.h
index 542fe47da677..10da5caa0ea7 100644
--- a/sys/vm/vm_domainset.h
+++ b/sys/vm/vm_domainset.h
@@ -34,9 +34,10 @@ struct vm_domainset_iter {
 	struct domainset	*di_domain;
 	int			*di_iter;
 	vm_pindex_t		di_offset;
-	int			di_policy;
 	int			di_flags;
-	int			di_n;
+	uint16_t		di_policy;
+	domainid_t		di_n;
+	bool			di_minskip;
 };
 
 int	vm_domainset_iter_page(struct vm_domainset_iter *, int *, int *);
@@ -46,4 +47,6 @@ int	vm_domainset_iter_malloc(struct vm_domainset_iter *, int *, int *);
 void	vm_domainset_iter_malloc_init(struct vm_domainset_iter *,
 	    struct vm_object *, int *, int *);
 
+void	vm_wait_doms(const domainset_t *);
+
 #endif /* __VM_DOMAINSET_H__ */
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index e35c31b5427f..d5a6b57f47e3 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -548,6 +548,7 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
 {
 	struct faultstate fs;
 	struct vnode *vp;
+	struct domainset *dset;
 	vm_object_t next_object, retry_object;
 	vm_offset_t e_end, e_start;
 	vm_pindex_t retry_pindex;
@@ -791,7 +792,11 @@ RetryFault:;
 			 * there, and allocation can fail, causing
 			 * restart and new reading of the p_flag.
 			 */
-			if (!vm_page_count_severe() || P_KILLED(curproc)) {
+			dset = fs.object->domain.dr_policy;
+			if (dset == NULL)
+				dset = curthread->td_domain.dr_policy;
+			if (!vm_page_count_severe_set(&dset->ds_mask) ||
+			    P_KILLED(curproc)) {
 #if VM_NRESERVLEVEL > 0
 				vm_object_color(fs.object, atop(vaddr) -
 				    fs.pindex);
@@ -806,7 +811,7 @@ RetryFault:;
 			}
 			if (fs.m == NULL) {
 				unlock_and_deallocate(&fs);
-				vm_waitpfault();
+				vm_waitpfault(dset);
 				goto RetryFault;
 			}
 		}
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index 7952c81a1afe..832dbce324ef 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -92,6 +92,7 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -534,6 +535,7 @@ vm_forkproc(struct thread *td, struct proc *p2, struct thread *td2,
     struct vmspace *vm2, int flags)
 {
 	struct proc *p1 = td->td_proc;
+	struct domainset *dset;
 	int error;
 
 	if ((flags & RFPROC) == 0) {
@@ -557,9 +559,9 @@ vm_forkproc(struct thread *td, struct proc *p2, struct thread *td2,
 		p2->p_vmspace = p1->p_vmspace;
 		atomic_add_int(&p1->p_vmspace->vm_refcnt, 1);
 	}
-
-	while (vm_page_count_severe()) {
-		vm_wait_severe();
+	dset = td2->td_domain.dr_policy;
+	while (vm_page_count_severe_set(&dset->ds_mask)) {
+		vm_wait_doms(&dset->ds_mask);
 	}
 
 	if ((flags & RFMEM) == 0) {
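Note how the vm_fault.c hunk picks the domainset before testing the severe threshold: the object's policy is preferred, and the faulting thread's policy is the fallback when the object has none. A hypothetical userspace rendering of that fallback, with simplified stand-in types (the kernel's are struct domainset_ref fields holding struct domainset pointers):

#include <stddef.h>
#include <stdio.h>

/* Stand-in types; the kernel uses struct domainset and domainset_t. */
struct domainset {
	unsigned	ds_mask;
};

struct object {
	struct domainset *dr_policy;	/* NULL when never set */
};

struct thread {
	struct domainset *dr_policy;	/* always initialized */
};

/* Mirrors the fallback added to vm_fault_hold(). */
static struct domainset *
effective_policy(const struct object *obj, const struct thread *td)
{
	struct domainset *dset;

	dset = obj->dr_policy;
	if (dset == NULL)
		dset = td->dr_policy;
	return (dset);
}

int
main(void)
{
	struct domainset thread_set = { 0xf }, object_set = { 0x1 };
	struct thread td = { &thread_set };
	struct object with = { &object_set }, without = { NULL };

	printf("object policy wins: %#x\n",
	    effective_policy(&with, &td)->ds_mask);
	printf("thread fallback:    %#x\n",
	    effective_policy(&without, &td)->ds_mask);
	return (0);
}

vm_forkproc() makes the equivalent choice with td2->td_domain.dr_policy directly; a thread's policy reference is always initialized, so no NULL fallback is needed there.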
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 539e6effd69a..850b0638841e 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -2935,7 +2935,7 @@ vm_wait_count(void)
 	return (vm_severe_waiters + vm_min_waiters + vm_pageproc_waiters);
 }
 
-static void
+void
 vm_wait_doms(const domainset_t *wdoms)
 {
 
@@ -2961,10 +2961,10 @@ vm_wait_doms(const domainset_t *wdoms)
 		mtx_lock(&vm_domainset_lock);
 		if (DOMAINSET_SUBSET(&vm_min_domains, wdoms)) {
 			vm_min_waiters++;
-			msleep(&vm_min_domains, &vm_domainset_lock, PVM,
-			    "vmwait", 0);
-		}
-		mtx_unlock(&vm_domainset_lock);
+			msleep(&vm_min_domains, &vm_domainset_lock,
+			    PVM | PDROP, "vmwait", 0);
+		} else
+			mtx_unlock(&vm_domainset_lock);
 	}
 }
 
@@ -3069,15 +3069,21 @@ vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, int req)
  * this balance without careful testing first.
  */
 void
-vm_waitpfault(void)
+vm_waitpfault(struct domainset *dset)
 {
 
+	/*
+	 * XXX Ideally we would wait only until the allocation could
+	 * be satisfied. This condition can cause new allocators to
+	 * consume all freed pages while old allocators wait.
+	 */
 	mtx_lock(&vm_domainset_lock);
-	if (vm_page_count_min()) {
+	if (DOMAINSET_SUBSET(&vm_min_domains, &dset->ds_mask)) {
 		vm_min_waiters++;
-		msleep(&vm_min_domains, &vm_domainset_lock, PUSER, "pfault", 0);
-	}
-	mtx_unlock(&vm_domainset_lock);
+		msleep(&vm_min_domains, &vm_domainset_lock, PUSER | PDROP,
+		    "pfault", 0);
+	} else
+		mtx_unlock(&vm_domainset_lock);
 }
 
 struct vm_pagequeue *
diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h
index 935088f1f301..3b238a0ba3fb 100644
--- a/sys/vm/vm_pageout.h
+++ b/sys/vm/vm_pageout.h
@@ -96,7 +96,7 @@ extern int vm_pageout_page_count;
  */
 
 void vm_wait(vm_object_t obj);
-void vm_waitpfault(void);
+void vm_waitpfault(struct domainset *);
 void vm_wait_domain(int domain);
 void vm_wait_min(void);
 void vm_wait_severe(void);
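All of the rewritten wait paths gate sleeping on the same predicate: block only when every domain the policy allows is depleted, which DOMAINSET_SUBSET(&vm_min_domains, mask) expresses as "mask is a subset of the min set". A plain-bitmask sketch of that predicate; all_depleted() is a hypothetical helper standing in for the kernel macro:

#include <assert.h>
#include <stdbool.h>

/*
 * Sketch only: a bitmask stands in for domainset_t, and all_depleted()
 * mirrors DOMAINSET_SUBSET(depleted, mask), i.e. "is mask a subset of
 * depleted?".
 */
static bool
all_depleted(unsigned depleted, unsigned mask)
{
	return ((mask & depleted) == mask);
}

int
main(void)
{
	unsigned vm_min_domains = 0x3;	/* domains 0 and 1 below min */

	/* Policy also allows domain 2, which has pages: do not sleep. */
	assert(!all_depleted(vm_min_domains, 0x6));

	/* Policy restricted to domains 0 and 1: both depleted, sleep. */
	assert(all_depleted(vm_min_domains, 0x3));
	return (0);
}

The msleep() calls also gain PDROP, so vm_domainset_lock is released as part of going to sleep; that is why the unconditional mtx_unlock() turns into an else branch in both vm_wait_doms() and vm_waitpfault().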