diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 2fb7c55e6af4..6f50053604c7 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -118,7 +118,8 @@ __FBSDID("$FreeBSD$");
 /* the kernel process "vm_pageout"*/
 static void vm_pageout(void);
 static void vm_pageout_init(void);
-static int vm_pageout_clean(vm_page_t);
+static int vm_pageout_clean(vm_page_t m);
+static int vm_pageout_cluster(vm_page_t m);
 static void vm_pageout_scan(struct vm_domain *vmd, int pass);
 static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass);
 
@@ -347,7 +348,7 @@ vm_pageout_page_lock(vm_page_t m, vm_page_t *next)
  * late and we cannot do anything that will mess with the page.
  */
 static int
-vm_pageout_clean(vm_page_t m)
+vm_pageout_cluster(vm_page_t m)
 {
 	vm_object_t object;
 	vm_page_t mc[2*vm_pageout_page_count], pb, ps;
@@ -905,6 +906,115 @@ vm_pageout_map_deactivate_pages(map, desired)
 }
 #endif		/* !defined(NO_SWAPPING) */
 
+/*
+ * Attempt to acquire all of the necessary locks to launder a page and
+ * then call through the clustering layer to PUTPAGES.  Wait a short
+ * time for a vnode lock.
+ *
+ * Requires the page and object lock on entry, releases both before return.
+ * Returns 0 on success and an errno otherwise.
+ */
+static int
+vm_pageout_clean(vm_page_t m)
+{
+	struct vnode *vp;
+	struct mount *mp;
+	vm_object_t object;
+	vm_pindex_t pindex;
+	int error, lockmode;
+
+	vm_page_assert_locked(m);
+	object = m->object;
+	VM_OBJECT_ASSERT_WLOCKED(object);
+	error = 0;
+	vp = NULL;
+	mp = NULL;
+
+	/*
+	 * The object is already known NOT to be dead.  It
+	 * is possible for the vget() to block the whole
+	 * pageout daemon, but the new low-memory handling
+	 * code should prevent it.
+	 *
+	 * We can't wait forever for the vnode lock, we might
+	 * deadlock due to a vn_read() getting stuck in
+	 * vm_wait while holding this vnode.  We skip the
+	 * vnode if we can't get it in a reasonable amount
+	 * of time.
+	 */
+	if (object->type == OBJT_VNODE) {
+		vm_page_unlock(m);
+		vp = object->handle;
+		if (vp->v_type == VREG &&
+		    vn_start_write(vp, &mp, V_NOWAIT) != 0) {
+			mp = NULL;
+			error = EDEADLK;
+			goto unlock_all;
+		}
+		KASSERT(mp != NULL,
+		    ("vp %p with NULL v_mount", vp));
+		vm_object_reference_locked(object);
+		pindex = m->pindex;
+		VM_OBJECT_WUNLOCK(object);
+		lockmode = MNT_SHARED_WRITES(vp->v_mount) ?
+		    LK_SHARED : LK_EXCLUSIVE;
+		if (vget(vp, lockmode | LK_TIMELOCK, curthread)) {
+			vp = NULL;
+			error = EDEADLK;
+			goto unlock_mp;
+		}
+		VM_OBJECT_WLOCK(object);
+		vm_page_lock(m);
+		/*
+		 * While the object and page were unlocked, the page
+		 * may have been:
+		 * (1) moved to a different queue,
+		 * (2) reallocated to a different object,
+		 * (3) reallocated to a different offset, or
+		 * (4) cleaned.
+		 */
+		if (m->queue != PQ_INACTIVE || m->object != object ||
+		    m->pindex != pindex || m->dirty == 0) {
+			vm_page_unlock(m);
+			error = ENXIO;
+			goto unlock_all;
+		}
+
+		/*
+		 * The page may have been busied or held while the object
+		 * and page locks were released.
+		 */
+		if (vm_page_busied(m) || m->hold_count != 0) {
+			vm_page_unlock(m);
+			error = EBUSY;
+			goto unlock_all;
+		}
+	}
+
+	/*
+	 * If a page is dirty, then it is either being washed
+	 * (but not yet cleaned) or it is still in the
+	 * laundry.  If it is still in the laundry, then we
+	 * start the cleaning operation.
+	 */
+	if (vm_pageout_cluster(m) == 0)
+		error = EIO;
+
+unlock_all:
+	VM_OBJECT_WUNLOCK(object);
+
+unlock_mp:
+	vm_page_lock_assert(m, MA_NOTOWNED);
+	if (mp != NULL) {
+		if (vp != NULL)
+			vput(vp);
+		vm_object_deallocate(object);
+		vn_finished_write(mp);
+	}
+
+	return (error);
+}
+
 /*
  * vm_pageout_scan does the dirty work for the pageout daemon.
  *
@@ -921,7 +1031,6 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
 	int act_delta, addl_page_shortage, deficit, maxscan, page_shortage;
 	int vnodes_skipped = 0;
 	int maxlaunder;
-	int lockmode;
 	boolean_t queues_locked;
 
 	/*
@@ -1155,9 +1264,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
 			 * on the inactive queue, we may have to go all out.
 			 */
 			int swap_pageouts_ok;
-			struct vnode *vp = NULL;
-			struct mount *mp = NULL;
-			vm_pindex_t pindex;
+			int error;
 
 			if ((object->type != OBJT_SWAP) && (object->type != OBJT_DEFAULT)) {
 				swap_pageouts_ok = 1;
@@ -1180,124 +1287,20 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
 				vm_page_requeue_locked(m);
 				goto relock_queues;
 			}
-
+			error = vm_pageout_clean(m);
 			/*
-			 * The object is already known NOT to be dead.  It
-			 * is possible for the vget() to block the whole
-			 * pageout daemon, but the new low-memory handling
-			 * code should prevent it.
-			 *
-			 * The previous code skipped locked vnodes and, worse,
-			 * reordered pages in the queue.  This results in
-			 * completely non-deterministic operation and, on a
-			 * busy system, can lead to extremely non-optimal
-			 * pageouts.  For example, it can cause clean pages
-			 * to be freed and dirty pages to be moved to the end
-			 * of the queue.  Since dirty pages are also moved to
-			 * the end of the queue once-cleaned, this gives
-			 * way too large a weighting to deferring the freeing
-			 * of dirty pages.
-			 *
-			 * We can't wait forever for the vnode lock, we might
-			 * deadlock due to a vn_read() getting stuck in
-			 * vm_wait while holding this vnode.  We skip the
-			 * vnode if we can't get it in a reasonable amount
-			 * of time.
-			 */
-			if (object->type == OBJT_VNODE) {
-				vm_page_unlock(m);
-				vp = object->handle;
-				if (vp->v_type == VREG &&
-				    vn_start_write(vp, &mp, V_NOWAIT) != 0) {
-					mp = NULL;
-					++pageout_lock_miss;
-					if (object->flags & OBJ_MIGHTBEDIRTY)
-						vnodes_skipped++;
-					goto unlock_and_continue;
-				}
-				KASSERT(mp != NULL,
-				    ("vp %p with NULL v_mount", vp));
-				vm_object_reference_locked(object);
-				pindex = m->pindex;
-				VM_OBJECT_WUNLOCK(object);
-				lockmode = MNT_SHARED_WRITES(vp->v_mount) ?
-				    LK_SHARED : LK_EXCLUSIVE;
-				if (vget(vp, lockmode | LK_TIMELOCK,
-				    curthread)) {
-					VM_OBJECT_WLOCK(object);
-					++pageout_lock_miss;
-					if (object->flags & OBJ_MIGHTBEDIRTY)
-						vnodes_skipped++;
-					vp = NULL;
-					goto unlock_and_continue;
-				}
-				VM_OBJECT_WLOCK(object);
-				vm_page_lock(m);
-				/*
-				 * While the object and page were unlocked,
-				 * the page may have been
-				 * (1) moved to a different queue,
-				 * (2) reallocated to a different object,
-				 * (3) reallocated to a different offset, or
-				 * (4) cleaned.
-				 */
-				if (m->queue != PQ_INACTIVE ||
-				    m->object != object ||
-				    m->pindex != pindex ||
-				    m->dirty == 0) {
-					vm_page_unlock(m);
-					if (object->flags & OBJ_MIGHTBEDIRTY)
-						vnodes_skipped++;
-					goto unlock_and_continue;
-				}
-
-				/*
-				 * The page may have been busied during the
-				 * blocking in vget().  We don't move the
-				 * page back onto the end of the queue so that
-				 * statistics are more correct if we don't.
-				 */
-				if (vm_page_busied(m)) {
-					vm_page_unlock(m);
-					addl_page_shortage++;
-					goto unlock_and_continue;
-				}
-
-				/*
-				 * If the page has become held it might
-				 * be undergoing I/O, so skip it
-				 */
-				if (m->hold_count != 0) {
-					vm_page_unlock(m);
-					addl_page_shortage++;
-					if (object->flags & OBJ_MIGHTBEDIRTY)
-						vnodes_skipped++;
-					goto unlock_and_continue;
-				}
-			}
-
-			/*
-			 * If a page is dirty, then it is either being washed
-			 * (but not yet cleaned) or it is still in the
-			 * laundry.  If it is still in the laundry, then we
-			 * start the cleaning operation.
-			 *
-			 * decrement page_shortage on success to account for
+			 * Decrement page_shortage on success to account for
 			 * the (future) cleaned page.  Otherwise we could wind
 			 * up laundering or cleaning too many pages.
 			 */
-			if (vm_pageout_clean(m) != 0) {
-				--page_shortage;
-				--maxlaunder;
-			}
-unlock_and_continue:
-			vm_page_lock_assert(m, MA_NOTOWNED);
-			VM_OBJECT_WUNLOCK(object);
-			if (mp != NULL) {
-				if (vp != NULL)
-					vput(vp);
-				vm_object_deallocate(object);
-				vn_finished_write(mp);
+			if (error == 0) {
+				page_shortage--;
+				maxlaunder--;
+			} else if (error == EDEADLK) {
+				pageout_lock_miss++;
+				vnodes_skipped++;
+			} else if (error == EBUSY) {
+				addl_page_shortage++;
 			}
 			vm_page_lock_assert(m, MA_NOTOWNED);
 			goto relock_queues;
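Illustration only, not part of the patch: a minimal userspace sketch of the error-code contract the reworked vm_pageout_clean() establishes with its caller in vm_pageout_scan().  A return of 0 means laundering was started (page_shortage and maxlaunder are decremented), EDEADLK means the vnode lock was missed (pageout_lock_miss and vnodes_skipped are bumped), EBUSY means the page was busied or held while the locks were dropped (addl_page_shortage is bumped), and the remaining errnos (ENXIO, EIO) get no accounting.  The names scan_counters and account_clean_result are invented for this sketch.

#include <errno.h>
#include <stdio.h>

struct scan_counters {
	int page_shortage;	/* pages we still want to reclaim */
	int maxlaunder;		/* remaining laundering budget */
	int pageout_lock_miss;	/* vnode lock timeouts */
	int vnodes_skipped;	/* vnodes skipped because of lock misses */
	int addl_page_shortage;	/* busied/held pages seen during the scan */
};

/* Mirrors the accounting done after the call to vm_pageout_clean(). */
static void
account_clean_result(struct scan_counters *sc, int error)
{
	if (error == 0) {
		sc->page_shortage--;
		sc->maxlaunder--;
	} else if (error == EDEADLK) {
		sc->pageout_lock_miss++;
		sc->vnodes_skipped++;
	} else if (error == EBUSY) {
		sc->addl_page_shortage++;
	}
	/* ENXIO and EIO: no accounting, the page is simply revisited later. */
}

int
main(void)
{
	struct scan_counters sc = { 32, 16, 0, 0, 0 };

	account_clean_result(&sc, 0);		/* cleaning started */
	account_clean_result(&sc, EDEADLK);	/* vnode lock timed out */
	account_clean_result(&sc, EBUSY);	/* page busied while unlocked */
	printf("shortage %d launder %d miss %d skipped %d addl %d\n",
	    sc.page_shortage, sc.maxlaunder, sc.pageout_lock_miss,
	    sc.vnodes_skipped, sc.addl_page_shortage);
	return (0);
}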