 - Simplify vm_pageout_scan() by introducing a new vm_pageout_clean()
   function that does the locking and validation associated with cleaning
   a page.  This moves 150 lines of code into its own function.
 - Rename the old vm_pageout_clean() to vm_pageout_cluster() to reflect
   what it really does: clustering nearby pages for pageout optimization.

Reviewed by:	alc, kib, kmacy
Tested by:	pho (earlier version)
Sponsored by:	EMC / Isilon
This commit is contained in:
Jeff Roberson 2015-04-07 02:18:52 +00:00
parent 45440aa84e
commit 34d8b7ea3b

View File

@ -118,7 +118,8 @@ __FBSDID("$FreeBSD$");
/* the kernel process "vm_pageout"*/
static void vm_pageout(void);
static void vm_pageout_init(void);
static int vm_pageout_clean(vm_page_t);
static int vm_pageout_clean(vm_page_t m);
static int vm_pageout_cluster(vm_page_t m);
static void vm_pageout_scan(struct vm_domain *vmd, int pass);
static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass);
@ -347,7 +348,7 @@ vm_pageout_page_lock(vm_page_t m, vm_page_t *next)
* late and we cannot do anything that will mess with the page.
*/
static int
vm_pageout_clean(vm_page_t m)
vm_pageout_cluster(vm_page_t m)
{
vm_object_t object;
vm_page_t mc[2*vm_pageout_page_count], pb, ps;
@ -905,6 +906,115 @@ vm_pageout_map_deactivate_pages(map, desired)
}
#endif /* !defined(NO_SWAPPING) */
/*
* Attempt to acquire all of the necessary locks to launder a page and
* then call through the clustering layer to PUTPAGES. Wait a short
* time for a vnode lock.
*
* Requires the page and object lock on entry, releases both before return.
* (The object lock is dropped explicitly below; the page lock is asserted
* not to be owned on exit. On the launder path that assertion implies
* vm_pageout_cluster() drops the page lock -- its body is not visible
* here, confirm against that function.)
*
* Returns 0 on success and an errno otherwise:
*   EDEADLK - vn_start_write() or the time-limited vget() failed;
*   ENXIO   - the page changed queue, object or offset, or became clean,
*             while the locks were dropped;
*   EBUSY   - the page was busied or held while the locks were dropped;
*   EIO     - vm_pageout_cluster() returned 0.
*/
static int
vm_pageout_clean(vm_page_t m)
{
struct vnode *vp;
struct mount *mp;
vm_object_t object;
vm_pindex_t pindex;
int error, lockmode;
/* Verify the caller's locking contract before touching anything. */
vm_page_assert_locked(m);
object = m->object;
VM_OBJECT_ASSERT_WLOCKED(object);
error = 0;
/*
* vp and mp double as "what must be undone" flags for the cleanup
* code at the bottom: a non-NULL mp means vn_start_write() succeeded
* and an object reference was taken; a non-NULL vp (with mp set)
* means the vnode was also acquired with vget().
*/
vp = NULL;
mp = NULL;
/*
* The object is already known NOT to be dead. It
* is possible for the vget() to block the whole
* pageout daemon, but the new low-memory handling
* code should prevent it.
*
* We can't wait forever for the vnode lock, we might
* deadlock due to a vn_read() getting stuck in
* vm_wait while holding this vnode. We skip the
* vnode if we can't get it in a reasonable amount
* of time.
*/
if (object->type == OBJT_VNODE) {
/* Drop the page lock before the vnode operations below. */
vm_page_unlock(m);
vp = object->handle;
/*
* Non-blocking attempt to start a write on a regular file's
* mount. On failure nothing has been acquired yet, so clear
* mp to make the cleanup code skip the vput/deallocate/
* vn_finished_write sequence; the object lock is still held,
* hence unlock_all.
*/
if (vp->v_type == VREG &&
vn_start_write(vp, &mp, V_NOWAIT) != 0) {
mp = NULL;
error = EDEADLK;
goto unlock_all;
}
/*
* NOTE(review): when vp->v_type != VREG, vn_start_write() is
* not called and mp is still the NULL assigned above, so this
* assertion would fire on assertion-enabled kernels. Confirm
* that dirty pages reaching here always belong to VREG vnodes.
*/
KASSERT(mp != NULL,
("vp %p with NULL v_mount", vp));
/*
* Take an object reference so it is still valid after the
* object lock is dropped, and remember the page's offset so
* the page identity can be revalidated afterwards.
*/
vm_object_reference_locked(object);
pindex = m->pindex;
VM_OBJECT_WUNLOCK(object);
lockmode = MNT_SHARED_WRITES(vp->v_mount) ?
LK_SHARED : LK_EXCLUSIVE;
/*
* Time-limited vnode lock. On failure clear vp so cleanup
* skips vput(); the object lock is NOT held at this point,
* so jump past the unlock to unlock_mp. mp remains set, so
* the object reference and the write start are still undone.
*/
if (vget(vp, lockmode | LK_TIMELOCK, curthread)) {
vp = NULL;
error = EDEADLK;
goto unlock_mp;
}
/* Reacquire the locks in the order object, then page. */
VM_OBJECT_WLOCK(object);
vm_page_lock(m);
/*
* While the object and page were unlocked, the page
* may have been:
* (1) moved to a different queue,
* (2) reallocated to a different object,
* (3) reallocated to a different offset, or
* (4) cleaned.
*/
if (m->queue != PQ_INACTIVE || m->object != object ||
m->pindex != pindex || m->dirty == 0) {
vm_page_unlock(m);
error = ENXIO;
goto unlock_all;
}
/*
* The page may have been busied or held while the object
* and page locks were released.
*/
if (vm_page_busied(m) || m->hold_count != 0) {
vm_page_unlock(m);
error = EBUSY;
goto unlock_all;
}
}
/*
* If a page is dirty, then it is either being washed
* (but not yet cleaned) or it is still in the
* laundry. If it is still in the laundry, then we
* start the cleaning operation.
*
* Both the page and object locks are held here. A return of 0 from
* vm_pageout_cluster() is reported to the caller as EIO.
*/
if (vm_pageout_cluster(m) == 0)
error = EIO;
/* Reached with the object lock held and the page lock released. */
unlock_all:
VM_OBJECT_WUNLOCK(object);
/* Reached with neither the object lock nor the page lock held. */
unlock_mp:
vm_page_lock_assert(m, MA_NOTOWNED);
if (mp != NULL) {
/* Release the vnode only if vget() actually succeeded. */
if (vp != NULL)
vput(vp);
/* Drop the reference taken before the object was unlocked. */
vm_object_deallocate(object);
vn_finished_write(mp);
}
return (error);
}
/*
* vm_pageout_scan does the dirty work for the pageout daemon.
*
@ -921,7 +1031,6 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
int act_delta, addl_page_shortage, deficit, maxscan, page_shortage;
int vnodes_skipped = 0;
int maxlaunder;
int lockmode;
boolean_t queues_locked;
/*
@ -1155,9 +1264,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
* on the inactive queue, we may have to go all out.
*/
int swap_pageouts_ok;
struct vnode *vp = NULL;
struct mount *mp = NULL;
vm_pindex_t pindex;
int error;
if ((object->type != OBJT_SWAP) && (object->type != OBJT_DEFAULT)) {
swap_pageouts_ok = 1;
@ -1180,124 +1287,20 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
vm_page_requeue_locked(m);
goto relock_queues;
}
error = vm_pageout_clean(m);
/*
* The object is already known NOT to be dead. It
* is possible for the vget() to block the whole
* pageout daemon, but the new low-memory handling
* code should prevent it.
*
* The previous code skipped locked vnodes and, worse,
* reordered pages in the queue. This results in
* completely non-deterministic operation and, on a
* busy system, can lead to extremely non-optimal
* pageouts. For example, it can cause clean pages
* to be freed and dirty pages to be moved to the end
* of the queue. Since dirty pages are also moved to
* the end of the queue once-cleaned, this gives
* way too large a weighting to deferring the freeing
* of dirty pages.
*
* We can't wait forever for the vnode lock, we might
* deadlock due to a vn_read() getting stuck in
* vm_wait while holding this vnode. We skip the
* vnode if we can't get it in a reasonable amount
* of time.
*/
if (object->type == OBJT_VNODE) {
vm_page_unlock(m);
vp = object->handle;
if (vp->v_type == VREG &&
vn_start_write(vp, &mp, V_NOWAIT) != 0) {
mp = NULL;
++pageout_lock_miss;
if (object->flags & OBJ_MIGHTBEDIRTY)
vnodes_skipped++;
goto unlock_and_continue;
}
KASSERT(mp != NULL,
("vp %p with NULL v_mount", vp));
vm_object_reference_locked(object);
pindex = m->pindex;
VM_OBJECT_WUNLOCK(object);
lockmode = MNT_SHARED_WRITES(vp->v_mount) ?
LK_SHARED : LK_EXCLUSIVE;
if (vget(vp, lockmode | LK_TIMELOCK,
curthread)) {
VM_OBJECT_WLOCK(object);
++pageout_lock_miss;
if (object->flags & OBJ_MIGHTBEDIRTY)
vnodes_skipped++;
vp = NULL;
goto unlock_and_continue;
}
VM_OBJECT_WLOCK(object);
vm_page_lock(m);
/*
* While the object and page were unlocked,
* the page may have been
* (1) moved to a different queue,
* (2) reallocated to a different object,
* (3) reallocated to a different offset, or
* (4) cleaned.
*/
if (m->queue != PQ_INACTIVE ||
m->object != object ||
m->pindex != pindex ||
m->dirty == 0) {
vm_page_unlock(m);
if (object->flags & OBJ_MIGHTBEDIRTY)
vnodes_skipped++;
goto unlock_and_continue;
}
/*
* The page may have been busied during the
* blocking in vget(). We don't move the
* page back onto the end of the queue so that
* statistics are more correct if we don't.
*/
if (vm_page_busied(m)) {
vm_page_unlock(m);
addl_page_shortage++;
goto unlock_and_continue;
}
/*
* If the page has become held it might
* be undergoing I/O, so skip it
*/
if (m->hold_count != 0) {
vm_page_unlock(m);
addl_page_shortage++;
if (object->flags & OBJ_MIGHTBEDIRTY)
vnodes_skipped++;
goto unlock_and_continue;
}
}
/*
* If a page is dirty, then it is either being washed
* (but not yet cleaned) or it is still in the
* laundry. If it is still in the laundry, then we
* start the cleaning operation.
*
* decrement page_shortage on success to account for
* Decrement page_shortage on success to account for
* the (future) cleaned page. Otherwise we could wind
* up laundering or cleaning too many pages.
*/
if (vm_pageout_clean(m) != 0) {
--page_shortage;
--maxlaunder;
}
unlock_and_continue:
vm_page_lock_assert(m, MA_NOTOWNED);
VM_OBJECT_WUNLOCK(object);
if (mp != NULL) {
if (vp != NULL)
vput(vp);
vm_object_deallocate(object);
vn_finished_write(mp);
if (error == 0) {
page_shortage--;
maxlaunder--;
} else if (error == EDEADLK) {
pageout_lock_miss++;
vnodes_skipped++;
} else if (error == EBUSY) {
addl_page_shortage++;
}
vm_page_lock_assert(m, MA_NOTOWNED);
goto relock_queues;