- Simplify vm_pageout_scan() by introducing a new vm_pageout_clean()
  function that does the locking and validation associated with cleaning
  a page.  This moves 150 lines of code into its own function.

- Rename vm_pageout_clean() to vm_pageout_cluster() to define what it
  really does: clustering nearby pages for pageout optimization.

Reviewed by:	alc, kib, kmacy
Tested by:	pho (earlier version)
Sponsored by:	EMC / Isilon
commit 34d8b7ea3b
parent 45440aa84e
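To make the new division of labor concrete, here is a minimal, self-contained sketch in userland C (stand-in types, stub "locking", and hypothetical names pageout_clean()/pageout_cluster(); it is not the kernel code itself). The renamed cluster step only launders the page, while the new clean step validates the page and reports an errno that the scan loop turns into accounting, mirroring the error conventions visible in the diff (0, EDEADLK, EBUSY, EIO).

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct page {
	bool dirty;
	bool busy;
	bool vnode_locked;	/* stand-in for "vnode lock acquired in time" */
};

/* Stand-in for vm_pageout_cluster(): returns the number of pages laundered. */
static int
pageout_cluster(struct page *m)
{
	if (!m->dirty)
		return (0);
	m->dirty = false;
	return (1);
}

/* Stand-in for the new vm_pageout_clean(): 0 on success, an errno otherwise. */
static int
pageout_clean(struct page *m)
{
	if (!m->vnode_locked)
		return (EDEADLK);	/* could not take the vnode lock in time */
	if (m->busy)
		return (EBUSY);		/* page became busy while unlocked */
	if (pageout_cluster(m) == 0)
		return (EIO);		/* nothing was laundered */
	return (0);
}

int
main(void)
{
	struct page m = { .dirty = true, .busy = false, .vnode_locked = true };
	int error, page_shortage = 10;

	/* Caller side, as vm_pageout_scan() now does: map the errno to counters. */
	error = pageout_clean(&m);
	if (error == 0)
		page_shortage--;
	printf("error=%d page_shortage=%d\n", error, page_shortage);
	return (0);
}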
@@ -118,7 +118,8 @@ __FBSDID("$FreeBSD$");
 /* the kernel process "vm_pageout"*/
 static void vm_pageout(void);
 static void vm_pageout_init(void);
-static int vm_pageout_clean(vm_page_t);
+static int vm_pageout_clean(vm_page_t m);
+static int vm_pageout_cluster(vm_page_t m);
 static void vm_pageout_scan(struct vm_domain *vmd, int pass);
 static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass);
 
@@ -347,7 +348,7 @@ vm_pageout_page_lock(vm_page_t m, vm_page_t *next)
  * late and we cannot do anything that will mess with the page.
  */
 static int
-vm_pageout_clean(vm_page_t m)
+vm_pageout_cluster(vm_page_t m)
 {
 	vm_object_t object;
 	vm_page_t mc[2*vm_pageout_page_count], pb, ps;
@@ -905,6 +906,115 @@ vm_pageout_map_deactivate_pages(map, desired)
 }
 #endif /* !defined(NO_SWAPPING) */
 
+/*
+ * Attempt to acquire all of the necessary locks to launder a page and
+ * then call through the clustering layer to PUTPAGES.  Wait a short
+ * time for a vnode lock.
+ *
+ * Requires the page and object lock on entry, releases both before return.
+ * Returns 0 on success and an errno otherwise.
+ */
+static int
+vm_pageout_clean(vm_page_t m)
+{
+	struct vnode *vp;
+	struct mount *mp;
+	vm_object_t object;
+	vm_pindex_t pindex;
+	int error, lockmode;
+
+	vm_page_assert_locked(m);
+	object = m->object;
+	VM_OBJECT_ASSERT_WLOCKED(object);
+	error = 0;
+	vp = NULL;
+	mp = NULL;
+
+	/*
+	 * The object is already known NOT to be dead.  It
+	 * is possible for the vget() to block the whole
+	 * pageout daemon, but the new low-memory handling
+	 * code should prevent it.
+	 *
+	 * We can't wait forever for the vnode lock, we might
+	 * deadlock due to a vn_read() getting stuck in
+	 * vm_wait while holding this vnode.  We skip the
+	 * vnode if we can't get it in a reasonable amount
+	 * of time.
+	 */
+	if (object->type == OBJT_VNODE) {
+		vm_page_unlock(m);
+		vp = object->handle;
+		if (vp->v_type == VREG &&
+		    vn_start_write(vp, &mp, V_NOWAIT) != 0) {
+			mp = NULL;
+			error = EDEADLK;
+			goto unlock_all;
+		}
+		KASSERT(mp != NULL,
+		    ("vp %p with NULL v_mount", vp));
+		vm_object_reference_locked(object);
+		pindex = m->pindex;
+		VM_OBJECT_WUNLOCK(object);
+		lockmode = MNT_SHARED_WRITES(vp->v_mount) ?
+		    LK_SHARED : LK_EXCLUSIVE;
+		if (vget(vp, lockmode | LK_TIMELOCK, curthread)) {
+			vp = NULL;
+			error = EDEADLK;
+			goto unlock_mp;
+		}
+		VM_OBJECT_WLOCK(object);
+		vm_page_lock(m);
+		/*
+		 * While the object and page were unlocked, the page
+		 * may have been:
+		 * (1) moved to a different queue,
+		 * (2) reallocated to a different object,
+		 * (3) reallocated to a different offset, or
+		 * (4) cleaned.
+		 */
+		if (m->queue != PQ_INACTIVE || m->object != object ||
+		    m->pindex != pindex || m->dirty == 0) {
+			vm_page_unlock(m);
+			error = ENXIO;
+			goto unlock_all;
+		}
+
+		/*
+		 * The page may have been busied or held while the object
+		 * and page locks were released.
+		 */
+		if (vm_page_busied(m) || m->hold_count != 0) {
+			vm_page_unlock(m);
+			error = EBUSY;
+			goto unlock_all;
+		}
+	}
+
+	/*
+	 * If a page is dirty, then it is either being washed
+	 * (but not yet cleaned) or it is still in the
+	 * laundry.  If it is still in the laundry, then we
+	 * start the cleaning operation.
+	 */
+	if (vm_pageout_cluster(m) == 0)
+		error = EIO;
+
+unlock_all:
+	VM_OBJECT_WUNLOCK(object);
+
+unlock_mp:
+	vm_page_lock_assert(m, MA_NOTOWNED);
+	if (mp != NULL) {
+		if (vp != NULL)
+			vput(vp);
+		vm_object_deallocate(object);
+		vn_finished_write(mp);
+	}
+
+	return (error);
+}
+
 /*
  * vm_pageout_scan does the dirty work for the pageout daemon.
  *
@@ -921,7 +1031,6 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
 	int act_delta, addl_page_shortage, deficit, maxscan, page_shortage;
 	int vnodes_skipped = 0;
 	int maxlaunder;
-	int lockmode;
 	boolean_t queues_locked;
 
 	/*
@@ -1155,9 +1264,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
			 * on the inactive queue, we may have to go all out.
			 */
			int swap_pageouts_ok;
-			struct vnode *vp = NULL;
-			struct mount *mp = NULL;
-			vm_pindex_t pindex;
+			int error;
 
			if ((object->type != OBJT_SWAP) && (object->type != OBJT_DEFAULT)) {
				swap_pageouts_ok = 1;
@@ -1180,124 +1287,20 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
				vm_page_requeue_locked(m);
				goto relock_queues;
			}
-
+			error = vm_pageout_clean(m);
			/*
-			 * The object is already known NOT to be dead.  It
-			 * is possible for the vget() to block the whole
-			 * pageout daemon, but the new low-memory handling
-			 * code should prevent it.
-			 *
-			 * The previous code skipped locked vnodes and, worse,
-			 * reordered pages in the queue.  This results in
-			 * completely non-deterministic operation and, on a
-			 * busy system, can lead to extremely non-optimal
-			 * pageouts.  For example, it can cause clean pages
-			 * to be freed and dirty pages to be moved to the end
-			 * of the queue.  Since dirty pages are also moved to
-			 * the end of the queue once-cleaned, this gives
-			 * way too large a weighting to deferring the freeing
-			 * of dirty pages.
-			 *
-			 * We can't wait forever for the vnode lock, we might
-			 * deadlock due to a vn_read() getting stuck in
-			 * vm_wait while holding this vnode.  We skip the
-			 * vnode if we can't get it in a reasonable amount
-			 * of time.
-			 */
-			if (object->type == OBJT_VNODE) {
-				vm_page_unlock(m);
-				vp = object->handle;
-				if (vp->v_type == VREG &&
-				    vn_start_write(vp, &mp, V_NOWAIT) != 0) {
-					mp = NULL;
-					++pageout_lock_miss;
-					if (object->flags & OBJ_MIGHTBEDIRTY)
-						vnodes_skipped++;
-					goto unlock_and_continue;
-				}
-				KASSERT(mp != NULL,
-				    ("vp %p with NULL v_mount", vp));
-				vm_object_reference_locked(object);
-				pindex = m->pindex;
-				VM_OBJECT_WUNLOCK(object);
-				lockmode = MNT_SHARED_WRITES(vp->v_mount) ?
-				    LK_SHARED : LK_EXCLUSIVE;
-				if (vget(vp, lockmode | LK_TIMELOCK,
-				    curthread)) {
-					VM_OBJECT_WLOCK(object);
-					++pageout_lock_miss;
-					if (object->flags & OBJ_MIGHTBEDIRTY)
-						vnodes_skipped++;
-					vp = NULL;
-					goto unlock_and_continue;
-				}
-				VM_OBJECT_WLOCK(object);
-				vm_page_lock(m);
-				/*
-				 * While the object and page were unlocked,
-				 * the page may have been
-				 * (1) moved to a different queue,
-				 * (2) reallocated to a different object,
-				 * (3) reallocated to a different offset, or
-				 * (4) cleaned.
-				 */
-				if (m->queue != PQ_INACTIVE ||
-				    m->object != object ||
-				    m->pindex != pindex ||
-				    m->dirty == 0) {
-					vm_page_unlock(m);
-					if (object->flags & OBJ_MIGHTBEDIRTY)
-						vnodes_skipped++;
-					goto unlock_and_continue;
-				}
-
-				/*
-				 * The page may have been busied during the
-				 * blocking in vget().  We don't move the
-				 * page back onto the end of the queue so that
-				 * statistics are more correct if we don't.
-				 */
-				if (vm_page_busied(m)) {
-					vm_page_unlock(m);
-					addl_page_shortage++;
-					goto unlock_and_continue;
-				}
-
-				/*
-				 * If the page has become held it might
-				 * be undergoing I/O, so skip it
-				 */
-				if (m->hold_count != 0) {
-					vm_page_unlock(m);
-					addl_page_shortage++;
-					if (object->flags & OBJ_MIGHTBEDIRTY)
-						vnodes_skipped++;
-					goto unlock_and_continue;
-				}
-			}
-
-			/*
-			 * If a page is dirty, then it is either being washed
-			 * (but not yet cleaned) or it is still in the
-			 * laundry.  If it is still in the laundry, then we
-			 * start the cleaning operation.
-			 *
-			 * decrement page_shortage on success to account for
+			 * Decrement page_shortage on success to account for
			 * the (future) cleaned page.  Otherwise we could wind
			 * up laundering or cleaning too many pages.
			 */
-			if (vm_pageout_clean(m) != 0) {
-				--page_shortage;
-				--maxlaunder;
-			}
-unlock_and_continue:
-			vm_page_lock_assert(m, MA_NOTOWNED);
-			VM_OBJECT_WUNLOCK(object);
-			if (mp != NULL) {
-				if (vp != NULL)
-					vput(vp);
-				vm_object_deallocate(object);
-				vn_finished_write(mp);
+			if (error == 0) {
+				page_shortage--;
+				maxlaunder--;
+			} else if (error == EDEADLK) {
+				pageout_lock_miss++;
+				vnodes_skipped++;
+			} else if (error == EBUSY) {
+				addl_page_shortage++;
			}
			vm_page_lock_assert(m, MA_NOTOWNED);
			goto relock_queues;