Dequeue wired pages lazily.

Previously, wiring a page would cause it to be removed from its page
queue. In the common case, unwiring causes it to be enqueued at the tail
of that page queue. This change modifies vm_page_wire() to not dequeue
the page, thus avoiding the highly contended page queue locks. Instead,
vm_page_unwire() takes care of requeuing the page as a single operation,
and the page daemon dequeues wired pages as they are encountered during
a queue scan to avoid needlessly revisiting them later. For pages in
PQ_ACTIVE we do even better, since a requeue is unnecessary.

The change improves scalability for some common workloads. For instance,
threads wiring pages into the buffer cache no longer need to modify
global page queues, and unwiring is usually done by the bufspace thread,
so concurrency is not as much of an issue. As another example, many
sysctl handlers wire the output buffer to avoid faults on copyout, and
since the buffer is likely to be in PQ_ACTIVE, we now entirely avoid
modifying the page queue in this case.

The change also adds a block comment describing some properties of
struct vm_page's reference counters, and the busy lock.

Reviewed by:	jeff
Discussed with:	alc, kib
MFC after:	1 month
Differential Revision:	https://reviews.freebsd.org/D11943
This commit is contained in:
Mark Johnston 2018-02-07 16:57:10 +00:00
parent 207efdb345
commit 1d3a1bcfac
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=328977
7 changed files with 175 additions and 76 deletions

View File

@ -128,6 +128,7 @@ SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW,
static void
sendfile_free_page(vm_page_t pg, bool nocache)
{
bool freed;
vm_page_lock(pg);
/*
@ -136,15 +137,15 @@ sendfile_free_page(vm_page_t pg, bool nocache)
* responsible for freeing the page. In the 'nocache' case, try to free
* the page, but only if it is cheap to do so.
*/
if (vm_page_unwire(pg, nocache ? PQ_NONE : PQ_INACTIVE)) {
if (vm_page_unwire_noq(pg)) {
vm_object_t obj;
if ((obj = pg->object) == NULL)
vm_page_free(pg);
else if (nocache) {
if (!vm_page_xbusied(pg) && VM_OBJECT_TRYWLOCK(obj)) {
bool freed;
else {
freed = false;
if (nocache && !vm_page_xbusied(pg) &&
VM_OBJECT_TRYWLOCK(obj)) {
/* Only free unmapped pages. */
if (obj->ref_count == 0 ||
!pmap_page_is_mapped(pg))
@ -153,13 +154,24 @@ sendfile_free_page(vm_page_t pg, bool nocache)
* locked cannot be relied upon.
*/
freed = vm_page_try_to_free(pg);
else
freed = false;
VM_OBJECT_WUNLOCK(obj);
if (!freed)
}
if (!freed) {
/*
* If we were asked to not cache the page, place
* it near the head of the inactive queue so
* that it is reclaimed sooner. Otherwise,
* maintain LRU.
*/
if (nocache)
vm_page_deactivate_noreuse(pg);
} else
vm_page_deactivate_noreuse(pg);
else if (pg->queue == PQ_ACTIVE)
vm_page_reference(pg);
else if (pg->queue != PQ_INACTIVE)
vm_page_deactivate(pg);
else
vm_page_requeue(pg);
}
}
}
vm_page_unlock(pg);

View File

@ -2621,7 +2621,7 @@ vfs_vmio_unwire(struct buf *bp, vm_page_t m)
bool freed;
vm_page_lock(m);
if (vm_page_unwire(m, PQ_NONE)) {
if (vm_page_unwire_noq(m)) {
/*
* Determine if the page should be freed before adding
* it to the inactive queue.
@ -2637,14 +2637,16 @@ vfs_vmio_unwire(struct buf *bp, vm_page_t m)
if (!freed) {
/*
* If the page is unlikely to be reused, let the
* VM know. Otherwise, maintain LRU page
* ordering and put the page at the tail of the
* inactive queue.
* VM know. Otherwise, maintain LRU.
*/
if ((bp->b_flags & B_NOREUSE) != 0)
vm_page_deactivate_noreuse(m);
else
else if (m->queue == PQ_ACTIVE)
vm_page_reference(m);
else if (m->queue != PQ_INACTIVE)
vm_page_deactivate(m);
else
vm_page_requeue(m);
}
}
vm_page_unlock(m);

View File

@ -1275,7 +1275,7 @@ vm_object_madvise(vm_object_t object, vm_pindex_t pindex, vm_pindex_t end,
if (tm->valid != VM_PAGE_BITS_ALL)
goto next_pindex;
vm_page_lock(tm);
if (tm->hold_count != 0 || tm->wire_count != 0) {
if (vm_page_held(tm)) {
vm_page_unlock(tm);
goto next_pindex;
}

View File

@ -2053,8 +2053,7 @@ vm_page_alloc_check(vm_page_t m)
KASSERT(m->object == NULL, ("page %p has object", m));
KASSERT(m->queue == PQ_NONE,
("page %p has unexpected queue %d", m, m->queue));
KASSERT(m->wire_count == 0, ("page %p is wired", m));
KASSERT(m->hold_count == 0, ("page %p is held", m));
KASSERT(!vm_page_held(m), ("page %p is held", m));
KASSERT(!vm_page_busied(m), ("page %p is busy", m));
KASSERT(m->dirty == 0, ("page %p is dirty", m));
KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
@ -2220,7 +2219,7 @@ vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end,
vm_page_change_lock(m, &m_mtx);
m_inc = 1;
retry:
if (m->wire_count != 0 || m->hold_count != 0)
if (vm_page_held(m))
run_ext = 0;
#if VM_NRESERVLEVEL > 0
else if ((level = vm_reserv_level(m)) >= 0 &&
@ -2248,8 +2247,7 @@ vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end,
*/
VM_OBJECT_RUNLOCK(object);
goto retry;
} else if (m->wire_count != 0 ||
m->hold_count != 0) {
} else if (vm_page_held(m)) {
run_ext = 0;
goto unlock;
}
@ -2391,7 +2389,7 @@ vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run,
*/
vm_page_change_lock(m, &m_mtx);
retry:
if (m->wire_count != 0 || m->hold_count != 0)
if (vm_page_held(m))
error = EBUSY;
else if ((object = m->object) != NULL) {
/*
@ -2408,8 +2406,7 @@ vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run,
*/
VM_OBJECT_WUNLOCK(object);
goto retry;
} else if (m->wire_count != 0 ||
m->hold_count != 0) {
} else if (vm_page_held(m)) {
error = EBUSY;
goto unlock;
}
@ -3042,9 +3039,7 @@ vm_page_activate(vm_page_t m)
if (queue != PQ_NONE)
vm_page_dequeue(m);
vm_page_enqueue(PQ_ACTIVE, m);
} else
KASSERT(queue == PQ_NONE,
("vm_page_activate: wired page %p is queued", m));
}
} else {
if (m->act_count < ACT_INIT)
m->act_count = ACT_INIT;
@ -3241,26 +3236,18 @@ vm_page_free_toq(vm_page_t m)
}
/*
* vm_page_wire:
* vm_page_wire:
*
* Mark this page as wired down by yet
* another map, removing it from paging queues
* as necessary.
* Mark this page as wired down. If the page is fictitious, then
* its wire count must remain one.
*
* If the page is fictitious, then its wire count must remain one.
*
* The page must be locked.
* The page must be locked.
*/
void
vm_page_wire(vm_page_t m)
{
/*
* Only bump the wire statistics if the page is not already wired,
* and only unqueue the page if it is on some queue (if it is unmanaged
* it is already off the queues).
*/
vm_page_lock_assert(m, MA_OWNED);
vm_page_assert_locked(m);
if ((m->flags & PG_FICTITIOUS) != 0) {
KASSERT(m->wire_count == 1,
("vm_page_wire: fictitious page %p's wire count isn't one",
@ -3271,7 +3258,6 @@ vm_page_wire(vm_page_t m)
KASSERT((m->oflags & VPO_UNMANAGED) == 0 ||
m->queue == PQ_NONE,
("vm_page_wire: unmanaged page %p is queued", m));
vm_page_remque(m);
atomic_add_int(&vm_cnt.v_wire_count, 1);
}
m->wire_count++;
@ -3288,38 +3274,69 @@ vm_page_wire(vm_page_t m)
* Only managed pages belonging to an object can be paged out. If the number
* of wirings transitions to zero and the page is eligible for page out, then
* the page is added to the specified paging queue (unless PQ_NONE is
* specified).
* specified, in which case the page is dequeued if it belongs to a paging
* queue).
*
* If a page is fictitious, then its wire count must always be one.
*
* A managed page must be locked.
*/
boolean_t
bool
vm_page_unwire(vm_page_t m, uint8_t queue)
{
bool unwired;
KASSERT(queue < PQ_COUNT || queue == PQ_NONE,
("vm_page_unwire: invalid queue %u request for page %p",
queue, m));
unwired = vm_page_unwire_noq(m);
if (unwired && (m->oflags & VPO_UNMANAGED) == 0 && m->object != NULL) {
if (m->queue == queue) {
if (queue == PQ_ACTIVE)
vm_page_reference(m);
else if (queue != PQ_NONE)
vm_page_requeue(m);
} else {
vm_page_remque(m);
if (queue != PQ_NONE) {
vm_page_enqueue(queue, m);
if (queue == PQ_ACTIVE)
/* Initialize act_count. */
vm_page_activate(m);
}
}
}
return (unwired);
}
/*
*
* vm_page_unwire_noq:
*
* Unwire a page without (re-)inserting it into a page queue. It is up
* to the caller to enqueue, requeue, or free the page as appropriate.
* In most cases, vm_page_unwire() should be used instead.
*/
bool
vm_page_unwire_noq(vm_page_t m)
{
if ((m->oflags & VPO_UNMANAGED) == 0)
vm_page_assert_locked(m);
if ((m->flags & PG_FICTITIOUS) != 0) {
KASSERT(m->wire_count == 1,
("vm_page_unwire: fictitious page %p's wire count isn't one", m));
return (FALSE);
return (false);
}
if (m->wire_count > 0) {
m->wire_count--;
if (m->wire_count == 0) {
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
if ((m->oflags & VPO_UNMANAGED) == 0 &&
m->object != NULL && queue != PQ_NONE)
vm_page_enqueue(queue, m);
return (TRUE);
} else
return (FALSE);
} else
if (m->wire_count == 0)
panic("vm_page_unwire: page %p's wire count is zero", m);
m->wire_count--;
if (m->wire_count == 0) {
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
return (true);
} else
return (false);
}
/*
@ -3448,8 +3465,7 @@ vm_page_try_to_free(vm_page_t m)
vm_page_assert_locked(m);
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("page %p is unmanaged", m));
if (m->dirty != 0 || m->hold_count != 0 || m->wire_count != 0 ||
vm_page_busied(m))
if (m->dirty != 0 || vm_page_held(m) || vm_page_busied(m))
return (false);
if (m->object->ref_count != 0) {
pmap_remove_all(m);

View File

@ -97,7 +97,7 @@
* or the lock for either the free or paging queue (Q). If a field is
* annotated below with two of these locks, then holding either lock is
* sufficient for read access, but both locks are required for write
* access.
* access. An annotation of (C) indicates that the field is immutable.
*
* In contrast, the synchronization of accesses to the page's
* dirty field is machine dependent (M). In the
@ -111,6 +111,38 @@
* contains the dirty field. In the machine-independent layer,
* the implementation of read-modify-write operations on the
* field is encapsulated in vm_page_clear_dirty_mask().
*
* The page structure contains two counters which prevent page reuse.
* Both counters are protected by the page lock (P). The hold
* counter counts transient references obtained via a pmap lookup, and
* is also used to prevent page reclamation in situations where it is
* undesirable to block other accesses to the page. The wire counter
* is used to implement mlock(2) and is non-zero for pages containing
* kernel memory. Pages that are wired or held will not be reclaimed
* or laundered by the page daemon, but are treated differently during
* a page queue scan: held pages remain at their position in the queue,
* while wired pages are removed from the queue and must later be
* re-enqueued appropriately by the unwiring thread. It is legal to
* call vm_page_free() on a held page; doing so causes it to be removed
* from its object and page queue, and the page is released to the
* allocator once the last hold reference is dropped. In contrast,
* wired pages may not be freed.
*
* In some pmap implementations, the wire count of a page table page is
* used to track the number of populated entries.
*
* The busy lock is an embedded reader-writer lock which protects the
* page's contents and identity (i.e., its <object, pindex> tuple) and
* interlocks with the object lock (O). In particular, a page may be
* busied or unbusied only with the object write lock held. To avoid
* bloating the page structure, the busy lock lacks some of the
* features available to the kernel's general-purpose synchronization
* primitives. As a result, busy lock ordering rules are not verified,
* lock recursion is not detected, and an attempt to xbusy a busy page
* or sbusy an xbusy page will trigger a panic rather than
* causing the thread to block. vm_page_sleep_if_busy() can be used to
* sleep until the page's busy state changes, after which the caller
* must re-lookup the page and re-evaluate its state.
*/
#if PAGE_SIZE == 4096
@ -152,9 +184,9 @@ struct vm_page {
uint8_t oflags; /* page VPO_* flags (O) */
uint8_t queue; /* page queue index (P,Q) */
int8_t psind; /* pagesizes[] index (O) */
int8_t segind;
int8_t segind; /* vm_phys segment index (C) */
uint8_t order; /* index of the buddy queue */
uint8_t pool;
uint8_t pool; /* vm_phys freepool index (Q) */
u_char act_count; /* page usage count (P) */
/* NOTE that these must support one bit per DEV_BSIZE in a page */
/* so, on normal X86 kernels, they must be at least 8 bits wide */
@ -489,7 +521,8 @@ bool vm_page_try_to_free(vm_page_t m);
int vm_page_trysbusy(vm_page_t m);
void vm_page_unhold_pages(vm_page_t *ma, int count);
void vm_page_unswappable(vm_page_t m);
boolean_t vm_page_unwire(vm_page_t m, uint8_t queue);
bool vm_page_unwire(vm_page_t m, uint8_t queue);
bool vm_page_unwire_noq(vm_page_t m);
void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
void vm_page_wire (vm_page_t);
void vm_page_xunbusy_hard(vm_page_t m);
@ -716,5 +749,17 @@ vm_page_in_laundry(vm_page_t m)
return (m->queue == PQ_LAUNDRY || m->queue == PQ_UNSWAPPABLE);
}
/*
* vm_page_held:
*
* Return true if a reference prevents the page from being reclaimable,
* i.e., if the page's hold count or its wire count is greater than
* zero. This only reads the two counters; it takes no locks itself.
*/
static inline bool
vm_page_held(vm_page_t m)
{
return (m->hold_count > 0 || m->wire_count > 0);
}
#endif /* _KERNEL */
#endif /* !_VM_PAGE_ */

View File

@ -335,11 +335,8 @@ vm_pageout_cluster(vm_page_t m)
VM_OBJECT_ASSERT_WLOCKED(object);
pindex = m->pindex;
/*
* We can't clean the page if it is busy or held.
*/
vm_page_assert_unbusied(m);
KASSERT(m->hold_count == 0, ("page %p is held", m));
KASSERT(!vm_page_held(m), ("page %p is held", m));
pmap_remove_write(m);
vm_page_unlock(m);
@ -378,8 +375,7 @@ vm_pageout_cluster(vm_page_t m)
break;
}
vm_page_lock(p);
if (!vm_page_in_laundry(p) ||
p->hold_count != 0) { /* may be undergoing I/O */
if (!vm_page_in_laundry(p) || vm_page_held(p)) {
vm_page_unlock(p);
ib = 0;
break;
@ -405,8 +401,7 @@ vm_pageout_cluster(vm_page_t m)
if (p->dirty == 0)
break;
vm_page_lock(p);
if (!vm_page_in_laundry(p) ||
p->hold_count != 0) { /* may be undergoing I/O */
if (!vm_page_in_laundry(p) || vm_page_held(p)) {
vm_page_unlock(p);
break;
}
@ -655,10 +650,10 @@ vm_pageout_clean(vm_page_t m, int *numpagedout)
}
/*
* The page may have been busied or held while the object
* The page may have been busied or referenced while the object
* and page locks were released.
*/
if (vm_page_busied(m) || m->hold_count != 0) {
if (vm_page_busied(m) || vm_page_held(m)) {
vm_page_unlock(m);
error = EBUSY;
goto unlock_all;
@ -747,11 +742,18 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall)
vm_page_unlock(m);
continue;
}
if (m->wire_count != 0) {
vm_page_dequeue_locked(m);
vm_page_unlock(m);
continue;
}
object = m->object;
if ((!VM_OBJECT_TRYWLOCK(object) &&
(!vm_pageout_fallback_object_lock(m, &next) ||
m->hold_count != 0)) || vm_page_busied(m)) {
vm_page_held(m))) || vm_page_busied(m)) {
VM_OBJECT_WUNLOCK(object);
if (m->wire_count != 0 && vm_page_pagequeue(m) == pq)
vm_page_dequeue_locked(m);
vm_page_unlock(m);
continue;
}
@ -1190,7 +1192,16 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
*/
if (!vm_pageout_page_lock(m, &next))
goto unlock_page;
else if (m->hold_count != 0) {
else if (m->wire_count != 0) {
/*
* Wired pages may not be freed, and unwiring a queued
* page will cause it to be requeued. Thus, remove them
* from the queue now to avoid unnecessary revisits.
*/
vm_page_dequeue_locked(m);
addl_page_shortage++;
goto unlock_page;
} else if (m->hold_count != 0) {
/*
* Held pages are essentially stuck in the
* queue. So, they ought to be discounted
@ -1205,7 +1216,11 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
if (!VM_OBJECT_TRYWLOCK(object)) {
if (!vm_pageout_fallback_object_lock(m, &next))
goto unlock_object;
else if (m->hold_count != 0) {
else if (m->wire_count != 0) {
vm_page_dequeue_locked(m);
addl_page_shortage++;
goto unlock_object;
} else if (m->hold_count != 0) {
addl_page_shortage++;
goto unlock_object;
}
@ -1226,7 +1241,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
vm_page_unlock(m);
continue;
}
KASSERT(m->hold_count == 0, ("Held page %p", m));
KASSERT(!vm_page_held(m), ("Held page %p", m));
/*
* Dequeue the inactive page and unlock the inactive page
@ -1432,6 +1447,15 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
*/
VM_CNT_INC(v_pdpages);
/*
* Wired pages are dequeued lazily.
*/
if (m->wire_count != 0) {
vm_page_dequeue_locked(m);
vm_page_unlock(m);
continue;
}
/*
* Check to see "how much" the page has been used.
*/

View File

@ -209,7 +209,7 @@ vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
continue;
VM_CNT_INC(v_pdpages);
vm_page_lock(p);
if (p->wire_count != 0 || p->hold_count != 0 ||
if (vm_page_held(p) ||
!pmap_page_exists_quick(pmap, p)) {
vm_page_unlock(p);
continue;