diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c
index 3650ce15c66e..9b81f90458b2 100644
--- a/sys/kern/kern_sendfile.c
+++ b/sys/kern/kern_sendfile.c
@@ -128,6 +128,7 @@ SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW,
 static void
 sendfile_free_page(vm_page_t pg, bool nocache)
 {
+	bool freed;
 
 	vm_page_lock(pg);
 	/*
@@ -136,15 +137,15 @@ sendfile_free_page(vm_page_t pg, bool nocache)
 	 * responsible for freeing the page.  In 'noncache' case try to free
 	 * the page, but only if it is cheap to.
 	 */
-	if (vm_page_unwire(pg, nocache ? PQ_NONE : PQ_INACTIVE)) {
+	if (vm_page_unwire_noq(pg)) {
 		vm_object_t obj;
 
 		if ((obj = pg->object) == NULL)
 			vm_page_free(pg);
-		else if (nocache) {
-			if (!vm_page_xbusied(pg) && VM_OBJECT_TRYWLOCK(obj)) {
-				bool freed;
-
+		else {
+			freed = false;
+			if (nocache && !vm_page_xbusied(pg) &&
+			    VM_OBJECT_TRYWLOCK(obj)) {
 				/* Only free unmapped pages.  */
 				if (obj->ref_count == 0 ||
 				    !pmap_page_is_mapped(pg))
@@ -153,13 +154,24 @@
 					 * locked cannot be relied upon.
 					 */
 					freed = vm_page_try_to_free(pg);
-				else
-					freed = false;
 				VM_OBJECT_WUNLOCK(obj);
-				if (!freed)
+			}
+			if (!freed) {
+				/*
+				 * If we were asked to not cache the page, place
+				 * it near the head of the inactive queue so
+				 * that it is reclaimed sooner.  Otherwise,
+				 * maintain LRU.
+				 */
+				if (nocache)
 					vm_page_deactivate_noreuse(pg);
-			} else
-				vm_page_deactivate_noreuse(pg);
+				else if (pg->queue == PQ_ACTIVE)
+					vm_page_reference(pg);
+				else if (pg->queue != PQ_INACTIVE)
+					vm_page_deactivate(pg);
+				else
+					vm_page_requeue(pg);
+			}
 		}
 	}
 	vm_page_unlock(pg);
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index cc2edc719c14..2d46836d03d1 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -2621,7 +2621,7 @@ vfs_vmio_unwire(struct buf *bp, vm_page_t m)
 	bool freed;
 
 	vm_page_lock(m);
-	if (vm_page_unwire(m, PQ_NONE)) {
+	if (vm_page_unwire_noq(m)) {
 		/*
 		 * Determine if the page should be freed before adding
 		 * it to the inactive queue.
@@ -2637,14 +2637,16 @@
 		if (!freed) {
 			/*
 			 * If the page is unlikely to be reused, let the
-			 * VM know.  Otherwise, maintain LRU page
-			 * ordering and put the page at the tail of the
-			 * inactive queue.
+			 * VM know.  Otherwise, maintain LRU.
 			 */
 			if ((bp->b_flags & B_NOREUSE) != 0)
 				vm_page_deactivate_noreuse(m);
-			else
+			else if (m->queue == PQ_ACTIVE)
+				vm_page_reference(m);
+			else if (m->queue != PQ_INACTIVE)
 				vm_page_deactivate(m);
+			else
+				vm_page_requeue(m);
 		}
 	}
 	vm_page_unlock(m);
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 758d9f31eda4..66294889ae7b 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -1275,7 +1275,7 @@ vm_object_madvise(vm_object_t object, vm_pindex_t pindex, vm_pindex_t end,
 			if (tm->valid != VM_PAGE_BITS_ALL)
 				goto next_pindex;
 			vm_page_lock(tm);
-			if (tm->hold_count != 0 || tm->wire_count != 0) {
+			if (vm_page_held(tm)) {
 				vm_page_unlock(tm);
 				goto next_pindex;
 			}
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index b66b0f0c45e0..e53688b20f67 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -2053,8 +2053,7 @@ vm_page_alloc_check(vm_page_t m)
 	KASSERT(m->object == NULL, ("page %p has object", m));
 	KASSERT(m->queue == PQ_NONE,
 	    ("page %p has unexpected queue %d", m, m->queue));
-	KASSERT(m->wire_count == 0, ("page %p is wired", m));
-	KASSERT(m->hold_count == 0, ("page %p is held", m));
+	KASSERT(!vm_page_held(m), ("page %p is held", m));
 	KASSERT(!vm_page_busied(m), ("page %p is busy", m));
 	KASSERT(m->dirty == 0, ("page %p is dirty", m));
 	KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
@@ -2220,7 +2219,7 @@ vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end,
 		vm_page_change_lock(m, &m_mtx);
 		m_inc = 1;
retry:
-		if (m->wire_count != 0 || m->hold_count != 0)
+		if (vm_page_held(m))
 			run_ext = 0;
 #if VM_NRESERVLEVEL > 0
 		else if ((level = vm_reserv_level(m)) >= 0 &&
@@ -2248,8 +2247,7 @@ vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end,
 				 */
 				VM_OBJECT_RUNLOCK(object);
 				goto retry;
-			} else if (m->wire_count != 0 ||
-			    m->hold_count != 0) {
+			} else if (vm_page_held(m)) {
 				run_ext = 0;
 				goto unlock;
 			}
@@ -2391,7 +2389,7 @@ vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run,
 		 */
 		vm_page_change_lock(m, &m_mtx);
retry:
-		if (m->wire_count != 0 || m->hold_count != 0)
+		if (vm_page_held(m))
 			error = EBUSY;
 		else if ((object = m->object) != NULL) {
 			/*
@@ -2408,8 +2406,7 @@ vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run,
 				 */
 				VM_OBJECT_WUNLOCK(object);
 				goto retry;
-			} else if (m->wire_count != 0 ||
-			    m->hold_count != 0) {
+			} else if (vm_page_held(m)) {
 				error = EBUSY;
 				goto unlock;
 			}
@@ -3042,9 +3039,7 @@ vm_page_activate(vm_page_t m)
 			if (queue != PQ_NONE)
 				vm_page_dequeue(m);
 			vm_page_enqueue(PQ_ACTIVE, m);
-		} else
-			KASSERT(queue == PQ_NONE,
-			    ("vm_page_activate: wired page %p is queued", m));
+		}
 	} else {
 		if (m->act_count < ACT_INIT)
 			m->act_count = ACT_INIT;
@@ -3241,26 +3236,18 @@
 }
 
 /*
- *	vm_page_wire:
+ * vm_page_wire:
  *
- *	Mark this page as wired down by yet
- *	another map, removing it from paging queues
- *	as necessary.
+ * Mark this page as wired down.  If the page is fictitious, then
+ * its wire count must remain one.
  *
- *	If the page is fictitious, then its wire count must remain one.
- *
- *	The page must be locked.
+ * The page must be locked.
  */
 void
 vm_page_wire(vm_page_t m)
 {
 
-	/*
-	 * Only bump the wire statistics if the page is not already wired,
-	 * and only unqueue the page if it is on some queue (if it is unmanaged
-	 * it is already off the queues).
-	 */
-	vm_page_lock_assert(m, MA_OWNED);
+	vm_page_assert_locked(m);
 	if ((m->flags & PG_FICTITIOUS) != 0) {
 		KASSERT(m->wire_count == 1,
 		    ("vm_page_wire: fictitious page %p's wire count isn't one",
@@ -3271,7 +3258,6 @@ vm_page_wire(vm_page_t m)
 		KASSERT((m->oflags & VPO_UNMANAGED) == 0 ||
 		    m->queue == PQ_NONE,
 		    ("vm_page_wire: unmanaged page %p is queued", m));
-		vm_page_remque(m);
 		atomic_add_int(&vm_cnt.v_wire_count, 1);
 	}
 	m->wire_count++;
@@ -3288,38 +3274,69 @@
 * Only managed pages belonging to an object can be paged out.  If the number
 * of wirings transitions to zero and the page is eligible for page out, then
 * the page is added to the specified paging queue (unless PQ_NONE is
-* specified).
+* specified, in which case the page is dequeued if it belongs to a paging
+* queue).
 *
 * If a page is fictitious, then its wire count must always be one.
 *
 * A managed page must be locked.
 */
-boolean_t
+bool
 vm_page_unwire(vm_page_t m, uint8_t queue)
 {
+	bool unwired;
 
 	KASSERT(queue < PQ_COUNT || queue == PQ_NONE,
 	    ("vm_page_unwire: invalid queue %u request for page %p", queue, m));
+
+	unwired = vm_page_unwire_noq(m);
+	if (unwired && (m->oflags & VPO_UNMANAGED) == 0 && m->object != NULL) {
+		if (m->queue == queue) {
+			if (queue == PQ_ACTIVE)
+				vm_page_reference(m);
+			else if (queue != PQ_NONE)
+				vm_page_requeue(m);
+		} else {
+			vm_page_remque(m);
+			if (queue != PQ_NONE) {
+				vm_page_enqueue(queue, m);
+				if (queue == PQ_ACTIVE)
+					/* Initialize act_count. */
+					vm_page_activate(m);
+			}
+		}
+	}
+	return (unwired);
+}
+
+/*
+ *
+ * vm_page_unwire_noq:
+ *
+ * Unwire a page without (re-)inserting it into a page queue.  It is up
+ * to the caller to enqueue, requeue, or free the page as appropriate.
+ * In most cases, vm_page_unwire() should be used instead.
+ */
+bool
+vm_page_unwire_noq(vm_page_t m)
+{
+
 	if ((m->oflags & VPO_UNMANAGED) == 0)
 		vm_page_assert_locked(m);
 	if ((m->flags & PG_FICTITIOUS) != 0) {
 		KASSERT(m->wire_count == 1,
 		    ("vm_page_unwire: fictitious page %p's wire count isn't one",
 		    m));
-		return (FALSE);
+		return (false);
 	}
-	if (m->wire_count > 0) {
-		m->wire_count--;
-		if (m->wire_count == 0) {
-			atomic_subtract_int(&vm_cnt.v_wire_count, 1);
-			if ((m->oflags & VPO_UNMANAGED) == 0 &&
-			    m->object != NULL && queue != PQ_NONE)
-				vm_page_enqueue(queue, m);
-			return (TRUE);
-		} else
-			return (FALSE);
-	} else
+	if (m->wire_count == 0)
 		panic("vm_page_unwire: page %p's wire count is zero", m);
+	m->wire_count--;
+	if (m->wire_count == 0) {
+		atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+		return (true);
+	} else
+		return (false);
 }
 
 /*
@@ -3448,8 +3465,7 @@ vm_page_try_to_free(vm_page_t m)
 	vm_page_assert_locked(m);
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("page %p is unmanaged", m));
-	if (m->dirty != 0 || m->hold_count != 0 || m->wire_count != 0 ||
-	    vm_page_busied(m))
+	if (m->dirty != 0 || vm_page_held(m) || vm_page_busied(m))
 		return (false);
 	if (m->object->ref_count != 0) {
 		pmap_remove_all(m);
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 831a7c1838fb..6ca808bbfbe4 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -97,7 +97,7 @@
 * or the lock for either the free or paging queue (Q).  If a field is
 * annotated below with two of these locks, then holding either lock is
 * sufficient for read access, but both locks are required for write
- * access.
+ * access.  An annotation of (C) indicates that the field is immutable.
 *
 * In contrast, the synchronization of accesses to the page's
 * dirty field is machine dependent (M).  In the
@@ -111,6 +111,38 @@
 * contains the dirty field.  In the machine-independent layer,
 * the implementation of read-modify-write operations on the
 * field is encapsulated in vm_page_clear_dirty_mask().
+ *
+ * The page structure contains two counters which prevent page reuse.
+ * Both counters are protected by the page lock (P).  The hold
+ * counter counts transient references obtained via a pmap lookup, and
+ * is also used to prevent page reclamation in situations where it is
+ * undesirable to block other accesses to the page.  The wire counter
+ * is used to implement mlock(2) and is non-zero for pages containing
+ * kernel memory.  Pages that are wired or held will not be reclaimed
+ * or laundered by the page daemon, but are treated differently during
+ * a page queue scan: held pages remain at their position in the queue,
+ * while wired pages are removed from the queue and must later be
+ * re-enqueued appropriately by the unwiring thread.  It is legal to
+ * call vm_page_free() on a held page; doing so causes it to be removed
+ * from its object and page queue, and the page is released to the
+ * allocator once the last hold reference is dropped.  In contrast,
+ * wired pages may not be freed.
+ *
+ * In some pmap implementations, the wire count of a page table page is
+ * used to track the number of populated entries.
+ *
+ * The busy lock is an embedded reader-writer lock which protects the
+ * page's contents and identity (i.e., its <object, pindex> tuple) and
+ * interlocks with the object lock (O).  In particular, a page may be
+ * busied or unbusied only with the object write lock held.  To avoid
+ * bloating the page structure, the busy lock lacks some of the
+ * features available to the kernel's general-purpose synchronization
+ * primitives.  As a result, busy lock ordering rules are not verified,
+ * lock recursion is not detected, and an attempt to xbusy a busy page
+ * or sbusy an xbusy page will trigger a panic rather than causing the
+ * thread to block.  vm_page_sleep_if_busy() can be used to sleep until
+ * the page's busy state changes, after which the caller must re-lookup
+ * the page and re-evaluate its state.
 */
 
 #if PAGE_SIZE == 4096
@@ -152,9 +184,9 @@ struct vm_page {
 	uint8_t oflags;			/* page VPO_* flags (O) */
 	uint8_t	queue;			/* page queue index (P,Q) */
 	int8_t psind;			/* pagesizes[] index (O) */
-	int8_t segind;
+	int8_t segind;			/* vm_phys segment index (C) */
 	uint8_t	order;			/* index of the buddy queue */
-	uint8_t pool;
+	uint8_t pool;			/* vm_phys freepool index (Q) */
 	u_char	act_count;		/* page usage count (P) */
 	/* NOTE that these must support one bit per DEV_BSIZE in a page */
 	/* so, on normal X86 kernels, they must be at least 8 bits wide */
@@ -489,7 +521,8 @@ bool vm_page_try_to_free(vm_page_t m);
 int vm_page_trysbusy(vm_page_t m);
 void vm_page_unhold_pages(vm_page_t *ma, int count);
 void vm_page_unswappable(vm_page_t m);
-boolean_t vm_page_unwire(vm_page_t m, uint8_t queue);
+bool vm_page_unwire(vm_page_t m, uint8_t queue);
+bool vm_page_unwire_noq(vm_page_t m);
 void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
 void vm_page_wire (vm_page_t);
 void vm_page_xunbusy_hard(vm_page_t m);
@@ -716,5 +749,17 @@ vm_page_in_laundry(vm_page_t m)
 	return (m->queue == PQ_LAUNDRY || m->queue == PQ_UNSWAPPABLE);
 }
 
+/*
+ * vm_page_held:
+ *
+ * Return true if a reference prevents the page from being reclaimable.
+ */
+static inline bool
+vm_page_held(vm_page_t m)
+{
+
+	return (m->hold_count > 0 || m->wire_count > 0);
+}
+
 #endif				/* _KERNEL */
 #endif				/* !_VM_PAGE_ */
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 87163053d052..3993a5641707 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -335,11 +335,8 @@ vm_pageout_cluster(vm_page_t m)
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	pindex = m->pindex;
 
-	/*
-	 * We can't clean the page if it is busy or held.
-	 */
 	vm_page_assert_unbusied(m);
-	KASSERT(m->hold_count == 0, ("page %p is held", m));
+	KASSERT(!vm_page_held(m), ("page %p is held", m));
 
 	pmap_remove_write(m);
 	vm_page_unlock(m);
@@ -378,8 +375,7 @@
 			break;
 		}
 		vm_page_lock(p);
-		if (!vm_page_in_laundry(p) ||
-		    p->hold_count != 0) {	/* may be undergoing I/O */
+		if (!vm_page_in_laundry(p) || vm_page_held(p)) {
 			vm_page_unlock(p);
 			ib = 0;
 			break;
@@ -405,8 +401,7 @@
 		if (p->dirty == 0)
 			break;
 		vm_page_lock(p);
-		if (!vm_page_in_laundry(p) ||
-		    p->hold_count != 0) {	/* may be undergoing I/O */
+		if (!vm_page_in_laundry(p) || vm_page_held(p)) {
 			vm_page_unlock(p);
 			break;
 		}
@@ -655,10 +650,10 @@
 	}
 
 	/*
-	 * The page may have been busied or held while the object
+	 * The page may have been busied or referenced while the object
 	 * and page locks were released.
 	 */
-	if (vm_page_busied(m) || m->hold_count != 0) {
+	if (vm_page_busied(m) || vm_page_held(m)) {
 		vm_page_unlock(m);
 		error = EBUSY;
 		goto unlock_all;
@@ -747,11 +742,18 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall)
 			vm_page_unlock(m);
 			continue;
 		}
+		if (m->wire_count != 0) {
+			vm_page_dequeue_locked(m);
+			vm_page_unlock(m);
+			continue;
+		}
 		object = m->object;
 		if ((!VM_OBJECT_TRYWLOCK(object) &&
 		    (!vm_pageout_fallback_object_lock(m, &next) ||
-		    m->hold_count != 0)) || vm_page_busied(m)) {
+		    vm_page_held(m))) || vm_page_busied(m)) {
 			VM_OBJECT_WUNLOCK(object);
+			if (m->wire_count != 0 && vm_page_pagequeue(m) == pq)
+				vm_page_dequeue_locked(m);
 			vm_page_unlock(m);
 			continue;
 		}
@@ -1190,7 +1192,16 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
 		 */
 		if (!vm_pageout_page_lock(m, &next))
 			goto unlock_page;
-		else if (m->hold_count != 0) {
+		else if (m->wire_count != 0) {
+			/*
+			 * Wired pages may not be freed, and unwiring a queued
+			 * page will cause it to be requeued.  Thus, remove them
+			 * from the queue now to avoid unnecessary revisits.
+			 */
+			vm_page_dequeue_locked(m);
+			addl_page_shortage++;
+			goto unlock_page;
+		} else if (m->hold_count != 0) {
 			/*
 			 * Held pages are essentially stuck in the
 			 * queue.  So, they ought to be discounted
@@ -1205,7 +1216,11 @@
 		if (!VM_OBJECT_TRYWLOCK(object)) {
 			if (!vm_pageout_fallback_object_lock(m, &next))
 				goto unlock_object;
-			else if (m->hold_count != 0) {
+			else if (m->wire_count != 0) {
+				vm_page_dequeue_locked(m);
+				addl_page_shortage++;
+				goto unlock_object;
+			} else if (m->hold_count != 0) {
 				addl_page_shortage++;
 				goto unlock_object;
 			}
@@ -1226,7 +1241,7 @@
 			vm_page_unlock(m);
 			continue;
 		}
-		KASSERT(m->hold_count == 0, ("Held page %p", m));
+		KASSERT(!vm_page_held(m), ("Held page %p", m));
 
 		/*
 		 * Dequeue the inactive page and unlock the inactive page
@@ -1432,6 +1447,15 @@
 		 */
 		VM_CNT_INC(v_pdpages);
 
+		/*
+		 * Wired pages are dequeued lazily.
+		 */
+		if (m->wire_count != 0) {
+			vm_page_dequeue_locked(m);
+			vm_page_unlock(m);
+			continue;
+		}
+
 		/*
 		 * Check to see "how much" the page has been used.
 		 */
diff --git a/sys/vm/vm_swapout.c b/sys/vm/vm_swapout.c
index 97d9eb474e73..cddc3a5a4dc5 100644
--- a/sys/vm/vm_swapout.c
+++ b/sys/vm/vm_swapout.c
@@ -209,7 +209,7 @@ vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
 				continue;
 			VM_CNT_INC(v_pdpages);
 			vm_page_lock(p);
-			if (p->wire_count != 0 || p->hold_count != 0 ||
+			if (vm_page_held(p) ||
 			    !pmap_page_exists_quick(pmap, p)) {
 				vm_page_unlock(p);
 				continue;
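
The queue-placement policy that this patch repeats in sendfile_free_page(), vfs_vmio_unwire(), and vm_page_unwire() amounts to a small decision table: noreuse/nocache pages go near the head of the inactive queue, active pages just get referenced, pages on neither the active nor the inactive queue are deactivated, and pages already inactive are requeued to refresh their LRU position. The standalone C sketch below models only that table outside the kernel; the enum, the function name place_unwired_page(), and the returned strings are invented for illustration and are not part of the patch.

/*
 * Userland model of the post-unwire queue-placement policy.
 * Build with: cc -o unwire_model unwire_model.c
 */
#include <stdio.h>

enum queue { Q_NONE, Q_ACTIVE, Q_INACTIVE, Q_LAUNDRY };

static const char *
place_unwired_page(enum queue cur, int noreuse)
{
	if (noreuse)
		return ("deactivate_noreuse");	/* near head: reclaim soon */
	if (cur == Q_ACTIVE)
		return ("reference");		/* stay active, note the use */
	if (cur != Q_INACTIVE)
		return ("deactivate");		/* move to inactive tail */
	return ("requeue");			/* already inactive: keep LRU */
}

int
main(void)
{
	printf("active, cached:   %s\n", place_unwired_page(Q_ACTIVE, 0));
	printf("laundry, cached:  %s\n", place_unwired_page(Q_LAUNDRY, 0));
	printf("inactive, cached: %s\n", place_unwired_page(Q_INACTIVE, 0));
	printf("any, nocache:     %s\n", place_unwired_page(Q_INACTIVE, 1));
	return (0);
}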
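The split between vm_page_unwire() and the new vm_page_unwire_noq() is easiest to see with the wire-count bookkeeping isolated from queue handling: the _noq variant only decrements the count and reports the 1->0 transition, and the wrapper layers queue placement on top. The following sketch is a userland model of that split; struct page and the page_unwire* helpers are stand-ins with invented names, not kernel interfaces, and the queue field merely records where the wrapper would have enqueued the page.

/* Model of the vm_page_unwire()/vm_page_unwire_noq() layering. */
#include <assert.h>
#include <stdbool.h>

struct page {
	unsigned wire_count;
	int	 queue;			/* -1 models PQ_NONE */
};

static bool
page_unwire_noq(struct page *m)
{
	assert(m->wire_count > 0);	/* the kernel would panic here */
	m->wire_count--;
	return (m->wire_count == 0);	/* true only on the last unwire */
}

static bool
page_unwire(struct page *m, int queue)
{
	bool unwired;

	unwired = page_unwire_noq(m);
	if (unwired)
		m->queue = queue;	/* kernel: enqueue/requeue/reference */
	return (unwired);
}

int
main(void)
{
	struct page m = { .wire_count = 2, .queue = -1 };

	assert(!page_unwire(&m, 1));	/* still wired: queue untouched */
	assert(m.queue == -1);
	assert(page_unwire(&m, 1));	/* last wiring: placed on queue 1 */
	assert(m.queue == 1);
	return (0);
}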
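The vm_pageout.c and vm_swapout.c hunks all follow one rule, matching the new vm_page.h comment: a scan that encounters a wired page dequeues it on the spot (crediting addl_page_shortage in the inactive scan), while a held page is merely skipped and keeps its queue position. The toy scan below demonstrates that rule on a three-entry array; the data structures and counter names are hypothetical, chosen only to mirror the patch's control flow.

/* Toy queue scan: wired pages are dequeued lazily, held pages stay put. */
#include <stdio.h>

struct page {
	int	wire_count;
	int	hold_count;
	int	queued;
};

int
main(void)
{
	struct page q[] = {
		{ 0, 0, 1 },	/* reclaimable */
		{ 1, 0, 1 },	/* wired: remove from queue, never revisit */
		{ 0, 1, 1 },	/* held: keeps its position in the queue */
	};
	int i, addl_page_shortage = 0, reclaimed = 0;

	for (i = 0; i < 3; i++) {
		if (q[i].wire_count != 0) {
			q[i].queued = 0;	/* lazy dequeue */
			addl_page_shortage++;
			continue;
		}
		if (q[i].hold_count != 0) {
			addl_page_shortage++;	/* discounted, not moved */
			continue;
		}
		reclaimed++;
	}
	printf("reclaimed %d, shortage credit %d\n",
	    reclaimed, addl_page_shortage);
	return (0);
}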