diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
index 9bafadeeeb7d..50962e5d0385 100644
--- a/sys/amd64/include/vmparam.h
+++ b/sys/amd64/include/vmparam.h
@@ -101,12 +101,13 @@
 #define	VM_PHYSSEG_MAX		31
 
 /*
- * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
+ * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool
  * from which physical pages are allocated and VM_FREEPOOL_DIRECT is
  * the pool from which physical pages for page tables and small UMA
  * objects are allocated.
  */
-#define	VM_NFREEPOOL		2
+#define	VM_NFREEPOOL		3
+#define	VM_FREEPOOL_CACHE	2
 #define	VM_FREEPOOL_DEFAULT	0
 #define	VM_FREEPOOL_DIRECT	1
 
diff --git a/sys/arm/include/vmparam.h b/sys/arm/include/vmparam.h
index 5185a485a181..35b8d41954c5 100644
--- a/sys/arm/include/vmparam.h
+++ b/sys/arm/include/vmparam.h
@@ -59,12 +59,13 @@
 #define	VM_PHYSSEG_DENSE
 
 /*
- * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
+ * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool
  * from which physical pages are allocated and VM_FREEPOOL_DIRECT is
  * the pool from which physical pages for small UMA objects are
  * allocated.
  */
-#define	VM_NFREEPOOL		2
+#define	VM_NFREEPOOL		3
+#define	VM_FREEPOOL_CACHE	2
 #define	VM_FREEPOOL_DEFAULT	0
 #define	VM_FREEPOOL_DIRECT	1
 
diff --git a/sys/i386/include/vmparam.h b/sys/i386/include/vmparam.h
index 8f97e1b53796..2aa2848a1100 100644
--- a/sys/i386/include/vmparam.h
+++ b/sys/i386/include/vmparam.h
@@ -93,12 +93,13 @@
 #define	VM_PHYSSEG_MAX		17
 
 /*
- * Create one free page pool.  Since the i386 kernel virtual address
+ * Create two free page pools.  Since the i386 kernel virtual address
  * space does not include a mapping onto the machine's entire physical
  * memory, VM_FREEPOOL_DIRECT is defined as an alias for the default
  * pool, VM_FREEPOOL_DEFAULT.
 */
-#define	VM_NFREEPOOL		1
+#define	VM_NFREEPOOL		2
+#define	VM_FREEPOOL_CACHE	1
 #define	VM_FREEPOOL_DEFAULT	0
 #define	VM_FREEPOOL_DIRECT	0
 
diff --git a/sys/ia64/include/vmparam.h b/sys/ia64/include/vmparam.h
index de047bfed45f..c7dac2c8e3d3 100644
--- a/sys/ia64/include/vmparam.h
+++ b/sys/ia64/include/vmparam.h
@@ -122,12 +122,13 @@
 #define	VM_PHYSSEG_MAX		49
 
 /*
- * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
+ * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool
  * from which physical pages are allocated and VM_FREEPOOL_DIRECT is
  * the pool from which physical pages for small UMA objects are
  * allocated.
 */
-#define	VM_NFREEPOOL		2
+#define	VM_NFREEPOOL		3
+#define	VM_FREEPOOL_CACHE	2
 #define	VM_FREEPOOL_DEFAULT	0
 #define	VM_FREEPOOL_DIRECT	1
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index b0c107c00616..b3884d063e3d 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -832,7 +832,7 @@ exec_map_first_page(imgp)
 			vm_page_busy(ma[i]);
 		} else {
 			ma[i] = vm_page_alloc(object, i,
-			    VM_ALLOC_NORMAL);
+			    VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED);
 			if (ma[i] == NULL)
 				break;
 		}
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index d66b45dce50a..bb457db5f82b 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -2898,7 +2898,8 @@ allocbuf(struct buf *bp, int size)
 					VM_WAIT;
 					VM_OBJECT_LOCK(obj);
 				} else {
-					bp->b_flags &= ~B_CACHE;
+					if (m->valid == 0)
+						bp->b_flags &= ~B_CACHE;
 					bp->b_pages[bp->b_npages] = m;
 					++bp->b_npages;
 				}
@@ -2916,20 +2917,13 @@ allocbuf(struct buf *bp, int size)
 			 * vm_fault->getpages->cluster_read->allocbuf
 			 *
 			 */
-			vm_page_lock_queues();
 			if (vm_page_sleep_if_busy(m, FALSE, "pgtblk"))
 				continue;
 
 			/*
-			 * We have a good page.  Should we wakeup the
-			 * page daemon?
+			 * We have a good page.
 			 */
-			if ((curproc != pageproc) &&
-			    (VM_PAGE_INQUEUE1(m, PQ_CACHE)) &&
-			    ((cnt.v_free_count + cnt.v_cache_count) <
-				(cnt.v_free_min + cnt.v_cache_min))) {
-				pagedaemon_wakeup();
-			}
+			vm_page_lock_queues();
 			vm_page_wire(m);
 			vm_page_unlock_queues();
 			bp->b_pages[bp->b_npages] = m;
diff --git a/sys/powerpc/include/vmparam.h b/sys/powerpc/include/vmparam.h
index 60e240cb9973..0f8298e05c55 100644
--- a/sys/powerpc/include/vmparam.h
+++ b/sys/powerpc/include/vmparam.h
@@ -110,12 +110,13 @@ struct pmap_physseg {
 #define	VM_PHYSSEG_DENSE
 
 /*
- * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
+ * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool
  * from which physical pages are allocated and VM_FREEPOOL_DIRECT is
  * the pool from which physical pages for small UMA objects are
  * allocated.
 */
-#define	VM_NFREEPOOL		2
+#define	VM_NFREEPOOL		3
+#define	VM_FREEPOOL_CACHE	2
 #define	VM_FREEPOOL_DEFAULT	0
 #define	VM_FREEPOOL_DIRECT	1
 
diff --git a/sys/sparc64/include/vmparam.h b/sys/sparc64/include/vmparam.h
index 5d83f60b1ac6..5609e8e9ec11 100644
--- a/sys/sparc64/include/vmparam.h
+++ b/sys/sparc64/include/vmparam.h
@@ -91,12 +91,13 @@
 #define	VM_PHYSSEG_MAX		64
 
 /*
- * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
+ * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool
  * from which physical pages are allocated and VM_FREEPOOL_DIRECT is
  * the pool from which physical pages for small UMA objects are
  * allocated.
 */
-#define	VM_NFREEPOOL		2
+#define	VM_NFREEPOOL		3
+#define	VM_FREEPOOL_CACHE	2
 #define	VM_FREEPOOL_DEFAULT	0
 #define	VM_FREEPOOL_DIRECT	1
 
diff --git a/sys/sun4v/include/vmparam.h b/sys/sun4v/include/vmparam.h
index 42af1dfea9df..a994401eb33a 100644
--- a/sys/sun4v/include/vmparam.h
+++ b/sys/sun4v/include/vmparam.h
@@ -91,12 +91,13 @@
 #define	VM_PHYSSEG_MAX		64
 
 /*
- * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
+ * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool
  * from which physical pages are allocated and VM_FREEPOOL_DIRECT is
  * the pool from which physical pages for small UMA objects are
  * allocated.
 */
-#define	VM_NFREEPOOL		2
+#define	VM_NFREEPOOL		3
+#define	VM_FREEPOOL_CACHE	2
 #define	VM_FREEPOOL_DEFAULT	0
 #define	VM_FREEPOOL_DIRECT	1
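Note on the vmparam.h changes above: every architecture's pool count grows by one because clean "cached" pages now live inside the buddy allocator itself, segregated in the new VM_FREEPOOL_CACHE pool, rather than on a PQ_CACHE page queue. A minimal user-space model of that layout follows; the array name, dimensions, and main() are invented for illustration, and the kernel's real queues are the per-segment tables in sys/vm/vm_phys.c.

```c
#include <stdio.h>

#define NFREEORDER	11	/* buddy orders, as in VM_NFREEORDER */
#define POOL_DEFAULT	0
#define POOL_DIRECT	1
#define POOL_CACHE	2	/* new: clean pages still named by an object */
#define NFREEPOOL	3

struct freelist { int lcnt; };	/* count of free chunks of one order */

static struct freelist freequeues[NFREEPOOL][NFREEORDER];

int
main(void)
{
	int avail = 0, order, pool;

	/* A cached page sits in the buddy allocator, CACHE pool, order 0. */
	freequeues[POOL_CACHE][0].lcnt++;

	/* Free and cached pages are both immediately allocatable. */
	for (pool = 0; pool < NFREEPOOL; pool++)
		for (order = 0; order < NFREEORDER; order++)
			avail += freequeues[pool][order].lcnt << order;
	printf("allocatable pages: %d\n", avail);
	return (0);
}
```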
diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h
index bdd196464357..2b4794a44a8e 100644
--- a/sys/sys/vmmeter.h
+++ b/sys/sys/vmmeter.h
@@ -68,7 +68,7 @@ struct vmmeter {
 	u_int v_vnodepgsin;	/* (p) vnode_pager pages paged in */
 	u_int v_vnodepgsout;	/* (p) vnode pager pages paged out */
 	u_int v_intrans;	/* (p) intransit blocking page faults */
-	u_int v_reactivated;	/* (q) pages reactivated from free list */
+	u_int v_reactivated;	/* (f) pages reactivated from free list */
 	u_int v_pdwakeups;	/* (f) times daemon has awaken from sleep */
 	u_int v_pdpages;	/* (q) pages analyzed by daemon */
 
@@ -89,7 +89,7 @@ struct vmmeter {
 	u_int v_active_count;	/* (q) pages active */
 	u_int v_inactive_target; /* (c) pages desired inactive */
 	u_int v_inactive_count;	/* (q) pages inactive */
-	u_int v_cache_count;	/* (q) pages on buffer cache queue */
+	u_int v_cache_count;	/* (f) pages on buffer cache queue */
 	u_int v_cache_min;	/* (c) min pages desired on cache queue */
 	u_int v_cache_max;	/* (c) max pages in cached obj */
 	u_int v_pageout_free_min;	/* (c) min pages reserved for kernel */
diff --git a/sys/vm/vm_contig.c b/sys/vm/vm_contig.c
index 955df304c102..b40a9514f1ba 100644
--- a/sys/vm/vm_contig.c
+++ b/sys/vm/vm_contig.c
@@ -231,8 +231,7 @@ contigmalloc(
 	unsigned long boundary)
 {
 	void * ret;
-	vm_object_t object;
-	vm_page_t m, m_next, pages;
+	vm_page_t pages;
 	unsigned long npgs;
 	int actl, actmax, inactl, inactmax, tries;
 
@@ -258,14 +257,6 @@ again:
 				actl++;
 				goto again;
 			}
-			TAILQ_FOREACH_SAFE(m, &vm_page_queues[PQ_CACHE].pl,
-			    pageq, m_next) {
-				if (m->hold_count == 0 &&
-				    VM_OBJECT_TRYLOCK(object = m->object)) {
-					vm_page_free(m);
-					VM_OBJECT_UNLOCK(object);
-				}
-			}
 			vm_page_unlock_queues();
 			tries++;
 			goto retry;
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 8b843dc9bddb..b26972e96f81 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -328,8 +328,6 @@ RetryFault:;
 	 */
 	fs.m = vm_page_lookup(fs.object, fs.pindex);
 	if (fs.m != NULL) {
-		int queue;
-
 		/*
 		 * check for page-based copy on write.
 		 * We check fs.object == fs.first_object so
@@ -398,20 +396,7 @@ RetryFault:;
 				vm_object_deallocate(fs.first_object);
 				goto RetryFault;
 			}
-			queue = fs.m->queue;
-
-			vm_pageq_remove_nowakeup(fs.m);
-
-			if (VM_PAGE_RESOLVEQUEUE(fs.m, queue) == PQ_CACHE) {
-				cnt.v_reactivated++;
-				if (vm_page_count_severe()) {
-					vm_page_activate(fs.m);
-					vm_page_unlock_queues();
-					unlock_and_deallocate(&fs);
-					VM_WAITPFAULT;
-					goto RetryFault;
-				}
-			}
+			vm_pageq_remove(fs.m);
 			vm_page_unlock_queues();
 
 			/*
@@ -446,7 +431,10 @@ RetryFault:;
 			if (!vm_page_count_severe()) {
 				fs.m = vm_page_alloc(fs.object, fs.pindex,
 				    (fs.vp || fs.object->backing_object)?
 				    VM_ALLOC_NORMAL: VM_ALLOC_ZERO);
+				if (fs.m != NULL && (fs.m->valid &
+				    VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL)
+					break;
 			}
 			if (fs.m == NULL) {
 				unlock_and_deallocate(&fs);
@@ -993,9 +980,7 @@ vm_fault_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
 		    (m->flags & PG_FICTITIOUS) == 0) {
 			vm_page_lock_queues();
-			if (!VM_PAGE_INQUEUE1(m, PQ_CACHE))
-				pmap_enter_quick(pmap, addr, m,
-				    entry->protection);
+			pmap_enter_quick(pmap, addr, m, entry->protection);
 			vm_page_unlock_queues();
 		}
 		VM_OBJECT_UNLOCK(lobject);
@@ -1273,7 +1258,8 @@ vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage)
 		for (i = 0, tpindex = pindex - 1;
 		    tpindex >= startpindex && tpindex < pindex;
 		    i++, tpindex--) {
-			rtm = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL);
+			rtm = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL |
+			    VM_ALLOC_IFNOTCACHED);
 			if (rtm == NULL) {
 				/*
 				 * Shift the allocated pages to the
@@ -1311,7 +1297,8 @@ vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage)
 		for (; tpindex < endpindex; i++, tpindex++) {
-			rtm = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL);
+			rtm = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL |
+			    VM_ALLOC_IFNOTCACHED);
 			if (rtm == NULL) {
 				break;
 			}
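The callers changed above show what the two new allocation flags buy: exec_map_first_page() and vm_fault_additional_pages() pass VM_ALLOC_IFNOTCACHED so that speculative read-ahead backs off instead of destroying a still-valid cached copy, while the MADV_WILLNEED path in vm_object.c (below) passes VM_ALLOC_IFCACHED to reactivate one. The following is a stand-alone sketch of that policy, with stub helpers standing in for the kernel's cache tree and physical allocator; all names here are invented for the example.

```c
#include <stddef.h>
#include <stdio.h>

#define ALLOC_IFCACHED		0x0400	/* succeed only by reusing a cached page */
#define ALLOC_IFNOTCACHED	0x0800	/* fail rather than displace a cached page */

struct page { int cached; };

static struct page cached_copy = { 1 };

/* Stand-in for vm_page_cache_lookup(): object 1, pindex 0 has a cached page. */
static struct page *
cache_lookup(int object, unsigned long pindex)
{
	return (object == 1 && pindex == 0) ? &cached_copy : NULL;
}

/* Stand-in for vm_phys_alloc_pages(). */
static struct page *
phys_alloc(void)
{
	static struct page fresh;
	return (&fresh);
}

static struct page *
alloc_policy(int object, unsigned long pindex, int req)
{
	struct page *m;

	if ((m = cache_lookup(object, pindex)) != NULL) {
		/* A valid cached copy exists; read-ahead must leave it alone. */
		if (req & ALLOC_IFNOTCACHED)
			return (NULL);
		return (m);		/* reactivate the cached copy */
	}
	if (req & ALLOC_IFCACHED)	/* caller wanted only a cached page */
		return (NULL);
	return (phys_alloc());		/* ordinary free-page allocation */
}

int
main(void)
{
	/* Read-ahead over a cached page backs off (prints nil)... */
	printf("%p\n", (void *)alloc_policy(1, 0, ALLOC_IFNOTCACHED));
	/* ...while WILLNEED-style reactivation reuses it (prints non-nil). */
	printf("%p\n", (void *)alloc_policy(1, 0, ALLOC_IFCACHED));
	return (0);
}
```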
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 905201ee46bb..cc6628b75451 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -1518,28 +1518,24 @@ vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
 				start = addr + ptoa(tmpidx);
 				p_start = p;
 			}
+		} else if (p_start != NULL) {
 			if (!are_queues_locked) {
 				are_queues_locked = TRUE;
 				vm_page_lock_queues();
 			}
-			if (VM_PAGE_INQUEUE1(p, PQ_CACHE)) {
-				if ((flags & MAP_PREFAULT_MADVISE) != 0)
-					vm_page_deactivate(p);
-				else if (p_start != NULL) {
-					pmap_enter_object(map->pmap, start, addr +
-					    ptoa(tmpidx), p_start, prot);
-					p_start = NULL;
-				}
-			}
-		} else if (p_start != NULL) {
 			pmap_enter_object(map->pmap, start, addr +
 			    ptoa(tmpidx), p_start, prot);
 			p_start = NULL;
 		}
 	}
-	if (p_start != NULL)
+	if (p_start != NULL) {
+		if (!are_queues_locked) {
+			are_queues_locked = TRUE;
+			vm_page_lock_queues();
+		}
 		pmap_enter_object(map->pmap, start, addr + ptoa(psize),
 		    p_start, prot);
+	}
 	if (are_queues_locked)
 		vm_page_unlock_queues();
 unlock_return:
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 47416389f902..0d2d61c49e92 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -170,6 +170,9 @@ vm_object_zdtor(void *mem, int size, void *arg)
 	KASSERT(TAILQ_EMPTY(&object->memq),
 	    ("object %p has resident pages",
 	    object));
+	KASSERT(object->cache == NULL,
+	    ("object %p has cached pages",
+	    object));
 	KASSERT(object->paging_in_progress == 0,
 	    ("object %p paging_in_progress = %d",
 	    object, object->paging_in_progress));
@@ -217,6 +220,7 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
 	object->handle = NULL;
 	object->backing_object = NULL;
 	object->backing_object_offset = (vm_ooffset_t) 0;
+	object->cache = NULL;
 
 	mtx_lock(&vm_object_list_mtx);
 	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
@@ -648,6 +652,9 @@ vm_object_terminate(vm_object_t object)
 	}
 	vm_page_unlock_queues();
 
+	if (__predict_false(object->cache != NULL))
+		vm_page_cache_free(object);
+
 	/*
 	 * Let the pager know object is dead.
 	 */
@@ -732,8 +739,7 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int
 		while (tscan < tend) {
 			curgeneration = object->generation;
 			p = vm_page_lookup(object, tscan);
-			if (p == NULL || p->valid == 0 ||
-			    VM_PAGE_INQUEUE1(p, PQ_CACHE)) {
+			if (p == NULL || p->valid == 0) {
 				if (--scanlimit == 0)
 					break;
 				++tscan;
@@ -821,8 +827,7 @@ again:
 			pi = p->pindex;
 			if ((p->oflags & VPO_CLEANCHK) == 0 ||
 				(pi < tstart) || (pi >= tend) ||
-				(p->valid == 0) ||
-				VM_PAGE_INQUEUE1(p, PQ_CACHE)) {
+				p->valid == 0) {
 				p->oflags &= ~VPO_CLEANCHK;
 				continue;
 			}
@@ -900,10 +905,6 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration,
 			     (tp->oflags & VPO_CLEANCHK) == 0) ||
 			    (tp->busy != 0))
 				break;
-			if (VM_PAGE_INQUEUE1(tp, PQ_CACHE)) {
-				tp->oflags &= ~VPO_CLEANCHK;
-				break;
-			}
 			vm_page_test_dirty(tp);
 			if ((tp->dirty & tp->valid) == 0) {
 				tp->oflags &= ~VPO_CLEANCHK;
@@ -928,10 +929,6 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration,
 			     (tp->oflags & VPO_CLEANCHK) == 0) ||
 			    (tp->busy != 0))
 				break;
-			if (VM_PAGE_INQUEUE1(tp, PQ_CACHE)) {
-				tp->oflags &= ~VPO_CLEANCHK;
-				break;
-			}
 			vm_page_test_dirty(tp);
 			if ((tp->dirty & tp->valid) == 0) {
 				tp->oflags &= ~VPO_CLEANCHK;
@@ -1104,6 +1101,12 @@ shadowlookup:
 			}
 		}
 		m = vm_page_lookup(tobject, tpindex);
+		if (m == NULL && advise == MADV_WILLNEED) {
+			/*
+			 * If the page is cached, reactivate it.
+			 */
+			m = vm_page_alloc(tobject, tpindex, VM_ALLOC_IFCACHED);
+		}
 		if (m == NULL) {
 			/*
 			 * There may be swap even if there is no backing page
@@ -1356,6 +1359,13 @@ retry:
 		 * and new_object's locks are released and reacquired.
 		 */
 		swap_pager_copy(orig_object, new_object, offidxstart, 0);
+
+		/*
+		 * Transfer any cached pages from orig_object to new_object.
+		 */
+		if (__predict_false(orig_object->cache != NULL))
+			vm_page_cache_transfer(orig_object, offidxstart,
+			    new_object);
 	}
 	VM_OBJECT_UNLOCK(orig_object);
 	TAILQ_FOREACH(m, &new_object->memq, listq)
@@ -1390,8 +1400,8 @@ vm_object_backing_scan(vm_object_t object, int op)
 	 */
 	if (op & OBSC_TEST_ALL_SHADOWED) {
 		/*
-		 * We do not want to have to test for the existence of
-		 * swap pages in the backing object.  XXX but with the
+		 * We do not want to have to test for the existence of cache
+		 * or swap pages in the backing object.  XXX but with the
 		 * new swapper this would be pretty easy to do.
 		 *
 		 * XXX what about anonymous MAP_SHARED memory that hasn't
@@ -1664,6 +1674,12 @@ vm_object_collapse(vm_object_t object)
 				    backing_object,
 				    object,
 				    OFF_TO_IDX(object->backing_object_offset), TRUE);
+
+				/*
+				 * Free any cached pages from backing_object.
+				 */
+				if (__predict_false(backing_object->cache != NULL))
+					vm_page_cache_free(backing_object);
 			}
 			/*
 			 * Object now shadows whatever backing_object did.
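vm_page_cache_transfer() (added in vm_page.c below) gives vm_object_split() above its cached-page handling: one splay at offidxstart partitions the tree, the high subtree moves to new_object, and each moved page's pindex is rebased to the new object's origin. The sketch below shows the same partition as a plain recursive BST split; it is illustrative only and deliberately avoids the kernel's splay discipline.

```c
#include <stdio.h>

struct node {			/* stands in for vm_page's pindex/left/right */
	unsigned long pindex;
	struct node *left, *right;
};

/* Split t into keys < off (*keep) and keys >= off (*move). */
static void
split(struct node *t, unsigned long off, struct node **keep, struct node **move)
{
	if (t == NULL) {
		*keep = *move = NULL;
		return;
	}
	if (t->pindex < off) {
		*keep = t;
		split(t->right, off, &t->right, move);
	} else {
		*move = t;
		split(t->left, off, keep, &t->left);
	}
}

/* Rebase every transferred pindex, as "m->pindex -= offidxstart" does. */
static void
rebase(struct node *t, unsigned long off)
{
	if (t == NULL)
		return;
	t->pindex -= off;
	rebase(t->left, off);
	rebase(t->right, off);
}

int
main(void)
{
	struct node n[4] = {
		{ 2, NULL, NULL }, { 7, NULL, NULL },
		{ 5, NULL, NULL }, { 9, NULL, NULL },
	};
	struct node *root, *keep, *move;

	/* Hand-built BST: 5 at the root, 2 left, 7 right, 9 under 7. */
	root = &n[2];
	root->left = &n[0];
	root->right = &n[1];
	n[1].right = &n[3];

	split(root, 6, &keep, &move);
	rebase(move, 6);	/* moved pages: 7 -> 1, 9 -> 3 */
	printf("kept root %lu, moved root %lu\n", keep->pindex, move->pindex);
	return (0);
}
```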
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index 5a3f3c764097..b0da10b01284 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -100,6 +100,7 @@ struct vm_object {
 	struct vm_object *backing_object; /* object that I'm a shadow of */
 	vm_ooffset_t backing_object_offset;/* Offset in backing object */
 	TAILQ_ENTRY(vm_object) pager_object_list; /* list of all objects of this pager type */
+	vm_page_t cache;		/* root of the cache page splay tree */
 	void *handle;
 	union {
 		/*
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 6504f604c1c7..36fee281a157 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -547,7 +547,7 @@ vm_page_sleep(vm_page_t m, const char *msg)
 void
 vm_page_dirty(vm_page_t m)
 {
-	KASSERT(VM_PAGE_GETKNOWNQUEUE1(m) != PQ_CACHE,
+	KASSERT((m->flags & PG_CACHED) == 0,
 	    ("vm_page_dirty: page in cache!"));
 	KASSERT(!VM_PAGE_IS_FREE(m),
 	    ("vm_page_dirty: page is free!"));
@@ -790,50 +790,165 @@ vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
 	vm_page_remove(m);
 	vm_page_insert(m, new_object, new_pindex);
-	if (VM_PAGE_INQUEUE1(m, PQ_CACHE))
-		vm_page_deactivate(m);
 	vm_page_dirty(m);
 }
 
 /*
- * vm_page_select_cache:
- *
- *	Move a page of the given color from the cache queue to the free
- *	queue.  As pages might be found, but are not applicable, they are
- *	deactivated.
- *
- *	This routine may not block.
+ *	Convert all of the cached pages belonging to the given object
+ *	into free pages.  If the given object has cached pages and is
+ *	backed by a vnode, reduce the vnode's hold count.
 */
-vm_page_t
-vm_page_select_cache(void)
+void
+vm_page_cache_free(vm_object_t object)
 {
-	vm_object_t object;
-	vm_page_t m;
-	boolean_t was_trylocked;
+	vm_page_t m, root;
+	boolean_t empty;
 
-	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
-	while ((m = TAILQ_FIRST(&vm_page_queues[PQ_CACHE].pl)) != NULL) {
-		KASSERT(m->dirty == 0, ("Found dirty cache page %p", m));
-		KASSERT(!pmap_page_is_mapped(m),
-		    ("Found mapped cache page %p", m));
-		KASSERT((m->flags & PG_UNMANAGED) == 0,
-		    ("Found unmanaged cache page %p", m));
-		KASSERT(m->wire_count == 0, ("Found wired cache page %p", m));
-		if (m->hold_count == 0 && (object = m->object,
-		    (was_trylocked = VM_OBJECT_TRYLOCK(object)) ||
-		    VM_OBJECT_LOCKED(object))) {
-			KASSERT((m->oflags & VPO_BUSY) == 0 && m->busy == 0,
-			    ("Found busy cache page %p", m));
-			vm_page_free(m);
-			if (was_trylocked)
-				VM_OBJECT_UNLOCK(object);
-			break;
+	mtx_lock(&vm_page_queue_free_mtx);
+	empty = object->cache == NULL;
+	while ((m = object->cache) != NULL) {
+		if (m->left == NULL)
+			root = m->right;
+		else if (m->right == NULL)
+			root = m->left;
+		else {
+			root = vm_page_splay(m->pindex, m->left);
+			root->right = m->right;
 		}
-		vm_page_deactivate(m);
+		m->object->cache = root;
+		m->object = NULL;
+		m->valid = 0;
+		/* Clear PG_CACHED and set PG_FREE. */
+		m->flags ^= PG_CACHED | PG_FREE;
+		KASSERT((m->flags & (PG_CACHED | PG_FREE)) == PG_FREE,
+		    ("vm_page_cache_free: page %p has inconsistent flags", m));
+		cnt.v_cache_count--;
+		cnt.v_free_count++;
+	}
+	mtx_unlock(&vm_page_queue_free_mtx);
+	if (object->type == OBJT_VNODE && !empty)
+		vdrop(object->handle);
+}
+
+/*
+ *	Returns the cached page that is associated with the given
+ *	object and offset.  If, however, none exists, returns NULL.
+ *
+ *	The free page queue must be locked.
+ */
+static inline vm_page_t
+vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex)
+{
+	vm_page_t m;
+
+	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+	if ((m = object->cache) != NULL && m->pindex != pindex) {
+		m = vm_page_splay(pindex, m);
+		if ((object->cache = m)->pindex != pindex)
+			m = NULL;
+	}
+	return (m);
+}
+
+/*
+ *	Remove the given cached page from its containing object's
+ *	collection of cached pages.
+ *
+ *	The free page queue must be locked.
+ */
+void
+vm_page_cache_remove(vm_page_t m)
+{
+	vm_object_t object;
+	vm_page_t root;
+
+	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+	KASSERT((m->flags & PG_CACHED) != 0,
+	    ("vm_page_cache_remove: page %p is not cached", m));
+	object = m->object;
+	if (m != object->cache) {
+		root = vm_page_splay(m->pindex, object->cache);
+		KASSERT(root == m,
+		    ("vm_page_cache_remove: page %p is not cached in object %p",
+		    m, object));
+	}
+	if (m->left == NULL)
+		root = m->right;
+	else if (m->right == NULL)
+		root = m->left;
+	else {
+		root = vm_page_splay(m->pindex, m->left);
+		root->right = m->right;
+	}
+	object->cache = root;
+	m->object = NULL;
+	cnt.v_cache_count--;
+}
+
+/*
+ *	Transfer all of the cached pages with offset greater than or
+ *	equal to 'offidxstart' from the original object's cache to the
+ *	new object's cache.  Initially, the new object's cache must be
+ *	empty.  Offset 'offidxstart' in the original object must
+ *	correspond to offset zero in the new object.
+ *
+ *	The new object must be locked.
+ */
+void
+vm_page_cache_transfer(vm_object_t orig_object, vm_pindex_t offidxstart,
+    vm_object_t new_object)
+{
+	vm_page_t m, m_next;
+
+	/*
+	 * Insertion into an object's collection of cached pages
+	 * requires the object to be locked.  In contrast, removal does
+	 * not.
+	 */
+	VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED);
+	KASSERT(new_object->cache == NULL,
+	    ("vm_page_cache_transfer: object %p has cached pages",
+	    new_object));
+	mtx_lock(&vm_page_queue_free_mtx);
+	if ((m = orig_object->cache) != NULL) {
+		/*
+		 * Transfer all of the pages with offset greater than or
+		 * equal to 'offidxstart' from the original object's
+		 * cache to the new object's cache.
+		 */
+		m = vm_page_splay(offidxstart, m);
+		if (m->pindex < offidxstart) {
+			orig_object->cache = m;
+			new_object->cache = m->right;
+			m->right = NULL;
+		} else {
+			orig_object->cache = m->left;
+			new_object->cache = m;
+			m->left = NULL;
+		}
+		KASSERT(new_object->cache == NULL ||
+		    new_object->type == OBJT_SWAP,
+		    ("vm_page_cache_transfer: object %p's type is incompatible"
+		    " with cached pages", new_object));
+
+		/*
+		 * Update the object and offset of each page that was
+		 * transferred to the new object's cache.
+		 */
+		while ((m = new_object->cache) != NULL) {
+			m_next = vm_page_splay(m->pindex, m->right);
+			m->object = new_object;
+			m->pindex -= offidxstart;
+			if (m_next == NULL)
+				break;
+			m->right = NULL;
+			m_next->left = m;
+			new_object->cache = m_next;
+		}
+	}
+	mtx_unlock(&vm_page_queue_free_mtx);
+}
+
@@ -847,15 +962,13 @@ vm_page_select_cache(void)
 /*
 *	vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	page_req classes:
 *	VM_ALLOC_NORMAL		normal process request
 *	VM_ALLOC_SYSTEM		system *really* needs a page
 *	VM_ALLOC_INTERRUPT	interrupt time request
 *	VM_ALLOC_ZERO		zero page
 *
 *	This routine may not block.
- *
- *	Additional special handling is required when called from an
- *	interrupt (VM_ALLOC_INTERRUPT).  We are not allowed to mess with
- *	the page cache in this case.
 */
 vm_page_t
 vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
 {
-	vm_page_t m = NULL;
+	struct vnode *vp = NULL;
+	vm_object_t m_object;
+	vm_page_t m;
 	int flags, page_req;
 
 	page_req = req & VM_ALLOC_CLASS_MASK;
@@ -876,52 +989,32 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
 		page_req = VM_ALLOC_SYSTEM;
 	};
 
-loop:
 	mtx_lock(&vm_page_queue_free_mtx);
-	if (cnt.v_free_count > cnt.v_free_reserved ||
+	if (cnt.v_free_count + cnt.v_cache_count > cnt.v_free_reserved ||
 	    (page_req == VM_ALLOC_SYSTEM &&
-	    cnt.v_cache_count == 0 &&
-	    cnt.v_free_count > cnt.v_interrupt_free_min) ||
-	    (page_req == VM_ALLOC_INTERRUPT && cnt.v_free_count > 0)) {
+	    cnt.v_free_count + cnt.v_cache_count > cnt.v_interrupt_free_min) ||
+	    (page_req == VM_ALLOC_INTERRUPT &&
+	    cnt.v_free_count + cnt.v_cache_count > 0)) {
 		/*
 		 * Allocate from the free queue if the number of free pages
 		 * exceeds the minimum for the request class.
 		 */
-		m = vm_phys_alloc_pages(object != NULL ?
-		    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
-	} else if (page_req != VM_ALLOC_INTERRUPT) {
-		mtx_unlock(&vm_page_queue_free_mtx);
-		/*
-		 * Allocatable from cache (non-interrupt only).  On success,
-		 * we must free the page and try again, thus ensuring that
-		 * cnt.v_*_free_min counters are replenished.
-		 */
-		vm_page_lock_queues();
-		if ((m = vm_page_select_cache()) == NULL) {
-			KASSERT(cnt.v_cache_count == 0,
-			    ("vm_page_alloc: cache queue is missing %d pages",
-			    cnt.v_cache_count));
-			vm_page_unlock_queues();
-			atomic_add_int(&vm_pageout_deficit, 1);
-			pagedaemon_wakeup();
-
-			if (page_req != VM_ALLOC_SYSTEM)
-				return (NULL);
-
-			mtx_lock(&vm_page_queue_free_mtx);
-			if (cnt.v_free_count <= cnt.v_interrupt_free_min) {
+		if (object != NULL &&
+		    (m = vm_page_cache_lookup(object, pindex)) != NULL) {
+			if ((req & VM_ALLOC_IFNOTCACHED) != 0) {
 				mtx_unlock(&vm_page_queue_free_mtx);
 				return (NULL);
 			}
+			vm_phys_unfree_page(m);
+		} else if ((req & VM_ALLOC_IFCACHED) != 0) {
+			mtx_unlock(&vm_page_queue_free_mtx);
+			return (NULL);
+		} else
 			m = vm_phys_alloc_pages(object != NULL ?
 			    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
-		} else {
-			vm_page_unlock_queues();
-			goto loop;
-		}
 	} else {
 		/*
-		 * Not allocatable from cache from interrupt, give up.
+		 * Not allocatable, give up.
 		 */
 		mtx_unlock(&vm_page_queue_free_mtx);
 		atomic_add_int(&vm_pageout_deficit, 1);
@@ -937,8 +1030,24 @@ loop:
 	KASSERT(
 	    m != NULL,
 	    ("vm_page_alloc(): missing page on free queue")
 	);
-	KASSERT(VM_PAGE_IS_FREE(m),
-	    ("vm_page_alloc: page %p is not free", m));
+	if ((m->flags & PG_CACHED) != 0) {
+		KASSERT(m->valid != 0,
+		    ("vm_page_alloc: cached page %p is invalid", m));
+		if (m->object == object && m->pindex == pindex)
+			cnt.v_reactivated++;
+		else
+			m->valid = 0;
+		m_object = m->object;
+		vm_page_cache_remove(m);
+		if (m_object->type == OBJT_VNODE && m_object->cache == NULL)
+			vp = m_object->handle;
+	} else {
+		KASSERT(VM_PAGE_IS_FREE(m),
+		    ("vm_page_alloc: page %p is not free", m));
+		KASSERT(m->valid == 0,
+		    ("vm_page_alloc: free page %p is valid", m));
+		cnt.v_free_count--;
+	}
 
 	/*
 	 * Initialize structure.  Only the PG_ZERO flag is inherited.
@@ -964,7 +1073,6 @@
 	m->hold_count = 0;
 	m->act_count = 0;
 	m->busy = 0;
-	m->valid = 0;
 	KASSERT(m->dirty == 0, ("vm_page_alloc: free/cache page %p was dirty", m));
 	mtx_unlock(&vm_page_queue_free_mtx);
 
@@ -973,6 +1081,15 @@
 	if ((req & VM_ALLOC_NOOBJ) == 0)
 		vm_page_insert(m, object, pindex);
 	else
 		m->pindex = pindex;
 
+	/*
+	 * The following call to vdrop() must come after the above call
+	 * to vm_page_insert() in case both affect the same object and
+	 * vnode.  Otherwise, the affected vnode's hold count could
+	 * temporarily become zero.
+	 */
+	if (vp != NULL)
+		vdrop(vp);
+
 	/*
 	 * Don't wakeup too often - wakeup the pageout daemon when
 	 * we would be nearly out of memory.
@@ -1047,8 +1164,6 @@ vm_page_activate(vm_page_t m)
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	if (VM_PAGE_GETKNOWNQUEUE2(m) != PQ_ACTIVE) {
-		if (VM_PAGE_INQUEUE1(m, PQ_CACHE))
-			cnt.v_reactivated++;
 		vm_pageq_remove(m);
 		if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
 			if (m->act_count < ACT_INIT)
@@ -1133,7 +1248,7 @@ vm_page_free_toq(vm_page_t m)
 	 * callback routine until after we've put the page on the
 	 * appropriate free queue.
 	 */
-	vm_pageq_remove_nowakeup(m);
+	vm_pageq_remove(m);
 	vm_page_remove(m);
 
 	/*
@@ -1160,6 +1275,7 @@ vm_page_free_toq(vm_page_t m)
 	} else {
 		m->flags |= PG_FREE;
 		mtx_lock(&vm_page_queue_free_mtx);
+		cnt.v_free_count++;
 		if ((m->flags & PG_ZERO) != 0) {
 			vm_phys_free_pages(m, 0);
 			++vm_page_zero_count;
@@ -1279,8 +1395,6 @@ _vm_page_deactivate(vm_page_t m, int athead)
 	if (VM_PAGE_INQUEUE2(m, PQ_INACTIVE))
 		return;
 	if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
-		if (VM_PAGE_INQUEUE1(m, PQ_CACHE))
-			cnt.v_reactivated++;
 		vm_page_flag_clear(m, PG_WINATCFLS);
 		vm_pageq_remove(m);
 		if (athead)
@@ -1354,15 +1468,26 @@ vm_page_try_to_free(vm_page_t m)
 void
 vm_page_cache(vm_page_t m)
 {
+	vm_object_t object;
+	vm_page_t root;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
-	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
+	object = m->object;
+	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	if ((m->flags & PG_UNMANAGED) || (m->oflags & VPO_BUSY) || m->busy ||
 	    m->hold_count || m->wire_count) {
 		panic("vm_page_cache: attempting to cache busy page");
 	}
-	if (VM_PAGE_INQUEUE1(m, PQ_CACHE))
+	if (m->valid == 0 || object->type == OBJT_DEFAULT) {
+		/*
+		 * Hypothesis: A cache-eligible page belonging to a
+		 * default object must be zero filled.
+		 */
+		vm_page_free(m);
 		return;
+	}
+	KASSERT((m->flags & PG_CACHED) == 0,
+	    ("vm_page_cache: page %p is already cached", m));
 	cnt.v_tcached++;
 
 	/*
@@ -1374,11 +1499,72 @@ vm_page_cache(vm_page_t m)
 		panic("vm_page_cache: caching a dirty page, pindex: %ld",
 		    (long)m->pindex);
 	}
-	vm_pageq_remove_nowakeup(m);
-	vm_pageq_enqueue(PQ_CACHE, m);
+
+	/*
+	 * Remove the page from the paging queues.
+	 */
+	vm_pageq_remove(m);
+
+	/*
+	 * Remove the page from the object's collection of resident
+	 * pages.
+	 */
+	if (m != object->root)
+		vm_page_splay(m->pindex, object->root);
+	if (m->left == NULL)
+		root = m->right;
+	else {
+		root = vm_page_splay(m->pindex, m->left);
+		root->right = m->right;
+	}
+	object->root = root;
+	TAILQ_REMOVE(&object->memq, m, listq);
+	object->resident_page_count--;
+	object->generation++;
+
+	/*
+	 * Insert the page into the object's collection of cached pages
+	 * and the physical memory allocator's cache/free page queues.
+	 */
+	vm_page_flag_set(m, PG_CACHED);
+	vm_page_flag_clear(m, PG_ZERO);
+	mtx_lock(&vm_page_queue_free_mtx);
+	vm_phys_set_pool(VM_FREEPOOL_CACHE, m, 0);
+	cnt.v_cache_count++;
+	root = object->cache;
+	if (root == NULL) {
+		m->left = NULL;
+		m->right = NULL;
+	} else {
+		root = vm_page_splay(m->pindex, root);
+		if (m->pindex < root->pindex) {
+			m->left = root->left;
+			m->right = root;
+			root->left = NULL;
+		} else if (__predict_false(m->pindex == root->pindex))
+			panic("vm_page_cache: offset already cached");
+		else {
+			m->right = root->right;
+			m->left = root;
+			root->right = NULL;
+		}
+	}
+	object->cache = m;
+	vm_phys_free_pages(m, 0);
 	vm_page_free_wakeup();
 	mtx_unlock(&vm_page_queue_free_mtx);
+
+	/*
+	 * Increment the vnode's hold count if this is the object's only
+	 * cached page.  Decrement the vnode's hold count if this was
+	 * the object's only resident page.
+	 */
+	if (object->type == OBJT_VNODE) {
+		if (root == NULL && object->resident_page_count != 0)
+			vhold(object->handle);
+		else if (root != NULL && object->resident_page_count == 0)
+			vdrop(object->handle);
+	}
 }
 
 /*
@@ -1416,9 +1602,7 @@ vm_page_dontneed(vm_page_t m)
 	 * occassionally leave the page alone
 	 */
 	if ((dnw & 0x01F0) == 0 ||
-	    VM_PAGE_INQUEUE2(m, PQ_INACTIVE) ||
-	    VM_PAGE_INQUEUE1(m, PQ_CACHE)
-	) {
+	    VM_PAGE_INQUEUE2(m, PQ_INACTIVE)) {
 		if (m->act_count >= ACT_INIT)
 			--m->act_count;
 		return;
@@ -1482,7 +1666,8 @@ retrylookup:
 		if ((allocflags & VM_ALLOC_RETRY) == 0)
 			return (NULL);
 		goto retrylookup;
-	}
+	} else if (m->valid != 0)
+		return (m);
 	if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0)
 		pmap_zero_page(m);
 	return (m);
@@ -1813,7 +1998,7 @@ DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
 	db_printf("\n");
 
 	db_printf("PQ_CACHE:");
-	db_printf(" %d", *vm_page_queues[PQ_CACHE].cnt);
+	db_printf(" %d", cnt.v_cache_count);
 	db_printf("\n");
 
 	db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
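The object->cache field manipulated throughout vm_page.c above is an unbalanced binary search tree keyed by pindex, maintained with the same top-down splay routine, vm_page_splay(), that already orders an object's resident pages. Below is a self-contained user-space rendering of that classic Sleator-Tarjan top-down splay, together with the lookup and removal patterns used by vm_page_cache_lookup() and vm_page_cache_remove(). It assumes unique keys, which the cache tree guarantees; it is a model, not the kernel code.

```c
#include <stdio.h>

struct pnode {
	unsigned long pindex;
	struct pnode *left, *right;
};

/* Top-down splay, shaped after the kernel's vm_page_splay(). */
static struct pnode *
splay(unsigned long pindex, struct pnode *root)
{
	struct pnode dummy, *ltreemax, *rtreemin, *y;

	if (root == NULL)
		return (root);
	dummy.left = dummy.right = NULL;
	ltreemax = rtreemin = &dummy;
	for (;; root = y) {
		if (pindex < root->pindex) {
			if ((y = root->left) == NULL)
				break;
			if (pindex < y->pindex) {
				root->left = y->right;	/* rotate right */
				y->right = root;
				root = y;
				if ((y = root->left) == NULL)
					break;
			}
			rtreemin->left = root;		/* link right */
			rtreemin = root;
		} else if (pindex > root->pindex) {
			if ((y = root->right) == NULL)
				break;
			if (pindex > y->pindex) {
				root->right = y->left;	/* rotate left */
				y->left = root;
				root = y;
				if ((y = root->right) == NULL)
					break;
			}
			ltreemax->right = root;		/* link left */
			ltreemax = root;
		} else
			break;
	}
	ltreemax->right = root->left;			/* reassemble */
	rtreemin->left = root->right;
	root->left = dummy.right;
	root->right = dummy.left;
	return (root);
}

/* Lookup in the style of vm_page_cache_lookup(). */
static struct pnode *
lookup(struct pnode **cachep, unsigned long pindex)
{
	struct pnode *m;

	if ((m = *cachep) != NULL && m->pindex != pindex) {
		m = splay(pindex, m);
		if ((*cachep = m)->pindex != pindex)
			m = NULL;
	}
	return (m);
}

/* Removal in the style of vm_page_cache_remove(). */
static void
tree_remove(struct pnode **cachep, struct pnode *m)
{
	struct pnode *root;

	if (m != *cachep)
		*cachep = splay(m->pindex, *cachep);	/* brings m to root */
	if (m->left == NULL)
		root = m->right;
	else {
		root = splay(m->pindex, m->left);	/* max of left tree */
		root->right = m->right;
	}
	*cachep = root;
}

int
main(void)
{
	struct pnode n[3] = {
		{ 10, NULL, NULL }, { 20, NULL, NULL }, { 30, NULL, NULL },
	};
	struct pnode *cache = &n[1];	/* 20 at the root, 10 and 30 below */

	n[1].left = &n[0];
	n[1].right = &n[2];
	printf("lookup 30 -> %lu\n", lookup(&cache, 30)->pindex);
	tree_remove(&cache, &n[2]);
	printf("lookup 30 -> %s\n", lookup(&cache, 30) ? "hit" : "miss");
	return (0);
}
```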
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index b06a19e1d843..3ed2f75c7166 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -160,27 +160,20 @@ CTASSERT(sizeof(u_long) >= 8);
 #define	PQ_NONE		0
 #define	PQ_INACTIVE	1
 #define	PQ_ACTIVE	2
-#define	PQ_CACHE	3
-#define	PQ_HOLD		4
-#define	PQ_COUNT	5
-#define	PQ_MAXCOUNT	5
+#define	PQ_HOLD		3
+#define	PQ_COUNT	4
+#define	PQ_MAXCOUNT	4
 
 /* Returns the real queue a page is on. */
 #define	VM_PAGE_GETQUEUE(m)	((m)->queue)
 
 /* Returns the well known queue a page is on. */
-#define	VM_PAGE_GETKNOWNQUEUE1(m)	VM_PAGE_GETQUEUE(m)
 #define	VM_PAGE_GETKNOWNQUEUE2(m)	VM_PAGE_GETQUEUE(m)
 
-/* Given the real queue number and a page color return the well know queue. */
-#define	VM_PAGE_RESOLVEQUEUE(m, q)	(q)
-
 /* Returns true if the page is in the named well known queue. */
-#define	VM_PAGE_INQUEUE1(m, q)	(VM_PAGE_GETKNOWNQUEUE1(m) == (q))
 #define	VM_PAGE_INQUEUE2(m, q)	(VM_PAGE_GETKNOWNQUEUE2(m) == (q))
 
 /* Sets the queue a page is on. */
-#define	VM_PAGE_SETQUEUE1(m, q)	(VM_PAGE_GETQUEUE(m) = (q))
 #define	VM_PAGE_SETQUEUE2(m, q)	(VM_PAGE_GETQUEUE(m) = (q))
 
 struct vpgqueues {
@@ -201,6 +194,7 @@ extern struct mtx vm_page_queue_free_mtx;
 * pte mappings, nor can they be removed from their objects via
 * the object, and such pages are also not on any PQ queue.
 */
+#define	PG_CACHED	0x0001		/* page is cached */
 #define	PG_FREE		0x0002		/* page is free */
 #define	PG_WINATCFLS	0x0004		/* flush dirty page on inactive q */
 #define	PG_FICTITIOUS	0x0008		/* physical page doesn't exist (O) */
@@ -230,9 +224,8 @@ extern struct mtx vm_page_queue_free_mtx;
 *	Available for allocation now.
 *
 *	cache
-*		Almost available for allocation.  Still in an
-*		object, but clean and immediately freeable at
-*		non-interrupt times.
+*		Almost available for allocation.  Still associated with
+*		an object, but clean and immediately freeable.
 *
 *	hold
 *		Will become free after a pending I/O operation
@@ -302,6 +295,8 @@ extern struct mtx vm_page_queue_mtx;
 #define	VM_ALLOC_RETRY		0x0080	/* vm_page_grab() only */
 #define	VM_ALLOC_NOOBJ		0x0100	/* No associated object */
 #define	VM_ALLOC_NOBUSY		0x0200	/* Do not busy the page */
+#define	VM_ALLOC_IFCACHED	0x0400	/* Fail if the page is not cached */
+#define	VM_ALLOC_IFNOTCACHED	0x0800	/* Fail if the page is cached */
 
 void vm_page_flag_set(vm_page_t m, unsigned short bits);
 void vm_page_flag_clear(vm_page_t m, unsigned short bits);
@@ -318,7 +313,6 @@ void vm_page_wakeup(vm_page_t m);
 
 void vm_pageq_init(void);
 void vm_pageq_enqueue(int queue, vm_page_t m);
-void vm_pageq_remove_nowakeup(vm_page_t m);
 void vm_pageq_remove(vm_page_t m);
 void vm_pageq_requeue(vm_page_t m);
 
@@ -326,6 +320,9 @@ void vm_page_activate (vm_page_t);
 vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int);
 vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
 void vm_page_cache (register vm_page_t);
+void vm_page_cache_free(vm_object_t);
+void vm_page_cache_remove(vm_page_t);
+void vm_page_cache_transfer(vm_object_t, vm_pindex_t, vm_object_t);
 int vm_page_try_to_cache (vm_page_t);
 int vm_page_try_to_free (vm_page_t);
 void vm_page_dontneed (register vm_page_t);
@@ -334,7 +331,6 @@ void vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
 vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t);
 void vm_page_remove (vm_page_t);
 void vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);
-vm_page_t vm_page_select_cache(void);
 void vm_page_sleep(vm_page_t m, const char *msg);
 vm_page_t vm_page_splay(vm_pindex_t, vm_page_t);
 vm_offset_t vm_page_startup(vm_offset_t vaddr);
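A small detail worth calling out in the flag definitions above: PG_CACHED (0x0001) and PG_FREE (0x0002) are adjacent, mutually exclusive bits, which is what lets vm_page_cache_free() convert a page from cached to free with a single XOR of both masks (and why that function KASSERTs the result). The idiom in isolation:

```c
#include <assert.h>

#define PG_CACHED	0x0001
#define PG_FREE		0x0002

int
main(void)
{
	unsigned flags = PG_CACHED;	/* page is currently cached */

	/* One XOR clears PG_CACHED and sets PG_FREE simultaneously. */
	flags ^= PG_CACHED | PG_FREE;
	assert((flags & (PG_CACHED | PG_FREE)) == PG_FREE);
	return (0);
}
```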
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 94788091c8de..2c267271b325 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -342,8 +342,7 @@ more:
 				ib = 0;
 				break;
 			}
-			if (VM_PAGE_INQUEUE1(p, PQ_CACHE) ||
-			    (p->oflags & VPO_BUSY) || p->busy) {
+			if ((p->oflags & VPO_BUSY) || p->busy) {
 				ib = 0;
 				break;
 			}
@@ -372,8 +371,7 @@ more:
 			if ((p = vm_page_lookup(object, pindex + is)) == NULL)
 				break;
-			if (VM_PAGE_INQUEUE1(p, PQ_CACHE) ||
-			    (p->oflags & VPO_BUSY) || p->busy) {
+			if ((p->oflags & VPO_BUSY) || p->busy) {
 				break;
 			}
 			vm_page_test_dirty(p);
@@ -1139,37 +1137,6 @@ unlock_and_continue:
 		VM_OBJECT_UNLOCK(object);
 		m = next;
 	}
-
-	/*
-	 * We try to maintain some *really* free pages, this allows interrupt
-	 * code to be guaranteed space.  Since both cache and free queues
-	 * are considered basically 'free', moving pages from cache to free
-	 * does not effect other calculations.
-	 */
-	while (cnt.v_free_count < cnt.v_free_reserved) {
-		TAILQ_FOREACH(m, &vm_page_queues[PQ_CACHE].pl, pageq) {
-			KASSERT(m->dirty == 0,
-			    ("Found dirty cache page %p", m));
-			KASSERT(!pmap_page_is_mapped(m),
-			    ("Found mapped cache page %p", m));
-			KASSERT((m->flags & PG_UNMANAGED) == 0,
-			    ("Found unmanaged cache page %p", m));
-			KASSERT(m->wire_count == 0,
-			    ("Found wired cache page %p", m));
-			if (m->hold_count == 0 && VM_OBJECT_TRYLOCK(object =
-			    m->object)) {
-				KASSERT((m->oflags & VPO_BUSY) == 0 &&
-				    m->busy == 0, ("Found busy cache page %p",
-				    m));
-				vm_page_free(m);
-				VM_OBJECT_UNLOCK(object);
-				cnt.v_dfree++;
-				break;
-			}
-		}
-		if (m == NULL)
-			break;
-	}
 	vm_page_unlock_queues();
 #if !defined(NO_SWAPPING)
 	/*
diff --git a/sys/vm/vm_pageq.c b/sys/vm/vm_pageq.c
index 5c10f62147ef..055bac513b1b 100644
--- a/sys/vm/vm_pageq.c
+++ b/sys/vm/vm_pageq.c
@@ -56,7 +56,6 @@ vm_pageq_init(void)
 {
 	int i;
 
-	vm_page_queues[PQ_CACHE].cnt = &cnt.v_cache_count;
 	vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;
 	vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
 	vm_page_queues[PQ_HOLD].cnt = &cnt.v_active_count;
@@ -93,28 +92,6 @@ vm_pageq_enqueue(int queue, vm_page_t m)
 	++*vpq->cnt;
 }
 
-/*
- *	vm_pageq_remove_nowakeup:
- *
- *	vm_page_unqueue() without any wakeup
- *
- *	The queue containing the given page must be locked.
- *	This routine may not block.
- */
-void
-vm_pageq_remove_nowakeup(vm_page_t m)
-{
-	int queue = VM_PAGE_GETQUEUE(m);
-	struct vpgqueues *pq;
-
-	if (queue != PQ_NONE) {
-		pq = &vm_page_queues[queue];
-		VM_PAGE_SETQUEUE2(m, PQ_NONE);
-		TAILQ_REMOVE(&pq->pl, m, pageq);
-		(*pq->cnt)--;
-	}
-}
-
 /*
 *	vm_pageq_remove:
 *
@@ -134,9 +111,5 @@ vm_pageq_remove(vm_page_t m)
 		pq = &vm_page_queues[queue];
 		TAILQ_REMOVE(&pq->pl, m, pageq);
 		(*pq->cnt)--;
-		if (VM_PAGE_RESOLVEQUEUE(m, queue) == PQ_CACHE) {
-			if (vm_paging_needed())
-				pagedaemon_wakeup();
-		}
 	}
 }
diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c
index 81d597c5cc44..8efdf3df8405 100644
--- a/sys/vm/vm_phys.c
+++ b/sys/vm/vm_phys.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
+#include <sys/vnode.h>
 
 #include <vm/vm.h>
@@ -89,7 +90,6 @@ SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
 
 static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
 static int vm_phys_paddr_to_segind(vm_paddr_t pa);
-static void vm_phys_set_pool(int pool, vm_page_t m, int order);
 static void vm_phys_split_pages(vm_page_t m, int oind,
 	    struct vm_freelist *fl, int order);
@@ -286,6 +286,7 @@ vm_phys_add_page(vm_paddr_t pa)
 	m->pool = VM_FREEPOOL_DEFAULT;
 	pmap_page_init(m);
 	mtx_lock(&vm_page_queue_free_mtx);
+	cnt.v_free_count++;
 	vm_phys_free_pages(m, 0);
 	mtx_unlock(&vm_page_queue_free_mtx);
 }
@@ -318,7 +319,6 @@ vm_phys_alloc_pages(int pool, int order)
 			fl[oind].lcnt--;
 			m->order = VM_NFREEORDER;
 			vm_phys_split_pages(m, oind, fl, order);
-			cnt.v_free_count -= 1 << order;
 			return (m);
 		}
 	}
@@ -339,7 +339,6 @@ vm_phys_alloc_pages(int pool, int order)
 				m->order = VM_NFREEORDER;
 				vm_phys_set_pool(pool, m, oind);
 				vm_phys_split_pages(m, oind, fl, order);
-				cnt.v_free_count -= 1 << order;
 				return (m);
 			}
 		}
@@ -428,7 +427,6 @@ vm_phys_free_pages(vm_page_t m, int order)
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	pa = VM_PAGE_TO_PHYS(m);
 	seg = &vm_phys_segs[m->segind];
-	cnt.v_free_count += 1 << order;
 	while (order < VM_NFREEORDER - 1) {
 		pa_buddy = pa ^ (1 << (PAGE_SHIFT + order));
 		if (pa_buddy < seg->start ||
@@ -456,7 +454,7 @@ vm_phys_free_pages(vm_page_t m, int order)
 /*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
-static void
+void
 vm_phys_set_pool(int pool, vm_page_t m, int order)
 {
 	vm_page_t m_tmp;
@@ -466,44 +464,113 @@ vm_phys_set_pool(int pool, vm_page_t m, int order)
 }
 
 /*
- * Try to zero one or more physical pages.  Used by an idle priority thread.
+ * Remove the given physical page "m" from the free lists.
+ *
+ * The free page queues must be locked.
+ */
+void
+vm_phys_unfree_page(vm_page_t m)
+{
+	struct vm_freelist *fl;
+	struct vm_phys_seg *seg;
+	vm_paddr_t pa, pa_half;
+	vm_page_t m_set, m_tmp;
+	int order;
+
+	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+
+	/*
+	 * First, find the contiguous, power of two-sized set of free
+	 * physical pages containing the given physical page "m" and
+	 * assign it to "m_set".
+	 */
+	seg = &vm_phys_segs[m->segind];
+	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
+	    order < VM_NFREEORDER; ) {
+		order++;
+		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
+		KASSERT(pa >= seg->start && pa < seg->end,
+		    ("vm_phys_unfree_page: paddr %#jx is not within segment %p",
+		    (uintmax_t)pa, seg));
+		m_set = &seg->first_page[atop(pa - seg->start)];
+	}
+	KASSERT(m_set->order >= order, ("vm_phys_unfree_page: page %p's order"
+	    " (%d) is less than expected (%d)", m_set, m_set->order, order));
+	KASSERT(m_set->order < VM_NFREEORDER,
+	    ("vm_phys_unfree_page: page %p has unexpected order %d",
+	    m_set, m_set->order));
+	KASSERT(order < VM_NFREEORDER,
+	    ("vm_phys_unfree_page: order %d is out of range", order));
+
+	/*
+	 * Next, remove "m_set" from the free lists.  Finally, extract
+	 * "m" from "m_set" using an iterative algorithm: While "m_set"
+	 * is larger than a page, shrink "m_set" by returning the half
+	 * of "m_set" that does not contain "m" to the free lists.
+	 */
+	fl = (*seg->free_queues)[m_set->pool];
+	order = m_set->order;
+	TAILQ_REMOVE(&fl[order].pl, m_set, pageq);
+	fl[order].lcnt--;
+	m_set->order = VM_NFREEORDER;
+	while (order > 0) {
+		order--;
+		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
+		if (m->phys_addr < pa_half)
+			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
+		else {
+			m_tmp = m_set;
+			m_set = &seg->first_page[atop(pa_half - seg->start)];
+		}
+		m_tmp->order = order;
+		TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq);
+		fl[order].lcnt++;
+	}
+	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
+}
+
+/*
+ * Try to zero one physical page.  Used by an idle priority thread.
 */
 boolean_t
 vm_phys_zero_pages_idle(void)
 {
-	struct vm_freelist *fl;
+	static struct vm_freelist *fl = vm_phys_free_queues[0][0];
+	static int flind, oind, pind;
 	vm_page_t m, m_tmp;
-	int flind, pind, q, zeroed;
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
-	for (flind = 0; flind < vm_nfreelists; flind++) {
-		pind = VM_FREEPOOL_DEFAULT;
-		fl = vm_phys_free_queues[flind][pind];
-		for (q = 0; q < VM_NFREEORDER; q++) {
-			m = TAILQ_FIRST(&fl[q].pl);
-			if (m != NULL && (m->flags & PG_ZERO) == 0) {
-				TAILQ_REMOVE(&fl[q].pl, m, pageq);
-				fl[q].lcnt--;
-				m->order = VM_NFREEORDER;
-				cnt.v_free_count -= 1 << q;
-				mtx_unlock(&vm_page_queue_free_mtx);
-				zeroed = 0;
-				for (m_tmp = m; m_tmp < &m[1 << q]; m_tmp++) {
-					if ((m_tmp->flags & PG_ZERO) == 0) {
-						pmap_zero_page_idle(m_tmp);
-						m_tmp->flags |= PG_ZERO;
-						zeroed++;
-					}
+	for (;;) {
+		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
+			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
+				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
+					vm_phys_unfree_page(m_tmp);
+					cnt.v_free_count--;
+					mtx_unlock(&vm_page_queue_free_mtx);
+					pmap_zero_page_idle(m_tmp);
+					m_tmp->flags |= PG_ZERO;
+					mtx_lock(&vm_page_queue_free_mtx);
+					cnt.v_free_count++;
+					vm_phys_free_pages(m_tmp, 0);
+					vm_page_zero_count++;
+					cnt_prezero++;
+					return (TRUE);
 				}
-				cnt_prezero += zeroed;
-				mtx_lock(&vm_page_queue_free_mtx);
-				vm_phys_free_pages(m, q);
-				vm_page_zero_count += zeroed;
-				return (TRUE);
 			}
 		}
+		oind++;
+		if (oind == VM_NFREEORDER) {
+			oind = 0;
+			pind++;
+			if (pind == VM_NFREEPOOL) {
+				pind = 0;
+				flind++;
+				if (flind == vm_nfreelists)
+					flind = 0;
+			}
+			fl = vm_phys_free_queues[flind][pind];
+		}
 	}
-	return (FALSE);
 }
 
 /*
@@ -522,6 +589,7 @@ vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high,
 {
 	struct vm_freelist *fl;
 	struct vm_phys_seg *seg;
+	vm_object_t m_object;
 	vm_paddr_t pa, pa_last, size;
 	vm_page_t m, m_ret;
 	int flind, i, oind, order, pind;
@@ -606,12 +674,19 @@ done:
 	vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
 	fl = (*seg->free_queues)[m_ret->pool];
 	vm_phys_split_pages(m_ret, oind, fl, order);
-	cnt.v_free_count -= roundup2(npages, 1 << imin(oind, order));
 	for (i = 0; i < npages; i++) {
 		m = &m_ret[i];
 		KASSERT(m->queue == PQ_NONE,
 		    ("vm_phys_alloc_contig: page %p has unexpected queue %d",
 		    m, m->queue));
+		m_object = m->object;
+		if ((m->flags & PG_CACHED) != 0)
+			vm_page_cache_remove(m);
+		else {
+			KASSERT(VM_PAGE_IS_FREE(m),
+			    ("vm_phys_alloc_contig: page %p is not free", m));
+			cnt.v_free_count--;
+		}
 		m->valid = VM_PAGE_BITS_ALL;
 		if (m->flags & PG_ZERO)
 			vm_page_zero_count--;
@@ -622,6 +697,13 @@ done:
 		    ("vm_phys_alloc_contig: page %p was dirty", m));
 		m->wire_count = 0;
 		m->busy = 0;
+		if (m_object != NULL &&
+		    m_object->type == OBJT_VNODE &&
+		    m_object->cache == NULL) {
+			mtx_unlock(&vm_page_queue_free_mtx);
+			vdrop(m_object->handle);
+			mtx_lock(&vm_page_queue_free_mtx);
+		}
 	}
 	for (; i < roundup2(npages, 1 << imin(oind, order)); i++) {
 		m = &m_ret[i];
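The heart of vm_phys_unfree_page() above is the loop that carves a single page frame out of a larger free block: at each step the half that does not contain the target page is returned to the free lists, and the search continues in the other half. A stand-alone model of just that address arithmetic follows; free_half() is an invented stand-in for the kernel's TAILQ_INSERT_HEAD on the per-order free list.

```c
#include <stdio.h>

#define PAGE_SHIFT 12

/* Invented stand-in for returning a block to the order's free list. */
static void
free_half(unsigned long pa, int order)
{
	printf("return half %#07lx (order %d) to the free lists\n", pa, order);
}

/*
 * Shrink the free block at 'base' (1 << order pages) around the target
 * page at 'pa', mirroring the halving loop in vm_phys_unfree_page().
 */
static unsigned long
unfree(unsigned long pa, unsigned long base, int order)
{
	unsigned long pa_half;

	while (order > 0) {
		order--;
		/* The upper half starts at base with the half-size bit set. */
		pa_half = base ^ (1UL << (PAGE_SHIFT + order));
		if (pa < pa_half)
			free_half(pa_half, order);	/* target in lower half */
		else {
			free_half(base, order);		/* target in upper half */
			base = pa_half;
		}
	}
	return (base);		/* == pa: the single extracted frame */
}

int
main(void)
{
	/* Extract page 5 from an order-3 (8-page) block at address 0. */
	printf("extracted frame %#07lx\n", unfree(0x5000, 0x0000, 3));
	return (0);
}
```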
diff --git a/sys/vm/vm_phys.h b/sys/vm/vm_phys.h
index 0debc0143e79..3e35f9b2cb18 100644
--- a/sys/vm/vm_phys.h
+++ b/sys/vm/vm_phys.h
@@ -1,5 +1,6 @@
 /*-
 * Copyright (c) 2002-2006 Rice University
+ * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
@@ -45,6 +46,8 @@
 vm_page_t vm_phys_alloc_pages(int pool, int order);
 vm_paddr_t vm_phys_bootstrap_alloc(vm_size_t size, unsigned long alignment);
 void vm_phys_free_pages(vm_page_t m, int order);
 void vm_phys_init(void);
+void vm_phys_set_pool(int pool, vm_page_t m, int order);
+void vm_phys_unfree_page(vm_page_t m);
 boolean_t vm_phys_zero_pages_idle(void);
 
 #endif	/* !_VM_PHYS_H_ */
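Finally, the rewritten vm_phys_zero_pages_idle() in vm_phys.c above replaces a full scan per call with static (flind, pind, oind) cursors, so each call zeroes at most one page and resumes where the previous call stopped. The sketch below models only that cursor walk; work() is a placeholder, the dimensions are arbitrary, and unlike the kernel loop it bounds a fruitless pass so the demo is guaranteed to terminate.

```c
#include <stdbool.h>
#include <stdio.h>

#define NLISTS	2	/* stand-in for vm_nfreelists */
#define NPOOLS	3	/* stand-in for VM_NFREEPOOL */
#define NORDERS	11	/* stand-in for VM_NFREEORDER */

/* Placeholder for "found and zeroed a page in this slot". */
static bool
work(int flind, int pind, int oind)
{
	(void)flind; (void)pind; (void)oind;
	return (false);
}

static bool
zero_one(void)
{
	static int flind, pind, oind;	/* cursor persists across calls */
	int steps;

	/* Visit each (list, pool, order) slot at most once per call. */
	for (steps = 0; steps < NLISTS * NPOOLS * NORDERS; steps++) {
		if (work(flind, pind, oind))
			return (true);
		if (++oind == NORDERS) {
			oind = 0;
			if (++pind == NPOOLS) {
				pind = 0;
				if (++flind == NLISTS)
					flind = 0;
			}
		}
	}
	return (false);		/* full cycle, nothing to zero */
}

int
main(void)
{
	printf("zeroed a page: %s\n", zero_one() ? "yes" : "no");
	return (0);
}
```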