From c869e672086f986a9f3aa1fe93fd2f0eb5c81156 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Sat, 19 Dec 2015 18:42:50 +0000 Subject: [PATCH] Introduce a new mechanism for relocating virtual pages to a new physical address and use this mechanism when: 1. kmem_alloc_{attr,contig}() can't find suitable free pages in the physical memory allocator's free page lists. This replaces the long-standing approach of scanning the inactive and active queues, converting clean pages into PG_CACHED pages and laundering dirty pages. In contrast, the new mechanism does not use PG_CACHED pages nor does it trigger a large number of I/O operations. 2. on 32-bit MIPS processors, uma_small_alloc() and the pmap can't find free pages in the physical memory allocator's free page lists that are covered by the direct map. Tested by: adrian 3. ttm_bo_global_init() and ttm_vm_page_alloc_dma32() can't find suitable free pages in the physical memory allocator's free page lists. In the coming months, I expect that this new mechanism will be applied in other places. For example, balloon drivers should use relocation to minimize fragmentation of the guest physical address space. Make vm_phys_alloc_contig() a little smarter (and more efficient in some cases). Specifically, use vm_phys_segs[] earlier to avoid scanning free page lists that can't possibly contain suitable pages. 
Reviewed by: kib, markj Glanced at: jhb Discussed with: jeff Sponsored by: EMC / Isilon Storage Division Differential Revision: https://reviews.freebsd.org/D4444 --- sys/dev/drm2/ttm/ttm_bo.c | 10 +- sys/dev/drm2/ttm/ttm_page_alloc.c | 10 +- sys/mips/include/pmap.h | 1 - sys/mips/mips/pmap.c | 37 +- sys/mips/mips/uma_machdep.c | 7 +- sys/vm/vm_kern.c | 15 +- sys/vm/vm_page.c | 589 ++++++++++++++++++++++++++++++ sys/vm/vm_page.h | 4 + sys/vm/vm_pageout.c | 166 --------- sys/vm/vm_pageout.h | 1 - sys/vm/vm_phys.c | 221 +++++++---- sys/vm/vm_phys.h | 2 + sys/vm/vm_reserv.c | 46 +++ sys/vm/vm_reserv.h | 3 + 14 files changed, 846 insertions(+), 266 deletions(-) diff --git a/sys/dev/drm2/ttm/ttm_bo.c b/sys/dev/drm2/ttm/ttm_bo.c index 70e17c4f4161..010afe6d8b3b 100644 --- a/sys/dev/drm2/ttm/ttm_bo.c +++ b/sys/dev/drm2/ttm/ttm_bo.c @@ -1488,21 +1488,21 @@ int ttm_bo_global_init(struct drm_global_reference *ref) struct ttm_bo_global_ref *bo_ref = container_of(ref, struct ttm_bo_global_ref, ref); struct ttm_bo_global *glob = ref->object; - int ret; + int req, ret; int tries; sx_init(&glob->device_list_mutex, "ttmdlm"); mtx_init(&glob->lru_lock, "ttmlru", NULL, MTX_DEF); glob->mem_glob = bo_ref->mem_glob; + req = VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ; tries = 0; retry: - glob->dummy_read_page = vm_page_alloc_contig(NULL, 0, - VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ, + glob->dummy_read_page = vm_page_alloc_contig(NULL, 0, req, 1, 0, VM_MAX_ADDRESS, PAGE_SIZE, 0, VM_MEMATTR_UNCACHEABLE); if (unlikely(glob->dummy_read_page == NULL)) { - if (tries < 1) { - vm_pageout_grow_cache(tries, 0, VM_MAX_ADDRESS); + if (tries < 1 && vm_page_reclaim_contig(req, 1, + 0, VM_MAX_ADDRESS, PAGE_SIZE, 0)) { tries++; goto retry; } diff --git a/sys/dev/drm2/ttm/ttm_page_alloc.c b/sys/dev/drm2/ttm/ttm_page_alloc.c index 75abd10cc872..2a2e9168fef1 100644 --- a/sys/dev/drm2/ttm/ttm_page_alloc.c +++ b/sys/dev/drm2/ttm/ttm_page_alloc.c @@ -166,13 +166,9 @@ ttm_vm_page_alloc_dma32(int req, vm_memattr_t memattr) 
PAGE_SIZE, 0, memattr); if (p != NULL || tries > 2) return (p); - - /* - * Before growing the cache see if this is just a normal - * memory shortage. - */ - VM_WAIT; - vm_pageout_grow_cache(tries, 0, 0xffffffff); + if (!vm_page_reclaim_contig(req, 1, 0, 0xffffffff, + PAGE_SIZE, 0)) + VM_WAIT; } } diff --git a/sys/mips/include/pmap.h b/sys/mips/include/pmap.h index 8392d8a1ca48..03541199a401 100644 --- a/sys/mips/include/pmap.h +++ b/sys/mips/include/pmap.h @@ -178,7 +178,6 @@ void *pmap_kenter_temporary(vm_paddr_t pa, int i); void pmap_kenter_temporary_free(vm_paddr_t pa); void pmap_flush_pvcache(vm_page_t m); int pmap_emulate_modified(pmap_t pmap, vm_offset_t va); -void pmap_grow_direct_page_cache(void); void pmap_page_set_memattr(vm_page_t, vm_memattr_t); #endif /* _KERNEL */ diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c index 7bf3ee08bc98..0827ad194435 100644 --- a/sys/mips/mips/pmap.c +++ b/sys/mips/mips/pmap.c @@ -166,6 +166,7 @@ static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, static vm_page_t pmap_alloc_direct_page(unsigned int index, int req); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte); +static void pmap_grow_direct_page(int req); static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va, pd_entry_t pde); static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); @@ -1040,14 +1041,16 @@ pmap_pinit0(pmap_t pmap) bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } -void -pmap_grow_direct_page_cache() +static void +pmap_grow_direct_page(int req) { #ifdef __mips_n64 VM_WAIT; #else - vm_pageout_grow_cache(3, 0, MIPS_KSEG0_LARGEST_PHYS); + if (!vm_page_reclaim_contig(req, 1, 0, MIPS_KSEG0_LARGEST_PHYS, + PAGE_SIZE, 0)) + VM_WAIT; #endif } @@ -1077,13 +1080,15 @@ pmap_pinit(pmap_t pmap) { vm_offset_t ptdva; vm_page_t ptdpg; - int i; + int i, req_class; /* * allocate the page directory page */ - while ((ptdpg = 
pmap_alloc_direct_page(NUSERPGTBLS, VM_ALLOC_NORMAL)) == NULL) - pmap_grow_direct_page_cache(); + req_class = VM_ALLOC_NORMAL; + while ((ptdpg = pmap_alloc_direct_page(NUSERPGTBLS, req_class)) == + NULL) + pmap_grow_direct_page(req_class); ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg)); pmap->pm_segtab = (pd_entry_t *)ptdva; @@ -1107,15 +1112,17 @@ _pmap_allocpte(pmap_t pmap, unsigned ptepindex, u_int flags) { vm_offset_t pageva; vm_page_t m; + int req_class; /* * Find or fabricate a new pagetable page */ - if ((m = pmap_alloc_direct_page(ptepindex, VM_ALLOC_NORMAL)) == NULL) { + req_class = VM_ALLOC_NORMAL; + if ((m = pmap_alloc_direct_page(ptepindex, req_class)) == NULL) { if ((flags & PMAP_ENTER_NOSLEEP) == 0) { PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); - pmap_grow_direct_page_cache(); + pmap_grow_direct_page(req_class); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); } @@ -1241,9 +1248,10 @@ pmap_growkernel(vm_offset_t addr) vm_page_t nkpg; pd_entry_t *pde, *pdpe; pt_entry_t *pte; - int i; + int i, req_class; mtx_assert(&kernel_map->system_mtx, MA_OWNED); + req_class = VM_ALLOC_INTERRUPT; addr = roundup2(addr, NBSEG); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; @@ -1252,7 +1260,7 @@ pmap_growkernel(vm_offset_t addr) #ifdef __mips_n64 if (*pdpe == 0) { /* new intermediate page table entry */ - nkpg = pmap_alloc_direct_page(nkpt, VM_ALLOC_INTERRUPT); + nkpg = pmap_alloc_direct_page(nkpt, req_class); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); *pdpe = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg)); @@ -1272,8 +1280,13 @@ pmap_growkernel(vm_offset_t addr) /* * This index is bogus, but out of the way */ - nkpg = pmap_alloc_direct_page(nkpt, VM_ALLOC_INTERRUPT); - if (!nkpg) + nkpg = pmap_alloc_direct_page(nkpt, req_class); +#ifndef __mips_n64 + if (nkpg == NULL && vm_page_reclaim_contig(req_class, 1, + 0, MIPS_KSEG0_LARGEST_PHYS, PAGE_SIZE, 0)) + nkpg = pmap_alloc_direct_page(nkpt, req_class); 
+#endif + if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); nkpt++; *pde = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg)); diff --git a/sys/mips/mips/uma_machdep.c b/sys/mips/mips/uma_machdep.c index e47c50202704..b4006e1ead12 100644 --- a/sys/mips/mips/uma_machdep.c +++ b/sys/mips/mips/uma_machdep.c @@ -53,11 +53,16 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) for (;;) { m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, pflags); +#ifndef __mips_n64 + if (m == NULL && vm_page_reclaim_contig(pflags, 1, + 0, MIPS_KSEG0_LARGEST_PHYS, PAGE_SIZE, 0)) + continue; +#endif if (m == NULL) { if (wait & M_NOWAIT) return (NULL); else - pmap_grow_direct_page_cache(); + VM_WAIT; } else break; } diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index a05c26384780..f56dbca2fb66 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -181,7 +181,10 @@ kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low, if (m == NULL) { VM_OBJECT_WUNLOCK(object); if (tries < ((flags & M_NOWAIT) != 0 ? 
1 : 3)) { - vm_pageout_grow_cache(tries, low, high); + if (!vm_page_reclaim_contig(pflags, 1, + low, high, PAGE_SIZE, 0) && + (flags & M_WAITOK) != 0) + VM_WAIT; VM_OBJECT_WLOCK(object); tries++; goto retry; @@ -217,6 +220,7 @@ kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low, vm_offset_t addr, tmp; vm_ooffset_t offset; vm_page_t end_m, m; + u_long npages; int pflags, tries; size = round_page(size); @@ -224,15 +228,18 @@ kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low, return (0); offset = addr - VM_MIN_KERNEL_ADDRESS; pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED; + npages = atop(size); VM_OBJECT_WLOCK(object); tries = 0; retry: m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, - atop(size), low, high, alignment, boundary, memattr); + npages, low, high, alignment, boundary, memattr); if (m == NULL) { VM_OBJECT_WUNLOCK(object); if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) { - vm_pageout_grow_cache(tries, low, high); + if (!vm_page_reclaim_contig(pflags, npages, low, high, + alignment, boundary) && (flags & M_WAITOK) != 0) + VM_WAIT; VM_OBJECT_WLOCK(object); tries++; goto retry; @@ -240,7 +247,7 @@ kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low, vmem_free(vmem, addr, size); return (0); } - end_m = m + atop(size); + end_m = m + npages; tmp = addr; for (; m < end_m; m++) { if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0) diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 2e6b56a6d680..d27eb2d4eaf0 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -158,11 +158,14 @@ static struct vnode *vm_page_alloc_init(vm_page_t m); static void vm_page_cache_turn_free(vm_page_t m); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); static void vm_page_enqueue(uint8_t queue, vm_page_t m); +static void vm_page_free_wakeup(void); static void vm_page_init_fakepg(void *dummy); static int vm_page_insert_after(vm_page_t m, 
vm_object_t object, vm_pindex_t pindex, vm_page_t mpred); static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred); +static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run, + vm_paddr_t high); SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init_fakepg, NULL); @@ -2093,6 +2096,592 @@ vm_page_alloc_freelist(int flind, int req) return (m); } +#define VPSC_ANY 0 /* No restrictions. */ +#define VPSC_NORESERV 1 /* Skip reservations; implies VPSC_NOSUPER. */ +#define VPSC_NOSUPER 2 /* Skip superpages. */ + +/* + * vm_page_scan_contig: + * + * Scan vm_page_array[] between the specified entries "m_start" and + * "m_end" for a run of contiguous physical pages that satisfy the + * specified conditions, and return the lowest page in the run. The + * specified "alignment" determines the alignment of the lowest physical + * page in the run. If the specified "boundary" is non-zero, then the + * run of physical pages cannot span a physical address that is a + * multiple of "boundary". + * + * "m_end" is never dereferenced, so it need not point to a vm_page + * structure within vm_page_array[]. + * + * "npages" must be greater than zero. "m_start" and "m_end" must not + * span a hole (or discontiguity) in the physical address space. Both + * "alignment" and "boundary" must be a power of two. 
+ */ +vm_page_t +vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, + u_long alignment, vm_paddr_t boundary, int options) +{ + struct mtx *m_mtx, *new_mtx; + vm_object_t object; + vm_paddr_t pa; + vm_page_t m, m_run; +#if VM_NRESERVLEVEL > 0 + int level; +#endif + int m_inc, order, run_ext, run_len; + + KASSERT(npages > 0, ("npages is 0")); + KASSERT(powerof2(alignment), ("alignment is not a power of 2")); + KASSERT(powerof2(boundary), ("boundary is not a power of 2")); + m_run = NULL; + run_len = 0; + m_mtx = NULL; + for (m = m_start; m < m_end && run_len < npages; m += m_inc) { + KASSERT((m->flags & (PG_FICTITIOUS | PG_MARKER)) == 0, + ("page %p is PG_FICTITIOUS or PG_MARKER", m)); + + /* + * If the current page would be the start of a run, check its + * physical address against the end, alignment, and boundary + * conditions. If it doesn't satisfy these conditions, either + * terminate the scan or advance to the next page that + * satisfies the failed condition. + */ + if (run_len == 0) { + KASSERT(m_run == NULL, ("m_run != NULL")); + if (m + npages > m_end) + break; + pa = VM_PAGE_TO_PHYS(m); + if ((pa & (alignment - 1)) != 0) { + m_inc = atop(roundup2(pa, alignment) - pa); + continue; + } + if (((pa ^ (pa + ptoa(npages) - 1)) & ~(boundary - + 1)) != 0) { + m_inc = atop(roundup2(pa, boundary) - pa); + continue; + } + } else + KASSERT(m_run != NULL, ("m_run == NULL")); + + /* + * Avoid releasing and reacquiring the same page lock. + */ + new_mtx = vm_page_lockptr(m); + if (m_mtx != new_mtx) { + if (m_mtx != NULL) + mtx_unlock(m_mtx); + m_mtx = new_mtx; + mtx_lock(m_mtx); + } + m_inc = 1; +retry: + if (m->wire_count != 0 || m->hold_count != 0) + run_ext = 0; +#if VM_NRESERVLEVEL > 0 + else if ((level = vm_reserv_level(m)) >= 0 && + (options & VPSC_NORESERV) != 0) { + run_ext = 0; + /* Advance to the end of the reservation. 
*/ + pa = VM_PAGE_TO_PHYS(m); + m_inc = atop(roundup2(pa + 1, vm_reserv_size(level)) - + pa); + } +#endif + else if ((object = m->object) != NULL) { + /* + * The page is considered eligible for relocation if + * and only if it could be laundered or reclaimed by + * the page daemon. + */ + if (!VM_OBJECT_TRYRLOCK(object)) { + mtx_unlock(m_mtx); + VM_OBJECT_RLOCK(object); + mtx_lock(m_mtx); + if (m->object != object) { + /* + * The page may have been freed. + */ + VM_OBJECT_RUNLOCK(object); + goto retry; + } else if (m->wire_count != 0 || + m->hold_count != 0) { + run_ext = 0; + goto unlock; + } + } + KASSERT((m->flags & PG_UNHOLDFREE) == 0, + ("page %p is PG_UNHOLDFREE", m)); + /* Don't care: PG_NODUMP, PG_WINATCFLS, PG_ZERO. */ + if (object->type != OBJT_DEFAULT && + object->type != OBJT_SWAP && + object->type != OBJT_VNODE) + run_ext = 0; + else if ((m->flags & PG_CACHED) != 0 || + m != vm_page_lookup(object, m->pindex)) { + /* + * The page is cached or recently converted + * from cached to free. + */ +#if VM_NRESERVLEVEL > 0 + if (level >= 0) { + /* + * The page is reserved. Extend the + * current run by one page. + */ + run_ext = 1; + } else +#endif + if ((order = m->order) < VM_NFREEORDER) { + /* + * The page is enqueued in the + * physical memory allocator's cache/ + * free page queues. Moreover, it is + * the first page in a power-of-two- + * sized run of contiguous cache/free + * pages. Add these pages to the end + * of the current run, and jump + * ahead. + */ + run_ext = 1 << order; + m_inc = 1 << order; + } else + run_ext = 0; +#if VM_NRESERVLEVEL > 0 + } else if ((options & VPSC_NOSUPER) != 0 && + (level = vm_reserv_level_iffullpop(m)) >= 0) { + run_ext = 0; + /* Advance to the end of the superpage. 
*/ + pa = VM_PAGE_TO_PHYS(m); + m_inc = atop(roundup2(pa + 1, + vm_reserv_size(level)) - pa); +#endif + } else if (object->memattr == VM_MEMATTR_DEFAULT && + m->queue != PQ_NONE && !vm_page_busied(m)) { + /* + * The page is allocated but eligible for + * relocation. Extend the current run by one + * page. + */ + KASSERT(pmap_page_get_memattr(m) == + VM_MEMATTR_DEFAULT, + ("page %p has an unexpected memattr", m)); + KASSERT((m->oflags & (VPO_SWAPINPROG | + VPO_SWAPSLEEP | VPO_UNMANAGED)) == 0, + ("page %p has unexpected oflags", m)); + /* Don't care: VPO_NOSYNC. */ + run_ext = 1; + } else + run_ext = 0; +unlock: + VM_OBJECT_RUNLOCK(object); +#if VM_NRESERVLEVEL > 0 + } else if (level >= 0) { + /* + * The page is reserved but not yet allocated. In + * other words, it is still cached or free. Extend + * the current run by one page. + */ + run_ext = 1; +#endif + } else if ((order = m->order) < VM_NFREEORDER) { + /* + * The page is enqueued in the physical memory + * allocator's cache/free page queues. Moreover, it + * is the first page in a power-of-two-sized run of + * contiguous cache/free pages. Add these pages to + * the end of the current run, and jump ahead. + */ + run_ext = 1 << order; + m_inc = 1 << order; + } else { + /* + * Skip the page for one of the following reasons: (1) + * It is enqueued in the physical memory allocator's + * cache/free page queues. However, it is not the + * first page in a run of contiguous cache/free pages. + * (This case rarely occurs because the scan is + * performed in ascending order.) (2) It is not + * reserved, and it is transitioning from free to + * allocated. (Conversely, the transition from + * allocated to free for managed pages is blocked by + * the page lock.) (3) It is allocated but not + * contained by an object and not wired, e.g., + * allocated by Xen's balloon driver. + */ + run_ext = 0; + } + + /* + * Extend or reset the current run of pages. 
+ */ + if (run_ext > 0) { + if (run_len == 0) + m_run = m; + run_len += run_ext; + } else { + if (run_len > 0) { + m_run = NULL; + run_len = 0; + } + } + } + if (m_mtx != NULL) + mtx_unlock(m_mtx); + if (run_len >= npages) + return (m_run); + return (NULL); +} + +/* + * vm_page_reclaim_run: + * + * Try to relocate each of the allocated virtual pages within the + * specified run of physical pages to a new physical address. Free the + * physical pages underlying the relocated virtual pages. A virtual page + * is relocatable if and only if it could be laundered or reclaimed by + * the page daemon. Whenever possible, a virtual page is relocated to a + * physical address above "high". + * + * Returns 0 if every physical page within the run was already free or + * just freed by a successful relocation. Otherwise, returns a non-zero + * value indicating why the last attempt to relocate a virtual page was + * unsuccessful. + * + * "req_class" must be an allocation class. + */ +static int +vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run, + vm_paddr_t high) +{ + struct mtx *m_mtx, *new_mtx; + struct spglist free; + vm_object_t object; + vm_paddr_t pa; + vm_page_t m, m_end, m_new; + int error, order, req; + + KASSERT((req_class & VM_ALLOC_CLASS_MASK) == req_class, + ("req_class is not an allocation class")); + SLIST_INIT(&free); + error = 0; + m = m_run; + m_end = m_run + npages; + m_mtx = NULL; + for (; error == 0 && m < m_end; m++) { + KASSERT((m->flags & (PG_FICTITIOUS | PG_MARKER)) == 0, + ("page %p is PG_FICTITIOUS or PG_MARKER", m)); + + /* + * Avoid releasing and reacquiring the same page lock. 
+ */ + new_mtx = vm_page_lockptr(m); + if (m_mtx != new_mtx) { + if (m_mtx != NULL) + mtx_unlock(m_mtx); + m_mtx = new_mtx; + mtx_lock(m_mtx); + } +retry: + if (m->wire_count != 0 || m->hold_count != 0) + error = EBUSY; + else if ((object = m->object) != NULL) { + /* + * The page is relocated if and only if it could be + * laundered or reclaimed by the page daemon. + */ + if (!VM_OBJECT_TRYWLOCK(object)) { + mtx_unlock(m_mtx); + VM_OBJECT_WLOCK(object); + mtx_lock(m_mtx); + if (m->object != object) { + /* + * The page may have been freed. + */ + VM_OBJECT_WUNLOCK(object); + goto retry; + } else if (m->wire_count != 0 || + m->hold_count != 0) { + error = EBUSY; + goto unlock; + } + } + KASSERT((m->flags & PG_UNHOLDFREE) == 0, + ("page %p is PG_UNHOLDFREE", m)); + /* Don't care: PG_NODUMP, PG_WINATCFLS, PG_ZERO. */ + if (object->type != OBJT_DEFAULT && + object->type != OBJT_SWAP && + object->type != OBJT_VNODE) + error = EINVAL; + else if ((m->flags & PG_CACHED) != 0 || + m != vm_page_lookup(object, m->pindex)) { + /* + * The page is cached or recently converted + * from cached to free. + */ + VM_OBJECT_WUNLOCK(object); + goto cached; + } else if (object->memattr != VM_MEMATTR_DEFAULT) + error = EINVAL; + else if (m->queue != PQ_NONE && !vm_page_busied(m)) { + KASSERT(pmap_page_get_memattr(m) == + VM_MEMATTR_DEFAULT, + ("page %p has an unexpected memattr", m)); + KASSERT((m->oflags & (VPO_SWAPINPROG | + VPO_SWAPSLEEP | VPO_UNMANAGED)) == 0, + ("page %p has unexpected oflags", m)); + /* Don't care: VPO_NOSYNC. */ + if (m->valid != 0) { + /* + * First, try to allocate a new page + * that is above "high". Failing + * that, try to allocate a new page + * that is below "m_run". Allocate + * the new page between the end of + * "m_run" and "high" only as a last + * resort. 
+ */ + req = req_class | VM_ALLOC_NOOBJ; + if ((m->flags & PG_NODUMP) != 0) + req |= VM_ALLOC_NODUMP; + if (trunc_page(high) != + ~(vm_paddr_t)PAGE_MASK) { + m_new = vm_page_alloc_contig( + NULL, 0, req, 1, + round_page(high), + ~(vm_paddr_t)0, + PAGE_SIZE, 0, + VM_MEMATTR_DEFAULT); + } else + m_new = NULL; + if (m_new == NULL) { + pa = VM_PAGE_TO_PHYS(m_run); + m_new = vm_page_alloc_contig( + NULL, 0, req, 1, + 0, pa - 1, PAGE_SIZE, 0, + VM_MEMATTR_DEFAULT); + } + if (m_new == NULL) { + pa += ptoa(npages); + m_new = vm_page_alloc_contig( + NULL, 0, req, 1, + pa, high, PAGE_SIZE, 0, + VM_MEMATTR_DEFAULT); + } + if (m_new == NULL) { + error = ENOMEM; + goto unlock; + } + KASSERT(m_new->wire_count == 0, + ("page %p is wired", m)); + + /* + * Replace "m" with the new page. For + * vm_page_replace(), "m" must be busy + * and dequeued. Finally, change "m" + * as if vm_page_free() was called. + */ + if (object->ref_count != 0) + pmap_remove_all(m); + m_new->aflags = m->aflags; + KASSERT(m_new->oflags == VPO_UNMANAGED, + ("page %p is managed", m)); + m_new->oflags = m->oflags & VPO_NOSYNC; + pmap_copy_page(m, m_new); + m_new->valid = m->valid; + m_new->dirty = m->dirty; + m->flags &= ~PG_ZERO; + vm_page_xbusy(m); + vm_page_remque(m); + vm_page_replace_checked(m_new, object, + m->pindex, m); + m->valid = 0; + vm_page_undirty(m); + + /* + * The new page must be deactivated + * before the object is unlocked. 
+ */ + new_mtx = vm_page_lockptr(m_new); + if (m_mtx != new_mtx) { + mtx_unlock(m_mtx); + m_mtx = new_mtx; + mtx_lock(m_mtx); + } + vm_page_deactivate(m_new); + } else { + m->flags &= ~PG_ZERO; + vm_page_remque(m); + vm_page_remove(m); + KASSERT(m->dirty == 0, + ("page %p is dirty", m)); + } + SLIST_INSERT_HEAD(&free, m, plinks.s.ss); + } else + error = EBUSY; +unlock: + VM_OBJECT_WUNLOCK(object); + } else { +cached: + mtx_lock(&vm_page_queue_free_mtx); + order = m->order; + if (order < VM_NFREEORDER) { + /* + * The page is enqueued in the physical memory + * allocator's cache/free page queues. + * Moreover, it is the first page in a power- + * of-two-sized run of contiguous cache/free + * pages. Jump ahead to the last page within + * that run, and continue from there. + */ + m += (1 << order) - 1; + } +#if VM_NRESERVLEVEL > 0 + else if (vm_reserv_is_page_free(m)) + order = 0; +#endif + mtx_unlock(&vm_page_queue_free_mtx); + if (order == VM_NFREEORDER) + error = EINVAL; + } + } + if (m_mtx != NULL) + mtx_unlock(m_mtx); + if ((m = SLIST_FIRST(&free)) != NULL) { + mtx_lock(&vm_page_queue_free_mtx); + do { + SLIST_REMOVE_HEAD(&free, plinks.s.ss); + vm_phys_freecnt_adj(m, 1); +#if VM_NRESERVLEVEL > 0 + if (!vm_reserv_free_page(m)) +#else + if (true) +#endif + vm_phys_free_pages(m, 0); + } while ((m = SLIST_FIRST(&free)) != NULL); + vm_page_zero_idle_wakeup(); + vm_page_free_wakeup(); + mtx_unlock(&vm_page_queue_free_mtx); + } + return (error); +} + +#define NRUNS 16 + +CTASSERT(powerof2(NRUNS)); + +#define RUN_INDEX(count) ((count) & (NRUNS - 1)) + +#define MIN_RECLAIM 8 + +/* + * vm_page_reclaim_contig: + * + * Reclaim allocated, contiguous physical memory satisfying the specified + * conditions by relocating the virtual pages using that physical memory. + * Returns true if reclamation is successful and false otherwise. Since + * relocation requires the allocation of physical pages, reclamation may + * fail due to a shortage of cache/free pages. 
When reclamation fails, + * callers are expected to perform VM_WAIT before retrying a failed + * allocation operation, e.g., vm_page_alloc_contig(). + * + * The caller must always specify an allocation class through "req". + * + * allocation classes: + * VM_ALLOC_NORMAL normal process request + * VM_ALLOC_SYSTEM system *really* needs a page + * VM_ALLOC_INTERRUPT interrupt time request + * + * The optional allocation flags are ignored. + * + * "npages" must be greater than zero. Both "alignment" and "boundary" + * must be a power of two. + */ +bool +vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high, + u_long alignment, vm_paddr_t boundary) +{ + vm_paddr_t curr_low; + vm_page_t m_run, m_runs[NRUNS]; + u_long count, reclaimed; + int error, i, options, req_class; + + KASSERT(npages > 0, ("npages is 0")); + KASSERT(powerof2(alignment), ("alignment is not a power of 2")); + KASSERT(powerof2(boundary), ("boundary is not a power of 2")); + req_class = req & VM_ALLOC_CLASS_MASK; + + /* + * The page daemon is allowed to dig deeper into the free page list. + */ + if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) + req_class = VM_ALLOC_SYSTEM; + + /* + * Return if the number of cached and free pages cannot satisfy the + * requested allocation. + */ + count = vm_cnt.v_free_count + vm_cnt.v_cache_count; + if (count < npages + vm_cnt.v_free_reserved || (count < npages + + vm_cnt.v_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) || + (count < npages && req_class == VM_ALLOC_INTERRUPT)) + return (false); + + /* + * Scan up to three times, relaxing the restrictions ("options") on + * the reclamation of reservations and superpages each time. + */ + for (options = VPSC_NORESERV;;) { + /* + * Find the highest runs that satisfy the given constraints + * and restrictions, and record them in "m_runs". 
+ */ + curr_low = low; + count = 0; + for (;;) { + m_run = vm_phys_scan_contig(npages, curr_low, high, + alignment, boundary, options); + if (m_run == NULL) + break; + curr_low = VM_PAGE_TO_PHYS(m_run) + ptoa(npages); + m_runs[RUN_INDEX(count)] = m_run; + count++; + } + + /* + * Reclaim the highest runs in LIFO (descending) order until + * the number of reclaimed pages, "reclaimed", is at least + * MIN_RECLAIM. Reset "reclaimed" each time because each + * reclamation is idempotent, and runs will (likely) recur + * from one scan to the next as restrictions are relaxed. + */ + reclaimed = 0; + for (i = 0; count > 0 && i < NRUNS; i++) { + count--; + m_run = m_runs[RUN_INDEX(count)]; + error = vm_page_reclaim_run(req_class, npages, m_run, + high); + if (error == 0) { + reclaimed += npages; + if (reclaimed >= MIN_RECLAIM) + return (true); + } + } + + /* + * Either relax the restrictions on the next scan or return if + * the last scan had no restrictions. + */ + if (options == VPSC_NORESERV) + options = VPSC_NOSUPER; + else if (options == VPSC_NOSUPER) + options = VPSC_ANY; + else if (options == VPSC_ANY) + return (reclaimed != 0); + } +} + /* * vm_wait: (also see VM_WAIT macro) * diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index a1f6fc405681..972603a04b98 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -474,6 +474,8 @@ vm_page_t vm_page_prev(vm_page_t m); boolean_t vm_page_ps_is_valid(vm_page_t m); void vm_page_putfake(vm_page_t m); void vm_page_readahead_finish(vm_page_t m); +bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, + vm_paddr_t high, u_long alignment, vm_paddr_t boundary); void vm_page_reference(vm_page_t m); void vm_page_remove (vm_page_t); int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t); @@ -482,6 +484,8 @@ vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object, void vm_page_requeue(vm_page_t m); void vm_page_requeue_locked(vm_page_t m); int vm_page_sbusied(vm_page_t m); +vm_page_t vm_page_scan_contig(u_long 
npages, vm_page_t m_start, + vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options); void vm_page_set_valid_range(vm_page_t m, int base, int size); int vm_page_sleep_if_busy(vm_page_t m, const char *msg); vm_offset_t vm_page_startup(vm_offset_t vaddr); diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 4997c82351fd..b7f688732970 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -237,8 +237,6 @@ SYSCTL_INT(_vm, OID_AUTO, max_wired, CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count"); static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *); -static boolean_t vm_pageout_launder(struct vm_pagequeue *pq, int, vm_paddr_t, - vm_paddr_t); #if !defined(NO_SWAPPING) static void vm_pageout_map_deactivate_pages(vm_map_t, long); static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long); @@ -595,170 +593,6 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen, return (numpagedout); } -static boolean_t -vm_pageout_launder(struct vm_pagequeue *pq, int tries, vm_paddr_t low, - vm_paddr_t high) -{ - struct mount *mp; - struct vnode *vp; - vm_object_t object; - vm_paddr_t pa; - vm_page_t m, m_tmp, next; - int lockmode; - - vm_pagequeue_lock(pq); - TAILQ_FOREACH_SAFE(m, &pq->pq_pl, plinks.q, next) { - if ((m->flags & PG_MARKER) != 0) - continue; - pa = VM_PAGE_TO_PHYS(m); - if (pa < low || pa + PAGE_SIZE > high) - continue; - if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) { - vm_page_unlock(m); - continue; - } - object = m->object; - if ((!VM_OBJECT_TRYWLOCK(object) && - (!vm_pageout_fallback_object_lock(m, &next) || - m->hold_count != 0)) || vm_page_busied(m)) { - vm_page_unlock(m); - VM_OBJECT_WUNLOCK(object); - continue; - } - vm_page_test_dirty(m); - if (m->dirty == 0 && object->ref_count != 0) - pmap_remove_all(m); - if (m->dirty != 0) { - vm_page_unlock(m); - if (tries == 0 || (object->flags & OBJ_DEAD) != 0) { - VM_OBJECT_WUNLOCK(object); - 
continue; - } - if (object->type == OBJT_VNODE) { - vm_pagequeue_unlock(pq); - vp = object->handle; - vm_object_reference_locked(object); - VM_OBJECT_WUNLOCK(object); - (void)vn_start_write(vp, &mp, V_WAIT); - lockmode = MNT_SHARED_WRITES(vp->v_mount) ? - LK_SHARED : LK_EXCLUSIVE; - vn_lock(vp, lockmode | LK_RETRY); - VM_OBJECT_WLOCK(object); - vm_object_page_clean(object, 0, 0, OBJPC_SYNC); - VM_OBJECT_WUNLOCK(object); - VOP_UNLOCK(vp, 0); - vm_object_deallocate(object); - vn_finished_write(mp); - return (TRUE); - } else if (object->type == OBJT_SWAP || - object->type == OBJT_DEFAULT) { - vm_pagequeue_unlock(pq); - m_tmp = m; - vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC, - 0, NULL, NULL); - VM_OBJECT_WUNLOCK(object); - return (TRUE); - } - } else { - /* - * Dequeue here to prevent lock recursion in - * vm_page_cache(). - */ - vm_page_dequeue_locked(m); - vm_page_cache(m); - vm_page_unlock(m); - } - VM_OBJECT_WUNLOCK(object); - } - vm_pagequeue_unlock(pq); - return (FALSE); -} - -/* - * Increase the number of cached pages. The specified value, "tries", - * determines which categories of pages are cached: - * - * 0: All clean, inactive pages within the specified physical address range - * are cached. Will not sleep. - * 1: The vm_lowmem handlers are called. All inactive pages within - * the specified physical address range are cached. May sleep. - * 2: The vm_lowmem handlers are called. All inactive and active pages - * within the specified physical address range are cached. May sleep. - */ -void -vm_pageout_grow_cache(int tries, vm_paddr_t low, vm_paddr_t high) -{ - int actl, actmax, inactl, inactmax, dom, initial_dom; - static int start_dom = 0; - - if (tries > 0) { - /* - * Decrease registered cache sizes. The vm_lowmem handlers - * may acquire locks and/or sleep, so they can only be invoked - * when "tries" is greater than zero. 
- */ - SDT_PROBE0(vm, , , vm__lowmem_cache); - EVENTHANDLER_INVOKE(vm_lowmem, 0); - - /* - * We do this explicitly after the caches have been drained - * above. - */ - uma_reclaim(); - } - - /* - * Make the next scan start on the next domain. - */ - initial_dom = atomic_fetchadd_int(&start_dom, 1) % vm_ndomains; - - inactl = 0; - inactmax = vm_cnt.v_inactive_count; - actl = 0; - actmax = tries < 2 ? 0 : vm_cnt.v_active_count; - dom = initial_dom; - - /* - * Scan domains in round-robin order, first inactive queues, - * then active. Since domain usually owns large physically - * contiguous chunk of memory, it makes sense to completely - * exhaust one domain before switching to next, while growing - * the pool of contiguous physical pages. - * - * Do not even start launder a domain which cannot contain - * the specified address range, as indicated by segments - * constituting the domain. - */ -again_inact: - if (inactl < inactmax) { - if (vm_phys_domain_intersects(vm_dom[dom].vmd_segs, - low, high) && - vm_pageout_launder(&vm_dom[dom].vmd_pagequeues[PQ_INACTIVE], - tries, low, high)) { - inactl++; - goto again_inact; - } - if (++dom == vm_ndomains) - dom = 0; - if (dom != initial_dom) - goto again_inact; - } -again_act: - if (actl < actmax) { - if (vm_phys_domain_intersects(vm_dom[dom].vmd_segs, - low, high) && - vm_pageout_launder(&vm_dom[dom].vmd_pagequeues[PQ_ACTIVE], - tries, low, high)) { - actl++; - goto again_act; - } - if (++dom == vm_ndomains) - dom = 0; - if (dom != initial_dom) - goto again_act; - } -} - #if !defined(NO_SWAPPING) /* * vm_pageout_object_deactivate_pages diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h index 9c8da8bb30d6..2d0b961001a4 100644 --- a/sys/vm/vm_pageout.h +++ b/sys/vm/vm_pageout.h @@ -101,7 +101,6 @@ extern void vm_waitpfault(void); #ifdef _KERNEL int vm_pageout_flush(vm_page_t *, int, int, int, int *, boolean_t *); -void vm_pageout_grow_cache(int, vm_paddr_t, vm_paddr_t); void vm_pageout_oom(int shortage); #endif #endif /* 
_VM_VM_PAGEOUT_H_ */ diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c index d26b8b580952..38799f2dac03 100644 --- a/sys/vm/vm_phys.c +++ b/sys/vm/vm_phys.c @@ -170,6 +170,9 @@ static struct vm_domain_policy vm_default_policy = static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order); +static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, + u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, + vm_paddr_t boundary); static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain); static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end); static int vm_phys_paddr_to_segind(vm_paddr_t pa); @@ -1162,6 +1165,56 @@ vm_phys_free_contig(vm_page_t m, u_long npages) } } +/* + * Scan physical memory between the specified addresses "low" and "high" for a + * run of contiguous physical pages that satisfy the specified conditions, and + * return the lowest page in the run. The specified "alignment" determines + * the alignment of the lowest physical page in the run. If the specified + * "boundary" is non-zero, then the run of physical pages cannot span a + * physical address that is a multiple of "boundary". + * + * "npages" must be greater than zero. Both "alignment" and "boundary" must + * be a power of two. 
+ */ +vm_page_t +vm_phys_scan_contig(u_long npages, vm_paddr_t low, vm_paddr_t high, + u_long alignment, vm_paddr_t boundary, int options) +{ + vm_paddr_t pa_end; + vm_page_t m_end, m_run, m_start; + struct vm_phys_seg *seg; + int segind; + + KASSERT(npages > 0, ("npages is 0")); + KASSERT(powerof2(alignment), ("alignment is not a power of 2")); + KASSERT(powerof2(boundary), ("boundary is not a power of 2")); + if (low >= high) + return (NULL); /* empty address range: there is nothing to scan */ + for (segind = 0; segind < vm_phys_nsegs; segind++) { + seg = &vm_phys_segs[segind]; + if (seg->start >= high) + break; /* NOTE(review): ending the loop here presumes vm_phys_segs[] is ordered by start address -- confirm */ + if (low >= seg->end) + continue; /* this segment lies entirely below low */ + if (low <= seg->start) + m_start = seg->first_page; /* the range covers the first page of the segment */ + else + m_start = &seg->first_page[atop(low - seg->start)]; /* the page containing low */ + if (high < seg->end) + pa_end = high; + else + pa_end = seg->end; /* pa_end is the lesser of high and the end of the segment */ + if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages)) + continue; /* the clipped range is too short to hold npages pages */ + m_end = &seg->first_page[atop(pa_end - seg->start)]; /* NOTE(review): presumably an exclusive upper bound for the scan -- confirm against vm_page_scan_contig() */ + m_run = vm_page_scan_contig(npages, m_start, m_end, + alignment, boundary, options); + if (m_run != NULL) + return (m_run); /* the first run found is returned */ + } + return (NULL); /* no segment produced a suitable run */ +} + /* * Set the pool for a contiguous, power of two-sized set of physical pages. 
*/ @@ -1300,93 +1353,123 @@ vm_page_t vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) { - struct vm_freelist *fl; - struct vm_phys_seg *seg; - vm_paddr_t pa, pa_last, size; - vm_page_t m, m_ret; - u_long npages_end; - int domain, flind, oind, order, pind; + vm_paddr_t pa_end, pa_start; + vm_page_t m_run; struct vm_domain_iterator vi; + struct vm_phys_seg *seg; + int domain, segind; + KASSERT(npages > 0, ("npages is 0")); + KASSERT(powerof2(alignment), ("alignment is not a power of 2")); + KASSERT(powerof2(boundary), ("boundary is not a power of 2")); mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); - size = npages << PAGE_SHIFT; - KASSERT(size != 0, - ("vm_phys_alloc_contig: size must not be 0")); - KASSERT((alignment & (alignment - 1)) == 0, - ("vm_phys_alloc_contig: alignment must be a power of 2")); - KASSERT((boundary & (boundary - 1)) == 0, - ("vm_phys_alloc_contig: boundary must be a power of 2")); - /* Compute the queue that is the best fit for npages. 
*/ - for (order = 0; (1 << order) < npages; order++); - + if (low >= high) + return (NULL); vm_policy_iterator_init(&vi); - restartdom: if (vm_domain_iterator_run(&vi, &domain) != 0) { vm_policy_iterator_finish(&vi); return (NULL); } + m_run = NULL; + for (segind = 0; segind < vm_phys_nsegs; segind++) { + seg = &vm_phys_segs[segind]; + if (seg->start >= high) + break; + if (low >= seg->end || seg->domain != domain) + continue; + if (low <= seg->start) + pa_start = seg->start; + else + pa_start = low; + if (high < seg->end) + pa_end = high; + else + pa_end = seg->end; + if (pa_end - pa_start < ptoa(npages)) + continue; + m_run = vm_phys_alloc_seg_contig(seg, npages, low, high, + alignment, boundary); + if (m_run != NULL) + break; + } + if (m_run == NULL && !vm_domain_iterator_isdone(&vi)) + goto restartdom; + vm_policy_iterator_finish(&vi); + return (m_run); +} - for (flind = 0; flind < vm_nfreelists; flind++) { - for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) { - for (pind = 0; pind < VM_NFREEPOOL; pind++) { - fl = &vm_phys_free_queues[domain][flind][pind][0]; - TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) { - /* - * A free list may contain physical pages - * from one or more segments. - */ - seg = &vm_phys_segs[m_ret->segind]; - if (seg->start > high || - low >= seg->end) - continue; +/* + * Allocate a run of contiguous physical pages from the free list for the + * specified segment. 
+ */ +static vm_page_t +vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages, + vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) +{ + struct vm_freelist *fl; + vm_paddr_t pa, pa_end, size; + vm_page_t m, m_ret; + u_long npages_end; + int oind, order, pind; + KASSERT(npages > 0, ("npages is 0")); + KASSERT(powerof2(alignment), ("alignment is not a power of 2")); + KASSERT(powerof2(boundary), ("boundary is not a power of 2")); + mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + /* Compute the queue that is the best fit for npages. */ + for (order = 0; (1 << order) < npages; order++); + /* Search for a run satisfying the specified conditions. */ + size = npages << PAGE_SHIFT; + for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; + oind++) { + for (pind = 0; pind < VM_NFREEPOOL; pind++) { + fl = (*seg->free_queues)[pind]; + TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) { + /* + * Is the size of this allocation request + * larger than the largest block size? + */ + if (order >= VM_NFREEORDER) { /* - * Is the size of this allocation request - * larger than the largest block size? - */ - if (order >= VM_NFREEORDER) { - /* - * Determine if a sufficient number - * of subsequent blocks to satisfy - * the allocation request are free. - */ - pa = VM_PAGE_TO_PHYS(m_ret); - pa_last = pa + size; - for (;;) { - pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1); - if (pa >= pa_last) - break; - if (pa < seg->start || - pa >= seg->end) - break; - m = &seg->first_page[atop(pa - seg->start)]; - if (m->order != VM_NFREEORDER - 1) - break; - } - /* If not, continue to the next block. */ - if (pa < pa_last) - continue; - } - - /* - * Determine if the blocks are within the given range, - * satisfy the given alignment, and do not cross the - * given boundary. + * Determine if a sufficient number of + * subsequent blocks to satisfy the + * allocation request are free. 
*/ pa = VM_PAGE_TO_PHYS(m_ret); - if (pa >= low && - pa + size <= high && - (pa & (alignment - 1)) == 0 && - ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0) - goto done; + pa_end = pa + size; + for (;;) { + pa += 1 << (PAGE_SHIFT + + VM_NFREEORDER - 1); + if (pa >= pa_end || + pa < seg->start || + pa >= seg->end) + break; + m = &seg->first_page[atop(pa - + seg->start)]; + if (m->order != VM_NFREEORDER - + 1) + break; + } + /* If not, go to the next block. */ + if (pa < pa_end) + continue; } + + /* + * Determine if the blocks are within the + * given range, satisfy the given alignment, + * and do not cross the given boundary. + */ + pa = VM_PAGE_TO_PHYS(m_ret); + pa_end = pa + size; + if (pa >= low && pa_end <= high && (pa & + (alignment - 1)) == 0 && ((pa ^ (pa_end - + 1)) & ~(boundary - 1)) == 0) + goto done; } } } - if (!vm_domain_iterator_isdone(&vi)) - goto restartdom; - vm_policy_iterator_finish(&vi); return (NULL); done: for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) { diff --git a/sys/vm/vm_phys.h b/sys/vm/vm_phys.h index 37864dbd832b..f9c776d06919 100644 --- a/sys/vm/vm_phys.h +++ b/sys/vm/vm_phys.h @@ -84,6 +84,8 @@ void vm_phys_free_contig(vm_page_t m, u_long npages); void vm_phys_free_pages(vm_page_t m, int order); void vm_phys_init(void); vm_page_t vm_phys_paddr_to_vm_page(vm_paddr_t pa); +vm_page_t vm_phys_scan_contig(u_long npages, vm_paddr_t low, vm_paddr_t high, + u_long alignment, vm_paddr_t boundary, int options); void vm_phys_set_pool(int pool, vm_page_t m, int order); boolean_t vm_phys_unfree_page(vm_page_t m); boolean_t vm_phys_zero_pages_idle(void); diff --git a/sys/vm/vm_reserv.c b/sys/vm/vm_reserv.c index 8f34b3150461..8bb178817e6e 100644 --- a/sys/vm/vm_reserv.c +++ b/sys/vm/vm_reserv.c @@ -865,6 +865,35 @@ vm_reserv_init(void) } } +/* + * Returns true if the given page belongs to a reservation and that page is + * free. Otherwise, returns false. 
+ */ +bool +vm_reserv_is_page_free(vm_page_t m) +{ + vm_reserv_t rv; + + mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); /* the caller must hold the free page queue mutex */ + rv = vm_reserv_from_page(m); + if (rv->object == NULL) + return (false); /* no object is attached, so the page is not reported as part of a reservation */ + return (popmap_is_clear(rv->popmap, m - rv->pages)); /* m - rv->pages is the index of the page within rv->pages */ +} + +/* + * If the given page belongs to a reservation, returns the level of that + * reservation. Otherwise, returns -1. + */ +int +vm_reserv_level(vm_page_t m) +{ + vm_reserv_t rv; + + rv = vm_reserv_from_page(m); + return (rv->object != NULL ? 0 : -1); /* 0 is the only level this function ever reports */ +} + /* * Returns a reservation level if the given page belongs to a fully-populated * reservation and -1 otherwise. @@ -1075,6 +1104,23 @@ vm_reserv_rename(vm_page_t m, vm_object_t new_object, vm_object_t old_object, } } +/* + * Returns the size (in bytes) of a reservation of the specified level. + */ +int +vm_reserv_size(int level) +{ + + switch (level) { + case 0: + return (VM_LEVEL_0_SIZE); + case -1: + return (PAGE_SIZE); /* -1 is the value vm_reserv_level() returns for a page outside any reservation */ + default: + return (0); /* any other level is answered with a size of zero */ + } +} + /* * Allocates the virtual and physical memory required by the reservation * management system's data structures, in particular, the reservation array. 
diff --git a/sys/vm/vm_reserv.h b/sys/vm/vm_reserv.h index 4c142c773e84..52f6ab25477a 100644 --- a/sys/vm/vm_reserv.h +++ b/sys/vm/vm_reserv.h @@ -53,6 +53,8 @@ vm_page_t vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, void vm_reserv_break_all(vm_object_t object); boolean_t vm_reserv_free_page(vm_page_t m); void vm_reserv_init(void); +bool vm_reserv_is_page_free(vm_page_t m); +int vm_reserv_level(vm_page_t m); int vm_reserv_level_iffullpop(vm_page_t m); boolean_t vm_reserv_reactivate_page(vm_page_t m); boolean_t vm_reserv_reclaim_contig(u_long npages, vm_paddr_t low, @@ -60,6 +62,7 @@ boolean_t vm_reserv_reclaim_contig(u_long npages, vm_paddr_t low, boolean_t vm_reserv_reclaim_inactive(void); void vm_reserv_rename(vm_page_t m, vm_object_t new_object, vm_object_t old_object, vm_pindex_t old_object_offset); +int vm_reserv_size(int level); vm_paddr_t vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water);