From 3c355d849c9773c9c1c59217da9511e500bb4e15 Mon Sep 17 00:00:00 2001
From: jeff
Date: Wed, 8 Nov 2017 02:39:37 +0000
Subject: [PATCH] Replace many instances of VM_WAIT with blocking page
 allocation flags similar to the kernel memory allocator.

This simplifies NUMA allocation because the domain will be known at wait
time and races between failure and sleeping are eliminated.  This also
reduces boilerplate code and simplifies callers.

A wait primitive is supplied for uma zones for similar reasons.  This
eliminates some non-specific VM_WAIT calls in favor of more explicit
sleeps that may be satisfied without new pages.

Reviewed by:	alc, kib, markj
Tested by:	pho
Sponsored by:	Netflix, Dell/EMC Isilon
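As an illustration only (a hypothetical caller: "obj", "idx", and the
"retry" label below are not part of this diff), the conversion pattern
for page allocation is:

	/* Before: the caller loops, blocking in VM_WAIT itself. */
	m = vm_page_alloc(obj, idx, VM_ALLOC_NORMAL);
	if (m == NULL) {
		VM_OBJECT_WUNLOCK(obj);
		VM_WAIT;
		VM_OBJECT_WLOCK(obj);
		goto retry;
	}

	/*
	 * After: the allocator sleeps on the caller's behalf.
	 * VM_ALLOC_WAITFAIL drops the object lock, sleeps, relocks, and
	 * returns NULL so the caller can revalidate and retry;
	 * VM_ALLOC_WAITOK sleeps and retries internally (only legal
	 * without an object, since the insertion point may change while
	 * sleeping); VM_ALLOC_NOWAIT never sleeps.
	 */
	m = vm_page_alloc(obj, idx, VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
	if (m == NULL)
		goto retry;

Callers that used VM_WAIT to wait for zone memory can instead sleep
with uma_zwait(zone), which blocks until the zone can satisfy an
allocation.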
---
 sys/amd64/amd64/pmap.c            |   5 +-
 sys/amd64/amd64/uma_machdep.c     |  16 ++---
 sys/arm64/arm64/uma_machdep.c     |  16 ++---
 sys/fs/tmpfs/tmpfs_subr.c         |   9 +--
 sys/kern/uipc_shm.c               |   9 +--
 sys/kern/vfs_bio.c                |   8 +--
 sys/mips/mips/uma_machdep.c       |   4 ++
 sys/powerpc/aim/mmu_oea64.c       |  16 ++---
 sys/powerpc/aim/slb.c             |  18 ++---
 sys/powerpc/powerpc/uma_machdep.c |  15 ++---
 sys/sparc64/sparc64/vm_machdep.c  |  16 ++---
 sys/vm/phys_pager.c               |   9 +--
 sys/vm/swap_pager.c               |   4 +-
 sys/vm/uma.h                      |   5 ++
 sys/vm/uma_core.c                 |  17 +++--
 sys/vm/vm_kern.c                  |  15 +++--
 sys/vm/vm_object.c                |   5 +-
 sys/vm/vm_page.c                  | 106 +++++++++++++++++++++++-------
 sys/vm/vm_page.h                  |   8 ++-
 sys/vm/vm_radix.c                 |   6 ++
 sys/vm/vm_radix.h                 |   1 +
 sys/x86/iommu/intel_utils.c       |   9 +--
 22 files changed, 172 insertions(+), 145 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 9050c9fcfeec..e93936685347 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -2414,9 +2414,8 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
 	/*
 	 * allocate the page directory page
 	 */
-	while ((pml4pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
-	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
-		VM_WAIT;
+	pml4pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
+	    VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_WAITOK);
 
 	pml4phys = VM_PAGE_TO_PHYS(pml4pg);
 	pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(pml4phys);
diff --git a/sys/amd64/amd64/uma_machdep.c b/sys/amd64/amd64/uma_machdep.c
index 3c8a4f7c3a9a..cc94c1b78353 100644
--- a/sys/amd64/amd64/uma_machdep.c
+++ b/sys/amd64/amd64/uma_machdep.c
@@ -47,20 +47,12 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
 	vm_page_t m;
 	vm_paddr_t pa;
 	void *va;
-	int pflags;
 
 	*flags = UMA_SLAB_PRIV;
-	pflags = malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
-	for (;;) {
-		m = vm_page_alloc(NULL, 0, pflags);
-		if (m == NULL) {
-			if (wait & M_NOWAIT)
-				return (NULL);
-			else
-				VM_WAIT;
-		} else
-			break;
-	}
+	m = vm_page_alloc(NULL, 0,
+	    malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
+	if (m == NULL)
+		return (NULL);
 	pa = m->phys_addr;
 	if ((wait & M_NODUMP) == 0)
 		dump_add_page(pa);
diff --git a/sys/arm64/arm64/uma_machdep.c b/sys/arm64/arm64/uma_machdep.c
index 76e2692b2ee2..620b8c46828a 100644
--- a/sys/arm64/arm64/uma_machdep.c
+++ b/sys/arm64/arm64/uma_machdep.c
@@ -47,20 +47,12 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
 	vm_page_t m;
 	vm_paddr_t pa;
 	void *va;
-	int pflags;
 
 	*flags = UMA_SLAB_PRIV;
-	pflags = malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
-	for (;;) {
-		m = vm_page_alloc(NULL, 0, pflags);
-		if (m == NULL) {
-			if (wait & M_NOWAIT)
-				return (NULL);
-			else
-				VM_WAIT;
-		} else
-			break;
-	}
+	m = vm_page_alloc(NULL, 0,
+	    malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
+	if (m == NULL)
+		return (NULL);
 	pa = m->phys_addr;
 	if ((wait & M_NODUMP) == 0)
 		dump_add_page(pa);
diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c
index 8180a2cd2a4c..960750daf187 100644
--- a/sys/fs/tmpfs/tmpfs_subr.c
+++ b/sys/fs/tmpfs/tmpfs_subr.c
@@ -1407,13 +1407,10 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr)
 				goto retry;
 			MPASS(m->valid == VM_PAGE_BITS_ALL);
 		} else if (vm_pager_has_page(uobj, idx, NULL, NULL)) {
-			m = vm_page_alloc(uobj, idx, VM_ALLOC_NORMAL);
-			if (m == NULL) {
-				VM_OBJECT_WUNLOCK(uobj);
-				VM_WAIT;
-				VM_OBJECT_WLOCK(uobj);
+			m = vm_page_alloc(uobj, idx, VM_ALLOC_NORMAL |
+			    VM_ALLOC_WAITFAIL);
+			if (m == NULL)
 				goto retry;
-			}
 			rv = vm_pager_get_pages(uobj, &m, 1, NULL,
 			    NULL);
 			vm_page_lock(m);
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index a4fdb85a95c3..c8e5f0d1ccd9 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -454,13 +454,10 @@ shm_dotruncate(struct shmfd *shmfd, off_t length)
 			if (vm_page_sleep_if_busy(m, "shmtrc"))
 				goto retry;
 		} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
-			m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL);
-			if (m == NULL) {
-				VM_OBJECT_WUNLOCK(object);
-				VM_WAIT;
-				VM_OBJECT_WLOCK(object);
+			m = vm_page_alloc(object, idx,
+			    VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
+			if (m == NULL)
 				goto retry;
-			}
 			rv = vm_pager_get_pages(object, &m, 1, NULL,
 			    NULL);
 			vm_page_lock(m);
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index dd1f9bbd235a..35d67d1b37bb 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -4515,18 +4515,14 @@ vm_hold_load_pages(struct buf *bp, vm_offset_t from, vm_offset_t to)
 	index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
 
 	for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
-tryagain:
 		/*
 		 * note: must allocate system pages since blocking here
 		 * could interfere with paging I/O, no matter which
 		 * process we are.
 		 */
 		p = vm_page_alloc(NULL, 0, VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ |
-		    VM_ALLOC_WIRED | VM_ALLOC_COUNT((to - pg) >> PAGE_SHIFT));
-		if (p == NULL) {
-			VM_WAIT;
-			goto tryagain;
-		}
+		    VM_ALLOC_WIRED | VM_ALLOC_COUNT((to - pg) >> PAGE_SHIFT) |
+		    VM_ALLOC_WAITOK);
 		pmap_qenter(pg, &p, 1);
 		bp->b_pages[index] = p;
 	}
diff --git a/sys/mips/mips/uma_machdep.c b/sys/mips/mips/uma_machdep.c
index aa40215ed77e..69d135932a37 100644
--- a/sys/mips/mips/uma_machdep.c
+++ b/sys/mips/mips/uma_machdep.c
@@ -51,6 +51,10 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
 
 	*flags = UMA_SLAB_PRIV;
 	pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED;
+#ifndef __mips_n64
+	pflags &= ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL);
+	pflags |= VM_ALLOC_NOWAIT;
+#endif
 
 	for (;;) {
 		m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, pflags);
diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c
index 28c9c79916f1..ba0f9792d1ac 100644
--- a/sys/powerpc/aim/mmu_oea64.c
+++ b/sys/powerpc/aim/mmu_oea64.c
@@ -1506,7 +1506,7 @@ moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags,
 	struct pvo_entry *pvo;
 	vm_offset_t va;
 	vm_page_t m;
-	int pflags, needed_lock;
+	int needed_lock;
 
 	/*
 	 * This entire routine is a horrible hack to avoid bothering kmem
@@ -1517,17 +1517,11 @@ moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags,
 	*flags = UMA_SLAB_PRIV;
 	needed_lock = !PMAP_LOCKED(kernel_pmap);
 
-	pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED;
-
-	for (;;) {
-		m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ);
-		if (m == NULL) {
-			if (wait & M_NOWAIT)
-				return (NULL);
-			VM_WAIT;
-		} else
-			break;
-	}
+	m = vm_page_alloc(NULL, 0,
+	    malloc2vm_flags(wait) | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
+	if (m == NULL)
+		return (NULL);
 
 	va = VM_PAGE_TO_PHYS(m);
diff --git a/sys/powerpc/aim/slb.c b/sys/powerpc/aim/slb.c
index 72a595be6c70..caf6f4f75dd2 100644
--- a/sys/powerpc/aim/slb.c
+++ b/sys/powerpc/aim/slb.c
@@ -483,24 +483,16 @@ slb_uma_real_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
 	static vm_offset_t realmax = 0;
 	void *va;
 	vm_page_t m;
-	int pflags;
 
 	if (realmax == 0)
 		realmax = platform_real_maxaddr();
 
 	*flags = UMA_SLAB_PRIV;
-	pflags = malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
-
-	for (;;) {
-		m = vm_page_alloc_contig(NULL, 0, pflags, 1, 0, realmax,
-		    PAGE_SIZE, PAGE_SIZE, VM_MEMATTR_DEFAULT);
-		if (m == NULL) {
-			if (wait & M_NOWAIT)
-				return (NULL);
-			VM_WAIT;
-		} else
-			break;
-	}
+	m = vm_page_alloc_contig(NULL, 0,
+	    malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED,
+	    1, 0, realmax, PAGE_SIZE, PAGE_SIZE, VM_MEMATTR_DEFAULT);
+	if (m == NULL)
+		return (NULL);
 
 	va = (void *) VM_PAGE_TO_PHYS(m);
diff --git a/sys/powerpc/powerpc/uma_machdep.c b/sys/powerpc/powerpc/uma_machdep.c
index b4a4ec41d9ae..e121c2bbf637 100644
--- a/sys/powerpc/powerpc/uma_machdep.c
+++ b/sys/powerpc/powerpc/uma_machdep.c
@@ -56,20 +56,13 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
 	void *va;
 	vm_paddr_t pa;
 	vm_page_t m;
-	int pflags;
 
 	*flags = UMA_SLAB_PRIV;
-	pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED;
-
-	for (;;) {
-		m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ);
-		if (m == NULL) {
-			if (wait & M_NOWAIT)
-				return (NULL);
-			VM_WAIT;
-		} else
-			break;
-	}
+	m = vm_page_alloc(NULL, 0,
+	    malloc2vm_flags(wait) | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
+	if (m == NULL)
+		return (NULL);
 
 	pa = VM_PAGE_TO_PHYS(m);
diff --git a/sys/sparc64/sparc64/vm_machdep.c b/sys/sparc64/sparc64/vm_machdep.c
index 7618ea901540..858e5fc6f49c 100644
--- a/sys/sparc64/sparc64/vm_machdep.c
+++ b/sys/sparc64/sparc64/vm_machdep.c
@@ -394,24 +394,16 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
 {
 	vm_paddr_t pa;
 	vm_page_t m;
-	int pflags;
 	void *va;
 
 	PMAP_STATS_INC(uma_nsmall_alloc);
 
 	*flags = UMA_SLAB_PRIV;
-	pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED;
-
-	for (;;) {
-		m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ);
-		if (m == NULL) {
-			if (wait & M_NOWAIT)
-				return (NULL);
-			else
-				VM_WAIT;
-		} else
-			break;
-	}
+	m = vm_page_alloc(NULL, 0,
+	    malloc2vm_flags(wait) | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
+	if (m == NULL)
+		return (NULL);
 
 	pa = VM_PAGE_TO_PHYS(m);
 	if (dcache_color_ignore == 0 && m->md.color != DCACHE_COLOR(pa)) {
diff --git a/sys/vm/phys_pager.c b/sys/vm/phys_pager.c
index 7adc7c675857..0977cfd3b2d8 100644
--- a/sys/vm/phys_pager.c
+++ b/sys/vm/phys_pager.c
@@ -209,13 +209,10 @@ phys_pager_populate(vm_object_t object, vm_pindex_t pidx,
 		if (m == NULL) {
 			ahead = MIN(end - i, PHYSALLOC);
 			m = vm_page_alloc(object, i, VM_ALLOC_NORMAL |
-			    VM_ALLOC_ZERO | VM_ALLOC_COUNT(ahead));
-			if (m == NULL) {
-				VM_OBJECT_WUNLOCK(object);
-				VM_WAIT;
-				VM_OBJECT_WLOCK(object);
+			    VM_ALLOC_ZERO | VM_ALLOC_WAITFAIL |
+			    VM_ALLOC_COUNT(ahead));
+			if (m == NULL)
 				goto retry;
-			}
 			if ((m->flags & PG_ZERO) == 0)
 				pmap_zero_page(m);
 			m->valid = VM_PAGE_BITS_ALL;
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 02891e90880f..19601431d440 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1820,7 +1820,7 @@ swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk)
 				vm_pageout_oom(VM_OOM_SWAPZ);
 			pause("swzonxb", 10);
 		} else
-			VM_WAIT;
+			uma_zwait(swblk_zone);
 		VM_OBJECT_WLOCK(object);
 		sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks,
 		    rdpi);
@@ -1850,7 +1850,7 @@ swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk)
 				vm_pageout_oom(VM_OOM_SWAPZ);
 			pause("swzonxp", 10);
 		} else
-			VM_WAIT;
+			uma_zwait(swpctrie_zone);
 		VM_OBJECT_WLOCK(object);
 		sb1 = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks,
 		    rdpi);
diff --git a/sys/vm/uma.h b/sys/vm/uma.h
index 71027b898cff..3e758050a1ff 100644
--- a/sys/vm/uma.h
+++ b/sys/vm/uma.h
@@ -365,6 +365,11 @@ uma_zfree(uma_zone_t zone, void *item)
 	uma_zfree_arg(zone, item, NULL);
 }
 
+/*
+ * Wait until the specified zone can allocate an item.
+ */
+void uma_zwait(uma_zone_t zone);
+
 /*
  * XXX The rest of the prototypes in this header are h0h0 magic for the VM.
  * If you think you need to use it for a normal zone you're probably incorrect.
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index b2e02dd68540..91a57f12111e 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -1109,7 +1109,8 @@ noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
 	npages = howmany(bytes, PAGE_SIZE);
 	while (npages > 0) {
 		p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
-		    VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
+		    VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
+		    ((wait & M_WAITOK) ? VM_ALLOC_WAITOK : VM_ALLOC_NOWAIT));
 		if (p != NULL) {
 			/*
 			 * Since the page does not belong to an object, its
@@ -1119,11 +1120,6 @@ noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
 			npages--;
 			continue;
 		}
-		if (wait & M_WAITOK) {
-			VM_WAIT;
-			continue;
-		}
-
 		/*
 		 * Page allocation failed, free intermediate pages and
 		 * exit.
@@ -2026,6 +2022,15 @@ uma_zdestroy(uma_zone_t zone)
 	sx_sunlock(&uma_drain_lock);
 }
 
+void
+uma_zwait(uma_zone_t zone)
+{
+	void *item;
+
+	item = uma_zalloc_arg(zone, NULL, M_WAITOK);
+	uma_zfree(zone, item);
+}
+
 /* See uma.h */
 void *
 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 3f609b0339d2..8b95d70e205b 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -172,6 +172,8 @@ kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
 		return (0);
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
 	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
+	pflags &= ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL);
+	pflags |= VM_ALLOC_NOWAIT;
 	VM_OBJECT_WLOCK(object);
 	for (i = 0; i < size; i += PAGE_SIZE) {
 		tries = 0;
@@ -227,6 +229,8 @@ kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low,
 		return (0);
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
 	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
+	pflags &= ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL);
+	pflags |= VM_ALLOC_NOWAIT;
 	npages = atop(size);
 	VM_OBJECT_WLOCK(object);
 	tries = 0;
@@ -338,10 +342,13 @@ kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
 
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
 	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
+	pflags &= ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL);
+	if (flags & M_WAITOK)
+		pflags |= VM_ALLOC_WAITFAIL;
 
 	i = 0;
-retry:
 	VM_OBJECT_WLOCK(object);
+retry:
 	mpred = vm_radix_lookup_le(&object->rtree, atop(offset + i));
 	for (; i < size; i += PAGE_SIZE, mpred = m) {
 		m = vm_page_alloc_after(object, atop(offset + i), pflags,
@@ -353,11 +360,9 @@ kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
 		 * aren't on any queues.
 		 */
 		if (m == NULL) {
-			VM_OBJECT_WUNLOCK(object);
-			if ((flags & M_NOWAIT) == 0) {
-				VM_WAIT;
+			if ((flags & M_NOWAIT) == 0)
 				goto retry;
-			}
+			VM_OBJECT_WUNLOCK(object);
 			kmem_unback(object, addr, i);
 			return (KERN_NO_SPACE);
 		}
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index bcf1204b9937..1cd6eb589b9f 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -1471,7 +1471,7 @@ vm_object_split(vm_map_entry_t entry)
 		if (vm_page_rename(m, new_object, idx)) {
 			VM_OBJECT_WUNLOCK(new_object);
 			VM_OBJECT_WUNLOCK(orig_object);
-			VM_WAIT;
+			vm_radix_wait();
 			VM_OBJECT_WLOCK(orig_object);
 			VM_OBJECT_WLOCK(new_object);
 			goto retry;
@@ -1533,8 +1533,9 @@ vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p, vm_page_t next,
 		vm_page_lock(p);
 	VM_OBJECT_WUNLOCK(object);
 	VM_OBJECT_WUNLOCK(backing_object);
+	/* The page is only NULL when rename fails. */
 	if (p == NULL)
-		VM_WAIT;
+		vm_radix_wait();
 	else
 		vm_page_busy_sleep(p, "vmocol", false);
 	VM_OBJECT_WLOCK(object);
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 4604a7ebc6b6..4efc3d9f2d87 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -172,6 +172,7 @@ static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object,
     vm_page_t mpred);
 static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run,
     vm_paddr_t high);
+static int vm_page_alloc_fail(vm_object_t object, int req);
 
 SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init, NULL);
@@ -1595,6 +1596,8 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, int req,
 	    ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) !=
 	    (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)),
 	    ("inconsistent object(%p)/req(%x)", object, req));
+	KASSERT(object == NULL || (req & VM_ALLOC_WAITOK) == 0,
+	    ("Can't sleep and retry object insertion."));
 	KASSERT(mpred == NULL || mpred->pindex < pindex,
 	    ("mpred %p doesn't precede pindex 0x%jx", mpred,
 	    (uintmax_t)pindex));
@@ -1613,6 +1616,7 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, int req,
 	 * Allocate a page if the number of free pages exceeds the minimum
 	 * for the request class.
 	 */
+again:
 	mtx_lock(&vm_page_queue_free_mtx);
 	if (vm_cnt.v_free_count > vm_cnt.v_free_reserved ||
 	    (req_class == VM_ALLOC_SYSTEM &&
@@ -1645,10 +1649,8 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, int req,
 		/*
 		 * Not allocatable, give up.
 		 */
-		mtx_unlock(&vm_page_queue_free_mtx);
-		atomic_add_int(&vm_pageout_deficit,
-		    max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
-		pagedaemon_wakeup();
+		if (vm_page_alloc_fail(object, req))
+			goto again;
 		return (NULL);
 	}
@@ -1700,6 +1702,11 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, int req,
 			m->busy_lock = VPB_UNBUSIED;
 			/* Don't change PG_ZERO. */
 			vm_page_free_toq(m);
+			if (req & VM_ALLOC_WAITFAIL) {
+				VM_OBJECT_WUNLOCK(object);
+				vm_radix_wait();
+				VM_OBJECT_WLOCK(object);
+			}
 			return (NULL);
 		}
@@ -1777,6 +1784,8 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
 	    (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)),
 	    ("vm_page_alloc_contig: inconsistent object(%p)/req(%x)",
 	    object, req));
+	KASSERT(object == NULL || (req & VM_ALLOC_WAITOK) == 0,
+	    ("Can't sleep and retry object insertion."));
 	if (object != NULL) {
 		VM_OBJECT_ASSERT_WLOCKED(object);
 		KASSERT((object->flags & OBJ_FICTITIOUS) == 0,
@@ -1802,6 +1811,7 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
 	 * Can we allocate the pages without the number of free pages falling
 	 * below the lower bound for the allocation class?
 	 */
+again:
 	mtx_lock(&vm_page_queue_free_mtx);
 	if (vm_cnt.v_free_count >= npages + vm_cnt.v_free_reserved ||
 	    (req_class == VM_ALLOC_SYSTEM &&
@@ -1823,9 +1833,8 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
 		m_ret = vm_phys_alloc_contig(npages, low, high, alignment,
 		    boundary);
 	} else {
-		mtx_unlock(&vm_page_queue_free_mtx);
-		atomic_add_int(&vm_pageout_deficit, npages);
-		pagedaemon_wakeup();
+		if (vm_page_alloc_fail(object, req))
+			goto again;
 		return (NULL);
 	}
 	if (m_ret != NULL)
@@ -1891,6 +1900,11 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
 				/* Don't change PG_ZERO. */
 				vm_page_free_toq(m);
 			}
+			if (req & VM_ALLOC_WAITFAIL) {
+				VM_OBJECT_WUNLOCK(object);
+				vm_radix_wait();
+				VM_OBJECT_WLOCK(object);
+			}
 			return (NULL);
 		}
 		mpred = m;
@@ -1963,18 +1977,17 @@ vm_page_alloc_freelist(int flind, int req)
 	/*
 	 * Do not allocate reserved pages unless the req has asked for it.
 	 */
+again:
 	mtx_lock(&vm_page_queue_free_mtx);
 	if (vm_cnt.v_free_count > vm_cnt.v_free_reserved ||
 	    (req_class == VM_ALLOC_SYSTEM &&
 	    vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) ||
 	    (req_class == VM_ALLOC_INTERRUPT &&
-	    vm_cnt.v_free_count > 0))
+	    vm_cnt.v_free_count > 0)) {
 		m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0);
-	else {
-		mtx_unlock(&vm_page_queue_free_mtx);
-		atomic_add_int(&vm_pageout_deficit,
-		    max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
-		pagedaemon_wakeup();
+	} else {
+		if (vm_page_alloc_fail(NULL, req))
+			goto again;
 		return (NULL);
 	}
 	if (m == NULL) {
@@ -2533,11 +2546,11 @@ vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high,
  *	Sleep until free pages are available for allocation.
  *	- Called in various places before memory allocations.
  */
-void
-vm_wait(void)
+static void
+_vm_wait(void)
 {
 
-	mtx_lock(&vm_page_queue_free_mtx);
+	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	if (curproc == pageproc) {
 		vm_pageout_pages_needed = 1;
 		msleep(&vm_pageout_pages_needed, &vm_page_queue_free_mtx,
@@ -2555,6 +2568,46 @@ _vm_wait(void)
 	}
 }
 
+void
+vm_wait(void)
+{
+
+	mtx_lock(&vm_page_queue_free_mtx);
+	_vm_wait();
+}
+
+/*
+ *	vm_page_alloc_fail:
+ *
+ *	Called when a page allocation function fails.  Informs the
+ *	pagedaemon and performs the requested wait.  Requires the
+ *	page_queue_free and object lock on entry.  Returns with the
+ *	object lock held and free lock released.  Returns an error when
+ *	retry is necessary.
+ *
+ */
+static int
+vm_page_alloc_fail(vm_object_t object, int req)
+{
+
+	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+
+	atomic_add_int(&vm_pageout_deficit,
+	    max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
+	pagedaemon_wakeup();
+	if (req & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) {
+		if (object != NULL)
+			VM_OBJECT_WUNLOCK(object);
+		_vm_wait();
+		if (object != NULL)
+			VM_OBJECT_WLOCK(object);
+		if (req & VM_ALLOC_WAITOK)
+			return (EAGAIN);
+	} else
+		mtx_unlock(&vm_page_queue_free_mtx);
+	return (0);
+}
+
 /*
  *	vm_waitpfault:	(also see VM_WAITPFAULT macro)
  *
@@ -3179,11 +3232,16 @@ vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
 {
 	vm_page_t m;
 	int sleep;
+	int pflags;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
 	    (allocflags & VM_ALLOC_IGN_SBUSY) != 0,
 	    ("vm_page_grab: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch"));
+	pflags = allocflags &
+	    ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL);
+	if ((allocflags & VM_ALLOC_NOWAIT) == 0)
+		pflags |= VM_ALLOC_WAITFAIL;
 retrylookup:
 	if ((m = vm_page_lookup(object, pindex)) != NULL) {
 		sleep = (allocflags & VM_ALLOC_IGN_SBUSY) != 0 ?
@@ -3217,13 +3275,10 @@ vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
 			return (m);
 		}
 	}
-	m = vm_page_alloc(object, pindex, allocflags);
+	m = vm_page_alloc(object, pindex, pflags);
 	if (m == NULL) {
 		if ((allocflags & VM_ALLOC_NOWAIT) != 0)
 			return (NULL);
-		VM_OBJECT_WUNLOCK(object);
-		VM_WAIT;
-		VM_OBJECT_WLOCK(object);
 		goto retrylookup;
 	}
 	if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0)
@@ -3262,6 +3317,7 @@ vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
     vm_page_t *ma, int count)
 {
 	vm_page_t m, mpred;
+	int pflags;
 	int i;
 	bool sleep;
 
@@ -3276,6 +3332,10 @@ vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
 	    ("vm_page_grab_pages: VM_ALLOC_SBUSY/IGN_SBUSY mismatch"));
 	if (count == 0)
 		return (0);
+	pflags = allocflags & ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK |
+	    VM_ALLOC_WAITFAIL | VM_ALLOC_IGN_SBUSY);
+	if ((allocflags & VM_ALLOC_NOWAIT) == 0)
+		pflags |= VM_ALLOC_WAITFAIL;
 	i = 0;
 retrylookup:
 	m = vm_radix_lookup_le(&object->rtree, pindex + i);
@@ -3316,14 +3376,10 @@ vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
 			vm_page_sbusy(m);
 		} else {
 			m = vm_page_alloc_after(object, pindex + i,
-			    (allocflags & ~VM_ALLOC_IGN_SBUSY) |
-			    VM_ALLOC_COUNT(count - i), mpred);
+			    pflags | VM_ALLOC_COUNT(count - i), mpred);
 			if (m == NULL) {
 				if ((allocflags & VM_ALLOC_NOWAIT) != 0)
 					break;
-				VM_OBJECT_WUNLOCK(object);
-				VM_WAIT;
-				VM_OBJECT_WLOCK(object);
 				goto retrylookup;
 			}
 		}
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 9bbe7291e46e..17f45c87153d 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -415,6 +415,8 @@ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa);
 #define	VM_ALLOC_INTERRUPT	1
 #define	VM_ALLOC_SYSTEM		2
 #define	VM_ALLOC_CLASS_MASK	3
+#define	VM_ALLOC_WAITOK		0x0008	/* (acf) Sleep and retry */
+#define	VM_ALLOC_WAITFAIL	0x0010	/* (acf) Sleep and return error */
 #define	VM_ALLOC_WIRED		0x0020	/* (acfgp) Allocate a wired page */
 #define	VM_ALLOC_ZERO		0x0040	/* (acfgp) Allocate a prezeroed page */
 #define	VM_ALLOC_NOOBJ		0x0100	/* (acg) No associated object */
@@ -422,7 +424,7 @@ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa);
 #define	VM_ALLOC_IGN_SBUSY	0x1000	/* (gp) Ignore shared busy flag */
 #define	VM_ALLOC_NODUMP		0x2000	/* (ag) don't include in dump */
 #define	VM_ALLOC_SBUSY		0x4000	/* (acgp) Shared busy the page */
-#define	VM_ALLOC_NOWAIT		0x8000	/* (gp) Do not sleep */
+#define	VM_ALLOC_NOWAIT		0x8000	/* (acfgp) Do not sleep */
 #define	VM_ALLOC_COUNT_SHIFT	16
 #define	VM_ALLOC_COUNT(count)	((count) << VM_ALLOC_COUNT_SHIFT)
 
@@ -441,6 +443,10 @@ malloc2vm_flags(int malloc_flags)
 		pflags |= VM_ALLOC_ZERO;
 	if ((malloc_flags & M_NODUMP) != 0)
 		pflags |= VM_ALLOC_NODUMP;
+	if ((malloc_flags & M_NOWAIT) != 0)
+		pflags |= VM_ALLOC_NOWAIT;
+	if ((malloc_flags & M_WAITOK) != 0)
+		pflags |= VM_ALLOC_WAITOK;
 	return (pflags);
 }
 #endif
diff --git a/sys/vm/vm_radix.c b/sys/vm/vm_radix.c
index 1a19dd93af93..546d316211da 100644
--- a/sys/vm/vm_radix.c
+++ b/sys/vm/vm_radix.c
@@ -775,6 +775,12 @@ vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage)
 	panic("%s: original replacing page not found", __func__);
 }
 
+void
+vm_radix_wait(void)
+{
+	uma_zwait(vm_radix_node_zone);
+}
+
 #ifdef DDB
 /*
  * Show details about the given radix node.
diff --git a/sys/vm/vm_radix.h b/sys/vm/vm_radix.h
index d96e50b01e48..df81180161a4 100644
--- a/sys/vm/vm_radix.h
+++ b/sys/vm/vm_radix.h
@@ -36,6 +36,7 @@
 #ifdef _KERNEL
 
 int		vm_radix_insert(struct vm_radix *rtree, vm_page_t page);
+void		vm_radix_wait(void);
 boolean_t	vm_radix_is_singleton(struct vm_radix *rtree);
 vm_page_t	vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index);
 vm_page_t	vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index);
diff --git a/sys/x86/iommu/intel_utils.c b/sys/x86/iommu/intel_utils.c
index 0dcb618d31c6..2d05cf75e624 100644
--- a/sys/x86/iommu/intel_utils.c
+++ b/sys/x86/iommu/intel_utils.c
@@ -270,7 +270,9 @@ dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
 			break;
 		}
 		m = vm_page_alloc_contig(obj, idx, VM_ALLOC_NOBUSY |
-		    VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP | zeroed, 1, 0,
+		    VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP | zeroed |
+		    ((flags & DMAR_PGF_WAITOK) ?
+		    VM_ALLOC_WAITFAIL : VM_ALLOC_NOWAIT), 1, 0,
 		    dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
 		if ((flags & DMAR_PGF_OBJL) == 0)
 			VM_OBJECT_WUNLOCK(obj);
@@ -282,11 +284,6 @@ dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
 		}
 		if ((flags & DMAR_PGF_WAITOK) == 0)
 			break;
-		if ((flags & DMAR_PGF_OBJL) != 0)
-			VM_OBJECT_WUNLOCK(obj);
-		VM_WAIT;
-		if ((flags & DMAR_PGF_OBJL) != 0)
-			VM_OBJECT_WLOCK(obj);
 	}
 	return (m);
 }