diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 885a5bea8e69..d3ad661c16e9 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -830,47 +830,39 @@ vm_page_reference(vm_page_t m)
 	vm_page_aflag_set(m, PGA_REFERENCED);
 }
 
+/*
+ *	vm_page_trybusy
+ *
+ *	Helper routine for grab functions to trylock busy.
+ *
+ *	Returns true on success and false on failure.
+ */
 static bool
-vm_page_acquire_flags(vm_page_t m, int allocflags)
+vm_page_trybusy(vm_page_t m, int allocflags)
 {
-	bool locked;
 
 	if ((allocflags & (VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)) != 0)
-		locked = vm_page_trysbusy(m);
+		return (vm_page_trysbusy(m));
 	else
-		locked = vm_page_tryxbusy(m);
-	if (locked && (allocflags & VM_ALLOC_WIRED) != 0)
-		vm_page_wire(m);
-	return (locked);
+		return (vm_page_tryxbusy(m));
 }
 
 /*
- *	vm_page_busy_sleep_flags
+ *	vm_page_tryacquire
- *
- *	Sleep for busy according to VM_ALLOC_ parameters.  Returns true
- *	if the caller should retry and false otherwise.
+ *
+ *	Helper routine for grab functions to trylock busy and wire.
+ *
+ *	Returns true on success and false on failure.
  */
-static bool
-vm_page_busy_sleep_flags(vm_object_t object, vm_page_t m, const char *wmesg,
-    int allocflags)
+static inline bool
+vm_page_tryacquire(vm_page_t m, int allocflags)
 {
+	bool locked;
 
-	if ((allocflags & VM_ALLOC_NOWAIT) != 0)
-		return (false);
-
-	/*
-	 * Reference the page before unlocking and sleeping so that
-	 * the page daemon is less likely to reclaim it.
-	 */
-	if ((allocflags & VM_ALLOC_NOCREAT) == 0)
-		vm_page_reference(m);
-
-	if (_vm_page_busy_sleep(object, m, m->pindex, wmesg, allocflags, true))
-		VM_OBJECT_WLOCK(object);
-	if ((allocflags & VM_ALLOC_WAITFAIL) != 0)
-		return (false);
-
-	return (true);
+	locked = vm_page_trybusy(m, allocflags);
+	if (locked && (allocflags & VM_ALLOC_WIRED) != 0)
+		vm_page_wire(m);
+	return (locked);
 }
 
 /*
@@ -894,7 +886,7 @@ vm_page_busy_acquire(vm_page_t m, int allocflags)
 	 */
 	obj = m->object;
 	for (;;) {
-		if (vm_page_acquire_flags(m, allocflags))
+		if (vm_page_tryacquire(m, allocflags))
 			return (true);
 		if ((allocflags & VM_ALLOC_NOWAIT) != 0)
 			return (false);
@@ -1604,6 +1596,7 @@ vm_page_object_remove(vm_page_t m)
 	if ((m->a.flags & PGA_SWAP_FREE) != 0)
 		vm_pager_page_unswapped(m);
 
+	m->object = NULL;
 	mrem = vm_radix_remove(&object->rtree, m->pindex);
 	KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m));
 
@@ -1658,7 +1651,6 @@ vm_page_remove_xbusy(vm_page_t m)
 {
 
 	vm_page_object_remove(m);
-	m->object = NULL;
 	return (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF);
 }
 
@@ -1678,6 +1670,38 @@ vm_page_lookup(vm_object_t object, vm_pindex_t pindex)
 	return (vm_radix_lookup(&object->rtree, pindex));
 }
 
+/*
+ * This should only be used by lockless functions for releasing transient
+ * incorrect acquires.  The page may have been freed after we acquired a
+ * busy lock.  In this case busy_lock == VPB_FREED and we have nothing
+ * further to do.
+ */
+static void
+vm_page_busy_release(vm_page_t m)
+{
+	u_int x;
+
+	x = atomic_load_int(&m->busy_lock);
+	for (;;) {
+		if (x == VPB_FREED)
+			break;
+		if ((x & VPB_BIT_SHARED) != 0 && VPB_SHARERS(x) > 1) {
+			if (atomic_fcmpset_int(&m->busy_lock, &x,
+			    x - VPB_ONE_SHARER))
+				break;
+			continue;
+		}
+		KASSERT((x & VPB_BIT_SHARED) != 0 ||
+		    (x & ~VPB_BIT_WAITERS) == VPB_CURTHREAD_EXCLUSIVE,
+		    ("vm_page_busy_release: %p xbusy not owned.", m));
+		if (!atomic_fcmpset_rel_int(&m->busy_lock, &x, VPB_UNBUSIED))
+			continue;
+		if ((x & VPB_BIT_WAITERS) != 0)
+			wakeup(m);
+		break;
+	}
+}
+
 /*
  *	vm_page_find_least:
  *
@@ -3688,7 +3712,6 @@ vm_page_free_prep(vm_page_t m)
 		    ("vm_page_free_prep: page %p has unexpected ref_count %u",
 		    m, m->ref_count));
 		vm_page_object_remove(m);
-		m->object = NULL;
 		m->ref_count -= VPRC_OBJREF;
 	} else
 		vm_page_assert_unbusied(m);
@@ -4245,18 +4268,81 @@ vm_page_advise(vm_page_t m, int advice)
 		vm_page_launder(m);
 }
 
+/*
+ *	vm_page_grab_release
+ *
+ *	Helper routine for grab functions to release busy on return.
+ */
+static inline void
+vm_page_grab_release(vm_page_t m, int allocflags)
+{
+
+	if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
+		if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0)
+			vm_page_sunbusy(m);
+		else
+			vm_page_xunbusy(m);
+	}
+}
+
+/*
+ *	vm_page_grab_sleep
+ *
+ *	Sleep for busy according to VM_ALLOC_ parameters.  Returns true
+ *	if the caller should retry and false otherwise.
+ *
+ *	If the object is locked on entry, it will be unlocked on a false
+ *	return; on a true return it is still locked, although it may have
+ *	been dropped and reacquired during the sleep.
+ */
+static bool
+vm_page_grab_sleep(vm_object_t object, vm_page_t m, vm_pindex_t pindex,
+    const char *wmesg, int allocflags, bool locked)
+{
+
+	if ((allocflags & VM_ALLOC_NOWAIT) != 0)
+		return (false);
+
+	/*
+	 * Reference the page before unlocking and sleeping so that
+	 * the page daemon is less likely to reclaim it.
+	 */
+	if (locked && (allocflags & VM_ALLOC_NOCREAT) == 0)
+		vm_page_reference(m);
+
+	if (_vm_page_busy_sleep(object, m, m->pindex, wmesg, allocflags,
+	    locked) && locked)
+		VM_OBJECT_WLOCK(object);
+	if ((allocflags & VM_ALLOC_WAITFAIL) != 0)
+		return (false);
+
+	return (true);
+}
+
+/*
+ * Assert that the grab flags are valid.
+ */
+static inline void
+vm_page_grab_check(int allocflags)
+{
+
+	KASSERT((allocflags & VM_ALLOC_NOBUSY) == 0 ||
+	    (allocflags & VM_ALLOC_WIRED) != 0,
+	    ("vm_page_grab*: the pages must be busied or wired"));
+
+	KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
+	    (allocflags & VM_ALLOC_IGN_SBUSY) != 0,
+	    ("vm_page_grab*: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch"));
+}
+
+/*
+ * Calculate the page allocation flags for grab.
+ */
 static inline int
 vm_page_grab_pflags(int allocflags)
 {
 	int pflags;
 
-	KASSERT((allocflags & VM_ALLOC_NOBUSY) == 0 ||
-	    (allocflags & VM_ALLOC_WIRED) != 0,
-	    ("vm_page_grab_pflags: the pages must be busied or wired"));
-	KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
-	    (allocflags & VM_ALLOC_IGN_SBUSY) != 0,
-	    ("vm_page_grab_pflags: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY "
-	    "mismatch"));
 	pflags = allocflags &
 	    ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL |
 	    VM_ALLOC_NOBUSY);
@@ -4283,15 +4369,15 @@ vm_page_t
 vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
 {
 	vm_page_t m;
-	int pflags;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
-	pflags = vm_page_grab_pflags(allocflags);
+	vm_page_grab_check(allocflags);
+
 retrylookup:
 	if ((m = vm_page_lookup(object, pindex)) != NULL) {
-		if (!vm_page_acquire_flags(m, allocflags)) {
-			if (vm_page_busy_sleep_flags(object, m, "pgrbwt",
-			    allocflags))
+		if (!vm_page_tryacquire(m, allocflags)) {
+			if (vm_page_grab_sleep(object, m, pindex, "pgrbwt",
+			    allocflags, true))
 				goto retrylookup;
 			return (NULL);
 		}
@@ -4299,7 +4385,7 @@ vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
 	}
 	if ((allocflags & VM_ALLOC_NOCREAT) != 0)
 		return (NULL);
-	m = vm_page_alloc(object, pindex, pflags);
+	m = vm_page_alloc(object, pindex, vm_page_grab_pflags(allocflags));
 	if (m == NULL) {
 		if ((allocflags & (VM_ALLOC_NOWAIT | VM_ALLOC_WAITFAIL)) != 0)
 			return (NULL);
@@ -4309,12 +4395,104 @@ vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
 		pmap_zero_page(m);
 
 out:
-	if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
-		if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0)
-			vm_page_sunbusy(m);
-		else
-			vm_page_xunbusy(m);
+	vm_page_grab_release(m, allocflags);
+
+	return (m);
+}
+
+/*
+ * Locklessly attempt to acquire a page given an (object, pindex) tuple
+ * and an optional previous page to avoid the radix lookup.  The resulting
+ * page will be validated against the identity tuple and busied or wired
+ * as requested.  A NULL *mp return guarantees that the page was not in
+ * the radix tree at the time of the call, but callers must perform
+ * higher level synchronization or retry the operation under a lock if
+ * they require an atomic answer.  This is the only lock-free validation
+ * routine; other routines can depend on the resulting page state.
+ *
+ * The return value indicates whether the operation failed due to caller
+ * flags.  The return is tri-state with mp:
+ *
+ * (true, *mp != NULL) - The operation was successful.
+ * (true, *mp == NULL) - The page was not found in tree.
+ * (false, *mp == NULL) - WAITFAIL or NOWAIT prevented acquisition.
+ */
+static bool
+vm_page_acquire_unlocked(vm_object_t object, vm_pindex_t pindex,
+    vm_page_t prev, vm_page_t *mp, int allocflags)
+{
+	vm_page_t m;
+
+	vm_page_grab_check(allocflags);
+	MPASS(prev == NULL || vm_page_busied(prev) || vm_page_wired(prev));
+
+	*mp = NULL;
+	for (;;) {
+		/*
+		 * We may see a false NULL here because the previous page
+		 * has been removed or just inserted and the list is loaded
+		 * without barriers.  Switch to radix to verify.
+		 */
+		if (prev == NULL || (m = TAILQ_NEXT(prev, listq)) == NULL ||
+		    m->pindex != pindex ||
+		    atomic_load_ptr(&m->object) != object) {
+			prev = NULL;
+			/*
+			 * This guarantees the result is instantaneously
+			 * correct.
+			 */
+			m = vm_radix_lookup_unlocked(&object->rtree, pindex);
+		}
+		if (m == NULL)
+			return (true);
+		if (vm_page_trybusy(m, allocflags)) {
+			if (m->object == object && m->pindex == pindex)
+				break;
+			/* relookup. */
+			vm_page_busy_release(m);
+			cpu_spinwait();
+			continue;
+		}
+		if (!vm_page_grab_sleep(object, m, pindex, "pgnslp",
+		    allocflags, false))
+			return (false);
 	}
+	if ((allocflags & VM_ALLOC_WIRED) != 0)
+		vm_page_wire(m);
+	vm_page_grab_release(m, allocflags);
+	*mp = m;
+	return (true);
+}
+
+/*
+ * Try to locklessly grab a page and fall back to the object lock if NOCREAT
+ * is not set.
+ */
+vm_page_t
+vm_page_grab_unlocked(vm_object_t object, vm_pindex_t pindex, int allocflags)
+{
+	vm_page_t m;
+
+	vm_page_grab_check(allocflags);
+
+	if (!vm_page_acquire_unlocked(object, pindex, NULL, &m, allocflags))
+		return (NULL);
+	if (m != NULL)
+		return (m);
+
+	/*
+	 * The radix lockless lookup should never return a false negative.
+	 * If the user specifies NOCREAT they are guaranteed there was no
+	 * page present at the instant of the call.  A NOCREAT caller must
+	 * handle create races gracefully.
+	 */
+	if ((allocflags & VM_ALLOC_NOCREAT) != 0)
+		return (NULL);
+
+	VM_OBJECT_WLOCK(object);
+	m = vm_page_grab(object, pindex, allocflags);
+	VM_OBJECT_WUNLOCK(object);
+
 	return (m);
 }
 
@@ -4330,7 +4508,6 @@ vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex, int al
 {
 	vm_page_t m;
 	vm_page_t ma[VM_INITIAL_PAGEIN];
-	bool sleep, xbusy;
 	int after, i, pflags, rv;
 
 	KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
@@ -4344,7 +4521,6 @@ vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex, int al
 		pflags |= VM_ALLOC_WAITFAIL;
 
 retrylookup:
-	xbusy = false;
 	if ((m = vm_page_lookup(object, pindex)) != NULL) {
 		/*
 		 * If the page is fully valid it can only become invalid
@@ -4355,41 +4531,27 @@ vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex, int al
 		 * However, we will not end up with an invalid page and a
 		 * shared lock.
 		 */
-		if (!vm_page_all_valid(m) ||
-		    (allocflags & (VM_ALLOC_IGN_SBUSY | VM_ALLOC_SBUSY)) == 0) {
-			sleep = !vm_page_tryxbusy(m);
-			xbusy = true;
-		} else
-			sleep = !vm_page_trysbusy(m);
-		if (sleep) {
-			(void)vm_page_busy_sleep_flags(object, m, "pgrbwt",
-			    allocflags);
+		if (!vm_page_trybusy(m,
+		    vm_page_all_valid(m) ? allocflags : 0)) {
+			(void)vm_page_grab_sleep(object, m, pindex, "pgrbwt",
+			    allocflags, true);
 			goto retrylookup;
 		}
-		if ((allocflags & VM_ALLOC_NOCREAT) != 0 &&
-		    !vm_page_all_valid(m)) {
-			if (xbusy)
-				vm_page_xunbusy(m);
-			else
-				vm_page_sunbusy(m);
+		if (vm_page_all_valid(m))
+			goto out;
+		if ((allocflags & VM_ALLOC_NOCREAT) != 0) {
+			vm_page_busy_release(m);
 			*mp = NULL;
 			return (VM_PAGER_FAIL);
 		}
-		if ((allocflags & VM_ALLOC_WIRED) != 0)
-			vm_page_wire(m);
-		if (vm_page_all_valid(m))
-			goto out;
 	} else if ((allocflags & VM_ALLOC_NOCREAT) != 0) {
 		*mp = NULL;
 		return (VM_PAGER_FAIL);
-	} else if ((m = vm_page_alloc(object, pindex, pflags)) != NULL) {
-		xbusy = true;
-	} else {
+	} else if ((m = vm_page_alloc(object, pindex, pflags)) == NULL) {
 		goto retrylookup;
 	}
 
 	vm_page_assert_xbusied(m);
-	MPASS(xbusy);
 	if (vm_pager_has_page(object, pindex, NULL, &after)) {
 		after = MIN(after, VM_INITIAL_PAGEIN);
 		after = MIN(after, allocflags >> VM_ALLOC_COUNT_SHIFT);
@@ -4415,8 +4577,6 @@ vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex, int al
 		/* Pager may have replaced a page. */
 		m = ma[0];
 		if (rv != VM_PAGER_OK) {
-			if ((allocflags & VM_ALLOC_WIRED) != 0)
-				vm_page_unwire_noq(m);
 			for (i = 0; i < after; i++) {
 				if (!vm_page_wired(ma[i]))
 					vm_page_free(ma[i]);
@@ -4433,18 +4593,59 @@ vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex, int al
 		vm_page_zero_invalid(m, TRUE);
 	}
 out:
-	if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
-		if (xbusy)
-			vm_page_xunbusy(m);
-		else
-			vm_page_sunbusy(m);
-	}
-	if ((allocflags & VM_ALLOC_SBUSY) != 0 && xbusy)
+	if ((allocflags & VM_ALLOC_WIRED) != 0)
+		vm_page_wire(m);
+	if ((allocflags & VM_ALLOC_SBUSY) != 0 && vm_page_xbusied(m))
 		vm_page_busy_downgrade(m);
+	else if ((allocflags & VM_ALLOC_NOBUSY) != 0)
+		vm_page_busy_release(m);
 	*mp = m;
 	return (VM_PAGER_OK);
 }
 
+/*
+ * Locklessly grab a valid page.  If the page is not valid or not yet
+ * allocated, this will fall back to the object lock method.
+ */
+int
+vm_page_grab_valid_unlocked(vm_page_t *mp, vm_object_t object,
+    vm_pindex_t pindex, int allocflags)
+{
+	vm_page_t m;
+	int flags;
+	int error;
+
+	KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
+	    (allocflags & VM_ALLOC_IGN_SBUSY) != 0,
+	    ("vm_page_grab_valid_unlocked: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY "
+	    "mismatch"));
+	KASSERT((allocflags &
+	    (VM_ALLOC_NOWAIT | VM_ALLOC_WAITFAIL | VM_ALLOC_ZERO)) == 0,
+	    ("vm_page_grab_valid_unlocked: Invalid flags 0x%X", allocflags));
+
+	/*
+	 * Attempt a lockless lookup and busy.  We need at least an sbusy
+	 * before we can inspect the valid field and return a wired page.
+	 */
+	flags = allocflags & ~(VM_ALLOC_NOBUSY | VM_ALLOC_WIRED);
+	if (!vm_page_acquire_unlocked(object, pindex, NULL, mp, flags))
+		return (VM_PAGER_FAIL);
+	if ((m = *mp) != NULL) {
+		if (vm_page_all_valid(m)) {
+			if ((allocflags & VM_ALLOC_WIRED) != 0)
+				vm_page_wire(m);
+			vm_page_grab_release(m, allocflags);
+			return (VM_PAGER_OK);
+		}
+		vm_page_busy_release(m);
+	}
+	VM_OBJECT_WLOCK(object);
+	error = vm_page_grab_valid(mp, object, pindex, allocflags);
+	VM_OBJECT_WUNLOCK(object);
+
+	return (error);
+}
+
 /*
  * Return the specified range of pages from the given object.  For each
  * page offset within the range, if a page already exists within the object
@@ -4482,6 +4683,7 @@ vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT(((u_int)allocflags >> VM_ALLOC_COUNT_SHIFT) == 0,
 	    ("vm_page_grap_pages: VM_ALLOC_COUNT() is not allowed"));
+	vm_page_grab_check(allocflags);
 
 	pflags = vm_page_grab_pflags(allocflags);
 	if (count == 0)
@@ -4497,9 +4699,9 @@ vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
 		mpred = TAILQ_PREV(m, pglist, listq);
 	for (; i < count; i++) {
 		if (m != NULL) {
-			if (!vm_page_acquire_flags(m, allocflags)) {
-				if (vm_page_busy_sleep_flags(object, m,
-				    "grbmaw", allocflags))
+			if (!vm_page_tryacquire(m, allocflags)) {
+				if (vm_page_grab_sleep(object, m, pindex,
+				    "grbmaw", allocflags, true))
 					goto retrylookup;
 				break;
 			}
@@ -4521,18 +4723,57 @@ vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
 				pmap_zero_page(m);
 			vm_page_valid(m);
 		}
-		if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
-			if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0)
-				vm_page_sunbusy(m);
-			else
-				vm_page_xunbusy(m);
-		}
+		vm_page_grab_release(m, allocflags);
 		ma[i] = mpred = m;
 		m = vm_page_next(m);
 	}
 	return (i);
 }
 
+/*
+ * Unlocked variant of vm_page_grab_pages().  This accepts the same flags
+ * and will fall back to the locked variant to handle allocation.
+ */
+int
+vm_page_grab_pages_unlocked(vm_object_t object, vm_pindex_t pindex,
+    int allocflags, vm_page_t *ma, int count)
+{
+	vm_page_t m, pred;
+	int flags;
+	int i;
+
+	vm_page_grab_check(allocflags);
+
+	/*
+	 * Modify flags for lockless acquire to hold the page until we
+	 * set it valid, if necessary.
+	 */
+	flags = allocflags & ~VM_ALLOC_NOBUSY;
+	pred = NULL;
+	for (i = 0; i < count; i++, pindex++) {
+		if (!vm_page_acquire_unlocked(object, pindex, pred, &m, flags))
+			return (i);
+		if (m == NULL)
+			break;
+		if ((flags & VM_ALLOC_ZERO) != 0 && vm_page_none_valid(m)) {
+			if ((m->flags & PG_ZERO) == 0)
+				pmap_zero_page(m);
+			vm_page_valid(m);
+		}
+		/* m will still be wired or busy according to flags. */
+		vm_page_grab_release(m, allocflags);
+		pred = ma[i] = m;
+	}
+	if ((allocflags & VM_ALLOC_NOCREAT) != 0)
+		return (i);
+	count -= i;
+	VM_OBJECT_WLOCK(object);
+	i += vm_page_grab_pages(object, pindex, allocflags, &ma[i], count);
+	VM_OBJECT_WUNLOCK(object);
+
+	return (i);
+}
+
 /*
  * Mapping function for valid or dirty bits in a page.
  *
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 649531f58f9d..c4d8914dcb15 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -614,11 +614,16 @@ vm_page_t vm_page_alloc_freelist(int, int);
 vm_page_t vm_page_alloc_freelist_domain(int, int, int);
 void vm_page_bits_set(vm_page_t m, vm_page_bits_t *bits, vm_page_bits_t set);
 bool vm_page_blacklist_add(vm_paddr_t pa, bool verbose);
-vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
+vm_page_t vm_page_grab(vm_object_t, vm_pindex_t, int);
+vm_page_t vm_page_grab_unlocked(vm_object_t, vm_pindex_t, int);
 int vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
     vm_page_t *ma, int count);
+int vm_page_grab_pages_unlocked(vm_object_t object, vm_pindex_t pindex,
+    int allocflags, vm_page_t *ma, int count);
 int vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex,
     int allocflags);
+int vm_page_grab_valid_unlocked(vm_page_t *mp, vm_object_t object,
+    vm_pindex_t pindex, int allocflags);
 void vm_page_deactivate(vm_page_t);
 void vm_page_deactivate_noreuse(vm_page_t);
 void vm_page_dequeue(vm_page_t m);
@@ -629,7 +634,7 @@ void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
 int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
 void vm_page_invalid(vm_page_t m);
 void vm_page_launder(vm_page_t m);
-vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t);
+vm_page_t vm_page_lookup(vm_object_t, vm_pindex_t);
 vm_page_t vm_page_next(vm_page_t m);
 void vm_page_pqbatch_drain(void);
 void vm_page_pqbatch_submit(vm_page_t m, uint8_t queue);
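For illustration, the sketch below shows how a caller might consume the new
lockless API introduced by this patch.  It is a hypothetical example, not part
of the change: the wrapper name example_read_page() and its error handling are
invented, while vm_page_grab_valid_unlocked(), the VM_ALLOC_* flags,
vm_page_unwire(), PQ_ACTIVE, and VM_PAGER_OK are the interfaces the patch
actually provides or relies on.

/*
 * Hypothetical caller sketch (not part of the patch): fetch a fully valid
 * page for read access, taking the object lock only when the lockless path
 * cannot resolve the page.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

static int
example_read_page(vm_object_t object, vm_pindex_t pindex, vm_page_t *mp)
{
	int rv;

	/*
	 * SBUSY + IGN_SBUSY allow a shared busy acquire; WIRED + NOBUSY
	 * hand back a wired but unbusied page, so the caller may keep
	 * using it after this call without holding the busy lock.
	 */
	rv = vm_page_grab_valid_unlocked(mp, object, pindex,
	    VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY | VM_ALLOC_WIRED |
	    VM_ALLOC_NOBUSY);
	if (rv != VM_PAGER_OK)
		return (EIO);

	/* ... read from the page referenced by *mp ... */

	vm_page_unwire(*mp, PQ_ACTIVE);
	return (0);
}

Dropping VM_ALLOC_NOBUSY from the flags would instead return the page still
shared-busied, at the cost of an explicit vm_page_sunbusy() once the caller is
done with it.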