Use the sleepq lock rather than the page lock to protect against wakeup
races with page busy state.  The object lock is still used as an interlock
to ensure that the identity stays valid.  Most callers should use
vm_page_sleep_if_busy() to handle the locking particulars.

Reviewed by:	alc, kib, markj
Sponsored by:	Netflix
Differential Revision:	https://reviews.freebsd.org/D21255
Jeff Roberson 2019-09-10 18:27:45 +00:00
parent d461d3e760
commit 4cdea4a853
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=352174
8 changed files with 89 additions and 173 deletions
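
For context, a minimal sketch (not part of this commit) of the caller-side pattern the change converges on: waiting for a busy page through vm_page_sleep_if_busy() with only the object lock held. The wrapper function and the "pwait" wait message are hypothetical placeholders; the VM calls are the real interfaces touched in the hunks below.

static void
wait_for_page(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	VM_OBJECT_WLOCK(object);
retry:
	m = vm_page_lookup(object, pindex);
	if (m != NULL && vm_page_sleep_if_busy(m, "pwait")) {
		/*
		 * The sleep dropped and reacquired the object lock, so the
		 * page's identity may have changed; look it up again.  No
		 * page lock is taken anywhere on this path.
		 */
		goto retry;
	}
	/* ... operate on m (or handle m == NULL) with the object lock held ... */
	VM_OBJECT_WUNLOCK(object);
}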

View File

@@ -422,10 +422,7 @@ page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
* likely to reclaim it.
*/
vm_page_reference(pp);
vm_page_lock(pp);
zfs_vmobject_wunlock(obj);
vm_page_busy_sleep(pp, "zfsmwb", true);
zfs_vmobject_wlock(obj);
vm_page_sleep_if_xbusy(pp, "zfsmwb");
continue;
}
vm_page_sbusy(pp);
@@ -473,10 +470,7 @@ page_wire(vnode_t *vp, int64_t start)
* likely to reclaim it.
*/
vm_page_reference(pp);
vm_page_lock(pp);
zfs_vmobject_wunlock(obj);
vm_page_busy_sleep(pp, "zfsmwb", true);
zfs_vmobject_wlock(obj);
vm_page_sleep_if_xbusy(pp, "zfsmwb");
continue;
}

View File

@@ -232,10 +232,7 @@ ttm_bo_vm_fault(vm_object_t vm_obj, vm_ooffset_t offset,
VM_OBJECT_WLOCK(vm_obj);
if (vm_page_busied(m)) {
vm_page_lock(m);
VM_OBJECT_WUNLOCK(vm_obj);
vm_page_busy_sleep(m, "ttmpbs", false);
VM_OBJECT_WLOCK(vm_obj);
vm_page_sleep_if_busy(m, "ttmpbs");
ttm_mem_io_unlock(man);
ttm_bo_unreserve(bo);
goto retry;

View File

@@ -2931,12 +2931,8 @@ vfs_vmio_invalidate(struct buf *bp)
presid = resid > (PAGE_SIZE - poffset) ?
(PAGE_SIZE - poffset) : resid;
KASSERT(presid >= 0, ("brelse: extra page"));
while (vm_page_xbusied(m)) {
vm_page_lock(m);
VM_OBJECT_WUNLOCK(obj);
vm_page_busy_sleep(m, "mbncsh", true);
VM_OBJECT_WLOCK(obj);
}
while (vm_page_xbusied(m))
vm_page_sleep_if_xbusy(m, "mbncsh");
if (pmap_page_wired_mappings(m) == 0)
vm_page_set_invalid(m, poffset, presid);
vm_page_release_locked(m, flags);
@@ -4565,10 +4561,7 @@ vfs_drain_busy_pages(struct buf *bp)
for (; last_busied < i; last_busied++)
vm_page_sbusy(bp->b_pages[last_busied]);
while (vm_page_xbusied(m)) {
vm_page_lock(m);
VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
vm_page_busy_sleep(m, "vbpage", true);
VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
vm_page_sleep_if_xbusy(m, "vbpage");
}
}
}

View File

@@ -219,10 +219,7 @@ phys_pager_populate(vm_object_t object, vm_pindex_t pidx,
pmap_zero_page(m);
m->valid = VM_PAGE_BITS_ALL;
} else if (vm_page_xbusied(m)) {
vm_page_lock(m);
VM_OBJECT_WUNLOCK(object);
vm_page_busy_sleep(m, "physb", true);
VM_OBJECT_WLOCK(object);
vm_page_sleep_if_xbusy(m, "physb");
goto retry;
} else {
vm_page_xbusy(m);

View File

@@ -510,7 +510,7 @@ vm_fault_populate(struct faultstate *fs, vm_prot_t prot, int fault_type,
*m_hold = &m[i];
vm_page_wire(&m[i]);
}
vm_page_xunbusy_maybelocked(&m[i]);
vm_page_xunbusy(&m[i]);
}
if (m_mtx != NULL)
mtx_unlock(m_mtx);

View File

@@ -1160,9 +1160,7 @@ vm_object_madvise(vm_object_t object, vm_pindex_t pindex, vm_pindex_t end,
("vm_object_madvise: page %p is not managed", tm));
if (vm_page_busied(tm)) {
if (object != tobject)
VM_OBJECT_WUNLOCK(tobject);
vm_page_lock(tm);
VM_OBJECT_WUNLOCK(object);
VM_OBJECT_WUNLOCK(object);
if (advice == MADV_WILLNEED) {
/*
* Reference the page before unlocking and
@@ -1345,10 +1343,7 @@ vm_object_split(vm_map_entry_t entry)
*/
if (vm_page_busied(m)) {
VM_OBJECT_WUNLOCK(new_object);
vm_page_lock(m);
VM_OBJECT_WUNLOCK(orig_object);
vm_page_busy_sleep(m, "spltwt", false);
VM_OBJECT_WLOCK(orig_object);
vm_page_sleep_if_busy(m, "spltwt");
VM_OBJECT_WLOCK(new_object);
goto retry;
}
@@ -1415,15 +1410,16 @@ vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p, vm_page_t next,
("invalid ownership %p %p %p", p, object, backing_object));
if ((op & OBSC_COLLAPSE_NOWAIT) != 0)
return (next);
if (p != NULL)
vm_page_lock(p);
VM_OBJECT_WUNLOCK(object);
VM_OBJECT_WUNLOCK(backing_object);
/* The page is only NULL when rename fails. */
if (p == NULL)
if (p == NULL) {
vm_radix_wait();
else
} else {
if (p->object == object)
VM_OBJECT_WUNLOCK(backing_object);
else
VM_OBJECT_WUNLOCK(object);
vm_page_busy_sleep(p, "vmocol", false);
}
VM_OBJECT_WLOCK(object);
VM_OBJECT_WLOCK(backing_object);
return (TAILQ_FIRST(&backing_object->memq));
@@ -1837,7 +1833,6 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
int options)
{
vm_page_t p, next;
struct mtx *mtx;
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT((object->flags & OBJ_UNMANAGED) == 0 ||
@@ -1848,7 +1843,6 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
vm_object_pip_add(object, 1);
again:
p = vm_page_find_least(object, start);
mtx = NULL;
/*
* Here, the variable "p" is either (1) the page with the least pindex
@@ -1865,17 +1859,8 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
* however, be invalidated if the option OBJPR_CLEANONLY is
* not specified.
*/
vm_page_change_lock(p, &mtx);
if (vm_page_xbusied(p)) {
VM_OBJECT_WUNLOCK(object);
vm_page_busy_sleep(p, "vmopax", true);
VM_OBJECT_WLOCK(object);
goto again;
}
if (vm_page_busied(p)) {
VM_OBJECT_WUNLOCK(object);
vm_page_busy_sleep(p, "vmopar", false);
VM_OBJECT_WLOCK(object);
vm_page_sleep_if_busy(p, "vmopar");
goto again;
}
if (vm_page_wired(p)) {
@@ -1904,8 +1889,6 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
goto wired;
vm_page_free(p);
}
if (mtx != NULL)
mtx_unlock(mtx);
vm_object_pip_wakeup(object);
}
@@ -2190,8 +2173,7 @@ vm_object_unwire(vm_object_t object, vm_ooffset_t offset, vm_size_t length,
m = TAILQ_NEXT(m, listq);
}
if (vm_page_xbusied(tm)) {
vm_page_lock(tm);
for (tobject = object; locked_depth >= 1;
for (tobject = object; locked_depth > 1;
locked_depth--) {
t1object = tobject->backing_object;
VM_OBJECT_RUNLOCK(tobject);

View File

@@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sleepqueue.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
@@ -873,27 +874,17 @@ void
vm_page_busy_downgrade(vm_page_t m)
{
u_int x;
bool locked;
vm_page_assert_xbusied(m);
locked = mtx_owned(vm_page_lockptr(m));
x = m->busy_lock;
for (;;) {
x = m->busy_lock;
x &= VPB_BIT_WAITERS;
if (x != 0 && !locked)
vm_page_lock(m);
if (atomic_cmpset_rel_int(&m->busy_lock,
VPB_SINGLE_EXCLUSIVER | x, VPB_SHARERS_WORD(1)))
if (atomic_fcmpset_rel_int(&m->busy_lock,
&x, VPB_SHARERS_WORD(1)))
break;
if (x != 0 && !locked)
vm_page_unlock(m);
}
if (x != 0) {
if ((x & VPB_BIT_WAITERS) != 0)
wakeup(m);
if (!locked)
vm_page_unlock(m);
}
}
/*
@@ -920,35 +911,23 @@ vm_page_sunbusy(vm_page_t m)
{
u_int x;
vm_page_lock_assert(m, MA_NOTOWNED);
vm_page_assert_sbusied(m);
x = m->busy_lock;
for (;;) {
x = m->busy_lock;
if (VPB_SHARERS(x) > 1) {
if (atomic_cmpset_int(&m->busy_lock, x,
if (atomic_fcmpset_int(&m->busy_lock, &x,
x - VPB_ONE_SHARER))
break;
continue;
}
if ((x & VPB_BIT_WAITERS) == 0) {
KASSERT(x == VPB_SHARERS_WORD(1),
("vm_page_sunbusy: invalid lock state"));
if (atomic_cmpset_int(&m->busy_lock,
VPB_SHARERS_WORD(1), VPB_UNBUSIED))
break;
KASSERT((x & ~VPB_BIT_WAITERS) == VPB_SHARERS_WORD(1),
("vm_page_sunbusy: invalid lock state"));
if (!atomic_fcmpset_rel_int(&m->busy_lock, &x, VPB_UNBUSIED))
continue;
}
KASSERT(x == (VPB_SHARERS_WORD(1) | VPB_BIT_WAITERS),
("vm_page_sunbusy: invalid lock state for waiters"));
vm_page_lock(m);
if (!atomic_cmpset_int(&m->busy_lock, x, VPB_UNBUSIED)) {
vm_page_unlock(m);
continue;
}
if ((x & VPB_BIT_WAITERS) == 0)
break;
wakeup(m);
vm_page_unlock(m);
break;
}
}
@@ -956,28 +935,35 @@ vm_page_sunbusy(vm_page_t m)
/*
* vm_page_busy_sleep:
*
* Sleep and release the page lock, using the page pointer as wchan.
* Sleep if the page is busy, using the page pointer as wchan.
* This is used to implement the hard-path of busying mechanism.
*
* The given page must be locked.
*
* If nonshared is true, sleep only if the page is xbusy.
*
* The object lock must be held on entry and will be released on exit.
*/
void
vm_page_busy_sleep(vm_page_t m, const char *wmesg, bool nonshared)
{
vm_object_t obj;
u_int x;
vm_page_assert_locked(m);
obj = m->object;
vm_page_lock_assert(m, MA_NOTOWNED);
VM_OBJECT_ASSERT_LOCKED(obj);
sleepq_lock(m);
x = m->busy_lock;
if (x == VPB_UNBUSIED || (nonshared && (x & VPB_BIT_SHARED) != 0) ||
((x & VPB_BIT_WAITERS) == 0 &&
!atomic_cmpset_int(&m->busy_lock, x, x | VPB_BIT_WAITERS))) {
vm_page_unlock(m);
VM_OBJECT_DROP(obj);
sleepq_release(m);
return;
}
msleep(m, vm_page_lockptr(m), PVM | PDROP, wmesg, 0);
VM_OBJECT_DROP(obj);
sleepq_add(m, NULL, wmesg, 0, 0);
sleepq_wait(m, PVM);
}
/*
@@ -992,55 +978,20 @@ vm_page_trysbusy(vm_page_t m)
{
u_int x;
x = m->busy_lock;
for (;;) {
x = m->busy_lock;
if ((x & VPB_BIT_SHARED) == 0)
return (0);
if (atomic_cmpset_acq_int(&m->busy_lock, x, x + VPB_ONE_SHARER))
if (atomic_fcmpset_acq_int(&m->busy_lock, &x,
x + VPB_ONE_SHARER))
return (1);
}
}
static void
vm_page_xunbusy_locked(vm_page_t m)
{
vm_page_assert_xbusied(m);
vm_page_assert_locked(m);
atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED);
/* There is a waiter, do wakeup() instead of vm_page_flash(). */
wakeup(m);
}
void
vm_page_xunbusy_maybelocked(vm_page_t m)
{
bool lockacq;
vm_page_assert_xbusied(m);
/*
* Fast path for unbusy. If it succeeds, we know that there
* are no waiters, so we do not need a wakeup.
*/
if (atomic_cmpset_rel_int(&m->busy_lock, VPB_SINGLE_EXCLUSIVER,
VPB_UNBUSIED))
return;
lockacq = !mtx_owned(vm_page_lockptr(m));
if (lockacq)
vm_page_lock(m);
vm_page_xunbusy_locked(m);
if (lockacq)
vm_page_unlock(m);
}
/*
* vm_page_xunbusy_hard:
*
* Called after the first try the exclusive unbusy of a page failed.
* It is assumed that the waiters bit is on.
* Called when unbusy has failed because there is a waiter.
*/
void
vm_page_xunbusy_hard(vm_page_t m)
@@ -1048,34 +999,10 @@ vm_page_xunbusy_hard(vm_page_t m)
vm_page_assert_xbusied(m);
vm_page_lock(m);
vm_page_xunbusy_locked(m);
vm_page_unlock(m);
}
/*
* vm_page_flash:
*
* Wakeup anyone waiting for the page.
* The ownership bits do not change.
*
* The given page must be locked.
*/
void
vm_page_flash(vm_page_t m)
{
u_int x;
vm_page_lock_assert(m, MA_OWNED);
for (;;) {
x = m->busy_lock;
if ((x & VPB_BIT_WAITERS) == 0)
return;
if (atomic_cmpset_int(&m->busy_lock, x,
x & (~VPB_BIT_WAITERS)))
break;
}
/*
* Wake the waiter.
*/
atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED);
wakeup(m);
}
@@ -1264,7 +1191,7 @@ vm_page_readahead_finish(vm_page_t m)
/*
* vm_page_sleep_if_busy:
*
* Sleep and release the page queues lock if the page is busied.
* Sleep and release the object lock if the page is busied.
* Returns TRUE if the thread slept.
*
* The given page must be unlocked and object containing it must
@@ -1287,8 +1214,6 @@ vm_page_sleep_if_busy(vm_page_t m, const char *msg)
* held by the callers.
*/
obj = m->object;
vm_page_lock(m);
VM_OBJECT_WUNLOCK(obj);
vm_page_busy_sleep(m, msg, false);
VM_OBJECT_WLOCK(obj);
return (TRUE);
@@ -1296,6 +1221,39 @@ vm_page_sleep_if_busy(vm_page_t m, const char *msg)
return (FALSE);
}
/*
* vm_page_sleep_if_xbusy:
*
* Sleep and release the object lock if the page is xbusied.
* Returns TRUE if the thread slept.
*
* The given page must be unlocked and object containing it must
* be locked.
*/
int
vm_page_sleep_if_xbusy(vm_page_t m, const char *msg)
{
vm_object_t obj;
vm_page_lock_assert(m, MA_NOTOWNED);
VM_OBJECT_ASSERT_WLOCKED(m->object);
if (vm_page_xbusied(m)) {
/*
* The page-specific object must be cached because page
* identity can change during the sleep, causing the
* re-lock of a different object.
* It is assumed that a reference to the object is already
* held by the callers.
*/
obj = m->object;
vm_page_busy_sleep(m, msg, true);
VM_OBJECT_WLOCK(obj);
return (TRUE);
}
return (FALSE);
}
/*
* vm_page_dirty_KBI: [ internal use only ]
*
@@ -1452,7 +1410,7 @@ vm_page_object_remove(vm_page_t m)
KASSERT((m->ref_count & VPRC_OBJREF) != 0,
("page %p is missing its object ref", m));
if (vm_page_xbusied(m))
vm_page_xunbusy_maybelocked(m);
vm_page_xunbusy(m);
mrem = vm_radix_remove(&object->rtree, m->pindex);
KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m));
@@ -1598,7 +1556,7 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex)
mold->object = NULL;
atomic_clear_int(&mold->ref_count, VPRC_OBJREF);
vm_page_xunbusy_maybelocked(mold);
vm_page_xunbusy(mold);
/*
* The object's resident_page_count does not change because we have
@@ -4089,8 +4047,6 @@ vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
* likely to reclaim it.
*/
vm_page_aflag_set(m, PGA_REFERENCED);
vm_page_lock(m);
VM_OBJECT_WUNLOCK(object);
vm_page_busy_sleep(m, "pgrbwt", (allocflags &
VM_ALLOC_IGN_SBUSY) != 0);
VM_OBJECT_WLOCK(object);
@@ -4188,8 +4144,6 @@ vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
* likely to reclaim it.
*/
vm_page_aflag_set(m, PGA_REFERENCED);
vm_page_lock(m);
VM_OBJECT_WUNLOCK(object);
vm_page_busy_sleep(m, "grbmaw", (allocflags &
VM_ALLOC_IGN_SBUSY) != 0);
VM_OBJECT_WLOCK(object);

View File

@@ -541,7 +541,6 @@ malloc2vm_flags(int malloc_flags)
void vm_page_busy_downgrade(vm_page_t m);
void vm_page_busy_sleep(vm_page_t m, const char *msg, bool nonshared);
void vm_page_flash(vm_page_t m);
void vm_page_free(vm_page_t m);
void vm_page_free_zero(vm_page_t m);
@@ -604,6 +603,7 @@ vm_page_t vm_page_scan_contig(u_long npages, vm_page_t m_start,
vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options);
void vm_page_set_valid_range(vm_page_t m, int base, int size);
int vm_page_sleep_if_busy(vm_page_t m, const char *msg);
int vm_page_sleep_if_xbusy(vm_page_t m, const char *msg);
vm_offset_t vm_page_startup(vm_offset_t vaddr);
void vm_page_sunbusy(vm_page_t m);
void vm_page_swapqueue(vm_page_t m, uint8_t oldq, uint8_t newq);
@@ -618,7 +618,6 @@ void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
void vm_page_wire(vm_page_t);
bool vm_page_wire_mapped(vm_page_t m);
void vm_page_xunbusy_hard(vm_page_t m);
void vm_page_xunbusy_maybelocked(vm_page_t m);
void vm_page_set_validclean (vm_page_t, int, int);
void vm_page_clear_dirty (vm_page_t, int, int);
void vm_page_set_invalid (vm_page_t, int, int);
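
A rough sketch of why the sleepq chain lock closes the wakeup race that the page lock used to cover; this is an assumption drawn from the new vm_page_busy_sleep() and vm_page_xunbusy_hard() bodies above, not text from the commit.

/*
 * Sleeper side (vm_page_busy_sleep):
 *	sleepq_lock(m);
 *	x = m->busy_lock;
 *	if (page no longer busy, or VPB_BIT_WAITERS cannot be set) {
 *		VM_OBJECT_DROP(obj);
 *		sleepq_release(m);
 *		return;
 *	}
 *	VM_OBJECT_DROP(obj);
 *	sleepq_add(m, NULL, wmesg, 0, 0);
 *	sleepq_wait(m, PVM);		(releases the sleepq lock as it sleeps)
 *
 * Waker side (vm_page_xunbusy_hard):
 *	atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED);
 *	wakeup(m);			(takes the same sleepq chain lock)
 *
 * Either the sleeper observes the page already unbusied and returns without
 * sleeping, or the waker sees VPB_BIT_WAITERS and its wakeup() must wait for
 * the sleepq chain lock, which the sleeper holds until it is actually on the
 * sleep queue, so the wakeup cannot be lost.  The object lock is dropped
 * before sleeping and serves only as the interlock that keeps the page's
 * identity stable until then.
 */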