(3/6) Add a shared object busy synchronization mechanism that blocks new page
busy acquires while held.

This allows code that would need to acquire and release a very large number
of page busy locks to use the old mechanism, where busy is only checked and
not held.  This comes at the cost of false positives, but never false
negatives; the single consumer, vm_fault_soft_fast(), handles the false
positives.

Reviewed by:    kib
Tested by:      pho
Sponsored by:   Netflix, Intel
Differential Revision:	https://reviews.freebsd.org/D21592
Jeff Roberson 2019-10-15 03:41:36 +00:00
parent 8da1c09853
commit 205be21d99
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=353538
16 changed files with 188 additions and 85 deletions
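
Before the per-file hunks, a usage sketch may help orient the reader.  The
code below is hypothetical and not part of this commit: example_scan_object
and its scan loop are invented for illustration, and only vm_object_busy(),
vm_object_unbusy(), vm_page_busied(), and the object lock macros are existing
kernel interfaces (the first two are added by this change).

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

/*
 * Hypothetical consumer sketch: while the object is busied, no new page
 * busy can be acquired, so a plain vm_page_busied() check is sufficient.
 * Pages that were already busy stay busy and are skipped; a stale answer
 * can only cause an unnecessary skip (a false positive), never report a
 * page as safe while someone can busy it underneath us.
 */
static void
example_scan_object(vm_object_t obj)
{
	vm_page_t m;

	VM_OBJECT_RLOCK(obj);
	vm_object_busy(obj);		/* block new page busy acquires */
	TAILQ_FOREACH(m, &obj->memq, listq) {
		if (vm_page_busied(m))	/* checked, never acquired */
			continue;
		/* Inspect the page here without busying it. */
	}
	vm_object_unbusy(obj);
	VM_OBJECT_RUNLOCK(obj);
}

vm_fault_soft_fast() in the vm_fault.c hunks below is the single real
consumer added here: it busies fs->first_object, relies on check-only
vm_page_busied() tests, and falls back to the regular fault path when the
check reports a conflict.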

View File

@@ -5805,8 +5805,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva ||
 	    va >= kmi.clean_eva,
 	    ("pmap_enter: managed mapping within the clean submap"));
-	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
-		VM_OBJECT_ASSERT_LOCKED(m->object);
+	if ((m->oflags & VPO_UNMANAGED) == 0)
+		VM_PAGE_OBJECT_BUSY_ASSERT(m);
 	KASSERT((flags & PMAP_ENTER_RESERVED) == 0,
 	    ("pmap_enter: flags %u has reserved bits set", flags));
 	pa = VM_PAGE_TO_PHYS(m);

View File

@@ -2979,8 +2979,8 @@ pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 		pa = systempage.pv_pa;
 		m = NULL;
 	} else {
-		if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
-			VM_OBJECT_ASSERT_LOCKED(m->object);
+		if ((m->oflags & VPO_UNMANAGED) == 0)
+			VM_PAGE_OBJECT_BUSY_ASSERT(m);
 		pa = VM_PAGE_TO_PHYS(m);
 	}
 	nflags = 0;

View File

@@ -3876,8 +3876,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva ||
 	    va >= kmi.clean_eva,
 	    ("%s: managed mapping within the clean submap", __func__));
-	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
-		VM_OBJECT_ASSERT_LOCKED(m->object);
+	if ((m->oflags & VPO_UNMANAGED) == 0)
+		VM_PAGE_OBJECT_BUSY_ASSERT(m);
 	KASSERT((flags & PMAP_ENTER_RESERVED) == 0,
 	    ("%s: flags %u has reserved bits set", __func__, flags));
 	pa = VM_PAGE_TO_PHYS(m);

View File

@@ -3181,8 +3181,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 	int lvl, rv;
 
 	va = trunc_page(va);
-	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
-		VM_OBJECT_ASSERT_LOCKED(m->object);
+	if ((m->oflags & VPO_UNMANAGED) == 0)
+		VM_PAGE_OBJECT_BUSY_ASSERT(m);
 	pa = VM_PAGE_TO_PHYS(m);
 	new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
 	    L3_PAGE);

View File

@@ -3634,8 +3634,8 @@ __CONCAT(PMTYPE, enter)(pmap_t pmap, vm_offset_t va, vm_page_t m,
 	KASSERT(pmap != kernel_pmap || (m->oflags & VPO_UNMANAGED) != 0 ||
 	    va < kmi.clean_sva || va >= kmi.clean_eva,
 	    ("pmap_enter: managed mapping within the clean submap"));
-	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
-		VM_OBJECT_ASSERT_LOCKED(m->object);
+	if ((m->oflags & VPO_UNMANAGED) == 0)
+		VM_PAGE_OBJECT_BUSY_ASSERT(m);
 	KASSERT((flags & PMAP_ENTER_RESERVED) == 0,
 	    ("pmap_enter: flags %u has reserved bits set", flags));
 	pa = VM_PAGE_TO_PHYS(m);

View File

@@ -2057,8 +2057,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva ||
 	    va >= kmi.clean_eva,
 	    ("pmap_enter: managed mapping within the clean submap"));
-	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
-		VM_OBJECT_ASSERT_LOCKED(m->object);
+	if ((m->oflags & VPO_UNMANAGED) == 0)
+		VM_PAGE_OBJECT_BUSY_ASSERT(m);
 	pa = VM_PAGE_TO_PHYS(m);
 	newpte = TLBLO_PA_TO_PFN(pa) | init_pte_prot(m, flags, prot);
 	if ((flags & PMAP_ENTER_WIRED) != 0)

View File

@@ -1149,8 +1149,8 @@ moea_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 	if (pmap_bootstrapped)
 		rw_assert(&pvh_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
-		VM_OBJECT_ASSERT_LOCKED(m->object);
+	if ((m->oflags & VPO_UNMANAGED) == 0)
+		VM_PAGE_OBJECT_BUSY_ASSERT(m);
 
 	if ((m->oflags & VPO_UNMANAGED) != 0 || !moea_initialized) {
 		pvo_head = &moea_pvo_kunmanaged;

View File

@@ -1406,8 +1406,8 @@ moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m,
 	uint64_t pte_lo;
 	int error;
 
-	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
-		VM_OBJECT_ASSERT_LOCKED(m->object);
+	if ((m->oflags & VPO_UNMANAGED) == 0)
+		VM_PAGE_OBJECT_BUSY_ASSERT(m);
 
 	pvo = alloc_pvo_entry(0);
 	if (pvo == NULL)

View File

@@ -2278,8 +2278,8 @@ mmu_booke_enter_locked(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m,
 		KASSERT((va <= VM_MAXUSER_ADDRESS),
 		    ("mmu_booke_enter_locked: user pmap, non user va"));
 	}
-	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
-		VM_OBJECT_ASSERT_LOCKED(m->object);
+	if ((m->oflags & VPO_UNMANAGED) == 0)
+		VM_PAGE_OBJECT_BUSY_ASSERT(m);
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

View File

@@ -2650,8 +2650,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 	bool nosleep;
 
 	va = trunc_page(va);
-	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
-		VM_OBJECT_ASSERT_LOCKED(m->object);
+	if ((m->oflags & VPO_UNMANAGED) == 0)
+		VM_PAGE_OBJECT_BUSY_ASSERT(m);
 	pa = VM_PAGE_TO_PHYS(m);
 	pn = (pa / PAGE_SIZE);

View File

@@ -1500,8 +1500,8 @@ pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 	rw_assert(&tte_list_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pm, MA_OWNED);
-	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
-		VM_OBJECT_ASSERT_LOCKED(m->object);
+	if ((m->oflags & VPO_UNMANAGED) == 0)
+		VM_PAGE_OBJECT_BUSY_ASSERT(m);
 	PMAP_STATS_INC(pmap_nenter);
 	pa = VM_PAGE_TO_PHYS(m);
 	wired = (flags & PMAP_ENTER_WIRED) != 0;

View File

@@ -281,11 +281,14 @@ vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot,
 	int psind, rv;
 
 	MPASS(fs->vp == NULL);
+	vm_object_busy(fs->first_object);
 	m = vm_page_lookup(fs->first_object, fs->first_pindex);
 	/* A busy page can be mapped for read|execute access. */
 	if (m == NULL || ((prot & VM_PROT_WRITE) != 0 &&
-	    vm_page_busied(m)) || m->valid != VM_PAGE_BITS_ALL)
-		return (KERN_FAILURE);
+	    vm_page_busied(m)) || m->valid != VM_PAGE_BITS_ALL) {
+		rv = KERN_FAILURE;
+		goto out;
+	}
 	m_map = m;
 	psind = 0;
 #if (defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \
@@ -323,7 +326,7 @@ vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot,
 	rv = pmap_enter(fs->map->pmap, vaddr, m_map, prot, fault_type |
 	    PMAP_ENTER_NOSLEEP | (wired ? PMAP_ENTER_WIRED : 0), psind);
 	if (rv != KERN_SUCCESS)
-		return (rv);
+		goto out;
 	if (m_hold != NULL) {
 		*m_hold = m;
 		vm_page_wire(m);
@@ -334,7 +337,10 @@ vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot,
 	VM_OBJECT_RUNLOCK(fs->first_object);
 	vm_map_lookup_done(fs->map, fs->entry);
 	curthread->td_ru.ru_minflt++;
 	return (KERN_SUCCESS);
+out:
+	vm_object_unbusy(fs->first_object);
+	return (rv);
 }
 
 static void
@@ -1351,8 +1357,8 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
 	if (hardfault)
 		fs.entry->next_read = vaddr + ptoa(ahead) + PAGE_SIZE;
 
-	vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags, true);
 	vm_page_assert_xbusied(fs.m);
+	vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags, true);
 
 	/*
 	 * Page must be completely valid or it is not fit to

View File

@@ -83,6 +83,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/refcount.h>
 #include <sys/socket.h>
 #include <sys/resourcevar.h>
+#include <sys/refcount.h>
 #include <sys/rwlock.h>
 #include <sys/user.h>
 #include <sys/vnode.h>
@@ -198,6 +199,9 @@ vm_object_zdtor(void *mem, int size, void *arg)
 	KASSERT(REFCOUNT_COUNT(object->paging_in_progress) == 0,
 	    ("object %p paging_in_progress = %d",
 	    object, REFCOUNT_COUNT(object->paging_in_progress)));
+	KASSERT(object->busy == 0,
+	    ("object %p busy = %d",
+	    object, object->busy));
 	KASSERT(object->resident_page_count == 0,
 	    ("object %p resident_page_count = %d",
 	    object, object->resident_page_count));
@@ -223,6 +227,7 @@ vm_object_zinit(void *mem, int size, int flags)
 	object->ref_count = 0;
 	vm_radix_init(&object->rtree);
 	refcount_init(&object->paging_in_progress, 0);
+	refcount_init(&object->busy, 0);
 	object->resident_page_count = 0;
 	object->shadow_count = 0;
 	object->flags = OBJ_DEAD;
@@ -2239,6 +2244,42 @@ vm_object_vnode(vm_object_t object)
 	return (vp);
 }
 
+/*
+ * Busy the vm object. This prevents new pages belonging to the object from
+ * becoming busy. Existing pages persist as busy. Callers are responsible
+ * for checking page state before proceeding.
+ */
+void
+vm_object_busy(vm_object_t obj)
+{
+
+	VM_OBJECT_ASSERT_LOCKED(obj);
+
+	refcount_acquire(&obj->busy);
+	/* The fence is required to order loads of page busy. */
+	atomic_thread_fence_acq_rel();
+}
+
+void
+vm_object_unbusy(vm_object_t obj)
+{
+
+	VM_OBJECT_ASSERT_LOCKED(obj);
+
+	refcount_release(&obj->busy);
+}
+
+void
+vm_object_busy_wait(vm_object_t obj, const char *wmesg)
+{
+
+	VM_OBJECT_ASSERT_UNLOCKED(obj);
+
+	if (obj->busy)
+		refcount_sleep(&obj->busy, wmesg, PVM);
+}
+
 /*
  * Return the kvme type of the given object.
  * If vpp is not NULL, set it to the object's vm_object_vnode() or NULL.

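The one-line fence comment in vm_object_busy() is terse, so here is a small
stand-alone userland model of the ordering argument.  It is an
assumption-level sketch, not kernel code: both sides use seq_cst fences for
simplicity, while the kernel pairs an acq_rel fence on the object side with
the page busy atomics, and the thread functions and messages are invented.

/* cc -O2 -pthread model.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_uint object_busy;		/* models obj->busy */
static atomic_uint page_busy;		/* models m->busy_lock, 0 or 1 */

/* Models vm_object_busy() followed by a check-only vm_page_busied(). */
static void *
object_side(void *arg)
{
	(void)arg;
	atomic_fetch_add(&object_busy, 1);		/* publish object busy */
	atomic_thread_fence(memory_order_seq_cst);	/* then load page busy */
	if (atomic_load(&page_busy) != 0)
		printf("object side: page already busy, treat it as busy\n");
	return (NULL);
}

/* Models vm_page_tryxbusy(): publish page busy, then re-check the object. */
static void *
page_side(void *arg)
{
	unsigned int exp = 0;

	(void)arg;
	if (atomic_compare_exchange_strong(&page_busy, &exp, 1)) {
		atomic_thread_fence(memory_order_seq_cst);
		if (atomic_load(&object_busy) != 0) {
			atomic_store(&page_busy, 0);	/* undo, back off */
			printf("page side: object busied, backing off\n");
		}
	}
	return (NULL);
}

int
main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, object_side, NULL);
	pthread_create(&t2, NULL, page_side, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return (0);
}

Because each side stores its own flag before loading the other one, at least
one of the two threads must observe the conflict; the case where both loads
return zero is ruled out.  At worst both sides see a conflict and back off,
which is the false positive the commit message accepts.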
View File

@@ -84,6 +84,7 @@
  *	vm_object_t		Virtual memory object.
  *
  *	List of locks
+ *	(a)	atomic
  *	(c)	const until freed
  *	(o)	per-object lock
  *	(f)	free pages queue mutex
@@ -112,6 +113,7 @@ struct vm_object {
 	u_short flags;			/* see below */
 	u_short pg_color;		/* (c) color of first page in obj */
 	volatile u_int paging_in_progress; /* Paging (in or out) so don't collapse or destroy */
+	volatile u_int busy;		/* (a) object is busy, disallow page busy. */
 	int resident_page_count;	/* number of resident pages */
 	struct vm_object *backing_object; /* object that I'm a shadow of */
 	vm_ooffset_t backing_object_offset;/* Offset in backing object */
@@ -313,6 +315,18 @@ void vm_object_pip_wakeupn(vm_object_t object, short i);
 void vm_object_pip_wait(vm_object_t object, char *waitid);
 void vm_object_pip_wait_unlocked(vm_object_t object, char *waitid);
+void vm_object_busy(vm_object_t object);
+void vm_object_unbusy(vm_object_t object);
+void vm_object_busy_wait(vm_object_t object, const char *wmesg);
+
+static inline bool
+vm_object_busied(vm_object_t object)
+{
+
+	return (object->busy != 0);
+}
+#define	VM_OBJECT_ASSERT_BUSY(object)	MPASS(vm_object_busied((object)))
 
 void umtx_shm_object_init(vm_object_t object);
 void umtx_shm_object_terminated(vm_object_t object);
 extern int umtx_shm_vnobj_persistent;

View File

@@ -180,6 +180,8 @@ SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD |
 static uma_zone_t fakepg_zone;
 
 static void vm_page_alloc_check(vm_page_t m);
+static void _vm_page_busy_sleep(vm_object_t obj, vm_page_t m,
+    const char *wmesg, bool nonshared, bool locked);
 static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
 static void vm_page_dequeue_complete(vm_page_t m);
 static void vm_page_enqueue(vm_page_t m, uint8_t queue);
@@ -899,7 +901,6 @@ int
 vm_page_busy_acquire(vm_page_t m, int allocflags)
 {
 	vm_object_t obj;
-	u_int x;
 	bool locked;
 
 	/*
@@ -920,27 +921,13 @@ vm_page_busy_acquire(vm_page_t m, int allocflags)
 		}
 		if ((allocflags & VM_ALLOC_NOWAIT) != 0)
 			return (FALSE);
-		if (obj != NULL) {
+		if (obj != NULL)
 			locked = VM_OBJECT_WOWNED(obj);
-		} else {
-			MPASS(vm_page_wired(m));
+		else
 			locked = FALSE;
-		}
-		sleepq_lock(m);
-		x = m->busy_lock;
-		if (x == VPB_UNBUSIED ||
-		    ((allocflags & VM_ALLOC_SBUSY) != 0 &&
-		    (x & VPB_BIT_SHARED) != 0) ||
-		    ((x & VPB_BIT_WAITERS) == 0 &&
-		    !atomic_cmpset_int(&m->busy_lock, x,
-		    x | VPB_BIT_WAITERS))) {
-			sleepq_release(m);
-			continue;
-		}
-		if (locked)
-			VM_OBJECT_WUNLOCK(obj);
-		sleepq_add(m, NULL, "vmpba", 0, 0);
-		sleepq_wait(m, PVM);
+		MPASS(locked || vm_page_wired(m));
+		_vm_page_busy_sleep(obj, m, "vmpba",
+		    (allocflags & VM_ALLOC_SBUSY) != 0, locked);
 		if (locked)
 			VM_OBJECT_WLOCK(obj);
 		MPASS(m->object == obj || m->object == NULL);
@@ -1056,22 +1043,42 @@ void
 vm_page_busy_sleep(vm_page_t m, const char *wmesg, bool nonshared)
 {
 	vm_object_t obj;
-	u_int x;
 
 	obj = m->object;
-	vm_page_lock_assert(m, MA_NOTOWNED);
 	VM_OBJECT_ASSERT_LOCKED(obj);
+	vm_page_lock_assert(m, MA_NOTOWNED);
+	_vm_page_busy_sleep(obj, m, wmesg, nonshared, true);
+}
+
+static void
+_vm_page_busy_sleep(vm_object_t obj, vm_page_t m, const char *wmesg,
+    bool nonshared, bool locked)
+{
+	u_int x;
+
+	/*
+	 * If the object is busy we must wait for that to drain to zero
+	 * before trying the page again.
+	 */
+	if (obj != NULL && vm_object_busied(obj)) {
+		if (locked)
+			VM_OBJECT_DROP(obj);
+		vm_object_busy_wait(obj, wmesg);
+		return;
+	}
 
 	sleepq_lock(m);
 	x = m->busy_lock;
 	if (x == VPB_UNBUSIED || (nonshared && (x & VPB_BIT_SHARED) != 0) ||
 	    ((x & VPB_BIT_WAITERS) == 0 &&
 	    !atomic_cmpset_int(&m->busy_lock, x, x | VPB_BIT_WAITERS))) {
-		VM_OBJECT_DROP(obj);
+		if (locked)
+			VM_OBJECT_DROP(obj);
 		sleepq_release(m);
 		return;
 	}
-	VM_OBJECT_DROP(obj);
+	if (locked)
+		VM_OBJECT_DROP(obj);
 	sleepq_add(m, NULL, wmesg, 0, 0);
 	sleepq_wait(m, PVM);
 }
@@ -1086,16 +1093,56 @@ vm_page_busy_sleep(vm_page_t m, const char *wmesg, bool nonshared)
 int
 vm_page_trysbusy(vm_page_t m)
 {
+	vm_object_t obj;
 	u_int x;
 
+	obj = m->object;
 	x = m->busy_lock;
 	for (;;) {
 		if ((x & VPB_BIT_SHARED) == 0)
 			return (0);
+		/*
+		 * Reduce the window for transient busies that will trigger
+		 * false negatives in vm_page_ps_test().
+		 */
+		if (obj != NULL && vm_object_busied(obj))
+			return (0);
 		if (atomic_fcmpset_acq_int(&m->busy_lock, &x,
 		    x + VPB_ONE_SHARER))
-			return (1);
+			break;
 	}
+
+	/* Refetch the object now that we're guaranteed that it is stable. */
+	obj = m->object;
+	if (obj != NULL && vm_object_busied(obj)) {
+		vm_page_sunbusy(m);
+		return (0);
+	}
+	return (1);
 }
+
+/*
+ *	vm_page_tryxbusy:
+ *
+ *	Try to exclusive busy a page.
+ *	If the operation succeeds 1 is returned otherwise 0.
+ *	The operation never sleeps.
+ */
+int
+vm_page_tryxbusy(vm_page_t m)
+{
+	vm_object_t obj;
+
+	if (atomic_cmpset_acq_int(&(m)->busy_lock, VPB_UNBUSIED,
+	    VPB_SINGLE_EXCLUSIVER) == 0)
+		return (0);
+
+	obj = m->object;
+	if (obj != NULL && vm_object_busied(obj)) {
+		vm_page_xunbusy(m);
+		return (0);
+	}
+	return (1);
+}
 
 /*
@@ -1317,15 +1364,15 @@ vm_page_sleep_if_busy(vm_page_t m, const char *msg)
 	vm_page_lock_assert(m, MA_NOTOWNED);
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 
-	if (vm_page_busied(m)) {
-		/*
-		 * The page-specific object must be cached because page
-		 * identity can change during the sleep, causing the
-		 * re-lock of a different object.
-		 * It is assumed that a reference to the object is already
-		 * held by the callers.
-		 */
-		obj = m->object;
+	/*
+	 * The page-specific object must be cached because page
+	 * identity can change during the sleep, causing the
+	 * re-lock of a different object.
+	 * It is assumed that a reference to the object is already
+	 * held by the callers.
+	 */
+	obj = m->object;
+	if (vm_page_busied(m) || (obj != NULL && obj->busy)) {
 		vm_page_busy_sleep(m, msg, false);
 		VM_OBJECT_WLOCK(obj);
 		return (TRUE);
@@ -1350,15 +1397,15 @@ vm_page_sleep_if_xbusy(vm_page_t m, const char *msg)
 	vm_page_lock_assert(m, MA_NOTOWNED);
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 
-	if (vm_page_xbusied(m)) {
-		/*
-		 * The page-specific object must be cached because page
-		 * identity can change during the sleep, causing the
-		 * re-lock of a different object.
-		 * It is assumed that a reference to the object is already
-		 * held by the callers.
-		 */
-		obj = m->object;
+	/*
+	 * The page-specific object must be cached because page
+	 * identity can change during the sleep, causing the
+	 * re-lock of a different object.
+	 * It is assumed that a reference to the object is already
+	 * held by the callers.
+	 */
+	obj = m->object;
+	if (vm_page_xbusied(m) || (obj != NULL && obj->busy)) {
 		vm_page_busy_sleep(m, msg, true);
 		VM_OBJECT_WLOCK(obj);
 		return (TRUE);
@@ -4883,17 +4930,15 @@ vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line)
 
 #ifdef INVARIANTS
 void
-vm_page_object_lock_assert(vm_page_t m)
+vm_page_object_busy_assert(vm_page_t m)
 {
 
 	/*
 	 * Certain of the page's fields may only be modified by the
-	 * holder of the containing object's lock or the exclusive busy.
-	 * holder.  Unfortunately, the holder of the write busy is
-	 * not recorded, and thus cannot be checked here.
+	 * holder of a page or object busy.
 	 */
-	if (m->object != NULL && !vm_page_xbusied(m))
-		VM_OBJECT_ASSERT_WLOCKED(m->object);
+	if (m->object != NULL && !vm_page_busied(m))
+		VM_OBJECT_ASSERT_BUSY(m->object);
 }
 
 void
@@ -4911,7 +4956,7 @@ vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits)
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("PGA_WRITEABLE on unmanaged page"));
 	if (!vm_page_xbusied(m))
-		VM_OBJECT_ASSERT_LOCKED(m->object);
+		VM_OBJECT_ASSERT_BUSY(m->object);
 }
 #endif

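A practical consequence of the vm_page.c changes above: vm_page_busy_sleep()
may now return because it slept on obj->busy rather than on the page, and in
every path it drops the object lock.  Callers therefore re-lock and
re-evaluate in a loop, as in the hypothetical sketch below (the function name
and the "pgbusy" wait message are invented; the vm_page_* and VM_OBJECT_*
calls are existing interfaces).

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

/*
 * Hypothetical caller sketch: wait until the page at pindex is not busied.
 * vm_page_busy_sleep() drops the object lock and may have slept on the
 * object busy count instead of the page, so the page is looked up again
 * on every iteration because its identity may have changed.
 */
static void
example_wait_not_busied(vm_object_t obj, vm_pindex_t pindex)
{
	vm_page_t m;

	VM_OBJECT_WLOCK(obj);
	while ((m = vm_page_lookup(obj, pindex)) != NULL &&
	    vm_page_busied(m)) {
		/* Drops the object lock. */
		vm_page_busy_sleep(m, "pgbusy", false);
		VM_OBJECT_WLOCK(obj);
	}
	/* The page, if present, is not busied at this instant. */
	VM_OBJECT_WUNLOCK(obj);
}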
View File

@@ -615,6 +615,7 @@ void vm_page_swapqueue(vm_page_t m, uint8_t oldq, uint8_t newq);
 bool vm_page_try_remove_all(vm_page_t m);
 bool vm_page_try_remove_write(vm_page_t m);
 int vm_page_trysbusy(vm_page_t m);
+int vm_page_tryxbusy(vm_page_t m);
 void vm_page_unhold_pages(vm_page_t *ma, int count);
 void vm_page_unswappable(vm_page_t m);
 void vm_page_unwire(vm_page_t m, uint8_t queue);
@@ -666,10 +667,6 @@ void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line);
 	    (m));							\
 } while (0)
 
-#define	vm_page_tryxbusy(m)						\
-	(atomic_cmpset_acq_int(&(m)->busy_lock, VPB_UNBUSIED,		\
-	    VPB_SINGLE_EXCLUSIVER))
-
 #define	vm_page_xbusied(m)						\
 	(((m)->busy_lock & VPB_SINGLE_EXCLUSIVER) != 0)
@@ -687,13 +684,13 @@ void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line);
 } while (0)
 
 #ifdef INVARIANTS
-void vm_page_object_lock_assert(vm_page_t m);
-#define	VM_PAGE_OBJECT_LOCK_ASSERT(m)	vm_page_object_lock_assert(m)
+void vm_page_object_busy_assert(vm_page_t m);
+#define	VM_PAGE_OBJECT_BUSY_ASSERT(m)	vm_page_object_busy_assert(m)
 void vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits);
 #define	VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits) \
 	vm_page_assert_pga_writeable(m, bits)
 #else
-#define	VM_PAGE_OBJECT_LOCK_ASSERT(m)	(void)0
+#define	VM_PAGE_OBJECT_BUSY_ASSERT(m)	(void)0
 #define	VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits)	(void)0
 #endif
@@ -835,7 +832,7 @@ static __inline void
 vm_page_undirty(vm_page_t m)
 {
 
-	VM_PAGE_OBJECT_LOCK_ASSERT(m);
+	VM_PAGE_OBJECT_BUSY_ASSERT(m);
 	m->dirty = 0;
 }