Ensure that queue state is cleared when vm_page_dequeue() returns.

Per-page queue state is updated non-atomically, with either the page
lock or the page queue lock held.  When vm_page_dequeue() is called
without the page lock, in rare cases a different thread may be
concurrently dequeuing the page with the page queue lock held.  Because
of the non-atomic update, vm_page_dequeue() might return before queue
state is completely updated, which can lead to race conditions.

Restrict the vm_page_dequeue() interface so that it must be called
either with the page lock held or on a free page.  If a different
thread is concurrently updating the page's queue state, which it must
do within a critical section, vm_page_dequeue() now busy-waits until
that update is complete.
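
For illustration only (not part of the change), the sketch below walks
through both the window described above and the busy-wait fix.  The
struct, the flag values, and the dequeue_*() functions are simplified
stand-ins for the kernel's vm_page and PGA_* queue state, chosen so the
fragment compiles on its own; the real change is in the diff that
follows.

	#include <stdatomic.h>
	#include <stdint.h>

	#define	PQ_NONE			255	/* illustrative values only */
	#define	PGA_DEQUEUE		0x01
	#define	PGA_QUEUE_STATE_MASK	0x07

	struct page {
		_Atomic uint8_t	aflags;	/* cleared with an atomic op (step 2) */
		uint8_t		queue;	/* cleared under the queue lock (step 1) */
	};

	/* Dequeuing thread: page queue lock held, inside a critical section. */
	void
	dequeue_complete(struct page *m)
	{
		m->queue = PQ_NONE;					/* step 1 */
		atomic_fetch_and(&m->aflags, ~PGA_QUEUE_STATE_MASK);	/* step 2 */
	}

	/* Old behaviour: a lockless caller could return between the two
	 * steps, while PGA_DEQUEUE was still set on the page. */
	void
	dequeue_old(struct page *m)
	{
		if (m->queue == PQ_NONE)	/* the kernel reads this with atomic_load_8() */
			return;
		/* ... lock the page queue and dequeue the page ... */
	}

	/* New behaviour: busy-wait until the concurrent update finishes.
	 * The wait is bounded because the updater runs in a critical
	 * section and so cannot be preempted. */
	void
	dequeue_new(struct page *m)
	{
		for (;;) {
			if (m->queue == PQ_NONE) {
				if ((atomic_load(&m->aflags) &
				    PGA_QUEUE_STATE_MASK) == 0)
					return;
				continue;	/* cpu_spinwait() in the kernel */
			}
			/* ... lock the page queue, dequeue the page ... */
			return;
		}
	}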

While here, do some related cleanup: inline vm_page_dequeue_locked()
into its only caller and delete a prototype for the unimplemented
vm_page_requeue_locked().  Replace the volatile qualifier for "queue"
added in r333703 with explicit uses of atomic_load_8() where required.
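
As background for that last point: a plain (unordered) atomic_load_8()
amounts to a single untorn byte load through a volatile pointer, so
annotating only the lockless read sites gives the same guarantee the
volatile qualifier gave, without turning every locked access of "queue"
into a volatile access.  A hypothetical stand-in (my_atomic_load_8 is
made up; the real macro comes from the machine-dependent atomic
headers):

	#include <stdint.h>

	/*
	 * Stand-in for the kernel's plain atomic_load_8().  The volatile
	 * access keeps the compiler from caching the value in a register
	 * across a retry loop, which is all the lockless readers of
	 * m->queue require.
	 */
	static inline uint8_t
	my_atomic_load_8(const uint8_t *p)
	{
		return (*(const volatile uint8_t *)p);
	}

The design point is that the racy reads are now explicit at their call
sites (vm_page_pagequeue_lockptr(), vm_page_dequeue_deferred(),
vm_page_requeue()) instead of being hidden behind a volatile field
declaration.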

Reported and tested by:	pho
Reviewed by:	alc
Differential Revision:	https://reviews.freebsd.org/D15980
Author:	Mark Johnston
Date:	2018-08-23 20:34:22 +00:00
Commit:	99d92d732f (parent 608226d559)

2 changed files with 49 additions and 50 deletions

--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c

@@ -521,7 +521,7 @@ vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segind)
 	m->wire_count = 0;
 	m->busy_lock = VPB_UNBUSIED;
 	m->hold_count = 0;
-	m->flags = 0;
+	m->flags = m->aflags = 0;
 	m->phys_addr = pa;
 	m->queue = PQ_NONE;
 	m->psind = 0;
@@ -2148,8 +2148,9 @@ vm_page_alloc_check(vm_page_t m)
 {
 
 	KASSERT(m->object == NULL, ("page %p has object", m));
-	KASSERT(m->queue == PQ_NONE,
-	    ("page %p has unexpected queue %d", m, m->queue));
+	KASSERT(m->queue == PQ_NONE && (m->aflags & PGA_QUEUE_STATE_MASK) == 0,
+	    ("page %p has unexpected queue %d, flags %#x",
+	    m, m->queue, (m->aflags & PGA_QUEUE_STATE_MASK)));
 	KASSERT(!vm_page_held(m), ("page %p is held", m));
 	KASSERT(!vm_page_busied(m), ("page %p is busy", m));
 	KASSERT(m->dirty == 0, ("page %p is dirty", m));
@@ -3090,7 +3091,7 @@ vm_page_pagequeue_lockptr(vm_page_t m)
 {
 	uint8_t queue;
 
-	if ((queue = m->queue) == PQ_NONE)
+	if ((queue = atomic_load_8(&m->queue)) == PQ_NONE)
 		return (NULL);
 	return (&vm_pagequeue_domain(m)->vmd_pagequeues[queue].pq_mutex);
 }
@@ -3101,6 +3102,7 @@ vm_pqbatch_process_page(struct vm_pagequeue *pq, vm_page_t m)
 	struct vm_domain *vmd;
 	uint8_t aflags;
 
+	CRITICAL_ASSERT(curthread);
 	vm_pagequeue_assert_locked(pq);
 	KASSERT(pq == vm_page_pagequeue(m),
 	    ("page %p doesn't belong to %p", m, pq));
@@ -3267,7 +3269,7 @@ vm_page_dequeue_deferred(vm_page_t m)
 
 	vm_page_assert_locked(m);
 
-	queue = m->queue;
+	queue = atomic_load_8(&m->queue);
 	if (queue == PQ_NONE) {
 		KASSERT((m->aflags & PGA_QUEUE_STATE_MASK) == 0,
 		    ("page %p has queue state", m));
@@ -3278,57 +3280,47 @@ vm_page_dequeue_deferred(vm_page_t m)
 	vm_pqbatch_submit_page(m, queue);
 }
 
 /*
- * vm_page_dequeue_locked:
- *
- *	Remove the page from its page queue, which must be locked.
- *	If the page lock is not held, there is no guarantee that the
- *	page will not be enqueued by another thread before this function
- *	returns.  In this case, it is up to the caller to ensure that
- *	no other threads hold a reference to the page.
- *
- *	The page queue lock must be held.  If the page is not already
- *	logically dequeued, the page lock must be held as well.
- */
-void
-vm_page_dequeue_locked(vm_page_t m)
-{
-	struct vm_pagequeue *pq;
-
-	pq = vm_page_pagequeue(m);
-
-	KASSERT(m->queue != PQ_NONE,
-	    ("%s: page %p queue field is PQ_NONE", __func__, m));
-	vm_pagequeue_assert_locked(pq);
-	KASSERT((m->aflags & PGA_DEQUEUE) != 0 ||
-	    mtx_owned(vm_page_lockptr(m)),
-	    ("%s: queued unlocked page %p", __func__, m));
-
-	if ((m->aflags & PGA_ENQUEUED) != 0) {
-		TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
-		vm_pagequeue_cnt_dec(pq);
-	}
-	vm_page_dequeue_complete(m);
-}
-
-/*
  * vm_page_dequeue:
  *
  *	Remove the page from whichever page queue it's in, if any.
- *	If the page lock is not held, there is no guarantee that the
- *	page will not be enqueued by another thread before this function
- *	returns.  In this case, it is up to the caller to ensure that
- *	no other threads hold a reference to the page.
+ *	The page must either be locked or unallocated.  This constraint
+ *	ensures that the queue state of the page will remain consistent
+ *	after this function returns.
  */
 void
 vm_page_dequeue(vm_page_t m)
 {
 	struct mtx *lock, *lock1;
+	struct vm_pagequeue *pq;
+	uint8_t aflags;
+
+	KASSERT(mtx_owned(vm_page_lockptr(m)) || m->order == VM_NFREEORDER,
+	    ("page %p is allocated and unlocked", m));
 
-	lock = vm_page_pagequeue_lockptr(m);
 	for (;;) {
-		if (lock == NULL)
-			return;
+		lock = vm_page_pagequeue_lockptr(m);
+		if (lock == NULL) {
+			/*
+			 * A thread may be concurrently executing
+			 * vm_page_dequeue_complete().  Ensure that all queue
+			 * state is cleared before we return.
+			 */
+			aflags = atomic_load_8(&m->aflags);
+			if ((aflags & PGA_QUEUE_STATE_MASK) == 0)
+				return;
+			KASSERT((aflags & PGA_DEQUEUE) != 0,
+			    ("page %p has unexpected queue state flags %#x",
+			    m, aflags));
+
+			/*
+			 * Busy wait until the thread updating queue state is
+			 * finished.  Such a thread must be executing in a
+			 * critical section.
+			 */
+			cpu_spinwait();
+			continue;
+		}
 		mtx_lock(lock);
 		if ((lock1 = vm_page_pagequeue_lockptr(m)) == lock)
 			break;
@@ -3337,7 +3329,16 @@ vm_page_dequeue(vm_page_t m)
 	}
 	KASSERT(lock == vm_page_pagequeue_lockptr(m),
 	    ("%s: page %p migrated directly between queues", __func__, m));
-	vm_page_dequeue_locked(m);
+	KASSERT((m->aflags & PGA_DEQUEUE) != 0 ||
+	    mtx_owned(vm_page_lockptr(m)),
+	    ("%s: queued unlocked page %p", __func__, m));
+
+	if ((m->aflags & PGA_ENQUEUED) != 0) {
+		pq = vm_page_pagequeue(m);
+		TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+		vm_pagequeue_cnt_dec(pq);
+	}
+	vm_page_dequeue_complete(m);
 	mtx_unlock(lock);
 }
 
@@ -3376,7 +3377,7 @@ vm_page_requeue(vm_page_t m)
 
 	if ((m->aflags & PGA_REQUEUE) == 0)
 		vm_page_aflag_set(m, PGA_REQUEUE);
-	vm_pqbatch_submit_page(m, m->queue);
+	vm_pqbatch_submit_page(m, atomic_load_8(&m->queue));
 }
 
 /*
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h

@@ -208,7 +208,7 @@ struct vm_page {
 	uint16_t flags;			/* page PG_* flags (P) */
 	uint8_t aflags;			/* access is atomic */
 	uint8_t oflags;			/* page VPO_* flags (O) */
-	volatile uint8_t queue;		/* page queue index (Q) */
+	uint8_t queue;			/* page queue index (Q) */
 	int8_t psind;			/* pagesizes[] index (O) */
 	int8_t segind;			/* vm_phys segment index (C) */
 	uint8_t order;			/* index of the buddy queue (F) */
@@ -539,7 +539,6 @@ void vm_page_deactivate(vm_page_t);
 void vm_page_deactivate_noreuse(vm_page_t);
 void vm_page_dequeue(vm_page_t m);
 void vm_page_dequeue_deferred(vm_page_t m);
-void vm_page_dequeue_locked(vm_page_t m);
 void vm_page_drain_pqbatch(void);
 vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t);
 bool vm_page_free_prep(vm_page_t m);
@@ -565,7 +564,6 @@ int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);
 vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object,
     vm_pindex_t pindex);
 void vm_page_requeue(vm_page_t m);
-void vm_page_requeue_locked(vm_page_t m);
 int vm_page_sbusied(vm_page_t m);
 vm_page_t vm_page_scan_contig(u_long npages, vm_page_t m_start,
     vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options);