freebsd-dev/sys/vm/phys_pager.c
Alan Cox b5e8f167b9 Consider a scenario in which one processor, call it Pt, is performing
vm_object_terminate() on a device-backed object at the same time that
another processor, call it Pa, is performing dev_pager_alloc() on the
same device.  The problem is that vm_pager_object_lookup() should not be
allowed to return a doomed object, i.e., an object with OBJ_DEAD set,
but it does.  In detail, the unfortunate sequence of events is: Pt in
vm_object_terminate() holds the doomed object's lock and sets OBJ_DEAD
on the object.  Pa in dev_pager_alloc() holds dev_pager_sx and calls
vm_pager_object_lookup(), which returns the doomed object.  Next, Pa
calls vm_object_reference(), which requires the doomed object's lock, so
Pa waits for Pt to release the doomed object's lock.  Pt proceeds to the
point in vm_object_terminate() where it releases the doomed object's
lock.  Pa is now able to complete vm_object_reference() because it can
now complete the acquisition of the doomed object's lock.  So, now the
doomed object has a reference count of one!  Pa releases dev_pager_sx
and returns the doomed object from dev_pager_alloc().  Pt now acquires
dev_pager_mtx, removes the doomed object from dev_pager_object_list,
releases dev_pager_mtx, and finally calls uma_zfree with the doomed
object.  However, the doomed object is still in use by Pa.

Repeating my key point, vm_pager_object_lookup() must not return a
doomed object.  Moreover, the test for the object's state, i.e.,
doomed or not, and the increment of the object's reference count
should be carried out atomically.

Reviewed by:	kib
Approved by:	re (kensmith)
MFC after:	3 weeks
2007-08-05 21:04:32 +00:00

200 lines
5.2 KiB
C

/*-
* Copyright (c) 2000 Peter Wemm
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker_set.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
/*
 * Hand-rolled sleep lock that serializes the lookup-or-create sequence in
 * phys_pager_alloc().  Values used by that function:
 *   0  - nobody is inside the sequence
 *   1  - a thread is inside the sequence
 *  -1  - a thread is inside and at least one other thread is sleeping on
 *        the address of this variable waiting for it
 */
static int phys_pager_alloc_lock;
/*
 * List of physical pager (OBJT_PHYS) objects that were created with a
 * non-NULL handle; phys_pager_alloc() searches it by handle.
 */
static struct pagerlst phys_pager_object_list;
/* protect access to phys_pager_object_list */
static struct mtx phys_pager_mtx;
/*
 * Initialization hook for the physical pager (installed as pgo_init).
 *
 * Sets up the mutex that guards the list of handle-bearing objects and
 * leaves that list empty.  Takes no arguments and returns nothing.
 */
static void
phys_pager_init(void)
{

	mtx_init(&phys_pager_mtx, "phys_pager list", NULL, MTX_DEF);
	TAILQ_INIT(&phys_pager_object_list);
}
/*
 * Find or create the OBJT_PHYS object that backs a mapping.
 *
 *   handle - identity under which the object is shared.  When NULL a
 *            private object is allocated and never entered on
 *            phys_pager_object_list.
 *   size   - length of the range, in bytes.
 *   prot   - not consulted by this pager.
 *   foff   - starting offset, in bytes; must be page aligned.
 *
 * Returns NULL if foff is not page aligned, otherwise the object.  The
 * object is sized to OFF_TO_IDX(foff + PAGE_MASK + size), i.e. the end of
 * the range with PAGE_MASK added before conversion to a page index.  An
 * already existing object is only ever grown to that size, never shrunk.
 *
 * NOTE(review): no explicit reference is taken on an object returned by
 * vm_pager_object_lookup(); presumably the lookup hands it back already
 * referenced -- confirm against vm_pager_object_lookup().
 *
 * MPSAFE
 */
static vm_object_t
phys_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff)
{
	vm_object_t obj;
	vm_pindex_t npages;

	/* Refuse offsets that do not start on a page boundary. */
	if ((foff & PAGE_MASK) != 0)
		return (NULL);

	npages = OFF_TO_IDX(foff + PAGE_MASK + size);

	/* No handle: nothing to share, so no lookup and no locking. */
	if (handle == NULL)
		return (vm_object_allocate(OBJT_PHYS, npages));

	mtx_lock(&Giant);

	/*
	 * Enter the lookup-or-create sequence one thread at a time.  A
	 * thread that finds the flag set marks it -1 to announce that it
	 * is waiting, then sleeps on the flag's address until woken.
	 */
	while (phys_pager_alloc_lock != 0) {
		phys_pager_alloc_lock = -1;
		tsleep(&phys_pager_alloc_lock, PVM, "swpalc", 0);
	}
	phys_pager_alloc_lock = 1;

	obj = vm_pager_object_lookup(&phys_pager_object_list, handle);
	if (obj != NULL) {
		/* Known handle: enlarge the object if this request is bigger. */
		if (obj->size < npages)
			obj->size = npages;
	} else {
		/*
		 * First request for this handle: create the object, tag it
		 * with the handle and publish it on the list.
		 */
		obj = vm_object_allocate(OBJT_PHYS, npages);
		obj->handle = handle;
		mtx_lock(&phys_pager_mtx);
		TAILQ_INSERT_TAIL(&phys_pager_object_list, obj,
		    pager_object_list);
		mtx_unlock(&phys_pager_mtx);
	}

	/* Leave the sequence; wake sleepers only if one announced itself. */
	if (phys_pager_alloc_lock == -1)
		wakeup(&phys_pager_alloc_lock);
	phys_pager_alloc_lock = 0;

	mtx_unlock(&Giant);
	return (obj);
}
/*
 * Pager-specific teardown for an object (installed as pgo_dealloc).
 *
 *   object - the object being torn down.
 *
 * Unlinks the object from phys_pager_object_list under phys_pager_mtx.
 * Does nothing for objects whose handle is NULL, since phys_pager_alloc()
 * only puts handle-bearing objects on the list.
 *
 * MPSAFE
 */
static void
phys_pager_dealloc(vm_object_t object)
{

	/* Handle-less objects were never linked, so there is nothing to undo. */
	if (object->handle == NULL)
		return;

	mtx_lock(&phys_pager_mtx);
	TAILQ_REMOVE(&phys_pager_object_list, object, pager_object_list);
	mtx_unlock(&phys_pager_mtx);
}
/*
 * Make every page handed in by the caller fully valid and clean.
 *
 *   object  - owning object; its lock must be held (asserted).
 *   m       - array of 'count' pages to fill.
 *   count   - number of entries in m.
 *   reqpage - index within m of the page the caller actually asked for.
 *
 * A page with no valid bits is zero-filled (skipped when the page is
 * already flagged PG_ZERO) and marked entirely valid.  Every page has its
 * dirty bits cleared.  All pages except m[reqpage] are also released from
 * the busy state.  Always returns VM_PAGER_OK.
 */
static int
phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
{
	vm_page_t pg;
	int idx;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);

	for (idx = 0; idx < count; idx++) {
		pg = m[idx];

		if (pg->valid == 0) {
			if ((pg->flags & PG_ZERO) == 0)
				pmap_zero_page(pg);
			pg->valid = VM_PAGE_BITS_ALL;
		}
		/* Anything between "nothing valid" and "all valid" is a bug. */
		KASSERT(pg->valid == VM_PAGE_BITS_ALL,
		    ("phys_pager_getpages: partially valid page %p", pg));
		pg->dirty = 0;

		/* The requested page must remain busy, the others not. */
		if (idx == reqpage)
			continue;
		pg->oflags &= ~VPO_BUSY;
		pg->busy = 0;
	}
	return (VM_PAGER_OK);
}
/*
 * Page-out hook for the physical pager (installed as pgo_putpages).
 *
 * This pager does not implement writing pages back: none of the
 * arguments are examined and any call is treated as a fatal error via
 * panic().  Note that the panic string spells the function name as
 * "phys_pager_putpage" (singular).
 */
static void
phys_pager_putpages(vm_object_t object, vm_page_t *m, int count, boolean_t sync,
int *rtvals)
{
panic("phys_pager_putpage called");
}
/*
 * Size, in pages, of the cluster advertised by phys_pager_haspage().
 * This is a deliberately aggressive clustering hint: with the default of
 * 1024 pages, everything in an entire 4MB window should be prefaulted at
 * once.  The value may be overridden at build time.
 *
 * XXX 4MB (1024 slots per page table page) is convenient for x86,
 * but may not be for other arches.
 */
#ifndef PHYSCLUSTER
#define PHYSCLUSTER 1024
#endif

/*
 * Report whether a page exists and how far a clustered read may extend.
 *
 *   object - not consulted.
 *   pindex - page index being asked about.
 *   before - if non-NULL, receives the number of pages between the start
 *            of pindex's cluster and pindex.
 *   after  - if non-NULL, receives the number of pages between pindex and
 *            the last page of its cluster.
 *
 * Always returns TRUE.  The cluster containing pindex is found by masking
 * off the low bits with (PHYSCLUSTER - 1), which yields PHYSCLUSTER-aligned
 * windows when PHYSCLUSTER is a power of two.
 */
static boolean_t
phys_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before,
    int *after)
{
	const vm_pindex_t mask = PHYSCLUSTER - 1;
	vm_pindex_t first, last;

	first = pindex & ~mask;
	last = first + mask;

	if (before != NULL)
		*before = pindex - first;
	if (after != NULL)
		*after = last - pindex;
	return (TRUE);
}
/*
 * Operations vector for the physical pager: binds each pagerops hook to
 * the corresponding phys_pager_* function defined in this file.  Hooks
 * not named here are left at their zero-initialized default.
 * NOTE(review): presumably referenced from the pager table elsewhere in
 * the VM system, which is why it is not static -- confirm.
 */
struct pagerops physpagerops = {
.pgo_init = phys_pager_init,
.pgo_alloc = phys_pager_alloc,
.pgo_dealloc = phys_pager_dealloc,
.pgo_getpages = phys_pager_getpages,
.pgo_putpages = phys_pager_putpages,
.pgo_haspage = phys_pager_haspage,
};