Long, long ago in r27464 special case code for mapping device-backed

memory with 4MB pages was added to pmap_object_init_pt().  This code
assumes that the pages of a OBJT_DEVICE object are always physically
contiguous.  Unfortunately, this is not always the case.  For example,
jhb@ informs me that the recently introduced /dev/ksyms driver creates
a OBJT_DEVICE object that violates this assumption.  Thus, this
revision modifies pmap_object_init_pt() to abort the mapping if the
OBJT_DEVICE object's pages are not physically contiguous.  This
revision also changes some inconsistent if not buggy behavior.  For
example, the i386 version aborts if the first 4MB virtual page that
would be mapped is already valid.  However, it incorrectly replaces
any subsequent 4MB virtual page mappings that it encounters,
potentially leaking a page table page.  The amd64 version has a bug of
my own creation.  It potentially busies the wrong page and always an
insufficent number of pages if it blocks allocating a page table page.

To my knowledge, there have been no reports of these bugs, hence,
their persistance.  I suspect that the existing restrictions that
pmap_object_init_pt() placed on the OBJT_DEVICE objects that it would
choose to map, for example, that the first page must be aligned on a 2
or 4MB physical boundary and that the size of the mapping must be a
multiple of the large page size, were enough to avoid triggering the
bug for drivers like ksyms.  However, one side effect of testing the
OBJT_DEVICE object's pages for physical contiguity is that a dubious
difference between pmap_object_init_pt() and the standard path for
mapping devices pages, i.e., vm_fault(), has been eliminated.
Previously, pmap_object_init_pt() would only instantiate the first
PG_FICTITOUS page being mapped because it never examined the rest.
Now, however, pmap_object_init_pt() uses the new function
vm_object_populate() to instantiate them all (in order to support
testing their physical contiguity).  These pages need to be
instantiated for the mechanism that I have prototyped for
automatically maintaining the consistency of the PAT settings across
multiple mappings, particularly, amd64's direct mapping, to work.
(Translation: This change is also being made to support jhb@'s work on
the Nvidia feature requests.)

Discussed with:	jhb@
This commit is contained in:
alc 2009-06-14 19:51:43 +00:00
parent c2624ed238
commit 07cfd3813e
4 changed files with 133 additions and 92 deletions

View File

@ -3322,78 +3322,74 @@ void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
vm_pindex_t pindex, vm_size_t size)
{
vm_offset_t va;
pd_entry_t *pde;
vm_paddr_t pa, ptepa;
vm_page_t p, pdpg;
VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
KASSERT(object->type == OBJT_DEVICE,
("pmap_object_init_pt: non-device object"));
if (((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) {
vm_page_t m[1];
pd_entry_t ptepa, *pde;
PMAP_LOCK(pmap);
pde = pmap_pde(pmap, addr);
if (pde != 0 && (*pde & PG_V) != 0)
goto out;
PMAP_UNLOCK(pmap);
retry:
if ((addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) {
if (!vm_object_populate(object, pindex, pindex + atop(size)))
return;
p = vm_page_lookup(object, pindex);
if (p != NULL) {
if (vm_page_sleep_if_busy(p, FALSE, "init4p"))
goto retry;
} else {
p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
if (p == NULL)
return;
m[0] = p;
if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
vm_page_lock_queues();
vm_page_free(p);
vm_page_unlock_queues();
return;
}
p = vm_page_lookup(object, pindex);
vm_page_wakeup(p);
}
KASSERT(p->valid == VM_PAGE_BITS_ALL,
("pmap_object_init_pt: invalid page %p", p));
/*
* Abort the mapping if the first page is not physically
* aligned to a 2MB page boundary.
*/
ptepa = VM_PAGE_TO_PHYS(p);
if (ptepa & (NBPDR - 1))
return;
p->valid = VM_PAGE_BITS_ALL;
/*
* Skip the first page. Abort the mapping if the rest of
* the pages are not physically contiguous.
*/
p = TAILQ_NEXT(p, listq);
for (pa = ptepa + PAGE_SIZE; pa < ptepa + size;
pa += PAGE_SIZE) {
KASSERT(p->valid == VM_PAGE_BITS_ALL,
("pmap_object_init_pt: invalid page %p", p));
if (pa != VM_PAGE_TO_PHYS(p))
return;
p = TAILQ_NEXT(p, listq);
}
/* Map using 2MB pages. */
PMAP_LOCK(pmap);
for (va = addr; va < addr + size; va += NBPDR) {
while ((pdpg =
pmap_allocpde(pmap, va, M_NOWAIT)) == NULL) {
PMAP_UNLOCK(pmap);
vm_page_busy(p);
VM_OBJECT_UNLOCK(object);
VM_WAIT;
VM_OBJECT_LOCK(object);
vm_page_wakeup(p);
PMAP_LOCK(pmap);
for (pa = ptepa; pa < ptepa + size; pa += NBPDR) {
pdpg = pmap_allocpde(pmap, addr, M_NOWAIT);
if (pdpg == NULL) {
/*
* The creation of mappings below is only an
* optimization. If a page directory page
* cannot be allocated without blocking,
* continue on to the next mapping rather than
* blocking.
*/
addr += NBPDR;
continue;
}
pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg));
pde = &pde[pmap_pde_index(va)];
pde = &pde[pmap_pde_index(addr)];
if ((*pde & PG_V) == 0) {
pde_store(pde, ptepa | PG_PS | PG_M | PG_A |
pde_store(pde, pa | PG_PS | PG_M | PG_A |
PG_U | PG_RW | PG_V);
pmap->pm_stats.resident_count +=
NBPDR / PAGE_SIZE;
pmap->pm_stats.resident_count += NBPDR /
PAGE_SIZE;
pmap_pde_mappings++;
} else {
/* Continue on if the PDE is already valid. */
pdpg->wire_count--;
KASSERT(pdpg->wire_count > 0,
("pmap_object_init_pt: missing reference "
"to page directory page, va: 0x%lx", va));
"to page directory page, va: 0x%lx", addr));
}
ptepa += NBPDR;
addr += NBPDR;
}
pmap_invalidate_all(pmap);
out:
PMAP_UNLOCK(pmap);
}
}

View File

@ -3442,62 +3442,57 @@ void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
vm_pindex_t pindex, vm_size_t size)
{
pd_entry_t *pde;
vm_paddr_t pa, ptepa;
vm_page_t p;
VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
KASSERT(object->type == OBJT_DEVICE,
("pmap_object_init_pt: non-device object"));
if (pseflag &&
((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) {
int i;
vm_page_t m[1];
unsigned int ptepindex;
int npdes;
pd_entry_t ptepa;
PMAP_LOCK(pmap);
if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
goto out;
PMAP_UNLOCK(pmap);
retry:
(addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) {
if (!vm_object_populate(object, pindex, pindex + atop(size)))
return;
p = vm_page_lookup(object, pindex);
if (p != NULL) {
if (vm_page_sleep_if_busy(p, FALSE, "init4p"))
goto retry;
} else {
p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
if (p == NULL)
return;
m[0] = p;
if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
vm_page_lock_queues();
vm_page_free(p);
vm_page_unlock_queues();
return;
}
p = vm_page_lookup(object, pindex);
vm_page_wakeup(p);
}
KASSERT(p->valid == VM_PAGE_BITS_ALL,
("pmap_object_init_pt: invalid page %p", p));
/*
* Abort the mapping if the first page is not physically
* aligned to a 2/4MB page boundary.
*/
ptepa = VM_PAGE_TO_PHYS(p);
if (ptepa & (NBPDR - 1))
return;
p->valid = VM_PAGE_BITS_ALL;
PMAP_LOCK(pmap);
pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
npdes = size >> PDRSHIFT;
for(i = 0; i < npdes; i++) {
pde_store(&pmap->pm_pdir[ptepindex],
ptepa | PG_U | PG_RW | PG_V | PG_PS);
ptepa += NBPDR;
ptepindex += 1;
/*
* Skip the first page. Abort the mapping if the rest of
* the pages are not physically contiguous.
*/
p = TAILQ_NEXT(p, listq);
for (pa = ptepa + PAGE_SIZE; pa < ptepa + size;
pa += PAGE_SIZE) {
KASSERT(p->valid == VM_PAGE_BITS_ALL,
("pmap_object_init_pt: invalid page %p", p));
if (pa != VM_PAGE_TO_PHYS(p))
return;
p = TAILQ_NEXT(p, listq);
}
/* Map using 2/4MB pages. */
PMAP_LOCK(pmap);
for (pa = ptepa; pa < ptepa + size; pa += NBPDR) {
pde = pmap_pde(pmap, addr);
if (*pde == 0) {
pde_store(pde, pa | PG_PS | PG_M | PG_A |
PG_U | PG_RW | PG_V);
pmap->pm_stats.resident_count += NBPDR /
PAGE_SIZE;
pmap_pde_mappings++;
}
/* Else continue on if the PDE is already valid. */
addr += NBPDR;
}
pmap_invalidate_all(pmap);
out:
PMAP_UNLOCK(pmap);
}
}

View File

@ -1930,6 +1930,55 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
vm_page_cache_free(object, start, end);
}
/*
* Populate the specified range of the object with valid pages. Returns
* TRUE if the range is successfully populated and FALSE otherwise.
*
* Note: This function should be optimized to pass a larger array of
* pages to vm_pager_get_pages() before it is applied to a non-
* OBJT_DEVICE object.
*
* The object must be locked.
*/
boolean_t
vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
{
vm_page_t m, ma[1];
vm_pindex_t pindex;
int rv;
VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
for (pindex = start; pindex < end; pindex++) {
m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL |
VM_ALLOC_RETRY);
if (m->valid != VM_PAGE_BITS_ALL) {
ma[0] = m;
rv = vm_pager_get_pages(object, ma, 1, 0);
m = vm_page_lookup(object, pindex);
if (m == NULL)
break;
if (rv != VM_PAGER_OK) {
vm_page_lock_queues();
vm_page_free(m);
vm_page_unlock_queues();
break;
}
}
/*
* Keep "m" busy because a subsequent iteration may unlock
* the object.
*/
}
if (pindex > start) {
m = vm_page_lookup(object, start);
while (m != NULL && m->pindex < pindex) {
vm_page_wakeup(m);
m = TAILQ_NEXT(m, listq);
}
}
return (pindex == end);
}
/*
* Routine: vm_object_coalesce
* Function: Coalesces two objects backing up adjoining

View File

@ -207,6 +207,7 @@ void vm_object_set_writeable_dirty (vm_object_t);
void vm_object_init (void);
void vm_object_page_clean (vm_object_t, vm_pindex_t, vm_pindex_t, boolean_t);
void vm_object_page_remove (vm_object_t, vm_pindex_t, vm_pindex_t, boolean_t);
boolean_t vm_object_populate(vm_object_t, vm_pindex_t, vm_pindex_t);
void vm_object_reference (vm_object_t);
void vm_object_reference_locked(vm_object_t);
void vm_object_shadow (vm_object_t *, vm_ooffset_t *, vm_size_t);