Rename pmap_collect() to pmap_pv_reclaim() and rewrite it such that it no
longer uses the active and inactive paging queues.  Instead, the pmap now
maintains an LRU-ordered list of pv entry pages, and pmap_pv_reclaim() uses
this list to select pv entries for reclamation.

Note: The old pmap_collect() tried to avoid reclaiming mappings for pages
that have either a hold_count or a busy field that is non-zero.  However,
this isn't necessary for correctness, and the locking in pmap_collect() was
insufficient to guarantee that such mappings weren't reclaimed.  The new
pmap_pv_reclaim() doesn't even try.

Reviewed by:	kib
MFC after:	6 weeks
Author: Alan Cox
Date:   2012-05-18 05:36:04 +00:00
Commit: 246d751711
Parent: cbc1e6c2f3
2 changed files with 122 additions and 64 deletions
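
For readers who have not seen the pv-chunk machinery before, the following is a
minimal userland sketch of the LRU discipline the commit message describes,
assuming a BSD-style <sys/queue.h>; the toy_chunk type and helper names are
hypothetical stand-ins, not the committed kernel code.  New chunks, and chunks
that just satisfied an allocation, sit at the tail of the list, so reclamation,
which starts at the head, always works on the chunk that has gone longest
without servicing an allocation.

	#include <sys/queue.h>
	#include <stdio.h>
	#include <stdlib.h>

	/* Simplified stand-in for struct pv_chunk: only the LRU linkage matters. */
	struct toy_chunk {
		TAILQ_ENTRY(toy_chunk) lru;
		int id;
	};

	static TAILQ_HEAD(toy_chunk_head, toy_chunk) toy_chunks =
	    TAILQ_HEAD_INITIALIZER(toy_chunks);

	/* A newly allocated chunk is the most recently used: insert at the tail. */
	static struct toy_chunk *
	toy_chunk_create(int id)
	{
		struct toy_chunk *c = malloc(sizeof(*c));

		c->id = id;
		TAILQ_INSERT_TAIL(&toy_chunks, c, lru);
		return (c);
	}

	/* Allocating an entry from an existing chunk refreshes it to the tail. */
	static void
	toy_chunk_used(struct toy_chunk *c)
	{
		if (c != TAILQ_LAST(&toy_chunks, toy_chunk_head)) {
			TAILQ_REMOVE(&toy_chunks, c, lru);
			TAILQ_INSERT_TAIL(&toy_chunks, c, lru);
		}
	}

	/* Reclamation starts at the head: the least recently used chunk. */
	static struct toy_chunk *
	toy_chunk_reclaim(void)
	{
		return (TAILQ_FIRST(&toy_chunks));
	}

	int
	main(void)
	{
		struct toy_chunk *a = toy_chunk_create(1);

		toy_chunk_create(2);
		toy_chunk_used(a);	/* chunk 1 becomes most recently used */
		printf("reclaim picks chunk %d\n", toy_chunk_reclaim()->id); /* 2 */
		return (0);
	}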


@@ -201,6 +201,7 @@ static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */
/*
* Data for the pv entry allocation mechanism
*/
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static long pv_entry_count;
static struct md_page *pv_table;
@@ -2028,71 +2029,136 @@ SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0
"Current number of pv entry allocs");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
"Current number of spare pv entries");
static int pmap_collect_inactive, pmap_collect_active;
SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0,
"Current number times pmap_collect called on inactive queue");
SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0,
"Current number times pmap_collect called on active queue");
#endif
/*
* We are in a serious low memory condition. Resort to
* drastic measures to free some pages so we can allocate
* another pv entry chunk. This is normally called to
* unmap inactive pages, and if necessary, active pages.
* another pv entry chunk.
*
* We do not, however, unmap 2mpages because subsequent accesses will
* allocate per-page pv entries until repromotion occurs, thereby
* exacerbating the shortage of free pv entries.
*/
static void
pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
static vm_page_t
pmap_pv_reclaim(pmap_t locked_pmap)
{
struct pch newtail;
struct pv_chunk *pc;
struct md_page *pvh;
pd_entry_t *pde;
pmap_t pmap;
pt_entry_t *pte, tpte;
pv_entry_t next_pv, pv;
pv_entry_t pv;
vm_offset_t va;
vm_page_t m, free;
TAILQ_FOREACH(m, &vpq->pl, pageq) {
if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy)
continue;
TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
va = pv->pv_va;
pmap = PV_PMAP(pv);
vm_page_t free, m;
uint64_t inuse, freemask;
int bit, field, freed;
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
pmap = NULL;
free = m = NULL;
TAILQ_INIT(&newtail);
while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && free == NULL) {
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
if (pmap != pc->pc_pmap) {
if (pmap != NULL) {
pmap_invalidate_all(pmap);
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
}
pmap = pc->pc_pmap;
/* Avoid deadlock and lock recursion. */
if (pmap > locked_pmap)
PMAP_LOCK(pmap);
else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
pmap = NULL;
TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
continue;
pmap_resident_count_dec(pmap, 1);
pde = pmap_pde(pmap, va);
KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found"
" a 2mpage in page %p's pv list", m));
pte = pmap_pde_to_pte(pde, va);
tpte = pte_load_clear(pte);
KASSERT((tpte & PG_W) == 0,
("pmap_collect: wired pte %#lx", tpte));
if (tpte & PG_A)
vm_page_aflag_set(m, PGA_REFERENCED);
if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
vm_page_dirty(m);
free = NULL;
pmap_unuse_pt(pmap, va, *pde, &free);
pmap_invalidate_page(pmap, va);
pmap_free_zero_pages(free);
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
free_pv_entry(pmap, pv);
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
}
}
if (TAILQ_EMPTY(&m->md.pv_list) &&
TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list))
vm_page_aflag_clear(m, PGA_WRITEABLE);
/*
* Destroy every non-wired, 4 KB page mapping in the chunk.
*/
freed = 0;
for (field = 0; field < _NPCM; field++) {
freemask = 0;
for (inuse = ~pc->pc_map[field] & pc_freemask[field];
inuse != 0; inuse &= ~(1UL << bit)) {
bit = bsfq(inuse);
pv = &pc->pc_pventry[field * 64 + bit];
va = pv->pv_va;
pde = pmap_pde(pmap, va);
if ((*pde & PG_PS) != 0)
continue;
pte = pmap_pde_to_pte(pde, va);
if ((*pte & PG_W) != 0)
continue;
tpte = pte_load_clear(pte);
if ((tpte & PG_G) != 0)
pmap_invalidate_page(pmap, va);
m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
vm_page_dirty(m);
if ((tpte & PG_A) != 0)
vm_page_aflag_set(m, PGA_REFERENCED);
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
if (TAILQ_EMPTY(&m->md.pv_list) &&
(m->flags & PG_FICTITIOUS) == 0) {
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
if (TAILQ_EMPTY(&pvh->pv_list)) {
vm_page_aflag_clear(m,
PGA_WRITEABLE);
}
}
pmap_unuse_pt(pmap, va, *pde, &free);
freemask |= 1UL << bit;
freed++;
}
pc->pc_map[field] |= freemask;
}
if (freed == 0) {
TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
continue;
}
pmap_resident_count_dec(pmap, freed);
PV_STAT(pv_entry_frees += freed);
PV_STAT(pv_entry_spare += freed);
pv_entry_count -= freed;
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
pc->pc_map[2] == PC_FREE2) {
PV_STAT(pv_entry_spare -= _NPCPV);
PV_STAT(pc_chunk_count--);
PV_STAT(pc_chunk_frees++);
/* Entire chunk is free; return it. */
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
dump_drop_page(m->phys_addr);
break;
}
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
/* One freed pv entry in locked_pmap is sufficient. */
if (pmap == locked_pmap)
break;
}
TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
if (pmap != NULL) {
pmap_invalidate_all(pmap);
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
}
if (m == NULL && free != NULL) {
m = free;
free = m->right;
/* Recycle a freed page table page. */
m->wire_count = 1;
atomic_add_int(&cnt.v_wire_count, 1);
}
pmap_free_zero_pages(free);
return (m);
}
/*
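The loop above acquires other pmaps' locks while locked_pmap's lock is already
held.  Here is a minimal sketch of that deadlock-avoidance rule, with userland
pthreads standing in for PMAP_LOCK/PMAP_TRYLOCK and a hypothetical toy_pmap
type; it illustrates the rule at the "Avoid deadlock and lock recursion"
comment, not the kernel code itself.  When the second pmap sorts after the held
one it is safe to block; otherwise only a trylock is attempted and the caller
skips that chunk, so two reclaiming threads can never wait on each other's
locks in opposite orders.

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdint.h>

	struct toy_pmap {
		pthread_mutex_t lock;
	};

	/*
	 * Lock "other" while "held" is already locked.  Blocking is allowed
	 * only in one pointer order (mirroring "if (pmap > locked_pmap)");
	 * in the other order only a trylock is attempted.
	 */
	static bool
	toy_lock_second(struct toy_pmap *held, struct toy_pmap *other)
	{
		if (other == held)
			return (true);		/* lock is already held */
		if ((uintptr_t)other > (uintptr_t)held) {
			pthread_mutex_lock(&other->lock);
			return (true);
		}
		/* Wrong order: try once and let the caller skip this pmap. */
		return (pthread_mutex_trylock(&other->lock) == 0);
	}

	int
	main(void)
	{
		struct toy_pmap a = { PTHREAD_MUTEX_INITIALIZER };
		struct toy_pmap b = { PTHREAD_MUTEX_INITIALIZER };

		pthread_mutex_lock(&a.lock);
		(void)toy_lock_second(&a, &b);	/* never deadlocks */
		return (0);
	}
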
@@ -2125,6 +2191,7 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv)
return;
}
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
PV_STAT(pv_entry_spare -= _NPCPV);
PV_STAT(pc_chunk_count--);
PV_STAT(pc_chunk_frees++);
@@ -2142,7 +2209,6 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv)
static pv_entry_t
get_pv_entry(pmap_t pmap, boolean_t try)
{
struct vpgqueues *pq;
int bit, field;
pv_entry_t pv;
struct pv_chunk *pc;
@@ -2151,7 +2217,6 @@ get_pv_entry(pmap_t pmap, boolean_t try)
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
PV_STAT(pv_entry_allocs++);
pq = NULL;
retry:
pc = TAILQ_FIRST(&pmap->pm_pvchunk);
if (pc != NULL) {
@@ -2171,35 +2236,26 @@ get_pv_entry(pmap_t pmap, boolean_t try)
TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
pc_list);
}
if (pc != TAILQ_LAST(&pv_chunks, pch)) {
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
}
pv_entry_count++;
PV_STAT(pv_entry_spare--);
return (pv);
}
}
/* No free items, allocate another chunk */
m = vm_page_alloc(NULL, 0, (pq == &vm_page_queues[PQ_ACTIVE] ?
VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ |
m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
VM_ALLOC_WIRED);
if (m == NULL) {
if (try) {
PV_STAT(pc_chunk_tryfail++);
return (NULL);
}
/*
* Reclaim pv entries: At first, destroy mappings to inactive
* pages. After that, if a pv chunk entry is still needed,
* destroy mappings to active pages.
*/
if (pq == NULL) {
PV_STAT(pmap_collect_inactive++);
pq = &vm_page_queues[PQ_INACTIVE];
} else if (pq == &vm_page_queues[PQ_INACTIVE]) {
PV_STAT(pmap_collect_active++);
pq = &vm_page_queues[PQ_ACTIVE];
} else
panic("get_pv_entry: allocation failed");
pmap_collect(pmap, pq);
goto retry;
m = pmap_pv_reclaim(pmap);
if (m == NULL)
goto retry;
}
PV_STAT(pc_chunk_count++);
PV_STAT(pc_chunk_allocs++);
@@ -2209,6 +2265,7 @@ get_pv_entry(pmap_t pmap, boolean_t try)
pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */
pc->pc_map[1] = PC_FREE1;
pc->pc_map[2] = PC_FREE2;
TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
pv = &pc->pc_pventry[0];
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
pv_entry_count++;
@@ -4151,6 +4208,7 @@ pmap_remove_pages(pmap_t pmap)
PV_STAT(pc_chunk_count--);
PV_STAT(pc_chunk_frees++);
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
dump_drop_page(m->phys_addr);
vm_page_unwire(m, 0);


@@ -295,7 +295,7 @@ struct pv_chunk {
pmap_t pc_pmap;
TAILQ_ENTRY(pv_chunk) pc_list;
uint64_t pc_map[_NPCM]; /* bitmap; 1 = free */
uint64_t pc_spare[2];
TAILQ_ENTRY(pv_chunk) pc_lru;
struct pv_entry pc_pventry[_NPCPV];
};
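
To make the pc_map walk in pmap_pv_reclaim() above easier to follow, here is a
small userland sketch of the same bitmap scan, assuming the amd64 chunk layout
of three 64-bit words covering 168 pv entries (64 + 64 + 40 valid bits, with
"1 = free" as in the struct above); the toy_ names are hypothetical and
__builtin_ctzll stands in for the bsfq instruction.  Inverting a word and
masking it with the valid-bit mask yields the in-use entries, which are then
visited lowest bit first.

	#include <stdint.h>
	#include <stdio.h>

	#define	TOY_NPCM	3		/* bitmap words per chunk */

	/* Valid bits per word for a 168-entry chunk: 64 + 64 + 40. */
	static const uint64_t toy_freemask[TOY_NPCM] = {
		UINT64_MAX, UINT64_MAX, (UINT64_C(1) << 40) - 1
	};

	/* Visit every in-use entry, mirroring the inner reclaim loop. */
	static void
	toy_walk_inuse(const uint64_t pc_map[TOY_NPCM])
	{
		uint64_t inuse;
		int bit = 0, field;

		for (field = 0; field < TOY_NPCM; field++) {
			for (inuse = ~pc_map[field] & toy_freemask[field];
			    inuse != 0; inuse &= ~(UINT64_C(1) << bit)) {
				bit = __builtin_ctzll(inuse);	/* lowest set bit */
				printf("in-use pv entry index %d\n",
				    field * 64 + bit);
			}
		}
	}

	int
	main(void)
	{
		/* Everything free except entries 0 and 70 (field 1, bit 6). */
		uint64_t pc_map[TOY_NPCM] = {
			toy_freemask[0] & ~UINT64_C(1),
			toy_freemask[1] & ~(UINT64_C(1) << 6),
			toy_freemask[2]
		};

		toy_walk_inuse(pc_map);		/* prints 0 and 70 */
		return (0);
	}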