Rename pmap_collect() to pmap_pv_reclaim() and rewrite it so that it no
longer uses the active and inactive paging queues. Instead, the pmap now
maintains an LRU-ordered list of pv entry pages, and pmap_pv_reclaim() uses
this list to select pv entries for reclamation.

Note: The old pmap_collect() tried to avoid reclaiming mappings for pages
whose hold_count or busy field is non-zero. However, this isn't necessary
for correctness, and the locking in pmap_collect() was insufficient to
guarantee that such mappings weren't reclaimed. The new pmap_pv_reclaim()
doesn't even try.

MFC after: 5 weeks
This commit is contained in:
parent
6ee10a96c0
commit
33853281b4
@ -233,6 +233,7 @@ static int pat_index[PAT_INDEX_SIZE]; /* cache mode to PAT index conversion */
|
||||
/*
|
||||
* Data for the pv entry allocation mechanism
|
||||
*/
|
||||
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
|
||||
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
|
||||
static struct md_page *pv_table;
|
||||
static int shpgperproc = PMAP_SHPGPERPROC;
|
||||
@ -2187,69 +2188,144 @@ SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0
|
||||
"Current number of pv entry allocs");
|
||||
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
|
||||
"Current number of spare pv entries");
|
||||
|
||||
static int pmap_collect_inactive, pmap_collect_active;
|
||||
|
||||
SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0,
|
||||
"Current number times pmap_collect called on inactive queue");
|
||||
SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0,
|
||||
"Current number times pmap_collect called on active queue");
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We are in a serious low memory condition. Resort to
|
||||
* drastic measures to free some pages so we can allocate
|
||||
* another pv entry chunk. This is normally called to
|
||||
* unmap inactive pages, and if necessary, active pages.
|
||||
* another pv entry chunk.
|
||||
*/
|
||||
static void
|
||||
pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
|
||||
static vm_page_t
|
||||
pmap_pv_reclaim(pmap_t locked_pmap)
|
||||
{
|
||||
struct pch newtail;
|
||||
struct pv_chunk *pc;
|
||||
struct md_page *pvh;
|
||||
pd_entry_t *pde;
|
||||
pmap_t pmap;
|
||||
pt_entry_t *pte, tpte;
|
||||
pv_entry_t next_pv, pv;
|
||||
pv_entry_t pv;
|
||||
vm_offset_t va;
|
||||
vm_page_t m, free;
|
||||
vm_page_t free, m, m_pc;
|
||||
uint32_t inuse, freemask;
|
||||
int bit, field, freed;
|
||||
|
||||
PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
|
||||
pmap = NULL;
|
||||
free = m_pc = NULL;
|
||||
TAILQ_INIT(&newtail);
|
||||
sched_pin();
|
||||
TAILQ_FOREACH(m, &vpq->pl, pageq) {
|
||||
if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy)
|
||||
continue;
|
||||
TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
|
||||
va = pv->pv_va;
|
||||
pmap = PV_PMAP(pv);
|
||||
while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 ||
|
||||
free == NULL)) {
|
||||
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
|
||||
if (pmap != pc->pc_pmap) {
|
||||
if (pmap != NULL) {
|
||||
pmap_invalidate_all(pmap);
|
||||
if (pmap != locked_pmap)
|
||||
PMAP_UNLOCK(pmap);
|
||||
}
|
||||
pmap = pc->pc_pmap;
|
||||
/* Avoid deadlock and lock recursion. */
|
||||
if (pmap > locked_pmap)
|
||||
PMAP_LOCK(pmap);
|
||||
else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
|
||||
else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
|
||||
pmap = NULL;
|
||||
TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
|
||||
continue;
|
||||
pmap->pm_stats.resident_count--;
|
||||
pde = pmap_pde(pmap, va);
|
||||
KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found"
|
||||
" a 4mpage in page %p's pv list", m));
|
||||
pte = pmap_pte_quick(pmap, va);
|
||||
tpte = pte_load_clear(pte);
|
||||
KASSERT((tpte & PG_W) == 0,
|
||||
("pmap_collect: wired pte %#jx", (uintmax_t)tpte));
|
||||
if (tpte & PG_A)
|
||||
vm_page_aflag_set(m, PGA_REFERENCED);
|
||||
if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
|
||||
vm_page_dirty(m);
|
||||
free = NULL;
|
||||
pmap_unuse_pt(pmap, va, &free);
|
||||
pmap_invalidate_page(pmap, va);
|
||||
pmap_free_zero_pages(free);
|
||||
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
|
||||
free_pv_entry(pmap, pv);
|
||||
if (pmap != locked_pmap)
|
||||
PMAP_UNLOCK(pmap);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Destroy every non-wired, 4 KB page mapping in the chunk.
|
||||
*/
|
||||
freed = 0;
|
||||
for (field = 0; field < _NPCM; field++) {
|
||||
freemask = 0;
|
||||
for (inuse = ~pc->pc_map[field] & pc_freemask[field];
|
||||
inuse != 0; inuse &= ~(1UL << bit)) {
|
||||
bit = bsfl(inuse);
|
||||
pv = &pc->pc_pventry[field * 32 + bit];
|
||||
va = pv->pv_va;
|
||||
pde = pmap_pde(pmap, va);
|
||||
if ((*pde & PG_PS) != 0)
|
||||
continue;
|
||||
pte = pmap_pte_quick(pmap, va);
|
||||
if ((*pte & PG_W) != 0)
|
||||
continue;
|
||||
tpte = pte_load_clear(pte);
|
||||
if ((tpte & PG_G) != 0)
|
||||
pmap_invalidate_page(pmap, va);
|
||||
m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
|
||||
if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
|
||||
vm_page_dirty(m);
|
||||
if ((tpte & PG_A) != 0)
|
||||
vm_page_aflag_set(m, PGA_REFERENCED);
|
||||
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
|
||||
if (TAILQ_EMPTY(&m->md.pv_list) &&
|
||||
(m->flags & PG_FICTITIOUS) == 0) {
|
||||
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
|
||||
if (TAILQ_EMPTY(&pvh->pv_list)) {
|
||||
vm_page_aflag_clear(m,
|
||||
PGA_WRITEABLE);
|
||||
}
|
||||
}
|
||||
pmap_unuse_pt(pmap, va, &free);
|
||||
freemask |= 1UL << bit;
|
||||
freed++;
|
||||
}
|
||||
pc->pc_map[field] |= freemask;
|
||||
}
|
||||
if (freed == 0) {
|
||||
TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
|
||||
continue;
|
||||
}
|
||||
pmap->pm_stats.resident_count -= freed;
|
||||
PV_STAT(pv_entry_frees += freed);
|
||||
PV_STAT(pv_entry_spare += freed);
|
||||
pv_entry_count -= freed;
|
||||
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
|
||||
for (field = 0; field < _NPCM; field++)
|
||||
if (pc->pc_map[field] != pc_freemask[field]) {
|
||||
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
|
||||
pc_list);
|
||||
TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
|
||||
|
||||
/*
|
||||
* One freed pv entry in locked_pmap is
|
||||
* sufficient.
|
||||
*/
|
||||
if (pmap == locked_pmap)
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
if (field == _NPCM) {
|
||||
PV_STAT(pv_entry_spare -= _NPCPV);
|
||||
PV_STAT(pc_chunk_count--);
|
||||
PV_STAT(pc_chunk_frees++);
|
||||
/* Entire chunk is free; return it. */
|
||||
m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
|
||||
pmap_qremove((vm_offset_t)pc, 1);
|
||||
pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
|
||||
break;
|
||||
}
|
||||
if (TAILQ_EMPTY(&m->md.pv_list) &&
|
||||
TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list))
|
||||
vm_page_aflag_clear(m, PGA_WRITEABLE);
|
||||
}
|
||||
out:
|
||||
sched_unpin();
|
||||
TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
|
||||
if (pmap != NULL) {
|
||||
pmap_invalidate_all(pmap);
|
||||
if (pmap != locked_pmap)
|
||||
PMAP_UNLOCK(pmap);
|
||||
}
|
||||
if (m_pc == NULL && pv_vafree != 0 && free != NULL) {
|
||||
m_pc = free;
|
||||
free = m_pc->right;
|
||||
/* Recycle a freed page table page. */
|
||||
m_pc->wire_count = 1;
|
||||
atomic_add_int(&cnt.v_wire_count, 1);
|
||||
}
|
||||
pmap_free_zero_pages(free);
|
||||
return (m_pc);
|
||||
}
|
||||
|
||||
|
||||
@ -2280,6 +2356,7 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv)
|
||||
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
|
||||
return;
|
||||
}
|
||||
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
|
||||
PV_STAT(pv_entry_spare -= _NPCPV);
|
||||
PV_STAT(pc_chunk_count--);
|
||||
PV_STAT(pc_chunk_frees++);
|
||||
@ -2300,7 +2377,6 @@ get_pv_entry(pmap_t pmap, int try)
|
||||
{
|
||||
static const struct timeval printinterval = { 60, 0 };
|
||||
static struct timeval lastprint;
|
||||
struct vpgqueues *pq;
|
||||
int bit, field;
|
||||
pv_entry_t pv;
|
||||
struct pv_chunk *pc;
|
||||
@ -2315,7 +2391,6 @@ get_pv_entry(pmap_t pmap, int try)
|
||||
printf("Approaching the limit on PV entries, consider "
|
||||
"increasing either the vm.pmap.shpgperproc or the "
|
||||
"vm.pmap.pv_entry_max tunable.\n");
|
||||
pq = NULL;
|
||||
retry:
|
||||
pc = TAILQ_FIRST(&pmap->pm_pvchunk);
|
||||
if (pc != NULL) {
|
||||
@ -2336,6 +2411,10 @@ get_pv_entry(pmap_t pmap, int try)
|
||||
}
|
||||
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
|
||||
TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
|
||||
if (pc != TAILQ_LAST(&pv_chunks, pch)) {
|
||||
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
|
||||
TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
|
||||
}
|
||||
PV_STAT(pv_entry_spare--);
|
||||
return (pv);
|
||||
}
|
||||
@ -2345,29 +2424,16 @@ get_pv_entry(pmap_t pmap, int try)
|
||||
* queues lock. If "pv_vafree" is currently non-empty, it will
|
||||
* remain non-empty until pmap_ptelist_alloc() completes.
|
||||
*/
|
||||
if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, (pq ==
|
||||
&vm_page_queues[PQ_ACTIVE] ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) |
|
||||
if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
|
||||
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
|
||||
if (try) {
|
||||
pv_entry_count--;
|
||||
PV_STAT(pc_chunk_tryfail++);
|
||||
return (NULL);
|
||||
}
|
||||
/*
|
||||
* Reclaim pv entries: At first, destroy mappings to
|
||||
* inactive pages. After that, if a pv chunk entry
|
||||
* is still needed, destroy mappings to active pages.
|
||||
*/
|
||||
if (pq == NULL) {
|
||||
PV_STAT(pmap_collect_inactive++);
|
||||
pq = &vm_page_queues[PQ_INACTIVE];
|
||||
} else if (pq == &vm_page_queues[PQ_INACTIVE]) {
|
||||
PV_STAT(pmap_collect_active++);
|
||||
pq = &vm_page_queues[PQ_ACTIVE];
|
||||
} else
|
||||
panic("get_pv_entry: increase vm.pmap.shpgperproc");
|
||||
pmap_collect(pmap, pq);
|
||||
goto retry;
|
||||
m = pmap_pv_reclaim(pmap);
|
||||
if (m == NULL)
|
||||
goto retry;
|
||||
}
|
||||
PV_STAT(pc_chunk_count++);
|
||||
PV_STAT(pc_chunk_allocs++);
|
||||
@ -2377,6 +2443,7 @@ get_pv_entry(pmap_t pmap, int try)
|
||||
pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */
|
||||
for (field = 1; field < _NPCM; field++)
|
||||
pc->pc_map[field] = pc_freemask[field];
|
||||
TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
|
||||
pv = &pc->pc_pventry[0];
|
||||
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
|
||||
PV_STAT(pv_entry_spare += _NPCPV - 1);
|
||||
@ -4374,6 +4441,7 @@ pmap_remove_pages(pmap_t pmap)
|
||||
PV_STAT(pc_chunk_count--);
|
||||
PV_STAT(pc_chunk_frees++);
|
||||
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
|
||||
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
|
||||
m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
|
||||
pmap_qremove((vm_offset_t)pc, 1);
|
||||
vm_page_unwire(m, 0);
|
||||
|
@ -481,7 +481,7 @@ struct pv_chunk {
|
||||
pmap_t pc_pmap;
|
||||
TAILQ_ENTRY(pv_chunk) pc_list;
|
||||
uint32_t pc_map[_NPCM]; /* bitmap; 1 = free */
|
||||
uint32_t pc_spare[2];
|
||||
TAILQ_ENTRY(pv_chunk) pc_lru;
|
||||
struct pv_entry pc_pventry[_NPCPV];
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user