Reimplement the reclamation of PV entries.  Specifically, perform
reclamation synchronously from get_pv_entry() instead of
asynchronously as part of the page daemon.  Additionally, limit the
reclamation to inactive pages unless allocation from the PV entry zone
or reclamation from the inactive queue fails.  Previously, reclamation
destroyed mappings to both inactive and active pages.  get_pv_entry()
still, however, wakes up the page daemon when reclamation occurs.  The
reason is that the page daemon may move some pages from the active
queue to the inactive queue, making some new pages available to future
reclamations.

Print the "reclaiming PV entries" message at most once per minute, but
don't stop printing it after the fifth time.  This way, we do not give
the impression that the problem has gone away.

Reviewed by: tegge
This commit is contained in:
Alan Cox 2005-11-09 08:19:21 +00:00
parent f4d8522334
commit 7a35a21e7b
8 changed files with 190 additions and 152 deletions

View File

@ -317,11 +317,9 @@ static struct mtx allpmaps_lock;
*/
static uma_zone_t pvzone;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
int pmap_pagedaemon_waken;
static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
static pv_entry_t get_pv_entry(void);
static pv_entry_t pv_entry_reclaim(pmap_t locked_pmap);
static pv_entry_t get_pv_entry(pmap_t locked_pmap);
static void alpha_protection_init(void);
static void pmap_changebit(vm_page_t m, int bit, boolean_t setem);
@ -1300,39 +1298,44 @@ free_pv_entry(pv_entry_t pv)
/*
* get a new pv_entry, allocating a block from the system
* when needed.
* the memory allocation is performed bypassing the malloc code
* because of the possibility of allocations at interrupt time.
*/
static pv_entry_t
get_pv_entry(void)
{
pv_entry_count++;
if ((pv_entry_count > pv_entry_high_water) &&
(pmap_pagedaemon_waken == 0)) {
pmap_pagedaemon_waken = 1;
wakeup (&vm_pages_needed);
}
return uma_zalloc(pvzone, M_NOWAIT);
}
/*
* Reclaim a pv entry by removing a mapping to an inactive page.
*/
static pv_entry_t
pv_entry_reclaim(pmap_t locked_pmap)
get_pv_entry(pmap_t locked_pmap)
{
static const struct timeval printinterval = { 60, 0 };
static struct timeval lastprint;
struct vpgqueues *vpq;
pmap_t pmap;
pt_entry_t *pte, tpte;
pv_entry_t pv;
pv_entry_t allocated_pv, next_pv, pv;
vm_offset_t va;
vm_page_t m;
PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
TAILQ_FOREACH(m, &vm_page_queues[PQ_INACTIVE].pl, pageq) {
allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
if (allocated_pv != NULL) {
pv_entry_count++;
if (pv_entry_count > pv_entry_high_water)
pagedaemon_wakeup();
else
return (allocated_pv);
}
/*
* Reclaim pv entries: At first, destroy mappings to inactive
* pages. After that, if a pv entry is still needed, destroy
* mappings to active pages.
*/
if (ratecheck(&lastprint, &printinterval))
printf("Approaching the limit on PV entries, "
"increase the vm.pmap.shpgperproc tunable.\n");
vpq = &vm_page_queues[PQ_INACTIVE];
retry:
TAILQ_FOREACH(m, &vpq->pl, pageq) {
if (m->hold_count || m->busy || (m->flags & PG_BUSY))
continue;
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
va = pv->pv_va;
pmap = pv->pv_pmap;
if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
@ -1342,7 +1345,7 @@ pv_entry_reclaim(pmap_t locked_pmap)
tpte = *pte;
*pte = 0;
KASSERT((tpte & PG_W) == 0,
("pv_entry_reclaim: wired pte %#lx", tpte));
("get_pv_entry: wired pte %#lx", tpte));
if ((tpte & PG_FOR) == 0)
vm_page_flag_set(m, PG_REFERENCED);
if ((tpte & PG_FOW) == 0) {
@ -1358,10 +1361,20 @@ pv_entry_reclaim(pmap_t locked_pmap)
pmap_unuse_pt(pmap, va, pv->pv_ptem);
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
return (pv);
if (allocated_pv == NULL)
allocated_pv = pv;
else
free_pv_entry(pv);
}
}
panic("pv_entry_reclaim: increase vm.pmap.shpgperproc");
if (allocated_pv == NULL) {
if (vpq == &vm_page_queues[PQ_INACTIVE]) {
vpq = &vm_page_queues[PQ_ACTIVE];
goto retry;
}
panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
}
return (allocated_pv);
}
static int
@ -1408,9 +1421,7 @@ pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
{
pv_entry_t pv;
pv = get_pv_entry();
if (pv == NULL)
pv = pv_entry_reclaim(pmap);
pv = get_pv_entry(pmap);
pv->pv_va = va;
pv->pv_pmap = pmap;
pv->pv_ptem = mpte;

View File

@ -185,7 +185,6 @@ static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */
static uma_zone_t pvzone;
static struct vm_object pvzone_obj;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
int pmap_pagedaemon_waken;
/*
* All those kernel PT submaps that BSD is so fond of
@ -200,8 +199,7 @@ struct msgbuf *msgbufp = 0;
static caddr_t crashdumpmap;
static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
static pv_entry_t get_pv_entry(void);
static pv_entry_t pv_entry_reclaim(pmap_t locked_pmap);
static pv_entry_t get_pv_entry(pmap_t locked_pmap);
static void pmap_clear_ptes(vm_page_t m, long bit);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq,
@ -1428,40 +1426,45 @@ free_pv_entry(pv_entry_t pv)
/*
* get a new pv_entry, allocating a block from the system
* when needed.
* the memory allocation is performed bypassing the malloc code
* because of the possibility of allocations at interrupt time.
*/
static pv_entry_t
get_pv_entry(void)
{
pv_entry_count++;
if ((pv_entry_count > pv_entry_high_water) &&
(pmap_pagedaemon_waken == 0)) {
pmap_pagedaemon_waken = 1;
wakeup (&vm_pages_needed);
}
return uma_zalloc(pvzone, M_NOWAIT);
}
/*
* Reclaim a pv entry by removing a mapping to an inactive page.
*/
static pv_entry_t
pv_entry_reclaim(pmap_t locked_pmap)
get_pv_entry(pmap_t locked_pmap)
{
static const struct timeval printinterval = { 60, 0 };
static struct timeval lastprint;
struct vpgqueues *vpq;
pd_entry_t ptepde;
pmap_t pmap;
pt_entry_t *pte, tpte;
pv_entry_t pv;
pv_entry_t allocated_pv, next_pv, pv;
vm_offset_t va;
vm_page_t m;
PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
TAILQ_FOREACH(m, &vm_page_queues[PQ_INACTIVE].pl, pageq) {
allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
if (allocated_pv != NULL) {
pv_entry_count++;
if (pv_entry_count > pv_entry_high_water)
pagedaemon_wakeup();
else
return (allocated_pv);
}
/*
* Reclaim pv entries: At first, destroy mappings to inactive
* pages. After that, if a pv entry is still needed, destroy
* mappings to active pages.
*/
if (ratecheck(&lastprint, &printinterval))
printf("Approaching the limit on PV entries, "
"increase the vm.pmap.shpgperproc tunable.\n");
vpq = &vm_page_queues[PQ_INACTIVE];
retry:
TAILQ_FOREACH(m, &vpq->pl, pageq) {
if (m->hold_count || m->busy || (m->flags & PG_BUSY))
continue;
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
va = pv->pv_va;
pmap = pv->pv_pmap;
if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
@ -1470,12 +1473,12 @@ pv_entry_reclaim(pmap_t locked_pmap)
pte = pmap_pte_pde(pmap, va, &ptepde);
tpte = pte_load_clear(pte);
KASSERT((tpte & PG_W) == 0,
("pv_entry_reclaim: wired pte %#lx", tpte));
("get_pv_entry: wired pte %#lx", tpte));
if (tpte & PG_A)
vm_page_flag_set(m, PG_REFERENCED);
if (tpte & PG_M) {
KASSERT((tpte & PG_RW),
("pv_entry_reclaim: modified page not writable: va: %#lx, pte: %#lx",
("get_pv_entry: modified page not writable: va: %#lx, pte: %#lx",
va, tpte));
if (pmap_track_modified(va))
vm_page_dirty(m);
@ -1489,10 +1492,20 @@ pv_entry_reclaim(pmap_t locked_pmap)
pmap_unuse_pt(pmap, va, ptepde);
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
return (pv);
if (allocated_pv == NULL)
allocated_pv = pv;
else
free_pv_entry(pv);
}
}
panic("pv_entry_reclaim: increase vm.pmap.shpgperproc");
if (allocated_pv == NULL) {
if (vpq == &vm_page_queues[PQ_INACTIVE]) {
vpq = &vm_page_queues[PQ_ACTIVE];
goto retry;
}
panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
}
return (allocated_pv);
}
static void
@ -1531,11 +1544,7 @@ pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
pv_entry_t pv;
pv = get_pv_entry();
if (pv == NULL) {
pv_entry_count--;
pv = pv_entry_reclaim(pmap);
}
pv = get_pv_entry(pmap);
pv->pv_va = va;
pv->pv_pmap = pmap;

View File

@ -208,7 +208,6 @@ static uma_zone_t pdptzone;
static uma_zone_t pvzone;
static struct vm_object pvzone_obj;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
int pmap_pagedaemon_waken;
/*
* All those kernel PT submaps that BSD is so fond of
@ -255,8 +254,7 @@ SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
static struct mtx PMAP2mutex;
static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
static pv_entry_t get_pv_entry(void);
static pv_entry_t pv_entry_reclaim(pmap_t locked_pmap);
static pv_entry_t get_pv_entry(pmap_t locked_pmap);
static void pmap_clear_ptes(vm_page_t m, int bit);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva);
@ -1442,40 +1440,45 @@ free_pv_entry(pv_entry_t pv)
/*
* get a new pv_entry, allocating a block from the system
* when needed.
* the memory allocation is performed bypassing the malloc code
* because of the possibility of allocations at interrupt time.
*/
static pv_entry_t
get_pv_entry(void)
{
pv_entry_count++;
if ((pv_entry_count > pv_entry_high_water) &&
(pmap_pagedaemon_waken == 0)) {
pmap_pagedaemon_waken = 1;
wakeup (&vm_pages_needed);
}
return uma_zalloc(pvzone, M_NOWAIT);
}
/*
* Reclaim a pv entry by removing a mapping to an inactive page.
*/
static pv_entry_t
pv_entry_reclaim(pmap_t locked_pmap)
get_pv_entry(pmap_t locked_pmap)
{
static const struct timeval printinterval = { 60, 0 };
static struct timeval lastprint;
struct vpgqueues *vpq;
pmap_t pmap;
pt_entry_t *pte, tpte;
pv_entry_t pv;
pv_entry_t allocated_pv, next_pv, pv;
vm_offset_t va;
vm_page_t m;
PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
if (allocated_pv != NULL) {
pv_entry_count++;
if (pv_entry_count > pv_entry_high_water)
pagedaemon_wakeup();
else
return (allocated_pv);
}
/*
* Reclaim pv entries: At first, destroy mappings to inactive
* pages. After that, if a pv entry is still needed, destroy
* mappings to active pages.
*/
if (ratecheck(&lastprint, &printinterval))
printf("Approaching the limit on PV entries, "
"increase the vm.pmap.shpgperproc tunable.\n");
vpq = &vm_page_queues[PQ_INACTIVE];
retry:
sched_pin();
TAILQ_FOREACH(m, &vm_page_queues[PQ_INACTIVE].pl, pageq) {
TAILQ_FOREACH(m, &vpq->pl, pageq) {
if (m->hold_count || m->busy || (m->flags & PG_BUSY))
continue;
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
va = pv->pv_va;
pmap = pv->pv_pmap;
if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
@ -1484,13 +1487,12 @@ pv_entry_reclaim(pmap_t locked_pmap)
pte = pmap_pte_quick(pmap, va);
tpte = pte_load_clear(pte);
KASSERT((tpte & PG_W) == 0,
("pv_entry_reclaim: wired pte %#jx",
(uintmax_t)tpte));
("get_pv_entry: wired pte %#jx", (uintmax_t)tpte));
if (tpte & PG_A)
vm_page_flag_set(m, PG_REFERENCED);
if (tpte & PG_M) {
KASSERT((tpte & PG_RW),
("pv_entry_reclaim: modified page not writable: va: %#x, pte: %#jx",
("get_pv_entry: modified page not writable: va: %#x, pte: %#jx",
va, (uintmax_t)tpte));
if (pmap_track_modified(va))
vm_page_dirty(m);
@ -1504,12 +1506,21 @@ pv_entry_reclaim(pmap_t locked_pmap)
pmap_unuse_pt(pmap, va);
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
sched_unpin();
return (pv);
if (allocated_pv == NULL)
allocated_pv = pv;
else
free_pv_entry(pv);
}
}
sched_unpin();
panic("pv_entry_reclaim: increase vm.pmap.shpgperproc");
if (allocated_pv == NULL) {
if (vpq == &vm_page_queues[PQ_INACTIVE]) {
vpq = &vm_page_queues[PQ_ACTIVE];
goto retry;
}
panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
}
return (allocated_pv);
}
static void
@ -1548,11 +1559,7 @@ pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
pv_entry_t pv;
pv = get_pv_entry();
if (pv == NULL) {
pv_entry_count--;
pv = pv_entry_reclaim(pmap);
}
pv = get_pv_entry(pmap);
pv->pv_va = va;
pv->pv_pmap = pmap;

View File

@ -198,7 +198,6 @@ struct mtx pmap_ridmutex;
*/
static uma_zone_t pvzone;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
int pmap_pagedaemon_waken;
/*
* Data for allocating PTEs for user processes.
@ -232,11 +231,15 @@ static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
NULL, 0, pmap_vhpt_population, "I", "");
static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
static pv_entry_t get_pv_entry(void);
static pv_entry_t get_pv_entry(pmap_t locked_pmap);
static pmap_t pmap_install(pmap_t);
static void pmap_invalidate_all(pmap_t pmap);
static int pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
vm_offset_t va, pv_entry_t pv, int freepte);
vm_offset_t
pmap_steal_memory(vm_size_t size)
@ -807,19 +810,69 @@ free_pv_entry(pv_entry_t pv)
/*
* get a new pv_entry, allocating a block from the system
* when needed.
* the memory allocation is performed bypassing the malloc code
* because of the possibility of allocations at interrupt time.
*/
static pv_entry_t
get_pv_entry(void)
get_pv_entry(pmap_t locked_pmap)
{
pv_entry_count++;
if ((pv_entry_count > pv_entry_high_water) &&
(pmap_pagedaemon_waken == 0)) {
pmap_pagedaemon_waken = 1;
wakeup (&vm_pages_needed);
static const struct timeval printinterval = { 60, 0 };
static struct timeval lastprint;
struct vpgqueues *vpq;
struct ia64_lpte *pte;
pmap_t oldpmap, pmap;
pv_entry_t allocated_pv, next_pv, pv;
vm_offset_t va;
vm_page_t m;
PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
if (allocated_pv != NULL) {
pv_entry_count++;
if (pv_entry_count > pv_entry_high_water)
pagedaemon_wakeup();
else
return (allocated_pv);
}
return uma_zalloc(pvzone, M_NOWAIT);
/*
* Reclaim pv entries: At first, destroy mappings to inactive
* pages. After that, if a pv entry is still needed, destroy
* mappings to active pages.
*/
if (ratecheck(&lastprint, &printinterval))
printf("Approaching the limit on PV entries, "
"increase the vm.pmap.shpgperproc tunable.\n");
vpq = &vm_page_queues[PQ_INACTIVE];
retry:
TAILQ_FOREACH(m, &vpq->pl, pageq) {
if (m->hold_count || m->busy || (m->flags & PG_BUSY))
continue;
TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
va = pv->pv_va;
pmap = pv->pv_pmap;
if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
continue;
oldpmap = pmap_install(pmap);
pte = pmap_find_vhpt(va);
KASSERT(pte != NULL, ("pte"));
pmap_remove_pte(pmap, pte, va, pv, 1);
pmap_install(oldpmap);
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
if (allocated_pv == NULL)
allocated_pv = pv;
else
free_pv_entry(pv);
}
}
if (allocated_pv == NULL) {
if (vpq == &vm_page_queues[PQ_INACTIVE]) {
vpq = &vm_page_queues[PQ_ACTIVE];
goto retry;
}
panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
}
return (allocated_pv);
}
/*
@ -959,9 +1012,7 @@ pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
pv_entry_t pv;
pv = get_pv_entry();
if (pv == NULL)
panic("no pv entries: increase vm.pmap.shpgperproc");
pv = get_pv_entry(pmap);
pv->pv_pmap = pmap;
pv->pv_va = va;

View File

@ -197,8 +197,6 @@ int pmap_bootstrapped = 0;
struct msgbuf *msgbufp;
vm_offset_t msgbuf_phys;
int pmap_pagedaemon_waken;
/*
* Map of physical memory regions.
*/

View File

@ -114,8 +114,6 @@
struct msgbuf *msgbufp;
vm_paddr_t msgbuf_phys;
int pmap_pagedaemon_waken;
/*
* Map of physical memory reagions.
*/

View File

@ -90,8 +90,6 @@ struct thread;
*/
extern vm_offset_t kernel_vm_end;
extern int pmap_pagedaemon_waken;
void pmap_change_wiring(pmap_t, vm_offset_t, boolean_t);
void pmap_clear_modify(vm_page_t m);
void pmap_clear_reference(vm_page_t m);

View File

@ -113,7 +113,6 @@ __FBSDID("$FreeBSD$");
/* the kernel process "vm_pageout"*/
static void vm_pageout(void);
static int vm_pageout_clean(vm_page_t);
static void vm_pageout_pmap_collect(void);
static void vm_pageout_scan(int pass);
struct proc *pageproc;
@ -666,35 +665,6 @@ vm_pageout_map_deactivate_pages(map, desired)
}
#endif /* !defined(NO_SWAPPING) */
/*
* This routine is very drastic, but can save the system
* in a pinch.
*/
static void
vm_pageout_pmap_collect(void)
{
int i;
vm_page_t m;
static int warningdone;
if (pmap_pagedaemon_waken == 0)
return;
if (warningdone < 5) {
printf("collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
warningdone++;
}
vm_page_lock_queues();
for (i = 0; i < vm_page_array_size; i++) {
m = &vm_page_array[i];
if (m->wire_count || m->hold_count || m->busy ||
(m->flags & (PG_BUSY | PG_UNMANAGED)))
continue;
pmap_remove_all(m);
}
vm_page_unlock_queues();
pmap_pagedaemon_waken = 0;
}
/*
* vm_pageout_scan does the dirty work for the pageout daemon.
*/
@ -723,10 +693,6 @@ vm_pageout_scan(int pass)
* We do this explicitly after the caches have been drained above.
*/
uma_reclaim();
/*
* Do whatever cleanup that the pmap code can.
*/
vm_pageout_pmap_collect();
addl_page_shortage_init = atomic_readandclear_int(&vm_pageout_deficit);