arm64 pmap: per-domain pv chunk list

As with amd64, use a per-domain pv chunk lock to reduce contention, since
chunks are created and removed all the time.

Sponsored by:	The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D36307
This commit is contained in:
Andrew Turner 2022-08-19 10:50:06 +00:00
parent 92d73b0b25
commit 3247bc7cd6

View File

@ -150,6 +150,12 @@ __FBSDID("$FreeBSD$");
#include <machine/md_var.h>
#include <machine/pcb.h>
#ifdef NUMA
#define PMAP_MEMDOM MAXMEMDOM
#else
#define PMAP_MEMDOM 1
#endif
#define PMAP_ASSERT_STAGE1(pmap) MPASS((pmap)->pm_stage == PM_STAGE1)
#define PMAP_ASSERT_STAGE2(pmap) MPASS((pmap)->pm_stage == PM_STAGE2)
@ -276,8 +282,28 @@ vm_offset_t kernel_vm_end = 0;
/*
* Data for the pv entry allocation mechanism.
*/
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static struct mtx pv_chunks_mutex;
/*
 * Map a pv chunk to the index of the memory domain it belongs to.
 *
 * With NUMA the chunk's direct-map address is converted to a physical
 * address and passed to vm_phys_domain(); without NUMA the answer is
 * always domain 0 and the argument is not examined.
 */
static __inline int
pc_to_domain(struct pv_chunk *pc __unused)
{
#ifdef NUMA
	return (vm_phys_domain(DMAP_TO_PHYS((vm_offset_t)pc)));
#else
	return (0);
#endif
}
/*
 * Per-domain pv chunk list state.  Each instance is aligned to
 * CACHE_LINE_SIZE.
 */
struct pv_chunks_list {
/* Mutex taken around updates to pvc_list and active_reclaims. */
struct mtx pvc_lock;
/* Tail queue of pv chunks, linked through each chunk's pc_lru entry. */
TAILQ_HEAD(pch, pv_chunk) pvc_list;
/* Incremented on entry to, and decremented on exit from, a reclaim scan. */
int active_reclaims;
} __aligned(CACHE_LINE_SIZE);
/* One list per memory domain: PMAP_MEMDOM entries, indexed by domain. */
struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM];
static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
static struct md_page *pv_table;
static struct md_page pv_dummy;
@ -1324,9 +1350,13 @@ pmap_init(void)
}
/*
* Initialize the pv chunk list mutex.
* Initialize pv chunk lists.
*/
mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
for (i = 0; i < PMAP_MEMDOM; i++) {
mtx_init(&pv_chunks[i].pvc_lock, "pmap pv chunk list", NULL,
MTX_DEF);
TAILQ_INIT(&pv_chunks[i].pvc_list);
}
/*
* Initialize the pool of pv list locks.
@ -2550,8 +2580,9 @@ SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
* exacerbating the shortage of free pv entries.
*/
static vm_page_t
reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
reclaim_pv_chunk_domain(pmap_t locked_pmap, struct rwlock **lockp, int domain)
{
struct pv_chunks_list *pvc;
struct pv_chunk *pc, *pc_marker, *pc_marker_end;
struct pv_chunk_header pc_marker_b, pc_marker_end_b;
struct md_page *pvh;
@ -2564,7 +2595,6 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
struct spglist free;
uint64_t inuse;
int bit, field, freed, lvl;
static int active_reclaims = 0;
PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
@ -2577,10 +2607,11 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
pc_marker = (struct pv_chunk *)&pc_marker_b;
pc_marker_end = (struct pv_chunk *)&pc_marker_end_b;
mtx_lock(&pv_chunks_mutex);
active_reclaims++;
TAILQ_INSERT_HEAD(&pv_chunks, pc_marker, pc_lru);
TAILQ_INSERT_TAIL(&pv_chunks, pc_marker_end, pc_lru);
pvc = &pv_chunks[domain];
mtx_lock(&pvc->pvc_lock);
pvc->active_reclaims++;
TAILQ_INSERT_HEAD(&pvc->pvc_list, pc_marker, pc_lru);
TAILQ_INSERT_TAIL(&pvc->pvc_list, pc_marker_end, pc_lru);
while ((pc = TAILQ_NEXT(pc_marker, pc_lru)) != pc_marker_end &&
SLIST_EMPTY(&free)) {
next_pmap = pc->pc_pmap;
@ -2593,11 +2624,11 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
*/
goto next_chunk;
}
mtx_unlock(&pv_chunks_mutex);
mtx_unlock(&pvc->pvc_lock);
/*
* A pv_chunk can only be removed from the pc_lru list
* when both pv_chunks_mutex is owned and the
* when both pvc->pvc_lock is owned and the
* corresponding pmap is locked.
*/
if (pmap != next_pmap) {
@ -2608,15 +2639,15 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
if (pmap > locked_pmap) {
RELEASE_PV_LIST_LOCK(lockp);
PMAP_LOCK(pmap);
mtx_lock(&pv_chunks_mutex);
mtx_lock(&pvc->pvc_lock);
continue;
} else if (pmap != locked_pmap) {
if (PMAP_TRYLOCK(pmap)) {
mtx_lock(&pv_chunks_mutex);
mtx_lock(&pvc->pvc_lock);
continue;
} else {
pmap = NULL; /* pmap is not locked */
mtx_lock(&pv_chunks_mutex);
mtx_lock(&pvc->pvc_lock);
pc = TAILQ_NEXT(pc_marker, pc_lru);
if (pc == NULL ||
pc->pc_pmap != next_pmap)
@ -2668,7 +2699,7 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
}
}
if (freed == 0) {
mtx_lock(&pv_chunks_mutex);
mtx_lock(&pvc->pvc_lock);
goto next_chunk;
}
/* Every freed mapping is for a 4 KB page. */
@ -2684,20 +2715,20 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
/* Entire chunk is free; return it. */
m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
dump_drop_page(m_pc->phys_addr);
mtx_lock(&pv_chunks_mutex);
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
mtx_lock(&pvc->pvc_lock);
TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
break;
}
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
mtx_lock(&pv_chunks_mutex);
mtx_lock(&pvc->pvc_lock);
/* One freed pv entry in locked_pmap is sufficient. */
if (pmap == locked_pmap)
break;
next_chunk:
TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
TAILQ_INSERT_AFTER(&pv_chunks, pc, pc_marker, pc_lru);
if (active_reclaims == 1 && pmap != NULL) {
TAILQ_REMOVE(&pvc->pvc_list, pc_marker, pc_lru);
TAILQ_INSERT_AFTER(&pvc->pvc_list, pc, pc_marker, pc_lru);
if (pvc->active_reclaims == 1 && pmap != NULL) {
/*
* Rotate the pv chunks list so that we do not
* scan the same pv chunks that could not be
@ -2705,17 +2736,17 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
* and/or superpage mapping) on every
* invocation of reclaim_pv_chunk().
*/
while ((pc = TAILQ_FIRST(&pv_chunks)) != pc_marker) {
while ((pc = TAILQ_FIRST(&pvc->pvc_list)) != pc_marker){
MPASS(pc->pc_pmap != NULL);
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru);
}
}
}
TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
TAILQ_REMOVE(&pv_chunks, pc_marker_end, pc_lru);
active_reclaims--;
mtx_unlock(&pv_chunks_mutex);
TAILQ_REMOVE(&pvc->pvc_list, pc_marker, pc_lru);
TAILQ_REMOVE(&pvc->pvc_list, pc_marker_end, pc_lru);
pvc->active_reclaims--;
mtx_unlock(&pvc->pvc_lock);
if (pmap != NULL && pmap != locked_pmap)
PMAP_UNLOCK(pmap);
if (m_pc == NULL && !SLIST_EMPTY(&free)) {
@ -2728,6 +2759,23 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
return (m_pc);
}
/*
 * Reclaim a pv chunk page, trying the memory domains one after another.
 *
 * The scan starts with the domain obtained from PCPU_GET(domain) and then
 * advances round-robin, wrapping at vm_ndomains, until
 * reclaim_pv_chunk_domain() returns a page or every domain has been tried.
 *
 * locked_pmap and lockp are passed through unchanged to
 * reclaim_pv_chunk_domain().
 *
 * Returns the page produced by the first successful domain, or NULL if no
 * domain produced one.
 */
static vm_page_t
reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
{
	vm_page_t m;
	int i, domain;

	/*
	 * Start from NULL so that the return value is well defined even if
	 * the loop body never executes.
	 */
	m = NULL;
	domain = PCPU_GET(domain);
	for (i = 0; i < vm_ndomains; i++) {
		m = reclaim_pv_chunk_domain(locked_pmap, lockp, domain);
		if (m != NULL)
			break;
		/* Nothing reclaimed here; move on to the next domain. */
		domain = (domain + 1) % vm_ndomains;
	}

	return (m);
}
/*
* free the pv_entry back to the free list
*/
@ -2776,28 +2824,37 @@ free_pv_chunk_dequeued(struct pv_chunk *pc)
/*
 * Unlink a pv chunk from the pc_lru list it is on, then release it through
 * free_pv_chunk_dequeued().
 *
 * NOTE(review): this listing is a diff rendered without +/- markers, so the
 * body shows both sides of the change back to back.  The statements that
 * use pv_chunks_mutex appear to be the removed single-list form; the
 * statements that use pvc appear to be the per-domain replacement.
 */
static void
free_pv_chunk(struct pv_chunk *pc)
{
/* Apparently the pre-change side: one global list and one global mutex. */
mtx_lock(&pv_chunks_mutex);
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
mtx_unlock(&pv_chunks_mutex);
/*
 * Apparently the post-change side: pick the list for the chunk's own
 * domain via pc_to_domain() and unlink the chunk under that list's lock.
 */
struct pv_chunks_list *pvc;
pvc = &pv_chunks[pc_to_domain(pc)];
mtx_lock(&pvc->pvc_lock);
TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
mtx_unlock(&pvc->pvc_lock);
/* The chunk is off the pc_lru list; the lock is not held for this call. */
free_pv_chunk_dequeued(pc);
}
/*
 * Free a batch of pv chunks: first unlink them from the pc_lru list(s),
 * then pass each chunk to free_pv_chunk_dequeued().
 *
 * NOTE(review): this listing is a diff rendered without +/- markers, so the
 * body interleaves both sides of the change and its braces do not balance
 * as shown.  Statements that treat batch as a single list and take
 * pv_chunks_mutex appear to be the removed form; statements that index
 * batch[i] per domain appear to be the replacement.
 */
static void
free_pv_chunk_batch(struct pv_chunklist *batch)
{
struct pv_chunks_list *pvc;
struct pv_chunk *pc, *npc;
int i;
/* Apparently pre-change: batch is one list, guarded by the global mutex. */
if (TAILQ_EMPTY(batch))
return;
mtx_lock(&pv_chunks_mutex);
TAILQ_FOREACH(pc, batch, pc_list) {
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
/*
 * Apparently post-change: batch is an array with one list per domain.
 * For each domain whose list is non-empty, take that domain's lock once
 * and unlink every chunk of the list from the domain's pc_lru list.
 */
for (i = 0; i < vm_ndomains; i++) {
if (TAILQ_EMPTY(&batch[i]))
continue;
pvc = &pv_chunks[i];
mtx_lock(&pvc->pvc_lock);
TAILQ_FOREACH(pc, &batch[i], pc_list) {
TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
}
mtx_unlock(&pvc->pvc_lock);
}
/* Apparently pre-change: drop the global mutex, then free the single list. */
mtx_unlock(&pv_chunks_mutex);
TAILQ_FOREACH_SAFE(pc, batch, pc_list, npc) {
free_pv_chunk_dequeued(pc);
/*
 * Apparently post-change: with no list lock held, free the chunks of every
 * domain's list.  The _SAFE iterator saves the next element in npc before
 * each call.
 */
for (i = 0; i < vm_ndomains; i++) {
TAILQ_FOREACH_SAFE(pc, &batch[i], pc_list, npc) {
free_pv_chunk_dequeued(pc);
}
}
}
@ -2812,6 +2869,7 @@ free_pv_chunk_batch(struct pv_chunklist *batch)
static pv_entry_t
get_pv_entry(pmap_t pmap, struct rwlock **lockp)
{
struct pv_chunks_list *pvc;
int bit, field;
pv_entry_t pv;
struct pv_chunk *pc;
@ -2860,9 +2918,10 @@ get_pv_entry(pmap_t pmap, struct rwlock **lockp)
pc->pc_pmap = pmap;
memcpy(pc->pc_map, pc_freemask, sizeof(pc_freemask));
pc->pc_map[0] &= ~1ul; /* preallocated bit 0 */
mtx_lock(&pv_chunks_mutex);
TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
mtx_unlock(&pv_chunks_mutex);
pvc = &pv_chunks[vm_page_domain(m)];
mtx_lock(&pvc->pvc_lock);
TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru);
mtx_unlock(&pvc->pvc_lock);
pv = &pc->pc_pventry[0];
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
PV_STAT(atomic_add_long(&pv_entry_count, 1));
@ -2879,10 +2938,11 @@ get_pv_entry(pmap_t pmap, struct rwlock **lockp)
static void
reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
{
struct pch new_tail;
struct pv_chunks_list *pvc;
struct pch new_tail[PMAP_MEMDOM];
struct pv_chunk *pc;
vm_page_t m;
int avail, free;
int avail, free, i;
bool reclaimed;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@ -2894,7 +2954,8 @@ reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
* reclaim_pv_chunk() could recycle one of these chunks. In
* contrast, these chunks must be added to the pmap upon allocation.
*/
TAILQ_INIT(&new_tail);
for (i = 0; i < PMAP_MEMDOM; i++)
TAILQ_INIT(&new_tail[i]);
retry:
avail = 0;
TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
@ -2921,7 +2982,7 @@ reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
pc->pc_pmap = pmap;
memcpy(pc->pc_map, pc_freemask, sizeof(pc_freemask));
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
TAILQ_INSERT_TAIL(&new_tail[vm_page_domain(m)], pc, pc_lru);
PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV));
/*
@ -2932,10 +2993,13 @@ reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
if (reclaimed)
goto retry;
}
if (!TAILQ_EMPTY(&new_tail)) {
mtx_lock(&pv_chunks_mutex);
TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
mtx_unlock(&pv_chunks_mutex);
for (i = 0; i < vm_ndomains; i++) {
if (TAILQ_EMPTY(&new_tail[i]))
continue;
pvc = &pv_chunks[i];
mtx_lock(&pvc->pvc_lock);
TAILQ_CONCAT(&pvc->pvc_list, &new_tail[i], pc_lru);
mtx_unlock(&pvc->pvc_lock);
}
}
@ -5276,7 +5340,7 @@ pmap_remove_pages(pmap_t pmap)
pd_entry_t *pde;
pt_entry_t *pte, tpte;
struct spglist free;
struct pv_chunklist free_chunks;
struct pv_chunklist free_chunks[PMAP_MEMDOM];
vm_page_t m, ml3, mt;
pv_entry_t pv;
struct md_page *pvh;
@ -5284,12 +5348,13 @@ pmap_remove_pages(pmap_t pmap)
struct rwlock *lock;
int64_t bit;
uint64_t inuse, bitmask;
int allfree, field, freed, idx, lvl;
int allfree, field, freed, i, idx, lvl;
vm_paddr_t pa;
lock = NULL;
TAILQ_INIT(&free_chunks);
for (i = 0; i < PMAP_MEMDOM; i++)
TAILQ_INIT(&free_chunks[i]);
SLIST_INIT(&free);
PMAP_LOCK(pmap);
TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
@ -5430,13 +5495,14 @@ pmap_remove_pages(pmap_t pmap)
PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
if (allfree) {
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
TAILQ_INSERT_TAIL(&free_chunks, pc, pc_list);
TAILQ_INSERT_TAIL(&free_chunks[pc_to_domain(pc)], pc,
pc_list);
}
}
if (lock != NULL)
rw_wunlock(lock);
pmap_invalidate_all(pmap);
free_pv_chunk_batch(&free_chunks);
free_pv_chunk_batch(free_chunks);
PMAP_UNLOCK(pmap);
vm_page_free_pages_toq(&free, true);
}