amd64 pmap: batch chunk removal in pmap_remove_pages

The pv list lock is the main bottleneck during poudriere -j 104, and
pmap_remove_pages is its most impactful consumer. It frees pv chunks with
the lock held even though the lock plays no role in the correctness of the
freeing. Moreover, chunks are often freed in groups; sample counts during
buildkernel (0-sized frees removed):

    value  ------------- Distribution ------------- count
          0 |                                         0
          1 |                                         8
          2 |@@@@@@@                                  19329
          4 |@@@@@@@@@@@@@@@@@@@@@@                   58517
          8 |                                         1085
         16 |                                         71
         32 |@@@@@@@@@@                               24919
         64 |                                         899
        128 |                                         7
        256 |                                         2
        512 |                                         0

Thus:
1. batch the freeing
2. move it past unlocking the pv list (see the sketch below)
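
Illustration (not part of the change itself): a minimal userland sketch of
the idiom, using hypothetical names (struct obj, obj_lock, drain_objs)
rather than the pmap ones. Items are unlinked onto a local list while the
contended lock is held and are only destroyed after the lock is dropped.

    /*
     * Sketch of deferred batch freeing: dequeue under the lock, free after
     * dropping it.  Uses FreeBSD's queue(3) macros; all names are made up.
     */
    #include <sys/queue.h>
    #include <pthread.h>
    #include <stdlib.h>

    struct obj {
            TAILQ_ENTRY(obj) link;
    };
    TAILQ_HEAD(objlist, obj);

    static struct objlist all_objs = TAILQ_HEAD_INITIALIZER(all_objs);
    static pthread_mutex_t obj_lock = PTHREAD_MUTEX_INITIALIZER;

    static void
    drain_objs(void)
    {
            struct objlist batch;
            struct obj *o, *tmp;

            TAILQ_INIT(&batch);

            /* Unlink everything in one pass while the lock is held... */
            pthread_mutex_lock(&obj_lock);
            TAILQ_CONCAT(&batch, &all_objs, link);
            pthread_mutex_unlock(&obj_lock);

            /* ...and free it afterwards, without holding the lock. */
            TAILQ_FOREACH_SAFE(o, &batch, link, tmp)
                    free(o);
    }

    int
    main(void)
    {
            struct obj *o;
            int i;

            for (i = 0; i < 4; i++) {
                    if ((o = calloc(1, sizeof(*o))) == NULL)
                            break;
                    pthread_mutex_lock(&obj_lock);
                    TAILQ_INSERT_TAIL(&all_objs, o, link);
                    pthread_mutex_unlock(&obj_lock);
            }
            drain_objs();
            return (0);
    }

In the diff below the same idea is applied to pv chunks: pmap_remove_pages
collects exhausted chunks on a local pv_chunklist while it still holds its
locks, and free_pv_chunk_batch then takes pv_chunks_mutex once to unlink
them from the global LRU list, freeing them only after the pv list lock has
been dropped.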

Reviewed by:	alc (previous version), markj (previous version), kib
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D21832

commit 2e8c47f811 (parent a32b126360)
Author: mjg
Date:   2019-09-29 20:44:13 +00:00

--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1105,7 +1105,10 @@ static caddr_t crashdumpmap;
 #define MAPDEV_FLUSHCACHE 0x0000001 /* Flush cache after mapping. */
 #define MAPDEV_SETATTR 0x0000002 /* Modify existing attrs. */
 
+TAILQ_HEAD(pv_chunklist, pv_chunk);
+
 static void free_pv_chunk(struct pv_chunk *pc);
+static void free_pv_chunk_batch(struct pv_chunklist *batch);
 static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
 static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
 static int popcnt_pc_map_pq(uint64_t *map);
@@ -4248,13 +4251,10 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv)
 }
 
 static void
-free_pv_chunk(struct pv_chunk *pc)
+free_pv_chunk_dequeued(struct pv_chunk *pc)
 {
         vm_page_t m;
 
-        mtx_lock(&pv_chunks_mutex);
-        TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
-        mtx_unlock(&pv_chunks_mutex);
         PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
         PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
         PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
@@ -4265,6 +4265,35 @@ free_pv_chunk(struct pv_chunk *pc)
         vm_page_free(m);
 }
 
+static void
+free_pv_chunk(struct pv_chunk *pc)
+{
+
+        mtx_lock(&pv_chunks_mutex);
+        TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+        mtx_unlock(&pv_chunks_mutex);
+        free_pv_chunk_dequeued(pc);
+}
+
+static void
+free_pv_chunk_batch(struct pv_chunklist *batch)
+{
+        struct pv_chunk *pc, *npc;
+
+        if (TAILQ_EMPTY(batch))
+                return;
+
+        mtx_lock(&pv_chunks_mutex);
+        TAILQ_FOREACH(pc, batch, pc_list) {
+                TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+        }
+        mtx_unlock(&pv_chunks_mutex);
+
+        TAILQ_FOREACH_SAFE(pc, batch, pc_list, npc) {
+                free_pv_chunk_dequeued(pc);
+        }
+}
+
 /*
  * Returns a new PV entry, allocating a new PV chunk from the system when
  * needed.  If this PV chunk allocation fails and a PV list lock pointer was
@@ -6865,6 +6894,7 @@ pmap_remove_pages(pmap_t pmap)
         pt_entry_t *pte, tpte;
         pt_entry_t PG_M, PG_RW, PG_V;
         struct spglist free;
+        struct pv_chunklist free_chunks;
         vm_page_t m, mpte, mt;
         pv_entry_t pv;
         struct md_page *pvh;
@@ -6900,6 +6930,7 @@ pmap_remove_pages(pmap_t pmap)
         PG_V = pmap_valid_bit(pmap);
         PG_RW = pmap_rw_bit(pmap);
 
+        TAILQ_INIT(&free_chunks);
         SLIST_INIT(&free);
         PMAP_LOCK(pmap);
         TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
@@ -7027,13 +7058,14 @@ pmap_remove_pages(pmap_t pmap)
                 PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
                 if (allfree) {
                         TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
-                        free_pv_chunk(pc);
+                        TAILQ_INSERT_TAIL(&free_chunks, pc, pc_list);
                 }
         }
         if (lock != NULL)
                 rw_wunlock(lock);
         pmap_invalidate_all(pmap);
         pmap_pkru_deassign_all(pmap);
+        free_pv_chunk_batch(&free_chunks);
         PMAP_UNLOCK(pmap);
         vm_page_free_pages_toq(&free, true);
 }