powerpc/pmap: Simplify Book-E 64-bit page table management

There is no need for the 64-bit pmap to have a fixed number of page table
buffers.  Since the 64-bit pmap has a DMAP, we can effectively have user
page tables limited only by total RAM size.
commit dc825fed55 (parent e48c002fa4)
Author: Justin Hibbits
Date:   2019-08-06 03:16:06 +00:00
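The heart of the change shows up throughout the diff below: instead of carving each page table out of a fixed pool of kva buffers (ptbl_bufs) that had to be mapped in with qenter calls, the 64-bit pmap now allocates one wired page per page table or page directory and addresses it through the direct map. A minimal sketch of the new pattern, using only the KPIs that appear in the diff (the helper names pt_page_alloc/pt_page_free are hypothetical):

/* Allocate a single page-table page and find it through the DMAP. */
static pte_t *
pt_page_alloc(void)
{
	vm_page_t m;
	pte_t *ptbl;

	/* One wired page, not backed by any VM object. */
	m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
	if (m == NULL)
		return (NULL);

	/* No kva reservation or mapin step: the DMAP already maps it. */
	ptbl = (pte_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
	bzero(ptbl, PAGE_SIZE);
	return (ptbl);
}

/* Free it again; the caller has already dropped the last wiring. */
static void
pt_page_free(pte_t *ptbl)
{
	/* DMAP_TO_PHYS() inverts the mapping and recovers the vm_page. */
	vm_page_free_zero(PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)ptbl)));
}

Because the buffer pool, its freelist lock, and the per-pmap buffer lists existed only to manage kva, the bulk of this diff can delete them outright on 64-bit.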


@@ -59,8 +59,7 @@
* 0xc000_0000_0000_0000 - 0xcfff_ffff_ffff_ffff : kernel reserved
* 0xc000_0000_0000_0000 - endkernel-1 : kernel code & data
* endkernel - msgbufp-1 : flat device tree
* msgbufp - ptbl_bufs-1 : message buffer
* ptbl_bufs - kernel_pdir-1 : kernel page tables
* msgbufp - kernel_pdir-1 : message buffer
* kernel_pdir - kernel_pp2d-1 : kernel page directory
* kernel_pp2d - . : kernel pointers to page directory
* pmap_zero_copy_min - crashdumpmap-1 : reserved for page zero/copy
@@ -262,11 +261,6 @@ static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
#define PMAP_SHPGPERPROC 200
#endif
static void ptbl_init(void);
static struct ptbl_buf *ptbl_buf_alloc(void);
static void ptbl_buf_free(struct ptbl_buf *);
static void ptbl_free_pmap_ptbl(pmap_t, pte_t *);
#ifdef __powerpc64__
static pte_t *ptbl_alloc(mmu_t, pmap_t, pte_t **,
unsigned int, boolean_t);
@@ -274,6 +268,11 @@ static void ptbl_free(mmu_t, pmap_t, pte_t **, unsigned int);
static void ptbl_hold(mmu_t, pmap_t, pte_t **, unsigned int);
static int ptbl_unhold(mmu_t, pmap_t, vm_offset_t);
#else
static void ptbl_init(void);
static struct ptbl_buf *ptbl_buf_alloc(void);
static void ptbl_buf_free(struct ptbl_buf *);
static void ptbl_free_pmap_ptbl(pmap_t, pte_t *);
static pte_t *ptbl_alloc(mmu_t, pmap_t, unsigned int, boolean_t);
static void ptbl_free(mmu_t, pmap_t, unsigned int);
static void ptbl_hold(mmu_t, pmap_t, unsigned int);
@@ -293,18 +292,15 @@ static void pv_remove(pmap_t, vm_offset_t, vm_page_t);
static void booke_pmap_init_qpages(void);
/* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */
#ifdef __powerpc64__
#define PTBL_BUFS (16UL * 16 * 16)
#else
#define PTBL_BUFS (128 * 16)
#endif
struct ptbl_buf {
TAILQ_ENTRY(ptbl_buf) link; /* list link */
vm_offset_t kva; /* va of mapping */
};
#ifndef __powerpc64__
/* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */
#define PTBL_BUFS (128 * 16)
/* ptbl free list and a lock used for access synchronization. */
static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist;
static struct mtx ptbl_buf_freelist_lock;
@@ -314,6 +310,7 @@ static vm_offset_t ptbl_buf_pool_vabase;
/* Pointer to ptbl_buf structures. */
static struct ptbl_buf *ptbl_bufs;
#endif
#ifdef SMP
extern tlb_entry_t __boot_tlb1[];
@@ -574,61 +571,6 @@ tlb1_get_tlbconf(void)
static void
ptbl_init(void)
{
int i;
mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF);
TAILQ_INIT(&ptbl_buf_freelist);
for (i = 0; i < PTBL_BUFS; i++) {
ptbl_bufs[i].kva = ptbl_buf_pool_vabase +
i * MAX(PTBL_PAGES,PDIR_PAGES) * PAGE_SIZE;
TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link);
}
}
/* Get an sf_buf from the freelist. */
static struct ptbl_buf *
ptbl_buf_alloc(void)
{
struct ptbl_buf *buf;
mtx_lock(&ptbl_buf_freelist_lock);
buf = TAILQ_FIRST(&ptbl_buf_freelist);
if (buf != NULL)
TAILQ_REMOVE(&ptbl_buf_freelist, buf, link);
mtx_unlock(&ptbl_buf_freelist_lock);
return (buf);
}
/* Return ptbl buff to free pool. */
static void
ptbl_buf_free(struct ptbl_buf *buf)
{
mtx_lock(&ptbl_buf_freelist_lock);
TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link);
mtx_unlock(&ptbl_buf_freelist_lock);
}
/*
* Search the list of allocated ptbl bufs and find on list of allocated ptbls
*/
static void
ptbl_free_pmap_ptbl(pmap_t pmap, pte_t * ptbl)
{
struct ptbl_buf *pbuf;
TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link) {
if (pbuf->kva == (vm_offset_t) ptbl) {
/* Remove from pmap ptbl buf list. */
TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link);
/* Free corresponding ptbl buf. */
ptbl_buf_free(pbuf);
break;
}
}
}
/* Get a pointer to a PTE in a page table. */
@@ -648,25 +590,36 @@ pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va)
}
/*
* Search the list of allocated pdir bufs and find on list of allocated pdirs
* allocate a page of pointers to page directories, do not preallocate the
* page tables
*/
static void
ptbl_free_pmap_pdir(mmu_t mmu, pmap_t pmap, pte_t ** pdir)
static pte_t **
pdir_alloc(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, bool nosleep)
{
struct ptbl_buf *pbuf;
vm_page_t m;
pte_t **pdir;
int req;
TAILQ_FOREACH(pbuf, &pmap->pm_pdir_list, link) {
if (pbuf->kva == (vm_offset_t) pdir) {
/* Remove from pmap ptbl buf list. */
TAILQ_REMOVE(&pmap->pm_pdir_list, pbuf, link);
KASSERT((pdir[pdir_idx] == NULL),
("%s: valid pdir entry exists!", __func__));
/* Free corresponding pdir buf. */
ptbl_buf_free(pbuf);
break;
req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
while ((m = vm_page_alloc(NULL, pp2d_idx, req)) == NULL) {
PMAP_UNLOCK(pmap);
if (nosleep) {
return (NULL);
}
vm_wait(NULL);
PMAP_LOCK(pmap);
}
/* Zero whole ptbl. */
pdir = (pte_t **)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
bzero(pdir, PAGE_SIZE);
return (pdir);
}
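One detail of the new pdir_alloc() (and of ptbl_alloc() further down) worth calling out: when vm_page_alloc() fails, the pmap lock is dropped first, and only then does the nosleep case bail out or the sleeping case wait in vm_wait() and retry. Condensed into a hypothetical helper that mirrors the committed loop:

static vm_page_t
pt_page_alloc_locked(pmap_t pmap, vm_pindex_t pidx, bool nosleep)
{
	vm_page_t m;

	while ((m = vm_page_alloc(NULL, pidx,
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
		PMAP_UNLOCK(pmap);
		if (nosleep)
			return (NULL);	/* lock stays dropped on failure */
		vm_wait(NULL);		/* let the page daemon make progress */
		PMAP_LOCK(pmap);
	}
	return (m);
}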
/* Free pdir pages and invalidate pdir entry. */
static void
pdir_free(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx)
@@ -675,7 +628,6 @@ pdir_free(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx)
vm_paddr_t pa;
vm_offset_t va;
vm_page_t m;
int i;
pdir = pmap->pm_pp2d[pp2d_idx];
@@ -683,16 +635,10 @@ pdir_free(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx)
pmap->pm_pp2d[pp2d_idx] = NULL;
for (i = 0; i < PDIR_PAGES; i++) {
va = ((vm_offset_t) pdir + (i * PAGE_SIZE));
pa = pte_vatopa(mmu, kernel_pmap, va);
m = PHYS_TO_VM_PAGE(pa);
vm_page_free_zero(m);
vm_wire_sub(1);
pmap_kremove(va);
}
ptbl_free_pmap_pdir(mmu, pmap, pdir);
va = (vm_offset_t) pdir;
pa = DMAP_TO_PHYS(va);
m = PHYS_TO_VM_PAGE(pa);
vm_page_free_zero(m);
}
/*
@@ -707,30 +653,20 @@ pdir_unhold(mmu_t mmu, pmap_t pmap, u_int pp2d_idx)
pte_t **pdir;
vm_paddr_t pa;
vm_page_t m;
int i;
KASSERT((pmap != kernel_pmap),
("pdir_unhold: unholding kernel pdir!"));
pdir = pmap->pm_pp2d[pp2d_idx];
KASSERT(((vm_offset_t) pdir >= VM_MIN_KERNEL_ADDRESS),
("pdir_unhold: non kva pdir"));
/* decrement hold count */
for (i = 0; i < PDIR_PAGES; i++) {
pa = pte_vatopa(mmu, kernel_pmap,
(vm_offset_t) pdir + (i * PAGE_SIZE));
m = PHYS_TO_VM_PAGE(pa);
m->wire_count--;
}
pa = DMAP_TO_PHYS((vm_offset_t) pdir);
m = PHYS_TO_VM_PAGE(pa);
/*
* Free pdir pages if there are no dir entries in this pdir.
* wire_count has the same value for all ptbl pages, so check the
* last page.
* Free pdir page if there are no dir entries in this pdir.
*/
if (m->wire_count == 0) {
if (vm_page_unwire_noq(m)) {
pdir_free(mmu, pmap, pp2d_idx);
return (1);
}
@@ -744,21 +680,15 @@ pdir_unhold(mmu_t mmu, pmap_t pmap, u_int pp2d_idx)
static void
pdir_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir)
{
vm_paddr_t pa;
vm_page_t m;
int i;
KASSERT((pmap != kernel_pmap),
("pdir_hold: holding kernel pdir!"));
KASSERT((pdir != NULL), ("pdir_hold: null pdir"));
for (i = 0; i < PDIR_PAGES; i++) {
pa = pte_vatopa(mmu, kernel_pmap,
(vm_offset_t) pdir + (i * PAGE_SIZE));
m = PHYS_TO_VM_PAGE(pa);
m->wire_count++;
}
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pdir));
vm_page_wire(m);
}
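Shrinking each pdir and ptbl to a single page also simplifies the reference counting, as the two hunks above show: the hold count is now just that one page's wire count, so pdir_hold()/ptbl_hold() reduce to a single vm_page_wire(), and the unhold paths use vm_page_unwire_noq(), whose return value reports that the count reached zero. That replaces the old loops that decremented wire_count by hand across PDIR_PAGES or PTBL_PAGES pages and then re-checked the last one. A sketch of the resulting unhold shape (hypothetical helper, free path simplified):

static int
pt_page_unhold(pte_t *ptbl)
{
	vm_page_t m;

	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)ptbl));
	/* vm_page_unwire_noq() returns true when the wire count hits 0. */
	if (vm_page_unwire_noq(m)) {
		vm_page_free_zero(m);	/* last reference went away */
		return (1);
	}
	return (0);
}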
/* Allocate page table. */
@@ -766,50 +696,28 @@ static pte_t *
ptbl_alloc(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx,
boolean_t nosleep)
{
vm_page_t mtbl [PTBL_PAGES];
vm_page_t m;
struct ptbl_buf *pbuf;
unsigned int pidx;
pte_t *ptbl;
int i, j;
int req;
KASSERT((pdir[pdir_idx] == NULL),
("%s: valid ptbl entry exists!", __func__));
pbuf = ptbl_buf_alloc();
if (pbuf == NULL)
panic("%s: couldn't alloc kernel virtual memory", __func__);
ptbl = (pte_t *) pbuf->kva;
for (i = 0; i < PTBL_PAGES; i++) {
pidx = (PTBL_PAGES * pdir_idx) + i;
req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
while ((m = vm_page_alloc(NULL, pidx, req)) == NULL) {
PMAP_UNLOCK(pmap);
rw_wunlock(&pvh_global_lock);
if (nosleep) {
ptbl_free_pmap_ptbl(pmap, ptbl);
for (j = 0; j < i; j++)
vm_page_free(mtbl[j]);
vm_wire_sub(i);
return (NULL);
}
vm_wait(NULL);
rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
while ((m = vm_page_alloc(NULL, pdir_idx, req)) == NULL) {
PMAP_UNLOCK(pmap);
rw_wunlock(&pvh_global_lock);
if (nosleep) {
return (NULL);
}
mtbl[i] = m;
vm_wait(NULL);
rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
}
/* Mapin allocated pages into kernel_pmap. */
mmu_booke_qenter(mmu, (vm_offset_t) ptbl, mtbl, PTBL_PAGES);
/* Zero whole ptbl. */
bzero((caddr_t) ptbl, PTBL_PAGES * PAGE_SIZE);
/* Add pbuf to the pmap ptbl bufs list. */
TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link);
ptbl = (pte_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
bzero(ptbl, PAGE_SIZE);
return (ptbl);
}
@@ -822,7 +730,6 @@ ptbl_free(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx)
vm_paddr_t pa;
vm_offset_t va;
vm_page_t m;
int i;
ptbl = pdir[pdir_idx];
@@ -830,16 +737,10 @@ ptbl_free(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx)
pdir[pdir_idx] = NULL;
for (i = 0; i < PTBL_PAGES; i++) {
va = ((vm_offset_t) ptbl + (i * PAGE_SIZE));
pa = pte_vatopa(mmu, kernel_pmap, va);
m = PHYS_TO_VM_PAGE(pa);
vm_page_free_zero(m);
vm_wire_sub(1);
pmap_kremove(va);
}
ptbl_free_pmap_ptbl(pmap, ptbl);
va = (vm_offset_t) ptbl;
pa = DMAP_TO_PHYS(va);
m = PHYS_TO_VM_PAGE(pa);
vm_page_free_zero(m);
}
/*
@@ -852,12 +753,10 @@ static int
ptbl_unhold(mmu_t mmu, pmap_t pmap, vm_offset_t va)
{
pte_t *ptbl;
vm_paddr_t pa;
vm_page_t m;
u_int pp2d_idx;
pte_t **pdir;
u_int pdir_idx;
int i;
pp2d_idx = PP2D_IDX(va);
pdir_idx = PDIR_IDX(va);
@@ -868,30 +767,15 @@ ptbl_unhold(mmu_t mmu, pmap_t pmap, vm_offset_t va)
pdir = pmap->pm_pp2d[pp2d_idx];
ptbl = pdir[pdir_idx];
KASSERT(((vm_offset_t) ptbl >= VM_MIN_KERNEL_ADDRESS),
("ptbl_unhold: non kva ptbl"));
/* decrement hold count */
for (i = 0; i < PTBL_PAGES; i++) {
pa = pte_vatopa(mmu, kernel_pmap,
(vm_offset_t) ptbl + (i * PAGE_SIZE));
m = PHYS_TO_VM_PAGE(pa);
m->wire_count--;
}
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl));
/*
* Free ptbl pages if there are no pte entries in this ptbl.
* wire_count has the same value for all ptbl pages, so check the
* last page.
*/
if (m->wire_count == 0) {
/* A pair of indirect entries might point to this ptbl page */
#if 0
tlb_flush_entry(pmap, va & ~((2UL * PAGE_SIZE_1M) - 1),
TLB_SIZE_1M, MAS6_SIND);
tlb_flush_entry(pmap, (va & ~((2UL * PAGE_SIZE_1M) - 1)) | PAGE_SIZE_1M,
TLB_SIZE_1M, MAS6_SIND);
#endif
if (vm_page_unwire_noq(m)) {
ptbl_free(mmu, pmap, pdir, pdir_idx);
pdir_unhold(mmu, pmap, pp2d_idx);
return (1);
@@ -906,10 +790,8 @@ ptbl_unhold(mmu_t mmu, pmap_t pmap, vm_offset_t va)
static void
ptbl_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx)
{
vm_paddr_t pa;
pte_t *ptbl;
vm_page_t m;
int i;
KASSERT((pmap != kernel_pmap),
("ptbl_hold: holding kernel ptbl!"));
@@ -918,12 +800,8 @@ ptbl_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx)
KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl"));
for (i = 0; i < PTBL_PAGES; i++) {
pa = pte_vatopa(mmu, kernel_pmap,
(vm_offset_t) ptbl + (i * PAGE_SIZE));
m = PHYS_TO_VM_PAGE(pa);
m->wire_count++;
}
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl));
vm_page_wire(m);
}
#else
@@ -1327,51 +1205,6 @@ pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, u_int8_t flags)
return (0);
}
/*
* allocate a page of pointers to page directories, do not preallocate the
* page tables
*/
static pte_t **
pdir_alloc(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, bool nosleep)
{
vm_page_t mtbl [PDIR_PAGES];
vm_page_t m;
struct ptbl_buf *pbuf;
pte_t **pdir;
unsigned int pidx;
int i;
int req;
pbuf = ptbl_buf_alloc();
if (pbuf == NULL)
panic("%s: couldn't alloc kernel virtual memory", __func__);
/* Allocate pdir pages, this will sleep! */
for (i = 0; i < PDIR_PAGES; i++) {
pidx = (PDIR_PAGES * pp2d_idx) + i;
req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
while ((m = vm_page_alloc(NULL, pidx, req)) == NULL) {
PMAP_UNLOCK(pmap);
vm_wait(NULL);
PMAP_LOCK(pmap);
}
mtbl[i] = m;
}
/* Mapin allocated pages into kernel_pmap. */
pdir = (pte_t **) pbuf->kva;
pmap_qenter((vm_offset_t) pdir, mtbl, PDIR_PAGES);
/* Zero whole pdir. */
bzero((caddr_t) pdir, PDIR_PAGES * PAGE_SIZE);
/* Add pdir to the pmap pdir bufs list. */
TAILQ_INSERT_TAIL(&pmap->pm_pdir_list, pbuf, link);
return pdir;
}
/*
* Insert PTE for a given page and virtual address.
*/
@@ -1485,8 +1318,8 @@ kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir)
for (j = PDIR_IDX(va + (i * PAGE_SIZE * PDIR_NENTRIES * PTBL_NENTRIES));
j < PDIR_NENTRIES; j++) {
kernel_pmap->pm_pp2d[i + PP2D_IDX(va)][j] =
(pte_t *)(pdir + (kernel_pdirs * PAGE_SIZE * PDIR_PAGES) +
(((i * PDIR_NENTRIES) + j) * PAGE_SIZE * PTBL_PAGES));
(pte_t *)(pdir + (kernel_pdirs * PAGE_SIZE) +
(((i * PDIR_NENTRIES) + j) * PAGE_SIZE));
}
}
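The kernel_pte_alloc() arithmetic changes to match: with every page directory and page table now exactly one page, the stride through the preallocated kernel block is PAGE_SIZE instead of PDIR_PAGES or PTBL_PAGES pages. The layout the new index expression assumes (my reading of the hunk above, not spelled out in it):

/*
 * pdir + 0                            first kernel page directory
 *   ... one page per directory ...
 * pdir + kernel_pdirs * PAGE_SIZE     first kernel page table
 *   ... one page per table ...
 *
 * so page table j under page directory i starts at:
 */
pte_t *ptbl = (pte_t *)(pdir + kernel_pdirs * PAGE_SIZE +
    ((i * PDIR_NENTRIES) + j) * PAGE_SIZE);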
@@ -1764,6 +1597,7 @@ mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend)
data_end = round_page(data_end);
#ifndef __powerpc64__
/* Allocate space for ptbl_bufs. */
ptbl_bufs = (struct ptbl_buf *)data_end;
data_end += sizeof(struct ptbl_buf) * PTBL_BUFS;
@@ -1771,6 +1605,7 @@ mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend)
(uintptr_t)ptbl_bufs, data_end);
data_end = round_page(data_end);
#endif
/* Allocate PTE tables for kernel KVA. */
kernel_pdir = data_end;
@@ -1799,12 +1634,12 @@ mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend)
* all range up to the currently calculated 'data_end' is covered.
*/
dpcpu_init(dpcpu, 0);
memset((void *)ptbl_bufs, 0, sizeof(struct ptbl_buf) * PTBL_SIZE);
#ifdef __powerpc64__
memset((void *)kernel_pdir, 0,
kernel_pdirs * PDIR_PAGES * PAGE_SIZE +
kernel_ptbls * PTBL_PAGES * PAGE_SIZE);
#else
memset((void *)ptbl_bufs, 0, sizeof(struct ptbl_buf) * PTBL_SIZE);
memset((void *)kernel_pdir, 0, kernel_ptbls * PTBL_PAGES * PAGE_SIZE);
#endif
@@ -1829,11 +1664,13 @@ mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend)
mtx_init(&zero_page_mutex, "mmu_booke_zero_page", NULL, MTX_DEF);
mtx_init(&copy_page_mutex, "mmu_booke_copy_page", NULL, MTX_DEF);
#ifndef __powerpc64__
/* Allocate KVA space for ptbl bufs. */
ptbl_buf_pool_vabase = virtual_avail;
virtual_avail += PTBL_BUFS * PTBL_PAGES * PAGE_SIZE;
debugf("ptbl_buf_pool_vabase = 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n",
ptbl_buf_pool_vabase, virtual_avail);
#endif
/* Calculate corresponding physical addresses for the kernel region. */
phys_kernelend = kernload + kernsize;