From 6cadbcd203aa488d843e488700537b35cbe664f1 Mon Sep 17 00:00:00 2001
From: Konstantin Belousov
Date: Wed, 9 Sep 2020 21:50:24 +0000
Subject: [PATCH] Add pmap_enter(9) PMAP_ENTER_LARGEPAGE flag and implement it
 on amd64.

The flag requests entry of a non-managed superpage mapping of size
pagesizes[psind] into the page table.  The pmap supports fake wiring of
largepage mappings.  Calling pmap_enter(9) over an existing largepage
mapping may only change the mapping's attributes; the physical address
of the page must stay unchanged.

Reviewed by:	markj
Tested by:	pho
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
Differential revision:	https://reviews.freebsd.org/D24652
---
 sys/amd64/amd64/pmap.c | 120 +++++++++++++++++++++++++++++++++++++++++
 sys/vm/pmap.h          |   1 +
 2 files changed, 121 insertions(+)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 20fe315b4712..3f7876a91604 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -6475,6 +6475,119 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
 }
 #endif /* VM_NRESERVLEVEL > 0 */
 
+static int
+pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t newpte, int flags,
+    int psind)
+{
+	vm_page_t mp;
+	pt_entry_t origpte, *pml4e, *pdpe, *pde, pten, PG_V;
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	KASSERT(psind > 0 && psind < MAXPAGESIZES,
+	    ("psind %d unexpected", psind));
+	KASSERT(((newpte & PG_FRAME) & (pagesizes[psind] - 1)) == 0,
+	    ("unaligned phys address %#lx newpte %#lx psind %d",
+	    newpte & PG_FRAME, newpte, psind));
+	KASSERT((va & (pagesizes[psind] - 1)) == 0,
+	    ("unaligned va %#lx psind %d", va, psind));
+	KASSERT(va < VM_MAXUSER_ADDRESS,
+	    ("kernel mode non-transparent superpage")); /* XXXKIB */
+	KASSERT(va + pagesizes[psind] < VM_MAXUSER_ADDRESS,
+	    ("overflowing user map va %#lx psind %d", va, psind)); /* XXXKIB */
+
+	PG_V = pmap_valid_bit(pmap);
+
+restart:
+	pten = newpte;
+	if (va < VM_MAXUSER_ADDRESS && pmap->pm_type == PT_X86)
+		pten |= pmap_pkru_get(pmap, va);
+
+	if (psind == 2) {	/* 1G */
+		if (!pmap_pkru_same(pmap, va, va + NBPDP))
+			return (KERN_PROTECTION_FAILURE);
+		pml4e = pmap_pml4e(pmap, va);
+		if ((*pml4e & PG_V) == 0) {
+			mp = _pmap_allocpte(pmap, pmap_pml4e_pindex(va),
+			    NULL, va);
+			if (mp == NULL) {
+				if ((flags & PMAP_ENTER_NOSLEEP) != 0)
+					return (KERN_RESOURCE_SHORTAGE);
+				PMAP_UNLOCK(pmap);
+				vm_wait(NULL);
+				PMAP_LOCK(pmap);
+
+				/*
+				 * Restart at least to recalculate the pkru
+				 * key.  Our caller must keep the map locked
+				 * so no paging structure can be validated
+				 * under us.
+				 */
+				goto restart;
+			}
+			pdpe = pmap_pdpe(pmap, va);
+			KASSERT(pdpe != NULL, ("va %#lx lost pdpe", va));
+			origpte = *pdpe;
+			MPASS(origpte == 0);
+		} else {
+			mp = PHYS_TO_VM_PAGE(*pml4e & PG_FRAME);
+			pdpe = pmap_pdpe(pmap, va);
+			KASSERT(pdpe != NULL, ("va %#lx lost pdpe", va));
+			origpte = *pdpe;
+			if ((origpte & PG_V) == 0)
+				mp->ref_count++;
+		}
+		KASSERT((origpte & PG_V) == 0 || ((origpte & PG_PS) != 0 &&
+		    (origpte & PG_FRAME) == (newpte & PG_FRAME)),
+		    ("va %#lx changing 1G phys page pdpe %#lx newpte %#lx",
+		    va, origpte, newpte));
+		if ((newpte & PG_W) != 0 && (origpte & PG_W) == 0)
+			pmap->pm_stats.wired_count += NBPDP / PAGE_SIZE;
+		else if ((newpte & PG_W) == 0 && (origpte & PG_W) != 0)
+			pmap->pm_stats.wired_count -= NBPDP / PAGE_SIZE;
+		*pdpe = newpte;
+	} else /* (psind == 1) */ {	/* 2M */
+		if (!pmap_pkru_same(pmap, va, va + NBPDR))
+			return (KERN_PROTECTION_FAILURE);
+		pde = pmap_pde(pmap, va);
+		if (pde == NULL) {
+			mp = _pmap_allocpte(pmap, pmap_pdpe_pindex(va),
+			    NULL, va);
+			if (mp == NULL) {
+				if ((flags & PMAP_ENTER_NOSLEEP) != 0)
+					return (KERN_RESOURCE_SHORTAGE);
+				PMAP_UNLOCK(pmap);
+				vm_wait(NULL);
+				PMAP_LOCK(pmap);
+				goto restart;
+			}
+			pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mp));
+			pde = &pde[pmap_pde_index(va)];
+			origpte = *pde;
+			MPASS(origpte == 0);
+		} else {
+			pdpe = pmap_pdpe(pmap, va);
+			MPASS(pdpe != NULL && (*pdpe & PG_V) != 0);
+			mp = PHYS_TO_VM_PAGE(*pdpe & PG_FRAME);
+			origpte = *pde;
+			if ((origpte & PG_V) == 0)
+				mp->ref_count++;
+		}
+		KASSERT((origpte & PG_V) == 0 || ((origpte & PG_PS) != 0 &&
+		    (origpte & PG_FRAME) == (newpte & PG_FRAME)),
+		    ("va %#lx changing 2M phys page pde %#lx newpte %#lx",
+		    va, origpte, newpte));
+		if ((newpte & PG_W) != 0 && (origpte & PG_W) == 0)
+			pmap->pm_stats.wired_count += NBPDR / PAGE_SIZE;
+		else if ((newpte & PG_W) == 0 && (origpte & PG_W) != 0)
+			pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE;
+		*pde = newpte;
+	}
+	if ((origpte & PG_V) == 0)
+		pmap_resident_count_inc(pmap, pagesizes[psind] / PAGE_SIZE);
+
+	return (KERN_SUCCESS);
+}
+
 /*
  * Insert the given physical page (p) at
  * the specified virtual address (v) in the
@@ -6554,6 +6667,13 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 
 	lock = NULL;
 	PMAP_LOCK(pmap);
+	if ((flags & PMAP_ENTER_LARGEPAGE) != 0) {
+		KASSERT((m->oflags & VPO_UNMANAGED) != 0,
+		    ("managed largepage va %#lx flags %#x", va, flags));
+		rv = pmap_enter_largepage(pmap, va, newpte | PG_PS, flags,
+		    psind);
+		goto out;
+	}
 	if (psind == 1) {
 		/* Assert the required virtual and physical alignment. */
 		KASSERT((va & PDRMASK) == 0, ("pmap_enter: va unaligned"));
diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h
index 30a5f7ffc29d..1c521b26c958 100644
--- a/sys/vm/pmap.h
+++ b/sys/vm/pmap.h
@@ -106,6 +106,7 @@ extern vm_offset_t kernel_vm_end;
  */
 #define	PMAP_ENTER_NOSLEEP	0x00000100
 #define	PMAP_ENTER_WIRED	0x00000200
+#define	PMAP_ENTER_LARGEPAGE	0x00000400
 #define	PMAP_ENTER_RESERVED	0xFF000000
 
 /*
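
Illustrative usage sketch (editorial addition, not part of the commit): the
helper below shows how a hypothetical in-kernel caller might use the new
PMAP_ENTER_LARGEPAGE flag to enter an unmanaged superpage mapping.  The
helper name map_phys_superpage() is invented for illustration; the intended
consumer is presumably the non-transparent superpage work that review D24652
is part of.  Assumptions: both va and pa are aligned to pagesizes[psind],
the physical range is backed by unmanaged vm_page structures (pmap_enter()
asserts VPO_UNMANAGED for largepage entries), and the caller holds the map
lock as pmap_enter(9) requires.

#include <sys/param.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_page.h>

/*
 * Hypothetical helper: map the physically contiguous, suitably aligned
 * range starting at "pa" as one superpage of size pagesizes[psind]
 * (psind 1 = 2M, psind 2 = 1G on amd64) at user address "va".
 */
static int
map_phys_superpage(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
    vm_prot_t prot, int psind, bool wire)
{
	vm_page_t m;
	u_int flags;

	KASSERT((va & (pagesizes[psind] - 1)) == 0,
	    ("va %#lx not aligned to pagesizes[%d]", va, psind));
	KASSERT((pa & (pagesizes[psind] - 1)) == 0,
	    ("pa %#lx not aligned to pagesizes[%d]", (u_long)pa, psind));

	/* vm_page for the first 4K page of the run; must be unmanaged. */
	m = PHYS_TO_VM_PAGE(pa);

	/*
	 * As for other pmap_enter(9) callers, the access type occupies the
	 * low bits of "flags"; PMAP_ENTER_WIRED requests (fake) wiring of
	 * the largepage mapping.
	 */
	flags = prot | PMAP_ENTER_LARGEPAGE;
	if (wire)
		flags |= PMAP_ENTER_WIRED;

	/*
	 * Calling this again for the same va may only change attributes
	 * (protection, wiring); the physical address must stay the same.
	 */
	return (pmap_enter(pmap, va, m, prot, flags, psind));
}

A failed entry surfaces as a pmap_enter(9) return value: KERN_RESOURCE_SHORTAGE
if a paging structure page could not be allocated under PMAP_ENTER_NOSLEEP,
or KERN_PROTECTION_FAILURE if the range does not share a single PKRU key.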