Add support for pmap_enter(..., psind=1) to the amd64 pmap.  In other words,
add support for explicitly requesting that pmap_enter() create a 2MB page
mapping.  (Essentially, this feature allows the machine-independent layer to
create superpage mappings preemptively, and not wait for automatic promotion
to occur.)

Export pmap_ps_enabled() to the machine-independent layer.

Add a flag to pmap_pv_insert_pde() that specifies whether it should fail or
reclaim a PV entry when one is not available.

Refactor pmap_enter_pde() into two functions, one by the same name, that is
a general-purpose function for creating PDE PG_PS mappings, and another,
pmap_enter_2mpage(), that is used to prefault 2MB read- and/or execute-only
mappings for execve(2), mmap(2), and shmat(2).

Submitted by:   Yufeng Zhou <yz70@rice.edu> (an earlier version)
Reviewed by:    kib, markj
Tested by:      pho
MFC after:      10 days
Differential Revision:  https://reviews.freebsd.org/D11556
parent 1d3b9818e7
commit 782e896088
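The diff below changes only the amd64 pmap; the machine-independent call sites are not part of this commit.  As a rough illustration of how an MI caller might use the new interface, here is a minimal, hypothetical sketch.  The function name prefault_superpage, its checks, and the flag composition are assumptions for illustration, not code from this change; a caller that gets KERN_FAILURE or KERN_RESOURCE_SHORTAGE back would simply fall back to ordinary 4KB mappings.

/*
 * Hypothetical sketch (not part of this commit) of an MI caller that asks
 * the amd64 pmap for a 2MB mapping directly via pmap_enter(..., psind=1).
 */
#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>

static int
prefault_superpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{

        /* Only ask for a superpage when the pmap, alignment, and reservation allow it. */
        if (!pmap_ps_enabled(pmap) || (va & PDRMASK) != 0 || m->psind < 1)
                return (KERN_FAILURE);

        /*
         * psind == 1 requests a single PG_PS (2MB) PDE instead of a 4KB PTE.
         * PMAP_ENTER_NOSLEEP keeps the call non-blocking, so the caller can
         * fall back to 4KB mappings on KERN_RESOURCE_SHORTAGE.
         */
        return (pmap_enter(pmap, va, m, prot, prot | PMAP_ENTER_NOSLEEP, 1));
}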
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -582,6 +582,12 @@ pmap_delayed_invl_page(vm_page_t m)
  */
 static caddr_t crashdumpmap;
 
+/*
+ * Internal flags for pmap_enter()'s helper functions.
+ */
+#define PMAP_ENTER_NORECLAIM    0x1000000       /* Don't reclaim PV entries. */
+#define PMAP_ENTER_NOREPLACE    0x2000000       /* Don't replace mappings. */
+
 static void     free_pv_chunk(struct pv_chunk *pc);
 static void     free_pv_entry(pmap_t pmap, pv_entry_t pv);
 static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
@@ -591,8 +597,8 @@ static void reserve_pv_entries(pmap_t pmap, int needed,
                     struct rwlock **lockp);
 static void     pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
                     struct rwlock **lockp);
-static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
-                    struct rwlock **lockp);
+static bool     pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde,
+                    u_int flags, struct rwlock **lockp);
 static void     pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
                     struct rwlock **lockp);
 static void     pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
@@ -605,8 +611,10 @@ static boolean_t pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde,
     vm_offset_t va, struct rwlock **lockp);
 static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe,
     vm_offset_t va);
-static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m,
-    vm_prot_t prot, struct rwlock **lockp);
+static bool     pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
+                    vm_prot_t prot, struct rwlock **lockp);
+static int      pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde,
+                    u_int flags, vm_page_t m, struct rwlock **lockp);
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
     vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
 static void     pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
@@ -1389,7 +1397,7 @@ pmap_cache_mask(pmap_t pmap, boolean_t is_pde)
         return (mask);
 }
 
-static __inline boolean_t
+bool
 pmap_ps_enabled(pmap_t pmap)
 {
 
@@ -3393,27 +3401,30 @@ pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
 }
 
 /*
- * Conditionally create the PV entry for a 2MB page mapping if the required
- * memory can be allocated without resorting to reclamation.
+ * Create the PV entry for a 2MB page mapping.  Always returns true unless the
+ * flag PMAP_ENTER_NORECLAIM is specified.  If that flag is specified, returns
+ * false if the PV entry cannot be allocated without resorting to reclamation.
  */
-static boolean_t
-pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+static bool
+pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde, u_int flags,
     struct rwlock **lockp)
 {
         struct md_page *pvh;
         pv_entry_t pv;
+        vm_paddr_t pa;
 
         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
         /* Pass NULL instead of the lock pointer to disable reclamation. */
-        if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
-                pv->pv_va = va;
-                CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
-                pvh = pa_to_pvh(pa);
-                TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
-                pvh->pv_gen++;
-                return (TRUE);
-        } else
-                return (FALSE);
+        if ((pv = get_pv_entry(pmap, (flags & PMAP_ENTER_NORECLAIM) != 0 ?
+            NULL : lockp)) == NULL)
+                return (false);
+        pv->pv_va = va;
+        pa = pde & PG_PS_FRAME;
+        CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
+        pvh = pa_to_pvh(pa);
+        TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
+        pvh->pv_gen++;
+        return (true);
 }
 
 /*
@@ -4323,7 +4334,7 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
  */
 int
 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
-    u_int flags, int8_t psind __unused)
+    u_int flags, int8_t psind)
 {
         struct rwlock *lock;
         pd_entry_t *pde;
@@ -4351,6 +4362,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
             ("pmap_enter: managed mapping within the clean submap"));
         if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
                 VM_OBJECT_ASSERT_LOCKED(m->object);
+        KASSERT((flags & PMAP_ENTER_RESERVED) == 0,
+            ("pmap_enter: flags %u has reserved bits set", flags));
         pa = VM_PAGE_TO_PHYS(m);
         newpte = (pt_entry_t)(pa | PG_A | PG_V);
         if ((flags & VM_PROT_WRITE) != 0)
@@ -4367,7 +4380,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
                 newpte |= PG_U;
         if (pmap == kernel_pmap)
                 newpte |= PG_G;
-        newpte |= pmap_cache_bits(pmap, m->md.pat_mode, 0);
+        newpte |= pmap_cache_bits(pmap, m->md.pat_mode, psind > 0);
 
         /*
          * Set modified bit gratuitously for writeable mappings if
@@ -4380,10 +4393,16 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
         } else
                 newpte |= PG_MANAGED;
 
-        mpte = NULL;
-
         lock = NULL;
         PMAP_LOCK(pmap);
+        if (psind == 1) {
+                /* Assert the required virtual and physical alignment. */
+                KASSERT((va & PDRMASK) == 0, ("pmap_enter: va unaligned"));
+                KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind"));
+                rv = pmap_enter_pde(pmap, va, newpte | PG_PS, flags, m, &lock);
+                goto out;
+        }
+        mpte = NULL;
 
         /*
          * In the case that a page table page is not
@@ -4543,48 +4562,118 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 }
 
 /*
- * Tries to create a 2MB page mapping.  Returns TRUE if successful and FALSE
- * otherwise.  Fails if (1) a page table page cannot be allocated without
- * blocking, (2) a mapping already exists at the specified virtual address, or
- * (3) a pv entry cannot be allocated without reclaiming another pv entry.
+ * Tries to create a read- and/or execute-only 2MB page mapping.  Returns true
+ * if successful.  Returns false if (1) a page table page cannot be allocated
+ * without sleeping, (2) a mapping already exists at the specified virtual
+ * address, or (3) a PV entry cannot be allocated without reclaiming another
+ * PV entry.
  */
-static boolean_t
-pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
+static bool
+pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
     struct rwlock **lockp)
 {
-        pd_entry_t *pde, newpde;
+        pd_entry_t newpde;
         pt_entry_t PG_V;
-        vm_page_t pdpg;
-        struct spglist free;
 
+        PMAP_LOCK_ASSERT(pmap, MA_OWNED);
         PG_V = pmap_valid_bit(pmap);
+        newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(pmap, m->md.pat_mode, 1) |
+            PG_PS | PG_V;
+        if ((m->oflags & VPO_UNMANAGED) == 0)
+                newpde |= PG_MANAGED;
+        if ((prot & VM_PROT_EXECUTE) == 0)
+                newpde |= pg_nx;
+        if (va < VM_MAXUSER_ADDRESS)
+                newpde |= PG_U;
+        return (pmap_enter_pde(pmap, va, newpde, PMAP_ENTER_NOSLEEP |
+            PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) ==
+            KERN_SUCCESS);
+}
+
+/*
+ * Tries to create the specified 2MB page mapping.  Returns KERN_SUCCESS if
+ * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE
+ * otherwise.  Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and
+ * a mapping already exists at the specified virtual address.  Returns
+ * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table
+ * page allocation failed.  Returns KERN_RESOURCE_SHORTAGE if
+ * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed.
+ *
+ * The parameter "m" is only used when creating a managed, writeable mapping.
+ */
+static int
+pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
+    vm_page_t m, struct rwlock **lockp)
+{
+        struct spglist free;
+        pd_entry_t oldpde, *pde;
+        pt_entry_t PG_G, PG_RW, PG_V;
+        vm_page_t mt, pdpg;
+
+        PG_G = pmap_global_bit(pmap);
+        PG_RW = pmap_rw_bit(pmap);
+        KASSERT((newpde & (pmap_modified_bit(pmap) | PG_RW)) != PG_RW,
+            ("pmap_enter_pde: newpde is missing PG_M"));
+        PG_V = pmap_valid_bit(pmap);
         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
-        if ((pdpg = pmap_allocpde(pmap, va, NULL)) == NULL) {
+        if ((pdpg = pmap_allocpde(pmap, va, (flags & PMAP_ENTER_NOSLEEP) != 0 ?
+            NULL : lockp)) == NULL) {
                 CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
                     " in pmap %p", va, pmap);
-                return (FALSE);
+                return (KERN_RESOURCE_SHORTAGE);
         }
         pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg));
         pde = &pde[pmap_pde_index(va)];
-        if ((*pde & PG_V) != 0) {
+        oldpde = *pde;
+        if ((oldpde & PG_V) != 0) {
                 KASSERT(pdpg->wire_count > 1,
                     ("pmap_enter_pde: pdpg's wire count is too low"));
-                pdpg->wire_count--;
-                CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
-                    " in pmap %p", va, pmap);
-                return (FALSE);
+                if ((flags & PMAP_ENTER_NOREPLACE) != 0) {
+                        pdpg->wire_count--;
+                        CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
+                            " in pmap %p", va, pmap);
+                        return (KERN_FAILURE);
+                }
+                /* Break the existing mapping(s). */
+                SLIST_INIT(&free);
+                if ((oldpde & PG_PS) != 0) {
+                        /*
+                         * The reference to the PD page that was acquired by
+                         * pmap_allocpde() ensures that it won't be freed.
+                         * However, if the PDE resulted from a promotion, then
+                         * a reserved PT page could be freed.
+                         */
+                        (void)pmap_remove_pde(pmap, pde, va, &free, lockp);
+                        if ((oldpde & PG_G) == 0)
+                                pmap_invalidate_pde_page(pmap, va, oldpde);
+                } else {
+                        pmap_delayed_invl_started();
+                        if (pmap_remove_ptes(pmap, va, va + NBPDR, pde, &free,
+                            lockp))
+                                pmap_invalidate_all(pmap);
+                        pmap_delayed_invl_finished();
+                }
+                pmap_free_zero_pages(&free);
+                if (va >= VM_MAXUSER_ADDRESS) {
+                        mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
+                        if (pmap_insert_pt_page(pmap, mt)) {
+                                /*
+                                 * XXX Currently, this can't happen because
+                                 * we do not perform pmap_enter(psind == 1)
+                                 * on the kernel pmap.
+                                 */
+                                panic("pmap_enter_pde: trie insert failed");
+                        }
+                } else
+                        KASSERT(*pde == 0, ("pmap_enter_pde: non-zero pde %p",
+                            pde));
         }
-        newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(pmap, m->md.pat_mode, 1) |
-            PG_PS | PG_V;
-        if ((m->oflags & VPO_UNMANAGED) == 0) {
-                newpde |= PG_MANAGED;
-
+        if ((newpde & PG_MANAGED) != 0) {
                 /*
                  * Abort this mapping if its PV entry could not be created.
                  */
-                if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m),
-                    lockp)) {
+                if (!pmap_pv_insert_pde(pmap, va, newpde, flags, lockp)) {
                         SLIST_INIT(&free);
                         if (pmap_unwire_ptp(pmap, va, pdpg, &free)) {
                                 /*
@@ -4598,17 +4687,19 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
                         }
                         CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
                             " in pmap %p", va, pmap);
-                        return (FALSE);
+                        return (KERN_RESOURCE_SHORTAGE);
                 }
+                if ((newpde & PG_RW) != 0) {
+                        for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
+                                vm_page_aflag_set(mt, PGA_WRITEABLE);
+                }
         }
-        if ((prot & VM_PROT_EXECUTE) == 0)
-                newpde |= pg_nx;
-        if (va < VM_MAXUSER_ADDRESS)
-                newpde |= PG_U;
 
         /*
          * Increment counters.
          */
         if ((newpde & PG_W) != 0)
                 pmap->pm_stats.wired_count += NBPDR / PAGE_SIZE;
         pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE);
 
         /*
@@ -4620,7 +4711,7 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
         atomic_add_long(&pmap_pde_mappings, 1);
         CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx"
             " in pmap %p", va, pmap);
-        return (TRUE);
+        return (KERN_SUCCESS);
 }
 
 /*
@@ -4655,7 +4746,7 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
                 va = start + ptoa(diff);
                 if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
                     m->psind == 1 && pmap_ps_enabled(pmap) &&
-                    pmap_enter_pde(pmap, va, m, prot, &lock))
+                    pmap_enter_2mpage(pmap, va, m, prot, &lock))
                         m = &m[NBPDR / PAGE_SIZE - 1];
                 else
                         mpte = pmap_enter_quick_locked(pmap, va, m, prot,
@@ -5088,8 +5179,8 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
                             PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dst_pdpg));
                         pde = &pde[pmap_pde_index(addr)];
                         if (*pde == 0 && ((srcptepaddr & PG_MANAGED) == 0 ||
-                            pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr &
-                            PG_PS_FRAME, &lock))) {
+                            pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr,
+                            PMAP_ENTER_NORECLAIM, &lock))) {
                                 *pde = srcptepaddr & ~PG_W;
                                 pmap_resident_count_inc(dst_pmap, NBPDR / PAGE_SIZE);
                                 atomic_add_long(&pmap_pde_mappings, 1);
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -408,6 +408,7 @@ void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int);
 boolean_t pmap_page_is_mapped(vm_page_t m);
 void    pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma);
 void    pmap_pinit_pml4(vm_page_t);
+bool    pmap_ps_enabled(pmap_t pmap);
 void    pmap_unmapdev(vm_offset_t, vm_size_t);
 void    pmap_invalidate_page(pmap_t, vm_offset_t);
 void    pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h
--- a/sys/vm/pmap.h
+++ b/sys/vm/pmap.h
@@ -100,9 +100,11 @@ extern vm_offset_t kernel_vm_end;
 /*
  * Flags for pmap_enter().  The bits in the low-order byte are reserved
  * for the protection code (vm_prot_t) that describes the fault type.
+ * Bits 24 through 31 are reserved for the pmap's internal use.
  */
-#define PMAP_ENTER_NOSLEEP      0x0100
-#define PMAP_ENTER_WIRED        0x0200
+#define PMAP_ENTER_NOSLEEP      0x00000100
+#define PMAP_ENTER_WIRED        0x00000200
+#define PMAP_ENTER_RESERVED     0xFF000000
 
 /*
  * Define the maximum number of machine-dependent reference bits that are
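The PMAP_ENTER_RESERVED byte above is what lets the amd64 pmap define PMAP_ENTER_NORECLAIM and PMAP_ENTER_NOREPLACE (see the first hunk) without colliding with the MI flags.  The following standalone sketch is not part of the commit; it merely restates that bit partition with static assertions, copying the constant values from the headers above.  Compiling it under C11 is enough to check the partition; nothing runs at run time.

/*
 * Standalone illustration (not from the commit): MI pmap_enter() flags occupy
 * the low bits, while the amd64 pmap's internal helper flags must fit inside
 * the byte that PMAP_ENTER_RESERVED sets aside for the pmap.
 */
#include <assert.h>

#define PMAP_ENTER_NOSLEEP      0x00000100      /* MI flag */
#define PMAP_ENTER_WIRED        0x00000200      /* MI flag */
#define PMAP_ENTER_RESERVED     0xFF000000      /* bits 24-31, pmap internal */
#define PMAP_ENTER_NORECLAIM    0x1000000       /* amd64 internal */
#define PMAP_ENTER_NOREPLACE    0x2000000       /* amd64 internal */

static_assert((PMAP_ENTER_NORECLAIM & ~PMAP_ENTER_RESERVED) == 0,
    "PMAP_ENTER_NORECLAIM must stay within the reserved bits");
static_assert((PMAP_ENTER_NOREPLACE & ~PMAP_ENTER_RESERVED) == 0,
    "PMAP_ENTER_NOREPLACE must stay within the reserved bits");
static_assert(((PMAP_ENTER_NORECLAIM | PMAP_ENTER_NOREPLACE) &
    (PMAP_ENTER_NOSLEEP | PMAP_ENTER_WIRED)) == 0,
    "internal flags must not collide with the MI flags");

int
main(void)
{
        return (0);
}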