Disable promotion on pcpu memory on arm64

We need to be careful not to promote or demote the memory containing
the per-CPU structures, as the exception handlers dereference it: any
window in which the mapping is invalid may cause recursive exceptions.

Add a new pmap function that sets a flag in the pte to mark memory
that must not be promoted or demoted, and use it to mark pcpu memory.

Sponsored by:	The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D35434
Andrew Turner 2022-04-29 10:30:38 +01:00
parent f9572577cf
commit e3917bb256
4 changed files with 80 additions and 37 deletions
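Below is a minimal sketch, assuming only the kernel interfaces already used in
this diff, of how the new pmap_disable_promotion() is meant to be called:
allocate the per-CPU block page-aligned, then flag its mappings so they are
never promoted or demoted.  The helper name alloc_pcpu_mem() is hypothetical
and not part of the commit.

/*
 * Hypothetical helper; mirrors the start_cpu() change in the mp_machdep.c
 * hunk below.
 */
static struct pcpu *
alloc_pcpu_mem(int domain)
{
	vm_offset_t pcpu_mem;
	vm_size_t size;

	/* Room for the pcpu structure plus the dynamic per-CPU area. */
	size = round_page(sizeof(struct pcpu) + DPCPU_SIZE);
	pcpu_mem = kmem_malloc_domainset(DOMAINSET_PREF(domain), size,
	    M_WAITOK | M_ZERO);
	/* Exception handlers dereference this memory; keep its PTEs stable. */
	pmap_disable_promotion(pcpu_mem, size);
	return ((struct pcpu *)pcpu_mem);
}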

View File

@@ -495,6 +495,8 @@ static bool
start_cpu(u_int cpuid, uint64_t target_cpu, int domain)
{
struct pcpu *pcpup;
vm_offset_t pcpu_mem;
vm_size_t size;
vm_paddr_t pa;
int err, naps;
@@ -508,13 +510,16 @@ start_cpu(u_int cpuid, uint64_t target_cpu, int domain)
KASSERT(cpuid < MAXCPU, ("Too many CPUs"));
pcpup = (void *)kmem_malloc_domainset(DOMAINSET_PREF(domain),
sizeof(*pcpup), M_WAITOK | M_ZERO);
size = round_page(sizeof(*pcpup) + DPCPU_SIZE);
pcpu_mem = kmem_malloc_domainset(DOMAINSET_PREF(domain), size,
M_WAITOK | M_ZERO);
pmap_disable_promotion(pcpu_mem, size);
pcpup = (struct pcpu *)pcpu_mem;
pcpu_init(pcpup, cpuid, sizeof(struct pcpu));
pcpup->pc_mpidr = target_cpu & CPU_AFF_MASK;
dpcpu[cpuid - 1] = (void *)kmem_malloc_domainset(
DOMAINSET_PREF(domain), DPCPU_SIZE, M_WAITOK | M_ZERO);
dpcpu[cpuid - 1] = (void *)(pcpup + 1);
dpcpu_init(dpcpu[cpuid - 1], cpuid);
bootstacks[cpuid] = (void *)kmem_malloc_domainset(
@@ -538,9 +543,9 @@ start_cpu(u_int cpuid, uint64_t target_cpu, int domain)
cpuid, target_cpu, err));
pcpu_destroy(pcpup);
kmem_free((vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE);
dpcpu[cpuid - 1] = NULL;
kmem_free((vm_offset_t)bootstacks[cpuid], PAGE_SIZE);
kmem_free(pcpu_mem, size);
bootstacks[cpuid] = NULL;
mp_ncpus--;
return (false);

View File

@@ -3467,7 +3467,7 @@ pmap_remove_all(vm_page_t m)
}
/*
* pmap_protect_l2: do the things to protect a 2MB page in a pmap
* Masks and sets bits in level 2 page table entries in the specified pmap
*/
static void
pmap_protect_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pt_entry_t mask,
@@ -3515,34 +3515,16 @@ pmap_protect_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pt_entry_t mask,
}
/*
* Set the physical protection on the
* specified range of this map as requested.
* Masks and sets bits in last level page table entries in the specified
* pmap and range
*/
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
static void
pmap_mask_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, pt_entry_t mask,
pt_entry_t nbits, bool invalidate)
{
vm_offset_t va, va_next;
pd_entry_t *l0, *l1, *l2;
pt_entry_t *l3p, l3, mask, nbits;
PMAP_ASSERT_STAGE1(pmap);
KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
if (prot == VM_PROT_NONE) {
pmap_remove(pmap, sva, eva);
return;
}
mask = nbits = 0;
if ((prot & VM_PROT_WRITE) == 0) {
mask |= ATTR_S1_AP_RW_BIT | ATTR_SW_DBM;
nbits |= ATTR_S1_AP(ATTR_S1_AP_RO);
}
if ((prot & VM_PROT_EXECUTE) == 0) {
mask |= ATTR_S1_XN;
nbits |= ATTR_S1_XN;
}
if (mask == 0)
return;
pt_entry_t *l3p, l3;
PMAP_LOCK(pmap);
for (; sva < eva; sva = va_next) {
@@ -3569,7 +3551,8 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
MPASS((pmap_load(l1) & ATTR_SW_MANAGED) == 0);
if ((pmap_load(l1) & mask) != nbits) {
pmap_store(l1, (pmap_load(l1) & ~mask) | nbits);
pmap_invalidate_page(pmap, sva, true);
if (invalidate)
pmap_invalidate_page(pmap, sva, true);
}
continue;
}
@@ -3610,8 +3593,9 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
*/
if (!pmap_l3_valid(l3) || (l3 & mask) == nbits) {
if (va != va_next) {
pmap_invalidate_range(pmap, va, sva,
true);
if (invalidate)
pmap_invalidate_range(pmap,
va, sva, true);
va = va_next;
}
continue;
@@ -3633,12 +3617,54 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
if (va == va_next)
va = sva;
}
if (va != va_next)
if (va != va_next && invalidate)
pmap_invalidate_range(pmap, va, sva, true);
}
PMAP_UNLOCK(pmap);
}
/*
* Set the physical protection on the
* specified range of this map as requested.
*/
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
pt_entry_t mask, nbits;
PMAP_ASSERT_STAGE1(pmap);
KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
if (prot == VM_PROT_NONE) {
pmap_remove(pmap, sva, eva);
return;
}
mask = nbits = 0;
if ((prot & VM_PROT_WRITE) == 0) {
mask |= ATTR_S1_AP_RW_BIT | ATTR_SW_DBM;
nbits |= ATTR_S1_AP(ATTR_S1_AP_RO);
}
if ((prot & VM_PROT_EXECUTE) == 0) {
mask |= ATTR_S1_XN;
nbits |= ATTR_S1_XN;
}
if (mask == 0)
return;
pmap_mask_set(pmap, sva, eva, mask, nbits, true);
}
void
pmap_disable_promotion(vm_offset_t sva, vm_size_t size)
{
MPASS((sva & L3_OFFSET) == 0);
MPASS(((sva + size) & L3_OFFSET) == 0);
pmap_mask_set(kernel_pmap, sva, sva + size, ATTR_SW_NO_PROMOTE,
ATTR_SW_NO_PROMOTE, false);
}
/*
* Inserts the specified page table page into the specified pmap's collection
* of idle page table pages. Each of a pmap's page table pages is responsible
@@ -3683,6 +3709,9 @@ pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte,
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
if ((newpte & ATTR_SW_NO_PROMOTE) != 0)
panic("%s: Updating non-promote pte", __func__);
/*
* Ensure we don't get switched out with the page table in an
* inconsistent state. We also need to ensure no interrupts fire
@@ -3775,7 +3804,8 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
firstl3 = pmap_l2_to_l3(l2, sva);
newl2 = pmap_load(firstl3);
if (((newl2 & (~ATTR_MASK | ATTR_AF)) & L2_OFFSET) != ATTR_AF) {
if (((newl2 & (~ATTR_MASK | ATTR_AF)) & L2_OFFSET) != ATTR_AF ||
(newl2 & ATTR_SW_NO_PROMOTE) != 0) {
atomic_add_long(&pmap_l2_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
" in pmap %p", va, pmap);
@@ -6284,6 +6314,9 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot,
break;
}
} else {
/* We can't demote/promote this entry */
MPASS((pmap_load(ptep) & ATTR_SW_NO_PROMOTE) == 0);
/*
* Split the entry to an level 3 table, then
* set the new attribute.
@@ -6375,6 +6408,8 @@ pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va)
("pmap_demote_l1: Invalid virtual address %#lx", va));
KASSERT((oldl1 & ATTR_SW_MANAGED) == 0,
("pmap_demote_l1: Level 1 table shouldn't be managed"));
KASSERT((oldl1 & ATTR_SW_NO_PROMOTE) == 0,
("pmap_demote_l1: Demoting entry with no-demote flag set"));
tmpl1 = 0;
if (va <= (vm_offset_t)l1 && va + L1_SIZE > (vm_offset_t)l1) {
@@ -6470,6 +6505,8 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,
oldl2 = pmap_load(l2);
KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK,
("pmap_demote_l2: Demoting a non-block entry"));
KASSERT((oldl2 & ATTR_SW_NO_PROMOTE) == 0,
("pmap_demote_l2: Demoting entry with no-demote flag set"));
va &= ~L2_OFFSET;
tmpl2 = 0;
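The following stand-alone sketch (userland C with illustrative values; only
the shift of ATTR_SW_NO_PROMOTE is taken from the pte.h hunk at the end of
this diff) shows the read-modify-write that pmap_mask_set() applies to each
valid entry, and why pmap_disable_promotion() can pass ATTR_SW_NO_PROMOTE as
both the mask and the new bits: the flag is set without disturbing any other
attribute.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t pt_entry_t;

#define EXAMPLE_SW_NO_PROMOTE	(UINT64_C(1) << 57)	/* like ATTR_SW_NO_PROMOTE */

/* Clear the bits in "mask", then set the bits in "nbits". */
static pt_entry_t
mask_set(pt_entry_t pte, pt_entry_t mask, pt_entry_t nbits)
{
	return ((pte & ~mask) | nbits);
}

int
main(void)
{
	pt_entry_t pte = UINT64_C(0x0040000087654703);	/* made-up entry */

	pte = mask_set(pte, EXAMPLE_SW_NO_PROMOTE, EXAMPLE_SW_NO_PROMOTE);
	printf("no-promote set: %s\n",
	    (pte & EXAMPLE_SW_NO_PROMOTE) != 0 ? "yes" : "no");
	return (0);
}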

View File

@@ -189,6 +189,7 @@ bool pmap_page_is_mapped(vm_page_t m);
int pmap_pinit_stage(pmap_t, enum pmap_stage, int);
bool pmap_ps_enabled(pmap_t pmap);
uint64_t pmap_to_ttbr0(pmap_t pmap);
void pmap_disable_promotion(vm_offset_t sva, vm_size_t size);
void *pmap_mapdev(vm_paddr_t, vm_size_t);
void *pmap_mapbios(vm_paddr_t, vm_size_t);

View File

@@ -52,8 +52,8 @@ typedef uint64_t pt_entry_t; /* page table entry */
#define ATTR_MASK_L UINT64_C(0x0000000000000fff)
#define ATTR_MASK (ATTR_MASK_H | ATTR_MASK_L)
/* Bits 58:55 are reserved for software */
#define ATTR_SW_UNUSED2 (1UL << 58)
#define ATTR_SW_UNUSED1 (1UL << 57)
#define ATTR_SW_UNUSED1 (1UL << 58)
#define ATTR_SW_NO_PROMOTE (1UL << 57)
#define ATTR_SW_MANAGED (1UL << 56)
#define ATTR_SW_WIRED (1UL << 55)