Add kernel support for the Intel userspace protection keys (PKU) feature on
Skylake Xeons.

See SDM rev. 68 Vol 3 4.6.2 "Protection Keys" and the descriptions of the
RDPKRU and WRPKRU instructions.

Reviewed by: markj
Tested by: pho
Sponsored by: The FreeBSD Foundation
MFC after: 2 weeks
Differential revision: https://reviews.freebsd.org/D18893
This commit is contained in:
parent
a32144e6e2
commit
4adce57d6f
@ -233,6 +233,9 @@ initializecpu(void)
|
||||
if (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE)
|
||||
cr4 |= CR4_FSGSBASE;
|
||||
|
||||
if (cpu_stdext_feature2 & CPUID_STDEXT2_PKU)
|
||||
cr4 |= CR4_PKE;
|
||||
|
||||
/*
|
||||
* Postpone enabling the SMEP on the boot CPU until the page
|
||||
* tables are switched from the boot loader identity mapping
|
||||
|
@ -48,7 +48,7 @@
|
||||
*/
|
||||
/*-
|
||||
* Copyright (c) 2003 Networks Associates Technology, Inc.
|
||||
* Copyright (c) 2014-2018 The FreeBSD Foundation
|
||||
* Copyright (c) 2014-2019 The FreeBSD Foundation
|
||||
* All rights reserved.
|
||||
*
|
||||
* This software was developed for the FreeBSD Project by Jake Burkholder,
|
||||
@ -121,6 +121,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/mman.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/rangeset.h>
|
||||
#include <sys/rwlock.h>
|
||||
#include <sys/sx.h>
|
||||
#include <sys/turnstile.h>
|
||||
@ -155,6 +156,7 @@ __FBSDID("$FreeBSD$");
|
||||
#ifdef SMP
|
||||
#include <machine/smp.h>
|
||||
#endif
|
||||
#include <machine/sysarch.h>
|
||||
#include <machine/tss.h>
|
||||
|
||||
static __inline boolean_t
|
||||
@ -285,6 +287,13 @@ pmap_modified_bit(pmap_t pmap)
|
||||
return (mask);
|
||||
}
|
||||
|
||||
static __inline pt_entry_t
|
||||
pmap_pku_mask_bit(pmap_t pmap)
|
||||
{
|
||||
|
||||
return (pmap->pm_type == PT_X86 ? X86_PG_PKU_MASK : 0);
|
||||
}
|
||||
|
||||
#if !defined(DIAGNOSTIC)
|
||||
#ifdef __GNUC_GNU_INLINE__
|
||||
#define PMAP_INLINE __attribute__((__gnu_inline__)) inline
|
||||
@ -424,6 +433,22 @@ static pml4_entry_t *pti_pml4;
|
||||
static vm_pindex_t pti_pg_idx;
|
||||
static bool pti_finalized;
|
||||
|
||||
struct pmap_pkru_range {
|
||||
struct rs_el pkru_rs_el;
|
||||
u_int pkru_keyidx;
|
||||
int pkru_flags;
|
||||
};
|
||||
|
||||
static uma_zone_t pmap_pkru_ranges_zone;
|
||||
static bool pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
|
||||
static pt_entry_t pmap_pkru_get(pmap_t pmap, vm_offset_t va);
|
||||
static void pmap_pkru_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
|
||||
static void *pkru_dup_range(void *ctx, void *data);
|
||||
static void pkru_free_range(void *ctx, void *node);
|
||||
static int pmap_pkru_copy(pmap_t dst_pmap, pmap_t src_pmap);
|
||||
static int pmap_pkru_deassign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
|
||||
static void pmap_pkru_deassign_all(pmap_t pmap);
|
||||
|
||||
static int
|
||||
pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
@ -2846,6 +2871,12 @@ pmap_pinit0(pmap_t pmap)
|
||||
pmap->pm_pcids[i].pm_gen = 1;
|
||||
}
|
||||
pmap_activate_boot(pmap);
|
||||
|
||||
if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) {
|
||||
pmap_pkru_ranges_zone = uma_zcreate("pkru ranges",
|
||||
sizeof(struct pmap_pkru_range), NULL, NULL, NULL, NULL,
|
||||
UMA_ALIGN_PTR, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@ -2934,6 +2965,10 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
|
||||
pmap_pinit_pml4_pti(pml4pgu);
|
||||
pmap->pm_ucr3 = VM_PAGE_TO_PHYS(pml4pgu);
|
||||
}
|
||||
if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) {
|
||||
rangeset_init(&pmap->pm_pkru, pkru_dup_range,
|
||||
pkru_free_range, pmap, M_NOWAIT);
|
||||
}
|
||||
}
|
||||
|
||||
pmap->pm_root.rt_root = 0;
|
||||
@ -3230,6 +3265,9 @@ pmap_release(pmap_t pmap)
|
||||
vm_page_unwire_noq(m);
|
||||
vm_page_free(m);
|
||||
}
|
||||
if (pmap->pm_type == PT_X86 &&
|
||||
(cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0)
|
||||
rangeset_fini(&pmap->pm_pkru);
|
||||
}
|
||||
|
||||
static int
|
||||
@ -4060,7 +4098,7 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
|
||||
{
|
||||
pd_entry_t newpde, oldpde;
|
||||
pt_entry_t *firstpte, newpte;
|
||||
pt_entry_t PG_A, PG_G, PG_M, PG_RW, PG_V;
|
||||
pt_entry_t PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V;
|
||||
vm_paddr_t mptepa;
|
||||
vm_page_t mpte;
|
||||
struct spglist free;
|
||||
@ -4073,6 +4111,7 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
|
||||
PG_RW = pmap_rw_bit(pmap);
|
||||
PG_V = pmap_valid_bit(pmap);
|
||||
PG_PTE_CACHE = pmap_cache_mask(pmap, 0);
|
||||
PG_PKU_MASK = pmap_pku_mask_bit(pmap);
|
||||
|
||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
|
||||
oldpde = *pde;
|
||||
@ -4505,6 +4544,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
|
||||
out:
|
||||
if (anyvalid)
|
||||
pmap_invalidate_all(pmap);
|
||||
pmap_pkru_on_remove(pmap, sva, eva);
|
||||
PMAP_UNLOCK(pmap);
|
||||
pmap_delayed_invl_finished();
|
||||
vm_page_free_pages_toq(&free, true);
|
||||
@ -4816,7 +4856,7 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
|
||||
{
|
||||
pd_entry_t newpde;
|
||||
pt_entry_t *firstpte, oldpte, pa, *pte;
|
||||
pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V;
|
||||
pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V, PG_PKU_MASK;
|
||||
vm_page_t mpte;
|
||||
int PG_PTE_CACHE;
|
||||
|
||||
@ -4825,6 +4865,7 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
|
||||
PG_M = pmap_modified_bit(pmap);
|
||||
PG_V = pmap_valid_bit(pmap);
|
||||
PG_RW = pmap_rw_bit(pmap);
|
||||
PG_PKU_MASK = pmap_pku_mask_bit(pmap);
|
||||
PG_PTE_CACHE = pmap_cache_mask(pmap, 0);
|
||||
|
||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
|
||||
@ -5052,6 +5093,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
|
||||
|
||||
origpte = *pte;
|
||||
pv = NULL;
|
||||
if (va < VM_MAXUSER_ADDRESS && pmap->pm_type == PT_X86)
|
||||
newpte |= pmap_pkru_get(pmap, va);
|
||||
|
||||
/*
|
||||
* Is the specified virtual address already mapped?
|
||||
@ -5271,6 +5314,25 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
|
||||
" in pmap %p", va, pmap);
|
||||
return (KERN_RESOURCE_SHORTAGE);
|
||||
}
|
||||
|
||||
/*
|
||||
* If pkru is not the same for the whole pde range, return failure
|
||||
* and let vm_fault() cope. Check after pde allocation, since
|
||||
* it could sleep.
|
||||
*/
|
||||
if (!pmap_pkru_same(pmap, va, va + NBPDR)) {
|
||||
SLIST_INIT(&free);
|
||||
if (pmap_unwire_ptp(pmap, va, pdpg, &free)) {
|
||||
pmap_invalidate_page(pmap, va);
|
||||
vm_page_free_pages_toq(&free, true);
|
||||
}
|
||||
return (KERN_FAILURE);
|
||||
}
|
||||
if (va < VM_MAXUSER_ADDRESS && pmap->pm_type == PT_X86) {
|
||||
newpde &= ~X86_PG_PKU_MASK;
|
||||
newpde |= pmap_pkru_get(pmap, va);
|
||||
}
|
||||
|
||||
pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg));
|
||||
pde = &pde[pmap_pde_index(va)];
|
||||
oldpde = *pde;
|
||||
@ -5530,7 +5592,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
|
||||
if ((prot & VM_PROT_EXECUTE) == 0)
|
||||
newpte |= pg_nx;
|
||||
if (va < VM_MAXUSER_ADDRESS)
|
||||
newpte |= PG_U;
|
||||
newpte |= PG_U | pmap_pkru_get(pmap, va);
|
||||
pte_store(pte, newpte);
|
||||
return (mpte);
|
||||
}
|
||||
@ -5906,6 +5968,36 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
|
||||
PMAP_UNLOCK(dst_pmap);
|
||||
}
|
||||
|
||||
int
|
||||
pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (dst_pmap->pm_type != src_pmap->pm_type ||
|
||||
dst_pmap->pm_type != PT_X86 ||
|
||||
(cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0)
|
||||
return (0);
|
||||
for (;;) {
|
||||
if (dst_pmap < src_pmap) {
|
||||
PMAP_LOCK(dst_pmap);
|
||||
PMAP_LOCK(src_pmap);
|
||||
} else {
|
||||
PMAP_LOCK(src_pmap);
|
||||
PMAP_LOCK(dst_pmap);
|
||||
}
|
||||
error = pmap_pkru_copy(dst_pmap, src_pmap);
|
||||
/* Clean up partial copy on failure due to no memory. */
|
||||
if (error == ENOMEM)
|
||||
pmap_pkru_deassign_all(dst_pmap);
|
||||
PMAP_UNLOCK(src_pmap);
|
||||
PMAP_UNLOCK(dst_pmap);
|
||||
if (error != ENOMEM)
|
||||
break;
|
||||
vm_wait(NULL);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Zero the specified hardware page.
|
||||
*/
|
||||
@ -6305,6 +6397,7 @@ pmap_remove_pages(pmap_t pmap)
|
||||
if (lock != NULL)
|
||||
rw_wunlock(lock);
|
||||
pmap_invalidate_all(pmap);
|
||||
pmap_pkru_deassign_all(pmap);
|
||||
PMAP_UNLOCK(pmap);
|
||||
vm_page_free_pages_toq(&free, true);
|
||||
}
|
||||
@ -8941,6 +9034,285 @@ pmap_pti_remove_kva(vm_offset_t sva, vm_offset_t eva)
|
||||
VM_OBJECT_WUNLOCK(pti_obj);
|
||||
}
|
||||
|
||||
static void *
|
||||
pkru_dup_range(void *ctx __unused, void *data)
|
||||
{
|
||||
struct pmap_pkru_range *node, *new_node;
|
||||
|
||||
new_node = uma_zalloc(pmap_pkru_ranges_zone, M_NOWAIT);
|
||||
if (new_node == NULL)
|
||||
return (NULL);
|
||||
node = data;
|
||||
memcpy(new_node, node, sizeof(*node));
|
||||
return (new_node);
|
||||
}
|
||||
|
||||
static void
|
||||
pkru_free_range(void *ctx __unused, void *node)
|
||||
{
|
||||
|
||||
uma_zfree(pmap_pkru_ranges_zone, node);
|
||||
}
|
||||
|
||||
static int
|
||||
pmap_pkru_assign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, u_int keyidx,
|
||||
int flags)
|
||||
{
|
||||
struct pmap_pkru_range *ppr;
|
||||
int error;
|
||||
|
||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
|
||||
MPASS(pmap->pm_type == PT_X86);
|
||||
MPASS((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0);
|
||||
if ((flags & AMD64_PKRU_EXCL) != 0 &&
|
||||
!rangeset_check_empty(&pmap->pm_pkru, sva, eva))
|
||||
return (EBUSY);
|
||||
ppr = uma_zalloc(pmap_pkru_ranges_zone, M_NOWAIT);
|
||||
if (ppr == NULL)
|
||||
return (ENOMEM);
|
||||
ppr->pkru_keyidx = keyidx;
|
||||
ppr->pkru_flags = flags & AMD64_PKRU_PERSIST;
|
||||
error = rangeset_insert(&pmap->pm_pkru, sva, eva, ppr);
|
||||
if (error != 0)
|
||||
uma_zfree(pmap_pkru_ranges_zone, ppr);
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
pmap_pkru_deassign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
|
||||
{
|
||||
|
||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
|
||||
MPASS(pmap->pm_type == PT_X86);
|
||||
MPASS((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0);
|
||||
return (rangeset_remove(&pmap->pm_pkru, sva, eva));
|
||||
}
|
||||
|
||||
static void
|
||||
pmap_pkru_deassign_all(pmap_t pmap)
|
||||
{
|
||||
|
||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
|
||||
if (pmap->pm_type == PT_X86 &&
|
||||
(cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0)
|
||||
rangeset_remove_all(&pmap->pm_pkru);
|
||||
}
|
||||
|
||||
static bool
|
||||
pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
|
||||
{
|
||||
struct pmap_pkru_range *ppr, *prev_ppr;
|
||||
vm_offset_t va;
|
||||
|
||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
|
||||
if (pmap->pm_type != PT_X86 ||
|
||||
(cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0 ||
|
||||
sva >= VM_MAXUSER_ADDRESS)
|
||||
return (true);
|
||||
MPASS(eva <= VM_MAXUSER_ADDRESS);
|
||||
for (va = sva, prev_ppr = NULL; va < eva;) {
|
||||
ppr = rangeset_lookup(&pmap->pm_pkru, va);
|
||||
if ((ppr == NULL) ^ (prev_ppr == NULL))
|
||||
return (false);
|
||||
if (ppr == NULL) {
|
||||
va += PAGE_SIZE;
|
||||
continue;
|
||||
}
|
||||
if (prev_ppr->pkru_keyidx != ppr->pkru_keyidx)
|
||||
return (false);
|
||||
va = ppr->pkru_rs_el.re_end;
|
||||
}
|
||||
return (true);
|
||||
}
|
||||
|
||||
static pt_entry_t
|
||||
pmap_pkru_get(pmap_t pmap, vm_offset_t va)
|
||||
{
|
||||
struct pmap_pkru_range *ppr;
|
||||
|
||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
|
||||
if (pmap->pm_type != PT_X86 ||
|
||||
(cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0 ||
|
||||
va >= VM_MAXUSER_ADDRESS)
|
||||
return (0);
|
||||
ppr = rangeset_lookup(&pmap->pm_pkru, va);
|
||||
if (ppr != NULL)
|
||||
return (X86_PG_PKU(ppr->pkru_keyidx));
|
||||
return (0);
|
||||
}
|
||||
|
||||
static bool
|
||||
pred_pkru_on_remove(void *ctx __unused, void *r)
|
||||
{
|
||||
struct pmap_pkru_range *ppr;
|
||||
|
||||
ppr = r;
|
||||
return ((ppr->pkru_flags & AMD64_PKRU_PERSIST) == 0);
|
||||
}
|
||||
|
||||
static void
|
||||
pmap_pkru_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
|
||||
{
|
||||
|
||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
|
||||
if (pmap->pm_type == PT_X86 &&
|
||||
(cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) {
|
||||
rangeset_remove_pred(&pmap->pm_pkru, sva, eva,
|
||||
pred_pkru_on_remove);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
pmap_pkru_copy(pmap_t dst_pmap, pmap_t src_pmap)
|
||||
{
|
||||
|
||||
PMAP_LOCK_ASSERT(dst_pmap, MA_OWNED);
|
||||
PMAP_LOCK_ASSERT(src_pmap, MA_OWNED);
|
||||
MPASS(dst_pmap->pm_type == PT_X86);
|
||||
MPASS(src_pmap->pm_type == PT_X86);
|
||||
MPASS((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0);
|
||||
if (src_pmap->pm_pkru.rs_data_ctx == NULL)
|
||||
return (0);
|
||||
return (rangeset_copy(&dst_pmap->pm_pkru, &src_pmap->pm_pkru));
|
||||
}
|
||||
|
||||
static void
|
||||
pmap_pkru_update_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
|
||||
u_int keyidx)
|
||||
{
|
||||
pml4_entry_t *pml4e;
|
||||
pdp_entry_t *pdpe;
|
||||
pd_entry_t newpde, ptpaddr, *pde;
|
||||
pt_entry_t newpte, *ptep, pte;
|
||||
vm_offset_t va, va_next;
|
||||
bool changed;
|
||||
|
||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
|
||||
MPASS(pmap->pm_type == PT_X86);
|
||||
MPASS(keyidx <= PMAP_MAX_PKRU_IDX);
|
||||
|
||||
for (changed = false, va = sva; va < eva; va = va_next) {
|
||||
pml4e = pmap_pml4e(pmap, va);
|
||||
if ((*pml4e & X86_PG_V) == 0) {
|
||||
va_next = (va + NBPML4) & ~PML4MASK;
|
||||
if (va_next < va)
|
||||
va_next = eva;
|
||||
continue;
|
||||
}
|
||||
|
||||
pdpe = pmap_pml4e_to_pdpe(pml4e, va);
|
||||
if ((*pdpe & X86_PG_V) == 0) {
|
||||
va_next = (va + NBPDP) & ~PDPMASK;
|
||||
if (va_next < va)
|
||||
va_next = eva;
|
||||
continue;
|
||||
}
|
||||
|
||||
va_next = (va + NBPDR) & ~PDRMASK;
|
||||
if (va_next < va)
|
||||
va_next = eva;
|
||||
|
||||
pde = pmap_pdpe_to_pde(pdpe, va);
|
||||
ptpaddr = *pde;
|
||||
if (ptpaddr == 0)
|
||||
continue;
|
||||
|
||||
MPASS((ptpaddr & X86_PG_V) != 0);
|
||||
if ((ptpaddr & PG_PS) != 0) {
|
||||
if (va + NBPDR == va_next && eva >= va_next) {
|
||||
newpde = (ptpaddr & ~X86_PG_PKU_MASK) |
|
||||
X86_PG_PKU(keyidx);
|
||||
if (newpde != ptpaddr) {
|
||||
*pde = newpde;
|
||||
changed = true;
|
||||
}
|
||||
continue;
|
||||
} else if (!pmap_demote_pde(pmap, pde, va)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (va_next > eva)
|
||||
va_next = eva;
|
||||
|
||||
for (ptep = pmap_pde_to_pte(pde, va); va != va_next;
|
||||
ptep++, va += PAGE_SIZE) {
|
||||
pte = *ptep;
|
||||
if ((pte & X86_PG_V) == 0)
|
||||
continue;
|
||||
newpte = (pte & ~X86_PG_PKU_MASK) | X86_PG_PKU(keyidx);
|
||||
if (newpte != pte) {
|
||||
*ptep = newpte;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (changed)
|
||||
pmap_invalidate_range(pmap, sva, eva);
|
||||
}
|
||||
|
||||
static int
|
||||
pmap_pkru_check_uargs(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
|
||||
u_int keyidx, int flags)
|
||||
{
|
||||
|
||||
if (pmap->pm_type != PT_X86 || keyidx > PMAP_MAX_PKRU_IDX ||
|
||||
(flags & ~(AMD64_PKRU_PERSIST | AMD64_PKRU_EXCL)) != 0)
|
||||
return (EINVAL);
|
||||
if (eva <= sva || eva > VM_MAXUSER_ADDRESS)
|
||||
return (EFAULT);
|
||||
if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0)
|
||||
return (ENOTSUP);
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, u_int keyidx,
|
||||
int flags)
|
||||
{
|
||||
int error;
|
||||
|
||||
sva = trunc_page(sva);
|
||||
eva = round_page(eva);
|
||||
error = pmap_pkru_check_uargs(pmap, sva, eva, keyidx, flags);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
for (;;) {
|
||||
PMAP_LOCK(pmap);
|
||||
error = pmap_pkru_assign(pmap, sva, eva, keyidx, flags);
|
||||
if (error == 0)
|
||||
pmap_pkru_update_range(pmap, sva, eva, keyidx);
|
||||
PMAP_UNLOCK(pmap);
|
||||
if (error != ENOMEM)
|
||||
break;
|
||||
vm_wait(NULL);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
|
||||
{
|
||||
int error;
|
||||
|
||||
sva = trunc_page(sva);
|
||||
eva = round_page(eva);
|
||||
error = pmap_pkru_check_uargs(pmap, sva, eva, 0, 0);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
for (;;) {
|
||||
PMAP_LOCK(pmap);
|
||||
error = pmap_pkru_deassign(pmap, sva, eva);
|
||||
if (error == 0)
|
||||
pmap_pkru_update_range(pmap, sva, eva, 0);
|
||||
PMAP_UNLOCK(pmap);
|
||||
if (error != ENOMEM)
|
||||
break;
|
||||
vm_wait(NULL);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
#include "opt_ddb.h"
|
||||
#ifdef DDB
|
||||
#include <sys/kdb.h>
|
||||
|
@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/lock.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/pcpu.h>
|
||||
#include <sys/priv.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/smp.h>
|
||||
@ -53,6 +54,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
#include <vm/vm_kern.h> /* for kernel_map */
|
||||
#include <vm/vm_map.h>
|
||||
#include <vm/vm_extern.h>
|
||||
|
||||
#include <machine/frame.h>
|
||||
@ -170,13 +172,16 @@ update_gdt_fsbase(struct thread *td, uint32_t base)
|
||||
int
|
||||
sysarch(struct thread *td, struct sysarch_args *uap)
|
||||
{
|
||||
int error = 0;
|
||||
struct pcb *pcb = curthread->td_pcb;
|
||||
struct pcb *pcb;
|
||||
struct vm_map *map;
|
||||
uint32_t i386base;
|
||||
uint64_t a64base;
|
||||
struct i386_ioperm_args iargs;
|
||||
struct i386_get_xfpustate i386xfpu;
|
||||
struct i386_set_pkru i386pkru;
|
||||
struct amd64_get_xfpustate a64xfpu;
|
||||
struct amd64_set_pkru a64pkru;
|
||||
int error;
|
||||
|
||||
#ifdef CAPABILITY_MODE
|
||||
/*
|
||||
@ -194,11 +199,15 @@ sysarch(struct thread *td, struct sysarch_args *uap)
|
||||
case I386_GET_GSBASE:
|
||||
case I386_SET_GSBASE:
|
||||
case I386_GET_XFPUSTATE:
|
||||
case I386_SET_PKRU:
|
||||
case I386_CLEAR_PKRU:
|
||||
case AMD64_GET_FSBASE:
|
||||
case AMD64_SET_FSBASE:
|
||||
case AMD64_GET_GSBASE:
|
||||
case AMD64_SET_GSBASE:
|
||||
case AMD64_GET_XFPUSTATE:
|
||||
case AMD64_SET_PKRU:
|
||||
case AMD64_CLEAR_PKRU:
|
||||
break;
|
||||
|
||||
case I386_SET_IOPERM:
|
||||
@ -214,6 +223,10 @@ sysarch(struct thread *td, struct sysarch_args *uap)
|
||||
|
||||
if (uap->op == I386_GET_LDT || uap->op == I386_SET_LDT)
|
||||
return (sysarch_ldt(td, uap, UIO_USERSPACE));
|
||||
|
||||
error = 0;
|
||||
pcb = td->td_pcb;
|
||||
|
||||
/*
|
||||
* XXXKIB check that the BSM generation code knows to encode
|
||||
* the op argument.
|
||||
@ -233,11 +246,27 @@ sysarch(struct thread *td, struct sysarch_args *uap)
|
||||
a64xfpu.addr = (void *)(uintptr_t)i386xfpu.addr;
|
||||
a64xfpu.len = i386xfpu.len;
|
||||
break;
|
||||
case I386_SET_PKRU:
|
||||
case I386_CLEAR_PKRU:
|
||||
if ((error = copyin(uap->parms, &i386pkru,
|
||||
sizeof(struct i386_set_pkru))) != 0)
|
||||
return (error);
|
||||
a64pkru.addr = (void *)(uintptr_t)i386pkru.addr;
|
||||
a64pkru.len = i386pkru.len;
|
||||
a64pkru.keyidx = i386pkru.keyidx;
|
||||
a64pkru.flags = i386pkru.flags;
|
||||
break;
|
||||
case AMD64_GET_XFPUSTATE:
|
||||
if ((error = copyin(uap->parms, &a64xfpu,
|
||||
sizeof(struct amd64_get_xfpustate))) != 0)
|
||||
return (error);
|
||||
break;
|
||||
case AMD64_SET_PKRU:
|
||||
case AMD64_CLEAR_PKRU:
|
||||
if ((error = copyin(uap->parms, &a64pkru,
|
||||
sizeof(struct amd64_set_pkru))) != 0)
|
||||
return (error);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -326,6 +355,34 @@ sysarch(struct thread *td, struct sysarch_args *uap)
|
||||
a64xfpu.addr, a64xfpu.len);
|
||||
break;
|
||||
|
||||
case I386_SET_PKRU:
|
||||
case AMD64_SET_PKRU:
|
||||
/*
|
||||
* Read-lock the map to synchronize with parallel
|
||||
* pmap_vmspace_copy() on fork.
|
||||
*/
|
||||
map = &td->td_proc->p_vmspace->vm_map;
|
||||
vm_map_lock_read(map);
|
||||
error = pmap_pkru_set(PCPU_GET(curpmap),
|
||||
(vm_offset_t)a64pkru.addr, (vm_offset_t)a64pkru.addr +
|
||||
a64pkru.len, a64pkru.keyidx, a64pkru.flags);
|
||||
vm_map_unlock_read(map);
|
||||
break;
|
||||
|
||||
case I386_CLEAR_PKRU:
|
||||
case AMD64_CLEAR_PKRU:
|
||||
if (a64pkru.flags != 0 || a64pkru.keyidx != 0) {
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
map = &td->td_proc->p_vmspace->vm_map;
|
||||
vm_map_lock_read(map);
|
||||
error = pmap_pkru_clear(PCPU_GET(curpmap),
|
||||
(vm_offset_t)a64pkru.addr,
|
||||
(vm_offset_t)a64pkru.addr + a64pkru.len);
|
||||
vm_map_unlock(map);
|
||||
break;
|
||||
|
||||
default:
|
||||
error = EINVAL;
|
||||
break;
|
||||
|
@ -807,6 +807,20 @@ trap_pfault(struct trapframe *frame, int usermode)
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* User-mode protection key violation (PKU). May happen
|
||||
* either from usermode or from kernel if copyin accessed
|
||||
* key-protected mapping.
|
||||
*/
|
||||
if ((frame->tf_err & PGEX_PK) != 0) {
|
||||
if (eva > VM_MAXUSER_ADDRESS) {
|
||||
trap_fatal(frame, eva);
|
||||
return (-1);
|
||||
}
|
||||
rv = KERN_PROTECTION_FAILURE;
|
||||
goto after_vmfault;
|
||||
}
|
||||
|
||||
/*
|
||||
* If nx protection of the usermode portion of kernel page
|
||||
* tables caused trap, panic.
|
||||
@ -842,6 +856,7 @@ trap_pfault(struct trapframe *frame, int usermode)
|
||||
#endif
|
||||
return (0);
|
||||
}
|
||||
after_vmfault:
|
||||
if (!usermode) {
|
||||
if (td->td_intr_nesting_level == 0 &&
|
||||
curpcb->pcb_onfault != NULL) {
|
||||
|
@ -66,6 +66,7 @@
|
||||
#define X86_PG_AVAIL2 0x400 /* < programmers use */
|
||||
#define X86_PG_AVAIL3 0x800 /* \ */
|
||||
#define X86_PG_PDE_PAT 0x1000 /* PAT PAT index */
|
||||
#define X86_PG_PKU(idx) ((pt_entry_t)(idx) << 59) /* Protection key index, shifted to bit 59; argument parenthesized so the cast covers compound expressions */
|
||||
#define X86_PG_NX (1ul<<63) /* No-execute */
|
||||
#define X86_PG_AVAIL(x) (1ul << (x))
|
||||
|
||||
@ -73,6 +74,10 @@
|
||||
#define X86_PG_PDE_CACHE (X86_PG_PDE_PAT | X86_PG_NC_PWT | X86_PG_NC_PCD)
|
||||
#define X86_PG_PTE_CACHE (X86_PG_PTE_PAT | X86_PG_NC_PWT | X86_PG_NC_PCD)
|
||||
|
||||
/* Protection keys indexes */
|
||||
#define PMAP_MAX_PKRU_IDX 0xf
|
||||
#define X86_PG_PKU_MASK X86_PG_PKU(PMAP_MAX_PKRU_IDX)
|
||||
|
||||
/*
|
||||
* Intel extended page table (EPT) bit definitions.
|
||||
*/
|
||||
@ -120,7 +125,7 @@
|
||||
* (PTE) page mappings have identical settings for the following fields:
|
||||
*/
|
||||
#define PG_PTE_PROMOTE (PG_NX | PG_MANAGED | PG_W | PG_G | PG_PTE_CACHE | \
|
||||
PG_M | PG_A | PG_U | PG_RW | PG_V)
|
||||
PG_M | PG_A | PG_U | PG_RW | PG_V | PG_PKU_MASK)
|
||||
|
||||
/*
|
||||
* Page Protection Exception bits
|
||||
@ -242,6 +247,8 @@
|
||||
#include <sys/_cpuset.h>
|
||||
#include <sys/_lock.h>
|
||||
#include <sys/_mutex.h>
|
||||
#include <sys/_pctrie.h>
|
||||
#include <sys/_rangeset.h>
|
||||
|
||||
#include <vm/_vm_radix.h>
|
||||
|
||||
@ -336,6 +343,7 @@ struct pmap {
|
||||
long pm_eptgen; /* EPT pmap generation id */
|
||||
int pm_flags;
|
||||
struct pmap_pcids pm_pcids[MAXCPU];
|
||||
struct rangeset pm_pkru;
|
||||
};
|
||||
|
||||
/* flags */
|
||||
@ -454,6 +462,10 @@ void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3);
|
||||
void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va);
|
||||
void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva,
|
||||
vm_offset_t eva);
|
||||
int pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
|
||||
int pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
|
||||
u_int keyidx, int flags);
|
||||
int pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap);
|
||||
#endif /* _KERNEL */
|
||||
|
||||
/* Return various clipped indexes for a given VA */
|
||||
|
@ -71,5 +71,12 @@ void pmap_kremove_device(vm_offset_t, vm_size_t);
|
||||
vm_paddr_t pmap_kextract(vm_offset_t);
|
||||
#define vtophys(va) pmap_kextract((vm_offset_t)(va))
|
||||
|
||||
static inline int
|
||||
pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
|
||||
{
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
#endif /* _KERNEL */
|
||||
#endif /* !_MACHINE_PMAP_H_ */
|
||||
|
@ -171,6 +171,13 @@ struct pcb *pmap_switch(struct thread *, struct thread *);
|
||||
|
||||
#define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list))
|
||||
|
||||
static inline int
|
||||
pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
|
||||
{
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#endif /* !LOCORE */
|
||||
|
@ -244,6 +244,13 @@ extern vm_offset_t virtual_end;
|
||||
#define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0)
|
||||
#define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz))
|
||||
|
||||
static inline int
|
||||
pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
|
||||
{
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
struct sf_buf;
|
||||
|
||||
/*
|
||||
|
@ -185,6 +185,13 @@ int pmap_emulate_modified(pmap_t pmap, vm_offset_t va);
|
||||
void pmap_page_set_memattr(vm_page_t, vm_memattr_t);
|
||||
int pmap_change_attr(vm_offset_t, vm_size_t, vm_memattr_t);
|
||||
|
||||
static inline int
|
||||
pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
|
||||
{
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#endif /* !LOCORE */
|
||||
|
@ -288,6 +288,13 @@ vm_offset_t pmap_early_io_map(vm_paddr_t pa, vm_size_t size);
|
||||
void pmap_early_io_unmap(vm_offset_t va, vm_size_t size);
|
||||
void pmap_track_page(pmap_t pmap, vm_offset_t va);
|
||||
|
||||
static inline int
|
||||
pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
|
||||
{
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* !_MACHINE_PMAP_H_ */
|
||||
|
@ -166,6 +166,13 @@ bool pmap_get_tables(pmap_t, vm_offset_t, pd_entry_t **, pd_entry_t **,
|
||||
|
||||
int pmap_fault_fixup(pmap_t, vm_offset_t, vm_prot_t);
|
||||
|
||||
static inline int
|
||||
pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
|
||||
{
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#endif /* !LOCORE */
|
||||
|
@ -128,4 +128,11 @@ SYSCTL_DECL(_debug_pmap_stats);
|
||||
|
||||
#endif
|
||||
|
||||
static inline int
|
||||
pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
|
||||
{
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
#endif /* !_MACHINE_PMAP_H_ */
|
||||
|
@ -481,8 +481,20 @@ vm_fault_populate(struct faultstate *fs, vm_prot_t prot, int fault_type,
|
||||
fault_flags, true);
|
||||
}
|
||||
VM_OBJECT_WUNLOCK(fs->first_object);
|
||||
pmap_enter(fs->map->pmap, vaddr, m, prot, fault_type | (wired ?
|
||||
PMAP_ENTER_WIRED : 0), psind);
|
||||
rv = pmap_enter(fs->map->pmap, vaddr, m, prot, fault_type |
|
||||
(wired ? PMAP_ENTER_WIRED : 0), psind);
|
||||
#if defined(__amd64__)
|
||||
if (psind > 0 && rv == KERN_FAILURE) {
|
||||
for (i = 0; i < npages; i++) {
|
||||
rv = pmap_enter(fs->map->pmap, vaddr + ptoa(i),
|
||||
&m[i], prot, fault_type |
|
||||
(wired ? PMAP_ENTER_WIRED : 0), 0);
|
||||
MPASS(rv == KERN_SUCCESS);
|
||||
}
|
||||
}
|
||||
#else
|
||||
MPASS(rv == KERN_SUCCESS);
|
||||
#endif
|
||||
VM_OBJECT_WLOCK(fs->first_object);
|
||||
m_mtx = NULL;
|
||||
for (i = 0; i < npages; i++) {
|
||||
|
@ -3544,7 +3544,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
|
||||
vm_map_t new_map, old_map;
|
||||
vm_map_entry_t new_entry, old_entry;
|
||||
vm_object_t object;
|
||||
int locked;
|
||||
int error, locked;
|
||||
vm_inherit_t inh;
|
||||
|
||||
old_map = &vm1->vm_map;
|
||||
@ -3553,6 +3553,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
|
||||
pmap_pinit);
|
||||
if (vm2 == NULL)
|
||||
return (NULL);
|
||||
|
||||
vm2->vm_taddr = vm1->vm_taddr;
|
||||
vm2->vm_daddr = vm1->vm_daddr;
|
||||
vm2->vm_maxsaddr = vm1->vm_maxsaddr;
|
||||
@ -3563,7 +3564,17 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
|
||||
locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */
|
||||
KASSERT(locked, ("vmspace_fork: lock failed"));
|
||||
|
||||
error = pmap_vmspace_copy(new_map->pmap, old_map->pmap);
|
||||
if (error != 0) {
|
||||
sx_xunlock(&old_map->lock);
|
||||
sx_xunlock(&new_map->lock);
|
||||
vm_map_process_deferred();
|
||||
vmspace_free(vm2);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
new_map->anon_loc = old_map->anon_loc;
|
||||
|
||||
old_entry = old_map->header.next;
|
||||
|
||||
while (old_entry != &old_map->header) {
|
||||
|
@ -52,6 +52,8 @@
|
||||
#define I386_GET_GSBASE 9
|
||||
#define I386_SET_GSBASE 10
|
||||
#define I386_GET_XFPUSTATE 11
|
||||
#define I386_SET_PKRU 12
|
||||
#define I386_CLEAR_PKRU 13
|
||||
|
||||
/* Leave space for 0-127 to avoid translating syscalls */
|
||||
#define AMD64_GET_FSBASE 128
|
||||
@ -59,6 +61,12 @@
|
||||
#define AMD64_GET_GSBASE 130
|
||||
#define AMD64_SET_GSBASE 131
|
||||
#define AMD64_GET_XFPUSTATE 132
|
||||
#define AMD64_SET_PKRU 133
|
||||
#define AMD64_CLEAR_PKRU 134
|
||||
|
||||
/* Flags for AMD64_SET_PKRU */
|
||||
#define AMD64_PKRU_EXCL 0x0001
|
||||
#define AMD64_PKRU_PERSIST 0x0002
|
||||
|
||||
struct i386_ioperm_args {
|
||||
unsigned int start;
|
||||
@ -94,12 +102,26 @@ struct i386_get_xfpustate {
|
||||
int len;
|
||||
};
|
||||
|
||||
/*
 * Argument block for the I386_SET_PKRU and I386_CLEAR_PKRU sysarch
 * operations.  sysarch() copies it field by field into a
 * struct amd64_set_pkru.
 */
struct i386_set_pkru {
	unsigned int	addr;	/* start address of the range */
	unsigned int	len;	/* length; the range ends at addr + len */
	unsigned int	keyidx;	/* protection key index */
	int		flags;	/* AMD64_PKRU_* flags */
};
|
||||
|
||||
struct amd64_get_xfpustate {
|
||||
void *addr;
|
||||
int len;
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
 * Argument block for the AMD64_SET_PKRU and AMD64_CLEAR_PKRU sysarch
 * operations.  The CLEAR operation requires keyidx and flags to be 0.
 */
struct amd64_set_pkru {
	void		*addr;	/* start address of the range */
	unsigned long	len;	/* length; the range ends at addr + len */
	unsigned int	keyidx;	/* protection key index */
	int		flags;	/* AMD64_PKRU_* flags */
};
|
||||
|
||||
#ifndef _KERNEL
|
||||
union descriptor;
|
||||
struct dbreg;
|
||||
|
Loading…
Reference in New Issue
Block a user