Add kernel support for the Intel userspace protection keys feature on
Skylake Xeons.

See SDM rev. 68 Vol 3 4.6.2 Protection Keys and the description of the
RDPKRU and WRPKRU instructions.

Reviewed by:	markj
Tested by:	pho
Sponsored by:	The FreeBSD Foundation
MFC after:	2 weeks
Differential revision:	https://reviews.freebsd.org/D18893
This commit is contained in:
kib 2019-02-20 09:51:13 +00:00
parent a32144e6e2
commit 4adce57d6f
15 changed files with 563 additions and 10 deletions

View File

@ -233,6 +233,9 @@ initializecpu(void)
if (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE)
cr4 |= CR4_FSGSBASE;
if (cpu_stdext_feature2 & CPUID_STDEXT2_PKU)
cr4 |= CR4_PKE;
/*
* Postpone enabling the SMEP on the boot CPU until the page
* tables are switched from the boot loader identity mapping

View File

@ -48,7 +48,7 @@
*/
/*-
* Copyright (c) 2003 Networks Associates Technology, Inc.
* Copyright (c) 2014-2018 The FreeBSD Foundation
* Copyright (c) 2014-2019 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed for the FreeBSD Project by Jake Burkholder,
@ -121,6 +121,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangeset.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/turnstile.h>
@ -155,6 +156,7 @@ __FBSDID("$FreeBSD$");
#ifdef SMP
#include <machine/smp.h>
#endif
#include <machine/sysarch.h>
#include <machine/tss.h>
static __inline boolean_t
@ -285,6 +287,13 @@ pmap_modified_bit(pmap_t pmap)
return (mask);
}
/*
 * Return the page table entry bits that hold the protection key index
 * for this pmap: X86_PG_PKU_MASK for PT_X86 pmaps, zero for any other
 * pmap type.
 */
static __inline pt_entry_t
pmap_pku_mask_bit(pmap_t pmap)
{

	if (pmap->pm_type != PT_X86)
		return (0);
	return (X86_PG_PKU_MASK);
}
#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE __attribute__((__gnu_inline__)) inline
@ -424,6 +433,22 @@ static pml4_entry_t *pti_pml4;
static vm_pindex_t pti_pg_idx;
static bool pti_finalized;
/*
 * One protection key assignment, stored as an element of a pmap's
 * pm_pkru rangeset and allocated from pmap_pkru_ranges_zone.
 */
struct pmap_pkru_range {
/* Rangeset element header; its re_end is read as the end of the range. */
struct rs_el pkru_rs_el;
/* Protection key index; turned into PTE bits with X86_PG_PKU(). */
u_int pkru_keyidx;
/* Only AMD64_PKRU_PERSIST is ever stored here by pmap_pkru_assign(). */
int pkru_flags;
};
static uma_zone_t pmap_pkru_ranges_zone;
static bool pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
static pt_entry_t pmap_pkru_get(pmap_t pmap, vm_offset_t va);
static void pmap_pkru_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
static void *pkru_dup_range(void *ctx, void *data);
static void pkru_free_range(void *ctx, void *node);
static int pmap_pkru_copy(pmap_t dst_pmap, pmap_t src_pmap);
static int pmap_pkru_deassign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
static void pmap_pkru_deassign_all(pmap_t pmap);
static int
pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS)
{
@ -2846,6 +2871,12 @@ pmap_pinit0(pmap_t pmap)
pmap->pm_pcids[i].pm_gen = 1;
}
pmap_activate_boot(pmap);
if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) {
pmap_pkru_ranges_zone = uma_zcreate("pkru ranges",
sizeof(struct pmap_pkru_range), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
}
}
void
@ -2934,6 +2965,10 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
pmap_pinit_pml4_pti(pml4pgu);
pmap->pm_ucr3 = VM_PAGE_TO_PHYS(pml4pgu);
}
if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) {
rangeset_init(&pmap->pm_pkru, pkru_dup_range,
pkru_free_range, pmap, M_NOWAIT);
}
}
pmap->pm_root.rt_root = 0;
@ -3230,6 +3265,9 @@ pmap_release(pmap_t pmap)
vm_page_unwire_noq(m);
vm_page_free(m);
}
if (pmap->pm_type == PT_X86 &&
(cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0)
rangeset_fini(&pmap->pm_pkru);
}
static int
@ -4060,7 +4098,7 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
{
pd_entry_t newpde, oldpde;
pt_entry_t *firstpte, newpte;
pt_entry_t PG_A, PG_G, PG_M, PG_RW, PG_V;
pt_entry_t PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V;
vm_paddr_t mptepa;
vm_page_t mpte;
struct spglist free;
@ -4073,6 +4111,7 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
PG_RW = pmap_rw_bit(pmap);
PG_V = pmap_valid_bit(pmap);
PG_PTE_CACHE = pmap_cache_mask(pmap, 0);
PG_PKU_MASK = pmap_pku_mask_bit(pmap);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
oldpde = *pde;
@ -4505,6 +4544,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
out:
if (anyvalid)
pmap_invalidate_all(pmap);
pmap_pkru_on_remove(pmap, sva, eva);
PMAP_UNLOCK(pmap);
pmap_delayed_invl_finished();
vm_page_free_pages_toq(&free, true);
@ -4816,7 +4856,7 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
{
pd_entry_t newpde;
pt_entry_t *firstpte, oldpte, pa, *pte;
pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V;
pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V, PG_PKU_MASK;
vm_page_t mpte;
int PG_PTE_CACHE;
@ -4825,6 +4865,7 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
PG_M = pmap_modified_bit(pmap);
PG_V = pmap_valid_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
PG_PKU_MASK = pmap_pku_mask_bit(pmap);
PG_PTE_CACHE = pmap_cache_mask(pmap, 0);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@ -5052,6 +5093,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
origpte = *pte;
pv = NULL;
if (va < VM_MAXUSER_ADDRESS && pmap->pm_type == PT_X86)
newpte |= pmap_pkru_get(pmap, va);
/*
* Is the specified virtual address already mapped?
@ -5271,6 +5314,25 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
" in pmap %p", va, pmap);
return (KERN_RESOURCE_SHORTAGE);
}
/*
* If pkru is not same for the whole pde range, return failure
* and let vm_fault() cope. Check after pde allocation, since
* it could sleep.
*/
if (!pmap_pkru_same(pmap, va, va + NBPDR)) {
SLIST_INIT(&free);
if (pmap_unwire_ptp(pmap, va, pdpg, &free)) {
pmap_invalidate_page(pmap, va);
vm_page_free_pages_toq(&free, true);
}
return (KERN_FAILURE);
}
if (va < VM_MAXUSER_ADDRESS && pmap->pm_type == PT_X86) {
newpde &= ~X86_PG_PKU_MASK;
newpde |= pmap_pkru_get(pmap, va);
}
pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg));
pde = &pde[pmap_pde_index(va)];
oldpde = *pde;
@ -5530,7 +5592,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
if ((prot & VM_PROT_EXECUTE) == 0)
newpte |= pg_nx;
if (va < VM_MAXUSER_ADDRESS)
newpte |= PG_U;
newpte |= PG_U | pmap_pkru_get(pmap, va);
pte_store(pte, newpte);
return (mpte);
}
@ -5906,6 +5968,36 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
PMAP_UNLOCK(dst_pmap);
}
/*
 * Copy the protection key ranges of src_pmap into dst_pmap.
 *
 * Nothing is done (and 0 is returned) unless both pmaps are PT_X86 and
 * the CPU advertises PKU.  Both pmap locks are taken in ascending
 * address order so that concurrent copies cannot deadlock.  If the copy
 * fails with ENOMEM the partial result is discarded, the locks are
 * dropped, and the copy is retried after vm_wait().  Any other result of
 * pmap_pkru_copy() is returned to the caller.
 */
int
pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap)
{
	pmap_t first, second;
	int error;

	if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0 ||
	    dst_pmap->pm_type != PT_X86 ||
	    src_pmap->pm_type != dst_pmap->pm_type)
		return (0);

	/* The lower-addressed pmap is always locked first. */
	if (dst_pmap < src_pmap) {
		first = dst_pmap;
		second = src_pmap;
	} else {
		first = src_pmap;
		second = dst_pmap;
	}

	for (;;) {
		PMAP_LOCK(first);
		PMAP_LOCK(second);
		error = pmap_pkru_copy(dst_pmap, src_pmap);
		if (error != ENOMEM) {
			PMAP_UNLOCK(src_pmap);
			PMAP_UNLOCK(dst_pmap);
			return (error);
		}

		/* Out of memory: throw away the partial copy and retry. */
		pmap_pkru_deassign_all(dst_pmap);
		PMAP_UNLOCK(src_pmap);
		PMAP_UNLOCK(dst_pmap);
		vm_wait(NULL);
	}
}
/*
* Zero the specified hardware page.
*/
@ -6305,6 +6397,7 @@ pmap_remove_pages(pmap_t pmap)
if (lock != NULL)
rw_wunlock(lock);
pmap_invalidate_all(pmap);
pmap_pkru_deassign_all(pmap);
PMAP_UNLOCK(pmap);
vm_page_free_pages_toq(&free, true);
}
@ -8941,6 +9034,285 @@ pmap_pti_remove_kva(vm_offset_t sva, vm_offset_t eva)
VM_OBJECT_WUNLOCK(pti_obj);
}
/*
 * Rangeset duplication callback: allocate a new pmap_pkru_range from the
 * zone without sleeping and fill it with a byte copy of 'data'.  Returns
 * the new element, or NULL if the allocation failed.
 */
static void *
pkru_dup_range(void *ctx __unused, void *data)
{
	struct pmap_pkru_range *copy;

	copy = uma_zalloc(pmap_pkru_ranges_zone, M_NOWAIT);
	if (copy != NULL)
		memcpy(copy, data, sizeof(struct pmap_pkru_range));
	return (copy);
}
/*
 * Rangeset free callback: hand 'node' back to pmap_pkru_ranges_zone.
 * The context argument is not used.
 */
static void
pkru_free_range(void *ctx __unused, void *node)
{
uma_zfree(pmap_pkru_ranges_zone, node);
}
/*
 * Record that [sva, eva) in 'pmap' is tagged with protection key
 * 'keyidx'.  The pmap lock must be held, the pmap must be PT_X86 and the
 * CPU must support PKU.
 *
 * With AMD64_PKRU_EXCL the request fails with EBUSY unless
 * rangeset_check_empty() accepts the range.  ENOMEM is returned when the
 * range element cannot be allocated without sleeping.  Otherwise the
 * result of rangeset_insert() is returned; on failure the freshly
 * allocated element is released.  Only the AMD64_PKRU_PERSIST bit of
 * 'flags' is stored with the range.
 */
static int
pmap_pkru_assign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, u_int keyidx,
    int flags)
{
	struct pmap_pkru_range *range;
	int rc;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	MPASS(pmap->pm_type == PT_X86);
	MPASS((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0);

	if ((flags & AMD64_PKRU_EXCL) != 0) {
		if (!rangeset_check_empty(&pmap->pm_pkru, sva, eva))
			return (EBUSY);
	}

	range = uma_zalloc(pmap_pkru_ranges_zone, M_NOWAIT);
	if (range == NULL)
		return (ENOMEM);
	range->pkru_flags = flags & AMD64_PKRU_PERSIST;
	range->pkru_keyidx = keyidx;

	rc = rangeset_insert(&pmap->pm_pkru, sva, eva, range);
	if (rc == 0)
		return (0);
	uma_zfree(pmap_pkru_ranges_zone, range);
	return (rc);
}
/*
 * Drop the protection key bookkeeping for [sva, eva) by passing the
 * range to rangeset_remove() on the pmap's pm_pkru set, and return its
 * result.  The caller must hold the pmap lock; the pmap must be PT_X86
 * and the CPU must support PKU.  This only edits the rangeset; the page
 * table entries are not touched here.
 */
static int
pmap_pkru_deassign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
MPASS(pmap->pm_type == PT_X86);
MPASS((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0);
return (rangeset_remove(&pmap->pm_pkru, sva, eva));
}
/*
 * Remove every protection key range recorded for 'pmap'.  The pmap lock
 * must be held.  Does nothing for non-PT_X86 pmaps or when the CPU lacks
 * PKU.
 */
static void
pmap_pkru_deassign_all(pmap_t pmap)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	if (pmap->pm_type != PT_X86)
		return;
	if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0)
		return;
	rangeset_remove_all(&pmap->pm_pkru);
}
/*
 * Report whether every page in [sva, eva) carries the same protection
 * key assignment, i.e. either no page is covered by a key range or all
 * pages are covered by ranges with one and the same key index.  Used to
 * decide whether the region may be mapped by a single large page.
 *
 * The pmap lock must be held.  Always true for non-PT_X86 pmaps, when
 * the CPU lacks PKU, and for ranges starting at or above
 * VM_MAXUSER_ADDRESS.
 */
static bool
pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	struct pmap_pkru_range *ppr, *prev_ppr;
	vm_offset_t va;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	if (pmap->pm_type != PT_X86 ||
	    (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0 ||
	    sva >= VM_MAXUSER_ADDRESS)
		return (true);
	MPASS(eva <= VM_MAXUSER_ADDRESS);

	/*
	 * prev_ppr tracks the lookup result of the previous step; it is
	 * advanced by the loop's increment expression, which also runs
	 * after 'continue'.
	 */
	for (va = sva, prev_ppr = NULL; va < eva; prev_ppr = ppr) {
		ppr = rangeset_lookup(&pmap->pm_pkru, va);
		if (va == sva) {
			/*
			 * The first page has nothing to be compared with;
			 * it only establishes the reference assignment.
			 * Without this, a keyed first page was always
			 * reported as a mismatch.
			 */
			prev_ppr = ppr;
		} else if ((ppr == NULL) ^ (prev_ppr == NULL)) {
			/* Transition between keyed and unkeyed pages. */
			return (false);
		}
		if (ppr == NULL) {
			va += PAGE_SIZE;
			continue;
		}
		/* Both are non-NULL here; adjacent ranges must agree. */
		if (prev_ppr->pkru_keyidx != ppr->pkru_keyidx)
			return (false);
		/* Skip the remainder of this range in one step. */
		va = ppr->pkru_rs_el.re_end;
	}
	return (true);
}
/*
 * Return the page table entry bits encoding the protection key assigned
 * to 'va', or 0 when no key range covers it.  Also returns 0 for
 * non-PT_X86 pmaps, when the CPU lacks PKU, and for addresses at or
 * above VM_MAXUSER_ADDRESS.  The pmap lock must be held.
 */
static pt_entry_t
pmap_pkru_get(pmap_t pmap, vm_offset_t va)
{
	struct pmap_pkru_range *range;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	if (pmap->pm_type == PT_X86 &&
	    (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0 &&
	    va < VM_MAXUSER_ADDRESS) {
		range = rangeset_lookup(&pmap->pm_pkru, va);
		if (range != NULL)
			return (X86_PG_PKU(range->pkru_keyidx));
	}
	return (0);
}
/*
 * Predicate handed to rangeset_remove_pred() by pmap_pkru_on_remove():
 * true for ranges that do not have AMD64_PKRU_PERSIST set, false for
 * persistent ones.
 */
static bool
pred_pkru_on_remove(void *ctx __unused, void *r)
{
	struct pmap_pkru_range *range;

	range = r;
	if ((range->pkru_flags & AMD64_PKRU_PERSIST) != 0)
		return (false);
	return (true);
}
/*
 * Hook run when mappings in [sva, eva) are removed from 'pmap': asks the
 * rangeset to remove the key ranges in that interval for which
 * pred_pkru_on_remove() returns true.  The pmap lock must be held.  Does
 * nothing for non-PT_X86 pmaps or when the CPU lacks PKU.
 */
static void
pmap_pkru_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	if (pmap->pm_type != PT_X86)
		return;
	if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0)
		return;
	rangeset_remove_pred(&pmap->pm_pkru, sva, eva, pred_pkru_on_remove);
}
/*
 * Copy all protection key ranges from src_pmap's rangeset into
 * dst_pmap's.  Both pmap locks must be held, both pmaps must be PT_X86
 * and the CPU must support PKU.  Returns the result of rangeset_copy(),
 * or 0 without copying when the source rangeset has no data context.
 */
static int
pmap_pkru_copy(pmap_t dst_pmap, pmap_t src_pmap)
{
PMAP_LOCK_ASSERT(dst_pmap, MA_OWNED);
PMAP_LOCK_ASSERT(src_pmap, MA_OWNED);
MPASS(dst_pmap->pm_type == PT_X86);
MPASS(src_pmap->pm_type == PT_X86);
MPASS((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0);
/*
 * NOTE(review): a NULL rs_data_ctx presumably means rangeset_init() was
 * never called on the source pmap -- confirm against the rangeset code.
 */
if (src_pmap->pm_pkru.rs_data_ctx == NULL)
return (0);
return (rangeset_copy(&dst_pmap->pm_pkru, &src_pmap->pm_pkru));
}
/*
 * Rewrite the protection key bits of every valid mapping in [sva, eva)
 * of 'pmap' so that they encode 'keyidx'.  Unmapped parts of the range
 * are skipped.  The pmap lock must be held, the pmap must be PT_X86 and
 * keyidx must not exceed PMAP_MAX_PKRU_IDX.  If at least one entry was
 * changed, the whole range is invalidated once at the end.
 */
static void
pmap_pkru_update_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
u_int keyidx)
{
pml4_entry_t *pml4e;
pdp_entry_t *pdpe;
pd_entry_t newpde, ptpaddr, *pde;
pt_entry_t newpte, *ptep, pte;
vm_offset_t va, va_next;
bool changed;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
MPASS(pmap->pm_type == PT_X86);
MPASS(keyidx <= PMAP_MAX_PKRU_IDX);
for (changed = false, va = sva; va < eva; va = va_next) {
/* No valid PML4 entry: jump to the next NBPML4-aligned address. */
pml4e = pmap_pml4e(pmap, va);
if ((*pml4e & X86_PG_V) == 0) {
va_next = (va + NBPML4) & ~PML4MASK;
/* The addition wrapped around; finish the walk. */
if (va_next < va)
va_next = eva;
continue;
}
/* No valid PDP entry: jump to the next NBPDP-aligned address. */
pdpe = pmap_pml4e_to_pdpe(pml4e, va);
if ((*pdpe & X86_PG_V) == 0) {
va_next = (va + NBPDP) & ~PDPMASK;
if (va_next < va)
va_next = eva;
continue;
}
/* From here on, advance one page directory entry (NBPDR) per step. */
va_next = (va + NBPDR) & ~PDRMASK;
if (va_next < va)
va_next = eva;
pde = pmap_pdpe_to_pde(pdpe, va);
ptpaddr = *pde;
if (ptpaddr == 0)
continue;
MPASS((ptpaddr & X86_PG_V) != 0);
if ((ptpaddr & PG_PS) != 0) {
/*
 * The entry has PG_PS set.  If va is NBPDR-aligned and the requested
 * range reaches the end of this entry, rewrite the key bits directly
 * in the page directory entry.
 */
if (va + NBPDR == va_next && eva >= va_next) {
newpde = (ptpaddr & ~X86_PG_PKU_MASK) |
X86_PG_PKU(keyidx);
if (newpde != ptpaddr) {
*pde = newpde;
changed = true;
}
continue;
} else if (!pmap_demote_pde(pmap, pde, va)) {
/*
 * Only part of the entry is in range and the demotion failed; skip
 * this entry.  NOTE(review): presumably a failed demotion leaves
 * nothing mapped here -- confirm in pmap_demote_pde().
 */
continue;
}
}
/* Do not walk page table entries past the end of the request. */
if (va_next > eva)
va_next = eva;
for (ptep = pmap_pde_to_pte(pde, va); va != va_next;
ptep++, va += PAGE_SIZE) {
pte = *ptep;
if ((pte & X86_PG_V) == 0)
continue;
newpte = (pte & ~X86_PG_PKU_MASK) | X86_PG_PKU(keyidx);
/* Store only when the key bits actually differ. */
if (newpte != pte) {
*ptep = newpte;
changed = true;
}
}
}
if (changed)
pmap_invalidate_range(pmap, sva, eva);
}
/*
 * Validate the user-supplied arguments of a protection key request.
 *
 * Returns, in this order of precedence:
 *   EINVAL   the pmap is not PT_X86, keyidx exceeds PMAP_MAX_PKRU_IDX,
 *            or flags has bits other than AMD64_PKRU_PERSIST and
 *            AMD64_PKRU_EXCL set;
 *   EFAULT   the range is empty or inverted, or ends above
 *            VM_MAXUSER_ADDRESS;
 *   ENOTSUP  the CPU does not advertise PKU;
 *   0        otherwise.
 */
static int
pmap_pkru_check_uargs(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
    u_int keyidx, int flags)
{

	if (pmap->pm_type != PT_X86)
		return (EINVAL);
	if (keyidx > PMAP_MAX_PKRU_IDX)
		return (EINVAL);
	if ((flags & ~(AMD64_PKRU_PERSIST | AMD64_PKRU_EXCL)) != 0)
		return (EINVAL);
	if (sva >= eva || eva > VM_MAXUSER_ADDRESS)
		return (EFAULT);
	if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0)
		return (ENOTSUP);
	return (0);
}
/*
 * Assign protection key 'keyidx' to the pages covering [sva, eva) in
 * 'pmap': record the range and then rewrite the key bits of the mappings
 * that already exist there.
 *
 * sva is rounded down and eva rounded up to a page boundary before the
 * arguments are validated by pmap_pkru_check_uargs(), whose error is
 * returned unchanged.  An ENOMEM from pmap_pkru_assign() is not
 * reported; instead the pmap lock is dropped and the operation is
 * retried after vm_wait().  Any other result is returned.
 */
int
pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, u_int keyidx,
    int flags)
{
	int error;

	sva = trunc_page(sva);
	eva = round_page(eva);
	error = pmap_pkru_check_uargs(pmap, sva, eva, keyidx, flags);
	if (error != 0)
		return (error);

	do {
		PMAP_LOCK(pmap);
		error = pmap_pkru_assign(pmap, sva, eva, keyidx, flags);
		if (error == 0)
			pmap_pkru_update_range(pmap, sva, eva, keyidx);
		PMAP_UNLOCK(pmap);
		/* Wait for memory with the pmap lock released. */
		if (error == ENOMEM)
			vm_wait(NULL);
	} while (error == ENOMEM);
	return (error);
}
/*
 * Remove the protection key assignment from the pages covering
 * [sva, eva) in 'pmap': drop the recorded ranges and reset the key bits
 * of the existing mappings to key 0.
 *
 * sva is rounded down and eva rounded up to a page boundary before the
 * arguments are validated by pmap_pkru_check_uargs() (with keyidx and
 * flags of 0), whose error is returned unchanged.  An ENOMEM from
 * pmap_pkru_deassign() causes the pmap lock to be dropped and the
 * operation to be retried after vm_wait().  Any other result is
 * returned.
 */
int
pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	int error;

	sva = trunc_page(sva);
	eva = round_page(eva);
	error = pmap_pkru_check_uargs(pmap, sva, eva, 0, 0);
	if (error != 0)
		return (error);

retry:
	PMAP_LOCK(pmap);
	error = pmap_pkru_deassign(pmap, sva, eva);
	if (error == 0)
		pmap_pkru_update_range(pmap, sva, eva, 0);
	PMAP_UNLOCK(pmap);
	if (error == ENOMEM) {
		/* Wait for memory with the pmap lock released. */
		vm_wait(NULL);
		goto retry;
	}
	return (error);
}
#include "opt_ddb.h"
#ifdef DDB
#include <sys/kdb.h>

View File

@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/smp.h>
@ -53,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h> /* for kernel_map */
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <machine/frame.h>
@ -170,13 +172,16 @@ update_gdt_fsbase(struct thread *td, uint32_t base)
int
sysarch(struct thread *td, struct sysarch_args *uap)
{
int error = 0;
struct pcb *pcb = curthread->td_pcb;
struct pcb *pcb;
struct vm_map *map;
uint32_t i386base;
uint64_t a64base;
struct i386_ioperm_args iargs;
struct i386_get_xfpustate i386xfpu;
struct i386_set_pkru i386pkru;
struct amd64_get_xfpustate a64xfpu;
struct amd64_set_pkru a64pkru;
int error;
#ifdef CAPABILITY_MODE
/*
@ -194,11 +199,15 @@ sysarch(struct thread *td, struct sysarch_args *uap)
case I386_GET_GSBASE:
case I386_SET_GSBASE:
case I386_GET_XFPUSTATE:
case I386_SET_PKRU:
case I386_CLEAR_PKRU:
case AMD64_GET_FSBASE:
case AMD64_SET_FSBASE:
case AMD64_GET_GSBASE:
case AMD64_SET_GSBASE:
case AMD64_GET_XFPUSTATE:
case AMD64_SET_PKRU:
case AMD64_CLEAR_PKRU:
break;
case I386_SET_IOPERM:
@ -214,6 +223,10 @@ sysarch(struct thread *td, struct sysarch_args *uap)
if (uap->op == I386_GET_LDT || uap->op == I386_SET_LDT)
return (sysarch_ldt(td, uap, UIO_USERSPACE));
error = 0;
pcb = td->td_pcb;
/*
* XXXKIB check that the BSM generation code knows to encode
* the op argument.
@ -233,11 +246,27 @@ sysarch(struct thread *td, struct sysarch_args *uap)
a64xfpu.addr = (void *)(uintptr_t)i386xfpu.addr;
a64xfpu.len = i386xfpu.len;
break;
case I386_SET_PKRU:
case I386_CLEAR_PKRU:
if ((error = copyin(uap->parms, &i386pkru,
sizeof(struct i386_set_pkru))) != 0)
return (error);
a64pkru.addr = (void *)(uintptr_t)i386pkru.addr;
a64pkru.len = i386pkru.len;
a64pkru.keyidx = i386pkru.keyidx;
a64pkru.flags = i386pkru.flags;
break;
case AMD64_GET_XFPUSTATE:
if ((error = copyin(uap->parms, &a64xfpu,
sizeof(struct amd64_get_xfpustate))) != 0)
return (error);
break;
case AMD64_SET_PKRU:
case AMD64_CLEAR_PKRU:
if ((error = copyin(uap->parms, &a64pkru,
sizeof(struct amd64_set_pkru))) != 0)
return (error);
break;
default:
break;
}
@ -326,6 +355,34 @@ sysarch(struct thread *td, struct sysarch_args *uap)
a64xfpu.addr, a64xfpu.len);
break;
case I386_SET_PKRU:
case AMD64_SET_PKRU:
/*
* Read-lock the map to synchronize with parallel
* pmap_vmspace_copy() on fork.
*/
map = &td->td_proc->p_vmspace->vm_map;
vm_map_lock_read(map);
error = pmap_pkru_set(PCPU_GET(curpmap),
(vm_offset_t)a64pkru.addr, (vm_offset_t)a64pkru.addr +
a64pkru.len, a64pkru.keyidx, a64pkru.flags);
vm_map_unlock_read(map);
break;
case I386_CLEAR_PKRU:
case AMD64_CLEAR_PKRU:
	/* A clear request carries only addr/len; key and flags must be 0. */
	if (a64pkru.flags != 0 || a64pkru.keyidx != 0) {
		error = EINVAL;
		break;
	}
	/*
	 * Hold the map read-locked across the pmap update, as the
	 * SET_PKRU case does.  The lock is taken shared, so it must be
	 * released with vm_map_unlock_read(); vm_map_unlock() is the
	 * exclusive-lock release and does not pair with
	 * vm_map_lock_read().
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	vm_map_lock_read(map);
	error = pmap_pkru_clear(PCPU_GET(curpmap),
	    (vm_offset_t)a64pkru.addr,
	    (vm_offset_t)a64pkru.addr + a64pkru.len);
	vm_map_unlock_read(map);
	break;
default:
error = EINVAL;
break;

View File

@ -807,6 +807,20 @@ trap_pfault(struct trapframe *frame, int usermode)
return (-1);
}
/*
* User-mode protection key violation (PKU). May happen
* either from usermode or from kernel if copyin accessed
* key-protected mapping.
*/
if ((frame->tf_err & PGEX_PK) != 0) {
if (eva > VM_MAXUSER_ADDRESS) {
trap_fatal(frame, eva);
return (-1);
}
rv = KERN_PROTECTION_FAILURE;
goto after_vmfault;
}
/*
* If nx protection of the usermode portion of kernel page
* tables caused trap, panic.
@ -842,6 +856,7 @@ trap_pfault(struct trapframe *frame, int usermode)
#endif
return (0);
}
after_vmfault:
if (!usermode) {
if (td->td_intr_nesting_level == 0 &&
curpcb->pcb_onfault != NULL) {

View File

@ -66,6 +66,7 @@
#define X86_PG_AVAIL2 0x400 /* < programmers use */
#define X86_PG_AVAIL3 0x800 /* \ */
#define X86_PG_PDE_PAT 0x1000 /* PAT PAT index */
/*
 * Protection key index placed in its page table entry field, which
 * starts at bit 59.  The argument is parenthesized so that an expression
 * argument is converted and shifted as a whole.
 */
#define X86_PG_PKU(idx) ((pt_entry_t)(idx) << 59)
#define X86_PG_NX (1ul<<63) /* No-execute */
#define X86_PG_AVAIL(x) (1ul << (x))
@ -73,6 +74,10 @@
#define X86_PG_PDE_CACHE (X86_PG_PDE_PAT | X86_PG_NC_PWT | X86_PG_NC_PCD)
#define X86_PG_PTE_CACHE (X86_PG_PTE_PAT | X86_PG_NC_PWT | X86_PG_NC_PCD)
/* Protection keys indexes */
#define PMAP_MAX_PKRU_IDX 0xf
#define X86_PG_PKU_MASK X86_PG_PKU(PMAP_MAX_PKRU_IDX)
/*
* Intel extended page table (EPT) bit definitions.
*/
@ -120,7 +125,7 @@
* (PTE) page mappings have identical settings for the following fields:
*/
#define PG_PTE_PROMOTE (PG_NX | PG_MANAGED | PG_W | PG_G | PG_PTE_CACHE | \
PG_M | PG_A | PG_U | PG_RW | PG_V)
PG_M | PG_A | PG_U | PG_RW | PG_V | PG_PKU_MASK)
/*
* Page Protection Exception bits
@ -242,6 +247,8 @@
#include <sys/_cpuset.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/_pctrie.h>
#include <sys/_rangeset.h>
#include <vm/_vm_radix.h>
@ -336,6 +343,7 @@ struct pmap {
long pm_eptgen; /* EPT pmap generation id */
int pm_flags;
struct pmap_pcids pm_pcids[MAXCPU];
struct rangeset pm_pkru;
};
/* flags */
@ -454,6 +462,10 @@ void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3);
void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va);
void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva,
vm_offset_t eva);
int pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
int pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
u_int keyidx, int flags);
int pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap);
#endif /* _KERNEL */
/* Return various clipped indexes for a given VA */

View File

@ -71,5 +71,12 @@ void pmap_kremove_device(vm_offset_t, vm_size_t);
vm_paddr_t pmap_kextract(vm_offset_t);
#define vtophys(va) pmap_kextract((vm_offset_t)(va))
/*
 * Stub: both pmap arguments are ignored and 0 (success) is always
 * returned.
 */
static inline int
pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
{
return (0);
}
#endif /* _KERNEL */
#endif /* !_MACHINE_PMAP_H_ */

View File

@ -171,6 +171,13 @@ struct pcb *pmap_switch(struct thread *, struct thread *);
#define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list))
/*
 * Stub: both pmap arguments are ignored and 0 (success) is always
 * returned.
 */
static inline int
pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
{
return (0);
}
#endif /* _KERNEL */
#endif /* !LOCORE */

View File

@ -244,6 +244,13 @@ extern vm_offset_t virtual_end;
#define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0)
#define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz))
/*
 * Stub: both pmap arguments are ignored and 0 (success) is always
 * returned.
 */
static inline int
pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
{
return (0);
}
struct sf_buf;
/*

View File

@ -185,6 +185,13 @@ int pmap_emulate_modified(pmap_t pmap, vm_offset_t va);
void pmap_page_set_memattr(vm_page_t, vm_memattr_t);
int pmap_change_attr(vm_offset_t, vm_size_t, vm_memattr_t);
/*
 * Stub: both pmap arguments are ignored and 0 (success) is always
 * returned.
 */
static inline int
pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
{
return (0);
}
#endif /* _KERNEL */
#endif /* !LOCORE */

View File

@ -288,6 +288,13 @@ vm_offset_t pmap_early_io_map(vm_paddr_t pa, vm_size_t size);
void pmap_early_io_unmap(vm_offset_t va, vm_size_t size);
void pmap_track_page(pmap_t pmap, vm_offset_t va);
/*
 * Stub: both pmap arguments are ignored and 0 (success) is always
 * returned.
 */
static inline int
pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
{
return (0);
}
#endif
#endif /* !_MACHINE_PMAP_H_ */

View File

@ -166,6 +166,13 @@ bool pmap_get_tables(pmap_t, vm_offset_t, pd_entry_t **, pd_entry_t **,
int pmap_fault_fixup(pmap_t, vm_offset_t, vm_prot_t);
/*
 * Stub: both pmap arguments are ignored and 0 (success) is always
 * returned.
 */
static inline int
pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
{
return (0);
}
#endif /* _KERNEL */
#endif /* !LOCORE */

View File

@ -128,4 +128,11 @@ SYSCTL_DECL(_debug_pmap_stats);
#endif
/*
 * Stub: both pmap arguments are ignored and 0 (success) is always
 * returned.
 */
static inline int
pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
{
return (0);
}
#endif /* !_MACHINE_PMAP_H_ */

View File

@ -481,8 +481,20 @@ vm_fault_populate(struct faultstate *fs, vm_prot_t prot, int fault_type,
fault_flags, true);
}
VM_OBJECT_WUNLOCK(fs->first_object);
pmap_enter(fs->map->pmap, vaddr, m, prot, fault_type | (wired ?
PMAP_ENTER_WIRED : 0), psind);
rv = pmap_enter(fs->map->pmap, vaddr, m, prot, fault_type |
(wired ? PMAP_ENTER_WIRED : 0), psind);
#if defined(__amd64__)
if (psind > 0 && rv == KERN_FAILURE) {
for (i = 0; i < npages; i++) {
rv = pmap_enter(fs->map->pmap, vaddr + ptoa(i),
&m[i], prot, fault_type |
(wired ? PMAP_ENTER_WIRED : 0), 0);
MPASS(rv == KERN_SUCCESS);
}
}
#else
MPASS(rv == KERN_SUCCESS);
#endif
VM_OBJECT_WLOCK(fs->first_object);
m_mtx = NULL;
for (i = 0; i < npages; i++) {

View File

@ -3544,7 +3544,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
vm_map_t new_map, old_map;
vm_map_entry_t new_entry, old_entry;
vm_object_t object;
int locked;
int error, locked;
vm_inherit_t inh;
old_map = &vm1->vm_map;
@ -3553,6 +3553,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
pmap_pinit);
if (vm2 == NULL)
return (NULL);
vm2->vm_taddr = vm1->vm_taddr;
vm2->vm_daddr = vm1->vm_daddr;
vm2->vm_maxsaddr = vm1->vm_maxsaddr;
@ -3563,7 +3564,17 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */
KASSERT(locked, ("vmspace_fork: lock failed"));
error = pmap_vmspace_copy(new_map->pmap, old_map->pmap);
if (error != 0) {
sx_xunlock(&old_map->lock);
sx_xunlock(&new_map->lock);
vm_map_process_deferred();
vmspace_free(vm2);
return (NULL);
}
new_map->anon_loc = old_map->anon_loc;
old_entry = old_map->header.next;
while (old_entry != &old_map->header) {

View File

@ -52,6 +52,8 @@
#define I386_GET_GSBASE 9
#define I386_SET_GSBASE 10
#define I386_GET_XFPUSTATE 11
#define I386_SET_PKRU 12
#define I386_CLEAR_PKRU 13
/* Leave space for 0-127 to avoid translating syscalls */
#define AMD64_GET_FSBASE 128
@ -59,6 +61,12 @@
#define AMD64_GET_GSBASE 130
#define AMD64_SET_GSBASE 131
#define AMD64_GET_XFPUSTATE 132
#define AMD64_SET_PKRU 133
#define AMD64_CLEAR_PKRU 134
/* Flags for AMD64_SET_PKRU */
#define AMD64_PKRU_EXCL 0x0001
#define AMD64_PKRU_PERSIST 0x0002
struct i386_ioperm_args {
unsigned int start;
@ -94,12 +102,26 @@ struct i386_get_xfpustate {
int len;
};
/*
 * Argument of the I386_SET_PKRU and I386_CLEAR_PKRU sysarch operations.
 * The amd64 sysarch() copies each field into a struct amd64_set_pkru.
 */
struct i386_set_pkru {
/* Start address of the range, held in an unsigned int. */
unsigned int addr;
/* Length of the range; the end is computed as addr + len. */
unsigned int len;
/* Protection key index. */
unsigned int keyidx;
/* AMD64_PKRU_* flags. */
int flags;
};
struct amd64_get_xfpustate {
void *addr;
int len;
};
#endif
/*
 * Argument of the AMD64_SET_PKRU and AMD64_CLEAR_PKRU sysarch
 * operations.  For the clear operation keyidx and flags must both be 0,
 * otherwise sysarch() fails with EINVAL.
 */
struct amd64_set_pkru {
/* Start address of the range. */
void *addr;
/* Length of the range; the end is computed as addr + len. */
unsigned long len;
/* Protection key index; values above PMAP_MAX_PKRU_IDX are rejected. */
unsigned int keyidx;
/* Combination of AMD64_PKRU_EXCL and AMD64_PKRU_PERSIST. */
int flags;
};
#ifndef _KERNEL
union descriptor;
struct dbreg;