Improve MD page fault handlers.

Centralize calculation of the signal and ucode delivered on an
unhandled page fault in the new function vm_fault_trap().  MD
trap_pfault() now almost always uses the signal numbers and error
codes calculated in a consistent MI way.
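
As an illustration of the resulting division of labour, here is a
minimal sketch of the call pattern MD code now follows.  It is a
fragment only, modeled on the arm64/riscv hunks further down;
call_trapsignal(), the pcb_onfault recovery and the frame field name
are the MD pieces and vary per architecture:

    int error, sig, ucode;

    /* The MI code selects sig/ucode; MD code only delivers or recovers. */
    error = vm_fault_trap(map, far, ftype, VM_FAULT_NORMAL, &sig, &ucode);
    if (error != KERN_SUCCESS) {
            if (usermode)
                    call_trapsignal(td, sig, ucode, (void *)far);
            else if (pcb->pcb_onfault != 0)
                    frame->tf_elr = pcb->pcb_onfault;  /* MD recovery */
            else
                    panic("unhandled kernel page fault at %#lx", far);
    }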

This introduces the protection-fault compatibility sysctl
(machdep.prot_fault_translation) to all non-x86 architectures, which
did not have that bug, but apparently they were already much more
wrong in selecting the signal delivered on protection violations.
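
For reference, the sysctl keeps its historical
machdep.prot_fault_translation name even though the logic moves into
MI vm_fault.c (see the vm_fault.c hunk below): 0 autodetects from the
ABI/osrel of the process, 1 forces the old SIGBUS behaviour, 2 forces
SIGSEGV with SEGV_ACCERR.  A small userland sketch for inspecting and
setting it (setting requires root):

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
            int val, newval;
            size_t len = sizeof(val);

            /* Read the current mode: 0 auto, 1 always SIGBUS, 2 always SIGSEGV. */
            if (sysctlbyname("machdep.prot_fault_translation", &val, &len,
                NULL, 0) == -1) {
                    perror("sysctlbyname");
                    return (1);
            }
            printf("machdep.prot_fault_translation=%d\n", val);

            /* Force the always-SIGSEGV mode. */
            newval = 2;
            if (sysctlbyname("machdep.prot_fault_translation", NULL, NULL,
                &newval, sizeof(newval)) == -1)
                    perror("sysctlbyname(set)");
            return (0);
    }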

Change the delivered signal for accesses to a mapped area after the
backing object has been truncated.  According to the POSIX description
of mmap(2):
   The system shall always zero-fill any partial page at the end of an
   object. Further, the system shall never write out any modified
   portions of the last page of an object which are beyond its
   end. References within the address range starting at pa and
   continuing for len bytes to whole pages following the end of an
   object shall result in delivery of a SIGBUS signal.

   An implementation may generate SIGBUS signals when a reference
   would cause an error in the mapped object, such as out-of-space
   condition.
Adjust according to the description, keeping the existing
compatibility code for SIGSEGV/SIGBUS on protection failures.
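
To observe the adjusted behaviour from userland, a small test sketch
(the scratch file name and sizes are arbitrary): it maps two pages of
a file, shrinks the file to one byte, and then touches the second
page, which after this change should deliver SIGBUS with si_code
BUS_OBJERR rather than the SIGSEGV typically delivered before:

    #include <sys/mman.h>
    #include <err.h>
    #include <fcntl.h>
    #include <signal.h>
    #include <stdio.h>
    #include <unistd.h>

    static void
    handler(int sig, siginfo_t *si, void *ctx)
    {

            (void)ctx;
            /* Expect SIGBUS with si_code == BUS_OBJERR after this change. */
            printf("signal %d, si_code %d, addr %p\n", sig, si->si_code,
                si->si_addr);
            _exit(0);
    }

    int
    main(void)
    {
            struct sigaction sa;
            long psz;
            char *p;
            int fd;

            psz = sysconf(_SC_PAGESIZE);
            sa.sa_sigaction = handler;
            sa.sa_flags = SA_SIGINFO;
            sigemptyset(&sa.sa_mask);
            if (sigaction(SIGBUS, &sa, NULL) == -1 ||
                sigaction(SIGSEGV, &sa, NULL) == -1)
                    err(1, "sigaction");

            fd = open("fault.tmp", O_RDWR | O_CREAT | O_TRUNC, 0600);
            if (fd == -1)
                    err(1, "open");
            if (ftruncate(fd, 2 * psz) == -1)
                    err(1, "ftruncate(grow)");
            p = mmap(NULL, 2 * psz, PROT_READ | PROT_WRITE, MAP_SHARED,
                fd, 0);
            if (p == MAP_FAILED)
                    err(1, "mmap");
            if (ftruncate(fd, 1) == -1)     /* truncate the backing object */
                    err(1, "ftruncate(shrink)");
            p[psz] = 1;                     /* whole page past the new EOF */
            printf("no fault delivered\n");
            return (0);
    }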

For situations where the kernel cannot handle a page fault because of
resource limit enforcement, SIGBUS with a new error code, BUS_OOMERR,
is delivered.  Also provide a new error code, SEGV_PKUERR, for SIGSEGV
on amd64 caused by a protection key access violation.
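
Since these are plain si_code values, a sketch of decoding them in a
SIGSEGV/SIGBUS handler; the fallback definitions of 100 mirror the
signal.h hunk below and are only needed where the installed headers
predate this change:

    #include <signal.h>

    #ifndef BUS_OOMERR
    #define BUS_OOMERR      100     /* FreeBSD-specific, per signal.h below */
    #endif
    #ifndef SEGV_PKUERR
    #define SEGV_PKUERR     100     /* FreeBSD-specific, per signal.h below */
    #endif

    /* Map the si_code of an unhandled page fault to a short description. */
    const char *
    fault_code_name(int sig, int code)
    {

            if (sig == SIGSEGV) {
                    switch (code) {
                    case SEGV_MAPERR:
                            return ("address not mapped to object");
                    case SEGV_ACCERR:
                            return ("invalid permissions");
                    case SEGV_PKUERR:
                            return ("protection key violation");
                    }
            } else if (sig == SIGBUS) {
                    switch (code) {
                    case BUS_OBJERR:
                            return ("error in the mapped object");
                    case BUS_OOMERR:
                            return ("resource shortage");
                    }
            }
            return ("unknown");
    }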

vm_fault_hold() is renamed to vm_fault().  Fix some nits in the MD
trap_pfault() implementations, such as mis-interpreting Mach errors as
errno values, and remove unneeded truncation of the fault addresses
reported by hardware.
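
For quick reference, the resulting MI prototypes, as declared in the
vm_extern.h hunk below; callers that used vm_fault_hold() to hold a
page, such as proc_rwmem(), translate mechanically by passing their
m_hold pointer to vm_fault():

    int vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
        int fault_flags, vm_page_t *m_hold);
    int vm_fault_trap(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
        int fault_flags, int *signo, int *ucode);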

PR:	211924
Reviewed by:	alc
Discussed with:	jilles, markj
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
Differential revision:	https://reviews.freebsd.org/D21566
Author: kib
Date:   2019-09-27 18:43:36 +0000
Commit: 957270782d
Parent: 26118133a6

16 changed files with 231 additions and 233 deletions


@ -111,7 +111,7 @@ void __noinline trap(struct trapframe *frame);
void trap_check(struct trapframe *frame);
void dblfault_handler(struct trapframe *frame);
static int trap_pfault(struct trapframe *, int);
static int trap_pfault(struct trapframe *, bool, int *, int *);
static void trap_fatal(struct trapframe *, vm_offset_t);
#ifdef KDTRACE_HOOKS
static bool trap_user_dtrace(struct trapframe *,
@ -155,10 +155,6 @@ static const char *const trap_msg[] = {
[T_DTRACE_RET] = "DTrace pid return trap",
};
static int prot_fault_translation;
SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RWTUN,
&prot_fault_translation, 0,
"Select signal to deliver on protection fault");
static int uprintf_signal;
SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RWTUN,
&uprintf_signal, 0,
@ -192,14 +188,11 @@ trap(struct trapframe *frame)
struct thread *td;
struct proc *p;
register_t addr, dr6;
int signo, ucode;
int pf, signo, ucode;
u_int type;
td = curthread;
p = td->td_proc;
signo = 0;
ucode = 0;
addr = 0;
dr6 = 0;
VM_CNT_INC(v_trap);
@ -345,47 +338,18 @@ trap(struct trapframe *frame)
case T_PAGEFLT: /* page fault */
/*
* Emulator can take care about this trap?
* Can emulator handle this trap?
*/
if (*p->p_sysent->sv_trap != NULL &&
(*p->p_sysent->sv_trap)(td) == 0)
return;
addr = frame->tf_addr;
signo = trap_pfault(frame, TRUE);
if (signo == -1)
pf = trap_pfault(frame, true, &signo, &ucode);
if (pf == -1)
return;
if (signo == 0)
if (pf == 0)
goto userret;
if (signo == SIGSEGV) {
ucode = SEGV_MAPERR;
} else if (prot_fault_translation == 0) {
/*
* Autodetect. This check also covers
* the images without the ABI-tag ELF
* note.
*/
if (SV_CURPROC_ABI() == SV_ABI_FREEBSD &&
p->p_osrel >= P_OSREL_SIGSEGV) {
signo = SIGSEGV;
ucode = SEGV_ACCERR;
} else {
signo = SIGBUS;
ucode = T_PAGEFLT;
}
} else if (prot_fault_translation == 1) {
/*
* Always compat mode.
*/
signo = SIGBUS;
ucode = T_PAGEFLT;
} else {
/*
* Always SIGSEGV mode.
*/
signo = SIGSEGV;
ucode = SEGV_ACCERR;
}
addr = frame->tf_addr;
break;
case T_DIVIDE: /* integer divide fault */
@ -440,7 +404,7 @@ trap(struct trapframe *frame)
("kernel trap doesn't have ucred"));
switch (type) {
case T_PAGEFLT: /* page fault */
(void) trap_pfault(frame, FALSE);
(void)trap_pfault(frame, false, NULL, NULL);
return;
case T_DNA:
@ -712,16 +676,28 @@ trap_is_pti(struct trapframe *frame)
(PCPU_GET(curpmap)->pm_cr3 & ~CR3_PCID_MASK));
}
/*
* Handle all details of a page fault.
* Returns:
* -1 if this fault was fatal, typically from kernel mode
* (cannot happen, but we need to return something).
* 0 if this fault was handled by updating either the user or kernel
* page table, execution can continue.
* 1 if this fault was from usermode and it was not handled, a synchronous
* signal should be delivered to the thread. *signo returns the signal
* number, *ucode gives si_code.
*/
static int
trap_pfault(struct trapframe *frame, int usermode)
trap_pfault(struct trapframe *frame, bool usermode, int *signo, int *ucode)
{
struct thread *td;
struct proc *p;
vm_map_t map;
vm_offset_t va;
vm_offset_t eva;
int rv;
vm_prot_t ftype;
vm_offset_t eva;
MPASS(!usermode || (signo != NULL && ucode != NULL));
td = curthread;
p = td->td_proc;
@ -771,13 +747,15 @@ trap_pfault(struct trapframe *frame, int usermode)
return (-1);
}
}
va = trunc_page(eva);
if (va >= VM_MIN_KERNEL_ADDRESS) {
if (eva >= VM_MIN_KERNEL_ADDRESS) {
/*
* Don't allow user-mode faults in kernel address space.
*/
if (usermode)
return (SIGSEGV);
if (usermode) {
*signo = SIGSEGV;
*ucode = SEGV_MAPERR;
return (1);
}
map = kernel_map;
} else {
@ -819,7 +797,11 @@ trap_pfault(struct trapframe *frame, int usermode)
trap_fatal(frame, eva);
return (-1);
}
rv = KERN_PROTECTION_FAILURE;
if (usermode) {
*signo = SIGSEGV;
*ucode = SEGV_PKUERR;
return (1);
}
goto after_vmfault;
}
@ -843,7 +825,7 @@ trap_pfault(struct trapframe *frame, int usermode)
ftype = VM_PROT_READ;
/* Fault in the page. */
rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
rv = vm_fault_trap(map, eva, ftype, VM_FAULT_NORMAL, signo, ucode);
if (rv == KERN_SUCCESS) {
#ifdef HWPMC_HOOKS
if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
@ -858,17 +840,17 @@ trap_pfault(struct trapframe *frame, int usermode)
#endif
return (0);
}
if (usermode)
return (1);
after_vmfault:
if (!usermode) {
if (td->td_intr_nesting_level == 0 &&
curpcb->pcb_onfault != NULL) {
frame->tf_rip = (long)curpcb->pcb_onfault;
return (0);
}
trap_fatal(frame, eva);
return (-1);
if (td->td_intr_nesting_level == 0 &&
curpcb->pcb_onfault != NULL) {
frame->tf_rip = (long)curpcb->pcb_onfault;
return (0);
}
return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
trap_fatal(frame, eva);
return (-1);
}
static void


@ -1411,7 +1411,7 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
}
map = &vm->vmspace->vm_map;
rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);
rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
"ftype = %d", rv, vme->u.paging.gpa, ftype);


@ -94,12 +94,12 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <machine/cpu.h>
#include <machine/frame.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/vmparam.h>
#ifdef KDB
#include <sys/kdb.h>
@ -181,7 +181,7 @@ abort_handler(struct trapframe *tf, int type)
vm_prot_t ftype;
void *onfault;
vm_offset_t va;
int error = 0;
int error = 0, signo, ucode;
struct ksig ksig;
struct proc *p;
@ -230,6 +230,8 @@ abort_handler(struct trapframe *tf, int type)
if (__predict_false(data_aborts[fsr & FAULT_TYPE_MASK].func != NULL)) {
if ((data_aborts[fsr & FAULT_TYPE_MASK].func)(tf, fsr, far,
td, &ksig)) {
signo = ksig.signb;
ucode = ksig.code;
goto do_trapsignal;
}
goto out;
@ -262,8 +264,8 @@ abort_handler(struct trapframe *tf, int type)
* Give the user an illegal instruction signal.
*/
/* Deliver a SIGILL to the process */
ksig.signb = SIGILL;
ksig.code = 0;
signo = SIGILL;
ucode = 0;
goto do_trapsignal;
}
@ -299,8 +301,8 @@ abort_handler(struct trapframe *tf, int type)
* but uses USR mode permissions for its accesses.
*/
user = 1;
ksig.signb = SIGSEGV;
ksig.code = 0;
signo = SIGSEGV;
ucode = 0;
goto do_trapsignal;
}
} else {
@ -350,9 +352,9 @@ abort_handler(struct trapframe *tf, int type)
onfault = pcb->pcb_onfault;
pcb->pcb_onfault = NULL;
error = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
error = vm_fault_trap(map, va, ftype, VM_FAULT_NORMAL, &signo, &ucode);
pcb->pcb_onfault = onfault;
if (__predict_true(error == 0))
if (__predict_true(error == KERN_SUCCESS))
goto out;
fatal_pagefault:
if (user == 0) {
@ -368,18 +370,8 @@ abort_handler(struct trapframe *tf, int type)
}
if (error == ENOMEM) {
printf("VM: pid %d (%s), uid %d killed: "
"out of swap\n", td->td_proc->p_pid, td->td_name,
(td->td_proc->p_ucred) ?
td->td_proc->p_ucred->cr_uid : -1);
ksig.signb = SIGKILL;
} else {
ksig.signb = SIGSEGV;
}
ksig.code = 0;
do_trapsignal:
call_trapsignal(td, ksig.signb, ksig.code);
call_trapsignal(td, signo, ucode);
out:
/* If returning to user mode, make sure to invoke userret() */
if (user)
@ -613,10 +605,9 @@ prefetch_abort_handler(struct trapframe *tf)
struct proc * p;
struct vm_map *map;
vm_offset_t fault_pc, va;
int error = 0;
int error = 0, signo, ucode;
struct ksig ksig;
#if 0
/* Update vmmeter statistics */
uvmexp.traps++;
@ -652,8 +643,8 @@ prefetch_abort_handler(struct trapframe *tf)
/* Ok validate the address, can only execute in USER space */
if (__predict_false(fault_pc >= VM_MAXUSER_ADDRESS ||
(fault_pc < VM_MIN_ADDRESS && vector_page == ARM_VECTORS_LOW))) {
ksig.signb = SIGSEGV;
ksig.code = 0;
signo = SIGSEGV;
ucode = 0;
goto do_trapsignal;
}
@ -669,24 +660,13 @@ prefetch_abort_handler(struct trapframe *tf)
if (pmap_fault_fixup(map->pmap, va, VM_PROT_READ, 1))
goto out;
error = vm_fault(map, va, VM_PROT_READ | VM_PROT_EXECUTE,
VM_FAULT_NORMAL);
if (__predict_true(error == 0))
error = vm_fault_trap(map, va, VM_PROT_READ | VM_PROT_EXECUTE,
VM_FAULT_NORMAL, &signo, &ucode);
if (__predict_true(error == KERN_SUCCESS))
goto out;
if (error == ENOMEM) {
printf("VM: pid %d (%s), uid %d killed: "
"out of swap\n", td->td_proc->p_pid, td->td_name,
(td->td_proc->p_ucred) ?
td->td_proc->p_ucred->cr_uid : -1);
ksig.signb = SIGKILL;
} else {
ksig.signb = SIGSEGV;
}
ksig.code = 0;
do_trapsignal:
call_trapsignal(td, ksig.signb, ksig.code);
call_trapsignal(td, signo, ucode);
out:
userret(td, tf);


@ -287,7 +287,7 @@ abort_handler(struct trapframe *tf, int prefetch)
struct vmspace *vm;
vm_prot_t ftype;
bool usermode;
int bp_harden;
int bp_harden, ucode;
#ifdef INVARIANTS
void *onfault;
#endif
@ -497,7 +497,9 @@ abort_handler(struct trapframe *tf, int prefetch)
#endif
/* Fault in the page. */
rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
rv = vm_fault_trap(map, va, ftype, VM_FAULT_NORMAL, &ksig.sig,
&ucode);
ksig.code = ucode;
#ifdef INVARIANTS
pcb->pcb_onfault = onfault;
@ -518,8 +520,6 @@ abort_handler(struct trapframe *tf, int prefetch)
return;
}
ksig.sig = SIGSEGV;
ksig.code = (rv == KERN_PROTECTION_FAILURE) ? SEGV_ACCERR : SEGV_MAPERR;
ksig.addr = far;
do_trapsignal:


@ -155,7 +155,6 @@ data_abort(struct thread *td, struct trapframe *frame, uint64_t esr,
struct proc *p;
struct pcb *pcb;
vm_prot_t ftype;
vm_offset_t va;
int error, sig, ucode;
#ifdef KDB
bool handled;
@ -211,7 +210,6 @@ data_abort(struct thread *td, struct trapframe *frame, uint64_t esr,
panic("data abort in critical section or under mutex");
}
va = trunc_page(far);
if (exec)
ftype = VM_PROT_EXECUTE;
else
@ -219,14 +217,9 @@ data_abort(struct thread *td, struct trapframe *frame, uint64_t esr,
VM_PROT_READ | VM_PROT_WRITE;
/* Fault in the page. */
error = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
error = vm_fault_trap(map, far, ftype, VM_FAULT_NORMAL, &sig, &ucode);
if (error != KERN_SUCCESS) {
if (lower) {
sig = SIGSEGV;
if (error == KERN_PROTECTION_FAILURE)
ucode = SEGV_ACCERR;
else
ucode = SEGV_MAPERR;
call_trapsignal(td, sig, ucode, (void *)far);
} else {
if (td->td_intr_nesting_level == 0 &&


@ -114,7 +114,7 @@ PMC_SOFT_DEFINE( , , page_fault, write);
void trap(struct trapframe *frame);
void syscall(struct trapframe *frame);
static int trap_pfault(struct trapframe *, int, vm_offset_t);
static int trap_pfault(struct trapframe *, bool, vm_offset_t, int *, int *);
static void trap_fatal(struct trapframe *, vm_offset_t);
#ifdef KDTRACE_HOOKS
static bool trap_user_dtrace(struct trapframe *,
@ -181,9 +181,6 @@ trap_msg(int trapno)
int has_f00f_bug = 0; /* Initialized so that it can be patched. */
#endif
static int prot_fault_translation = 0;
SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW,
&prot_fault_translation, 0, "Select signal to deliver on protection fault");
static int uprintf_signal;
SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RW,
&uprintf_signal, 0,
@ -202,7 +199,7 @@ trap(struct trapframe *frame)
ksiginfo_t ksi;
struct thread *td;
struct proc *p;
int signo, ucode;
int pf, signo, ucode;
u_int type;
register_t addr, dr6;
vm_offset_t eva;
@ -212,9 +209,6 @@ trap(struct trapframe *frame)
td = curthread;
p = td->td_proc;
signo = 0;
ucode = 0;
addr = 0;
dr6 = 0;
VM_CNT_INC(v_trap);
@ -365,6 +359,7 @@ trap(struct trapframe *frame)
case T_STKFLT: /* stack fault */
if (frame->tf_eflags & PSL_VM) {
signo = vm86_emulate((struct vm86frame *)frame);
ucode = 0; /* XXXKIB: better code ? */
if (signo == SIGTRAP) {
load_dr6(rdr6() | 0x4000);
goto user_trctrap_out;
@ -395,57 +390,23 @@ trap(struct trapframe *frame)
break;
case T_PAGEFLT: /* page fault */
signo = trap_pfault(frame, TRUE, eva);
addr = eva;
pf = trap_pfault(frame, true, eva, &signo, &ucode);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
if (signo == -2) {
if (pf == -2) {
/*
* The f00f hack workaround has triggered, so
* treat the fault as an illegal instruction
* (T_PRIVINFLT) instead of a page fault.
*/
type = frame->tf_trapno = T_PRIVINFLT;
/* Proceed as in that case. */
ucode = ILL_PRVOPC;
signo = SIGILL;
break;
}
#endif
if (signo == -1)
if (pf == -1)
return;
if (signo == 0)
if (pf == 0)
goto user;
if (signo == SIGSEGV)
ucode = SEGV_MAPERR;
else if (prot_fault_translation == 0) {
/*
* Autodetect. This check also covers
* the images without the ABI-tag ELF
* note.
*/
if (SV_CURPROC_ABI() == SV_ABI_FREEBSD &&
p->p_osrel >= P_OSREL_SIGSEGV) {
signo = SIGSEGV;
ucode = SEGV_ACCERR;
} else {
signo = SIGBUS;
ucode = T_PAGEFLT;
}
} else if (prot_fault_translation == 1) {
/*
* Always compat mode.
*/
signo = SIGBUS;
ucode = T_PAGEFLT;
} else {
/*
* Always SIGSEGV mode.
*/
signo = SIGSEGV;
ucode = SEGV_ACCERR;
}
addr = eva;
break;
case T_DIVIDE: /* integer divide fault */
@ -517,7 +478,7 @@ trap(struct trapframe *frame)
("kernel trap doesn't have ucred"));
switch (type) {
case T_PAGEFLT: /* page fault */
(void) trap_pfault(frame, FALSE, eva);
(void)trap_pfault(frame, false, eva, NULL, NULL);
return;
case T_DNA:
@ -769,16 +730,31 @@ trap(struct trapframe *frame)
("Return from trap with kernel FPU ctx leaked"));
}
/*
* Handle all details of a page fault.
* Returns:
* -2 if the fault was caused by triggered workaround for Intel Pentium
* 0xf00f bug.
* -1 if this fault was fatal, typically from kernel mode
* (cannot happen, but we need to return something).
* 0 if this fault was handled by updating either the user or kernel
* page table, execution can continue.
* 1 if this fault was from usermode and it was not handled, a synchronous
* signal should be delivered to the thread. *signo returns the signal
* number, *ucode gives si_code.
*/
static int
trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva)
trap_pfault(struct trapframe *frame, bool usermode, vm_offset_t eva,
int *signo, int *ucode)
{
struct thread *td;
struct proc *p;
vm_offset_t va;
vm_map_t map;
int rv;
vm_prot_t ftype;
MPASS(!usermode || (signo != NULL && ucode != NULL));
td = curthread;
p = td->td_proc;
@ -826,8 +802,7 @@ trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva)
return (-1);
}
}
va = trunc_page(eva);
if (va >= PMAP_TRM_MIN_ADDRESS) {
if (eva >= PMAP_TRM_MIN_ADDRESS) {
/*
* Don't allow user-mode faults in kernel address space.
* An exception: if the faulting address is the invalid
@ -837,11 +812,17 @@ trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva)
* fault.
*/
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) {
*ucode = ILL_PRVOPC;
*signo = SIGILL;
return (-2);
}
#endif
if (usermode)
return (SIGSEGV);
if (usermode) {
*signo = SIGSEGV;
*ucode = SEGV_MAPERR;
return (1);
}
trap_fatal(frame, eva);
return (-1);
} else {
@ -878,7 +859,7 @@ trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva)
ftype = VM_PROT_READ;
/* Fault in the page. */
rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
rv = vm_fault_trap(map, eva, ftype, VM_FAULT_NORMAL, signo, ucode);
if (rv == KERN_SUCCESS) {
#ifdef HWPMC_HOOKS
if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
@ -893,16 +874,15 @@ trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva)
#endif
return (0);
}
if (!usermode) {
if (td->td_intr_nesting_level == 0 &&
curpcb->pcb_onfault != NULL) {
frame->tf_eip = (int)curpcb->pcb_onfault;
return (0);
}
trap_fatal(frame, eva);
return (-1);
if (usermode)
return (1);
if (td->td_intr_nesting_level == 0 &&
curpcb->pcb_onfault != NULL) {
frame->tf_eip = (int)curpcb->pcb_onfault;
return (0);
}
return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
trap_fatal(frame, eva);
return (-1);
}
static void


@ -286,7 +286,7 @@ proc_rwmem(struct proc *p, struct uio *uio)
/*
* Fault and hold the page on behalf of the process.
*/
error = vm_fault_hold(map, pageno, reqprot, fault_flags, &m);
error = vm_fault(map, pageno, reqprot, fault_flags, &m);
if (error != KERN_SUCCESS) {
if (error == KERN_RESOURCE_SHORTAGE)
error = ENOMEM;


@ -669,8 +669,9 @@ trap(struct trapframe *trapframe)
int rv;
kernel_fault:
va = trunc_page((vm_offset_t)trapframe->badvaddr);
rv = vm_fault(kernel_map, va, ftype, VM_FAULT_NORMAL);
va = (vm_offset_t)trapframe->badvaddr;
rv = vm_fault_trap(kernel_map, va, ftype,
VM_FAULT_NORMAL, NULL, NULL);
if (rv == KERN_SUCCESS)
return (trapframe->pc);
if (td->td_pcb->pcb_onfault != NULL) {
@ -705,7 +706,7 @@ trap(struct trapframe *trapframe)
vm = p->p_vmspace;
map = &vm->vm_map;
va = trunc_page((vm_offset_t)trapframe->badvaddr);
va = (vm_offset_t)trapframe->badvaddr;
if (KERNLAND(trapframe->badvaddr)) {
/*
* Don't allow user-mode faults in kernel
@ -714,7 +715,8 @@ trap(struct trapframe *trapframe)
goto nogo;
}
rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
rv = vm_fault_trap(map, va, ftype, VM_FAULT_NORMAL,
&i, &ucode);
/*
* XXXDTRACE: add dtrace_doubletrap_func here?
*/
@ -739,11 +741,6 @@ trap(struct trapframe *trapframe)
}
goto err;
}
i = SIGSEGV;
if (rv == KERN_PROTECTION_FAILURE)
ucode = SEGV_ACCERR;
else
ucode = SEGV_MAPERR;
addr = trapframe->pc;
msg = "BAD_PAGE_FAULT";


@ -87,7 +87,8 @@ __FBSDID("$FreeBSD$");
static void trap_fatal(struct trapframe *frame);
static void printtrap(u_int vector, struct trapframe *frame, int isfatal,
int user);
static int trap_pfault(struct trapframe *frame, int user);
static bool trap_pfault(struct trapframe *frame, bool user, int *signo,
int *ucode);
static int fix_unaligned(struct thread *td, struct trapframe *frame);
static int handle_onfault(struct trapframe *frame);
static void syscall(struct trapframe *frame);
@ -269,9 +270,8 @@ trap(struct trapframe *frame)
#endif
case EXC_DSI:
case EXC_ISI:
sig = trap_pfault(frame, 1);
if (sig == SIGSEGV)
ucode = SEGV_MAPERR;
if (trap_pfault(frame, true, &sig, &ucode))
sig = 0;
break;
case EXC_SC:
@ -460,7 +460,7 @@ trap(struct trapframe *frame)
break;
#endif
case EXC_DSI:
if (trap_pfault(frame, 0) == 0)
if (trap_pfault(frame, false, NULL, NULL))
return;
break;
case EXC_MCHK:
@ -718,10 +718,10 @@ syscall(struct trapframe *frame)
syscallret(td);
}
static int
trap_pfault(struct trapframe *frame, int user)
static bool
trap_pfault(struct trapframe *frame, bool user, int *signo, int *ucode)
{
vm_offset_t eva, va;
vm_offset_t eva;
struct thread *td;
struct proc *p;
vm_map_t map;
@ -753,28 +753,27 @@ trap_pfault(struct trapframe *frame, int user)
} else {
rv = pmap_decode_kernel_ptr(eva, &is_user, &eva);
if (rv != 0)
return (SIGSEGV);
return (false);
if (is_user)
map = &p->p_vmspace->vm_map;
else
map = kernel_map;
}
va = trunc_page(eva);
/* Fault in the page. */
rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
rv = vm_fault_trap(map, eva, ftype, VM_FAULT_NORMAL, signo, ucode);
/*
* XXXDTRACE: add dtrace_doubletrap_func here?
*/
if (rv == KERN_SUCCESS)
return (0);
return (true);
if (!user && handle_onfault(frame))
return (0);
return (true);
return (SIGSEGV);
return (false);
}
/*


@ -217,14 +217,9 @@ data_abort(struct trapframe *frame, int usermode)
if (pmap_fault_fixup(map->pmap, va, ftype))
goto done;
error = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
error = vm_fault_trap(map, va, ftype, VM_FAULT_NORMAL, &sig, &ucode);
if (error != KERN_SUCCESS) {
if (usermode) {
sig = SIGSEGV;
if (error == KERN_PROTECTION_FAILURE)
ucode = SEGV_ACCERR;
else
ucode = SEGV_MAPERR;
call_trapsignal(td, sig, ucode, (void *)stval);
} else {
if (pcb->pcb_onfault != 0) {


@ -91,7 +91,8 @@ void trap(struct trapframe *tf);
void syscall(struct trapframe *tf);
static int trap_cecc(void);
static int trap_pfault(struct thread *td, struct trapframe *tf);
static bool trap_pfault(struct thread *td, struct trapframe *tf, int *signo,
int *ucode);
extern char copy_fault[];
extern char copy_nofault_begin[];
@ -287,7 +288,8 @@ trap(struct trapframe *tf)
addr = tf->tf_sfar;
/* FALLTHROUGH */
case T_INSTRUCTION_MISS:
sig = trap_pfault(td, tf);
if (trap_pfault(td, tf, &sig, &ucode))
sig = 0;
break;
case T_FILL:
sig = rwindow_load(td, tf, 2);
@ -358,7 +360,7 @@ trap(struct trapframe *tf)
case T_DATA_MISS:
case T_DATA_PROTECTION:
case T_INSTRUCTION_MISS:
error = trap_pfault(td, tf);
error = !trap_pfault(td, tf, &sig, &ucode);
break;
case T_DATA_EXCEPTION:
case T_MEM_ADDRESS_NOT_ALIGNED:
@ -443,8 +445,8 @@ trap_cecc(void)
return (0);
}
static int
trap_pfault(struct thread *td, struct trapframe *tf)
static bool
trap_pfault(struct thread *td, struct trapframe *tf, int *signo, int *ucode)
{
vm_map_t map;
struct proc *p;
@ -508,27 +510,27 @@ trap_pfault(struct thread *td, struct trapframe *tf)
}
/* Fault in the page. */
rv = vm_fault(map, va, prot, VM_FAULT_NORMAL);
rv = vm_fault_trap(map, va, prot, VM_FAULT_NORMAL, signo, ucode);
CTR3(KTR_TRAP, "trap_pfault: return td=%p va=%#lx rv=%d",
td, va, rv);
if (rv == KERN_SUCCESS)
return (0);
return (true);
if (ctx != TLB_CTX_KERNEL && (tf->tf_tstate & TSTATE_PRIV) != 0) {
if (tf->tf_tpc >= (u_long)fs_nofault_begin &&
tf->tf_tpc <= (u_long)fs_nofault_end) {
tf->tf_tpc = (u_long)fs_fault;
tf->tf_tnpc = tf->tf_tpc + 4;
return (0);
return (true);
}
if (tf->tf_tpc >= (u_long)copy_nofault_begin &&
tf->tf_tpc <= (u_long)copy_nofault_end) {
tf->tf_tpc = (u_long)copy_fault;
tf->tf_tnpc = tf->tf_tpc + 4;
return (0);
return (true);
}
}
return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
return (false);
}
/* Maximum number of arguments that can be passed via the out registers. */


@ -315,11 +315,13 @@ struct siginfo32 {
#define BUS_ADRALN 1 /* Invalid address alignment. */
#define BUS_ADRERR 2 /* Nonexistent physical address. */
#define BUS_OBJERR 3 /* Object-specific hardware error. */
#define BUS_OOMERR 100 /* Non-standard: No memory. */
/* codes for SIGSEGV */
#define SEGV_MAPERR 1 /* Address not mapped to object. */
#define SEGV_ACCERR 2 /* Invalid permissions for mapped */
/* object. */
#define SEGV_PKUERR 100 /* x86: PKU violation */
/* codes for SIGFPE */
#define FPE_INTOVF 1 /* Integer overflow. */


@ -85,15 +85,16 @@ void kmeminit(void);
int kernacc(void *, int, int);
int useracc(void *, int, int);
int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int);
int vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
int fault_flags, vm_page_t *m_hold);
void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t,
vm_ooffset_t *);
int vm_fault_disable_pagefaults(void);
void vm_fault_enable_pagefaults(int save);
int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
int fault_flags, vm_page_t *m_hold);
int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
vm_prot_t prot, vm_page_t *ma, int max_count);
int vm_fault_trap(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
int fault_flags, int *signo, int *ucode);
int vm_forkproc(struct thread *, struct proc *, struct thread *,
struct vmspace *, int);
void vm_waitproc(struct proc *);


@ -90,7 +90,9 @@ __FBSDID("$FreeBSD$");
#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#ifdef KTRACE
@ -520,8 +522,19 @@ vm_fault_populate(struct faultstate *fs, vm_prot_t prot, int fault_type,
return (KERN_SUCCESS);
}
static int prot_fault_translation;
SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RWTUN,
&prot_fault_translation, 0,
"Control signal to deliver on protection fault");
/* compat definition to keep common code for signal translation */
#define UCODE_PAGEFLT 12
#ifdef T_PAGEFLT
_Static_assert(UCODE_PAGEFLT == T_PAGEFLT, "T_PAGEFLT");
#endif
/*
* vm_fault:
* vm_fault_trap:
*
* Handle a page fault occurring at the given address,
* requiring the given permissions, in the map specified.
@ -538,12 +551,13 @@ vm_fault_populate(struct faultstate *fs, vm_prot_t prot, int fault_type,
* Caller may hold no locks.
*/
int
vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
int fault_flags)
vm_fault_trap(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
int fault_flags, int *signo, int *ucode)
{
struct thread *td;
int result;
MPASS(signo == NULL || ucode != NULL);
td = curthread;
if ((td->td_pflags & TDP_NOFAULTING) != 0)
return (KERN_PROTECTION_FAILURE);
@ -551,17 +565,69 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
if (map != kernel_map && KTRPOINT(td, KTR_FAULT))
ktrfault(vaddr, fault_type);
#endif
result = vm_fault_hold(map, trunc_page(vaddr), fault_type, fault_flags,
result = vm_fault(map, trunc_page(vaddr), fault_type, fault_flags,
NULL);
KASSERT(result == KERN_SUCCESS || result == KERN_FAILURE ||
result == KERN_INVALID_ADDRESS ||
result == KERN_RESOURCE_SHORTAGE ||
result == KERN_PROTECTION_FAILURE ||
result == KERN_OUT_OF_BOUNDS,
("Unexpected Mach error %d from vm_fault()", result));
#ifdef KTRACE
if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND))
ktrfaultend(result);
#endif
if (result != KERN_SUCCESS && signo != NULL) {
switch (result) {
case KERN_FAILURE:
case KERN_INVALID_ADDRESS:
*signo = SIGSEGV;
*ucode = SEGV_MAPERR;
break;
case KERN_RESOURCE_SHORTAGE:
*signo = SIGBUS;
*ucode = BUS_OOMERR;
break;
case KERN_OUT_OF_BOUNDS:
*signo = SIGBUS;
*ucode = BUS_OBJERR;
break;
case KERN_PROTECTION_FAILURE:
if (prot_fault_translation == 0) {
/*
* Autodetect. This check also covers
* the images without the ABI-tag ELF
* note.
*/
if (SV_CURPROC_ABI() == SV_ABI_FREEBSD &&
curproc->p_osrel >= P_OSREL_SIGSEGV) {
*signo = SIGSEGV;
*ucode = SEGV_ACCERR;
} else {
*signo = SIGBUS;
*ucode = UCODE_PAGEFLT;
}
} else if (prot_fault_translation == 1) {
/* Always compat mode. */
*signo = SIGBUS;
*ucode = UCODE_PAGEFLT;
} else {
/* Always SIGSEGV mode. */
*signo = SIGSEGV;
*ucode = SEGV_ACCERR;
}
break;
default:
KASSERT(0, ("Unexpected Mach error %d from vm_fault()",
result));
break;
}
}
return (result);
}
int
vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
int fault_flags, vm_page_t *m_hold)
{
struct faultstate fs;
@ -775,7 +841,7 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
fs.object == fs.first_object) {
if (fs.pindex >= fs.object->size) {
unlock_and_deallocate(&fs);
return (KERN_PROTECTION_FAILURE);
return (KERN_OUT_OF_BOUNDS);
}
if (fs.object == fs.first_object &&
@ -1024,8 +1090,7 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
vm_page_xunbusy(fs.m);
fs.m = NULL;
unlock_and_deallocate(&fs);
return (rv == VM_PAGER_ERROR ? KERN_FAILURE :
KERN_PROTECTION_FAILURE);
return (KERN_OUT_OF_BOUNDS);
}
/*
@ -1585,7 +1650,7 @@ vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
* If vm_fault_disable_pagefaults() was called,
* i.e., TDP_NOFAULTING is set, we must not sleep nor
* acquire MD VM locks, which means we must not call
* vm_fault_hold(). Some (out of tree) callers mark
* vm_fault(). Some (out of tree) callers mark
* too wide a code area with vm_fault_disable_pagefaults()
* already, use the VM_PROT_QUICK_NOFAULT flag to request
* the proper behaviour explicitly.
@ -1594,7 +1659,7 @@ vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
(curthread->td_pflags & TDP_NOFAULTING) != 0)
goto error;
for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE)
if (*mp == NULL && vm_fault_hold(map, va, prot,
if (*mp == NULL && vm_fault(map, va, prot,
VM_FAULT_NORMAL, mp) != KERN_SUCCESS)
goto error;
}


@ -3191,8 +3191,9 @@ vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags)
* Simulate a fault to get the page and enter
* it into the physical map.
*/
if ((rv = vm_fault(map, faddr, VM_PROT_NONE,
VM_FAULT_WIRE)) != KERN_SUCCESS)
if ((rv = vm_fault(map, faddr,
VM_PROT_NONE, VM_FAULT_WIRE, NULL)) !=
KERN_SUCCESS)
break;
} while ((faddr += PAGE_SIZE) < saved_end);
vm_map_lock(map);


@ -113,6 +113,7 @@ struct xswdev {
#define KERN_RESOURCE_SHORTAGE 6
#define KERN_NOT_RECEIVER 7
#define KERN_NO_ACCESS 8
#define KERN_OUT_OF_BOUNDS 9
#ifndef PA_LOCK_COUNT
#ifdef SMP