c788f92509
from Bruce Evans. Trim the trailing spaces. MFC after: 1 week
907 lines
23 KiB
ArmAsm
907 lines
23 KiB
ArmAsm
/*-
|
|
* Copyright (c) 1989, 1990 William F. Jolitz.
|
|
* Copyright (c) 1990 The Regents of the University of California.
|
|
* Copyright (c) 2007 The FreeBSD Foundation
|
|
* All rights reserved.
|
|
*
|
|
* Portions of this software were developed by A. Joseph Koshy under
|
|
* sponsorship from the FreeBSD Foundation and Google, Inc.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* $FreeBSD$
|
|
*/
|
|
|
|
#include "opt_atpic.h"
|
|
#include "opt_compat.h"
|
|
#include "opt_hwpmc_hooks.h"
|
|
#include "opt_kdtrace.h"
|
|
|
|
#include <machine/asmacros.h>
|
|
#include <machine/psl.h>
|
|
#include <machine/trap.h>
|
|
#include <machine/specialreg.h>
|
|
|
|
#include "assym.s"
|
|
|
|
#ifdef KDTRACE_HOOKS
	/*
	 * DTrace FBT hook plumbing.  Both variables are 8-byte pointers
	 * living in .bss:
	 *
	 * dtrace_invop_jump_addr:     set by the DTrace module to the entry
	 *     point the #BP handler should jump to when DTrace may own the
	 *     breakpoint (tested/used in alltraps below).
	 * dtrace_invop_calltrap_addr: filled in by the #BP path with the
	 *     address of 'calltrap', so the DTrace code can jump back here
	 *     when the breakpoint turns out not to be one of its probes.
	 */
	.bss
	.globl	dtrace_invop_jump_addr
	.align	8
	.type	dtrace_invop_jump_addr,@object
	.size	dtrace_invop_jump_addr,8
dtrace_invop_jump_addr:
	.zero	8
	.globl	dtrace_invop_calltrap_addr
	.align	8
	.type	dtrace_invop_calltrap_addr,@object
	.size	dtrace_invop_calltrap_addr,8
dtrace_invop_calltrap_addr:
	.zero	8
#endif
|
|
	.text
#ifdef HWPMC_HOOKS
	/*
	 * Marker label: hwpmc(4) uses the [start_exceptions, end_exceptions)
	 * address range (see end_exceptions at the bottom of this file) to
	 * classify instruction pointers that fall inside the exception
	 * handlers.
	 */
ENTRY(start_exceptions)
#endif
|
|
|
|
/*****************************************************************************/
|
|
/* Trap handling */
|
|
/*****************************************************************************/
|
|
/*
|
|
* Trap and fault vector routines.
|
|
*
|
|
* All traps are 'interrupt gates', SDT_SYSIGT. An interrupt gate pushes
|
|
* state on the stack but also disables interrupts. This is important for
|
|
* us for the use of the swapgs instruction. We cannot be interrupted
|
|
* until the GS.base value is correct. For most traps, we automatically
|
|
* then enable interrupts if the interrupted context had them enabled.
|
|
* This is equivalent to the i386 port's use of SDT_SYS386TGT.
|
|
*
|
|
* The cpu will push a certain amount of state onto the kernel stack for
|
|
* the current process. See amd64/include/frame.h.
|
|
* This includes the current RFLAGS (status register, which includes
|
|
* the interrupt disable state prior to the trap), the code segment register,
|
|
* and the return instruction pointer are pushed by the cpu. The cpu
|
|
* will also push an 'error' code for certain traps. We push a dummy
|
|
* error code for those traps where the cpu doesn't in order to maintain
|
|
* a consistent frame. We also push a contrived 'trap number'.
|
|
*
|
|
* The CPU does not push the general registers, so we must do that, and we
|
|
* must restore them prior to calling 'iret'. The CPU adjusts %cs and %ss
|
|
* but does not mess with %ds, %es, %gs or %fs. We swap the %gs base for
|
|
* for the kernel mode operation shortly, without changes to the selector
|
|
* loaded. Since superuser long mode works with any selectors loaded into
|
|
* segment registers other then %cs, which makes them mostly unused in long
|
|
* mode, and kernel does not reference %fs, leave them alone. The segment
|
|
* registers are reloaded on return to the usermode.
|
|
*/
|
|
|
|
MCOUNT_LABEL(user)
MCOUNT_LABEL(btrap)

/*
 * Traps that we leave interrupts disabled for: debug and breakpoint
 * faults (and the DTrace return probe trap).  These build the bottom
 * of the trapframe (everything from tf_rip down), store a synthetic
 * trap number and zero tf_addr/tf_err, then branch to alltraps_noen,
 * which finishes the frame WITHOUT re-enabling interrupts.
 */
#define	TRAP_NOEN(a)	\
	subq $TF_RIP,%rsp; \
	movl $(a),TF_TRAPNO(%rsp) ; \
	movq $0,TF_ADDR(%rsp) ; \
	movq $0,TF_ERR(%rsp) ; \
	jmp alltraps_noen
IDTVEC(dbg)
	TRAP_NOEN(T_TRCTRAP)
IDTVEC(bpt)
	TRAP_NOEN(T_BPTFLT)
#ifdef KDTRACE_HOOKS
IDTVEC(dtrace_ret)
	TRAP_NOEN(T_DTRACE_RET)
#endif
|
|
|
|
/*
 * Regular traps; the CPU does not supply tf_err for these.  The macro
 * allocates stack down to tf_rip, pushes a dummy (zero) error code and
 * address so the frame layout stays uniform, records the trap number,
 * and jumps to alltraps, which may re-enable interrupts.
 */
#define	TRAP(a)	\
	subq $TF_RIP,%rsp; \
	movl $(a),TF_TRAPNO(%rsp) ; \
	movq $0,TF_ADDR(%rsp) ; \
	movq $0,TF_ERR(%rsp) ; \
	jmp alltraps
IDTVEC(div)
	TRAP(T_DIVIDE)
IDTVEC(ofl)
	TRAP(T_OFLOW)
IDTVEC(bnd)
	TRAP(T_BOUND)
IDTVEC(ill)
	TRAP(T_PRIVINFLT)
IDTVEC(dna)
	TRAP(T_DNA)
IDTVEC(fpusegm)
	TRAP(T_FPOPFLT)
IDTVEC(mchk)
	TRAP(T_MCHK)
IDTVEC(rsvd)
	TRAP(T_RESERVED)
IDTVEC(fpu)
	TRAP(T_ARITHTRAP)
IDTVEC(xmm)
	TRAP(T_XMMFLT)
|
|
|
|
/*
 * This group of traps have tf_err already pushed by the CPU, so the
 * macro only allocates down to tf_err (not tf_rip), fills in the trap
 * number and a zero tf_addr, and jumps to alltraps.
 */
#define	TRAP_ERR(a)	\
	subq $TF_ERR,%rsp; \
	movl $(a),TF_TRAPNO(%rsp) ; \
	movq $0,TF_ADDR(%rsp) ; \
	jmp alltraps
IDTVEC(tss)
	TRAP_ERR(T_TSSFLT)
IDTVEC(missing)
	TRAP_ERR(T_SEGNPFLT)
IDTVEC(stk)
	TRAP_ERR(T_STKFLT)
IDTVEC(align)
	TRAP_ERR(T_ALIGNFLT)
|
|
|
|
/*
 * alltraps entry point.  Use swapgs if this is the first time in the
 * kernel from userland.  Reenable interrupts if they were enabled
 * before the trap.  This approximates SDT_SYS386TGT on the i386 port.
 *
 * On entry: tf_trapno/tf_addr/tf_err and everything the CPU pushed are
 * already in place; the general registers are still live and are saved
 * here.  Falls through to calltrap (possibly via the DTrace #BP hook).
 */
	SUPERALIGN_TEXT
	.globl	alltraps
	.type	alltraps,@function
alltraps:
	movq	%rdi,TF_RDI(%rsp)		/* free %rdi for PCPU access */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)	/* Did we come from kernel? */
	jz	alltraps_testi			/* already running with kernel GS.base */
	swapgs					/* from user: switch to kernel GS.base */
	movq	PCPU(CURPCB),%rdi
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)	/* fast return path is OK so far */
	movw	%fs,TF_FS(%rsp)			/* user segment regs: save for return */
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
alltraps_testi:
	testl	$PSL_I,TF_RFLAGS(%rsp)		/* interrupts enabled before trap? */
	jz	alltraps_pushregs_no_rdi
	sti					/* then re-enable them now */
alltraps_pushregs_no_rdi:
	/* Save the remaining general registers (%rdi was saved above). */
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)	/* frame carries valid segment regs */
	cld					/* C code expects DF clear */
	FAKE_MCOUNT(TF_RIP(%rsp))
#ifdef KDTRACE_HOOKS
	/*
	 * DTrace Function Boundary Trace (fbt) probes are triggered
	 * by int3 (0xcc) which causes the #BP (T_BPTFLT) breakpoint
	 * interrupt.  For all other trap types, just handle them in
	 * the usual way.
	 */
	cmpl	$T_BPTFLT,TF_TRAPNO(%rsp)
	jne	calltrap

	/* Check if there is no DTrace hook registered. */
	cmpq	$0,dtrace_invop_jump_addr
	je	calltrap

	/*
	 * Set our jump address for the jump back in the event that
	 * the breakpoint wasn't caused by DTrace at all.
	 */
	movq	$calltrap,dtrace_invop_calltrap_addr(%rip)

	/* Jump to the code hooked in by DTrace. */
	movq	dtrace_invop_jump_addr,%rax
	jmpq	*dtrace_invop_jump_addr
#endif
	/*
	 * calltrap: common C dispatch.  Pass the trapframe pointer as the
	 * single argument (SysV: %rdi) and return through doreti.
	 */
	.globl	calltrap
	.type	calltrap,@function
calltrap:
	movq	%rsp,%rdi
	call	trap
	MEXITCOUNT
	jmp	doreti			/* Handle any pending ASTs */
|
|
|
|
/*
 * alltraps_noen entry point.  Unlike alltraps above, we want to
 * leave the interrupts disabled.  This corresponds to
 * SDT_SYS386IGT on the i386 port.
 *
 * Saves %rdi and the segment registers, swaps GS.base if arriving
 * from user mode, then joins the common register-save path.
 */
	SUPERALIGN_TEXT
	.globl	alltraps_noen
	.type	alltraps_noen,@function
alltraps_noen:
	movq	%rdi,TF_RDI(%rsp)		/* free %rdi for PCPU access */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)	/* Did we come from kernel? */
	jz	1f				/* already running with kernel GS.base */
	swapgs
	movq	PCPU(CURPCB),%rdi
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
1:	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	jmp	alltraps_pushregs_no_rdi	/* note: skips the sti path */
|
|
|
|
/*
 * Double fault (#DF) handler.  Runs on a dedicated IST stack (the
 * faulting context's stack may be the problem), builds a complete
 * trapframe by hand, and calls dblfault_handler().  There is no
 * recovery: if the handler returns, halt forever.
 */
IDTVEC(dblfault)
	subq	$TF_ERR,%rsp			/* CPU pushed an error code */
	movl	$T_DOUBLEFLT,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	cld
	testb	$SEL_RPL_MASK,TF_CS(%rsp)	/* Did we come from kernel? */
	jz	1f				/* already running with kernel GS.base */
	swapgs
1:
	movq	%rsp,%rdi			/* arg: trapframe pointer */
	call	dblfault_handler
2:
	hlt					/* no return from a double fault */
	jmp	2b
|
|
|
|
/*
 * Page fault (#PF) handler.  Like alltraps, but must read %cr2 (the
 * faulting address) into tf_addr BEFORE interrupts are re-enabled,
 * since a nested fault would clobber %cr2.
 */
IDTVEC(page)
	subq	$TF_ERR,%rsp			/* CPU pushed an error code */
	movl	$T_PAGEFLT,TF_TRAPNO(%rsp)
	movq	%rdi,TF_RDI(%rsp)		/* free up a GP register */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)	/* Did we come from kernel? */
	jz	1f				/* already running with kernel GS.base */
	swapgs
	movq	PCPU(CURPCB),%rdi
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
1:	movq	%cr2,%rdi			/* preserve %cr2 before .. */
	movq	%rdi,TF_ADDR(%rsp)		/* enabling interrupts. */
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)		/* restore interrupt state ... */
	jz	alltraps_pushregs_no_rdi
	sti
	jmp	alltraps_pushregs_no_rdi	/* ... and finish the frame */
|
|
|
|
/*
 * General protection fault (#GP) handler.
 *
 * We have to special-case this one.  If we get a trap in doreti() at
 * the iretq stage, we'll reenter with the wrong gs state.  We'll have
 * to do a special swapgs in this case even when coming from the kernel.
 * XXX linux has a trap handler for their equivalent of load_gs().
 */
IDTVEC(prot)
	subq	$TF_ERR,%rsp			/* CPU pushed an error code */
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	%rdi,TF_RDI(%rsp)		/* free up a GP register */
	leaq	doreti_iret(%rip),%rdi
	cmpq	%rdi,TF_RIP(%rsp)		/* faulted on the iretq itself? */
	je	1f				/* kernel but with user gsbase!! */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)	/* Did we come from kernel? */
	jz	2f				/* already running with kernel GS.base */
1:	swapgs
2:	movq	PCPU(CURPCB),%rdi
	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)	/* always full iret from GPF */
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)		/* restore interrupt state ... */
	jz	alltraps_pushregs_no_rdi
	sti
	jmp	alltraps_pushregs_no_rdi	/* ... and finish the frame */
|
|
|
|
/*
 * Fast syscall entry point.  We enter here with just our new %cs/%ss set,
 * and the new privilege level.  We are still running on the old user stack
 * pointer.  We have to juggle a few things around to find our stack etc.
 * swapgs gives us access to our PCPU space only.
 *
 * We do not support invoking this from a custom %cs or %ss (e.g. using
 * entries from an LDT).
 *
 * On SYSCALL entry the CPU has put the return %rip in %rcx and the
 * saved %rflags in %r11 (both saved into the trapframe below); user
 * code passes the original %rcx argument in %r10.
 */
IDTVEC(fast_syscall)
	swapgs					/* PCPU access; interrupts still off */
	movq	%rsp,PCPU(SCRATCH_RSP)		/* stash user %rsp */
	movq	PCPU(RSP0),%rsp			/* switch to kernel stack */
	/* Now emulate a trapframe. Make the 8 byte alignment odd for call. */
	subq	$TF_SIZE,%rsp
	/* defer TF_RSP till we have a spare register */
	movq	%r11,TF_RFLAGS(%rsp)		/* %r11 = saved user rflags */
	movq	%rcx,TF_RIP(%rsp)		/* %rcx original value is in %r10 */
	movq	PCPU(SCRATCH_RSP),%r11		/* %r11 already saved */
	movq	%r11,TF_RSP(%rsp)		/* user stack pointer */
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	movq	PCPU(CURPCB),%r11
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%r11)	/* plan on the fast sysret path */
	sti					/* frame usable; allow interrupts */
	movq	$KUDSEL,TF_SS(%rsp)
	movq	$KUCSEL,TF_CS(%rsp)
	movq	$2,TF_ERR(%rsp)			/* 2: presumably sizeof "syscall" insn — TODO confirm */
	movq	%rdi,TF_RDI(%rsp)		/* arg 1 */
	movq	%rsi,TF_RSI(%rsp)		/* arg 2 */
	movq	%rdx,TF_RDX(%rsp)		/* arg 3 */
	movq	%r10,TF_RCX(%rsp)		/* arg 4 */
	movq	%r8,TF_R8(%rsp)			/* arg 5 */
	movq	%r9,TF_R9(%rsp)			/* arg 6 */
	movq	%rax,TF_RAX(%rsp)		/* syscall number */
	movq	%rbx,TF_RBX(%rsp)		/* C preserved */
	movq	%rbp,TF_RBP(%rsp)		/* C preserved */
	movq	%r12,TF_R12(%rsp)		/* C preserved */
	movq	%r13,TF_R13(%rsp)		/* C preserved */
	movq	%r14,TF_R14(%rsp)		/* C preserved */
	movq	%r15,TF_R15(%rsp)		/* C preserved */
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	cld
	FAKE_MCOUNT(TF_RIP(%rsp))
	movq	PCPU(CURTHREAD),%rdi
	movq	%rsp,TD_FRAME(%rdi)		/* curthread->td_frame = frame */
	movl	TF_RFLAGS(%rsp),%esi
	andl	$PSL_T,%esi			/* arg 2: user single-step flag */
	call	amd64_syscall
1:	movq	PCPU(CURPCB),%rax
	/* Disable interrupts before testing PCB_FULL_IRET. */
	cli
	testl	$PCB_FULL_IRET,PCB_FLAGS(%rax)
	jnz	3f				/* must return via doreti */
	/* Check for and handle AST's on return to userland. */
	movq	PCPU(CURTHREAD),%rax
	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
	jne	2f
	/* Restore preserved registers. */
	MEXITCOUNT
	movq	TF_RDI(%rsp),%rdi	/* bonus; preserve arg 1 */
	movq	TF_RSI(%rsp),%rsi	/* bonus: preserve arg 2 */
	movq	TF_RDX(%rsp),%rdx	/* return value 2 */
	movq	TF_RAX(%rsp),%rax	/* return value 1 */
	movq	TF_RFLAGS(%rsp),%r11	/* original %rflags */
	movq	TF_RIP(%rsp),%rcx	/* original %rip */
	movq	TF_RSP(%rsp),%rsp	/* user stack pointer */
	swapgs				/* back to user GS.base */
	sysretq

2:	/* AST scheduled. */
	sti
	movq	%rsp,%rdi
	call	ast
	jmp	1b			/* re-check PCB_FULL_IRET and ASTs */

3:	/* Requested full context restore, use doreti for that. */
	MEXITCOUNT
	jmp	doreti
|
|
|
|
/*
 * Here for CYA insurance, in case a "syscall" instruction gets
 * issued from 32 bit compatibility mode.  MSR_CSTAR has to point
 * to *something* if EFER_SCE is enabled.
 *
 * Immediately returns to the caller; 32-bit syscalls are not
 * serviced through this vector.
 */
IDTVEC(fast_syscall32)
	sysret
|
|
|
|
/*
 * NMI handling is special.
 *
 * First, NMIs do not respect the state of the processor's RFLAGS.IF
 * bit.  The NMI handler may be entered at any time, including when
 * the processor is in a critical section with RFLAGS.IF == 0.
 * The processor's GS.base value could be invalid on entry to the
 * handler.
 *
 * Second, the processor treats NMIs specially, blocking further NMIs
 * until an 'iretq' instruction is executed.  We thus need to execute
 * the NMI handler with interrupts disabled, to prevent a nested interrupt
 * from executing an 'iretq' instruction and inadvertently taking the
 * processor out of NMI mode.
 *
 * Third, the NMI handler runs on its own stack (tss_ist2).  The canonical
 * GS.base value for the processor is stored just above the bottom of its
 * NMI stack.  For NMIs taken from kernel mode, the current value in
 * the processor's GS.base is saved at entry to C-preserved register %r12,
 * the canonical value for GS.base is then loaded into the processor, and
 * the saved value is restored at exit time.  For NMIs taken from user mode,
 * the cheaper 'SWAPGS' instructions are used for swapping GS.base.
 *
 * Register convention within this handler: %ebx == 1 iff the NMI was
 * taken from user mode (set at nmi_fromuserspace, tested on exit).
 */
IDTVEC(nmi)
	subq	$TF_RIP,%rsp			/* build a full trapframe by hand */
	movl	$(T_NMI),TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	cld
	xorl	%ebx,%ebx			/* %ebx = 0: assume kernel mode */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jnz	nmi_fromuserspace
	/*
	 * We've interrupted the kernel.  Preserve GS.base in %r12.
	 */
	movl	$MSR_GSBASE,%ecx
	rdmsr					/* %edx:%eax = current GS.base */
	movq	%rax,%r12
	shlq	$32,%rdx
	orq	%rdx,%r12			/* %r12 = 64-bit saved GS.base */
	/* Retrieve and load the canonical value for GS.base. */
	movq	TF_SIZE(%rsp),%rdx		/* stored just above the frame */
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
	jmp	nmi_calltrap
nmi_fromuserspace:
	incl	%ebx				/* %ebx = 1: from user mode */
	swapgs					/* cheap GS.base swap for user entry */
/* Note: this label is also used by ddb and gdb: */
nmi_calltrap:
	FAKE_MCOUNT(TF_RIP(%rsp))
	movq	%rsp,%rdi			/* arg: trapframe pointer */
	call	trap
	MEXITCOUNT
#ifdef HWPMC_HOOKS
	/*
	 * Capture a userspace callchain if needed.
	 *
	 * - Check if the current trap was from user mode.
	 * - Check if the current thread is valid.
	 * - Check if the thread requires a user call chain to be
	 *   captured.
	 *
	 * We are still in NMI mode at this point.
	 */
	testl	%ebx,%ebx
	jz	nocallchain	/* not from userspace */
	movq	PCPU(CURTHREAD),%rax
	orq	%rax,%rax	/* curthread present? */
	jz	nocallchain
	testl	$TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
	jz	nocallchain
	/*
	 * A user callchain is to be captured, so:
	 * - Move execution to the regular kernel stack, to allow for
	 *   nested NMI interrupts.
	 * - Take the processor out of "NMI" mode by faking an "iret".
	 * - Enable interrupts, so that copyin() can work.
	 */
	movq	%rsp,%rsi	/* source stack pointer */
	movq	$TF_SIZE,%rcx
	movq	PCPU(RSP0),%rdx
	subq	%rcx,%rdx
	movq	%rdx,%rdi	/* destination stack pointer */

	shrq	$3,%rcx		/* trap frame size in long words */
	cld
	rep
	movsq			/* copy trapframe */

	/* Fake an iretq frame pointing at 'outofnmi' on the kernel stack. */
	movl	%ss,%eax
	pushq	%rax		/* tf_ss */
	pushq	%rdx		/* tf_rsp (on kernel stack) */
	pushfq			/* tf_rflags */
	movl	%cs,%eax
	pushq	%rax		/* tf_cs */
	pushq	$outofnmi	/* tf_rip */
	iretq			/* leaves NMI mode, lands at outofnmi */
outofnmi:
	/*
	 * At this point the processor has exited NMI mode and is running
	 * with interrupts turned off on the normal kernel stack.
	 *
	 * If a pending NMI gets recognized at or after this point, it
	 * will cause a kernel callchain to be traced.
	 *
	 * We turn interrupts back on, and call the user callchain capture hook.
	 */
	movq	pmc_hook,%rax
	orq	%rax,%rax	/* hook registered? */
	jz	nocallchain
	movq	PCPU(CURTHREAD),%rdi		/* thread */
	movq	$PMC_FN_USER_CALLCHAIN,%rsi	/* command */
	movq	%rsp,%rdx			/* frame */
	sti
	call	*%rax
	cli
nocallchain:
#endif
	testl	%ebx,%ebx			/* from user mode? */
	jnz	doreti_exit			/* normal user return path */
nmi_kernelexit:
	/*
	 * Put back the preserved MSR_GSBASE value.
	 */
	movl	$MSR_GSBASE,%ecx
	movq	%r12,%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
nmi_restoreregs:
	/* Restore all general registers straight from the trapframe. */
	movq	TF_RDI(%rsp),%rdi
	movq	TF_RSI(%rsp),%rsi
	movq	TF_RDX(%rsp),%rdx
	movq	TF_RCX(%rsp),%rcx
	movq	TF_R8(%rsp),%r8
	movq	TF_R9(%rsp),%r9
	movq	TF_RAX(%rsp),%rax
	movq	TF_RBX(%rsp),%rbx
	movq	TF_RBP(%rsp),%rbp
	movq	TF_R10(%rsp),%r10
	movq	TF_R11(%rsp),%r11
	movq	TF_R12(%rsp),%r12
	movq	TF_R13(%rsp),%r13
	movq	TF_R14(%rsp),%r14
	movq	TF_R15(%rsp),%r15
	addq	$TF_RIP,%rsp			/* discard frame below tf_rip */
	jmp	doreti_iret
|
|
|
|
/*
 * First code run in a newly forked thread.  cpu_fork()/cpu_set_fork_handler
 * arranged %r12 (callout function) and %rbx (its argument) in the new
 * thread's saved context; the trapframe for the return to user mode sits
 * at the top of the stack.  Hand all three to fork_exit() and return to
 * user mode through doreti.
 */
ENTRY(fork_trampoline)
	movq	%r12,%rdi		/* function */
	movq	%rbx,%rsi		/* arg1 */
	movq	%rsp,%rdx		/* trapframe pointer */
	call	fork_exit
	MEXITCOUNT
	jmp	doreti			/* Handle any ASTs */
|
|
|
|
/*
|
|
* To efficiently implement classification of trap and interrupt handlers
|
|
* for profiling, there must be only trap handlers between the labels btrap
|
|
* and bintr, and only interrupt handlers between the labels bintr and
|
|
* eintr. This is implemented (partly) by including files that contain
|
|
* some of the handlers. Before including the files, set up a normal asm
|
|
 * environment so that the included files don't need to know that they are
|
|
* included.
|
|
*/
|
|
|
|
#ifdef COMPAT_FREEBSD32
|
|
.data
|
|
.p2align 4
|
|
.text
|
|
SUPERALIGN_TEXT
|
|
|
|
#include <amd64/ia32/ia32_exception.S>
|
|
#endif
|
|
|
|
.data
|
|
.p2align 4
|
|
.text
|
|
SUPERALIGN_TEXT
|
|
MCOUNT_LABEL(bintr)
|
|
|
|
#include <amd64/amd64/apic_vector.S>
|
|
|
|
#ifdef DEV_ATPIC
|
|
.data
|
|
.p2align 4
|
|
.text
|
|
SUPERALIGN_TEXT
|
|
|
|
#include <amd64/amd64/atpic_vector.S>
|
|
#endif
|
|
|
|
.text
|
|
MCOUNT_LABEL(eintr)
|
|
|
|
/*
 * void doreti(struct trapframe)
 *
 * Handle return from interrupts, traps and syscalls.  Loops on
 * pending ASTs when returning to user mode, then falls into
 * doreti_exit to pop the frame.
 */
	.text
	SUPERALIGN_TEXT
	.type	doreti,@function
doreti:
	FAKE_MCOUNT($bintr)		/* init "from" bintr -> doreti */
	/*
	 * Check if ASTs can be handled now.
	 */
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* are we returning to user mode? */
	jz	doreti_exit		/* can't handle ASTs now if not */

doreti_ast:
	/*
	 * Check for ASTs atomically with returning.  Disabling CPU
	 * interrupts provides sufficient locking even in the SMP case,
	 * since we will be informed of any new ASTs by an IPI.
	 */
	cli
	movq	PCPU(CURTHREAD),%rax
	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
	je	doreti_exit
	sti				/* ast() runs with interrupts on */
	movq	%rsp,%rdi		/* pass a pointer to the trapframe */
	call	ast
	jmp	doreti_ast		/* re-check; ast() may set new flags */
|
|
|
|
/*
 * doreti_exit:	pop registers, iret.
 *
 *	The segment register pop is a special case, since it may
 *	fault if (for example) a sigreturn specifies bad segment
 *	registers.  The fault is handled in trap.c.
 *
 * Segment registers and the fs/gs base MSRs are only reloaded when
 * returning to user mode AND the pcb requested a full iret; the
 * ld_* / set_segs labels below are the restart points used by the
 * fault recovery code (*_load_fault handlers further down).
 */
doreti_exit:
	MEXITCOUNT
	movq	PCPU(CURPCB),%r8

	/*
	 * Do not reload segment registers for kernel.
	 * Since we do not reload segments registers with sane
	 * values on kernel entry, descriptors referenced by
	 * segments registers might be not valid.  This is fatal
	 * for user mode, but is not a problem for the kernel.
	 */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)	/* returning to kernel? */
	jz	ld_regs
	testl	$PCB_FULL_IRET,PCB_FLAGS(%r8)	/* fast path requested? */
	jz	ld_regs
	testl	$TF_HASSEGS,TF_FLAGS(%rsp)	/* frame lacks segment regs? */
	je	set_segs

do_segs:
	/* Restore %fs and fsbase */
	movw	TF_FS(%rsp),%ax
	.globl	ld_fs
ld_fs:
	movw	%ax,%fs				/* may fault; see fs_load_fault */
	cmpw	$KUF32SEL,%ax			/* only reload base for the */
	jne	1f				/* standard 32-bit %fs selector */
	movl	$MSR_FSBASE,%ecx
	movl	PCB_FSBASE(%r8),%eax
	movl	PCB_FSBASE+4(%r8),%edx
	.globl	ld_fsbase
ld_fsbase:
	wrmsr					/* may fault; see fsbase_load_fault */
1:
	/* Restore %gs and gsbase */
	movw	TF_GS(%rsp),%si
	pushfq					/* save IF; the mov to %gs must */
	cli					/* not be interrupted mid-swap */
	movl	$MSR_GSBASE,%ecx
	rdmsr					/* current kernel GS.base in %edx:%eax */
	.globl	ld_gs
ld_gs:
	movw	%si,%gs				/* may fault; see gs_load_fault */
	wrmsr					/* put kernel GS.base back (mov %gs reset it) */
	popfq
	cmpw	$KUG32SEL,%si			/* only reload base for the */
	jne	1f				/* standard 32-bit %gs selector */
	movl	$MSR_KGSBASE,%ecx
	movl	PCB_GSBASE(%r8),%eax
	movl	PCB_GSBASE+4(%r8),%edx
	.globl	ld_gsbase
ld_gsbase:
	wrmsr					/* may fault; see gsbase_load_fault */
1:
	.globl	ld_es
ld_es:
	movw	TF_ES(%rsp),%es			/* may fault; see es_load_fault */
	.globl	ld_ds
ld_ds:
	movw	TF_DS(%rsp),%ds			/* may fault; see ds_load_fault */
ld_regs:
	/* Restore general registers from the trapframe. */
	movq	TF_RDI(%rsp),%rdi
	movq	TF_RSI(%rsp),%rsi
	movq	TF_RDX(%rsp),%rdx
	movq	TF_RCX(%rsp),%rcx
	movq	TF_R8(%rsp),%r8
	movq	TF_R9(%rsp),%r9
	movq	TF_RAX(%rsp),%rax
	movq	TF_RBX(%rsp),%rbx
	movq	TF_RBP(%rsp),%rbp
	movq	TF_R10(%rsp),%r10
	movq	TF_R11(%rsp),%r11
	movq	TF_R12(%rsp),%r12
	movq	TF_R13(%rsp),%r13
	movq	TF_R14(%rsp),%r14
	movq	TF_R15(%rsp),%r15
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
	jz	1f			/* keep running with kernel GS.base */
	cli				/* no interrupts between swapgs and iretq */
	swapgs
1:
	addq	$TF_RIP,%rsp		/* skip over tf_err, tf_trapno */
	.globl	doreti_iret
doreti_iret:
	iretq				/* may fault; see doreti_iret_fault */

set_segs:
	/* Frame has no saved segment regs: install the user defaults. */
	movw	$KUDSEL,%ax
	movw	%ax,TF_DS(%rsp)
	movw	%ax,TF_ES(%rsp)
	movw	$KUF32SEL,TF_FS(%rsp)
	movw	$KUG32SEL,TF_GS(%rsp)
	jmp	do_segs
|
|
|
|
/*
 * doreti_iret_fault.  Alternative return code for
 * the case where we get a fault in the doreti_exit code
 * above.  trap() (amd64/amd64/trap.c) catches this specific
 * case, sends the process a signal and continues in the
 * corresponding place in the code below.
 *
 * We arrive here with the CPU's fault frame on the stack; rebuild a
 * full trapframe (all registers were already restored by ld_regs
 * before the iretq faulted) and dispatch as a T_PROTFLT.
 */
	ALIGN_TEXT
	.globl	doreti_iret_fault
doreti_iret_fault:
	subq	$TF_RIP,%rsp		/* space including tf_err, tf_trapno */
	testl	$PSL_I,TF_RFLAGS(%rsp)	/* mirror the interrupted IF state */
	jz	1f
	sti
1:
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	movq	$0,TF_ERR(%rsp)	/* XXX should be the error code */
	movq	$0,TF_ADDR(%rsp)
	FAKE_MCOUNT(TF_RIP(%rsp))
	jmp	calltrap
|
|
|
|
/*
 * Recovery for a fault raised at ld_ds in doreti_exit: report the bad
 * %ds selector as a T_PROTFLT via trap(), then retry the return with
 * the safe user data selector written into the frame.
 */
	ALIGN_TEXT
	.globl	ds_load_fault
ds_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)	/* mirror the interrupted IF state */
	jz	1f
	sti
1:
	movq	%rsp,%rdi		/* arg: trapframe pointer */
	call	trap
	movw	$KUDSEL,TF_DS(%rsp)	/* replace bad selector and retry */
	jmp	doreti
|
|
|
|
/*
 * Recovery for a fault raised at ld_es in doreti_exit: report the bad
 * %es selector as a T_PROTFLT via trap(), then retry the return with
 * the safe user data selector written into the frame.
 */
	ALIGN_TEXT
	.globl	es_load_fault
es_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)	/* mirror the interrupted IF state */
	jz	1f
	sti
1:
	movq	%rsp,%rdi		/* arg: trapframe pointer */
	call	trap
	movw	$KUDSEL,TF_ES(%rsp)	/* replace bad selector and retry */
	jmp	doreti
|
|
|
|
/*
 * Recovery for a fault raised at ld_fs in doreti_exit: report the bad
 * %fs selector as a T_PROTFLT via trap(), then retry the return with
 * the default 32-bit %fs selector written into the frame.
 */
	ALIGN_TEXT
	.globl	fs_load_fault
fs_load_fault:
	testl	$PSL_I,TF_RFLAGS(%rsp)	/* mirror the interrupted IF state */
	jz	1f
	sti
1:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	movq	%rsp,%rdi		/* arg: trapframe pointer */
	call	trap
	movw	$KUF32SEL,TF_FS(%rsp)	/* replace bad selector and retry */
	jmp	doreti
|
|
|
|
/*
 * Recovery for a fault raised at ld_gs in doreti_exit.  The ld_gs
 * sequence executed pushfq/cli before the mov to %gs, so first pop
 * the saved flags to rebalance the stack, then report the bad %gs
 * selector as a T_PROTFLT and retry with the default 32-bit %gs
 * selector written into the frame.
 */
	ALIGN_TEXT
	.globl	gs_load_fault
gs_load_fault:
	popfq				/* discard flags pushed before ld_gs */
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)	/* mirror the interrupted IF state */
	jz	1f
	sti
1:
	movq	%rsp,%rdi		/* arg: trapframe pointer */
	call	trap
	movw	$KUG32SEL,TF_GS(%rsp)	/* replace bad selector and retry */
	jmp	doreti
|
|
|
|
/*
 * Recovery for a fault raised at ld_fsbase (the wrmsr to MSR_FSBASE)
 * in doreti_exit: report it as a T_PROTFLT via trap(), zero the pcb's
 * stored fsbase so the bad value is not reloaded, then retry.
 */
	ALIGN_TEXT
	.globl	fsbase_load_fault
fsbase_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)	/* mirror the interrupted IF state */
	jz	1f
	sti
1:
	movq	%rsp,%rdi		/* arg: trapframe pointer */
	call	trap
	movq	PCPU(CURTHREAD),%r8
	movq	TD_PCB(%r8),%r8
	movq	$0,PCB_FSBASE(%r8)	/* drop the offending base value */
	jmp	doreti
|
|
|
|
/*
 * Recovery for a fault raised at ld_gsbase (the wrmsr to MSR_KGSBASE)
 * in doreti_exit: report it as a T_PROTFLT via trap(), zero the pcb's
 * stored gsbase so the bad value is not reloaded, then retry.
 */
	ALIGN_TEXT
	.globl	gsbase_load_fault
gsbase_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)	/* mirror the interrupted IF state */
	jz	1f
	sti
1:
	movq	%rsp,%rdi		/* arg: trapframe pointer */
	call	trap
	movq	PCPU(CURTHREAD),%r8
	movq	TD_PCB(%r8),%r8
	movq	$0,PCB_GSBASE(%r8)	/* drop the offending base value */
	jmp	doreti
|
|
|
|
#ifdef HWPMC_HOOKS
	/*
	 * Marker closing the [start_exceptions, end_exceptions) range used
	 * by hwpmc(4) to classify exception-handler instruction pointers.
	 */
ENTRY(end_exceptions)
#endif
|