freebsd-skq/sys/i386/xen/exception.s

495 lines
13 KiB
ArmAsm
Raw Normal View History

/*-
* Copyright (c) 1989, 1990 William F. Jolitz.
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include "opt_apic.h"
#include "opt_npx.h"
#include <machine/asmacros.h>
#include <machine/psl.h>
#include <machine/trap.h>
#include "assym.s"
/*
 * Mask applied to the saved %cs selector in doreti to decide whether the
 * frame being returned to is a user-mode frame.
 */
#define SEL_RPL_MASK 0x0002
/*
 * Hypercall number of iret.  doreti_iret jumps to
 * hypercall_page + (__HYPERVISOR_iret * 32).
 */
#define __HYPERVISOR_iret 23
/* Offsets into shared_info_t. */
/*
 * evtchn_upcall_pending is intentionally defined as empty: written as
 * evtchn_upcall_pending(reg) it expands to (reg), i.e. offset 0.
 */
#define evtchn_upcall_pending /* 0 */
#define evtchn_upcall_mask 1
/* Shift count GET_VCPU_INFO applies to the CPU id on SMP (stride 1 << 6). */
#define sizeof_vcpu_shift 6
/*
 * GET_VCPU_INFO(reg): load into reg the base address used by the
 * evtchn_upcall_* offsets.
 *
 * SMP:   reg = HYPERVISOR_shared_info + (PCPU(CPUID) << sizeof_vcpu_shift)
 * !SMP:  reg = HYPERVISOR_shared_info
 *
 * reg is overwritten.  The SMP form uses shl/addl and therefore also
 * modifies the arithmetic flags; the uniprocessor form is a plain movl.
 */
#ifdef SMP
#define GET_VCPU_INFO(reg)	movl PCPU(CPUID),reg ; \
	shl $sizeof_vcpu_shift,reg ; \
	addl HYPERVISOR_shared_info,reg
#else
#define GET_VCPU_INFO(reg)	movl HYPERVISOR_shared_info,reg
#endif
/*
 * Event-delivery masking, used in this file where native code would use
 * cli/sti.  The __-prefixed forms expect reg to already hold the value
 * produced by GET_VCPU_INFO and store 1 (mask) or 0 (unmask) into the
 * evtchn_upcall_mask byte.
 */
#define __DISABLE_INTERRUPTS(reg) movb $1,evtchn_upcall_mask(reg)
#define __ENABLE_INTERRUPTS(reg) movb $0,evtchn_upcall_mask(reg)
/*
 * Convenience forms: recompute the base with GET_VCPU_INFO first, so reg
 * is clobbered and does not need to be valid on entry.
 */
#define DISABLE_INTERRUPTS(reg) GET_VCPU_INFO(reg) ; \
__DISABLE_INTERRUPTS(reg)
#define ENABLE_INTERRUPTS(reg) GET_VCPU_INFO(reg) ; \
__ENABLE_INTERRUPTS(reg)
/*
 * Test the evtchn_upcall_pending byte (offset 0 from reg); ZF is clear
 * afterwards if any bit of that byte is set.  reg must already hold the
 * GET_VCPU_INFO value.
 */
#define __TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg)
/*
 * POPA: restore the general registers saved by pushal, one popl at a time.
 *
 * The first "popl %ebx" consumes the slot where pushal stored %esp; its
 * value is discarded because the following "popl %ebx" overwrites %ebx
 * with the real saved value.
 *
 * doreti_exit uses this instead of popal; critical_fixup_table has one
 * entry per pop here, so do not change the instruction sequence without
 * updating that table.
 */
#define POPA \
popl %edi; \
popl %esi; \
popl %ebp; \
popl %ebx; \
popl %ebx; \
popl %edx; \
popl %ecx; \
popl %eax;
.text
/*****************************************************************************/
/* Trap handling */
/*****************************************************************************/
/*
* Trap and fault vector routines.
*
* Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on
* the stack that mostly looks like an interrupt, but does not disable
* interrupts. A few of the traps we use are interrupt gates,
* SDT_SYS386IGT, which are nearly the same thing except interrupts are
* disabled on entry.
*
* The cpu will push a certain amount of state onto the kernel stack for
* the current process. The amount of state depends on the type of trap
* and whether the trap crossed rings or not. See i386/include/frame.h.
* At the very least the current EFLAGS (status register, which includes
* the interrupt disable state prior to the trap), the code segment register,
* and the return instruction pointer are pushed by the cpu. The cpu
* will also push an 'error' code for certain traps. We push a dummy
* error code for those traps where the cpu doesn't in order to maintain
* a consistent frame. We also push a contrived 'trap number'.
*
* The cpu does not push the general registers, we must do that, and we
* must restore them prior to calling 'iret'. The cpu adjusts the %cs and
* %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we
* must load them with appropriate values for supervisor mode operation.
*/
MCOUNT_LABEL(user)
MCOUNT_LABEL(btrap)

/* Push the trap number and continue in the common trap path. */
#define	TRAP(a)		pushl $(a) ; jmp alltraps

/*
 * One stub per vector.  Stubs that start with "pushl $0" supply a dummy
 * error code so every vector reaches alltraps with the same frame
 * layout; the remaining stubs push only the trap number.
 */
IDTVEC(div)
	pushl	$0
	TRAP(T_DIVIDE)
IDTVEC(dbg)
	pushl	$0
	TRAP(T_TRCTRAP)
IDTVEC(nmi)
	pushl	$0
	TRAP(T_NMI)
IDTVEC(bpt)
	pushl	$0
	TRAP(T_BPTFLT)
IDTVEC(ofl)
	pushl	$0
	TRAP(T_OFLOW)
IDTVEC(bnd)
	pushl	$0
	TRAP(T_BOUND)
IDTVEC(ill)
	pushl	$0
	TRAP(T_PRIVINFLT)
IDTVEC(dna)
	pushl	$0
	TRAP(T_DNA)
IDTVEC(fpusegm)
	pushl	$0
	TRAP(T_FPOPFLT)
IDTVEC(tss)
	TRAP(T_TSSFLT)
IDTVEC(missing)
	TRAP(T_SEGNPFLT)
IDTVEC(stk)
	TRAP(T_STKFLT)
IDTVEC(prot)
	TRAP(T_PROTFLT)
IDTVEC(page)
	TRAP(T_PAGEFLT)
IDTVEC(mchk)
	pushl	$0
	TRAP(T_MCHK)
IDTVEC(rsvd)
	pushl	$0
	TRAP(T_RESERVED)
IDTVEC(fpu)
	pushl	$0
	TRAP(T_ARITHTRAP)
IDTVEC(align)
	TRAP(T_ALIGNFLT)
IDTVEC(xmm)
	pushl	$0
	TRAP(T_XMMFLT)
/*
 * Event-channel upcall entry point.
 *
 * Builds a trap frame (two zero words for the error-code and trap-number
 * slots, the general registers, then %ds/%es/%fs), switches to the kernel
 * segment registers and calls xen_intr_handle_upcall with a pointer to
 * the frame.
 *
 * If the interrupted %eip lies in [scrit, ecrit) the upcall arrived while
 * doreti_exit was already unwinding a frame; control then goes to
 * critical_region_fixup (with the interrupted %eip in %eax), which merges
 * the two frames and re-enters at hypervisor_callback_pending.
 *
 * hypervisor_callback_pending is also reached from doreti_exit when more
 * events are pending: it masks event delivery and dispatches again.
 */
IDTVEC(hypervisor_callback)
	pushl	$0			/* error-code slot */
	pushl	$0			/* trap-number slot */
	pushal
	pushl	%ds
	pushl	%es
	pushl	%fs
upcall_with_regs_pushed:
	SET_KERNEL_SREGS
	FAKE_MCOUNT(TF_EIP(%esp))
call_evtchn_upcall:
	movl	TF_EIP(%esp),%eax	/* %eax = interrupted %eip */
	cmpl	$scrit,%eax
	jb	10f			/* below the critical region */
	cmpl	$ecrit,%eax
	jb	critical_region_fixup	/* inside [scrit, ecrit) */
10:	pushl	%esp			/* argument: trap frame pointer */
	call	xen_intr_handle_upcall
	addl	$4,%esp
	/*
	 * Return via doreti to handle ASTs.
	 */
	MEXITCOUNT
	jmp	doreti
hypervisor_callback_pending:
	DISABLE_INTERRUPTS(%esi)	/* cli */
	jmp	10b
/*
* alltraps entry point. Interrupts are enabled if this was a trap
* gate (TGT), else disabled if this was an interrupt gate (IGT).
* Note that int0x80_syscall is a trap gate. Only page faults
* use an interrupt gate.
*/
/* Common trap path: finish the trap frame and hand it to trap(). */
	SUPERALIGN_TEXT
	.globl	alltraps
	.type	alltraps,@function
alltraps:
	pushal				/* general registers */
	pushl	%ds			/* then the data segment registers */
	pushl	%es
	pushl	%fs
alltraps_with_regs_pushed:
	SET_KERNEL_SREGS
	FAKE_MCOUNT(TF_EIP(%esp))
calltrap:
	pushl	%esp			/* argument: trap frame pointer */
	call	trap
	addl	$4,%esp			/* drop the argument */

	/* Leave through doreti so pending ASTs get handled. */
	MEXITCOUNT
	jmp	doreti
/*
* SYSCALL CALL GATE (old entry point for a.out binaries)
*
* The intersegment call has been set up to specify one dummy parameter.
*
* This leaves a place to put eflags so that the call frame can be
* converted to a trap frame. Note that the eflags is (semi-)bogusly
* pushed into (what will be) tf_err and then copied later into the
* final spot. It has to be done this way because esp can't be just
* temporarily altered for the pushfl - an interrupt might come in
* and clobber the saved cs/eip.
*/
/* Entry stub for the old lcall system-call gate. */
	SUPERALIGN_TEXT
IDTVEC(lcall_syscall)
	pushfl				/* capture eflags on the stack... */
	popl	8(%esp)			/* ...and move it into the tf_eflags slot */
	pushl	$7			/* length of the "lcall 7,0" instruction */
	subl	$4,%esp			/* leave the tf_trapno slot unfilled */
	pushal
	pushl	%ds
	pushl	%es
	pushl	%fs
	SET_KERNEL_SREGS
	FAKE_MCOUNT(TF_EIP(%esp))
	pushl	%esp			/* argument: trap frame pointer */
	call	syscall
	addl	$4,%esp			/* drop the argument */
	MEXITCOUNT
	jmp	doreti
/*
* Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80)
*
* Even though the name says 'int0x80', this is actually a TGT (trap gate)
* rather than an IGT (interrupt gate). Thus interrupts are enabled on
* entry just as they are for a normal syscall.
*/
/* Entry stub for system calls made with "int 0x80". */
	SUPERALIGN_TEXT
IDTVEC(int0x80_syscall)
	pushl	$2			/* length of the "int 0x80" instruction */
	pushl	$0xBEEF			/* marker value, for debugging */
	pushal
	pushl	%ds
	pushl	%es
	pushl	%fs
	SET_KERNEL_SREGS
	FAKE_MCOUNT(TF_EIP(%esp))
	pushl	%esp			/* argument: trap frame pointer */
	call	syscall
	addl	$4,%esp			/* drop the argument */
	MEXITCOUNT
	jmp	doreti
ENTRY(fork_trampoline)
	/*
	 * Call fork_exit(%esi, %ebx, %esp): arguments are pushed right to
	 * left, so the trap frame pointer goes first and the function last.
	 */
	pushl	%esp			/* third argument: trap frame pointer */
	pushl	%ebx			/* second argument: arg1 */
	pushl	%esi			/* first argument: function */
	call	fork_exit
	addl	$12,%esp		/* drop the three arguments */

	/* From here on this is the tail of the syscall path. */
	/* Leave through doreti so pending ASTs get handled. */
	MEXITCOUNT
	jmp	doreti
/*
* To efficiently implement classification of trap and interrupt handlers
* for profiling, there must be only trap handlers between the labels btrap
* and bintr, and only interrupt handlers between the labels bintr and
* eintr. This is implemented (partly) by including files that contain
* some of the handlers. Before including the files, set up a normal asm
* environment so that the included files don't need to know that they are
* included.
*/
/*
 * Before each include (and before the bintr label) align the data
 * section to 2^4 = 16 bytes, then switch back to an aligned text section,
 * so the included file starts from a normal asm environment.
 */
.data
.p2align 4
.text
SUPERALIGN_TEXT
/* Start of the interrupt-handler range used for profiling classification. */
MCOUNT_LABEL(bintr)
#ifdef DEV_APIC
.data
.p2align 4
.text
SUPERALIGN_TEXT
#include <i386/i386/apic_vector.s>
#endif
.data
.p2align 4
.text
SUPERALIGN_TEXT
#include <i386/i386/vm86bios.s>
.text
/* End of the interrupt-handler range. */
MCOUNT_LABEL(eintr)
/*
* void doreti(struct trapframe)
*
* Handle return from interrupts, traps and syscalls.
*/
/* doreti proper: decide whether ASTs may run, run them, then exit. */
	.text
	SUPERALIGN_TEXT
	.type	doreti,@function
doreti:
	FAKE_MCOUNT($bintr)		/* init "from" bintr -> doreti */
doreti_next:
#ifdef notyet
	/*
	 * PSL_VM has to be looked at before the selector, because only
	 * non-VM86 segment registers carry an RPL.
	 */
	testl	$PSL_VM,TF_EFLAGS(%esp)	/* vm86 frame? */
	jz	doreti_notvm86
	movl	PCPU(CURPCB),%ecx
	testl	$PCB_VM86CALL,PCB_FLAGS(%ecx)	/* inside a vm86 call? */
	jz	doreti_ast		/* no: ASTs may run now */
	jmp	doreti_exit
doreti_notvm86:
#endif
	testb	$SEL_RPL_MASK,TF_CS(%esp)	/* going back to user mode? */
	jz	doreti_exit		/* no: ASTs cannot run now */

doreti_ast:
	/*
	 * The AST check must be atomic with the return.  Masking event
	 * delivery is enough even on SMP, because a newly posted AST is
	 * announced with an IPI.
	 */
	DISABLE_INTERRUPTS(%esi)	/* cli */
	movl	PCPU(CURTHREAD),%eax
	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax)
	jz	doreti_exit		/* nothing pending */
	ENABLE_INTERRUPTS(%esi)		/* sti */
	pushl	%esp			/* argument: trap frame pointer */
	call	ast
	addl	$4,%esp			/* drop the argument */
	jmp	doreti_ast		/* check again */
/*
* doreti_exit: pop registers, iret.
*
* The segment register pop is a special case, since it may
* fault if (for example) a sigreturn specifies bad segment
* registers. The fault is handled in trap.c.
*/
/*
 * Final unwind of the trap frame.  Event delivery is unmasked first, so
 * an upcall can arrive at any instruction between scrit and ecrit;
 * hypervisor_callback detects that case by comparing the interrupted
 * %eip against these two labels and goes to critical_region_fixup.
 * critical_fixup_table holds one entry per byte of code in this region:
 * any change to the instructions here must be mirrored in that table.
 */
doreti_exit:
ENABLE_INTERRUPTS(%esi) # reenable event callbacks (sti)
.globl scrit
scrit:
/* %esi still holds the base computed by ENABLE_INTERRUPTS above. */
__TEST_PENDING(%esi)
jnz hypervisor_callback_pending /* More to go */
/*
 * NOTE(review): critical_fixup_table has no entries for MEXITCOUNT, so it
 * presumably expands to no instructions here -- confirm for profiling
 * kernels.
 */
MEXITCOUNT
/*
 * Each segment pop has its own global label so that a fault on it can be
 * identified (see doreti_iret_fault and the matching *_fault labels).
 */
.globl doreti_popl_fs
doreti_popl_fs:
popl %fs
.globl doreti_popl_es
doreti_popl_es:
popl %es
.globl doreti_popl_ds
doreti_popl_ds:
popl %ds
/*
* This is important: as nothing is atomic over here (we can get
* interrupted any time), we use the critical_region_fixup() in
* order to figure out where our stack is. Therefore, do NOT use
* 'popal' here without fixing up the table!
*/
POPA
/* Skip the trap-number and error-code slots. */
addl $8,%esp
.globl doreti_iret
doreti_iret:
/* Return through the iret hypercall stub in the hypercall page. */
jmp hypercall_page + (__HYPERVISOR_iret * 32)
.globl ecrit
ecrit:
/*
* doreti_iret_fault and friends. Alternative return code for
* the case where we get a fault in the doreti_exit code
* above. trap() (i386/i386/trap.c) catches this specific
* case, sends the process a signal and continues in the
* corresponding place in the code below.
*/
/*
 * Each *_fault label is the resume point for a fault on the matching
 * instruction in doreti_exit.  The labels are placed so that the pushes
 * executed from the resume point onward put back exactly what had been
 * popped before the faulting instruction:
 *
 *   doreti_iret_fault     everything was popped: re-reserve the
 *                         trapno/err slots, then push the general and
 *                         segment registers again
 *   doreti_popl_ds_fault  %fs and %es were popped: push them back
 *   doreti_popl_es_fault  only %fs was popped: push it back
 *   doreti_popl_fs_fault  nothing was popped
 *
 * With the frame complete again, it is marked as a protection fault and
 * handed to the common trap path.
 */
ALIGN_TEXT
.globl doreti_iret_fault
doreti_iret_fault:
subl $8,%esp
pushal
pushl %ds
.globl doreti_popl_ds_fault
doreti_popl_ds_fault:
pushl %es
.globl doreti_popl_es_fault
doreti_popl_es_fault:
pushl %fs
.globl doreti_popl_fs_fault
doreti_popl_fs_fault:
movl $0,TF_ERR(%esp) /* XXX should be the error code */
movl $T_PROTFLT,TF_TRAPNO(%esp)
jmp alltraps_with_regs_pushed
/*
# [How we do the fixup]. We want to merge the current stack frame with the
# just-interrupted frame. How we do this depends on where in the critical
# region the interrupted handler was executing, and so how many saved
# registers are in each frame. We do this quickly using the lookup table
# 'critical_fixup_table'. For each byte offset in the critical region, it
# provides the number of bytes which have already been popped from the
# interrupted stack frame.
*/
/*
 * critical_region_fixup
 *
 * Entered from hypervisor_callback with %eax = interrupted %eip, known
 * to lie in [scrit, ecrit), and %esp pointing at the newly built frame.
 *
 * The table lookup turns %eax into the number of bytes the interrupted
 * doreti_exit had already popped from its frame.  That many bytes are
 * copied, highest word first, from the bottom of the new frame to just
 * below offset 0x40 from the current %esp, and %esp is then moved to the
 * start of the copied data, yielding one merged frame.
 *
 * NOTE(review): 0x40 appears to be the size of the new frame (the 13
 * words pushed by hypervisor_callback plus the eip/cs/eflags words
 * pushed on entry) -- confirm against the frame layout.
 *
 * Clobbers %eax, %ecx, %esi, %edi and the flags; does not return to the
 * caller but jumps to hypervisor_callback_pending.
 */
.globl critical_region_fixup
critical_region_fixup:
/* %eax = &critical_fixup_table[%eip - scrit] */
addl $critical_fixup_table-scrit,%eax
movzbl (%eax),%eax # %eax contains num bytes popped
movl %esp,%esi
add %eax,%esi # %esi points at end of src region
movl %esp,%edi
add $0x40,%edi # %edi points at end of dst region
movl %eax,%ecx
shr $2,%ecx # convert bytes to words
je 16f # skip loop if nothing to copy
15: subl $4,%esi # pre-decrementing copy loop
subl $4,%edi
movl (%esi),%eax
movl %eax,(%edi)
/* loop decrements %ecx and repeats until it reaches zero. */
loop 15b
16: movl %edi,%esp # final %edi is top of merged stack
jmp hypervisor_callback_pending
/*
 * critical_fixup_table
 *
 * Indexed by (interrupted %eip - scrit); see critical_region_fixup.
 * Each entry is the number of bytes of the trap frame already popped
 * when execution was interrupted at that code byte.  The trailing
 * comment on each row names the instruction the row is meant to cover,
 * and the number of entries in the row is that instruction's assumed
 * encoded length, so this table must be kept in step with the code
 * between scrit and ecrit.
 */
critical_fixup_table:
.byte 0x0,0x0,0x0 #testb $0x1,(%esi)
.byte 0x0,0x0,0x0,0x0,0x0,0x0 #jne ea
.byte 0x0,0x0 #pop %fs
.byte 0x04 #pop %es
.byte 0x08 #pop %ds
.byte 0x0c #pop %edi
.byte 0x10 #pop %esi
.byte 0x14 #pop %ebp
.byte 0x18 #pop %ebx
.byte 0x1c #pop %ebx
.byte 0x20 #pop %edx
.byte 0x24 #pop %ecx
.byte 0x28 #pop %eax
.byte 0x2c,0x2c,0x2c #add $0x8,%esp
/* Disabled entry for a native one-byte iret; the hypercall jmp is used. */
#if 0
.byte 0x34 #iret
#endif
.byte 0x34,0x34,0x34,0x34,0x34 #HYPERVISOR_iret
/*
 * failsafe_callback: the hypervisor uses this for application faults
 * while it executes.
 *
 * Saves the general registers, calls xen_failsafe_handler, reloads
 * %ds, %es, %fs and %gs from words on the stack, restores the general
 * registers, discards 12 bytes and returns with iret.
 *
 * NOTE(review): pushal stores eight registers at 0..31(%esp), so
 * 28(%esp) is the saved %eax slot rather than the first word above the
 * pushal area, and four segment words are read while only 12 bytes are
 * discarded before iret.  Confirm the intended offsets against the
 * frame the hypervisor builds for this callback.
 */
ENTRY(failsafe_callback)
pushal
call xen_failsafe_handler
/*# call install_safe_pf_handler */
movl 28(%esp),%ebx
/* The numeric labels 1-4 mark the individual segment-register loads. */
1: movl %ebx,%ds
movl 32(%esp),%ebx
2: movl %ebx,%es
movl 36(%esp),%ebx
3: movl %ebx,%fs
movl 40(%esp),%ebx
4: movl %ebx,%gs
/*# call install_normal_pf_handler */
popal
addl $12,%esp
iret