freebsd-skq/sys/x86/include/segments.h
Justin T. Gibbs 76acc41fb7 Implement vector callback for PVHVM and unify event channel implementations
Re-structure Xen HVM support so that:
	- Xen is detected and hypercalls can be performed very
	  early in system startup.
	- Xen interrupt services are implemented using FreeBSD's native
	  interrupt delivery infrastructure.
	- the Xen interrupt service implementation is shared between PV
	  and HVM guests.
	- Xen interrupt handlers can optionally use a filter handler
	  in order to avoid the overhead of dispatch to an interrupt
	  thread.
	- interrupt load can be distributed among all available CPUs.
	- the overhead of accessing the emulated local and I/O apics
	  on HVM is removed for event channel port events.
	- a similar optimization can eventually, and fairly easily,
	  be used to optimize MSI.

Early Xen detection, HVM refactoring, PVHVM interrupt infrastructure,
and misc Xen cleanups:

Sponsored by: Spectra Logic Corporation

Unification of PV & HVM interrupt infrastructure, bug fixes,
and misc Xen cleanups:

Submitted by: Roger Pau Monné
Sponsored by: Citrix Systems R&D

sys/x86/x86/local_apic.c:
sys/amd64/include/apicvar.h:
sys/i386/include/apicvar.h:
sys/amd64/amd64/apic_vector.S:
sys/i386/i386/apic_vector.s:
sys/amd64/amd64/machdep.c:
sys/i386/i386/machdep.c:
sys/i386/xen/exception.s:
sys/x86/include/segments.h:
	Reserve IDT vector 0x93 for the Xen event channel upcall
	interrupt handler.  On Hypervisors that support the direct
	vector callback feature, we can request that this vector be
	called directly by an injected HVM interrupt event, instead
	of a simulated PCI interrupt on the Xen platform PCI device.
	This avoids all of the overhead of dealing with the emulated
	I/O APIC and local APIC.  It also means that the Hypervisor
	can inject these events on any CPU, allowing upcalls for
	different ports to be handled in parallel.

sys/amd64/amd64/mp_machdep.c:
sys/i386/i386/mp_machdep.c:
	Map Xen per-vcpu area during AP startup.

sys/amd64/include/intr_machdep.h:
sys/i386/include/intr_machdep.h:
	Increase the FreeBSD IRQ vector table to include space
	for event channel interrupt sources.

sys/amd64/include/pcpu.h:
sys/i386/include/pcpu.h:
	Remove Xen HVM per-cpu variable data.  These fields are now
	allocated via the dynamic per-cpu scheme.  See xen_intr.c
	for details.

sys/amd64/include/xen/hypercall.h:
sys/dev/xen/blkback/blkback.c:
sys/i386/include/xen/xenvar.h:
sys/i386/xen/clock.c:
sys/i386/xen/xen_machdep.c:
sys/xen/gnttab.c:
	Prefer FreeBSD primatives to Linux ones in Xen support code.

sys/amd64/include/xen/xen-os.h:
sys/i386/include/xen/xen-os.h:
sys/xen/xen-os.h:
sys/dev/xen/balloon/balloon.c:
sys/dev/xen/blkback/blkback.c:
sys/dev/xen/blkfront/blkfront.c:
sys/dev/xen/console/xencons_ring.c:
sys/dev/xen/control/control.c:
sys/dev/xen/netback/netback.c:
sys/dev/xen/netfront/netfront.c:
sys/dev/xen/xenpci/xenpci.c:
sys/i386/i386/machdep.c:
sys/i386/include/pmap.h:
sys/i386/include/xen/xenfunc.h:
sys/i386/isa/npx.c:
sys/i386/xen/clock.c:
sys/i386/xen/mp_machdep.c:
sys/i386/xen/mptable.c:
sys/i386/xen/xen_clock_util.c:
sys/i386/xen/xen_machdep.c:
sys/i386/xen/xen_rtc.c:
sys/xen/evtchn/evtchn_dev.c:
sys/xen/features.c:
sys/xen/gnttab.c:
sys/xen/gnttab.h:
sys/xen/hvm.h:
sys/xen/xenbus/xenbus.c:
sys/xen/xenbus/xenbus_if.m:
sys/xen/xenbus/xenbusb_front.c:
sys/xen/xenbus/xenbusvar.h:
sys/xen/xenstore/xenstore.c:
sys/xen/xenstore/xenstore_dev.c:
sys/xen/xenstore/xenstorevar.h:
	Pull common Xen OS support functions/settings into xen/xen-os.h.

sys/amd64/include/xen/xen-os.h:
sys/i386/include/xen/xen-os.h:
sys/xen/xen-os.h:
	Remove constants, macros, and functions unused in FreeBSD's Xen
	support.

sys/xen/xen-os.h:
sys/i386/xen/xen_machdep.c:
sys/x86/xen/hvm.c:
	Introduce new functions xen_domain(), xen_pv_domain(), and
	xen_hvm_domain().  These are used in favor of #ifdefs so that
	FreeBSD can dynamically detect and adapt to the presence of
	a hypervisor.  The goal is to have an HVM optimized GENERIC,
	but more is necessary before this is possible.

sys/amd64/amd64/machdep.c:
sys/dev/xen/xenpci/xenpcivar.h:
sys/dev/xen/xenpci/xenpci.c:
sys/x86/xen/hvm.c:
sys/sys/kernel.h:
	Refactor magic ioport, Hypercall table and Hypervisor shared
	information page setup, and move it to a dedicated HVM support
	module.

	HVM mode initialization is now triggered during the
	SI_SUB_HYPERVISOR phase of system startup.  This currently
	occurs just after the kernel VM is fully setup which is
	just enough infrastructure to allow the hypercall table
	and shared info page to be properly mapped.

sys/xen/hvm.h:
sys/x86/xen/hvm.c:
	Add definitions and a method for configuring Hypervisor event
	delievery via a direct vector callback.

sys/amd64/include/xen/xen-os.h:
sys/x86/xen/hvm.c:

sys/conf/files:
sys/conf/files.amd64:
sys/conf/files.i386:
	Adjust kernel build to reflect the refactoring of early
	Xen startup code and Xen interrupt services.

sys/dev/xen/blkback/blkback.c:
sys/dev/xen/blkfront/blkfront.c:
sys/dev/xen/blkfront/block.h:
sys/dev/xen/control/control.c:
sys/dev/xen/evtchn/evtchn_dev.c:
sys/dev/xen/netback/netback.c:
sys/dev/xen/netfront/netfront.c:
sys/xen/xenstore/xenstore.c:
sys/xen/evtchn/evtchn_dev.c:
sys/dev/xen/console/console.c:
sys/dev/xen/console/xencons_ring.c
	Adjust drivers to use new xen_intr_*() API.

sys/dev/xen/blkback/blkback.c:
	Since blkback defers all event handling to a taskqueue,
	convert this task queue to a "fast" taskqueue, and schedule
	it via an interrupt filter.  This avoids an unnecessary
	ithread context switch.

sys/xen/xenstore/xenstore.c:
	The xenstore driver is MPSAFE.  Indicate as much when
	registering its interrupt handler.

sys/xen/xenbus/xenbus.c:
sys/xen/xenbus/xenbusvar.h:
	Remove unused event channel APIs.

sys/xen/evtchn.h:
	Remove all kernel Xen interrupt service API definitions
	from this file.  It is now only used for structure and
	ioctl definitions related to the event channel userland
	device driver.

	Update the definitions in this file to match those from
	NetBSD.  Implementing this interface will be necessary for
	Dom0 support.

sys/xen/evtchn/evtchnvar.h:
	Add a header file for implemenation internal APIs related
	to managing event channels event delivery.  This is used
	to allow, for example, the event channel userland device
	driver to access low-level routines that typical kernel
	consumers of event channel services should never access.

sys/xen/interface/event_channel.h:
sys/xen/xen_intr.h:
	Standardize on the evtchn_port_t type for referring to
	an event channel port id.  In order to prevent low-level
	event channel APIs from leaking to kernel consumers who
	should not have access to this data, the type is defined
	twice: Once in the Xen provided event_channel.h, and again
	in xen/xen_intr.h.  The double declaration is protected by
	__XEN_EVTCHN_PORT_DEFINED__ to ensure it is never declared
	twice within a given compilation unit.

sys/xen/xen_intr.h:
sys/xen/evtchn/evtchn.c:
sys/x86/xen/xen_intr.c:
sys/dev/xen/xenpci/evtchn.c:
sys/dev/xen/xenpci/xenpcivar.h:
	New implementation of Xen interrupt services.  This is
	similar in many respects to the i386 PV implementation with
	the exception that events for bound to event channel ports
	(i.e. not IPI, virtual IRQ, or physical IRQ) are further
	optimized to avoid mask/unmask operations that aren't
	necessary for these edge triggered events.

	Stubs exist for supporting physical IRQ binding, but will
	need additional work before this implementation can be
	fully shared between PV and HVM.

sys/amd64/amd64/mp_machdep.c:
sys/i386/i386/mp_machdep.c:
sys/i386/xen/mp_machdep.c
sys/x86/xen/hvm.c:
	Add support for placing vcpu_info into an arbritary memory
	page instead of using HYPERVISOR_shared_info->vcpu_info.
	This allows the creation of domains with more than 32 vcpus.

sys/i386/i386/machdep.c:
sys/i386/xen/clock.c:
sys/i386/xen/xen_machdep.c:
sys/i386/xen/exception.s:
	Add support for new event channle implementation.
2013-08-29 19:52:18 +00:00

288 lines
11 KiB
C

/*-
* Copyright (c) 1989, 1990 William F. Jolitz
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* William Jolitz.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: @(#)segments.h 7.1 (Berkeley) 5/9/91
* $FreeBSD$
*/
#ifndef _X86_SEGMENTS_H_
#define _X86_SEGMENTS_H_
/*
* X86 Segmentation Data Structures and definitions
*/
/*
* Selectors
*/
#define SEL_RPL_MASK 3 /* requester priv level */
#define ISPL(s) ((s)&3) /* priority level of a selector */
#ifdef XEN
#define SEL_KPL 1 /* kernel priority level */
#else
#define SEL_KPL 0 /* kernel priority level */
#endif
#define SEL_UPL 3 /* user priority level */
#define ISLDT(s) ((s)&SEL_LDT) /* is it local or global */
#define SEL_LDT 4 /* local descriptor table */
#define IDXSEL(s) (((s)>>3) & 0x1fff) /* index of selector */
#define LSEL(s,r) (((s)<<3) | SEL_LDT | r) /* a local selector */
#define GSEL(s,r) (((s)<<3) | r) /* a global selector */
/*
* User segment descriptors (%cs, %ds etc for i386 apps. 64 bit wide)
* For long-mode apps, %cs only has the conforming bit in sd_type, the sd_dpl,
* sd_p, sd_l and sd_def32 which must be zero). %ds only has sd_p.
*/
struct segment_descriptor {
unsigned sd_lolimit:16; /* segment extent (lsb) */
unsigned sd_lobase:24; /* segment base address (lsb) */
unsigned sd_type:5; /* segment type */
unsigned sd_dpl:2; /* segment descriptor priority level */
unsigned sd_p:1; /* segment descriptor present */
unsigned sd_hilimit:4; /* segment extent (msb) */
unsigned sd_xx:2; /* unused */
unsigned sd_def32:1; /* default 32 vs 16 bit size */
unsigned sd_gran:1; /* limit granularity (byte/page units)*/
unsigned sd_hibase:8; /* segment base address (msb) */
} __packed;
struct user_segment_descriptor {
unsigned sd_lolimit:16; /* segment extent (lsb) */
unsigned sd_lobase:24; /* segment base address (lsb) */
unsigned sd_type:5; /* segment type */
unsigned sd_dpl:2; /* segment descriptor priority level */
unsigned sd_p:1; /* segment descriptor present */
unsigned sd_hilimit:4; /* segment extent (msb) */
unsigned sd_xx:1; /* unused */
unsigned sd_long:1; /* long mode (cs only) */
unsigned sd_def32:1; /* default 32 vs 16 bit size */
unsigned sd_gran:1; /* limit granularity (byte/page units)*/
unsigned sd_hibase:8; /* segment base address (msb) */
} __packed;
#define USD_GETBASE(sd) (((sd)->sd_lobase) | (sd)->sd_hibase << 24)
#define USD_SETBASE(sd, b) (sd)->sd_lobase = (b); \
(sd)->sd_hibase = ((b) >> 24);
#define USD_GETLIMIT(sd) (((sd)->sd_lolimit) | (sd)->sd_hilimit << 16)
#define USD_SETLIMIT(sd, l) (sd)->sd_lolimit = (l); \
(sd)->sd_hilimit = ((l) >> 16);
#ifdef __i386__
/*
* Gate descriptors (e.g. indirect descriptors)
*/
struct gate_descriptor {
unsigned gd_looffset:16; /* gate offset (lsb) */
unsigned gd_selector:16; /* gate segment selector */
unsigned gd_stkcpy:5; /* number of stack wds to cpy */
unsigned gd_xx:3; /* unused */
unsigned gd_type:5; /* segment type */
unsigned gd_dpl:2; /* segment descriptor priority level */
unsigned gd_p:1; /* segment descriptor present */
unsigned gd_hioffset:16; /* gate offset (msb) */
} __packed;
/*
* Generic descriptor
*/
union descriptor {
struct segment_descriptor sd;
struct gate_descriptor gd;
};
#else
/*
* Gate descriptors (e.g. indirect descriptors, trap, interrupt etc. 128 bit)
* Only interrupt and trap gates have gd_ist.
*/
struct gate_descriptor {
uint64_t gd_looffset:16; /* gate offset (lsb) */
uint64_t gd_selector:16; /* gate segment selector */
uint64_t gd_ist:3; /* IST table index */
uint64_t gd_xx:5; /* unused */
uint64_t gd_type:5; /* segment type */
uint64_t gd_dpl:2; /* segment descriptor priority level */
uint64_t gd_p:1; /* segment descriptor present */
uint64_t gd_hioffset:48; /* gate offset (msb) */
uint64_t sd_xx1:32;
} __packed;
/*
* Generic descriptor
*/
union descriptor {
struct user_segment_descriptor sd;
struct gate_descriptor gd;
};
#endif
/* system segments and gate types */
#define SDT_SYSNULL 0 /* system null */
#define SDT_SYS286TSS 1 /* system 286 TSS available */
#define SDT_SYSLDT 2 /* system local descriptor table */
#define SDT_SYS286BSY 3 /* system 286 TSS busy */
#define SDT_SYS286CGT 4 /* system 286 call gate */
#define SDT_SYSTASKGT 5 /* system task gate */
#define SDT_SYS286IGT 6 /* system 286 interrupt gate */
#define SDT_SYS286TGT 7 /* system 286 trap gate */
#define SDT_SYSNULL2 8 /* system null again */
#define SDT_SYS386TSS 9 /* system 386 TSS available */
#define SDT_SYSTSS 9 /* system available 64 bit TSS */
#define SDT_SYSNULL3 10 /* system null again */
#define SDT_SYS386BSY 11 /* system 386 TSS busy */
#define SDT_SYSBSY 11 /* system busy 64 bit TSS */
#define SDT_SYS386CGT 12 /* system 386 call gate */
#define SDT_SYSCGT 12 /* system 64 bit call gate */
#define SDT_SYSNULL4 13 /* system null again */
#define SDT_SYS386IGT 14 /* system 386 interrupt gate */
#define SDT_SYSIGT 14 /* system 64 bit interrupt gate */
#define SDT_SYS386TGT 15 /* system 386 trap gate */
#define SDT_SYSTGT 15 /* system 64 bit trap gate */
/* memory segment types */
#define SDT_MEMRO 16 /* memory read only */
#define SDT_MEMROA 17 /* memory read only accessed */
#define SDT_MEMRW 18 /* memory read write */
#define SDT_MEMRWA 19 /* memory read write accessed */
#define SDT_MEMROD 20 /* memory read only expand dwn limit */
#define SDT_MEMRODA 21 /* memory read only expand dwn limit accessed */
#define SDT_MEMRWD 22 /* memory read write expand dwn limit */
#define SDT_MEMRWDA 23 /* memory read write expand dwn limit accessed*/
#define SDT_MEME 24 /* memory execute only */
#define SDT_MEMEA 25 /* memory execute only accessed */
#define SDT_MEMER 26 /* memory execute read */
#define SDT_MEMERA 27 /* memory execute read accessed */
#define SDT_MEMEC 28 /* memory execute only conforming */
#define SDT_MEMEAC 29 /* memory execute only accessed conforming */
#define SDT_MEMERC 30 /* memory execute read conforming */
#define SDT_MEMERAC 31 /* memory execute read accessed conforming */
/*
* Size of IDT table
*/
#define NIDT 256 /* 32 reserved, 0x80 syscall, most are h/w */
#define NRSVIDT 32 /* reserved entries for cpu exceptions */
/*
* Entries in the Interrupt Descriptor Table (IDT)
*/
#define IDT_DE 0 /* #DE: Divide Error */
#define IDT_DB 1 /* #DB: Debug */
#define IDT_NMI 2 /* Nonmaskable External Interrupt */
#define IDT_BP 3 /* #BP: Breakpoint */
#define IDT_OF 4 /* #OF: Overflow */
#define IDT_BR 5 /* #BR: Bound Range Exceeded */
#define IDT_UD 6 /* #UD: Undefined/Invalid Opcode */
#define IDT_NM 7 /* #NM: No Math Coprocessor */
#define IDT_DF 8 /* #DF: Double Fault */
#define IDT_FPUGP 9 /* Coprocessor Segment Overrun */
#define IDT_TS 10 /* #TS: Invalid TSS */
#define IDT_NP 11 /* #NP: Segment Not Present */
#define IDT_SS 12 /* #SS: Stack Segment Fault */
#define IDT_GP 13 /* #GP: General Protection Fault */
#define IDT_PF 14 /* #PF: Page Fault */
#define IDT_MF 16 /* #MF: FPU Floating-Point Error */
#define IDT_AC 17 /* #AC: Alignment Check */
#define IDT_MC 18 /* #MC: Machine Check */
#define IDT_XF 19 /* #XF: SIMD Floating-Point Exception */
#define IDT_IO_INTS NRSVIDT /* Base of IDT entries for I/O interrupts. */
#define IDT_SYSCALL 0x80 /* System Call Interrupt Vector */
#define IDT_DTRACE_RET 0x92 /* DTrace pid provider Interrupt Vector */
#define IDT_EVTCHN 0x93 /* Xen HVM Event Channel Interrupt Vector */
#if defined(__i386__) || defined(__ia64__)
/*
* Entries in the Global Descriptor Table (GDT)
* Note that each 4 entries share a single 32 byte L1 cache line.
* Some of the fast syscall instructions require a specific order here.
*/
#define GNULL_SEL 0 /* Null Descriptor */
#define GPRIV_SEL 1 /* SMP Per-Processor Private Data */
#define GUFS_SEL 2 /* User %fs Descriptor (order critical: 1) */
#define GUGS_SEL 3 /* User %gs Descriptor (order critical: 2) */
#define GCODE_SEL 4 /* Kernel Code Descriptor (order critical: 1) */
#define GDATA_SEL 5 /* Kernel Data Descriptor (order critical: 2) */
#define GUCODE_SEL 6 /* User Code Descriptor (order critical: 3) */
#define GUDATA_SEL 7 /* User Data Descriptor (order critical: 4) */
#define GBIOSLOWMEM_SEL 8 /* BIOS low memory access (must be entry 8) */
#define GPROC0_SEL 9 /* Task state process slot zero and up */
#define GLDT_SEL 10 /* Default User LDT */
#define GUSERLDT_SEL 11 /* User LDT */
#define GPANIC_SEL 12 /* Task state to consider panic from */
#define GBIOSCODE32_SEL 13 /* BIOS interface (32bit Code) */
#define GBIOSCODE16_SEL 14 /* BIOS interface (16bit Code) */
#define GBIOSDATA_SEL 15 /* BIOS interface (Data) */
#define GBIOSUTIL_SEL 16 /* BIOS interface (Utility) */
#define GBIOSARGS_SEL 17 /* BIOS interface (Arguments) */
#define GNDIS_SEL 18 /* For the NDIS layer */
#ifdef XEN
#define NGDT 9
#else
#define NGDT 19
#endif
/*
* Entries in the Local Descriptor Table (LDT)
*/
#define LSYS5CALLS_SEL 0 /* forced by intel BCS */
#define LSYS5SIGR_SEL 1
#define L43BSDCALLS_SEL 2 /* notyet */
#define LUCODE_SEL 3
#define LSOL26CALLS_SEL 4 /* Solaris >= 2.6 system call gate */
#define LUDATA_SEL 5
/* separate stack, es,fs,gs sels ? */
/* #define LPOSIXCALLS_SEL 5*/ /* notyet */
#define LBSDICALLS_SEL 16 /* BSDI system call gate */
#define NLDT (LBSDICALLS_SEL + 1)
#else /* !__i386__ && !__ia64__ */
/*
* Entries in the Global Descriptor Table (GDT)
*/
#define GNULL_SEL 0 /* Null Descriptor */
#define GNULL2_SEL 1 /* Null Descriptor */
#define GUFS32_SEL 2 /* User 32 bit %fs Descriptor */
#define GUGS32_SEL 3 /* User 32 bit %gs Descriptor */
#define GCODE_SEL 4 /* Kernel Code Descriptor */
#define GDATA_SEL 5 /* Kernel Data Descriptor */
#define GUCODE32_SEL 6 /* User 32 bit code Descriptor */
#define GUDATA_SEL 7 /* User 32/64 bit Data Descriptor */
#define GUCODE_SEL 8 /* User 64 bit Code Descriptor */
#define GPROC0_SEL 9 /* TSS for entering kernel etc */
/* slot 10 is second half of GPROC0_SEL */
#define GUSERLDT_SEL 11 /* LDT */
/* slot 12 is second half of GUSERLDT_SEL */
#define NGDT 13
#endif /* __i386__ || __ia64__ */
#endif /* !_X86_SEGMENTS_H_ */