First cut at porting the kernel portions of 221828 and 221905 from the BHyVe reference branch to HEAD.
This commit is contained in:
John Baldwin 2011-05-14 20:35:01 +00:00
commit 34a6b2d627
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/projects/bhyve/; revision=221914
60 changed files with 11201 additions and 104 deletions

View File

@ -57,8 +57,15 @@ IDTVEC(vec_name) ; \
PUSH_FRAME ; \
FAKE_MCOUNT(TF_RIP(%rsp)) ; \
movq lapic, %rdx ; /* pointer to local APIC */ \
testq %rdx, %rdx; \
jnz 3f; \
movl $MSR_APIC_ISR ## index, %ecx; \
rdmsr; \
jmp 4f; \
3: ; \
movl LA_ISR + 16 * (index)(%rdx), %eax ; /* load ISR */ \
bsrl %eax, %eax ; /* index of highest set bit in ISR */ \
4: ; \
bsrl %eax, %eax ; /* index of highest set bit in ISR */ \
jz 1f ; \
addl $(32 * index),%eax ; \
movq %rsp, %rsi ; \
@ -129,6 +136,26 @@ IDTVEC(errorint)
jmp doreti
#ifdef SMP
/*
* We assume that %rax is being saved/restored outside of this macro
*/
#define DO_EOI \
movq lapic, %rax; \
testq %rax, %rax; \
jz 8f; \
movl $0, LA_EOI(%rax); \
jmp 9f; \
8:; \
pushq %rcx; \
pushq %rdx; \
xorl %edx, %edx; /* eax is already zero */ \
movl $MSR_APIC_EOI, %ecx; \
wrmsr; \
popq %rdx; \
popq %rcx; \
9:
/*
* Global address space TLB shootdown.
*/
@ -153,8 +180,7 @@ IDTVEC(invltlb)
movq %cr3, %rax /* invalidate the TLB */
movq %rax, %cr3
movq lapic, %rax
movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
DO_EOI
lock
incl smp_tlb_wait
@ -186,8 +212,7 @@ IDTVEC(invlpg)
movq smp_tlb_addr1, %rax
invlpg (%rax) /* invalidate single page */
movq lapic, %rax
movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
DO_EOI
lock
incl smp_tlb_wait
@ -224,8 +249,7 @@ IDTVEC(invlrng)
cmpq %rax, %rdx
jb 1b
movq lapic, %rax
movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
DO_EOI
lock
incl smp_tlb_wait
@ -252,8 +276,7 @@ IDTVEC(invlcache)
wbinvd
movq lapic, %rax
movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
DO_EOI
lock
incl smp_tlb_wait
@ -269,9 +292,8 @@ IDTVEC(invlcache)
IDTVEC(ipi_intr_bitmap_handler)
PUSH_FRAME
movq lapic, %rdx
movl $0, LA_EOI(%rdx) /* End Of Interrupt to APIC */
DO_EOI
FAKE_MCOUNT(TF_RIP(%rsp))
call ipi_bitmap_handler
@ -286,8 +308,7 @@ IDTVEC(ipi_intr_bitmap_handler)
IDTVEC(cpustop)
PUSH_FRAME
movq lapic, %rax
movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
DO_EOI
call cpustop_handler
jmp doreti
@ -300,8 +321,7 @@ IDTVEC(cpustop)
IDTVEC(cpususpend)
PUSH_FRAME
movq lapic, %rax
movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
DO_EOI
call cpususpend_handler
@ -323,7 +343,6 @@ IDTVEC(rendezvous)
incq (%rax)
#endif
call smp_rendezvous_action
movq lapic, %rax
movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
DO_EOI
jmp doreti
#endif /* SMP */

View File

@ -78,6 +78,8 @@ static STAILQ_HEAD(, pic) pics;
#ifdef SMP
static int assign_cpu;
static int round_robin_interrupts = 1;
TUNABLE_INT("round_robin_interrupts", &round_robin_interrupts);
#endif
static int intr_assign_cpu(void *arg, u_char cpu);
@ -460,6 +462,10 @@ intr_next_cpu(void)
if (!assign_cpu)
return (PCPU_GET(apic_id));
/* All interrupts go to the BSP if not allowed to round robin */
if (!round_robin_interrupts)
return (cpu_apic_ids[0]);
mtx_lock_spin(&icu_lock);
apic_id = cpu_apic_ids[current_cpu];
do {

View File

@ -27,6 +27,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_pmap.h"
#include "opt_watchdog.h"
#include <sys/param.h>

View File

@ -146,6 +146,26 @@ struct cpu_info {
int cpu_apic_ids[MAXCPU];
int apic_cpuids[MAX_APIC_ID + 1];
/*
* Trampoline for hypervisor direct 64-bit jump.
*
* 0 - signature for guest->host verification
* 8 - virtual address of this page
* 16 - instruction virtual address
* 24 - stack pointer virtual address
* 32 - CR3, physical address of kernel page table
* 40 - 24-byte area for null/code/data GDT entries
*/
#define MP_V64T_SIG 0xcafebabecafebabeULL
struct mp_v64tramp {
	uint64_t mt_sig;	/* MP_V64T_SIG: guest->host verification */
	uint64_t mt_virt;	/* virtual address of this page */
	uint64_t mt_eip;	/* AP entry point (instruction virtual address) */
	uint64_t mt_rsp;	/* AP stack pointer virtual address */
	uint64_t mt_cr3;	/* physical address of kernel page table */
	uint64_t mt_gdtr[3];	/* 24-byte area for null/code/data GDT entries */
};
/* Holds pending bitmap based IPIs per CPU */
static volatile u_int cpu_ipi_pending[MAXCPU];
@ -948,6 +968,29 @@ start_all_aps(void)
bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8;
bootAP = cpu;
/*
* If running in a VM that doesn't support the unrestricted
* guest 16-bit mode, forget most of the above and create
* the data block that allows the hypervisor to direct-jump
* into 64-bit mode. Copy this over the top of the 16-bit
* bootstrap. The startup-IPI informs the hypervisor which
* physical page this data block lies in. The hypervisor
* will then use the block to initialise register state of
* the AP in an almost identical fashion to how it builds
* the BSP initial register state.
*/
if (testenv("hw.use_bvm_mptramp")) {
struct mp_v64tramp mv;
bzero(&mv, sizeof(mv));
mv.mt_sig = MP_V64T_SIG;
mv.mt_virt = (uint64_t) va;
mv.mt_eip = (uint64_t) init_secondary;
mv.mt_rsp = (uint64_t) bootSTK;
mv.mt_cr3 = KPML4phys;
bcopy(&mv, (void *) va, sizeof(mv));
}
/* attempt to start the Application Processor */
if (!start_ap(apic_id)) {
/* restore the warmstart vector */

View File

@ -514,8 +514,10 @@ cpu_reset_proxy()
{
cpu_reset_proxy_active = 1;
while (cpu_reset_proxy_active == 1)
while (cpu_reset_proxy_active == 1) {
ia32_pause();
; /* Wait for other cpu to see that we've started */
}
stop_cpus((1<<cpu_reset_proxyid));
printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
DELAY(1000000);
@ -547,14 +549,17 @@ cpu_reset()
atomic_store_rel_int(&started_cpus, 1 << 0);
cnt = 0;
while (cpu_reset_proxy_active == 0 && cnt < 10000000)
while (cpu_reset_proxy_active == 0 && cnt < 10000000) {
ia32_pause();
cnt++; /* Wait for BSP to announce restart */
}
if (cpu_reset_proxy_active == 0)
printf("cpu_reset: Failed to restart BSP\n");
enable_intr();
cpu_reset_proxy_active = 2;
while (1);
while (1)
ia32_pause();
/* NOTREACHED */
}

View File

@ -67,6 +67,7 @@ options INCLUDE_CONFIG_FILE # Include this file in kernel
# Debugging for use in -current
options KDB # Enable kernel debugger support.
options KDB_TRACE
options DDB # Support DDB.
options GDB # Support remote GDB.
options DEADLKRES # Enable the deadlock resolver
@ -337,3 +338,8 @@ device fwe # Ethernet over FireWire (non-standard!)
device fwip # IP over FireWire (RFC 2734,3146)
device dcons # Dumb console driver
device dcons_crom # Configuration ROM for dcons
# bhyve options
device bvmconsole # brain dead simple bvm console
device bvmdebug # brain dead simple bvm gdb pipe
device mptable

View File

@ -315,11 +315,42 @@
#define MSR_MC4_ADDR 0x412
#define MSR_MC4_MISC 0x413
/* X2APIC MSRs */
#define MSR_APIC_ID 0x802
#define MSR_APIC_VERSION 0x803
#define MSR_APIC_TPR 0x808
#define MSR_APIC_EOI 0x80b
#define MSR_APIC_LDR 0x80d
#define MSR_APIC_SVR 0x80f
#define MSR_APIC_ISR0 0x810
#define MSR_APIC_ISR1 0x811
#define MSR_APIC_ISR2 0x812
#define MSR_APIC_ISR3 0x813
#define MSR_APIC_ISR4 0x814
#define MSR_APIC_ISR5 0x815
#define MSR_APIC_ISR6 0x816
#define MSR_APIC_ISR7 0x817
#define MSR_APIC_TMR0 0x818
#define MSR_APIC_IRR0 0x820
#define MSR_APIC_ESR 0x828
#define MSR_APIC_LVT_CMCI 0x82F
#define MSR_APIC_ICR 0x830
#define MSR_APIC_LVT_TIMER 0x832
#define MSR_APIC_LVT_THERMAL 0x833
#define MSR_APIC_LVT_PCINT 0x834
#define MSR_APIC_LVT_LINT0 0x835
#define MSR_APIC_LVT_LINT1 0x836
#define MSR_APIC_LVT_ERROR 0x837
#define MSR_APIC_ICR_TIMER 0x838
#define MSR_APIC_CCR_TIMER 0x839
#define MSR_APIC_DCR_TIMER 0x83e
/*
* Constants related to MSR's.
*/
#define APICBASE_RESERVED 0x000006ff
#define APICBASE_RESERVED 0x000002ff
#define APICBASE_BSP 0x00000100
#define APICBASE_X2APIC 0x00000400
#define APICBASE_ENABLED 0x00000800
#define APICBASE_ADDRESS 0xfffff000

268
sys/amd64/include/vmm.h Normal file
View File

@ -0,0 +1,268 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD: vmm.h 482 2011-05-09 21:22:43Z grehan $
*/
#ifndef _VMM_H_
#define _VMM_H_
#ifdef _KERNEL
#define VM_MAX_NAMELEN 32
struct vm;
struct vm_memory_segment;
struct seg_desc;
struct vm_exit;
struct vm_run;
struct vlapic;
typedef int (*vmm_init_func_t)(void);
typedef int (*vmm_cleanup_func_t)(void);
typedef void * (*vmi_init_func_t)(struct vm *vm); /* instance specific apis */
typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
struct vm_exit *vmexit);
typedef void (*vmi_cleanup_func_t)(void *vmi);
typedef int (*vmi_mmap_func_t)(void *vmi, vm_paddr_t gpa, vm_paddr_t hpa,
size_t length, vm_memattr_t attr,
int prot, boolean_t superpages_ok);
typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num,
uint64_t *retval);
typedef int (*vmi_set_register_t)(void *vmi, int vcpu, int num,
uint64_t val);
typedef int (*vmi_get_desc_t)(void *vmi, int vcpu, int num,
struct seg_desc *desc);
typedef int (*vmi_set_desc_t)(void *vmi, int vcpu, int num,
struct seg_desc *desc);
typedef int (*vmi_inject_event_t)(void *vmi, int vcpu,
int type, int vector,
uint32_t code, int code_valid);
typedef int (*vmi_inject_nmi_t)(void *vmi, int vcpu);
typedef int (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
typedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
struct vmm_ops {
vmm_init_func_t init; /* module wide initialization */
vmm_cleanup_func_t cleanup;
vmi_init_func_t vminit; /* vm-specific initialization */
vmi_run_func_t vmrun;
vmi_cleanup_func_t vmcleanup;
vmi_mmap_func_t vmmmap;
vmi_get_register_t vmgetreg;
vmi_set_register_t vmsetreg;
vmi_get_desc_t vmgetdesc;
vmi_set_desc_t vmsetdesc;
vmi_inject_event_t vminject;
vmi_inject_nmi_t vmnmi;
vmi_get_cap_t vmgetcap;
vmi_set_cap_t vmsetcap;
};
extern struct vmm_ops vmm_ops_intel;
extern struct vmm_ops vmm_ops_amd;
struct vm *vm_create(const char *name);
void vm_destroy(struct vm *vm);
const char *vm_name(struct vm *vm);
int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t *ret_hpa);
int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
vm_paddr_t vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t size);
int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
struct vm_memory_segment *seg);
int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
struct seg_desc *ret_desc);
int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
struct seg_desc *desc);
int vm_get_pinning(struct vm *vm, int vcpu, int *cpuid);
int vm_set_pinning(struct vm *vm, int vcpu, int cpuid);
int vm_run(struct vm *vm, struct vm_run *vmrun);
int vm_inject_event(struct vm *vm, int vcpu, int type,
int vector, uint32_t error_code, int error_code_valid);
int vm_inject_nmi(struct vm *vm, int vcpu);
uint64_t *vm_guest_msrs(struct vm *vm, int cpu);
struct vlapic *vm_lapic(struct vm *vm, int cpu);
int vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
int vm_set_capability(struct vm *vm, int vcpu, int type, int val);
void vm_activate_cpu(struct vm *vm, int vcpu);
cpumask_t vm_active_cpus(struct vm *vm);
/*
* Return 1 if device indicated by bus/slot/func is supposed to be a
* pci passthrough device.
*
* Return 0 otherwise.
*/
int vmm_is_pptdev(int bus, int slot, int func);
void *vm_iommu_domain(struct vm *vm);
#define VCPU_STOPPED 0
#define VCPU_RUNNING 1
void vm_set_run_state(struct vm *vm, int vcpu, int running);
int vm_get_run_state(struct vm *vm, int vcpu, int *hostcpu);
void *vcpu_stats(struct vm *vm, int vcpu);
/*
 * Return non-zero iff the vcpu is currently executing; optionally report
 * the host cpu it is running on via 'hostcpu'.
 */
static int __inline
vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
{
	int state;

	state = vm_get_run_state(vm, vcpu, hostcpu);
	return (state == VCPU_RUNNING);
}
static cpumask_t __inline
vcpu_mask(int vcpuid)
{
return ((cpumask_t)1 << vcpuid);
}
#endif /* _KERNEL */
#define VM_MAXCPU 8 /* maximum virtual cpus */
/*
* Identifiers for events that can be injected into the VM
*/
enum vm_event_type {
VM_EVENT_NONE,
VM_HW_INTR,
VM_NMI,
VM_HW_EXCEPTION,
VM_SW_INTR,
VM_PRIV_SW_EXCEPTION,
VM_SW_EXCEPTION,
VM_EVENT_MAX
};
/*
* Identifiers for architecturally defined registers.
*/
enum vm_reg_name {
VM_REG_GUEST_RAX,
VM_REG_GUEST_RBX,
VM_REG_GUEST_RCX,
VM_REG_GUEST_RDX,
VM_REG_GUEST_RSI,
VM_REG_GUEST_RDI,
VM_REG_GUEST_RBP,
VM_REG_GUEST_R8,
VM_REG_GUEST_R9,
VM_REG_GUEST_R10,
VM_REG_GUEST_R11,
VM_REG_GUEST_R12,
VM_REG_GUEST_R13,
VM_REG_GUEST_R14,
VM_REG_GUEST_R15,
VM_REG_GUEST_CR0,
VM_REG_GUEST_CR3,
VM_REG_GUEST_CR4,
VM_REG_GUEST_DR7,
VM_REG_GUEST_RSP,
VM_REG_GUEST_RIP,
VM_REG_GUEST_RFLAGS,
VM_REG_GUEST_ES,
VM_REG_GUEST_CS,
VM_REG_GUEST_SS,
VM_REG_GUEST_DS,
VM_REG_GUEST_FS,
VM_REG_GUEST_GS,
VM_REG_GUEST_LDTR,
VM_REG_GUEST_TR,
VM_REG_GUEST_IDTR,
VM_REG_GUEST_GDTR,
VM_REG_GUEST_EFER,
VM_REG_LAST
};
/*
* Identifiers for optional vmm capabilities
*/
enum vm_cap_type {
VM_CAP_HALT_EXIT,
VM_CAP_MTRAP_EXIT,
VM_CAP_PAUSE_EXIT,
VM_CAP_UNRESTRICTED_GUEST,
VM_CAP_MAX
};
/*
* The 'access' field has the format specified in Table 21-2 of the Intel
* Architecture Manual vol 3b.
*
* XXX The contents of the 'access' field are architecturally defined except
* bit 16 - Segment Unusable.
*/
struct seg_desc {
uint64_t base;
uint32_t limit;
uint32_t access;
};
enum vm_exitcode {
VM_EXITCODE_INOUT,
VM_EXITCODE_VMX,
VM_EXITCODE_BOGUS,
VM_EXITCODE_RDMSR,
VM_EXITCODE_WRMSR,
VM_EXITCODE_HLT,
VM_EXITCODE_MTRAP,
VM_EXITCODE_PAUSE,
VM_EXITCODE_MAX,
};
struct vm_exit {
enum vm_exitcode exitcode;
int inst_length; /* 0 means unknown */
uint64_t rip;
union {
struct {
uint16_t bytes:3; /* 1 or 2 or 4 */
uint16_t in:1; /* out is 0, in is 1 */
uint16_t string:1;
uint16_t rep:1;
uint16_t port;
uint32_t eax; /* valid for out */
} inout;
/*
* VMX specific payload. Used when there is no "better"
* exitcode to represent the VM-exit.
*/
struct {
int error; /* vmx inst error */
uint32_t exit_reason;
uint64_t exit_qualification;
} vmx;
struct {
uint32_t code; /* ecx value */
uint64_t wval;
} msr;
} u;
};
#endif /* _VMM_H_ */

191
sys/amd64/include/vmm_dev.h Normal file
View File

@ -0,0 +1,191 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD: vmm_dev.h 482 2011-05-09 21:22:43Z grehan $
*/
#ifndef _VMM_DEV_H_
#define _VMM_DEV_H_
#ifdef _KERNEL
void vmmdev_init(void);
void vmmdev_cleanup(void);
#endif
struct vm_memory_segment {
vm_paddr_t hpa; /* out */
vm_paddr_t gpa; /* in */
size_t len; /* in */
};
struct vm_register {
int cpuid;
int regnum; /* enum vm_reg_name */
uint64_t regval;
};
struct vm_seg_desc { /* data or code segment */
int cpuid;
int regnum; /* enum vm_reg_name */
struct seg_desc desc;
};
struct vm_pin {
int vm_cpuid;
int host_cpuid; /* -1 to unpin */
};
struct vm_run {
int cpuid;
uint64_t rip; /* start running here */
struct vm_exit vm_exit;
};
struct vm_event {
int cpuid;
enum vm_event_type type;
int vector;
uint32_t error_code;
int error_code_valid;
};
struct vm_lapic_irq {
int cpuid;
int vector;
};
struct vm_capability {
int cpuid;
enum vm_cap_type captype;
int capval;
int allcpus;
};
struct vm_pptdev {
int bus;
int slot;
int func;
};
struct vm_pptdev_mmio {
int bus;
int slot;
int func;
vm_paddr_t gpa;
vm_paddr_t hpa;
size_t len;
};
struct vm_pptdev_msi {
int vcpu;
int bus;
int slot;
int func;
int numvec; /* 0 means disabled */
int vector;
int destcpu;
};
struct vm_nmi {
int cpuid;
};
#define MAX_VM_STATS 64
struct vm_stats {
int cpuid; /* in */
int num_entries; /* out */
struct timeval tv;
uint64_t statbuf[MAX_VM_STATS];
};
struct vm_stat_desc {
int index; /* in */
char desc[128]; /* out */
};
enum {
IOCNUM_RUN,
IOCNUM_SET_PINNING,
IOCNUM_GET_PINNING,
IOCNUM_MAP_MEMORY,
IOCNUM_GET_MEMORY_SEG,
IOCNUM_SET_REGISTER,
IOCNUM_GET_REGISTER,
IOCNUM_SET_SEGMENT_DESCRIPTOR,
IOCNUM_GET_SEGMENT_DESCRIPTOR,
IOCNUM_INJECT_EVENT,
IOCNUM_LAPIC_IRQ,
IOCNUM_SET_CAPABILITY,
IOCNUM_GET_CAPABILITY,
IOCNUM_BIND_PPTDEV,
IOCNUM_UNBIND_PPTDEV,
IOCNUM_MAP_PPTDEV_MMIO,
IOCNUM_PPTDEV_MSI,
IOCNUM_INJECT_NMI,
IOCNUM_VM_STATS,
IOCNUM_VM_STAT_DESC,
};
#define VM_RUN \
_IOWR('v', IOCNUM_RUN, struct vm_run)
#define VM_SET_PINNING \
_IOW('v', IOCNUM_SET_PINNING, struct vm_pin)
#define VM_GET_PINNING \
_IOWR('v', IOCNUM_GET_PINNING, struct vm_pin)
#define VM_MAP_MEMORY \
_IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment)
#define VM_GET_MEMORY_SEG \
_IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment)
#define VM_SET_REGISTER \
_IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
#define VM_GET_REGISTER \
_IOWR('v', IOCNUM_GET_REGISTER, struct vm_register)
#define VM_SET_SEGMENT_DESCRIPTOR \
_IOW('v', IOCNUM_SET_SEGMENT_DESCRIPTOR, struct vm_seg_desc)
#define VM_GET_SEGMENT_DESCRIPTOR \
_IOWR('v', IOCNUM_GET_SEGMENT_DESCRIPTOR, struct vm_seg_desc)
#define VM_INJECT_EVENT \
_IOW('v', IOCNUM_INJECT_EVENT, struct vm_event)
#define VM_LAPIC_IRQ \
_IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq)
#define VM_SET_CAPABILITY \
_IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability)
#define VM_GET_CAPABILITY \
_IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability)
#define VM_BIND_PPTDEV \
_IOW('v', IOCNUM_BIND_PPTDEV, struct vm_pptdev)
#define VM_UNBIND_PPTDEV \
_IOW('v', IOCNUM_UNBIND_PPTDEV, struct vm_pptdev)
#define VM_MAP_PPTDEV_MMIO \
_IOW('v', IOCNUM_MAP_PPTDEV_MMIO, struct vm_pptdev_mmio)
#define VM_PPTDEV_MSI \
_IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi)
#define VM_INJECT_NMI \
_IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi)
#define VM_STATS \
_IOWR('v', IOCNUM_VM_STATS, struct vm_stats)
#define VM_STAT_DESC \
_IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc)
#endif

247
sys/amd64/vmm/amd/amdv.c Normal file
View File

@ -0,0 +1,247 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <machine/vmm.h>
#include "io/iommu.h"
/* Module-wide SVM initialization: AMD support is not yet implemented. */
static int
amdv_init(void)
{

	printf("amdv_init: not implemented\n");
	return (ENXIO);
}
/* Module-wide SVM teardown: AMD support is not yet implemented. */
static int
amdv_cleanup(void)
{

	printf("amdv_cleanup: not implemented\n");
	return (ENXIO);
}
/* Per-VM SVM instance creation stub; always fails with NULL. */
static void *
amdv_vminit(struct vm *vm)
{

	printf("amdv_vminit: not implemented\n");
	return (NULL);
}
/* SVM vcpu execution stub. */
static int
amdv_vmrun(void *arg, int vcpu, register_t rip, struct vm_exit *vmexit)
{

	printf("amdv_vmrun: not implemented\n");
	return (ENXIO);
}
/* Per-VM SVM instance teardown stub. */
static void
amdv_vmcleanup(void *arg)
{

	printf("amdv_vmcleanup: not implemented\n");
}
/* Nested-paging mapping stub. */
static int
amdv_vmmmap(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
	    vm_memattr_t attr, int prot, boolean_t spok)
{

	printf("amdv_vmmmap: not implemented\n");
	return (EINVAL);
}
/* Guest register read stub. */
static int
amdv_getreg(void *arg, int vcpu, int regnum, uint64_t *retval)
{

	printf("amdv_getreg: not implemented\n");
	return (EINVAL);
}
/* Guest register write stub. */
static int
amdv_setreg(void *arg, int vcpu, int regnum, uint64_t val)
{

	printf("amdv_setreg: not implemented\n");
	return (EINVAL);
}
/*
 * Guest segment descriptor read stub.
 *
 * Fix: the message previously said "amdv_get_desc", which matches neither
 * this function's name nor the naming used by every other stub here.
 */
static int
amdv_getdesc(void *vmi, int vcpu, int num, struct seg_desc *desc)
{

	printf("amdv_getdesc: not implemented\n");
	return (EINVAL);
}
/*
 * Guest segment descriptor write stub.
 *
 * Fix: copy-paste defect — this function printed "amdv_get_desc: not
 * implemented", making a set-descriptor failure indistinguishable from a
 * get-descriptor failure in the log.
 */
static int
amdv_setdesc(void *vmi, int vcpu, int num, struct seg_desc *desc)
{

	printf("amdv_setdesc: not implemented\n");
	return (EINVAL);
}
/* Event/exception injection stub. */
static int
amdv_inject_event(void *vmi, int vcpu, int type, int vector,
		  uint32_t error_code, int error_code_valid)
{

	printf("amdv_inject_event: not implemented\n");
	return (EINVAL);
}
/* NMI injection stub. */
static int
amdv_nmi(void *arg, int vcpu)
{

	printf("amdv_nmi: not implemented\n");
	return (EINVAL);
}
/* Optional-capability query stub. */
static int
amdv_getcap(void *arg, int vcpu, int type, int *retval)
{

	printf("amdv_getcap: not implemented\n");
	return (EINVAL);
}
/* Optional-capability update stub. */
static int
amdv_setcap(void *arg, int vcpu, int type, int val)
{

	printf("amdv_setcap: not implemented\n");
	return (EINVAL);
}
struct vmm_ops vmm_ops_amd = {
amdv_init,
amdv_cleanup,
amdv_vminit,
amdv_vmrun,
amdv_vmcleanup,
amdv_vmmmap,
amdv_getreg,
amdv_setreg,
amdv_getdesc,
amdv_setdesc,
amdv_inject_event,
amdv_nmi,
amdv_getcap,
amdv_setcap
};
/* AMD IOMMU initialization stub. */
static int
amd_iommu_init(void)
{

	printf("amd_iommu_init: not implemented\n");
	return (ENXIO);
}
/* AMD IOMMU teardown stub. */
static void
amd_iommu_cleanup(void)
{

	printf("amd_iommu_cleanup: not implemented\n");
}
/* AMD IOMMU enable stub. */
static void
amd_iommu_enable(void)
{

	printf("amd_iommu_enable: not implemented\n");
}
/* AMD IOMMU disable stub. */
static void
amd_iommu_disable(void)
{

	printf("amd_iommu_disable: not implemented\n");
}
/* IOMMU domain creation stub; always fails with NULL. */
static void *
amd_iommu_create_domain(vm_paddr_t maxaddr)
{

	printf("amd_iommu_create_domain: not implemented\n");
	return (NULL);
}
/* IOMMU domain destruction stub. */
static void
amd_iommu_destroy_domain(void *domain)
{

	printf("amd_iommu_destroy_domain: not implemented\n");
}
/* IOMMU mapping creation stub; maps nothing and reports 0 bytes mapped. */
static uint64_t
amd_iommu_create_mapping(void *domain, vm_paddr_t gpa, vm_paddr_t hpa,
			 uint64_t len)
{

	printf("amd_iommu_create_mapping: not implemented\n");
	return (0);
}
/* Stub: attach a PCI device (bus/slot/func) to an IOMMU domain. */
static void
amd_iommu_add_device(void *domain, int bus, int slot, int func)
{

	printf("amd_iommu_add_device: not implemented\n");
}
/* Stub: detach a PCI device (bus/slot/func) from an IOMMU domain. */
static void
amd_iommu_remove_device(void *domain, int bus, int slot, int func)
{

	printf("amd_iommu_remove_device: not implemented\n");
}
/*
 * Stub iommu_ops table for AMD.  Positional initializers: the entries
 * must stay in the field order declared by struct iommu_ops in
 * "io/iommu.h" — NOTE(review): confirm order against that header.
 */
struct iommu_ops iommu_ops_amd = {
	amd_iommu_init,
	amd_iommu_cleanup,
	amd_iommu_enable,
	amd_iommu_disable,
	amd_iommu_create_domain,
	amd_iommu_destroy_domain,
	amd_iommu_create_mapping,
	amd_iommu_add_device,
	amd_iommu_remove_device,
};

312
sys/amd64/vmm/intel/ept.c Normal file
View File

@ -0,0 +1,312 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/param.h>
#include <machine/cpufunc.h>
#include <machine/pmap.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include "vmx_cpufunc.h"
#include "vmx_msr.h"
#include "vmx.h"
#include "ept.h"
#define EPT_PWL4(cap) ((cap) & (1UL << 6))
#define EPT_MEMORY_TYPE_WB(cap) ((cap) & (1UL << 14))
#define EPT_PDE_SUPERPAGE(cap) ((cap) & (1UL << 16)) /* 2MB pages */
#define EPT_PDPTE_SUPERPAGE(cap) ((cap) & (1UL << 17)) /* 1GB pages */
#define INVVPID_SUPPORTED(cap) ((cap) & (1UL << 32))
#define INVEPT_SUPPORTED(cap) ((cap) & (1UL << 20))
#define INVVPID_ALL_TYPES_MASK 0xF0000000000UL
#define INVVPID_ALL_TYPES_SUPPORTED(cap) \
(((cap) & INVVPID_ALL_TYPES_MASK) == INVVPID_ALL_TYPES_MASK)
#define INVEPT_ALL_TYPES_MASK 0x6000000UL
#define INVEPT_ALL_TYPES_SUPPORTED(cap) \
(((cap) & INVEPT_ALL_TYPES_MASK) == INVEPT_ALL_TYPES_MASK)
#define EPT_PG_RD (1 << 0)
#define EPT_PG_WR (1 << 1)
#define EPT_PG_EX (1 << 2)
#define EPT_PG_MEMORY_TYPE(x) ((x) << 3)
#define EPT_PG_IGNORE_PAT (1 << 6)
#define EPT_PG_SUPERPAGE (1 << 7)
#define EPT_ADDR_MASK ((uint64_t)-1 << 12)
MALLOC_DECLARE(M_VMX);
static uint64_t page_sizes_mask;
/*
 * Verify that the CPU's EPT/VPID capabilities (MSR_VMX_EPT_VPID_CAP) meet
 * our requirements and record the supported EPT page sizes in
 * 'page_sizes_mask'.  Returns 0 on success, EINVAL if a required
 * capability is missing.
 */
int
ept_init(void)
{
	uint64_t cap;
	int shift;

	cap = rdmsr(MSR_VMX_EPT_VPID_CAP);

	/*
	 * Require:
	 * - a 4-step page walk
	 * - EPT structures cacheable in write-back memory
	 * - invvpid with all possible types
	 * - invept with all possible types
	 */
	if (!EPT_PWL4(cap) ||
	    !EPT_MEMORY_TYPE_WB(cap) ||
	    !INVVPID_SUPPORTED(cap) ||
	    !INVVPID_ALL_TYPES_SUPPORTED(cap) ||
	    !INVEPT_SUPPORTED(cap) ||
	    !INVEPT_ALL_TYPES_SUPPORTED(cap))
		return (EINVAL);

	/* Record each supported page size as a bit in 'page_sizes_mask'. */
	shift = PAGE_SHIFT;
	page_sizes_mask = 1UL << shift;			/* 4KB page */
	if (EPT_PDE_SUPERPAGE(cap))
		page_sizes_mask |= 1UL << (shift + 9);	/* 2MB superpage */
	if (EPT_PDPTE_SUPERPAGE(cap))
		page_sizes_mask |= 1UL << (shift + 18);	/* 1GB superpage */

	return (0);
}
/*
 * Install one EPT mapping of up to 'length' bytes for guest physical
 * address 'gpa' -> host physical address 'hpa', using the largest page
 * size that alignment, length and hardware support allow.  Returns the
 * number of bytes actually mapped (a single page or superpage); callers
 * loop until the whole range is covered.
 */
static size_t
ept_create_mapping(uint64_t *ptp, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
vm_memattr_t attr, vm_prot_t prot, boolean_t spok)
{
	int spshift, ptpshift, ptpindex, nlevels;

	/*
	 * Compute the size of the mapping that we can accommodate.
	 *
	 * This is based on three factors:
	 * - super page sizes supported by the processor
	 * - alignment of the region starting at 'gpa' and 'hpa'
	 * - length of the region 'length'
	 */
	spshift = PAGE_SHIFT;
	if (spok)
		spshift += (EPT_PWLEVELS - 1) * 9;
	/* Walk down from the largest candidate size to 4KB. */
	while (spshift >= PAGE_SHIFT) {
		uint64_t spsize = 1UL << spshift;
		if ((page_sizes_mask & spsize) != 0 &&
		    (gpa & (spsize - 1)) == 0 &&
		    (hpa & (spsize - 1)) == 0 &&
		    length >= spsize) {
			break;
		}
		spshift -= 9;
	}

	/* 4KB must always qualify; anything less means a caller bug. */
	if (spshift < PAGE_SHIFT) {
		panic("Invalid spshift for gpa 0x%016lx, hpa 0x%016lx, "
		    "length 0x%016lx, page_sizes_mask 0x%016lx",
		    gpa, hpa, length, page_sizes_mask);
	}

	/*
	 * Walk the page table from the root (PML4), allocating intermediate
	 * page table pages as needed, until the level whose page size
	 * matches the mapping size chosen above.  'ptpshift'/'ptpindex'
	 * remain valid after the loop and identify the leaf entry.
	 */
	nlevels = EPT_PWLEVELS;
	while (--nlevels >= 0) {
		ptpshift = PAGE_SHIFT + nlevels * 9;
		ptpindex = (gpa >> ptpshift) & 0x1FF;

		/* We have reached the leaf mapping */
		if (spshift >= ptpshift)
			break;

		/*
		 * We are working on a non-leaf page table page.
		 *
		 * Create the next level page table page if necessary and point
		 * to it from the current page table.
		 */
		if (ptp[ptpindex] == 0) {
			void *nlp = malloc(PAGE_SIZE, M_VMX, M_WAITOK | M_ZERO);
			ptp[ptpindex] = vtophys(nlp);
			ptp[ptpindex] |= EPT_PG_RD | EPT_PG_WR | EPT_PG_EX;
		}

		/* Work our way down to the next level page table page */
		ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & EPT_ADDR_MASK);
	}

	if ((gpa & ((1UL << ptpshift) - 1)) != 0) {
		panic("ept_create_mapping: gpa 0x%016lx and ptpshift %d "
		    "mismatch\n", gpa, ptpshift);
	}

	/*
	 * Do the mapping.
	 * NOTE(review): an already-populated leaf entry is silently
	 * overwritten here; any page table pages it referenced are leaked.
	 */
	ptp[ptpindex] = hpa;

	/* Apply the access controls */
	if (prot & VM_PROT_READ)
		ptp[ptpindex] |= EPT_PG_RD;
	if (prot & VM_PROT_WRITE)
		ptp[ptpindex] |= EPT_PG_WR;
	if (prot & VM_PROT_EXECUTE)
		ptp[ptpindex] |= EPT_PG_EX;

	/*
	 * XXX should we enforce this memory type by setting the ignore PAT
	 * bit to 1.
	 */
	ptp[ptpindex] |= EPT_PG_MEMORY_TYPE(attr);

	/* nlevels > 0 means the leaf is above the 4KB level: a superpage. */
	if (nlevels > 0)
		ptp[ptpindex] |= EPT_PG_SUPERPAGE;

	return (1UL << ptpshift);
}
/*
 * Free the resources referenced by a leaf (PT level) EPT entry.  A 4KB
 * leaf maps guest memory directly, so there is nothing to release; the
 * superpage bit can never legitimately appear at this level.
 */
static void
ept_free_pt_entry(pt_entry_t pte)
{

	if (pte == 0)
		return;

	/* sanity check */
	if ((pte & EPT_PG_SUPERPAGE) != 0)
		panic("ept_free_pt_entry: pte cannot have superpage bit");
}
/*
 * Free a PD-level EPT entry.  A 2MB superpage maps guest memory directly;
 * otherwise the entry points at a page table page whose entries are
 * released before the page itself is freed.
 */
static void
ept_free_pd_entry(pd_entry_t pde)
{
	pt_entry_t *pt;
	int idx;

	if (pde == 0 || (pde & EPT_PG_SUPERPAGE) != 0)
		return;

	pt = (pt_entry_t *)PHYS_TO_DMAP(pde & EPT_ADDR_MASK);
	for (idx = 0; idx < NPTEPG; idx++)
		ept_free_pt_entry(pt[idx]);
	free(pt, M_VMX);		/* free the page table page */
}
/*
 * Free a PDP-level EPT entry.  A 1GB superpage maps guest memory directly;
 * otherwise recurse through the page directory page and then free it.
 */
static void
ept_free_pdp_entry(pdp_entry_t pdpe)
{
	pd_entry_t *pd;
	int idx;

	if (pdpe == 0 || (pdpe & EPT_PG_SUPERPAGE) != 0)
		return;

	pd = (pd_entry_t *)PHYS_TO_DMAP(pdpe & EPT_ADDR_MASK);
	for (idx = 0; idx < NPDEPG; idx++)
		ept_free_pd_entry(pd[idx]);
	free(pd, M_VMX);		/* free the page directory page */
}
/*
 * Free a PML4-level EPT entry by recursing through the page directory
 * pointer page it references and then freeing that page.
 */
static void
ept_free_pml4_entry(pml4_entry_t pml4e)
{
	pdp_entry_t *pdp;
	int idx;

	if (pml4e == 0 || (pml4e & EPT_PG_SUPERPAGE) != 0)
		return;

	pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4e & EPT_ADDR_MASK);
	for (idx = 0; idx < NPDPEPG; idx++)
		ept_free_pdp_entry(pdp[idx]);
	free(pdp, M_VMX);		/* free the page directory ptr page */
}
/* Tear down the entire EPT hierarchy of a VM, starting from its PML4. */
void
ept_vmcleanup(struct vmx *vmx)
{
	int idx;

	for (idx = 0; idx < NPML4EPG; idx++)
		ept_free_pml4_entry(vmx->pml4ept[idx]);
}
/*
 * Map the guest physical range [gpa, gpa + len) to the host physical
 * range starting at 'hpa', one (super)page at a time.  Always returns 0;
 * ept_create_mapping() panics on failure.
 */
int
ept_vmmmap(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t len,
	   vm_memattr_t attr, int prot, boolean_t spok)
{
	struct vmx *vmx = arg;
	size_t mapped;

	for (; len > 0; len -= mapped, gpa += mapped, hpa += mapped) {
		mapped = ept_create_mapping(vmx->pml4ept, gpa, hpa, len,
		    attr, prot, spok);
	}

	return (0);
}
/*
 * smp_rendezvous() callback: run invept for the single EPT context
 * described by 'arg' on this cpu.
 */
static void
invept_single_context(void *arg)
{

	invept(INVEPT_TYPE_SINGLE_CONTEXT, *(struct invept_desc *)arg);
}
/*
 * Invalidate, on all cpus, any cached translations derived from the EPT
 * hierarchy rooted at 'pml4ept'.  Uses a single-context INVEPT keyed on
 * the EPTP constructed from the PML4 address.
 */
void
ept_invalidate_mappings(u_long pml4ept)
{
	struct invept_desc invept_desc = { 0 };

	invept_desc.eptp = EPTP(pml4ept);

	smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc);
}

42
sys/amd64/vmm/intel/ept.h Normal file
View File

@ -0,0 +1,42 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef	_EPT_H_
#define	_EPT_H_

struct vmx;

#define	EPT_PWLEVELS	4		/* page walk levels */
/*
 * Construct an EPT pointer (EPTP) from the physical address of the root
 * PML4 table: bits 5:3 encode (page-walk length - 1) and the low bits
 * select the memory type used for EPT paging-structure accesses
 * (write-back here).
 */
#define	EPTP(pml4)	((pml4) | (EPT_PWLEVELS - 1) << 3 | PAT_WRITE_BACK)

int	ept_init(void);
/* Map [gpa, gpa+length) to [hpa, hpa+length) with 'attr' and 'prot'. */
int	ept_vmmmap(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
		   vm_memattr_t attr, int prot, boolean_t allow_superpage_mappings);
/* Invalidate cached translations for the EPT rooted at 'ept_pml4'. */
void	ept_invalidate_mappings(u_long ept_pml4);
/* Free all paging-structure pages of the vm's EPT. */
void	ept_vmcleanup(struct vmx *vmx);
#endif

451
sys/amd64/vmm/intel/vmcs.c Normal file
View File

@ -0,0 +1,451 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pcpu.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/segments.h>
#include <machine/pmap.h>
#include <machine/vmm.h>
#include "vmcs.h"
#include "vmx_cpufunc.h"
#include "ept.h"
#include "vmx.h"
/*
 * Apply hardware-mandated fixups to a value before it is written into the
 * VMCS.  Only guest %cr0 and %cr4 need adjustment (bits forced on/off by
 * the VMX fixed MSRs); every other field is passed through unchanged.
 */
static uint64_t
vmcs_fix_regval(uint32_t encoding, uint64_t val)
{

	if (encoding == VMCS_GUEST_CR0)
		val = vmx_fix_cr0(val);
	else if (encoding == VMCS_GUEST_CR4)
		val = vmx_fix_cr4(val);

	return (val);
}
/*
 * Translate a VM_REG_GUEST_* register identifier into its VMCS field
 * encoding.  Returns (uint32_t)-1 for registers that have no direct VMCS
 * field (the caller treats that as EINVAL).
 */
static uint32_t
vmcs_field_encoding(int ident)
{
	switch (ident) {
	case VM_REG_GUEST_CR0:
		return (VMCS_GUEST_CR0);
	case VM_REG_GUEST_CR3:
		return (VMCS_GUEST_CR3);
	case VM_REG_GUEST_CR4:
		return (VMCS_GUEST_CR4);
	case VM_REG_GUEST_DR7:
		return (VMCS_GUEST_DR7);
	case VM_REG_GUEST_RSP:
		return (VMCS_GUEST_RSP);
	case VM_REG_GUEST_RIP:
		return (VMCS_GUEST_RIP);
	case VM_REG_GUEST_RFLAGS:
		return (VMCS_GUEST_RFLAGS);
	/* Segment registers map to their 16-bit selector fields only. */
	case VM_REG_GUEST_ES:
		return (VMCS_GUEST_ES_SELECTOR);
	case VM_REG_GUEST_CS:
		return (VMCS_GUEST_CS_SELECTOR);
	case VM_REG_GUEST_SS:
		return (VMCS_GUEST_SS_SELECTOR);
	case VM_REG_GUEST_DS:
		return (VMCS_GUEST_DS_SELECTOR);
	case VM_REG_GUEST_FS:
		return (VMCS_GUEST_FS_SELECTOR);
	case VM_REG_GUEST_GS:
		return (VMCS_GUEST_GS_SELECTOR);
	case VM_REG_GUEST_TR:
		return (VMCS_GUEST_TR_SELECTOR);
	case VM_REG_GUEST_LDTR:
		return (VMCS_GUEST_LDTR_SELECTOR);
	case VM_REG_GUEST_EFER:
		return (VMCS_GUEST_IA32_EFER);
	default:
		return (-1);
	}
}
/*
 * Look up the VMCS field encodings for a segment (or descriptor-table)
 * register's base, limit and access-rights fields.  IDTR and GDTR have no
 * access-rights field, so '*acc' is set to VMCS_INVALID_ENCODING for them.
 *
 * Returns 0 on success or EINVAL for an unknown segment identifier.
 */
static int
vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
{
	switch (seg) {
	case VM_REG_GUEST_ES:
		*base = VMCS_GUEST_ES_BASE;
		*lim = VMCS_GUEST_ES_LIMIT;
		*acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
		break;
	case VM_REG_GUEST_CS:
		*base = VMCS_GUEST_CS_BASE;
		*lim = VMCS_GUEST_CS_LIMIT;
		*acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
		break;
	case VM_REG_GUEST_SS:
		*base = VMCS_GUEST_SS_BASE;
		*lim = VMCS_GUEST_SS_LIMIT;
		*acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
		break;
	case VM_REG_GUEST_DS:
		*base = VMCS_GUEST_DS_BASE;
		*lim = VMCS_GUEST_DS_LIMIT;
		*acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
		break;
	case VM_REG_GUEST_FS:
		*base = VMCS_GUEST_FS_BASE;
		*lim = VMCS_GUEST_FS_LIMIT;
		*acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
		break;
	case VM_REG_GUEST_GS:
		*base = VMCS_GUEST_GS_BASE;
		*lim = VMCS_GUEST_GS_LIMIT;
		*acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
		break;
	case VM_REG_GUEST_TR:
		*base = VMCS_GUEST_TR_BASE;
		*lim = VMCS_GUEST_TR_LIMIT;
		*acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
		break;
	case VM_REG_GUEST_LDTR:
		*base = VMCS_GUEST_LDTR_BASE;
		*lim = VMCS_GUEST_LDTR_LIMIT;
		*acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
		break;
	case VM_REG_GUEST_IDTR:
		*base = VMCS_GUEST_IDTR_BASE;
		*lim = VMCS_GUEST_IDTR_LIMIT;
		*acc = VMCS_INVALID_ENCODING;	/* no access rights field */
		break;
	case VM_REG_GUEST_GDTR:
		*base = VMCS_GUEST_GDTR_BASE;
		*lim = VMCS_GUEST_GDTR_LIMIT;
		*acc = VMCS_INVALID_ENCODING;	/* no access rights field */
		break;
	default:
		return (EINVAL);
	}

	return (0);
}
/*
 * Read the value of register 'ident' from 'vmcs' into '*retval'.
 *
 * The VMCS is made current (VMPTRLD) for the duration of the access and
 * cleared (VMCLEAR) before returning, since vmread only operates on the
 * current VMCS.  Returns EINVAL for an unknown register, otherwise the
 * vmread error status.
 */
int
vmcs_getreg(struct vmcs *vmcs, int ident, uint64_t *retval)
{
	int error;
	uint32_t encoding;

	/*
	 * If we need to get at vmx-specific state in the VMCS we can bypass
	 * the translation of 'ident' to 'encoding' by simply setting the
	 * sign bit. As it so happens the upper 16 bits are reserved (i.e
	 * set to 0) in the encodings for the VMCS so we are free to use the
	 * sign bit.
	 */
	if (ident < 0)
		encoding = ident & 0x7fffffff;
	else
		encoding = vmcs_field_encoding(ident);

	if (encoding == (uint32_t)-1)
		return (EINVAL);

	VMPTRLD(vmcs);
	error = vmread(encoding, retval);
	VMCLEAR(vmcs);
	return (error);
}
/*
 * Write 'val' to register 'ident' in 'vmcs'.
 *
 * A negative 'ident' is treated as a raw VMCS field encoding with the
 * sign bit stripped (see the comment in vmcs_getreg()); otherwise it is a
 * VM_REG_* identifier translated via vmcs_field_encoding().  The value is
 * passed through vmcs_fix_regval() so that %cr0/%cr4 respect the VMX
 * fixed bits.  The VMCS is made current around the vmwrite and cleared
 * before returning.
 */
int
vmcs_setreg(struct vmcs *vmcs, int ident, uint64_t val)
{
	int error;
	uint32_t encoding;

	if (ident < 0)
		encoding = ident & 0x7fffffff;
	else
		encoding = vmcs_field_encoding(ident);

	if (encoding == (uint32_t)-1)
		return (EINVAL);

	val = vmcs_fix_regval(encoding, val);

	VMPTRLD(vmcs);
	error = vmwrite(encoding, val);
	VMCLEAR(vmcs);
	return (error);
}
/*
 * Write a segment descriptor (base, limit and, where the segment has one,
 * access rights) for segment register 'seg' into 'vmcs'.
 *
 * Panics on an unknown segment identifier.  The VMCS is made current
 * around the writes and cleared before returning; returns the first
 * vmwrite error, or 0.
 */
int
vmcs_setdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
{
	int error;
	uint32_t base, limit, access;

	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
	if (error != 0)
		panic("vmcs_setdesc: invalid segment register %d", seg);

	VMPTRLD(vmcs);
	if ((error = vmwrite(base, desc->base)) != 0)
		goto done;

	if ((error = vmwrite(limit, desc->limit)) != 0)
		goto done;

	/* IDTR/GDTR have no access-rights field. */
	if (access != VMCS_INVALID_ENCODING) {
		if ((error = vmwrite(access, desc->access)) != 0)
			goto done;
	}
done:
	VMCLEAR(vmcs);
	return (error);
}
/*
 * Read the segment descriptor (base, limit and, where the segment has
 * one, access rights) for segment register 'seg' out of 'vmcs' into
 * '*desc'.
 *
 * Panics on an unknown segment identifier.  On a vmread failure the
 * fields read so far remain stored in '*desc'.  The VMCS is made current
 * around the reads and cleared before returning.
 */
int
vmcs_getdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
{
	int error;
	uint32_t base, limit, access;
	uint64_t u64;

	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
	if (error != 0)
		panic("vmcs_getdesc: invalid segment register %d", seg);

	VMPTRLD(vmcs);
	if ((error = vmread(base, &u64)) != 0)
		goto done;
	desc->base = u64;

	if ((error = vmread(limit, &u64)) != 0)
		goto done;
	desc->limit = u64;

	/* IDTR/GDTR have no access-rights field. */
	if (access != VMCS_INVALID_ENCODING) {
		if ((error = vmread(access, &u64)) != 0)
			goto done;
		desc->access = u64;
	}
done:
	VMCLEAR(vmcs);
	return (error);
}
/*
 * Point the VMCS at the guest MSR save area: 'g_area' holding 'g_count'
 * 'struct msr_entry' slots.  NOTE(review): 'g_area' is presumably a
 * physical address as the VMCS requires -- confirm at the caller.
 *
 * Returns 0 on success or the first vmwrite error.
 */
int
vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
{
	int error;

	VMPTRLD(vmcs);

	/*
	 * Guest MSRs are saved in the VM-exit MSR-store area.
	 * Guest MSRs are loaded from the VM-entry MSR-load area.
	 * Both areas point to the same location in memory.
	 */
	if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
		goto done;
	if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
		goto done;

	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
		goto done;
	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
		goto done;

	error = 0;
done:
	VMCLEAR(vmcs);
	return (error);
}
/*
 * Initialize 'vmcs' with the invariant VMX controls and host/guest state
 * supplied by the caller.
 *
 * Per-cpu host state (%gs base, TR base, GDTR base) is deliberately NOT
 * written here -- see the comment below -- since the vcpu may run on any
 * host cpu.  The VMCS is made current for the duration and cleared before
 * returning.  Returns 0 on success or the first vmwrite error.
 */
int
vmcs_set_defaults(struct vmcs *vmcs,
		  u_long host_rip, u_long host_rsp, u_long ept_pml4,
		  uint32_t pinbased_ctls, uint32_t procbased_ctls,
		  uint32_t procbased_ctls2, uint32_t exit_ctls,
		  uint32_t entry_ctls, u_long msr_bitmap, uint16_t vpid)
{
	int error, codesel, datasel, tsssel;
	u_long cr0, cr4, efer;
	uint64_t eptp, pat;
	uint32_t exc_bitmap;

	/* Kernel selectors for the host-state area. */
	codesel = GSEL(GCODE_SEL, SEL_KPL);
	datasel = GSEL(GDATA_SEL, SEL_KPL);
	tsssel = GSEL(GPROC0_SEL, SEL_KPL);

	/*
	 * Make sure we have a "current" VMCS to work with.
	 */
	VMPTRLD(vmcs);

	/*
	 * Load the VMX controls
	 */
	if ((error = vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls)) != 0)
		goto done;
	if ((error = vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls)) != 0)
		goto done;
	if ((error = vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2)) != 0)
		goto done;
	if ((error = vmwrite(VMCS_EXIT_CTLS, exit_ctls)) != 0)
		goto done;
	if ((error = vmwrite(VMCS_ENTRY_CTLS, entry_ctls)) != 0)
		goto done;

	/* Guest state */

	/* Initialize guest IA32_PAT MSR with the default value */
	pat = PAT_VALUE(0, PAT_WRITE_BACK)	|
	      PAT_VALUE(1, PAT_WRITE_THROUGH)	|
	      PAT_VALUE(2, PAT_UNCACHED)	|
	      PAT_VALUE(3, PAT_UNCACHEABLE)	|
	      PAT_VALUE(4, PAT_WRITE_BACK)	|
	      PAT_VALUE(5, PAT_WRITE_THROUGH)	|
	      PAT_VALUE(6, PAT_UNCACHED)	|
	      PAT_VALUE(7, PAT_UNCACHEABLE);
	if ((error = vmwrite(VMCS_GUEST_IA32_PAT, pat)) != 0)
		goto done;

	/* Host state */

	/* Initialize host IA32_PAT MSR */
	pat = rdmsr(MSR_PAT);
	if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
		goto done;

	/* Load the IA32_EFER MSR */
	efer = rdmsr(MSR_EFER);
	if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
		goto done;

	/* Load the control registers */
	cr0 = rcr0();
	if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
		goto done;

	cr4 = rcr4();
	if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
		goto done;

	/* Load the segment selectors */
	if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
		goto done;

	if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
		goto done;

	if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
		goto done;

	if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
		goto done;

	if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
		goto done;

	if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
		goto done;

	if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
		goto done;

	/*
	 * Load the Base-Address for %fs and idtr.
	 *
	 * Note that we exclude %gs, tss and gdtr here because their base
	 * address is pcpu specific.
	 */
	if ((error = vmwrite(VMCS_HOST_FS_BASE, 0)) != 0)
		goto done;

	if ((error = vmwrite(VMCS_HOST_IDTR_BASE, r_idt.rd_base)) != 0)
		goto done;

	/* instruction pointer */
	if ((error = vmwrite(VMCS_HOST_RIP, host_rip)) != 0)
		goto done;

	/* stack pointer */
	if ((error = vmwrite(VMCS_HOST_RSP, host_rsp)) != 0)
		goto done;

	/* eptp */
	eptp = EPTP(ept_pml4);
	if ((error = vmwrite(VMCS_EPTP, eptp)) != 0)
		goto done;

	/* vpid */
	if ((error = vmwrite(VMCS_VPID, vpid)) != 0)
		goto done;

	/* msr bitmap */
	if ((error = vmwrite(VMCS_MSR_BITMAP, msr_bitmap)) != 0)
		goto done;

	/* exception bitmap: intercept machine-check exceptions only */
	exc_bitmap = 1 << IDT_MC;
	if ((error = vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap)) != 0)
		goto done;

	/* link pointer: all 1s since VMCS shadowing is not used */
	if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
		goto done;
done:
	VMCLEAR(vmcs);
	return (error);
}
/*
 * Read a field from the current VMCS, panicking on failure.
 *
 * Unlike vmcs_getreg() this does not load/clear a VMCS: the caller must
 * ensure the desired VMCS is already current on this cpu, since vmread
 * operates on whatever was last loaded with vmptrld.
 */
uint64_t
vmcs_read(uint32_t encoding)
{
	int error;
	uint64_t val;

	error = vmread(encoding, &val);
	if (error != 0)
		panic("vmcs_read(%u) error %d", encoding, error);

	return (val);
}

324
sys/amd64/vmm/intel/vmcs.h Normal file
View File

@ -0,0 +1,324 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VMCS_H_
#define _VMCS_H_
#ifdef _KERNEL
/*
 * Hardware VMCS region.  Only the identifier and abort code have an
 * architecturally defined layout; the remainder of the 4KB page is
 * implementation specific and must only be accessed via vmread/vmwrite.
 */
struct vmcs {
	uint32_t	identifier;	/* VMCS revision identifier */
	uint32_t	abort_code;	/* VMX-abort indicator */
	char		_impl_specific[PAGE_SIZE - sizeof(uint32_t) * 2];
};
CTASSERT(sizeof(struct vmcs) == PAGE_SIZE);
/*
 * MSR save region is composed of an array of 'struct msr_entry'.
 * This is the 16-byte entry format used by the VM-exit MSR-store and
 * VM-entry MSR-load areas.
 */
struct msr_entry {
	uint32_t	index;		/* MSR number */
	uint32_t	reserved;	/* reserved; not interpreted here */
	uint64_t	val;		/* MSR contents */
};
int vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count);
int vmcs_set_defaults(struct vmcs *vmcs, u_long host_rip, u_long host_rsp,
u_long ept_pml4,
uint32_t pinbased_ctls, uint32_t procbased_ctls,
uint32_t procbased_ctls2, uint32_t exit_ctls,
uint32_t entry_ctls, u_long msr_bitmap,
uint16_t vpid);
int vmcs_getreg(struct vmcs *vmcs, int ident, uint64_t *retval);
int vmcs_setreg(struct vmcs *vmcs, int ident, uint64_t val);
int vmcs_getdesc(struct vmcs *vmcs, int ident,
struct seg_desc *desc);
int vmcs_setdesc(struct vmcs *vmcs, int ident,
struct seg_desc *desc);
uint64_t vmcs_read(uint32_t encoding);
#define vmexit_instruction_length() vmcs_read(VMCS_EXIT_INSTRUCTION_LENGTH)
#define vmcs_guest_rip() vmcs_read(VMCS_GUEST_RIP)
#define vmcs_instruction_error() vmcs_read(VMCS_INSTRUCTION_ERROR)
#define vmcs_exit_reason() (vmcs_read(VMCS_EXIT_REASON) & 0xffff)
#define vmcs_exit_qualification() vmcs_read(VMCS_EXIT_QUALIFICATION)
#endif /* _KERNEL */
#define VMCS_IDENT(encoding) ((encoding) | 0x80000000)
/*
* VMCS field encodings from Appendix H, Intel Architecture Manual Vol3B.
*/
#define VMCS_INVALID_ENCODING 0xffffffff
/* 16-bit control fields */
#define VMCS_VPID 0x00000000
/* 16-bit guest-state fields */
#define VMCS_GUEST_ES_SELECTOR 0x00000800
#define VMCS_GUEST_CS_SELECTOR 0x00000802
#define VMCS_GUEST_SS_SELECTOR 0x00000804
#define VMCS_GUEST_DS_SELECTOR 0x00000806
#define VMCS_GUEST_FS_SELECTOR 0x00000808
#define VMCS_GUEST_GS_SELECTOR 0x0000080A
#define VMCS_GUEST_LDTR_SELECTOR 0x0000080C
#define VMCS_GUEST_TR_SELECTOR 0x0000080E
/* 16-bit host-state fields */
#define VMCS_HOST_ES_SELECTOR 0x00000C00
#define VMCS_HOST_CS_SELECTOR 0x00000C02
#define VMCS_HOST_SS_SELECTOR 0x00000C04
#define VMCS_HOST_DS_SELECTOR 0x00000C06
#define VMCS_HOST_FS_SELECTOR 0x00000C08
#define VMCS_HOST_GS_SELECTOR 0x00000C0A
#define VMCS_HOST_TR_SELECTOR 0x00000C0C
/* 64-bit control fields */
#define VMCS_IO_BITMAP_A 0x00002000
#define VMCS_IO_BITMAP_B 0x00002002
#define VMCS_MSR_BITMAP 0x00002004
#define VMCS_EXIT_MSR_STORE 0x00002006
#define VMCS_EXIT_MSR_LOAD 0x00002008
#define VMCS_ENTRY_MSR_LOAD 0x0000200A
#define VMCS_EXECUTIVE_VMCS 0x0000200C
#define VMCS_TSC_OFFSET 0x00002010
#define VMCS_VIRTUAL_APIC 0x00002012
#define VMCS_APIC_ACCESS 0x00002014
#define VMCS_EPTP 0x0000201A
/* 64-bit read-only fields */
#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400
/* 64-bit guest-state fields */
#define VMCS_LINK_POINTER 0x00002800
#define VMCS_GUEST_IA32_DEBUGCTL 0x00002802
#define VMCS_GUEST_IA32_PAT 0x00002804
#define VMCS_GUEST_IA32_EFER 0x00002806
#define VMCS_GUEST_IA32_PERF_GLOBAL_CTRL 0x00002808
#define VMCS_GUEST_PDPTE0 0x0000280A
#define VMCS_GUEST_PDPTE1 0x0000280C
#define VMCS_GUEST_PDPTE2 0x0000280E
#define VMCS_GUEST_PDPTE3 0x00002810
/* 64-bit host-state fields */
#define VMCS_HOST_IA32_PAT 0x00002C00
#define VMCS_HOST_IA32_EFER 0x00002C02
#define VMCS_HOST_IA32_PERF_GLOBAL_CTRL 0x00002C04
/* 32-bit control fields */
#define VMCS_PIN_BASED_CTLS 0x00004000
#define VMCS_PRI_PROC_BASED_CTLS 0x00004002
#define VMCS_EXCEPTION_BITMAP 0x00004004
#define VMCS_PF_ERROR_MASK 0x00004006
#define VMCS_PF_ERROR_MATCH 0x00004008
#define VMCS_CR3_TARGET_COUNT 0x0000400A
#define VMCS_EXIT_CTLS 0x0000400C
#define VMCS_EXIT_MSR_STORE_COUNT 0x0000400E
#define VMCS_EXIT_MSR_LOAD_COUNT 0x00004010
#define VMCS_ENTRY_CTLS 0x00004012
#define VMCS_ENTRY_MSR_LOAD_COUNT 0x00004014
#define VMCS_ENTRY_INTR_INFO 0x00004016
#define VMCS_ENTRY_EXCEPTION_ERROR 0x00004018
#define VMCS_ENTRY_INST_LENGTH 0x0000401A
#define VMCS_TPR_THRESHOLD 0x0000401C
#define VMCS_SEC_PROC_BASED_CTLS 0x0000401E
#define VMCS_PLE_GAP 0x00004020
#define VMCS_PLE_WINDOW 0x00004022
/* 32-bit read-only data fields */
#define VMCS_INSTRUCTION_ERROR 0x00004400
#define VMCS_EXIT_REASON 0x00004402
#define VMCS_EXIT_INTERRUPTION_INFO 0x00004404
#define VMCS_EXIT_INTERRUPTION_ERROR 0x00004406
#define VMCS_IDT_VECTORING_INFO 0x00004408
#define VMCS_IDT_VECTORING_ERROR 0x0000440A
#define VMCS_EXIT_INSTRUCTION_LENGTH 0x0000440C
#define VMCS_EXIT_INSTRUCTION_INFO 0x0000440E
/* 32-bit guest-state fields */
#define VMCS_GUEST_ES_LIMIT 0x00004800
#define VMCS_GUEST_CS_LIMIT 0x00004802
#define VMCS_GUEST_SS_LIMIT 0x00004804
#define VMCS_GUEST_DS_LIMIT 0x00004806
#define VMCS_GUEST_FS_LIMIT 0x00004808
#define VMCS_GUEST_GS_LIMIT 0x0000480A
#define VMCS_GUEST_LDTR_LIMIT 0x0000480C
#define VMCS_GUEST_TR_LIMIT 0x0000480E
#define VMCS_GUEST_GDTR_LIMIT 0x00004810
#define VMCS_GUEST_IDTR_LIMIT 0x00004812
#define VMCS_GUEST_ES_ACCESS_RIGHTS 0x00004814
#define VMCS_GUEST_CS_ACCESS_RIGHTS 0x00004816
#define VMCS_GUEST_SS_ACCESS_RIGHTS 0x00004818
#define VMCS_GUEST_DS_ACCESS_RIGHTS 0x0000481A
#define VMCS_GUEST_FS_ACCESS_RIGHTS 0x0000481C
#define VMCS_GUEST_GS_ACCESS_RIGHTS 0x0000481E
#define VMCS_GUEST_LDTR_ACCESS_RIGHTS 0x00004820
#define VMCS_GUEST_TR_ACCESS_RIGHTS 0x00004822
#define VMCS_GUEST_INTERRUPTIBILITY 0x00004824
#define VMCS_GUEST_ACTIVITY 0x00004826
#define VMCS_GUEST_SMBASE 0x00004828
#define VMCS_GUEST_IA32_SYSENTER_CS 0x0000482A
#define VMCS_PREEMPTION_TIMER_VALUE 0x0000482E
/* 32-bit host state fields */
#define VMCS_HOST_IA32_SYSENTER_CS 0x00004C00
/* Natural Width control fields */
#define VMCS_CR0_MASK 0x00006000
#define VMCS_CR4_MASK 0x00006002
#define VMCS_CR0_SHADOW 0x00006004
#define VMCS_CR4_SHADOW 0x00006006
#define VMCS_CR3_TARGET0 0x00006008
#define VMCS_CR3_TARGET1 0x0000600A
#define VMCS_CR3_TARGET2 0x0000600C
#define VMCS_CR3_TARGET3 0x0000600E
/* Natural Width read-only fields */
#define VMCS_EXIT_QUALIFICATION 0x00006400
#define VMCS_IO_RCX 0x00006402
#define VMCS_IO_RSI 0x00006404
#define VMCS_IO_RDI 0x00006406
#define VMCS_IO_RIP 0x00006408
#define VMCS_GUEST_LINEAR_ADDRESS 0x0000640A
/* Natural Width guest-state fields */
#define VMCS_GUEST_CR0 0x00006800
#define VMCS_GUEST_CR3 0x00006802
#define VMCS_GUEST_CR4 0x00006804
#define VMCS_GUEST_ES_BASE 0x00006806
#define VMCS_GUEST_CS_BASE 0x00006808
#define VMCS_GUEST_SS_BASE 0x0000680A
#define VMCS_GUEST_DS_BASE 0x0000680C
#define VMCS_GUEST_FS_BASE 0x0000680E
#define VMCS_GUEST_GS_BASE 0x00006810
#define VMCS_GUEST_LDTR_BASE 0x00006812
#define VMCS_GUEST_TR_BASE 0x00006814
#define VMCS_GUEST_GDTR_BASE 0x00006816
#define VMCS_GUEST_IDTR_BASE 0x00006818
#define VMCS_GUEST_DR7 0x0000681A
#define VMCS_GUEST_RSP 0x0000681C
#define VMCS_GUEST_RIP 0x0000681E
#define VMCS_GUEST_RFLAGS 0x00006820
#define VMCS_GUEST_PENDING_DBG_EXCEPTIONS 0x00006822
#define VMCS_GUEST_IA32_SYSENTER_ESP 0x00006824
#define VMCS_GUEST_IA32_SYSENTER_EIP 0x00006826
/* Natural Width host-state fields */
#define VMCS_HOST_CR0 0x00006C00
#define VMCS_HOST_CR3 0x00006C02
#define VMCS_HOST_CR4 0x00006C04
#define VMCS_HOST_FS_BASE 0x00006C06
#define VMCS_HOST_GS_BASE 0x00006C08
#define VMCS_HOST_TR_BASE 0x00006C0A
#define VMCS_HOST_GDTR_BASE 0x00006C0C
#define VMCS_HOST_IDTR_BASE 0x00006C0E
#define VMCS_HOST_IA32_SYSENTER_ESP 0x00006C10
#define VMCS_HOST_IA32_SYSENTER_EIP 0x00006C12
#define VMCS_HOST_RSP 0x00006C14
#define VMCS_HOST_RIP 0x00006c16
/*
* VM instruction error numbers
*/
#define VMRESUME_WITH_NON_LAUNCHED_VMCS 5
/*
* VMCS exit reasons
*/
#define EXIT_REASON_EXCEPTION 0
#define EXIT_REASON_EXT_INTR 1
#define EXIT_REASON_TRIPLE_FAULT 2
#define EXIT_REASON_INIT 3
#define EXIT_REASON_SIPI 4
#define EXIT_REASON_IO_SMI 5
#define EXIT_REASON_SMI 6
#define EXIT_REASON_INTR_WINDOW 7
#define EXIT_REASON_NMI_WINDOW 8
#define EXIT_REASON_TASK_SWITCH 9
#define EXIT_REASON_CPUID 10
#define EXIT_REASON_GETSEC 11
#define EXIT_REASON_HLT 12
#define EXIT_REASON_INVD 13
#define EXIT_REASON_INVLPG 14
#define EXIT_REASON_RDPMC 15
#define EXIT_REASON_RDTSC 16
#define EXIT_REASON_RSM 17
#define EXIT_REASON_VMCALL 18
#define EXIT_REASON_VMCLEAR 19
#define EXIT_REASON_VMLAUNCH 20
#define EXIT_REASON_VMPTRLD 21
#define EXIT_REASON_VMPTRST 22
#define EXIT_REASON_VMREAD 23
#define EXIT_REASON_VMRESUME 24
#define EXIT_REASON_VMWRITE 25
#define EXIT_REASON_VMXOFF 26
#define EXIT_REASON_VMXON 27
#define EXIT_REASON_CR_ACCESS 28
#define EXIT_REASON_DR_ACCESS 29
#define EXIT_REASON_INOUT 30
#define EXIT_REASON_RDMSR 31
#define EXIT_REASON_WRMSR 32
#define EXIT_REASON_INVAL_VMCS 33
#define EXIT_REASON_INVAL_MSR 34
#define EXIT_REASON_MWAIT 36
#define EXIT_REASON_MTF 37
#define EXIT_REASON_MONITOR 39
#define EXIT_REASON_PAUSE 40
#define EXIT_REASON_MCE 41
#define EXIT_REASON_TPR 43
#define EXIT_REASON_APIC 44
#define EXIT_REASON_GDTR_IDTR 46
#define EXIT_REASON_LDTR_TR 47
#define EXIT_REASON_EPT_FAULT 48
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_INVEPT 50
#define EXIT_REASON_RDTSCP 51
#define EXIT_REASON_VMX_PREEMPT 52
#define EXIT_REASON_INVVPID 53
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
/*
* VMCS interrupt information fields
*/
#define VMCS_INTERRUPTION_INFO_VALID (1 << 31)
#define VMCS_INTERRUPTION_INFO_HW_INTR (0 << 8)
#define VMCS_INTERRUPTION_INFO_NMI (2 << 8)
/*
* VMCS Guest interruptibility field
*/
#define VMCS_INTERRUPTIBILITY_STI_BLOCKING (1 << 0)
#define VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING (1 << 1)
#define VMCS_INTERRUPTIBILITY_SMI_BLOCKING (1 << 2)
#define VMCS_INTERRUPTIBILITY_NMI_BLOCKING (1 << 3)
/*
* Exit qualification for EXIT_REASON_INVAL_VMCS
*/
#define EXIT_QUAL_NMI_WHILE_STI_BLOCKING 3
#endif

1673
sys/amd64/vmm/intel/vmx.c Normal file

File diff suppressed because it is too large Load Diff

115
sys/amd64/vmm/intel/vmx.h Normal file
View File

@ -0,0 +1,115 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VMX_H_
#define _VMX_H_
#include "vmcs.h"
#define GUEST_MSR_MAX_ENTRIES 64 /* arbitrary */
/*
 * Register state saved/restored by software across a VM entry/exit --
 * i.e. the registers the VMCS does not hold for us.  The guest block is
 * loaded before vmlaunch/vmresume and stored on exit; the host block is
 * the callee-saved state needed to return via vmx_setjmp/vmx_longjmp.
 */
struct vmxctx {
	register_t	guest_rdi;		/* Guest state */
	register_t	guest_rsi;
	register_t	guest_rdx;
	register_t	guest_rcx;
	register_t	guest_r8;
	register_t	guest_r9;
	register_t	guest_rax;
	register_t	guest_rbx;
	register_t	guest_rbp;
	register_t	guest_r10;
	register_t	guest_r11;
	register_t	guest_r12;
	register_t	guest_r13;
	register_t	guest_r14;
	register_t	guest_r15;
	register_t	guest_cr2;

	register_t	host_r15;		/* Host state */
	register_t	host_r14;
	register_t	host_r13;
	register_t	host_r12;
	register_t	host_rbp;
	register_t	host_rsp;
	register_t	host_rbx;
	register_t	host_rip;
	/*
	 * XXX todo debug registers and fpu state
	 */

	/* NOTE(review): presumably set when vmlaunch/vmresume fails --
	 * confirm against vmx_launch/vmx_resume. */
	int		launch_error;
};
struct vmxcap {
int set;
uint32_t proc_ctls;
};
struct vmxstate {
int request_nmi;
int lastcpu; /* host cpu that this 'vcpu' last ran on */
uint16_t vpid;
};
/* virtual machine softc */
struct vmx {
pml4_entry_t pml4ept[NPML4EPG];
struct vmcs vmcs[VM_MAXCPU]; /* one vmcs per virtual cpu */
char msr_bitmap[PAGE_SIZE];
struct msr_entry guest_msrs[VM_MAXCPU][GUEST_MSR_MAX_ENTRIES];
struct vmxctx ctx[VM_MAXCPU];
struct vmxcap cap[VM_MAXCPU];
struct vmxstate state[VM_MAXCPU];
struct vm *vm;
};
CTASSERT((offsetof(struct vmx, pml4ept) & PAGE_MASK) == 0);
CTASSERT((offsetof(struct vmx, vmcs) & PAGE_MASK) == 0);
CTASSERT((offsetof(struct vmx, msr_bitmap) & PAGE_MASK) == 0);
CTASSERT((offsetof(struct vmx, guest_msrs) & 15) == 0);
#define VMX_RETURN_DIRECT 0
#define VMX_RETURN_LONGJMP 1
#define VMX_RETURN_VMRESUME 2
#define VMX_RETURN_VMLAUNCH 3
/*
* vmx_setjmp() returns:
* - 0 when it returns directly
* - 1 when it returns from vmx_longjmp
* - 2 when it returns from vmx_resume (which would only be in the error case)
* - 3 when it returns from vmx_launch (which would only be in the error case)
*/
int vmx_setjmp(struct vmxctx *ctx);
void vmx_longjmp(void); /* returns via vmx_setjmp */
void vmx_launch(struct vmxctx *ctx) __dead2; /* may return via vmx_setjmp */
void vmx_resume(struct vmxctx *ctx) __dead2; /* may return via vmx_setjmp */
u_long vmx_fix_cr0(u_long cr0);
u_long vmx_fix_cr4(u_long cr4);
#endif

View File

@ -0,0 +1,92 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VMX_CONTROLS_H_
#define _VMX_CONTROLS_H_
/* Pin-Based VM-Execution Controls */
#define PINBASED_EXTINT_EXITING (1 << 0)
#define PINBASED_NMI_EXITING (1 << 3)
#define PINBASED_VIRTUAL_NMI (1 << 5)
#define PINBASED_PREMPTION_TIMER (1 << 6)
/* Primary Processor-Based VM-Execution Controls */
#define PROCBASED_INT_WINDOW_EXITING (1 << 2)
#define PROCBASED_TSC_OFFSET (1 << 3)
#define PROCBASED_HLT_EXITING (1 << 7)
#define PROCBASED_INVLPG_EXITING (1 << 9)
#define PROCBASED_MWAIT_EXITING (1 << 10)
#define PROCBASED_RDPMC_EXITING (1 << 11)
#define PROCBASED_RDTSC_EXITING (1 << 12)
#define PROCBASED_CR3_LOAD_EXITING (1 << 15)
#define PROCBASED_CR3_STORE_EXITING (1 << 16)
#define PROCBASED_CR8_LOAD_EXITING (1 << 19)
#define PROCBASED_CR8_STORE_EXITING (1 << 20)
#define PROCBASED_USE_TPR_SHADOW (1 << 21)
#define PROCBASED_NMI_WINDOW_EXITING (1 << 22)
#define PROCBASED_MOV_DR_EXITING (1 << 23)
#define PROCBASED_IO_EXITING (1 << 24)
#define PROCBASED_IO_BITMAPS (1 << 25)
#define PROCBASED_MTF (1 << 27)
#define PROCBASED_MSR_BITMAPS (1 << 28)
#define PROCBASED_MONITOR_EXITING (1 << 29)
#define PROCBASED_PAUSE_EXITING (1 << 30)
#define PROCBASED_SECONDARY_CONTROLS (1 << 31)
/* Secondary Processor-Based VM-Execution Controls */
#define PROCBASED2_VIRTUALIZE_APIC (1 << 0)
#define PROCBASED2_ENABLE_EPT (1 << 1)
#define PROCBASED2_DESC_TABLE_EXITING (1 << 2)
#define PROCBASED2_ENABLE_RDTSCP (1 << 3)
#define PROCBASED2_VIRTUALIZE_X2APIC (1 << 4)
#define PROCBASED2_ENABLE_VPID (1 << 5)
#define PROCBASED2_WBINVD_EXITING (1 << 6)
#define PROCBASED2_UNRESTRICTED_GUEST (1 << 7)
#define PROCBASED2_PAUSE_LOOP_EXITING (1 << 10)
/* VM Exit Controls */
#define VM_EXIT_SAVE_DEBUG_CONTROLS (1 << 2)
#define VM_EXIT_HOST_LMA (1 << 9)
#define VM_EXIT_LOAD_PERF_GLOBAL_CTRL (1 << 12)
#define VM_EXIT_ACKNOWLEDGE_INTERRUPT (1 << 15)
#define VM_EXIT_SAVE_PAT (1 << 18)
#define VM_EXIT_LOAD_PAT (1 << 19)
#define VM_EXIT_SAVE_EFER (1 << 20)
#define VM_EXIT_LOAD_EFER (1 << 21)
#define VM_EXIT_SAVE_PREEMPTION_TIMER (1 << 22)
/* VM Entry Controls */
#define VM_ENTRY_LOAD_DEBUG_CONTROLS (1 << 2)
#define VM_ENTRY_GUEST_LMA (1 << 9)
#define VM_ENTRY_INTO_SMM (1 << 10)
#define VM_ENTRY_DEACTIVATE_DUAL_MONITOR (1 << 11)
#define VM_ENTRY_LOAD_PERF_GLOBAL_CTRL (1 << 13)
#define VM_ENTRY_LOAD_PAT (1 << 14)
#define VM_ENTRY_LOAD_EFER (1 << 15)
#endif

View File

@ -0,0 +1,199 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VMX_CPUFUNC_H_
#define _VMX_CPUFUNC_H_
struct vmcs;
/*
* Section 5.2 "Conventions" from Intel Architecture Manual 2B.
*
* error
* VMsucceed 0
* VMFailInvalid 1
* VMFailValid 2 see also VMCS VM-Instruction Error Field
*/
#define VM_SUCCESS 0
#define VM_FAIL_INVALID 1
#define VM_FAIL_VALID 2
/*
 * Convert the condition flags left by the immediately preceding VMX
 * instruction into an error code (Section 5.2 above):
 *   CF set  -> VMfailInvalid (varname = 1)
 *   ZF set  -> VMfailValid   (varname = 2)
 *   neither -> VMsucceed     (varname = 0)
 *
 * NOTE(review): this relies on EFLAGS being preserved between the asm
 * statement that executed the VMX instruction and this one -- confirm
 * the compiler never emits flag-clobbering code in between, or fold
 * this sequence into the same asm block as the VMX instruction.
 */
#define	VMX_SET_ERROR_CODE(varname) \
	do { \
	__asm __volatile("	jnc 1f;" \
			 "	mov $1, %0;"	/* CF: error = 1 */ \
			 "	jmp 3f;" \
			 "1:	jnz 2f;" \
			 "	mov $2, %0;"	/* ZF: error = 2 */ \
			 "	jmp 3f;" \
			 "2:	mov $0, %0;" \
			 "3:	nop" \
			 :"=r" (varname)); \
	} while (0)
/*
 * Enter VMX operation using 'region' as the VMXON region.
 * Returns 0 on success and non-zero on failure.
 */
static __inline int
vmxon(char *region)
{
	uint64_t pa;
	int rc;

	/* VMXON takes the physical address of the region as a memory operand. */
	pa = vtophys(region);
	__asm __volatile("vmxon %0" : : "m" (*(uint64_t *)&pa) : "memory");
	VMX_SET_ERROR_CODE(rc);
	return (rc);
}
/*
 * Clear the launch state of 'vmcs' and make it inactive.
 * Returns 0 on success and non-zero on failure.
 */
static __inline int
vmclear(struct vmcs *vmcs)
{
	uint64_t pa;
	int rc;

	pa = vtophys(vmcs);
	__asm __volatile("vmclear %0" : : "m" (*(uint64_t *)&pa) : "memory");
	VMX_SET_ERROR_CODE(rc);
	return (rc);
}
/* Leave VMX operation. No error reporting is done for VMXOFF. */
static __inline void
vmxoff(void)
{
	__asm __volatile("vmxoff");
}
/*
 * Store the current-VMCS pointer into '*addr'.
 *
 * VMPTRST writes its memory operand, so '*addr' must be declared as an
 * output ("=m"); the original input-only "m" constraint allowed the
 * compiler to assume '*addr' was left unchanged.
 */
static __inline void
vmptrst(uint64_t *addr)
{
	__asm __volatile("vmptrst %0" : "=m" (*addr) : : "memory");
}
/*
 * Make 'vmcs' the current VMCS on this cpu.
 * Returns 0 on success and non-zero on failure.
 */
static __inline int
vmptrld(struct vmcs *vmcs)
{
	uint64_t pa;
	int rc;

	pa = vtophys(vmcs);
	__asm __volatile("vmptrld %0" : : "m" (*(uint64_t *)&pa) : "memory");
	VMX_SET_ERROR_CODE(rc);
	return (rc);
}
/*
 * Write 'val' into the VMCS field encoded by 'reg'.
 * Returns 0 on success and non-zero on failure.
 */
static __inline int
vmwrite(uint64_t reg, uint64_t val)
{
	int rc;

	__asm __volatile("vmwrite %0, %1" : : "r" (val), "r" (reg) : "memory");
	VMX_SET_ERROR_CODE(rc);
	return (rc);
}
/*
 * Read the VMCS field encoded by 'r' into '*addr'.
 * Returns 0 on success and non-zero on failure.
 *
 * VMREAD stores to its memory operand, so '*addr' must be an output
 * ("=m"); the original input-only "m" constraint allowed the compiler
 * to assume '*addr' was not modified. The operand order in the
 * template is swapped accordingly so the emitted instruction is
 * unchanged ("vmread <field-reg>, <mem>").
 */
static __inline int
vmread(uint64_t r, uint64_t *addr)
{
	int error;

	__asm __volatile("vmread %1, %0" : "=m" (*addr) : "r" (r) : "memory");
	VMX_SET_ERROR_CODE(error);
	return (error);
}
/*
 * Panicking wrapper around vmclear(). Also exits the critical section
 * entered by the matching VMPTRLD() below.
 */
static void __inline
VMCLEAR(struct vmcs *vmcs)
{
	int err;
	err = vmclear(vmcs);
	if (err != 0)
		panic("%s: vmclear(%p) error %d", __func__, vmcs, err);
	critical_exit();
}
/*
 * Panicking wrapper around vmptrld(). Enters a critical section that
 * is exited by the matching VMCLEAR() -- presumably so the thread is
 * not preempted while this VMCS is current on the cpu (confirm).
 */
static void __inline
VMPTRLD(struct vmcs *vmcs)
{
	int err;
	critical_enter();
	err = vmptrld(vmcs);
	if (err != 0)
		panic("%s: vmptrld(%p) error %d", __func__, vmcs, err);
}
#define INVVPID_TYPE_ADDRESS 0UL
#define INVVPID_TYPE_SINGLE_CONTEXT 1UL
#define INVVPID_TYPE_ALL_CONTEXTS 2UL
struct invvpid_desc {
uint16_t vpid;
uint16_t _res1;
uint32_t _res2;
uint64_t linear_addr;
};
CTASSERT(sizeof(struct invvpid_desc) == 16);
/*
 * Invalidate VPID-tagged TLB mappings with invalidation scope 'type'
 * (one of INVVPID_TYPE_*) and descriptor 'desc'. Panics on failure.
 */
static void __inline
invvpid(uint64_t type, struct invvpid_desc desc)
{
	int error;
	__asm __volatile("invvpid %0, %1" :: "m" (desc), "r" (type) : "memory");
	VMX_SET_ERROR_CODE(error);
	if (error)
		panic("invvpid error %d", error);
}
#define INVEPT_TYPE_SINGLE_CONTEXT 1UL
#define INVEPT_TYPE_ALL_CONTEXTS 2UL
struct invept_desc {
uint64_t eptp;
uint64_t _res;
};
CTASSERT(sizeof(struct invept_desc) == 16);
/*
 * Invalidate EPT-derived mappings with invalidation scope 'type'
 * (one of INVEPT_TYPE_*) and descriptor 'desc'. Panics on failure.
 */
static void __inline
invept(uint64_t type, struct invept_desc desc)
{
	int error;
	__asm __volatile("invept %0, %1" :: "m" (desc), "r" (type) : "memory");
	VMX_SET_ERROR_CODE(error);
	if (error)
		panic("invept error %d", error);
}
#endif

View File

@ -0,0 +1,81 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/assym.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/pmap.h>
#include <machine/vmm.h>
#include "vmx.h"
#include "vmx_cpufunc.h"
ASSYM(VMXCTX_GUEST_RDI, offsetof(struct vmxctx, guest_rdi));
ASSYM(VMXCTX_GUEST_RSI, offsetof(struct vmxctx, guest_rsi));
ASSYM(VMXCTX_GUEST_RDX, offsetof(struct vmxctx, guest_rdx));
ASSYM(VMXCTX_GUEST_RCX, offsetof(struct vmxctx, guest_rcx));
ASSYM(VMXCTX_GUEST_R8, offsetof(struct vmxctx, guest_r8));
ASSYM(VMXCTX_GUEST_R9, offsetof(struct vmxctx, guest_r9));
ASSYM(VMXCTX_GUEST_RAX, offsetof(struct vmxctx, guest_rax));
ASSYM(VMXCTX_GUEST_RBX, offsetof(struct vmxctx, guest_rbx));
ASSYM(VMXCTX_GUEST_RBP, offsetof(struct vmxctx, guest_rbp));
ASSYM(VMXCTX_GUEST_R10, offsetof(struct vmxctx, guest_r10));
ASSYM(VMXCTX_GUEST_R11, offsetof(struct vmxctx, guest_r11));
ASSYM(VMXCTX_GUEST_R12, offsetof(struct vmxctx, guest_r12));
ASSYM(VMXCTX_GUEST_R13, offsetof(struct vmxctx, guest_r13));
ASSYM(VMXCTX_GUEST_R14, offsetof(struct vmxctx, guest_r14));
ASSYM(VMXCTX_GUEST_R15, offsetof(struct vmxctx, guest_r15));
ASSYM(VMXCTX_GUEST_CR2, offsetof(struct vmxctx, guest_cr2));
ASSYM(VMXCTX_HOST_R15, offsetof(struct vmxctx, host_r15));
ASSYM(VMXCTX_HOST_R14, offsetof(struct vmxctx, host_r14));
ASSYM(VMXCTX_HOST_R13, offsetof(struct vmxctx, host_r13));
ASSYM(VMXCTX_HOST_R12, offsetof(struct vmxctx, host_r12));
ASSYM(VMXCTX_HOST_RBP, offsetof(struct vmxctx, host_rbp));
ASSYM(VMXCTX_HOST_RSP, offsetof(struct vmxctx, host_rsp));
ASSYM(VMXCTX_HOST_RBX, offsetof(struct vmxctx, host_rbx));
ASSYM(VMXCTX_HOST_RIP, offsetof(struct vmxctx, host_rip));
ASSYM(VMXCTX_LAUNCH_ERROR, offsetof(struct vmxctx, launch_error));
ASSYM(VM_SUCCESS, VM_SUCCESS);
ASSYM(VM_FAIL_INVALID, VM_FAIL_INVALID);
ASSYM(VM_FAIL_VALID, VM_FAIL_VALID);
ASSYM(VMX_RETURN_DIRECT, VMX_RETURN_DIRECT);
ASSYM(VMX_RETURN_LONGJMP, VMX_RETURN_LONGJMP);
ASSYM(VMX_RETURN_VMRESUME, VMX_RETURN_VMRESUME);
ASSYM(VMX_RETURN_VMLAUNCH, VMX_RETURN_VMLAUNCH);

View File

@ -0,0 +1,172 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <machine/cpufunc.h>
#include "vmx_msr.h"
/*
 * A control bit may be set to 1 iff its "allowed-1" bit (the upper 32
 * bits of the capability MSR) is set.
 */
static boolean_t
vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
{

	return ((msr_val & (1UL << (bitpos + 32))) != 0 ? TRUE : FALSE);
}
/*
 * A control bit may be set to 0 iff its "allowed-0" bit (the lower 32
 * bits of the capability MSR) is clear.
 */
static boolean_t
vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
{

	return ((msr_val & (1UL << bitpos)) == 0 ? TRUE : FALSE);
}
/* Return the VMCS revision identifier (low 32 bits of MSR_VMX_BASIC). */
uint32_t
vmx_revision(void)
{
	return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
}
/*
* Generate a bitmask to be used for the VMCS execution control fields.
*
* The caller specifies what bits should be set to one in 'ones_mask'
* and what bits should be set to zero in 'zeros_mask'. The don't-care
* bits are set to the default value. The default values are obtained
* based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
* VMX Capabilities".
*
* Returns zero on success and non-zero on error.
*/
/*
 * Compute the value of the execution control described by 'ctl_reg' /
 * 'true_ctl_reg' into '*retval'. 'ones_mask' bits are forced to 1,
 * 'zeros_mask' bits are forced to 0, remaining bits take their
 * capability-derived defaults. Returns 0 on success, EINVAL when the
 * masks conflict or a requested setting is not allowed by hardware.
 */
int
vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
	       uint32_t zeros_mask, uint32_t *retval)
{
	int i;
	uint64_t val, trueval;
	boolean_t true_ctls_avail, one_allowed, zero_allowed;

	/* We cannot ask the same bit to be set to both '1' and '0' */
	if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
		return (EINVAL);

	/* Bit 55 of MSR_VMX_BASIC advertises the "true" capability MSRs. */
	if (rdmsr(MSR_VMX_BASIC) & (1UL << 55))
		true_ctls_avail = TRUE;
	else
		true_ctls_avail = FALSE;

	val = rdmsr(ctl_reg);
	if (true_ctls_avail)
		trueval = rdmsr(true_ctl_reg);		/* step c */
	else
		trueval = val;				/* step a */

	/*
	 * Use unsigned shifts throughout: '1 << 31' on a signed int is
	 * undefined behavior.
	 */
	for (i = 0; i < 32; i++) {
		one_allowed = vmx_ctl_allows_one_setting(trueval, i);
		zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);

		KASSERT(one_allowed || zero_allowed,
			("invalid zero/one setting for bit %d of ctl 0x%0x, "
			 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));

		if (zero_allowed && !one_allowed) {		/* b(i),c(i) */
			if (ones_mask & (1U << i))
				return (EINVAL);
			*retval &= ~(1U << i);
		} else if (one_allowed && !zero_allowed) {	/* b(i),c(i) */
			if (zeros_mask & (1U << i))
				return (EINVAL);
			*retval |= 1U << i;
		} else {
			if (zeros_mask & (1U << i))	/* b(ii),c(ii) */
				*retval &= ~(1U << i);
			else if (ones_mask & (1U << i)) /* b(ii), c(ii) */
				*retval |= 1U << i;
			else if (!true_ctls_avail)
				*retval &= ~(1U << i);	/* b(iii) */
			else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
				*retval &= ~(1U << i);
			else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
				*retval |= 1U << i;
			else {
				panic("vmx_set_ctlreg: unable to determine "
				      "correct value of ctl bit %d for msr "
				      "0x%0x and true msr 0x%0x", i, ctl_reg,
				      true_ctl_reg);
			}
		}
	}

	return (0);
}
/*
 * Initialize the MSR bitmap so every MSR access is intercepted
 * (msr_bitmap_change_access() below sets a bit to force a vmexit and
 * clears it to allow the access).
 */
void
msr_bitmap_initialize(char *bitmap)
{
	memset(bitmap, 0xff, PAGE_SIZE);
}
/*
 * Grant or revoke guest access to 'msr' in the MSR bitmap. 'access'
 * is a combination of MSR_BITMAP_ACCESS_{READ,WRITE}; an absent flag
 * re-enables interception for that direction. Returns 0 on success or
 * EINVAL for MSRs outside the two architecturally covered ranges.
 */
int
msr_bitmap_change_access(char *bitmap, u_int msr, int access)
{
	int ridx, bit;

	/*
	 * Read bitmap layout: low MSRs (0 - 0x1FFF) start at offset 0,
	 * high MSRs (0xC0000000 - 0xC0001FFF) at offset 1024.
	 */
	if (msr <= 0x00001FFF)
		ridx = msr / 8;
	else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
		ridx = 1024 + (msr - 0xC0000000) / 8;
	else
		return (EINVAL);

	bit = msr & 0x7;

	/* Clear bit == access allowed, set bit == vmexit. */
	if (access & MSR_BITMAP_ACCESS_READ)
		bitmap[ridx] &= ~(1 << bit);
	else
		bitmap[ridx] |= 1 << bit;

	/* The write bitmap follows the read bitmap at offset 2048. */
	if (access & MSR_BITMAP_ACCESS_WRITE)
		bitmap[ridx + 2048] &= ~(1 << bit);
	else
		bitmap[ridx + 2048] |= 1 << bit;

	return (0);
}

View File

@ -0,0 +1,78 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VMX_MSR_H_
#define _VMX_MSR_H_
#define MSR_VMX_BASIC 0x480
#define MSR_VMX_EPT_VPID_CAP 0x48C
#define MSR_VMX_PROCBASED_CTLS 0x482
#define MSR_VMX_TRUE_PROCBASED_CTLS 0x48E
#define MSR_VMX_PINBASED_CTLS 0x481
#define MSR_VMX_TRUE_PINBASED_CTLS 0x48D
#define MSR_VMX_PROCBASED_CTLS2 0x48B
#define MSR_VMX_EXIT_CTLS 0x483
#define MSR_VMX_TRUE_EXIT_CTLS 0x48f
#define MSR_VMX_ENTRY_CTLS 0x484
#define MSR_VMX_TRUE_ENTRY_CTLS 0x490
#define MSR_VMX_CR0_FIXED0 0x486
#define MSR_VMX_CR0_FIXED1 0x487
#define MSR_VMX_CR4_FIXED0 0x488
#define MSR_VMX_CR4_FIXED1 0x489
uint32_t vmx_revision(void);
int vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
uint32_t zeros_mask, uint32_t *retval);
/*
* According to Section 21.10.4 "Software Access to Related Structures",
* changes to data structures pointed to by the VMCS must be made only when
* there is no logical processor with a current VMCS that points to the
* data structure.
*
* This pretty much limits us to configuring the MSR bitmap before VMCS
* initialization for SMP VMs. Unless of course we do it the hard way - which
* would involve some form of synchronization between the vcpus to vmclear
* all VMCSs' that point to the bitmap.
*/
#define MSR_BITMAP_ACCESS_NONE 0x0
#define MSR_BITMAP_ACCESS_READ 0x1
#define MSR_BITMAP_ACCESS_WRITE 0x2
#define MSR_BITMAP_ACCESS_RW (MSR_BITMAP_ACCESS_READ|MSR_BITMAP_ACCESS_WRITE)
void msr_bitmap_initialize(char *bitmap);
int msr_bitmap_change_access(char *bitmap, u_int msr, int access);
#endif

View File

@ -0,0 +1,204 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <machine/asmacros.h>
#include "vmx_assym.s"
/*
* Assumes that %rdi holds a pointer to the 'vmxctx'
*/
/*
 * %rdi must be restored last because it holds the vmxctx pointer that
 * all the other loads are relative to.
 */
#define	VMX_GUEST_RESTORE \
	/* \
	 * Make sure that interrupts are disabled before restoring CR2. \
	 * Otherwise there could be a page fault during the interrupt \
	 * handler execution that would end up trashing CR2. \
	 */ \
	cli; \
	movq	VMXCTX_GUEST_CR2(%rdi),%rsi; \
	movq	%rsi,%cr2; \
	movq	VMXCTX_GUEST_RSI(%rdi),%rsi; \
	movq	VMXCTX_GUEST_RDX(%rdi),%rdx; \
	movq	VMXCTX_GUEST_RCX(%rdi),%rcx; \
	movq	VMXCTX_GUEST_R8(%rdi),%r8; \
	movq	VMXCTX_GUEST_R9(%rdi),%r9; \
	movq	VMXCTX_GUEST_RAX(%rdi),%rax; \
	movq	VMXCTX_GUEST_RBX(%rdi),%rbx; \
	movq	VMXCTX_GUEST_RBP(%rdi),%rbp; \
	movq	VMXCTX_GUEST_R10(%rdi),%r10; \
	movq	VMXCTX_GUEST_R11(%rdi),%r11; \
	movq	VMXCTX_GUEST_R12(%rdi),%r12; \
	movq	VMXCTX_GUEST_R13(%rdi),%r13; \
	movq	VMXCTX_GUEST_R14(%rdi),%r14; \
	movq	VMXCTX_GUEST_R15(%rdi),%r15; \
	movq	VMXCTX_GUEST_RDI(%rdi),%rdi; /* restore rdi the last */
/*
 * Translate the flags left by vmlaunch/vmresume into VM_SUCCESS,
 * VM_FAIL_INVALID (CF) or VM_FAIL_VALID (ZF) in 'reg' and record it in
 * the launch_error field of the vmxctx that %rsp points at.
 */
#define	VM_INSTRUCTION_ERROR(reg) \
	jnc	1f; \
	movl	$VM_FAIL_INVALID,reg;		/* CF is set */ \
	jmp	3f; \
1:	jnz	2f; \
	movl	$VM_FAIL_VALID,reg;		/* ZF is set */ \
	jmp	3f; \
2:	movl	$VM_SUCCESS,reg; \
3:	movl	reg,VMXCTX_LAUNCH_ERROR(%rsp)
.text
/*
* int vmx_setjmp(ctxp)
* %rdi = ctxp
*
* Return value is '0' when it returns directly from here.
* Return value is '1' when it returns after a vm exit through vmx_longjmp.
*/
ENTRY(vmx_setjmp)
	/* Only the callee-saved registers, %rsp and the return %rip are saved. */
	movq	(%rsp),%rax			/* return address */
	movq	%r15,VMXCTX_HOST_R15(%rdi)
	movq	%r14,VMXCTX_HOST_R14(%rdi)
	movq	%r13,VMXCTX_HOST_R13(%rdi)
	movq	%r12,VMXCTX_HOST_R12(%rdi)
	movq	%rbp,VMXCTX_HOST_RBP(%rdi)
	movq	%rsp,VMXCTX_HOST_RSP(%rdi)
	movq	%rbx,VMXCTX_HOST_RBX(%rdi)
	movq	%rax,VMXCTX_HOST_RIP(%rdi)
	/*
	 * XXX save host debug registers
	 */
	movl	$VMX_RETURN_DIRECT,%eax
	ret
END(vmx_setjmp)
/*
* void vmx_return(struct vmxctx *ctxp, int retval)
* %rdi = ctxp
* %rsi = retval
* Return to vmm context through vmx_setjmp() with a value of 'retval'.
*/
ENTRY(vmx_return)
	/* Restore host context saved by vmx_setjmp(). */
	movq	VMXCTX_HOST_R15(%rdi),%r15
	movq	VMXCTX_HOST_R14(%rdi),%r14
	movq	VMXCTX_HOST_R13(%rdi),%r13
	movq	VMXCTX_HOST_R12(%rdi),%r12
	movq	VMXCTX_HOST_RBP(%rdi),%rbp
	movq	VMXCTX_HOST_RSP(%rdi),%rsp
	movq	VMXCTX_HOST_RBX(%rdi),%rbx
	movq	VMXCTX_HOST_RIP(%rdi),%rax
	/* Overwrite the return slot so 'ret' goes back to vmx_setjmp's caller. */
	movq	%rax,(%rsp)			/* return address */
	/*
	 * XXX restore host debug registers
	 */
	movl	%esi,%eax
	ret
END(vmx_return)
/*
* void vmx_longjmp(void)
* %rsp points to the struct vmxctx
*/
ENTRY(vmx_longjmp)
	/*
	 * Save guest state that is not automatically saved in the vmcs.
	 * On entry %rsp points to the struct vmxctx (see function comment).
	 */
	movq	%rdi,VMXCTX_GUEST_RDI(%rsp)
	movq	%rsi,VMXCTX_GUEST_RSI(%rsp)
	movq	%rdx,VMXCTX_GUEST_RDX(%rsp)
	movq	%rcx,VMXCTX_GUEST_RCX(%rsp)
	movq	%r8,VMXCTX_GUEST_R8(%rsp)
	movq	%r9,VMXCTX_GUEST_R9(%rsp)
	movq	%rax,VMXCTX_GUEST_RAX(%rsp)
	movq	%rbx,VMXCTX_GUEST_RBX(%rsp)
	movq	%rbp,VMXCTX_GUEST_RBP(%rsp)
	movq	%r10,VMXCTX_GUEST_R10(%rsp)
	movq	%r11,VMXCTX_GUEST_R11(%rsp)
	movq	%r12,VMXCTX_GUEST_R12(%rsp)
	movq	%r13,VMXCTX_GUEST_R13(%rsp)
	movq	%r14,VMXCTX_GUEST_R14(%rsp)
	movq	%r15,VMXCTX_GUEST_R15(%rsp)
	/* %rdi is free now; use it to stage CR2 (faulting guest address). */
	movq	%cr2,%rdi
	movq	%rdi,VMXCTX_GUEST_CR2(%rsp)
	movq	%rsp,%rdi
	movq	$VMX_RETURN_LONGJMP,%rsi
	callq	vmx_return
END(vmx_longjmp)
/*
* void vmx_resume(struct vmxctx *ctxp)
* %rdi = ctxp
*
* Although the return type is a 'void' this function may return indirectly
* through vmx_setjmp() with a return value of 2.
*/
ENTRY(vmx_resume)
	/*
	 * Restore guest state that is not automatically loaded from the vmcs.
	 */
	VMX_GUEST_RESTORE
	vmresume
	/*
	 * Capture the reason why vmresume failed. (If vmresume succeeds,
	 * execution continues in the guest and does not fall through here.)
	 */
	VM_INSTRUCTION_ERROR(%eax)
	/* Return via vmx_setjmp with return value of VMX_RETURN_VMRESUME */
	movq	%rsp,%rdi
	movq	$VMX_RETURN_VMRESUME,%rsi
	callq	vmx_return
END(vmx_resume)
/*
* void vmx_launch(struct vmxctx *ctxp)
* %rdi = ctxp
*
* Although the return type is a 'void' this function may return indirectly
* through vmx_setjmp() with a return value of 3.
*/
ENTRY(vmx_launch)
	/*
	 * Restore guest state that is not automatically loaded from the vmcs.
	 */
	VMX_GUEST_RESTORE
	vmlaunch
	/*
	 * Capture the reason why vmlaunch failed. (If vmlaunch succeeds,
	 * execution continues in the guest and does not fall through here.)
	 */
	VM_INSTRUCTION_ERROR(%eax)
	/* Return via vmx_setjmp with return value of VMX_RETURN_VMLAUNCH */
	movq	%rsp,%rdi
	movq	$VMX_RETURN_VMLAUNCH,%rsi
	callq	vmx_return
END(vmx_launch)

637
sys/amd64/vmm/intel/vtd.c Normal file
View File

@ -0,0 +1,637 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <dev/pci/pcireg.h>
#include <machine/pmap.h>
#include <machine/vmparam.h>
#include <machine/pci_cfgreg.h>
#include "io/iommu.h"
/*
* Documented in the "Intel Virtualization Technology for Directed I/O",
* Architecture Spec, September 2008.
*/
/* Section 10.4 "Register Descriptions" */
struct vtdmap {
	volatile uint32_t version;	/* 0x00 */
	volatile uint32_t res0;		/* 0x04: reserved */
	volatile uint64_t cap;		/* 0x08: capability register */
	volatile uint64_t ext_cap;	/* 0x10: extended capability register */
	volatile uint32_t gcr;		/* 0x18: global command register */
	volatile uint32_t gsr;		/* 0x1c: global status register */
	volatile uint64_t rta;		/* 0x20: root table address */
	volatile uint64_t ccr;		/* 0x28: context command register */
};
#define VTD_CAP_SAGAW(cap) (((cap) >> 8) & 0x1F)
#define VTD_CAP_ND(cap) ((cap) & 0x7)
#define VTD_CAP_CM(cap) (((cap) >> 7) & 0x1)
#define VTD_CAP_SPS(cap) (((cap) >> 34) & 0xF)
#define VTD_CAP_RWBF(cap) (((cap) >> 4) & 0x1)
#define VTD_ECAP_DI(ecap) (((ecap) >> 2) & 0x1)
#define VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1)
#define VTD_ECAP_IRO(ecap) (((ecap) >> 8) & 0x3FF)
#define VTD_GCR_WBF (1 << 27)
#define VTD_GCR_SRTP (1 << 30)
#define VTD_GCR_TE (1 << 31)
#define VTD_GSR_WBFS (1 << 27)
#define VTD_GSR_RTPS (1 << 30)
#define VTD_GSR_TES (1 << 31)
#define VTD_CCR_ICC (1UL << 63) /* invalidate context cache */
#define VTD_CCR_CIRG_GLOBAL (1UL << 61) /* global invalidation */
#define VTD_IIR_IVT (1UL << 63) /* invalidation IOTLB */
#define VTD_IIR_IIRG_GLOBAL (1ULL << 60) /* global IOTLB invalidation */
#define VTD_IIR_IIRG_DOMAIN (2ULL << 60) /* domain IOTLB invalidation */
#define VTD_IIR_IIRG_PAGE (3ULL << 60) /* page IOTLB invalidation */
#define VTD_IIR_DRAIN_READS (1ULL << 49) /* drain pending DMA reads */
#define VTD_IIR_DRAIN_WRITES (1ULL << 48) /* drain pending DMA writes */
#define VTD_IIR_DOMAIN_P 32
#define VTD_ROOT_PRESENT 0x1
#define VTD_CTX_PRESENT 0x1
#define VTD_CTX_TT_ALL (1UL << 2)
#define VTD_PTE_RD (1UL << 0)
#define VTD_PTE_WR (1UL << 1)
#define VTD_PTE_SUPERPAGE (1UL << 7)
#define VTD_PTE_ADDR_M (0x000FFFFFFFFFF000UL)
/*
 * Software state for one DMA remapping domain: the set of devices
 * attached to it (via vtd_add_device) share the page tables at 'ptp'.
 */
struct domain {
	uint64_t	*ptp;		/* first level page table page */
	int		pt_levels;	/* number of page table levels */
	int		addrwidth;	/* 'AW' field in context entry */
	int		spsmask;	/* supported super page sizes */
	u_int		id;		/* domain id */
	vm_paddr_t	maxaddr;	/* highest address to be mapped */
	SLIST_ENTRY(domain) next;
};
static SLIST_HEAD(, domain) domhead;
#define DRHD_MAX_UNITS 8
static int drhd_num;
static struct vtdmap *vtdmaps[DRHD_MAX_UNITS];
static int max_domains;
typedef int (*drhd_ident_func_t)(void);
static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
static MALLOC_DEFINE(M_VTD, "vtd", "vtd");
/*
* Config space register definitions from the "Intel 5520 and 5500" datasheet.
*/
/*
 * Detect the VT-d units on an Intel 5520/5500 ("Tylersburg") IOH by
 * probing config space of device 0/20/0. Fills vtdmaps[] and returns
 * the number of units found (0 when the chipset does not match).
 * The magic register offsets (0x9C, 0x160, 0x180) come from the
 * datasheet referenced above.
 */
static int
tylersburg_vtd_ident(void)
{
	int units, nlbus;
	uint16_t did, vid;
	uint32_t miscsts, vtbar;
	const int bus = 0;
	const int slot = 20;
	const int func = 0;
	units = 0;
	vid = pci_cfgregread(bus, slot, func, PCIR_VENDOR, 2);
	did = pci_cfgregread(bus, slot, func, PCIR_DEVICE, 2);
	if (vid != 0x8086 || did != 0x342E)
		goto done;
	/*
	 * Check if this is a dual IOH configuration.
	 */
	miscsts = pci_cfgregread(bus, slot, func, 0x9C, 4);
	if (miscsts & (1 << 25))
		nlbus = pci_cfgregread(bus, slot, func, 0x160, 1);
	else
		nlbus = -1;
	/* Bit 0 of the VT-d BAR indicates that the unit is enabled. */
	vtbar = pci_cfgregread(bus, slot, func, 0x180, 4);
	if (vtbar & 0x1) {
		vtdmaps[units++] = (struct vtdmap *)
			PHYS_TO_DMAP(vtbar & 0xffffe000);
	} else if (bootverbose)
		printf("VT-d unit in legacy IOH is disabled!\n");
	if (nlbus != -1) {
		vtbar = pci_cfgregread(nlbus, slot, func, 0x180, 4);
		if (vtbar & 0x1) {
			vtdmaps[units++] = (struct vtdmap *)
				PHYS_TO_DMAP(vtbar & 0xffffe000);
		} else if (bootverbose)
			printf("VT-d unit in non-legacy IOH is disabled!\n");
	}
done:
	return (units);
}
static drhd_ident_func_t drhd_ident_funcs[] = {
tylersburg_vtd_ident,
NULL
};
/*
 * Return the number of domains supported by the unit as encoded in the
 * ND field of the capability register.
 */
static int
vtd_max_domains(struct vtdmap *vtdmap)
{
	int nd;

	nd = VTD_CAP_ND(vtdmap->cap);

	/*
	 * ND encodes 2^(2*nd + 4) domains for values 0 through 6
	 * (16, 64, 256, ..., 64K); 7 is reserved.
	 */
	if (nd < 0 || nd > 6)
		panic("vtd_max_domains: invalid value of nd (0x%0x)", nd);

	return (1 << (2 * nd + 4));
}
/*
 * Allocate the lowest domain id not used by any domain on 'domhead'.
 * Panics when all ids are in use.
 */
static u_int
domain_id(void)
{
	u_int id;
	struct domain *dom;
	/* Skip domain id 0 - it is reserved when Caching Mode field is set */
	for (id = 1; id < max_domains; id++) {
		SLIST_FOREACH(dom, &domhead, next) {
			if (dom->id == id)
				break;
		}
		if (dom == NULL)	/* no domain owns 'id' */
			break;		/* found it */
	}
	if (id >= max_domains)
		panic("domain ids exhausted");
	return (id);
}
/*
 * Make cpu stores to the translation structures visible to the
 * remapping hardware: flush cpu caches if the unit does not snoop
 * them, and drain the unit's internal write buffer when the RWBF
 * capability requires it.
 */
static void
vtd_wbflush(struct vtdmap *vtdmap)
{
	if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0)
		pmap_invalidate_cache();
	if (VTD_CAP_RWBF(vtdmap->cap)) {
		vtdmap->gcr = VTD_GCR_WBF;
		while ((vtdmap->gsr & VTD_GSR_WBFS) != 0)
			;	/* wait for the flush to complete */
	}
}
/* Globally invalidate the context cache and spin until completion. */
static void
vtd_ctx_global_invalidate(struct vtdmap *vtdmap)
{
	vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL;
	while ((vtdmap->ccr & VTD_CCR_ICC) != 0)
		;	/* hardware clears ICC when done */
}
/*
 * Globally invalidate the IOTLB, draining pending DMA reads and
 * writes, and spin until the hardware clears the IVT bit.
 */
static void
vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
{
	int iro;
	volatile uint64_t *iotlb_reg;

	vtd_wbflush(vtdmap);

	/* The IOTLB registers live at an offset advertised in the ECAP. */
	iro = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
	iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + iro + 8);

	*iotlb_reg = VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
	    VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;

	while ((*iotlb_reg & VTD_IIR_IVT) != 0)
		;
}
/* Turn on DMA remapping and wait for the status bit to latch. */
static void
vtd_translation_enable(struct vtdmap *vtdmap)
{
	vtdmap->gcr = VTD_GCR_TE;
	while ((vtdmap->gsr & VTD_GSR_TES) == 0)
		;
}
/* Turn off DMA remapping and wait for the status bit to clear. */
static void
vtd_translation_disable(struct vtdmap *vtdmap)
{
	vtdmap->gcr = 0;
	while ((vtdmap->gsr & VTD_GSR_TES) != 0)
		;
}
/*
 * Probe for DMA remapping hardware and set up the root table.
 * Returns 0 on success, ENXIO when no unit is found.
 */
static int
vtd_init(void)
{
	int i, units;
	struct vtdmap *vtdmap;
	vm_paddr_t ctx_paddr;

	/*
	 * Initialize 'units' so it is not read uninitialized below if
	 * drhd_ident_funcs[] is ever empty.
	 */
	units = 0;
	for (i = 0; drhd_ident_funcs[i] != NULL; i++) {
		units = (*drhd_ident_funcs[i])();
		if (units > 0)
			break;
	}

	if (units <= 0)
		return (ENXIO);

	drhd_num = units;
	vtdmap = vtdmaps[0];

	/* Caching mode is used by emulated hardware; not supported here. */
	if (VTD_CAP_CM(vtdmap->cap) != 0)
		panic("vtd_init: invalid caching mode");

	max_domains = vtd_max_domains(vtdmap);

	/*
	 * Set up the root-table to point to the context-entry tables
	 */
	for (i = 0; i < 256; i++) {
		ctx_paddr = vtophys(ctx_tables[i]);
		if (ctx_paddr & PAGE_MASK)
			panic("ctx table (0x%0lx) not page aligned", ctx_paddr);

		root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT;
	}

	return (0);
}
/* Placeholder: no teardown is required yet. */
static void
vtd_cleanup(void)
{
}
/*
 * On every DRHD unit: program the root table pointer, invalidate any
 * stale context/IOTLB state and turn on DMA translation.
 */
static void
vtd_enable(void)
{
	int i;
	struct vtdmap *vtdmap;
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		vtd_wbflush(vtdmap);
		/* Update the root table address */
		vtdmap->rta = vtophys(root_table);
		vtdmap->gcr = VTD_GCR_SRTP;
		while ((vtdmap->gsr & VTD_GSR_RTPS) == 0)
			;	/* wait for the root table pointer to latch */
		vtd_ctx_global_invalidate(vtdmap);
		vtd_iotlb_global_invalidate(vtdmap);
		vtd_translation_enable(vtdmap);
	}
}
static void
vtd_disable(void)
{
int i;
struct vtdmap *vtdmap;
for (i = 0; i < drhd_num; i++) {
vtdmap = vtdmaps[i];
vtd_translation_disable(vtdmap);
}
}
/*
 * Attach pci device bus/slot/func to the domain 'arg' by pointing its
 * context entry at the domain's page tables. Panics if the device is
 * already owned by another domain.
 */
static void
vtd_add_device(void *arg, int bus, int slot, int func)
{
	int idx;
	uint64_t *ctxp;
	struct domain *dom = arg;
	vm_paddr_t pt_paddr;
	struct vtdmap *vtdmap;
	if (bus < 0 || bus > PCI_BUSMAX ||
	    slot < 0 || slot > PCI_SLOTMAX ||
	    func < 0 || func > PCI_FUNCMAX)
		panic("vtd_add_device: invalid bsf %d/%d/%d", bus, slot, func);
	vtdmap = vtdmaps[0];
	ctxp = ctx_tables[bus];
	pt_paddr = vtophys(dom->ptp);
	/* Each context entry is 128 bits: two uint64_t slots per device. */
	idx = (slot << 3 | func) * 2;
	if (ctxp[idx] & VTD_CTX_PRESENT) {
		panic("vtd_add_device: device %d/%d/%d is already owned by "
		      "domain %d", bus, slot, func,
		      (uint16_t)(ctxp[idx + 1] >> 8));
	}
	/*
	 * Order is important. The 'present' bit is set only after all fields
	 * of the context pointer are initialized.
	 */
	ctxp[idx + 1] = dom->addrwidth | (dom->id << 8);
	if (VTD_ECAP_DI(vtdmap->ext_cap))
		ctxp[idx] = VTD_CTX_TT_ALL;
	else
		ctxp[idx] = 0;
	ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT;
	/*
	 * 'Not Present' entries are not cached in either the Context Cache
	 * or in the IOTLB, so there is no need to invalidate either of them.
	 */
}
/*
 * Detach pci device bus/slot/func from whatever domain owns it by
 * clearing its context entry, then invalidate the context cache and
 * the IOTLB on every DRHD unit.
 */
static void
vtd_remove_device(void *arg, int bus, int slot, int func)
{
	int i, idx;
	uint64_t *ctxp;
	struct vtdmap *vtdmap;

	if (bus < 0 || bus > PCI_BUSMAX ||
	    slot < 0 || slot > PCI_SLOTMAX ||
	    func < 0 || func > PCI_FUNCMAX) {
		/* Fixed copy-pasted function name in the panic message. */
		panic("vtd_remove_device: invalid bsf %d/%d/%d",
		      bus, slot, func);
	}

	ctxp = ctx_tables[bus];
	idx = (slot << 3 | func) * 2;

	/*
	 * Order is important. The 'present' bit must be cleared first.
	 */
	ctxp[idx] = 0;
	ctxp[idx + 1] = 0;

	/*
	 * Invalidate the Context Cache and the IOTLB.
	 *
	 * XXX use device-selective invalidation for Context Cache
	 * XXX use domain-selective invalidation for IOTLB
	 */
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		vtd_ctx_global_invalidate(vtdmap);
		vtd_iotlb_global_invalidate(vtdmap);
	}
}
/*
 * Create a 'gpa' -> 'hpa' mapping in domain 'arg' and return the size
 * actually mapped (a base page or a superpage), which may be less
 * than 'len' -- presumably the caller loops until the whole range is
 * mapped (confirm against callers).
 */
static uint64_t
vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
{
	struct domain *dom;
	int i, spshift, ptpindex, ptpshift, nlevels;
	uint64_t spsize, *ptp;
	dom = arg;
	ptpindex = 0;
	ptpshift = 0;
	if (gpa & PAGE_MASK)
		panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa);
	if (hpa & PAGE_MASK)
		panic("vtd_create_mapping: unaligned hpa 0x%0lx", hpa);
	if (len & PAGE_MASK)
		panic("vtd_create_mapping: unaligned len 0x%0lx", len);
	/*
	 * Compute the size of the mapping that we can accommodate.
	 *
	 * This is based on three factors:
	 * - supported super page size
	 * - alignment of the region starting at 'gpa' and 'hpa'
	 * - length of the region 'len'
	 */
	spshift = 48;
	for (i = 3; i >= 0; i--) {
		spsize = 1UL << spshift;
		if ((dom->spsmask & (1 << i)) != 0 &&
		    (gpa & (spsize - 1)) == 0 &&
		    (hpa & (spsize - 1)) == 0 &&
		    (len >= spsize)) {
			break;
		}
		spshift -= 9;
	}
	ptp = dom->ptp;
	nlevels = dom->pt_levels;
	while (--nlevels >= 0) {
		ptpshift = 12 + nlevels * 9;
		ptpindex = (gpa >> ptpshift) & 0x1FF;
		/* We have reached the leaf mapping */
		if (spshift >= ptpshift) {
			break;
		}
		/*
		 * We are working on a non-leaf page table page.
		 *
		 * Create a downstream page table page if necessary and point
		 * to it from the current page table.
		 */
		if (ptp[ptpindex] == 0) {
			void *nlp = malloc(PAGE_SIZE, M_VTD, M_WAITOK | M_ZERO);
			ptp[ptpindex] = vtophys(nlp)| VTD_PTE_RD | VTD_PTE_WR;
		}
		ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M);
	}
	if ((gpa & ((1UL << ptpshift) - 1)) != 0)
		panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift);
	/*
	 * Create a 'gpa' -> 'hpa' mapping
	 */
	ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR;
	if (nlevels > 0)
		ptp[ptpindex] |= VTD_PTE_SUPERPAGE;
	return (1UL << ptpshift);
}
/*
 * Create a translation domain able to map guest addresses up to
 * 'maxaddr'. Computes the AGAW and selects the smallest supported
 * AGAW with its matching page table depth, then allocates the domain
 * and its root page table page. Returns an opaque domain pointer.
 */
static void *
vtd_create_domain(vm_paddr_t maxaddr)
{
	struct domain *dom;
	vm_paddr_t addr;
	int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth;
	struct vtdmap *vtdmap;
	if (drhd_num <= 0)
		panic("vtd_create_domain: no dma remapping hardware available");
	vtdmap = vtdmaps[0];
	/*
	 * Calculate AGAW.
	 * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
	 */
	addr = 0;
	for (gaw = 0; addr < maxaddr; gaw++)
		addr = 1ULL << gaw;
	/* Round the GAW up to the next multiple of 9 above 12, capped at 64. */
	res = (gaw - 12) % 9;
	if (res == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - res;
	if (agaw > 64)
		agaw = 64;
	/*
	 * Select the smallest Supported AGAW and the corresponding number
	 * of page table levels.
	 */
	pt_levels = 2;
	sagaw = 30;
	addrwidth = 0;
	tmp = VTD_CAP_SAGAW(vtdmap->cap);
	for (i = 0; i < 5; i++) {
		if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
			break;
		pt_levels++;
		addrwidth++;
		sagaw += 9;
		if (sagaw > 64)
			sagaw = 64;
	}
	if (i >= 5) {
		panic("vtd_create_domain: SAGAW 0x%lx does not support AGAW %d",
		      VTD_CAP_SAGAW(vtdmap->cap), agaw);
	}
	dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
	dom->pt_levels = pt_levels;
	dom->addrwidth = addrwidth;
	dom->spsmask = VTD_CAP_SPS(vtdmap->cap);
	dom->id = domain_id();
	dom->maxaddr = maxaddr;
	dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK);
	if ((uintptr_t)dom->ptp & PAGE_MASK)
		panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);
	SLIST_INSERT_HEAD(&domhead, dom, next);
	return (dom);
}
/*
 * Recursively release a page-table page and every page-table page
 * reachable below it.  Superpage entries and non-present entries have
 * no child table and are skipped.  The page is scrubbed before being
 * returned to the allocator.
 */
static void
vtd_free_ptp(uint64_t *ptp, int level)
{
	uint64_t *child;
	int idx;

	if (level > 1) {
		for (idx = 0; idx < 512; idx++) {
			if ((ptp[idx] & (VTD_PTE_RD | VTD_PTE_WR)) == 0)
				continue;	/* not present */
			if ((ptp[idx] & VTD_PTE_SUPERPAGE) != 0)
				continue;	/* leaf mapping, no child */
			child = (uint64_t *)
			    PHYS_TO_DMAP(ptp[idx] & VTD_PTE_ADDR_M);
			vtd_free_ptp(child, level - 1);
		}
	}

	bzero(ptp, PAGE_SIZE);
	free(ptp, M_VTD);
}
/*
 * Tear down a translation domain: unlink it from the global domain
 * list, free its entire page-table tree, then the domain itself.
 */
static void
vtd_destroy_domain(void *arg)
{
	struct domain *dom = arg;

	SLIST_REMOVE(&domhead, dom, domain, next);
	vtd_free_ptp(dom->ptp, dom->pt_levels);
	free(dom, M_VTD);
}
struct iommu_ops iommu_ops_intel = {
vtd_init,
vtd_cleanup,
vtd_enable,
vtd_disable,
vtd_create_domain,
vtd_destroy_domain,
vtd_create_mapping,
vtd_add_device,
vtd_remove_device,
};

230
sys/amd64/vmm/io/iommu.c Normal file
View File

@ -0,0 +1,230 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <machine/md_var.h>
#include "vmm_util.h"
#include "iommu.h"
static boolean_t iommu_avail;
static struct iommu_ops *ops;
static void *host_domain;
/* Initialize the selected backend; ENXIO when no backend was chosen. */
static __inline int
IOMMU_INIT(void)
{

	if (ops == NULL)
		return (ENXIO);

	return ((*ops->init)());
}
/* Backend cleanup hook; no-op unless a backend initialized successfully. */
static __inline void
IOMMU_CLEANUP(void)
{

	if (ops == NULL || !iommu_avail)
		return;

	(*ops->cleanup)();
}
/* Create a backend translation domain; NULL when no IOMMU is available. */
static __inline void *
IOMMU_CREATE_DOMAIN(vm_paddr_t maxaddr)
{

	if (ops == NULL || !iommu_avail)
		return (NULL);

	return ((*ops->create_domain)(maxaddr));
}
/* Destroy a backend translation domain; no-op without an IOMMU. */
static __inline void
IOMMU_DESTROY_DOMAIN(void *dom)
{

	if (ops == NULL || !iommu_avail)
		return;

	(*ops->destroy_domain)(dom);
}
/*
 * Install a gpa->hpa mapping of up to 'len' bytes; returns the number of
 * bytes actually mapped.  Without an IOMMU the whole request is reported
 * as mapped so callers' loops terminate (XXX inherited from original).
 */
static __inline uint64_t
IOMMU_CREATE_MAPPING(void *domain, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
{

	if (ops == NULL || !iommu_avail)
		return (len);	/* XXX */

	return ((*ops->create_mapping)(domain, gpa, hpa, len));
}
/* Attach a PCI function (bus/slot/func) to a backend domain. */
static __inline void
IOMMU_ADD_DEVICE(void *domain, int bus, int slot, int func)
{

	if (ops == NULL || !iommu_avail)
		return;

	(*ops->add_device)(domain, bus, slot, func);
}
/* Detach a PCI function (bus/slot/func) from a backend domain. */
static __inline void
IOMMU_REMOVE_DEVICE(void *domain, int bus, int slot, int func)
{

	if (ops == NULL || !iommu_avail)
		return;

	(*ops->remove_device)(domain, bus, slot, func);
}
/* Turn on DMA remapping in the backend hardware. */
static __inline void
IOMMU_ENABLE(void)
{

	if (ops == NULL || !iommu_avail)
		return;

	(*ops->enable)();
}
/* Turn off DMA remapping in the backend hardware. */
static __inline void
IOMMU_DISABLE(void)
{

	if (ops == NULL || !iommu_avail)
		return;

	(*ops->disable)();
}
/*
 * Module-wide IOMMU initialization.
 *
 * Selects the vendor backend (Intel or AMD) from the host CPU, initializes
 * it, then builds the "host domain": an identity (1:1) mapping of
 * [0, Maxmem) into which every PCI device not destined for passthrough is
 * placed, so host-owned devices continue to DMA as if no IOMMU existed.
 * Finally enables remapping.  Silently returns if no backend initializes,
 * leaving 'iommu_avail' false so all IOMMU_* dispatchers become no-ops.
 */
void
iommu_init(void)
{
	int error, bus, slot, func;
	vm_paddr_t maxaddr;
	const char *name;
	device_t dev;

	if (vmm_is_intel())
		ops = &iommu_ops_intel;
	else if (vmm_is_amd())
		ops = &iommu_ops_amd;
	else
		ops = NULL;

	error = IOMMU_INIT();
	if (error)
		return;

	iommu_avail = TRUE;

	/*
	 * Create a domain for the devices owned by the host
	 */
	maxaddr = ptoa(Maxmem);
	host_domain = IOMMU_CREATE_DOMAIN(maxaddr);
	if (host_domain == NULL)
		panic("iommu_init: unable to create a host domain");

	/*
	 * Create 1:1 mappings from '0' to 'Maxmem' for devices assigned to
	 * the host
	 */
	iommu_create_mapping(host_domain, 0, 0, maxaddr);

	/*
	 * NOTE(review): only PCI domain (segment) 0 is scanned below —
	 * confirm multi-segment systems are out of scope for bhyve here.
	 */
	for (bus = 0; bus <= PCI_BUSMAX; bus++) {
		for (slot = 0; slot <= PCI_SLOTMAX; slot++) {
			for (func = 0; func <= PCI_FUNCMAX; func++) {
				dev = pci_find_dbsf(0, bus, slot, func);
				if (dev == NULL)
					continue;

				/* skip passthrough devices */
				name = device_get_name(dev);
				if (name != NULL && strcmp(name, "ppt") == 0)
					continue;

				/* everything else belongs to the host domain */
				iommu_add_device(host_domain, bus, slot, func);
			}
		}
	}

	IOMMU_ENABLE();
}
/*
 * Module-wide teardown: quiesce the hardware first, then release the
 * host identity domain, and finally let the backend clean itself up.
 */
void
iommu_cleanup(void)
{

	IOMMU_DISABLE();
	IOMMU_DESTROY_DOMAIN(host_domain);
	IOMMU_CLEANUP();
}
/* Public wrapper: create a translation domain covering [0, maxaddr). */
void *
iommu_create_domain(vm_paddr_t maxaddr)
{

	return (IOMMU_CREATE_DOMAIN(maxaddr));
}
/* Public wrapper: destroy a translation domain made by iommu_create_domain. */
void
iommu_destroy_domain(void *dom)
{

	IOMMU_DESTROY_DOMAIN(dom);
}
/*
 * Map 'len' bytes of gpa->hpa into 'dom', chunk by chunk: each backend
 * call maps as much as its page/superpage geometry permits and reports
 * the amount, which advances both addresses until the range is covered.
 */
void
iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa, size_t len)
{
	uint64_t chunk, togo;

	for (togo = len; togo > 0; togo -= chunk) {
		chunk = IOMMU_CREATE_MAPPING(dom, gpa, hpa, togo);
		gpa += chunk;
		hpa += chunk;
	}
}
/* Public wrapper: place PCI function bus/slot/func into domain 'dom'. */
void
iommu_add_device(void *dom, int bus, int slot, int func)
{

	IOMMU_ADD_DEVICE(dom, bus, slot, func);
}
/* Public wrapper: remove PCI function bus/slot/func from domain 'dom'. */
void
iommu_remove_device(void *dom, int bus, int slot, int func)
{

	IOMMU_REMOVE_DEVICE(dom, bus, slot, func);
}

67
sys/amd64/vmm/io/iommu.h Normal file
View File

@ -0,0 +1,67 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _IO_IOMMU_H_
#define _IO_IOMMU_H_
typedef int (*iommu_init_func_t)(void);
typedef void (*iommu_cleanup_func_t)(void);
typedef void (*iommu_enable_func_t)(void);
typedef void (*iommu_disable_func_t)(void);
typedef void *(*iommu_create_domain_t)(vm_paddr_t maxaddr);
typedef void (*iommu_destroy_domain_t)(void *domain);
typedef uint64_t (*iommu_create_mapping_t)(void *domain, vm_paddr_t gpa,
vm_paddr_t hpa, uint64_t len);
typedef void (*iommu_add_device_t)(void *domain, int bus, int slot, int func);
typedef void (*iommu_remove_device_t)(void *dom, int bus, int slot, int func);
/*
 * Vendor backend vtable for the vmm iommu layer.  One instance per
 * supported vendor (iommu_ops_intel, iommu_ops_amd); iommu_init() picks
 * one at module init based on the host CPU.
 */
struct iommu_ops {
	iommu_init_func_t	init;		/* module wide */
	iommu_cleanup_func_t	cleanup;
	iommu_enable_func_t	enable;
	iommu_disable_func_t	disable;

	iommu_create_domain_t	create_domain;	/* domain-specific */
	iommu_destroy_domain_t	destroy_domain;
	iommu_create_mapping_t	create_mapping;
	iommu_add_device_t	add_device;
	iommu_remove_device_t	remove_device;
};
extern struct iommu_ops iommu_ops_intel;
extern struct iommu_ops iommu_ops_amd;
void iommu_init(void);
void iommu_cleanup(void);
void *iommu_create_domain(vm_paddr_t maxaddr);
void iommu_destroy_domain(void *dom);
void iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa,
size_t len);
void iommu_add_device(void *dom, int bus, int slot, int func);
void iommu_remove_device(void *dom, int bus, int slot, int func);
#endif

449
sys/amd64/vmm/io/ppt.c Normal file
View File

@ -0,0 +1,449 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/pciio.h>
#include <sys/rman.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <machine/resource.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include "vmm_lapic.h"
#include "vmm_ktr.h"
#include "iommu.h"
#include "ppt.h"
#define MAX_PPTDEVS (sizeof(pptdevs) / sizeof(pptdevs[0]))
#define MAX_MMIOSEGS (PCIR_MAX_BAR_0 + 1)
#define MAX_MSIMSGS 32
struct pptintr_arg { /* pptintr(pptintr_arg) */
struct pptdev *pptdev;
int msg;
};
static struct pptdev {
device_t dev;
struct vm *vm; /* owner of this device */
struct vm_memory_segment mmio[MAX_MMIOSEGS];
struct {
int num_msgs; /* guest state */
int vector;
int vcpu;
int startrid; /* host state */
struct resource *res[MAX_MSIMSGS];
void *cookie[MAX_MSIMSGS];
struct pptintr_arg arg[MAX_MSIMSGS];
} msi;
} pptdevs[32];
static int num_pptdevs;
/*
 * newbus probe: claim a PCI function as a passthrough candidate.
 * Succeeds only when the administrator whitelisted the b/s/f via the
 * pptdevs tunable AND the function is a normal (type 0) endpoint.
 */
static int
ppt_probe(device_t dev)
{
	struct pci_devinfo *dinfo;
	int b, s, f;

	dinfo = (struct pci_devinfo *)device_get_ivars(dev);

	b = pci_get_bus(dev);
	s = pci_get_slot(dev);
	f = pci_get_function(dev);

	/*
	 * To qualify as a pci passthrough device a device must:
	 * - be allowed by administrator to be used in this role
	 * - be an endpoint device
	 */
	if (!vmm_is_pptdev(b, s, f))
		return (ENXIO);
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return (ENXIO);

	return (0);
}
/*
 * newbus attach: record the device in the static pptdevs[] table.
 * Fails with ENXIO once the table is full.
 */
static int
ppt_attach(device_t dev)
{
	int idx;

	if (num_pptdevs >= MAX_PPTDEVS) {
		printf("ppt_attach: maximum number of pci passthrough devices "
		       "exceeded\n");
		return (ENXIO);
	}

	idx = num_pptdevs++;
	pptdevs[idx].dev = dev;

	if (bootverbose)
		device_printf(dev, "attached\n");

	return (0);
}
/*
 * newbus detach: currently unconditional.
 *
 * XXX check whether there are any pci passthrough devices assigned
 * to guests before we allow this driver to detach.
 */
static int
ppt_detach(device_t dev)
{

	return (0);
}
/*
 * newbus glue: method table, class definition and registration of the
 * "ppt" driver on the pci bus.  The driver wins a device only when
 * ppt_probe() accepts it (i.e. the function is whitelisted for
 * passthrough), otherwise the regular PCI driver attaches.
 */
static device_method_t ppt_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		ppt_probe),
	DEVMETHOD(device_attach,	ppt_attach),
	DEVMETHOD(device_detach,	ppt_detach),
	{0, 0}
};

static devclass_t ppt_devclass;
DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0);
DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
static struct pptdev *
ppt_find(int bus, int slot, int func)
{
device_t dev;
int i, b, s, f;
for (i = 0; i < num_pptdevs; i++) {
dev = pptdevs[i].dev;
b = pci_get_bus(dev);
s = pci_get_slot(dev);
f = pci_get_function(dev);
if (bus == b && slot == s && func == f)
return (&pptdevs[i]);
}
return (NULL);
}
/*
 * Remove every guest MMIO mapping recorded for 'ppt' from 'vm' and
 * clear the bookkeeping slots (len == 0 marks a slot free).
 */
static void
ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
{
	struct vm_memory_segment *seg;
	int n;

	for (n = 0; n < MAX_MMIOSEGS; n++) {
		seg = &ppt->mmio[n];
		if (seg->len == 0)
			continue;	/* slot unused */
		(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
		bzero(seg, sizeof(struct vm_memory_segment));
	}
}
/*
 * Release every MSI (or shared legacy) interrupt resource held for 'ppt':
 * detach the interrupt handlers, free the IRQ resources, and — when MSI
 * was in use (startrid == 1) — give the MSI vectors back to the PCI layer.
 * Safe to call when nothing is set up (num_msgs == 0).
 */
static void
ppt_teardown_msi(struct pptdev *ppt)
{
	int i, rid;
	void *cookie;
	struct resource *res;

	if (ppt->msi.num_msgs == 0)
		return;

	for (i = 0; i < ppt->msi.num_msgs; i++) {
		rid = ppt->msi.startrid + i;
		res = ppt->msi.res[i];
		cookie = ppt->msi.cookie[i];

		/* cookie may be NULL if setup stopped after allocation
		 * but before bus_setup_intr succeeded. */
		if (cookie != NULL)
			bus_teardown_intr(ppt->dev, res, cookie);

		if (res != NULL)
			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);

		ppt->msi.res[i] = NULL;
		ppt->msi.cookie[i] = NULL;
	}

	/* startrid == 1 means MSI (vs. legacy rid 0) was allocated. */
	if (ppt->msi.startrid == 1)
		pci_release_msi(ppt->dev);

	ppt->msi.num_msgs = 0;
}
/*
 * Give ownership of PCI function bus/slot/func to 'vm' and add it to the
 * VM's IOMMU domain.  ENOENT if the function is not a ppt device; EBUSY
 * if it already belongs to a different VM.  Re-assigning to the same VM
 * is a harmless no-op.
 */
int
ppt_assign_device(struct vm *vm, int bus, int slot, int func)
{
	struct pptdev *ppt;

	ppt = ppt_find(bus, slot, func);
	if (ppt == NULL)
		return (ENOENT);

	/*
	 * If this device is owned by a different VM then we
	 * cannot change its owner.
	 */
	if (ppt->vm != NULL && ppt->vm != vm)
		return (EBUSY);

	ppt->vm = vm;
	iommu_add_device(vm_iommu_domain(vm), bus, slot, func);
	return (0);
}
/*
 * Take PCI function bus/slot/func away from 'vm': unmap its guest MMIO
 * segments, tear down its MSI state, pull it out of the VM's IOMMU
 * domain and mark it ownerless.  ENOENT if not a ppt device; EBUSY if
 * it is owned by some other VM.
 */
int
ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
{
	struct pptdev *ppt;

	ppt = ppt_find(bus, slot, func);
	if (ppt == NULL)
		return (ENOENT);

	/*
	 * If this device is not owned by this 'vm' then bail out.
	 */
	if (ppt->vm != vm)
		return (EBUSY);

	ppt_unmap_mmio(vm, ppt);
	ppt_teardown_msi(ppt);
	iommu_remove_device(vm_iommu_domain(vm), bus, slot, func);
	ppt->vm = NULL;
	return (0);
}
/*
 * Unassign every passthrough device currently owned by 'vm'.
 * Used at VM teardown.  Always returns 0.
 */
int
ppt_unassign_all(struct vm *vm)
{
	device_t dev;
	int n;

	for (n = 0; n < num_pptdevs; n++) {
		if (pptdevs[n].vm != vm)
			continue;
		dev = pptdevs[n].dev;
		ppt_unassign_device(vm, pci_get_bus(dev),
		    pci_get_slot(dev), pci_get_function(dev));
	}

	return (0);
}
/*
 * Expose a host MMIO range of a passthrough device to the guest by
 * mapping [gpa, gpa+len) onto host physical 'hpa', recording the segment
 * in the first free bookkeeping slot (len == 0 marks free).  ENOENT if
 * the function is not a ppt device, EBUSY if 'vm' does not own it,
 * ENOSPC when all MAX_MMIOSEGS slots are taken; otherwise the result of
 * vm_map_mmio().
 */
int
ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
	     vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	struct vm_memory_segment *seg;
	struct pptdev *ppt;
	int n, error;

	ppt = ppt_find(bus, slot, func);
	if (ppt == NULL)
		return (ENOENT);
	if (ppt->vm != vm)
		return (EBUSY);

	for (n = 0; n < MAX_MMIOSEGS; n++) {
		seg = &ppt->mmio[n];
		if (seg->len != 0)
			continue;	/* slot in use */
		error = vm_map_mmio(vm, gpa, len, hpa);
		if (error == 0) {
			seg->gpa = gpa;
			seg->len = len;
			seg->hpa = hpa;
		}
		return (error);
	}

	return (ENOSPC);
}
/*
 * Interrupt filter for a passthrough device's MSI message (or shared
 * legacy line).  Forwards the interrupt into the owning guest by raising
 * vector (msi.vector + msg) on the configured vcpu's local APIC.
 *
 * Runs in filter (interrupt) context — must not sleep.
 */
static int
pptintr(void *arg)
{
	int vec;
	struct pptdev *ppt;
	struct pptintr_arg *pptarg;

	pptarg = arg;
	ppt = pptarg->pptdev;
	/* Guest vectors for multi-message MSI are consecutive. */
	vec = ppt->msi.vector + pptarg->msg;

	if (ppt->vm != NULL)
		(void) lapic_set_intr(ppt->vm, ppt->msi.vcpu, vec);
	else {
		/*
		 * XXX
		 * This is not expected to happen - panic?
		 */
	}

	/*
	 * For legacy interrupts give other filters a chance in case
	 * the interrupt was not generated by the passthrough device.
	 */
	if (ppt->msi.startrid == 0)
		return (FILTER_STRAY);
	else
		return (FILTER_HANDLED);
}
/*
 * XXX
 * When we try to free the MSI resource the kernel will bind the thread to
 * the host cpu was originally handling the MSI. The function freeing the
 * MSI vector (apic_free_vector()) will panic the kernel if the thread
 * is already bound to a cpu.
 *
 * So, we temporarily unbind the vcpu thread before freeing the MSI resource.
 *
 * NOTE(review): despite the all-caps name this is a function, not a
 * macro; renaming would touch callers, so it is left as-is.
 */
static void
PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt)
{
	int pincpu = -1;

	vm_get_pinning(vm, vcpu, &pincpu);

	/* Drop any cpu pinning for the duration of the teardown. */
	if (pincpu >= 0)
		vm_set_pinning(vm, vcpu, -1);

	ppt_teardown_msi(ppt);

	/* Restore the original pinning, if there was one. */
	if (pincpu >= 0)
		vm_set_pinning(vm, vcpu, pincpu);
}
/*
 * Configure guest interrupt delivery for a passthrough device.
 *
 * Validates the request, tears down any previous setup, then allocates
 * 'numvec' MSI messages (or falls back to the shared legacy INTx line
 * when the device has no MSI capability) and wires each one to pptintr()
 * so interrupts are injected at 'vector'+i on vcpu 'destcpu'.
 *
 * numvec == 0 simply releases existing resources.  Returns 0 on success,
 * EINVAL/ENOENT/EBUSY for bad arguments or ownership problems, ENOSPC if
 * the device cannot supply all requested vectors, ENXIO if resource
 * allocation or handler setup fails partway (partial state is rolled
 * back before returning).
 */
int
ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
	      int destcpu, int vector, int numvec)
{
	int i, rid, flags;
	int msi_count, startrid, error, tmp;
	struct pptdev *ppt;

	if ((destcpu >= VM_MAXCPU || destcpu < 0) ||
	    (vector < 0 || vector > 255) ||
	    (numvec < 0 || numvec > MAX_MSIMSGS))
		return (EINVAL);

	ppt = ppt_find(bus, slot, func);
	if (ppt == NULL)
		return (ENOENT);
	if (ppt->vm != vm)		/* Make sure we own this device */
		return (EBUSY);

	/* Free any allocated resources */
	PPT_TEARDOWN_MSI(vm, vcpu, ppt);

	if (numvec == 0)		/* nothing more to do */
		return (0);

	flags = RF_ACTIVE;
	msi_count = pci_msi_count(ppt->dev);
	if (msi_count == 0) {
		startrid = 0;		/* legacy interrupt */
		msi_count = 1;
		flags |= RF_SHAREABLE;	/* INTx may be shared with others */
	} else
		startrid = 1;		/* MSI */

	/*
	 * The device must be capable of supporting the number of vectors
	 * the guest wants to allocate.
	 */
	if (numvec > msi_count)
		return (EINVAL);

	/*
	 * Make sure that we can allocate all the MSI vectors that are needed
	 * by the guest.
	 */
	if (startrid == 1) {
		tmp = numvec;
		error = pci_alloc_msi(ppt->dev, &tmp);
		if (error)
			return (error);
		else if (tmp != numvec) {
			/* Got fewer vectors than requested: give them back. */
			pci_release_msi(ppt->dev);
			return (ENOSPC);
		} else {
			/* success */
		}
	}

	ppt->msi.vector = vector;
	ppt->msi.vcpu = destcpu;
	ppt->msi.startrid = startrid;

	/*
	 * Allocate the irq resource and attach it to the interrupt handler.
	 */
	for (i = 0; i < numvec; i++) {
		/* num_msgs is kept current so a failure path below can
		 * hand partial state to PPT_TEARDOWN_MSI for cleanup. */
		ppt->msi.num_msgs = i + 1;
		ppt->msi.cookie[i] = NULL;

		rid = startrid + i;
		ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
							 &rid, flags);
		if (ppt->msi.res[i] == NULL)
			break;

		ppt->msi.arg[i].pptdev = ppt;
		ppt->msi.arg[i].msg = i;

		/* pptintr is installed as a filter (no ithread handler). */
		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
				       INTR_TYPE_NET | INTR_MPSAFE,
				       pptintr, NULL, &ppt->msi.arg[i],
				       &ppt->msi.cookie[i]);
		if (error != 0)
			break;
	}

	if (i < numvec) {
		/* Setup stopped early: roll back everything acquired. */
		PPT_TEARDOWN_MSI(vm, vcpu, ppt);
		return (ENXIO);
	}

	return (0);
}

40
sys/amd64/vmm/io/ppt.h Normal file
View File

@ -0,0 +1,40 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _IO_PPT_H_
#define _IO_PPT_H_
int ppt_assign_device(struct vm *vm, int bus, int slot, int func);
int ppt_unassign_device(struct vm *vm, int bus, int slot, int func);
int ppt_unassign_all(struct vm *vm);
int ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
int destcpu, int vector, int numvec);
#endif

270
sys/amd64/vmm/io/vdev.c Normal file
View File

@ -0,0 +1,270 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include "vdev.h"
struct vdev {
SLIST_ENTRY(vdev) entry;
struct vdev_ops *ops;
void *dev;
};
static SLIST_HEAD(, vdev) vdev_head;
static int vdev_count;
struct vdev_region {
SLIST_ENTRY(vdev_region) entry;
struct vdev_ops *ops;
void *dev;
struct io_region *io;
};
static SLIST_HEAD(, vdev_region) region_head;
static int region_count;
static MALLOC_DEFINE(M_VDEV, "vdev", "vdev");
#define VDEV_INIT (0)
#define VDEV_RESET (1)
#define VDEV_HALT (2)
// static const char* vdev_event_str[] = {"VDEV_INIT", "VDEV_RESET", "VDEV_HALT"};
/*
 * Broadcast a lifecycle event (VDEV_INIT / VDEV_RESET / VDEV_HALT) to
 * every registered virtual device, stopping at the first failure.
 *
 * Fixes vs. original: 'rc' was read uninitialized when 'event' fell into
 * the default case (undefined behavior), and the failure message said
 * "init failed" regardless of which event actually failed.
 *
 * Returns 0 on success or the first device's non-zero error code.
 */
static int
vdev_system_event(int event)
{
	struct vdev *vd;
	int rc;

	// TODO: locking
	SLIST_FOREACH(vd, &vdev_head, entry) {
		switch (event) {
		case VDEV_INIT:
			rc = vd->ops->init(vd->dev);
			break;
		case VDEV_RESET:
			rc = vd->ops->reset(vd->dev);
			break;
		case VDEV_HALT:
			rc = vd->ops->halt(vd->dev);
			break;
		default:
			/* Unknown event: nothing attempted, nothing failed. */
			rc = 0;
			break;
		}
		if (rc) {
			printf("vdev %s event %d failed rc=%d\n",
			    vd->ops->name, event, rc);
			return (rc);
		}
	}
	return (0);
}
/* Deliver VDEV_INIT to all registered virtual devices. */
int
vdev_init(void)
{

	return (vdev_system_event(VDEV_INIT));
}
/* Deliver VDEV_RESET to all registered virtual devices. */
int
vdev_reset(void)
{

	return (vdev_system_event(VDEV_RESET));
}
/* Deliver VDEV_HALT to all registered virtual devices. */
int
vdev_halt(void)
{

	return (vdev_system_event(VDEV_HALT));
}
/* Reset the vdev and region registries to empty for a fresh VM. */
void
vdev_vm_init(void)
{

	SLIST_INIT(&vdev_head);
	vdev_count = 0;

	SLIST_INIT(&region_head);
	region_count = 0;
}
/*
 * Free every entry left on the vdev registry.
 *
 * NOTE(review): region_head is not drained here — presumably each
 * device unregisters its regions in its halt hook; confirm, otherwise
 * registered regions leak at VM teardown.
 */
void
vdev_vm_cleanup(void)
{
	struct vdev *vd;

	// TODO: locking
	while ((vd = SLIST_FIRST(&vdev_head)) != NULL) {
		SLIST_REMOVE_HEAD(&vdev_head, entry);
		free(vd, M_VDEV);
		vdev_count--;
	}
}
/*
 * Add a virtual device to the registry.  'ops' supplies the lifecycle
 * and MMIO callbacks; 'dev' is the opaque per-device state passed back
 * to them.  Always succeeds (M_WAITOK allocation).
 */
int
vdev_register(struct vdev_ops *ops, void *dev)
{
	struct vdev *entry;

	entry = malloc(sizeof(*entry), M_VDEV, M_WAITOK | M_ZERO);
	entry->ops = ops;
	entry->dev = dev;

	// TODO: locking
	SLIST_INSERT_HEAD(&vdev_head, entry, entry);
	vdev_count++;

	return (0);
}
/*
 * Remove a virtual device from the registry by its opaque 'dev' pointer.
 * The full list is scanned (no early break), so if the same 'dev' was
 * registered more than once the LAST match is removed — preserved from
 * the original behavior.  Silently does nothing when 'dev' is unknown.
 */
void
vdev_unregister(void *dev)
{
	struct vdev *vd, *victim = NULL;

	// TODO: locking
	SLIST_FOREACH(vd, &vdev_head, entry) {
		if (vd->dev == dev)
			victim = vd;
	}

	if (victim != NULL) {
		SLIST_REMOVE(&vdev_head, victim, vdev, entry);
		free(victim, M_VDEV);
	}
}
#define IN_RANGE(val, start, end) \
(((val) >= (start)) && ((val) < (end)))
/*
 * Find the registered region that fully contains the I/O span 'io'.
 * A non-NULL 'dev' additionally restricts the match to that device's
 * regions; dev == NULL matches any device.
 *
 * Fix vs. original: the ownership test was '(dev && dev == region->dev)',
 * which could never succeed for dev == NULL — yet vdev_memrw() passes
 * NULL precisely to mean "any device", so every guest MMIO access failed
 * with -EINVAL.  NULL now acts as a wildcard.
 */
static struct vdev_region*
vdev_find_region(struct io_region *io, void *dev)
{
	struct vdev_region *region, *found;
	uint64_t region_base;
	uint64_t region_end;

	found = NULL;

	// TODO: locking
	// FIXME: we should verify we are in the context the current
	//        vcpu here as well.
	SLIST_FOREACH(region, &region_head, entry) {
		region_base = region->io->base;
		region_end = region_base + region->io->len;
		if (IN_RANGE(io->base, region_base, region_end) &&
		    IN_RANGE(io->base+io->len, region_base, region_end+1) &&
		    (dev == NULL || dev == region->dev)) {
			found = region;
			break;
		}
	}
	return (found);
}
/*
 * Register an MMIO region 'io' handled by device 'dev' via 'ops'.
 * Returns -EEXIST when an identical registration already exists.
 */
int
vdev_register_region(struct vdev_ops *ops, void *dev, struct io_region *io)
{
	struct vdev_region *reg;

	if (vdev_find_region(io, dev) != NULL)
		return (-EEXIST);

	reg = malloc(sizeof(*reg), M_VDEV, M_WAITOK | M_ZERO);
	reg->io = io;
	reg->ops = ops;
	reg->dev = dev;

	// TODO: locking
	SLIST_INSERT_HEAD(&region_head, reg, entry);
	region_count++;

	return (0);
}
/*
 * Remove a previously registered MMIO region belonging to 'dev'.
 * Silently does nothing if no matching registration exists.
 */
void
vdev_unregister_region(void *dev, struct io_region *io)
{
	struct vdev_region *reg;

	reg = vdev_find_region(io, dev);
	if (reg == NULL)
		return;

	SLIST_REMOVE(&region_head, reg, vdev_region, entry);
	free(reg, M_VDEV);
	region_count--;
}
/*
 * Common path for guest MMIO: locate the region covering [gpa, gpa+size),
 * check that the requested direction is permitted by the region's
 * attributes, and dispatch to the owning device's memread/memwrite hook.
 * -EINVAL when no region covers the span, -EPERM when direction is denied.
 */
static int
vdev_memrw(uint64_t gpa, opsize_t size, uint64_t *data, int read)
{
	struct vdev_region *region;
	struct io_region span;
	region_attr_t want;

	span.base = gpa;
	span.len = size;

	region = vdev_find_region(&span, NULL);
	if (region == NULL)
		return (-EINVAL);

	want = read ? MMIO_READ : MMIO_WRITE;
	if ((region->io->attr & want) == 0)
		return (-EPERM);

	if (read)
		return (region->ops->memread(region->dev, gpa, size, data));

	return (region->ops->memwrite(region->dev, gpa, size, *data));
}
/* Guest MMIO read of 'size' bytes at guest-physical 'gpa' into *data. */
int
vdev_memread(uint64_t gpa, opsize_t size, uint64_t *data)
{

	return (vdev_memrw(gpa, size, data, 1));
}
/* Guest MMIO write of 'size' bytes of 'data' at guest-physical 'gpa'. */
int
vdev_memwrite(uint64_t gpa, opsize_t size, uint64_t data)
{

	return (vdev_memrw(gpa, size, &data, 0));
}

84
sys/amd64/vmm/io/vdev.h Normal file
View File

@ -0,0 +1,84 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VDEV_H_
#define _VDEV_H_
typedef enum {
BYTE = 1,
WORD = 2,
DWORD = 4,
QWORD = 8,
} opsize_t;
typedef enum {
MMIO_READ = 1,
MMIO_WRITE = 2,
} region_attr_t;
struct io_region {
uint64_t base;
uint64_t len;
region_attr_t attr;
int vcpu;
};
typedef int (*vdev_init_t)(void* dev);
typedef int (*vdev_reset_t)(void* dev);
typedef int (*vdev_halt_t)(void* dev);
typedef int (*vdev_memread_t)(void* dev, uint64_t gpa, opsize_t size, uint64_t *data);
typedef int (*vdev_memwrite_t)(void* dev, uint64_t gpa, opsize_t size, uint64_t data);
struct vdev_ops {
const char *name;
vdev_init_t init;
vdev_reset_t reset;
vdev_halt_t halt;
vdev_memread_t memread;
vdev_memwrite_t memwrite;
};
void vdev_vm_init(void);
void vdev_vm_cleanup(void);
int vdev_register(struct vdev_ops *ops, void *dev);
void vdev_unregister(void *dev);
int vdev_register_region(struct vdev_ops *ops, void *dev, struct io_region *io);
void vdev_unregister_region(void *dev, struct io_region *io);
int vdev_init(void);
int vdev_reset(void);
int vdev_halt(void);
int vdev_memread(uint64_t gpa, opsize_t size, uint64_t *data);
int vdev_memwrite(uint64_t gpa, opsize_t size, uint64_t data);
#endif /* _VDEV_H_ */

812
sys/amd64/vmm/io/vlapic.c Normal file
View File

@ -0,0 +1,812 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <machine/clock.h>
#include <x86/apicreg.h>
#include <machine/vmm.h>
#include "vmm_lapic.h"
#include "vmm_ktr.h"
#include "vdev.h"
#include "vlapic.h"
#define VLAPIC_CTR0(vlapic, format) \
VMM_CTR0((vlapic)->vm, (vlapic)->vcpuid, format)
#define VLAPIC_CTR1(vlapic, format, p1) \
VMM_CTR1((vlapic)->vm, (vlapic)->vcpuid, format, p1)
#define VLAPIC_CTR_IRR(vlapic, msg) \
do { \
uint32_t *irrptr = &(vlapic)->apic.irr0; \
irrptr[0] = irrptr[0]; /* silence compiler */ \
VLAPIC_CTR1((vlapic), msg " irr0 0x%08x", irrptr[0 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr1 0x%08x", irrptr[1 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr2 0x%08x", irrptr[2 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr3 0x%08x", irrptr[3 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr4 0x%08x", irrptr[4 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr5 0x%08x", irrptr[5 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr6 0x%08x", irrptr[6 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr7 0x%08x", irrptr[7 << 2]); \
} while (0)
#define VLAPIC_CTR_ISR(vlapic, msg) \
do { \
uint32_t *isrptr = &(vlapic)->apic.isr0; \
isrptr[0] = isrptr[0]; /* silence compiler */ \
VLAPIC_CTR1((vlapic), msg " isr0 0x%08x", isrptr[0 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr1 0x%08x", isrptr[1 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr2 0x%08x", isrptr[2 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr3 0x%08x", isrptr[3 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr4 0x%08x", isrptr[4 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr5 0x%08x", isrptr[5 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr6 0x%08x", isrptr[6 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]); \
} while (0)
static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic");

/* Interrupt priority class: the upper nibble of a vector. */
#define PRIO(x)			((x) >> 4)

#define VLAPIC_VERSION		(16)
#define VLAPIC_MAXLVT_ENTRIES	(5)

/*
 * Software state of a virtual local APIC for one vcpu.
 */
struct vlapic {
    struct vm	*vm;		/* owning virtual machine */
    int		vcpuid;
    struct io_region *mmio;	/* MMIO window at the APIC base */
    struct vdev_ops *ops;	/* vdev callbacks (vlapic_dev_ops) */
    struct LAPIC apic;		/* architectural register file */
    int		esr_update;	/* NOTE(review): never set in visible code */
    int		divisor;	/* timer divide value decoded from DCR */
    int		ccr_ticks;	/* value of 'ticks' when CCR was loaded */
    /*
     * The 'isrvec_stk' is a stack of vectors injected by the local apic.
     * A vector is popped from the stack when the processor does an EOI.
     * The vector on the top of the stack is used to compute the
     * Processor Priority in conjunction with the TPR.
     */
    uint8_t	isrvec_stk[ISRVEC_STK_SIZE];
    int		isrvec_stk_top;
};
/*
 * Set the mask bit in 'num_lvt' consecutive LVT registers starting at
 * 'lvts'.  LVT registers are spaced 16 bytes (4 dwords) apart.
 */
static void
vlapic_mask_lvts(uint32_t *lvts, int num_lvt)
{
    uint32_t *lvt;
    int n;

    for (n = 0, lvt = lvts; n < num_lvt; n++, lvt += 4)
        *lvt |= APIC_LVT_M;
}
#if 0
/* Debug helper: print the vector/delivery-status/mask bits of an LVT. */
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
    printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
        *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
        *lvt & APIC_LVTT_M);
}
#endif

/* Return the current-count timer register (decremented by timer_tick). */
static uint64_t
vlapic_get_ccr(struct vlapic *vlapic)
{
    struct LAPIC *lapic = &vlapic->apic;
    return lapic->ccr_timer;
}

/* Guest wrote ESR: latch accumulated errors.  None are tracked yet. */
static void
vlapic_update_errors(struct vlapic *vlapic)
{
    struct LAPIC *lapic = &vlapic->apic;
    lapic->esr = 0; // XXX
}
/*
 * Initialize the architecturally-defined power-on/INIT values of the
 * registers that do not reset to zero.
 */
static void
vlapic_init_ipi(struct vlapic *vlapic)
{
    struct LAPIC *lapic = &vlapic->apic;

    /*
     * Version register: version in the low byte, index of the highest
     * LVT entry in bits 16:23.  The original code used '<' instead of
     * '<<', which left the Max LVT field zero (and OR'ed in a stray 1).
     */
    lapic->version = VLAPIC_VERSION;
    lapic->version |= (VLAPIC_MAXLVT_ENTRIES << MAXLVTSHIFT);
    lapic->dfr = 0xffffffff;		/* flat destination model */
    lapic->svr = APIC_SVR_VECTOR;	/* spurious vector, APIC sw-disabled */
    vlapic_mask_lvts(&lapic->lvt_timer, VLAPIC_MAXLVT_ENTRIES+1);
}
/*
 * vdev 'reset' entry point: put the register file into its power-on
 * state.  The APIC ID is derived from the vcpu number (bits 31:24).
 */
static int
vlapic_op_reset(void* dev)
{
    struct vlapic *vlapic = (struct vlapic*)dev;
    struct LAPIC *lapic = &vlapic->apic;

    memset(lapic, 0, sizeof(*lapic));
    lapic->id = vlapic->vcpuid << 24;	/* ID lives in bits 31:24 */
    lapic->apr = vlapic->vcpuid;
    vlapic_init_ipi(vlapic);

    return 0;
}

/* vdev 'init' entry point: expose the MMIO window, then reset. */
static int
vlapic_op_init(void* dev)
{
    struct vlapic *vlapic = (struct vlapic*)dev;
    vdev_register_region(vlapic->ops, vlapic, vlapic->mmio);
    return vlapic_op_reset(dev);
}

/* vdev 'halt' entry point: tear down the MMIO window. */
static int
vlapic_op_halt(void* dev)
{
    struct vlapic *vlapic = (struct vlapic*)dev;
    vdev_unregister_region(vlapic, vlapic->mmio);
    return 0;
}

/*
 * Mark 'vector' pending in the IRR.  May be called from a different
 * vcpu's context, hence the atomic update.
 */
void
vlapic_set_intr_ready(struct vlapic *vlapic, int vector)
{
    struct LAPIC *lapic = &vlapic->apic;
    uint32_t *irrptr;
    int idx;

    if (vector < 0 || vector >= 256)
        panic("vlapic_set_intr_ready: invalid vector %d\n", vector);

    idx = (vector / 32) * 4;	/* IRR registers are 16 bytes apart */
    irrptr = &lapic->irr0;
    atomic_set_int(&irrptr[idx], 1 << (vector % 32));
    VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
}
/* The virtual local apic timer is clocked off the guest TSC frequency. */
#define VLAPIC_BUS_FREQ	tsc_freq

/*
 * Extract the 4-bit divide value encoded in the DCR (bits 0, 1 and 3).
 * Arguments and expansion are now fully parenthesized (the original
 * macro expanded unparenthesized and would mis-bind in expressions).
 */
#define VLAPIC_DCR(x)	((((x)->dcr_timer & 0x8) >> 1) | ((x)->dcr_timer & 0x3))

/*
 * Map the divide configuration register encoding to the actual divisor.
 *
 * The original switch omitted APIC_TDCR_1 (divide by 1, encoding 0xB)
 * and panicked if the guest programmed that perfectly legal value.
 */
static int
vlapic_timer_divisor(uint32_t dcr)
{
    switch (dcr & 0xB) {	/* bits 0, 1 and 3 encode the divisor */
    case APIC_TDCR_1:
        return (1);
    case APIC_TDCR_2:
        return (2);
    case APIC_TDCR_4:
        return (4);
    case APIC_TDCR_8:
        return (8);
    case APIC_TDCR_16:
        return (16);
    case APIC_TDCR_32:
        return (32);
    case APIC_TDCR_64:
        return (64);
    case APIC_TDCR_128:
        return (128);
    default:
        panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
    }
}
/*
 * (Re)arm the timer: record the current host tick count and load CCR
 * with the initial count minus time already consumed ('elapsed').
 */
static void
vlapic_start_timer(struct vlapic *vlapic, uint32_t elapsed)
{
    uint32_t icr_timer;

    icr_timer = vlapic->apic.icr_timer;

    vlapic->ccr_ticks = ticks;
    if (elapsed < icr_timer)
        vlapic->apic.ccr_timer = icr_timer - elapsed;
    else {
        /*
         * This can happen when the guest is trying to run its local
         * apic timer higher than the setting of 'hz' in the host.
         *
         * We deal with this by running the guest local apic timer
         * at the rate of the host's 'hz' setting.
         */
        vlapic->apic.ccr_timer = 0;
    }
}
/*
 * Return a pointer to the LVT register at 'offset', which must lie in
 * the LVT range [TIMER_LVT, ERROR_LVT].  Registers are 16 bytes apart.
 */
static __inline uint32_t *
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
    struct LAPIC *lapic = &vlapic->apic;
    int slot;

    if (offset < APIC_OFFSET_TIMER_LVT || offset > APIC_OFFSET_ERROR_LVT)
        panic("vlapic_get_lvt: invalid LVT\n");

    /* Convert byte offset into a uint32_t index from lvt_timer. */
    slot = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
    return (&lapic->lvt_timer + slot);
}
#if 1
/* Debug: dump the ISR registers and the isrvec stack before a panic. */
static void
dump_isrvec_stk(struct vlapic *vlapic)
{
    int i;
    uint32_t *isrptr;

    isrptr = &vlapic->apic.isr0;
    for (i = 0; i < 8; i++)		/* ISR regs are 16 bytes apart */
        printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);

    for (i = 0; i <= vlapic->isrvec_stk_top; i++)
        printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
}
#endif
/*
 * Algorithm adopted from section "Interrupt, Task and Processor Priority"
 * in Intel Architecture Manual Vol 3a.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
    int isrvec, tpr, ppr;

    /*
     * Note that the value on the stack at index 0 is always 0.
     *
     * This is a placeholder for the value of ISRV when none of the
     * bits is set in the ISRx registers.
     */
    isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
    tpr = vlapic->apic.tpr;

#if 1
    {
        /*
         * Sanity checks on the isrvec stack; each failure dumps state
         * and panics rather than letting a corrupted priority
         * computation go unnoticed.
         */
        int i, lastprio, curprio, vector, idx;
        uint32_t *isrptr;

        if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
            panic("isrvec_stk is corrupted: %d", isrvec);

        /*
         * Make sure that the priority of the nested interrupts is
         * always increasing.
         */
        lastprio = -1;
        for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
            curprio = PRIO(vlapic->isrvec_stk[i]);
            if (curprio <= lastprio) {
                dump_isrvec_stk(vlapic);
                panic("isrvec_stk does not satisfy invariant");
            }
            lastprio = curprio;
        }

        /*
         * Make sure that each bit set in the ISRx registers has a
         * corresponding entry on the isrvec stack.
         */
        i = 1;
        isrptr = &vlapic->apic.isr0;
        for (vector = 0; vector < 256; vector++) {
            idx = (vector / 32) * 4;	/* 16-byte reg spacing */
            if (isrptr[idx] & (1 << (vector % 32))) {
                if (i > vlapic->isrvec_stk_top ||
                    vlapic->isrvec_stk[i] != vector) {
                    dump_isrvec_stk(vlapic);
                    panic("ISR and isrvec_stk out of sync");
                }
                i++;
            }
        }
    }
#endif

    /* PPR is the higher of the TPR and the in-service priority class. */
    if (PRIO(tpr) >= PRIO(isrvec))
        ppr = tpr;
    else
        ppr = isrvec & 0xf0;

    vlapic->apic.ppr = ppr;
    VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}
/*
 * Guest wrote the EOI register: clear the highest-priority bit set in
 * the ISR, pop the matching vector off the isrvec stack and recompute
 * the PPR.
 */
static void
vlapic_process_eoi(struct vlapic *vlapic)
{
    struct LAPIC *lapic = &vlapic->apic;
    uint32_t *isrptr;
    int i, idx, bitpos;

    isrptr = &lapic->isr0;

    /*
     * The x86 architecture reserves the first 32 vectors for use
     * by the processor; the scan therefore skips isr0 (vectors 0-31).
     */
    for (i = 7; i > 0; i--) {
        idx = i * 4;	/* ISR registers are 16 bytes (4 dwords) apart */
        bitpos = fls(isrptr[idx]);
        if (bitpos != 0) {
            if (vlapic->isrvec_stk_top <= 0) {
                panic("invalid vlapic isrvec_stk_top %d",
                    vlapic->isrvec_stk_top);
            }
            isrptr[idx] &= ~(1 << (bitpos - 1));
            VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
            vlapic->isrvec_stk_top--;
            vlapic_update_ppr(vlapic);
            return;
        }
    }
}
/* Return the bits selected by 'mask' from an LVT register. */
static __inline int
vlapic_get_lvt_field(uint32_t *lvt, uint32_t mask)
{
    return (*lvt & mask);
}

/* Non-zero if the timer LVT is programmed for periodic mode. */
static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
    uint32_t *lvt;

    lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

    return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

/* Deliver the timer interrupt unless the timer LVT is masked. */
static void
vlapic_fire_timer(struct vlapic *vlapic)
{
    int vector;
    uint32_t *lvt;

    lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

    if (!vlapic_get_lvt_field(lvt, APIC_LVTT_M)) {
        vector = vlapic_get_lvt_field(lvt,APIC_LVTT_VECTOR);
        vlapic_set_intr_ready(vlapic, vector);
    }
}
/*
 * Guest wrote ICR_LOW: deliver the IPI described by 'icrval'.
 *
 * Returns 0 if the request was handled entirely in the kernel
 * (fixed-vector and NMI IPIs) and 1 if it must be completed in
 * userland (everything else, e.g. INIT/STARTUP).
 */
static int
lapic_process_icr(struct vlapic *vlapic, uint64_t icrval)
{
    int i;
    cpumask_t dmask, thiscpumask;
    uint32_t dest, vec, mode;

    thiscpumask = vcpu_mask(vlapic->vcpuid);

    dmask = 0;
    dest = icrval >> 32;	/* destination field (from ICR_HI) */
    vec = icrval & APIC_VECTOR_MASK;
    mode = icrval & APIC_DELMODE_MASK;

    if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
        /* Compute the target vcpu set from the destination shorthand. */
        switch (icrval & APIC_DEST_MASK) {
        case APIC_DEST_DESTFLD:
            dmask = vcpu_mask(dest);
            break;
        case APIC_DEST_SELF:
            dmask = thiscpumask;
            break;
        case APIC_DEST_ALLISELF:
            dmask = vm_active_cpus(vlapic->vm);
            break;
        case APIC_DEST_ALLESELF:
            dmask = vm_active_cpus(vlapic->vm) & ~thiscpumask;
            break;
        }

        for (i = 0; i < VM_MAXCPU; i++) {
            if (dmask & vcpu_mask(i)) {
                if (mode == APIC_DELMODE_FIXED)
                    lapic_set_intr(vlapic->vm, i, vec);
                else
                    vm_inject_nmi(vlapic->vm, i);
            }
        }

        return (0);	/* handled completely in the kernel */
    }

    /*
     * XXX this assumes that the startup IPI always succeeds
     */
    if (mode == APIC_DELMODE_STARTUP)
        vm_activate_cpu(vlapic->vm, dest);

    /*
     * This will cause a return to userland.
     */
    return (1);
}
/*
 * Return the highest-priority vector pending in the IRR if it would be
 * deliverable at the current PPR, otherwise -1.
 */
int
vlapic_pending_intr(struct vlapic *vlapic)
{
    struct LAPIC *lapic = &vlapic->apic;
    int idx, i, bitpos, vector;
    uint32_t *irrptr, val;

    irrptr = &lapic->irr0;

    /*
     * The x86 architecture reserves the first 32 vectors for use
     * by the processor; the scan therefore skips irr0 (vectors 0-31).
     */
    for (i = 7; i > 0; i--) {
        idx = i * 4;	/* IRR registers are 16 bytes apart */
        val = atomic_load_acq_int(&irrptr[idx]);
        bitpos = fls(val);
        if (bitpos != 0) {
            vector = i * 32 + (bitpos - 1);
            if (PRIO(vector) > PRIO(lapic->ppr)) {
                VLAPIC_CTR1(vlapic, "pending intr %d", vector);
                return (vector);
            } else
                break;	/* masked by the current PPR */
        }
    }
    VLAPIC_CTR0(vlapic, "no pending intr");
    return (-1);
}
/*
 * The processor has accepted 'vector': move it from the IRR to the
 * ISR, push it on the isrvec stack and recompute the PPR.
 */
void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
    struct LAPIC *lapic = &vlapic->apic;
    uint32_t *irrptr, *isrptr;
    int idx, stk_top;

    /*
     * clear the ready bit for vector being accepted in irr
     * and set the vector as in service in isr.
     */
    idx = (vector / 32) * 4;	/* 16-byte register spacing */

    irrptr = &lapic->irr0;
    atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
    VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");

    isrptr = &lapic->isr0;
    isrptr[idx] |= 1 << (vector % 32);
    VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");

    /*
     * Update the PPR
     */
    vlapic->isrvec_stk_top++;

    stk_top = vlapic->isrvec_stk_top;
    if (stk_top >= ISRVEC_STK_SIZE)
        panic("isrvec_stk_top overflow %d", stk_top);

    vlapic->isrvec_stk[stk_top] = vector;
    vlapic_update_ppr(vlapic);
}
/*
 * vdev read handler for the virtual local apic registers.  Always
 * returns 0; reserved and write-only offsets read as zero.
 *
 * NOTE(review): 'gpa & ~(PAGE_SIZE)' clears only bit 12 -- it is not a
 * page-offset mask (that would be 'gpa & (PAGE_SIZE - 1)').  This looks
 * like it relies on the caller already passing a page-relative address;
 * confirm against the vdev region dispatch code.  Also, the bound test
 * uses '>' and so accepts offset == sizeof(*lapic); harmless here
 * because only the listed case offsets actually touch the register
 * file.
 */
int
vlapic_op_mem_read(void* dev, uint64_t gpa, opsize_t size, uint64_t *data)
{
    struct vlapic *vlapic = (struct vlapic*)dev;
    struct LAPIC *lapic = &vlapic->apic;
    uint64_t offset = gpa & ~(PAGE_SIZE);
    uint32_t *reg;
    int i;

    if (offset > sizeof(*lapic)) {
        *data = 0;
        return 0;
    }

    offset &= ~3;	/* the access is performed as an aligned dword */
    switch(offset)
    {
        case APIC_OFFSET_ID:
            *data = lapic->id;
            break;
        case APIC_OFFSET_VER:
            *data = lapic->version;
            break;
        case APIC_OFFSET_TPR:
            *data = lapic->tpr;
            break;
        case APIC_OFFSET_APR:
            *data = lapic->apr;
            break;
        case APIC_OFFSET_PPR:
            *data = lapic->ppr;
            break;
        case APIC_OFFSET_EOI:
            *data = lapic->eoi;
            break;
        case APIC_OFFSET_LDR:
            *data = lapic->ldr;
            break;
        case APIC_OFFSET_DFR:
            *data = lapic->dfr;
            break;
        case APIC_OFFSET_SVR:
            *data = lapic->svr;
            break;
        case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
            /* registers are 16 bytes apart; index in dwords */
            i = (offset - APIC_OFFSET_ISR0) >> 2;
            reg = &lapic->isr0;
            *data = *(reg + i);
            break;
        case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
            i = (offset - APIC_OFFSET_TMR0) >> 2;
            reg = &lapic->tmr0;
            *data = *(reg + i);
            break;
        case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
            i = (offset - APIC_OFFSET_IRR0) >> 2;
            reg = &lapic->irr0;
            /* atomic: other vcpus may be posting vectors concurrently */
            *data = atomic_load_acq_int(reg + i);
            break;
        case APIC_OFFSET_ESR:
            *data = lapic->esr;
            break;
        case APIC_OFFSET_ICR_LOW:
            *data = lapic->icr_lo;
            break;
        case APIC_OFFSET_ICR_HI:
            *data = lapic->icr_hi;
            break;
        case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
            reg = vlapic_get_lvt(vlapic, offset);
            *data = *(reg);
            break;
        case APIC_OFFSET_ICR:
            *data = lapic->icr_timer;
            break;
        case APIC_OFFSET_CCR:
            *data = vlapic_get_ccr(vlapic);
            break;
        case APIC_OFFSET_DCR:
            *data = lapic->dcr_timer;
            break;
        case APIC_OFFSET_RRR:
        default:
            *data = 0;
            break;
    }
    return 0;
}
/*
 * vdev write handler for the virtual local apic registers.  Returns
 * non-zero when the access must be completed in userland (certain
 * IPIs; see lapic_process_icr()).
 *
 * NOTE(review): same 'gpa & ~(PAGE_SIZE)' masking caveat as the read
 * handler above -- confirm the caller passes a page-relative address.
 */
int
vlapic_op_mem_write(void* dev, uint64_t gpa, opsize_t size, uint64_t data)
{
    struct vlapic *vlapic = (struct vlapic*)dev;
    struct LAPIC *lapic = &vlapic->apic;
    uint64_t offset = gpa & ~(PAGE_SIZE);
    uint32_t *reg;
    int retval;

    if (offset > sizeof(*lapic)) {
        return 0;
    }

    retval = 0;
    offset &= ~3;	/* the access is performed as an aligned dword */
    switch(offset)
    {
        case APIC_OFFSET_ID:
            lapic->id = data;
            break;
        case APIC_OFFSET_TPR:
            /* A TPR change can unmask pending interrupts */
            lapic->tpr = data & 0xff;
            vlapic_update_ppr(vlapic);
            break;
        case APIC_OFFSET_EOI:
            vlapic_process_eoi(vlapic);
            break;
        case APIC_OFFSET_LDR:
            break;
        case APIC_OFFSET_DFR:
            break;
        case APIC_OFFSET_SVR:
            lapic->svr = data;
            break;
        case APIC_OFFSET_ICR_LOW:
            retval = lapic_process_icr(vlapic, data);
            break;
        case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
            reg = vlapic_get_lvt(vlapic, offset);
            if (!(lapic->svr & APIC_SVR_ENABLE)) {
                /* software-disabled APIC forces the LVT mask bit */
                data |= APIC_LVT_M;
            }
            *reg = data;
            // vlapic_dump_lvt(offset, reg);
            break;
        case APIC_OFFSET_ICR:
            lapic->icr_timer = data;
            vlapic_start_timer(vlapic, 0);
            break;
        case APIC_OFFSET_DCR:
            lapic->dcr_timer = data;
            vlapic->divisor = vlapic_timer_divisor(data);
            break;
        case APIC_OFFSET_ESR:
            vlapic_update_errors(vlapic);
            break;
        case APIC_OFFSET_VER:
        case APIC_OFFSET_APR:
        case APIC_OFFSET_PPR:
        case APIC_OFFSET_RRR:
        case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
        case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
        case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
        case APIC_OFFSET_CCR:
        default:
            // Read only.
            break;
    }
    return (retval);
}
/*
 * Periodic hook that emulates the local apic timer: decrement CCR by
 * the number of host ticks that elapsed since the last call and fire
 * and/or re-arm the timer as needed.
 */
void
vlapic_timer_tick(struct vlapic *vlapic)
{
    int curticks, delta, periodic;
    uint32_t ccr;
    uint32_t decrement, remainder;

    curticks = ticks;

    /* Common case */
    delta = curticks - vlapic->ccr_ticks;
    if (delta == 0)
        return;

    /* Local APIC timer is disabled */
    if (vlapic->apic.icr_timer == 0)
        return;

    /* One-shot mode and timer has already counted down to zero */
    periodic = vlapic_periodic_timer(vlapic);
    if (!periodic && vlapic->apic.ccr_timer == 0)
        return;

    /*
     * The 'curticks' and 'ccr_ticks' are out of sync by more than
     * 2^31 ticks. We deal with this by restarting the timer.
     */
    if (delta < 0) {
        vlapic_start_timer(vlapic, 0);
        return;
    }

    /* CCR counts down by 'decrement' per host clock tick. */
    ccr = vlapic->apic.ccr_timer;
    decrement = (VLAPIC_BUS_FREQ / vlapic->divisor) / hz;
    while (delta-- > 0) {
        if (ccr <= decrement) {
            remainder = decrement - ccr;
            vlapic_fire_timer(vlapic);
            if (periodic) {
                /* Re-arm, carrying over the overshoot. */
                vlapic_start_timer(vlapic, remainder);
                ccr = vlapic->apic.ccr_timer;
            } else {
                /*
                 * One-shot timer has counted down to zero.
                 */
                ccr = 0;
                break;
            }
        } else
            ccr -= decrement;
    }

    vlapic->ccr_ticks = curticks;
    vlapic->apic.ccr_timer = ccr;
}
/* vdev interface glue for the virtual local apic. */
struct vdev_ops vlapic_dev_ops = {
    .name = "vlapic",
    .init = vlapic_op_init,
    .reset = vlapic_op_reset,
    .halt = vlapic_op_halt,
    .memread = vlapic_op_mem_read,
    .memwrite = vlapic_op_mem_write,
};

/* One MMIO window per vcpu, all at the same guest-physical base. */
static struct io_region vlapic_mmio[VM_MAXCPU];

/*
 * Allocate and register the virtual local apic for vcpu 'vcpuid'.
 * The returned pointer must be released with vlapic_cleanup().
 */
struct vlapic *
vlapic_init(struct vm *vm, int vcpuid)
{
    struct vlapic *vlapic;

    vlapic = malloc(sizeof(struct vlapic), M_VLAPIC, M_WAITOK | M_ZERO);
    vlapic->vm = vm;
    vlapic->vcpuid = vcpuid;

    vlapic->ops = &vlapic_dev_ops;
    vlapic->mmio = vlapic_mmio + vcpuid;
    vlapic->mmio->base = DEFAULT_APIC_BASE;
    vlapic->mmio->len = PAGE_SIZE;
    vlapic->mmio->attr = MMIO_READ|MMIO_WRITE;
    vlapic->mmio->vcpu = vcpuid;

    vdev_register(&vlapic_dev_ops, vlapic);

    vlapic_op_init(vlapic);

    return (vlapic);
}

/* Undo vlapic_init(): unregister from the vdev layer and free. */
void
vlapic_cleanup(struct vlapic *vlapic)
{
    vdev_unregister(vlapic);
    free(vlapic, M_VLAPIC);
}

105
sys/amd64/vmm/io/vlapic.h Normal file
View File

@ -0,0 +1,105 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VLAPIC_H_
#define _VLAPIC_H_
#include "vdev.h"
struct vm;
/*
* Map of APIC Registers: Offset Description Access
*/
#define APIC_OFFSET_ID		0x20	// Local APIC ID		R/W
#define APIC_OFFSET_VER		0x30	// Local APIC Version		R
#define APIC_OFFSET_TPR		0x80	// Task Priority Register	R/W
#define APIC_OFFSET_APR		0x90	// Arbitration Priority Register R
#define APIC_OFFSET_PPR		0xA0	// Processor Priority Register	R
#define APIC_OFFSET_EOI		0xB0	// EOI Register			W
#define APIC_OFFSET_RRR		0xC0	// Remote read			R
#define APIC_OFFSET_LDR		0xD0	// Logical Destination		R/W
#define APIC_OFFSET_DFR		0xE0	// Destination Format Register	0..27 R; 28..31 R/W
#define APIC_OFFSET_SVR		0xF0	// Spurious Interrupt Vector Reg. 0..3 R; 4..9 R/W
// ISR/TMR/IRR: 8 registers of 32 bits each, 16 bytes apart, covering
// vectors 0-255.  (The "095-128" comments on the x3 entries were
// off by one; each register covers exactly 32 vectors.)
#define APIC_OFFSET_ISR0	0x100	// ISR 000-031			R
#define APIC_OFFSET_ISR1	0x110	// ISR 032-063			R
#define APIC_OFFSET_ISR2	0x120	// ISR 064-095			R
#define APIC_OFFSET_ISR3	0x130	// ISR 096-127			R
#define APIC_OFFSET_ISR4	0x140	// ISR 128-159			R
#define APIC_OFFSET_ISR5	0x150	// ISR 160-191			R
#define APIC_OFFSET_ISR6	0x160	// ISR 192-223			R
#define APIC_OFFSET_ISR7	0x170	// ISR 224-255			R
#define APIC_OFFSET_TMR0	0x180	// TMR 000-031			R
#define APIC_OFFSET_TMR1	0x190	// TMR 032-063			R
#define APIC_OFFSET_TMR2	0x1A0	// TMR 064-095			R
#define APIC_OFFSET_TMR3	0x1B0	// TMR 096-127			R
#define APIC_OFFSET_TMR4	0x1C0	// TMR 128-159			R
#define APIC_OFFSET_TMR5	0x1D0	// TMR 160-191			R
#define APIC_OFFSET_TMR6	0x1E0	// TMR 192-223			R
#define APIC_OFFSET_TMR7	0x1F0	// TMR 224-255			R
#define APIC_OFFSET_IRR0	0x200	// IRR 000-031			R
#define APIC_OFFSET_IRR1	0x210	// IRR 032-063			R
#define APIC_OFFSET_IRR2	0x220	// IRR 064-095			R
#define APIC_OFFSET_IRR3	0x230	// IRR 096-127			R
#define APIC_OFFSET_IRR4	0x240	// IRR 128-159			R
#define APIC_OFFSET_IRR5	0x250	// IRR 160-191			R
#define APIC_OFFSET_IRR6	0x260	// IRR 192-223			R
#define APIC_OFFSET_IRR7	0x270	// IRR 224-255			R
#define APIC_OFFSET_ESR		0x280	// Error Status Register	R
#define APIC_OFFSET_ICR_LOW	0x300	// Interrupt Command Reg. (0-31)	R/W
#define APIC_OFFSET_ICR_HI	0x310	// Interrupt Command Reg. (32-63)	R/W
#define APIC_OFFSET_TIMER_LVT	0x320	// Local Vector Table (Timer)	R/W
#define APIC_OFFSET_THERM_LVT	0x330	// Local Vector Table (Thermal)	R/W (PIV+)
#define APIC_OFFSET_PERF_LVT	0x340	// Local Vector Table (Performance) R/W (P6+)
#define APIC_OFFSET_LINT0_LVT	0x350	// Local Vector Table (LINT0)	R/W
#define APIC_OFFSET_LINT1_LVT	0x360	// Local Vector Table (LINT1)	R/W
#define APIC_OFFSET_ERROR_LVT	0x370	// Local Vector Table (ERROR)	R/W
#define APIC_OFFSET_ICR		0x380	// Initial Count Reg. for Timer	R/W
#define APIC_OFFSET_CCR		0x390	// Current Count of Timer	R
#define APIC_OFFSET_DCR		0x3E0	// Timer Divide Configuration Reg. R/W
/*
* 16 priority levels with at most one vector injected per level.
*/
#define ISRVEC_STK_SIZE (16 + 1)
struct vlapic *vlapic_init(struct vm *vm, int vcpuid);
void vlapic_cleanup(struct vlapic *vlapic);
int vlapic_op_mem_write(void* dev, uint64_t gpa,
opsize_t size, uint64_t data);
int vlapic_op_mem_read(void* dev, uint64_t gpa,
opsize_t size, uint64_t *data);
int vlapic_pending_intr(struct vlapic *vlapic);
void vlapic_intr_accepted(struct vlapic *vlapic, int vector);
void vlapic_set_intr_ready(struct vlapic *vlapic, int vector);
void vlapic_timer_tick(struct vlapic *vlapic);
#endif /* _VLAPIC_H_ */

738
sys/amd64/vmm/vmm.c Normal file
View File

@ -0,0 +1,738 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <machine/vm.h>
#include <machine/pcb.h>
#include <x86/apicreg.h>
#include <machine/vmm.h>
#include "vmm_mem.h"
#include "vmm_util.h"
#include <machine/vmm_dev.h>
#include "vlapic.h"
#include "vmm_msr.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "io/ppt.h"
#include "io/iommu.h"
struct vlapic;

/*
 * Per-vcpu software state.
 */
struct vcpu {
    int		flags;
    int		pincpu;		/* host cpuid this vcpu is bound to */
    int		hostcpu;	/* host cpuid this vcpu last ran on */
    uint64_t	guest_msrs[VMM_MSR_NUM];
    struct vlapic *vlapic;
    int		vcpuid;
    struct savefpu savefpu;	/* guest fpu state */
    void	*stats;		/* opaque vmm_stat buffer */
};
#define	VCPU_F_PINNED	0x0001
#define	VCPU_F_RUNNING	0x0002

/* Pinning accessors: VCPU_PINCPU evaluates to -1 when not pinned. */
#define	VCPU_PINCPU(vm, vcpuid)	\
    ((vm->vcpu[vcpuid].flags & VCPU_F_PINNED) ? vm->vcpu[vcpuid].pincpu : -1)

#define	VCPU_UNPIN(vm, vcpuid)	(vm->vcpu[vcpuid].flags &= ~VCPU_F_PINNED)

#define	VCPU_PIN(vm, vcpuid, host_cpuid)				\
do {									\
	vm->vcpu[vcpuid].flags |= VCPU_F_PINNED;			\
	vm->vcpu[vcpuid].pincpu = host_cpuid;				\
} while(0)

#define	VM_MAX_MEMORY_SEGMENTS	2

/*
 * Software state of a virtual machine.
 */
struct vm {
    void	*cookie;	/* processor-specific data */
    void	*iommu;		/* iommu-specific data */
    struct vcpu	vcpu[VM_MAXCPU];
    int		num_mem_segs;
    struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS];
    char	name[VM_MAX_NAMELEN];

    /*
     * Mask of active vcpus.
     * An active vcpu is one that has been started implicitly (BSP) or
     * explicitly (AP) by sending it a startup ipi.
     */
    cpumask_t	active_cpus;
};

/*
 * Trampolines into the cpu-specific backend (Intel VT-x or AMD SVM).
 * Each degrades to a harmless value when no backend was registered.
 */
static struct vmm_ops *ops;
#define	VMM_INIT()	(ops != NULL ? (*ops->init)() : 0)
#define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)

#define	VMINIT(vm)	(ops != NULL ? (*ops->vminit)(vm): NULL)
#define	VMRUN(vmi, vcpu, rip, vmexit) \
	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, vmexit) : ENXIO)
#define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define	VMMMAP(vmi, gpa, hpa, len, attr, prot, spm) \
	(ops != NULL ? (*ops->vmmmap)(vmi, gpa, hpa, len, attr, prot, spm) : ENXIO)
#define	VMGETREG(vmi, vcpu, num, retval) \
	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETREG(vmi, vcpu, num, val) \
	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
#define	VMGETDESC(vmi, vcpu, num, desc) \
	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMSETDESC(vmi, vcpu, num, desc) \
	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMINJECT(vmi, vcpu, type, vec, ec, ecv) \
	(ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO)
#define	VMNMI(vmi, vcpu) \
	(ops != NULL ? (*ops->vmnmi)(vmi, vcpu) : ENXIO)
#define	VMGETCAP(vmi, vcpu, num, retval) \
	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETCAP(vmi, vcpu, num, val) \
	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)

/* Raw FPU save/restore and CR0.TS manipulation for guest fpu switching. */
#define	fxrstor(addr)		__asm("fxrstor %0" : : "m" (*(addr)))
#define	fxsave(addr)		__asm __volatile("fxsave %0" : "=m" (*(addr)))
#define	fpu_start_emulating()	__asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
				      : : "n" (CR0_TS) : "ax")
#define	fpu_stop_emulating()	__asm("clts")

static MALLOC_DEFINE(M_VM, "vm", "vm");
CTASSERT(VMM_MSR_NUM <= 64);	/* msr_mask can keep track of up to 64 msrs */

/* statistics */
static VMM_STAT_DEFINE(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
/* Release the per-vcpu resources allocated by vcpu_init(). */
static void
vcpu_cleanup(struct vcpu *vcpu)
{
    vlapic_cleanup(vcpu->vlapic);
    vmm_stat_free(vcpu->stats);
}

/*
 * Initialize one vcpu.  The guest FPU state is seeded from the current
 * thread's user FPU save area.
 */
static void
vcpu_init(struct vm *vm, uint32_t vcpu_id)
{
    struct vcpu *vcpu;

    vcpu = &vm->vcpu[vcpu_id];

    vcpu->hostcpu = -1;		/* never ran anywhere yet */
    vcpu->vcpuid = vcpu_id;
    vcpu->vlapic = vlapic_init(vm, vcpu_id);
    fpugetregs(curthread);	/* flush live FPU state into the pcb */
    vcpu->savefpu = curthread->td_pcb->pcb_user_save;
    vcpu->stats = vmm_stat_alloc();
}
/*
 * Module initialization: pick the hardware backend (VT-x or SVM) for
 * this cpu and initialize the shared subsystems.
 *
 * NOTE(review): vmm_ipi_init()/vmm_mem_init() are not unwound on the
 * error paths below -- confirm whether that leaks on unsupported cpus.
 */
static int
vmm_init(void)
{
    int error;

    vmm_ipi_init();

    error = vmm_mem_init();
    if (error)
        return (error);

    if (vmm_is_intel())
        ops = &vmm_ops_intel;
    else if (vmm_is_amd())
        ops = &vmm_ops_amd;
    else
        return (ENXIO);

    vmm_msr_init();

    return (VMM_INIT());
}

/* Kernel module load/unload event dispatcher. */
static int
vmm_handler(module_t mod, int what, void *arg)
{
    int error;

    switch (what) {
    case MOD_LOAD:
        vmmdev_init();
        iommu_init();
        error = vmm_init();
        break;
    case MOD_UNLOAD:
        vmmdev_cleanup();
        iommu_cleanup();
        vmm_ipi_cleanup();
        error = VMM_CLEANUP();
        break;
    default:
        error = 0;
        break;
    }
    return (error);
}

static moduledata_t vmm_kmod = {
    "vmm",
    vmm_handler,
    NULL
};

/*
 * Execute the module load handler after the pci passthru driver has had
 * a chance to claim devices. We need this information at the time we do
 * iommu initialization.
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_CONFIGURE + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
/*
 * Create a named virtual machine.  All VM_MAXCPU vcpus are initialized
 * and the BSP (vcpu 0) is marked active.  Returns NULL only for a
 * missing or over-long name; allocation uses M_WAITOK and cannot fail.
 */
struct vm *
vm_create(const char *name)
{
    int i;
    struct vm *vm;
    vm_paddr_t maxaddr;

    const int BSP = 0;

    if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
        return (NULL);

    vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
    strcpy(vm->name, name);
    vm->cookie = VMINIT(vm);	/* backend-private state */

    for (i = 0; i < VM_MAXCPU; i++) {
        vcpu_init(vm, i);
        guest_msrs_init(vm, i);
    }

    maxaddr = vmm_mem_maxaddr();
    vm->iommu = iommu_create_domain(maxaddr);
    vm_activate_cpu(vm, BSP);

    return (vm);
}
/* Tear down a vm created by vm_create() and release all its memory. */
void
vm_destroy(struct vm *vm)
{
    int i;

    ppt_unassign_all(vm);	/* return passthru devices first */

    for (i = 0; i < vm->num_mem_segs; i++)
        vmm_mem_free(vm->mem_segs[i].hpa, vm->mem_segs[i].len);

    for (i = 0; i < VM_MAXCPU; i++)
        vcpu_cleanup(&vm->vcpu[i]);

    iommu_destroy_domain(vm->iommu);

    VMCLEANUP(vm->cookie);

    free(vm, M_VM);
}

/* Return the vm's name; the storage is owned by the vm. */
const char *
vm_name(struct vm *vm)
{
    return (vm->name);
}

/* Map host-physical 'hpa' at guest-physical 'gpa' as uncacheable MMIO. */
int
vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
    const boolean_t spok = TRUE;	/* superpage mappings are ok */

    return (VMMMAP(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE,
        VM_PROT_RW, spok));
}

/* Remove an MMIO mapping established by vm_map_mmio(). */
int
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
{
    const boolean_t spok = TRUE;	/* superpage mappings are ok */

    return (VMMMAP(vm->cookie, gpa, 0, len, VM_MEMATTR_UNCACHEABLE,
        VM_PROT_NONE, spok));
}
/*
 * Allocate 'len' bytes of host memory, map it at guest-physical 'gpa'
 * (and into the iommu domain) and record the segment.  Idempotent: if
 * the range is already backed, only the existing hpa is returned.
 */
int
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t *ret_hpa)
{
    int error;
    vm_paddr_t hpa;

    const boolean_t spok = TRUE;	/* superpage mappings are ok */

    /*
     * find the hpa if it was already vm_malloc'd.
     */
    hpa = vm_gpa2hpa(vm, gpa, len);
    if (hpa != ((vm_paddr_t)-1))
        goto out;

    if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
        return (E2BIG);

    hpa = vmm_mem_alloc(len);
    if (hpa == 0)
        return (ENOMEM);

    error = VMMMAP(vm->cookie, gpa, hpa, len, VM_MEMATTR_WRITE_BACK,
        VM_PROT_ALL, spok);
    if (error) {
        vmm_mem_free(hpa, len);	/* undo the allocation on failure */
        return (error);
    }

    iommu_create_mapping(vm->iommu, gpa, hpa, len);

    vm->mem_segs[vm->num_mem_segs].gpa = gpa;
    vm->mem_segs[vm->num_mem_segs].hpa = hpa;
    vm->mem_segs[vm->num_mem_segs].len = len;
    vm->num_mem_segs++;
out:
    *ret_hpa = hpa;
    return (0);
}
/*
 * Translate guest-physical [gpa, gpa+len) to a host-physical address.
 * Returns (vm_paddr_t)-1 if the range is not contained in any single
 * registered memory segment.
 */
vm_paddr_t
vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len)
{
    struct vm_memory_segment *seg;
    int n;

    for (n = 0; n < vm->num_mem_segs; n++) {
        seg = &vm->mem_segs[n];
        if (gpa >= seg->gpa && gpa + len <= seg->gpa + seg->len)
            return (seg->hpa + (gpa - seg->gpa));
    }
    return ((vm_paddr_t)-1);
}
/*
 * Copy out the memory segment whose guest-physical base address
 * matches 'gpabase' exactly.  Returns 0 on success, -1 if no segment
 * starts at that address.
 */
int
vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
		  struct vm_memory_segment *seg)
{
    int n;

    for (n = 0; n < vm->num_mem_segs; n++) {
        if (vm->mem_segs[n].gpa != gpabase)
            continue;
        *seg = vm->mem_segs[n];
        return (0);
    }
    return (-1);
}
/* Fetch guest register 'reg' of 'vcpu' through the backend. */
int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{

    if (vcpu < 0 || vcpu >= VM_MAXCPU)
        return (EINVAL);

    if (reg >= VM_REG_LAST)
        return (EINVAL);

    return (VMGETREG(vm->cookie, vcpu, reg, retval));
}

/* Set guest register 'reg' of 'vcpu' through the backend. */
int
vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
{

    if (vcpu < 0 || vcpu >= VM_MAXCPU)
        return (EINVAL);

    if (reg >= VM_REG_LAST)
        return (EINVAL);

    return (VMSETREG(vm->cookie, vcpu, reg, val));
}
/* TRUE for the two descriptor-table registers (IDTR and GDTR). */
static boolean_t
is_descriptor_table(int reg)
{

    if (reg == VM_REG_GUEST_IDTR || reg == VM_REG_GUEST_GDTR)
        return (TRUE);
    return (FALSE);
}

/* TRUE for segment registers, including TR and LDTR. */
static boolean_t
is_segment_register(int reg)
{

    if (reg == VM_REG_GUEST_ES || reg == VM_REG_GUEST_CS ||
        reg == VM_REG_GUEST_SS || reg == VM_REG_GUEST_DS ||
        reg == VM_REG_GUEST_FS || reg == VM_REG_GUEST_GS ||
        reg == VM_REG_GUEST_TR || reg == VM_REG_GUEST_LDTR)
        return (TRUE);
    return (FALSE);
}
/* Fetch the descriptor for a segment or descriptor-table register. */
int
vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
		struct seg_desc *desc)
{

    if (vcpu < 0 || vcpu >= VM_MAXCPU)
        return (EINVAL);

    if (!is_segment_register(reg) && !is_descriptor_table(reg))
        return (EINVAL);

    return (VMGETDESC(vm->cookie, vcpu, reg, desc));
}

/* Set the descriptor for a segment or descriptor-table register. */
int
vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
		struct seg_desc *desc)
{

    if (vcpu < 0 || vcpu >= VM_MAXCPU)
        return (EINVAL);

    if (!is_segment_register(reg) && !is_descriptor_table(reg))
        return (EINVAL);

    return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}

/* Report the host cpu 'vcpuid' is pinned to, or -1 if unpinned. */
int
vm_get_pinning(struct vm *vm, int vcpuid, int *cpuid)
{

    if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
        return (EINVAL);

    *cpuid = VCPU_PINCPU(vm, vcpuid);

    return (0);
}

/*
 * Bind the current thread (assumed to be running 'vcpuid') to
 * 'host_cpuid', or unbind it when 'host_cpuid' is negative.
 */
int
vm_set_pinning(struct vm *vm, int vcpuid, int host_cpuid)
{
    struct thread *td;

    if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
        return (EINVAL);

    td = curthread;		/* XXXSMP only safe when muxing vcpus */

    /* unpin */
    if (host_cpuid < 0) {
        VCPU_UNPIN(vm, vcpuid);
        thread_lock(td);
        sched_unbind(td);
        thread_unlock(td);
        return (0);
    }

    if (CPU_ABSENT(host_cpuid))
        return (EINVAL);

    /*
     * XXX we should check that 'host_cpuid' has not already been pinned
     * by another vm.
     */
    thread_lock(td);
    sched_bind(td, host_cpuid);
    thread_unlock(td);
    VCPU_PIN(vm, vcpuid, host_cpuid);

    return (0);
}
/*
 * Load the guest FPU state onto the cpu.  Interrupts are disabled so
 * nothing can touch the FPU in between; CR0.TS is set again afterwards
 * so any stray host FPU use traps.
 */
static void
restore_guest_fpustate(struct vcpu *vcpu)
{
    register_t s;

    s = intr_disable();
    fpu_stop_emulating();	/* clts: allow FPU access */
    fxrstor(&vcpu->savefpu);
    fpu_start_emulating();	/* set CR0.TS again */
    intr_restore(s);
}

/* Counterpart of restore_guest_fpustate(): save the guest FPU state. */
static void
save_guest_fpustate(struct vcpu *vcpu)
{
    register_t s;

    s = intr_disable();
    fpu_stop_emulating();
    fxsave(&vcpu->savefpu);
    fpu_start_emulating();
    intr_restore(s);
}
/*
 * Run one vcpu until it exits to userland.  Executes inside a critical
 * section; guest MSRs and FPU state are swapped in around the backend
 * VMRUN and the host state restored before returning.  Exit details
 * are returned in vmrun->vm_exit.
 */
int
vm_run(struct vm *vm, struct vm_run *vmrun)
{
    int error, vcpuid;
    struct vcpu *vcpu;
    struct pcb *pcb;
    uint64_t tscval;

    vcpuid = vmrun->cpuid;

    if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
        return (EINVAL);

    vcpu = &vm->vcpu[vcpuid];

    critical_enter();

    tscval = rdtsc();	/* for the runtime statistic below */

    pcb = PCPU_GET(curpcb);
    set_pcb_flags(pcb, PCB_FULL_IRET);	/* full state restore on return */

    vcpu->hostcpu = curcpu;

    fpuexit(curthread);
    restore_guest_msrs(vm, vcpuid);
    restore_guest_fpustate(vcpu);
    error = VMRUN(vm->cookie, vcpuid, vmrun->rip, &vmrun->vm_exit);
    save_guest_fpustate(vcpu);
    restore_host_msrs(vm, vcpuid);

    vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);

    critical_exit();

    return (error);
}
/* Queue an exception/interrupt event for injection on the next entry. */
int
vm_inject_event(struct vm *vm, int vcpuid, int type,
		int vector, uint32_t code, int code_valid)
{
    if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
        return (EINVAL);

    if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0)
        return (EINVAL);

    if (vector < 0 || vector > 255)
        return (EINVAL);

    return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid));
}

/* Post an NMI to 'vcpu' and kick it out of guest mode. */
int
vm_inject_nmi(struct vm *vm, int vcpu)
{
    int error;

    if (vcpu < 0 || vcpu >= VM_MAXCPU)
        return (EINVAL);

    error = VMNMI(vm->cookie, vcpu);
    vm_interrupt_hostcpu(vm, vcpu);	/* force a vm-exit to take it */
    return (error);
}

/* Query an optional backend capability. */
int
vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
{
    if (vcpu < 0 || vcpu >= VM_MAXCPU)
        return (EINVAL);

    if (type < 0 || type >= VM_CAP_MAX)
        return (EINVAL);

    return (VMGETCAP(vm->cookie, vcpu, type, retval));
}

/* Enable or disable an optional backend capability. */
int
vm_set_capability(struct vm *vm, int vcpu, int type, int val)
{
    if (vcpu < 0 || vcpu >= VM_MAXCPU)
        return (EINVAL);

    if (type < 0 || type >= VM_CAP_MAX)
        return (EINVAL);

    return (VMSETCAP(vm->cookie, vcpu, type, val));
}

/*
 * Return the guest MSR save area of 'cpu'.
 * NOTE(review): no bounds check on 'cpu' -- callers must pass a valid
 * vcpu id.
 */
uint64_t *
vm_guest_msrs(struct vm *vm, int cpu)
{
    return (vm->vcpu[cpu].guest_msrs);
}

/* Return the virtual local apic of 'cpu' (also no bounds check). */
struct vlapic *
vm_lapic(struct vm *vm, int cpu)
{
    return (vm->vcpu[cpu].vlapic);
}
/*
 * Return 1 if pci device bus/slot/func appears in the 'pptdevs' kernel
 * environment variable, i.e. it is reserved for PCI passthru.
 *
 * The list is parsed in place: each separating space is temporarily
 * replaced with '\0' for sscanf and restored afterwards.
 */
boolean_t
vmm_is_pptdev(int bus, int slot, int func)
{
    int found, b, s, f, n;
    char *val, *cp, *cp2;

    /*
     * setenv pptdevs "1/2/3 4/5/6 7/8/9 10/11/12"
     */
    found = 0;
    cp = val = getenv("pptdevs");
    while (cp != NULL && *cp != '\0') {
        if ((cp2 = strchr(cp, ' ')) != NULL)
            *cp2 = '\0';

        n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
        if (n == 3 && bus == b && slot == s && func == f) {
            found = 1;
            break;
        }

        if (cp2 != NULL)
            *cp2++ = ' ';	/* restore separator, advance */

        cp = cp2;
    }
    freeenv(val);	/* freeenv/free(9) tolerate a NULL pointer */
    return (found);
}
/* Return the opaque IOMMU domain associated with this VM. */
void *
vm_iommu_domain(struct vm *vm)
{
	return (vm->iommu);
}
/*
 * Mark a vcpu as running or stopped.
 *
 * Panics on an invalid vcpuid or on a state transition that does not
 * change anything (running -> running, stopped -> stopped), since that
 * indicates a bookkeeping bug in the caller.
 */
void
vm_set_run_state(struct vm *vm, int vcpuid, int state)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_set_run_state: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	if (state == VCPU_RUNNING) {
		if ((vcpu->flags & VCPU_F_RUNNING) != 0) {
			panic("vm_set_run_state: %s[%d] is already running",
			      vm_name(vm), vcpuid);
		}
		vcpu->flags |= VCPU_F_RUNNING;
	} else {
		if ((vcpu->flags & VCPU_F_RUNNING) == 0) {
			panic("vm_set_run_state: %s[%d] is already stopped",
			      vm_name(vm), vcpuid);
		}
		vcpu->flags &= ~VCPU_F_RUNNING;
	}
}
int
vm_get_run_state(struct vm *vm, int vcpuid, int *cpuptr)
{
int retval, hostcpu;
struct vcpu *vcpu;
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
vcpu = &vm->vcpu[vcpuid];
if (vcpu->flags & VCPU_F_RUNNING) {
retval = VCPU_RUNNING;
hostcpu = vcpu->hostcpu;
} else {
retval = VCPU_STOPPED;
hostcpu = -1;
}
if (cpuptr)
*cpuptr = hostcpu;
return (retval);
}
/*
 * Record a vcpu as active in the VM's active cpu mask.  Out-of-range
 * indices are silently ignored.
 */
void
vm_activate_cpu(struct vm *vm, int vcpuid)
{

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return;

	vm->active_cpus |= vcpu_mask(vcpuid);
}
/* Return the mask of vcpus that have been activated for this VM. */
cpumask_t
vm_active_cpus(struct vm *vm)
{
	return (vm->active_cpus);
}
/*
 * Return the statistics buffer for the given vcpu.
 * NOTE(review): 'vcpuid' is not range-checked — callers must pass a
 * valid index.
 */
void *
vcpu_stats(struct vm *vm, int vcpuid)
{
	return (vm->vcpu[vcpuid].stats);
}

469
sys/amd64/vmm/vmm_dev.c Normal file
View File

@ -0,0 +1,469 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/sysctl.h>
#include <sys/libkern.h>
#include <sys/ioccom.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/pmap.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "io/ppt.h"
#include <machine/vmm_dev.h>
/*
 * Per-device state for a /dev/vmm/<name> character device; links the
 * cdev back to its vm instance.
 */
struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	SLIST_ENTRY(vmmdev_softc) link;
};
/* All softcs, and the mutex that protects the list and device state. */
static SLIST_HEAD(, vmmdev_softc) head;
static struct mtx vmmdev_mtx;
static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
SYSCTL_DECL(_hw_vmm);
/*
 * Find the softc whose VM has the given name, or NULL.
 * Must be called with vmmdev_mtx held.
 */
static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
	struct vmmdev_softc *sc;

#ifdef notyet	/* XXX kernel is not compiled with invariants */
	mtx_assert(&vmmdev_mtx, MA_OWNED);
#endif

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(vm_name(sc->vm), name) == 0)
			return (sc);
	}

	return (NULL);
}
/*
 * Find the softc associated with the given cdev, or NULL.
 * Must be called with vmmdev_mtx held.
 */
static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	struct vmmdev_softc *sc;

#ifdef notyet	/* XXX kernel is not compiled with invariants */
	mtx_assert(&vmmdev_mtx, MA_OWNED);
#endif

	SLIST_FOREACH(sc, &head, link) {
		if (sc->cdev == cdev)
			return (sc);
	}

	return (NULL);
}
static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
int error, off, c;
vm_paddr_t hpa, gpa;
struct vmmdev_softc *sc;
static char zerobuf[PAGE_SIZE];
error = 0;
mtx_lock(&vmmdev_mtx);
sc = vmmdev_lookup2(cdev);
while (uio->uio_resid > 0 && error == 0) {
gpa = uio->uio_offset;
off = gpa & PAGE_MASK;
c = min(uio->uio_resid, PAGE_SIZE - off);
/*
* The VM has a hole in its physical memory map. If we want to
* use 'dd' to inspect memory beyond the hole we need to
* provide bogus data for memory that lies in the hole.
*
* Since this device does not support lseek(2), dd(1) will
* read(2) blocks of data to simulate the lseek(2).
*/
hpa = vm_gpa2hpa(sc->vm, gpa, c);
if (hpa == (vm_paddr_t)-1) {
if (uio->uio_rw == UIO_READ)
error = uiomove(zerobuf, c, uio);
else
error = EFAULT;
} else
error = uiomove((void *)PHYS_TO_DMAP(hpa), c, uio);
}
mtx_unlock(&vmmdev_mtx);
return (error);
}
/*
 * ioctl(2) handler for /dev/vmm/<name>.  Dispatches all userland control
 * operations (run, register/descriptor access, event injection,
 * passthrough setup, memory mapping, stats) to the vmm core.
 *
 * Returns ENXIO if the cdev has no softc, EINVAL/EBUSY for a bad or
 * busy vcpu, ENOTTY for an unknown command, otherwise the result of
 * the underlying operation.
 */
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
	     struct thread *td)
{
	int error, vcpu;
	struct vmmdev_softc *sc;
	struct vm_memory_segment *seg;
	struct vm_register *vmreg;
	struct vm_seg_desc* vmsegdesc;
	struct vm_pin *vmpin;
	struct vm_run *vmrun;
	struct vm_event *vmevent;
	struct vm_lapic_irq *vmirq;
	struct vm_capability *vmcap;
	struct vm_pptdev *pptdev;
	struct vm_pptdev_mmio *pptmmio;
	struct vm_pptdev_msi *pptmsi;
	struct vm_nmi *vmnmi;
	struct vm_stats *vmstats;
	struct vm_stat_desc *statdesc;
	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		mtx_unlock(&vmmdev_mtx);
		return (ENXIO);
	}
	/*
	 * Some VMM ioctls can operate only on vcpus that are not running.
	 */
	switch (cmd) {
	case VM_RUN:
	case VM_SET_PINNING:
	case VM_GET_REGISTER:
	case VM_SET_REGISTER:
	case VM_GET_SEGMENT_DESCRIPTOR:
	case VM_SET_SEGMENT_DESCRIPTOR:
	case VM_INJECT_EVENT:
	case VM_GET_CAPABILITY:
	case VM_SET_CAPABILITY:
	case VM_PPTDEV_MSI:
		/*
		 * XXX fragile, handle with care
		 * Assumes that the first field of the ioctl data is the vcpu.
		 */
		vcpu = *(int *)data;
		if (vcpu < 0 || vcpu >= VM_MAXCPU) {
			error = EINVAL;
			goto done;
		}
		if (vcpu_is_running(sc->vm, vcpu, NULL)) {
			error = EBUSY;
			goto done;
		}
		break;
	default:
		break;
	}
	switch(cmd) {
	case VM_RUN:
		/*
		 * NOTE(review): vmmdev_mtx is dropped for the duration of
		 * vm_run(), so the softc could in principle be destroyed
		 * underneath us (vmmdev_destroy does not yet check for
		 * running vcpus) — confirm and close this race.
		 */
		vmrun = (struct vm_run *)data;
		vm_set_run_state(sc->vm, vmrun->cpuid, VCPU_RUNNING);
		mtx_unlock(&vmmdev_mtx);
		error = vm_run(sc->vm, vmrun);
		mtx_lock(&vmmdev_mtx);
		vm_set_run_state(sc->vm, vmrun->cpuid, VCPU_STOPPED);
		break;
	case VM_STAT_DESC: {
		const char *desc;
		statdesc = (struct vm_stat_desc *)data;
		desc = vmm_stat_desc(statdesc->index);
		if (desc != NULL) {
			error = 0;
			strlcpy(statdesc->desc, desc, sizeof(statdesc->desc));
		} else
			error = EINVAL;
		break;
	}
	case VM_STATS: {
		CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_TYPES);
		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(sc->vm, vmstats->cpuid,
				      &vmstats->num_entries, vmstats->statbuf);
		break;
	}
	case VM_PPTDEV_MSI:
		pptmsi = (struct vm_pptdev_msi *)data;
		error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
				      pptmsi->bus, pptmsi->slot, pptmsi->func,
				      pptmsi->destcpu, pptmsi->vector,
				      pptmsi->numvec);
		break;
	case VM_MAP_PPTDEV_MMIO:
		pptmmio = (struct vm_pptdev_mmio *)data;
		error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
				     pptmmio->func, pptmmio->gpa, pptmmio->len,
				     pptmmio->hpa);
		break;
	case VM_BIND_PPTDEV:
		pptdev = (struct vm_pptdev *)data;
		error = ppt_assign_device(sc->vm, pptdev->bus, pptdev->slot,
					  pptdev->func);
		break;
	case VM_UNBIND_PPTDEV:
		pptdev = (struct vm_pptdev *)data;
		error = ppt_unassign_device(sc->vm, pptdev->bus, pptdev->slot,
					    pptdev->func);
		break;
	case VM_INJECT_EVENT:
		vmevent = (struct vm_event *)data;
		error = vm_inject_event(sc->vm, vmevent->cpuid, vmevent->type,
					vmevent->vector,
					vmevent->error_code,
					vmevent->error_code_valid);
		break;
	case VM_INJECT_NMI:
		vmnmi = (struct vm_nmi *)data;
		error = vm_inject_nmi(sc->vm, vmnmi->cpuid);
		break;
	case VM_LAPIC_IRQ:
		vmirq = (struct vm_lapic_irq *)data;
		error = lapic_set_intr(sc->vm, vmirq->cpuid, vmirq->vector);
		break;
	case VM_SET_PINNING:
		vmpin = (struct vm_pin *)data;
		error = vm_set_pinning(sc->vm, vmpin->vm_cpuid,
				       vmpin->host_cpuid);
		break;
	case VM_GET_PINNING:
		vmpin = (struct vm_pin *)data;
		error = vm_get_pinning(sc->vm, vmpin->vm_cpuid,
				       &vmpin->host_cpuid);
		break;
	case VM_MAP_MEMORY:
		seg = (struct vm_memory_segment *)data;
		error = vm_malloc(sc->vm, seg->gpa, seg->len, &seg->hpa);
		break;
	case VM_GET_MEMORY_SEG:
		/* Always succeeds; hpa/len stay 0 if gpa is in a hole. */
		seg = (struct vm_memory_segment *)data;
		seg->hpa = seg->len = 0;
		(void)vm_gpabase2memseg(sc->vm, seg->gpa, seg);
		error = 0;
		break;
	case VM_GET_REGISTER:
		vmreg = (struct vm_register *)data;
		error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
					&vmreg->regval);
		break;
	case VM_SET_REGISTER:
		vmreg = (struct vm_register *)data;
		error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
					vmreg->regval);
		break;
	case VM_SET_SEGMENT_DESCRIPTOR:
		vmsegdesc = (struct vm_seg_desc *)data;
		error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid,
					vmsegdesc->regnum,
					&vmsegdesc->desc);
		break;
	case VM_GET_SEGMENT_DESCRIPTOR:
		vmsegdesc = (struct vm_seg_desc *)data;
		error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid,
					vmsegdesc->regnum,
					&vmsegdesc->desc);
		break;
	case VM_GET_CAPABILITY:
		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(sc->vm, vmcap->cpuid,
					  vmcap->captype,
					  &vmcap->capval);
		break;
	case VM_SET_CAPABILITY:
		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(sc->vm, vmcap->cpuid,
					  vmcap->captype,
					  vmcap->capval);
		break;
	default:
		error = ENOTTY;
		break;
	}
done:
	mtx_unlock(&vmmdev_mtx);
	return (error);
}
/*
 * mmap(2) handler: translate a guest physical page offset into the host
 * physical address backing it.  Executable mappings are refused, as are
 * offsets that fall into a hole in the guest physical map.  Returns 0 on
 * success, -1 (no valid page) otherwise.
 */
static int
vmmdev_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr,
	    int nprot, vm_memattr_t *memattr)
{
	struct vmmdev_softc *sc;
	vm_paddr_t hpa;
	int error;

	error = -1;

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup2(cdev);
	if (sc != NULL && (nprot & PROT_EXEC) == 0) {
		hpa = vm_gpa2hpa(sc->vm, (vm_paddr_t)offset, PAGE_SIZE);
		*paddr = hpa;
		if (hpa != (vm_paddr_t)-1)
			error = 0;
	}
	mtx_unlock(&vmmdev_mtx);

	return (error);
}
/*
 * Tear down a vmm device: unlink the softc, destroy the cdev so no new
 * file operations can start, destroy the vm instance, and free the softc.
 * Must be called with vmmdev_mtx held.
 */
static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
#ifdef notyet	/* XXX kernel is not compiled with invariants */
	mtx_assert(&vmmdev_mtx, MA_OWNED);
#endif
	/*
	 * XXX must stop virtual machine instances that may be still
	 * running and cleanup their state.
	 */
	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
	destroy_dev(sc->cdev);
	vm_destroy(sc->vm);
	free(sc, M_VMMDEV);
}
/*
 * hw.vmm.destroy sysctl handler: destroy the VM whose name was written
 * to the sysctl.  Returns EINVAL if no VM with that name exists.
 */
static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	struct vmmdev_softc *sc;
	char buf[VM_MAX_NAMELEN];
	int error;

	/* Placeholder shown on reads; a write replaces it with the name. */
	strlcpy(buf, "beavis", sizeof(buf));
	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	if (sc == NULL) {
		mtx_unlock(&vmmdev_mtx);
		return (EINVAL);
	}
	vmmdev_destroy(sc);
	mtx_unlock(&vmmdev_mtx);

	return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
	    NULL, 0, sysctl_vmm_destroy, "A", NULL);
/* Character device switch for /dev/vmm/<name> nodes. */
static struct cdevsw vmmdevsw = {
	.d_name		= "vmmdev",
	.d_version	= D_VERSION,
	.d_ioctl	= vmmdev_ioctl,
	.d_mmap		= vmmdev_mmap,
	.d_read		= vmmdev_rw,
	.d_write	= vmmdev_rw,
};
/*
 * hw.vmm.create sysctl handler: create a VM with the name written to the
 * sysctl and expose it as /dev/vmm/<name>.  Returns EEXIST if a VM with
 * that name already exists, EINVAL if vm_create() fails.
 */
static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct vm *vm;
	struct vmmdev_softc *sc;
	char buf[VM_MAX_NAMELEN];
	/* Placeholder shown on reads; a write replaces it with the name. */
	strlcpy(buf, "beavis", sizeof(buf));
	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	if (sc != NULL) {
		mtx_unlock(&vmmdev_mtx);
		return (EEXIST);
	}
	/*
	 * NOTE(review): vm_create(), the M_WAITOK malloc and make_dev()
	 * below can all sleep while vmmdev_mtx (a default mutex) is held —
	 * confirm against WITNESS; consider allocating before locking.
	 */
	vm = vm_create(buf);
	if (vm == NULL) {
		mtx_unlock(&vmmdev_mtx);
		return (EINVAL);
	}
	sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	sc->vm = vm;
	/* NOTE(review): make_dev() result is not checked for failure. */
	sc->cdev = make_dev(&vmmdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
			    "vmm/%s", buf);
	sc->cdev->si_drv1 = sc;
	SLIST_INSERT_HEAD(&head, sc, link);
	mtx_unlock(&vmmdev_mtx);
	return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW,
	    NULL, 0, sysctl_vmm_create, "A", NULL);
/* One-time initialization of the vmm device layer. */
void
vmmdev_init(void)
{
	mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
}
/*
 * Destroy all remaining vmm devices (module unload path).  The _SAFE
 * iterator is required because vmmdev_destroy() unlinks and frees the
 * current element.
 */
void
vmmdev_cleanup(void)
{
	struct vmmdev_softc *sc, *sc2;
	mtx_lock(&vmmdev_mtx);
	SLIST_FOREACH_SAFE(sc, &head, link, sc2)
		vmmdev_destroy(sc);
	mtx_unlock(&vmmdev_mtx);
}

103
sys/amd64/vmm/vmm_ipi.c Normal file
View File

@ -0,0 +1,103 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/bus.h>
#include <machine/intr_machdep.h>
#include <machine/apicvar.h>
#include <machine/segments.h>
#include <machine/md_var.h>
#include <machine/smp.h>
#include <machine/vmm.h>
#include "vmm_ipi.h"
extern inthand_t IDTVEC(rsvd), IDTVEC(justreturn);
/*
 * The default is to use the IPI_AST to interrupt a vcpu.
 */
static int ipinum = IPI_AST;
/*
 * vmm_ipi_init() searches the IDT downwards starting at
 * APIC_SPURIOUS_INT, so it must be the highest vector.
 */
CTASSERT(APIC_SPURIOUS_INT == 255);
/*
 * Pick the IPI vector used to kick a vcpu out of guest context.
 * Falls back to IPI_AST if no free IDT slot is found.
 */
void
vmm_ipi_init(void)
{
	int idx;
	uintptr_t func;
	struct gate_descriptor *ip;
	/*
	 * Search backwards from the highest IDT vector available for use
	 * as our IPI vector. We install the 'justreturn' handler at that
	 * vector and use it to interrupt the vcpus.
	 *
	 * We do this because the IPI_AST is heavyweight and saves all
	 * registers in the trapframe. This is overkill for our use case
	 * which is simply to EOI the interrupt and return.
	 */
	idx = APIC_SPURIOUS_INT;
	while (--idx >= APIC_IPI_INTS) {
		ip = &idt[idx];
		/* Reassemble the handler address from the gate descriptor. */
		func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
		if (func == (uintptr_t)&IDTVEC(rsvd)) {
			ipinum = idx;
			setidt(ipinum, IDTVEC(justreturn), SDT_SYSIGT,
			       SEL_KPL, 0);
			break;
		}
	}
	if (ipinum != IPI_AST && bootverbose) {
		printf("vmm_ipi_init: installing ipi handler to interrupt "
		       "vcpus at vector %d\n", ipinum);
	}
}
/* Restore the stolen IDT vector to the 'rsvd' handler on unload. */
void
vmm_ipi_cleanup(void)
{
	if (ipinum != IPI_AST)
		setidt(ipinum, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
}
/*
 * Interrupt the host cpu on which 'vcpu' is currently executing so it
 * drops out of guest context and notices pending work.  Nothing is done
 * if the vcpu is not running, or is running on the calling cpu.
 */
void
vm_interrupt_hostcpu(struct vm *vm, int vcpu)
{
	int hostcpu;

	if (!vcpu_is_running(vm, vcpu, &hostcpu))
		return;

	if (hostcpu != curcpu)
		ipi_cpu(hostcpu, ipinum);
}

38
sys/amd64/vmm/vmm_ipi.h Normal file
View File

@ -0,0 +1,38 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VMM_IPI_H_
#define _VMM_IPI_H_
struct vm;
void vmm_ipi_init(void);
void vmm_ipi_cleanup(void);
void vm_interrupt_hostcpu(struct vm *vm, int vcpu);
#endif

51
sys/amd64/vmm/vmm_ktr.h Normal file
View File

@ -0,0 +1,51 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VMM_KTR_H_
#define _VMM_KTR_H_
#include <sys/ktr.h>
#include <sys/pcpu.h>
/* All vmm trace records go to the generic KTR class. */
#define KTR_VMM KTR_GEN
/*
 * VMM_CTRn(vm, vcpuid, format, ...): ktr trace macros that prefix each
 * record with "vm <name>-<vcpuid>(<curcpu>): ".  The suffix is the
 * number of format arguments, mirroring the CTRn macros they wrap.
 */
#define	VMM_CTR0(vm, vcpuid, format)					\
CTR3(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu)
#define	VMM_CTR1(vm, vcpuid, format, p1)				\
CTR4(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu,	\
     (p1))
#define	VMM_CTR2(vm, vcpuid, format, p1, p2)				\
CTR5(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu,	\
     (p1), (p2))
#define	VMM_CTR3(vm, vcpuid, format, p1, p2, p3)			\
CTR6(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu,	\
     (p1), (p2), (p3))
#endif

121
sys/amd64/vmm/vmm_lapic.c Normal file
View File

@ -0,0 +1,121 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <machine/vmm.h>
#include "vmm_ipi.h"
#include "vmm_lapic.h"
#include "vlapic.h"
/*
 * Emulate a doubleword write to the virtual local APIC at 'offset'.
 * Returns 1 if the access was handled, 0 otherwise.
 */
int
lapic_write(struct vm *vm, int cpu, u_int offset, uint64_t val)
{
	struct vlapic *vlapic;

	vlapic = vm_lapic(vm, cpu);

	/* vlapic_op_mem_write() returns 0 when the access was emulated. */
	return (vlapic_op_mem_write(vlapic, offset, DWORD, val) == 0);
}
/*
 * Emulate a doubleword read from the virtual local APIC at 'offset',
 * storing the value in '*rv'.  Returns 1 if the access was handled,
 * 0 otherwise.
 */
int
lapic_read(struct vm *vm, int cpu, u_int offset, uint64_t *rv)
{
	struct vlapic *vlapic;

	vlapic = vm_lapic(vm, cpu);

	/* vlapic_op_mem_read() returns 0 when the access was emulated. */
	return (vlapic_op_mem_read(vlapic, offset, DWORD, rv) == 0);
}
/*
 * Return the highest-priority deliverable vector pending in the vcpu's
 * virtual APIC, or -1 if none (see vmm_lapic.h for the full contract).
 */
int
lapic_pending_intr(struct vm *vm, int cpu)
{
	struct vlapic *vlapic;
	vlapic = vm_lapic(vm, cpu);
	return (vlapic_pending_intr(vlapic));
}
/*
 * Notify the virtual APIC that the guest has accepted 'vector'; the
 * vlapic moves it from IRR to ISR.
 */
void
lapic_intr_accepted(struct vm *vm, int cpu, int vector)
{
	struct vlapic *vlapic;
	vlapic = vm_lapic(vm, cpu);
	vlapic_intr_accepted(vlapic, vector);
}
/*
 * Assert an interrupt at 'vector' on the given vcpu's virtual APIC.
 * Vectors below 32 are reserved for exceptions, hence rejected.
 * Returns EINVAL for a bad cpu or vector, 0 on success.
 */
int
lapic_set_intr(struct vm *vm, int cpu, int vector)
{
	struct vlapic *vlapic;

	if (cpu < 0 || cpu >= VM_MAXCPU || vector < 32 || vector > 255)
		return (EINVAL);

	vlapic = vm_lapic(vm, cpu);
	vlapic_set_intr_ready(vlapic, vector);

	/* Kick the vcpu so it re-evaluates its pending interrupts. */
	vm_interrupt_hostcpu(vm, cpu);

	return (0);
}
/* Forward a timer tick to the vcpu's virtual APIC timer. */
void
lapic_timer_tick(struct vm *vm, int cpu)
{
	struct vlapic *vlapic;
	vlapic = vm_lapic(vm, cpu);
	vlapic_timer_tick(vlapic);
}

64
sys/amd64/vmm/vmm_lapic.h Normal file
View File

@ -0,0 +1,64 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VMM_LAPIC_H_
#define _VMM_LAPIC_H_
struct vm;
int lapic_write(struct vm *vm, int cpu, u_int offset, uint64_t val);
int lapic_read(struct vm *vm, int cpu, u_int offset, uint64_t *retval);
void lapic_timer_tick(struct vm *vm, int cpu);
/*
* Returns a vector between 32 and 255 if an interrupt is pending in the
* IRR that can be delivered based on the current state of ISR and TPR.
*
* Note that the vector does not automatically transition to the ISR as a
* result of calling this function.
*
* Returns -1 if there is no eligible vector that can be delivered to the
* guest at this time.
*/
int lapic_pending_intr(struct vm *vm, int cpu);
/*
* Transition 'vector' from IRR to ISR. This function is called with the
* vector returned by 'lapic_pending_intr()' when the guest is able to
* accept this interrupt (i.e. RFLAGS.IF = 1 and no conditions exist that
* block interrupt delivery).
*/
void lapic_intr_accepted(struct vm *vm, int cpu, int vector);
/*
* Signals to the LAPIC that an interrupt at 'vector' needs to be generated
* to the 'cpu', the state is recorded in IRR.
*/
int lapic_set_intr(struct vm *vm, int cpu, int vector);
#endif

413
sys/amd64/vmm/vmm_mem.c Normal file
View File

@ -0,0 +1,413 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/linker.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/md_var.h>
#include <machine/metadata.h>
#include <machine/pc/bios.h>
#include <machine/vmparam.h>
#include <machine/pmap.h>
#include "vmm_util.h"
#include "vmm_mem.h"
static MALLOC_DEFINE(M_VMM_MEM, "vmm memory", "vmm memory");
#define MB (1024 * 1024)
#define GB (1024 * MB)
/* Maximum number of free-memory segments tracked below. */
#define VMM_MEM_MAXSEGS 64
/*
 * Free list of physical memory hidden from FreeBSD and reclaimed for
 * guest use; segments are kept sorted by base address.
 */
/* protected by vmm_mem_mtx */
static struct {
	vm_paddr_t base;
	vm_size_t length;
} vmm_mem_avail[VMM_MEM_MAXSEGS];
static int vmm_mem_nsegs;
/* Highest physical address of any reclaimed segment. */
static vm_paddr_t maxaddr;
static struct mtx vmm_mem_mtx;
/*
 * Steal any memory that was deliberately hidden from FreeBSD either by
 * the use of MAXMEM kernel config option or the hw.physmem loader tunable.
 */
static int
vmm_mem_steal_memory(void)
{
	int nsegs;
	caddr_t kmdp;
	uint32_t smapsize;
	uint64_t base, length;
	struct bios_smap *smapbase, *smap, *smapend;
	/*
	 * Borrowed from hammer_time() and getmemsize() in machdep.c
	 */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	smapbase = (struct bios_smap *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP);
	if (smapbase == NULL)
		panic("No BIOS smap info from loader!");
	/* The SMAP byte count is stored just before the table itself. */
	smapsize = *((uint32_t *)smapbase - 1);
	smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);
	nsegs = 0;
	for (smap = smapbase; smap < smapend; smap++) {
		/*
		 * XXX
		 * Assuming non-overlapping, monotonically increasing
		 * memory segments.
		 */
		if (smap->type != SMAP_TYPE_MEMORY)
			continue;
		if (smap->length == 0)
			break;
		/* Trim the segment to 2MB superpage boundaries. */
		base = roundup(smap->base, NBPDR);
		length = rounddown(smap->length, NBPDR);
		/* Skip this segment if FreeBSD is using all of it. */
		if (base + length <= ptoa(Maxmem))
			continue;
		/*
		 * If FreeBSD is using part of this segment then adjust
		 * 'base' and 'length' accordingly.
		 */
		if (base < ptoa(Maxmem)) {
			uint64_t used;
			used = roundup(ptoa(Maxmem), NBPDR) - base;
			base += used;
			length -= used;
		}
		if (length == 0)
			continue;
		vmm_mem_avail[nsegs].base = base;
		vmm_mem_avail[nsegs].length = length;
		if (base + length > maxaddr)
			maxaddr = base + length;
		if (0 && bootverbose) {
			printf("vmm_mem_populate: index %d, base 0x%0lx, "
			       "length %ld\n",
			       nsegs, vmm_mem_avail[nsegs].base,
			       vmm_mem_avail[nsegs].length);
		}
		nsegs++;
		if (nsegs >= VMM_MEM_MAXSEGS) {
			printf("vmm_mem_populate: maximum number of vmm memory "
			       "segments reached!\n");
			return (ENOSPC);
		}
	}
	vmm_mem_nsegs = nsegs;
	return (0);
}
/*
 * Wire the physical range [start, end) into the kernel's direct map
 * using superpages, so reclaimed guest memory is addressable via
 * PHYS_TO_DMAP().  Existing mappings are validated rather than replaced;
 * any mismatch is a panic.
 */
static void
vmm_mem_direct_map(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t addr, remaining;
	int pdpi, pdi, superpage_size;
	pml4_entry_t *pml4p;
	pdp_entry_t *pdp;
	pd_entry_t *pd;
	uint64_t page_attr_bits;
	if (end >= NBPML4)
		panic("Cannot map memory beyond %ldGB", NBPML4 / GB);
	/* XXX FreeBSD 8.1 does not use 1G superpages in the direct map */
	if (0 && vmm_supports_1G_pages())
		superpage_size = NBPDP;
	else
		superpage_size = NBPDR;
	/*
	 * Get the page directory pointer page that contains the direct
	 * map address mappings.
	 */
	pml4p = kernel_pmap->pm_pml4;
	pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4p[DMPML4I] & ~PAGE_MASK);
	page_attr_bits = PG_RW | PG_V | PG_PS | PG_G;
	addr = start;
	while (addr < end) {
		remaining = end - addr;
		pdpi = addr / NBPDP;
		/* Use a 1GB mapping when allowed, aligned and large enough. */
		if (superpage_size == NBPDP &&
		    remaining >= NBPDP &&
		    addr % NBPDP == 0) {
			/*
			 * If there isn't a mapping for this address then
			 * create one but if there is one already make sure
			 * it matches what we expect it to be.
			 */
			if (pdp[pdpi] == 0) {
				pdp[pdpi] = addr | page_attr_bits;
				if (0 && bootverbose) {
					printf("vmm_mem_populate: mapping "
					       "0x%lx with 1GB page at "
					       "pdpi %d\n", addr, pdpi);
				}
			} else {
				pdp_entry_t pdpe = pdp[pdpi];
				if ((pdpe & ~PAGE_MASK) != addr ||
				    (pdpe & page_attr_bits) != page_attr_bits) {
					panic("An invalid mapping 0x%016lx "
					      "already exists for 0x%016lx\n",
					      pdpe, addr);
				}
			}
			addr += NBPDP;
		} else {
			/* Otherwise fall back to a 2MB mapping. */
			if (remaining < NBPDR) {
				panic("vmm_mem_populate: remaining (%ld) must "
				      "be greater than NBPDR (%d)\n",
				      remaining, NBPDR);
			}
			if (pdp[pdpi] == 0) {
				/*
				 * XXX we lose this memory forever because
				 * we do not keep track of the virtual address
				 * that would be required to free this page.
				 */
				pd = malloc(PAGE_SIZE, M_VMM_MEM,
					    M_WAITOK | M_ZERO);
				if ((uintptr_t)pd & PAGE_MASK) {
					panic("vmm_mem_populate: page directory"
					      "page not aligned on %d "
					      "boundary\n", PAGE_SIZE);
				}
				pdp[pdpi] = vtophys(pd);
				pdp[pdpi] |= PG_RW | PG_V | PG_U;
				if (0 && bootverbose) {
					printf("Creating page directory "
					       "at pdp index %d for 0x%016lx\n",
					       pdpi, addr);
				}
			}
			pdi = (addr % NBPDP) / NBPDR;
			pd = (pd_entry_t *)PHYS_TO_DMAP(pdp[pdpi] & ~PAGE_MASK);
			/*
			 * Create a new mapping if one doesn't already exist
			 * or validate it if it does.
			 */
			if (pd[pdi] == 0) {
				pd[pdi] = addr | page_attr_bits;
				if (0 && bootverbose) {
					printf("vmm_mem_populate: mapping "
					       "0x%lx with 2MB page at "
					       "pdpi %d, pdi %d\n",
					       addr, pdpi, pdi);
				}
			} else {
				pd_entry_t pde = pd[pdi];
				if ((pde & ~PAGE_MASK) != addr ||
				    (pde & page_attr_bits) != page_attr_bits) {
					panic("An invalid mapping 0x%016lx "
					      "already exists for 0x%016lx\n",
					      pde, addr);
				}
			}
			addr += NBPDR;
		}
	}
}
/*
 * Reclaim memory hidden from FreeBSD and wire each reclaimed segment
 * into the direct map.  Returns 0 on success or an error from
 * vmm_mem_steal_memory().
 */
static int
vmm_mem_populate(void)
{
	vm_paddr_t first, last;
	int error, i;

	/* populate the vmm_mem_avail[] array */
	error = vmm_mem_steal_memory();
	if (error != 0)
		return (error);

	/*
	 * Now map the memory that was hidden from FreeBSD in
	 * the direct map VA space.  Segments must be 2MB aligned
	 * at both ends.
	 */
	for (i = 0; i < vmm_mem_nsegs; i++) {
		first = vmm_mem_avail[i].base;
		last = first + vmm_mem_avail[i].length;
		if ((first & PDRMASK) != 0 || (last & PDRMASK) != 0) {
			panic("start (0x%016lx) and end (0x%016lx) must be "
			      "aligned on a %dMB boundary\n",
			      first, last, NBPDR / MB);
		}
		vmm_mem_direct_map(first, last);
	}

	return (0);
}
/*
 * Initialize the vmm physical memory allocator: set up its lock, then
 * reclaim hidden memory and wire it into the direct map.  Returns 0 on
 * success or an error from vmm_mem_populate().
 */
int
vmm_mem_init(void)
{

	mtx_init(&vmm_mem_mtx, "vmm_mem_mtx", NULL, MTX_DEF);

	return (vmm_mem_populate());
}
/*
 * Allocate 'size' bytes of reclaimed physical memory (first-fit from the
 * front of a segment).  'size' must be a multiple of NBPDR (2MB).
 * Returns the physical base address, or 0 if no segment is big enough.
 */
vm_paddr_t
vmm_mem_alloc(size_t size)
{
	int i;
	vm_paddr_t addr;
	if ((size & PDRMASK) != 0) {
		panic("vmm_mem_alloc: size 0x%0lx must be "
		      "aligned on a 0x%0x boundary\n", size, NBPDR);
	}
	addr = 0;
	mtx_lock(&vmm_mem_mtx);
	for (i = 0; i < vmm_mem_nsegs; i++) {
		if (vmm_mem_avail[i].length >= size) {
			/* Carve the allocation off the front of the segment. */
			addr = vmm_mem_avail[i].base;
			vmm_mem_avail[i].base += size;
			vmm_mem_avail[i].length -= size;
			/* remove a zero length segment */
			if (vmm_mem_avail[i].length == 0) {
				memmove(&vmm_mem_avail[i],
					&vmm_mem_avail[i + 1],
					(vmm_mem_nsegs - (i + 1)) *
					sizeof(vmm_mem_avail[0]));
				vmm_mem_nsegs--;
			}
			break;
		}
	}
	mtx_unlock(&vmm_mem_mtx);
	return (addr);
}
/*
 * Return the range [base, base + length) to the free list, keeping
 * the list sorted by base address and merging physically contiguous
 * neighbors.  Both 'base' and 'length' must be multiples of NBPDR.
 *
 * Panics if the segment table is already full.
 */
void
vmm_mem_free(vm_paddr_t base, size_t length)
{
	int i, merged;

	if ((base & PDRMASK) != 0 || (length & PDRMASK) != 0) {
		panic("vmm_mem_free: base 0x%0lx and length 0x%0lx must be "
		      "aligned on a 0x%0x boundary\n", base, length, NBPDR);
	}

	mtx_lock(&vmm_mem_mtx);

	/* Locate the insertion point that keeps the list sorted by base */
	for (i = 0; i < vmm_mem_nsegs; i++) {
		if (vmm_mem_avail[i].base > base)
			break;
	}

	if (vmm_mem_nsegs >= VMM_MEM_MAXSEGS)
		panic("vmm_mem_free: cannot free any more segments");

	/* Create a new segment at index 'i' */
	memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i],
		(vmm_mem_nsegs - i) * sizeof(vmm_mem_avail[0]));
	vmm_mem_avail[i].base = base;
	vmm_mem_avail[i].length = length;
	vmm_mem_nsegs++;

	/* Coalesce adjacent segments until a full pass finds no merge */
	do {
		merged = 0;
		for (i = 0; i < vmm_mem_nsegs - 1; i++) {
			if (vmm_mem_avail[i].base + vmm_mem_avail[i].length !=
			    vmm_mem_avail[i + 1].base)
				continue;
			vmm_mem_avail[i].length += vmm_mem_avail[i + 1].length;
			memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i + 2],
			    (vmm_mem_nsegs - (i + 2)) *
			    sizeof(vmm_mem_avail[0]));
			vmm_mem_nsegs--;
			merged = 1;
			break;
		}
	} while (merged);

	mtx_unlock(&vmm_mem_mtx);
}
/*
 * Return the value of the file-scope 'maxaddr' variable (set during
 * initialization elsewhere in this file; not visible in this chunk).
 */
vm_paddr_t
vmm_mem_maxaddr(void)
{
	return (maxaddr);
}
/*
 * Debugging aid: print every free segment to the console as
 * "<index> <start> <end>".
 */
void
vmm_mem_dump(void)
{
	int seg;
	vm_paddr_t start;
	vm_size_t len;

	mtx_lock(&vmm_mem_mtx);
	for (seg = 0; seg < vmm_mem_nsegs; seg++) {
		start = vmm_mem_avail[seg].base;
		len = vmm_mem_avail[seg].length;
		printf("%-4d0x%016lx 0x%016lx\n", seg, start, start + len);
	}
	mtx_unlock(&vmm_mem_mtx);
}

38
sys/amd64/vmm/vmm_mem.h Normal file
View File

@ -0,0 +1,38 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VMM_MEM_H_
#define _VMM_MEM_H_

/* One-time initialization of the vmm physical memory allocator. */
int vmm_mem_init(void);
/* Allocate 'size' bytes (must be 2MB aligned); returns 0 on failure. */
vm_paddr_t vmm_mem_alloc(size_t size);
/* Return memory obtained from vmm_mem_alloc() to the free list. */
void vmm_mem_free(vm_paddr_t start, size_t size);
/* Highest physical address managed by the vmm allocator. */
vm_paddr_t vmm_mem_maxaddr(void);
/* Print the free segment list to the console (debugging). */
void vmm_mem_dump(void);
#endif

264
sys/amd64/vmm/vmm_msr.c Normal file
View File

@ -0,0 +1,264 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <machine/specialreg.h>
#include <x86/apicreg.h>
#include <machine/vmm.h>
#include "vmm_lapic.h"
#include "vmm_msr.h"
/* MSR is emulated in software; never passed through to the hardware. */
#define VMM_MSR_F_EMULATE 0x01
/* Guest writes to this MSR are silently dropped. */
#define VMM_MSR_F_READONLY 0x02

/* One MSR tracked by the vmm, with the host value cached at init time. */
struct vmm_msr {
	int num;		/* MSR number */
	int flags;		/* VMM_MSR_F_* */
	uint64_t hostval;	/* host value saved by vmm_msr_init() */
};

/* Table of all MSRs the vmm saves/restores or emulates for guests. */
static struct vmm_msr vmm_msr[] = {
	{ MSR_LSTAR, 0 },
	{ MSR_CSTAR, 0 },
	{ MSR_STAR, 0 },
	{ MSR_SF_MASK, 0 },
	{ MSR_APICBASE, VMM_MSR_F_EMULATE },
	{ MSR_BIOS_SIGN,VMM_MSR_F_EMULATE },
	{ MSR_MCG_CAP, VMM_MSR_F_EMULATE | VMM_MSR_F_READONLY },
};

#define vmm_msr_num (sizeof(vmm_msr) / sizeof(vmm_msr[0]))
/* The per-vcpu guest_msrs[] array must be able to hold the whole table */
CTASSERT(VMM_MSR_NUM >= vmm_msr_num);

#define readonly_msr(idx) \
	((vmm_msr[(idx)].flags & VMM_MSR_F_READONLY) != 0)

#define emulated_msr(idx) \
	((vmm_msr[(idx)].flags & VMM_MSR_F_EMULATE) != 0)
/*
 * Cache the host's value for every pass-through MSR in the table so
 * it can be re-loaded when control returns to the host.
 */
void
vmm_msr_init(void)
{
	int idx;

	for (idx = 0; idx < vmm_msr_num; idx++) {
		if (emulated_msr(idx))
			continue;
		/*
		 * XXX this assumes that the value of the host msr does not
		 * change after we have cached it.
		 */
		vmm_msr[idx].hostval = rdmsr(vmm_msr[idx].num);
	}
}
/*
 * Establish the power-on value of every guest MSR for vcpu 'cpu'.
 * Each entry in vmm_msr[] must be handled here; a table entry
 * without an initializer is a programming error and panics.
 */
void
guest_msrs_init(struct vm *vm, int cpu)
{
	uint64_t *msrs;
	int idx;

	msrs = vm_guest_msrs(vm, cpu);

	for (idx = 0; idx < vmm_msr_num; idx++) {
		switch (vmm_msr[idx].num) {
		case MSR_LSTAR:
		case MSR_CSTAR:
		case MSR_STAR:
		case MSR_SF_MASK:
		case MSR_BIOS_SIGN:
		case MSR_MCG_CAP:
			/* Ordinary MSRs start out as zero */
			msrs[idx] = 0;
			break;
		case MSR_APICBASE:
			/* Local APIC enabled in x2APIC mode; vcpu 0 is BSP */
			msrs[idx] = DEFAULT_APIC_BASE | APICBASE_ENABLED |
				    APICBASE_X2APIC;
			if (cpu == 0)
				msrs[idx] |= APICBASE_BSP;
			break;
		default:
			panic("guest_msrs_init: missing initialization for msr "
			      "0x%0x", vmm_msr[idx].num);
		}
	}
}
/*
 * TRUE if 'num' falls in the architectural x2APIC MSR range
 * (0x800 - 0xBFF).
 */
static boolean_t
x2apic_msr(u_int num)
{

	return (num >= 0x800 && num <= 0xBFF ? TRUE : FALSE);
}
/*
 * Convert an x2APIC MSR number into the offset of the corresponding
 * memory-mapped APIC register (registers are 16 bytes apart).
 */
static u_int
x2apic_msr_to_regoff(u_int msr)
{

	return ((msr - 0x800) * 16);
}
/*
 * TRUE if 'num' is the x2APIC ID register MSR (0x802).
 */
static boolean_t
x2apic_msr_id(u_int num)
{

	return (num == 0x802 ? TRUE : FALSE);
}
/*
 * Map an MSR number to its slot in the vmm_msr[] table.
 * Returns -1 if the MSR is not handled by this module.
 */
static int
msr_num_to_idx(u_int num)
{
	int idx;

	for (idx = 0; idx < vmm_msr_num; idx++) {
		if (vmm_msr[idx].num == num)
			return (idx);
	}

	return (-1);
}
/*
 * Emulate a guest WRMSR of 'val' to MSR 'num' on vcpu 'cpu'.
 *
 * x2APIC MSRs are forwarded to the virtual local APIC.  Writes to
 * read-only MSRs are silently dropped but still reported as handled.
 *
 * Returns non-zero if the write was handled, 0 if the MSR is unknown.
 */
int
emulate_wrmsr(struct vm *vm, int cpu, u_int num, uint64_t val)
{
	uint64_t *msrs;
	int idx;

	if (x2apic_msr(num))
		return (lapic_write(vm, cpu, x2apic_msr_to_regoff(num), val));

	idx = msr_num_to_idx(num);
	if (idx < 0)
		return (0);

	if (!readonly_msr(idx)) {
		/* Stash the value */
		msrs = vm_guest_msrs(vm, cpu);
		msrs[idx] = val;

		/* Update processor state for non-emulated MSRs */
		if (!emulated_msr(idx))
			wrmsr(vmm_msr[idx].num, val);
	}

	return (1);
}
/*
 * Emulate a guest RDMSR of MSR 'num' on vcpu 'cpu'.
 *
 * x2APIC MSRs are serviced by the virtual local APIC; other known
 * MSRs are satisfied from the per-vcpu saved values.  On success the
 * 64-bit result is split into the guest's RAX (low) and RDX (high).
 *
 * Returns non-zero if the read was handled, 0 if the MSR is unknown.
 */
int
emulate_rdmsr(struct vm *vm, int cpu, u_int num)
{
	int error, handled, idx;
	uint32_t eax, edx;
	uint64_t result, *guest_msrs;

	handled = 0;

	if (x2apic_msr(num)) {
		handled = lapic_read(vm, cpu, x2apic_msr_to_regoff(num),
		    &result);
		/*
		 * The version ID needs to be massaged
		 */
		if (x2apic_msr_id(num)) {
			result = result >> 24;
		}
		goto done;
	}

	idx = msr_num_to_idx(num);
	if (idx < 0)
		goto done;	/* unknown MSR: not handled */

	guest_msrs = vm_guest_msrs(vm, cpu);
	result = guest_msrs[idx];

	/*
	 * If this is not an emulated msr register make sure that the processor
	 * state matches our cached state.
	 */
	if (!emulated_msr(idx) && (rdmsr(num) != result)) {
		panic("emulate_rdmsr: msr 0x%0x has inconsistent cached "
		    "(0x%016lx) and actual (0x%016lx) values", num,
		    result, rdmsr(num));
	}

	handled = 1;

done:
	if (handled) {
		/* Hand the result back to the guest in RAX:RDX */
		eax = result;
		edx = result >> 32;
		error = vm_set_register(vm, cpu, VM_REG_GUEST_RAX, eax);
		if (error)
			panic("vm_set_register(rax) error %d", error);
		error = vm_set_register(vm, cpu, VM_REG_GUEST_RDX, edx);
		if (error)
			panic("vm_set_register(rdx) error %d", error);
	}
	return (handled);
}
/*
 * Load the guest's saved values into the pass-through MSRs before
 * entering the guest.  Emulated MSRs never touch the hardware.
 */
void
restore_guest_msrs(struct vm *vm, int cpu)
{
	uint64_t *msrs;
	int idx;

	msrs = vm_guest_msrs(vm, cpu);

	for (idx = 0; idx < vmm_msr_num; idx++) {
		if (!emulated_msr(idx))
			wrmsr(vmm_msr[idx].num, msrs[idx]);
	}
}
/*
 * Reload the host's cached values into the pass-through MSRs after
 * returning from the guest.
 */
void
restore_host_msrs(struct vm *vm, int cpu)
{
	int idx;

	for (idx = 0; idx < vmm_msr_num; idx++) {
		if (!emulated_msr(idx))
			wrmsr(vmm_msr[idx].num, vmm_msr[idx].hostval);
	}
}

42
sys/amd64/vmm/vmm_msr.h Normal file
View File

@ -0,0 +1,42 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VMM_MSR_H_
#define _VMM_MSR_H_

/* Upper bound on the MSR table size (checked by CTASSERT in vmm_msr.c). */
#define VMM_MSR_NUM 16
struct vm;

/* Cache the host values of all pass-through MSRs (called once at init). */
void vmm_msr_init(void);
/* Emulate a guest WRMSR/RDMSR; returns non-zero if the MSR was handled. */
int emulate_wrmsr(struct vm *vm, int vcpu, u_int msr, uint64_t val);
int emulate_rdmsr(struct vm *vm, int vcpu, u_int msr);
/* Establish power-on MSR values for a vcpu. */
void guest_msrs_init(struct vm *vm, int cpu);
/* Swap pass-through MSRs between host and guest values. */
void restore_host_msrs(struct vm *vm, int cpu);
void restore_guest_msrs(struct vm *vm, int cpu);
#endif

103
sys/amd64/vmm/vmm_stat.c Normal file
View File

@ -0,0 +1,103 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <machine/vmm.h>
#include "vmm_stat.h"
/* Number of stat types registered so far. */
static int vstnum;
/* Registry of stat types, indexed by vmm_stat_type.index. */
static struct vmm_stat_type *vsttab[MAX_VMM_STAT_TYPES];
static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat");
/*
 * SYSINIT callback that registers a stat type with the global table.
 * 'arg' is the 'struct vmm_stat_type *' supplied by VMM_STAT_DEFINE().
 *
 * A stat without a description is silently ignored; registration is
 * refused with a console message if the table is full.
 */
void
vmm_stat_init(void *arg)
{
	struct vmm_stat_type *vst = arg;

	/* We require all stats to identify themselves with a description */
	if (vst->desc == NULL)
		return;

	if (vstnum >= MAX_VMM_STAT_TYPES) {
		/* Fixed misspelling "accomodate" in the console message */
		printf("Cannot accommodate vmm stat type \"%s\"!\n", vst->desc);
		return;
	}

	/* Record the stat's slot in the per-vcpu stats buffer */
	vst->index = vstnum;
	vsttab[vstnum++] = vst;
}
/*
 * Copy the statistics for 'vcpu' into 'buf' and report the number of
 * entries copied via 'num_stats'.  'buf' must be able to hold at
 * least MAX_VMM_STAT_TYPES entries.
 *
 * Returns 0 on success or EINVAL for an out-of-range vcpu.
 */
int
vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf)
{
	uint64_t *stats;
	int idx;

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	stats = vcpu_stats(vm, vcpu);
	for (idx = 0; idx < vstnum; idx++)
		buf[idx] = stats[idx];

	*num_stats = vstnum;
	return (0);
}
/*
 * Allocate a zeroed per-vcpu stats buffer sized to hold every stat
 * registered so far.  The caller frees it with vmm_stat_free().
 */
void *
vmm_stat_alloc(void)
{

	return (malloc(vstnum * sizeof(uint64_t), M_VMM_STAT,
	    M_ZERO | M_WAITOK));
}
/*
 * Release a buffer obtained from vmm_stat_alloc().
 */
void
vmm_stat_free(void *vp)
{
	free(vp, M_VMM_STAT);
}
/*
 * Return the human-readable description for stat slot 'index', or
 * NULL if the index is out of range.
 */
const char *
vmm_stat_desc(int index)
{
	const char *desc = NULL;

	if (index >= 0 && index < vstnum)
		desc = vsttab[index]->desc;

	return (desc);
}

71
sys/amd64/vmm/vmm_stat.h Normal file
View File

@ -0,0 +1,71 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VMM_STAT_H_
#define _VMM_STAT_H_

struct vm;

/* Maximum number of stat types that can be registered. */
#define MAX_VMM_STAT_TYPES 64 /* arbitrary */

/* A single named statistic; 'index' is assigned at registration time. */
struct vmm_stat_type {
	const char *desc; /* description of statistic */
	int index; /* position in the stats buffer */
};

/* SYSINIT callback used by VMM_STAT_DEFINE() to register a stat type. */
void vmm_stat_init(void *arg);

/* Define and auto-register a stat type at module load time. */
#define VMM_STAT_DEFINE(type, desc) \
	struct vmm_stat_type type[1] = { \
		{ desc, -1 } \
	}; \
	SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_init, type)

/* Allocate/free a zeroed per-vcpu stats buffer. */
void *vmm_stat_alloc(void);
void vmm_stat_free(void *vp);

/*
 * 'buf' should be at least fit 'MAX_VMM_STAT_TYPES' entries
 */
int vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf);
const char *vmm_stat_desc(int index);

/* Bump stat 'vst' by 'x'; compiled out unless VMM_KEEP_STATS is defined. */
static void __inline
vmm_stat_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t x)
{
#ifdef VMM_KEEP_STATS
	uint64_t *stats = vcpu_stats(vm, vcpu);
	if (vst->index >= 0)
		stats[vst->index] += x;
#endif
}
#endif

View File

@ -0,0 +1,42 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#define	LOCORE
#include <machine/asmacros.h>

#define	LA_EOI		0xB0	/* memory-mapped local APIC EOI register */
#define	MSR_APIC_EOI	0x80b	/* x2APIC EOI register as an MSR */

	.text
	SUPERALIGN_TEXT
/*
 * IPI handler that does nothing but acknowledge the interrupt.
 *
 * When the local APIC is in x2APIC mode the 'lapic' pointer is NULL
 * and the EOI must be delivered with a WRMSR instead of an MMIO
 * write (same pattern as the DO_EOI macro in apic_vector.S); the old
 * unconditional MMIO write would dereference a NULL pointer there.
 */
IDTVEC(justreturn)
	pushq	%rax
	pushq	%rcx
	pushq	%rdx
	movq	lapic, %rax
	testq	%rax, %rax
	jz	1f
	movl	$0, LA_EOI(%rax)	/* xAPIC: EOI via MMIO */
	jmp	2f
1:
	movl	$MSR_APIC_EOI, %ecx	/* x2APIC: EOI via WRMSR */
	xorl	%eax, %eax
	xorl	%edx, %edx
	wrmsr
2:
	popq	%rdx
	popq	%rcx
	popq	%rax
	iretq

111
sys/amd64/vmm/vmm_util.c Normal file
View File

@ -0,0 +1,111 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/libkern.h>
#include <machine/md_var.h>
#include "vmm_util.h"
/*
 * TRUE when the host CPU vendor string is "GenuineIntel".
 */
boolean_t
vmm_is_intel(void)
{

	return (strcmp(cpu_vendor, "GenuineIntel") == 0 ? TRUE : FALSE);
}
/*
 * TRUE when the host CPU vendor string is "AuthenticAMD".
 */
boolean_t
vmm_is_amd(void)
{

	return (strcmp(cpu_vendor, "AuthenticAMD") == 0 ? TRUE : FALSE);
}
/*
 * Report whether the host CPU supports 1GB superpages.
 *
 * CPUID.80000001:EDX[bit 26] = 1 indicates support for 1GB pages on
 * both Intel and AMD, provided the extended leaf exists at all.
 */
boolean_t
vmm_supports_1G_pages(void)
{
	unsigned int regs[4];

	if (cpu_exthigh < 0x80000001)
		return (FALSE);

	do_cpuid(0x80000001, regs);
	return ((regs[3] & (1 << 26)) != 0 ? TRUE : FALSE);
}
#include <sys/proc.h>
#include <machine/frame.h>
/* Print one 64-bit register field of the trapframe. */
#define DUMP_REG(x) printf(#x "\t\t0x%016lx\n", (long)(tf->tf_ ## x))
/* Print one segment register field of the trapframe. */
#define DUMP_SEG(x) printf(#x "\t\t0x%04x\n", (unsigned)(tf->tf_ ## x))

/*
 * Debugging aid: print every register saved in trapframe 'tf' to the
 * console.
 */
void
dump_trapframe(struct trapframe *tf)
{
	DUMP_REG(rdi);
	DUMP_REG(rsi);
	DUMP_REG(rdx);
	DUMP_REG(rcx);
	DUMP_REG(r8);
	DUMP_REG(r9);
	DUMP_REG(rax);
	DUMP_REG(rbx);
	DUMP_REG(rbp);
	DUMP_REG(r10);
	DUMP_REG(r11);
	DUMP_REG(r12);
	DUMP_REG(r13);
	DUMP_REG(r14);
	DUMP_REG(r15);
	DUMP_REG(trapno);
	DUMP_REG(addr);
	DUMP_REG(flags);
	DUMP_REG(err);
	DUMP_REG(rip);
	DUMP_REG(rflags);
	DUMP_REG(rsp);
	DUMP_SEG(cs);
	DUMP_SEG(ss);
	DUMP_SEG(fs);
	DUMP_SEG(gs);
	DUMP_SEG(es);
	DUMP_SEG(ds);
}

40
sys/amd64/vmm/vmm_util.h Normal file
View File

@ -0,0 +1,40 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VMM_UTIL_H_
#define _VMM_UTIL_H_

struct trapframe;

/* Host CPU vendor checks based on the CPUID vendor string. */
boolean_t vmm_is_intel(void);
boolean_t vmm_is_amd(void);
/* TRUE if the host advertises 1GB superpage support via CPUID. */
boolean_t vmm_supports_1G_pages(void);
/* Dump all registers saved in 'tf' to the console (debugging). */
void dump_trapframe(struct trapframe *tf);
#endif

113
sys/amd64/vmm/x86.c Normal file
View File

@ -0,0 +1,113 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>
#include "x86.h"
/*
 * Emulate the CPUID instruction for a guest.
 *
 * On entry *eax/*ecx carry the requested function and sub-leaf; on
 * a handled leaf all four pointers are updated with the (possibly
 * filtered) host values.
 *
 * Returns 1 if the leaf was handled, 0 for an unknown leaf (the
 * output pointers are then left untouched).
 */
int
x86_emulate_cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
	unsigned int func, regs[4];

	func = *eax;

	/* Start from the host's answer and filter selected leaves below */
	cpuid_count(*eax, *ecx, regs);

	switch(func) {
		/* These leaves are passed through from the host unmodified */
		case CPUID_0000_0000:
		case CPUID_0000_0002:
		case CPUID_0000_0003:
		case CPUID_0000_0004:
		case CPUID_0000_000A:
			break;

		case CPUID_8000_0000:
		case CPUID_8000_0001:
		case CPUID_8000_0002:
		case CPUID_8000_0003:
		case CPUID_8000_0004:
		case CPUID_8000_0006:
		case CPUID_8000_0007:
		case CPUID_8000_0008:

			break;

		case CPUID_0000_0001:
			/*
			 * Override the APIC ID only in ebx
			 */
			regs[1] &= ~(CPUID_0000_0001_APICID_MASK);
			/*
			 * XXX fixme for MP case, set apicid properly for cpu.
			 */
			regs[1] |= (0 << CPUID_0000_0001_APICID_SHIFT);

			/*
			 * Don't expose VMX capability.
			 * Advertise x2APIC capability.
			 */
			regs[2] &= ~CPUID_0000_0001_FEAT0_VMX;
			regs[2] |= CPUID2_X2APIC;

			/*
			 * Machine check handling is done in the host.
			 * Hide MTRR capability.
			 */
			regs[3] &= ~(CPUID_MCA | CPUID_MCE | CPUID_MTRR);

			break;

		case CPUID_0000_000B:
			/*
			 * XXXSMP fixme
			 * Processor topology enumeration
			 */
			regs[0] = 0;
			regs[1] = 0;
			regs[2] = *ecx & 0xff;
			regs[3] = 0;
			break;

		default:
			return (0);
	}

	*eax = regs[0];
	*ebx = regs[1];
	*ecx = regs[2];
	*edx = regs[3];
	return (1);
}

62
sys/amd64/vmm/x86.h Normal file
View File

@ -0,0 +1,62 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _X86_H_
#define _X86_H_

/*
 * CPUID leaf numbers recognized by x86_emulate_cpuid().
 */
#define CPUID_0000_0000 (0x0)
#define CPUID_0000_0001 (0x1)
#define CPUID_0000_0002 (0x2)
#define CPUID_0000_0003 (0x3)
#define CPUID_0000_0004 (0x4)
#define CPUID_0000_000A (0xA)
#define CPUID_0000_000B (0xB)
#define CPUID_8000_0000 (0x80000000)
#define CPUID_8000_0001 (0x80000001)
#define CPUID_8000_0002 (0x80000002)
#define CPUID_8000_0003 (0x80000003)
#define CPUID_8000_0004 (0x80000004)
#define CPUID_8000_0006 (0x80000006)
#define CPUID_8000_0007 (0x80000007)
#define CPUID_8000_0008 (0x80000008)

/*
 * CPUID instruction Fn0000_0001:
 */
#define CPUID_0000_0001_APICID_MASK (0xff<<24)
#define CPUID_0000_0001_APICID_SHIFT 24

/*
 * CPUID instruction Fn0000_0001 ECX
 */
#define CPUID_0000_0001_FEAT0_VMX (1<<5)

/* Filter host CPUID results for presentation to a guest. */
int x86_emulate_cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
	uint32_t *edx);
#endif

View File

@ -295,6 +295,11 @@ libkern/memset.c standard
compat/x86bios/x86bios.c optional x86bios | atkbd | dpms | vesa
contrib/x86emu/x86emu.c optional x86bios | atkbd | dpms | vesa
#
# bvm console
#
dev/bvm/bvm_console.c optional bvmconsole
dev/bvm/bvm_dbg.c optional bvmdebug
#
# x86 shared code between IA32, AMD64 and PC98 architectures
#
x86/acpica/OsdEnvironment.c optional acpi

View File

@ -10,6 +10,7 @@ PERFMON
PMAP_SHPGPERPROC opt_pmap.h
MPTABLE_FORCE_HTT
MP_WATCHDOG
NKPT opt_pmap.h
# Options for emulators. These should only be used at config time, so
# they are handled like options for static filesystems

View File

@ -0,0 +1,129 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/linker.h>
#include <sys/libkern.h>
#include <dev/pci/pcivar.h>
/*
 * linker_file_foreach() callback: match a loaded linker file by name.
 * 'arg' is the filename to look for; returns 1 to stop the iteration
 * on a match, 0 to continue.
 */
static int
linker_file_iterator(linker_file_t lf, void *arg)
{
	const char *name = arg;

	return (strcmp(lf->filename, name) == 0 ? 1 : 0);
}
/*
 * Return non-zero if bus/slot/func appears in the 'pptdevs' kernel
 * environment variable, which lists pass-through devices as
 * space-separated "bus/slot/func" triples.
 *
 * Each space is temporarily replaced with a NUL so sscanf() sees one
 * triple at a time, and restored before moving on.
 *
 * NOTE(review): assumes freeenv() tolerates a NULL argument when
 * 'pptdevs' is unset — confirm against kern_environment.c.
 */
static boolean_t
pptdev(int bus, int slot, int func)
{
	int found, b, s, f, n;
	char *val, *cp, *cp2;

	/*
	 * setenv pptdevs "1/2/3 4/5/6 7/8/9 10/11/12"
	 */
	found = 0;
	cp = val = getenv("pptdevs");
	while (cp != NULL && *cp != '\0') {
		if ((cp2 = strchr(cp, ' ')) != NULL)
			*cp2 = '\0';	/* isolate the current triple */

		n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
		if (n == 3 && bus == b && slot == s && func == f) {
			found = 1;
			break;
		}

		if (cp2 != NULL)
			*cp2++ = ' ';	/* restore the separator */

		cp = cp2;
	}
	freeenv(val);
	return (found);
}
/*
 * Probe succeeds (returns 0) only for devices listed in the
 * 'pptdevs' tunable, which keeps other drivers from attaching to
 * them.  If vmm.ko is loaded it takes care of pass-through devices
 * itself, so we claim nothing.
 */
static int
pci_blackhole_probe(device_t dev)
{

	/*
	 * If 'vmm.ko' has also been loaded then don't try to claim
	 * any pci devices.
	 */
	if (linker_file_foreach(linker_file_iterator, "vmm.ko"))
		return (ENXIO);

	if (pptdev(pci_get_bus(dev), pci_get_slot(dev),
	    pci_get_function(dev)))
		return (0);

	return (ENXIO);
}
/*
 * Attach always fails: the successful probe has already served its
 * purpose of outbidding other drivers for the device.
 */
static int
pci_blackhole_attach(device_t dev)
{
	/*
	 * We never really want to claim the devices but just want to prevent
	 * other drivers from getting to them.
	 */
	return (ENXIO);
}
static device_method_t pci_blackhole_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, pci_blackhole_probe),
	DEVMETHOD(device_attach, pci_blackhole_attach),

	{ 0, 0 }
};

/* No softc needed: the driver never successfully attaches. */
static driver_t pci_blackhole_driver = {
	"blackhole",
	pci_blackhole_methods,
};

devclass_t blackhole_devclass;

DRIVER_MODULE(blackhole, pci, pci_blackhole_driver, blackhole_devclass, 0, 0);
MODULE_DEPEND(blackhole, pci, 1, 1, 1);

242
sys/dev/bvm/bvm_console.c Normal file
View File

@ -0,0 +1,242 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_comconsole.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/cons.h>
#include <sys/tty.h>
#include <sys/reboot.h>
#include <sys/bus.h>
#include <sys/kdb.h>
#include <ddb/ddb.h>
#ifndef BVMCONS_POLL_HZ
#define BVMCONS_POLL_HZ 4
#endif
#define BVMBURSTLEN 16 /* max number of bytes to write in one chunk */
static tsw_open_t bvm_tty_open;
static tsw_close_t bvm_tty_close;
static tsw_outwakeup_t bvm_tty_outwakeup;
static struct ttydevsw bvm_ttydevsw = {
.tsw_flags = TF_NOPREFIX,
.tsw_open = bvm_tty_open,
.tsw_close = bvm_tty_close,
.tsw_outwakeup = bvm_tty_outwakeup,
};
static int polltime;
static struct callout_handle bvm_timeouthandle
= CALLOUT_HANDLE_INITIALIZER(&bvm_timeouthandle);
#if defined(KDB) && defined(ALT_BREAK_TO_DEBUGGER)
static int alt_break_state;
#endif
#define BVM_CONS_PORT 0x220
static int bvm_cons_port = BVM_CONS_PORT;
static void bvm_timeout(void *);
static cn_probe_t bvm_cnprobe;
static cn_init_t bvm_cninit;
static cn_term_t bvm_cnterm;
static cn_getc_t bvm_cngetc;
static cn_putc_t bvm_cnputc;
CONSOLE_DRIVER(bvm);
/*
 * Poll the hypervisor console port for one input byte.  Returns 0 and
 * stores the byte in *ch, or -1 if no character is pending.
 */
static int
bvm_rcons(u_char *ch)
{
	int val;

	val = inl(bvm_cons_port);
	if (val == -1)
		return (-1);
	*ch = (u_char)val;
	return (0);
}
/*
 * Write a single character to the hypervisor console I/O port.
 */
static void
bvm_wcons(u_char ch)
{
	outl(bvm_cons_port, ch);
}
/*
 * SYSINIT hook: create the "bvmcons" tty device once the console
 * subsystem is configured, but only if the low-level console probe
 * succeeded (cn_pri != CN_DEAD and bvm_cninit() recorded a name).
 */
static void
cn_drvinit(void *unused)
{
	struct tty *tp;
	if (bvm_consdev.cn_pri != CN_DEAD &&
	    bvm_consdev.cn_name[0] != '\0') {
		tp = tty_alloc(&bvm_ttydevsw, NULL);
		tty_makedev(tp, NULL, "bvmcons");
	}
}
/*
 * tty open hook: start polling the console port for input at
 * BVMCONS_POLL_HZ, clamped to at least one tick per interval.
 */
static int
bvm_tty_open(struct tty *tp)
{
	polltime = hz / BVMCONS_POLL_HZ;
	if (polltime < 1)
		polltime = 1;	/* hz may be lower than the poll rate */
	bvm_timeouthandle = timeout(bvm_timeout, tp, polltime);
	return (0);
}
/*
 * tty close hook: cancel the input-polling timeout armed by
 * bvm_tty_open()/bvm_timeout().
 */
static void
bvm_tty_close(struct tty *tp)
{
	/* XXX Should be replaced with callout_stop(9) */
	untimeout(bvm_timeout, tp, bvm_timeouthandle);
}
/*
 * tty output hook: drain the tty's output queue to the hypervisor
 * console, at most BVMBURSTLEN bytes per ttydisc_getc() call, until the
 * queue is empty.
 */
static void
bvm_tty_outwakeup(struct tty *tp)
{
	u_char chunk[BVMBURSTLEN];
	int i, n;

	while ((n = ttydisc_getc(tp, chunk, sizeof(chunk))) != 0) {
		for (i = 0; i < n; i++)
			bvm_wcons(chunk[i]);
	}
}
/*
 * Periodic poll: move every pending input character from the hypervisor
 * console into the tty's input queue (under the tty lock), then re-arm
 * the timeout for the next interval.
 */
static void
bvm_timeout(void *v)
{
	struct tty *tp;
	int c;
	tp = (struct tty *)v;
	tty_lock(tp);
	while ((c = bvm_cngetc(NULL)) != -1)
		ttydisc_rint(tp, c, 0);
	ttydisc_rint_done(tp);
	tty_unlock(tp);
	bvm_timeouthandle = timeout(bvm_timeout, tp, polltime);
}
/*
 * Low-level console probe.  The console is enabled unless disabled by a
 * "bvmconsole" hint; an optional "port" hint overrides the default I/O
 * port (BVM_CONS_PORT).
 */
static void
bvm_cnprobe(struct consdev *cp)
{
	int disabled, port;
	disabled = 0;
	resource_int_value("bvmconsole", 0, "disabled", &disabled);
	if (disabled)
		cp->cn_pri = CN_DEAD;
	else
		cp->cn_pri = CN_NORMAL;
	if (resource_int_value("bvmconsole", 0, "port", &port) == 0)
		bvm_cons_port = port;
}
/*
 * Low-level console init: announce the console on verbose boots and
 * record the device name that cn_drvinit() checks for later.
 */
static void
bvm_cninit(struct consdev *cp)
{
	size_t i, len;
	const char *bootmsg = "Using bvm console.\n";

	if (boothowto & RB_VERBOSE) {
		/* Hoist the loop-invariant strlen() out of the loop. */
		len = strlen(bootmsg);
		for (i = 0; i < len; i++)
			bvm_cnputc(cp, bootmsg[i]);
	}

	strcpy(cp->cn_name, "bvmcons");
}
/*
 * Low-level console teardown: nothing to release for this console.
 */
static void
bvm_cnterm(struct consdev *cp)
{
}
/*
 * Low-level console input: return the next character from the
 * hypervisor console port, or -1 if none is pending.  When KDB
 * alternate break support is compiled in, each received character is
 * also fed to kdb_alt_break() so the debugger/panic/reboot break
 * sequences work on this console.
 */
static int
bvm_cngetc(struct consdev *cp)
{
	unsigned char ch;
	if (bvm_rcons(&ch) == 0) {
#if defined(KDB) && defined(ALT_BREAK_TO_DEBUGGER)
		int kdb_brk;
		if ((kdb_brk = kdb_alt_break(ch, &alt_break_state)) != 0) {
			switch (kdb_brk) {
			case KDB_REQ_DEBUGGER:
				kdb_enter(KDB_WHY_BREAK,
				    "Break sequence on console");
				break;
			case KDB_REQ_PANIC:
				kdb_panic("Panic sequence on console");
				break;
			case KDB_REQ_REBOOT:
				kdb_reboot();
				break;
			}
		}
#endif
		return (ch);
	}
	return (-1);
}
/*
 * Low-level console output: emit one character.
 */
static void
bvm_cnputc(struct consdev *cp, int c)
{
	bvm_wcons(c);
}
SYSINIT(cndev, SI_SUB_CONFIGURE, SI_ORDER_MIDDLE, cn_drvinit, NULL);

90
sys/dev/bvm/bvm_dbg.c Normal file
View File

@ -0,0 +1,90 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <gdb/gdb.h>
#include <machine/cpufunc.h>
static gdb_probe_f bvm_dbg_probe;
static gdb_init_f bvm_dbg_init;
static gdb_term_f bvm_dbg_term;
static gdb_getc_f bvm_dbg_getc;
static gdb_putc_f bvm_dbg_putc;
GDB_DBGPORT(bvm, bvm_dbg_probe, bvm_dbg_init, bvm_dbg_term,
bvm_dbg_getc, bvm_dbg_putc);
#define BVM_DBG_PORT 0x224
static int bvm_dbg_port = BVM_DBG_PORT;
/*
 * GDB backend probe: honor the "bvmdbg" hints.  A "disabled" hint
 * rejects the backend; an optional "port" hint overrides the default
 * debug I/O port.
 */
static int
bvm_dbg_probe(void)
{
	int off, io_port;

	off = 0;
	resource_int_value("bvmdbg", 0, "disabled", &off);
	if (off != 0)
		return (-1);

	if (resource_int_value("bvmdbg", 0, "port", &io_port) == 0)
		bvm_dbg_port = io_port;

	return (0);
}
/*
 * GDB backend init: no setup needed beyond the probe.
 */
static void
bvm_dbg_init(void)
{
}
/*
 * GDB backend teardown: nothing to release.
 */
static void
bvm_dbg_term(void)
{
}
/*
 * Send one character to the hypervisor debug port.
 */
static void
bvm_dbg_putc(int c)
{
	outl(bvm_dbg_port, c);
}
/*
 * Read one character from the hypervisor debug port.  A return of -1
 * presumably means no character is pending (same convention as
 * bvm_rcons() on the console port) -- confirm against the hypervisor
 * side.
 */
static int
bvm_dbg_getc(void)
{
	return (inl(bvm_dbg_port));
}

View File

@ -2683,7 +2683,7 @@ device_attach(device_t dev)
printf("device_attach: %s%d attach returned %d\n",
dev->driver->name, dev->unit, error);
/* Unset the class; set in device_probe_child */
if (dev->devclass == NULL)
if ((dev->flags & DF_FIXEDCLASS) == 0)
device_set_devclass(dev, NULL);
device_set_driver(dev, NULL);
device_sysctl_fini(dev);

View File

@ -43,6 +43,7 @@ SUBDIR= ${_3dfx} \
${_bxe} \
${_bios} \
${_bktr} \
${_blackhole} \
${_bm} \
bridgestp \
bwi \
@ -311,6 +312,7 @@ SUBDIR= ${_3dfx} \
${_vesa} \
vge \
vkbd \
${_vmm} \
${_vpo} \
vr \
vte \
@ -541,6 +543,7 @@ _amdsbwd= amdsbwd
_amdtemp= amdtemp
_arcmsr= arcmsr
_asmc= asmc
_blackhole= blackhole
_bxe= bxe
_cardbus= cardbus
_cbb= cbb
@ -617,6 +620,7 @@ _sppp= sppp
_tpm= tpm
_twa= twa
_vesa= vesa
_vmm= vmm
_x86bios= x86bios
_wi= wi
_wpi= wpi

View File

@ -0,0 +1,9 @@
# $FreeBSD$
# Kernel module build glue for the blackhole PCI driver.

.PATH: ${.CURDIR}/../../dev/blackhole

KMOD=	blackhole
SRCS=	blackhole.c
# Interface method headers generated by the kernel build machinery.
SRCS+=	bus_if.h device_if.h pci_if.h

.include <bsd.kmod.mk>

66
sys/modules/vmm/Makefile Normal file
View File

@ -0,0 +1,66 @@
# $FreeBSD$
# *REQUIRES* binutils 2.20.1 for VT-x instructions
AS= /usr/local/bin/as
LD= /usr/local/bin/ld
CFLAGS+= -B /usr/local/bin
KMOD= vmm
SRCS= device_if.h bus_if.h pci_if.h
CFLAGS+= -DVMM_KEEP_STATS -DSMP
CFLAGS+= -DOLD_BINUTILS
CFLAGS+= -I${.CURDIR}/../../amd64/vmm
CFLAGS+= -I${.CURDIR}/../../amd64/vmm/io
CFLAGS+= -I${.CURDIR}/../../amd64/vmm/intel
# generic vmm support
.PATH: ${.CURDIR}/../../amd64/vmm
SRCS+= vmm.c \
vmm_dev.c \
vmm_ipi.c \
vmm_lapic.c \
vmm_mem.c \
vmm_msr.c \
vmm_stat.c \
vmm_util.c \
x86.c \
vmm_support.S
.PATH: ${.CURDIR}/../../amd64/vmm/io
SRCS+= iommu.c \
ppt.c \
vdev.c \
vlapic.c
# intel-specific files
.PATH: ${.CURDIR}/../../amd64/vmm/intel
SRCS+= ept.c \
vmcs.c \
vmx_msr.c \
vmx.c \
vtd.c
# amd-specific files
.PATH: ${.CURDIR}/../../amd64/vmm/amd
SRCS+= amdv.c
OBJS= vmx_support.o
CLEANFILES= vmx_assym.s vmx_genassym.o
vmx_assym.s: vmx_genassym.o
.if exists(@)
vmx_assym.s: @/kern/genassym.sh
.endif
sh @/kern/genassym.sh vmx_genassym.o > ${.TARGET}
vmx_support.o: vmx_support.S vmx_assym.s
${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
${.IMPSRC} -o ${.TARGET}
vmx_genassym.o: vmx_genassym.c @ machine x86
${CC} -c ${CFLAGS:N-fno-common} ${.IMPSRC}
.include <bsd.kmod.mk>

View File

@ -279,9 +279,13 @@ bus_space_read_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh,
bus_size_t offset, u_int8_t *addr, size_t count)
{
if (tag == X86_BUS_SPACE_IO)
insb(bsh + offset, addr, count);
else {
if (tag == X86_BUS_SPACE_IO) {
while (count > 0) {
*addr = inb(bsh + offset);
count--;
addr++;
}
} else {
#ifdef __GNUCLIKE_ASM
__asm __volatile(" \n\
cld \n\
@ -300,9 +304,13 @@ bus_space_read_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh,
bus_size_t offset, u_int16_t *addr, size_t count)
{
if (tag == X86_BUS_SPACE_IO)
insw(bsh + offset, addr, count);
else {
if (tag == X86_BUS_SPACE_IO) {
while (count > 0) {
*addr = inw(bsh + offset);
count--;
addr++;
}
} else {
#ifdef __GNUCLIKE_ASM
__asm __volatile(" \n\
cld \n\
@ -321,9 +329,13 @@ bus_space_read_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh,
bus_size_t offset, u_int32_t *addr, size_t count)
{
if (tag == X86_BUS_SPACE_IO)
insl(bsh + offset, addr, count);
else {
if (tag == X86_BUS_SPACE_IO) {
while (count > 0) {
*addr = inl(bsh + offset);
count--;
addr++;
}
} else {
#ifdef __GNUCLIKE_ASM
__asm __volatile(" \n\
cld \n\
@ -543,9 +555,13 @@ bus_space_write_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh,
bus_size_t offset, const u_int8_t *addr, size_t count)
{
if (tag == X86_BUS_SPACE_IO)
outsb(bsh + offset, addr, count);
else {
if (tag == X86_BUS_SPACE_IO) {
while (count > 0) {
outb(bsh + offset, *addr);
addr++;
count--;
}
} else {
#ifdef __GNUCLIKE_ASM
__asm __volatile(" \n\
cld \n\
@ -564,9 +580,13 @@ bus_space_write_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh,
bus_size_t offset, const u_int16_t *addr, size_t count)
{
if (tag == X86_BUS_SPACE_IO)
outsw(bsh + offset, addr, count);
else {
if (tag == X86_BUS_SPACE_IO) {
while (count > 0) {
outw(bsh + offset, *addr);
addr++;
count--;
}
} else {
#ifdef __GNUCLIKE_ASM
__asm __volatile(" \n\
cld \n\
@ -585,9 +605,13 @@ bus_space_write_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh,
bus_size_t offset, const u_int32_t *addr, size_t count)
{
if (tag == X86_BUS_SPACE_IO)
outsl(bsh + offset, addr, count);
else {
if (tag == X86_BUS_SPACE_IO) {
while (count > 0) {
outl(bsh + offset, *addr);
addr++;
count--;
}
} else {
#ifdef __GNUCLIKE_ASM
__asm __volatile(" \n\
cld \n\

View File

@ -156,6 +156,7 @@ volatile lapic_t *lapic;
vm_paddr_t lapic_paddr;
static u_long lapic_timer_divisor;
static struct eventtimer lapic_et;
static int x2apic;
static void lapic_enable(void);
static void lapic_resume(struct pic *pic);
@ -167,6 +168,38 @@ static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value);
static int lapic_et_start(struct eventtimer *et,
struct bintime *first, struct bintime *period);
static int lapic_et_stop(struct eventtimer *et);
static uint32_t lapic_version(void);
static uint32_t lapic_ldr(void);
static uint32_t lapic_dfr(void);
static uint32_t lapic_lvt_lint0(void);
static void lapic_set_lvt_lint0(uint32_t value);
static uint32_t lapic_lvt_lint1(void);
static void lapic_set_lvt_lint1(uint32_t value);
static uint32_t lapic_tpr(void);
static uint32_t lapic_svr(void);
static void lapic_set_svr(uint32_t value);
static uint32_t lapic_lvt_timer(void);
static void lapic_set_lvt_timer(uint32_t value);
static uint32_t lapic_lvt_thermal(void);
static uint32_t lapic_lvt_error(void);
static void lapic_set_lvt_error(uint32_t value);
static uint32_t lapic_lvt_pcint(void);
static void lapic_set_lvt_pcint(uint32_t value);
static uint32_t lapic_lvt_cmci(void);
static void lapic_set_lvt_cmci(uint32_t value);
static uint32_t lapic_esr(void);
static void lapic_set_esr(uint32_t value);
static uint32_t lapic_ccr_timer(void);
static void lapic_set_dcr_timer(uint32_t value);
static void lapic_set_icr_timer(uint32_t value);
uint32_t lapic_irr(int num);
uint32_t lapic_tmr(int num);
uint32_t lapic_isr(int num);
static uint32_t lapic_icr_lo(void);
static void lapic_set_icr_lo(uint32_t value);
static uint32_t lapic_icr_hi(void);
static void lapic_set_icr_hi(uint32_t value);
static boolean_t lapic_missing(void);
struct pic lapic_pic = { .pic_resume = lapic_resume };
@ -220,11 +253,17 @@ lapic_init(vm_paddr_t addr)
u_int regs[4];
int i, arat;
/* Map the local APIC and setup the spurious interrupt handler. */
KASSERT(trunc_page(addr) == addr,
("local APIC not aligned on a page boundary"));
lapic = pmap_mapdev(addr, sizeof(lapic_t));
lapic_paddr = addr;
if ((cpu_feature2 & CPUID2_X2APIC) != 0 &&
(rdmsr(MSR_APICBASE) & APICBASE_X2APIC) != 0) {
x2apic = 1;
if (bootverbose)
printf("Local APIC access using x2APIC MSRs\n");
} else {
KASSERT(trunc_page(addr) == addr,
("local APIC not aligned on a page boundary"));
lapic = pmap_mapdev(addr, sizeof(lapic_t));
lapic_paddr = addr;
}
setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL,
GSEL_APIC);
@ -325,19 +364,19 @@ lapic_dump(const char* str)
{
uint32_t maxlvt;
maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
maxlvt = (lapic_version() & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
printf("cpu%d %s:\n", PCPU_GET(cpuid), str);
printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x\n",
lapic->id, lapic->version, lapic->ldr, lapic->dfr);
lapic_id(), lapic_version(), lapic_ldr(), lapic_dfr());
printf(" lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr);
lapic_lvt_lint0(), lapic_lvt_lint1(), lapic_tpr(), lapic_svr());
printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x",
lapic->lvt_timer, lapic->lvt_thermal, lapic->lvt_error);
lapic_lvt_timer(), lapic_lvt_thermal(), lapic_lvt_error());
if (maxlvt >= LVT_PMC)
printf(" pmc: 0x%08x", lapic->lvt_pcint);
printf(" pmc: 0x%08x", lapic_lvt_pcint());
printf("\n");
if (maxlvt >= LVT_CMCI)
printf(" cmci: 0x%08x\n", lapic->lvt_cmci);
printf(" cmci: 0x%08x\n", lapic_lvt_cmci());
}
void
@ -351,7 +390,7 @@ lapic_setup(int boot)
la = &lapics[lapic_id()];
KASSERT(la->la_present, ("missing APIC structure"));
saveintr = intr_disable();
maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
maxlvt = (lapic_version() & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
/* Initialize the TPR to allow all interrupts. */
lapic_set_tpr(0);
@ -360,15 +399,15 @@ lapic_setup(int boot)
lapic_enable();
/* Program LINT[01] LVT entries. */
lapic->lvt_lint0 = lvt_mode(la, LVT_LINT0, lapic->lvt_lint0);
lapic->lvt_lint1 = lvt_mode(la, LVT_LINT1, lapic->lvt_lint1);
lapic_set_lvt_lint0(lvt_mode(la, LVT_LINT0, lapic_lvt_lint0()));
lapic_set_lvt_lint1(lvt_mode(la, LVT_LINT1, lapic_lvt_lint1()));
/* Program the PMC LVT entry if present. */
if (maxlvt >= LVT_PMC)
lapic->lvt_pcint = lvt_mode(la, LVT_PMC, lapic->lvt_pcint);
lapic_set_lvt_pcint(lvt_mode(la, LVT_PMC, lapic_lvt_pcint()));
/* Program timer LVT and setup handler. */
lapic->lvt_timer = lvt_mode(la, LVT_TIMER, lapic->lvt_timer);
lapic_set_lvt_timer(lvt_mode(la, LVT_TIMER, lapic_lvt_timer()));
if (boot) {
snprintf(buf, sizeof(buf), "cpu%d:timer", PCPU_GET(cpuid));
intrcnt_add(buf, &la->la_timer_count);
@ -386,14 +425,14 @@ lapic_setup(int boot)
}
/* Program error LVT and clear any existing errors. */
lapic->lvt_error = lvt_mode(la, LVT_ERROR, lapic->lvt_error);
lapic->esr = 0;
lapic_set_lvt_error(lvt_mode(la, LVT_ERROR, lapic_lvt_error()));
lapic_set_esr(0);
/* XXX: Thermal LVT */
/* Program the CMCI LVT entry if present. */
if (maxlvt >= LVT_CMCI)
lapic->lvt_cmci = lvt_mode(la, LVT_CMCI, lapic->lvt_cmci);
lapic_set_lvt_cmci(lvt_mode(la, LVT_CMCI, lapic_lvt_cmci()));
intr_restore(saveintr);
}
@ -404,9 +443,9 @@ lapic_reenable_pmc(void)
#ifdef HWPMC_HOOKS
uint32_t value;
value = lapic->lvt_pcint;
value = lapic_lvt_pcint();
value &= ~APIC_LVT_M;
lapic->lvt_pcint = value;
lapic_set_lvt_pcint(value);
#endif
}
@ -417,7 +456,7 @@ lapic_update_pmc(void *dummy)
struct lapic *la;
la = &lapics[lapic_id()];
lapic->lvt_pcint = lvt_mode(la, LVT_PMC, lapic->lvt_pcint);
lapic_set_lvt_pcint(lvt_mode(la, LVT_PMC, lapic_lvt_pcint()));
}
#endif
@ -428,11 +467,11 @@ lapic_enable_pmc(void)
u_int32_t maxlvt;
/* Fail if the local APIC is not present. */
if (lapic == NULL)
if (lapic_missing())
return (0);
/* Fail if the PMC LVT is not present. */
maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
maxlvt = (lapic_version() & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
if (maxlvt < LVT_PMC)
return (0);
@ -462,11 +501,11 @@ lapic_disable_pmc(void)
u_int32_t maxlvt;
/* Fail if the local APIC is not present. */
if (lapic == NULL)
if (lapic_missing())
return;
/* Fail if the PMC LVT is not present. */
maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
maxlvt = (lapic_version() & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
if (maxlvt < LVT_PMC)
return;
@ -495,7 +534,7 @@ lapic_et_start(struct eventtimer *et,
lapic_timer_set_divisor(lapic_timer_divisor);
lapic_timer_oneshot(APIC_TIMER_MAX_COUNT, 0);
DELAY(1000000);
value = APIC_TIMER_MAX_COUNT - lapic->ccr_timer;
value = APIC_TIMER_MAX_COUNT - lapic_ccr_timer();
if (value != APIC_TIMER_MAX_COUNT)
break;
lapic_timer_divisor <<= 1;
@ -549,9 +588,9 @@ lapic_disable(void)
uint32_t value;
/* Software disable the local APIC. */
value = lapic->svr;
value = lapic_svr();
value &= ~APIC_SVR_SWEN;
lapic->svr = value;
lapic_set_svr(value);
}
static void
@ -560,10 +599,10 @@ lapic_enable(void)
u_int32_t value;
/* Program the spurious vector to enable the local APIC. */
value = lapic->svr;
value = lapic_svr();
value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
value |= (APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT);
lapic->svr = value;
lapic_set_svr(value);
}
/* Reset the local APIC on the BSP during resume. */
@ -574,19 +613,362 @@ lapic_resume(struct pic *pic)
lapic_setup(0);
}
/*
 * Local APIC register accessors.
 *
 * Each helper hides the two access methods: in x2APIC mode
 * (x2apic != 0) the registers live in MSR space and are accessed with
 * rdmsr()/wrmsr(); otherwise they are read and written through the
 * memory-mapped 'lapic' window.  Only the accessors this file actually
 * uses are provided.
 */
static uint32_t
lapic_version(void)
{
	if (x2apic)
		return (rdmsr(MSR_APIC_VERSION));
	else
		return (lapic->version);
}
static uint32_t
lapic_ldr(void)
{
	if (x2apic)
		return (rdmsr(MSR_APIC_LDR));
	else
		return (lapic->ldr);
}
static uint32_t
lapic_dfr(void)
{
	if (x2apic)
		return (0xffffffff);	/* DFR not available in x2APIC mode */
	else
		return (lapic->dfr);
}
static uint32_t
lapic_lvt_lint0(void)
{
	if (x2apic)
		return (rdmsr(MSR_APIC_LVT_LINT0));
	else
		return (lapic->lvt_lint0);
}
static void
lapic_set_lvt_lint0(uint32_t value)
{
	if (x2apic)
		wrmsr(MSR_APIC_LVT_LINT0, value);
	else
		lapic->lvt_lint0 = value;
}
static uint32_t
lapic_lvt_lint1(void)
{
	if (x2apic)
		return (rdmsr(MSR_APIC_LVT_LINT1));
	else
		return (lapic->lvt_lint1);
}
static void
lapic_set_lvt_lint1(uint32_t value)
{
	if (x2apic)
		wrmsr(MSR_APIC_LVT_LINT1, value);
	else
		lapic->lvt_lint1 = value;
}
static uint32_t
lapic_tpr(void)
{
	if (x2apic)
		return (rdmsr(MSR_APIC_TPR));
	else
		return (lapic->tpr);
}
static uint32_t
lapic_svr(void)
{
	if (x2apic)
		return (rdmsr(MSR_APIC_SVR));
	else
		return (lapic->svr);
}
static void
lapic_set_svr(uint32_t value)
{
	if (x2apic)
		wrmsr(MSR_APIC_SVR, value);
	else
		lapic->svr = value;
}
/*
 * LVT, ESR and timer register accessors: same MSR-vs-MMIO split as the
 * accessors above.  Note the asymmetry on the timer registers: only the
 * directions this file needs exist -- CCR (current count) is only read,
 * while DCR (divide configuration) and ICR (initial count) are only
 * written.
 */
static uint32_t
lapic_lvt_timer(void)
{
	if (x2apic)
		return (rdmsr(MSR_APIC_LVT_TIMER));
	else
		return (lapic->lvt_timer);
}
static void
lapic_set_lvt_timer(uint32_t value)
{
	if (x2apic)
		wrmsr(MSR_APIC_LVT_TIMER, value);
	else
		lapic->lvt_timer = value;
}
static uint32_t
lapic_lvt_thermal(void)
{
	if (x2apic)
		return (rdmsr(MSR_APIC_LVT_THERMAL));
	else
		return (lapic->lvt_thermal);
}
static uint32_t
lapic_lvt_error(void)
{
	if (x2apic)
		return (rdmsr(MSR_APIC_LVT_ERROR));
	else
		return (lapic->lvt_error);
}
static void
lapic_set_lvt_error(uint32_t value)
{
	if (x2apic)
		wrmsr(MSR_APIC_LVT_ERROR, value);
	else
		lapic->lvt_error = value;
}
static uint32_t
lapic_lvt_pcint(void)
{
	if (x2apic)
		return (rdmsr(MSR_APIC_LVT_PCINT));
	else
		return (lapic->lvt_pcint);
}
static void
lapic_set_lvt_pcint(uint32_t value)
{
	if (x2apic)
		wrmsr(MSR_APIC_LVT_PCINT, value);
	else
		lapic->lvt_pcint = value;
}
static uint32_t
lapic_lvt_cmci(void)
{
	if (x2apic)
		return (rdmsr(MSR_APIC_LVT_CMCI));
	else
		return (lapic->lvt_cmci);
}
static void
lapic_set_lvt_cmci(uint32_t value)
{
	if (x2apic)
		wrmsr(MSR_APIC_LVT_CMCI, value);
	else
		lapic->lvt_cmci = value;
}
static uint32_t
lapic_esr(void)
{
	if (x2apic)
		return (rdmsr(MSR_APIC_ESR));
	else
		return (lapic->esr);
}
static void
lapic_set_esr(uint32_t value)
{
	if (x2apic)
		wrmsr(MSR_APIC_ESR, value);
	else
		lapic->esr = value;
}
static uint32_t
lapic_ccr_timer(void)
{
	if (x2apic)
		return (rdmsr(MSR_APIC_CCR_TIMER));
	else
		return (lapic->ccr_timer);
}
static void
lapic_set_dcr_timer(uint32_t value)
{
	if (x2apic)
		wrmsr(MSR_APIC_DCR_TIMER, value);
	else
		lapic->dcr_timer = value;
}
static void
lapic_set_icr_timer(uint32_t value)
{
	if (x2apic)
		wrmsr(MSR_APIC_ICR_TIMER, value);
	else
		lapic->icr_timer = value;
}
/*
 * Read one 32-bit word of the 256-bit TMR/IRR/ISR register banks.
 * 'num' selects which group of 32 vectors (0..7).  In x2APIC mode the
 * eight words are consecutive MSRs; in MMIO mode each word sits 16
 * bytes after the previous one, hence the index scaling by 4 on a
 * uint32_t pointer.
 */
uint32_t
lapic_tmr(int num)
{
	int msr;
	volatile uint32_t *regptr;
	KASSERT(num >= 0 && num < 8, ("lapic_tmr: invalid num %d", num));
	if (x2apic) {
		msr = MSR_APIC_TMR0 + num;
		return (rdmsr(msr));
	} else {
		regptr = &lapic->tmr0;
		return (regptr[num * 4]);	/* 16-byte register stride */
	}
}
/* Same layout as lapic_tmr(), for the interrupt request register. */
uint32_t
lapic_irr(int num)
{
	int msr;
	volatile uint32_t *regptr;
	KASSERT(num >= 0 && num < 8, ("lapic_irr: invalid num %d", num));
	if (x2apic) {
		msr = MSR_APIC_IRR0 + num;
		return (rdmsr(msr));
	} else {
		regptr = &lapic->irr0;
		return (regptr[num * 4]);
	}
}
/* Same layout as lapic_tmr(), for the in-service register. */
uint32_t
lapic_isr(int num)
{
	int msr;
	volatile uint32_t *regptr;
	KASSERT(num >= 0 && num < 8, ("lapic_isr: invalid num %d", num));
	if (x2apic) {
		msr = MSR_APIC_ISR0 + num;
		return (rdmsr(msr));
	} else {
		regptr = &lapic->isr0;
		return (regptr[num * 4]);
	}
}
/*
 * ICR accessors.  In x2APIC mode the ICR is a single 64-bit MSR written
 * in one shot, so the destination programmed via lapic_set_icr_hi() is
 * stashed per-CPU and combined with the low word inside
 * lapic_set_icr_lo().  The x2APIC read paths return 0 because the
 * stashed state is write-only here.
 *
 * NOTE(review): in x2APIC mode lapic_icr_lo() always returns 0, so any
 * caller polling APIC_DELSTAT through it will always observe the IPI as
 * delivered -- confirm this is intended.
 */
static uint32_t icr_hi_stashed[MAXCPU];
static uint32_t
lapic_icr_lo(void)
{
	if (x2apic)
		return (0);
	else
		return (lapic->icr_lo);
}
static void
lapic_set_icr_lo(uint32_t value)
{
	if (x2apic) {
		wrmsr(MSR_APIC_ICR,
		    (uint64_t)icr_hi_stashed[curcpu] << 32 | value);
	} else
		lapic->icr_lo = value;
}
static uint32_t
lapic_icr_hi(void)
{
	if (x2apic)
		return (0);
	else
		return (lapic->icr_hi);
}
static void
lapic_set_icr_hi(uint32_t value)
{
	if (x2apic)
		icr_hi_stashed[curcpu] = value >> APIC_ID_SHIFT; /* XXX */
	else
		lapic->icr_hi = value;
}
/*
 * TRUE when no local APIC is usable at all: not in x2APIC mode and no
 * MMIO window has been mapped.
 */
static boolean_t
lapic_missing(void)
{
	if (x2apic == 0 && lapic == NULL)
		return (TRUE);
	else
		return (FALSE);
}
int
lapic_id(void)
{
KASSERT(lapic != NULL, ("local APIC is not mapped"));
return (lapic->id >> APIC_ID_SHIFT);
if (x2apic)
return (rdmsr(MSR_APIC_ID));
else
return (lapic->id >> APIC_ID_SHIFT);
}
int
lapic_intr_pending(u_int vector)
{
volatile u_int32_t *irr;
/*
* The IRR registers are an array of 128-bit registers each of
* which only describes 32 interrupts in the low 32 bits. Thus,
@ -596,8 +978,7 @@ lapic_intr_pending(u_int vector)
* modulus the vector by 32 to determine the individual bit to
* test.
*/
irr = &lapic->irr0;
return (irr[(vector / 32) * 4] & 1 << (vector % 32));
return (lapic_irr(vector / 32) & 1 << (vector % 32));
}
void
@ -753,13 +1134,19 @@ void
lapic_set_tpr(u_int vector)
{
#ifdef CHEAP_TPR
lapic->tpr = vector;
if (x2apic)
wrmsr(MSR_APIC_TPR, vector);
else
lapic->tpr = vector;
#else
u_int32_t tpr;
tpr = lapic->tpr & ~APIC_TPR_PRIO;
tpr = lapic_tpr() & ~APIC_TPR_PRIO;
tpr |= vector;
lapic->tpr = tpr;
if (x2apic)
wrmsr(MSR_APIC_TPR, tpr);
else
lapic->tpr = tpr;
#endif
}
@ -767,7 +1154,10 @@ void
lapic_eoi(void)
{
lapic->eoi = 0;
if (x2apic)
wrmsr(MSR_APIC_EOI, 0);
else
lapic->eoi = 0;
}
void
@ -829,7 +1219,7 @@ lapic_timer_set_divisor(u_int divisor)
KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor));
KASSERT(ffs(divisor) <= sizeof(lapic_timer_divisors) /
sizeof(u_int32_t), ("lapic: invalid divisor %u", divisor));
lapic->dcr_timer = lapic_timer_divisors[ffs(divisor) - 1];
lapic_set_dcr_timer(lapic_timer_divisors[ffs(divisor) - 1]);
}
static void
@ -837,13 +1227,13 @@ lapic_timer_oneshot(u_int count, int enable_int)
{
u_int32_t value;
value = lapic->lvt_timer;
value = lapic_lvt_timer();
value &= ~APIC_LVTT_TM;
value |= APIC_LVTT_TM_ONE_SHOT;
if (enable_int)
value &= ~APIC_LVT_M;
lapic->lvt_timer = value;
lapic->icr_timer = count;
lapic_set_lvt_timer(value);
lapic_set_icr_timer(count);
}
static void
@ -851,13 +1241,13 @@ lapic_timer_periodic(u_int count, int enable_int)
{
u_int32_t value;
value = lapic->lvt_timer;
value = lapic_lvt_timer();
value &= ~APIC_LVTT_TM;
value |= APIC_LVTT_TM_PERIODIC;
if (enable_int)
value &= ~APIC_LVT_M;
lapic->lvt_timer = value;
lapic->icr_timer = count;
lapic_set_lvt_timer(value);
lapic_set_icr_timer(count);
}
static void
@ -865,10 +1255,10 @@ lapic_timer_stop(void)
{
u_int32_t value;
value = lapic->lvt_timer;
value = lapic_lvt_timer();
value &= ~APIC_LVTT_TM;
value |= APIC_LVT_M;
lapic->lvt_timer = value;
lapic_set_lvt_timer(value);
}
void
@ -910,8 +1300,8 @@ lapic_handle_error(void)
* to update its value to indicate any errors that have
* occurred since the previous write to the register.
*/
lapic->esr = 0;
esr = lapic->esr;
lapic_set_esr(0);
esr = lapic_esr();
printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr);
lapic_eoi();
@ -1179,17 +1569,17 @@ DB_SHOW_COMMAND(lapic, db_show_lapic)
uint32_t v;
db_printf("lapic ID = %d\n", lapic_id());
v = lapic->version;
v = lapic_version();
db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4,
v & 0xf);
db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT);
v = lapic->svr;
v = lapic_svr();
db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR,
v & APIC_SVR_ENABLE ? "enabled" : "disabled");
db_printf("TPR = %02x\n", lapic->tpr);
db_printf("TPR = %02x\n", lapic_tpr());
#define dump_field(prefix, index) \
dump_mask(__XSTRING(prefix ## index), lapic->prefix ## index, \
dump_mask(__XSTRING(prefix ## index), lapic_ ## prefix(index), \
index * 32)
db_printf("In-service Interrupts:\n");
@ -1390,7 +1780,7 @@ lapic_ipi_wait(int delay)
} else
incr = 1;
for (x = 0; x < delay; x += incr) {
if ((lapic->icr_lo & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE)
if ((lapic_icr_lo() & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE)
return (1);
ia32_pause();
}
@ -1403,7 +1793,7 @@ lapic_ipi_raw(register_t icrlo, u_int dest)
register_t value, saveintr;
/* XXX: Need more sanity checking of icrlo? */
KASSERT(lapic != NULL, ("%s called too early", __func__));
KASSERT(!lapic_missing(), ("%s called too early", __func__));
KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
("%s: invalid dest field", __func__));
KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0,
@ -1412,17 +1802,17 @@ lapic_ipi_raw(register_t icrlo, u_int dest)
/* Set destination in ICR HI register if it is being used. */
saveintr = intr_disable();
if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) {
value = lapic->icr_hi;
value = lapic_icr_hi();
value &= ~APIC_ID_MASK;
value |= dest << APIC_ID_SHIFT;
lapic->icr_hi = value;
lapic_set_icr_hi(value);
}
/* Program the contents of the IPI and dispatch it. */
value = lapic->icr_lo;
value = lapic_icr_lo();
value &= APIC_ICRLO_RESV_MASK;
value |= icrlo;
lapic->icr_lo = value;
lapic_set_icr_lo(value);
intr_restore(saveintr);
}
@ -1499,7 +1889,7 @@ lapic_ipi_vectored(u_int vector, int dest)
printf("APIC: IPI might be stuck\n");
#else /* !needsattention */
/* Wait until message is sent without a timeout. */
while (lapic->icr_lo & APIC_DELSTAT_PEND)
while (lapic_icr_lo() & APIC_DELSTAT_PEND)
ia32_pause();
#endif /* needsattention */
}