Use the 'Virtual Interrupt Delivery' feature of Intel VT-x if supported by

hardware. It is possible to turn this feature off and fall back to software
emulation of the APIC by setting the tunable hw.vmm.vmx.use_apic_vid to 0.

We now start handling two new types of VM-exits:

APIC-access: This is a fault-like VM-exit and is triggered when the APIC
register access is not accelerated (e.g. apic timer CCR). In response to
this we do emulate the instruction that triggered the APIC-access exit.

APIC-write: This is a trap-like VM-exit which does not require any instruction
emulation but it does require the hypervisor to emulate the access to the
specified register (e.g. icrlo register).

Introduce 'vlapic_ops' which are function pointers to vector the various
vlapic operations into processor-dependent code. The 'Virtual Interrupt
Delivery' feature installs 'ops' for setting the IRR bits in the virtual
APIC page and to return whether any interrupts are pending for this vcpu.

Tested on an "Intel Xeon E5-2620 v2" courtesy of Allan Jude at ScaleEngine.
This commit is contained in:
Neel Natu 2014-01-07 21:04:49 +00:00
parent 46d3e7e9c5
commit 88c4b8d145
6 changed files with 501 additions and 58 deletions

@ -107,6 +107,7 @@ vmcs_write(uint32_t encoding, uint64_t val)
#define VMCS_GUEST_GS_SELECTOR 0x0000080A
#define VMCS_GUEST_LDTR_SELECTOR 0x0000080C
#define VMCS_GUEST_TR_SELECTOR 0x0000080E
#define VMCS_GUEST_INTR_STATUS 0x00000810
/* 16-bit host-state fields */
#define VMCS_HOST_ES_SELECTOR 0x00000C00
@ -129,6 +130,10 @@ vmcs_write(uint32_t encoding, uint64_t val)
#define VMCS_VIRTUAL_APIC 0x00002012
#define VMCS_APIC_ACCESS 0x00002014
#define VMCS_EPTP 0x0000201A
#define VMCS_EOI_EXIT0 0x0000201C
#define VMCS_EOI_EXIT1 0x0000201E
#define VMCS_EOI_EXIT2 0x00002020
#define VMCS_EOI_EXIT3 0x00002022
/* 64-bit read-only fields */
#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400
@ -310,7 +315,7 @@ vmcs_write(uint32_t encoding, uint64_t val)
#define EXIT_REASON_PAUSE 40
#define EXIT_REASON_MCE 41
#define EXIT_REASON_TPR 43
#define EXIT_REASON_APIC 44
#define EXIT_REASON_APIC_ACCESS 44
#define EXIT_REASON_GDTR_IDTR 46
#define EXIT_REASON_LDTR_TR 47
#define EXIT_REASON_EPT_FAULT 48
@ -321,6 +326,7 @@ vmcs_write(uint32_t encoding, uint64_t val)
#define EXIT_REASON_INVVPID 53
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
#define EXIT_REASON_APIC_WRITE 56
/*
* VMCS interrupt information fields
@ -360,4 +366,15 @@ vmcs_write(uint32_t encoding, uint64_t val)
#define EPT_VIOLATION_GLA_VALID (1UL << 7)
#define EPT_VIOLATION_XLAT_VALID (1UL << 8)
/*
* Exit qualification for APIC-access VM exit
*/
#define APIC_ACCESS_OFFSET(qual) ((qual) & 0xFFF)
#define APIC_ACCESS_TYPE(qual) (((qual) >> 12) & 0xF)
/*
* Exit qualification for APIC-write VM exit
*/
#define APIC_WRITE_OFFSET(qual) ((qual) & 0xFFF)
#endif

@ -166,12 +166,25 @@ static int cap_pause_exit;
static int cap_unrestricted_guest;
static int cap_monitor_trap;
static int cap_invpcid;
static int virtual_interrupt_delivery;
SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, virtual_interrupt_delivery, CTLFLAG_RD,
&virtual_interrupt_delivery, 0, "APICv virtual interrupt delivery support");
static struct unrhdr *vpid_unr;
static u_int vpid_alloc_failed;
SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD,
&vpid_alloc_failed, 0, NULL);
/*
* Use the last page below 4GB as the APIC access address. This address is
* occupied by the boot firmware so it is guaranteed that it will not conflict
* with a page in system memory.
*/
#define APIC_ACCESS_ADDRESS 0xFFFFF000
static void vmx_inject_pir(struct vlapic *vlapic);
#ifdef KTR
static const char *
exit_reason_to_str(int reason)
@ -261,8 +274,8 @@ exit_reason_to_str(int reason)
return "mce";
case EXIT_REASON_TPR:
return "tpr";
case EXIT_REASON_APIC:
return "apic";
case EXIT_REASON_APIC_ACCESS:
return "apic-access";
case EXIT_REASON_GDTR_IDTR:
return "gdtridtr";
case EXIT_REASON_LDTR_TR:
@ -283,6 +296,8 @@ exit_reason_to_str(int reason)
return "wbinvd";
case EXIT_REASON_XSETBV:
return "xsetbv";
case EXIT_REASON_APIC_WRITE:
return "apic-write";
default:
snprintf(reasonbuf, sizeof(reasonbuf), "%d", reason);
return (reasonbuf);
@ -461,9 +476,9 @@ vmx_restore(void)
static int
vmx_init(void)
{
int error;
int error, use_tpr_shadow;
uint64_t fixed0, fixed1, feature_control;
uint32_t tmp;
uint32_t tmp, procbased2_vid_bits;
/* CPUID.1:ECX[bit 5] must be 1 for processor to support VMX */
if (!(cpu_feature2 & CPUID2_VMX)) {
@ -597,6 +612,31 @@ vmx_init(void)
MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_INVPCID, 0,
&tmp) == 0);
/*
* Check support for virtual interrupt delivery.
*/
procbased2_vid_bits = (PROCBASED2_VIRTUALIZE_APIC_ACCESSES |
PROCBASED2_VIRTUALIZE_X2APIC_MODE |
PROCBASED2_APIC_REGISTER_VIRTUALIZATION |
PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY);
use_tpr_shadow = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_USE_TPR_SHADOW, 0,
&tmp) == 0);
error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2,
procbased2_vid_bits, 0, &tmp);
if (error == 0 && use_tpr_shadow) {
virtual_interrupt_delivery = 1;
TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_vid",
&virtual_interrupt_delivery);
}
if (virtual_interrupt_delivery) {
procbased_ctls |= PROCBASED_USE_TPR_SHADOW;
procbased_ctls2 |= procbased2_vid_bits;
procbased_ctls2 &= ~PROCBASED2_VIRTUALIZE_X2APIC_MODE;
}
/* Initialize EPT */
error = ept_init();
@ -743,6 +783,13 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
vpid_alloc(vpid, VM_MAXCPU);
if (virtual_interrupt_delivery) {
error = vm_map_mmio(vm, DEFAULT_APIC_BASE, PAGE_SIZE,
APIC_ACCESS_ADDRESS);
/* XXX this should really return an error to the caller */
KASSERT(error == 0, ("vm_map_mmio(apicbase) error %d", error));
}
for (i = 0; i < VM_MAXCPU; i++) {
vmcs = &vmx->vmcs[i];
vmcs->identifier = vmx_revision();
@ -766,6 +813,15 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls);
error += vmwrite(VMCS_MSR_BITMAP, vtophys(vmx->msr_bitmap));
error += vmwrite(VMCS_VPID, vpid[i]);
if (virtual_interrupt_delivery) {
error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS);
error += vmwrite(VMCS_VIRTUAL_APIC,
vtophys(&vmx->apic_page[i]));
error += vmwrite(VMCS_EOI_EXIT0, 0);
error += vmwrite(VMCS_EOI_EXIT1, 0);
error += vmwrite(VMCS_EOI_EXIT2, 0);
error += vmwrite(VMCS_EOI_EXIT3, 0);
}
VMCLEAR(vmcs);
KASSERT(error == 0, ("vmx_vminit: error customizing the vmcs"));
@ -988,6 +1044,11 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic)
if (vmx_inject_nmi(vmx, vcpu))
return;
if (virtual_interrupt_delivery) {
vmx_inject_pir(vlapic);
return;
}
/* Ask the local apic for a vector to inject */
if (!vlapic_pending_intr(vlapic, &vector))
return;
@ -1180,11 +1241,141 @@ ept_emulation_fault(uint64_t ept_qual)
return (TRUE);
}
static int
vmx_handle_apic_write(struct vlapic *vlapic, uint64_t qual)
{
int error, handled, offset;
bool retu;
if (!virtual_interrupt_delivery)
return (UNHANDLED);
handled = 1;
offset = APIC_WRITE_OFFSET(qual);
switch (offset) {
case APIC_OFFSET_ID:
vlapic_id_write_handler(vlapic);
break;
case APIC_OFFSET_LDR:
vlapic_ldr_write_handler(vlapic);
break;
case APIC_OFFSET_DFR:
vlapic_dfr_write_handler(vlapic);
break;
case APIC_OFFSET_SVR:
vlapic_svr_write_handler(vlapic);
break;
case APIC_OFFSET_ESR:
vlapic_esr_write_handler(vlapic);
break;
case APIC_OFFSET_ICR_LOW:
retu = false;
error = vlapic_icrlo_write_handler(vlapic, &retu);
if (error != 0 || retu)
handled = 0;
break;
case APIC_OFFSET_CMCI_LVT:
case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
vlapic_lvt_write_handler(vlapic, offset);
break;
case APIC_OFFSET_TIMER_ICR:
vlapic_icrtmr_write_handler(vlapic);
break;
case APIC_OFFSET_TIMER_DCR:
vlapic_dcr_write_handler(vlapic);
break;
default:
handled = 0;
break;
}
return (handled);
}
static bool
apic_access_fault(uint64_t gpa)
{
if (virtual_interrupt_delivery &&
(gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE))
return (true);
else
return (false);
}
static int
vmx_handle_apic_access(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
{
uint64_t qual;
int access_type, offset, allowed;
if (!virtual_interrupt_delivery)
return (UNHANDLED);
qual = vmexit->u.vmx.exit_qualification;
access_type = APIC_ACCESS_TYPE(qual);
offset = APIC_ACCESS_OFFSET(qual);
allowed = 0;
if (access_type == 0) {
/*
* Read data access to the following registers is expected.
*/
switch (offset) {
case APIC_OFFSET_APR:
case APIC_OFFSET_PPR:
case APIC_OFFSET_RRR:
case APIC_OFFSET_CMCI_LVT:
case APIC_OFFSET_TIMER_CCR:
allowed = 1;
break;
default:
break;
}
} else if (access_type == 1) {
/*
* Write data access to the following registers is expected.
*/
switch (offset) {
case APIC_OFFSET_VER:
case APIC_OFFSET_APR:
case APIC_OFFSET_PPR:
case APIC_OFFSET_RRR:
case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
case APIC_OFFSET_CMCI_LVT:
case APIC_OFFSET_TIMER_CCR:
allowed = 1;
break;
default:
break;
}
}
if (allowed) {
vmexit->exitcode = VM_EXITCODE_INST_EMUL;
vmexit->u.inst_emul.gpa = DEFAULT_APIC_BASE + offset;
vmexit->u.inst_emul.gla = VIE_INVALID_GLA;
vmexit->u.inst_emul.cr3 = vmcs_guest_cr3();
}
/*
* Regardless of whether the APIC-access is allowed this handler
* always returns UNHANDLED:
* - if the access is allowed then it is handled by emulating the
* instruction that caused the VM-exit (outside the critical section)
* - if the access is not allowed then it will be converted to an
* exitcode of VM_EXITCODE_VMX and will be dealt with in userland.
*/
return (UNHANDLED);
}
static int
vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
{
int error, handled;
struct vmxctx *vmxctx;
struct vlapic *vlapic;
uint32_t eax, ecx, edx, idtvec_info, idtvec_err, reason;
uint64_t qual, gpa;
bool retu;
@ -1209,7 +1400,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
switch (reason) {
case EXIT_REASON_EPT_FAULT:
case EXIT_REASON_EPT_MISCONFIG:
case EXIT_REASON_APIC:
case EXIT_REASON_APIC_ACCESS:
case EXIT_REASON_TASK_SWITCH:
case EXIT_REASON_EXCEPTION:
idtvec_info = vmcs_idt_vectoring_info();
@ -1331,7 +1522,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
* this must be an instruction that accesses MMIO space.
*/
gpa = vmcs_gpa();
if (vm_mem_allocated(vmx->vm, gpa)) {
if (vm_mem_allocated(vmx->vm, gpa) || apic_access_fault(gpa)) {
vmexit->exitcode = VM_EXITCODE_PAGING;
vmexit->u.paging.gpa = gpa;
vmexit->u.paging.fault_type = ept_fault_type(qual);
@ -1342,6 +1533,18 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
vmexit->u.inst_emul.cr3 = vmcs_guest_cr3();
}
break;
case EXIT_REASON_APIC_ACCESS:
handled = vmx_handle_apic_access(vmx, vcpu, vmexit);
break;
case EXIT_REASON_APIC_WRITE:
/*
* APIC-write VM exit is trap-like so the %rip is already
* pointing to the next instruction.
*/
vmexit->inst_length = 0;
vlapic = vm_lapic(vmx->vm, vcpu);
handled = vmx_handle_apic_write(vlapic, qual);
break;
default:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1);
break;
@ -1532,6 +1735,9 @@ vmx_vmcleanup(void *arg)
int i, error;
struct vmx *vmx = arg;
if (virtual_interrupt_delivery)
vm_unmap_mmio(vmx->vm, DEFAULT_APIC_BASE, PAGE_SIZE);
for (i = 0; i < VM_MAXCPU; i++)
vpid_free(vmx->state[i].vpid);
@ -1895,6 +2101,195 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
return (retval);
}
/*
* Posted Interrupt Descriptor (described in section 29.6 of the Intel SDM).
*/
struct pir_desc {
uint64_t pir[4];
uint64_t pending;
uint64_t unused[3];
} __aligned(64);
CTASSERT(sizeof(struct pir_desc) == 64);
struct vlapic_vtx {
struct vlapic vlapic;
struct pir_desc pir_desc;
};
#define VMX_CTR_PIR(vm, vcpuid, pir_desc, notify, vector, level, msg) \
do { \
VCPU_CTR2(vm, vcpuid, msg " assert %s-triggered vector %d", \
level ? "level" : "edge", vector); \
VCPU_CTR1(vm, vcpuid, msg " pir0 0x%016lx", pir_desc->pir[0]); \
VCPU_CTR1(vm, vcpuid, msg " pir1 0x%016lx", pir_desc->pir[1]); \
VCPU_CTR1(vm, vcpuid, msg " pir2 0x%016lx", pir_desc->pir[2]); \
VCPU_CTR1(vm, vcpuid, msg " pir3 0x%016lx", pir_desc->pir[3]); \
VCPU_CTR1(vm, vcpuid, msg " notify: %s", notify ? "yes" : "no");\
} while (0)
/*
* vlapic->ops handlers that utilize the APICv hardware assist described in
* Chapter 29 of the Intel SDM.
*/
static int
vmx_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
struct vlapic_vtx *vlapic_vtx;
struct pir_desc *pir_desc;
uint64_t mask;
int idx, notify;
/*
* XXX need to deal with level triggered interrupts
*/
vlapic_vtx = (struct vlapic_vtx *)vlapic;
pir_desc = &vlapic_vtx->pir_desc;
/*
* Keep track of interrupt requests in the PIR descriptor. This is
* because the virtual APIC page pointed to by the VMCS cannot be
* modified if the vcpu is running.
*/
idx = vector / 64;
mask = 1UL << (vector % 64);
atomic_set_long(&pir_desc->pir[idx], mask);
notify = atomic_cmpset_long(&pir_desc->pending, 0, 1);
VMX_CTR_PIR(vlapic->vm, vlapic->vcpuid, pir_desc, notify, vector,
level, "vmx_set_intr_ready");
return (notify);
}
static int
vmx_pending_intr(struct vlapic *vlapic, int *vecptr)
{
struct vlapic_vtx *vlapic_vtx;
struct pir_desc *pir_desc;
struct LAPIC *lapic;
uint64_t pending, pirval;
uint32_t ppr, vpr;
int i;
/*
* This function is only expected to be called from the 'HLT' exit
* handler which does not care about the vector that is pending.
*/
KASSERT(vecptr == NULL, ("vmx_pending_intr: vecptr must be NULL"));
vlapic_vtx = (struct vlapic_vtx *)vlapic;
pir_desc = &vlapic_vtx->pir_desc;
pending = atomic_load_acq_long(&pir_desc->pending);
if (!pending)
return (0); /* common case */
/*
* If there is an interrupt pending then it will be recognized only
* if its priority is greater than the processor priority.
*
* Special case: if the processor priority is zero then any pending
* interrupt will be recognized.
*/
lapic = vlapic->apic_page;
ppr = lapic->ppr & 0xf0;
if (ppr == 0)
return (1);
VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "HLT with non-zero PPR %d",
lapic->ppr);
for (i = 3; i >= 0; i--) {
pirval = pir_desc->pir[i];
if (pirval != 0) {
vpr = (i * 64 + flsl(pirval) - 1) & 0xf0;
return (vpr > ppr);
}
}
return (0);
}
static void
vmx_intr_accepted(struct vlapic *vlapic, int vector)
{
panic("vmx_intr_accepted: not expected to be called");
}
/*
* Transfer the pending interrupts in the PIR descriptor to the IRR
* in the virtual APIC page.
*/
static void
vmx_inject_pir(struct vlapic *vlapic)
{
struct vlapic_vtx *vlapic_vtx;
struct pir_desc *pir_desc;
struct LAPIC *lapic;
uint64_t val, pirval;
int rvi, pirbase;
uint16_t intr_status_old, intr_status_new;
vlapic_vtx = (struct vlapic_vtx *)vlapic;
pir_desc = &vlapic_vtx->pir_desc;
if (atomic_cmpset_long(&pir_desc->pending, 1, 0) == 0) {
VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: "
"no posted interrupt pending");
return;
}
pirval = 0;
lapic = vlapic->apic_page;
val = atomic_readandclear_long(&pir_desc->pir[0]);
if (val != 0) {
lapic->irr0 |= val;
lapic->irr1 |= val >> 32;
pirbase = 0;
pirval = val;
}
val = atomic_readandclear_long(&pir_desc->pir[1]);
if (val != 0) {
lapic->irr2 |= val;
lapic->irr3 |= val >> 32;
pirbase = 64;
pirval = val;
}
val = atomic_readandclear_long(&pir_desc->pir[2]);
if (val != 0) {
lapic->irr4 |= val;
lapic->irr5 |= val >> 32;
pirbase = 128;
pirval = val;
}
val = atomic_readandclear_long(&pir_desc->pir[3]);
if (val != 0) {
lapic->irr6 |= val;
lapic->irr7 |= val >> 32;
pirbase = 192;
pirval = val;
}
VLAPIC_CTR_IRR(vlapic, "vmx_inject_pir");
/*
* Update RVI so the processor can evaluate pending virtual
* interrupts on VM-entry.
*/
if (pirval != 0) {
rvi = pirbase + flsl(pirval) - 1;
intr_status_old = vmcs_read(VMCS_GUEST_INTR_STATUS);
intr_status_new = (intr_status_old & 0xFF00) | rvi;
if (intr_status_new > intr_status_old) {
vmcs_write(VMCS_GUEST_INTR_STATUS, intr_status_new);
VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: "
"guest_intr_status changed from 0x%04x to 0x%04x",
intr_status_old, intr_status_new);
}
}
}
static struct vlapic *
vmx_vlapic_init(void *arg, int vcpuid)
{
@ -1903,11 +2298,17 @@ vmx_vlapic_init(void *arg, int vcpuid)
vmx = arg;
vlapic = malloc(sizeof(struct vlapic), M_VLAPIC, M_WAITOK | M_ZERO);
vlapic = malloc(sizeof(struct vlapic_vtx), M_VLAPIC, M_WAITOK | M_ZERO);
vlapic->vm = vmx->vm;
vlapic->vcpuid = vcpuid;
vlapic->apic_page = (struct LAPIC *)&vmx->apic_page[vcpuid];
if (virtual_interrupt_delivery) {
vlapic->ops.set_intr_ready = vmx_set_intr_ready;
vlapic->ops.pending_intr = vmx_pending_intr;
vlapic->ops.intr_accepted = vmx_intr_accepted;
}
vlapic_init(vlapic);
return (vlapic);

@ -34,6 +34,7 @@
#define PINBASED_NMI_EXITING (1 << 3)
#define PINBASED_VIRTUAL_NMI (1 << 5)
#define PINBASED_PREMPTION_TIMER (1 << 6)
#define PINBASED_POSTED_INTERRUPT (1 << 7)
/* Primary Processor-Based VM-Execution Controls */
#define PROCBASED_INT_WINDOW_EXITING (1 << 2)
@ -59,16 +60,18 @@
#define PROCBASED_SECONDARY_CONTROLS (1U << 31)
/* Secondary Processor-Based VM-Execution Controls */
#define PROCBASED2_VIRTUALIZE_APIC (1 << 0)
#define PROCBASED2_ENABLE_EPT (1 << 1)
#define PROCBASED2_DESC_TABLE_EXITING (1 << 2)
#define PROCBASED2_ENABLE_RDTSCP (1 << 3)
#define PROCBASED2_VIRTUALIZE_X2APIC (1 << 4)
#define PROCBASED2_ENABLE_VPID (1 << 5)
#define PROCBASED2_WBINVD_EXITING (1 << 6)
#define PROCBASED2_UNRESTRICTED_GUEST (1 << 7)
#define PROCBASED2_PAUSE_LOOP_EXITING (1 << 10)
#define PROCBASED2_ENABLE_INVPCID (1 << 12)
#define PROCBASED2_VIRTUALIZE_APIC_ACCESSES (1 << 0)
#define PROCBASED2_ENABLE_EPT (1 << 1)
#define PROCBASED2_DESC_TABLE_EXITING (1 << 2)
#define PROCBASED2_ENABLE_RDTSCP (1 << 3)
#define PROCBASED2_VIRTUALIZE_X2APIC_MODE (1 << 4)
#define PROCBASED2_ENABLE_VPID (1 << 5)
#define PROCBASED2_WBINVD_EXITING (1 << 6)
#define PROCBASED2_UNRESTRICTED_GUEST (1 << 7)
#define PROCBASED2_APIC_REGISTER_VIRTUALIZATION (1 << 8)
#define PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY (1 << 9)
#define PROCBASED2_PAUSE_LOOP_EXITING (1 << 10)
#define PROCBASED2_ENABLE_INVPCID (1 << 12)
/* VM Exit Controls */
#define VM_EXIT_SAVE_DEBUG_CONTROLS (1 << 2)

@ -54,43 +54,6 @@ __FBSDID("$FreeBSD$");
#include "vlapic_priv.h"
#include "vioapic.h"
#define VLAPIC_CTR0(vlapic, format) \
VCPU_CTR0((vlapic)->vm, (vlapic)->vcpuid, format)
#define VLAPIC_CTR1(vlapic, format, p1) \
VCPU_CTR1((vlapic)->vm, (vlapic)->vcpuid, format, p1)
#define VLAPIC_CTR2(vlapic, format, p1, p2) \
VCPU_CTR2((vlapic)->vm, (vlapic)->vcpuid, format, p1, p2)
#define VLAPIC_CTR_IRR(vlapic, msg) \
do { \
uint32_t *irrptr = &(vlapic)->apic_page->irr0; \
irrptr[0] = irrptr[0]; /* silence compiler */ \
VLAPIC_CTR1((vlapic), msg " irr0 0x%08x", irrptr[0 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr1 0x%08x", irrptr[1 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr2 0x%08x", irrptr[2 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr3 0x%08x", irrptr[3 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr4 0x%08x", irrptr[4 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr5 0x%08x", irrptr[5 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr6 0x%08x", irrptr[6 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr7 0x%08x", irrptr[7 << 2]); \
} while (0)
#define VLAPIC_CTR_ISR(vlapic, msg) \
do { \
uint32_t *isrptr = &(vlapic)->apic_page->isr0; \
isrptr[0] = isrptr[0]; /* silence compiler */ \
VLAPIC_CTR1((vlapic), msg " isr0 0x%08x", isrptr[0 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr1 0x%08x", isrptr[1 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr2 0x%08x", isrptr[2 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr3 0x%08x", isrptr[3 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr4 0x%08x", isrptr[4 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr5 0x%08x", isrptr[5 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr6 0x%08x", isrptr[6 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]); \
} while (0)
#define PRIO(x) ((x) >> 4)
#define VLAPIC_VERSION (16)
@ -312,6 +275,9 @@ vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
return (1);
}
if (vlapic->ops.set_intr_ready)
return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));
idx = (vector / 32) * 4;
mask = 1 << (vector % 32);
@ -1040,6 +1006,9 @@ vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
int idx, i, bitpos, vector;
uint32_t *irrptr, val;
if (vlapic->ops.pending_intr)
return ((*vlapic->ops.pending_intr)(vlapic, vecptr));
irrptr = &lapic->irr0;
/*
@ -1071,6 +1040,9 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector)
uint32_t *irrptr, *isrptr;
int idx, stk_top;
if (vlapic->ops.intr_accepted)
return ((*vlapic->ops.intr_accepted)(vlapic, vector));
/*
* clear the ready bit for vector being accepted in irr
* and set the vector as in service in isr.
@ -1469,7 +1441,10 @@ vlapic_post_intr(struct vlapic *vlapic, int hostcpu)
* If neither of these features are available then fallback to
* sending an IPI to 'hostcpu'.
*/
ipi_cpu(hostcpu, vmm_ipinum);
if (vlapic->ops.post_intr)
(*vlapic->ops.post_intr)(vlapic, hostcpu);
else
ipi_cpu(hostcpu, vmm_ipinum);
}
bool

@ -82,6 +82,43 @@
#define APIC_OFFSET_TIMER_CCR 0x390 /* Timer's Current Count */
#define APIC_OFFSET_TIMER_DCR 0x3E0 /* Timer's Divide Configuration */
#define VLAPIC_CTR0(vlapic, format) \
VCPU_CTR0((vlapic)->vm, (vlapic)->vcpuid, format)
#define VLAPIC_CTR1(vlapic, format, p1) \
VCPU_CTR1((vlapic)->vm, (vlapic)->vcpuid, format, p1)
#define VLAPIC_CTR2(vlapic, format, p1, p2) \
VCPU_CTR2((vlapic)->vm, (vlapic)->vcpuid, format, p1, p2)
#define VLAPIC_CTR_IRR(vlapic, msg) \
do { \
uint32_t *irrptr = &(vlapic)->apic_page->irr0; \
irrptr[0] = irrptr[0]; /* silence compiler */ \
VLAPIC_CTR1((vlapic), msg " irr0 0x%08x", irrptr[0 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr1 0x%08x", irrptr[1 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr2 0x%08x", irrptr[2 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr3 0x%08x", irrptr[3 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr4 0x%08x", irrptr[4 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr5 0x%08x", irrptr[5 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr6 0x%08x", irrptr[6 << 2]); \
VLAPIC_CTR1((vlapic), msg " irr7 0x%08x", irrptr[7 << 2]); \
} while (0)
#define VLAPIC_CTR_ISR(vlapic, msg) \
do { \
uint32_t *isrptr = &(vlapic)->apic_page->isr0; \
isrptr[0] = isrptr[0]; /* silence compiler */ \
VLAPIC_CTR1((vlapic), msg " isr0 0x%08x", isrptr[0 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr1 0x%08x", isrptr[1 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr2 0x%08x", isrptr[2 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr3 0x%08x", isrptr[3 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr4 0x%08x", isrptr[4 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr5 0x%08x", isrptr[5 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr6 0x%08x", isrptr[6 << 2]); \
VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]); \
} while (0)
enum boot_state {
BS_INIT,
BS_SIPI,
@ -95,10 +132,20 @@ enum boot_state {
#define VLAPIC_MAXLVT_INDEX APIC_LVT_CMCI
struct vlapic;
struct vlapic_ops {
int (*set_intr_ready)(struct vlapic *vlapic, int vector, bool level);
int (*pending_intr)(struct vlapic *vlapic, int *vecptr);
void (*intr_accepted)(struct vlapic *vlapic, int vector);
void (*post_intr)(struct vlapic *vlapic, int hostcpu);
};
struct vlapic {
struct vm *vm;
int vcpuid;
struct LAPIC *apic_page;
struct vlapic_ops ops;
uint32_t esr_pending;
int esr_firing;

@ -321,6 +321,7 @@ vm_create(const char *name, struct vm **retvm)
vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
strcpy(vm->name, name);
vm->vmspace = vmspace;
vm->cookie = VMINIT(vm, vmspace_pmap(vmspace));
vm->vioapic = vioapic_init(vm);
vm->vhpet = vhpet_init(vm);
@ -331,7 +332,6 @@ vm_create(const char *name, struct vm **retvm)
}
vm_activate_cpu(vm, BSP);
vm->vmspace = vmspace;
*retvm = vm;
return (0);