2011-05-13 04:54:01 +00:00
|
|
|
/*-
|
|
|
|
* Copyright (c) 2011 NetApp, Inc.
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
* $FreeBSD$
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
|
|
|
#include <sys/param.h>
|
2012-04-26 07:52:28 +00:00
|
|
|
#include <sys/systm.h>
|
2011-05-13 04:54:01 +00:00
|
|
|
#include <sys/kernel.h>
|
|
|
|
#include <sys/module.h>
|
|
|
|
#include <sys/sysctl.h>
|
|
|
|
#include <sys/malloc.h>
|
|
|
|
#include <sys/pcpu.h>
|
|
|
|
#include <sys/lock.h>
|
|
|
|
#include <sys/mutex.h>
|
|
|
|
#include <sys/proc.h>
|
2013-10-05 21:22:35 +00:00
|
|
|
#include <sys/rwlock.h>
|
2011-05-13 04:54:01 +00:00
|
|
|
#include <sys/sched.h>
|
|
|
|
#include <sys/smp.h>
|
|
|
|
#include <sys/systm.h>
|
|
|
|
|
|
|
|
#include <vm/vm.h>
|
2013-10-05 21:22:35 +00:00
|
|
|
#include <vm/vm_object.h>
|
|
|
|
#include <vm/vm_page.h>
|
|
|
|
#include <vm/pmap.h>
|
|
|
|
#include <vm/vm_map.h>
|
|
|
|
#include <vm/vm_extern.h>
|
|
|
|
#include <vm/vm_param.h>
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2014-01-29 21:23:37 +00:00
|
|
|
#include <machine/cpu.h>
|
2011-05-13 04:54:01 +00:00
|
|
|
#include <machine/vm.h>
|
|
|
|
#include <machine/pcb.h>
|
2012-10-12 18:32:44 +00:00
|
|
|
#include <machine/smp.h>
|
MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
Several changes to the local APIC support in bhyve:
- Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
- If a vcpu disables its local apic and then executes a 'HLT' then spin
down the vcpu and destroy its thread context. Also modify the 'HLT'
processing to ignore pending interrupts in the IRR if interrupts have
been disabled by the guest. The interrupt cannot be injected into the
guest in any case so resuming it is futile.
- Use callout(9) to drive the vlapic timer instead of clocking it on each
VM exit.
- When the guest is bringing up the APs in the x2APIC mode a write to the
ICR register will now trigger a return to userspace with an exitcode of
VM_EXITCODE_SPINUP_AP.
- Change the vlapic timer lock to be a spinlock because the vlapic can be
accessed from within a critical section (vm run loop) when guest is using
x2apic mode.
- Fix the vlapic version register.
- Add a command to bhyvectl to inject an NMI on a specific vcpu.
- Add an API to deliver message signalled interrupts to vcpus. This allows
callers to treat the MSI 'addr' and 'data' fields as opaque and also lets
bhyve implement multiple destination modes: physical, flat and clustered.
- Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to
'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
- Consolidate the virtual apic initialization in a single function:
vlapic_reset()
- Add a generic routine to trigger an LVT interrupt that supports both
fixed and NMI delivery modes.
- Add an ioctl and bhyvectl command to trigger local interrupts inside a
guest. In particular, a global NMI similar to that raised by SERR# or
PERR# can be simulated by asserting LINT1 on all vCPUs.
- Extend the LVT table in the vCPU local APIC to support CMCI.
- Flesh out the local APIC error reporting a bit to cache errors and
report them via ESR when ESR is written to. Add support for asserting
the error LVT when an error occurs. Raise illegal vector errors when
attempting to signal an invalid vector for an interrupt or when sending
an IPI.
- Export table entries in the MADT and MP Table advertising the stock x86
config of LINT0 set to ExtInt and LINT1 wired to NMI.
2014-02-23 00:46:05 +00:00
|
|
|
#include <x86/psl.h>
|
2011-05-14 20:35:01 +00:00
|
|
|
#include <x86/apicreg.h>
|
2013-10-05 21:22:35 +00:00
|
|
|
#include <machine/vmparam.h>
|
2011-05-13 04:54:01 +00:00
|
|
|
|
|
|
|
#include <machine/vmm.h>
|
2014-01-23 20:21:39 +00:00
|
|
|
#include <machine/vmm_dev.h>
|
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
#include "vmm_ktr.h"
|
2012-10-29 01:51:24 +00:00
|
|
|
#include "vmm_host.h"
|
2011-05-13 04:54:01 +00:00
|
|
|
#include "vmm_mem.h"
|
|
|
|
#include "vmm_util.h"
|
2014-01-23 20:21:39 +00:00
|
|
|
#include "vhpet.h"
|
|
|
|
#include "vioapic.h"
|
2011-05-13 04:54:01 +00:00
|
|
|
#include "vlapic.h"
|
|
|
|
#include "vmm_msr.h"
|
|
|
|
#include "vmm_ipi.h"
|
|
|
|
#include "vmm_stat.h"
|
2012-10-25 04:29:21 +00:00
|
|
|
#include "vmm_lapic.h"
|
2011-05-13 04:54:01 +00:00
|
|
|
|
|
|
|
#include "io/ppt.h"
|
|
|
|
#include "io/iommu.h"
|
|
|
|
|
|
|
|
struct vlapic;
|
|
|
|
|
|
|
|
struct vcpu {
|
|
|
|
int flags;
|
2012-10-12 18:32:44 +00:00
|
|
|
enum vcpu_state state;
|
|
|
|
struct mtx mtx;
|
2011-05-13 04:54:01 +00:00
|
|
|
int hostcpu; /* host cpuid this vcpu last ran on */
|
|
|
|
uint64_t guest_msrs[VMM_MSR_NUM];
|
|
|
|
struct vlapic *vlapic;
|
|
|
|
int vcpuid;
|
2012-04-26 07:52:28 +00:00
|
|
|
struct savefpu *guestfpu; /* guest fpu state */
|
MFC 261638,262144,262506,266765:
Add virtualized XSAVE support to bhyve which permits guests to use XSAVE and
XSAVE-enabled features like AVX.
- Store a per-cpu guest xcr0 register and handle xsetbv VM exits by emulating
the instruction.
- Only expose XSAVE to guests if XSAVE is enabled in the host. Only expose
a subset of XSAVE features currently supported by the guest and for which
the proper emulation of xsetbv is known. Currently this includes X87, SSE,
AVX, AVX-512, and Intel MPX.
- Add support for injecting hardware exceptions into the guest and use this
to trigger exceptions in the guest for invalid xsetbv operations instead
of potentially faulting in the host.
- Queue pending exceptions in the 'struct vcpu' instead of directly updating
the processor-specific VMCS or VMCB. The pending exception will be delivered
right before entering the guest.
- Rename the unused ioctl VM_INJECT_EVENT to VM_INJECT_EXCEPTION and restrict
it to only deliver x86 hardware exceptions. This new ioctl is now used to
inject a protection fault when the guest accesses an unimplemented MSR.
- Expose a subset of known-safe features from leaf 0 of the structured
extended features to guests if they are supported on the host including
RDFSBASE/RDGSBASE, BMI1/2, AVX2, AVX-512, HLE, ERMS, and RTM. Aside
from AVX-512, these features are all new instructions available for use
in ring 3 with no additional hypervisor changes needed.
2014-06-12 19:58:12 +00:00
|
|
|
uint64_t guest_xcr0;
|
2011-05-13 04:54:01 +00:00
|
|
|
void *stats;
|
2012-09-24 19:32:24 +00:00
|
|
|
struct vm_exit exitinfo;
|
2012-09-25 19:08:51 +00:00
|
|
|
enum x2apic_state x2apic_state;
|
2012-10-24 02:54:21 +00:00
|
|
|
int nmi_pending;
|
MFC 261638,262144,262506,266765:
Add virtualized XSAVE support to bhyve which permits guests to use XSAVE and
XSAVE-enabled features like AVX.
- Store a per-cpu guest xcr0 register and handle xsetbv VM exits by emulating
the instruction.
- Only expose XSAVE to guests if XSAVE is enabled in the host. Only expose
a subset of XSAVE features currently supported by the guest and for which
the proper emulation of xsetbv is known. Currently this includes X87, SSE,
AVX, AVX-512, and Intel MPX.
- Add support for injecting hardware exceptions into the guest and use this
to trigger exceptions in the guest for invalid xsetbv operations instead
of potentially faulting in the host.
- Queue pending exceptions in the 'struct vcpu' instead of directly updating
the processor-specific VMCS or VMCB. The pending exception will be delivered
right before entering the guest.
- Rename the unused ioctl VM_INJECT_EVENT to VM_INJECT_EXCEPTION and restrict
it to only deliver x86 hardware exceptions. This new ioctl is now used to
inject a protection fault when the guest accesses an unimplemented MSR.
- Expose a subset of known-safe features from leaf 0 of the structured
extended features to guests if they are supported on the host including
RDFSBASE/RDGSBASE, BMI1/2, AVX2, AVX-512, HLE, ERMS, and RTM. Aside
from AVX-512, these features are all new instructions available for use
in ring 3 with no additional hypervisor changes needed.
2014-06-12 19:58:12 +00:00
|
|
|
struct vm_exception exception;
|
|
|
|
int exception_pending;
|
2011-05-13 04:54:01 +00:00
|
|
|
};
|
|
|
|
|
2012-10-25 04:29:21 +00:00
|
|
|
/*
 * Helpers for the per-vcpu mutex.  The mutex is created with MTX_SPIN, so it
 * is taken and released with the spin variants of the mtx(9) calls.
 */
#define	vcpu_lock_init(v)	\
	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock(v)		\
	mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		\
	mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	\
	mtx_assert(&((v)->mtx), MA_OWNED)
|
2012-10-12 18:32:44 +00:00
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
struct mem_seg {
|
|
|
|
vm_paddr_t gpa;
|
|
|
|
size_t len;
|
|
|
|
boolean_t wired;
|
|
|
|
vm_object_t object;
|
|
|
|
};
|
2011-05-13 04:54:01 +00:00
|
|
|
#define VM_MAX_MEMORY_SEGMENTS 2
|
|
|
|
|
|
|
|
struct vm {
|
|
|
|
void *cookie; /* processor-specific data */
|
|
|
|
void *iommu; /* iommu-specific data */
|
2014-01-23 20:21:39 +00:00
|
|
|
struct vhpet *vhpet; /* virtual HPET */
|
|
|
|
struct vioapic *vioapic; /* virtual ioapic */
|
2013-10-05 21:22:35 +00:00
|
|
|
struct vmspace *vmspace; /* guest's address space */
|
2011-05-13 04:54:01 +00:00
|
|
|
struct vcpu vcpu[VM_MAXCPU];
|
|
|
|
int num_mem_segs;
|
2013-10-05 21:22:35 +00:00
|
|
|
struct mem_seg mem_segs[VM_MAX_MEMORY_SEGMENTS];
|
2011-05-13 04:54:01 +00:00
|
|
|
char name[VM_MAX_NAMELEN];
|
|
|
|
|
|
|
|
/*
|
2011-06-28 06:26:03 +00:00
|
|
|
* Set of active vcpus.
|
2011-05-13 04:54:01 +00:00
|
|
|
* An active vcpu is one that has been started implicitly (BSP) or
|
|
|
|
* explicitly (AP) by sending it a startup ipi.
|
|
|
|
*/
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
volatile cpuset_t active_cpus;
|
|
|
|
|
|
|
|
struct mtx rendezvous_mtx;
|
|
|
|
cpuset_t rendezvous_req_cpus;
|
|
|
|
cpuset_t rendezvous_done_cpus;
|
|
|
|
void *rendezvous_arg;
|
|
|
|
vm_rendezvous_func_t rendezvous_func;
|
2011-05-13 04:54:01 +00:00
|
|
|
};
|
|
|
|
|
2013-04-12 01:16:52 +00:00
|
|
|
/*
 * Set to 1 by the module handler once vmm_init() succeeds and cleared again
 * if backend cleanup fails at unload; vm_create() refuses to run while it is
 * zero.
 */
static int vmm_initialized;

/* Processor-specific backend; vmm_init() points it at the Intel or AMD ops. */
static struct vmm_ops *ops;

/*
 * Dispatch wrappers around the backend in 'ops'.  While 'ops' is still NULL
 * each wrapper evaluates to the fallback value on the right of the ':'
 * instead of calling through the pointer.
 */
#define	VMM_INIT(num)	(ops != NULL ? (*ops->init)(num) : 0)
#define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)
#define	VMM_RESUME()	(ops != NULL ? (*ops->resume)() : 0)

#define	VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
#define	VMRUN(vmi, vcpu, rip, pmap, rptr) \
	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr) : ENXIO)
#define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define	VMSPACE_ALLOC(min, max) \
	(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
#define	VMSPACE_FREE(vmspace) \
	(ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
#define	VMGETREG(vmi, vcpu, num, retval) \
	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETREG(vmi, vcpu, num, val) \
	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
#define	VMGETDESC(vmi, vcpu, num, desc) \
	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMSETDESC(vmi, vcpu, num, desc) \
	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMGETCAP(vmi, vcpu, num, retval) \
	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETCAP(vmi, vcpu, num, val) \
	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
#define	VLAPIC_INIT(vmi, vcpu) \
	(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
#define	VLAPIC_CLEANUP(vmi, vlapic) \
	(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2013-01-04 02:49:12 +00:00
|
|
|
#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS)
|
|
|
|
#define fpu_stop_emulating() clts()
|
2011-05-13 04:54:01 +00:00
|
|
|
|
|
|
|
static MALLOC_DEFINE(M_VM, "vm", "vm");
|
|
|
|
CTASSERT(VMM_MSR_NUM <= 64); /* msr_mask can keep track of up to 64 msrs */
|
|
|
|
|
|
|
|
/* statistics */
|
2013-03-16 22:40:20 +00:00
|
|
|
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
|
2011-05-13 04:54:01 +00:00
|
|
|
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
|
|
|
|
|
|
|
|
static int vmm_ipinum;
|
|
|
|
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
|
|
|
|
"IPI vector used for vcpu notifications");
|
|
|
|
|
|
|
|
static void vm_deactivate_cpu(struct vm *vm, int vcpuid);
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
static void
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
vcpu_cleanup(struct vm *vm, int i)
|
2011-05-13 04:54:01 +00:00
|
|
|
{
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
struct vcpu *vcpu = &vm->vcpu[i];
|
|
|
|
|
|
|
|
VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
|
2012-04-26 07:52:28 +00:00
|
|
|
vmm_stat_free(vcpu->stats);
|
|
|
|
fpu_save_area_free(vcpu->guestfpu);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
vcpu_init(struct vm *vm, uint32_t vcpu_id)
|
|
|
|
{
|
|
|
|
struct vcpu *vcpu;
|
|
|
|
|
|
|
|
vcpu = &vm->vcpu[vcpu_id];
|
|
|
|
|
2012-10-12 18:32:44 +00:00
|
|
|
vcpu_lock_init(vcpu);
|
|
|
|
vcpu->hostcpu = NOCPU;
|
2011-05-13 04:54:01 +00:00
|
|
|
vcpu->vcpuid = vcpu_id;
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
|
2014-06-13 19:10:40 +00:00
|
|
|
vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
|
MFC 261638,262144,262506,266765:
Add virtualized XSAVE support to bhyve which permits guests to use XSAVE and
XSAVE-enabled features like AVX.
- Store a per-cpu guest xcr0 register and handle xsetbv VM exits by emulating
the instruction.
- Only expose XSAVE to guests if XSAVE is enabled in the host. Only expose
a subset of XSAVE features currently supported by the guest and for which
the proper emulation of xsetbv is known. Currently this includes X87, SSE,
AVX, AVX-512, and Intel MPX.
- Add support for injecting hardware exceptions into the guest and use this
to trigger exceptions in the guest for invalid xsetbv operations instead
of potentially faulting in the host.
- Queue pending exceptions in the 'struct vcpu' instead of directly updating
the processor-specific VMCS or VMCB. The pending exception will be delivered
right before entering the guest.
- Rename the unused ioctl VM_INJECT_EVENT to VM_INJECT_EXCEPTION and restrict
it to only deliver x86 hardware exceptions. This new ioctl is now used to
inject a protection fault when the guest accesses an unimplemented MSR.
- Expose a subset of known-safe features from leaf 0 of the structured
extended features to guests if they are supported on the host including
RDFSBASE/RDGSBASE, BMI1/2, AVX2, AVX-512, HLE, ERMS, and RTM. Aside
from AVX-512, these features are all new instructions available for use
in ring 3 with no additional hypervisor changes needed.
2014-06-12 19:58:12 +00:00
|
|
|
vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
|
2012-04-26 07:52:28 +00:00
|
|
|
vcpu->guestfpu = fpu_save_area_alloc();
|
|
|
|
fpu_save_area_reset(vcpu->guestfpu);
|
2011-05-13 04:54:01 +00:00
|
|
|
vcpu->stats = vmm_stat_alloc();
|
|
|
|
}
|
|
|
|
|
2012-09-24 19:32:24 +00:00
|
|
|
struct vm_exit *
|
|
|
|
vm_exitinfo(struct vm *vm, int cpuid)
|
|
|
|
{
|
|
|
|
struct vcpu *vcpu;
|
|
|
|
|
|
|
|
if (cpuid < 0 || cpuid >= VM_MAXCPU)
|
|
|
|
panic("vm_exitinfo: invalid cpuid %d", cpuid);
|
|
|
|
|
|
|
|
vcpu = &vm->vcpu[cpuid];
|
|
|
|
|
|
|
|
return (&vcpu->exitinfo);
|
|
|
|
}
|
|
|
|
|
2014-01-29 21:23:37 +00:00
|
|
|
/*
 * Resume hook installed in 'vmm_resume_p' by vmm_init(); forwards to the
 * backend's resume operation and discards its result.
 */
static void
vmm_resume(void)
{

	(void)VMM_RESUME();
}
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
static int
|
|
|
|
vmm_init(void)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
2012-10-29 01:51:24 +00:00
|
|
|
vmm_host_state_init();
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
|
|
|
|
vmm_ipinum = vmm_ipi_alloc();
|
|
|
|
if (vmm_ipinum == 0)
|
|
|
|
vmm_ipinum = IPI_AST;
|
2011-05-13 04:54:01 +00:00
|
|
|
|
|
|
|
error = vmm_mem_init();
|
|
|
|
if (error)
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
if (vmm_is_intel())
|
|
|
|
ops = &vmm_ops_intel;
|
|
|
|
else if (vmm_is_amd())
|
|
|
|
ops = &vmm_ops_amd;
|
|
|
|
else
|
|
|
|
return (ENXIO);
|
|
|
|
|
|
|
|
vmm_msr_init();
|
2014-01-29 21:23:37 +00:00
|
|
|
vmm_resume_p = vmm_resume;
|
2011-05-13 04:54:01 +00:00
|
|
|
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
return (VMM_INIT(vmm_ipinum));
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
vmm_handler(module_t mod, int what, void *arg)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
switch (what) {
|
|
|
|
case MOD_LOAD:
|
|
|
|
vmmdev_init();
|
2014-06-04 17:57:48 +00:00
|
|
|
if (ppt_avail_devices() > 0)
|
|
|
|
iommu_init();
|
2011-05-13 04:54:01 +00:00
|
|
|
error = vmm_init();
|
2013-04-12 01:16:52 +00:00
|
|
|
if (error == 0)
|
|
|
|
vmm_initialized = 1;
|
2011-05-13 04:54:01 +00:00
|
|
|
break;
|
|
|
|
case MOD_UNLOAD:
|
2012-10-11 19:39:54 +00:00
|
|
|
error = vmmdev_cleanup();
|
|
|
|
if (error == 0) {
|
2014-01-29 21:23:37 +00:00
|
|
|
vmm_resume_p = NULL;
|
2012-10-11 19:39:54 +00:00
|
|
|
iommu_cleanup();
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
if (vmm_ipinum != IPI_AST)
|
|
|
|
vmm_ipi_free(vmm_ipinum);
|
2012-10-11 19:39:54 +00:00
|
|
|
error = VMM_CLEANUP();
|
2013-08-01 05:59:28 +00:00
|
|
|
/*
|
|
|
|
* Something bad happened - prevent new
|
|
|
|
* VMs from being created
|
|
|
|
*/
|
|
|
|
if (error)
|
|
|
|
vmm_initialized = 0;
|
2012-10-11 19:39:54 +00:00
|
|
|
}
|
2011-05-13 04:54:01 +00:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
error = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
static moduledata_t vmm_kmod = {
|
|
|
|
"vmm",
|
|
|
|
vmm_handler,
|
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
2013-01-21 01:33:10 +00:00
|
|
|
* vmm initialization has the following dependencies:
|
|
|
|
*
|
|
|
|
* - iommu initialization must happen after the pci passthru driver has had
|
|
|
|
* a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
|
|
|
|
*
|
|
|
|
* - VT-x initialization requires smp_rendezvous() and therefore must happen
|
|
|
|
* after SMP is fully functional (after SI_SUB_SMP).
|
2011-05-13 04:54:01 +00:00
|
|
|
*/
|
2013-01-21 01:33:10 +00:00
|
|
|
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
|
2011-05-13 04:54:01 +00:00
|
|
|
MODULE_VERSION(vmm, 1);
|
|
|
|
|
2013-04-12 01:16:52 +00:00
|
|
|
int
|
|
|
|
vm_create(const char *name, struct vm **retvm)
|
2011-05-13 04:54:01 +00:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct vm *vm;
|
2013-10-05 21:22:35 +00:00
|
|
|
struct vmspace *vmspace;
|
2011-05-13 04:54:01 +00:00
|
|
|
|
|
|
|
const int BSP = 0;
|
|
|
|
|
2013-04-12 01:16:52 +00:00
|
|
|
/*
|
|
|
|
* If vmm.ko could not be successfully initialized then don't attempt
|
|
|
|
* to create the virtual machine.
|
|
|
|
*/
|
|
|
|
if (!vmm_initialized)
|
|
|
|
return (ENXIO);
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
|
2013-04-12 01:16:52 +00:00
|
|
|
return (EINVAL);
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
|
|
|
|
if (vmspace == NULL)
|
|
|
|
return (ENOMEM);
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
|
|
|
|
strcpy(vm->name, name);
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
vm->vmspace = vmspace;
|
|
|
|
mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
|
2013-10-05 21:22:35 +00:00
|
|
|
vm->cookie = VMINIT(vm, vmspace_pmap(vmspace));
|
2014-01-23 20:21:39 +00:00
|
|
|
vm->vioapic = vioapic_init(vm);
|
|
|
|
vm->vhpet = vhpet_init(vm);
|
2011-05-13 04:54:01 +00:00
|
|
|
|
|
|
|
for (i = 0; i < VM_MAXCPU; i++) {
|
|
|
|
vcpu_init(vm, i);
|
|
|
|
guest_msrs_init(vm, i);
|
|
|
|
}
|
|
|
|
|
|
|
|
vm_activate_cpu(vm, BSP);
|
|
|
|
|
2013-04-12 01:16:52 +00:00
|
|
|
*retvm = vm;
|
|
|
|
return (0);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
2012-10-04 02:27:14 +00:00
|
|
|
static void
|
2013-10-05 21:22:35 +00:00
|
|
|
vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
|
2012-10-04 02:27:14 +00:00
|
|
|
{
|
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
if (seg->object != NULL)
|
|
|
|
vmm_mem_free(vm->vmspace, seg->gpa, seg->len);
|
2012-10-08 23:41:26 +00:00
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
bzero(seg, sizeof(*seg));
|
2012-10-04 02:27:14 +00:00
|
|
|
}
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
void
|
|
|
|
vm_destroy(struct vm *vm)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
ppt_unassign_all(vm);
|
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
if (vm->iommu != NULL)
|
|
|
|
iommu_destroy_domain(vm->iommu);
|
|
|
|
|
2014-01-23 20:21:39 +00:00
|
|
|
vhpet_cleanup(vm->vhpet);
|
|
|
|
vioapic_cleanup(vm->vioapic);
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
for (i = 0; i < vm->num_mem_segs; i++)
|
2012-10-04 02:27:14 +00:00
|
|
|
vm_free_mem_seg(vm, &vm->mem_segs[i]);
|
|
|
|
|
|
|
|
vm->num_mem_segs = 0;
|
2011-05-13 04:54:01 +00:00
|
|
|
|
|
|
|
for (i = 0; i < VM_MAXCPU; i++)
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
vcpu_cleanup(vm, i);
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
VMSPACE_FREE(vm->vmspace);
|
2011-05-13 04:54:01 +00:00
|
|
|
|
|
|
|
VMCLEANUP(vm->cookie);
|
|
|
|
|
|
|
|
free(vm, M_VM);
|
|
|
|
}
|
|
|
|
|
|
|
|
const char *
|
|
|
|
vm_name(struct vm *vm)
|
|
|
|
{
|
|
|
|
return (vm->name);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
|
|
|
|
{
|
2013-10-05 21:22:35 +00:00
|
|
|
vm_object_t obj;
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
|
|
|
|
return (ENOMEM);
|
|
|
|
else
|
|
|
|
return (0);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
|
|
|
|
{
|
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
vmm_mmio_free(vm->vmspace, gpa, len);
|
|
|
|
return (0);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
boolean_t
|
|
|
|
vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
|
2012-09-29 01:15:45 +00:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
vm_paddr_t gpabase, gpalimit;
|
|
|
|
|
|
|
|
for (i = 0; i < vm->num_mem_segs; i++) {
|
|
|
|
gpabase = vm->mem_segs[i].gpa;
|
|
|
|
gpalimit = gpabase + vm->mem_segs[i].len;
|
|
|
|
if (gpa >= gpabase && gpa < gpalimit)
|
2013-10-05 21:22:35 +00:00
|
|
|
return (TRUE); /* 'gpa' is regular memory */
|
2012-09-29 01:15:45 +00:00
|
|
|
}
|
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
if (ppt_is_mmio(vm, gpa))
|
|
|
|
return (TRUE); /* 'gpa' is pci passthru mmio */
|
|
|
|
|
|
|
|
return (FALSE);
|
2012-09-29 01:15:45 +00:00
|
|
|
}
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
int
|
2012-09-29 01:15:45 +00:00
|
|
|
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
|
2011-05-13 04:54:01 +00:00
|
|
|
{
|
2013-10-05 21:22:35 +00:00
|
|
|
int available, allocated;
|
|
|
|
struct mem_seg *seg;
|
|
|
|
vm_object_t object;
|
|
|
|
vm_paddr_t g;
|
2012-09-29 01:15:45 +00:00
|
|
|
|
|
|
|
if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
|
|
|
|
return (EINVAL);
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2012-09-29 01:15:45 +00:00
|
|
|
available = allocated = 0;
|
|
|
|
g = gpa;
|
|
|
|
while (g < gpa + len) {
|
2013-10-05 21:22:35 +00:00
|
|
|
if (vm_mem_allocated(vm, g))
|
2012-09-29 01:15:45 +00:00
|
|
|
allocated++;
|
2013-10-05 21:22:35 +00:00
|
|
|
else
|
|
|
|
available++;
|
2012-09-29 01:15:45 +00:00
|
|
|
|
|
|
|
g += PAGE_SIZE;
|
|
|
|
}
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
/*
|
2012-09-29 01:15:45 +00:00
|
|
|
* If there are some allocated and some available pages in the address
|
|
|
|
* range then it is an error.
|
2011-05-13 04:54:01 +00:00
|
|
|
*/
|
2012-09-29 01:15:45 +00:00
|
|
|
if (allocated && available)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the entire address range being requested has already been
|
|
|
|
* allocated then there isn't anything more to do.
|
|
|
|
*/
|
|
|
|
if (allocated && available == 0)
|
|
|
|
return (0);
|
2011-05-13 04:54:01 +00:00
|
|
|
|
|
|
|
if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
|
|
|
|
return (E2BIG);
|
|
|
|
|
2012-10-04 02:27:14 +00:00
|
|
|
seg = &vm->mem_segs[vm->num_mem_segs];
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
|
|
|
|
return (ENOMEM);
|
|
|
|
|
2012-10-04 02:27:14 +00:00
|
|
|
seg->gpa = gpa;
|
2013-10-05 21:22:35 +00:00
|
|
|
seg->len = len;
|
|
|
|
seg->object = object;
|
|
|
|
seg->wired = FALSE;
|
2012-10-04 02:27:14 +00:00
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
vm->num_mem_segs++;
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
vm_gpa_unwire(struct vm *vm)
|
|
|
|
{
|
|
|
|
int i, rv;
|
|
|
|
struct mem_seg *seg;
|
|
|
|
|
|
|
|
for (i = 0; i < vm->num_mem_segs; i++) {
|
|
|
|
seg = &vm->mem_segs[i];
|
|
|
|
if (!seg->wired)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
rv = vm_map_unwire(&vm->vmspace->vm_map,
|
|
|
|
seg->gpa, seg->gpa + seg->len,
|
|
|
|
VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
|
|
|
|
KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
|
|
|
|
"%#lx/%ld could not be unwired: %d",
|
|
|
|
vm_name(vm), seg->gpa, seg->len, rv));
|
|
|
|
|
|
|
|
seg->wired = FALSE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
vm_gpa_wire(struct vm *vm)
|
|
|
|
{
|
|
|
|
int i, rv;
|
|
|
|
struct mem_seg *seg;
|
|
|
|
|
|
|
|
for (i = 0; i < vm->num_mem_segs; i++) {
|
|
|
|
seg = &vm->mem_segs[i];
|
|
|
|
if (seg->wired)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* XXX rlimits? */
|
|
|
|
rv = vm_map_wire(&vm->vmspace->vm_map,
|
|
|
|
seg->gpa, seg->gpa + seg->len,
|
|
|
|
VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
|
|
|
|
if (rv != KERN_SUCCESS)
|
2012-10-04 02:27:14 +00:00
|
|
|
break;
|
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
seg->wired = TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (i < vm->num_mem_segs) {
|
2012-10-08 23:41:26 +00:00
|
|
|
/*
|
2013-10-05 21:22:35 +00:00
|
|
|
* Undo the wiring before returning an error.
|
2012-10-08 23:41:26 +00:00
|
|
|
*/
|
2013-10-05 21:22:35 +00:00
|
|
|
vm_gpa_unwire(vm);
|
|
|
|
return (EAGAIN);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
vm_iommu_modify(struct vm *vm, boolean_t map)
|
|
|
|
{
|
|
|
|
int i, sz;
|
|
|
|
vm_paddr_t gpa, hpa;
|
|
|
|
struct mem_seg *seg;
|
|
|
|
void *vp, *cookie, *host_domain;
|
2012-10-04 02:27:14 +00:00
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
sz = PAGE_SIZE;
|
|
|
|
host_domain = iommu_host_domain();
|
|
|
|
|
|
|
|
for (i = 0; i < vm->num_mem_segs; i++) {
|
|
|
|
seg = &vm->mem_segs[i];
|
|
|
|
KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
|
|
|
|
vm_name(vm), seg->gpa, seg->len));
|
|
|
|
|
|
|
|
gpa = seg->gpa;
|
|
|
|
while (gpa < seg->gpa + seg->len) {
|
|
|
|
vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
|
|
|
|
&cookie);
|
|
|
|
KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
|
|
|
|
vm_name(vm), gpa));
|
|
|
|
|
|
|
|
vm_gpa_release(cookie);
|
|
|
|
|
|
|
|
hpa = DMAP_TO_PHYS((uintptr_t)vp);
|
|
|
|
if (map) {
|
|
|
|
iommu_create_mapping(vm->iommu, gpa, hpa, sz);
|
|
|
|
iommu_remove_mapping(host_domain, hpa, sz);
|
|
|
|
} else {
|
|
|
|
iommu_remove_mapping(vm->iommu, gpa, sz);
|
|
|
|
iommu_create_mapping(host_domain, hpa, hpa, sz);
|
|
|
|
}
|
|
|
|
|
|
|
|
gpa += PAGE_SIZE;
|
|
|
|
}
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
/*
|
|
|
|
* Invalidate the cached translations associated with the domain
|
|
|
|
* from which pages were removed.
|
|
|
|
*/
|
|
|
|
if (map)
|
|
|
|
iommu_invalidate_tlb(host_domain);
|
|
|
|
else
|
|
|
|
iommu_invalidate_tlb(vm->iommu);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Shorthand for vm_iommu_modify() with 'map' set to FALSE. */
#define vm_iommu_unmap(vm) vm_iommu_modify((vm), FALSE)
|
|
|
|
/* Shorthand for vm_iommu_modify() with 'map' set to TRUE. */
#define vm_iommu_map(vm) vm_iommu_modify((vm), TRUE)
|
|
|
|
|
|
|
|
int
|
|
|
|
vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
error = ppt_unassign_device(vm, bus, slot, func);
|
|
|
|
if (error)
|
2012-10-04 02:27:14 +00:00
|
|
|
return (error);
|
2013-10-05 21:22:35 +00:00
|
|
|
|
2014-06-04 17:57:48 +00:00
|
|
|
if (ppt_assigned_devices(vm) == 0) {
|
2013-10-05 21:22:35 +00:00
|
|
|
vm_iommu_unmap(vm);
|
|
|
|
vm_gpa_unwire(vm);
|
2012-10-04 02:27:14 +00:00
|
|
|
}
|
2013-10-05 21:22:35 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
vm_paddr_t maxaddr;
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2012-10-08 23:41:26 +00:00
|
|
|
/*
|
2013-10-05 21:22:35 +00:00
|
|
|
* Virtual machines with pci passthru devices get special treatment:
|
|
|
|
* - the guest physical memory is wired
|
|
|
|
* - the iommu is programmed to do the 'gpa' to 'hpa' translation
|
|
|
|
*
|
|
|
|
* We need to do this before the first pci passthru device is attached.
|
2012-10-08 23:41:26 +00:00
|
|
|
*/
|
2014-06-04 17:57:48 +00:00
|
|
|
if (ppt_assigned_devices(vm) == 0) {
|
2013-10-05 21:22:35 +00:00
|
|
|
KASSERT(vm->iommu == NULL,
|
|
|
|
("vm_assign_pptdev: iommu must be NULL"));
|
|
|
|
maxaddr = vmm_mem_maxaddr();
|
|
|
|
vm->iommu = iommu_create_domain(maxaddr);
|
2012-10-08 23:41:26 +00:00
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
error = vm_gpa_wire(vm);
|
|
|
|
if (error)
|
|
|
|
return (error);
|
2012-09-29 01:15:45 +00:00
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
vm_iommu_map(vm);
|
|
|
|
}
|
|
|
|
|
|
|
|
error = ppt_assign_device(vm, bus, slot, func);
|
|
|
|
return (error);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
void *
|
|
|
|
vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
|
|
|
|
void **cookie)
|
2011-05-13 04:54:01 +00:00
|
|
|
{
|
2013-10-05 21:22:35 +00:00
|
|
|
int count, pageoff;
|
|
|
|
vm_page_t m;
|
|
|
|
|
|
|
|
pageoff = gpa & PAGE_MASK;
|
|
|
|
if (len > PAGE_SIZE - pageoff)
|
|
|
|
panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
|
2012-10-03 01:18:51 +00:00
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
|
|
|
|
trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
if (count == 1) {
|
|
|
|
*cookie = m;
|
|
|
|
return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
|
|
|
|
} else {
|
|
|
|
*cookie = NULL;
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
vm_gpa_release(void *cookie)
|
|
|
|
{
|
|
|
|
vm_page_t m = cookie;
|
|
|
|
|
|
|
|
vm_page_lock(m);
|
|
|
|
vm_page_unhold(m);
|
|
|
|
vm_page_unlock(m);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
|
|
|
|
struct vm_memory_segment *seg)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < vm->num_mem_segs; i++) {
|
|
|
|
if (gpabase == vm->mem_segs[i].gpa) {
|
2013-10-05 21:22:35 +00:00
|
|
|
seg->gpa = vm->mem_segs[i].gpa;
|
|
|
|
seg->len = vm->mem_segs[i].len;
|
|
|
|
seg->wired = vm->mem_segs[i].wired;
|
2011-05-13 04:54:01 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
int
|
|
|
|
vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
|
|
|
|
vm_offset_t *offset, struct vm_object **object)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
size_t seg_len;
|
|
|
|
vm_paddr_t seg_gpa;
|
|
|
|
vm_object_t seg_obj;
|
|
|
|
|
|
|
|
for (i = 0; i < vm->num_mem_segs; i++) {
|
|
|
|
if ((seg_obj = vm->mem_segs[i].object) == NULL)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
seg_gpa = vm->mem_segs[i].gpa;
|
|
|
|
seg_len = vm->mem_segs[i].len;
|
|
|
|
|
|
|
|
if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
|
|
|
|
*offset = gpa - seg_gpa;
|
|
|
|
*object = seg_obj;
|
|
|
|
vm_object_reference(seg_obj);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
int
|
|
|
|
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (vcpu < 0 || vcpu >= VM_MAXCPU)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
if (reg >= VM_REG_LAST)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
return (VMGETREG(vm->cookie, vcpu, reg, retval));
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (vcpu < 0 || vcpu >= VM_MAXCPU)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
if (reg >= VM_REG_LAST)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
return (VMSETREG(vm->cookie, vcpu, reg, val));
|
|
|
|
}
|
|
|
|
|
|
|
|
static boolean_t
|
|
|
|
is_descriptor_table(int reg)
|
|
|
|
{
|
|
|
|
|
|
|
|
switch (reg) {
|
|
|
|
case VM_REG_GUEST_IDTR:
|
|
|
|
case VM_REG_GUEST_GDTR:
|
|
|
|
return (TRUE);
|
|
|
|
default:
|
|
|
|
return (FALSE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static boolean_t
|
|
|
|
is_segment_register(int reg)
|
|
|
|
{
|
|
|
|
|
|
|
|
switch (reg) {
|
|
|
|
case VM_REG_GUEST_ES:
|
|
|
|
case VM_REG_GUEST_CS:
|
|
|
|
case VM_REG_GUEST_SS:
|
|
|
|
case VM_REG_GUEST_DS:
|
|
|
|
case VM_REG_GUEST_FS:
|
|
|
|
case VM_REG_GUEST_GS:
|
|
|
|
case VM_REG_GUEST_TR:
|
|
|
|
case VM_REG_GUEST_LDTR:
|
|
|
|
return (TRUE);
|
|
|
|
default:
|
|
|
|
return (FALSE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
|
|
|
|
struct seg_desc *desc)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (vcpu < 0 || vcpu >= VM_MAXCPU)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
if (!is_segment_register(reg) && !is_descriptor_table(reg))
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
return (VMGETDESC(vm->cookie, vcpu, reg, desc));
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
|
|
|
|
struct seg_desc *desc)
|
|
|
|
{
|
|
|
|
if (vcpu < 0 || vcpu >= VM_MAXCPU)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
if (!is_segment_register(reg) && !is_descriptor_table(reg))
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
return (VMSETDESC(vm->cookie, vcpu, reg, desc));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
restore_guest_fpustate(struct vcpu *vcpu)
|
|
|
|
{
|
|
|
|
|
2012-04-26 07:52:28 +00:00
|
|
|
/* flush host state to the pcb */
|
|
|
|
fpuexit(curthread);
|
2012-10-26 03:12:40 +00:00
|
|
|
|
|
|
|
/* restore guest FPU state */
|
2011-05-13 04:54:01 +00:00
|
|
|
fpu_stop_emulating();
|
2012-04-26 07:52:28 +00:00
|
|
|
fpurestore(vcpu->guestfpu);
|
2012-10-26 03:12:40 +00:00
|
|
|
|
MFC 261638,262144,262506,266765:
Add virtualized XSAVE support to bhyve which permits guests to use XSAVE and
XSAVE-enabled features like AVX.
- Store a per-cpu guest xcr0 register and handle xsetbv VM exits by emulating
the instruction.
- Only expose XSAVE to guests if XSAVE is enabled in the host. Only expose
a subset of XSAVE features currently supported by the guest and for which
the proper emulation of xsetbv is known. Currently this includes X87, SSE,
AVX, AVX-512, and Intel MPX.
- Add support for injecting hardware exceptions into the guest and use this
to trigger exceptions in the guest for invalid xsetbv operations instead
of potentially faulting in the host.
- Queue pending exceptions in the 'struct vcpu' instead of directly updating
the processor-specific VMCS or VMCB. The pending exception will be delivered
right before entering the guest.
- Rename the unused ioctl VM_INJECT_EVENT to VM_INJECT_EXCEPTION and restrict
it to only deliver x86 hardware exceptions. This new ioctl is now used to
inject a protection fault when the guest accesses an unimplemented MSR.
- Expose a subset of known-safe features from leaf 0 of the structured
extended features to guests if they are supported on the host including
RDFSBASE/RDGSBASE, BMI1/2, AVX2, AVX-512, HLE, ERMS, and RTM. Aside
from AVX-512, these features are all new instructions available for use
in ring 3 with no additional hypervisor changes needed.
2014-06-12 19:58:12 +00:00
|
|
|
/* restore guest XCR0 if XSAVE is enabled in the host */
|
|
|
|
if (rcr4() & CR4_XSAVE)
|
|
|
|
load_xcr(0, vcpu->guest_xcr0);
|
|
|
|
|
2012-10-26 03:12:40 +00:00
|
|
|
/*
|
|
|
|
* The FPU is now "dirty" with the guest's state so turn on emulation
|
|
|
|
* to trap any access to the FPU by the host.
|
|
|
|
*/
|
|
|
|
fpu_start_emulating();
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
save_guest_fpustate(struct vcpu *vcpu)
|
|
|
|
{
|
|
|
|
|
2012-10-26 03:12:40 +00:00
|
|
|
if ((rcr0() & CR0_TS) == 0)
|
|
|
|
panic("fpu emulation not enabled in host!");
|
|
|
|
|
MFC 261638,262144,262506,266765:
Add virtualized XSAVE support to bhyve which permits guests to use XSAVE and
XSAVE-enabled features like AVX.
- Store a per-cpu guest xcr0 register and handle xsetbv VM exits by emulating
the instruction.
- Only expose XSAVE to guests if XSAVE is enabled in the host. Only expose
a subset of XSAVE features currently supported by the guest and for which
the proper emulation of xsetbv is known. Currently this includes X87, SSE,
AVX, AVX-512, and Intel MPX.
- Add support for injecting hardware exceptions into the guest and use this
to trigger exceptions in the guest for invalid xsetbv operations instead
of potentially faulting in the host.
- Queue pending exceptions in the 'struct vcpu' instead of directly updating
the processor-specific VMCS or VMCB. The pending exception will be delivered
right before entering the guest.
- Rename the unused ioctl VM_INJECT_EVENT to VM_INJECT_EXCEPTION and restrict
it to only deliver x86 hardware exceptions. This new ioctl is now used to
inject a protection fault when the guest accesses an unimplemented MSR.
- Expose a subset of known-safe features from leaf 0 of the structured
extended features to guests if they are supported on the host including
RDFSBASE/RDGSBASE, BMI1/2, AVX2, AVX-512, HLE, ERMS, and RTM. Aside
from AVX-512, these features are all new instructions available for use
in ring 3 with no additional hypervisor changes needed.
2014-06-12 19:58:12 +00:00
|
|
|
/* save guest XCR0 and restore host XCR0 */
|
|
|
|
if (rcr4() & CR4_XSAVE) {
|
|
|
|
vcpu->guest_xcr0 = rxcr(0);
|
|
|
|
load_xcr(0, vmm_get_host_xcr0());
|
|
|
|
}
|
|
|
|
|
2012-10-26 03:12:40 +00:00
|
|
|
/* save guest FPU state */
|
|
|
|
fpu_stop_emulating();
|
2012-04-26 07:52:28 +00:00
|
|
|
fpusave(vcpu->guestfpu);
|
2011-05-13 04:54:01 +00:00
|
|
|
fpu_start_emulating();
|
|
|
|
}
|
|
|
|
|
2013-03-16 22:40:20 +00:00
|
|
|
/* Statistic described as "number of ticks vcpu was idle". */
static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
|
2012-10-25 04:29:21 +00:00
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
static int
|
2014-05-18 04:33:24 +00:00
|
|
|
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
|
|
|
|
bool from_idle)
|
2013-10-05 21:22:35 +00:00
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
vcpu_assert_locked(vcpu);
|
|
|
|
|
2014-05-18 04:33:24 +00:00
|
|
|
/*
|
|
|
|
* State transitions from the vmmdev_ioctl() must always begin from
|
|
|
|
* the VCPU_IDLE state. This guarantees that there is only a single
|
|
|
|
* ioctl() operating on a vcpu at any point.
|
|
|
|
*/
|
|
|
|
if (from_idle) {
|
|
|
|
while (vcpu->state != VCPU_IDLE)
|
|
|
|
msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
|
|
|
|
} else {
|
|
|
|
KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
|
|
|
|
"vcpu idle state"));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (vcpu->state == VCPU_RUNNING) {
|
|
|
|
KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
|
|
|
|
"mismatch for running vcpu", curcpu, vcpu->hostcpu));
|
|
|
|
} else {
|
|
|
|
KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
|
|
|
|
"vcpu that is not running", vcpu->hostcpu));
|
|
|
|
}
|
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
/*
|
|
|
|
* The following state transitions are allowed:
|
|
|
|
* IDLE -> FROZEN -> IDLE
|
|
|
|
* FROZEN -> RUNNING -> FROZEN
|
|
|
|
* FROZEN -> SLEEPING -> FROZEN
|
|
|
|
*/
|
|
|
|
switch (vcpu->state) {
|
|
|
|
case VCPU_IDLE:
|
|
|
|
case VCPU_RUNNING:
|
|
|
|
case VCPU_SLEEPING:
|
|
|
|
error = (newstate != VCPU_FROZEN);
|
|
|
|
break;
|
|
|
|
case VCPU_FROZEN:
|
|
|
|
error = (newstate == VCPU_FROZEN);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
error = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2014-05-18 04:33:24 +00:00
|
|
|
if (error)
|
|
|
|
return (EBUSY);
|
|
|
|
|
|
|
|
vcpu->state = newstate;
|
|
|
|
if (newstate == VCPU_RUNNING)
|
|
|
|
vcpu->hostcpu = curcpu;
|
2013-10-05 21:22:35 +00:00
|
|
|
else
|
2014-05-18 04:33:24 +00:00
|
|
|
vcpu->hostcpu = NOCPU;
|
2013-10-05 21:22:35 +00:00
|
|
|
|
2014-05-18 04:33:24 +00:00
|
|
|
if (newstate == VCPU_IDLE)
|
|
|
|
wakeup(&vcpu->state);
|
|
|
|
|
|
|
|
return (0);
|
2013-10-05 21:22:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
2014-05-18 04:33:24 +00:00
|
|
|
if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
|
2013-10-05 21:22:35 +00:00
|
|
|
panic("Error %d setting state to %d\n", error, newstate);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
2014-05-18 04:33:24 +00:00
|
|
|
if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
|
2013-10-05 21:22:35 +00:00
|
|
|
panic("Error %d setting state to %d", error, newstate);
|
|
|
|
}
|
|
|
|
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
static void
|
|
|
|
vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func)
|
|
|
|
{
|
|
|
|
|
|
|
|
KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked"));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Update 'rendezvous_func' and execute a write memory barrier to
|
|
|
|
* ensure that it is visible across all host cpus. This is not needed
|
|
|
|
* for correctness but it does ensure that all the vcpus will notice
|
|
|
|
* that the rendezvous is requested immediately.
|
|
|
|
*/
|
|
|
|
vm->rendezvous_func = func;
|
|
|
|
wmb();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Emit a rendezvous trace record: through VCPU_CTR0() when 'vcpuid' is
 * a real vcpu id (>= 0) and through VM_CTR0() otherwise.
 *
 * 'vcpuid' is parenthesized in the comparison so that an expression
 * argument is compared as a whole.  Note that 'vcpuid' is evaluated
 * more than once; do not pass an argument with side effects.
 */
#define	RENDEZVOUS_CTR0(vm, vcpuid, fmt)				\
	do {								\
		if ((vcpuid) >= 0)					\
			VCPU_CTR0(vm, vcpuid, fmt);			\
		else							\
			VM_CTR0(vm, fmt);				\
	} while (0)
|
|
|
|
|
|
|
|
static void
|
|
|
|
vm_handle_rendezvous(struct vm *vm, int vcpuid)
|
|
|
|
{
|
|
|
|
|
|
|
|
KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
|
|
|
|
("vm_handle_rendezvous: invalid vcpuid %d", vcpuid));
|
|
|
|
|
|
|
|
mtx_lock(&vm->rendezvous_mtx);
|
|
|
|
while (vm->rendezvous_func != NULL) {
|
|
|
|
/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
|
|
|
|
CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus);
|
|
|
|
|
|
|
|
if (vcpuid != -1 &&
|
|
|
|
CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
|
|
|
|
!CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
|
|
|
|
VCPU_CTR0(vm, vcpuid, "Calling rendezvous func");
|
|
|
|
(*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg);
|
|
|
|
CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
|
|
|
|
}
|
|
|
|
if (CPU_CMP(&vm->rendezvous_req_cpus,
|
|
|
|
&vm->rendezvous_done_cpus) == 0) {
|
|
|
|
VCPU_CTR0(vm, vcpuid, "Rendezvous completed");
|
|
|
|
vm_set_rendezvous_func(vm, NULL);
|
|
|
|
wakeup(&vm->rendezvous_func);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
|
|
|
|
mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
|
|
|
|
"vmrndv", 0);
|
|
|
|
}
|
|
|
|
mtx_unlock(&vm->rendezvous_mtx);
|
|
|
|
}
|
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
/*
|
|
|
|
* Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
|
|
|
|
*/
|
|
|
|
static int
|
MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
Several changes to the local APIC support in bhyve:
- Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
- If a vcpu disables its local apic and then executes a 'HLT' then spin
down the vcpu and destroy its thread context. Also modify the 'HLT'
processing to ignore pending interrupts in the IRR if interrupts have
been disabled by the guest. The interrupt cannot be injected into the
guest in any case so resuming it is futile.
- Use callout(9) to drive the vlapic timer instead of clocking it on each
VM exit.
- When the guest is bringing up the APs in the x2APIC mode a write to the
ICR register will now trigger a return to userspace with an exitcode of
VM_EXITCODE_SPINUP_AP.
- Change the vlapic timer lock to be a spinlock because the vlapic can be
accessed from within a critical section (vm run loop) when guest is using
x2apic mode.
- Fix the vlapic version register.
- Add a command to bhyvectl to inject an NMI on a specific vcpu.
- Add an API to deliver message signalled interrupts to vcpus. This allows
callers to treat the MSI 'addr' and 'data' fields as opaque and also lets
bhyve implement multiple destination modes: physical, flat and clustered.
- Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to
'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
- Consolidate the virtual apic initialization in a single function:
vlapic_reset()
- Add a generic routine to trigger an LVT interrupt that supports both
fixed and NMI delivery modes.
- Add an ioctl and bhyvectl command to trigger local interrupts inside a
guest. In particular, a global NMI similar to that raised by SERR# or
PERR# can be simulated by asserting LINT1 on all vCPUs.
- Extend the LVT table in the vCPU local APIC to support CMCI.
- Flesh out the local APIC error reporting a bit to cache errors and
report them via ESR when ESR is written to. Add support for asserting
the error LVT when an error occurs. Raise illegal vector errors when
attempting to signal an invalid vector for an interrupt or when sending
an IPI.
- Export table entries in the MADT and MP Table advertising the stock x86
config of LINT0 set to ExtInt and LINT1 wired to NMI.
2014-02-23 00:46:05 +00:00
|
|
|
vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
|
2013-10-05 21:22:35 +00:00
|
|
|
{
|
MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
Several changes to the local APIC support in bhyve:
- Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
- If a vcpu disables its local apic and then executes a 'HLT' then spin
down the vcpu and destroy its thread context. Also modify the 'HLT'
processing to ignore pending interrupts in the IRR if interrupts have
been disabled by the guest. The interrupt cannot be injected into the
guest in any case so resuming it is futile.
- Use callout(9) to drive the vlapic timer instead of clocking it on each
VM exit.
- When the guest is bringing up the APs in the x2APIC mode a write to the
ICR register will now trigger a return to userspace with an exitcode of
VM_EXITCODE_SPINUP_AP.
- Change the vlapic timer lock to be a spinlock because the vlapic can be
accessed from within a critical section (vm run loop) when guest is using
x2apic mode.
- Fix the vlapic version register.
- Add a command to bhyvectl to inject an NMI on a specific vcpu.
- Add an API to deliver message signalled interrupts to vcpus. This allows
callers to treat the MSI 'addr' and 'data' fields as opaque and also lets
bhyve implement multiple destination modes: physical, flat and clustered.
- Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to
'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
- Consolidate the virtual apic initialization in a single function:
vlapic_reset()
- Add a generic routine to trigger an LVT interrupt that supports both
fixed and NMI delivery modes.
- Add an ioctl and bhyvectl command to trigger local interrupts inside a
guest. In particular, a global NMI similar to that raised by SERR# or
PERR# can be simulated by asserting LINT1 on all vCPUs.
- Extend the LVT table in the vCPU local APIC to support CMCI.
- Flesh out the local APIC error reporting a bit to cache errors and
report them via ESR when ESR is written to. Add support for asserting
the error LVT when an error occurs. Raise illegal vector errors when
attempting to signal an invalid vector for an interrupt or when sending
an IPI.
- Export table entries in the MADT and MP Table advertising the stock x86
config of LINT0 set to ExtInt and LINT1 wired to NMI.
2014-02-23 00:46:05 +00:00
|
|
|
struct vm_exit *vmexit;
|
2013-10-05 21:22:35 +00:00
|
|
|
struct vcpu *vcpu;
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
int t, timo, spindown;
|
2013-10-05 21:22:35 +00:00
|
|
|
|
|
|
|
vcpu = &vm->vcpu[vcpuid];
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
spindown = 0;
|
2013-10-05 21:22:35 +00:00
|
|
|
|
|
|
|
vcpu_lock(vcpu);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Do a final check for pending NMI or interrupts before
|
|
|
|
* really putting this thread to sleep.
|
|
|
|
*
|
|
|
|
* These interrupts could have happened any time after we
|
|
|
|
* returned from VMRUN() and before we grabbed the vcpu lock.
|
|
|
|
*/
|
MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
Several changes to the local APIC support in bhyve:
- Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
- If a vcpu disables its local apic and then executes a 'HLT' then spin
down the vcpu and destroy its thread context. Also modify the 'HLT'
processing to ignore pending interrupts in the IRR if interrupts have
been disabled by the guest. The interrupt cannot be injected into the
guest in any case so resuming it is futile.
- Use callout(9) to drive the vlapic timer instead of clocking it on each
VM exit.
- When the guest is bringing up the APs in the x2APIC mode a write to the
ICR register will now trigger a return to userspace with an exitcode of
VM_EXITCODE_SPINUP_AP.
- Change the vlapic timer lock to be a spinlock because the vlapic can be
accessed from within a critical section (vm run loop) when guest is using
x2apic mode.
- Fix the vlapic version register.
- Add a command to bhyvectl to inject an NMI on a specific vcpu.
- Add an API to deliver message signalled interrupts to vcpus. This allows
callers to treat the MSI 'addr' and 'data' fields as opaque and also lets
bhyve implement multiple destination modes: physical, flat and clustered.
- Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to
'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
- Consolidate the virtual apic initialization in a single function:
vlapic_reset()
- Add a generic routine to trigger an LVT interrupt that supports both
fixed and NMI delivery modes.
- Add an ioctl and bhyvectl command to trigger local interrupts inside a
guest. In particular, a global NMI similar to that raised by SERR# or
PERR# can be simulated by asserting LINT1 on all vCPUs.
- Extend the LVT table in the vCPU local APIC to support CMCI.
- Flesh out the local APIC error reporting a bit to cache errors and
report them via ESR when ESR is written to. Add support for asserting
the error LVT when an error occurs. Raise illegal vector errors when
attempting to signal an invalid vector for an interrupt or when sending
an IPI.
- Export table entries in the MADT and MP Table advertising the stock x86
config of LINT0 set to ExtInt and LINT1 wired to NMI.
2014-02-23 00:46:05 +00:00
|
|
|
if (!vm_nmi_pending(vm, vcpuid) &&
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
(intr_disabled || !vlapic_pending_intr(vcpu->vlapic, NULL))) {
|
2013-10-05 21:22:35 +00:00
|
|
|
t = ticks;
|
|
|
|
vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
|
MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
Several changes to the local APIC support in bhyve:
- Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
- If a vcpu disables its local apic and then executes a 'HLT' then spin
down the vcpu and destroy its thread context. Also modify the 'HLT'
processing to ignore pending interrupts in the IRR if interrupts have
been disabled by the guest. The interrupt cannot be injected into the
guest in any case so resuming it is futile.
- Use callout(9) to drive the vlapic timer instead of clocking it on each
VM exit.
- When the guest is bringing up the APs in the x2APIC mode a write to the
ICR register will now trigger a return to userspace with an exitcode of
VM_EXITCODE_SPINUP_AP.
- Change the vlapic timer lock to be a spinlock because the vlapic can be
accessed from within a critical section (vm run loop) when guest is using
x2apic mode.
- Fix the vlapic version register.
- Add a command to bhyvectl to inject an NMI on a specific vcpu.
- Add an API to deliver message signalled interrupts to vcpus. This allows
callers to treat the MSI 'addr' and 'data' fields as opaque and also lets
bhyve implement multiple destination modes: physical, flat and clustered.
- Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to
'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
- Consolidate the virtual apic initialization in a single function:
vlapic_reset()
- Add a generic routine to trigger an LVT interrupt that supports both
fixed and NMI delivery modes.
- Add an ioctl and bhyvectl command to trigger local interrupts inside a
guest. In particular, a global NMI similar to that raised by SERR# or
PERR# can be simulated by asserting LINT1 on all vCPUs.
- Extend the LVT table in the vCPU local APIC to support CMCI.
- Flesh out the local APIC error reporting a bit to cache errors and
report them via ESR when ESR is written to. Add support for asserting
the error LVT when an error occurs. Raise illegal vector errors when
attempting to signal an invalid vector for an interrupt or when sending
an IPI.
- Export table entries in the MADT and MP Table advertising the stock x86
config of LINT0 set to ExtInt and LINT1 wired to NMI.
2014-02-23 00:46:05 +00:00
|
|
|
if (vlapic_enabled(vcpu->vlapic)) {
|
|
|
|
/*
|
|
|
|
* XXX msleep_spin() is not interruptible so use the
|
|
|
|
* 'timo' to put an upper bound on the sleep time.
|
|
|
|
*/
|
|
|
|
timo = hz;
|
|
|
|
msleep_spin(vcpu, &vcpu->mtx, "vmidle", timo);
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Spindown the vcpu if the apic is disabled and it
|
|
|
|
* had entered the halted state.
|
|
|
|
*/
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
spindown = 1;
|
MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
Several changes to the local APIC support in bhyve:
- Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
- If a vcpu disables its local apic and then executes a 'HLT' then spin
down the vcpu and destroy its thread context. Also modify the 'HLT'
processing to ignore pending interrupts in the IRR if interrupts have
been disabled by the guest. The interrupt cannot be injected into the
guest in any case so resuming it is futile.
- Use callout(9) to drive the vlapic timer instead of clocking it on each
VM exit.
- When the guest is bringing up the APs in the x2APIC mode a write to the
ICR register will now trigger a return to userspace with an exitcode of
VM_EXITCODE_SPINUP_AP.
- Change the vlapic timer lock to be a spinlock because the vlapic can be
accessed from within a critical section (vm run loop) when guest is using
x2apic mode.
- Fix the vlapic version register.
- Add a command to bhyvectl to inject an NMI on a specific vcpu.
- Add an API to deliver message signalled interrupts to vcpus. This allows
callers to treat the MSI 'addr' and 'data' fields as opaque and also lets
bhyve implement multiple destination modes: physical, flat and clustered.
- Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to
'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
- Consolidate the virtual apic initialization in a single function:
vlapic_reset()
- Add a generic routine to trigger an LVT interrupt that supports both
fixed and NMI delivery modes.
- Add an ioctl and bhyvectl command to trigger local interrupts inside a
guest. In particular, a global NMI similar to that raised by SERR# or
PERR# can be simulated by asserting LINT1 on all vCPUs.
- Extend the LVT table in the vCPU local APIC to support CMCI.
- Flesh out the local APIC error reporting a bit to cache errors and
report them via ESR when ESR is written to. Add support for asserting
the error LVT when an error occurs. Raise illegal vector errors when
attempting to signal an invalid vector for an interrupt or when sending
an IPI.
- Export table entries in the MADT and MP Table advertising the stock x86
config of LINT0 set to ExtInt and LINT1 wired to NMI.
2014-02-23 00:46:05 +00:00
|
|
|
}
|
2013-10-05 21:22:35 +00:00
|
|
|
vcpu_require_state_locked(vcpu, VCPU_FROZEN);
|
|
|
|
vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
|
|
|
|
}
|
|
|
|
vcpu_unlock(vcpu);
|
|
|
|
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
/*
|
|
|
|
* Since 'vm_deactivate_cpu()' grabs a sleep mutex we must call it
|
|
|
|
* outside the confines of the vcpu spinlock.
|
|
|
|
*/
|
|
|
|
if (spindown) {
|
|
|
|
*retu = true;
|
|
|
|
vmexit = vm_exitinfo(vm, vcpuid);
|
|
|
|
vmexit->exitcode = VM_EXITCODE_SPINDOWN_CPU;
|
|
|
|
vm_deactivate_cpu(vm, vcpuid);
|
|
|
|
VCPU_CTR0(vm, vcpuid, "spinning down cpu");
|
|
|
|
}
|
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
Several changes to the local APIC support in bhyve:
- Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
- If a vcpu disables its local apic and then executes a 'HLT' then spin
down the vcpu and destroy its thread context. Also modify the 'HLT'
processing to ignore pending interrupts in the IRR if interrupts have
been disabled by the guest. The interrupt cannot be injected into the
guest in any case so resuming it is futile.
- Use callout(9) to drive the vlapic timer instead of clocking it on each
VM exit.
- When the guest is bringing up the APs in the x2APIC mode a write to the
ICR register will now trigger a return to userspace with an exitcode of
VM_EXITCODE_SPINUP_AP.
- Change the vlapic timer lock to be a spinlock because the vlapic can be
accessed from within a critical section (vm run loop) when guest is using
x2apic mode.
- Fix the vlapic version register.
- Add a command to bhyvectl to inject an NMI on a specific vcpu.
- Add an API to deliver message signalled interrupts to vcpus. This allows
callers to treat the MSI 'addr' and 'data' fields as opaque and also lets
bhyve implement multiple destination modes: physical, flat and clustered.
- Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to
'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
- Consolidate the virtual apic initialization in a single function:
vlapic_reset()
- Add a generic routine to trigger an LVT interrupt that supports both
fixed and NMI delivery modes.
- Add an ioctl and bhyvectl command to trigger local interrupts inside a
guest. In particular, a global NMI similar to that raised by SERR# or
PERR# can be simulated by asserting LINT1 on all vCPUs.
- Extend the LVT table in the vCPU local APIC to support CMCI.
- Flesh out the local APIC error reporting a bit to cache errors and
report them via ESR when ESR is written to. Add support for asserting
the error LVT when an error occurs. Raise illegal vector errors when
attempting to signal an invalid vector for an interrupt or when sending
an IPI.
- Export table entries in the MADT and MP Table advertising the stock x86
config of LINT0 set to ExtInt and LINT1 wired to NMI.
2014-02-23 00:46:05 +00:00
|
|
|
vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
|
2013-10-05 21:22:35 +00:00
|
|
|
{
|
|
|
|
int rv, ftype;
|
|
|
|
struct vm_map *map;
|
|
|
|
struct vcpu *vcpu;
|
|
|
|
struct vm_exit *vme;
|
|
|
|
|
|
|
|
vcpu = &vm->vcpu[vcpuid];
|
|
|
|
vme = &vcpu->exitinfo;
|
|
|
|
|
|
|
|
ftype = vme->u.paging.fault_type;
|
|
|
|
KASSERT(ftype == VM_PROT_READ ||
|
|
|
|
ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
|
|
|
|
("vm_handle_paging: invalid fault_type %d", ftype));
|
|
|
|
|
|
|
|
if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
|
|
|
|
rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
|
|
|
|
vme->u.paging.gpa, ftype);
|
|
|
|
if (rv == 0)
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
map = &vm->vmspace->vm_map;
|
|
|
|
rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);
|
|
|
|
|
2014-01-23 20:21:39 +00:00
|
|
|
VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
|
|
|
|
"ftype = %d", rv, vme->u.paging.gpa, ftype);
|
2013-10-05 21:22:35 +00:00
|
|
|
|
|
|
|
if (rv != KERN_SUCCESS)
|
|
|
|
return (EFAULT);
|
|
|
|
done:
|
|
|
|
/* restart execution at the faulting instruction */
|
|
|
|
vme->inst_length = 0;
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
Several changes to the local APIC support in bhyve:
- Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
- If a vcpu disables its local apic and then executes a 'HLT' then spin
down the vcpu and destroy its thread context. Also modify the 'HLT'
processing to ignore pending interrupts in the IRR if interrupts have
been disabled by the guest. The interrupt cannot be injected into the
guest in any case so resuming it is futile.
- Use callout(9) to drive the vlapic timer instead of clocking it on each
VM exit.
- When the guest is bringing up the APs in the x2APIC mode a write to the
ICR register will now trigger a return to userspace with an exitcode of
VM_EXITCODE_SPINUP_AP.
- Change the vlapic timer lock to be a spinlock because the vlapic can be
accessed from within a critical section (vm run loop) when guest is using
x2apic mode.
- Fix the vlapic version register.
- Add a command to bhyvectl to inject an NMI on a specific vcpu.
- Add an API to deliver message signalled interrupts to vcpus. This allows
callers to treat the MSI 'addr' and 'data' fields as opaque and also lets
bhyve implement multiple destination modes: physical, flat and clustered.
- Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to
'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
- Consolidate the virtual apic initialization in a single function:
vlapic_reset()
- Add a generic routine to trigger an LVT interrupt that supports both
fixed and NMI delivery modes.
- Add an ioctl and bhyvectl command to trigger local interrupts inside a
guest. In particular, a global NMI similar to that raised by SERR# or
PERR# can be simulated by asserting LINT1 on all vCPUs.
- Extend the LVT table in the vCPU local APIC to support CMCI.
- Flesh out the local APIC error reporting a bit to cache errors and
report them via ESR when ESR is written to. Add support for asserting
the error LVT when an error occurs. Raise illegal vector errors when
attempting to signal an invalid vector for an interrupt or when sending
an IPI.
- Export table entries in the MADT and MP Table advertising the stock x86
config of LINT0 set to ExtInt and LINT1 wired to NMI.
2014-02-23 00:46:05 +00:00
|
|
|
vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
|
2013-10-05 21:22:35 +00:00
|
|
|
{
|
|
|
|
struct vie *vie;
|
|
|
|
struct vcpu *vcpu;
|
|
|
|
struct vm_exit *vme;
|
|
|
|
int error, inst_length;
|
|
|
|
uint64_t rip, gla, gpa, cr3;
|
2014-06-12 15:20:59 +00:00
|
|
|
enum vie_cpu_mode cpu_mode;
|
|
|
|
enum vie_paging_mode paging_mode;
|
2014-01-23 20:21:39 +00:00
|
|
|
mem_region_read_t mread;
|
|
|
|
mem_region_write_t mwrite;
|
2013-10-05 21:22:35 +00:00
|
|
|
|
|
|
|
vcpu = &vm->vcpu[vcpuid];
|
|
|
|
vme = &vcpu->exitinfo;
|
|
|
|
|
|
|
|
rip = vme->rip;
|
|
|
|
inst_length = vme->inst_length;
|
|
|
|
|
|
|
|
gla = vme->u.inst_emul.gla;
|
|
|
|
gpa = vme->u.inst_emul.gpa;
|
|
|
|
cr3 = vme->u.inst_emul.cr3;
|
2014-06-12 15:20:59 +00:00
|
|
|
cpu_mode = vme->u.inst_emul.cpu_mode;
|
|
|
|
paging_mode = vme->u.inst_emul.paging_mode;
|
2013-10-05 21:22:35 +00:00
|
|
|
vie = &vme->u.inst_emul.vie;
|
|
|
|
|
|
|
|
vie_init(vie);
|
|
|
|
|
|
|
|
/* Fetch, decode and emulate the faulting instruction */
|
2014-06-12 15:20:59 +00:00
|
|
|
if (vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3,
|
|
|
|
paging_mode, vie) != 0)
|
2013-10-05 21:22:35 +00:00
|
|
|
return (EFAULT);
|
|
|
|
|
2014-06-12 15:20:59 +00:00
|
|
|
if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, vie) != 0)
|
2013-10-05 21:22:35 +00:00
|
|
|
return (EFAULT);
|
|
|
|
|
2014-01-23 20:21:39 +00:00
|
|
|
/* return to userland unless this is an in-kernel emulated device */
|
|
|
|
if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
|
|
|
|
mread = lapic_mmio_read;
|
|
|
|
mwrite = lapic_mmio_write;
|
|
|
|
} else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
|
|
|
|
mread = vioapic_mmio_read;
|
|
|
|
mwrite = vioapic_mmio_write;
|
|
|
|
} else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
|
|
|
|
mread = vhpet_mmio_read;
|
|
|
|
mwrite = vhpet_mmio_write;
|
|
|
|
} else {
|
MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
Several changes to the local APIC support in bhyve:
- Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
- If a vcpu disables its local apic and then executes a 'HLT' then spin
down the vcpu and destroy its thread context. Also modify the 'HLT'
processing to ignore pending interrupts in the IRR if interrupts have
been disabled by the guest. The interrupt cannot be injected into the
guest in any case so resuming it is futile.
- Use callout(9) to drive the vlapic timer instead of clocking it on each
VM exit.
- When the guest is bringing up the APs in the x2APIC mode a write to the
ICR register will now trigger a return to userspace with an exitcode of
VM_EXITCODE_SPINUP_AP.
- Change the vlapic timer lock to be a spinlock because the vlapic can be
accessed from within a critical section (vm run loop) when guest is using
x2apic mode.
- Fix the vlapic version register.
- Add a command to bhyvectl to inject an NMI on a specific vcpu.
- Add an API to deliver message signalled interrupts to vcpus. This allows
callers to treat the MSI 'addr' and 'data' fields as opaque and also lets
bhyve implement multiple destination modes: physical, flat and clustered.
- Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to
'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
- Consolidate the virtual apic initialization in a single function:
vlapic_reset()
- Add a generic routine to trigger an LVT interrupt that supports both
fixed and NMI delivery modes.
- Add an ioctl and bhyvectl command to trigger local interrupts inside a
guest. In particular, a global NMI similar to that raised by SERR# or
PERR# can be simulated by asserting LINT1 on all vCPUs.
- Extend the LVT table in the vCPU local APIC to support CMCI.
- Flesh out the local APIC error reporting a bit to cache errors and
report them via ESR when ESR is written to. Add support for asserting
the error LVT when an error occurs. Raise illegal vector errors when
attempting to signal an invalid vector for an interrupt or when sending
an IPI.
- Export table entries in the MADT and MP Table advertising the stock x86
config of LINT0 set to ExtInt and LINT1 wired to NMI.
2014-02-23 00:46:05 +00:00
|
|
|
*retu = true;
|
2013-10-05 21:22:35 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
Several changes to the local APIC support in bhyve:
- Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
- If a vcpu disables its local apic and then executes a 'HLT' then spin
down the vcpu and destroy its thread context. Also modify the 'HLT'
processing to ignore pending interrupts in the IRR if interrupts have
been disabled by the guest. The interrupt cannot be injected into the
guest in any case so resuming it is futile.
- Use callout(9) to drive the vlapic timer instead of clocking it on each
VM exit.
- When the guest is bringing up the APs in the x2APIC mode a write to the
ICR register will now trigger a return to userspace with an exitcode of
VM_EXITCODE_SPINUP_AP.
- Change the vlapic timer lock to be a spinlock because the vlapic can be
accessed from within a critical section (vm run loop) when guest is using
x2apic mode.
- Fix the vlapic version register.
- Add a command to bhyvectl to inject an NMI on a specific vcpu.
- Add an API to deliver message signalled interrupts to vcpus. This allows
callers to treat the MSI 'addr' and 'data' fields as opaque and also lets
bhyve implement multiple destination modes: physical, flat and clustered.
- Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to
'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
- Consolidate the virtual apic initialization in a single function:
vlapic_reset()
- Add a generic routine to trigger an LVT interrupt that supports both
fixed and NMI delivery modes.
- Add an ioctl and bhyvectl command to trigger local interrupts inside a
guest. In particular, a global NMI similar to that raised by SERR# or
PERR# can be simulated by asserting LINT1 on all vCPUs.
- Extend the LVT table in the vCPU local APIC to support CMCI.
- Flesh out the local APIC error reporting a bit to cache errors and
report them via ESR when ESR is written to. Add support for asserting
the error LVT when an error occurs. Raise illegal vector errors when
attempting to signal an invalid vector for an interrupt or when sending
an IPI.
- Export table entries in the MADT and MP Table advertising the stock x86
config of LINT0 set to ExtInt and LINT1 wired to NMI.
2014-02-23 00:46:05 +00:00
|
|
|
error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite,
|
|
|
|
retu);
|
2013-10-05 21:22:35 +00:00
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
int
|
|
|
|
vm_run(struct vm *vm, struct vm_run *vmrun)
|
|
|
|
{
|
2013-10-05 21:22:35 +00:00
|
|
|
int error, vcpuid;
|
2011-05-13 04:54:01 +00:00
|
|
|
struct vcpu *vcpu;
|
|
|
|
struct pcb *pcb;
|
2012-10-25 04:29:21 +00:00
|
|
|
uint64_t tscval, rip;
|
|
|
|
struct vm_exit *vme;
|
MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
Several changes to the local APIC support in bhyve:
- Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
- If a vcpu disables its local apic and then executes a 'HLT' then spin
down the vcpu and destroy its thread context. Also modify the 'HLT'
processing to ignore pending interrupts in the IRR if interrupts have
been disabled by the guest. The interrupt cannot be injected into the
guest in any case so resuming it is futile.
- Use callout(9) to drive the vlapic timer instead of clocking it on each
VM exit.
- When the guest is bringing up the APs in the x2APIC mode a write to the
ICR register will now trigger a return to userspace with an exitcode of
VM_EXITCODE_SPINUP_AP.
- Change the vlapic timer lock to be a spinlock because the vlapic can be
accessed from within a critical section (vm run loop) when guest is using
x2apic mode.
- Fix the vlapic version register.
- Add a command to bhyvectl to inject an NMI on a specific vcpu.
- Add an API to deliver message signalled interrupts to vcpus. This allows
callers to treat the MSI 'addr' and 'data' fields as opaque and also lets
bhyve implement multiple destination modes: physical, flat and clustered.
- Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to
'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
- Consolidate the virtual apic initialization in a single function:
vlapic_reset()
- Add a generic routine to trigger an LVT interrupt that supports both
fixed and NMI delivery modes.
- Add an ioctl and bhyvectl command to trigger local interrupts inside a
guest. In particular, a global NMI similar to that raised by SERR# or
PERR# can be simulated by asserting LINT1 on all vCPUs.
- Extend the LVT table in the vCPU local APIC to support CMCI.
- Flesh out the local APIC error reporting a bit to cache errors and
report them via ESR when ESR is written to. Add support for asserting
the error LVT when an error occurs. Raise illegal vector errors when
attempting to signal an invalid vector for an interrupt or when sending
an IPI.
- Export table entries in the MADT and MP Table advertising the stock x86
config of LINT0 set to ExtInt and LINT1 wired to NMI.
2014-02-23 00:46:05 +00:00
|
|
|
bool retu, intr_disabled;
|
2013-10-05 21:22:35 +00:00
|
|
|
pmap_t pmap;
|
2011-05-13 04:54:01 +00:00
|
|
|
|
|
|
|
vcpuid = vmrun->cpuid;
|
|
|
|
|
|
|
|
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
|
|
|
|
return (EINVAL);
|
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
pmap = vmspace_pmap(vm->vmspace);
|
2011-05-13 04:54:01 +00:00
|
|
|
vcpu = &vm->vcpu[vcpuid];
|
2013-10-05 21:22:35 +00:00
|
|
|
vme = &vcpu->exitinfo;
|
2012-10-25 04:29:21 +00:00
|
|
|
rip = vmrun->rip;
|
|
|
|
restart:
|
2011-05-13 04:54:01 +00:00
|
|
|
critical_enter();
|
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
|
|
|
|
("vm_run: absurd pm_active"));
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
tscval = rdtsc();
|
|
|
|
|
|
|
|
pcb = PCPU_GET(curpcb);
|
2011-05-14 20:35:01 +00:00
|
|
|
set_pcb_flags(pcb, PCB_FULL_IRET);
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2012-04-26 07:52:28 +00:00
|
|
|
restore_guest_msrs(vm, vcpuid);
|
2011-05-13 04:54:01 +00:00
|
|
|
restore_guest_fpustate(vcpu);
|
2012-10-12 18:32:44 +00:00
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
error = VMRUN(vm->cookie, vcpuid, rip, pmap, &vm->rendezvous_func);
|
2013-10-05 21:22:35 +00:00
|
|
|
vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
|
2012-10-12 18:32:44 +00:00
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
save_guest_fpustate(vcpu);
|
|
|
|
restore_host_msrs(vm, vcpuid);
|
|
|
|
|
|
|
|
vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
|
|
|
|
|
|
|
|
critical_exit();
|
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
if (error == 0) {
|
MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
Several changes to the local APIC support in bhyve:
- Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
- If a vcpu disables its local apic and then executes a 'HLT' then spin
down the vcpu and destroy its thread context. Also modify the 'HLT'
processing to ignore pending interrupts in the IRR if interrupts have
been disabled by the guest. The interrupt cannot be injected into the
guest in any case so resuming it is futile.
- Use callout(9) to drive the vlapic timer instead of clocking it on each
VM exit.
- When the guest is bringing up the APs in the x2APIC mode a write to the
ICR register will now trigger a return to userspace with an exitcode of
VM_EXITCODE_SPINUP_AP.
- Change the vlapic timer lock to be a spinlock because the vlapic can be
accessed from within a critical section (vm run loop) when guest is using
x2apic mode.
- Fix the vlapic version register.
- Add a command to bhyvectl to inject an NMI on a specific vcpu.
- Add an API to deliver message signalled interrupts to vcpus. This allows
callers to treat the MSI 'addr' and 'data' fields as opaque and also lets
bhyve implement multiple destination modes: physical, flat and clustered.
- Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to
'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
- Consolidate the virtual apic initialization in a single function:
vlapic_reset()
- Add a generic routine to trigger an LVT interrupt that supports both
fixed and NMI delivery modes.
- Add an ioctl and bhyvectl command to trigger local interrupts inside a
guest. In particular, a global NMI similar to that raised by SERR# or
PERR# can be simulated by asserting LINT1 on all vCPUs.
- Extend the LVT table in the vCPU local APIC to support CMCI.
- Flesh out the local APIC error reporting a bit to cache errors and
report them via ESR when ESR is written to. Add support for asserting
the error LVT when an error occurs. Raise illegal vector errors when
attempting to signal an invalid vector for an interrupt or when sending
an IPI.
- Export table entries in the MADT and MP Table advertising the stock x86
config of LINT0 set to ExtInt and LINT1 wired to NMI.
2014-02-23 00:46:05 +00:00
|
|
|
retu = false;
|
2013-10-05 21:22:35 +00:00
|
|
|
switch (vme->exitcode) {
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
case VM_EXITCODE_IOAPIC_EOI:
|
|
|
|
vioapic_process_eoi(vm, vcpuid,
|
|
|
|
vme->u.ioapic_eoi.vector);
|
|
|
|
break;
|
|
|
|
case VM_EXITCODE_RENDEZVOUS:
|
|
|
|
vm_handle_rendezvous(vm, vcpuid);
|
|
|
|
error = 0;
|
|
|
|
break;
|
2013-10-05 21:22:35 +00:00
|
|
|
case VM_EXITCODE_HLT:
|
MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
Several changes to the local APIC support in bhyve:
- Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
- If a vcpu disables its local apic and then executes a 'HLT' then spin
down the vcpu and destroy its thread context. Also modify the 'HLT'
processing to ignore pending interrupts in the IRR if interrupts have
been disabled by the guest. The interrupt cannot be injected into the
guest in any case so resuming it is futile.
- Use callout(9) to drive the vlapic timer instead of clocking it on each
VM exit.
- When the guest is bringing up the APs in the x2APIC mode a write to the
ICR register will now trigger a return to userspace with an exitcode of
VM_EXITCODE_SPINUP_AP.
- Change the vlapic timer lock to be a spinlock because the vlapic can be
accessed from within a critical section (vm run loop) when guest is using
x2apic mode.
- Fix the vlapic version register.
- Add a command to bhyvectl to inject an NMI on a specific vcpu.
- Add an API to deliver message signalled interrupts to vcpus. This allows
callers to treat the MSI 'addr' and 'data' fields as opaque and also lets
bhyve implement multiple destination modes: physical, flat and clustered.
- Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to
'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
- Consolidate the virtual apic initialization in a single function:
vlapic_reset()
- Add a generic routine to trigger an LVT interrupt that supports both
fixed and NMI delivery modes.
- Add an ioctl and bhyvectl command to trigger local interrupts inside a
guest. In particular, a global NMI similar to that raised by SERR# or
PERR# can be simulated by asserting LINT1 on all vCPUs.
- Extend the LVT table in the vCPU local APIC to support CMCI.
- Flesh out the local APIC error reporting a bit to cache errors and
report them via ESR when ESR is written to. Add support for asserting
the error LVT when an error occurs. Raise illegal vector errors when
attempting to signal an invalid vector for an interrupt or when sending
an IPI.
- Export table entries in the MADT and MP Table advertising the stock x86
config of LINT0 set to ExtInt and LINT1 wired to NMI.
2014-02-23 00:46:05 +00:00
|
|
|
intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
|
|
|
|
error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
|
2013-10-05 21:22:35 +00:00
|
|
|
break;
|
|
|
|
case VM_EXITCODE_PAGING:
|
|
|
|
error = vm_handle_paging(vm, vcpuid, &retu);
|
|
|
|
break;
|
|
|
|
case VM_EXITCODE_INST_EMUL:
|
|
|
|
error = vm_handle_inst_emul(vm, vcpuid, &retu);
|
|
|
|
break;
|
|
|
|
default:
|
MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
Several changes to the local APIC support in bhyve:
- Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
- If a vcpu disables its local apic and then executes a 'HLT' then spin
down the vcpu and destroy its thread context. Also modify the 'HLT'
processing to ignore pending interrupts in the IRR if interrupts have
been disabled by the guest. The interrupt cannot be injected into the
guest in any case so resuming it is futile.
- Use callout(9) to drive the vlapic timer instead of clocking it on each
VM exit.
- When the guest is bringing up the APs in the x2APIC mode a write to the
ICR register will now trigger a return to userspace with an exitcode of
VM_EXITCODE_SPINUP_AP.
- Change the vlapic timer lock to be a spinlock because the vlapic can be
accessed from within a critical section (vm run loop) when guest is using
x2apic mode.
- Fix the vlapic version register.
- Add a command to bhyvectl to inject an NMI on a specific vcpu.
- Add an API to deliver message signalled interrupts to vcpus. This allows
callers to treat the MSI 'addr' and 'data' fields as opaque and also lets
bhyve implement multiple destination modes: physical, flat and clustered.
- Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to
'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
- Consolidate the virtual apic initialization in a single function:
vlapic_reset()
- Add a generic routine to trigger an LVT interrupt that supports both
fixed and NMI delivery modes.
- Add an ioctl and bhyvectl command to trigger local interrupts inside a
guest. In particular, a global NMI similar to that raised by SERR# or
PERR# can be simulated by asserting LINT1 on all vCPUs.
- Extend the LVT table in the vCPU local APIC to support CMCI.
- Flesh out the local APIC error reporting a bit to cache errors and
report them via ESR when ESR is written to. Add support for asserting
the error LVT when an error occurs. Raise illegal vector errors when
attempting to signal an invalid vector for an interrupt or when sending
an IPI.
- Export table entries in the MADT and MP Table advertising the stock x86
config of LINT0 set to ExtInt and LINT1 wired to NMI.
2014-02-23 00:46:05 +00:00
|
|
|
retu = true; /* handled in userland */
|
2013-10-05 21:22:35 +00:00
|
|
|
break;
|
2012-10-25 04:29:21 +00:00
|
|
|
}
|
2013-10-05 21:22:35 +00:00
|
|
|
}
|
2012-10-25 04:29:21 +00:00
|
|
|
|
MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
Several changes to the local APIC support in bhyve:
- Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
- If a vcpu disables its local apic and then executes a 'HLT' then spin
down the vcpu and destroy its thread context. Also modify the 'HLT'
processing to ignore pending interrupts in the IRR if interrupts have
been disabled by the guest. The interrupt cannot be injected into the
guest in any case so resuming it is futile.
- Use callout(9) to drive the vlapic timer instead of clocking it on each
VM exit.
- When the guest is bringing up the APs in the x2APIC mode a write to the
ICR register will now trigger a return to userspace with an exitcode of
VM_EXITCODE_SPINUP_AP.
- Change the vlapic timer lock to be a spinlock because the vlapic can be
accessed from within a critical section (vm run loop) when guest is using
x2apic mode.
- Fix the vlapic version register.
- Add a command to bhyvectl to inject an NMI on a specific vcpu.
- Add an API to deliver message signalled interrupts to vcpus. This allows
callers to treat the MSI 'addr' and 'data' fields as opaque and also lets
bhyve implement multiple destination modes: physical, flat and clustered.
- Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to
'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
- Consolidate the virtual apic initialization in a single function:
vlapic_reset()
- Add a generic routine to trigger an LVT interrupt that supports both
fixed and NMI delivery modes.
- Add an ioctl and bhyvectl command to trigger local interrupts inside a
guest. In particular, a global NMI similar to that raised by SERR# or
PERR# can be simulated by asserting LINT1 on all vCPUs.
- Extend the LVT table in the vCPU local APIC to support CMCI.
- Flesh out the local APIC error reporting a bit to cache errors and
report them via ESR when ESR is written to. Add support for asserting
the error LVT when an error occurs. Raise illegal vector errors when
attempting to signal an invalid vector for an interrupt or when sending
an IPI.
- Export table entries in the MADT and MP Table advertising the stock x86
config of LINT0 set to ExtInt and LINT1 wired to NMI.
2014-02-23 00:46:05 +00:00
|
|
|
if (error == 0 && retu == false) {
|
2012-10-25 04:29:21 +00:00
|
|
|
rip = vme->rip + vme->inst_length;
|
|
|
|
goto restart;
|
|
|
|
}
|
|
|
|
|
2013-10-05 21:22:35 +00:00
|
|
|
/* copy the exit information */
|
|
|
|
bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
|
2011-05-13 04:54:01 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
MFC 261638,262144,262506,266765:
Add virtualized XSAVE support to bhyve which permits guests to use XSAVE and
XSAVE-enabled features like AVX.
- Store a per-cpu guest xcr0 register and handle xsetbv VM exits by emulating
the instruction.
- Only expose XSAVE to guests if XSAVE is enabled in the host. Only expose
a subset of XSAVE features currently supported by the guest and for which
the proper emulation of xsetbv is known. Currently this includes X87, SSE,
AVX, AVX-512, and Intel MPX.
- Add support for injecting hardware exceptions into the guest and use this
to trigger exceptions in the guest for invalid xsetbv operations instead
of potentially faulting in the host.
- Queue pending exceptions in the 'struct vcpu' instead of directly updating
the processor-specific VMCS or VMCB. The pending exception will be delivered
right before entering the guest.
- Rename the unused ioctl VM_INJECT_EVENT to VM_INJECT_EXCEPTION and restrict
it to only deliver x86 hardware exceptions. This new ioctl is now used to
inject a protection fault when the guest accesses an unimplemented MSR.
- Expose a subset of known-safe features from leaf 0 of the structured
extended features to guests if they are supported on the host including
RDFSBASE/RDGSBASE, BMI1/2, AVX2, AVX-512, HLE, ERMS, and RTM. Aside
from AVX-512, these features are all new instructions available for use
in ring 3 with no additional hypervisor changes needed.
2014-06-12 19:58:12 +00:00
|
|
|
vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
|
2011-05-13 04:54:01 +00:00
|
|
|
{
|
MFC 261638,262144,262506,266765:
Add virtualized XSAVE support to bhyve which permits guests to use XSAVE and
XSAVE-enabled features like AVX.
- Store a per-cpu guest xcr0 register and handle xsetbv VM exits by emulating
the instruction.
- Only expose XSAVE to guests if XSAVE is enabled in the host. Only expose
a subset of XSAVE features currently supported by the guest and for which
the proper emulation of xsetbv is known. Currently this includes X87, SSE,
AVX, AVX-512, and Intel MPX.
- Add support for injecting hardware exceptions into the guest and use this
to trigger exceptions in the guest for invalid xsetbv operations instead
of potentially faulting in the host.
- Queue pending exceptions in the 'struct vcpu' instead of directly updating
the processor-specific VMCS or VMCB. The pending exception will be delivered
right before entering the guest.
- Rename the unused ioctl VM_INJECT_EVENT to VM_INJECT_EXCEPTION and restrict
it to only deliver x86 hardware exceptions. This new ioctl is now used to
inject a protection fault when the guest accesses an unimplemented MSR.
- Expose a subset of known-safe features from leaf 0 of the structured
extended features to guests if they are supported on the host including
RDFSBASE/RDGSBASE, BMI1/2, AVX2, AVX-512, HLE, ERMS, and RTM. Aside
from AVX-512, these features are all new instructions available for use
in ring 3 with no additional hypervisor changes needed.
2014-06-12 19:58:12 +00:00
|
|
|
struct vcpu *vcpu;
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
|
|
|
|
return (EINVAL);
|
|
|
|
|
MFC 261638,262144,262506,266765:
Add virtualized XSAVE support to bhyve which permits guests to use XSAVE and
XSAVE-enabled features like AVX.
- Store a per-cpu guest xcr0 register and handle xsetbv VM exits by emulating
the instruction.
- Only expose XSAVE to guests if XSAVE is enabled in the host. Only expose
a subset of XSAVE features currently supported by the guest and for which
the proper emulation of xsetbv is known. Currently this includes X87, SSE,
AVX, AVX-512, and Intel MPX.
- Add support for injecting hardware exceptions into the guest and use this
to trigger exceptions in the guest for invalid xsetbv operations instead
of potentially faulting in the host.
- Queue pending exceptions in the 'struct vcpu' instead of directly updating
the processor-specific VMCS or VMCB. The pending exception will be delivered
right before entering the guest.
- Rename the unused ioctl VM_INJECT_EVENT to VM_INJECT_EXCEPTION and restrict
it to only deliver x86 hardware exceptions. This new ioctl is now used to
inject a protection fault when the guest accesses an unimplemented MSR.
- Expose a subset of known-safe features from leaf 0 of the structured
extended features to guests if they are supported on the host including
RDFSBASE/RDGSBASE, BMI1/2, AVX2, AVX-512, HLE, ERMS, and RTM. Aside
from AVX-512, these features are all new instructions available for use
in ring 3 with no additional hypervisor changes needed.
2014-06-12 19:58:12 +00:00
|
|
|
if (exception->vector < 0 || exception->vector >= 32)
|
2011-05-13 04:54:01 +00:00
|
|
|
return (EINVAL);
|
|
|
|
|
MFC 261638,262144,262506,266765:
Add virtualized XSAVE support to bhyve which permits guests to use XSAVE and
XSAVE-enabled features like AVX.
- Store a per-cpu guest xcr0 register and handle xsetbv VM exits by emulating
the instruction.
- Only expose XSAVE to guests if XSAVE is enabled in the host. Only expose
a subset of XSAVE features currently supported by the guest and for which
the proper emulation of xsetbv is known. Currently this includes X87, SSE,
AVX, AVX-512, and Intel MPX.
- Add support for injecting hardware exceptions into the guest and use this
to trigger exceptions in the guest for invalid xsetbv operations instead
of potentially faulting in the host.
- Queue pending exceptions in the 'struct vcpu' instead of directly updating
the processor-specific VMCS or VMCB. The pending exception will be delivered
right before entering the guest.
- Rename the unused ioctl VM_INJECT_EVENT to VM_INJECT_EXCEPTION and restrict
it to only deliver x86 hardware exceptions. This new ioctl is now used to
inject a protection fault when the guest accesses an unimplemented MSR.
- Expose a subset of known-safe features from leaf 0 of the structured
extended features to guests if they are supported on the host including
RDFSBASE/RDGSBASE, BMI1/2, AVX2, AVX-512, HLE, ERMS, and RTM. Aside
from AVX-512, these features are all new instructions available for use
in ring 3 with no additional hypervisor changes needed.
2014-06-12 19:58:12 +00:00
|
|
|
vcpu = &vm->vcpu[vcpuid];
|
|
|
|
|
|
|
|
if (vcpu->exception_pending) {
|
|
|
|
VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to "
|
|
|
|
"pending exception %d", exception->vector,
|
|
|
|
vcpu->exception.vector);
|
|
|
|
return (EBUSY);
|
|
|
|
}
|
|
|
|
|
|
|
|
vcpu->exception_pending = 1;
|
|
|
|
vcpu->exception = *exception;
|
|
|
|
VCPU_CTR1(vm, vcpuid, "Exception %d pending", exception->vector);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *exception)
|
|
|
|
{
|
|
|
|
struct vcpu *vcpu;
|
|
|
|
int pending;
|
|
|
|
|
|
|
|
KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));
|
|
|
|
|
|
|
|
vcpu = &vm->vcpu[vcpuid];
|
|
|
|
pending = vcpu->exception_pending;
|
|
|
|
if (pending) {
|
|
|
|
vcpu->exception_pending = 0;
|
|
|
|
*exception = vcpu->exception;
|
|
|
|
VCPU_CTR1(vm, vcpuid, "Exception %d delivered",
|
|
|
|
exception->vector);
|
|
|
|
}
|
|
|
|
return (pending);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
vm_inject_fault(struct vm *vm, int vcpuid, struct vm_exception *exception)
|
|
|
|
{
|
|
|
|
struct vm_exit *vmexit;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
error = vm_inject_exception(vm, vcpuid, exception);
|
|
|
|
KASSERT(error == 0, ("vm_inject_exception error %d", error));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A fault-like exception allows the instruction to be restarted
|
|
|
|
* after the exception handler returns.
|
|
|
|
*
|
|
|
|
* By setting the inst_length to 0 we ensure that the instruction
|
|
|
|
* pointer remains at the faulting instruction.
|
|
|
|
*/
|
|
|
|
vmexit = vm_exitinfo(vm, vcpuid);
|
|
|
|
vmexit->inst_length = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
vm_inject_gp(struct vm *vm, int vcpuid)
|
|
|
|
{
|
|
|
|
struct vm_exception gpf = {
|
|
|
|
.vector = IDT_GP,
|
|
|
|
.error_code_valid = 1,
|
|
|
|
.error_code = 0
|
|
|
|
};
|
|
|
|
|
|
|
|
vm_inject_fault(vm, vcpuid, &gpf);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
vm_inject_ud(struct vm *vm, int vcpuid)
|
|
|
|
{
|
|
|
|
struct vm_exception udf = {
|
|
|
|
.vector = IDT_UD,
|
|
|
|
.error_code_valid = 0
|
|
|
|
};
|
2011-05-13 04:54:01 +00:00
|
|
|
|
MFC 261638,262144,262506,266765:
Add virtualized XSAVE support to bhyve which permits guests to use XSAVE and
XSAVE-enabled features like AVX.
- Store a per-cpu guest xcr0 register and handle xsetbv VM exits by emulating
the instruction.
- Only expose XSAVE to guests if XSAVE is enabled in the host. Only expose
a subset of XSAVE features currently supported by the guest and for which
the proper emulation of xsetbv is known. Currently this includes X87, SSE,
AVX, AVX-512, and Intel MPX.
- Add support for injecting hardware exceptions into the guest and use this
to trigger exceptions in the guest for invalid xsetbv operations instead
of potentially faulting in the host.
- Queue pending exceptions in the 'struct vcpu' instead of directly updating
the processor-specific VMCS or VMCB. The pending exception will be delivered
right before entering the guest.
- Rename the unused ioctl VM_INJECT_EVENT to VM_INJECT_EXCEPTION and restrict
it to only deliver x86 hardware exceptions. This new ioctl is now used to
inject a protection fault when the guest accesses an unimplemented MSR.
- Expose a subset of known-safe features from leaf 0 of the structured
extended features to guests if they are supported on the host including
RDFSBASE/RDGSBASE, BMI1/2, AVX2, AVX-512, HLE, ERMS, and RTM. Aside
from AVX-512, these features are all new instructions available for use
in ring 3 with no additional hypervisor changes needed.
2014-06-12 19:58:12 +00:00
|
|
|
vm_inject_fault(vm, vcpuid, &udf);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
2013-03-16 22:40:20 +00:00
|
|
|
/* Statistic bumped by vm_nmi_clear() each time a pending NMI is cleared. */
static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");
|
2012-10-24 02:54:21 +00:00
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
int
|
2012-10-24 02:54:21 +00:00
|
|
|
vm_inject_nmi(struct vm *vm, int vcpuid)
|
2011-05-13 04:54:01 +00:00
|
|
|
{
|
2012-10-24 02:54:21 +00:00
|
|
|
struct vcpu *vcpu;
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2012-10-24 02:54:21 +00:00
|
|
|
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
|
2011-05-13 04:54:01 +00:00
|
|
|
return (EINVAL);
|
|
|
|
|
2012-10-24 02:54:21 +00:00
|
|
|
vcpu = &vm->vcpu[vcpuid];
|
|
|
|
|
|
|
|
vcpu->nmi_pending = 1;
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
vcpu_notify_event(vm, vcpuid, false);
|
2012-10-24 02:54:21 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
vm_nmi_pending(struct vm *vm, int vcpuid)
|
|
|
|
{
|
|
|
|
struct vcpu *vcpu;
|
|
|
|
|
|
|
|
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
|
|
|
|
panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
|
|
|
|
|
|
|
|
vcpu = &vm->vcpu[vcpuid];
|
|
|
|
|
|
|
|
return (vcpu->nmi_pending);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
vm_nmi_clear(struct vm *vm, int vcpuid)
|
|
|
|
{
|
|
|
|
struct vcpu *vcpu;
|
|
|
|
|
|
|
|
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
|
|
|
|
panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
|
|
|
|
|
|
|
|
vcpu = &vm->vcpu[vcpuid];
|
|
|
|
|
|
|
|
if (vcpu->nmi_pending == 0)
|
|
|
|
panic("vm_nmi_clear: inconsistent nmi_pending state");
|
|
|
|
|
|
|
|
vcpu->nmi_pending = 0;
|
|
|
|
vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
|
|
|
|
{
|
|
|
|
if (vcpu < 0 || vcpu >= VM_MAXCPU)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
if (type < 0 || type >= VM_CAP_MAX)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
return (VMGETCAP(vm->cookie, vcpu, type, retval));
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
vm_set_capability(struct vm *vm, int vcpu, int type, int val)
|
|
|
|
{
|
|
|
|
if (vcpu < 0 || vcpu >= VM_MAXCPU)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
if (type < 0 || type >= VM_CAP_MAX)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
return (VMSETCAP(vm->cookie, vcpu, type, val));
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t *
|
|
|
|
vm_guest_msrs(struct vm *vm, int cpu)
|
|
|
|
{
|
|
|
|
return (vm->vcpu[cpu].guest_msrs);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct vlapic *
|
|
|
|
vm_lapic(struct vm *vm, int cpu)
|
|
|
|
{
|
|
|
|
return (vm->vcpu[cpu].vlapic);
|
|
|
|
}
|
|
|
|
|
2014-01-23 20:21:39 +00:00
|
|
|
struct vioapic *
|
|
|
|
vm_ioapic(struct vm *vm)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (vm->vioapic);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct vhpet *
|
|
|
|
vm_hpet(struct vm *vm)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (vm->vhpet);
|
|
|
|
}
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
boolean_t
|
|
|
|
vmm_is_pptdev(int bus, int slot, int func)
|
|
|
|
{
|
2013-02-01 01:16:26 +00:00
|
|
|
int found, i, n;
|
|
|
|
int b, s, f;
|
2011-05-13 04:54:01 +00:00
|
|
|
char *val, *cp, *cp2;
|
|
|
|
|
|
|
|
/*
|
2013-02-01 01:16:26 +00:00
|
|
|
* XXX
|
|
|
|
* The length of an environment variable is limited to 128 bytes which
|
|
|
|
* puts an upper limit on the number of passthru devices that may be
|
|
|
|
* specified using a single environment variable.
|
|
|
|
*
|
|
|
|
* Work around this by scanning multiple environment variable
|
|
|
|
* names instead of a single one - yuck!
|
2011-05-13 04:54:01 +00:00
|
|
|
*/
|
2013-02-01 01:16:26 +00:00
|
|
|
const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };
|
|
|
|
|
|
|
|
/* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
|
2011-05-13 04:54:01 +00:00
|
|
|
found = 0;
|
2013-02-01 01:16:26 +00:00
|
|
|
for (i = 0; names[i] != NULL && !found; i++) {
|
|
|
|
cp = val = getenv(names[i]);
|
|
|
|
while (cp != NULL && *cp != '\0') {
|
|
|
|
if ((cp2 = strchr(cp, ' ')) != NULL)
|
|
|
|
*cp2 = '\0';
|
|
|
|
|
|
|
|
n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
|
|
|
|
if (n == 3 && bus == b && slot == s && func == f) {
|
|
|
|
found = 1;
|
|
|
|
break;
|
|
|
|
}
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2013-02-01 01:16:26 +00:00
|
|
|
if (cp2 != NULL)
|
|
|
|
*cp2++ = ' ';
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2013-02-01 01:16:26 +00:00
|
|
|
cp = cp2;
|
|
|
|
}
|
|
|
|
freeenv(val);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
return (found);
|
|
|
|
}
|
|
|
|
|
|
|
|
void *
|
|
|
|
vm_iommu_domain(struct vm *vm)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (vm->iommu);
|
|
|
|
}
|
|
|
|
|
2012-10-12 18:32:44 +00:00
|
|
|
int
|
2014-05-18 04:33:24 +00:00
|
|
|
vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
|
|
|
|
bool from_idle)
|
2011-05-13 04:54:01 +00:00
|
|
|
{
|
2012-10-12 18:32:44 +00:00
|
|
|
int error;
|
2011-05-13 04:54:01 +00:00
|
|
|
struct vcpu *vcpu;
|
|
|
|
|
|
|
|
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
|
|
|
|
panic("vm_set_run_state: invalid vcpuid %d", vcpuid);
|
|
|
|
|
|
|
|
vcpu = &vm->vcpu[vcpuid];
|
|
|
|
|
2012-10-12 18:32:44 +00:00
|
|
|
vcpu_lock(vcpu);
|
2014-05-18 04:33:24 +00:00
|
|
|
error = vcpu_set_state_locked(vcpu, newstate, from_idle);
|
2012-10-12 18:32:44 +00:00
|
|
|
vcpu_unlock(vcpu);
|
|
|
|
|
|
|
|
return (error);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
2012-10-12 18:32:44 +00:00
|
|
|
enum vcpu_state
|
2013-04-25 04:56:43 +00:00
|
|
|
vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
|
2011-05-13 04:54:01 +00:00
|
|
|
{
|
|
|
|
struct vcpu *vcpu;
|
2012-10-12 18:32:44 +00:00
|
|
|
enum vcpu_state state;
|
2011-05-13 04:54:01 +00:00
|
|
|
|
|
|
|
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
|
|
|
|
panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
|
|
|
|
|
|
|
|
vcpu = &vm->vcpu[vcpuid];
|
|
|
|
|
2012-10-12 18:32:44 +00:00
|
|
|
vcpu_lock(vcpu);
|
|
|
|
state = vcpu->state;
|
2013-04-25 04:56:43 +00:00
|
|
|
if (hostcpu != NULL)
|
|
|
|
*hostcpu = vcpu->hostcpu;
|
2012-10-12 18:32:44 +00:00
|
|
|
vcpu_unlock(vcpu);
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2012-10-12 18:32:44 +00:00
|
|
|
return (state);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
vm_activate_cpu(struct vm *vm, int vcpuid)
|
|
|
|
{
|
|
|
|
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU,
|
|
|
|
("vm_activate_cpu: invalid vcpuid %d", vcpuid));
|
|
|
|
KASSERT(!CPU_ISSET(vcpuid, &vm->active_cpus),
|
|
|
|
("vm_activate_cpu: vcpuid %d is already active", vcpuid));
|
|
|
|
|
|
|
|
VCPU_CTR0(vm, vcpuid, "activated");
|
|
|
|
CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
vm_deactivate_cpu(struct vm *vm, int vcpuid)
|
|
|
|
{
|
|
|
|
|
|
|
|
KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU,
|
|
|
|
("vm_deactivate_cpu: invalid vcpuid %d", vcpuid));
|
|
|
|
KASSERT(CPU_ISSET(vcpuid, &vm->active_cpus),
|
|
|
|
("vm_deactivate_cpu: vcpuid %d is not active", vcpuid));
|
|
|
|
|
|
|
|
VCPU_CTR0(vm, vcpuid, "deactivated");
|
|
|
|
CPU_CLR_ATOMIC(vcpuid, &vm->active_cpus);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If a vcpu rendezvous is in progress then it could be blocked
|
|
|
|
* on 'vcpuid' - unblock it before disappearing forever.
|
|
|
|
*/
|
|
|
|
mtx_lock(&vm->rendezvous_mtx);
|
|
|
|
if (vm->rendezvous_func != NULL) {
|
|
|
|
VCPU_CTR0(vm, vcpuid, "unblock rendezvous after deactivation");
|
|
|
|
wakeup(&vm->rendezvous_func);
|
|
|
|
}
|
|
|
|
mtx_unlock(&vm->rendezvous_mtx);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
2011-06-28 06:26:03 +00:00
|
|
|
cpuset_t
|
2011-05-13 04:54:01 +00:00
|
|
|
vm_active_cpus(struct vm *vm)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (vm->active_cpus);
|
|
|
|
}
|
|
|
|
|
|
|
|
void *
|
|
|
|
vcpu_stats(struct vm *vm, int vcpuid)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (vm->vcpu[vcpuid].stats);
|
|
|
|
}
|
2012-09-25 19:08:51 +00:00
|
|
|
|
|
|
|
int
|
|
|
|
vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
|
|
|
|
{
|
|
|
|
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
*state = vm->vcpu[vcpuid].x2apic_state;
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
|
|
|
|
{
|
|
|
|
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
|
|
|
|
return (EINVAL);
|
|
|
|
|
2013-03-16 22:53:05 +00:00
|
|
|
if (state >= X2APIC_STATE_LAST)
|
2012-09-25 19:08:51 +00:00
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
vm->vcpu[vcpuid].x2apic_state = state;
|
|
|
|
|
2012-09-26 00:06:17 +00:00
|
|
|
vlapic_set_x2apic_state(vm, vcpuid, state);
|
|
|
|
|
2012-09-25 19:08:51 +00:00
|
|
|
return (0);
|
|
|
|
}
|
2012-10-12 18:32:44 +00:00
|
|
|
|
MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
Several changes to the local APIC support in bhyve:
- Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
- If a vcpu disables its local apic and then executes a 'HLT' then spin
down the vcpu and destroy its thread context. Also modify the 'HLT'
processing to ignore pending interrupts in the IRR if interrupts have
been disabled by the guest. The interrupt cannot be injected into the
guest in any case so resuming it is futile.
- Use callout(9) to drive the vlapic timer instead of clocking it on each
VM exit.
- When the guest is bringing up the APs in the x2APIC mode a write to the
ICR register will now trigger a return to userspace with an exitcode of
VM_EXITCODE_SPINUP_AP.
- Change the vlapic timer lock to be a spinlock because the vlapic can be
accessed from within a critical section (vm run loop) when guest is using
x2apic mode.
- Fix the vlapic version register.
- Add a command to bhyvectl to inject an NMI on a specific vcpu.
- Add an API to deliver message signalled interrupts to vcpus. This allows
callers to treat the MSI 'addr' and 'data' fields as opaque and also lets
bhyve implement multiple destination modes: physical, flat and clustered.
- Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to
'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
- Consolidate the virtual apic initialization in a single function:
vlapic_reset()
- Add a generic routine to trigger an LVT interrupt that supports both
fixed and NMI delivery modes.
- Add an ioctl and bhyvectl command to trigger local interrupts inside a
guest. In particular, a global NMI similar to that raised by SERR# or
PERR# can be simulated by asserting LINT1 on all vCPUs.
- Extend the LVT table in the vCPU local APIC to support CMCI.
- Flesh out the local APIC error reporting a bit to cache errors and
report them via ESR when ESR is written to. Add support for asserting
the error LVT when an error occurs. Raise illegal vector errors when
attempting to signal an invalid vector for an interrupt or when sending
an IPI.
- Export table entries in the MADT and MP Table advertising the stock x86
config of LINT0 set to ExtInt and LINT1 wired to NMI.
2014-02-23 00:46:05 +00:00
|
|
|
/*
|
|
|
|
* This function is called to ensure that a vcpu "sees" a pending event
|
|
|
|
* as soon as possible:
|
|
|
|
* - If the vcpu thread is sleeping then it is woken up.
|
|
|
|
* - If the vcpu is running on a different host_cpu then an IPI will be directed
|
|
|
|
* to the host_cpu to cause the vcpu to trap into the hypervisor.
|
|
|
|
*/
|
2012-10-12 18:32:44 +00:00
|
|
|
void
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
|
2012-10-12 18:32:44 +00:00
|
|
|
{
|
|
|
|
int hostcpu;
|
|
|
|
struct vcpu *vcpu;
|
|
|
|
|
|
|
|
vcpu = &vm->vcpu[vcpuid];
|
|
|
|
|
2012-10-25 04:29:21 +00:00
|
|
|
vcpu_lock(vcpu);
|
2012-10-12 18:32:44 +00:00
|
|
|
hostcpu = vcpu->hostcpu;
|
2014-05-18 04:33:24 +00:00
|
|
|
if (vcpu->state == VCPU_RUNNING) {
|
|
|
|
KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
if (hostcpu != curcpu) {
|
2014-05-18 04:33:24 +00:00
|
|
|
if (lapic_intr) {
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
vlapic_post_intr(vcpu->vlapic, hostcpu,
|
|
|
|
vmm_ipinum);
|
2014-05-18 04:33:24 +00:00
|
|
|
} else {
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
ipi_cpu(hostcpu, vmm_ipinum);
|
2014-05-18 04:33:24 +00:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* If the 'vcpu' is running on 'curcpu' then it must
|
|
|
|
* be sending a notification to itself (e.g. SELF_IPI).
|
|
|
|
* The pending event will be picked up when the vcpu
|
|
|
|
* transitions back to guest context.
|
|
|
|
*/
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
}
|
2014-05-18 04:33:24 +00:00
|
|
|
} else {
|
|
|
|
KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
|
|
|
|
"with hostcpu %d", vcpu->state, hostcpu));
|
|
|
|
if (vcpu->state == VCPU_SLEEPING)
|
|
|
|
wakeup_one(vcpu);
|
2012-10-25 04:29:21 +00:00
|
|
|
}
|
|
|
|
vcpu_unlock(vcpu);
|
2012-10-12 18:32:44 +00:00
|
|
|
}
|
2013-10-05 21:22:35 +00:00
|
|
|
|
|
|
|
struct vmspace *
|
|
|
|
vm_get_vmspace(struct vm *vm)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (vm->vmspace);
|
|
|
|
}
|
2014-01-23 20:21:39 +00:00
|
|
|
|
|
|
|
/*
 * Translate a local APIC id into a vcpu id.
 *
 * XXX The two are assumed to be numerically identical, so 'apicid' is
 * returned unchanged and 'vm' is not consulted.
 */
int
vm_apicid2vcpuid(struct vm *vm, int apicid)
{
	int vcpuid;

	vcpuid = apicid;
	return (vcpuid);
}
|
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
feature of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
2014-05-17 19:11:08 +00:00
|
|
|
|
|
|
|
void
|
|
|
|
vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
|
|
|
|
vm_rendezvous_func_t func, void *arg)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Enforce that this function is called without any locks
|
|
|
|
*/
|
|
|
|
WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous");
|
|
|
|
KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
|
|
|
|
("vm_smp_rendezvous: invalid vcpuid %d", vcpuid));
|
|
|
|
|
|
|
|
restart:
|
|
|
|
mtx_lock(&vm->rendezvous_mtx);
|
|
|
|
if (vm->rendezvous_func != NULL) {
|
|
|
|
/*
|
|
|
|
* If a rendezvous is already in progress then we need to
|
|
|
|
* call the rendezvous handler in case this 'vcpuid' is one
|
|
|
|
* of the targets of the rendezvous.
|
|
|
|
*/
|
|
|
|
RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress");
|
|
|
|
mtx_unlock(&vm->rendezvous_mtx);
|
|
|
|
vm_handle_rendezvous(vm, vcpuid);
|
|
|
|
goto restart;
|
|
|
|
}
|
|
|
|
KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous "
|
|
|
|
"rendezvous is still in progress"));
|
|
|
|
|
|
|
|
RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous");
|
|
|
|
vm->rendezvous_req_cpus = dest;
|
|
|
|
CPU_ZERO(&vm->rendezvous_done_cpus);
|
|
|
|
vm->rendezvous_arg = arg;
|
|
|
|
vm_set_rendezvous_func(vm, func);
|
|
|
|
mtx_unlock(&vm->rendezvous_mtx);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Wake up any sleeping vcpus and trigger a VM-exit in any running
|
|
|
|
* vcpus so they handle the rendezvous as soon as possible.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < VM_MAXCPU; i++) {
|
|
|
|
if (CPU_ISSET(i, &dest))
|
|
|
|
vcpu_notify_event(vm, i, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
vm_handle_rendezvous(vm, vcpuid);
|
|
|
|
}
|