freebsd-dev/sys/amd64/vmm/vmm.c
Neel Natu f76fc5d414 If the guest vcpu wants to idle then use that opportunity to relinquish the
host cpu to the scheduler until the guest is ready to run again.

This implies that the host cpu utilization will now closely mirror the actual
load imposed by the guest vcpu.

Also, the vcpu mutex now needs to be of type MTX_SPIN since we need to acquire
it inside a critical section.

Obtained from:	NetApp
2012-10-25 04:29:21 +00:00

1008 lines
21 KiB
C

/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <machine/vm.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <x86/apicreg.h>
#include <machine/vmm.h>
#include "vmm_mem.h"
#include "vmm_util.h"
#include <machine/vmm_dev.h>
#include "vlapic.h"
#include "vmm_msr.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"
#include "io/ppt.h"
#include "io/iommu.h"
struct vlapic;
struct vcpu {
int flags;
enum vcpu_state state;
struct mtx mtx;
int pincpu; /* host cpuid this vcpu is bound to */
int hostcpu; /* host cpuid this vcpu last ran on */
uint64_t guest_msrs[VMM_MSR_NUM];
struct vlapic *vlapic;
int vcpuid;
struct savefpu *guestfpu; /* guest fpu state */
void *stats;
struct vm_exit exitinfo;
enum x2apic_state x2apic_state;
int nmi_pending;
};
#define VCPU_F_PINNED 0x0001
#define VCPU_PINCPU(vm, vcpuid) \
((vm->vcpu[vcpuid].flags & VCPU_F_PINNED) ? vm->vcpu[vcpuid].pincpu : -1)
#define VCPU_UNPIN(vm, vcpuid) (vm->vcpu[vcpuid].flags &= ~VCPU_F_PINNED)
#define VCPU_PIN(vm, vcpuid, host_cpuid) \
do { \
vm->vcpu[vcpuid].flags |= VCPU_F_PINNED; \
vm->vcpu[vcpuid].pincpu = host_cpuid; \
} while(0)
#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx))
#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx))
#define VM_MAX_MEMORY_SEGMENTS 2
struct vm {
void *cookie; /* processor-specific data */
void *iommu; /* iommu-specific data */
struct vcpu vcpu[VM_MAXCPU];
int num_mem_segs;
struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS];
char name[VM_MAX_NAMELEN];
/*
* Set of active vcpus.
* An active vcpu is one that has been started implicitly (BSP) or
* explicitly (AP) by sending it a startup ipi.
*/
cpuset_t active_cpus;
};
static struct vmm_ops *ops;
#define VMM_INIT() (ops != NULL ? (*ops->init)() : 0)
#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0)
#define VMINIT(vm) (ops != NULL ? (*ops->vminit)(vm): NULL)
#define VMRUN(vmi, vcpu, rip) \
(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip) : ENXIO)
#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define VMMMAP_SET(vmi, gpa, hpa, len, attr, prot, spm) \
(ops != NULL ? \
(*ops->vmmmap_set)(vmi, gpa, hpa, len, attr, prot, spm) : \
ENXIO)
#define VMMMAP_GET(vmi, gpa) \
(ops != NULL ? (*ops->vmmmap_get)(vmi, gpa) : ENXIO)
#define VMGETREG(vmi, vcpu, num, retval) \
(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
#define VMSETREG(vmi, vcpu, num, val) \
(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
#define VMGETDESC(vmi, vcpu, num, desc) \
(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define VMSETDESC(vmi, vcpu, num, desc) \
(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define VMINJECT(vmi, vcpu, type, vec, ec, ecv) \
(ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO)
#define VMGETCAP(vmi, vcpu, num, retval) \
(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
#define VMSETCAP(vmi, vcpu, num, val) \
(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
#define fpu_start_emulating() start_emulating()
#define fpu_stop_emulating() stop_emulating()
static MALLOC_DEFINE(M_VM, "vm", "vm");
CTASSERT(VMM_MSR_NUM <= 64); /* msr_mask can keep track of up to 64 msrs */
/* statistics */
static VMM_STAT_DEFINE(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
static void
vcpu_cleanup(struct vcpu *vcpu)
{
vlapic_cleanup(vcpu->vlapic);
vmm_stat_free(vcpu->stats);
fpu_save_area_free(vcpu->guestfpu);
}
static void
vcpu_init(struct vm *vm, uint32_t vcpu_id)
{
struct vcpu *vcpu;
vcpu = &vm->vcpu[vcpu_id];
vcpu_lock_init(vcpu);
vcpu->hostcpu = NOCPU;
vcpu->vcpuid = vcpu_id;
vcpu->vlapic = vlapic_init(vm, vcpu_id);
vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED);
vcpu->guestfpu = fpu_save_area_alloc();
fpu_save_area_reset(vcpu->guestfpu);
vcpu->stats = vmm_stat_alloc();
}
struct vm_exit *
vm_exitinfo(struct vm *vm, int cpuid)
{
struct vcpu *vcpu;
if (cpuid < 0 || cpuid >= VM_MAXCPU)
panic("vm_exitinfo: invalid cpuid %d", cpuid);
vcpu = &vm->vcpu[cpuid];
return (&vcpu->exitinfo);
}
static int
vmm_init(void)
{
int error;
vmm_ipi_init();
error = vmm_mem_init();
if (error)
return (error);
if (vmm_is_intel())
ops = &vmm_ops_intel;
else if (vmm_is_amd())
ops = &vmm_ops_amd;
else
return (ENXIO);
vmm_msr_init();
return (VMM_INIT());
}
static int
vmm_handler(module_t mod, int what, void *arg)
{
int error;
switch (what) {
case MOD_LOAD:
vmmdev_init();
iommu_init();
error = vmm_init();
break;
case MOD_UNLOAD:
error = vmmdev_cleanup();
if (error == 0) {
iommu_cleanup();
vmm_ipi_cleanup();
error = VMM_CLEANUP();
}
break;
default:
error = 0;
break;
}
return (error);
}
static moduledata_t vmm_kmod = {
"vmm",
vmm_handler,
NULL
};
/*
* Execute the module load handler after the pci passthru driver has had
* a chance to claim devices. We need this information at the time we do
* iommu initialization.
*/
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_CONFIGURE + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);
SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
struct vm *
vm_create(const char *name)
{
int i;
struct vm *vm;
vm_paddr_t maxaddr;
const int BSP = 0;
if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
return (NULL);
vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
strcpy(vm->name, name);
vm->cookie = VMINIT(vm);
for (i = 0; i < VM_MAXCPU; i++) {
vcpu_init(vm, i);
guest_msrs_init(vm, i);
}
maxaddr = vmm_mem_maxaddr();
vm->iommu = iommu_create_domain(maxaddr);
vm_activate_cpu(vm, BSP);
return (vm);
}
static void
vm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg)
{
size_t len;
vm_paddr_t hpa;
void *host_domain;
host_domain = iommu_host_domain();
len = 0;
while (len < seg->len) {
hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE);
if (hpa == (vm_paddr_t)-1) {
panic("vm_free_mem_segs: cannot free hpa "
"associated with gpa 0x%016lx", seg->gpa + len);
}
/*
* Remove the 'gpa' to 'hpa' mapping in VMs domain.
* And resurrect the 1:1 mapping for 'hpa' in 'host_domain'.
*/
iommu_remove_mapping(vm->iommu, seg->gpa + len, PAGE_SIZE);
iommu_create_mapping(host_domain, hpa, hpa, PAGE_SIZE);
vmm_mem_free(hpa, PAGE_SIZE);
len += PAGE_SIZE;
}
/*
* Invalidate cached translations associated with 'vm->iommu' since
* we have now moved some pages from it.
*/
iommu_invalidate_tlb(vm->iommu);
bzero(seg, sizeof(struct vm_memory_segment));
}
void
vm_destroy(struct vm *vm)
{
int i;
ppt_unassign_all(vm);
for (i = 0; i < vm->num_mem_segs; i++)
vm_free_mem_seg(vm, &vm->mem_segs[i]);
vm->num_mem_segs = 0;
for (i = 0; i < VM_MAXCPU; i++)
vcpu_cleanup(&vm->vcpu[i]);
iommu_destroy_domain(vm->iommu);
VMCLEANUP(vm->cookie);
free(vm, M_VM);
}
const char *
vm_name(struct vm *vm)
{
return (vm->name);
}
int
vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
const boolean_t spok = TRUE; /* superpage mappings are ok */
return (VMMMAP_SET(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE,
VM_PROT_RW, spok));
}
int
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
{
const boolean_t spok = TRUE; /* superpage mappings are ok */
return (VMMMAP_SET(vm->cookie, gpa, 0, len, 0,
VM_PROT_NONE, spok));
}
/*
* Returns TRUE if 'gpa' is available for allocation and FALSE otherwise
*/
static boolean_t
vm_gpa_available(struct vm *vm, vm_paddr_t gpa)
{
int i;
vm_paddr_t gpabase, gpalimit;
if (gpa & PAGE_MASK)
panic("vm_gpa_available: gpa (0x%016lx) not page aligned", gpa);
for (i = 0; i < vm->num_mem_segs; i++) {
gpabase = vm->mem_segs[i].gpa;
gpalimit = gpabase + vm->mem_segs[i].len;
if (gpa >= gpabase && gpa < gpalimit)
return (FALSE);
}
return (TRUE);
}
int
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
{
int error, available, allocated;
struct vm_memory_segment *seg;
vm_paddr_t g, hpa;
void *host_domain;
const boolean_t spok = TRUE; /* superpage mappings are ok */
if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
return (EINVAL);
available = allocated = 0;
g = gpa;
while (g < gpa + len) {
if (vm_gpa_available(vm, g))
available++;
else
allocated++;
g += PAGE_SIZE;
}
/*
* If there are some allocated and some available pages in the address
* range then it is an error.
*/
if (allocated && available)
return (EINVAL);
/*
* If the entire address range being requested has already been
* allocated then there isn't anything more to do.
*/
if (allocated && available == 0)
return (0);
if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
return (E2BIG);
host_domain = iommu_host_domain();
seg = &vm->mem_segs[vm->num_mem_segs];
error = 0;
seg->gpa = gpa;
seg->len = 0;
while (seg->len < len) {
hpa = vmm_mem_alloc(PAGE_SIZE);
if (hpa == 0) {
error = ENOMEM;
break;
}
error = VMMMAP_SET(vm->cookie, gpa + seg->len, hpa, PAGE_SIZE,
VM_MEMATTR_WRITE_BACK, VM_PROT_ALL, spok);
if (error)
break;
/*
* Remove the 1:1 mapping for 'hpa' from the 'host_domain'.
* Add mapping for 'gpa + seg->len' to 'hpa' in the VMs domain.
*/
iommu_remove_mapping(host_domain, hpa, PAGE_SIZE);
iommu_create_mapping(vm->iommu, gpa + seg->len, hpa, PAGE_SIZE);
seg->len += PAGE_SIZE;
}
if (error) {
vm_free_mem_seg(vm, seg);
return (error);
}
/*
* Invalidate cached translations associated with 'host_domain' since
* we have now moved some pages from it.
*/
iommu_invalidate_tlb(host_domain);
vm->num_mem_segs++;
return (0);
}
vm_paddr_t
vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len)
{
vm_paddr_t nextpage;
nextpage = rounddown(gpa + PAGE_SIZE, PAGE_SIZE);
if (len > nextpage - gpa)
panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%lu", gpa, len);
return (VMMMAP_GET(vm->cookie, gpa));
}
int
vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
struct vm_memory_segment *seg)
{
int i;
for (i = 0; i < vm->num_mem_segs; i++) {
if (gpabase == vm->mem_segs[i].gpa) {
*seg = vm->mem_segs[i];
return (0);
}
}
return (-1);
}
int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{
if (vcpu < 0 || vcpu >= VM_MAXCPU)
return (EINVAL);
if (reg >= VM_REG_LAST)
return (EINVAL);
return (VMGETREG(vm->cookie, vcpu, reg, retval));
}
int
vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
{
if (vcpu < 0 || vcpu >= VM_MAXCPU)
return (EINVAL);
if (reg >= VM_REG_LAST)
return (EINVAL);
return (VMSETREG(vm->cookie, vcpu, reg, val));
}
static boolean_t
is_descriptor_table(int reg)
{
switch (reg) {
case VM_REG_GUEST_IDTR:
case VM_REG_GUEST_GDTR:
return (TRUE);
default:
return (FALSE);
}
}
static boolean_t
is_segment_register(int reg)
{
switch (reg) {
case VM_REG_GUEST_ES:
case VM_REG_GUEST_CS:
case VM_REG_GUEST_SS:
case VM_REG_GUEST_DS:
case VM_REG_GUEST_FS:
case VM_REG_GUEST_GS:
case VM_REG_GUEST_TR:
case VM_REG_GUEST_LDTR:
return (TRUE);
default:
return (FALSE);
}
}
int
vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
struct seg_desc *desc)
{
if (vcpu < 0 || vcpu >= VM_MAXCPU)
return (EINVAL);
if (!is_segment_register(reg) && !is_descriptor_table(reg))
return (EINVAL);
return (VMGETDESC(vm->cookie, vcpu, reg, desc));
}
int
vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
struct seg_desc *desc)
{
if (vcpu < 0 || vcpu >= VM_MAXCPU)
return (EINVAL);
if (!is_segment_register(reg) && !is_descriptor_table(reg))
return (EINVAL);
return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}
int
vm_get_pinning(struct vm *vm, int vcpuid, int *cpuid)
{
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
return (EINVAL);
*cpuid = VCPU_PINCPU(vm, vcpuid);
return (0);
}
int
vm_set_pinning(struct vm *vm, int vcpuid, int host_cpuid)
{
struct thread *td;
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
return (EINVAL);
td = curthread; /* XXXSMP only safe when muxing vcpus */
/* unpin */
if (host_cpuid < 0) {
VCPU_UNPIN(vm, vcpuid);
thread_lock(td);
sched_unbind(td);
thread_unlock(td);
return (0);
}
if (CPU_ABSENT(host_cpuid))
return (EINVAL);
/*
* XXX we should check that 'host_cpuid' has not already been pinned
* by another vm.
*/
thread_lock(td);
sched_bind(td, host_cpuid);
thread_unlock(td);
VCPU_PIN(vm, vcpuid, host_cpuid);
return (0);
}
static void
restore_guest_fpustate(struct vcpu *vcpu)
{
/* flush host state to the pcb */
fpuexit(curthread);
fpu_stop_emulating();
fpurestore(vcpu->guestfpu);
}
static void
save_guest_fpustate(struct vcpu *vcpu)
{
fpusave(vcpu->guestfpu);
fpu_start_emulating();
}
static VMM_STAT_DEFINE(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
int
vm_run(struct vm *vm, struct vm_run *vmrun)
{
int error, vcpuid, sleepticks, t;
struct vcpu *vcpu;
struct pcb *pcb;
uint64_t tscval, rip;
struct vm_exit *vme;
vcpuid = vmrun->cpuid;
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
return (EINVAL);
vcpu = &vm->vcpu[vcpuid];
vme = &vmrun->vm_exit;
rip = vmrun->rip;
restart:
critical_enter();
tscval = rdtsc();
pcb = PCPU_GET(curpcb);
set_pcb_flags(pcb, PCB_FULL_IRET);
restore_guest_msrs(vm, vcpuid);
restore_guest_fpustate(vcpu);
vcpu->hostcpu = curcpu;
error = VMRUN(vm->cookie, vcpuid, rip);
vcpu->hostcpu = NOCPU;
save_guest_fpustate(vcpu);
restore_host_msrs(vm, vcpuid);
vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
/* copy the exit information */
bcopy(&vcpu->exitinfo, vme, sizeof(struct vm_exit));
critical_exit();
/*
* Oblige the guest's desire to 'hlt' by sleeping until the vcpu
* is ready to run.
*/
if (error == 0 && vme->exitcode == VM_EXITCODE_HLT) {
vcpu_lock(vcpu);
/*
* Figure out the number of host ticks until the next apic
* timer interrupt in the guest.
*/
sleepticks = lapic_timer_tick(vm, vcpuid);
/*
* If the guest local apic timer is disabled then sleep for
* a long time but not forever.
*/
if (sleepticks < 0)
sleepticks = hz;
/*
* Do a final check for pending NMI or interrupts before
* really putting this thread to sleep.
*
* These interrupts could have happened any time after we
* returned from VMRUN() and before we grabbed the vcpu lock.
*/
if (!vm_nmi_pending(vm, vcpuid) &&
lapic_pending_intr(vm, vcpuid) < 0) {
if (sleepticks <= 0)
panic("invalid sleepticks %d", sleepticks);
t = ticks;
msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks);
vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
}
vcpu_unlock(vcpu);
rip = vme->rip + vme->inst_length;
goto restart;
}
return (error);
}
int
vm_inject_event(struct vm *vm, int vcpuid, int type,
int vector, uint32_t code, int code_valid)
{
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
return (EINVAL);
if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0)
return (EINVAL);
if (vector < 0 || vector > 255)
return (EINVAL);
return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid));
}
static VMM_STAT_DEFINE(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");
int
vm_inject_nmi(struct vm *vm, int vcpuid)
{
struct vcpu *vcpu;
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
return (EINVAL);
vcpu = &vm->vcpu[vcpuid];
vcpu->nmi_pending = 1;
vm_interrupt_hostcpu(vm, vcpuid);
return (0);
}
int
vm_nmi_pending(struct vm *vm, int vcpuid)
{
struct vcpu *vcpu;
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
vcpu = &vm->vcpu[vcpuid];
return (vcpu->nmi_pending);
}
void
vm_nmi_clear(struct vm *vm, int vcpuid)
{
struct vcpu *vcpu;
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
vcpu = &vm->vcpu[vcpuid];
if (vcpu->nmi_pending == 0)
panic("vm_nmi_clear: inconsistent nmi_pending state");
vcpu->nmi_pending = 0;
vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
}
int
vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
{
if (vcpu < 0 || vcpu >= VM_MAXCPU)
return (EINVAL);
if (type < 0 || type >= VM_CAP_MAX)
return (EINVAL);
return (VMGETCAP(vm->cookie, vcpu, type, retval));
}
int
vm_set_capability(struct vm *vm, int vcpu, int type, int val)
{
if (vcpu < 0 || vcpu >= VM_MAXCPU)
return (EINVAL);
if (type < 0 || type >= VM_CAP_MAX)
return (EINVAL);
return (VMSETCAP(vm->cookie, vcpu, type, val));
}
uint64_t *
vm_guest_msrs(struct vm *vm, int cpu)
{
return (vm->vcpu[cpu].guest_msrs);
}
struct vlapic *
vm_lapic(struct vm *vm, int cpu)
{
return (vm->vcpu[cpu].vlapic);
}
boolean_t
vmm_is_pptdev(int bus, int slot, int func)
{
int found, b, s, f, n;
char *val, *cp, *cp2;
/*
* setenv pptdevs "1/2/3 4/5/6 7/8/9 10/11/12"
*/
found = 0;
cp = val = getenv("pptdevs");
while (cp != NULL && *cp != '\0') {
if ((cp2 = strchr(cp, ' ')) != NULL)
*cp2 = '\0';
n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
if (n == 3 && bus == b && slot == s && func == f) {
found = 1;
break;
}
if (cp2 != NULL)
*cp2++ = ' ';
cp = cp2;
}
freeenv(val);
return (found);
}
void *
vm_iommu_domain(struct vm *vm)
{
return (vm->iommu);
}
int
vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state state)
{
int error;
struct vcpu *vcpu;
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
panic("vm_set_run_state: invalid vcpuid %d", vcpuid);
vcpu = &vm->vcpu[vcpuid];
vcpu_lock(vcpu);
/*
* The following state transitions are allowed:
* IDLE -> RUNNING -> IDLE
* IDLE -> CANNOT_RUN -> IDLE
*/
if ((vcpu->state == VCPU_IDLE && state != VCPU_IDLE) ||
(vcpu->state != VCPU_IDLE && state == VCPU_IDLE)) {
error = 0;
vcpu->state = state;
} else {
error = EBUSY;
}
vcpu_unlock(vcpu);
return (error);
}
enum vcpu_state
vcpu_get_state(struct vm *vm, int vcpuid)
{
struct vcpu *vcpu;
enum vcpu_state state;
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
vcpu = &vm->vcpu[vcpuid];
vcpu_lock(vcpu);
state = vcpu->state;
vcpu_unlock(vcpu);
return (state);
}
void
vm_activate_cpu(struct vm *vm, int vcpuid)
{
if (vcpuid >= 0 && vcpuid < VM_MAXCPU)
CPU_SET(vcpuid, &vm->active_cpus);
}
cpuset_t
vm_active_cpus(struct vm *vm)
{
return (vm->active_cpus);
}
void *
vcpu_stats(struct vm *vm, int vcpuid)
{
return (vm->vcpu[vcpuid].stats);
}
int
vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
{
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
return (EINVAL);
*state = vm->vcpu[vcpuid].x2apic_state;
return (0);
}
int
vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
return (EINVAL);
if (state < 0 || state >= X2APIC_STATE_LAST)
return (EINVAL);
vm->vcpu[vcpuid].x2apic_state = state;
vlapic_set_x2apic_state(vm, vcpuid, state);
return (0);
}
void
vm_interrupt_hostcpu(struct vm *vm, int vcpuid)
{
int hostcpu;
struct vcpu *vcpu;
vcpu = &vm->vcpu[vcpuid];
vcpu_lock(vcpu);
hostcpu = vcpu->hostcpu;
if (hostcpu == NOCPU) {
/*
* If the vcpu is 'RUNNING' but without a valid 'hostcpu' then
* the host thread must be sleeping waiting for an event to
* kick the vcpu out of 'hlt'.
*
* XXX this is racy because the condition exists right before
* and after calling VMRUN() in vm_run(). The wakeup() is
* benign in this case.
*/
if (vcpu->state == VCPU_RUNNING)
wakeup_one(vcpu);
} else {
if (vcpu->state != VCPU_RUNNING)
panic("invalid vcpu state %d", vcpu->state);
if (hostcpu != curcpu)
ipi_cpu(hostcpu, vmm_ipinum);
}
vcpu_unlock(vcpu);
}