Merge projects/bhyve to head.
'bhyve' was developed by grehan@ and myself at NetApp (thanks!). Special thanks to Peter Snyder, Joe Caradonna and Michael Dexter for their support and encouragement.

Obtained from:	NetApp
commit 363335d53e
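For orientation, here is a minimal userland sketch (not part of the commit) showing the intended call sequence of the libvmmapi interfaces merged below. The VM name, guest memory size and start RIP are placeholder assumptions; the sketch only illustrates the API and will not boot a real guest.

#include <sys/types.h>
#include <stdio.h>

#include <machine/vmm.h>	/* enum and struct vm_exit definitions */
#include <vmmapi.h>		/* installed from lib/libvmmapi below */

int
main(void)
{
	struct vmctx *ctx;
	struct vm_exit vmexit;
	int error, vcpu = 0;

	if (vm_create("testvm") != 0)		/* hw.vmm.create sysctl */
		return (1);
	if ((ctx = vm_open("testvm")) == NULL)	/* opens /dev/vmm/testvm */
		return (1);

	/* 64MB of guest physical memory at gpa 0; not mmap'ed here */
	error = vm_setup_memory(ctx, 0, 64 * 1024 * 1024, NULL);
	if (error == 0)
		error = vcpu_reset(ctx, vcpu);	/* power-up register state */
	if (error == 0)
		error = vm_run(ctx, vcpu, 0xfff0, &vmexit);
	if (error == 0)
		printf("exit code %d at rip 0x%lx\n",
		    vmexit.exitcode, (unsigned long)vmexit.rip);

	vm_destroy(ctx);			/* hw.vmm.destroy sysctl */
	return (error != 0);
}

The vmctx handle is a thin wrapper around the /dev/vmm/&lt;name&gt; file descriptor; everything after vm_open() is an ioctl on that descriptor, as the vmmapi.c listing below shows.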
@@ -115,6 +115,7 @@ SUBDIR=	${SUBDIR_ORDERED} \
	${_libusbhid} \
	${_libusb} \
	${_libvgl} \
	${_libvmmapi} \
	libwrap \
	liby \
	libz \
@@ -198,6 +199,10 @@ _libproc=	libproc
_librtld_db=	librtld_db
.endif

.if ${MACHINE_CPUARCH} == "amd64"
_libvmmapi=	libvmmapi
.endif

.if ${MACHINE_CPUARCH} == "ia64"
_libefi=	libefi
.endif
lib/libvmmapi/Makefile (new file, 11 lines)
@@ -0,0 +1,11 @@
# $FreeBSD$

LIB=	vmmapi
SRCS=	vmmapi.c vmmapi_freebsd.c
INCS=	vmmapi.h

WARNS?=	2

CFLAGS+= -I${.CURDIR}

.include <bsd.lib.mk>
lib/libvmmapi/vmmapi.c (new file, 723 lines)
@@ -0,0 +1,723 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <machine/specialreg.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <machine/vmm_dev.h>
|
||||
|
||||
#include "vmmapi.h"
|
||||
|
||||
struct vmctx {
|
||||
int fd;
|
||||
char *name;
|
||||
};
|
||||
|
||||
#define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
|
||||
#define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))
|
||||
|
||||
static int
|
||||
vm_device_open(const char *name)
|
||||
{
|
||||
int fd, len;
|
||||
char *vmfile;
|
||||
|
||||
len = strlen("/dev/vmm/") + strlen(name) + 1;
|
||||
vmfile = malloc(len);
|
||||
assert(vmfile != NULL);
|
||||
snprintf(vmfile, len, "/dev/vmm/%s", name);
|
||||
|
||||
/* Open the device file */
|
||||
fd = open(vmfile, O_RDWR, 0);
|
||||
|
||||
free(vmfile);
|
||||
return (fd);
|
||||
}
|
||||
|
||||
int
|
||||
vm_create(const char *name)
|
||||
{
|
||||
|
||||
return (CREATE((char *)name));
|
||||
}
|
||||
|
||||
struct vmctx *
|
||||
vm_open(const char *name)
|
||||
{
|
||||
struct vmctx *vm;
|
||||
|
||||
vm = malloc(sizeof(struct vmctx) + strlen(name) + 1);
|
||||
assert(vm != NULL);
|
||||
|
||||
vm->fd = -1;
|
||||
vm->name = (char *)(vm + 1);
|
||||
strcpy(vm->name, name);
|
||||
|
||||
if ((vm->fd = vm_device_open(vm->name)) < 0)
|
||||
goto err;
|
||||
|
||||
return (vm);
|
||||
err:
|
||||
vm_destroy(vm);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
void
|
||||
vm_destroy(struct vmctx *vm)
|
||||
{
|
||||
assert(vm != NULL);
|
||||
|
||||
if (vm->fd >= 0)
|
||||
close(vm->fd);
|
||||
DESTROY(vm->name);
|
||||
|
||||
free(vm);
|
||||
}
|
||||
|
||||
size_t
|
||||
vmm_get_mem_total(void)
|
||||
{
|
||||
size_t mem_total = 0;
|
||||
size_t oldlen = sizeof(mem_total);
|
||||
int error;
|
||||
error = sysctlbyname("hw.vmm.mem_total", &mem_total, &oldlen, NULL, 0);
|
||||
if (error)
|
||||
return -1;
|
||||
return mem_total;
|
||||
}
|
||||
|
||||
size_t
|
||||
vmm_get_mem_free(void)
|
||||
{
|
||||
size_t mem_free = 0;
|
||||
size_t oldlen = sizeof(mem_free);
|
||||
int error;
|
||||
error = sysctlbyname("hw.vmm.mem_free", &mem_free, &oldlen, NULL, 0);
|
||||
if (error)
|
||||
return -1;
|
||||
return mem_free;
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len)
|
||||
{
|
||||
int error;
|
||||
struct vm_memory_segment seg;
|
||||
|
||||
bzero(&seg, sizeof(seg));
|
||||
seg.gpa = gpa;
|
||||
error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg);
|
||||
*ret_len = seg.len;
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_setup_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **mapaddr)
|
||||
{
|
||||
int error;
|
||||
struct vm_memory_segment seg;
|
||||
|
||||
/*
|
||||
* Create and optionally map 'len' bytes of memory at guest
|
||||
* physical address 'gpa'
|
||||
*/
|
||||
bzero(&seg, sizeof(seg));
|
||||
seg.gpa = gpa;
|
||||
seg.len = len;
|
||||
error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg);
|
||||
if (error == 0 && mapaddr != NULL) {
|
||||
*mapaddr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||
ctx->fd, gpa);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
char *
|
||||
vm_map_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len)
|
||||
{
|
||||
|
||||
/* Map 'len' bytes of memory at guest physical address 'gpa' */
|
||||
return ((char *)mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||
ctx->fd, gpa));
|
||||
}
|
||||
|
||||
int
|
||||
vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
|
||||
uint64_t base, uint32_t limit, uint32_t access)
|
||||
{
|
||||
int error;
|
||||
struct vm_seg_desc vmsegdesc;
|
||||
|
||||
bzero(&vmsegdesc, sizeof(vmsegdesc));
|
||||
vmsegdesc.cpuid = vcpu;
|
||||
vmsegdesc.regnum = reg;
|
||||
vmsegdesc.desc.base = base;
|
||||
vmsegdesc.desc.limit = limit;
|
||||
vmsegdesc.desc.access = access;
|
||||
|
||||
error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
|
||||
uint64_t *base, uint32_t *limit, uint32_t *access)
|
||||
{
|
||||
int error;
|
||||
struct vm_seg_desc vmsegdesc;
|
||||
|
||||
bzero(&vmsegdesc, sizeof(vmsegdesc));
|
||||
vmsegdesc.cpuid = vcpu;
|
||||
vmsegdesc.regnum = reg;
|
||||
|
||||
error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc);
|
||||
if (error == 0) {
|
||||
*base = vmsegdesc.desc.base;
|
||||
*limit = vmsegdesc.desc.limit;
|
||||
*access = vmsegdesc.desc.access;
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
|
||||
{
|
||||
int error;
|
||||
struct vm_register vmreg;
|
||||
|
||||
bzero(&vmreg, sizeof(vmreg));
|
||||
vmreg.cpuid = vcpu;
|
||||
vmreg.regnum = reg;
|
||||
vmreg.regval = val;
|
||||
|
||||
error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val)
|
||||
{
|
||||
int error;
|
||||
struct vm_register vmreg;
|
||||
|
||||
bzero(&vmreg, sizeof(vmreg));
|
||||
vmreg.cpuid = vcpu;
|
||||
vmreg.regnum = reg;
|
||||
|
||||
error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg);
|
||||
*ret_val = vmreg.regval;
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid)
|
||||
{
|
||||
int error;
|
||||
struct vm_pin vmpin;
|
||||
|
||||
bzero(&vmpin, sizeof(vmpin));
|
||||
vmpin.vm_cpuid = vcpu;
|
||||
|
||||
error = ioctl(ctx->fd, VM_GET_PINNING, &vmpin);
|
||||
*host_cpuid = vmpin.host_cpuid;
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid)
|
||||
{
|
||||
int error;
|
||||
struct vm_pin vmpin;
|
||||
|
||||
bzero(&vmpin, sizeof(vmpin));
|
||||
vmpin.vm_cpuid = vcpu;
|
||||
vmpin.host_cpuid = host_cpuid;
|
||||
|
||||
error = ioctl(ctx->fd, VM_SET_PINNING, &vmpin);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit)
|
||||
{
|
||||
int error;
|
||||
struct vm_run vmrun;
|
||||
|
||||
bzero(&vmrun, sizeof(vmrun));
|
||||
vmrun.cpuid = vcpu;
|
||||
vmrun.rip = rip;
|
||||
|
||||
error = ioctl(ctx->fd, VM_RUN, &vmrun);
|
||||
bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit));
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
vm_inject_event_real(struct vmctx *ctx, int vcpu, enum vm_event_type type,
|
||||
int vector, int error_code, int error_code_valid)
|
||||
{
|
||||
struct vm_event ev;
|
||||
|
||||
bzero(&ev, sizeof(ev));
|
||||
ev.cpuid = vcpu;
|
||||
ev.type = type;
|
||||
ev.vector = vector;
|
||||
ev.error_code = error_code;
|
||||
ev.error_code_valid = error_code_valid;
|
||||
|
||||
return (ioctl(ctx->fd, VM_INJECT_EVENT, &ev));
|
||||
}
|
||||
|
||||
int
|
||||
vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type,
|
||||
int vector)
|
||||
{
|
||||
|
||||
return (vm_inject_event_real(ctx, vcpu, type, vector, 0, 0));
|
||||
}
|
||||
|
||||
int
|
||||
vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type,
|
||||
int vector, int error_code)
|
||||
{
|
||||
|
||||
return (vm_inject_event_real(ctx, vcpu, type, vector, error_code, 1));
|
||||
}
|
||||
|
||||
int
|
||||
vm_apicid2vcpu(struct vmctx *ctx, int apicid)
|
||||
{
|
||||
/*
|
||||
* The apic id associated with the 'vcpu' has the same numerical value
|
||||
* as the 'vcpu' itself.
|
||||
*/
|
||||
return (apicid);
|
||||
}
|
||||
|
||||
int
|
||||
vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector)
|
||||
{
|
||||
struct vm_lapic_irq vmirq;
|
||||
|
||||
bzero(&vmirq, sizeof(vmirq));
|
||||
vmirq.cpuid = vcpu;
|
||||
vmirq.vector = vector;
|
||||
|
||||
return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq));
|
||||
}
|
||||
|
||||
int
|
||||
vm_inject_nmi(struct vmctx *ctx, int vcpu)
|
||||
{
|
||||
struct vm_nmi vmnmi;
|
||||
|
||||
bzero(&vmnmi, sizeof(vmnmi));
|
||||
vmnmi.cpuid = vcpu;
|
||||
|
||||
return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi));
|
||||
}
|
||||
|
||||
static struct {
|
||||
const char *name;
|
||||
int type;
|
||||
} capstrmap[] = {
|
||||
{ "hlt_exit", VM_CAP_HALT_EXIT },
|
||||
{ "mtrap_exit", VM_CAP_MTRAP_EXIT },
|
||||
{ "pause_exit", VM_CAP_PAUSE_EXIT },
|
||||
{ "unrestricted_guest", VM_CAP_UNRESTRICTED_GUEST },
|
||||
{ 0 }
|
||||
};
|
||||
|
||||
int
|
||||
vm_capability_name2type(const char *capname)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; capstrmap[i].name != NULL && capname != NULL; i++) {
|
||||
if (strcmp(capstrmap[i].name, capname) == 0)
|
||||
return (capstrmap[i].type);
|
||||
}
|
||||
|
||||
return (-1);
|
||||
}
|
||||
|
||||
const char *
|
||||
vm_capability_type2name(int type)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; capstrmap[i].name != NULL; i++) {
|
||||
if (capstrmap[i].type == type)
|
||||
return (capstrmap[i].name);
|
||||
}
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
|
||||
int *retval)
|
||||
{
|
||||
int error;
|
||||
struct vm_capability vmcap;
|
||||
|
||||
bzero(&vmcap, sizeof(vmcap));
|
||||
vmcap.cpuid = vcpu;
|
||||
vmcap.captype = cap;
|
||||
|
||||
error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap);
|
||||
*retval = vmcap.capval;
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val)
|
||||
{
|
||||
struct vm_capability vmcap;
|
||||
|
||||
bzero(&vmcap, sizeof(vmcap));
|
||||
vmcap.cpuid = vcpu;
|
||||
vmcap.captype = cap;
|
||||
vmcap.capval = val;
|
||||
|
||||
return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap));
|
||||
}
|
||||
|
||||
int
|
||||
vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
|
||||
{
|
||||
struct vm_pptdev pptdev;
|
||||
|
||||
bzero(&pptdev, sizeof(pptdev));
|
||||
pptdev.bus = bus;
|
||||
pptdev.slot = slot;
|
||||
pptdev.func = func;
|
||||
|
||||
return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev));
|
||||
}
|
||||
|
||||
int
|
||||
vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
|
||||
{
|
||||
struct vm_pptdev pptdev;
|
||||
|
||||
bzero(&pptdev, sizeof(pptdev));
|
||||
pptdev.bus = bus;
|
||||
pptdev.slot = slot;
|
||||
pptdev.func = func;
|
||||
|
||||
return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev));
|
||||
}
|
||||
|
||||
int
|
||||
vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
|
||||
vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
|
||||
{
|
||||
struct vm_pptdev_mmio pptmmio;
|
||||
|
||||
bzero(&pptmmio, sizeof(pptmmio));
|
||||
pptmmio.bus = bus;
|
||||
pptmmio.slot = slot;
|
||||
pptmmio.func = func;
|
||||
pptmmio.gpa = gpa;
|
||||
pptmmio.len = len;
|
||||
pptmmio.hpa = hpa;
|
||||
|
||||
return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio));
|
||||
}
|
||||
|
||||
int
|
||||
vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
|
||||
int destcpu, int vector, int numvec)
|
||||
{
|
||||
struct vm_pptdev_msi pptmsi;
|
||||
|
||||
bzero(&pptmsi, sizeof(pptmsi));
|
||||
pptmsi.vcpu = vcpu;
|
||||
pptmsi.bus = bus;
|
||||
pptmsi.slot = slot;
|
||||
pptmsi.func = func;
|
||||
pptmsi.destcpu = destcpu;
|
||||
pptmsi.vector = vector;
|
||||
pptmsi.numvec = numvec;
|
||||
|
||||
return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
|
||||
}
|
||||
|
||||
int
|
||||
vm_setup_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
|
||||
int idx, uint32_t msg, uint32_t vector_control, uint64_t addr)
|
||||
{
|
||||
struct vm_pptdev_msix pptmsix;
|
||||
|
||||
bzero(&pptmsix, sizeof(pptmsix));
|
||||
pptmsix.vcpu = vcpu;
|
||||
pptmsix.bus = bus;
|
||||
pptmsix.slot = slot;
|
||||
pptmsix.func = func;
|
||||
pptmsix.idx = idx;
|
||||
pptmsix.msg = msg;
|
||||
pptmsix.addr = addr;
|
||||
pptmsix.vector_control = vector_control;
|
||||
|
||||
return (ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix));
|
||||
}
|
||||
|
||||
uint64_t *
|
||||
vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
|
||||
int *ret_entries)
|
||||
{
|
||||
int error;
|
||||
|
||||
static struct vm_stats vmstats;
|
||||
|
||||
vmstats.cpuid = vcpu;
|
||||
|
||||
error = ioctl(ctx->fd, VM_STATS, &vmstats);
|
||||
if (error == 0) {
|
||||
if (ret_entries)
|
||||
*ret_entries = vmstats.num_entries;
|
||||
if (ret_tv)
|
||||
*ret_tv = vmstats.tv;
|
||||
return (vmstats.statbuf);
|
||||
} else
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
const char *
|
||||
vm_get_stat_desc(struct vmctx *ctx, int index)
|
||||
{
|
||||
static struct vm_stat_desc statdesc;
|
||||
|
||||
statdesc.index = index;
|
||||
if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0)
|
||||
return (statdesc.desc);
|
||||
else
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *state)
|
||||
{
|
||||
int error;
|
||||
struct vm_x2apic x2apic;
|
||||
|
||||
bzero(&x2apic, sizeof(x2apic));
|
||||
x2apic.cpuid = vcpu;
|
||||
|
||||
error = ioctl(ctx->fd, VM_GET_X2APIC_STATE, &x2apic);
|
||||
*state = x2apic.state;
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state state)
|
||||
{
|
||||
int error;
|
||||
struct vm_x2apic x2apic;
|
||||
|
||||
bzero(&x2apic, sizeof(x2apic));
|
||||
x2apic.cpuid = vcpu;
|
||||
x2apic.state = state;
|
||||
|
||||
error = ioctl(ctx->fd, VM_SET_X2APIC_STATE, &x2apic);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* From Intel Vol 3a:
|
||||
* Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT
|
||||
*/
|
||||
int
|
||||
vcpu_reset(struct vmctx *vmctx, int vcpu)
|
||||
{
|
||||
int error;
|
||||
uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx;
|
||||
uint32_t desc_access, desc_limit;
|
||||
uint16_t sel;
|
||||
|
||||
zero = 0;
|
||||
|
||||
rflags = 0x2;
|
||||
error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
rip = 0xfff0;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0)
|
||||
goto done;
|
||||
|
||||
cr0 = CR0_NE;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0)
|
||||
goto done;
|
||||
|
||||
cr4 = 0;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* CS: present, r/w, accessed, 16-bit, byte granularity, usable
|
||||
*/
|
||||
desc_base = 0xffff0000;
|
||||
desc_limit = 0xffff;
|
||||
desc_access = 0x0093;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
sel = 0xf000;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0)
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity
|
||||
*/
|
||||
desc_base = 0;
|
||||
desc_limit = 0xffff;
|
||||
desc_access = 0x0093;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
sel = 0;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0)
|
||||
goto done;
|
||||
|
||||
/* General purpose registers */
|
||||
rdx = 0xf00;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0)
|
||||
goto done;
|
||||
|
||||
/* GDTR, IDTR */
|
||||
desc_base = 0;
|
||||
desc_limit = 0xffff;
|
||||
desc_access = 0;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error != 0)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error != 0)
|
||||
goto done;
|
||||
|
||||
/* TR */
|
||||
desc_base = 0;
|
||||
desc_limit = 0xffff;
|
||||
desc_access = 0x0000008b;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
sel = 0;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0)
|
||||
goto done;
|
||||
|
||||
/* LDTR */
|
||||
desc_base = 0;
|
||||
desc_limit = 0xffff;
|
||||
desc_access = 0x00000082;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base,
|
||||
desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
sel = 0;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
|
||||
goto done;
|
||||
|
||||
/* XXX cr2, debug registers */
|
||||
|
||||
error = 0;
|
||||
done:
|
||||
return (error);
|
||||
}
|
lib/libvmmapi/vmmapi.h (new file, 105 lines)
@@ -0,0 +1,105 @@
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _VMMAPI_H_
#define	_VMMAPI_H_

struct vmctx;
enum x2apic_state;

int	vm_create(const char *name);
struct vmctx *vm_open(const char *name);
void	vm_destroy(struct vmctx *ctx);
size_t	vmm_get_mem_total(void);
size_t	vmm_get_mem_free(void);
int	vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len);
/*
 * Create a memory segment of 'len' bytes in the guest physical address space
 * at offset 'gpa'.
 *
 * If 'mapaddr' is not NULL then this region is mmap'ed into the address
 * space of the calling process. If there is an mmap error then *mapaddr
 * will be set to MAP_FAILED.
 */

int	vm_setup_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len,
			char **mapaddr);
char	*vm_map_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len);
int	vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
		    uint64_t base, uint32_t limit, uint32_t access);
int	vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
		    uint64_t *base, uint32_t *limit, uint32_t *access);
int	vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val);
int	vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval);
int	vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid);
int	vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid);
int	vm_run(struct vmctx *ctx, int vcpu, uint64_t rip,
	       struct vm_exit *ret_vmexit);
int	vm_apicid2vcpu(struct vmctx *ctx, int apicid);
int	vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type,
			int vector);
int	vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type,
			 int vector, int error_code);
int	vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector);
int	vm_inject_nmi(struct vmctx *ctx, int vcpu);
int	vm_capability_name2type(const char *capname);
const char *vm_capability_type2name(int type);
int	vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
			  int *retval);
int	vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
			  int val);
int	vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
int	vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
int	vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
			   vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int	vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
		     int dest, int vector, int numvec);
int	vm_setup_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
		      int idx, uint32_t msg, uint32_t vector_control,
		      uint64_t addr);

/*
 * Return a pointer to the statistics buffer. Note that this is not MT-safe.
 */
uint64_t *vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
		       int *ret_entries);
const char *vm_get_stat_desc(struct vmctx *ctx, int index);

int	vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *s);
int	vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state s);

/* Reset vcpu register state */
int	vcpu_reset(struct vmctx *ctx, int vcpu);

/*
 * FreeBSD specific APIs
 */
int	vm_setup_freebsd_registers(struct vmctx *ctx, int vcpu,
				   uint64_t rip, uint64_t cr3,
				   uint64_t gdtbase, uint64_t rsp);
void	vm_setup_freebsd_gdt(uint64_t *gdtr);
#endif	/* _VMMAPI_H_ */
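As a usage note (not part of the commit), the statistics interface declared above can be consumed as in this hypothetical helper; the vcpu number and output format are arbitrary:

#include <sys/types.h>
#include <sys/time.h>
#include <stdio.h>

#include <machine/vmm.h>
#include <vmmapi.h>

/* Print every statistic the kernel exports for one vcpu. */
static void
dump_vcpu_stats(struct vmctx *ctx, int vcpu)
{
	struct timeval tv;
	uint64_t *stats;
	int i, num;

	/* Returns a pointer into a static buffer - not MT-safe. */
	stats = vm_get_stats(ctx, vcpu, &tv, &num);
	if (stats == NULL)
		return;
	for (i = 0; i < num; i++)
		printf("%-40s %lu\n", vm_get_stat_desc(ctx, i),
		    (unsigned long)stats[i]);
}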
lib/libvmmapi/vmmapi_freebsd.c (new file, 183 lines)
@@ -0,0 +1,183 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <machine/specialreg.h>
|
||||
#include <machine/segments.h>
|
||||
#include <machine/vmm.h>
|
||||
|
||||
#include "vmmapi.h"
|
||||
|
||||
#define DESC_UNUSABLE 0x00010000
|
||||
|
||||
#define GUEST_NULL_SEL 0
|
||||
#define GUEST_CODE_SEL 1
|
||||
#define GUEST_DATA_SEL 2
|
||||
#define GUEST_GDTR_LIMIT (3 * 8 - 1)
|
||||
|
||||
void
|
||||
vm_setup_freebsd_gdt(uint64_t *gdtr)
|
||||
{
|
||||
gdtr[GUEST_NULL_SEL] = 0;
|
||||
gdtr[GUEST_CODE_SEL] = 0x0020980000000000;
|
||||
gdtr[GUEST_DATA_SEL] = 0x0000900000000000;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup the 'vcpu' register set such that it will begin execution at
|
||||
* 'rip' in long mode.
|
||||
*/
|
||||
int
|
||||
vm_setup_freebsd_registers(struct vmctx *vmctx, int vcpu,
|
||||
uint64_t rip, uint64_t cr3, uint64_t gdtbase,
|
||||
uint64_t rsp)
|
||||
{
|
||||
int error;
|
||||
uint64_t cr0, cr4, efer, rflags, desc_base;
|
||||
uint32_t desc_access, desc_limit;
|
||||
uint16_t gsel;
|
||||
|
||||
cr0 = CR0_PE | CR0_PG | CR0_NE;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
|
||||
goto done;
|
||||
|
||||
cr4 = CR4_PAE;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
|
||||
goto done;
|
||||
|
||||
efer = EFER_LME | EFER_LMA;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_EFER, efer)))
|
||||
goto done;
|
||||
|
||||
rflags = 0x2;
|
||||
error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
desc_base = 0;
|
||||
desc_limit = 0;
|
||||
desc_access = 0x0000209B;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
desc_access = 0x00000093;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* XXX TR is pointing to null selector even though we set the
|
||||
* TSS segment to be usable with a base address and limit of 0.
|
||||
*/
|
||||
desc_access = 0x0000008b;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, 0, 0,
|
||||
DESC_UNUSABLE);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
gsel = GSEL(GUEST_CODE_SEL, SEL_KPL);
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, gsel)) != 0)
|
||||
goto done;
|
||||
|
||||
gsel = GSEL(GUEST_DATA_SEL, SEL_KPL);
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, gsel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, gsel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, gsel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, gsel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, gsel)) != 0)
|
||||
goto done;
|
||||
|
||||
/* XXX TR is pointing to the null selector */
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, 0)) != 0)
|
||||
goto done;
|
||||
|
||||
/* LDTR is pointing to the null selector */
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
|
||||
goto done;
|
||||
|
||||
/* entry point */
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0)
|
||||
goto done;
|
||||
|
||||
/* page table base */
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, cr3)) != 0)
|
||||
goto done;
|
||||
|
||||
desc_base = gdtbase;
|
||||
desc_limit = GUEST_GDTR_LIMIT;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR,
|
||||
desc_base, desc_limit, 0);
|
||||
if (error != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, rsp)) != 0)
|
||||
goto done;
|
||||
|
||||
error = 0;
|
||||
done:
|
||||
return (error);
|
||||
}
|
share/man/man4/bhyve.4 (new file, 68 lines)
@@ -0,0 +1,68 @@
.\"
.\" Copyright (c) 2012 NetApp Inc
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\"    notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\"    notice, this list of conditions and the following disclaimer in the
.\"    documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd January 5, 2013
.Dt BHYVE 4
.Os
.Sh NAME
.Nm bhyve
.Nd virtual machine monitor
.Sh SYNOPSIS
.Cd "/usr/sbin/bhyve"
.Cd "/usr/sbin/bhyveload"
.Cd "/usr/sbin/bhyvectl"
.Cd "/boot/kernel/vmm.ko"
.Sh DESCRIPTION
.Nm
is a virtual machine monitor that is hosted by FreeBSD.
It is used to host unmodified guest operating systems on top of FreeBSD.
.Pp
.Nm
relies heavily on hardware assist provided by the CPU and chipset to
virtualize processor and memory resources.
.Sh SEE ALSO
.Xr bhyve 8 ,
.Xr bhyveload 8 ,
.Xr bhyvectl 8 ,
.Xr vmm 4
.Sh HISTORY
.Nm
first appeared in
.Fx 10.0 ,
and was developed at NetApp Inc.
.Sh AUTHORS
.Nm
was developed by
.An -nosplit
.An "Peter Grehan" Aq grehan@FreeBSD.org
and
.An "Neel Natu" Aq neel@FreeBSD.org
at NetApp Inc.
.Sh BUGS
.Nm
is considered experimental in
.Fx .
@@ -162,6 +162,7 @@ LIBULOG?=	${DESTDIR}${LIBDIR}/libulog.a
LIBUTIL?=	${DESTDIR}${LIBDIR}/libutil.a
LIBUUTIL?=	${DESTDIR}${LIBDIR}/libuutil.a
LIBVGL?=	${DESTDIR}${LIBDIR}/libvgl.a
LIBVMMAPI?=	${DESTDIR}${LIBDIR}/libvmmapi.a
LIBWIND?=	${DESTDIR}${LIBDIR}/libwind.a
LIBWRAP?=	${DESTDIR}${LIBDIR}/libwrap.a
LIBXPG4?=	${DESTDIR}${LIBDIR}/libxpg4.a
sys/amd64/include/vmm.h (new file, 293 lines)
@@ -0,0 +1,293 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD: vmm.h 482 2011-05-09 21:22:43Z grehan $
|
||||
*/
|
||||
|
||||
#ifndef _VMM_H_
|
||||
#define _VMM_H_
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
#define VM_MAX_NAMELEN 32
|
||||
|
||||
struct vm;
|
||||
struct vm_memory_segment;
|
||||
struct seg_desc;
|
||||
struct vm_exit;
|
||||
struct vm_run;
|
||||
struct vlapic;
|
||||
|
||||
enum x2apic_state;
|
||||
|
||||
typedef int (*vmm_init_func_t)(void);
|
||||
typedef int (*vmm_cleanup_func_t)(void);
|
||||
typedef void * (*vmi_init_func_t)(struct vm *vm); /* instance specific apis */
|
||||
typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip);
|
||||
typedef void (*vmi_cleanup_func_t)(void *vmi);
|
||||
typedef int (*vmi_mmap_set_func_t)(void *vmi, vm_paddr_t gpa,
|
||||
vm_paddr_t hpa, size_t length,
|
||||
vm_memattr_t attr, int prot,
|
||||
boolean_t superpages_ok);
|
||||
typedef vm_paddr_t (*vmi_mmap_get_func_t)(void *vmi, vm_paddr_t gpa);
|
||||
typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num,
|
||||
uint64_t *retval);
|
||||
typedef int (*vmi_set_register_t)(void *vmi, int vcpu, int num,
|
||||
uint64_t val);
|
||||
typedef int (*vmi_get_desc_t)(void *vmi, int vcpu, int num,
|
||||
struct seg_desc *desc);
|
||||
typedef int (*vmi_set_desc_t)(void *vmi, int vcpu, int num,
|
||||
struct seg_desc *desc);
|
||||
typedef int (*vmi_inject_event_t)(void *vmi, int vcpu,
|
||||
int type, int vector,
|
||||
uint32_t code, int code_valid);
|
||||
typedef int (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
|
||||
typedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
|
||||
|
||||
struct vmm_ops {
|
||||
vmm_init_func_t init; /* module wide initialization */
|
||||
vmm_cleanup_func_t cleanup;
|
||||
|
||||
vmi_init_func_t vminit; /* vm-specific initialization */
|
||||
vmi_run_func_t vmrun;
|
||||
vmi_cleanup_func_t vmcleanup;
|
||||
vmi_mmap_set_func_t vmmmap_set;
|
||||
vmi_mmap_get_func_t vmmmap_get;
|
||||
vmi_get_register_t vmgetreg;
|
||||
vmi_set_register_t vmsetreg;
|
||||
vmi_get_desc_t vmgetdesc;
|
||||
vmi_set_desc_t vmsetdesc;
|
||||
vmi_inject_event_t vminject;
|
||||
vmi_get_cap_t vmgetcap;
|
||||
vmi_set_cap_t vmsetcap;
|
||||
};
|
||||
|
||||
extern struct vmm_ops vmm_ops_intel;
|
||||
extern struct vmm_ops vmm_ops_amd;
|
||||
|
||||
struct vm *vm_create(const char *name);
|
||||
void vm_destroy(struct vm *vm);
|
||||
const char *vm_name(struct vm *vm);
|
||||
int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len);
|
||||
int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
|
||||
int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
|
||||
vm_paddr_t vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t size);
|
||||
int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
|
||||
struct vm_memory_segment *seg);
|
||||
int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
|
||||
int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
|
||||
int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
|
||||
struct seg_desc *ret_desc);
|
||||
int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
|
||||
struct seg_desc *desc);
|
||||
int vm_get_pinning(struct vm *vm, int vcpu, int *cpuid);
|
||||
int vm_set_pinning(struct vm *vm, int vcpu, int cpuid);
|
||||
int vm_run(struct vm *vm, struct vm_run *vmrun);
|
||||
int vm_inject_event(struct vm *vm, int vcpu, int type,
|
||||
int vector, uint32_t error_code, int error_code_valid);
|
||||
int vm_inject_nmi(struct vm *vm, int vcpu);
|
||||
int vm_nmi_pending(struct vm *vm, int vcpuid);
|
||||
void vm_nmi_clear(struct vm *vm, int vcpuid);
|
||||
uint64_t *vm_guest_msrs(struct vm *vm, int cpu);
|
||||
struct vlapic *vm_lapic(struct vm *vm, int cpu);
|
||||
int vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
|
||||
int vm_set_capability(struct vm *vm, int vcpu, int type, int val);
|
||||
int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state);
|
||||
int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state);
|
||||
void vm_activate_cpu(struct vm *vm, int vcpu);
|
||||
cpuset_t vm_active_cpus(struct vm *vm);
|
||||
struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
|
||||
|
||||
/*
|
||||
* Return 1 if device indicated by bus/slot/func is supposed to be a
|
||||
* pci passthrough device.
|
||||
*
|
||||
* Return 0 otherwise.
|
||||
*/
|
||||
int vmm_is_pptdev(int bus, int slot, int func);
|
||||
|
||||
void *vm_iommu_domain(struct vm *vm);
|
||||
|
||||
enum vcpu_state {
|
||||
VCPU_IDLE,
|
||||
VCPU_RUNNING,
|
||||
VCPU_CANNOT_RUN,
|
||||
};
|
||||
|
||||
int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state);
|
||||
enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu);
|
||||
|
||||
static int __inline
|
||||
vcpu_is_running(struct vm *vm, int vcpu)
|
||||
{
|
||||
return (vcpu_get_state(vm, vcpu) == VCPU_RUNNING);
|
||||
}
|
||||
|
||||
void *vcpu_stats(struct vm *vm, int vcpu);
|
||||
void vm_interrupt_hostcpu(struct vm *vm, int vcpu);
|
||||
|
||||
#endif /* KERNEL */
|
||||
|
||||
#include <machine/vmm_instruction_emul.h>
|
||||
|
||||
#define VM_MAXCPU 8 /* maximum virtual cpus */
|
||||
|
||||
/*
|
||||
* Identifiers for events that can be injected into the VM
|
||||
*/
|
||||
enum vm_event_type {
|
||||
VM_EVENT_NONE,
|
||||
VM_HW_INTR,
|
||||
VM_NMI,
|
||||
VM_HW_EXCEPTION,
|
||||
VM_SW_INTR,
|
||||
VM_PRIV_SW_EXCEPTION,
|
||||
VM_SW_EXCEPTION,
|
||||
VM_EVENT_MAX
|
||||
};
|
||||
|
||||
/*
|
||||
* Identifiers for architecturally defined registers.
|
||||
*/
|
||||
enum vm_reg_name {
|
||||
VM_REG_GUEST_RAX,
|
||||
VM_REG_GUEST_RBX,
|
||||
VM_REG_GUEST_RCX,
|
||||
VM_REG_GUEST_RDX,
|
||||
VM_REG_GUEST_RSI,
|
||||
VM_REG_GUEST_RDI,
|
||||
VM_REG_GUEST_RBP,
|
||||
VM_REG_GUEST_R8,
|
||||
VM_REG_GUEST_R9,
|
||||
VM_REG_GUEST_R10,
|
||||
VM_REG_GUEST_R11,
|
||||
VM_REG_GUEST_R12,
|
||||
VM_REG_GUEST_R13,
|
||||
VM_REG_GUEST_R14,
|
||||
VM_REG_GUEST_R15,
|
||||
VM_REG_GUEST_CR0,
|
||||
VM_REG_GUEST_CR3,
|
||||
VM_REG_GUEST_CR4,
|
||||
VM_REG_GUEST_DR7,
|
||||
VM_REG_GUEST_RSP,
|
||||
VM_REG_GUEST_RIP,
|
||||
VM_REG_GUEST_RFLAGS,
|
||||
VM_REG_GUEST_ES,
|
||||
VM_REG_GUEST_CS,
|
||||
VM_REG_GUEST_SS,
|
||||
VM_REG_GUEST_DS,
|
||||
VM_REG_GUEST_FS,
|
||||
VM_REG_GUEST_GS,
|
||||
VM_REG_GUEST_LDTR,
|
||||
VM_REG_GUEST_TR,
|
||||
VM_REG_GUEST_IDTR,
|
||||
VM_REG_GUEST_GDTR,
|
||||
VM_REG_GUEST_EFER,
|
||||
VM_REG_LAST
|
||||
};
|
||||
|
||||
/*
|
||||
* Identifiers for optional vmm capabilities
|
||||
*/
|
||||
enum vm_cap_type {
|
||||
VM_CAP_HALT_EXIT,
|
||||
VM_CAP_MTRAP_EXIT,
|
||||
VM_CAP_PAUSE_EXIT,
|
||||
VM_CAP_UNRESTRICTED_GUEST,
|
||||
VM_CAP_MAX
|
||||
};
|
||||
|
||||
enum x2apic_state {
|
||||
X2APIC_ENABLED,
|
||||
X2APIC_AVAILABLE,
|
||||
X2APIC_DISABLED,
|
||||
X2APIC_STATE_LAST
|
||||
};
|
||||
|
||||
/*
|
||||
* The 'access' field has the format specified in Table 21-2 of the Intel
|
||||
* Architecture Manual vol 3b.
|
||||
*
|
||||
* XXX The contents of the 'access' field are architecturally defined except
|
||||
* bit 16 - Segment Unusable.
|
||||
*/
|
||||
struct seg_desc {
|
||||
uint64_t base;
|
||||
uint32_t limit;
|
||||
uint32_t access;
|
||||
};
|
||||
|
||||
enum vm_exitcode {
|
||||
VM_EXITCODE_INOUT,
|
||||
VM_EXITCODE_VMX,
|
||||
VM_EXITCODE_BOGUS,
|
||||
VM_EXITCODE_RDMSR,
|
||||
VM_EXITCODE_WRMSR,
|
||||
VM_EXITCODE_HLT,
|
||||
VM_EXITCODE_MTRAP,
|
||||
VM_EXITCODE_PAUSE,
|
||||
VM_EXITCODE_PAGING,
|
||||
VM_EXITCODE_SPINUP_AP,
|
||||
VM_EXITCODE_MAX
|
||||
};
|
||||
|
||||
struct vm_exit {
|
||||
enum vm_exitcode exitcode;
|
||||
int inst_length; /* 0 means unknown */
|
||||
uint64_t rip;
|
||||
union {
|
||||
struct {
|
||||
uint16_t bytes:3; /* 1 or 2 or 4 */
|
||||
uint16_t in:1; /* out is 0, in is 1 */
|
||||
uint16_t string:1;
|
||||
uint16_t rep:1;
|
||||
uint16_t port;
|
||||
uint32_t eax; /* valid for out */
|
||||
} inout;
|
||||
struct {
|
||||
uint64_t gpa;
|
||||
struct vie vie;
|
||||
} paging;
|
||||
/*
|
||||
* VMX specific payload. Used when there is no "better"
|
||||
* exitcode to represent the VM-exit.
|
||||
*/
|
||||
struct {
|
||||
int error; /* vmx inst error */
|
||||
uint32_t exit_reason;
|
||||
uint64_t exit_qualification;
|
||||
} vmx;
|
||||
struct {
|
||||
uint32_t code; /* ecx value */
|
||||
uint64_t wval;
|
||||
} msr;
|
||||
struct {
|
||||
int vcpu;
|
||||
uint64_t rip;
|
||||
} spinup_ap;
|
||||
} u;
|
||||
};
|
||||
|
||||
#endif /* _VMM_H_ */
|
sys/amd64/include/vmm_dev.h (new file, 215 lines)
@@ -0,0 +1,215 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD: vmm_dev.h 482 2011-05-09 21:22:43Z grehan $
|
||||
*/
|
||||
|
||||
#ifndef _VMM_DEV_H_
|
||||
#define _VMM_DEV_H_
|
||||
|
||||
#ifdef _KERNEL
|
||||
void vmmdev_init(void);
|
||||
int vmmdev_cleanup(void);
|
||||
#endif
|
||||
|
||||
struct vm_memory_segment {
|
||||
vm_paddr_t gpa; /* in */
|
||||
size_t len; /* in */
|
||||
};
|
||||
|
||||
struct vm_register {
|
||||
int cpuid;
|
||||
int regnum; /* enum vm_reg_name */
|
||||
uint64_t regval;
|
||||
};
|
||||
|
||||
struct vm_seg_desc { /* data or code segment */
|
||||
int cpuid;
|
||||
int regnum; /* enum vm_reg_name */
|
||||
struct seg_desc desc;
|
||||
};
|
||||
|
||||
struct vm_pin {
|
||||
int vm_cpuid;
|
||||
int host_cpuid; /* -1 to unpin */
|
||||
};
|
||||
|
||||
struct vm_run {
|
||||
int cpuid;
|
||||
uint64_t rip; /* start running here */
|
||||
struct vm_exit vm_exit;
|
||||
};
|
||||
|
||||
struct vm_event {
|
||||
int cpuid;
|
||||
enum vm_event_type type;
|
||||
int vector;
|
||||
uint32_t error_code;
|
||||
int error_code_valid;
|
||||
};
|
||||
|
||||
struct vm_lapic_irq {
|
||||
int cpuid;
|
||||
int vector;
|
||||
};
|
||||
|
||||
struct vm_capability {
|
||||
int cpuid;
|
||||
enum vm_cap_type captype;
|
||||
int capval;
|
||||
int allcpus;
|
||||
};
|
||||
|
||||
struct vm_pptdev {
|
||||
int bus;
|
||||
int slot;
|
||||
int func;
|
||||
};
|
||||
|
||||
struct vm_pptdev_mmio {
|
||||
int bus;
|
||||
int slot;
|
||||
int func;
|
||||
vm_paddr_t gpa;
|
||||
vm_paddr_t hpa;
|
||||
size_t len;
|
||||
};
|
||||
|
||||
struct vm_pptdev_msi {
|
||||
int vcpu;
|
||||
int bus;
|
||||
int slot;
|
||||
int func;
|
||||
int numvec; /* 0 means disabled */
|
||||
int vector;
|
||||
int destcpu;
|
||||
};
|
||||
|
||||
struct vm_pptdev_msix {
|
||||
int vcpu;
|
||||
int bus;
|
||||
int slot;
|
||||
int func;
|
||||
int idx;
|
||||
uint32_t msg;
|
||||
uint32_t vector_control;
|
||||
uint64_t addr;
|
||||
};
|
||||
|
||||
struct vm_nmi {
|
||||
int cpuid;
|
||||
};
|
||||
|
||||
#define MAX_VM_STATS 64
|
||||
struct vm_stats {
|
||||
int cpuid; /* in */
|
||||
int num_entries; /* out */
|
||||
struct timeval tv;
|
||||
uint64_t statbuf[MAX_VM_STATS];
|
||||
};
|
||||
|
||||
struct vm_stat_desc {
|
||||
int index; /* in */
|
||||
char desc[128]; /* out */
|
||||
};
|
||||
|
||||
struct vm_x2apic {
|
||||
int cpuid;
|
||||
enum x2apic_state state;
|
||||
};
|
||||
|
||||
enum {
|
||||
IOCNUM_RUN,
|
||||
IOCNUM_SET_PINNING,
|
||||
IOCNUM_GET_PINNING,
|
||||
IOCNUM_MAP_MEMORY,
|
||||
IOCNUM_GET_MEMORY_SEG,
|
||||
IOCNUM_SET_REGISTER,
|
||||
IOCNUM_GET_REGISTER,
|
||||
IOCNUM_SET_SEGMENT_DESCRIPTOR,
|
||||
IOCNUM_GET_SEGMENT_DESCRIPTOR,
|
||||
IOCNUM_INJECT_EVENT,
|
||||
IOCNUM_LAPIC_IRQ,
|
||||
IOCNUM_SET_CAPABILITY,
|
||||
IOCNUM_GET_CAPABILITY,
|
||||
IOCNUM_BIND_PPTDEV,
|
||||
IOCNUM_UNBIND_PPTDEV,
|
||||
IOCNUM_MAP_PPTDEV_MMIO,
|
||||
IOCNUM_PPTDEV_MSI,
|
||||
IOCNUM_PPTDEV_MSIX,
|
||||
IOCNUM_INJECT_NMI,
|
||||
IOCNUM_VM_STATS,
|
||||
IOCNUM_VM_STAT_DESC,
|
||||
IOCNUM_SET_X2APIC_STATE,
|
||||
IOCNUM_GET_X2APIC_STATE,
|
||||
};
|
||||
|
||||
#define VM_RUN \
|
||||
_IOWR('v', IOCNUM_RUN, struct vm_run)
|
||||
#define VM_SET_PINNING \
|
||||
_IOW('v', IOCNUM_SET_PINNING, struct vm_pin)
|
||||
#define VM_GET_PINNING \
|
||||
_IOWR('v', IOCNUM_GET_PINNING, struct vm_pin)
|
||||
#define VM_MAP_MEMORY \
|
||||
_IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment)
|
||||
#define VM_GET_MEMORY_SEG \
|
||||
_IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment)
|
||||
#define VM_SET_REGISTER \
|
||||
_IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
|
||||
#define VM_GET_REGISTER \
|
||||
_IOWR('v', IOCNUM_GET_REGISTER, struct vm_register)
|
||||
#define VM_SET_SEGMENT_DESCRIPTOR \
|
||||
_IOW('v', IOCNUM_SET_SEGMENT_DESCRIPTOR, struct vm_seg_desc)
|
||||
#define VM_GET_SEGMENT_DESCRIPTOR \
|
||||
_IOWR('v', IOCNUM_GET_SEGMENT_DESCRIPTOR, struct vm_seg_desc)
|
||||
#define VM_INJECT_EVENT \
|
||||
_IOW('v', IOCNUM_INJECT_EVENT, struct vm_event)
|
||||
#define VM_LAPIC_IRQ \
|
||||
_IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq)
|
||||
#define VM_SET_CAPABILITY \
|
||||
_IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability)
|
||||
#define VM_GET_CAPABILITY \
|
||||
_IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability)
|
||||
#define VM_BIND_PPTDEV \
|
||||
_IOW('v', IOCNUM_BIND_PPTDEV, struct vm_pptdev)
|
||||
#define VM_UNBIND_PPTDEV \
|
||||
_IOW('v', IOCNUM_UNBIND_PPTDEV, struct vm_pptdev)
|
||||
#define VM_MAP_PPTDEV_MMIO \
|
||||
_IOW('v', IOCNUM_MAP_PPTDEV_MMIO, struct vm_pptdev_mmio)
|
||||
#define VM_PPTDEV_MSI \
|
||||
_IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi)
|
||||
#define VM_PPTDEV_MSIX \
|
||||
_IOW('v', IOCNUM_PPTDEV_MSIX, struct vm_pptdev_msix)
|
||||
#define VM_INJECT_NMI \
|
||||
_IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi)
|
||||
#define VM_STATS \
|
||||
_IOWR('v', IOCNUM_VM_STATS, struct vm_stats)
|
||||
#define VM_STAT_DESC \
|
||||
_IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc)
|
||||
#define VM_SET_X2APIC_STATE \
|
||||
_IOW('v', IOCNUM_SET_X2APIC_STATE, struct vm_x2apic)
|
||||
#define VM_GET_X2APIC_STATE \
|
||||
_IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic)
|
||||
#endif
|
sys/amd64/include/vmm_instruction_emul.h (new file, 113 lines)
@@ -0,0 +1,113 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMM_INSTRUCTION_EMUL_H_
|
||||
#define _VMM_INSTRUCTION_EMUL_H_
|
||||
|
||||
/*
|
||||
* The data structures 'vie' and 'vie_op' are meant to be opaque to the
|
||||
* consumers of instruction decoding. The only reason why their contents
|
||||
* need to be exposed is because they are part of the 'vm_exit' structure.
|
||||
*/
|
||||
struct vie_op {
|
||||
uint8_t op_byte; /* actual opcode byte */
|
||||
uint8_t op_type; /* type of operation (e.g. MOV) */
|
||||
uint16_t op_flags;
|
||||
};
|
||||
|
||||
#define VIE_INST_SIZE 15
|
||||
struct vie {
|
||||
uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */
|
||||
uint8_t num_valid; /* size of the instruction */
|
||||
uint8_t num_processed;
|
||||
|
||||
uint8_t rex_w:1, /* REX prefix */
|
||||
rex_r:1,
|
||||
rex_x:1,
|
||||
rex_b:1;
|
||||
|
||||
uint8_t mod:2, /* ModRM byte */
|
||||
reg:4,
|
||||
rm:4;
|
||||
|
||||
uint8_t ss:2, /* SIB byte */
|
||||
index:4,
|
||||
base:4;
|
||||
|
||||
uint8_t disp_bytes;
|
||||
uint8_t imm_bytes;
|
||||
|
||||
uint8_t scale;
|
||||
int base_register; /* VM_REG_GUEST_xyz */
|
||||
int index_register; /* VM_REG_GUEST_xyz */
|
||||
|
||||
int64_t displacement; /* optional addr displacement */
|
||||
int64_t immediate; /* optional immediate operand */
|
||||
|
||||
uint8_t decoded; /* set to 1 if successfully decoded */
|
||||
|
||||
struct vie_op op; /* opcode description */
|
||||
};
|
||||
|
||||
/*
|
||||
* Callback functions to read and write memory regions.
|
||||
*/
|
||||
typedef int (*mem_region_read_t)(void *vm, int cpuid, uint64_t gpa,
|
||||
uint64_t *rval, int rsize, void *arg);
|
||||
|
||||
typedef int (*mem_region_write_t)(void *vm, int cpuid, uint64_t gpa,
|
||||
uint64_t wval, int wsize, void *arg);
|
||||
|
||||
/*
|
||||
* Emulate the decoded 'vie' instruction.
|
||||
*
|
||||
* The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
|
||||
* containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
|
||||
* callback functions.
|
||||
*
|
||||
* 'void *vm' should be 'struct vm *' when called from kernel context and
|
||||
* 'struct vmctx *' when called from user context.
|
||||
|
||||
*/
|
||||
int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie,
|
||||
mem_region_read_t mrr, mem_region_write_t mrw,
|
||||
void *mrarg);
|
||||
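/*
 * Illustrative sketch only (not part of this header): a userspace consumer
 * of vmm_emulate_instruction() supplies callbacks of the types above to
 * back the emulated access with its device model.  The names 'emul_read',
 * 'emul_write' and 'mmio_regs' below are hypothetical.
 */
#if 0
static int
emul_read(void *vm, int cpuid, uint64_t gpa, uint64_t *rval, int rsize,
    void *arg)
{
	uint64_t *mmio_regs = arg;

	/* Return the 64-bit register word backing this guest physical addr */
	*rval = mmio_regs[(gpa & 0xfff) / 8];
	return (0);
}

static int
emul_write(void *vm, int cpuid, uint64_t gpa, uint64_t wval, int wsize,
    void *arg)
{
	uint64_t *mmio_regs = arg;

	mmio_regs[(gpa & 0xfff) / 8] = wval;
	return (0);
}

/*
 * ... and hands them to the emulator along with the decoded instruction:
 *
 *	error = vmm_emulate_instruction(ctx, vcpu, gpa, vie,
 *	    emul_read, emul_write, mmio_regs);
 */
#endif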
|
||||
#ifdef _KERNEL
|
||||
/*
|
||||
* APIs to fetch and decode the instruction from nested page fault handler.
|
||||
*/
|
||||
int vmm_fetch_instruction(struct vm *vm, int cpuid,
|
||||
uint64_t rip, int inst_length, uint64_t cr3,
|
||||
struct vie *vie);
|
||||
|
||||
int vmm_decode_instruction(struct vm *vm, int cpuid,
|
||||
uint64_t gla, struct vie *vie);
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#endif /* _VMM_INSTRUCTION_EMUL_H_ */
|
265
sys/amd64/vmm/amd/amdv.c
Normal file
@ -0,0 +1,265 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/smp.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "io/iommu.h"
|
||||
|
||||
static int
|
||||
amdv_init(void)
|
||||
{
|
||||
|
||||
printf("amdv_init: not implemented\n");
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_cleanup(void)
|
||||
{
|
||||
|
||||
printf("amdv_cleanup: not implemented\n");
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
static void *
|
||||
amdv_vminit(struct vm *vm)
|
||||
{
|
||||
|
||||
printf("amdv_vminit: not implemented\n");
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_vmrun(void *arg, int vcpu, register_t rip)
|
||||
{
|
||||
|
||||
printf("amdv_vmrun: not implemented\n");
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
static void
|
||||
amdv_vmcleanup(void *arg)
|
||||
{
|
||||
|
||||
printf("amdv_vmcleanup: not implemented\n");
|
||||
return;
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_vmmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
|
||||
vm_memattr_t attr, int prot, boolean_t spok)
|
||||
{
|
||||
|
||||
printf("amdv_vmmmap_set: not implemented\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
static vm_paddr_t
|
||||
amdv_vmmmap_get(void *arg, vm_paddr_t gpa)
|
||||
{
|
||||
|
||||
printf("amdv_vmmmap_get: not implemented\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_getreg(void *arg, int vcpu, int regnum, uint64_t *retval)
|
||||
{
|
||||
|
||||
printf("amdv_getreg: not implemented\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_setreg(void *arg, int vcpu, int regnum, uint64_t val)
|
||||
{
|
||||
|
||||
printf("amdv_setreg: not implemented\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_getdesc(void *vmi, int vcpu, int num, struct seg_desc *desc)
|
||||
{
|
||||
|
||||
printf("amdv_get_desc: not implemented\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_setdesc(void *vmi, int vcpu, int num, struct seg_desc *desc)
|
||||
{
|
||||
|
||||
printf("amdv_get_desc: not implemented\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_inject_event(void *vmi, int vcpu, int type, int vector,
|
||||
uint32_t error_code, int error_code_valid)
|
||||
{
|
||||
|
||||
printf("amdv_inject_event: not implemented\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_getcap(void *arg, int vcpu, int type, int *retval)
|
||||
{
|
||||
|
||||
printf("amdv_getcap: not implemented\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_setcap(void *arg, int vcpu, int type, int val)
|
||||
{
|
||||
|
||||
printf("amdv_setcap: not implemented\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
struct vmm_ops vmm_ops_amd = {
|
||||
amdv_init,
|
||||
amdv_cleanup,
|
||||
amdv_vminit,
|
||||
amdv_vmrun,
|
||||
amdv_vmcleanup,
|
||||
amdv_vmmmap_set,
|
||||
amdv_vmmmap_get,
|
||||
amdv_getreg,
|
||||
amdv_setreg,
|
||||
amdv_getdesc,
|
||||
amdv_setdesc,
|
||||
amdv_inject_event,
|
||||
amdv_getcap,
|
||||
amdv_setcap
|
||||
};
|
||||
|
||||
static int
|
||||
amd_iommu_init(void)
|
||||
{
|
||||
|
||||
printf("amd_iommu_init: not implemented\n");
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
static void
|
||||
amd_iommu_cleanup(void)
|
||||
{
|
||||
|
||||
printf("amd_iommu_cleanup: not implemented\n");
|
||||
}
|
||||
|
||||
static void
|
||||
amd_iommu_enable(void)
|
||||
{
|
||||
|
||||
printf("amd_iommu_enable: not implemented\n");
|
||||
}
|
||||
|
||||
static void
|
||||
amd_iommu_disable(void)
|
||||
{
|
||||
|
||||
printf("amd_iommu_disable: not implemented\n");
|
||||
}
|
||||
|
||||
static void *
|
||||
amd_iommu_create_domain(vm_paddr_t maxaddr)
|
||||
{
|
||||
|
||||
printf("amd_iommu_create_domain: not implemented\n");
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
amd_iommu_destroy_domain(void *domain)
|
||||
{
|
||||
|
||||
printf("amd_iommu_destroy_domain: not implemented\n");
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
amd_iommu_create_mapping(void *domain, vm_paddr_t gpa, vm_paddr_t hpa,
|
||||
uint64_t len)
|
||||
{
|
||||
|
||||
printf("amd_iommu_create_mapping: not implemented\n");
|
||||
return (0);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
amd_iommu_remove_mapping(void *domain, vm_paddr_t gpa, uint64_t len)
|
||||
{
|
||||
|
||||
printf("amd_iommu_remove_mapping: not implemented\n");
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
amd_iommu_add_device(void *domain, int bus, int slot, int func)
|
||||
{
|
||||
|
||||
printf("amd_iommu_add_device: not implemented\n");
|
||||
}
|
||||
|
||||
static void
|
||||
amd_iommu_remove_device(void *domain, int bus, int slot, int func)
|
||||
{
|
||||
|
||||
printf("amd_iommu_remove_device: not implemented\n");
|
||||
}
|
||||
|
||||
static void
|
||||
amd_iommu_invalidate_tlb(void *domain)
|
||||
{
|
||||
|
||||
printf("amd_iommu_invalidate_tlb: not implemented\n");
|
||||
}
|
||||
|
||||
struct iommu_ops iommu_ops_amd = {
|
||||
amd_iommu_init,
|
||||
amd_iommu_cleanup,
|
||||
amd_iommu_enable,
|
||||
amd_iommu_disable,
|
||||
amd_iommu_create_domain,
|
||||
amd_iommu_destroy_domain,
|
||||
amd_iommu_create_mapping,
|
||||
amd_iommu_remove_mapping,
|
||||
amd_iommu_add_device,
|
||||
amd_iommu_remove_device,
|
||||
amd_iommu_invalidate_tlb,
|
||||
};
|
392
sys/amd64/vmm/intel/ept.c
Normal file
@ -0,0 +1,392 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/smp.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
|
||||
#include <machine/param.h>
|
||||
#include <machine/cpufunc.h>
|
||||
#include <machine/pmap.h>
|
||||
#include <machine/vmparam.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "vmx_cpufunc.h"
|
||||
#include "vmx_msr.h"
|
||||
#include "vmx.h"
|
||||
#include "ept.h"
|
||||
|
||||
#define EPT_PWL4(cap) ((cap) & (1UL << 6))
|
||||
#define EPT_MEMORY_TYPE_WB(cap) ((cap) & (1UL << 14))
|
||||
#define EPT_PDE_SUPERPAGE(cap) ((cap) & (1UL << 16)) /* 2MB pages */
|
||||
#define EPT_PDPTE_SUPERPAGE(cap) ((cap) & (1UL << 17)) /* 1GB pages */
|
||||
#define INVVPID_SUPPORTED(cap) ((cap) & (1UL << 32))
|
||||
#define INVEPT_SUPPORTED(cap) ((cap) & (1UL << 20))
|
||||
|
||||
#define INVVPID_ALL_TYPES_MASK 0xF0000000000UL
|
||||
#define INVVPID_ALL_TYPES_SUPPORTED(cap) \
|
||||
(((cap) & INVVPID_ALL_TYPES_MASK) == INVVPID_ALL_TYPES_MASK)
|
||||
|
||||
#define INVEPT_ALL_TYPES_MASK 0x6000000UL
|
||||
#define INVEPT_ALL_TYPES_SUPPORTED(cap) \
|
||||
(((cap) & INVEPT_ALL_TYPES_MASK) == INVEPT_ALL_TYPES_MASK)
|
||||
|
||||
#define EPT_PG_RD (1 << 0)
|
||||
#define EPT_PG_WR (1 << 1)
|
||||
#define EPT_PG_EX (1 << 2)
|
||||
#define EPT_PG_MEMORY_TYPE(x) ((x) << 3)
|
||||
#define EPT_PG_IGNORE_PAT (1 << 6)
|
||||
#define EPT_PG_SUPERPAGE (1 << 7)
|
||||
|
||||
#define EPT_ADDR_MASK ((uint64_t)-1 << 12)
|
||||
|
||||
MALLOC_DECLARE(M_VMX);
|
||||
|
||||
static uint64_t page_sizes_mask;
|
||||
|
||||
int
|
||||
ept_init(void)
|
||||
{
|
||||
int page_shift;
|
||||
uint64_t cap;
|
||||
|
||||
cap = rdmsr(MSR_VMX_EPT_VPID_CAP);
|
||||
|
||||
/*
|
||||
* Verify that:
|
||||
* - page walk length is 4 steps
|
||||
* - extended page tables can be laid out in write-back memory
|
||||
* - invvpid instruction with all possible types is supported
|
||||
* - invept instruction with all possible types is supported
|
||||
*/
|
||||
if (!EPT_PWL4(cap) ||
|
||||
!EPT_MEMORY_TYPE_WB(cap) ||
|
||||
!INVVPID_SUPPORTED(cap) ||
|
||||
!INVVPID_ALL_TYPES_SUPPORTED(cap) ||
|
||||
!INVEPT_SUPPORTED(cap) ||
|
||||
!INVEPT_ALL_TYPES_SUPPORTED(cap))
|
||||
return (EINVAL);
|
||||
|
||||
/* Set bits in 'page_sizes_mask' for each valid page size */
|
||||
page_shift = PAGE_SHIFT;
|
||||
page_sizes_mask = 1UL << page_shift; /* 4KB page */
|
||||
|
||||
page_shift += 9;
|
||||
if (EPT_PDE_SUPERPAGE(cap))
|
||||
page_sizes_mask |= 1UL << page_shift; /* 2MB superpage */
|
||||
|
||||
page_shift += 9;
|
||||
if (EPT_PDPTE_SUPERPAGE(cap))
|
||||
page_sizes_mask |= 1UL << page_shift; /* 1GB superpage */
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static void
|
||||
ept_dump(uint64_t *ptp, int nlevels)
|
||||
{
|
||||
int i, t, tabs;
|
||||
uint64_t *ptpnext, ptpval;
|
||||
|
||||
if (--nlevels < 0)
|
||||
return;
|
||||
|
||||
tabs = 3 - nlevels;
|
||||
for (t = 0; t < tabs; t++)
|
||||
printf("\t");
|
||||
printf("PTP = %p\n", ptp);
|
||||
|
||||
for (i = 0; i < 512; i++) {
|
||||
ptpval = ptp[i];
|
||||
|
||||
if (ptpval == 0)
|
||||
continue;
|
||||
|
||||
for (t = 0; t < tabs; t++)
|
||||
printf("\t");
|
||||
printf("%3d 0x%016lx\n", i, ptpval);
|
||||
|
||||
if (nlevels != 0 && (ptpval & EPT_PG_SUPERPAGE) == 0) {
|
||||
ptpnext = (uint64_t *)
|
||||
PHYS_TO_DMAP(ptpval & EPT_ADDR_MASK);
|
||||
ept_dump(ptpnext, nlevels);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static size_t
|
||||
ept_create_mapping(uint64_t *ptp, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
|
||||
vm_memattr_t attr, vm_prot_t prot, boolean_t spok)
|
||||
{
|
||||
int spshift, ptpshift, ptpindex, nlevels;
|
||||
|
||||
/*
|
||||
* Compute the size of the mapping that we can accommodate.
|
||||
*
|
||||
* This is based on three factors:
|
||||
* - super page sizes supported by the processor
|
||||
* - alignment of the region starting at 'gpa' and 'hpa'
|
||||
* - length of the region 'length'
|
||||
*/
|
||||
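/*
 * Worked example (illustrative): with 2MB superpages supported, a request
 * where 'gpa' and 'hpa' are both 2MB-aligned and 'length' is at least
 * 0x200000 settles on spshift = 21 below, i.e. a single 2MB leaf mapping;
 * otherwise the loop falls through to spshift = 12 and the region is
 * mapped one 4KB page at a time.
 */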
spshift = PAGE_SHIFT;
|
||||
if (spok)
|
||||
spshift += (EPT_PWLEVELS - 1) * 9;
|
||||
while (spshift >= PAGE_SHIFT) {
|
||||
uint64_t spsize = 1UL << spshift;
|
||||
if ((page_sizes_mask & spsize) != 0 &&
|
||||
(gpa & (spsize - 1)) == 0 &&
|
||||
(hpa & (spsize - 1)) == 0 &&
|
||||
length >= spsize) {
|
||||
break;
|
||||
}
|
||||
spshift -= 9;
|
||||
}
|
||||
|
||||
if (spshift < PAGE_SHIFT) {
|
||||
panic("Invalid spshift for gpa 0x%016lx, hpa 0x%016lx, "
|
||||
"length 0x%016lx, page_sizes_mask 0x%016lx",
|
||||
gpa, hpa, length, page_sizes_mask);
|
||||
}
|
||||
|
||||
nlevels = EPT_PWLEVELS;
|
||||
while (--nlevels >= 0) {
|
||||
ptpshift = PAGE_SHIFT + nlevels * 9;
|
||||
ptpindex = (gpa >> ptpshift) & 0x1FF;
|
||||
|
||||
/* We have reached the leaf mapping */
|
||||
if (spshift >= ptpshift)
|
||||
break;
|
||||
|
||||
/*
|
||||
* We are working on a non-leaf page table page.
|
||||
*
|
||||
* Create the next level page table page if necessary and point
|
||||
* to it from the current page table.
|
||||
*/
|
||||
if (ptp[ptpindex] == 0) {
|
||||
void *nlp = malloc(PAGE_SIZE, M_VMX, M_WAITOK | M_ZERO);
|
||||
ptp[ptpindex] = vtophys(nlp);
|
||||
ptp[ptpindex] |= EPT_PG_RD | EPT_PG_WR | EPT_PG_EX;
|
||||
}
|
||||
|
||||
/* Work our way down to the next level page table page */
|
||||
ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & EPT_ADDR_MASK);
|
||||
}
|
||||
|
||||
if ((gpa & ((1UL << ptpshift) - 1)) != 0) {
|
||||
panic("ept_create_mapping: gpa 0x%016lx and ptpshift %d "
|
||||
"mismatch\n", gpa, ptpshift);
|
||||
}
|
||||
|
||||
if (prot != VM_PROT_NONE) {
|
||||
/* Do the mapping */
|
||||
ptp[ptpindex] = hpa;
|
||||
|
||||
/* Apply the access controls */
|
||||
if (prot & VM_PROT_READ)
|
||||
ptp[ptpindex] |= EPT_PG_RD;
|
||||
if (prot & VM_PROT_WRITE)
|
||||
ptp[ptpindex] |= EPT_PG_WR;
|
||||
if (prot & VM_PROT_EXECUTE)
|
||||
ptp[ptpindex] |= EPT_PG_EX;
|
||||
|
||||
/*
|
||||
* XXX should we enforce this memory type by setting the
|
||||
* ignore PAT bit to 1?
|
||||
*/
|
||||
ptp[ptpindex] |= EPT_PG_MEMORY_TYPE(attr);
|
||||
|
||||
if (nlevels > 0)
|
||||
ptp[ptpindex] |= EPT_PG_SUPERPAGE;
|
||||
} else {
|
||||
/* Remove the mapping */
|
||||
ptp[ptpindex] = 0;
|
||||
}
|
||||
|
||||
return (1UL << ptpshift);
|
||||
}
|
||||
|
||||
static vm_paddr_t
|
||||
ept_lookup_mapping(uint64_t *ptp, vm_paddr_t gpa)
|
||||
{
|
||||
int nlevels, ptpshift, ptpindex;
|
||||
uint64_t ptpval, hpabase, pgmask;
|
||||
|
||||
nlevels = EPT_PWLEVELS;
|
||||
while (--nlevels >= 0) {
|
||||
ptpshift = PAGE_SHIFT + nlevels * 9;
|
||||
ptpindex = (gpa >> ptpshift) & 0x1FF;
|
||||
|
||||
ptpval = ptp[ptpindex];
|
||||
|
||||
/* Cannot make progress beyond this point */
|
||||
if ((ptpval & (EPT_PG_RD | EPT_PG_WR | EPT_PG_EX)) == 0)
|
||||
break;
|
||||
|
||||
if (nlevels == 0 || (ptpval & EPT_PG_SUPERPAGE)) {
|
||||
pgmask = (1UL << ptpshift) - 1;
|
||||
hpabase = ptpval & ~pgmask;
|
||||
return (hpabase | (gpa & pgmask));
|
||||
}
|
||||
|
||||
/* Work our way down to the next level page table page */
|
||||
ptp = (uint64_t *)PHYS_TO_DMAP(ptpval & EPT_ADDR_MASK);
|
||||
}
|
||||
|
||||
return ((vm_paddr_t)-1);
|
||||
}
|
||||
|
||||
static void
|
||||
ept_free_pt_entry(pt_entry_t pte)
|
||||
{
|
||||
if (pte == 0)
|
||||
return;
|
||||
|
||||
/* sanity check */
|
||||
if ((pte & EPT_PG_SUPERPAGE) != 0)
|
||||
panic("ept_free_pt_entry: pte cannot have superpage bit");
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static void
|
||||
ept_free_pd_entry(pd_entry_t pde)
|
||||
{
|
||||
pt_entry_t *pt;
|
||||
int i;
|
||||
|
||||
if (pde == 0)
|
||||
return;
|
||||
|
||||
if ((pde & EPT_PG_SUPERPAGE) == 0) {
|
||||
pt = (pt_entry_t *)PHYS_TO_DMAP(pde & EPT_ADDR_MASK);
|
||||
for (i = 0; i < NPTEPG; i++)
|
||||
ept_free_pt_entry(pt[i]);
|
||||
free(pt, M_VMX); /* free the page table page */
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
ept_free_pdp_entry(pdp_entry_t pdpe)
|
||||
{
|
||||
pd_entry_t *pd;
|
||||
int i;
|
||||
|
||||
if (pdpe == 0)
|
||||
return;
|
||||
|
||||
if ((pdpe & EPT_PG_SUPERPAGE) == 0) {
|
||||
pd = (pd_entry_t *)PHYS_TO_DMAP(pdpe & EPT_ADDR_MASK);
|
||||
for (i = 0; i < NPDEPG; i++)
|
||||
ept_free_pd_entry(pd[i]);
|
||||
free(pd, M_VMX); /* free the page directory page */
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
ept_free_pml4_entry(pml4_entry_t pml4e)
|
||||
{
|
||||
pdp_entry_t *pdp;
|
||||
int i;
|
||||
|
||||
if (pml4e == 0)
|
||||
return;
|
||||
|
||||
if ((pml4e & EPT_PG_SUPERPAGE) == 0) {
|
||||
pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4e & EPT_ADDR_MASK);
|
||||
for (i = 0; i < NPDPEPG; i++)
|
||||
ept_free_pdp_entry(pdp[i]);
|
||||
free(pdp, M_VMX); /* free the page directory ptr page */
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ept_vmcleanup(struct vmx *vmx)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NPML4EPG; i++)
|
||||
ept_free_pml4_entry(vmx->pml4ept[i]);
|
||||
}
|
||||
|
||||
int
|
||||
ept_vmmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t len,
|
||||
vm_memattr_t attr, int prot, boolean_t spok)
|
||||
{
|
||||
size_t n;
|
||||
struct vmx *vmx = arg;
|
||||
|
||||
while (len > 0) {
|
||||
n = ept_create_mapping(vmx->pml4ept, gpa, hpa, len, attr,
|
||||
prot, spok);
|
||||
len -= n;
|
||||
gpa += n;
|
||||
hpa += n;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
vm_paddr_t
|
||||
ept_vmmmap_get(void *arg, vm_paddr_t gpa)
|
||||
{
|
||||
vm_paddr_t hpa;
|
||||
struct vmx *vmx;
|
||||
|
||||
vmx = arg;
|
||||
hpa = ept_lookup_mapping(vmx->pml4ept, gpa);
|
||||
return (hpa);
|
||||
}
|
||||
|
||||
static void
|
||||
invept_single_context(void *arg)
|
||||
{
|
||||
struct invept_desc desc = *(struct invept_desc *)arg;
|
||||
|
||||
invept(INVEPT_TYPE_SINGLE_CONTEXT, desc);
|
||||
}
|
||||
|
||||
void
|
||||
ept_invalidate_mappings(u_long pml4ept)
|
||||
{
|
||||
struct invept_desc invept_desc = { 0 };
|
||||
|
||||
invept_desc.eptp = EPTP(pml4ept);
|
||||
|
||||
smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc);
|
||||
}
|
43
sys/amd64/vmm/intel/ept.h
Normal file
@ -0,0 +1,43 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _EPT_H_
|
||||
#define _EPT_H_
|
||||
|
||||
struct vmx;
|
||||
|
||||
#define EPT_PWLEVELS 4 /* page walk levels */
|
||||
#define EPTP(pml4) ((pml4) | (EPT_PWLEVELS - 1) << 3 | PAT_WRITE_BACK)
|
||||
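/*
 * EPTP layout (Intel SDM): bits 2:0 hold the EPT paging-structure memory
 * type (PAT_WRITE_BACK == 6), bits 5:3 hold the page-walk length minus
 * one, and the upper bits hold the physical address of the PML4 table, so
 * EPTP(pml4) above encodes a 4-level walk over write-back memory.
 */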
|
||||
int ept_init(void);
|
||||
int ept_vmmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
|
||||
vm_memattr_t attr, int prot, boolean_t allow_superpage_mappings);
|
||||
vm_paddr_t ept_vmmmap_get(void *arg, vm_paddr_t gpa);
|
||||
void ept_invalidate_mappings(u_long ept_pml4);
|
||||
void ept_vmcleanup(struct vmx *vmx);
|
||||
#endif
|
551
sys/amd64/vmm/intel/vmcs.c
Normal file
@ -0,0 +1,551 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include "opt_ddb.h"
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/pcpu.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
|
||||
#include <machine/segments.h>
|
||||
#include <machine/pmap.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "vmm_host.h"
|
||||
#include "vmcs.h"
|
||||
#include "vmx_cpufunc.h"
|
||||
#include "ept.h"
|
||||
#include "vmx.h"
|
||||
|
||||
#ifdef DDB
|
||||
#include <ddb/ddb.h>
|
||||
#endif
|
||||
|
||||
static uint64_t
|
||||
vmcs_fix_regval(uint32_t encoding, uint64_t val)
|
||||
{
|
||||
|
||||
switch (encoding) {
|
||||
case VMCS_GUEST_CR0:
|
||||
val = vmx_fix_cr0(val);
|
||||
break;
|
||||
case VMCS_GUEST_CR4:
|
||||
val = vmx_fix_cr4(val);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return (val);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
vmcs_field_encoding(int ident)
|
||||
{
|
||||
switch (ident) {
|
||||
case VM_REG_GUEST_CR0:
|
||||
return (VMCS_GUEST_CR0);
|
||||
case VM_REG_GUEST_CR3:
|
||||
return (VMCS_GUEST_CR3);
|
||||
case VM_REG_GUEST_CR4:
|
||||
return (VMCS_GUEST_CR4);
|
||||
case VM_REG_GUEST_DR7:
|
||||
return (VMCS_GUEST_DR7);
|
||||
case VM_REG_GUEST_RSP:
|
||||
return (VMCS_GUEST_RSP);
|
||||
case VM_REG_GUEST_RIP:
|
||||
return (VMCS_GUEST_RIP);
|
||||
case VM_REG_GUEST_RFLAGS:
|
||||
return (VMCS_GUEST_RFLAGS);
|
||||
case VM_REG_GUEST_ES:
|
||||
return (VMCS_GUEST_ES_SELECTOR);
|
||||
case VM_REG_GUEST_CS:
|
||||
return (VMCS_GUEST_CS_SELECTOR);
|
||||
case VM_REG_GUEST_SS:
|
||||
return (VMCS_GUEST_SS_SELECTOR);
|
||||
case VM_REG_GUEST_DS:
|
||||
return (VMCS_GUEST_DS_SELECTOR);
|
||||
case VM_REG_GUEST_FS:
|
||||
return (VMCS_GUEST_FS_SELECTOR);
|
||||
case VM_REG_GUEST_GS:
|
||||
return (VMCS_GUEST_GS_SELECTOR);
|
||||
case VM_REG_GUEST_TR:
|
||||
return (VMCS_GUEST_TR_SELECTOR);
|
||||
case VM_REG_GUEST_LDTR:
|
||||
return (VMCS_GUEST_LDTR_SELECTOR);
|
||||
case VM_REG_GUEST_EFER:
|
||||
return (VMCS_GUEST_IA32_EFER);
|
||||
default:
|
||||
return (-1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static int
|
||||
vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
|
||||
{
|
||||
|
||||
switch (seg) {
|
||||
case VM_REG_GUEST_ES:
|
||||
*base = VMCS_GUEST_ES_BASE;
|
||||
*lim = VMCS_GUEST_ES_LIMIT;
|
||||
*acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
|
||||
break;
|
||||
case VM_REG_GUEST_CS:
|
||||
*base = VMCS_GUEST_CS_BASE;
|
||||
*lim = VMCS_GUEST_CS_LIMIT;
|
||||
*acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
|
||||
break;
|
||||
case VM_REG_GUEST_SS:
|
||||
*base = VMCS_GUEST_SS_BASE;
|
||||
*lim = VMCS_GUEST_SS_LIMIT;
|
||||
*acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
|
||||
break;
|
||||
case VM_REG_GUEST_DS:
|
||||
*base = VMCS_GUEST_DS_BASE;
|
||||
*lim = VMCS_GUEST_DS_LIMIT;
|
||||
*acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
|
||||
break;
|
||||
case VM_REG_GUEST_FS:
|
||||
*base = VMCS_GUEST_FS_BASE;
|
||||
*lim = VMCS_GUEST_FS_LIMIT;
|
||||
*acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
|
||||
break;
|
||||
case VM_REG_GUEST_GS:
|
||||
*base = VMCS_GUEST_GS_BASE;
|
||||
*lim = VMCS_GUEST_GS_LIMIT;
|
||||
*acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
|
||||
break;
|
||||
case VM_REG_GUEST_TR:
|
||||
*base = VMCS_GUEST_TR_BASE;
|
||||
*lim = VMCS_GUEST_TR_LIMIT;
|
||||
*acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
|
||||
break;
|
||||
case VM_REG_GUEST_LDTR:
|
||||
*base = VMCS_GUEST_LDTR_BASE;
|
||||
*lim = VMCS_GUEST_LDTR_LIMIT;
|
||||
*acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
|
||||
break;
|
||||
case VM_REG_GUEST_IDTR:
|
||||
*base = VMCS_GUEST_IDTR_BASE;
|
||||
*lim = VMCS_GUEST_IDTR_LIMIT;
|
||||
*acc = VMCS_INVALID_ENCODING;
|
||||
break;
|
||||
case VM_REG_GUEST_GDTR:
|
||||
*base = VMCS_GUEST_GDTR_BASE;
|
||||
*lim = VMCS_GUEST_GDTR_LIMIT;
|
||||
*acc = VMCS_INVALID_ENCODING;
|
||||
break;
|
||||
default:
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
vmcs_getreg(struct vmcs *vmcs, int ident, uint64_t *retval)
|
||||
{
|
||||
int error;
|
||||
uint32_t encoding;
|
||||
|
||||
/*
|
||||
* If we need to get at vmx-specific state in the VMCS we can bypass
|
||||
* the translation of 'ident' to 'encoding' by simply setting the
|
||||
* sign bit. As it so happens the upper 16 bits are reserved (i.e.
|
||||
* set to 0) in the encodings for the VMCS so we are free to use the
|
||||
* sign bit.
|
||||
*/
|
||||
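/*
 * For example (illustrative), a caller can read a raw field with
 * vmcs_getreg(vmcs, VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY), &val);
 * VMCS_IDENT() in vmcs.h just sets bit 31 of the encoding, which makes
 * 'ident' negative and selects the pass-through path below.
 */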
if (ident < 0)
|
||||
encoding = ident & 0x7fffffff;
|
||||
else
|
||||
encoding = vmcs_field_encoding(ident);
|
||||
|
||||
if (encoding == (uint32_t)-1)
|
||||
return (EINVAL);
|
||||
|
||||
VMPTRLD(vmcs);
|
||||
error = vmread(encoding, retval);
|
||||
VMCLEAR(vmcs);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vmcs_setreg(struct vmcs *vmcs, int ident, uint64_t val)
|
||||
{
|
||||
int error;
|
||||
uint32_t encoding;
|
||||
|
||||
if (ident < 0)
|
||||
encoding = ident & 0x7fffffff;
|
||||
else
|
||||
encoding = vmcs_field_encoding(ident);
|
||||
|
||||
if (encoding == (uint32_t)-1)
|
||||
return (EINVAL);
|
||||
|
||||
val = vmcs_fix_regval(encoding, val);
|
||||
|
||||
VMPTRLD(vmcs);
|
||||
error = vmwrite(encoding, val);
|
||||
VMCLEAR(vmcs);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vmcs_setdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
|
||||
{
|
||||
int error;
|
||||
uint32_t base, limit, access;
|
||||
|
||||
error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
|
||||
if (error != 0)
|
||||
panic("vmcs_setdesc: invalid segment register %d", seg);
|
||||
|
||||
VMPTRLD(vmcs);
|
||||
if ((error = vmwrite(base, desc->base)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vmwrite(limit, desc->limit)) != 0)
|
||||
goto done;
|
||||
|
||||
if (access != VMCS_INVALID_ENCODING) {
|
||||
if ((error = vmwrite(access, desc->access)) != 0)
|
||||
goto done;
|
||||
}
|
||||
done:
|
||||
VMCLEAR(vmcs);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vmcs_getdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
|
||||
{
|
||||
int error;
|
||||
uint32_t base, limit, access;
|
||||
uint64_t u64;
|
||||
|
||||
error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
|
||||
if (error != 0)
|
||||
panic("vmcs_getdesc: invalid segment register %d", seg);
|
||||
|
||||
VMPTRLD(vmcs);
|
||||
if ((error = vmread(base, &u64)) != 0)
|
||||
goto done;
|
||||
desc->base = u64;
|
||||
|
||||
if ((error = vmread(limit, &u64)) != 0)
|
||||
goto done;
|
||||
desc->limit = u64;
|
||||
|
||||
if (access != VMCS_INVALID_ENCODING) {
|
||||
if ((error = vmread(access, &u64)) != 0)
|
||||
goto done;
|
||||
desc->access = u64;
|
||||
}
|
||||
done:
|
||||
VMCLEAR(vmcs);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
|
||||
{
|
||||
int error;
|
||||
|
||||
VMPTRLD(vmcs);
|
||||
|
||||
/*
|
||||
* Guest MSRs are saved in the VM-exit MSR-store area.
|
||||
* Guest MSRs are loaded from the VM-entry MSR-load area.
|
||||
* Both areas point to the same location in memory.
|
||||
*/
|
||||
if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
|
||||
goto done;
|
||||
if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
|
||||
goto done;
|
||||
if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
|
||||
goto done;
|
||||
|
||||
error = 0;
|
||||
done:
|
||||
VMCLEAR(vmcs);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vmcs_set_defaults(struct vmcs *vmcs,
|
||||
u_long host_rip, u_long host_rsp, u_long ept_pml4,
|
||||
uint32_t pinbased_ctls, uint32_t procbased_ctls,
|
||||
uint32_t procbased_ctls2, uint32_t exit_ctls,
|
||||
uint32_t entry_ctls, u_long msr_bitmap, uint16_t vpid)
|
||||
{
|
||||
int error, codesel, datasel, tsssel;
|
||||
u_long cr0, cr4, efer;
|
||||
uint64_t eptp, pat, fsbase, idtrbase;
|
||||
uint32_t exc_bitmap;
|
||||
|
||||
codesel = vmm_get_host_codesel();
|
||||
datasel = vmm_get_host_datasel();
|
||||
tsssel = vmm_get_host_tsssel();
|
||||
|
||||
/*
|
||||
* Make sure we have a "current" VMCS to work with.
|
||||
*/
|
||||
VMPTRLD(vmcs);
|
||||
|
||||
/*
|
||||
* Load the VMX controls
|
||||
*/
|
||||
if ((error = vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls)) != 0)
|
||||
goto done;
|
||||
if ((error = vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls)) != 0)
|
||||
goto done;
|
||||
if ((error = vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2)) != 0)
|
||||
goto done;
|
||||
if ((error = vmwrite(VMCS_EXIT_CTLS, exit_ctls)) != 0)
|
||||
goto done;
|
||||
if ((error = vmwrite(VMCS_ENTRY_CTLS, entry_ctls)) != 0)
|
||||
goto done;
|
||||
|
||||
/* Guest state */
|
||||
|
||||
/* Initialize guest IA32_PAT MSR with the default value */
|
||||
pat = PAT_VALUE(0, PAT_WRITE_BACK) |
|
||||
PAT_VALUE(1, PAT_WRITE_THROUGH) |
|
||||
PAT_VALUE(2, PAT_UNCACHED) |
|
||||
PAT_VALUE(3, PAT_UNCACHEABLE) |
|
||||
PAT_VALUE(4, PAT_WRITE_BACK) |
|
||||
PAT_VALUE(5, PAT_WRITE_THROUGH) |
|
||||
PAT_VALUE(6, PAT_UNCACHED) |
|
||||
PAT_VALUE(7, PAT_UNCACHEABLE);
|
||||
if ((error = vmwrite(VMCS_GUEST_IA32_PAT, pat)) != 0)
|
||||
goto done;
|
||||
|
||||
/* Host state */
|
||||
|
||||
/* Initialize host IA32_PAT MSR */
|
||||
pat = vmm_get_host_pat();
|
||||
if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
|
||||
goto done;
|
||||
|
||||
/* Load the IA32_EFER MSR */
|
||||
efer = vmm_get_host_efer();
|
||||
if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
|
||||
goto done;
|
||||
|
||||
/* Load the control registers */
|
||||
|
||||
cr0 = vmm_get_host_cr0();
|
||||
if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
|
||||
goto done;
|
||||
|
||||
cr4 = vmm_get_host_cr4() | CR4_VMXE;
|
||||
if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
|
||||
goto done;
|
||||
|
||||
/* Load the segment selectors */
|
||||
if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* Load the Base-Address for %fs and idtr.
|
||||
*
|
||||
* Note that we exclude %gs, tss and gdtr here because their base
|
||||
* address is pcpu specific.
|
||||
*/
|
||||
fsbase = vmm_get_host_fsbase();
|
||||
if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0)
|
||||
goto done;
|
||||
|
||||
idtrbase = vmm_get_host_idtrbase();
|
||||
if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0)
|
||||
goto done;
|
||||
|
||||
/* instruction pointer */
|
||||
if ((error = vmwrite(VMCS_HOST_RIP, host_rip)) != 0)
|
||||
goto done;
|
||||
|
||||
/* stack pointer */
|
||||
if ((error = vmwrite(VMCS_HOST_RSP, host_rsp)) != 0)
|
||||
goto done;
|
||||
|
||||
/* eptp */
|
||||
eptp = EPTP(ept_pml4);
|
||||
if ((error = vmwrite(VMCS_EPTP, eptp)) != 0)
|
||||
goto done;
|
||||
|
||||
/* vpid */
|
||||
if ((error = vmwrite(VMCS_VPID, vpid)) != 0)
|
||||
goto done;
|
||||
|
||||
/* msr bitmap */
|
||||
if ((error = vmwrite(VMCS_MSR_BITMAP, msr_bitmap)) != 0)
|
||||
goto done;
|
||||
|
||||
/* exception bitmap */
|
||||
exc_bitmap = 1 << IDT_MC;
|
||||
if ((error = vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap)) != 0)
|
||||
goto done;
|
||||
|
||||
/* link pointer */
|
||||
if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
|
||||
goto done;
|
||||
done:
|
||||
VMCLEAR(vmcs);
|
||||
return (error);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
vmcs_read(uint32_t encoding)
|
||||
{
|
||||
int error;
|
||||
uint64_t val;
|
||||
|
||||
error = vmread(encoding, &val);
|
||||
if (error != 0)
|
||||
panic("vmcs_read(%u) error %d", encoding, error);
|
||||
|
||||
return (val);
|
||||
}
|
||||
|
||||
#ifdef DDB
|
||||
extern int vmxon_enabled[];
|
||||
|
||||
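/*
 * DB_SHOW_COMMAND() registers this handler as the kernel-debugger command
 * 'show vmcs', which dumps the exit state of the VMCS that is current on
 * the cpu where the debugger was entered.
 */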
DB_SHOW_COMMAND(vmcs, db_show_vmcs)
|
||||
{
|
||||
uint64_t cur_vmcs, val;
|
||||
uint32_t exit;
|
||||
|
||||
if (!vmxon_enabled[curcpu]) {
|
||||
db_printf("VMX not enabled\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (have_addr) {
|
||||
db_printf("Only current VMCS supported\n");
|
||||
return;
|
||||
}
|
||||
|
||||
vmptrst(&cur_vmcs);
|
||||
if (cur_vmcs == VMCS_INITIAL) {
|
||||
db_printf("No current VM context\n");
|
||||
return;
|
||||
}
|
||||
db_printf("VMCS: %jx\n", cur_vmcs);
|
||||
db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID));
|
||||
db_printf("Activity: ");
|
||||
val = vmcs_read(VMCS_GUEST_ACTIVITY);
|
||||
switch (val) {
|
||||
case 0:
|
||||
db_printf("Active");
|
||||
break;
|
||||
case 1:
|
||||
db_printf("HLT");
|
||||
break;
|
||||
case 2:
|
||||
db_printf("Shutdown");
|
||||
break;
|
||||
case 3:
|
||||
db_printf("Wait for SIPI");
|
||||
break;
|
||||
default:
|
||||
db_printf("Unknown: %#lx", val);
|
||||
}
|
||||
db_printf("\n");
|
||||
exit = vmcs_read(VMCS_EXIT_REASON);
|
||||
if (exit & 0x80000000)
|
||||
db_printf("Entry Failure Reason: %u\n", exit & 0xffff);
|
||||
else
|
||||
db_printf("Exit Reason: %u\n", exit & 0xffff);
|
||||
db_printf("Qualification: %#lx\n", vmcs_exit_qualification());
|
||||
db_printf("Guest Linear Address: %#lx\n",
|
||||
vmcs_read(VMCS_GUEST_LINEAR_ADDRESS));
|
||||
switch (exit & 0x8000ffff) {
|
||||
case EXIT_REASON_EXCEPTION:
|
||||
case EXIT_REASON_EXT_INTR:
|
||||
val = vmcs_read(VMCS_EXIT_INTERRUPTION_INFO);
|
||||
db_printf("Interrupt Type: ");
|
||||
switch (val >> 8 & 0x7) {
|
||||
case 0:
|
||||
db_printf("external");
|
||||
break;
|
||||
case 2:
|
||||
db_printf("NMI");
|
||||
break;
|
||||
case 3:
|
||||
db_printf("HW exception");
|
||||
break;
|
||||
case 4:
|
||||
db_printf("SW exception");
|
||||
break;
|
||||
default:
|
||||
db_printf("?? %lu", val >> 8 & 0x7);
|
||||
break;
|
||||
}
|
||||
db_printf(" Vector: %lu", val & 0xff);
|
||||
if (val & 0x800)
|
||||
db_printf(" Error Code: %lx",
|
||||
vmcs_read(VMCS_EXIT_INTERRUPTION_ERROR));
|
||||
db_printf("\n");
|
||||
break;
|
||||
case EXIT_REASON_EPT_FAULT:
|
||||
case EXIT_REASON_EPT_MISCONFIG:
|
||||
db_printf("Guest Physical Address: %#lx\n",
|
||||
vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS));
|
||||
break;
|
||||
}
|
||||
db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error());
|
||||
}
|
||||
#endif
|
338
sys/amd64/vmm/intel/vmcs.h
Normal file
@ -0,0 +1,338 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMCS_H_
|
||||
#define _VMCS_H_
|
||||
|
||||
#ifdef _KERNEL
|
||||
struct vmcs {
|
||||
uint32_t identifier;
|
||||
uint32_t abort_code;
|
||||
char _impl_specific[PAGE_SIZE - sizeof(uint32_t) * 2];
|
||||
};
|
||||
CTASSERT(sizeof(struct vmcs) == PAGE_SIZE);
|
||||
|
||||
/* MSR save region is composed of an array of 'struct msr_entry' */
|
||||
struct msr_entry {
|
||||
uint32_t index;
|
||||
uint32_t reserved;
|
||||
uint64_t val;
|
||||
|
||||
};
|
||||
|
||||
int vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count);
|
||||
int vmcs_set_defaults(struct vmcs *vmcs, u_long host_rip, u_long host_rsp,
|
||||
u_long ept_pml4,
|
||||
uint32_t pinbased_ctls, uint32_t procbased_ctls,
|
||||
uint32_t procbased_ctls2, uint32_t exit_ctls,
|
||||
uint32_t entry_ctls, u_long msr_bitmap,
|
||||
uint16_t vpid);
|
||||
int vmcs_getreg(struct vmcs *vmcs, int ident, uint64_t *retval);
|
||||
int vmcs_setreg(struct vmcs *vmcs, int ident, uint64_t val);
|
||||
int vmcs_getdesc(struct vmcs *vmcs, int ident,
|
||||
struct seg_desc *desc);
|
||||
int vmcs_setdesc(struct vmcs *vmcs, int ident,
|
||||
struct seg_desc *desc);
|
||||
uint64_t vmcs_read(uint32_t encoding);
|
||||
|
||||
#define vmexit_instruction_length() vmcs_read(VMCS_EXIT_INSTRUCTION_LENGTH)
|
||||
#define vmcs_guest_rip() vmcs_read(VMCS_GUEST_RIP)
|
||||
#define vmcs_instruction_error() vmcs_read(VMCS_INSTRUCTION_ERROR)
|
||||
#define vmcs_exit_reason() (vmcs_read(VMCS_EXIT_REASON) & 0xffff)
|
||||
#define vmcs_exit_qualification() vmcs_read(VMCS_EXIT_QUALIFICATION)
|
||||
#define vmcs_guest_cr3() vmcs_read(VMCS_GUEST_CR3)
|
||||
#define vmcs_gpa() vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS)
|
||||
#define vmcs_gla() vmcs_read(VMCS_GUEST_LINEAR_ADDRESS)
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#define VMCS_INITIAL 0xffffffffffffffff
|
||||
|
||||
#define VMCS_IDENT(encoding) ((encoding) | 0x80000000)
|
||||
/*
|
||||
* VMCS field encodings from Appendix H, Intel Architecture Manual Vol3B.
|
||||
*/
|
||||
#define VMCS_INVALID_ENCODING 0xffffffff
|
||||
|
||||
/* 16-bit control fields */
|
||||
#define VMCS_VPID 0x00000000
|
||||
|
||||
/* 16-bit guest-state fields */
|
||||
#define VMCS_GUEST_ES_SELECTOR 0x00000800
|
||||
#define VMCS_GUEST_CS_SELECTOR 0x00000802
|
||||
#define VMCS_GUEST_SS_SELECTOR 0x00000804
|
||||
#define VMCS_GUEST_DS_SELECTOR 0x00000806
|
||||
#define VMCS_GUEST_FS_SELECTOR 0x00000808
|
||||
#define VMCS_GUEST_GS_SELECTOR 0x0000080A
|
||||
#define VMCS_GUEST_LDTR_SELECTOR 0x0000080C
|
||||
#define VMCS_GUEST_TR_SELECTOR 0x0000080E
|
||||
|
||||
/* 16-bit host-state fields */
|
||||
#define VMCS_HOST_ES_SELECTOR 0x00000C00
|
||||
#define VMCS_HOST_CS_SELECTOR 0x00000C02
|
||||
#define VMCS_HOST_SS_SELECTOR 0x00000C04
|
||||
#define VMCS_HOST_DS_SELECTOR 0x00000C06
|
||||
#define VMCS_HOST_FS_SELECTOR 0x00000C08
|
||||
#define VMCS_HOST_GS_SELECTOR 0x00000C0A
|
||||
#define VMCS_HOST_TR_SELECTOR 0x00000C0C
|
||||
|
||||
/* 64-bit control fields */
|
||||
#define VMCS_IO_BITMAP_A 0x00002000
|
||||
#define VMCS_IO_BITMAP_B 0x00002002
|
||||
#define VMCS_MSR_BITMAP 0x00002004
|
||||
#define VMCS_EXIT_MSR_STORE 0x00002006
|
||||
#define VMCS_EXIT_MSR_LOAD 0x00002008
|
||||
#define VMCS_ENTRY_MSR_LOAD 0x0000200A
|
||||
#define VMCS_EXECUTIVE_VMCS 0x0000200C
|
||||
#define VMCS_TSC_OFFSET 0x00002010
|
||||
#define VMCS_VIRTUAL_APIC 0x00002012
|
||||
#define VMCS_APIC_ACCESS 0x00002014
|
||||
#define VMCS_EPTP 0x0000201A
|
||||
|
||||
/* 64-bit read-only fields */
|
||||
#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400
|
||||
|
||||
/* 64-bit guest-state fields */
|
||||
#define VMCS_LINK_POINTER 0x00002800
|
||||
#define VMCS_GUEST_IA32_DEBUGCTL 0x00002802
|
||||
#define VMCS_GUEST_IA32_PAT 0x00002804
|
||||
#define VMCS_GUEST_IA32_EFER 0x00002806
|
||||
#define VMCS_GUEST_IA32_PERF_GLOBAL_CTRL 0x00002808
|
||||
#define VMCS_GUEST_PDPTE0 0x0000280A
|
||||
#define VMCS_GUEST_PDPTE1 0x0000280C
|
||||
#define VMCS_GUEST_PDPTE2 0x0000280E
|
||||
#define VMCS_GUEST_PDPTE3 0x00002810
|
||||
|
||||
/* 64-bit host-state fields */
|
||||
#define VMCS_HOST_IA32_PAT 0x00002C00
|
||||
#define VMCS_HOST_IA32_EFER 0x00002C02
|
||||
#define VMCS_HOST_IA32_PERF_GLOBAL_CTRL 0x00002C04
|
||||
|
||||
/* 32-bit control fields */
|
||||
#define VMCS_PIN_BASED_CTLS 0x00004000
|
||||
#define VMCS_PRI_PROC_BASED_CTLS 0x00004002
|
||||
#define VMCS_EXCEPTION_BITMAP 0x00004004
|
||||
#define VMCS_PF_ERROR_MASK 0x00004006
|
||||
#define VMCS_PF_ERROR_MATCH 0x00004008
|
||||
#define VMCS_CR3_TARGET_COUNT 0x0000400A
|
||||
#define VMCS_EXIT_CTLS 0x0000400C
|
||||
#define VMCS_EXIT_MSR_STORE_COUNT 0x0000400E
|
||||
#define VMCS_EXIT_MSR_LOAD_COUNT 0x00004010
|
||||
#define VMCS_ENTRY_CTLS 0x00004012
|
||||
#define VMCS_ENTRY_MSR_LOAD_COUNT 0x00004014
|
||||
#define VMCS_ENTRY_INTR_INFO 0x00004016
|
||||
#define VMCS_ENTRY_EXCEPTION_ERROR 0x00004018
|
||||
#define VMCS_ENTRY_INST_LENGTH 0x0000401A
|
||||
#define VMCS_TPR_THRESHOLD 0x0000401C
|
||||
#define VMCS_SEC_PROC_BASED_CTLS 0x0000401E
|
||||
#define VMCS_PLE_GAP 0x00004020
|
||||
#define VMCS_PLE_WINDOW 0x00004022
|
||||
|
||||
/* 32-bit read-only data fields */
|
||||
#define VMCS_INSTRUCTION_ERROR 0x00004400
|
||||
#define VMCS_EXIT_REASON 0x00004402
|
||||
#define VMCS_EXIT_INTERRUPTION_INFO 0x00004404
|
||||
#define VMCS_EXIT_INTERRUPTION_ERROR 0x00004406
|
||||
#define VMCS_IDT_VECTORING_INFO 0x00004408
|
||||
#define VMCS_IDT_VECTORING_ERROR 0x0000440A
|
||||
#define VMCS_EXIT_INSTRUCTION_LENGTH 0x0000440C
|
||||
#define VMCS_EXIT_INSTRUCTION_INFO 0x0000440E
|
||||
|
||||
/* 32-bit guest-state fields */
|
||||
#define VMCS_GUEST_ES_LIMIT 0x00004800
|
||||
#define VMCS_GUEST_CS_LIMIT 0x00004802
|
||||
#define VMCS_GUEST_SS_LIMIT 0x00004804
|
||||
#define VMCS_GUEST_DS_LIMIT 0x00004806
|
||||
#define VMCS_GUEST_FS_LIMIT 0x00004808
|
||||
#define VMCS_GUEST_GS_LIMIT 0x0000480A
|
||||
#define VMCS_GUEST_LDTR_LIMIT 0x0000480C
|
||||
#define VMCS_GUEST_TR_LIMIT 0x0000480E
|
||||
#define VMCS_GUEST_GDTR_LIMIT 0x00004810
|
||||
#define VMCS_GUEST_IDTR_LIMIT 0x00004812
|
||||
#define VMCS_GUEST_ES_ACCESS_RIGHTS 0x00004814
|
||||
#define VMCS_GUEST_CS_ACCESS_RIGHTS 0x00004816
|
||||
#define VMCS_GUEST_SS_ACCESS_RIGHTS 0x00004818
|
||||
#define VMCS_GUEST_DS_ACCESS_RIGHTS 0x0000481A
|
||||
#define VMCS_GUEST_FS_ACCESS_RIGHTS 0x0000481C
|
||||
#define VMCS_GUEST_GS_ACCESS_RIGHTS 0x0000481E
|
||||
#define VMCS_GUEST_LDTR_ACCESS_RIGHTS 0x00004820
|
||||
#define VMCS_GUEST_TR_ACCESS_RIGHTS 0x00004822
|
||||
#define VMCS_GUEST_INTERRUPTIBILITY 0x00004824
|
||||
#define VMCS_GUEST_ACTIVITY 0x00004826
|
||||
#define VMCS_GUEST_SMBASE 0x00004828
|
||||
#define VMCS_GUEST_IA32_SYSENTER_CS 0x0000482A
|
||||
#define VMCS_PREEMPTION_TIMER_VALUE 0x0000482E
|
||||
|
||||
/* 32-bit host state fields */
|
||||
#define VMCS_HOST_IA32_SYSENTER_CS 0x00004C00
|
||||
|
||||
/* Natural Width control fields */
|
||||
#define VMCS_CR0_MASK 0x00006000
|
||||
#define VMCS_CR4_MASK 0x00006002
|
||||
#define VMCS_CR0_SHADOW 0x00006004
|
||||
#define VMCS_CR4_SHADOW 0x00006006
|
||||
#define VMCS_CR3_TARGET0 0x00006008
|
||||
#define VMCS_CR3_TARGET1 0x0000600A
|
||||
#define VMCS_CR3_TARGET2 0x0000600C
|
||||
#define VMCS_CR3_TARGET3 0x0000600E
|
||||
|
||||
/* Natural Width read-only fields */
|
||||
#define VMCS_EXIT_QUALIFICATION 0x00006400
|
||||
#define VMCS_IO_RCX 0x00006402
|
||||
#define VMCS_IO_RSI 0x00006404
|
||||
#define VMCS_IO_RDI 0x00006406
|
||||
#define VMCS_IO_RIP 0x00006408
|
||||
#define VMCS_GUEST_LINEAR_ADDRESS 0x0000640A
|
||||
|
||||
/* Natural Width guest-state fields */
|
||||
#define VMCS_GUEST_CR0 0x00006800
|
||||
#define VMCS_GUEST_CR3 0x00006802
|
||||
#define VMCS_GUEST_CR4 0x00006804
|
||||
#define VMCS_GUEST_ES_BASE 0x00006806
|
||||
#define VMCS_GUEST_CS_BASE 0x00006808
|
||||
#define VMCS_GUEST_SS_BASE 0x0000680A
|
||||
#define VMCS_GUEST_DS_BASE 0x0000680C
|
||||
#define VMCS_GUEST_FS_BASE 0x0000680E
|
||||
#define VMCS_GUEST_GS_BASE 0x00006810
|
||||
#define VMCS_GUEST_LDTR_BASE 0x00006812
|
||||
#define VMCS_GUEST_TR_BASE 0x00006814
|
||||
#define VMCS_GUEST_GDTR_BASE 0x00006816
|
||||
#define VMCS_GUEST_IDTR_BASE 0x00006818
|
||||
#define VMCS_GUEST_DR7 0x0000681A
|
||||
#define VMCS_GUEST_RSP 0x0000681C
|
||||
#define VMCS_GUEST_RIP 0x0000681E
|
||||
#define VMCS_GUEST_RFLAGS 0x00006820
|
||||
#define VMCS_GUEST_PENDING_DBG_EXCEPTIONS 0x00006822
|
||||
#define VMCS_GUEST_IA32_SYSENTER_ESP 0x00006824
|
||||
#define VMCS_GUEST_IA32_SYSENTER_EIP 0x00006826
|
||||
|
||||
/* Natural Width host-state fields */
|
||||
#define VMCS_HOST_CR0 0x00006C00
|
||||
#define VMCS_HOST_CR3 0x00006C02
|
||||
#define VMCS_HOST_CR4 0x00006C04
|
||||
#define VMCS_HOST_FS_BASE 0x00006C06
|
||||
#define VMCS_HOST_GS_BASE 0x00006C08
|
||||
#define VMCS_HOST_TR_BASE 0x00006C0A
|
||||
#define VMCS_HOST_GDTR_BASE 0x00006C0C
|
||||
#define VMCS_HOST_IDTR_BASE 0x00006C0E
|
||||
#define VMCS_HOST_IA32_SYSENTER_ESP 0x00006C10
|
||||
#define VMCS_HOST_IA32_SYSENTER_EIP 0x00006C12
|
||||
#define VMCS_HOST_RSP 0x00006C14
|
||||
#define VMCS_HOST_RIP 0x00006c16
|
||||
|
||||
/*
|
||||
* VM instruction error numbers
|
||||
*/
|
||||
#define VMRESUME_WITH_NON_LAUNCHED_VMCS 5
|
||||
|
||||
/*
|
||||
* VMCS exit reasons
|
||||
*/
|
||||
#define EXIT_REASON_EXCEPTION 0
|
||||
#define EXIT_REASON_EXT_INTR 1
|
||||
#define EXIT_REASON_TRIPLE_FAULT 2
|
||||
#define EXIT_REASON_INIT 3
|
||||
#define EXIT_REASON_SIPI 4
|
||||
#define EXIT_REASON_IO_SMI 5
|
||||
#define EXIT_REASON_SMI 6
|
||||
#define EXIT_REASON_INTR_WINDOW 7
|
||||
#define EXIT_REASON_NMI_WINDOW 8
|
||||
#define EXIT_REASON_TASK_SWITCH 9
|
||||
#define EXIT_REASON_CPUID 10
|
||||
#define EXIT_REASON_GETSEC 11
|
||||
#define EXIT_REASON_HLT 12
|
||||
#define EXIT_REASON_INVD 13
|
||||
#define EXIT_REASON_INVLPG 14
|
||||
#define EXIT_REASON_RDPMC 15
|
||||
#define EXIT_REASON_RDTSC 16
|
||||
#define EXIT_REASON_RSM 17
|
||||
#define EXIT_REASON_VMCALL 18
|
||||
#define EXIT_REASON_VMCLEAR 19
|
||||
#define EXIT_REASON_VMLAUNCH 20
|
||||
#define EXIT_REASON_VMPTRLD 21
|
||||
#define EXIT_REASON_VMPTRST 22
|
||||
#define EXIT_REASON_VMREAD 23
|
||||
#define EXIT_REASON_VMRESUME 24
|
||||
#define EXIT_REASON_VMWRITE 25
|
||||
#define EXIT_REASON_VMXOFF 26
|
||||
#define EXIT_REASON_VMXON 27
|
||||
#define EXIT_REASON_CR_ACCESS 28
|
||||
#define EXIT_REASON_DR_ACCESS 29
|
||||
#define EXIT_REASON_INOUT 30
|
||||
#define EXIT_REASON_RDMSR 31
|
||||
#define EXIT_REASON_WRMSR 32
|
||||
#define EXIT_REASON_INVAL_VMCS 33
|
||||
#define EXIT_REASON_INVAL_MSR 34
|
||||
#define EXIT_REASON_MWAIT 36
|
||||
#define EXIT_REASON_MTF 37
|
||||
#define EXIT_REASON_MONITOR 39
|
||||
#define EXIT_REASON_PAUSE 40
|
||||
#define EXIT_REASON_MCE 41
|
||||
#define EXIT_REASON_TPR 43
|
||||
#define EXIT_REASON_APIC 44
|
||||
#define EXIT_REASON_GDTR_IDTR 46
|
||||
#define EXIT_REASON_LDTR_TR 47
|
||||
#define EXIT_REASON_EPT_FAULT 48
|
||||
#define EXIT_REASON_EPT_MISCONFIG 49
|
||||
#define EXIT_REASON_INVEPT 50
|
||||
#define EXIT_REASON_RDTSCP 51
|
||||
#define EXIT_REASON_VMX_PREEMPT 52
|
||||
#define EXIT_REASON_INVVPID 53
|
||||
#define EXIT_REASON_WBINVD 54
|
||||
#define EXIT_REASON_XSETBV 55
|
||||
|
||||
/*
|
||||
* VMCS interrupt information fields
|
||||
*/
|
||||
#define VMCS_INTERRUPTION_INFO_VALID (1U << 31)
|
||||
#define VMCS_INTERRUPTION_INFO_HW_INTR (0 << 8)
|
||||
#define VMCS_INTERRUPTION_INFO_NMI (2 << 8)
|
||||
|
||||
/*
|
||||
* VMCS Guest interruptibility field
|
||||
*/
|
||||
#define VMCS_INTERRUPTIBILITY_STI_BLOCKING (1 << 0)
|
||||
#define VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING (1 << 1)
|
||||
#define VMCS_INTERRUPTIBILITY_SMI_BLOCKING (1 << 2)
|
||||
#define VMCS_INTERRUPTIBILITY_NMI_BLOCKING (1 << 3)
|
||||
|
||||
/*
|
||||
* Exit qualification for EXIT_REASON_INVAL_VMCS
|
||||
*/
|
||||
#define EXIT_QUAL_NMI_WHILE_STI_BLOCKING 3
|
||||
|
||||
/*
|
||||
* Exit qualification for EPT violation
|
||||
*/
|
||||
#define EPT_VIOLATION_DATA_READ (1UL << 0)
|
||||
#define EPT_VIOLATION_DATA_WRITE (1UL << 1)
|
||||
#define EPT_VIOLATION_INST_FETCH (1UL << 2)
|
||||
#define EPT_VIOLATION_GLA_VALID (1UL << 7)
|
||||
#define EPT_VIOLATION_XLAT_VALID (1UL << 8)
|
||||
|
||||
#endif
|
1845
sys/amd64/vmm/intel/vmx.c
Normal file
File diff suppressed because it is too large
120
sys/amd64/vmm/intel/vmx.h
Normal file
@ -0,0 +1,120 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMX_H_
|
||||
#define _VMX_H_
|
||||
|
||||
#include "vmcs.h"
|
||||
|
||||
#define GUEST_MSR_MAX_ENTRIES 64 /* arbitrary */
|
||||
|
||||
struct vmxctx {
|
||||
register_t tmpstk[32]; /* vmx_return() stack */
|
||||
register_t tmpstktop;
|
||||
|
||||
register_t guest_rdi; /* Guest state */
|
||||
register_t guest_rsi;
|
||||
register_t guest_rdx;
|
||||
register_t guest_rcx;
|
||||
register_t guest_r8;
|
||||
register_t guest_r9;
|
||||
register_t guest_rax;
|
||||
register_t guest_rbx;
|
||||
register_t guest_rbp;
|
||||
register_t guest_r10;
|
||||
register_t guest_r11;
|
||||
register_t guest_r12;
|
||||
register_t guest_r13;
|
||||
register_t guest_r14;
|
||||
register_t guest_r15;
|
||||
register_t guest_cr2;
|
||||
|
||||
register_t host_r15; /* Host state */
|
||||
register_t host_r14;
|
||||
register_t host_r13;
|
||||
register_t host_r12;
|
||||
register_t host_rbp;
|
||||
register_t host_rsp;
|
||||
register_t host_rbx;
|
||||
register_t host_rip;
|
||||
/*
|
||||
* XXX todo debug registers and fpu state
|
||||
*/
|
||||
|
||||
int launched; /* vmcs launch state */
|
||||
int launch_error;
|
||||
};
|
||||
|
||||
struct vmxcap {
|
||||
int set;
|
||||
uint32_t proc_ctls;
|
||||
};
|
||||
|
||||
struct vmxstate {
|
||||
int lastcpu; /* host cpu that this 'vcpu' last ran on */
|
||||
uint16_t vpid;
|
||||
};
|
||||
|
||||
/* virtual machine softc */
|
||||
struct vmx {
|
||||
pml4_entry_t pml4ept[NPML4EPG];
|
||||
struct vmcs vmcs[VM_MAXCPU]; /* one vmcs per virtual cpu */
|
||||
char msr_bitmap[PAGE_SIZE];
|
||||
struct msr_entry guest_msrs[VM_MAXCPU][GUEST_MSR_MAX_ENTRIES];
|
||||
struct vmxctx ctx[VM_MAXCPU];
|
||||
struct vmxcap cap[VM_MAXCPU];
|
||||
struct vmxstate state[VM_MAXCPU];
|
||||
struct vm *vm;
|
||||
};
|
||||
CTASSERT((offsetof(struct vmx, pml4ept) & PAGE_MASK) == 0);
|
||||
CTASSERT((offsetof(struct vmx, vmcs) & PAGE_MASK) == 0);
|
||||
CTASSERT((offsetof(struct vmx, msr_bitmap) & PAGE_MASK) == 0);
|
||||
CTASSERT((offsetof(struct vmx, guest_msrs) & 15) == 0);
|
||||
|
||||
#define VMX_RETURN_DIRECT 0
|
||||
#define VMX_RETURN_LONGJMP 1
|
||||
#define VMX_RETURN_VMRESUME 2
|
||||
#define VMX_RETURN_VMLAUNCH 3
|
||||
#define VMX_RETURN_AST 4
|
||||
/*
|
||||
* vmx_setjmp() returns:
|
||||
* - 0 when it returns directly
|
||||
* - 1 when it returns from vmx_longjmp
|
||||
* - 2 when it returns from vmx_resume (which would only be in the error case)
|
||||
* - 3 when it returns from vmx_launch (which would only be in the error case)
|
||||
* - 4 when it returns from vmx_resume or vmx_launch because of AST pending
|
||||
*/
|
||||
int vmx_setjmp(struct vmxctx *ctx);
|
||||
void vmx_longjmp(void); /* returns via vmx_setjmp */
|
||||
void vmx_launch(struct vmxctx *ctx) __dead2; /* may return via vmx_setjmp */
|
||||
void vmx_resume(struct vmxctx *ctx) __dead2; /* may return via vmx_setjmp */
|
||||
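/*
 * Illustrative dispatch of the return codes above (hypothetical sketch;
 * the real VM-entry loop lives in vmx.c):
 *
 *	switch (vmx_setjmp(vmxctx)) {
 *	case VMX_RETURN_DIRECT:
 *		vmx_launch(vmxctx);	(enters the guest, returns via setjmp)
 *	case VMX_RETURN_LONGJMP:
 *		(a VM-exit longjmp'ed back; go handle the exit)
 *		break;
 *	case VMX_RETURN_AST:
 *		(entry aborted; let the pending AST run, then retry)
 *		break;
 *	case VMX_RETURN_VMLAUNCH:
 *	case VMX_RETURN_VMRESUME:
 *		(hardware refused the entry; consult vmxctx->launch_error)
 *		break;
 *	}
 */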
|
||||
u_long vmx_fix_cr0(u_long cr0);
|
||||
u_long vmx_fix_cr4(u_long cr4);
|
||||
|
||||
#endif
|
92
sys/amd64/vmm/intel/vmx_controls.h
Normal file
@ -0,0 +1,92 @@
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _VMX_CONTROLS_H_
#define	_VMX_CONTROLS_H_

/* Pin-Based VM-Execution Controls */
#define	PINBASED_EXTINT_EXITING		(1 << 0)
#define	PINBASED_NMI_EXITING		(1 << 3)
#define	PINBASED_VIRTUAL_NMI		(1 << 5)
#define	PINBASED_PREMPTION_TIMER	(1 << 6)

/* Primary Processor-Based VM-Execution Controls */
#define	PROCBASED_INT_WINDOW_EXITING	(1 << 2)
#define	PROCBASED_TSC_OFFSET		(1 << 3)
#define	PROCBASED_HLT_EXITING		(1 << 7)
#define	PROCBASED_INVLPG_EXITING	(1 << 9)
#define	PROCBASED_MWAIT_EXITING		(1 << 10)
#define	PROCBASED_RDPMC_EXITING		(1 << 11)
#define	PROCBASED_RDTSC_EXITING		(1 << 12)
#define	PROCBASED_CR3_LOAD_EXITING	(1 << 15)
#define	PROCBASED_CR3_STORE_EXITING	(1 << 16)
#define	PROCBASED_CR8_LOAD_EXITING	(1 << 19)
#define	PROCBASED_CR8_STORE_EXITING	(1 << 20)
#define	PROCBASED_USE_TPR_SHADOW	(1 << 21)
#define	PROCBASED_NMI_WINDOW_EXITING	(1 << 22)
#define	PROCBASED_MOV_DR_EXITING	(1 << 23)
#define	PROCBASED_IO_EXITING		(1 << 24)
#define	PROCBASED_IO_BITMAPS		(1 << 25)
#define	PROCBASED_MTF			(1 << 27)
#define	PROCBASED_MSR_BITMAPS		(1 << 28)
#define	PROCBASED_MONITOR_EXITING	(1 << 29)
#define	PROCBASED_PAUSE_EXITING		(1 << 30)
#define	PROCBASED_SECONDARY_CONTROLS	(1 << 31)

/* Secondary Processor-Based VM-Execution Controls */
#define	PROCBASED2_VIRTUALIZE_APIC	(1 << 0)
#define	PROCBASED2_ENABLE_EPT		(1 << 1)
#define	PROCBASED2_DESC_TABLE_EXITING	(1 << 2)
#define	PROCBASED2_ENABLE_RDTSCP	(1 << 3)
#define	PROCBASED2_VIRTUALIZE_X2APIC	(1 << 4)
#define	PROCBASED2_ENABLE_VPID		(1 << 5)
#define	PROCBASED2_WBINVD_EXITING	(1 << 6)
#define	PROCBASED2_UNRESTRICTED_GUEST	(1 << 7)
#define	PROCBASED2_PAUSE_LOOP_EXITING	(1 << 10)

/* VM Exit Controls */
#define	VM_EXIT_SAVE_DEBUG_CONTROLS	(1 << 2)
#define	VM_EXIT_HOST_LMA		(1 << 9)
#define	VM_EXIT_LOAD_PERF_GLOBAL_CTRL	(1 << 12)
#define	VM_EXIT_ACKNOWLEDGE_INTERRUPT	(1 << 15)
#define	VM_EXIT_SAVE_PAT		(1 << 18)
#define	VM_EXIT_LOAD_PAT		(1 << 19)
#define	VM_EXIT_SAVE_EFER		(1 << 20)
#define	VM_EXIT_LOAD_EFER		(1 << 21)
#define	VM_EXIT_SAVE_PREEMPTION_TIMER	(1 << 22)

/* VM Entry Controls */
#define	VM_ENTRY_LOAD_DEBUG_CONTROLS	(1 << 2)
#define	VM_ENTRY_GUEST_LMA		(1 << 9)
#define	VM_ENTRY_INTO_SMM		(1 << 10)
#define	VM_ENTRY_DEACTIVATE_DUAL_MONITOR	(1 << 11)
#define	VM_ENTRY_LOAD_PERF_GLOBAL_CTRL	(1 << 13)
#define	VM_ENTRY_LOAD_PAT		(1 << 14)
#define	VM_ENTRY_LOAD_EFER		(1 << 15)

#endif
218
sys/amd64/vmm/intel/vmx_cpufunc.h
Normal file
218
sys/amd64/vmm/intel/vmx_cpufunc.h
Normal file
@ -0,0 +1,218 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMX_CPUFUNC_H_
|
||||
#define _VMX_CPUFUNC_H_
|
||||
|
||||
struct vmcs;
|
||||
|
||||
/*
|
||||
* Section 5.2 "Conventions" from Intel Architecture Manual 2B.
|
||||
*
|
||||
* error
|
||||
* VMsucceed 0
|
||||
* VMFailInvalid 1
|
||||
* VMFailValid 2 see also VMCS VM-Instruction Error Field
|
||||
*/
|
||||
#define VM_SUCCESS 0
|
||||
#define VM_FAIL_INVALID 1
|
||||
#define VM_FAIL_VALID 2
|
||||
#define VMX_SET_ERROR_CODE \
|
||||
" jnc 1f;" \
|
||||
" mov $1, %[error];" /* CF: error = 1 */ \
|
||||
" jmp 3f;" \
|
||||
"1: jnz 2f;" \
|
||||
" mov $2, %[error];" /* ZF: error = 2 */ \
|
||||
" jmp 3f;" \
|
||||
"2: mov $0, %[error];" \
|
||||
"3:"
|
||||
|
||||
/* returns 0 on success and non-zero on failure */
|
||||
static __inline int
|
||||
vmxon(char *region)
|
||||
{
|
||||
int error;
|
||||
uint64_t addr;
|
||||
|
||||
addr = vtophys(region);
|
||||
__asm __volatile("vmxon %[addr];"
|
||||
VMX_SET_ERROR_CODE
|
||||
: [error] "=r" (error)
|
||||
: [addr] "m" (*(uint64_t *)&addr)
|
||||
: "memory");
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/* returns 0 on success and non-zero on failure */
|
||||
static __inline int
|
||||
vmclear(struct vmcs *vmcs)
|
||||
{
|
||||
int error;
|
||||
uint64_t addr;
|
||||
|
||||
addr = vtophys(vmcs);
|
||||
__asm __volatile("vmclear %[addr];"
|
||||
VMX_SET_ERROR_CODE
|
||||
: [error] "=r" (error)
|
||||
: [addr] "m" (*(uint64_t *)&addr)
|
||||
: "memory");
|
||||
return (error);
|
||||
}
|
||||
|
||||
static __inline void
|
||||
vmxoff(void)
|
||||
{
|
||||
|
||||
__asm __volatile("vmxoff");
|
||||
}
|
||||
|
||||
static __inline void
|
||||
vmptrst(uint64_t *addr)
|
||||
{
|
||||
|
||||
__asm __volatile("vmptrst %[addr]" :: [addr]"m" (*addr) : "memory");
|
||||
}
|
||||
|
||||
static __inline int
|
||||
vmptrld(struct vmcs *vmcs)
|
||||
{
|
||||
int error;
|
||||
uint64_t addr;
|
||||
|
||||
addr = vtophys(vmcs);
|
||||
__asm __volatile("vmptrld %[addr];"
|
||||
VMX_SET_ERROR_CODE
|
||||
: [error] "=r" (error)
|
||||
: [addr] "m" (*(uint64_t *)&addr)
|
||||
: "memory");
|
||||
return (error);
|
||||
}
|
||||
|
||||
static __inline int
|
||||
vmwrite(uint64_t reg, uint64_t val)
|
||||
{
|
||||
int error;
|
||||
|
||||
__asm __volatile("vmwrite %[val], %[reg];"
|
||||
VMX_SET_ERROR_CODE
|
||||
: [error] "=r" (error)
|
||||
: [val] "r" (val), [reg] "r" (reg)
|
||||
: "memory");
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static __inline int
|
||||
vmread(uint64_t r, uint64_t *addr)
|
||||
{
|
||||
int error;
|
||||
|
||||
__asm __volatile("vmread %[r], %[addr];"
|
||||
VMX_SET_ERROR_CODE
|
||||
: [error] "=r" (error)
|
||||
: [r] "r" (r), [addr] "m" (*addr)
|
||||
: "memory");
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static void __inline
|
||||
VMCLEAR(struct vmcs *vmcs)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = vmclear(vmcs);
|
||||
if (err != 0)
|
||||
panic("%s: vmclear(%p) error %d", __func__, vmcs, err);
|
||||
|
||||
critical_exit();
|
||||
}
|
||||
|
||||
static void __inline
|
||||
VMPTRLD(struct vmcs *vmcs)
|
||||
{
|
||||
int err;
|
||||
|
||||
critical_enter();
|
||||
|
||||
err = vmptrld(vmcs);
|
||||
if (err != 0)
|
||||
panic("%s: vmptrld(%p) error %d", __func__, vmcs, err);
|
||||
}
|
||||
|
||||
#define INVVPID_TYPE_ADDRESS 0UL
|
||||
#define INVVPID_TYPE_SINGLE_CONTEXT 1UL
|
||||
#define INVVPID_TYPE_ALL_CONTEXTS 2UL
|
||||
|
||||
struct invvpid_desc {
|
||||
uint16_t vpid;
|
||||
uint16_t _res1;
|
||||
uint32_t _res2;
|
||||
uint64_t linear_addr;
|
||||
};
|
||||
CTASSERT(sizeof(struct invvpid_desc) == 16);
|
||||
|
||||
static void __inline
|
||||
invvpid(uint64_t type, struct invvpid_desc desc)
|
||||
{
|
||||
int error;
|
||||
|
||||
__asm __volatile("invvpid %[desc], %[type];"
|
||||
VMX_SET_ERROR_CODE
|
||||
: [error] "=r" (error)
|
||||
: [desc] "m" (desc), [type] "r" (type)
|
||||
: "memory");
|
||||
|
||||
if (error)
|
||||
panic("invvpid error %d", error);
|
||||
}
|
||||
|
||||
#define INVEPT_TYPE_SINGLE_CONTEXT 1UL
|
||||
#define INVEPT_TYPE_ALL_CONTEXTS 2UL
|
||||
struct invept_desc {
|
||||
uint64_t eptp;
|
||||
uint64_t _res;
|
||||
};
|
||||
CTASSERT(sizeof(struct invept_desc) == 16);
|
||||
|
||||
static void __inline
|
||||
invept(uint64_t type, struct invept_desc desc)
|
||||
{
|
||||
int error;
|
||||
|
||||
__asm __volatile("invept %[desc], %[type];"
|
||||
VMX_SET_ERROR_CODE
|
||||
: [error] "=r" (error)
|
||||
: [desc] "m" (desc), [type] "r" (type)
|
||||
: "memory");
|
||||
|
||||
if (error)
|
||||
panic("invept error %d", error);
|
||||
}
|
||||
#endif
|
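/*
 * Editor's illustrative sketch -- not part of this commit.  It exercises the
 * wrappers above: VMPTRLD() enters a critical section and makes 'vmcs'
 * current, vmread()/vmwrite() then operate on it, and VMCLEAR() flushes it
 * and leaves the critical section.  The function name and the 'encoding'
 * parameter (a VMCS field encoding) are assumptions for the example.
 */
static int
vmcs_field_rw_sketch(struct vmcs *vmcs, uint64_t encoding, uint64_t newval,
    uint64_t *oldval)
{
	int error;

	VMPTRLD(vmcs);				/* panics if vmptrld fails */
	error = vmread(encoding, oldval);
	if (error == VM_SUCCESS)
		error = vmwrite(encoding, newval);
	VMCLEAR(vmcs);				/* panics if vmclear fails */

	/* VM_SUCCESS, VM_FAIL_INVALID or VM_FAIL_VALID per the table above. */
	return (error);
}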
89
sys/amd64/vmm/intel/vmx_genassym.c
Normal file
89
sys/amd64/vmm/intel/vmx_genassym.c
Normal file
@ -0,0 +1,89 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/assym.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
|
||||
#include <machine/pmap.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "vmx.h"
|
||||
#include "vmx_cpufunc.h"
|
||||
|
||||
ASSYM(VMXCTX_TMPSTKTOP, offsetof(struct vmxctx, tmpstktop));
|
||||
ASSYM(VMXCTX_GUEST_RDI, offsetof(struct vmxctx, guest_rdi));
|
||||
ASSYM(VMXCTX_GUEST_RSI, offsetof(struct vmxctx, guest_rsi));
|
||||
ASSYM(VMXCTX_GUEST_RDX, offsetof(struct vmxctx, guest_rdx));
|
||||
ASSYM(VMXCTX_GUEST_RCX, offsetof(struct vmxctx, guest_rcx));
|
||||
ASSYM(VMXCTX_GUEST_R8, offsetof(struct vmxctx, guest_r8));
|
||||
ASSYM(VMXCTX_GUEST_R9, offsetof(struct vmxctx, guest_r9));
|
||||
ASSYM(VMXCTX_GUEST_RAX, offsetof(struct vmxctx, guest_rax));
|
||||
ASSYM(VMXCTX_GUEST_RBX, offsetof(struct vmxctx, guest_rbx));
|
||||
ASSYM(VMXCTX_GUEST_RBP, offsetof(struct vmxctx, guest_rbp));
|
||||
ASSYM(VMXCTX_GUEST_R10, offsetof(struct vmxctx, guest_r10));
|
||||
ASSYM(VMXCTX_GUEST_R11, offsetof(struct vmxctx, guest_r11));
|
||||
ASSYM(VMXCTX_GUEST_R12, offsetof(struct vmxctx, guest_r12));
|
||||
ASSYM(VMXCTX_GUEST_R13, offsetof(struct vmxctx, guest_r13));
|
||||
ASSYM(VMXCTX_GUEST_R14, offsetof(struct vmxctx, guest_r14));
|
||||
ASSYM(VMXCTX_GUEST_R15, offsetof(struct vmxctx, guest_r15));
|
||||
ASSYM(VMXCTX_GUEST_CR2, offsetof(struct vmxctx, guest_cr2));
|
||||
|
||||
ASSYM(VMXCTX_HOST_R15, offsetof(struct vmxctx, host_r15));
|
||||
ASSYM(VMXCTX_HOST_R14, offsetof(struct vmxctx, host_r14));
|
||||
ASSYM(VMXCTX_HOST_R13, offsetof(struct vmxctx, host_r13));
|
||||
ASSYM(VMXCTX_HOST_R12, offsetof(struct vmxctx, host_r12));
|
||||
ASSYM(VMXCTX_HOST_RBP, offsetof(struct vmxctx, host_rbp));
|
||||
ASSYM(VMXCTX_HOST_RSP, offsetof(struct vmxctx, host_rsp));
|
||||
ASSYM(VMXCTX_HOST_RBX, offsetof(struct vmxctx, host_rbx));
|
||||
ASSYM(VMXCTX_HOST_RIP, offsetof(struct vmxctx, host_rip));
|
||||
|
||||
ASSYM(VMXCTX_LAUNCH_ERROR, offsetof(struct vmxctx, launch_error));
|
||||
|
||||
ASSYM(VM_SUCCESS, VM_SUCCESS);
|
||||
ASSYM(VM_FAIL_INVALID, VM_FAIL_INVALID);
|
||||
ASSYM(VM_FAIL_VALID, VM_FAIL_VALID);
|
||||
|
||||
ASSYM(VMX_RETURN_DIRECT, VMX_RETURN_DIRECT);
|
||||
ASSYM(VMX_RETURN_LONGJMP, VMX_RETURN_LONGJMP);
|
||||
ASSYM(VMX_RETURN_VMRESUME, VMX_RETURN_VMRESUME);
|
||||
ASSYM(VMX_RETURN_VMLAUNCH, VMX_RETURN_VMLAUNCH);
|
||||
ASSYM(VMX_RETURN_AST, VMX_RETURN_AST);
|
||||
|
||||
ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
|
||||
ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
|
||||
ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
|
||||
ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread));
|
172
sys/amd64/vmm/intel/vmx_msr.c
Normal file
172
sys/amd64/vmm/intel/vmx_msr.c
Normal file
@ -0,0 +1,172 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
|
||||
#include <machine/cpufunc.h>
|
||||
|
||||
#include "vmx_msr.h"
|
||||
|
||||
static boolean_t
|
||||
vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
|
||||
{
|
||||
|
||||
if (msr_val & (1UL << (bitpos + 32)))
|
||||
return (TRUE);
|
||||
else
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
|
||||
{
|
||||
|
||||
if ((msr_val & (1UL << bitpos)) == 0)
|
||||
return (TRUE);
|
||||
else
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
vmx_revision(void)
|
||||
{
|
||||
|
||||
return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate a bitmask to be used for the VMCS execution control fields.
|
||||
*
|
||||
* The caller specifies what bits should be set to one in 'ones_mask'
|
||||
* and what bits should be set to zero in 'zeros_mask'. The don't-care
|
||||
* bits are set to the default value. The default values are obtained
|
||||
* based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
|
||||
* VMX Capabilities".
|
||||
*
|
||||
* Returns zero on success and non-zero on error.
|
||||
*/
|
||||
int
|
||||
vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
|
||||
uint32_t zeros_mask, uint32_t *retval)
|
||||
{
|
||||
int i;
|
||||
uint64_t val, trueval;
|
||||
boolean_t true_ctls_avail, one_allowed, zero_allowed;
|
||||
|
||||
/* We cannot ask the same bit to be set to both '1' and '0' */
|
||||
if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
|
||||
return (EINVAL);
|
||||
|
||||
if (rdmsr(MSR_VMX_BASIC) & (1UL << 55))
|
||||
true_ctls_avail = TRUE;
|
||||
else
|
||||
true_ctls_avail = FALSE;
|
||||
|
||||
val = rdmsr(ctl_reg);
|
||||
if (true_ctls_avail)
|
||||
trueval = rdmsr(true_ctl_reg); /* step c */
|
||||
else
|
||||
trueval = val; /* step a */
|
||||
|
||||
for (i = 0; i < 32; i++) {
|
||||
one_allowed = vmx_ctl_allows_one_setting(trueval, i);
|
||||
zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);
|
||||
|
||||
KASSERT(one_allowed || zero_allowed,
|
||||
("invalid zero/one setting for bit %d of ctl 0x%0x, "
|
||||
"truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));
|
||||
|
||||
if (zero_allowed && !one_allowed) { /* b(i),c(i) */
|
||||
if (ones_mask & (1 << i))
|
||||
return (EINVAL);
|
||||
*retval &= ~(1 << i);
|
||||
} else if (one_allowed && !zero_allowed) { /* b(i),c(i) */
|
||||
if (zeros_mask & (1 << i))
|
||||
return (EINVAL);
|
||||
*retval |= 1 << i;
|
||||
} else {
|
||||
if (zeros_mask & (1 << i)) /* b(ii),c(ii) */
|
||||
*retval &= ~(1 << i);
|
||||
else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
|
||||
*retval |= 1 << i;
|
||||
else if (!true_ctls_avail)
|
||||
*retval &= ~(1 << i); /* b(iii) */
|
||||
else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
|
||||
*retval &= ~(1 << i);
|
||||
else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
|
||||
*retval |= 1 << i;
|
||||
else {
|
||||
panic("vmx_set_ctlreg: unable to determine "
|
||||
"correct value of ctl bit %d for msr "
|
||||
"0x%0x and true msr 0x%0x", i, ctl_reg,
|
||||
true_ctl_reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
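/*
 * Editor's illustrative sketch -- not part of this commit.  It shows how a
 * caller might use vmx_set_ctlreg() above to compute the pin-based execution
 * controls, insisting that external-interrupt and NMI exiting be set.  The
 * MSR and control names come from vmx_msr.h and vmx_controls.h; the function
 * name is an assumption.  '*pinbased_ctls' should be initialized (e.g. to 0)
 * by the caller since don't-care bits are filled in by masking it.
 */
static int
pinbased_ctls_sketch(uint32_t *pinbased_ctls)
{

	return (vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS,
	    MSR_VMX_TRUE_PINBASED_CTLS,
	    PINBASED_EXTINT_EXITING | PINBASED_NMI_EXITING,	/* ones_mask */
	    0,							/* zeros_mask */
	    pinbased_ctls));
}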
|
||||
void
|
||||
msr_bitmap_initialize(char *bitmap)
|
||||
{
|
||||
|
||||
memset(bitmap, 0xff, PAGE_SIZE);
|
||||
}
|
||||
|
||||
int
|
||||
msr_bitmap_change_access(char *bitmap, u_int msr, int access)
|
||||
{
|
||||
int byte, bit;
|
||||
|
||||
if (msr <= 0x00001FFF)
|
||||
byte = msr / 8;
|
||||
else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
|
||||
byte = 1024 + (msr - 0xC0000000) / 8;
|
||||
else
|
||||
return (EINVAL);
|
||||
|
||||
bit = msr & 0x7;
|
||||
|
||||
if (access & MSR_BITMAP_ACCESS_READ)
|
||||
bitmap[byte] &= ~(1 << bit);
|
||||
else
|
||||
bitmap[byte] |= 1 << bit;
|
||||
|
||||
byte += 2048;
|
||||
if (access & MSR_BITMAP_ACCESS_WRITE)
|
||||
bitmap[byte] &= ~(1 << bit);
|
||||
else
|
||||
bitmap[byte] |= 1 << bit;
|
||||
|
||||
return (0);
|
||||
}
|
78
sys/amd64/vmm/intel/vmx_msr.h
Normal file
78
sys/amd64/vmm/intel/vmx_msr.h
Normal file
@ -0,0 +1,78 @@
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _VMX_MSR_H_
#define	_VMX_MSR_H_

#define	MSR_VMX_BASIC			0x480
#define	MSR_VMX_EPT_VPID_CAP		0x48C

#define	MSR_VMX_PROCBASED_CTLS		0x482
#define	MSR_VMX_TRUE_PROCBASED_CTLS	0x48E

#define	MSR_VMX_PINBASED_CTLS		0x481
#define	MSR_VMX_TRUE_PINBASED_CTLS	0x48D

#define	MSR_VMX_PROCBASED_CTLS2		0x48B

#define	MSR_VMX_EXIT_CTLS		0x483
#define	MSR_VMX_TRUE_EXIT_CTLS		0x48f

#define	MSR_VMX_ENTRY_CTLS		0x484
#define	MSR_VMX_TRUE_ENTRY_CTLS		0x490

#define	MSR_VMX_CR0_FIXED0		0x486
#define	MSR_VMX_CR0_FIXED1		0x487

#define	MSR_VMX_CR4_FIXED0		0x488
#define	MSR_VMX_CR4_FIXED1		0x489

uint32_t vmx_revision(void);

int	vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
	    uint32_t zeros_mask, uint32_t *retval);

/*
 * According to Section 21.10.4 "Software Access to Related Structures",
 * changes to data structures pointed to by the VMCS must be made only when
 * there is no logical processor with a current VMCS that points to the
 * data structure.
 *
 * This pretty much limits us to configuring the MSR bitmap before VMCS
 * initialization for SMP VMs. Unless of course we do it the hard way - which
 * would involve some form of synchronization between the vcpus to vmclear
 * all VMCSs that point to the bitmap.
 */
#define	MSR_BITMAP_ACCESS_NONE	0x0
#define	MSR_BITMAP_ACCESS_READ	0x1
#define	MSR_BITMAP_ACCESS_WRITE	0x2
#define	MSR_BITMAP_ACCESS_RW	(MSR_BITMAP_ACCESS_READ|MSR_BITMAP_ACCESS_WRITE)
void	msr_bitmap_initialize(char *bitmap);
int	msr_bitmap_change_access(char *bitmap, u_int msr, int access);

#endif
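/*
 * Editor's illustrative sketch -- not part of this commit.  It shows the
 * intended use of the two functions above: start with a bitmap that
 * intercepts every MSR access and then punch a hole for one MSR (EFER,
 * 0xc0000080, as an example).  'bitmap' must be a page-sized buffer and the
 * function name is an assumption.
 */
static int
msr_bitmap_setup_sketch(char *bitmap)
{

	msr_bitmap_initialize(bitmap);	/* intercept all MSR accesses */
	return (msr_bitmap_change_access(bitmap, 0xc0000080,
	    MSR_BITMAP_ACCESS_RW));	/* let the guest RDMSR/WRMSR EFER */
}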
246
sys/amd64/vmm/intel/vmx_support.S
Normal file
246
sys/amd64/vmm/intel/vmx_support.S
Normal file
@ -0,0 +1,246 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <machine/asmacros.h>
|
||||
|
||||
#include "vmx_assym.s"
|
||||
|
||||
/*
|
||||
* Disable interrupts before updating %rsp in VMX_CHECK_AST or
|
||||
* VMX_GUEST_RESTORE.
|
||||
*
|
||||
* The location that %rsp points to is a 'vmxctx' and not a
|
||||
* real stack so we don't want an interrupt handler to trash it
|
||||
*/
|
||||
#define VMX_DISABLE_INTERRUPTS cli
|
||||
|
||||
/*
|
||||
* If the thread hosting the vcpu has an ast pending then take care of it
|
||||
* by returning from vmx_setjmp() with a return value of VMX_RETURN_AST.
|
||||
*
|
||||
* Assumes that %rdi holds a pointer to the 'vmxctx' and that interrupts
|
||||
* are disabled.
|
||||
*/
|
||||
#define VMX_CHECK_AST \
|
||||
movq PCPU(CURTHREAD),%rax; \
|
||||
testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax); \
|
||||
je 9f; \
|
||||
movq $VMX_RETURN_AST,%rsi; \
|
||||
movq %rdi,%rsp; \
|
||||
addq $VMXCTX_TMPSTKTOP,%rsp; \
|
||||
callq vmx_return; \
|
||||
9:
|
||||
|
||||
/*
|
||||
* Assumes that %rdi holds a pointer to the 'vmxctx'.
|
||||
*
|
||||
* On "return" all registers are updated to reflect guest state. The two
|
||||
* exceptions are %rip and %rsp. These registers are atomically switched
|
||||
* by hardware from the guest area of the vmcs.
|
||||
*
|
||||
* We modify %rsp to point to the 'vmxctx' so we can use it to restore
|
||||
* host context in case of an error with 'vmlaunch' or 'vmresume'.
|
||||
*/
|
||||
#define VMX_GUEST_RESTORE \
|
||||
movq %rdi,%rsp; \
|
||||
movq VMXCTX_GUEST_CR2(%rdi),%rsi; \
|
||||
movq %rsi,%cr2; \
|
||||
movq VMXCTX_GUEST_RSI(%rdi),%rsi; \
|
||||
movq VMXCTX_GUEST_RDX(%rdi),%rdx; \
|
||||
movq VMXCTX_GUEST_RCX(%rdi),%rcx; \
|
||||
movq VMXCTX_GUEST_R8(%rdi),%r8; \
|
||||
movq VMXCTX_GUEST_R9(%rdi),%r9; \
|
||||
movq VMXCTX_GUEST_RAX(%rdi),%rax; \
|
||||
movq VMXCTX_GUEST_RBX(%rdi),%rbx; \
|
||||
movq VMXCTX_GUEST_RBP(%rdi),%rbp; \
|
||||
movq VMXCTX_GUEST_R10(%rdi),%r10; \
|
||||
movq VMXCTX_GUEST_R11(%rdi),%r11; \
|
||||
movq VMXCTX_GUEST_R12(%rdi),%r12; \
|
||||
movq VMXCTX_GUEST_R13(%rdi),%r13; \
|
||||
movq VMXCTX_GUEST_R14(%rdi),%r14; \
|
||||
movq VMXCTX_GUEST_R15(%rdi),%r15; \
|
||||
movq VMXCTX_GUEST_RDI(%rdi),%rdi; /* restore rdi the last */
|
||||
|
||||
#define VM_INSTRUCTION_ERROR(reg) \
|
||||
jnc 1f; \
|
||||
movl $VM_FAIL_INVALID,reg; /* CF is set */ \
|
||||
jmp 3f; \
|
||||
1: jnz 2f; \
|
||||
movl $VM_FAIL_VALID,reg; /* ZF is set */ \
|
||||
jmp 3f; \
|
||||
2: movl $VM_SUCCESS,reg; \
|
||||
3: movl reg,VMXCTX_LAUNCH_ERROR(%rsp)
|
||||
|
||||
.text
|
||||
/*
|
||||
* int vmx_setjmp(ctxp)
|
||||
* %rdi = ctxp
|
||||
*
|
||||
* Return value is '0' when it returns directly from here.
|
||||
* Return value is '1' when it returns after a vm exit through vmx_longjmp.
|
||||
*/
|
||||
ENTRY(vmx_setjmp)
|
||||
movq (%rsp),%rax /* return address */
|
||||
movq %r15,VMXCTX_HOST_R15(%rdi)
|
||||
movq %r14,VMXCTX_HOST_R14(%rdi)
|
||||
movq %r13,VMXCTX_HOST_R13(%rdi)
|
||||
movq %r12,VMXCTX_HOST_R12(%rdi)
|
||||
movq %rbp,VMXCTX_HOST_RBP(%rdi)
|
||||
movq %rsp,VMXCTX_HOST_RSP(%rdi)
|
||||
movq %rbx,VMXCTX_HOST_RBX(%rdi)
|
||||
movq %rax,VMXCTX_HOST_RIP(%rdi)
|
||||
|
||||
/*
|
||||
* XXX save host debug registers
|
||||
*/
|
||||
movl $VMX_RETURN_DIRECT,%eax
|
||||
ret
|
||||
END(vmx_setjmp)
|
||||
|
||||
/*
|
||||
* void vmx_return(struct vmxctx *ctxp, int retval)
|
||||
* %rdi = ctxp
|
||||
* %rsi = retval
|
||||
* Return to vmm context through vmx_setjmp() with a value of 'retval'.
|
||||
*/
|
||||
ENTRY(vmx_return)
|
||||
/* Restore host context. */
|
||||
movq VMXCTX_HOST_R15(%rdi),%r15
|
||||
movq VMXCTX_HOST_R14(%rdi),%r14
|
||||
movq VMXCTX_HOST_R13(%rdi),%r13
|
||||
movq VMXCTX_HOST_R12(%rdi),%r12
|
||||
movq VMXCTX_HOST_RBP(%rdi),%rbp
|
||||
movq VMXCTX_HOST_RSP(%rdi),%rsp
|
||||
movq VMXCTX_HOST_RBX(%rdi),%rbx
|
||||
movq VMXCTX_HOST_RIP(%rdi),%rax
|
||||
movq %rax,(%rsp) /* return address */
|
||||
|
||||
/*
|
||||
* XXX restore host debug registers
|
||||
*/
|
||||
movl %esi,%eax
|
||||
ret
|
||||
END(vmx_return)
|
||||
|
||||
/*
|
||||
* void vmx_longjmp(void)
|
||||
* %rsp points to the struct vmxctx
|
||||
*/
|
||||
ENTRY(vmx_longjmp)
|
||||
/*
|
||||
* Save guest state that is not automatically saved in the vmcs.
|
||||
*/
|
||||
movq %rdi,VMXCTX_GUEST_RDI(%rsp)
|
||||
movq %rsi,VMXCTX_GUEST_RSI(%rsp)
|
||||
movq %rdx,VMXCTX_GUEST_RDX(%rsp)
|
||||
movq %rcx,VMXCTX_GUEST_RCX(%rsp)
|
||||
movq %r8,VMXCTX_GUEST_R8(%rsp)
|
||||
movq %r9,VMXCTX_GUEST_R9(%rsp)
|
||||
movq %rax,VMXCTX_GUEST_RAX(%rsp)
|
||||
movq %rbx,VMXCTX_GUEST_RBX(%rsp)
|
||||
movq %rbp,VMXCTX_GUEST_RBP(%rsp)
|
||||
movq %r10,VMXCTX_GUEST_R10(%rsp)
|
||||
movq %r11,VMXCTX_GUEST_R11(%rsp)
|
||||
movq %r12,VMXCTX_GUEST_R12(%rsp)
|
||||
movq %r13,VMXCTX_GUEST_R13(%rsp)
|
||||
movq %r14,VMXCTX_GUEST_R14(%rsp)
|
||||
movq %r15,VMXCTX_GUEST_R15(%rsp)
|
||||
|
||||
movq %cr2,%rdi
|
||||
movq %rdi,VMXCTX_GUEST_CR2(%rsp)
|
||||
|
||||
movq %rsp,%rdi
|
||||
movq $VMX_RETURN_LONGJMP,%rsi
|
||||
|
||||
addq $VMXCTX_TMPSTKTOP,%rsp
|
||||
callq vmx_return
|
||||
END(vmx_longjmp)
|
||||
|
||||
/*
|
||||
* void vmx_resume(struct vmxctx *ctxp)
|
||||
* %rdi = ctxp
|
||||
*
|
||||
* Although the return type is a 'void' this function may return indirectly
|
||||
* through vmx_setjmp() with a return value of 2.
|
||||
*/
|
||||
ENTRY(vmx_resume)
|
||||
VMX_DISABLE_INTERRUPTS
|
||||
|
||||
VMX_CHECK_AST
|
||||
|
||||
/*
|
||||
* Restore guest state that is not automatically loaded from the vmcs.
|
||||
*/
|
||||
VMX_GUEST_RESTORE
|
||||
|
||||
vmresume
|
||||
|
||||
/*
|
||||
* Capture the reason why vmresume failed.
|
||||
*/
|
||||
VM_INSTRUCTION_ERROR(%eax)
|
||||
|
||||
/* Return via vmx_setjmp with return value of VMX_RETURN_VMRESUME */
|
||||
movq %rsp,%rdi
|
||||
movq $VMX_RETURN_VMRESUME,%rsi
|
||||
|
||||
addq $VMXCTX_TMPSTKTOP,%rsp
|
||||
callq vmx_return
|
||||
END(vmx_resume)
|
||||
|
||||
/*
|
||||
* void vmx_launch(struct vmxctx *ctxp)
|
||||
* %rdi = ctxp
|
||||
*
|
||||
* Although the return type is a 'void' this function may return indirectly
|
||||
* through vmx_setjmp() with a return value of 3.
|
||||
*/
|
||||
ENTRY(vmx_launch)
|
||||
VMX_DISABLE_INTERRUPTS
|
||||
|
||||
VMX_CHECK_AST
|
||||
|
||||
/*
|
||||
* Restore guest state that is not automatically loaded from the vmcs.
|
||||
*/
|
||||
VMX_GUEST_RESTORE
|
||||
|
||||
vmlaunch
|
||||
|
||||
/*
|
||||
* Capture the reason why vmlaunch failed.
|
||||
*/
|
||||
VM_INSTRUCTION_ERROR(%eax)
|
||||
|
||||
/* Return via vmx_setjmp with return value of VMX_RETURN_VMLAUNCH */
|
||||
movq %rsp,%rdi
|
||||
movq $VMX_RETURN_VMLAUNCH,%rsi
|
||||
|
||||
addq $VMXCTX_TMPSTKTOP,%rsp
|
||||
callq vmx_return
|
||||
END(vmx_launch)
|
677
sys/amd64/vmm/intel/vtd.c
Normal file
677
sys/amd64/vmm/intel/vtd.c
Normal file
@ -0,0 +1,677 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/malloc.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
|
||||
#include <dev/pci/pcireg.h>
|
||||
|
||||
#include <machine/pmap.h>
|
||||
#include <machine/vmparam.h>
|
||||
#include <machine/pci_cfgreg.h>
|
||||
|
||||
#include "io/iommu.h"
|
||||
|
||||
/*
|
||||
* Documented in the "Intel Virtualization Technology for Directed I/O",
|
||||
* Architecture Spec, September 2008.
|
||||
*/
|
||||
|
||||
/* Section 10.4 "Register Descriptions" */
|
||||
struct vtdmap {
|
||||
volatile uint32_t version;
|
||||
volatile uint32_t res0;
|
||||
volatile uint64_t cap;
|
||||
volatile uint64_t ext_cap;
|
||||
volatile uint32_t gcr;
|
||||
volatile uint32_t gsr;
|
||||
volatile uint64_t rta;
|
||||
volatile uint64_t ccr;
|
||||
};
|
||||
|
||||
#define VTD_CAP_SAGAW(cap) (((cap) >> 8) & 0x1F)
|
||||
#define VTD_CAP_ND(cap) ((cap) & 0x7)
|
||||
#define VTD_CAP_CM(cap) (((cap) >> 7) & 0x1)
|
||||
#define VTD_CAP_SPS(cap) (((cap) >> 34) & 0xF)
|
||||
#define VTD_CAP_RWBF(cap) (((cap) >> 4) & 0x1)
|
||||
|
||||
#define VTD_ECAP_DI(ecap) (((ecap) >> 2) & 0x1)
|
||||
#define VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1)
|
||||
#define VTD_ECAP_IRO(ecap) (((ecap) >> 8) & 0x3FF)
|
||||
|
||||
#define VTD_GCR_WBF (1 << 27)
|
||||
#define VTD_GCR_SRTP (1 << 30)
|
||||
#define VTD_GCR_TE (1 << 31)
|
||||
|
||||
#define VTD_GSR_WBFS (1 << 27)
|
||||
#define VTD_GSR_RTPS (1 << 30)
|
||||
#define VTD_GSR_TES (1 << 31)
|
||||
|
||||
#define VTD_CCR_ICC (1UL << 63) /* invalidate context cache */
|
||||
#define VTD_CCR_CIRG_GLOBAL (1UL << 61) /* global invalidation */
|
||||
|
||||
#define VTD_IIR_IVT (1UL << 63) /* invalidation IOTLB */
|
||||
#define VTD_IIR_IIRG_GLOBAL (1ULL << 60) /* global IOTLB invalidation */
|
||||
#define VTD_IIR_IIRG_DOMAIN (2ULL << 60) /* domain IOTLB invalidation */
|
||||
#define VTD_IIR_IIRG_PAGE (3ULL << 60) /* page IOTLB invalidation */
|
||||
#define VTD_IIR_DRAIN_READS (1ULL << 49) /* drain pending DMA reads */
|
||||
#define VTD_IIR_DRAIN_WRITES (1ULL << 48) /* drain pending DMA writes */
|
||||
#define VTD_IIR_DOMAIN_P 32
|
||||
|
||||
#define VTD_ROOT_PRESENT 0x1
|
||||
#define VTD_CTX_PRESENT 0x1
|
||||
#define VTD_CTX_TT_ALL (1UL << 2)
|
||||
|
||||
#define VTD_PTE_RD (1UL << 0)
|
||||
#define VTD_PTE_WR (1UL << 1)
|
||||
#define VTD_PTE_SUPERPAGE (1UL << 7)
|
||||
#define VTD_PTE_ADDR_M (0x000FFFFFFFFFF000UL)
|
||||
|
||||
struct domain {
|
||||
uint64_t *ptp; /* first level page table page */
|
||||
int pt_levels; /* number of page table levels */
|
||||
int addrwidth; /* 'AW' field in context entry */
|
||||
int spsmask; /* supported super page sizes */
|
||||
u_int id; /* domain id */
|
||||
vm_paddr_t maxaddr; /* highest address to be mapped */
|
||||
SLIST_ENTRY(domain) next;
|
||||
};
|
||||
|
||||
static SLIST_HEAD(, domain) domhead;
|
||||
|
||||
#define DRHD_MAX_UNITS 8
|
||||
static int drhd_num;
|
||||
static struct vtdmap *vtdmaps[DRHD_MAX_UNITS];
|
||||
static int max_domains;
|
||||
typedef int (*drhd_ident_func_t)(void);
|
||||
|
||||
static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
|
||||
static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
|
||||
|
||||
static MALLOC_DEFINE(M_VTD, "vtd", "vtd");
|
||||
|
||||
/*
|
||||
* Config space register definitions from the "Intel 5520 and 5500" datasheet.
|
||||
*/
|
||||
static int
|
||||
tylersburg_vtd_ident(void)
|
||||
{
|
||||
int units, nlbus;
|
||||
uint16_t did, vid;
|
||||
uint32_t miscsts, vtbar;
|
||||
|
||||
const int bus = 0;
|
||||
const int slot = 20;
|
||||
const int func = 0;
|
||||
|
||||
units = 0;
|
||||
|
||||
vid = pci_cfgregread(bus, slot, func, PCIR_VENDOR, 2);
|
||||
did = pci_cfgregread(bus, slot, func, PCIR_DEVICE, 2);
|
||||
if (vid != 0x8086 || did != 0x342E)
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* Check if this is a dual IOH configuration.
|
||||
*/
|
||||
miscsts = pci_cfgregread(bus, slot, func, 0x9C, 4);
|
||||
if (miscsts & (1 << 25))
|
||||
nlbus = pci_cfgregread(bus, slot, func, 0x160, 1);
|
||||
else
|
||||
nlbus = -1;
|
||||
|
||||
vtbar = pci_cfgregread(bus, slot, func, 0x180, 4);
|
||||
if (vtbar & 0x1) {
|
||||
vtdmaps[units++] = (struct vtdmap *)
|
||||
PHYS_TO_DMAP(vtbar & 0xffffe000);
|
||||
} else if (bootverbose)
|
||||
printf("VT-d unit in legacy IOH is disabled!\n");
|
||||
|
||||
if (nlbus != -1) {
|
||||
vtbar = pci_cfgregread(nlbus, slot, func, 0x180, 4);
|
||||
if (vtbar & 0x1) {
|
||||
vtdmaps[units++] = (struct vtdmap *)
|
||||
PHYS_TO_DMAP(vtbar & 0xffffe000);
|
||||
} else if (bootverbose)
|
||||
printf("VT-d unit in non-legacy IOH is disabled!\n");
|
||||
}
|
||||
done:
|
||||
return (units);
|
||||
}
|
||||
|
||||
static drhd_ident_func_t drhd_ident_funcs[] = {
|
||||
tylersburg_vtd_ident,
|
||||
NULL
|
||||
};
|
||||
|
||||
static int
|
||||
vtd_max_domains(struct vtdmap *vtdmap)
|
||||
{
|
||||
int nd;
|
||||
|
||||
nd = VTD_CAP_ND(vtdmap->cap);
|
||||
|
||||
switch (nd) {
|
||||
case 0:
|
||||
return (16);
|
||||
case 1:
|
||||
return (64);
|
||||
case 2:
|
||||
return (256);
|
||||
case 3:
|
||||
return (1024);
|
||||
case 4:
|
||||
return (4 * 1024);
|
||||
case 5:
|
||||
return (16 * 1024);
|
||||
case 6:
|
||||
return (64 * 1024);
|
||||
default:
|
||||
panic("vtd_max_domains: invalid value of nd (0x%0x)", nd);
|
||||
}
|
||||
}
|
||||
|
||||
static u_int
|
||||
domain_id(void)
|
||||
{
|
||||
u_int id;
|
||||
struct domain *dom;
|
||||
|
||||
/* Skip domain id 0 - it is reserved when Caching Mode field is set */
|
||||
for (id = 1; id < max_domains; id++) {
|
||||
SLIST_FOREACH(dom, &domhead, next) {
|
||||
if (dom->id == id)
|
||||
break;
|
||||
}
|
||||
if (dom == NULL)
|
||||
break; /* found it */
|
||||
}
|
||||
|
||||
if (id >= max_domains)
|
||||
panic("domain ids exhausted");
|
||||
|
||||
return (id);
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_wbflush(struct vtdmap *vtdmap)
|
||||
{
|
||||
|
||||
if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0)
|
||||
pmap_invalidate_cache();
|
||||
|
||||
if (VTD_CAP_RWBF(vtdmap->cap)) {
|
||||
vtdmap->gcr = VTD_GCR_WBF;
|
||||
while ((vtdmap->gsr & VTD_GSR_WBFS) != 0)
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_ctx_global_invalidate(struct vtdmap *vtdmap)
|
||||
{
|
||||
|
||||
vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL;
|
||||
while ((vtdmap->ccr & VTD_CCR_ICC) != 0)
|
||||
;
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
|
||||
{
|
||||
int offset;
|
||||
volatile uint64_t *iotlb_reg, val;
|
||||
|
||||
vtd_wbflush(vtdmap);
|
||||
|
||||
offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
|
||||
iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8);
|
||||
|
||||
*iotlb_reg = VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
|
||||
VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;
|
||||
|
||||
while (1) {
|
||||
val = *iotlb_reg;
|
||||
if ((val & VTD_IIR_IVT) == 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_translation_enable(struct vtdmap *vtdmap)
|
||||
{
|
||||
|
||||
vtdmap->gcr = VTD_GCR_TE;
|
||||
while ((vtdmap->gsr & VTD_GSR_TES) == 0)
|
||||
;
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_translation_disable(struct vtdmap *vtdmap)
|
||||
{
|
||||
|
||||
vtdmap->gcr = 0;
|
||||
while ((vtdmap->gsr & VTD_GSR_TES) != 0)
|
||||
;
|
||||
}
|
||||
|
||||
static int
|
||||
vtd_init(void)
|
||||
{
|
||||
int i, units;
|
||||
struct vtdmap *vtdmap;
|
||||
vm_paddr_t ctx_paddr;
|
||||
|
||||
for (i = 0; drhd_ident_funcs[i] != NULL; i++) {
|
||||
units = (*drhd_ident_funcs[i])();
|
||||
if (units > 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (units <= 0)
|
||||
return (ENXIO);
|
||||
|
||||
drhd_num = units;
|
||||
vtdmap = vtdmaps[0];
|
||||
|
||||
if (VTD_CAP_CM(vtdmap->cap) != 0)
|
||||
panic("vtd_init: invalid caching mode");
|
||||
|
||||
max_domains = vtd_max_domains(vtdmap);
|
||||
|
||||
/*
|
||||
* Set up the root-table to point to the context-entry tables
|
||||
*/
|
||||
for (i = 0; i < 256; i++) {
|
||||
ctx_paddr = vtophys(ctx_tables[i]);
|
||||
if (ctx_paddr & PAGE_MASK)
|
||||
panic("ctx table (0x%0lx) not page aligned", ctx_paddr);
|
||||
|
||||
root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_cleanup(void)
|
||||
{
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_enable(void)
|
||||
{
|
||||
int i;
|
||||
struct vtdmap *vtdmap;
|
||||
|
||||
for (i = 0; i < drhd_num; i++) {
|
||||
vtdmap = vtdmaps[i];
|
||||
vtd_wbflush(vtdmap);
|
||||
|
||||
/* Update the root table address */
|
||||
vtdmap->rta = vtophys(root_table);
|
||||
vtdmap->gcr = VTD_GCR_SRTP;
|
||||
while ((vtdmap->gsr & VTD_GSR_RTPS) == 0)
|
||||
;
|
||||
|
||||
vtd_ctx_global_invalidate(vtdmap);
|
||||
vtd_iotlb_global_invalidate(vtdmap);
|
||||
|
||||
vtd_translation_enable(vtdmap);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_disable(void)
|
||||
{
|
||||
int i;
|
||||
struct vtdmap *vtdmap;
|
||||
|
||||
for (i = 0; i < drhd_num; i++) {
|
||||
vtdmap = vtdmaps[i];
|
||||
vtd_translation_disable(vtdmap);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_add_device(void *arg, int bus, int slot, int func)
|
||||
{
|
||||
int idx;
|
||||
uint64_t *ctxp;
|
||||
struct domain *dom = arg;
|
||||
vm_paddr_t pt_paddr;
|
||||
struct vtdmap *vtdmap;
|
||||
|
||||
if (bus < 0 || bus > PCI_BUSMAX ||
|
||||
slot < 0 || slot > PCI_SLOTMAX ||
|
||||
func < 0 || func > PCI_FUNCMAX)
|
||||
panic("vtd_add_device: invalid bsf %d/%d/%d", bus, slot, func);
|
||||
|
||||
vtdmap = vtdmaps[0];
|
||||
ctxp = ctx_tables[bus];
|
||||
pt_paddr = vtophys(dom->ptp);
|
||||
idx = (slot << 3 | func) * 2;
|
||||
|
||||
if (ctxp[idx] & VTD_CTX_PRESENT) {
|
||||
panic("vtd_add_device: device %d/%d/%d is already owned by "
|
||||
"domain %d", bus, slot, func,
|
||||
(uint16_t)(ctxp[idx + 1] >> 8));
|
||||
}
|
||||
|
||||
/*
|
||||
* Order is important. The 'present' bit is set only after all fields
|
||||
* of the context pointer are initialized.
|
||||
*/
|
||||
ctxp[idx + 1] = dom->addrwidth | (dom->id << 8);
|
||||
|
||||
if (VTD_ECAP_DI(vtdmap->ext_cap))
|
||||
ctxp[idx] = VTD_CTX_TT_ALL;
|
||||
else
|
||||
ctxp[idx] = 0;
|
||||
|
||||
ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT;
|
||||
|
||||
/*
|
||||
* 'Not Present' entries are not cached in either the Context Cache
|
||||
* or in the IOTLB, so there is no need to invalidate either of them.
|
||||
*/
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_remove_device(void *arg, int bus, int slot, int func)
|
||||
{
|
||||
int i, idx;
|
||||
uint64_t *ctxp;
|
||||
struct vtdmap *vtdmap;
|
||||
|
||||
if (bus < 0 || bus > PCI_BUSMAX ||
|
||||
slot < 0 || slot > PCI_SLOTMAX ||
|
||||
func < 0 || func > PCI_FUNCMAX)
|
||||
panic("vtd_add_device: invalid bsf %d/%d/%d", bus, slot, func);
|
||||
|
||||
ctxp = ctx_tables[bus];
|
||||
idx = (slot << 3 | func) * 2;
|
||||
|
||||
/*
|
||||
* Order is important. The 'present' bit must be cleared first.
|
||||
*/
|
||||
ctxp[idx] = 0;
|
||||
ctxp[idx + 1] = 0;
|
||||
|
||||
/*
|
||||
* Invalidate the Context Cache and the IOTLB.
|
||||
*
|
||||
* XXX use device-selective invalidation for Context Cache
|
||||
* XXX use domain-selective invalidation for IOTLB
|
||||
*/
|
||||
for (i = 0; i < drhd_num; i++) {
|
||||
vtdmap = vtdmaps[i];
|
||||
vtd_ctx_global_invalidate(vtdmap);
|
||||
vtd_iotlb_global_invalidate(vtdmap);
|
||||
}
|
||||
}
|
||||
|
||||
#define CREATE_MAPPING 0
|
||||
#define REMOVE_MAPPING 1
|
||||
|
||||
static uint64_t
|
||||
vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len,
|
||||
int remove)
|
||||
{
|
||||
struct domain *dom;
|
||||
int i, spshift, ptpshift, ptpindex, nlevels;
|
||||
uint64_t spsize, *ptp;
|
||||
|
||||
dom = arg;
|
||||
ptpindex = 0;
|
||||
ptpshift = 0;
|
||||
|
||||
if (gpa & PAGE_MASK)
|
||||
panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa);
|
||||
|
||||
if (hpa & PAGE_MASK)
|
||||
panic("vtd_create_mapping: unaligned hpa 0x%0lx", hpa);
|
||||
|
||||
if (len & PAGE_MASK)
|
||||
panic("vtd_create_mapping: unaligned len 0x%0lx", len);
|
||||
|
||||
/*
|
||||
* Compute the size of the mapping that we can accommodate.
|
||||
*
|
||||
* This is based on three factors:
|
||||
* - supported super page size
|
||||
* - alignment of the region starting at 'gpa' and 'hpa'
|
||||
* - length of the region 'len'
|
||||
*/
|
||||
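	/*
	 * Editor's note (illustrative example, not part of this commit):
	 * if the hardware advertises 2MB superpages (bit 0 of 'spsmask')
	 * and 'gpa' and 'hpa' are 2MB-aligned with 'len' >= 2MB, the loop
	 * below settles on spshift = 21, so each call maps 2MB at a time.
	 */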
spshift = 48;
|
||||
for (i = 3; i >= 0; i--) {
|
||||
spsize = 1UL << spshift;
|
||||
if ((dom->spsmask & (1 << i)) != 0 &&
|
||||
(gpa & (spsize - 1)) == 0 &&
|
||||
(hpa & (spsize - 1)) == 0 &&
|
||||
(len >= spsize)) {
|
||||
break;
|
||||
}
|
||||
spshift -= 9;
|
||||
}
|
||||
|
||||
ptp = dom->ptp;
|
||||
nlevels = dom->pt_levels;
|
||||
while (--nlevels >= 0) {
|
||||
ptpshift = 12 + nlevels * 9;
|
||||
ptpindex = (gpa >> ptpshift) & 0x1FF;
|
||||
|
||||
/* We have reached the leaf mapping */
|
||||
if (spshift >= ptpshift) {
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* We are working on a non-leaf page table page.
|
||||
*
|
||||
* Create a downstream page table page if necessary and point
|
||||
* to it from the current page table.
|
||||
*/
|
||||
if (ptp[ptpindex] == 0) {
|
||||
void *nlp = malloc(PAGE_SIZE, M_VTD, M_WAITOK | M_ZERO);
|
||||
ptp[ptpindex] = vtophys(nlp)| VTD_PTE_RD | VTD_PTE_WR;
|
||||
}
|
||||
|
||||
ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M);
|
||||
}
|
||||
|
||||
if ((gpa & ((1UL << ptpshift) - 1)) != 0)
|
||||
panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift);
|
||||
|
||||
/*
|
||||
* Update the 'gpa' -> 'hpa' mapping
|
||||
*/
|
||||
if (remove) {
|
||||
ptp[ptpindex] = 0;
|
||||
} else {
|
||||
ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR;
|
||||
|
||||
if (nlevels > 0)
|
||||
ptp[ptpindex] |= VTD_PTE_SUPERPAGE;
|
||||
}
|
||||
|
||||
return (1UL << ptpshift);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
|
||||
{
|
||||
|
||||
return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING));
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
|
||||
{
|
||||
|
||||
return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING));
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_invalidate_tlb(void *dom)
|
||||
{
|
||||
int i;
|
||||
struct vtdmap *vtdmap;
|
||||
|
||||
/*
|
||||
* Invalidate the IOTLB.
|
||||
* XXX use domain-selective invalidation for IOTLB
|
||||
*/
|
||||
for (i = 0; i < drhd_num; i++) {
|
||||
vtdmap = vtdmaps[i];
|
||||
vtd_iotlb_global_invalidate(vtdmap);
|
||||
}
|
||||
}
|
||||
|
||||
static void *
|
||||
vtd_create_domain(vm_paddr_t maxaddr)
|
||||
{
|
||||
struct domain *dom;
|
||||
vm_paddr_t addr;
|
||||
int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth;
|
||||
struct vtdmap *vtdmap;
|
||||
|
||||
if (drhd_num <= 0)
|
||||
panic("vtd_create_domain: no dma remapping hardware available");
|
||||
|
||||
vtdmap = vtdmaps[0];
|
||||
|
||||
/*
|
||||
* Calculate AGAW.
|
||||
* Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
|
||||
*/
|
||||
addr = 0;
|
||||
for (gaw = 0; addr < maxaddr; gaw++)
|
||||
addr = 1ULL << gaw;
|
||||
|
||||
res = (gaw - 12) % 9;
|
||||
if (res == 0)
|
||||
agaw = gaw;
|
||||
else
|
||||
agaw = gaw + 9 - res;
|
||||
|
||||
if (agaw > 64)
|
||||
agaw = 64;
|
||||
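	/*
	 * Editor's note (illustrative example, not part of this commit):
	 * for a guest with maxaddr = 4GB the loop above leaves gaw = 33,
	 * which rounds up to agaw = 39; on hardware whose SAGAW field
	 * advertises a 39-bit AGAW this selects 3-level page tables below.
	 */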
|
||||
/*
|
||||
* Select the smallest Supported AGAW and the corresponding number
|
||||
* of page table levels.
|
||||
*/
|
||||
pt_levels = 2;
|
||||
sagaw = 30;
|
||||
addrwidth = 0;
|
||||
tmp = VTD_CAP_SAGAW(vtdmap->cap);
|
||||
for (i = 0; i < 5; i++) {
|
||||
if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
|
||||
break;
|
||||
pt_levels++;
|
||||
addrwidth++;
|
||||
sagaw += 9;
|
||||
if (sagaw > 64)
|
||||
sagaw = 64;
|
||||
}
|
||||
|
||||
if (i >= 5) {
|
||||
panic("vtd_create_domain: SAGAW 0x%lx does not support AGAW %d",
|
||||
VTD_CAP_SAGAW(vtdmap->cap), agaw);
|
||||
}
|
||||
|
||||
dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
|
||||
dom->pt_levels = pt_levels;
|
||||
dom->addrwidth = addrwidth;
|
||||
dom->spsmask = VTD_CAP_SPS(vtdmap->cap);
|
||||
dom->id = domain_id();
|
||||
dom->maxaddr = maxaddr;
|
||||
dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK);
|
||||
if ((uintptr_t)dom->ptp & PAGE_MASK)
|
||||
panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);
|
||||
|
||||
SLIST_INSERT_HEAD(&domhead, dom, next);
|
||||
|
||||
return (dom);
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_free_ptp(uint64_t *ptp, int level)
|
||||
{
|
||||
int i;
|
||||
uint64_t *nlp;
|
||||
|
||||
if (level > 1) {
|
||||
for (i = 0; i < 512; i++) {
|
||||
if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0)
|
||||
continue;
|
||||
if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0)
|
||||
continue;
|
||||
nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M);
|
||||
vtd_free_ptp(nlp, level - 1);
|
||||
}
|
||||
}
|
||||
|
||||
bzero(ptp, PAGE_SIZE);
|
||||
free(ptp, M_VTD);
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_destroy_domain(void *arg)
|
||||
{
|
||||
struct domain *dom;
|
||||
|
||||
dom = arg;
|
||||
|
||||
SLIST_REMOVE(&domhead, dom, domain, next);
|
||||
vtd_free_ptp(dom->ptp, dom->pt_levels);
|
||||
free(dom, M_VTD);
|
||||
}
|
||||
|
||||
struct iommu_ops iommu_ops_intel = {
|
||||
vtd_init,
|
||||
vtd_cleanup,
|
||||
vtd_enable,
|
||||
vtd_disable,
|
||||
vtd_create_domain,
|
||||
vtd_destroy_domain,
|
||||
vtd_create_mapping,
|
||||
vtd_remove_mapping,
|
||||
vtd_add_device,
|
||||
vtd_remove_device,
|
||||
vtd_invalidate_tlb,
|
||||
};
|
277
sys/amd64/vmm/io/iommu.c
Normal file
277
sys/amd64/vmm/io/iommu.c
Normal file
@ -0,0 +1,277 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/bus.h>
|
||||
|
||||
#include <dev/pci/pcivar.h>
|
||||
#include <dev/pci/pcireg.h>
|
||||
|
||||
#include <machine/md_var.h>
|
||||
|
||||
#include "vmm_util.h"
|
||||
#include "vmm_mem.h"
|
||||
#include "iommu.h"
|
||||
|
||||
static boolean_t iommu_avail;
|
||||
static struct iommu_ops *ops;
|
||||
static void *host_domain;
|
||||
|
||||
static __inline int
|
||||
IOMMU_INIT(void)
|
||||
{
|
||||
if (ops != NULL)
|
||||
return ((*ops->init)());
|
||||
else
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
static __inline void
|
||||
IOMMU_CLEANUP(void)
|
||||
{
|
||||
if (ops != NULL && iommu_avail)
|
||||
(*ops->cleanup)();
|
||||
}
|
||||
|
||||
static __inline void *
|
||||
IOMMU_CREATE_DOMAIN(vm_paddr_t maxaddr)
|
||||
{
|
||||
|
||||
if (ops != NULL && iommu_avail)
|
||||
return ((*ops->create_domain)(maxaddr));
|
||||
else
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
static __inline void
|
||||
IOMMU_DESTROY_DOMAIN(void *dom)
|
||||
{
|
||||
|
||||
if (ops != NULL && iommu_avail)
|
||||
(*ops->destroy_domain)(dom);
|
||||
}
|
||||
|
||||
static __inline uint64_t
|
||||
IOMMU_CREATE_MAPPING(void *domain, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
|
||||
{
|
||||
|
||||
if (ops != NULL && iommu_avail)
|
||||
return ((*ops->create_mapping)(domain, gpa, hpa, len));
|
||||
else
|
||||
return (len); /* XXX */
|
||||
}
|
||||
|
||||
static __inline uint64_t
|
||||
IOMMU_REMOVE_MAPPING(void *domain, vm_paddr_t gpa, uint64_t len)
|
||||
{
|
||||
|
||||
if (ops != NULL && iommu_avail)
|
||||
return ((*ops->remove_mapping)(domain, gpa, len));
|
||||
else
|
||||
return (len); /* XXX */
|
||||
}
|
||||
|
||||
static __inline void
|
||||
IOMMU_ADD_DEVICE(void *domain, int bus, int slot, int func)
|
||||
{
|
||||
|
||||
if (ops != NULL && iommu_avail)
|
||||
(*ops->add_device)(domain, bus, slot, func);
|
||||
}
|
||||
|
||||
static __inline void
|
||||
IOMMU_REMOVE_DEVICE(void *domain, int bus, int slot, int func)
|
||||
{
|
||||
|
||||
if (ops != NULL && iommu_avail)
|
||||
(*ops->remove_device)(domain, bus, slot, func);
|
||||
}
|
||||
|
||||
static __inline void
|
||||
IOMMU_INVALIDATE_TLB(void *domain)
|
||||
{
|
||||
|
||||
if (ops != NULL && iommu_avail)
|
||||
(*ops->invalidate_tlb)(domain);
|
||||
}
|
||||
|
||||
static __inline void
|
||||
IOMMU_ENABLE(void)
|
||||
{
|
||||
|
||||
if (ops != NULL && iommu_avail)
|
||||
(*ops->enable)();
|
||||
}
|
||||
|
||||
static __inline void
|
||||
IOMMU_DISABLE(void)
|
||||
{
|
||||
|
||||
if (ops != NULL && iommu_avail)
|
||||
(*ops->disable)();
|
||||
}
|
||||
|
||||
void
|
||||
iommu_init(void)
|
||||
{
|
||||
int error, bus, slot, func;
|
||||
vm_paddr_t maxaddr;
|
||||
const char *name;
|
||||
device_t dev;
|
||||
|
||||
if (vmm_is_intel())
|
||||
ops = &iommu_ops_intel;
|
||||
else if (vmm_is_amd())
|
||||
ops = &iommu_ops_amd;
|
||||
else
|
||||
ops = NULL;
|
||||
|
||||
error = IOMMU_INIT();
|
||||
if (error)
|
||||
return;
|
||||
|
||||
iommu_avail = TRUE;
|
||||
|
||||
/*
|
||||
* Create a domain for the devices owned by the host
|
||||
*/
|
||||
maxaddr = vmm_mem_maxaddr();
|
||||
host_domain = IOMMU_CREATE_DOMAIN(maxaddr);
|
||||
if (host_domain == NULL)
|
||||
panic("iommu_init: unable to create a host domain");
|
||||
|
||||
/*
|
||||
* Create 1:1 mappings from '0' to 'maxaddr' for devices assigned to
|
||||
* the host
|
||||
*/
|
||||
iommu_create_mapping(host_domain, 0, 0, maxaddr);
|
||||
|
||||
for (bus = 0; bus <= PCI_BUSMAX; bus++) {
|
||||
for (slot = 0; slot <= PCI_SLOTMAX; slot++) {
|
||||
for (func = 0; func <= PCI_FUNCMAX; func++) {
|
||||
dev = pci_find_dbsf(0, bus, slot, func);
|
||||
if (dev == NULL)
|
||||
continue;
|
||||
|
||||
/* skip passthrough devices */
|
||||
name = device_get_name(dev);
|
||||
if (name != NULL && strcmp(name, "ppt") == 0)
|
||||
continue;
|
||||
|
||||
/* everything else belongs to the host domain */
|
||||
iommu_add_device(host_domain, bus, slot, func);
|
||||
}
|
||||
}
|
||||
}
|
||||
IOMMU_ENABLE();
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
iommu_cleanup(void)
|
||||
{
|
||||
IOMMU_DISABLE();
|
||||
IOMMU_DESTROY_DOMAIN(host_domain);
|
||||
IOMMU_CLEANUP();
|
||||
}
|
||||
|
||||
void *
|
||||
iommu_create_domain(vm_paddr_t maxaddr)
|
||||
{
|
||||
|
||||
return (IOMMU_CREATE_DOMAIN(maxaddr));
|
||||
}
|
||||
|
||||
void
|
||||
iommu_destroy_domain(void *dom)
|
||||
{
|
||||
|
||||
IOMMU_DESTROY_DOMAIN(dom);
|
||||
}
|
||||
|
||||
void
|
||||
iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa, size_t len)
|
||||
{
|
||||
uint64_t mapped, remaining;
|
||||
|
||||
remaining = len;
|
||||
|
||||
while (remaining > 0) {
|
||||
mapped = IOMMU_CREATE_MAPPING(dom, gpa, hpa, remaining);
|
||||
gpa += mapped;
|
||||
hpa += mapped;
|
||||
remaining -= mapped;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
iommu_remove_mapping(void *dom, vm_paddr_t gpa, size_t len)
|
||||
{
|
||||
uint64_t unmapped, remaining;
|
||||
|
||||
remaining = len;
|
||||
|
||||
while (remaining > 0) {
|
||||
unmapped = IOMMU_REMOVE_MAPPING(dom, gpa, remaining);
|
||||
gpa += unmapped;
|
||||
remaining -= unmapped;
|
||||
}
|
||||
}
|
||||
|
||||
void *
|
||||
iommu_host_domain(void)
|
||||
{
|
||||
|
||||
return (host_domain);
|
||||
}
|
||||
|
||||
void
|
||||
iommu_add_device(void *dom, int bus, int slot, int func)
|
||||
{
|
||||
|
||||
IOMMU_ADD_DEVICE(dom, bus, slot, func);
|
||||
}
|
||||
|
||||
void
|
||||
iommu_remove_device(void *dom, int bus, int slot, int func)
|
||||
{
|
||||
|
||||
IOMMU_REMOVE_DEVICE(dom, bus, slot, func);
|
||||
}
|
||||
|
||||
void
|
||||
iommu_invalidate_tlb(void *domain)
|
||||
{
|
||||
|
||||
IOMMU_INVALIDATE_TLB(domain);
|
||||
}
|
75
sys/amd64/vmm/io/iommu.h
Normal file
75
sys/amd64/vmm/io/iommu.h
Normal file
@ -0,0 +1,75 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _IO_IOMMU_H_
|
||||
#define _IO_IOMMU_H_
|
||||
|
||||
typedef int (*iommu_init_func_t)(void);
|
||||
typedef void (*iommu_cleanup_func_t)(void);
|
||||
typedef void (*iommu_enable_func_t)(void);
|
||||
typedef void (*iommu_disable_func_t)(void);
|
||||
typedef void *(*iommu_create_domain_t)(vm_paddr_t maxaddr);
|
||||
typedef void (*iommu_destroy_domain_t)(void *domain);
|
||||
typedef uint64_t (*iommu_create_mapping_t)(void *domain, vm_paddr_t gpa,
|
||||
vm_paddr_t hpa, uint64_t len);
|
||||
typedef uint64_t (*iommu_remove_mapping_t)(void *domain, vm_paddr_t gpa,
|
||||
uint64_t len);
|
||||
typedef void (*iommu_add_device_t)(void *domain, int bus, int slot, int func);
|
||||
typedef void (*iommu_remove_device_t)(void *dom, int bus, int slot, int func);
|
||||
typedef void (*iommu_invalidate_tlb_t)(void *dom);
|
||||
|
||||
struct iommu_ops {
|
||||
iommu_init_func_t init; /* module wide */
|
||||
iommu_cleanup_func_t cleanup;
|
||||
iommu_enable_func_t enable;
|
||||
iommu_disable_func_t disable;
|
||||
|
||||
iommu_create_domain_t create_domain; /* domain-specific */
|
||||
iommu_destroy_domain_t destroy_domain;
|
||||
iommu_create_mapping_t create_mapping;
|
||||
iommu_remove_mapping_t remove_mapping;
|
||||
iommu_add_device_t add_device;
|
||||
iommu_remove_device_t remove_device;
|
||||
iommu_invalidate_tlb_t invalidate_tlb;
|
||||
};
|
||||
|
||||
extern struct iommu_ops iommu_ops_intel;
|
||||
extern struct iommu_ops iommu_ops_amd;
|
||||
|
||||
void iommu_init(void);
|
||||
void iommu_cleanup(void);
|
||||
void *iommu_host_domain(void);
|
||||
void *iommu_create_domain(vm_paddr_t maxaddr);
|
||||
void iommu_destroy_domain(void *dom);
|
||||
void iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa,
|
||||
size_t len);
|
||||
void iommu_remove_mapping(void *dom, vm_paddr_t gpa, size_t len);
|
||||
void iommu_add_device(void *dom, int bus, int slot, int func);
|
||||
void iommu_remove_device(void *dom, int bus, int slot, int func);
|
||||
void iommu_invalidate_tlb(void *domain);
|
||||
#endif
|
610
sys/amd64/vmm/io/ppt.c
Normal file
610
sys/amd64/vmm/io/ppt.c
Normal file
@ -0,0 +1,610 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/module.h>
|
||||
#include <sys/bus.h>
|
||||
#include <sys/pciio.h>
|
||||
#include <sys/rman.h>
|
||||
#include <sys/smp.h>
|
||||
|
||||
#include <dev/pci/pcivar.h>
|
||||
#include <dev/pci/pcireg.h>
|
||||
|
||||
#include <machine/resource.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <machine/vmm_dev.h>
|
||||
|
||||
#include "vmm_lapic.h"
|
||||
#include "vmm_ktr.h"
|
||||
|
||||
#include "iommu.h"
|
||||
#include "ppt.h"
|
||||
|
||||
/* XXX locking */
|
||||
|
||||
#define MAX_PPTDEVS (sizeof(pptdevs) / sizeof(pptdevs[0]))
|
||||
#define MAX_MMIOSEGS (PCIR_MAX_BAR_0 + 1)
|
||||
#define MAX_MSIMSGS 32
|
||||
|
||||
MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
|
||||
|
||||
struct pptintr_arg { /* pptintr(pptintr_arg) */
|
||||
struct pptdev *pptdev;
|
||||
int vec;
|
||||
int vcpu;
|
||||
};
|
||||
|
||||
static struct pptdev {
|
||||
device_t dev;
|
||||
struct vm *vm; /* owner of this device */
|
||||
struct vm_memory_segment mmio[MAX_MMIOSEGS];
|
||||
struct {
|
||||
int num_msgs; /* guest state */
|
||||
|
||||
int startrid; /* host state */
|
||||
struct resource *res[MAX_MSIMSGS];
|
||||
void *cookie[MAX_MSIMSGS];
|
||||
struct pptintr_arg arg[MAX_MSIMSGS];
|
||||
} msi;
|
||||
|
||||
struct {
|
||||
int num_msgs;
|
||||
int startrid;
|
||||
int msix_table_rid;
|
||||
struct resource *msix_table_res;
|
||||
struct resource **res;
|
||||
void **cookie;
|
||||
struct pptintr_arg *arg;
|
||||
} msix;
|
||||
} pptdevs[32];
|
||||
|
||||
static int num_pptdevs;
|
||||
|
||||
static int
|
||||
ppt_probe(device_t dev)
|
||||
{
|
||||
int bus, slot, func;
|
||||
struct pci_devinfo *dinfo;
|
||||
|
||||
dinfo = (struct pci_devinfo *)device_get_ivars(dev);
|
||||
|
||||
bus = pci_get_bus(dev);
|
||||
slot = pci_get_slot(dev);
|
||||
func = pci_get_function(dev);
|
||||
|
||||
/*
|
||||
* To qualify as a pci passthrough device a device must:
|
||||
* - be allowed by administrator to be used in this role
|
||||
* - be an endpoint device
|
||||
*/
|
||||
if (vmm_is_pptdev(bus, slot, func) &&
|
||||
(dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL)
|
||||
return (0);
|
||||
else
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
static int
|
||||
ppt_attach(device_t dev)
|
||||
{
|
||||
int n;
|
||||
|
||||
if (num_pptdevs >= MAX_PPTDEVS) {
|
||||
printf("ppt_attach: maximum number of pci passthrough devices "
|
||||
"exceeded\n");
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
n = num_pptdevs++;
|
||||
pptdevs[n].dev = dev;
|
||||
|
||||
if (bootverbose)
|
||||
device_printf(dev, "attached\n");
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
ppt_detach(device_t dev)
|
||||
{
|
||||
/*
|
||||
* XXX check whether there are any pci passthrough devices assigned
|
||||
* to guests before we allow this driver to detach.
|
||||
*/
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static device_method_t ppt_methods[] = {
|
||||
/* Device interface */
|
||||
DEVMETHOD(device_probe, ppt_probe),
|
||||
DEVMETHOD(device_attach, ppt_attach),
|
||||
DEVMETHOD(device_detach, ppt_detach),
|
||||
{0, 0}
|
||||
};
|
||||
|
||||
static devclass_t ppt_devclass;
|
||||
DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0);
|
||||
DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
|
||||
|
||||
static struct pptdev *
|
||||
ppt_find(int bus, int slot, int func)
|
||||
{
|
||||
device_t dev;
|
||||
int i, b, s, f;
|
||||
|
||||
for (i = 0; i < num_pptdevs; i++) {
|
||||
dev = pptdevs[i].dev;
|
||||
b = pci_get_bus(dev);
|
||||
s = pci_get_slot(dev);
|
||||
f = pci_get_function(dev);
|
||||
if (bus == b && slot == s && func == f)
|
||||
return (&pptdevs[i]);
|
||||
}
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
|
||||
{
|
||||
int i;
|
||||
struct vm_memory_segment *seg;
|
||||
|
||||
for (i = 0; i < MAX_MMIOSEGS; i++) {
|
||||
seg = &ppt->mmio[i];
|
||||
if (seg->len == 0)
|
||||
continue;
|
||||
(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
|
||||
bzero(seg, sizeof(struct vm_memory_segment));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
ppt_teardown_msi(struct pptdev *ppt)
|
||||
{
|
||||
int i, rid;
|
||||
void *cookie;
|
||||
struct resource *res;
|
||||
|
||||
if (ppt->msi.num_msgs == 0)
|
||||
return;
|
||||
|
||||
for (i = 0; i < ppt->msi.num_msgs; i++) {
|
||||
rid = ppt->msi.startrid + i;
|
||||
res = ppt->msi.res[i];
|
||||
cookie = ppt->msi.cookie[i];
|
||||
|
||||
if (cookie != NULL)
|
||||
bus_teardown_intr(ppt->dev, res, cookie);
|
||||
|
||||
if (res != NULL)
|
||||
bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
|
||||
|
||||
ppt->msi.res[i] = NULL;
|
||||
ppt->msi.cookie[i] = NULL;
|
||||
}
|
||||
|
||||
if (ppt->msi.startrid == 1)
|
||||
pci_release_msi(ppt->dev);
|
||||
|
||||
ppt->msi.num_msgs = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
|
||||
{
|
||||
int rid;
|
||||
struct resource *res;
|
||||
void *cookie;
|
||||
|
||||
rid = ppt->msix.startrid + idx;
|
||||
res = ppt->msix.res[idx];
|
||||
cookie = ppt->msix.cookie[idx];
|
||||
|
||||
if (cookie != NULL)
|
||||
bus_teardown_intr(ppt->dev, res, cookie);
|
||||
|
||||
if (res != NULL)
|
||||
bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
|
||||
|
||||
ppt->msix.res[idx] = NULL;
|
||||
ppt->msix.cookie[idx] = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
ppt_teardown_msix(struct pptdev *ppt)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (ppt->msix.num_msgs == 0)
|
||||
return;
|
||||
|
||||
for (i = 0; i < ppt->msix.num_msgs; i++)
|
||||
ppt_teardown_msix_intr(ppt, i);
|
||||
|
||||
if (ppt->msix.msix_table_res) {
|
||||
bus_release_resource(ppt->dev, SYS_RES_MEMORY,
|
||||
ppt->msix.msix_table_rid,
|
||||
ppt->msix.msix_table_res);
|
||||
ppt->msix.msix_table_res = NULL;
|
||||
ppt->msix.msix_table_rid = 0;
|
||||
}
|
||||
|
||||
free(ppt->msix.res, M_PPTMSIX);
|
||||
free(ppt->msix.cookie, M_PPTMSIX);
|
||||
free(ppt->msix.arg, M_PPTMSIX);
|
||||
|
||||
pci_release_msi(ppt->dev);
|
||||
|
||||
ppt->msix.num_msgs = 0;
|
||||
}
|
||||
|
||||
int
|
||||
ppt_assign_device(struct vm *vm, int bus, int slot, int func)
|
||||
{
|
||||
struct pptdev *ppt;
|
||||
|
||||
ppt = ppt_find(bus, slot, func);
|
||||
if (ppt != NULL) {
|
||||
/*
|
||||
* If this device is owned by a different VM then we
|
||||
* cannot change its owner.
|
||||
*/
|
||||
if (ppt->vm != NULL && ppt->vm != vm)
|
||||
return (EBUSY);
|
||||
|
||||
ppt->vm = vm;
|
||||
iommu_add_device(vm_iommu_domain(vm), bus, slot, func);
|
||||
return (0);
|
||||
}
|
||||
return (ENOENT);
|
||||
}
|
||||
|
||||
int
|
||||
ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
|
||||
{
|
||||
struct pptdev *ppt;
|
||||
|
||||
ppt = ppt_find(bus, slot, func);
|
||||
if (ppt != NULL) {
|
||||
/*
|
||||
* If this device is not owned by this 'vm' then bail out.
|
||||
*/
|
||||
if (ppt->vm != vm)
|
||||
return (EBUSY);
|
||||
ppt_unmap_mmio(vm, ppt);
|
||||
ppt_teardown_msi(ppt);
|
||||
ppt_teardown_msix(ppt);
|
||||
iommu_remove_device(vm_iommu_domain(vm), bus, slot, func);
|
||||
ppt->vm = NULL;
|
||||
return (0);
|
||||
}
|
||||
return (ENOENT);
|
||||
}
|
||||
|
||||
int
|
||||
ppt_unassign_all(struct vm *vm)
|
||||
{
|
||||
int i, bus, slot, func;
|
||||
device_t dev;
|
||||
|
||||
for (i = 0; i < num_pptdevs; i++) {
|
||||
if (pptdevs[i].vm == vm) {
|
||||
dev = pptdevs[i].dev;
|
||||
bus = pci_get_bus(dev);
|
||||
slot = pci_get_slot(dev);
|
||||
func = pci_get_function(dev);
|
||||
ppt_unassign_device(vm, bus, slot, func);
|
||||
}
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
|
||||
vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
|
||||
{
|
||||
int i, error;
|
||||
struct vm_memory_segment *seg;
|
||||
struct pptdev *ppt;
|
||||
|
||||
ppt = ppt_find(bus, slot, func);
|
||||
if (ppt != NULL) {
|
||||
if (ppt->vm != vm)
|
||||
return (EBUSY);
|
||||
|
||||
for (i = 0; i < MAX_MMIOSEGS; i++) {
|
||||
seg = &ppt->mmio[i];
|
||||
if (seg->len == 0) {
|
||||
error = vm_map_mmio(vm, gpa, len, hpa);
|
||||
if (error == 0) {
|
||||
seg->gpa = gpa;
|
||||
seg->len = len;
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
}
|
||||
return (ENOSPC);
|
||||
}
|
||||
return (ENOENT);
|
||||
}
|
||||
|
||||
static int
|
||||
pptintr(void *arg)
|
||||
{
|
||||
int vec;
|
||||
struct pptdev *ppt;
|
||||
struct pptintr_arg *pptarg;
|
||||
|
||||
pptarg = arg;
|
||||
ppt = pptarg->pptdev;
|
||||
vec = pptarg->vec;
|
||||
|
||||
if (ppt->vm != NULL)
|
||||
(void) lapic_set_intr(ppt->vm, pptarg->vcpu, vec);
|
||||
else {
|
||||
/*
|
||||
* XXX
|
||||
* This is not expected to happen - panic?
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
* For legacy interrupts give other filters a chance in case
|
||||
* the interrupt was not generated by the passthrough device.
|
||||
*/
|
||||
if (ppt->msi.startrid == 0)
|
||||
return (FILTER_STRAY);
|
||||
else
|
||||
return (FILTER_HANDLED);
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX
|
||||
* When we try to free the MSI resource the kernel will bind the thread to
|
||||
* the host cpu was originally handling the MSI. The function freeing the
|
||||
* MSI vector (apic_free_vector()) will panic the kernel if the thread
|
||||
* is already bound to a cpu.
|
||||
*
|
||||
* So, we temporarily unbind the vcpu thread before freeing the MSI resource.
|
||||
*/
|
||||
static void
|
||||
PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt)
|
||||
{
|
||||
int pincpu = -1;
|
||||
|
||||
vm_get_pinning(vm, vcpu, &pincpu);
|
||||
|
||||
if (pincpu >= 0)
|
||||
vm_set_pinning(vm, vcpu, -1);
|
||||
|
||||
ppt_teardown_msi(ppt);
|
||||
|
||||
if (pincpu >= 0)
|
||||
vm_set_pinning(vm, vcpu, pincpu);
|
||||
}
|
||||
|
||||
int
|
||||
ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
|
||||
int destcpu, int vector, int numvec)
|
||||
{
|
||||
int i, rid, flags;
|
||||
int msi_count, startrid, error, tmp;
|
||||
struct pptdev *ppt;
|
||||
|
||||
if ((destcpu >= VM_MAXCPU || destcpu < 0) ||
|
||||
(vector < 0 || vector > 255) ||
|
||||
(numvec < 0 || numvec > MAX_MSIMSGS))
|
||||
return (EINVAL);
|
||||
|
||||
ppt = ppt_find(bus, slot, func);
|
||||
if (ppt == NULL)
|
||||
return (ENOENT);
|
||||
if (ppt->vm != vm) /* Make sure we own this device */
|
||||
return (EBUSY);
|
||||
|
||||
/* Free any allocated resources */
|
||||
PPT_TEARDOWN_MSI(vm, vcpu, ppt);
|
||||
|
||||
if (numvec == 0) /* nothing more to do */
|
||||
return (0);
|
||||
|
||||
flags = RF_ACTIVE;
|
||||
msi_count = pci_msi_count(ppt->dev);
|
||||
if (msi_count == 0) {
|
||||
startrid = 0; /* legacy interrupt */
|
||||
msi_count = 1;
|
||||
flags |= RF_SHAREABLE;
|
||||
} else
|
||||
startrid = 1; /* MSI */
|
||||
|
||||
/*
|
||||
* The device must be capable of supporting the number of vectors
|
||||
* the guest wants to allocate.
|
||||
*/
|
||||
if (numvec > msi_count)
|
||||
return (EINVAL);
|
||||
|
||||
/*
|
||||
* Make sure that we can allocate all the MSI vectors that are needed
|
||||
* by the guest.
|
||||
*/
|
||||
if (startrid == 1) {
|
||||
tmp = numvec;
|
||||
error = pci_alloc_msi(ppt->dev, &tmp);
|
||||
if (error)
|
||||
return (error);
|
||||
else if (tmp != numvec) {
|
||||
pci_release_msi(ppt->dev);
|
||||
return (ENOSPC);
|
||||
} else {
|
||||
/* success */
|
||||
}
|
||||
}
|
||||
|
||||
ppt->msi.startrid = startrid;
|
||||
|
||||
/*
|
||||
* Allocate the irq resource and attach it to the interrupt handler.
|
||||
*/
|
||||
for (i = 0; i < numvec; i++) {
|
||||
ppt->msi.num_msgs = i + 1;
|
||||
ppt->msi.cookie[i] = NULL;
|
||||
|
||||
rid = startrid + i;
|
||||
ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
|
||||
&rid, flags);
|
||||
if (ppt->msi.res[i] == NULL)
|
||||
break;
|
||||
|
||||
ppt->msi.arg[i].pptdev = ppt;
|
||||
ppt->msi.arg[i].vec = vector + i;
|
||||
ppt->msi.arg[i].vcpu = destcpu;
|
||||
|
||||
error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
|
||||
INTR_TYPE_NET | INTR_MPSAFE,
|
||||
pptintr, NULL, &ppt->msi.arg[i],
|
||||
&ppt->msi.cookie[i]);
|
||||
if (error != 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (i < numvec) {
|
||||
PPT_TEARDOWN_MSI(vm, vcpu, ppt);
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
|
||||
int idx, uint32_t msg, uint32_t vector_control, uint64_t addr)
|
||||
{
|
||||
struct pptdev *ppt;
|
||||
struct pci_devinfo *dinfo;
|
||||
int numvec, alloced, rid, error;
|
||||
size_t res_size, cookie_size, arg_size;
|
||||
|
||||
ppt = ppt_find(bus, slot, func);
|
||||
if (ppt == NULL)
|
||||
return (ENOENT);
|
||||
if (ppt->vm != vm) /* Make sure we own this device */
|
||||
return (EBUSY);
|
||||
|
||||
dinfo = device_get_ivars(ppt->dev);
|
||||
if (!dinfo)
|
||||
return (ENXIO);
|
||||
|
||||
/*
|
||||
* First-time configuration:
|
||||
* Allocate the MSI-X table
|
||||
* Allocate the IRQ resources
|
||||
* Set up some variables in ppt->msix
|
||||
*/
|
||||
if (ppt->msix.num_msgs == 0) {
|
||||
numvec = pci_msix_count(ppt->dev);
|
||||
if (numvec <= 0)
|
||||
return (EINVAL);
|
||||
|
||||
ppt->msix.startrid = 1;
|
||||
ppt->msix.num_msgs = numvec;
|
||||
|
||||
res_size = numvec * sizeof(ppt->msix.res[0]);
|
||||
cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
|
||||
arg_size = numvec * sizeof(ppt->msix.arg[0]);
|
||||
|
||||
ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
|
||||
ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
|
||||
M_WAITOK | M_ZERO);
|
||||
ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);
|
||||
|
||||
rid = dinfo->cfg.msix.msix_table_bar;
|
||||
ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
|
||||
SYS_RES_MEMORY, &rid, RF_ACTIVE);
|
||||
|
||||
if (ppt->msix.msix_table_res == NULL) {
|
||||
ppt_teardown_msix(ppt);
|
||||
return (ENOSPC);
|
||||
}
|
||||
ppt->msix.msix_table_rid = rid;
|
||||
|
||||
alloced = numvec;
|
||||
error = pci_alloc_msix(ppt->dev, &alloced);
|
||||
if (error || alloced != numvec) {
|
||||
ppt_teardown_msix(ppt);
|
||||
return (error == 0 ? ENOSPC: error);
|
||||
}
|
||||
}
|
||||
|
||||
if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
|
||||
/* Tear down the IRQ if it's already set up */
|
||||
ppt_teardown_msix_intr(ppt, idx);
|
||||
|
||||
/* Allocate the IRQ resource */
|
||||
ppt->msix.cookie[idx] = NULL;
|
||||
rid = ppt->msix.startrid + idx;
|
||||
ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
|
||||
&rid, RF_ACTIVE);
|
||||
if (ppt->msix.res[idx] == NULL)
|
||||
return (ENXIO);
|
||||
|
||||
ppt->msix.arg[idx].pptdev = ppt;
|
||||
ppt->msix.arg[idx].vec = msg;
|
||||
ppt->msix.arg[idx].vcpu = (addr >> 12) & 0xFF;
|
||||
|
||||
/* Setup the MSI-X interrupt */
|
||||
error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
|
||||
INTR_TYPE_NET | INTR_MPSAFE,
|
||||
pptintr, NULL, &ppt->msix.arg[idx],
|
||||
&ppt->msix.cookie[idx]);
|
||||
|
||||
if (error != 0) {
|
||||
bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]);
|
||||
bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]);
|
||||
ppt->msix.cookie[idx] = NULL;
|
||||
ppt->msix.res[idx] = NULL;
|
||||
return (ENXIO);
|
||||
}
|
||||
} else {
|
||||
/* Masked, tear it down if it's already been set up */
|
||||
ppt_teardown_msix_intr(ppt, idx);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
41
sys/amd64/vmm/io/ppt.h
Normal file
41
sys/amd64/vmm/io/ppt.h
Normal file
@ -0,0 +1,41 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _IO_PPT_H_
|
||||
#define _IO_PPT_H_
|
||||
|
||||
int ppt_assign_device(struct vm *vm, int bus, int slot, int func);
|
||||
int ppt_unassign_device(struct vm *vm, int bus, int slot, int func);
|
||||
int ppt_unassign_all(struct vm *vm);
|
||||
int ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
|
||||
vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
|
||||
int ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
|
||||
int destcpu, int vector, int numvec);
|
||||
int ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
|
||||
int idx, uint32_t msg, uint32_t vector_control, uint64_t addr);
|
||||
#endif
|
270
sys/amd64/vmm/io/vdev.c
Normal file
270
sys/amd64/vmm/io/vdev.c
Normal file
@ -0,0 +1,270 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/malloc.h>
|
||||
|
||||
#include "vdev.h"
|
||||
|
||||
struct vdev {
|
||||
SLIST_ENTRY(vdev) entry;
|
||||
struct vdev_ops *ops;
|
||||
void *dev;
|
||||
};
|
||||
static SLIST_HEAD(, vdev) vdev_head;
|
||||
static int vdev_count;
|
||||
|
||||
struct vdev_region {
|
||||
SLIST_ENTRY(vdev_region) entry;
|
||||
struct vdev_ops *ops;
|
||||
void *dev;
|
||||
struct io_region *io;
|
||||
};
|
||||
static SLIST_HEAD(, vdev_region) region_head;
|
||||
static int region_count;
|
||||
|
||||
static MALLOC_DEFINE(M_VDEV, "vdev", "vdev");
|
||||
|
||||
#define VDEV_INIT (0)
|
||||
#define VDEV_RESET (1)
|
||||
#define VDEV_HALT (2)
|
||||
|
||||
// static const char* vdev_event_str[] = {"VDEV_INIT", "VDEV_RESET", "VDEV_HALT"};
|
||||
|
||||
static int
|
||||
vdev_system_event(int event)
|
||||
{
|
||||
struct vdev *vd;
|
||||
int rc;
|
||||
|
||||
// TODO: locking
|
||||
SLIST_FOREACH(vd, &vdev_head, entry) {
|
||||
// printf("%s : %s Device %s\n", __func__, vdev_event_str[event], vd->ops->name);
|
||||
switch (event) {
|
||||
case VDEV_INIT:
|
||||
rc = vd->ops->init(vd->dev);
|
||||
break;
|
||||
case VDEV_RESET:
|
||||
rc = vd->ops->reset(vd->dev);
|
||||
break;
|
||||
case VDEV_HALT:
|
||||
rc = vd->ops->halt(vd->dev);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (rc) {
|
||||
printf("vdev %s init failed rc=%d\n",
|
||||
vd->ops->name, rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
vdev_init(void)
|
||||
{
|
||||
return vdev_system_event(VDEV_INIT);
|
||||
}
|
||||
|
||||
int
|
||||
vdev_reset(void)
|
||||
{
|
||||
return vdev_system_event(VDEV_RESET);
|
||||
}
|
||||
|
||||
int
|
||||
vdev_halt(void)
|
||||
{
|
||||
return vdev_system_event(VDEV_HALT);
|
||||
}
|
||||
|
||||
void
|
||||
vdev_vm_init(void)
|
||||
{
|
||||
SLIST_INIT(&vdev_head);
|
||||
vdev_count = 0;
|
||||
|
||||
SLIST_INIT(®ion_head);
|
||||
region_count = 0;
|
||||
}
|
||||
void
|
||||
vdev_vm_cleanup(void)
|
||||
{
|
||||
struct vdev *vd;
|
||||
|
||||
// TODO: locking
|
||||
while (!SLIST_EMPTY(&vdev_head)) {
|
||||
vd = SLIST_FIRST(&vdev_head);
|
||||
SLIST_REMOVE_HEAD(&vdev_head, entry);
|
||||
free(vd, M_VDEV);
|
||||
vdev_count--;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
vdev_register(struct vdev_ops *ops, void *dev)
|
||||
{
|
||||
struct vdev *vd;
|
||||
vd = malloc(sizeof(*vd), M_VDEV, M_WAITOK | M_ZERO);
|
||||
vd->ops = ops;
|
||||
vd->dev = dev;
|
||||
|
||||
// TODO: locking
|
||||
SLIST_INSERT_HEAD(&vdev_head, vd, entry);
|
||||
vdev_count++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
vdev_unregister(void *dev)
|
||||
{
|
||||
struct vdev *vd, *found;
|
||||
|
||||
found = NULL;
|
||||
// TODO: locking
|
||||
SLIST_FOREACH(vd, &vdev_head, entry) {
|
||||
if (vd->dev == dev) {
|
||||
found = vd;
|
||||
}
|
||||
}
|
||||
|
||||
if (found) {
|
||||
SLIST_REMOVE(&vdev_head, found, vdev, entry);
|
||||
free(found, M_VDEV);
|
||||
}
|
||||
}
|
||||
|
||||
#define IN_RANGE(val, start, end) \
|
||||
(((val) >= (start)) && ((val) < (end)))
|
||||
|
||||
static struct vdev_region*
|
||||
vdev_find_region(struct io_region *io, void *dev)
|
||||
{
|
||||
struct vdev_region *region, *found;
|
||||
uint64_t region_base;
|
||||
uint64_t region_end;
|
||||
|
||||
found = NULL;
|
||||
|
||||
// TODO: locking
|
||||
// FIXME: we should verify we are in the context the current
|
||||
// vcpu here as well.
|
||||
SLIST_FOREACH(region, ®ion_head, entry) {
|
||||
region_base = region->io->base;
|
||||
region_end = region_base + region->io->len;
|
||||
if (IN_RANGE(io->base, region_base, region_end) &&
|
||||
IN_RANGE(io->base+io->len, region_base, region_end+1) &&
|
||||
(dev && dev == region->dev)) {
|
||||
found = region;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
int
|
||||
vdev_register_region(struct vdev_ops *ops, void *dev, struct io_region *io)
|
||||
{
|
||||
struct vdev_region *region;
|
||||
|
||||
region = vdev_find_region(io, dev);
|
||||
if (region) {
|
||||
return -EEXIST;
|
||||
}
|
||||
|
||||
region = malloc(sizeof(*region), M_VDEV, M_WAITOK | M_ZERO);
|
||||
region->io = io;
|
||||
region->ops = ops;
|
||||
region->dev = dev;
|
||||
|
||||
// TODO: locking
|
||||
SLIST_INSERT_HEAD(®ion_head, region, entry);
|
||||
region_count++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
vdev_unregister_region(void *dev, struct io_region *io)
|
||||
{
|
||||
struct vdev_region *region;
|
||||
|
||||
region = vdev_find_region(io, dev);
|
||||
|
||||
if (region) {
|
||||
SLIST_REMOVE(®ion_head, region, vdev_region, entry);
|
||||
free(region, M_VDEV);
|
||||
region_count--;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_memrw(uint64_t gpa, opsize_t size, uint64_t *data, int read)
|
||||
{
|
||||
struct vdev_region *region;
|
||||
struct io_region io;
|
||||
region_attr_t attr;
|
||||
int rc;
|
||||
|
||||
io.base = gpa;
|
||||
io.len = size;
|
||||
|
||||
region = vdev_find_region(&io, NULL);
|
||||
if (!region)
|
||||
return -EINVAL;
|
||||
|
||||
attr = (read) ? MMIO_READ : MMIO_WRITE;
|
||||
if (!(region->io->attr & attr))
|
||||
return -EPERM;
|
||||
|
||||
if (read)
|
||||
rc = region->ops->memread(region->dev, gpa, size, data);
|
||||
else
|
||||
rc = region->ops->memwrite(region->dev, gpa, size, *data);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int
|
||||
vdev_memread(uint64_t gpa, opsize_t size, uint64_t *data)
|
||||
{
|
||||
return vdev_memrw(gpa, size, data, 1);
|
||||
}
|
||||
|
||||
int
|
||||
vdev_memwrite(uint64_t gpa, opsize_t size, uint64_t data)
|
||||
{
|
||||
return vdev_memrw(gpa, size, &data, 0);
|
||||
}
|
84
sys/amd64/vmm/io/vdev.h
Normal file
84
sys/amd64/vmm/io/vdev.h
Normal file
@ -0,0 +1,84 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VDEV_H_
|
||||
#define _VDEV_H_
|
||||
|
||||
typedef enum {
|
||||
BYTE = 1,
|
||||
WORD = 2,
|
||||
DWORD = 4,
|
||||
QWORD = 8,
|
||||
} opsize_t;
|
||||
|
||||
typedef enum {
|
||||
MMIO_READ = 1,
|
||||
MMIO_WRITE = 2,
|
||||
} region_attr_t;
|
||||
|
||||
struct io_region {
|
||||
uint64_t base;
|
||||
uint64_t len;
|
||||
region_attr_t attr;
|
||||
int vcpu;
|
||||
};
|
||||
|
||||
typedef int (*vdev_init_t)(void* dev);
|
||||
typedef int (*vdev_reset_t)(void* dev);
|
||||
typedef int (*vdev_halt_t)(void* dev);
|
||||
typedef int (*vdev_memread_t)(void* dev, uint64_t gpa, opsize_t size, uint64_t *data);
|
||||
typedef int (*vdev_memwrite_t)(void* dev, uint64_t gpa, opsize_t size, uint64_t data);
|
||||
|
||||
|
||||
struct vdev_ops {
|
||||
const char *name;
|
||||
vdev_init_t init;
|
||||
vdev_reset_t reset;
|
||||
vdev_halt_t halt;
|
||||
vdev_memread_t memread;
|
||||
vdev_memwrite_t memwrite;
|
||||
};
|
||||
|
||||
|
||||
void vdev_vm_init(void);
|
||||
void vdev_vm_cleanup(void);
|
||||
|
||||
int vdev_register(struct vdev_ops *ops, void *dev);
|
||||
void vdev_unregister(void *dev);
|
||||
|
||||
int vdev_register_region(struct vdev_ops *ops, void *dev, struct io_region *io);
|
||||
void vdev_unregister_region(void *dev, struct io_region *io);
|
||||
|
||||
int vdev_init(void);
|
||||
int vdev_reset(void);
|
||||
int vdev_halt(void);
|
||||
int vdev_memread(uint64_t gpa, opsize_t size, uint64_t *data);
|
||||
int vdev_memwrite(uint64_t gpa, opsize_t size, uint64_t data);
|
||||
|
||||
#endif /* _VDEV_H_ */
|
||||
|
901
sys/amd64/vmm/io/vlapic.c
Normal file
901
sys/amd64/vmm/io/vlapic.c
Normal file
@ -0,0 +1,901 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/smp.h>
|
||||
|
||||
#include <machine/clock.h>
|
||||
#include <x86/specialreg.h>
|
||||
#include <x86/apicreg.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
|
||||
#include "vmm_lapic.h"
|
||||
#include "vmm_ktr.h"
|
||||
#include "vdev.h"
|
||||
#include "vlapic.h"
|
||||
|
||||
#define VLAPIC_CTR0(vlapic, format) \
|
||||
VMM_CTR0((vlapic)->vm, (vlapic)->vcpuid, format)
|
||||
|
||||
#define VLAPIC_CTR1(vlapic, format, p1) \
|
||||
VMM_CTR1((vlapic)->vm, (vlapic)->vcpuid, format, p1)
|
||||
|
||||
#define VLAPIC_CTR_IRR(vlapic, msg) \
|
||||
do { \
|
||||
uint32_t *irrptr = &(vlapic)->apic.irr0; \
|
||||
irrptr[0] = irrptr[0]; /* silence compiler */ \
|
||||
VLAPIC_CTR1((vlapic), msg " irr0 0x%08x", irrptr[0 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " irr1 0x%08x", irrptr[1 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " irr2 0x%08x", irrptr[2 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " irr3 0x%08x", irrptr[3 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " irr4 0x%08x", irrptr[4 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " irr5 0x%08x", irrptr[5 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " irr6 0x%08x", irrptr[6 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " irr7 0x%08x", irrptr[7 << 2]); \
|
||||
} while (0)
|
||||
|
||||
#define VLAPIC_CTR_ISR(vlapic, msg) \
|
||||
do { \
|
||||
uint32_t *isrptr = &(vlapic)->apic.isr0; \
|
||||
isrptr[0] = isrptr[0]; /* silence compiler */ \
|
||||
VLAPIC_CTR1((vlapic), msg " isr0 0x%08x", isrptr[0 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " isr1 0x%08x", isrptr[1 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " isr2 0x%08x", isrptr[2 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " isr3 0x%08x", isrptr[3 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " isr4 0x%08x", isrptr[4 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " isr5 0x%08x", isrptr[5 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " isr6 0x%08x", isrptr[6 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]); \
|
||||
} while (0)
|
||||
|
||||
static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic");
|
||||
|
||||
#define PRIO(x) ((x) >> 4)
|
||||
|
||||
#define VLAPIC_VERSION (16)
|
||||
#define VLAPIC_MAXLVT_ENTRIES (5)
|
||||
|
||||
#define x2apic(vlapic) (((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)
|
||||
|
||||
enum boot_state {
|
||||
BS_INIT,
|
||||
BS_SIPI,
|
||||
BS_RUNNING
|
||||
};
|
||||
|
||||
struct vlapic {
|
||||
struct vm *vm;
|
||||
int vcpuid;
|
||||
|
||||
struct io_region *mmio;
|
||||
struct vdev_ops *ops;
|
||||
struct LAPIC apic;
|
||||
|
||||
int esr_update;
|
||||
|
||||
int divisor;
|
||||
int ccr_ticks;
|
||||
|
||||
/*
|
||||
* The 'isrvec_stk' is a stack of vectors injected by the local apic.
|
||||
* A vector is popped from the stack when the processor does an EOI.
|
||||
* The vector on the top of the stack is used to compute the
|
||||
* Processor Priority in conjunction with the TPR.
|
||||
*/
|
||||
uint8_t isrvec_stk[ISRVEC_STK_SIZE];
|
||||
int isrvec_stk_top;
|
||||
|
||||
uint64_t msr_apicbase;
|
||||
enum boot_state boot_state;
|
||||
};
|
||||
|
||||
#define VLAPIC_BUS_FREQ tsc_freq
|
||||
|
||||
static int
|
||||
vlapic_timer_divisor(uint32_t dcr)
|
||||
{
|
||||
switch (dcr & 0xB) {
|
||||
case APIC_TDCR_2:
|
||||
return (2);
|
||||
case APIC_TDCR_4:
|
||||
return (4);
|
||||
case APIC_TDCR_8:
|
||||
return (8);
|
||||
case APIC_TDCR_16:
|
||||
return (16);
|
||||
case APIC_TDCR_32:
|
||||
return (32);
|
||||
case APIC_TDCR_64:
|
||||
return (64);
|
||||
case APIC_TDCR_128:
|
||||
return (128);
|
||||
default:
|
||||
panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vlapic_mask_lvts(uint32_t *lvts, int num_lvt)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < num_lvt; i++) {
|
||||
*lvts |= APIC_LVT_M;
|
||||
lvts += 4;
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
static inline void
|
||||
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
|
||||
{
|
||||
printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
|
||||
*lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
|
||||
*lvt & APIC_LVTT_M);
|
||||
}
|
||||
#endif
|
||||
|
||||
static uint64_t
|
||||
vlapic_get_ccr(struct vlapic *vlapic)
|
||||
{
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
return lapic->ccr_timer;
|
||||
}
|
||||
|
||||
static void
|
||||
vlapic_update_errors(struct vlapic *vlapic)
|
||||
{
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
lapic->esr = 0; // XXX
|
||||
}
|
||||
|
||||
static void
|
||||
vlapic_init_ipi(struct vlapic *vlapic)
|
||||
{
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
lapic->version = VLAPIC_VERSION;
|
||||
lapic->version |= (VLAPIC_MAXLVT_ENTRIES < MAXLVTSHIFT);
|
||||
lapic->dfr = 0xffffffff;
|
||||
lapic->svr = APIC_SVR_VECTOR;
|
||||
vlapic_mask_lvts(&lapic->lvt_timer, VLAPIC_MAXLVT_ENTRIES+1);
|
||||
}
|
||||
|
||||
static int
|
||||
vlapic_op_reset(void* dev)
|
||||
{
|
||||
struct vlapic *vlapic = (struct vlapic*)dev;
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
|
||||
memset(lapic, 0, sizeof(*lapic));
|
||||
lapic->apr = vlapic->vcpuid;
|
||||
vlapic_init_ipi(vlapic);
|
||||
vlapic->divisor = vlapic_timer_divisor(lapic->dcr_timer);
|
||||
|
||||
if (vlapic->vcpuid == 0)
|
||||
vlapic->boot_state = BS_RUNNING; /* BSP */
|
||||
else
|
||||
vlapic->boot_state = BS_INIT; /* AP */
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
static int
|
||||
vlapic_op_init(void* dev)
|
||||
{
|
||||
struct vlapic *vlapic = (struct vlapic*)dev;
|
||||
vdev_register_region(vlapic->ops, vlapic, vlapic->mmio);
|
||||
return vlapic_op_reset(dev);
|
||||
}
|
||||
|
||||
static int
|
||||
vlapic_op_halt(void* dev)
|
||||
{
|
||||
struct vlapic *vlapic = (struct vlapic*)dev;
|
||||
vdev_unregister_region(vlapic, vlapic->mmio);
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
vlapic_set_intr_ready(struct vlapic *vlapic, int vector)
|
||||
{
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
uint32_t *irrptr;
|
||||
int idx;
|
||||
|
||||
if (vector < 0 || vector >= 256)
|
||||
panic("vlapic_set_intr_ready: invalid vector %d\n", vector);
|
||||
|
||||
idx = (vector / 32) * 4;
|
||||
irrptr = &lapic->irr0;
|
||||
atomic_set_int(&irrptr[idx], 1 << (vector % 32));
|
||||
VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
|
||||
}
|
||||
|
||||
static void
|
||||
vlapic_start_timer(struct vlapic *vlapic, uint32_t elapsed)
|
||||
{
|
||||
uint32_t icr_timer;
|
||||
|
||||
icr_timer = vlapic->apic.icr_timer;
|
||||
|
||||
vlapic->ccr_ticks = ticks;
|
||||
if (elapsed < icr_timer)
|
||||
vlapic->apic.ccr_timer = icr_timer - elapsed;
|
||||
else {
|
||||
/*
|
||||
* This can happen when the guest is trying to run its local
|
||||
* apic timer higher that the setting of 'hz' in the host.
|
||||
*
|
||||
* We deal with this by running the guest local apic timer
|
||||
* at the rate of the host's 'hz' setting.
|
||||
*/
|
||||
vlapic->apic.ccr_timer = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static __inline uint32_t *
|
||||
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
|
||||
{
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
int i;
|
||||
|
||||
if (offset < APIC_OFFSET_TIMER_LVT || offset > APIC_OFFSET_ERROR_LVT) {
|
||||
panic("vlapic_get_lvt: invalid LVT\n");
|
||||
}
|
||||
i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
|
||||
return ((&lapic->lvt_timer) + i);;
|
||||
}
|
||||
|
||||
#if 1
|
||||
static void
|
||||
dump_isrvec_stk(struct vlapic *vlapic)
|
||||
{
|
||||
int i;
|
||||
uint32_t *isrptr;
|
||||
|
||||
isrptr = &vlapic->apic.isr0;
|
||||
for (i = 0; i < 8; i++)
|
||||
printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);
|
||||
|
||||
for (i = 0; i <= vlapic->isrvec_stk_top; i++)
|
||||
printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Algorithm adopted from section "Interrupt, Task and Processor Priority"
|
||||
* in Intel Architecture Manual Vol 3a.
|
||||
*/
|
||||
static void
|
||||
vlapic_update_ppr(struct vlapic *vlapic)
|
||||
{
|
||||
int isrvec, tpr, ppr;
|
||||
|
||||
/*
|
||||
* Note that the value on the stack at index 0 is always 0.
|
||||
*
|
||||
* This is a placeholder for the value of ISRV when none of the
|
||||
* bits is set in the ISRx registers.
|
||||
*/
|
||||
isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
|
||||
tpr = vlapic->apic.tpr;
|
||||
|
||||
#if 1
|
||||
{
|
||||
int i, lastprio, curprio, vector, idx;
|
||||
uint32_t *isrptr;
|
||||
|
||||
if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
|
||||
panic("isrvec_stk is corrupted: %d", isrvec);
|
||||
|
||||
/*
|
||||
* Make sure that the priority of the nested interrupts is
|
||||
* always increasing.
|
||||
*/
|
||||
lastprio = -1;
|
||||
for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
|
||||
curprio = PRIO(vlapic->isrvec_stk[i]);
|
||||
if (curprio <= lastprio) {
|
||||
dump_isrvec_stk(vlapic);
|
||||
panic("isrvec_stk does not satisfy invariant");
|
||||
}
|
||||
lastprio = curprio;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure that each bit set in the ISRx registers has a
|
||||
* corresponding entry on the isrvec stack.
|
||||
*/
|
||||
i = 1;
|
||||
isrptr = &vlapic->apic.isr0;
|
||||
for (vector = 0; vector < 256; vector++) {
|
||||
idx = (vector / 32) * 4;
|
||||
if (isrptr[idx] & (1 << (vector % 32))) {
|
||||
if (i > vlapic->isrvec_stk_top ||
|
||||
vlapic->isrvec_stk[i] != vector) {
|
||||
dump_isrvec_stk(vlapic);
|
||||
panic("ISR and isrvec_stk out of sync");
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (PRIO(tpr) >= PRIO(isrvec))
|
||||
ppr = tpr;
|
||||
else
|
||||
ppr = isrvec & 0xf0;
|
||||
|
||||
vlapic->apic.ppr = ppr;
|
||||
VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
|
||||
}
|
||||
|
||||
static void
|
||||
vlapic_process_eoi(struct vlapic *vlapic)
|
||||
{
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
uint32_t *isrptr;
|
||||
int i, idx, bitpos;
|
||||
|
||||
isrptr = &lapic->isr0;
|
||||
|
||||
/*
|
||||
* The x86 architecture reserves the the first 32 vectors for use
|
||||
* by the processor.
|
||||
*/
|
||||
for (i = 7; i > 0; i--) {
|
||||
idx = i * 4;
|
||||
bitpos = fls(isrptr[idx]);
|
||||
if (bitpos != 0) {
|
||||
if (vlapic->isrvec_stk_top <= 0) {
|
||||
panic("invalid vlapic isrvec_stk_top %d",
|
||||
vlapic->isrvec_stk_top);
|
||||
}
|
||||
isrptr[idx] &= ~(1 << (bitpos - 1));
|
||||
VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
|
||||
vlapic->isrvec_stk_top--;
|
||||
vlapic_update_ppr(vlapic);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static __inline int
|
||||
vlapic_get_lvt_field(uint32_t *lvt, uint32_t mask)
|
||||
{
|
||||
return (*lvt & mask);
|
||||
}
|
||||
|
||||
static __inline int
|
||||
vlapic_periodic_timer(struct vlapic *vlapic)
|
||||
{
|
||||
uint32_t *lvt;
|
||||
|
||||
lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
|
||||
|
||||
return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
|
||||
}
|
||||
|
||||
static void
|
||||
vlapic_fire_timer(struct vlapic *vlapic)
|
||||
{
|
||||
int vector;
|
||||
uint32_t *lvt;
|
||||
|
||||
lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
|
||||
|
||||
if (!vlapic_get_lvt_field(lvt, APIC_LVTT_M)) {
|
||||
vector = vlapic_get_lvt_field(lvt,APIC_LVTT_VECTOR);
|
||||
vlapic_set_intr_ready(vlapic, vector);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
lapic_process_icr(struct vlapic *vlapic, uint64_t icrval)
|
||||
{
|
||||
int i;
|
||||
cpuset_t dmask;
|
||||
uint32_t dest, vec, mode;
|
||||
struct vlapic *vlapic2;
|
||||
struct vm_exit *vmexit;
|
||||
|
||||
if (x2apic(vlapic))
|
||||
dest = icrval >> 32;
|
||||
else
|
||||
dest = icrval >> (32 + 24);
|
||||
vec = icrval & APIC_VECTOR_MASK;
|
||||
mode = icrval & APIC_DELMODE_MASK;
|
||||
|
||||
if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
|
||||
switch (icrval & APIC_DEST_MASK) {
|
||||
case APIC_DEST_DESTFLD:
|
||||
CPU_SETOF(dest, &dmask);
|
||||
break;
|
||||
case APIC_DEST_SELF:
|
||||
CPU_SETOF(vlapic->vcpuid, &dmask);
|
||||
break;
|
||||
case APIC_DEST_ALLISELF:
|
||||
dmask = vm_active_cpus(vlapic->vm);
|
||||
break;
|
||||
case APIC_DEST_ALLESELF:
|
||||
dmask = vm_active_cpus(vlapic->vm);
|
||||
CPU_CLR(vlapic->vcpuid, &dmask);
|
||||
break;
|
||||
}
|
||||
|
||||
while ((i = cpusetobj_ffs(&dmask)) != 0) {
|
||||
i--;
|
||||
CPU_CLR(i, &dmask);
|
||||
if (mode == APIC_DELMODE_FIXED)
|
||||
lapic_set_intr(vlapic->vm, i, vec);
|
||||
else
|
||||
vm_inject_nmi(vlapic->vm, i);
|
||||
}
|
||||
|
||||
return (0); /* handled completely in the kernel */
|
||||
}
|
||||
|
||||
if (mode == APIC_DELMODE_INIT) {
|
||||
if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
|
||||
return (0);
|
||||
|
||||
if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
|
||||
vlapic2 = vm_lapic(vlapic->vm, dest);
|
||||
|
||||
/* move from INIT to waiting-for-SIPI state */
|
||||
if (vlapic2->boot_state == BS_INIT) {
|
||||
vlapic2->boot_state = BS_SIPI;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
if (mode == APIC_DELMODE_STARTUP) {
|
||||
if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
|
||||
vlapic2 = vm_lapic(vlapic->vm, dest);
|
||||
|
||||
/*
|
||||
* Ignore SIPIs in any state other than wait-for-SIPI
|
||||
*/
|
||||
if (vlapic2->boot_state != BS_SIPI)
|
||||
return (0);
|
||||
|
||||
vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
|
||||
vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
|
||||
vmexit->u.spinup_ap.vcpu = dest;
|
||||
vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
|
||||
|
||||
/*
|
||||
* XXX this assumes that the startup IPI always succeeds
|
||||
*/
|
||||
vlapic2->boot_state = BS_RUNNING;
|
||||
vm_activate_cpu(vlapic2->vm, dest);
|
||||
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This will cause a return to userland.
|
||||
*/
|
||||
return (1);
|
||||
}
|
||||
|
||||
int
|
||||
vlapic_pending_intr(struct vlapic *vlapic)
|
||||
{
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
int idx, i, bitpos, vector;
|
||||
uint32_t *irrptr, val;
|
||||
|
||||
irrptr = &lapic->irr0;
|
||||
|
||||
/*
|
||||
* The x86 architecture reserves the the first 32 vectors for use
|
||||
* by the processor.
|
||||
*/
|
||||
for (i = 7; i > 0; i--) {
|
||||
idx = i * 4;
|
||||
val = atomic_load_acq_int(&irrptr[idx]);
|
||||
bitpos = fls(val);
|
||||
if (bitpos != 0) {
|
||||
vector = i * 32 + (bitpos - 1);
|
||||
if (PRIO(vector) > PRIO(lapic->ppr)) {
|
||||
VLAPIC_CTR1(vlapic, "pending intr %d", vector);
|
||||
return (vector);
|
||||
} else
|
||||
break;
|
||||
}
|
||||
}
|
||||
VLAPIC_CTR0(vlapic, "no pending intr");
|
||||
return (-1);
|
||||
}
|
||||
|
||||
void
|
||||
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
|
||||
{
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
uint32_t *irrptr, *isrptr;
|
||||
int idx, stk_top;
|
||||
|
||||
/*
|
||||
* clear the ready bit for vector being accepted in irr
|
||||
* and set the vector as in service in isr.
|
||||
*/
|
||||
idx = (vector / 32) * 4;
|
||||
|
||||
irrptr = &lapic->irr0;
|
||||
atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
|
||||
VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");
|
||||
|
||||
isrptr = &lapic->isr0;
|
||||
isrptr[idx] |= 1 << (vector % 32);
|
||||
VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");
|
||||
|
||||
/*
|
||||
* Update the PPR
|
||||
*/
|
||||
vlapic->isrvec_stk_top++;
|
||||
|
||||
stk_top = vlapic->isrvec_stk_top;
|
||||
if (stk_top >= ISRVEC_STK_SIZE)
|
||||
panic("isrvec_stk_top overflow %d", stk_top);
|
||||
|
||||
vlapic->isrvec_stk[stk_top] = vector;
|
||||
vlapic_update_ppr(vlapic);
|
||||
}
|
||||
|
||||
int
|
||||
vlapic_op_mem_read(void* dev, uint64_t gpa, opsize_t size, uint64_t *data)
|
||||
{
|
||||
struct vlapic *vlapic = (struct vlapic*)dev;
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
uint64_t offset = gpa & ~(PAGE_SIZE);
|
||||
uint32_t *reg;
|
||||
int i;
|
||||
|
||||
if (offset > sizeof(*lapic)) {
|
||||
*data = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
offset &= ~3;
|
||||
switch(offset)
|
||||
{
|
||||
case APIC_OFFSET_ID:
|
||||
if (x2apic(vlapic))
|
||||
*data = vlapic->vcpuid;
|
||||
else
|
||||
*data = vlapic->vcpuid << 24;
|
||||
break;
|
||||
case APIC_OFFSET_VER:
|
||||
*data = lapic->version;
|
||||
break;
|
||||
case APIC_OFFSET_TPR:
|
||||
*data = lapic->tpr;
|
||||
break;
|
||||
case APIC_OFFSET_APR:
|
||||
*data = lapic->apr;
|
||||
break;
|
||||
case APIC_OFFSET_PPR:
|
||||
*data = lapic->ppr;
|
||||
break;
|
||||
case APIC_OFFSET_EOI:
|
||||
*data = lapic->eoi;
|
||||
break;
|
||||
case APIC_OFFSET_LDR:
|
||||
*data = lapic->ldr;
|
||||
break;
|
||||
case APIC_OFFSET_DFR:
|
||||
*data = lapic->dfr;
|
||||
break;
|
||||
case APIC_OFFSET_SVR:
|
||||
*data = lapic->svr;
|
||||
break;
|
||||
case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
|
||||
i = (offset - APIC_OFFSET_ISR0) >> 2;
|
||||
reg = &lapic->isr0;
|
||||
*data = *(reg + i);
|
||||
break;
|
||||
case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
|
||||
i = (offset - APIC_OFFSET_TMR0) >> 2;
|
||||
reg = &lapic->tmr0;
|
||||
*data = *(reg + i);
|
||||
break;
|
||||
case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
|
||||
i = (offset - APIC_OFFSET_IRR0) >> 2;
|
||||
reg = &lapic->irr0;
|
||||
*data = atomic_load_acq_int(reg + i);
|
||||
break;
|
||||
case APIC_OFFSET_ESR:
|
||||
*data = lapic->esr;
|
||||
break;
|
||||
case APIC_OFFSET_ICR_LOW:
|
||||
*data = lapic->icr_lo;
|
||||
break;
|
||||
case APIC_OFFSET_ICR_HI:
|
||||
*data = lapic->icr_hi;
|
||||
break;
|
||||
case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
|
||||
reg = vlapic_get_lvt(vlapic, offset);
|
||||
*data = *(reg);
|
||||
break;
|
||||
case APIC_OFFSET_ICR:
|
||||
*data = lapic->icr_timer;
|
||||
break;
|
||||
case APIC_OFFSET_CCR:
|
||||
*data = vlapic_get_ccr(vlapic);
|
||||
break;
|
||||
case APIC_OFFSET_DCR:
|
||||
*data = lapic->dcr_timer;
|
||||
break;
|
||||
case APIC_OFFSET_RRR:
|
||||
default:
|
||||
*data = 0;
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}

int
vlapic_op_mem_write(void* dev, uint64_t gpa, opsize_t size, uint64_t data)
{
	struct vlapic *vlapic = (struct vlapic*)dev;
	struct LAPIC *lapic = &vlapic->apic;
	uint64_t offset = gpa & ~(PAGE_SIZE);
	uint32_t *reg;
	int retval;

	if (offset > sizeof(*lapic)) {
		return 0;
	}

	retval = 0;
	offset &= ~3;
	switch(offset)
	{
		case APIC_OFFSET_ID:
			break;
		case APIC_OFFSET_TPR:
			lapic->tpr = data & 0xff;
			vlapic_update_ppr(vlapic);
			break;
		case APIC_OFFSET_EOI:
			vlapic_process_eoi(vlapic);
			break;
		case APIC_OFFSET_LDR:
			break;
		case APIC_OFFSET_DFR:
			break;
		case APIC_OFFSET_SVR:
			lapic->svr = data;
			break;
		case APIC_OFFSET_ICR_LOW:
			if (!x2apic(vlapic)) {
				data &= 0xffffffff;
				data |= (uint64_t)lapic->icr_hi << 32;
			}
			retval = lapic_process_icr(vlapic, data);
			break;
		case APIC_OFFSET_ICR_HI:
			if (!x2apic(vlapic)) {
				retval = 0;
				lapic->icr_hi = data;
			}
			break;
		case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
			reg = vlapic_get_lvt(vlapic, offset);
			if (!(lapic->svr & APIC_SVR_ENABLE)) {
				data |= APIC_LVT_M;
			}
			*reg = data;
			// vlapic_dump_lvt(offset, reg);
			break;
		case APIC_OFFSET_ICR:
			lapic->icr_timer = data;
			vlapic_start_timer(vlapic, 0);
			break;

		case APIC_OFFSET_DCR:
			lapic->dcr_timer = data;
			vlapic->divisor = vlapic_timer_divisor(data);
			break;

		case APIC_OFFSET_ESR:
			vlapic_update_errors(vlapic);
			break;
		case APIC_OFFSET_VER:
		case APIC_OFFSET_APR:
		case APIC_OFFSET_PPR:
		case APIC_OFFSET_RRR:
		case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		case APIC_OFFSET_CCR:
		default:
			// Read only.
			break;
	}

	return (retval);
}
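Two details of the write path are worth spelling out with small numeric examples. The values below are illustrative only; APIC_LVT_M and APIC_SVR_ENABLE are the architectural LVT mask bit (bit 16) and the software-enable bit of the SVR pulled in from the system headers.

/*
 * Example: the guest writes vector 0xFD to the timer LVT (offset 0x320)
 * while the APIC is software-disabled (APIC_SVR_ENABLE clear in SVR).
 * The handler forces the mask bit on before storing:
 *
 *	data = 0x000000FD;
 *	data |= APIC_LVT_M;		-> 0x000100FD is stored in the LVT
 *
 * Example: in xAPIC mode a 32-bit write to ICR_LOW is widened with the
 * previously written ICR_HI before lapic_process_icr() sees it:
 *
 *	icr = ((uint64_t)lapic->icr_hi << 32) | icr_lo_written_by_guest;
 */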

int
vlapic_timer_tick(struct vlapic *vlapic)
{
	int curticks, delta, periodic, fired;
	uint32_t ccr;
	uint32_t decrement, leftover;

restart:
	curticks = ticks;
	delta = curticks - vlapic->ccr_ticks;

	/* Local APIC timer is disabled */
	if (vlapic->apic.icr_timer == 0)
		return (-1);

	/* One-shot mode and timer has already counted down to zero */
	periodic = vlapic_periodic_timer(vlapic);
	if (!periodic && vlapic->apic.ccr_timer == 0)
		return (-1);
	/*
	 * The 'curticks' and 'ccr_ticks' are out of sync by more than
	 * 2^31 ticks. We deal with this by restarting the timer.
	 */
	if (delta < 0) {
		vlapic_start_timer(vlapic, 0);
		goto restart;
	}

	fired = 0;
	decrement = (VLAPIC_BUS_FREQ / vlapic->divisor) / hz;

	vlapic->ccr_ticks = curticks;
	ccr = vlapic->apic.ccr_timer;

	while (delta-- > 0) {
		if (ccr > decrement) {
			ccr -= decrement;
			continue;
		}

		/* Trigger the local apic timer interrupt */
		vlapic_fire_timer(vlapic);
		if (periodic) {
			leftover = decrement - ccr;
			vlapic_start_timer(vlapic, leftover);
			ccr = vlapic->apic.ccr_timer;
		} else {
			/*
			 * One-shot timer has counted down to zero.
			 */
			ccr = 0;
		}
		fired = 1;
		break;
	}

	vlapic->apic.ccr_timer = ccr;

	if (!fired)
		return ((ccr / decrement) + 1);
	else
		return (0);
}
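A worked instance of the countdown arithmetic may help; the 128 MHz virtual APIC bus frequency used below is an assumption (VLAPIC_BUS_FREQ is defined earlier in this file and is not shown in this hunk).

/*
 * Example: assume VLAPIC_BUS_FREQ = 128000000, divisor = 2, hz = 1000.
 *
 *	decrement = (128000000 / 2) / 1000 = 64000 counts per host tick
 *
 * With ccr = 200000 and no expiry on this pass, the function tells the
 * caller to check back in (200000 / 64000) + 1 = 4 host ticks.
 */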

struct vdev_ops vlapic_dev_ops = {
	.name = "vlapic",
	.init = vlapic_op_init,
	.reset = vlapic_op_reset,
	.halt = vlapic_op_halt,
	.memread = vlapic_op_mem_read,
	.memwrite = vlapic_op_mem_write,
};
static struct io_region vlapic_mmio[VM_MAXCPU];

struct vlapic *
vlapic_init(struct vm *vm, int vcpuid)
{
	struct vlapic *vlapic;

	vlapic = malloc(sizeof(struct vlapic), M_VLAPIC, M_WAITOK | M_ZERO);
	vlapic->vm = vm;
	vlapic->vcpuid = vcpuid;

	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;

	if (vcpuid == 0)
		vlapic->msr_apicbase |= APICBASE_BSP;

	vlapic->ops = &vlapic_dev_ops;

	vlapic->mmio = vlapic_mmio + vcpuid;
	vlapic->mmio->base = DEFAULT_APIC_BASE;
	vlapic->mmio->len = PAGE_SIZE;
	vlapic->mmio->attr = MMIO_READ|MMIO_WRITE;
	vlapic->mmio->vcpu = vcpuid;

	vdev_register(&vlapic_dev_ops, vlapic);

	vlapic_op_init(vlapic);

	return (vlapic);
}

void
vlapic_cleanup(struct vlapic *vlapic)
{
	vlapic_op_halt(vlapic);
	vdev_unregister(vlapic);
	free(vlapic, M_VLAPIC);
}

uint64_t
vlapic_get_apicbase(struct vlapic *vlapic)
{

	return (vlapic->msr_apicbase);
}

void
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val)
{
	int err;
	enum x2apic_state state;

	err = vm_get_x2apic_state(vlapic->vm, vlapic->vcpuid, &state);
	if (err)
		panic("vlapic_set_apicbase: err %d fetching x2apic state", err);

	if (state == X2APIC_DISABLED)
		val &= ~APICBASE_X2APIC;

	vlapic->msr_apicbase = val;
}

void
vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	struct vlapic *vlapic;

	vlapic = vm_lapic(vm, vcpuid);

	if (state == X2APIC_DISABLED)
		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
}
111
sys/amd64/vmm/io/vlapic.h
Normal file
111
sys/amd64/vmm/io/vlapic.h
Normal file
@ -0,0 +1,111 @@
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _VLAPIC_H_
#define	_VLAPIC_H_

#include "vdev.h"

struct vm;

/*
 * Map of APIC Registers:	Offset	Description			Access
 */
#define	APIC_OFFSET_ID		0x20	// Local APIC ID		R/W
#define	APIC_OFFSET_VER		0x30	// Local APIC Version		R
#define	APIC_OFFSET_TPR		0x80	// Task Priority Register	R/W
#define	APIC_OFFSET_APR		0x90	// Arbitration Priority Register	R
#define	APIC_OFFSET_PPR		0xA0	// Processor Priority Register	R
#define	APIC_OFFSET_EOI		0xB0	// EOI Register			W
#define	APIC_OFFSET_RRR		0xC0	// Remote read			R
#define	APIC_OFFSET_LDR		0xD0	// Logical Destination		R/W
#define	APIC_OFFSET_DFR		0xE0	// Destination Format Register	0..27 R; 28..31 R/W
#define	APIC_OFFSET_SVR		0xF0	// Spurious Interrupt Vector Reg.	0..3 R; 4..9 R/W
#define	APIC_OFFSET_ISR0	0x100	// ISR 000-031			R
#define	APIC_OFFSET_ISR1	0x110	// ISR 032-063			R
#define	APIC_OFFSET_ISR2	0x120	// ISR 064-095			R
#define	APIC_OFFSET_ISR3	0x130	// ISR 095-128			R
#define	APIC_OFFSET_ISR4	0x140	// ISR 128-159			R
#define	APIC_OFFSET_ISR5	0x150	// ISR 160-191			R
#define	APIC_OFFSET_ISR6	0x160	// ISR 192-223			R
#define	APIC_OFFSET_ISR7	0x170	// ISR 224-255			R
#define	APIC_OFFSET_TMR0	0x180	// TMR 000-031			R
#define	APIC_OFFSET_TMR1	0x190	// TMR 032-063			R
#define	APIC_OFFSET_TMR2	0x1A0	// TMR 064-095			R
#define	APIC_OFFSET_TMR3	0x1B0	// TMR 095-128			R
#define	APIC_OFFSET_TMR4	0x1C0	// TMR 128-159			R
#define	APIC_OFFSET_TMR5	0x1D0	// TMR 160-191			R
#define	APIC_OFFSET_TMR6	0x1E0	// TMR 192-223			R
#define	APIC_OFFSET_TMR7	0x1F0	// TMR 224-255			R
#define	APIC_OFFSET_IRR0	0x200	// IRR 000-031			R
#define	APIC_OFFSET_IRR1	0x210	// IRR 032-063			R
#define	APIC_OFFSET_IRR2	0x220	// IRR 064-095			R
#define	APIC_OFFSET_IRR3	0x230	// IRR 095-128			R
#define	APIC_OFFSET_IRR4	0x240	// IRR 128-159			R
#define	APIC_OFFSET_IRR5	0x250	// IRR 160-191			R
#define	APIC_OFFSET_IRR6	0x260	// IRR 192-223			R
#define	APIC_OFFSET_IRR7	0x270	// IRR 224-255			R
#define	APIC_OFFSET_ESR		0x280	// Error Status Register	R
#define	APIC_OFFSET_ICR_LOW	0x300	// Interrupt Command Reg. (0-31)	R/W
#define	APIC_OFFSET_ICR_HI	0x310	// Interrupt Command Reg. (32-63)	R/W
#define	APIC_OFFSET_TIMER_LVT	0x320	// Local Vector Table (Timer)	R/W
#define	APIC_OFFSET_THERM_LVT	0x330	// Local Vector Table (Thermal)	R/W (PIV+)
#define	APIC_OFFSET_PERF_LVT	0x340	// Local Vector Table (Performance)	R/W (P6+)
#define	APIC_OFFSET_LINT0_LVT	0x350	// Local Vector Table (LINT0)	R/W
#define	APIC_OFFSET_LINT1_LVT	0x360	// Local Vector Table (LINT1)	R/W
#define	APIC_OFFSET_ERROR_LVT	0x370	// Local Vector Table (ERROR)	R/W
#define	APIC_OFFSET_ICR		0x380	// Initial Count Reg. for Timer	R/W
#define	APIC_OFFSET_CCR		0x390	// Current Count of Timer	R
#define	APIC_OFFSET_DCR		0x3E0	// Timer Divide Configuration Reg.	R/W

/*
 * 16 priority levels with at most one vector injected per level.
 */
#define	ISRVEC_STK_SIZE		(16 + 1)

enum x2apic_state;

struct vlapic *vlapic_init(struct vm *vm, int vcpuid);
void vlapic_cleanup(struct vlapic *vlapic);

int vlapic_op_mem_write(void* dev, uint64_t gpa,
			opsize_t size, uint64_t data);

int vlapic_op_mem_read(void* dev, uint64_t gpa,
		       opsize_t size, uint64_t *data);

int vlapic_pending_intr(struct vlapic *vlapic);
void vlapic_intr_accepted(struct vlapic *vlapic, int vector);
void vlapic_set_intr_ready(struct vlapic *vlapic, int vector);
int vlapic_timer_tick(struct vlapic *vlapic);

uint64_t vlapic_get_apicbase(struct vlapic *vlapic);
void vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val);
void vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state s);

#endif	/* _VLAPIC_H_ */
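The register map lends itself to a small helper; the function below is a hypothetical illustration (not part of the header) of how an interrupt vector maps onto the ISR register bank.

/*
 * Illustrative only: each ISRn register covers 32 vectors and the
 * registers are spaced 0x10 apart, so vector 'v' is tracked by bit
 * (v % 32) of the register at APIC_OFFSET_ISR0 + 0x10 * (v / 32).
 */
static __inline uint32_t
example_isr_offset(int vector)
{

	return (APIC_OFFSET_ISR0 + 0x10 * (vector / 32));
}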
1022
sys/amd64/vmm/vmm.c
Normal file
1022
sys/amd64/vmm/vmm.c
Normal file
File diff suppressed because it is too large
538
sys/amd64/vmm/vmm_dev.c
Normal file
538
sys/amd64/vmm/vmm_dev.c
Normal file
@ -0,0 +1,538 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/queue.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/libkern.h>
|
||||
#include <sys/ioccom.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
|
||||
#include <machine/pmap.h>
|
||||
#include <machine/vmparam.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "vmm_lapic.h"
|
||||
#include "vmm_stat.h"
|
||||
#include "vmm_mem.h"
|
||||
#include "io/ppt.h"
|
||||
#include <machine/vmm_dev.h>
|
||||
|
||||
struct vmmdev_softc {
|
||||
struct vm *vm; /* vm instance cookie */
|
||||
struct cdev *cdev;
|
||||
SLIST_ENTRY(vmmdev_softc) link;
|
||||
};
|
||||
static SLIST_HEAD(, vmmdev_softc) head;
|
||||
|
||||
static struct mtx vmmdev_mtx;
|
||||
|
||||
static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
|
||||
|
||||
SYSCTL_DECL(_hw_vmm);
|
||||
|
||||
static struct vmmdev_softc *
|
||||
vmmdev_lookup(const char *name)
|
||||
{
|
||||
struct vmmdev_softc *sc;
|
||||
|
||||
#ifdef notyet /* XXX kernel is not compiled with invariants */
|
||||
mtx_assert(&vmmdev_mtx, MA_OWNED);
|
||||
#endif
|
||||
|
||||
SLIST_FOREACH(sc, &head, link) {
|
||||
if (strcmp(name, vm_name(sc->vm)) == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
return (sc);
|
||||
}
|
||||
|
||||
static struct vmmdev_softc *
|
||||
vmmdev_lookup2(struct cdev *cdev)
|
||||
{
|
||||
|
||||
return (cdev->si_drv1);
|
||||
}
|
||||
|
||||
static int
|
||||
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
|
||||
{
|
||||
int error, off, c;
|
||||
vm_paddr_t hpa, gpa;
|
||||
struct vmmdev_softc *sc;
|
||||
|
||||
static char zerobuf[PAGE_SIZE];
|
||||
|
||||
error = 0;
|
||||
mtx_lock(&vmmdev_mtx);
|
||||
sc = vmmdev_lookup2(cdev);
|
||||
if (sc == NULL)
|
||||
error = ENXIO;
|
||||
|
||||
while (uio->uio_resid > 0 && error == 0) {
|
||||
gpa = uio->uio_offset;
|
||||
off = gpa & PAGE_MASK;
|
||||
c = min(uio->uio_resid, PAGE_SIZE - off);
|
||||
|
||||
/*
|
||||
* The VM has a hole in its physical memory map. If we want to
|
||||
* use 'dd' to inspect memory beyond the hole we need to
|
||||
* provide bogus data for memory that lies in the hole.
|
||||
*
|
||||
* Since this device does not support lseek(2), dd(1) will
|
||||
* read(2) blocks of data to simulate the lseek(2).
|
||||
*/
|
||||
hpa = vm_gpa2hpa(sc->vm, gpa, c);
|
||||
if (hpa == (vm_paddr_t)-1) {
|
||||
if (uio->uio_rw == UIO_READ)
|
||||
error = uiomove(zerobuf, c, uio);
|
||||
else
|
||||
error = EFAULT;
|
||||
} else
|
||||
error = uiomove((void *)PHYS_TO_DMAP(hpa), c, uio);
|
||||
}
|
||||
|
||||
mtx_unlock(&vmmdev_mtx);
|
||||
return (error);
|
||||
}
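Because d_read and d_write are wired to vmmdev_rw() in the cdevsw further down, guest physical memory can be inspected from userland with plain read(2) on the /dev/vmm node; the file offset doubles as the guest physical address and starts at 0 on a fresh descriptor. A minimal sketch, assuming a VM named "testvm" already exists (the name and the choice to dump the first 16 bytes are illustrative):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

/* Hypothetical example: dump the first 16 bytes of guest memory. */
int
example_dump_guest_memory(void)
{
	unsigned char buf[16];
	ssize_t n;
	int fd, i;

	fd = open("/dev/vmm/testvm", O_RDONLY);
	if (fd < 0)
		return (-1);

	/* Reads start at guest physical address 0; holes read back as zeroes. */
	n = read(fd, buf, sizeof(buf));
	for (i = 0; i < n; i++)
		printf("%02x ", buf[i]);
	printf("\n");

	close(fd);
	return (0);
}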
|
||||
|
||||
static int
|
||||
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
|
||||
struct thread *td)
|
||||
{
|
||||
int error, vcpu, state_changed;
|
||||
enum vcpu_state new_state;
|
||||
struct vmmdev_softc *sc;
|
||||
struct vm_memory_segment *seg;
|
||||
struct vm_register *vmreg;
|
||||
struct vm_seg_desc* vmsegdesc;
|
||||
struct vm_pin *vmpin;
|
||||
struct vm_run *vmrun;
|
||||
struct vm_event *vmevent;
|
||||
struct vm_lapic_irq *vmirq;
|
||||
struct vm_capability *vmcap;
|
||||
struct vm_pptdev *pptdev;
|
||||
struct vm_pptdev_mmio *pptmmio;
|
||||
struct vm_pptdev_msi *pptmsi;
|
||||
struct vm_pptdev_msix *pptmsix;
|
||||
struct vm_nmi *vmnmi;
|
||||
struct vm_stats *vmstats;
|
||||
struct vm_stat_desc *statdesc;
|
||||
struct vm_x2apic *x2apic;
|
||||
|
||||
sc = vmmdev_lookup2(cdev);
|
||||
if (sc == NULL)
|
||||
return (ENXIO);
|
||||
|
||||
vcpu = -1;
|
||||
state_changed = 0;
|
||||
|
||||
/*
|
||||
* Some VMM ioctls can operate only on vcpus that are not running.
|
||||
*/
|
||||
switch (cmd) {
|
||||
case VM_RUN:
|
||||
case VM_SET_PINNING:
|
||||
case VM_GET_REGISTER:
|
||||
case VM_SET_REGISTER:
|
||||
case VM_GET_SEGMENT_DESCRIPTOR:
|
||||
case VM_SET_SEGMENT_DESCRIPTOR:
|
||||
case VM_INJECT_EVENT:
|
||||
case VM_GET_CAPABILITY:
|
||||
case VM_SET_CAPABILITY:
|
||||
case VM_PPTDEV_MSI:
|
||||
case VM_PPTDEV_MSIX:
|
||||
case VM_SET_X2APIC_STATE:
|
||||
/*
|
||||
* XXX fragile, handle with care
|
||||
* Assumes that the first field of the ioctl data is the vcpu.
|
||||
*/
|
||||
vcpu = *(int *)data;
|
||||
if (vcpu < 0 || vcpu >= VM_MAXCPU) {
|
||||
error = EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (cmd == VM_RUN)
|
||||
new_state = VCPU_RUNNING;
|
||||
else
|
||||
new_state = VCPU_CANNOT_RUN;
|
||||
|
||||
error = vcpu_set_state(sc->vm, vcpu, new_state);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
state_changed = 1;
|
||||
break;
|
||||
|
||||
case VM_MAP_PPTDEV_MMIO:
|
||||
case VM_BIND_PPTDEV:
|
||||
case VM_UNBIND_PPTDEV:
|
||||
case VM_MAP_MEMORY:
|
||||
/*
|
||||
* ioctls that operate on the entire virtual machine must
|
||||
* prevent all vcpus from running.
|
||||
*/
|
||||
error = 0;
|
||||
for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
|
||||
error = vcpu_set_state(sc->vm, vcpu, VCPU_CANNOT_RUN);
|
||||
if (error)
|
||||
break;
|
||||
}
|
||||
|
||||
if (error) {
|
||||
while (--vcpu >= 0)
|
||||
vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
|
||||
goto done;
|
||||
}
|
||||
|
||||
state_changed = 2;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch(cmd) {
|
||||
case VM_RUN:
|
||||
vmrun = (struct vm_run *)data;
|
||||
error = vm_run(sc->vm, vmrun);
|
||||
break;
|
||||
case VM_STAT_DESC: {
|
||||
const char *desc;
|
||||
statdesc = (struct vm_stat_desc *)data;
|
||||
desc = vmm_stat_desc(statdesc->index);
|
||||
if (desc != NULL) {
|
||||
error = 0;
|
||||
strlcpy(statdesc->desc, desc, sizeof(statdesc->desc));
|
||||
} else
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
case VM_STATS: {
|
||||
CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_TYPES);
|
||||
vmstats = (struct vm_stats *)data;
|
||||
getmicrotime(&vmstats->tv);
|
||||
error = vmm_stat_copy(sc->vm, vmstats->cpuid,
|
||||
&vmstats->num_entries, vmstats->statbuf);
|
||||
break;
|
||||
}
|
||||
case VM_PPTDEV_MSI:
|
||||
pptmsi = (struct vm_pptdev_msi *)data;
|
||||
error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
|
||||
pptmsi->bus, pptmsi->slot, pptmsi->func,
|
||||
pptmsi->destcpu, pptmsi->vector,
|
||||
pptmsi->numvec);
|
||||
break;
|
||||
case VM_PPTDEV_MSIX:
|
||||
pptmsix = (struct vm_pptdev_msix *)data;
|
||||
error = ppt_setup_msix(sc->vm, pptmsix->vcpu,
|
||||
pptmsix->bus, pptmsix->slot,
|
||||
pptmsix->func, pptmsix->idx,
|
||||
pptmsix->msg, pptmsix->vector_control,
|
||||
pptmsix->addr);
|
||||
break;
|
||||
case VM_MAP_PPTDEV_MMIO:
|
||||
pptmmio = (struct vm_pptdev_mmio *)data;
|
||||
error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
|
||||
pptmmio->func, pptmmio->gpa, pptmmio->len,
|
||||
pptmmio->hpa);
|
||||
break;
|
||||
case VM_BIND_PPTDEV:
|
||||
pptdev = (struct vm_pptdev *)data;
|
||||
error = ppt_assign_device(sc->vm, pptdev->bus, pptdev->slot,
|
||||
pptdev->func);
|
||||
break;
|
||||
case VM_UNBIND_PPTDEV:
|
||||
pptdev = (struct vm_pptdev *)data;
|
||||
error = ppt_unassign_device(sc->vm, pptdev->bus, pptdev->slot,
|
||||
pptdev->func);
|
||||
break;
|
||||
case VM_INJECT_EVENT:
|
||||
vmevent = (struct vm_event *)data;
|
||||
error = vm_inject_event(sc->vm, vmevent->cpuid, vmevent->type,
|
||||
vmevent->vector,
|
||||
vmevent->error_code,
|
||||
vmevent->error_code_valid);
|
||||
break;
|
||||
case VM_INJECT_NMI:
|
||||
vmnmi = (struct vm_nmi *)data;
|
||||
error = vm_inject_nmi(sc->vm, vmnmi->cpuid);
|
||||
break;
|
||||
case VM_LAPIC_IRQ:
|
||||
vmirq = (struct vm_lapic_irq *)data;
|
||||
error = lapic_set_intr(sc->vm, vmirq->cpuid, vmirq->vector);
|
||||
break;
|
||||
case VM_SET_PINNING:
|
||||
vmpin = (struct vm_pin *)data;
|
||||
error = vm_set_pinning(sc->vm, vmpin->vm_cpuid,
|
||||
vmpin->host_cpuid);
|
||||
break;
|
||||
case VM_GET_PINNING:
|
||||
vmpin = (struct vm_pin *)data;
|
||||
error = vm_get_pinning(sc->vm, vmpin->vm_cpuid,
|
||||
&vmpin->host_cpuid);
|
||||
break;
|
||||
case VM_MAP_MEMORY:
|
||||
seg = (struct vm_memory_segment *)data;
|
||||
error = vm_malloc(sc->vm, seg->gpa, seg->len);
|
||||
break;
|
||||
case VM_GET_MEMORY_SEG:
|
||||
seg = (struct vm_memory_segment *)data;
|
||||
seg->len = 0;
|
||||
(void)vm_gpabase2memseg(sc->vm, seg->gpa, seg);
|
||||
error = 0;
|
||||
break;
|
||||
case VM_GET_REGISTER:
|
||||
vmreg = (struct vm_register *)data;
|
||||
error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
|
||||
&vmreg->regval);
|
||||
break;
|
||||
case VM_SET_REGISTER:
|
||||
vmreg = (struct vm_register *)data;
|
||||
error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
|
||||
vmreg->regval);
|
||||
break;
|
||||
case VM_SET_SEGMENT_DESCRIPTOR:
|
||||
vmsegdesc = (struct vm_seg_desc *)data;
|
||||
error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid,
|
||||
vmsegdesc->regnum,
|
||||
&vmsegdesc->desc);
|
||||
break;
|
||||
case VM_GET_SEGMENT_DESCRIPTOR:
|
||||
vmsegdesc = (struct vm_seg_desc *)data;
|
||||
error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid,
|
||||
vmsegdesc->regnum,
|
||||
&vmsegdesc->desc);
|
||||
break;
|
||||
case VM_GET_CAPABILITY:
|
||||
vmcap = (struct vm_capability *)data;
|
||||
error = vm_get_capability(sc->vm, vmcap->cpuid,
|
||||
vmcap->captype,
|
||||
&vmcap->capval);
|
||||
break;
|
||||
case VM_SET_CAPABILITY:
|
||||
vmcap = (struct vm_capability *)data;
|
||||
error = vm_set_capability(sc->vm, vmcap->cpuid,
|
||||
vmcap->captype,
|
||||
vmcap->capval);
|
||||
break;
|
||||
case VM_SET_X2APIC_STATE:
|
||||
x2apic = (struct vm_x2apic *)data;
|
||||
error = vm_set_x2apic_state(sc->vm,
|
||||
x2apic->cpuid, x2apic->state);
|
||||
break;
|
||||
case VM_GET_X2APIC_STATE:
|
||||
x2apic = (struct vm_x2apic *)data;
|
||||
error = vm_get_x2apic_state(sc->vm,
|
||||
x2apic->cpuid, &x2apic->state);
|
||||
break;
|
||||
default:
|
||||
error = ENOTTY;
|
||||
break;
|
||||
}
|
||||
|
||||
if (state_changed == 1) {
|
||||
vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
|
||||
} else if (state_changed == 2) {
|
||||
for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
|
||||
vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
|
||||
}
|
||||
|
||||
done:
|
||||
return (error);
|
||||
}
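From userland each of these commands is just an ioctl(2) against the VM's character device with the corresponding structure from machine/vmm_dev.h. A minimal sketch for setting a guest register; the VM name and the choice of %rax are illustrative, and error handling is abbreviated:

#include <sys/types.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

/* Hypothetical example: set vcpu 0's %rax in VM "testvm". */
int
example_set_rax(uint64_t value)
{
	struct vm_register vmreg;
	int error, fd;

	fd = open("/dev/vmm/testvm", O_RDWR);
	if (fd < 0)
		return (-1);

	vmreg.cpuid = 0;			/* vcpu must not be running */
	vmreg.regnum = VM_REG_GUEST_RAX;
	vmreg.regval = value;
	error = ioctl(fd, VM_SET_REGISTER, &vmreg);

	close(fd);
	return (error);
}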
|
||||
|
||||
static int
|
||||
vmmdev_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr,
|
||||
int nprot, vm_memattr_t *memattr)
|
||||
{
|
||||
int error;
|
||||
struct vmmdev_softc *sc;
|
||||
|
||||
error = -1;
|
||||
mtx_lock(&vmmdev_mtx);
|
||||
|
||||
sc = vmmdev_lookup2(cdev);
|
||||
if (sc != NULL && (nprot & PROT_EXEC) == 0) {
|
||||
*paddr = vm_gpa2hpa(sc->vm, (vm_paddr_t)offset, PAGE_SIZE);
|
||||
if (*paddr != (vm_paddr_t)-1)
|
||||
error = 0;
|
||||
}
|
||||
|
||||
mtx_unlock(&vmmdev_mtx);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static void
|
||||
vmmdev_destroy(struct vmmdev_softc *sc, boolean_t unlink)
|
||||
{
|
||||
|
||||
/*
|
||||
* XXX must stop virtual machine instances that may be still
|
||||
* running and cleanup their state.
|
||||
*/
|
||||
if (sc->cdev)
|
||||
destroy_dev(sc->cdev);
|
||||
|
||||
if (sc->vm)
|
||||
vm_destroy(sc->vm);
|
||||
|
||||
if (unlink) {
|
||||
mtx_lock(&vmmdev_mtx);
|
||||
SLIST_REMOVE(&head, sc, vmmdev_softc, link);
|
||||
mtx_unlock(&vmmdev_mtx);
|
||||
}
|
||||
|
||||
free(sc, M_VMMDEV);
|
||||
}
|
||||
|
||||
static int
|
||||
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
int error;
|
||||
char buf[VM_MAX_NAMELEN];
|
||||
struct vmmdev_softc *sc;
|
||||
|
||||
strlcpy(buf, "beavis", sizeof(buf));
|
||||
error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
|
||||
if (error != 0 || req->newptr == NULL)
|
||||
return (error);
|
||||
|
||||
/*
|
||||
* XXX TODO if any process has this device open then fail
|
||||
*/
|
||||
|
||||
mtx_lock(&vmmdev_mtx);
|
||||
sc = vmmdev_lookup(buf);
|
||||
if (sc == NULL) {
|
||||
mtx_unlock(&vmmdev_mtx);
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
sc->cdev->si_drv1 = NULL;
|
||||
mtx_unlock(&vmmdev_mtx);
|
||||
|
||||
vmmdev_destroy(sc, TRUE);
|
||||
|
||||
return (0);
|
||||
}
|
||||
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
|
||||
NULL, 0, sysctl_vmm_destroy, "A", NULL);
|
||||
|
||||
static struct cdevsw vmmdevsw = {
|
||||
.d_name = "vmmdev",
|
||||
.d_version = D_VERSION,
|
||||
.d_ioctl = vmmdev_ioctl,
|
||||
.d_mmap = vmmdev_mmap,
|
||||
.d_read = vmmdev_rw,
|
||||
.d_write = vmmdev_rw,
|
||||
};
|
||||
|
||||
static int
|
||||
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
int error;
|
||||
struct vm *vm;
|
||||
struct vmmdev_softc *sc, *sc2;
|
||||
char buf[VM_MAX_NAMELEN];
|
||||
|
||||
strlcpy(buf, "beavis", sizeof(buf));
|
||||
error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
|
||||
if (error != 0 || req->newptr == NULL)
|
||||
return (error);
|
||||
|
||||
mtx_lock(&vmmdev_mtx);
|
||||
sc = vmmdev_lookup(buf);
|
||||
mtx_unlock(&vmmdev_mtx);
|
||||
if (sc != NULL)
|
||||
return (EEXIST);
|
||||
|
||||
vm = vm_create(buf);
|
||||
if (vm == NULL)
|
||||
return (EINVAL);
|
||||
|
||||
sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
|
||||
sc->vm = vm;
|
||||
|
||||
/*
|
||||
* Lookup the name again just in case somebody sneaked in when we
|
||||
* dropped the lock.
|
||||
*/
|
||||
mtx_lock(&vmmdev_mtx);
|
||||
sc2 = vmmdev_lookup(buf);
|
||||
if (sc2 == NULL)
|
||||
SLIST_INSERT_HEAD(&head, sc, link);
|
||||
mtx_unlock(&vmmdev_mtx);
|
||||
|
||||
if (sc2 != NULL) {
|
||||
vmmdev_destroy(sc, FALSE);
|
||||
return (EEXIST);
|
||||
}
|
||||
|
||||
sc->cdev = make_dev(&vmmdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
|
||||
"vmm/%s", buf);
|
||||
sc->cdev->si_drv1 = sc;
|
||||
|
||||
return (0);
|
||||
}
|
||||
SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW,
|
||||
NULL, 0, sysctl_vmm_create, "A", NULL);
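Tying the two sysctl handlers together: userland creates (and, symmetrically, destroys) VM instances by writing a name to hw.vmm.create or hw.vmm.destroy, and then opens the node that make_dev() published under /dev/vmm. A minimal sketch with error handling omitted:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

int
example_create_and_open(const char *name)
{
	char node[128];
	int fd;

	/* Writing the name to hw.vmm.create instantiates the VM ... */
	if (sysctlbyname("hw.vmm.create", NULL, NULL, name, strlen(name)) != 0)
		return (-1);

	/* ... and a matching character device appears under /dev/vmm. */
	snprintf(node, sizeof(node), "/dev/vmm/%s", name);
	fd = open(node, O_RDWR);
	return (fd);
}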
|
||||
|
||||
void
|
||||
vmmdev_init(void)
|
||||
{
|
||||
mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
|
||||
}
|
||||
|
||||
int
|
||||
vmmdev_cleanup(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (SLIST_EMPTY(&head))
|
||||
error = 0;
|
||||
else
|
||||
error = EBUSY;
|
||||
|
||||
return (error);
|
||||
}
|
124
sys/amd64/vmm/vmm_host.c
Normal file
124
sys/amd64/vmm/vmm_host.c
Normal file
@ -0,0 +1,124 @@
/*-
 * Copyright (c) 2012 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/pcpu.h>

#include <machine/cpufunc.h>
#include <machine/segments.h>
#include <machine/specialreg.h>

#include "vmm_host.h"

static uint64_t vmm_host_efer, vmm_host_pat, vmm_host_cr0, vmm_host_cr4;

void
vmm_host_state_init(void)
{

	vmm_host_efer = rdmsr(MSR_EFER);
	vmm_host_pat = rdmsr(MSR_PAT);

	/*
	 * We always want CR0.TS to be set when the processor does a VM exit.
	 *
	 * With emulation turned on unconditionally after a VM exit, we are
	 * able to trap inadvertent use of the FPU until the guest FPU state
	 * has been safely squirreled away.
	 */
	vmm_host_cr0 = rcr0() | CR0_TS;

	vmm_host_cr4 = rcr4();
}

uint64_t
vmm_get_host_pat(void)
{

	return (vmm_host_pat);
}

uint64_t
vmm_get_host_efer(void)
{

	return (vmm_host_efer);
}

uint64_t
vmm_get_host_cr0(void)
{

	return (vmm_host_cr0);
}

uint64_t
vmm_get_host_cr4(void)
{

	return (vmm_host_cr4);
}

uint64_t
vmm_get_host_datasel(void)
{

	return (GSEL(GDATA_SEL, SEL_KPL));

}

uint64_t
vmm_get_host_codesel(void)
{

	return (GSEL(GCODE_SEL, SEL_KPL));
}

uint64_t
vmm_get_host_tsssel(void)
{

	return (GSEL(GPROC0_SEL, SEL_KPL));
}

uint64_t
vmm_get_host_fsbase(void)
{

	return (0);
}

uint64_t
vmm_get_host_idtrbase(void)
{

	return (r_idt.rd_base);
}
75
sys/amd64/vmm/vmm_host.h
Normal file
75
sys/amd64/vmm/vmm_host.h
Normal file
@ -0,0 +1,75 @@
/*-
 * Copyright (c) 2012 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _VMM_HOST_H_
#define	_VMM_HOST_H_

#ifndef	_KERNEL
#error "no user-servicable parts inside"
#endif

void vmm_host_state_init(void);

uint64_t vmm_get_host_pat(void);
uint64_t vmm_get_host_efer(void);
uint64_t vmm_get_host_cr0(void);
uint64_t vmm_get_host_cr4(void);
uint64_t vmm_get_host_datasel(void);
uint64_t vmm_get_host_codesel(void);
uint64_t vmm_get_host_tsssel(void);
uint64_t vmm_get_host_fsbase(void);
uint64_t vmm_get_host_idtrbase(void);

/*
 * Inline access to host state that is used on every VM entry
 */
static __inline uint64_t
vmm_get_host_trbase(void)
{

	return ((uint64_t)PCPU_GET(tssp));
}

static __inline uint64_t
vmm_get_host_gdtrbase(void)
{

	return ((uint64_t)&gdt[NGDT * curcpu]);
}

struct pcpu;
extern struct pcpu __pcpu[];

static __inline uint64_t
vmm_get_host_gsbase(void)
{

	return ((uint64_t)&__pcpu[curcpu]);
}

#endif
810
sys/amd64/vmm/vmm_instruction_emul.c
Normal file
810
sys/amd64/vmm/vmm_instruction_emul.c
Normal file
@ -0,0 +1,810 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 Sandvine, Inc.
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/param.h>
|
||||
#include <sys/pcpu.h>
|
||||
#include <sys/systm.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
|
||||
#include <machine/pmap.h>
|
||||
#include <machine/vmparam.h>
|
||||
#include <machine/vmm.h>
|
||||
#else /* !_KERNEL */
|
||||
#include <sys/types.h>
|
||||
#include <sys/errno.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
|
||||
#include <vmmapi.h>
|
||||
#endif /* _KERNEL */
|
||||
|
||||
|
||||
|
||||
/* struct vie_op.op_type */
|
||||
enum {
|
||||
VIE_OP_TYPE_NONE = 0,
|
||||
VIE_OP_TYPE_MOV,
|
||||
VIE_OP_TYPE_AND,
|
||||
VIE_OP_TYPE_LAST
|
||||
};
|
||||
|
||||
/* struct vie_op.op_flags */
|
||||
#define VIE_OP_F_IMM (1 << 0) /* immediate operand present */
|
||||
#define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */
|
||||
|
||||
static const struct vie_op one_byte_opcodes[256] = {
|
||||
[0x89] = {
|
||||
.op_byte = 0x89,
|
||||
.op_type = VIE_OP_TYPE_MOV,
|
||||
},
|
||||
[0x8B] = {
|
||||
.op_byte = 0x8B,
|
||||
.op_type = VIE_OP_TYPE_MOV,
|
||||
},
|
||||
[0xC7] = {
|
||||
.op_byte = 0xC7,
|
||||
.op_type = VIE_OP_TYPE_MOV,
|
||||
.op_flags = VIE_OP_F_IMM,
|
||||
},
|
||||
[0x23] = {
|
||||
.op_byte = 0x23,
|
||||
.op_type = VIE_OP_TYPE_AND,
|
||||
},
|
||||
[0x81] = {
|
||||
/* XXX Group 1 extended opcode - not just AND */
|
||||
.op_byte = 0x81,
|
||||
.op_type = VIE_OP_TYPE_AND,
|
||||
.op_flags = VIE_OP_F_IMM,
|
||||
}
|
||||
};
|
||||
|
||||
/* struct vie.mod */
|
||||
#define VIE_MOD_INDIRECT 0
|
||||
#define VIE_MOD_INDIRECT_DISP8 1
|
||||
#define VIE_MOD_INDIRECT_DISP32 2
|
||||
#define VIE_MOD_DIRECT 3
|
||||
|
||||
/* struct vie.rm */
|
||||
#define VIE_RM_SIB 4
|
||||
#define VIE_RM_DISP32 5
|
||||
|
||||
#define GB (1024 * 1024 * 1024)
|
||||
|
||||
static enum vm_reg_name gpr_map[16] = {
|
||||
VM_REG_GUEST_RAX,
|
||||
VM_REG_GUEST_RCX,
|
||||
VM_REG_GUEST_RDX,
|
||||
VM_REG_GUEST_RBX,
|
||||
VM_REG_GUEST_RSP,
|
||||
VM_REG_GUEST_RBP,
|
||||
VM_REG_GUEST_RSI,
|
||||
VM_REG_GUEST_RDI,
|
||||
VM_REG_GUEST_R8,
|
||||
VM_REG_GUEST_R9,
|
||||
VM_REG_GUEST_R10,
|
||||
VM_REG_GUEST_R11,
|
||||
VM_REG_GUEST_R12,
|
||||
VM_REG_GUEST_R13,
|
||||
VM_REG_GUEST_R14,
|
||||
VM_REG_GUEST_R15
|
||||
};
|
||||
|
||||
static uint64_t size2mask[] = {
|
||||
[1] = 0xff,
|
||||
[2] = 0xffff,
|
||||
[4] = 0xffffffff,
|
||||
[8] = 0xffffffffffffffff,
|
||||
};
|
||||
|
||||
static int
|
||||
vie_valid_register(enum vm_reg_name reg)
|
||||
{
|
||||
#ifdef _KERNEL
|
||||
/*
|
||||
* XXX
|
||||
* The operand register in which we store the result of the
|
||||
* read must be a GPR that we can modify even if the vcpu
|
||||
* is "running". All the GPRs qualify except for %rsp.
|
||||
*
|
||||
* This is a limitation of the vm_set_register() API
|
||||
* and can be fixed if necessary.
|
||||
*/
|
||||
if (reg == VM_REG_GUEST_RSP)
|
||||
return (0);
|
||||
#endif
|
||||
return (1);
|
||||
}
|
||||
|
||||
static int
|
||||
vie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (!vie_valid_register(reg))
|
||||
return (EINVAL);
|
||||
|
||||
error = vm_get_register(vm, vcpuid, reg, rval);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
|
||||
uint64_t val, int size)
|
||||
{
|
||||
int error;
|
||||
uint64_t origval;
|
||||
|
||||
if (!vie_valid_register(reg))
|
||||
return (EINVAL);
|
||||
|
||||
switch (size) {
|
||||
case 1:
|
||||
case 2:
|
||||
error = vie_read_register(vm, vcpuid, reg, &origval);
|
||||
if (error)
|
||||
return (error);
|
||||
val &= size2mask[size];
|
||||
val |= origval & ~size2mask[size];
|
||||
break;
|
||||
case 4:
|
||||
val &= 0xffffffffUL;
|
||||
break;
|
||||
case 8:
|
||||
break;
|
||||
default:
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
error = vm_set_register(vm, vcpuid, reg, val);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* The following simplifying assumptions are made during emulation:
|
||||
*
|
||||
* - guest is in 64-bit mode
|
||||
* - default address size is 64-bits
|
||||
* - default operand size is 32-bits
|
||||
*
|
||||
* - operand size override is not supported
|
||||
*
|
||||
* - address size override is not supported
|
||||
*/
|
||||
static int
|
||||
emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
|
||||
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
|
||||
{
|
||||
int error, size;
|
||||
enum vm_reg_name reg;
|
||||
uint64_t val;
|
||||
|
||||
size = 4;
|
||||
error = EINVAL;
|
||||
|
||||
switch (vie->op.op_byte) {
|
||||
case 0x89:
|
||||
/*
|
||||
* MOV from reg (ModRM:reg) to mem (ModRM:r/m)
|
||||
* 89/r: mov r/m32, r32
|
||||
* REX.W + 89/r mov r/m64, r64
|
||||
*/
|
||||
if (vie->rex_w)
|
||||
size = 8;
|
||||
reg = gpr_map[vie->reg];
|
||||
error = vie_read_register(vm, vcpuid, reg, &val);
|
||||
if (error == 0) {
|
||||
val &= size2mask[size];
|
||||
error = memwrite(vm, vcpuid, gpa, val, size, arg);
|
||||
}
|
||||
break;
|
||||
case 0x8B:
|
||||
/*
|
||||
* MOV from mem (ModRM:r/m) to reg (ModRM:reg)
|
||||
* 8B/r: mov r32, r/m32
|
||||
* REX.W 8B/r: mov r64, r/m64
|
||||
*/
|
||||
if (vie->rex_w)
|
||||
size = 8;
|
||||
error = memread(vm, vcpuid, gpa, &val, size, arg);
|
||||
if (error == 0) {
|
||||
reg = gpr_map[vie->reg];
|
||||
error = vie_update_register(vm, vcpuid, reg, val, size);
|
||||
}
|
||||
break;
|
||||
case 0xC7:
|
||||
/*
|
||||
* MOV from imm32 to mem (ModRM:r/m)
|
||||
* C7/0 mov r/m32, imm32
|
||||
* REX.W + C7/0 mov r/m64, imm32 (sign-extended to 64-bits)
|
||||
*/
|
||||
val = vie->immediate; /* already sign-extended */
|
||||
|
||||
if (vie->rex_w)
|
||||
size = 8;
|
||||
|
||||
if (size != 8)
|
||||
val &= size2mask[size];
|
||||
|
||||
error = memwrite(vm, vcpuid, gpa, val, size, arg);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
|
||||
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
|
||||
{
|
||||
int error, size;
|
||||
enum vm_reg_name reg;
|
||||
uint64_t val1, val2;
|
||||
|
||||
size = 4;
|
||||
error = EINVAL;
|
||||
|
||||
switch (vie->op.op_byte) {
|
||||
case 0x23:
|
||||
/*
|
||||
* AND reg (ModRM:reg) and mem (ModRM:r/m) and store the
|
||||
* result in reg.
|
||||
*
|
||||
* 23/r and r32, r/m32
|
||||
* REX.W + 23/r and r64, r/m64
|
||||
*/
|
||||
if (vie->rex_w)
|
||||
size = 8;
|
||||
|
||||
/* get the first operand */
|
||||
reg = gpr_map[vie->reg];
|
||||
error = vie_read_register(vm, vcpuid, reg, &val1);
|
||||
if (error)
|
||||
break;
|
||||
|
||||
/* get the second operand */
|
||||
error = memread(vm, vcpuid, gpa, &val2, size, arg);
|
||||
if (error)
|
||||
break;
|
||||
|
||||
/* perform the operation and write the result */
|
||||
val1 &= val2;
|
||||
error = vie_update_register(vm, vcpuid, reg, val1, size);
|
||||
break;
|
||||
case 0x81:
|
||||
/*
|
||||
* AND reg (ModRM:reg) with immediate and store the
|
||||
* result in reg
|
||||
*
|
||||
* 81/ and r/m32, imm32
|
||||
* REX.W + 81/ and r/m64, imm32 sign-extended to 64
|
||||
*
|
||||
* Currently, only the AND operation of the 0x81 opcode
|
||||
* is implemented (ModRM:reg = b100).
|
||||
*/
|
||||
if ((vie->reg & 7) != 4)
|
||||
break;
|
||||
|
||||
if (vie->rex_w)
|
||||
size = 8;
|
||||
|
||||
/* get the first operand */
|
||||
error = memread(vm, vcpuid, gpa, &val1, size, arg);
|
||||
if (error)
|
||||
break;
|
||||
|
||||
/*
|
||||
* perform the operation with the pre-fetched immediate
|
||||
* operand and write the result
|
||||
*/
|
||||
val1 &= vie->immediate;
|
||||
error = memwrite(vm, vcpuid, gpa, val1, size, arg);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
|
||||
mem_region_read_t memread, mem_region_write_t memwrite,
|
||||
void *memarg)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (!vie->decoded)
|
||||
return (EINVAL);
|
||||
|
||||
switch (vie->op.op_type) {
|
||||
case VIE_OP_TYPE_MOV:
|
||||
error = emulate_mov(vm, vcpuid, gpa, vie,
|
||||
memread, memwrite, memarg);
|
||||
break;
|
||||
case VIE_OP_TYPE_AND:
|
||||
error = emulate_and(vm, vcpuid, gpa, vie,
|
||||
memread, memwrite, memarg);
|
||||
break;
|
||||
default:
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
return (error);
|
||||
}
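A concrete walk through the decoder and the MOV handler may help; the instruction bytes below are an illustrative example, not taken from the commit.

/*
 * Example: the guest faults on "mov %rcx,(%rax)", encoded 48 89 08.
 *
 *   48   REX prefix -> rex_w=1 (64-bit operand), rex_r/x/b=0
 *   89   opcode     -> one_byte_opcodes[0x89], VIE_OP_TYPE_MOV
 *   08   ModRM      -> mod=0 (indirect), reg=1 (%rcx), rm=0 (%rax)
 *
 * decode_modrm() sets base_register = VM_REG_GUEST_RAX with no SIB byte
 * and no displacement, verify_gla() checks that %rax matches the
 * faulting linear address, and emulate_mov() (case 0x89) reads %rcx and
 * calls memwrite() with size 8 because rex_w was set.
 */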
|
||||
|
||||
#ifdef _KERNEL
|
||||
static void
|
||||
vie_init(struct vie *vie)
|
||||
{
|
||||
|
||||
bzero(vie, sizeof(struct vie));
|
||||
|
||||
vie->base_register = VM_REG_LAST;
|
||||
vie->index_register = VM_REG_LAST;
|
||||
}
|
||||
|
||||
static int
|
||||
gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys,
|
||||
uint64_t *gpa, uint64_t *gpaend)
|
||||
{
|
||||
vm_paddr_t hpa;
|
||||
int nlevels, ptpshift, ptpindex;
|
||||
uint64_t *ptpbase, pte, pgsize;
|
||||
|
||||
/*
|
||||
* XXX assumes 64-bit guest with 4 page walk levels
|
||||
*/
|
||||
nlevels = 4;
|
||||
while (--nlevels >= 0) {
|
||||
/* Zero out the lower 12 bits and the upper 12 bits */
|
||||
ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;
|
||||
|
||||
hpa = vm_gpa2hpa(vm, ptpphys, PAGE_SIZE);
|
||||
if (hpa == -1)
|
||||
goto error;
|
||||
|
||||
ptpbase = (uint64_t *)PHYS_TO_DMAP(hpa);
|
||||
|
||||
ptpshift = PAGE_SHIFT + nlevels * 9;
|
||||
ptpindex = (gla >> ptpshift) & 0x1FF;
|
||||
pgsize = 1UL << ptpshift;
|
||||
|
||||
pte = ptpbase[ptpindex];
|
||||
|
||||
if ((pte & PG_V) == 0)
|
||||
goto error;
|
||||
|
||||
if (pte & PG_PS) {
|
||||
if (pgsize > 1 * GB)
|
||||
goto error;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
ptpphys = pte;
|
||||
}
|
||||
|
||||
/* Zero out the lower 'ptpshift' bits and the upper 12 bits */
|
||||
pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
|
||||
*gpa = pte | (gla & (pgsize - 1));
|
||||
*gpaend = pte + pgsize;
|
||||
return (0);
|
||||
|
||||
error:
|
||||
return (-1);
|
||||
}
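The shift arithmetic in the walk above is easier to see with the levels written out (illustrative only):

/*
 * Example: 4-level walk of guest linear address 'gla'.
 *
 *   nlevels=3: ptpshift = 12 + 3*9 = 39, ptpindex = (gla >> 39) & 0x1FF
 *              (PML4 entry, 512GB per entry)
 *   nlevels=2: ptpshift = 30, ptpindex = (gla >> 30) & 0x1FF  (PDPT, 1GB)
 *   nlevels=1: ptpshift = 21, ptpindex = (gla >> 21) & 0x1FF  (PD,   2MB)
 *   nlevels=0: ptpshift = 12, ptpindex = (gla >> 12) & 0x1FF  (PT,   4KB)
 *
 * A PG_PS entry at the 1GB or 2MB level ends the walk early; the final
 * gpa is the page frame taken from the PTE plus gla & (pgsize - 1).
 */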
|
||||
|
||||
int
|
||||
vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
|
||||
uint64_t cr3, struct vie *vie)
|
||||
{
|
||||
int n, err;
|
||||
uint64_t hpa, gpa, gpaend, off;
|
||||
|
||||
/*
|
||||
* XXX cache previously fetched instructions using 'rip' as the tag
|
||||
*/
|
||||
|
||||
if (inst_length > VIE_INST_SIZE)
|
||||
panic("vmm_fetch_instruction: invalid length %d", inst_length);
|
||||
|
||||
vie_init(vie);
|
||||
|
||||
/* Copy the instruction into 'vie' */
|
||||
while (vie->num_valid < inst_length) {
|
||||
err = gla2gpa(vm, rip, cr3, &gpa, &gpaend);
|
||||
if (err)
|
||||
break;
|
||||
|
||||
off = gpa & PAGE_MASK;
|
||||
n = min(inst_length - vie->num_valid, PAGE_SIZE - off);
|
||||
|
||||
hpa = vm_gpa2hpa(vm, gpa, n);
|
||||
if (hpa == -1)
|
||||
break;
|
||||
|
||||
bcopy((void *)PHYS_TO_DMAP(hpa), &vie->inst[vie->num_valid], n);
|
||||
|
||||
rip += n;
|
||||
vie->num_valid += n;
|
||||
}
|
||||
|
||||
if (vie->num_valid == inst_length)
|
||||
return (0);
|
||||
else
|
||||
return (-1);
|
||||
}
|
||||
|
||||
static int
|
||||
vie_peek(struct vie *vie, uint8_t *x)
|
||||
{
|
||||
|
||||
if (vie->num_processed < vie->num_valid) {
|
||||
*x = vie->inst[vie->num_processed];
|
||||
return (0);
|
||||
} else
|
||||
return (-1);
|
||||
}
|
||||
|
||||
static void
|
||||
vie_advance(struct vie *vie)
|
||||
{
|
||||
|
||||
vie->num_processed++;
|
||||
}
|
||||
|
||||
static int
|
||||
decode_rex(struct vie *vie)
|
||||
{
|
||||
uint8_t x;
|
||||
|
||||
if (vie_peek(vie, &x))
|
||||
return (-1);
|
||||
|
||||
if (x >= 0x40 && x <= 0x4F) {
|
||||
vie->rex_w = x & 0x8 ? 1 : 0;
|
||||
vie->rex_r = x & 0x4 ? 1 : 0;
|
||||
vie->rex_x = x & 0x2 ? 1 : 0;
|
||||
vie->rex_b = x & 0x1 ? 1 : 0;
|
||||
|
||||
vie_advance(vie);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
decode_opcode(struct vie *vie)
|
||||
{
|
||||
uint8_t x;
|
||||
|
||||
if (vie_peek(vie, &x))
|
||||
return (-1);
|
||||
|
||||
vie->op = one_byte_opcodes[x];
|
||||
|
||||
if (vie->op.op_type == VIE_OP_TYPE_NONE)
|
||||
return (-1);
|
||||
|
||||
vie_advance(vie);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX assuming 32-bit or 64-bit guest
|
||||
*/
|
||||
static int
|
||||
decode_modrm(struct vie *vie)
|
||||
{
|
||||
uint8_t x;
|
||||
|
||||
if (vie_peek(vie, &x))
|
||||
return (-1);
|
||||
|
||||
vie->mod = (x >> 6) & 0x3;
|
||||
vie->rm = (x >> 0) & 0x7;
|
||||
vie->reg = (x >> 3) & 0x7;
|
||||
|
||||
/*
|
||||
* A direct addressing mode makes no sense in the context of an EPT
|
||||
* fault. There has to be a memory access involved to cause the
|
||||
* EPT fault.
|
||||
*/
|
||||
if (vie->mod == VIE_MOD_DIRECT)
|
||||
return (-1);
|
||||
|
||||
if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
|
||||
(vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
|
||||
/*
|
||||
* Table 2-5: Special Cases of REX Encodings
|
||||
*
|
||||
* mod=0, r/m=5 is used in the compatibility mode to
|
||||
* indicate a disp32 without a base register.
|
||||
*
|
||||
* mod!=3, r/m=4 is used in the compatibility mode to
|
||||
* indicate that the SIB byte is present.
|
||||
*
|
||||
* The 'b' bit in the REX prefix is don't care in
|
||||
* this case.
|
||||
*/
|
||||
} else {
|
||||
vie->rm |= (vie->rex_b << 3);
|
||||
}
|
||||
|
||||
vie->reg |= (vie->rex_r << 3);
|
||||
|
||||
/* SIB */
|
||||
if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
|
||||
goto done;
|
||||
|
||||
vie->base_register = gpr_map[vie->rm];
|
||||
|
||||
switch (vie->mod) {
|
||||
case VIE_MOD_INDIRECT_DISP8:
|
||||
vie->disp_bytes = 1;
|
||||
break;
|
||||
case VIE_MOD_INDIRECT_DISP32:
|
||||
vie->disp_bytes = 4;
|
||||
break;
|
||||
case VIE_MOD_INDIRECT:
|
||||
if (vie->rm == VIE_RM_DISP32) {
|
||||
vie->disp_bytes = 4;
|
||||
vie->base_register = VM_REG_LAST; /* no base */
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Figure out immediate operand size (if any) */
|
||||
if (vie->op.op_flags & VIE_OP_F_IMM)
|
||||
vie->imm_bytes = 4;
|
||||
else if (vie->op.op_flags & VIE_OP_F_IMM8)
|
||||
vie->imm_bytes = 1;
|
||||
|
||||
done:
|
||||
vie_advance(vie);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
decode_sib(struct vie *vie)
|
||||
{
|
||||
uint8_t x;
|
||||
|
||||
/* Proceed only if SIB byte is present */
|
||||
if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB)
|
||||
return (0);
|
||||
|
||||
if (vie_peek(vie, &x))
|
||||
return (-1);
|
||||
|
||||
/* De-construct the SIB byte */
|
||||
vie->ss = (x >> 6) & 0x3;
|
||||
vie->index = (x >> 3) & 0x7;
|
||||
vie->base = (x >> 0) & 0x7;
|
||||
|
||||
/* Apply the REX prefix modifiers */
|
||||
vie->index |= vie->rex_x << 3;
|
||||
vie->base |= vie->rex_b << 3;
|
||||
|
||||
switch (vie->mod) {
|
||||
case VIE_MOD_INDIRECT_DISP8:
|
||||
vie->disp_bytes = 1;
|
||||
break;
|
||||
case VIE_MOD_INDIRECT_DISP32:
|
||||
vie->disp_bytes = 4;
|
||||
break;
|
||||
}
|
||||
|
||||
if (vie->mod == VIE_MOD_INDIRECT &&
|
||||
(vie->base == 5 || vie->base == 13)) {
|
||||
/*
|
||||
* Special case when base register is unused if mod = 0
|
||||
* and base = %rbp or %r13.
|
||||
*
|
||||
* Documented in:
|
||||
* Table 2-3: 32-bit Addressing Forms with the SIB Byte
|
||||
* Table 2-5: Special Cases of REX Encodings
|
||||
*/
|
||||
vie->disp_bytes = 4;
|
||||
} else {
|
||||
vie->base_register = gpr_map[vie->base];
|
||||
}
|
||||
|
||||
/*
|
||||
* All encodings of 'index' are valid except for %rsp (4).
|
||||
*
|
||||
* Documented in:
|
||||
* Table 2-3: 32-bit Addressing Forms with the SIB Byte
|
||||
* Table 2-5: Special Cases of REX Encodings
|
||||
*/
|
||||
if (vie->index != 4)
|
||||
vie->index_register = gpr_map[vie->index];
|
||||
|
||||
/* 'scale' makes sense only in the context of an index register */
|
||||
if (vie->index_register < VM_REG_LAST)
|
||||
vie->scale = 1 << vie->ss;
|
||||
|
||||
vie_advance(vie);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
decode_displacement(struct vie *vie)
|
||||
{
|
||||
int n, i;
|
||||
uint8_t x;
|
||||
|
||||
union {
|
||||
char buf[4];
|
||||
int8_t signed8;
|
||||
int32_t signed32;
|
||||
} u;
|
||||
|
||||
if ((n = vie->disp_bytes) == 0)
|
||||
return (0);
|
||||
|
||||
if (n != 1 && n != 4)
|
||||
panic("decode_displacement: invalid disp_bytes %d", n);
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
if (vie_peek(vie, &x))
|
||||
return (-1);
|
||||
|
||||
u.buf[i] = x;
|
||||
vie_advance(vie);
|
||||
}
|
||||
|
||||
if (n == 1)
|
||||
vie->displacement = u.signed8; /* sign-extended */
|
||||
else
|
||||
vie->displacement = u.signed32; /* sign-extended */
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
decode_immediate(struct vie *vie)
|
||||
{
|
||||
int i, n;
|
||||
uint8_t x;
|
||||
union {
|
||||
char buf[4];
|
||||
int8_t signed8;
|
||||
int32_t signed32;
|
||||
} u;
|
||||
|
||||
if ((n = vie->imm_bytes) == 0)
|
||||
return (0);
|
||||
|
||||
if (n != 1 && n != 4)
|
||||
panic("decode_immediate: invalid imm_bytes %d", n);
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
if (vie_peek(vie, &x))
|
||||
return (-1);
|
||||
|
||||
u.buf[i] = x;
|
||||
vie_advance(vie);
|
||||
}
|
||||
|
||||
if (n == 1)
|
||||
vie->immediate = u.signed8; /* sign-extended */
|
||||
else
|
||||
vie->immediate = u.signed32; /* sign-extended */
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
#define VERIFY_GLA
|
||||
/*
|
||||
* Verify that the 'guest linear address' provided as collateral of the nested
|
||||
* page table fault matches with our instruction decoding.
|
||||
*/
|
||||
#ifdef VERIFY_GLA
|
||||
static int
|
||||
verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
|
||||
{
|
||||
int error;
|
||||
uint64_t base, idx;
|
||||
|
||||
base = 0;
|
||||
if (vie->base_register != VM_REG_LAST) {
|
||||
error = vm_get_register(vm, cpuid, vie->base_register, &base);
|
||||
if (error) {
|
||||
printf("verify_gla: error %d getting base reg %d\n",
|
||||
error, vie->base_register);
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
|
||||
idx = 0;
|
||||
if (vie->index_register != VM_REG_LAST) {
|
||||
error = vm_get_register(vm, cpuid, vie->index_register, &idx);
|
||||
if (error) {
|
||||
printf("verify_gla: error %d getting index reg %d\n",
|
||||
error, vie->index_register);
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
|
||||
if (base + vie->scale * idx + vie->displacement != gla) {
|
||||
printf("verify_gla mismatch: "
|
||||
"base(0x%0lx), scale(%d), index(0x%0lx), "
|
||||
"disp(0x%0lx), gla(0x%0lx)\n",
|
||||
base, vie->scale, idx, vie->displacement, gla);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
#endif /* VERIFY_GLA */
|
||||
|
||||
int
|
||||
vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
|
||||
{
|
||||
|
||||
if (decode_rex(vie))
|
||||
return (-1);
|
||||
|
||||
if (decode_opcode(vie))
|
||||
return (-1);
|
||||
|
||||
if (decode_modrm(vie))
|
||||
return (-1);
|
||||
|
||||
if (decode_sib(vie))
|
||||
return (-1);
|
||||
|
||||
if (decode_displacement(vie))
|
||||
return (-1);
|
||||
|
||||
if (decode_immediate(vie))
|
||||
return (-1);
|
||||
|
||||
#ifdef VERIFY_GLA
|
||||
if (verify_gla(vm, cpuid, gla, vie))
|
||||
return (-1);
|
||||
#endif
|
||||
|
||||
vie->decoded = 1; /* success */
|
||||
|
||||
return (0);
|
||||
}
|
||||
#endif /* _KERNEL */
|
93
sys/amd64/vmm/vmm_ipi.c
Normal file
93
sys/amd64/vmm/vmm_ipi.c
Normal file
@ -0,0 +1,93 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/bus.h>
|
||||
|
||||
#include <machine/intr_machdep.h>
|
||||
#include <machine/apicvar.h>
|
||||
#include <machine/segments.h>
|
||||
#include <machine/md_var.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "vmm_ipi.h"
|
||||
|
||||
extern inthand_t IDTVEC(rsvd), IDTVEC(justreturn);
|
||||
|
||||
/*
|
||||
* The default is to use the IPI_AST to interrupt a vcpu.
|
||||
*/
|
||||
int vmm_ipinum = IPI_AST;
|
||||
|
||||
CTASSERT(APIC_SPURIOUS_INT == 255);
|
||||
|
||||
void
|
||||
vmm_ipi_init(void)
|
||||
{
|
||||
int idx;
|
||||
uintptr_t func;
|
||||
struct gate_descriptor *ip;
|
||||
|
||||
/*
|
||||
* Search backwards from the highest IDT vector available for use
|
||||
* as our IPI vector. We install the 'justreturn' handler at that
|
||||
* vector and use it to interrupt the vcpus.
|
||||
*
|
||||
* We do this because the IPI_AST is heavyweight and saves all
|
||||
* registers in the trapframe. This is overkill for our use case
|
||||
* which is simply to EOI the interrupt and return.
|
||||
*/
|
||||
idx = APIC_SPURIOUS_INT;
|
||||
while (--idx >= APIC_IPI_INTS) {
|
||||
ip = &idt[idx];
|
||||
func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
|
||||
if (func == (uintptr_t)&IDTVEC(rsvd)) {
|
||||
vmm_ipinum = idx;
|
||||
setidt(vmm_ipinum, IDTVEC(justreturn), SDT_SYSIGT,
|
||||
SEL_KPL, 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (vmm_ipinum != IPI_AST && bootverbose) {
|
||||
printf("vmm_ipi_init: installing ipi handler to interrupt "
|
||||
"vcpus at vector %d\n", vmm_ipinum);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
vmm_ipi_cleanup(void)
|
||||
{
|
||||
if (vmm_ipinum != IPI_AST)
|
||||
setidt(vmm_ipinum, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
|
||||
}
|
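vmm_ipi_init() recognizes a free IDT slot by reassembling each gate's handler address from its split offset fields and comparing it against IDTVEC(rsvd). A minimal userland sketch of that split-and-reassemble step (the field widths follow the amd64 gate layout; the variable names are made up for the example):

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t handler = 0xffffffff81234567ULL;
	uint64_t looffset, hioffset, func;

	/* Split the handler address the way setidt() stores it in a gate. */
	looffset = handler & 0xffff;		/* offset bits 15:0 */
	hioffset = handler >> 16;		/* offset bits 63:16 */

	/* Reassemble it the way vmm_ipi_init() reads it back out. */
	func = (hioffset << 16) | looffset;
	printf("match: %s\n", func == handler ? "yes" : "no");
	return (0);
}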
39
sys/amd64/vmm/vmm_ipi.h
Normal file
@ -0,0 +1,39 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMM_IPI_H_
|
||||
#define _VMM_IPI_H_
|
||||
|
||||
struct vm;
|
||||
|
||||
extern int vmm_ipinum;
|
||||
|
||||
void vmm_ipi_init(void);
|
||||
void vmm_ipi_cleanup(void);
|
||||
|
||||
#endif
|
51
sys/amd64/vmm/vmm_ktr.h
Normal file
@ -0,0 +1,51 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMM_KTR_H_
|
||||
#define _VMM_KTR_H_
|
||||
|
||||
#include <sys/ktr.h>
|
||||
#include <sys/pcpu.h>
|
||||
|
||||
#define KTR_VMM KTR_GEN
|
||||
|
||||
#define VMM_CTR0(vm, vcpuid, format) \
|
||||
CTR3(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu)
|
||||
|
||||
#define VMM_CTR1(vm, vcpuid, format, p1) \
|
||||
CTR4(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu, \
|
||||
(p1))
|
||||
|
||||
#define VMM_CTR2(vm, vcpuid, format, p1, p2) \
|
||||
CTR5(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu, \
|
||||
(p1), (p2))
|
||||
|
||||
#define VMM_CTR3(vm, vcpuid, format, p1, p2, p3) \
|
||||
CTR6(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu, \
|
||||
(p1), (p2), (p3))
|
||||
#endif
|
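The VMM_CTR* wrappers above only prepend a "vm name-vcpuid(curcpu)" prefix before handing everything to the generic CTRn trace macros. A userland analogue using printf, purely for illustration (CTRn itself records into the kernel trace buffer and takes a fixed number of arguments):

#include <stdio.h>

/* Prefix each record with the vm name and vcpu id, as VMM_CTR1 does. */
#define TOY_VMM_CTR1(vmname, vcpuid, format, p1)	\
	printf("vm %s-%d: " format "\n", (vmname), (vcpuid), (p1))

int
main(void)
{
	TOY_VMM_CTR1("testvm", 0, "vm exit at rip 0x%lx", 0xfff0UL);
	return (0);
}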
201
sys/amd64/vmm/vmm_lapic.c
Normal file
@ -0,0 +1,201 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/smp.h>
|
||||
|
||||
#include <x86/specialreg.h>
|
||||
#include <x86/apicreg.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "vmm_ipi.h"
|
||||
#include "vmm_lapic.h"
|
||||
#include "vlapic.h"
|
||||
|
||||
int
|
||||
lapic_pending_intr(struct vm *vm, int cpu)
|
||||
{
|
||||
struct vlapic *vlapic;
|
||||
|
||||
vlapic = vm_lapic(vm, cpu);
|
||||
|
||||
return (vlapic_pending_intr(vlapic));
|
||||
}
|
||||
|
||||
void
|
||||
lapic_intr_accepted(struct vm *vm, int cpu, int vector)
|
||||
{
|
||||
struct vlapic *vlapic;
|
||||
|
||||
vlapic = vm_lapic(vm, cpu);
|
||||
|
||||
vlapic_intr_accepted(vlapic, vector);
|
||||
}
|
||||
|
||||
int
|
||||
lapic_set_intr(struct vm *vm, int cpu, int vector)
|
||||
{
|
||||
struct vlapic *vlapic;
|
||||
|
||||
if (cpu < 0 || cpu >= VM_MAXCPU)
|
||||
return (EINVAL);
|
||||
|
||||
if (vector < 32 || vector > 255)
|
||||
return (EINVAL);
|
||||
|
||||
vlapic = vm_lapic(vm, cpu);
|
||||
vlapic_set_intr_ready(vlapic, vector);
|
||||
|
||||
vm_interrupt_hostcpu(vm, cpu);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
lapic_timer_tick(struct vm *vm, int cpu)
|
||||
{
|
||||
struct vlapic *vlapic;
|
||||
|
||||
vlapic = vm_lapic(vm, cpu);
|
||||
|
||||
return (vlapic_timer_tick(vlapic));
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
x2apic_msr(u_int msr)
|
||||
{
|
||||
if (msr >= 0x800 && msr <= 0xBFF)
|
||||
return (TRUE);
|
||||
else
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
static u_int
|
||||
x2apic_msr_to_regoff(u_int msr)
|
||||
{
|
||||
|
||||
return ((msr - 0x800) << 4);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
lapic_msr(u_int msr)
|
||||
{
|
||||
|
||||
if (x2apic_msr(msr) || (msr == MSR_APICBASE))
|
||||
return (TRUE);
|
||||
else
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
int
|
||||
lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval)
|
||||
{
|
||||
int error;
|
||||
u_int offset;
|
||||
struct vlapic *vlapic;
|
||||
|
||||
vlapic = vm_lapic(vm, cpu);
|
||||
|
||||
if (msr == MSR_APICBASE) {
|
||||
*rval = vlapic_get_apicbase(vlapic);
|
||||
error = 0;
|
||||
} else {
|
||||
offset = x2apic_msr_to_regoff(msr);
|
||||
error = vlapic_op_mem_read(vlapic, offset, DWORD, rval);
|
||||
}
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val)
|
||||
{
|
||||
int error;
|
||||
u_int offset;
|
||||
struct vlapic *vlapic;
|
||||
|
||||
vlapic = vm_lapic(vm, cpu);
|
||||
|
||||
if (msr == MSR_APICBASE) {
|
||||
vlapic_set_apicbase(vlapic, val);
|
||||
error = 0;
|
||||
} else {
|
||||
offset = x2apic_msr_to_regoff(msr);
|
||||
error = vlapic_op_mem_write(vlapic, offset, DWORD, val);
|
||||
}
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
lapic_mmio_write(void *vm, int cpu, uint64_t gpa, uint64_t wval, int size,
|
||||
void *arg)
|
||||
{
|
||||
int error;
|
||||
uint64_t off;
|
||||
struct vlapic *vlapic;
|
||||
|
||||
off = gpa - DEFAULT_APIC_BASE;
|
||||
|
||||
/*
|
||||
* Memory mapped local apic accesses must be 4 bytes wide and
|
||||
* aligned on a 16-byte boundary.
|
||||
*/
|
||||
if (size != 4 || off & 0xf)
|
||||
return (EINVAL);
|
||||
|
||||
vlapic = vm_lapic(vm, cpu);
|
||||
error = vlapic_op_mem_write(vlapic, off, DWORD, wval);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
lapic_mmio_read(void *vm, int cpu, uint64_t gpa, uint64_t *rval, int size,
|
||||
void *arg)
|
||||
{
|
||||
int error;
|
||||
uint64_t off;
|
||||
struct vlapic *vlapic;
|
||||
|
||||
off = gpa - DEFAULT_APIC_BASE;
|
||||
|
||||
/*
|
||||
* Memory mapped local apic accesses must be 4 bytes wide and
|
||||
* aligned on a 16-byte boundary.
|
||||
*/
|
||||
if (size != 4 || off & 0xf)
|
||||
return (EINVAL);
|
||||
|
||||
vlapic = vm_lapic(vm, cpu);
|
||||
error = vlapic_op_mem_read(vlapic, off, DWORD, rval);
|
||||
return (error);
|
||||
}
|
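x2apic_msr_to_regoff() relies on the architectural rule that each x2APIC MSR at 0x800 + N corresponds to the xAPIC MMIO register at offset N * 16. A quick userland check of that mapping (the MSR numbers and expected offsets below are architectural constants, not values from this commit):

#include <stdio.h>
#include <assert.h>

/* Same mapping as x2apic_msr_to_regoff(): (msr - 0x800) << 4. */
static unsigned int
msr_to_regoff(unsigned int msr)
{
	return ((msr - 0x800) << 4);
}

int
main(void)
{
	assert(msr_to_regoff(0x802) == 0x20);	/* local APIC ID */
	assert(msr_to_regoff(0x808) == 0x80);	/* TPR */
	assert(msr_to_regoff(0x80b) == 0xb0);	/* EOI */
	printf("x2APIC MSR offsets check out\n");
	return (0);
}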
71
sys/amd64/vmm/vmm_lapic.h
Normal file
@ -0,0 +1,71 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMM_LAPIC_H_
|
||||
#define _VMM_LAPIC_H_
|
||||
|
||||
struct vm;
|
||||
|
||||
boolean_t lapic_msr(u_int num);
|
||||
int lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval);
|
||||
int lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t wval);
|
||||
|
||||
int lapic_mmio_read(void *vm, int cpu, uint64_t gpa,
|
||||
uint64_t *rval, int size, void *arg);
|
||||
int lapic_mmio_write(void *vm, int cpu, uint64_t gpa,
|
||||
uint64_t wval, int size, void *arg);
|
||||
|
||||
int lapic_timer_tick(struct vm *vm, int cpu);
|
||||
|
||||
/*
|
||||
* Returns a vector between 32 and 255 if an interrupt is pending in the
|
||||
* IRR that can be delivered based on the current state of ISR and TPR.
|
||||
*
|
||||
* Note that the vector does not automatically transition to the ISR as a
|
||||
* result of calling this function.
|
||||
*
|
||||
* Returns -1 if there is no eligible vector that can be delivered to the
|
||||
* guest at this time.
|
||||
*/
|
||||
int lapic_pending_intr(struct vm *vm, int cpu);
|
||||
|
||||
/*
|
||||
* Transition 'vector' from IRR to ISR. This function is called with the
|
||||
* vector returned by 'lapic_pending_intr()' when the guest is able to
|
||||
* accept this interrupt (i.e. RFLAGS.IF = 1 and no conditions exist that
|
||||
* block interrupt delivery).
|
||||
*/
|
||||
void lapic_intr_accepted(struct vm *vm, int cpu, int vector);
|
||||
|
||||
/*
|
||||
* Signals to the LAPIC that an interrupt at 'vector' needs to be generated
|
||||
* to the 'cpu'; the state is recorded in the IRR.
|
||||
*/
|
||||
int lapic_set_intr(struct vm *vm, int cpu, int vector);
|
||||
|
||||
#endif
|
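The two comments above define the delivery handshake: lapic_pending_intr() only reports the best deliverable vector, and the caller must explicitly call lapic_intr_accepted() once the guest can take it. The toy model below (userland, one flag per vector, no TPR handling) illustrates that two-step flow; it is a sketch of the contract, not the vlapic implementation.

#include <stdio.h>

static unsigned char irr[256], isr[256];	/* one flag per vector */

/* Report the highest pending vector, or -1 (mirrors lapic_pending_intr). */
static int
toy_pending_intr(void)
{
	int v;

	for (v = 255; v >= 32; v--)
		if (irr[v])
			return (v);
	return (-1);
}

/* Move a vector from IRR to ISR (mirrors lapic_intr_accepted). */
static void
toy_intr_accepted(int vector)
{
	irr[vector] = 0;
	isr[vector] = 1;
}

int
main(void)
{
	int v;

	irr[48] = 1;			/* lapic_set_intr() would record this */
	v = toy_pending_intr();
	if (v >= 0)
		toy_intr_accepted(v);	/* guest was able to take it */
	printf("delivered vector %d, in service: %d\n", v, isr[48]);
	return (0);
}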
135
sys/amd64/vmm/vmm_mem.c
Normal file
@ -0,0 +1,135 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/linker.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
#include <vm/vm_page.h>
|
||||
#include <vm/vm_pageout.h>
|
||||
|
||||
#include <machine/md_var.h>
|
||||
#include <machine/metadata.h>
|
||||
#include <machine/pc/bios.h>
|
||||
#include <machine/vmparam.h>
|
||||
#include <machine/pmap.h>
|
||||
|
||||
#include "vmm_util.h"
|
||||
#include "vmm_mem.h"
|
||||
|
||||
SYSCTL_DECL(_hw_vmm);
|
||||
|
||||
static u_long pages_allocated;
|
||||
SYSCTL_ULONG(_hw_vmm, OID_AUTO, pages_allocated, CTLFLAG_RD,
|
||||
&pages_allocated, 0, "4KB pages allocated");
|
||||
|
||||
static void
|
||||
update_pages_allocated(int howmany)
|
||||
{
|
||||
pages_allocated += howmany; /* XXX locking? */
|
||||
}
|
||||
|
||||
int
|
||||
vmm_mem_init(void)
|
||||
{
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
vm_paddr_t
|
||||
vmm_mem_alloc(size_t size)
|
||||
{
|
||||
int flags;
|
||||
vm_page_t m;
|
||||
vm_paddr_t pa;
|
||||
|
||||
if (size != PAGE_SIZE)
|
||||
panic("vmm_mem_alloc: invalid allocation size %lu", size);
|
||||
|
||||
flags = VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
|
||||
VM_ALLOC_ZERO;
|
||||
|
||||
while (1) {
|
||||
/*
|
||||
* XXX need policy to determine when to back off the allocation
|
||||
*/
|
||||
m = vm_page_alloc(NULL, 0, flags);
|
||||
if (m == NULL)
|
||||
VM_WAIT;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
pa = VM_PAGE_TO_PHYS(m);
|
||||
|
||||
if ((m->flags & PG_ZERO) == 0)
|
||||
pagezero((void *)PHYS_TO_DMAP(pa));
|
||||
m->valid = VM_PAGE_BITS_ALL;
|
||||
|
||||
update_pages_allocated(1);
|
||||
|
||||
return (pa);
|
||||
}
|
||||
|
||||
void
|
||||
vmm_mem_free(vm_paddr_t base, size_t length)
|
||||
{
|
||||
vm_page_t m;
|
||||
|
||||
if (base & PAGE_MASK) {
|
||||
panic("vmm_mem_free: base 0x%0lx must be aligned on a "
|
||||
"0x%0x boundary\n", base, PAGE_SIZE);
|
||||
}
|
||||
|
||||
if (length != PAGE_SIZE)
|
||||
panic("vmm_mem_free: invalid length %lu", length);
|
||||
|
||||
m = PHYS_TO_VM_PAGE(base);
|
||||
m->wire_count--;
|
||||
vm_page_free(m);
|
||||
atomic_subtract_int(&cnt.v_wire_count, 1);
|
||||
|
||||
update_pages_allocated(-1);
|
||||
}
|
||||
|
||||
vm_paddr_t
|
||||
vmm_mem_maxaddr(void)
|
||||
{
|
||||
|
||||
return (ptoa(Maxmem));
|
||||
}
|
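vmm_mem_free() insists that the physical address be page aligned, which it tests with 'base & PAGE_MASK'. The same test in standalone form (assuming 4KB pages; the PAGE_SIZE/PAGE_MASK values are spelled out here instead of coming from machine headers):

#include <stdio.h>
#include <stdint.h>

#define TOY_PAGE_SIZE	4096UL
#define TOY_PAGE_MASK	(TOY_PAGE_SIZE - 1)

/* Non-zero low bits mean the address is not on a page boundary. */
static int
page_aligned(uint64_t pa)
{
	return ((pa & TOY_PAGE_MASK) == 0);
}

int
main(void)
{
	printf("0x200000 aligned: %d\n", page_aligned(0x200000));
	printf("0x200008 aligned: %d\n", page_aligned(0x200008));
	return (0);
}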
37
sys/amd64/vmm/vmm_mem.h
Normal file
@ -0,0 +1,37 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMM_MEM_H_
|
||||
#define _VMM_MEM_H_
|
||||
|
||||
int vmm_mem_init(void);
|
||||
vm_paddr_t vmm_mem_alloc(size_t size);
|
||||
void vmm_mem_free(vm_paddr_t start, size_t size);
|
||||
vm_paddr_t vmm_mem_maxaddr(void);
|
||||
|
||||
#endif
|
254
sys/amd64/vmm/vmm_msr.c
Normal file
@ -0,0 +1,254 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/smp.h>
|
||||
|
||||
#include <machine/specialreg.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "vmm_lapic.h"
|
||||
#include "vmm_msr.h"
|
||||
|
||||
#define VMM_MSR_F_EMULATE 0x01
|
||||
#define VMM_MSR_F_READONLY 0x02
|
||||
#define VMM_MSR_F_INVALID 0x04 /* guest_msr_valid() can override this */
|
||||
|
||||
struct vmm_msr {
|
||||
int num;
|
||||
int flags;
|
||||
uint64_t hostval;
|
||||
};
|
||||
|
||||
static struct vmm_msr vmm_msr[] = {
|
||||
{ MSR_LSTAR, 0 },
|
||||
{ MSR_CSTAR, 0 },
|
||||
{ MSR_STAR, 0 },
|
||||
{ MSR_SF_MASK, 0 },
|
||||
{ MSR_PAT, VMM_MSR_F_EMULATE | VMM_MSR_F_INVALID },
|
||||
{ MSR_BIOS_SIGN,VMM_MSR_F_EMULATE },
|
||||
{ MSR_MCG_CAP, VMM_MSR_F_EMULATE | VMM_MSR_F_READONLY },
|
||||
};
|
||||
|
||||
#define vmm_msr_num (sizeof(vmm_msr) / sizeof(vmm_msr[0]))
|
||||
CTASSERT(VMM_MSR_NUM >= vmm_msr_num);
|
||||
|
||||
#define readonly_msr(idx) \
|
||||
((vmm_msr[(idx)].flags & VMM_MSR_F_READONLY) != 0)
|
||||
|
||||
#define emulated_msr(idx) \
|
||||
((vmm_msr[(idx)].flags & VMM_MSR_F_EMULATE) != 0)
|
||||
|
||||
#define invalid_msr(idx) \
|
||||
((vmm_msr[(idx)].flags & VMM_MSR_F_INVALID) != 0)
|
||||
|
||||
void
|
||||
vmm_msr_init(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < vmm_msr_num; i++) {
|
||||
if (emulated_msr(i))
|
||||
continue;
|
||||
/*
|
||||
* XXX this assumes that the value of the host msr does not
|
||||
* change after we have cached it.
|
||||
*/
|
||||
vmm_msr[i].hostval = rdmsr(vmm_msr[i].num);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
guest_msrs_init(struct vm *vm, int cpu)
|
||||
{
|
||||
int i;
|
||||
uint64_t *guest_msrs;
|
||||
|
||||
guest_msrs = vm_guest_msrs(vm, cpu);
|
||||
|
||||
for (i = 0; i < vmm_msr_num; i++) {
|
||||
switch (vmm_msr[i].num) {
|
||||
case MSR_LSTAR:
|
||||
case MSR_CSTAR:
|
||||
case MSR_STAR:
|
||||
case MSR_SF_MASK:
|
||||
case MSR_BIOS_SIGN:
|
||||
case MSR_MCG_CAP:
|
||||
guest_msrs[i] = 0;
|
||||
break;
|
||||
case MSR_PAT:
|
||||
guest_msrs[i] = PAT_VALUE(0, PAT_WRITE_BACK) |
|
||||
PAT_VALUE(1, PAT_WRITE_THROUGH) |
|
||||
PAT_VALUE(2, PAT_UNCACHED) |
|
||||
PAT_VALUE(3, PAT_UNCACHEABLE) |
|
||||
PAT_VALUE(4, PAT_WRITE_BACK) |
|
||||
PAT_VALUE(5, PAT_WRITE_THROUGH) |
|
||||
PAT_VALUE(6, PAT_UNCACHED) |
|
||||
PAT_VALUE(7, PAT_UNCACHEABLE);
|
||||
break;
|
||||
default:
|
||||
panic("guest_msrs_init: missing initialization for msr "
|
||||
"0x%0x", vmm_msr[i].num);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
msr_num_to_idx(u_int num)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < vmm_msr_num; i++)
|
||||
if (vmm_msr[i].num == num)
|
||||
return (i);
|
||||
|
||||
return (-1);
|
||||
}
|
||||
|
||||
int
|
||||
emulate_wrmsr(struct vm *vm, int cpu, u_int num, uint64_t val)
|
||||
{
|
||||
int idx;
|
||||
uint64_t *guest_msrs;
|
||||
|
||||
if (lapic_msr(num))
|
||||
return (lapic_wrmsr(vm, cpu, num, val));
|
||||
|
||||
idx = msr_num_to_idx(num);
|
||||
if (idx < 0 || invalid_msr(idx))
|
||||
return (EINVAL);
|
||||
|
||||
if (!readonly_msr(idx)) {
|
||||
guest_msrs = vm_guest_msrs(vm, cpu);
|
||||
|
||||
/* Stash the value */
|
||||
guest_msrs[idx] = val;
|
||||
|
||||
/* Update processor state for non-emulated MSRs */
|
||||
if (!emulated_msr(idx))
|
||||
wrmsr(vmm_msr[idx].num, val);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
emulate_rdmsr(struct vm *vm, int cpu, u_int num)
|
||||
{
|
||||
int error, idx;
|
||||
uint32_t eax, edx;
|
||||
uint64_t result, *guest_msrs;
|
||||
|
||||
if (lapic_msr(num)) {
|
||||
error = lapic_rdmsr(vm, cpu, num, &result);
|
||||
goto done;
|
||||
}
|
||||
|
||||
idx = msr_num_to_idx(num);
|
||||
if (idx < 0 || invalid_msr(idx)) {
|
||||
error = EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
guest_msrs = vm_guest_msrs(vm, cpu);
|
||||
result = guest_msrs[idx];
|
||||
|
||||
/*
|
||||
* If this is not an emulated msr register make sure that the processor
|
||||
* state matches our cached state.
|
||||
*/
|
||||
if (!emulated_msr(idx) && (rdmsr(num) != result)) {
|
||||
panic("emulate_rdmsr: msr 0x%0x has inconsistent cached "
|
||||
"(0x%016lx) and actual (0x%016lx) values", num,
|
||||
result, rdmsr(num));
|
||||
}
|
||||
|
||||
error = 0;
|
||||
|
||||
done:
|
||||
if (error == 0) {
|
||||
eax = result;
|
||||
edx = result >> 32;
|
||||
error = vm_set_register(vm, cpu, VM_REG_GUEST_RAX, eax);
|
||||
if (error)
|
||||
panic("vm_set_register(rax) error %d", error);
|
||||
error = vm_set_register(vm, cpu, VM_REG_GUEST_RDX, edx);
|
||||
if (error)
|
||||
panic("vm_set_register(rdx) error %d", error);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
void
|
||||
restore_guest_msrs(struct vm *vm, int cpu)
|
||||
{
|
||||
int i;
|
||||
uint64_t *guest_msrs;
|
||||
|
||||
guest_msrs = vm_guest_msrs(vm, cpu);
|
||||
|
||||
for (i = 0; i < vmm_msr_num; i++) {
|
||||
if (emulated_msr(i))
|
||||
continue;
|
||||
else
|
||||
wrmsr(vmm_msr[i].num, guest_msrs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
restore_host_msrs(struct vm *vm, int cpu)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < vmm_msr_num; i++) {
|
||||
if (emulated_msr(i))
|
||||
continue;
|
||||
else
|
||||
wrmsr(vmm_msr[i].num, vmm_msr[i].hostval);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Must be called by the CPU-specific code before any guests are
|
||||
* created
|
||||
*/
|
||||
void
|
||||
guest_msr_valid(int msr)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < vmm_msr_num; i++) {
|
||||
if (vmm_msr[i].num == msr && invalid_msr(i)) {
|
||||
vmm_msr[i].flags &= ~VMM_MSR_F_INVALID;
|
||||
}
|
||||
}
|
||||
}
|
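emulate_rdmsr() hands the 64-bit MSR value back to the guest by splitting it across RAX and RDX, just as the hardware RDMSR instruction does (low 32 bits in EAX, high 32 bits in EDX). A standalone round trip of that split, for illustration only:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t result = 0x0011223344556677ULL;
	uint32_t eax, edx;
	uint64_t reassembled;

	/* Same split as the 'done:' path in emulate_rdmsr(). */
	eax = (uint32_t)result;
	edx = (uint32_t)(result >> 32);

	/* The guest sees the value again as EDX:EAX. */
	reassembled = ((uint64_t)edx << 32) | eax;
	printf("round trip ok: %s\n", reassembled == result ? "yes" : "no");
	return (0);
}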
43
sys/amd64/vmm/vmm_msr.h
Normal file
@ -0,0 +1,43 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMM_MSR_H_
|
||||
#define _VMM_MSR_H_
|
||||
|
||||
#define VMM_MSR_NUM 16
|
||||
struct vm;
|
||||
|
||||
void vmm_msr_init(void);
|
||||
int emulate_wrmsr(struct vm *vm, int vcpu, u_int msr, uint64_t val);
|
||||
int emulate_rdmsr(struct vm *vm, int vcpu, u_int msr);
|
||||
void guest_msrs_init(struct vm *vm, int cpu);
|
||||
void guest_msr_valid(int msr);
|
||||
void restore_host_msrs(struct vm *vm, int cpu);
|
||||
void restore_guest_msrs(struct vm *vm, int cpu);
|
||||
|
||||
#endif
|
104
sys/amd64/vmm/vmm_stat.c
Normal file
@ -0,0 +1,104 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/smp.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "vmm_stat.h"
|
||||
|
||||
static int vstnum;
|
||||
static struct vmm_stat_type *vsttab[MAX_VMM_STAT_TYPES];
|
||||
|
||||
static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat");
|
||||
|
||||
void
|
||||
vmm_stat_init(void *arg)
|
||||
{
|
||||
struct vmm_stat_type *vst = arg;
|
||||
|
||||
/* We require all stats to identify themselves with a description */
|
||||
if (vst->desc == NULL)
|
||||
return;
|
||||
|
||||
if (vstnum >= MAX_VMM_STAT_TYPES) {
|
||||
printf("Cannot accomodate vmm stat type \"%s\"!\n", vst->desc);
|
||||
return;
|
||||
}
|
||||
|
||||
vst->index = vstnum;
|
||||
vsttab[vstnum++] = vst;
|
||||
}
|
||||
|
||||
int
|
||||
vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf)
|
||||
{
|
||||
int i;
|
||||
uint64_t *stats;
|
||||
|
||||
if (vcpu < 0 || vcpu >= VM_MAXCPU)
|
||||
return (EINVAL);
|
||||
|
||||
stats = vcpu_stats(vm, vcpu);
|
||||
for (i = 0; i < vstnum; i++)
|
||||
buf[i] = stats[i];
|
||||
*num_stats = vstnum;
|
||||
return (0);
|
||||
}
|
||||
|
||||
void *
|
||||
vmm_stat_alloc(void)
|
||||
{
|
||||
u_long size;
|
||||
|
||||
size = vstnum * sizeof(uint64_t);
|
||||
|
||||
return (malloc(size, M_VMM_STAT, M_ZERO | M_WAITOK));
|
||||
}
|
||||
|
||||
void
|
||||
vmm_stat_free(void *vp)
|
||||
{
|
||||
free(vp, M_VMM_STAT);
|
||||
}
|
||||
|
||||
const char *
|
||||
vmm_stat_desc(int index)
|
||||
{
|
||||
|
||||
if (index >= 0 && index < vstnum)
|
||||
return (vsttab[index]->desc);
|
||||
else
|
||||
return (NULL);
|
||||
}
|
71
sys/amd64/vmm/vmm_stat.h
Normal file
@ -0,0 +1,71 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMM_STAT_H_
|
||||
#define _VMM_STAT_H_
|
||||
|
||||
struct vm;
|
||||
|
||||
#define MAX_VMM_STAT_TYPES 64 /* arbitrary */
|
||||
|
||||
struct vmm_stat_type {
|
||||
const char *desc; /* description of statistic */
|
||||
int index; /* position in the stats buffer */
|
||||
};
|
||||
|
||||
void vmm_stat_init(void *arg);
|
||||
|
||||
#define VMM_STAT_DEFINE(type, desc) \
|
||||
struct vmm_stat_type type[1] = { \
|
||||
{ desc, -1 } \
|
||||
}; \
|
||||
SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_init, type)
|
||||
|
||||
void *vmm_stat_alloc(void);
|
||||
void vmm_stat_free(void *vp);
|
||||
|
||||
/*
|
||||
* 'buf' should be large enough to fit 'MAX_VMM_STAT_TYPES' entries
|
||||
*/
|
||||
int vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf);
|
||||
const char *vmm_stat_desc(int index);
|
||||
|
||||
static void __inline
|
||||
vmm_stat_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t x)
|
||||
{
|
||||
#ifdef VMM_KEEP_STATS
|
||||
uint64_t *stats = vcpu_stats(vm, vcpu);
|
||||
if (vst->index >= 0)
|
||||
stats[vst->index] += x;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
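The statistics scheme is intentionally simple: every VMM_STAT_DEFINE() registers a type at boot, vmm_stat_init() hands it the next free slot, and each vcpu's stats area is just an array of uint64_t indexed by that slot. A userland sketch of the same registration-then-increment pattern (the kernel SYSINIT machinery is replaced here by an explicit call):

#include <stdio.h>
#include <stdint.h>

#define TOY_MAX_STATS	64

struct toy_stat_type {
	const char	*desc;
	int		index;
};

static int toy_num;
static struct toy_stat_type *toy_tab[TOY_MAX_STATS];
static uint64_t vcpu_stats_buf[TOY_MAX_STATS];	/* one vcpu's stats area */

/* Assign the next slot, as vmm_stat_init() does at SYSINIT time. */
static void
toy_stat_init(struct toy_stat_type *vst)
{
	if (toy_num < TOY_MAX_STATS) {
		vst->index = toy_num;
		toy_tab[toy_num++] = vst;
	}
}

static struct toy_stat_type vmexit_count = { "total vm exits", -1 };

int
main(void)
{
	toy_stat_init(&vmexit_count);
	vcpu_stats_buf[vmexit_count.index] += 1;	/* vmm_stat_incr() */
	printf("%s = %lu\n", toy_tab[0]->desc,
	    (unsigned long)vcpu_stats_buf[0]);
	return (0);
}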
42
sys/amd64/vmm/vmm_support.S
Normal file
@ -0,0 +1,42 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#define LOCORE
|
||||
|
||||
#include <machine/asmacros.h>
|
||||
|
||||
#define LA_EOI 0xB0
|
||||
|
||||
.text
|
||||
SUPERALIGN_TEXT
|
||||
IDTVEC(justreturn)
|
||||
pushq %rax
|
||||
movq lapic, %rax
|
||||
movl $0, LA_EOI(%rax)
|
||||
popq %rax
|
||||
iretq
|
111
sys/amd64/vmm/vmm_util.c
Normal file
@ -0,0 +1,111 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/libkern.h>
|
||||
|
||||
#include <machine/md_var.h>
|
||||
|
||||
#include "vmm_util.h"
|
||||
|
||||
boolean_t
|
||||
vmm_is_intel(void)
|
||||
{
|
||||
|
||||
if (strcmp(cpu_vendor, "GenuineIntel") == 0)
|
||||
return (TRUE);
|
||||
else
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
vmm_is_amd(void)
|
||||
{
|
||||
if (strcmp(cpu_vendor, "AuthenticAMD") == 0)
|
||||
return (TRUE);
|
||||
else
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
vmm_supports_1G_pages(void)
|
||||
{
|
||||
unsigned int regs[4];
|
||||
|
||||
/*
|
||||
* CPUID.80000001:EDX[bit 26] = 1 indicates support for 1GB pages
|
||||
*
|
||||
* Both Intel and AMD support this bit.
|
||||
*/
|
||||
if (cpu_exthigh >= 0x80000001) {
|
||||
do_cpuid(0x80000001, regs);
|
||||
if (regs[3] & (1 << 26))
|
||||
return (TRUE);
|
||||
}
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
#include <sys/proc.h>
|
||||
#include <machine/frame.h>
|
||||
#define DUMP_REG(x) printf(#x "\t\t0x%016lx\n", (long)(tf->tf_ ## x))
|
||||
#define DUMP_SEG(x) printf(#x "\t\t0x%04x\n", (unsigned)(tf->tf_ ## x))
|
||||
void
|
||||
dump_trapframe(struct trapframe *tf)
|
||||
{
|
||||
DUMP_REG(rdi);
|
||||
DUMP_REG(rsi);
|
||||
DUMP_REG(rdx);
|
||||
DUMP_REG(rcx);
|
||||
DUMP_REG(r8);
|
||||
DUMP_REG(r9);
|
||||
DUMP_REG(rax);
|
||||
DUMP_REG(rbx);
|
||||
DUMP_REG(rbp);
|
||||
DUMP_REG(r10);
|
||||
DUMP_REG(r11);
|
||||
DUMP_REG(r12);
|
||||
DUMP_REG(r13);
|
||||
DUMP_REG(r14);
|
||||
DUMP_REG(r15);
|
||||
DUMP_REG(trapno);
|
||||
DUMP_REG(addr);
|
||||
DUMP_REG(flags);
|
||||
DUMP_REG(err);
|
||||
DUMP_REG(rip);
|
||||
DUMP_REG(rflags);
|
||||
DUMP_REG(rsp);
|
||||
DUMP_SEG(cs);
|
||||
DUMP_SEG(ss);
|
||||
DUMP_SEG(fs);
|
||||
DUMP_SEG(gs);
|
||||
DUMP_SEG(es);
|
||||
DUMP_SEG(ds);
|
||||
}
|
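vmm_supports_1G_pages() probes CPUID leaf 0x80000001 and tests EDX bit 26 (1GB page support). The same probe can be made from userland with the compiler-provided <cpuid.h> helper; this is GCC/Clang specific and shown only as an illustration, not how the kernel code does it:

#include <stdio.h>
#include <cpuid.h>

int
main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID.80000001H:EDX[26] advertises 1GB page support. */
	if (__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx) &&
	    (edx & (1u << 26)))
		printf("1GB pages supported\n");
	else
		printf("1GB pages not supported (or leaf unavailable)\n");
	return (0);
}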
40
sys/amd64/vmm/vmm_util.h
Normal file
@ -0,0 +1,40 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMM_UTIL_H_
|
||||
#define _VMM_UTIL_H_
|
||||
|
||||
struct trapframe;
|
||||
|
||||
boolean_t vmm_is_intel(void);
|
||||
boolean_t vmm_is_amd(void);
|
||||
boolean_t vmm_supports_1G_pages(void);
|
||||
|
||||
void dump_trapframe(struct trapframe *tf);
|
||||
|
||||
#endif
|
202
sys/amd64/vmm/x86.c
Normal file
@ -0,0 +1,202 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/cpuset.h>
|
||||
|
||||
#include <machine/cpufunc.h>
|
||||
#include <machine/md_var.h>
|
||||
#include <machine/specialreg.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
|
||||
#include "x86.h"
|
||||
|
||||
#define CPUID_VM_HIGH 0x40000000
|
||||
|
||||
static const char bhyve_id[12] = "BHyVE BHyVE ";
|
||||
|
||||
int
|
||||
x86_emulate_cpuid(struct vm *vm, int vcpu_id,
|
||||
uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
|
||||
{
|
||||
int error;
|
||||
unsigned int func, regs[4];
|
||||
enum x2apic_state x2apic_state;
|
||||
|
||||
func = *eax;
|
||||
|
||||
/*
|
||||
* Requests for invalid CPUID levels should map to the highest
|
||||
* available level instead.
|
||||
*/
|
||||
if (cpu_exthigh != 0 && *eax >= 0x80000000) {
|
||||
if (*eax > cpu_exthigh)
|
||||
*eax = cpu_exthigh;
|
||||
} else if (*eax >= 0x40000000) {
|
||||
if (*eax > CPUID_VM_HIGH)
|
||||
*eax = CPUID_VM_HIGH;
|
||||
} else if (*eax > cpu_high) {
|
||||
*eax = cpu_high;
|
||||
}
|
||||
|
||||
/*
|
||||
* In general the approach used for CPU topology is to
|
||||
* advertise a flat topology where all CPUs are packages with
|
||||
* no multi-core or SMT.
|
||||
*/
|
||||
switch (func) {
|
||||
case CPUID_0000_0000:
|
||||
case CPUID_0000_0002:
|
||||
case CPUID_0000_0003:
|
||||
case CPUID_0000_000A:
|
||||
cpuid_count(*eax, *ecx, regs);
|
||||
break;
|
||||
|
||||
case CPUID_8000_0000:
|
||||
case CPUID_8000_0001:
|
||||
case CPUID_8000_0002:
|
||||
case CPUID_8000_0003:
|
||||
case CPUID_8000_0004:
|
||||
case CPUID_8000_0006:
|
||||
case CPUID_8000_0007:
|
||||
case CPUID_8000_0008:
|
||||
cpuid_count(*eax, *ecx, regs);
|
||||
break;
|
||||
|
||||
case CPUID_0000_0001:
|
||||
do_cpuid(1, regs);
|
||||
|
||||
error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state);
|
||||
if (error) {
|
||||
panic("x86_emulate_cpuid: error %d "
|
||||
"fetching x2apic state", error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Override the APIC ID only in ebx
|
||||
*/
|
||||
regs[1] &= ~(CPUID_LOCAL_APIC_ID);
|
||||
regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT);
|
||||
|
||||
/*
|
||||
* Don't expose VMX, SpeedStep or TME capability.
|
||||
* Advertise x2APIC capability and Hypervisor guest.
|
||||
*/
|
||||
regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
|
||||
|
||||
regs[2] |= CPUID2_HV;
|
||||
|
||||
if (x2apic_state != X2APIC_DISABLED)
|
||||
regs[2] |= CPUID2_X2APIC;
|
||||
|
||||
/*
|
||||
* Hide xsave/osxsave/avx until the FPU save/restore
|
||||
* issues are resolved
|
||||
*/
|
||||
regs[2] &= ~(CPUID2_XSAVE | CPUID2_OSXSAVE |
|
||||
CPUID2_AVX);
|
||||
|
||||
/*
|
||||
* Hide monitor/mwait until we know how to deal with
|
||||
* these instructions.
|
||||
*/
|
||||
regs[2] &= ~CPUID2_MON;
|
||||
|
||||
/*
|
||||
* Hide thermal monitoring
|
||||
*/
|
||||
regs[3] &= ~(CPUID_ACPI | CPUID_TM);
|
||||
|
||||
/*
|
||||
* Machine check handling is done in the host.
|
||||
* Hide MTRR capability.
|
||||
*/
|
||||
regs[3] &= ~(CPUID_MCA | CPUID_MCE | CPUID_MTRR);
|
||||
|
||||
/*
|
||||
* Disable multi-core.
|
||||
*/
|
||||
regs[1] &= ~CPUID_HTT_CORES;
|
||||
regs[3] &= ~CPUID_HTT;
|
||||
break;
|
||||
|
||||
case CPUID_0000_0004:
|
||||
do_cpuid(4, regs);
|
||||
|
||||
/*
|
||||
* Do not expose topology.
|
||||
*/
|
||||
regs[0] &= 0xffff8000;
|
||||
regs[0] |= 0x04008000;
|
||||
break;
|
||||
|
||||
case CPUID_0000_0006:
|
||||
case CPUID_0000_0007:
|
||||
/*
|
||||
* Handle the access, but report 0 for
|
||||
* all options
|
||||
*/
|
||||
regs[0] = 0;
|
||||
regs[1] = 0;
|
||||
regs[2] = 0;
|
||||
regs[3] = 0;
|
||||
break;
|
||||
|
||||
case CPUID_0000_000B:
|
||||
/*
|
||||
* Processor topology enumeration
|
||||
*/
|
||||
regs[0] = 0;
|
||||
regs[1] = 0;
|
||||
regs[2] = *ecx & 0xff;
|
||||
regs[3] = vcpu_id;
|
||||
break;
|
||||
|
||||
case 0x40000000:
|
||||
regs[0] = CPUID_VM_HIGH;
|
||||
bcopy(bhyve_id, &regs[1], 4);
|
||||
bcopy(bhyve_id, &regs[2], 4);
|
||||
bcopy(bhyve_id, &regs[3], 4);
|
||||
break;
|
||||
default:
|
||||
/* XXX: Leaf 5? */
|
||||
return (0);
|
||||
}
|
||||
|
||||
*eax = regs[0];
|
||||
*ebx = regs[1];
|
||||
*ecx = regs[2];
|
||||
*edx = regs[3];
|
||||
return (1);
|
||||
}
|
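One detail worth calling out in x86_emulate_cpuid() is the leaf 1 handling: the host's EBX value is kept except for bits 31:24, which are overwritten with the vcpu id so each virtual CPU reports its own initial APIC ID. The masking in isolation (the shift and mask follow the architectural CPUID.1:EBX layout; the host EBX value below is just an example):

#include <stdio.h>
#include <stdint.h>

#define APICID_MASK	(0xffu << 24)	/* CPUID.1:EBX bits 31:24 */
#define APICID_SHIFT	24

int
main(void)
{
	uint32_t host_ebx = 0x06100800;	/* example host CPUID.1:EBX value */
	int vcpu_id = 3;
	uint32_t guest_ebx;

	/* Clear the host APIC ID and substitute the vcpu id. */
	guest_ebx = (host_ebx & ~APICID_MASK) |
	    ((uint32_t)vcpu_id << APICID_SHIFT);
	printf("guest CPUID.1:EBX = 0x%08x\n", guest_ebx);
	return (0);
}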
64
sys/amd64/vmm/x86.h
Normal file
@ -0,0 +1,64 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _X86_H_
|
||||
#define _X86_H_
|
||||
|
||||
#define CPUID_0000_0000 (0x0)
|
||||
#define CPUID_0000_0001 (0x1)
|
||||
#define CPUID_0000_0002 (0x2)
|
||||
#define CPUID_0000_0003 (0x3)
|
||||
#define CPUID_0000_0004 (0x4)
|
||||
#define CPUID_0000_0006 (0x6)
|
||||
#define CPUID_0000_0007 (0x7)
|
||||
#define CPUID_0000_000A (0xA)
|
||||
#define CPUID_0000_000B (0xB)
|
||||
#define CPUID_8000_0000 (0x80000000)
|
||||
#define CPUID_8000_0001 (0x80000001)
|
||||
#define CPUID_8000_0002 (0x80000002)
|
||||
#define CPUID_8000_0003 (0x80000003)
|
||||
#define CPUID_8000_0004 (0x80000004)
|
||||
#define CPUID_8000_0006 (0x80000006)
|
||||
#define CPUID_8000_0007 (0x80000007)
|
||||
#define CPUID_8000_0008 (0x80000008)
|
||||
|
||||
/*
|
||||
* CPUID instruction Fn0000_0001:
|
||||
*/
|
||||
#define CPUID_0000_0001_APICID_MASK (0xff<<24)
|
||||
#define CPUID_0000_0001_APICID_SHIFT 24
|
||||
|
||||
/*
|
||||
* CPUID instruction Fn0000_0001 ECX
|
||||
*/
|
||||
#define CPUID_0000_0001_FEAT0_VMX (1<<5)
|
||||
|
||||
int x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint32_t *eax, uint32_t *ebx,
|
||||
uint32_t *ecx, uint32_t *edx);
|
||||
|
||||
#endif
|
@ -464,6 +464,11 @@ libkern/memset.c standard
|
||||
compat/x86bios/x86bios.c optional x86bios | atkbd | dpms | vesa
|
||||
contrib/x86emu/x86emu.c optional x86bios | atkbd | dpms | vesa
|
||||
#
|
||||
# bvm console
|
||||
#
|
||||
dev/bvm/bvm_console.c optional bvmconsole
|
||||
dev/bvm/bvm_dbg.c optional bvmdebug
|
||||
#
|
||||
# x86 shared code between IA32, AMD64 and PC98 architectures
|
||||
#
|
||||
x86/acpica/OsdEnvironment.c optional acpi
|
||||
|
129
sys/dev/blackhole/blackhole.c
Normal file
@ -0,0 +1,129 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/module.h>
|
||||
#include <sys/bus.h>
|
||||
#include <sys/linker.h>
|
||||
#include <sys/libkern.h>
|
||||
|
||||
#include <dev/pci/pcivar.h>
|
||||
|
||||
static int
|
||||
linker_file_iterator(linker_file_t lf, void *arg)
|
||||
{
|
||||
const char *file = arg;
|
||||
|
||||
if (strcmp(lf->filename, file) == 0)
|
||||
return (1);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
pptdev(int bus, int slot, int func)
|
||||
{
|
||||
int found, b, s, f, n;
|
||||
char *val, *cp, *cp2;
|
||||
|
||||
/*
|
||||
* setenv pptdevs "1/2/3 4/5/6 7/8/9 10/11/12"
|
||||
*/
|
||||
found = 0;
|
||||
cp = val = getenv("pptdevs");
|
||||
while (cp != NULL && *cp != '\0') {
|
||||
if ((cp2 = strchr(cp, ' ')) != NULL)
|
||||
*cp2 = '\0';
|
||||
|
||||
n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
|
||||
if (n == 3 && bus == b && slot == s && func == f) {
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (cp2 != NULL)
|
||||
*cp2++ = ' ';
|
||||
|
||||
cp = cp2;
|
||||
}
|
||||
freeenv(val);
|
||||
return (found);
|
||||
}
|
||||
|
||||
static int
|
||||
pci_blackhole_probe(device_t dev)
|
||||
{
|
||||
int bus, slot, func;
|
||||
|
||||
/*
|
||||
* If 'vmm.ko' has also been loaded then don't try to claim
|
||||
* any pci devices.
|
||||
*/
|
||||
if (linker_file_foreach(linker_file_iterator, "vmm.ko"))
|
||||
return (ENXIO);
|
||||
|
||||
bus = pci_get_bus(dev);
|
||||
slot = pci_get_slot(dev);
|
||||
func = pci_get_function(dev);
|
||||
if (pptdev(bus, slot, func))
|
||||
return (0);
|
||||
else
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
static int
|
||||
pci_blackhole_attach(device_t dev)
|
||||
{
|
||||
/*
|
||||
* We never really want to claim the devices but just want to prevent
|
||||
* other drivers from getting to them.
|
||||
*/
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
static device_method_t pci_blackhole_methods[] = {
|
||||
/* Device interface */
|
||||
DEVMETHOD(device_probe, pci_blackhole_probe),
|
||||
DEVMETHOD(device_attach, pci_blackhole_attach),
|
||||
|
||||
{ 0, 0 }
|
||||
};
|
||||
|
||||
static driver_t pci_blackhole_driver = {
|
||||
"blackhole",
|
||||
pci_blackhole_methods,
|
||||
};
|
||||
|
||||
devclass_t blackhole_devclass;
|
||||
|
||||
DRIVER_MODULE(blackhole, pci, pci_blackhole_driver, blackhole_devclass, 0, 0);
|
||||
MODULE_DEPEND(blackhole, pci, 1, 1, 1);
|
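pptdev() walks the 'pptdevs' tunable, a space-separated list of bus/slot/function triples, temporarily NUL-terminating each token before handing it to sscanf and restoring the separator afterwards. A userland version of the same loop over a local copy of the string (no kernel getenv/freeenv involved; the function name is invented for the example):

#include <stdio.h>
#include <string.h>

/* Return 1 if bus/slot/func appears in a "b/s/f b/s/f ..." list. */
static int
pptdev_listed(char *list, int bus, int slot, int func)
{
	char *cp, *cp2;
	int b, s, f, n, found;

	found = 0;
	cp = list;
	while (cp != NULL && *cp != '\0') {
		if ((cp2 = strchr(cp, ' ')) != NULL)
			*cp2 = '\0';
		n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
		if (n == 3 && b == bus && s == slot && f == func)
			found = 1;
		if (cp2 != NULL)
			*cp2++ = ' ';	/* restore the separator */
		cp = cp2;
		if (found)
			break;
	}
	return (found);
}

int
main(void)
{
	char list[] = "1/2/3 4/5/6 7/8/9";

	printf("4/5/6 listed: %d\n", pptdev_listed(list, 4, 5, 6));
	printf("4/5/7 listed: %d\n", pptdev_listed(list, 4, 5, 7));
	return (0);
}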
240
sys/dev/bvm/bvm_console.c
Normal file
@ -0,0 +1,240 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/cons.h>
|
||||
#include <sys/tty.h>
|
||||
#include <sys/reboot.h>
|
||||
#include <sys/bus.h>
|
||||
|
||||
#include <sys/kdb.h>
|
||||
#include <ddb/ddb.h>
|
||||
|
||||
#ifndef BVMCONS_POLL_HZ
|
||||
#define BVMCONS_POLL_HZ 4
|
||||
#endif
|
||||
#define BVMBURSTLEN 16 /* max number of bytes to write in one chunk */
|
||||
|
||||
static tsw_open_t bvm_tty_open;
|
||||
static tsw_close_t bvm_tty_close;
|
||||
static tsw_outwakeup_t bvm_tty_outwakeup;
|
||||
|
||||
static struct ttydevsw bvm_ttydevsw = {
|
||||
.tsw_flags = TF_NOPREFIX,
|
||||
.tsw_open = bvm_tty_open,
|
||||
.tsw_close = bvm_tty_close,
|
||||
.tsw_outwakeup = bvm_tty_outwakeup,
|
||||
};
|
||||
|
||||
static int polltime;
|
||||
static struct callout_handle bvm_timeouthandle
|
||||
= CALLOUT_HANDLE_INITIALIZER(&bvm_timeouthandle);
|
||||
|
||||
#if defined(KDB)
|
||||
static int alt_break_state;
|
||||
#endif
|
||||
|
||||
#define BVM_CONS_PORT 0x220
|
||||
static int bvm_cons_port = BVM_CONS_PORT;
|
||||
|
||||
#define BVM_CONS_SIG ('b' << 8 | 'v')
|
||||
|
||||
static void bvm_timeout(void *);
|
||||
|
||||
static cn_probe_t bvm_cnprobe;
|
||||
static cn_init_t bvm_cninit;
|
||||
static cn_term_t bvm_cnterm;
|
||||
static cn_getc_t bvm_cngetc;
|
||||
static cn_putc_t bvm_cnputc;
|
||||
static cn_grab_t bvm_cngrab;
|
||||
static cn_ungrab_t bvm_cnungrab;
|
||||
|
||||
CONSOLE_DRIVER(bvm);
|
||||
|
||||
static int
|
||||
bvm_rcons(u_char *ch)
|
||||
{
|
||||
int c;
|
||||
|
||||
c = inl(bvm_cons_port);
|
||||
if (c != -1) {
|
||||
*ch = (u_char)c;
|
||||
return (0);
|
||||
} else
|
||||
return (-1);
|
||||
}
|
||||
|
||||
static void
|
||||
bvm_wcons(u_char ch)
|
||||
{
|
||||
|
||||
outl(bvm_cons_port, ch);
|
||||
}
|
||||
|
||||
static void
|
||||
cn_drvinit(void *unused)
|
||||
{
|
||||
struct tty *tp;
|
||||
|
||||
if (bvm_consdev.cn_pri != CN_DEAD &&
|
||||
bvm_consdev.cn_name[0] != '\0') {
|
||||
tp = tty_alloc(&bvm_ttydevsw, NULL);
|
||||
tty_makedev(tp, NULL, "bvmcons");
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
bvm_tty_open(struct tty *tp)
|
||||
{
|
||||
polltime = hz / BVMCONS_POLL_HZ;
|
||||
if (polltime < 1)
|
||||
polltime = 1;
|
||||
bvm_timeouthandle = timeout(bvm_timeout, tp, polltime);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
bvm_tty_close(struct tty *tp)
|
||||
{
|
||||
|
||||
/* XXX Should be replaced with callout_stop(9) */
|
||||
untimeout(bvm_timeout, tp, bvm_timeouthandle);
|
||||
}
|
||||
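The XXX above flags the legacy timeout(9)/untimeout(9) interface. A possible conversion to callout(9) is sketched below, assuming <sys/callout.h> is already reachable through the file's existing includes; bvm_timeout would then re-arm itself with callout_reset() instead of timeout(). This is only an illustration of the replacement the comment suggests, not code from the commit.

static struct callout bvm_callout;

static int
bvm_tty_open_callout(struct tty *tp)
{
	polltime = hz / BVMCONS_POLL_HZ;
	if (polltime < 1)
		polltime = 1;
	callout_init(&bvm_callout, 1);		/* MPSAFE */
	callout_reset(&bvm_callout, polltime, bvm_timeout, tp);
	return (0);
}

static void
bvm_tty_close_callout(struct tty *tp)
{
	/* Non-sleeping stop, per the XXX comment's callout_stop(9) hint */
	callout_stop(&bvm_callout);
}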
|
||||
static void
|
||||
bvm_tty_outwakeup(struct tty *tp)
|
||||
{
|
||||
int len, written;
|
||||
u_char buf[BVMBURSTLEN];
|
||||
|
||||
for (;;) {
|
||||
len = ttydisc_getc(tp, buf, sizeof(buf));
|
||||
if (len == 0)
|
||||
break;
|
||||
|
||||
written = 0;
|
||||
while (written < len)
|
||||
bvm_wcons(buf[written++]);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bvm_timeout(void *v)
|
||||
{
|
||||
struct tty *tp;
|
||||
int c;
|
||||
|
||||
tp = (struct tty *)v;
|
||||
|
||||
tty_lock(tp);
|
||||
while ((c = bvm_cngetc(NULL)) != -1)
|
||||
ttydisc_rint(tp, c, 0);
|
||||
ttydisc_rint_done(tp);
|
||||
tty_unlock(tp);
|
||||
|
||||
bvm_timeouthandle = timeout(bvm_timeout, tp, polltime);
|
||||
}
|
||||
|
||||
static void
|
||||
bvm_cnprobe(struct consdev *cp)
|
||||
{
|
||||
int disabled, port;
|
||||
|
||||
disabled = 0;
|
||||
cp->cn_pri = CN_DEAD;
|
||||
|
||||
resource_int_value("bvmconsole", 0, "disabled", &disabled);
|
||||
if (!disabled) {
|
||||
if (resource_int_value("bvmconsole", 0, "port", &port) == 0)
|
||||
bvm_cons_port = port;
|
||||
|
||||
if (inw(bvm_cons_port) == BVM_CONS_SIG)
|
||||
cp->cn_pri = CN_REMOTE;
|
||||
}
|
||||
}
|
||||
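The probe above is the whole discovery protocol: read a 16-bit word from the console port (0x220 by default, overridable via the bvmconsole.0.port hint) and compare it against BVM_CONS_SIG, which packs the ASCII bytes 'b' and 'v' into one word. A one-line check of the constant, purely illustrative:

#include <stdio.h>

int
main(void)
{
	/* ('b' << 8 | 'v') == 0x6276, the value inw(0x220) must return */
	printf("0x%04x\n", 'b' << 8 | 'v');
	return (0);
}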
|
||||
static void
|
||||
bvm_cninit(struct consdev *cp)
|
||||
{
|
||||
int i;
|
||||
const char *bootmsg = "Using bvm console.\n";
|
||||
|
||||
if (boothowto & RB_VERBOSE) {
|
||||
for (i = 0; i < strlen(bootmsg); i++)
|
||||
bvm_cnputc(cp, bootmsg[i]);
|
||||
}
|
||||
|
||||
strcpy(cp->cn_name, "bvmcons");
|
||||
}
|
||||
|
||||
static void
|
||||
bvm_cnterm(struct consdev *cp)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
static int
|
||||
bvm_cngetc(struct consdev *cp)
|
||||
{
|
||||
unsigned char ch;
|
||||
|
||||
if (bvm_rcons(&ch) == 0) {
|
||||
#if defined(KDB)
|
||||
kdb_alt_break(ch, &alt_break_state);
|
||||
#endif
|
||||
return (ch);
|
||||
}
|
||||
|
||||
return (-1);
|
||||
}
|
||||
|
||||
static void
|
||||
bvm_cnputc(struct consdev *cp, int c)
|
||||
{
|
||||
|
||||
bvm_wcons(c);
|
||||
}
|
||||
|
||||
static void
|
||||
bvm_cngrab(struct consdev *cp)
|
||||
{
|
||||
}
|
||||
|
||||
static void
|
||||
bvm_cnungrab(struct consdev *cp)
|
||||
{
|
||||
}
|
||||
|
||||
SYSINIT(cndev, SI_SUB_CONFIGURE, SI_ORDER_MIDDLE, cn_drvinit, NULL);
|
100
sys/dev/bvm/bvm_dbg.c
Normal file
@ -0,0 +1,100 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/bus.h>
|
||||
|
||||
#include <gdb/gdb.h>
|
||||
|
||||
#include <machine/cpufunc.h>
|
||||
|
||||
static gdb_probe_f bvm_dbg_probe;
|
||||
static gdb_init_f bvm_dbg_init;
|
||||
static gdb_term_f bvm_dbg_term;
|
||||
static gdb_getc_f bvm_dbg_getc;
|
||||
static gdb_putc_f bvm_dbg_putc;
|
||||
|
||||
GDB_DBGPORT(bvm, bvm_dbg_probe, bvm_dbg_init, bvm_dbg_term,
|
||||
bvm_dbg_getc, bvm_dbg_putc);
|
||||
|
||||
#define BVM_DBG_PORT 0x224
|
||||
static int bvm_dbg_port = BVM_DBG_PORT;
|
||||
|
||||
#define BVM_DBG_SIG ('B' << 8 | 'V')
|
||||
|
||||
static int
|
||||
bvm_dbg_probe(void)
|
||||
{
|
||||
int disabled, port;
|
||||
|
||||
disabled = 0;
|
||||
resource_int_value("bvmdbg", 0, "disabled", &disabled);
|
||||
|
||||
if (!disabled) {
|
||||
if (resource_int_value("bvmdbg", 0, "port", &port) == 0)
|
||||
bvm_dbg_port = port;
|
||||
|
||||
if (inw(bvm_dbg_port) == BVM_DBG_SIG) {
|
||||
/*
|
||||
* Return a higher priority than 0 to override other
|
||||
* gdb dbgport providers that may be present (e.g. uart)
|
||||
*/
|
||||
return (1);
|
||||
}
|
||||
}
|
||||
|
||||
return (-1);
|
||||
}
|
||||
|
||||
static void
|
||||
bvm_dbg_init(void)
|
||||
{
|
||||
}
|
||||
|
||||
static void
|
||||
bvm_dbg_term(void)
|
||||
{
|
||||
}
|
||||
|
||||
static void
|
||||
bvm_dbg_putc(int c)
|
||||
{
|
||||
|
||||
outl(bvm_dbg_port, c);
|
||||
}
|
||||
|
||||
static int
|
||||
bvm_dbg_getc(void)
|
||||
{
|
||||
|
||||
return (inl(bvm_dbg_port));
|
||||
}
|
@ -48,6 +48,7 @@ SUBDIR= \
|
||||
${_bxe} \
|
||||
${_bios} \
|
||||
${_bktr} \
|
||||
${_blackhole} \
|
||||
${_bm} \
|
||||
bridgestp \
|
||||
bwi \
|
||||
@ -335,6 +336,7 @@ SUBDIR= \
|
||||
vge \
|
||||
${_viawd} \
|
||||
vkbd \
|
||||
${_vmm} \
|
||||
${_vpo} \
|
||||
vr \
|
||||
vte \
|
||||
@ -624,6 +626,7 @@ _amdtemp= amdtemp
|
||||
_arcmsr= arcmsr
|
||||
_asmc= asmc
|
||||
_bktr= bktr
|
||||
_blackhole= blackhole
|
||||
_bxe= bxe
|
||||
_cardbus= cardbus
|
||||
_cbb= cbb
|
||||
@ -720,6 +723,7 @@ _twa= twa
|
||||
_vesa= vesa
|
||||
_viawd= viawd
|
||||
_virtio= virtio
|
||||
_vmm= vmm
|
||||
_vxge= vxge
|
||||
_x86bios= x86bios
|
||||
_wbwd= wbwd
|
||||
|
9
sys/modules/blackhole/Makefile
Normal file
@ -0,0 +1,9 @@
|
||||
# $FreeBSD$
|
||||
|
||||
.PATH: ${.CURDIR}/../../dev/blackhole
|
||||
|
||||
KMOD= blackhole
|
||||
SRCS= blackhole.c
|
||||
SRCS+= bus_if.h device_if.h pci_if.h
|
||||
|
||||
.include <bsd.kmod.mk>
|
62
sys/modules/vmm/Makefile
Normal file
@ -0,0 +1,62 @@
|
||||
# $FreeBSD$
|
||||
|
||||
KMOD= vmm
|
||||
|
||||
SRCS= opt_ddb.h device_if.h bus_if.h pci_if.h
|
||||
|
||||
CFLAGS+= -DVMM_KEEP_STATS -DSMP
|
||||
CFLAGS+= -I${.CURDIR}/../../amd64/vmm
|
||||
CFLAGS+= -I${.CURDIR}/../../amd64/vmm/io
|
||||
CFLAGS+= -I${.CURDIR}/../../amd64/vmm/intel
|
||||
|
||||
# generic vmm support
|
||||
.PATH: ${.CURDIR}/../../amd64/vmm
|
||||
SRCS+= vmm.c \
|
||||
vmm_dev.c \
|
||||
vmm_host.c \
|
||||
vmm_instruction_emul.c \
|
||||
vmm_ipi.c \
|
||||
vmm_lapic.c \
|
||||
vmm_mem.c \
|
||||
vmm_msr.c \
|
||||
vmm_stat.c \
|
||||
vmm_util.c \
|
||||
x86.c \
|
||||
vmm_support.S
|
||||
|
||||
.PATH: ${.CURDIR}/../../amd64/vmm/io
|
||||
SRCS+= iommu.c \
|
||||
ppt.c \
|
||||
vdev.c \
|
||||
vlapic.c
|
||||
|
||||
# intel-specific files
|
||||
.PATH: ${.CURDIR}/../../amd64/vmm/intel
|
||||
SRCS+= ept.c \
|
||||
vmcs.c \
|
||||
vmx_msr.c \
|
||||
vmx.c \
|
||||
vtd.c
|
||||
|
||||
# amd-specific files
|
||||
.PATH: ${.CURDIR}/../../amd64/vmm/amd
|
||||
SRCS+= amdv.c
|
||||
|
||||
OBJS= vmx_support.o
|
||||
|
||||
CLEANFILES= vmx_assym.s vmx_genassym.o
|
||||
|
||||
vmx_assym.s: vmx_genassym.o
|
||||
.if exists(@)
|
||||
vmx_assym.s: @/kern/genassym.sh
|
||||
.endif
|
||||
sh @/kern/genassym.sh vmx_genassym.o > ${.TARGET}
|
||||
|
||||
vmx_support.o: vmx_support.S vmx_assym.s
|
||||
${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
|
||||
${.IMPSRC} -o ${.TARGET}
|
||||
|
||||
vmx_genassym.o: vmx_genassym.c @ machine x86
|
||||
${CC} -c ${CFLAGS:N-fno-common} ${.IMPSRC}
|
||||
|
||||
.include <bsd.kmod.mk>
|
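The hand-written rules above implement the kernel's usual genassym pattern: vmx_genassym.c is compiled to an object file, @/kern/genassym.sh extracts its ASSYM() constants into vmx_assym.s, and vmx_support.S is then assembled against those offsets. vmx_genassym.c itself is not shown in this excerpt; the sketch below only illustrates the general ASSYM() idiom with made-up structure and constant names.

#include <sys/param.h>
#include <sys/assym.h>

/* Hypothetical structure; the real offsets come from the commit's vmx code. */
struct example_ctx {
	uint64_t guest_rdi;
	uint64_t guest_rsi;
};

/* Each ASSYM() becomes an assembler constant in the generated .s file. */
ASSYM(EXAMPLE_GUEST_RDI, __offsetof(struct example_ctx, guest_rdi));
ASSYM(EXAMPLE_GUEST_RSI, __offsetof(struct example_ctx, guest_rsi));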
@ -10,6 +10,9 @@ SUBDIR+= acpi
|
||||
SUBDIR+= apm
|
||||
.endif
|
||||
SUBDIR+= asf
|
||||
SUBDIR+= bhyve
|
||||
SUBDIR+= bhyvectl
|
||||
SUBDIR+= bhyveload
|
||||
SUBDIR+= boot0cfg
|
||||
.if ${MK_TOOLCHAIN} != "no"
|
||||
SUBDIR+= btxld
|
||||
|
27
usr.sbin/bhyve/Makefile
Normal file
@ -0,0 +1,27 @@
|
||||
#
|
||||
# $FreeBSD$
|
||||
#
|
||||
|
||||
PROG= bhyve
|
||||
|
||||
DEBUG_FLAGS= -g -O0
|
||||
|
||||
SRCS= acpi.c atpic.c bhyverun.c consport.c dbgport.c elcr.c inout.c
|
||||
SRCS+= ioapic.c mem.c mevent.c mptbl.c
|
||||
SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c
|
||||
SRCS+= pci_virtio_net.c pci_uart.c pit_8254.c pmtmr.c post.c rtc.c uart.c
|
||||
SRCS+= xmsr.c spinup_ap.c
|
||||
|
||||
.PATH: ${.CURDIR}/../../sys/amd64/vmm
|
||||
SRCS+= vmm_instruction_emul.c
|
||||
|
||||
NO_MAN=
|
||||
|
||||
DPADD= ${LIBVMMAPI} ${LIBMD} ${LIBPTHREAD}
|
||||
LDADD= -lvmmapi -lmd -lpthread
|
||||
|
||||
WARNS?= 2
|
||||
|
||||
CFLAGS+= -I${.CURDIR}/../../sys
|
||||
|
||||
.include <bsd.prog.mk>
|
844
usr.sbin/bhyve/acpi.c
Normal file
@ -0,0 +1,844 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*
|
||||
* bhyve ACPI table generator.
|
||||
*
|
||||
* Create the minimal set of ACPI tables required to boot FreeBSD (and
|
||||
* hopefully other o/s's) by writing out ASL template files for each of
|
||||
* the tables and then compiling them to AML with the Intel iasl compiler.
|
||||
* The AML files are then read into guest memory.
|
||||
*
|
||||
* The tables are placed in the guest's ROM area just below 1MB physical,
|
||||
* above the MPTable.
|
||||
*
|
||||
* Layout
|
||||
* ------
|
||||
* RSDP -> 0xf0400 (36 bytes fixed)
|
||||
* RSDT -> 0xf0440 (36 bytes + 4*N table addrs, 2 used)
|
||||
* XSDT -> 0xf0480 (36 bytes + 8*N table addrs, 2 used)
|
||||
* MADT -> 0xf0500 (depends on #CPUs)
|
||||
* FADT -> 0xf0600 (268 bytes)
|
||||
* FACS -> 0xf0780 (64 bytes)
|
||||
* DSDT -> 0xf0800 (variable - can go up to 0x100000)
|
||||
*/
|
||||
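Each of the tables laid out above carries a checksum byte: a guest accepts a table only if all of its bytes, checksum included, sum to zero modulo 256. A minimal sketch of that validation rule, as a guest OS would apply it to the tables placed below 1MB (an illustrative helper, not code from this commit):

#include <stdint.h>
#include <stddef.h>

/* ACPI checksum rule: every byte of the table must sum to 0 (mod 256). */
static int
acpi_table_checksum_ok(const uint8_t *table, size_t len)
{
	uint8_t sum = 0;
	size_t i;

	for (i = 0; i < len; i++)
		sum += table[i];
	return (sum == 0);
}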
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <paths.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "acpi.h"
|
||||
|
||||
/*
|
||||
* Define the base address of the ACPI tables, and the offsets to
|
||||
* the individual tables
|
||||
*/
|
||||
#define BHYVE_ACPI_BASE 0xf0400
|
||||
#define RSDT_OFFSET 0x040
|
||||
#define XSDT_OFFSET 0x080
|
||||
#define MADT_OFFSET 0x100
|
||||
#define FADT_OFFSET 0x200
|
||||
#define FACS_OFFSET 0x380
|
||||
#define DSDT_OFFSET 0x400
|
||||
|
||||
#define BHYVE_ASL_TEMPLATE "bhyve.XXXXXXX"
|
||||
#define BHYVE_ASL_SUFFIX ".aml"
|
||||
#define BHYVE_ASL_COMPILER "/usr/sbin/iasl"
|
||||
|
||||
#define BHYVE_PM_TIMER_ADDR 0x408
|
||||
|
||||
static int basl_keep_temps;
|
||||
static int basl_verbose_iasl;
|
||||
static int basl_ncpu;
|
||||
static uint32_t basl_acpi_base = BHYVE_ACPI_BASE;
|
||||
|
||||
/*
|
||||
* Contains the full pathname of the template to be passed
|
||||
* to mkstemp/mkstemps(3)
|
||||
*/
|
||||
static char basl_template[MAXPATHLEN];
|
||||
static char basl_stemplate[MAXPATHLEN];
|
||||
|
||||
struct basl_fio {
|
||||
int fd;
|
||||
FILE *fp;
|
||||
char f_name[MAXPATHLEN];
|
||||
};
|
||||
|
||||
#define EFPRINTF(...) \
|
||||
err = fprintf(__VA_ARGS__); if (err < 0) goto err_exit;
|
||||
|
||||
#define EFFLUSH(x) \
|
||||
err = fflush(x); if (err != 0) goto err_exit;
|
||||
|
||||
static int
|
||||
basl_fwrite_rsdp(FILE *fp)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = 0;
|
||||
|
||||
EFPRINTF(fp, "/*\n");
|
||||
EFPRINTF(fp, " * bhyve RSDP template\n");
|
||||
EFPRINTF(fp, " */\n");
|
||||
EFPRINTF(fp, "[0008]\t\tSignature : \"RSD PTR \"\n");
|
||||
EFPRINTF(fp, "[0001]\t\tChecksum : 43\n");
|
||||
EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n");
|
||||
EFPRINTF(fp, "[0001]\t\tRevision : 02\n");
|
||||
EFPRINTF(fp, "[0004]\t\tRSDT Address : %08X\n",
|
||||
basl_acpi_base + RSDT_OFFSET);
|
||||
EFPRINTF(fp, "[0004]\t\tLength : 00000024\n");
|
||||
EFPRINTF(fp, "[0008]\t\tXSDT Address : 00000000%08X\n",
|
||||
basl_acpi_base + XSDT_OFFSET);
|
||||
EFPRINTF(fp, "[0001]\t\tExtended Checksum : 00\n");
|
||||
EFPRINTF(fp, "[0003]\t\tReserved : 000000\n");
|
||||
|
||||
EFFLUSH(fp);
|
||||
|
||||
return (0);
|
||||
|
||||
err_exit:
|
||||
return (errno);
|
||||
}
|
||||
|
||||
static int
|
||||
basl_fwrite_rsdt(FILE *fp)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = 0;
|
||||
|
||||
EFPRINTF(fp, "/*\n");
|
||||
EFPRINTF(fp, " * bhyve RSDT template\n");
|
||||
EFPRINTF(fp, " */\n");
|
||||
EFPRINTF(fp, "[0004]\t\tSignature : \"RSDT\"\n");
|
||||
EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n");
|
||||
EFPRINTF(fp, "[0001]\t\tRevision : 01\n");
|
||||
EFPRINTF(fp, "[0001]\t\tChecksum : 00\n");
|
||||
EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n");
|
||||
EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVRSDT \"\n");
|
||||
EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n");
|
||||
/* iasl will fill in the compiler ID/revision fields */
|
||||
EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n");
|
||||
EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
|
||||
/* Add in pointers to the MADT and FADT */
|
||||
EFPRINTF(fp, "[0004]\t\tACPI Table Address 0 : %08X\n",
|
||||
basl_acpi_base + MADT_OFFSET);
|
||||
EFPRINTF(fp, "[0004]\t\tACPI Table Address 1 : %08X\n",
|
||||
basl_acpi_base + FADT_OFFSET);
|
||||
|
||||
EFFLUSH(fp);
|
||||
|
||||
return (0);
|
||||
|
||||
err_exit:
|
||||
return (errno);
|
||||
}
|
||||
|
||||
static int
|
||||
basl_fwrite_xsdt(FILE *fp)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = 0;
|
||||
|
||||
EFPRINTF(fp, "/*\n");
|
||||
EFPRINTF(fp, " * bhyve XSDT template\n");
|
||||
EFPRINTF(fp, " */\n");
|
||||
EFPRINTF(fp, "[0004]\t\tSignature : \"XSDT\"\n");
|
||||
EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n");
|
||||
EFPRINTF(fp, "[0001]\t\tRevision : 01\n");
|
||||
EFPRINTF(fp, "[0001]\t\tChecksum : 00\n");
|
||||
EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n");
|
||||
EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVXSDT \"\n");
|
||||
EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n");
|
||||
/* iasl will fill in the compiler ID/revision fields */
|
||||
EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n");
|
||||
EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
|
||||
/* Add in pointers to the MADT and FADT */
|
||||
EFPRINTF(fp, "[0004]\t\tACPI Table Address 0 : 00000000%08X\n",
|
||||
basl_acpi_base + MADT_OFFSET);
|
||||
EFPRINTF(fp, "[0004]\t\tACPI Table Address 1 : 00000000%08X\n",
|
||||
basl_acpi_base + FADT_OFFSET);
|
||||
|
||||
EFFLUSH(fp);
|
||||
|
||||
return (0);
|
||||
|
||||
err_exit:
|
||||
return (errno);
|
||||
}
|
||||
|
||||
static int
|
||||
basl_fwrite_madt(FILE *fp)
|
||||
{
|
||||
int err;
|
||||
int i;
|
||||
|
||||
err = 0;
|
||||
|
||||
EFPRINTF(fp, "/*\n");
|
||||
EFPRINTF(fp, " * bhyve MADT template\n");
|
||||
EFPRINTF(fp, " */\n");
|
||||
EFPRINTF(fp, "[0004]\t\tSignature : \"APIC\"\n");
|
||||
EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n");
|
||||
EFPRINTF(fp, "[0001]\t\tRevision : 01\n");
|
||||
EFPRINTF(fp, "[0001]\t\tChecksum : 00\n");
|
||||
EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n");
|
||||
EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVMADT \"\n");
|
||||
EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n");
|
||||
|
||||
/* iasl will fill in the compiler ID/revision fields */
|
||||
EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n");
|
||||
EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
|
||||
EFPRINTF(fp, "[0004]\t\tLocal Apic Address : FEE00000\n");
|
||||
EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000001\n");
|
||||
EFPRINTF(fp, "\t\t\tPC-AT Compatibility : 1\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
|
||||
/* Add a Processor Local APIC entry for each CPU */
|
||||
for (i = 0; i < basl_ncpu; i++) {
|
||||
EFPRINTF(fp, "[0001]\t\tSubtable Type : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tLength : 08\n");
|
||||
EFPRINTF(fp, "[0001]\t\tProcessor ID : %02d\n", i);
|
||||
EFPRINTF(fp, "[0001]\t\tLocal Apic ID : %02d\n", i);
|
||||
EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000001\n");
|
||||
EFPRINTF(fp, "\t\t\tProcessor Enabled : 1\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
}
|
||||
|
||||
/* Always a single IOAPIC entry, with ID ncpu+1 */
|
||||
EFPRINTF(fp, "[0001]\t\tSubtable Type : 01\n");
|
||||
EFPRINTF(fp, "[0001]\t\tLength : 0C\n");
|
||||
EFPRINTF(fp, "[0001]\t\tI/O Apic ID : %02d\n", basl_ncpu);
|
||||
EFPRINTF(fp, "[0001]\t\tReserved : 00\n");
|
||||
EFPRINTF(fp, "[0004]\t\tAddress : fec00000\n");
|
||||
EFPRINTF(fp, "[0004]\t\tInterrupt : 00000000\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
|
||||
/* Override the 8259 chained vector. XXX maybe not needed */
|
||||
EFPRINTF(fp, "[0001]\t\tSubtable Type : 02\n");
|
||||
EFPRINTF(fp, "[0001]\t\tLength : 0A\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBus : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tSource : 09\n");
|
||||
EFPRINTF(fp, "[0004]\t\tInterrupt : 00000009\n");
|
||||
EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0000\n");
|
||||
EFPRINTF(fp, "\t\t\tPolarity : 0\n");
|
||||
EFPRINTF(fp, "\t\t\tTrigger Mode : 0\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
|
||||
EFFLUSH(fp);
|
||||
|
||||
return (0);
|
||||
|
||||
err_exit:
|
||||
return (errno);
|
||||
}
|
||||
|
||||
static int
|
||||
basl_fwrite_fadt(FILE *fp)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = 0;
|
||||
|
||||
EFPRINTF(fp, "/*\n");
|
||||
EFPRINTF(fp, " * bhyve FADT template\n");
|
||||
EFPRINTF(fp, " */\n");
|
||||
EFPRINTF(fp, "[0004]\t\tSignature : \"FACP\"\n");
|
||||
EFPRINTF(fp, "[0004]\t\tTable Length : 0000010C\n");
|
||||
EFPRINTF(fp, "[0001]\t\tRevision : 05\n");
|
||||
EFPRINTF(fp, "[0001]\t\tChecksum : 00\n");
|
||||
EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n");
|
||||
EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVFACP \"\n");
|
||||
EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n");
|
||||
/* iasl will fill in the compiler ID/revision fields */
|
||||
EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n");
|
||||
EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
|
||||
EFPRINTF(fp, "[0004]\t\tFACS Address : %08X\n",
|
||||
basl_acpi_base + FACS_OFFSET);
|
||||
EFPRINTF(fp, "[0004]\t\tDSDT Address : %08X\n",
|
||||
basl_acpi_base + DSDT_OFFSET);
|
||||
EFPRINTF(fp, "[0001]\t\tModel : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tPM Profile : 00 [Unspecified]\n");
|
||||
EFPRINTF(fp, "[0002]\t\tSCI Interrupt : 0009\n");
|
||||
EFPRINTF(fp, "[0004]\t\tSMI Command Port : 00000000\n");
|
||||
EFPRINTF(fp, "[0001]\t\tACPI Enable Value : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tACPI Disable Value : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tS4BIOS Command : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tP-State Control : 00\n");
|
||||
EFPRINTF(fp, "[0004]\t\tPM1A Event Block Address : 00000000\n");
|
||||
EFPRINTF(fp, "[0004]\t\tPM1B Event Block Address : 00000000\n");
|
||||
EFPRINTF(fp, "[0004]\t\tPM1A Control Block Address : 00000000\n");
|
||||
EFPRINTF(fp, "[0004]\t\tPM1B Control Block Address : 00000000\n");
|
||||
EFPRINTF(fp, "[0004]\t\tPM2 Control Block Address : 00000000\n");
|
||||
EFPRINTF(fp, "[0004]\t\tPM Timer Block Address : %08X\n",
|
||||
BHYVE_PM_TIMER_ADDR);
|
||||
EFPRINTF(fp, "[0004]\t\tGPE0 Block Address : 00000000\n");
|
||||
EFPRINTF(fp, "[0004]\t\tGPE1 Block Address : 00000000\n");
|
||||
EFPRINTF(fp, "[0001]\t\tPM1 Event Block Length : 04\n");
|
||||
EFPRINTF(fp, "[0001]\t\tPM1 Control Block Length : 02\n");
|
||||
EFPRINTF(fp, "[0001]\t\tPM2 Control Block Length : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tPM Timer Block Length : 04\n");
|
||||
EFPRINTF(fp, "[0001]\t\tGPE0 Block Length : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tGPE1 Block Length : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tGPE1 Base Offset : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\t_CST Support : 00\n");
|
||||
EFPRINTF(fp, "[0002]\t\tC2 Latency : 0000\n");
|
||||
EFPRINTF(fp, "[0002]\t\tC3 Latency : 0000\n");
|
||||
EFPRINTF(fp, "[0002]\t\tCPU Cache Size : 0000\n");
|
||||
EFPRINTF(fp, "[0002]\t\tCache Flush Stride : 0000\n");
|
||||
EFPRINTF(fp, "[0001]\t\tDuty Cycle Offset : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tDuty Cycle Width : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tRTC Day Alarm Index : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tRTC Month Alarm Index : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tRTC Century Index : 00\n");
|
||||
EFPRINTF(fp, "[0002]\t\tBoot Flags (decoded below) : 0000\n");
|
||||
EFPRINTF(fp, "\t\t\tLegacy Devices Supported (V2) : 0\n");
|
||||
EFPRINTF(fp, "\t\t\t8042 Present on ports 60/64 (V2) : 0\n");
|
||||
EFPRINTF(fp, "\t\t\tVGA Not Present (V4) : 1\n");
|
||||
EFPRINTF(fp, "\t\t\tMSI Not Supported (V4) : 0\n");
|
||||
EFPRINTF(fp, "\t\t\tPCIe ASPM Not Supported (V4) : 1\n");
|
||||
EFPRINTF(fp, "\t\t\tCMOS RTC Not Present (V5) : 0\n");
|
||||
EFPRINTF(fp, "[0001]\t\tReserved : 00\n");
|
||||
EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000000\n");
|
||||
EFPRINTF(fp, "\t\t\tWBINVD instruction is operational (V1) : 1\n");
|
||||
EFPRINTF(fp, "\t\t\tWBINVD flushes all caches (V1) : 0\n");
|
||||
EFPRINTF(fp, "\t\t\tAll CPUs support C1 (V1) : 0\n");
|
||||
EFPRINTF(fp, "\t\t\tC2 works on MP system (V1) : 0\n");
|
||||
EFPRINTF(fp, "\t\t\tControl Method Power Button (V1) : 1\n");
|
||||
EFPRINTF(fp, "\t\t\tControl Method Sleep Button (V1) : 1\n");
|
||||
EFPRINTF(fp, "\t\t\tRTC wake not in fixed reg space (V1) : 0\n");
|
||||
EFPRINTF(fp, "\t\t\tRTC can wake system from S4 (V1) : 0\n");
|
||||
EFPRINTF(fp, "\t\t\t32-bit PM Timer (V1) : 1\n");
|
||||
EFPRINTF(fp, "\t\t\tDocking Supported (V1) : 0\n");
|
||||
EFPRINTF(fp, "\t\t\tReset Register Supported (V2) : 0\n");
|
||||
EFPRINTF(fp, "\t\t\tSealed Case (V3) : 0\n");
|
||||
EFPRINTF(fp, "\t\t\tHeadless - No Video (V3) : 1\n");
|
||||
EFPRINTF(fp, "\t\t\tUse native instr after SLP_TYPx (V3) : 0\n");
|
||||
EFPRINTF(fp, "\t\t\tPCIEXP_WAK Bits Supported (V4) : 0\n");
|
||||
EFPRINTF(fp, "\t\t\tUse Platform Timer (V4) : 0\n");
|
||||
EFPRINTF(fp, "\t\t\tRTC_STS valid on S4 wake (V4) : 0\n");
|
||||
EFPRINTF(fp, "\t\t\tRemote Power-on capable (V4) : 0\n");
|
||||
EFPRINTF(fp, "\t\t\tUse APIC Cluster Model (V4) : 0\n");
|
||||
EFPRINTF(fp, "\t\t\tUse APIC Physical Destination Mode (V4) : 1\n");
|
||||
EFPRINTF(fp, "\t\t\tHardware Reduced (V5) : 0\n");
|
||||
EFPRINTF(fp, "\t\t\tLow Power S0 Idle (V5) : 0\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
|
||||
EFPRINTF(fp,
|
||||
"[0012]\t\tReset Register : [Generic Address Structure]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Width : 08\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n");
|
||||
EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000001\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
|
||||
EFPRINTF(fp, "[0001]\t\tValue to cause reset : 00\n");
|
||||
EFPRINTF(fp, "[0003]\t\tReserved : 000000\n");
|
||||
EFPRINTF(fp, "[0008]\t\tFACS Address : 00000000%08X\n",
|
||||
basl_acpi_base + FACS_OFFSET);
|
||||
EFPRINTF(fp, "[0008]\t\tDSDT Address : 00000000%08X\n",
|
||||
basl_acpi_base + DSDT_OFFSET);
|
||||
EFPRINTF(fp,
|
||||
"[0012]\t\tPM1A Event Block : [Generic Address Structure]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Width : 20\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 02 [Word Access:16]\n");
|
||||
EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000001\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
|
||||
EFPRINTF(fp,
|
||||
"[0012]\t\tPM1B Event Block : [Generic Address Structure]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Width : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
|
||||
EFPRINTF(fp,
|
||||
"[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n");
|
||||
EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
|
||||
EFPRINTF(fp,
|
||||
"[0012]\t\tPM1A Control Block : [Generic Address Structure]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Width : 10\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 02 [Word Access:16]\n");
|
||||
EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000001\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
|
||||
EFPRINTF(fp,
|
||||
"[0012]\t\tPM1B Control Block : [Generic Address Structure]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Width : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
|
||||
EFPRINTF(fp,
|
||||
"[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n");
|
||||
EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
|
||||
EFPRINTF(fp,
|
||||
"[0012]\t\tPM2 Control Block : [Generic Address Structure]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Width : 08\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
|
||||
EFPRINTF(fp,
|
||||
"[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n");
|
||||
EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
|
||||
/* Valid for bhyve */
|
||||
EFPRINTF(fp,
|
||||
"[0012]\t\tPM Timer Block : [Generic Address Structure]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Width : 32\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
|
||||
EFPRINTF(fp,
|
||||
"[0001]\t\tEncoded Access Width : 03 [DWord Access:32]\n");
|
||||
EFPRINTF(fp, "[0008]\t\tAddress : 00000000%08X\n",
|
||||
BHYVE_PM_TIMER_ADDR);
|
||||
EFPRINTF(fp, "\n");
|
||||
|
||||
EFPRINTF(fp, "[0012]\t\tGPE0 Block : [Generic Address Structure]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Width : 80\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n");
|
||||
EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
|
||||
EFPRINTF(fp, "[0012]\t\tGPE1 Block : [Generic Address Structure]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Width : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
|
||||
EFPRINTF(fp,
|
||||
"[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n");
|
||||
EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
|
||||
EFPRINTF(fp,
|
||||
"[0012]\t\tSleep Control Register : [Generic Address Structure]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Width : 08\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n");
|
||||
EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
|
||||
EFPRINTF(fp,
|
||||
"[0012]\t\tSleep Status Register : [Generic Address Structure]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Width : 08\n");
|
||||
EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
|
||||
EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n");
|
||||
EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n");
|
||||
|
||||
EFFLUSH(fp);
|
||||
|
||||
return (0);
|
||||
|
||||
err_exit:
|
||||
return (errno);
|
||||
}
|
||||
|
||||
static int
|
||||
basl_fwrite_facs(FILE *fp)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = 0;
|
||||
|
||||
EFPRINTF(fp, "/*\n");
|
||||
EFPRINTF(fp, " * bhyve FACS template\n");
|
||||
EFPRINTF(fp, " */\n");
|
||||
EFPRINTF(fp, "[0004]\t\tSignature : \"FACS\"\n");
|
||||
EFPRINTF(fp, "[0004]\t\tLength : 00000040\n");
|
||||
EFPRINTF(fp, "[0004]\t\tHardware Signature : 00000000\n");
|
||||
EFPRINTF(fp, "[0004]\t\t32 Firmware Waking Vector : 00000000\n");
|
||||
EFPRINTF(fp, "[0004]\t\tGlobal Lock : 00000000\n");
|
||||
EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000000\n");
|
||||
EFPRINTF(fp, "\t\t\tS4BIOS Support Present : 0\n");
|
||||
EFPRINTF(fp, "\t\t\t64-bit Wake Supported (V2) : 0\n");
|
||||
EFPRINTF(fp,
|
||||
"[0008]\t\t64 Firmware Waking Vector : 0000000000000000\n");
|
||||
EFPRINTF(fp, "[0001]\t\tVersion : 02\n");
|
||||
EFPRINTF(fp, "[0003]\t\tReserved : 000000\n");
|
||||
EFPRINTF(fp, "[0004]\t\tOspmFlags (decoded below) : 00000000\n");
|
||||
EFPRINTF(fp, "\t\t\t64-bit Wake Env Required (V2) : 0\n");
|
||||
|
||||
EFFLUSH(fp);
|
||||
|
||||
return (0);
|
||||
|
||||
err_exit:
|
||||
return (errno);
|
||||
}
|
||||
|
||||
static int
|
||||
basl_fwrite_dsdt(FILE *fp)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = 0;
|
||||
|
||||
EFPRINTF(fp, "/*\n");
|
||||
EFPRINTF(fp, " * bhyve DSDT template\n");
|
||||
EFPRINTF(fp, " */\n");
|
||||
EFPRINTF(fp, "DefinitionBlock (\"bhyve_dsdt.aml\", \"DSDT\", 2,"
|
||||
"\"BHYVE \", \"BVDSDT \", 0x00000001)\n");
|
||||
EFPRINTF(fp, "{\n");
|
||||
EFPRINTF(fp, " Scope (_SB)\n");
|
||||
EFPRINTF(fp, " {\n");
|
||||
EFPRINTF(fp, " Device (PCI0)\n");
|
||||
EFPRINTF(fp, " {\n");
|
||||
EFPRINTF(fp, " Name (_HID, EisaId (\"PNP0A03\"))\n");
|
||||
EFPRINTF(fp, " Name (_ADR, Zero)\n");
|
||||
EFPRINTF(fp, " Name (_UID, One)\n");
|
||||
EFPRINTF(fp, " Name (_CRS, ResourceTemplate ()\n");
|
||||
EFPRINTF(fp, " {\n");
|
||||
EFPRINTF(fp, " WordBusNumber (ResourceProducer, MinFixed,"
|
||||
"MaxFixed, PosDecode,\n");
|
||||
EFPRINTF(fp, " 0x0000, // Granularity\n");
|
||||
EFPRINTF(fp, " 0x0000, // Range Minimum\n");
|
||||
EFPRINTF(fp, " 0x00FF, // Range Maximum\n");
|
||||
EFPRINTF(fp, " 0x0000, // Transl Offset\n");
|
||||
EFPRINTF(fp, " 0x0100, // Length\n");
|
||||
EFPRINTF(fp, " ,, )\n");
|
||||
EFPRINTF(fp, " IO (Decode16,\n");
|
||||
EFPRINTF(fp, " 0x0CF8, // Range Minimum\n");
|
||||
EFPRINTF(fp, " 0x0CF8, // Range Maximum\n");
|
||||
EFPRINTF(fp, " 0x01, // Alignment\n");
|
||||
EFPRINTF(fp, " 0x08, // Length\n");
|
||||
EFPRINTF(fp, " )\n");
|
||||
EFPRINTF(fp, " WordIO (ResourceProducer, MinFixed, MaxFixed,"
|
||||
"PosDecode, EntireRange,\n");
|
||||
EFPRINTF(fp, " 0x0000, // Granularity\n");
|
||||
EFPRINTF(fp, " 0x0000, // Range Minimum\n");
|
||||
EFPRINTF(fp, " 0x0CF7, // Range Maximum\n");
|
||||
EFPRINTF(fp, " 0x0000, // Transl Offset\n");
|
||||
EFPRINTF(fp, " 0x0CF8, // Length\n");
|
||||
EFPRINTF(fp, " ,, , TypeStatic)\n");
|
||||
EFPRINTF(fp, " WordIO (ResourceProducer, MinFixed, MaxFixed,"
|
||||
"PosDecode, EntireRange,\n");
|
||||
EFPRINTF(fp, " 0x0000, // Granularity\n");
|
||||
EFPRINTF(fp, " 0x0D00, // Range Minimum\n");
|
||||
EFPRINTF(fp, " 0xFFFF, // Range Maximum\n");
|
||||
EFPRINTF(fp, " 0x0000, // Transl Offset\n");
|
||||
EFPRINTF(fp, " 0xF300, // Length\n");
|
||||
EFPRINTF(fp, " ,, , TypeStatic)\n");
|
||||
EFPRINTF(fp, " })\n");
|
||||
EFPRINTF(fp, " }\n");
|
||||
EFPRINTF(fp, " }\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
EFPRINTF(fp, " Scope (_SB.PCI0)\n");
|
||||
EFPRINTF(fp, " {\n");
|
||||
EFPRINTF(fp, " Device (ISA)\n");
|
||||
EFPRINTF(fp, " {\n");
|
||||
EFPRINTF(fp, " Name (_ADR, 0x00010000)\n");
|
||||
EFPRINTF(fp, " OperationRegion (P40C, PCI_Config, 0x60, 0x04)\n");
|
||||
EFPRINTF(fp, " }\n");
|
||||
EFPRINTF(fp, " }\n");
|
||||
EFPRINTF(fp, "\n");
|
||||
EFPRINTF(fp, " Scope (_SB.PCI0.ISA)\n");
|
||||
EFPRINTF(fp, " {\n");
|
||||
EFPRINTF(fp, " Device (RTC)\n");
|
||||
EFPRINTF(fp, " {\n");
|
||||
EFPRINTF(fp, " Name (_HID, EisaId (\"PNP0B00\"))\n");
|
||||
EFPRINTF(fp, " Name (_CRS, ResourceTemplate ()\n");
|
||||
EFPRINTF(fp, " {\n");
|
||||
EFPRINTF(fp, " IO (Decode16,\n");
|
||||
EFPRINTF(fp, " 0x0070, // Range Minimum\n");
|
||||
EFPRINTF(fp, " 0x0070, // Range Maximum\n");
|
||||
EFPRINTF(fp, " 0x10, // Alignment\n");
|
||||
EFPRINTF(fp, " 0x02, // Length\n");
|
||||
EFPRINTF(fp, " )\n");
|
||||
EFPRINTF(fp, " IRQNoFlags ()\n");
|
||||
EFPRINTF(fp, " {8}\n");
|
||||
EFPRINTF(fp, " IO (Decode16,\n");
|
||||
EFPRINTF(fp, " 0x0072, // Range Minimum\n");
|
||||
EFPRINTF(fp, " 0x0072, // Range Maximum\n");
|
||||
EFPRINTF(fp, " 0x02, // Alignment\n");
|
||||
EFPRINTF(fp, " 0x06, // Length\n");
|
||||
EFPRINTF(fp, " )\n");
|
||||
EFPRINTF(fp, " })\n");
|
||||
EFPRINTF(fp, " }\n");
|
||||
EFPRINTF(fp, " }\n");
|
||||
EFPRINTF(fp, "}\n");
|
||||
|
||||
EFFLUSH(fp);
|
||||
|
||||
return (0);
|
||||
|
||||
err_exit:
|
||||
return (errno);
|
||||
}
|
||||
|
||||
static int
|
||||
basl_open(struct basl_fio *bf, int suffix)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = 0;
|
||||
|
||||
if (suffix) {
|
||||
strncpy(bf->f_name, basl_stemplate, MAXPATHLEN);
|
||||
bf->fd = mkstemps(bf->f_name, strlen(BHYVE_ASL_SUFFIX));
|
||||
} else {
|
||||
strncpy(bf->f_name, basl_template, MAXPATHLEN);
|
||||
bf->fd = mkstemp(bf->f_name);
|
||||
}
|
||||
|
||||
if (bf->fd > 0) {
|
||||
bf->fp = fdopen(bf->fd, "w+");
|
||||
if (bf->fp == NULL) {
|
||||
unlink(bf->f_name);
|
||||
close(bf->fd);
|
||||
}
|
||||
} else {
|
||||
err = 1;
|
||||
}
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
static void
|
||||
basl_close(struct basl_fio *bf)
|
||||
{
|
||||
|
||||
if (!basl_keep_temps)
|
||||
unlink(bf->f_name);
|
||||
fclose(bf->fp);
|
||||
}
|
||||
|
||||
static int
|
||||
basl_start(struct basl_fio *in, struct basl_fio *out)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = basl_open(in, 0);
|
||||
if (!err) {
|
||||
err = basl_open(out, 1);
|
||||
if (err) {
|
||||
basl_close(in);
|
||||
}
|
||||
}
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
static void
|
||||
basl_end(struct basl_fio *in, struct basl_fio *out)
|
||||
{
|
||||
|
||||
basl_close(in);
|
||||
basl_close(out);
|
||||
}
|
||||
|
||||
static int
|
||||
basl_load(int fd, uint64_t off)
|
||||
{
|
||||
struct stat sb;
|
||||
int err;
|
||||
|
||||
err = 0;
|
||||
|
||||
if (fstat(fd, &sb) < 0 ||
|
||||
read(fd, paddr_guest2host(basl_acpi_base + off), sb.st_size) < 0)
|
||||
err = errno;
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
basl_compile(int (*fwrite_section)(FILE *fp), uint64_t offset)
|
||||
{
|
||||
struct basl_fio io[2];
|
||||
static char iaslbuf[3*MAXPATHLEN + 10];
|
||||
char *fmt;
|
||||
int err;
|
||||
|
||||
err = basl_start(&io[0], &io[1]);
|
||||
if (!err) {
|
||||
err = (*fwrite_section)(io[0].fp);
|
||||
|
||||
if (!err) {
|
||||
/*
|
||||
* iasl sends the results of the compilation to
|
||||
* stdout. Shut this down by using the shell to
|
||||
* redirect stdout to /dev/null, unless the user
|
||||
* has requested verbose output for debugging
|
||||
* purposes
|
||||
*/
|
||||
fmt = basl_verbose_iasl ?
|
||||
"%s -p %s %s" :
|
||||
"/bin/sh -c \"%s -p %s %s\" 1> /dev/null";
|
||||
|
||||
snprintf(iaslbuf, sizeof(iaslbuf),
|
||||
fmt,
|
||||
BHYVE_ASL_COMPILER,
|
||||
io[1].f_name, io[0].f_name);
|
||||
err = system(iaslbuf);
|
||||
|
||||
if (!err) {
|
||||
/*
|
||||
* Copy the aml output file into guest
|
||||
* memory at the specified location
|
||||
*/
|
||||
err = basl_load(io[1].fd, offset);
|
||||
}
|
||||
}
|
||||
basl_end(&io[0], &io[1]);
|
||||
}
|
||||
|
||||
return (err);
|
||||
}
|
||||
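For reference, in the default (non-verbose) case the snprintf() above produces a command of the following form; the temp-file names are whatever mkstemp(3)/mkstemps(3) chose, shown here with made-up values:

#include <stdio.h>

int
main(void)
{
	char cmd[1024];

	/* Same format string basl_compile() uses when iasl output is muted */
	snprintf(cmd, sizeof(cmd), "/bin/sh -c \"%s -p %s %s\" 1> /dev/null",
	    "/usr/sbin/iasl", "/tmp/bhyve.ab12cde.aml", "/tmp/bhyve.ab12cde");
	puts(cmd);
	return (0);
}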
|
||||
static int
|
||||
basl_make_templates(void)
|
||||
{
|
||||
const char *tmpdir;
|
||||
int err;
|
||||
int len;
|
||||
|
||||
err = 0;
|
||||
|
||||
/*
|
||||
* Pick a temporary directory and build the mkstemp(3)/mkstemps(3)
* templates in it.
|
||||
*/
|
||||
if ((tmpdir = getenv("BHYVE_TMPDIR")) == NULL || *tmpdir == '\0' ||
|
||||
(tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') {
|
||||
tmpdir = _PATH_TMP;
|
||||
}
|
||||
|
||||
len = strlen(tmpdir);
|
||||
|
||||
if ((len + sizeof(BHYVE_ASL_TEMPLATE) + 1) < MAXPATHLEN) {
|
||||
strcpy(basl_template, tmpdir);
|
||||
while (len > 0 && basl_template[len - 1] == '/')
|
||||
len--;
|
||||
basl_template[len] = '/';
|
||||
strcpy(&basl_template[len + 1], BHYVE_ASL_TEMPLATE);
|
||||
} else
|
||||
err = E2BIG;
|
||||
|
||||
if (!err) {
|
||||
/*
|
||||
* len has been initialized (and maybe adjusted) above
|
||||
*/
|
||||
if ((len + sizeof(BHYVE_ASL_TEMPLATE) + 1 +
|
||||
sizeof(BHYVE_ASL_SUFFIX)) < MAXPATHLEN) {
|
||||
strcpy(basl_stemplate, tmpdir);
|
||||
basl_stemplate[len] = '/';
|
||||
strcpy(&basl_stemplate[len + 1], BHYVE_ASL_TEMPLATE);
|
||||
len = strlen(basl_stemplate);
|
||||
strcpy(&basl_stemplate[len], BHYVE_ASL_SUFFIX);
|
||||
} else
|
||||
err = E2BIG;
|
||||
}
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
static struct {
|
||||
int (*wsect)(FILE *fp);
|
||||
uint64_t offset;
|
||||
} basl_ftables[] =
|
||||
{
|
||||
{ basl_fwrite_rsdp, 0},
|
||||
{ basl_fwrite_rsdt, RSDT_OFFSET },
|
||||
{ basl_fwrite_xsdt, XSDT_OFFSET },
|
||||
{ basl_fwrite_madt, MADT_OFFSET },
|
||||
{ basl_fwrite_fadt, FADT_OFFSET },
|
||||
{ basl_fwrite_facs, FACS_OFFSET },
|
||||
{ basl_fwrite_dsdt, DSDT_OFFSET },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
int
|
||||
acpi_build(struct vmctx *ctx, int ncpu, int ioapic)
|
||||
{
|
||||
int err;
|
||||
int i;
|
||||
|
||||
err = 0;
|
||||
basl_ncpu = ncpu;
|
||||
|
||||
if (!ioapic) {
|
||||
fprintf(stderr, "ACPI tables require an ioapic\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* For debug, allow the user to have iasl compiler output sent
|
||||
* to stdout rather than /dev/null
|
||||
*/
|
||||
if (getenv("BHYVE_ACPI_VERBOSE_IASL"))
|
||||
basl_verbose_iasl = 1;
|
||||
|
||||
/*
|
||||
* Allow the user to keep the generated ASL files for debugging
|
||||
* instead of deleting them following use
|
||||
*/
|
||||
if (getenv("BHYVE_ACPI_KEEPTMPS"))
|
||||
basl_keep_temps = 1;
|
||||
|
||||
i = 0;
|
||||
err = basl_make_templates();
|
||||
|
||||
/*
|
||||
* Run through all the ASL files, compiling them and
|
||||
* copying them into guest memory
|
||||
*/
|
||||
while (!err && basl_ftables[i].wsect != NULL) {
|
||||
err = basl_compile(basl_ftables[i].wsect,
|
||||
basl_ftables[i].offset);
|
||||
i++;
|
||||
}
|
||||
|
||||
return (err);
|
||||
}
|
34
usr.sbin/bhyve/acpi.h
Normal file
@ -0,0 +1,34 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _ACPI_H_
|
||||
#define _ACPI_H_
|
||||
|
||||
int acpi_build(struct vmctx *ctx, int ncpu, int ioapic);
|
||||
|
||||
#endif /* _ACPI_H_ */
|
68
usr.sbin/bhyve/atpic.c
Normal file
@ -0,0 +1,68 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
/*
|
||||
* FreeBSD only writes to the 8259 interrupt controllers to put them in a
|
||||
* shutdown state.
|
||||
*
|
||||
* So, we just ignore the writes.
|
||||
*/
|
||||
|
||||
#define IO_ICU1 0x20
|
||||
#define IO_ICU2 0xA0
|
||||
#define ICU_IMR_OFFSET 1
|
||||
|
||||
static int
|
||||
atpic_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
if (bytes != 1)
|
||||
return (-1);
|
||||
|
||||
if (in)
|
||||
return (-1);
|
||||
|
||||
/* Pretend all writes to the 8259 are alright */
|
||||
return (0);
|
||||
}
|
||||
|
||||
INOUT_PORT(atpic, IO_ICU1, IOPORT_F_INOUT, atpic_handler);
|
||||
INOUT_PORT(atpic, IO_ICU1 + ICU_IMR_OFFSET, IOPORT_F_INOUT, atpic_handler);
|
||||
INOUT_PORT(atpic, IO_ICU2, IOPORT_F_INOUT, atpic_handler);
|
||||
INOUT_PORT(atpic, IO_ICU2 + ICU_IMR_OFFSET, IOPORT_F_INOUT, atpic_handler);
|
788
usr.sbin/bhyve/bhyverun.c
Normal file
@ -0,0 +1,788 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <machine/segments.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <libgen.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <signal.h>
|
||||
#include <pthread.h>
|
||||
#include <pthread_np.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "acpi.h"
|
||||
#include "inout.h"
|
||||
#include "dbgport.h"
|
||||
#include "mem.h"
|
||||
#include "mevent.h"
|
||||
#include "mptbl.h"
|
||||
#include "pci_emul.h"
|
||||
#include "xmsr.h"
|
||||
#include "ioapic.h"
|
||||
#include "spinup_ap.h"
|
||||
|
||||
#define DEFAULT_GUEST_HZ 100
|
||||
#define DEFAULT_GUEST_TSLICE 200
|
||||
|
||||
#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */
|
||||
|
||||
#define VMEXIT_SWITCH 0 /* force vcpu switch in mux mode */
|
||||
#define VMEXIT_CONTINUE 1 /* continue from next instruction */
|
||||
#define VMEXIT_RESTART 2 /* restart current instruction */
|
||||
#define VMEXIT_ABORT 3 /* abort the vm run loop */
|
||||
#define VMEXIT_RESET 4 /* guest machine has reset */
|
||||
|
||||
#define MB (1024UL * 1024)
|
||||
#define GB (1024UL * MB)
|
||||
|
||||
typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
|
||||
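Every exit handler in this file returns one of the VMEXIT_* codes defined above, and the vm_loop() added later in the file turns that code into the next RIP to run, a vcpu switch, or an exit from the run loop. The fragment below is only a sketch of that contract with a hypothetical helper name, not the commit's actual loop:

static uint64_t
next_rip_example(struct vm_exit *vme, int rc)
{
	switch (rc) {
	case VMEXIT_CONTINUE:	/* resume past the exiting instruction */
		return (vme->rip + vme->inst_length);
	case VMEXIT_RESTART:	/* re-execute the same instruction */
		return (vme->rip);
	default:		/* SWITCH/ABORT/RESET: handled elsewhere */
		return (0);
	}
}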
|
||||
int guest_tslice = DEFAULT_GUEST_TSLICE;
|
||||
int guest_hz = DEFAULT_GUEST_HZ;
|
||||
char *vmname;
|
||||
|
||||
u_long lomem_sz;
|
||||
u_long himem_sz;
|
||||
|
||||
int guest_ncpus;
|
||||
|
||||
static int pincpu = -1;
|
||||
static int guest_vcpu_mux;
|
||||
static int guest_vmexit_on_hlt, guest_vmexit_on_pause, disable_x2apic;
|
||||
|
||||
static int foundcpus;
|
||||
|
||||
static int strictio;
|
||||
|
||||
static int acpi;
|
||||
|
||||
static char *lomem_addr;
|
||||
static char *himem_addr;
|
||||
|
||||
static char *progname;
|
||||
static const int BSP = 0;
|
||||
|
||||
static int cpumask;
|
||||
|
||||
static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);
|
||||
|
||||
struct vm_exit vmexit[VM_MAXCPU];
|
||||
|
||||
struct fbsdstats {
|
||||
uint64_t vmexit_bogus;
|
||||
uint64_t vmexit_bogus_switch;
|
||||
uint64_t vmexit_hlt;
|
||||
uint64_t vmexit_pause;
|
||||
uint64_t vmexit_mtrap;
|
||||
uint64_t vmexit_paging;
|
||||
uint64_t cpu_switch_rotate;
|
||||
uint64_t cpu_switch_direct;
|
||||
int io_reset;
|
||||
} stats;
|
||||
|
||||
struct mt_vmm_info {
|
||||
pthread_t mt_thr;
|
||||
struct vmctx *mt_ctx;
|
||||
int mt_vcpu;
|
||||
} mt_vmm_info[VM_MAXCPU];
|
||||
|
||||
static void
|
||||
usage(int code)
|
||||
{
|
||||
|
||||
fprintf(stderr,
|
||||
"Usage: %s [-aehABHIP][-g <gdb port>][-z <hz>][-s <pci>]"
|
||||
"[-S <pci>][-p pincpu][-n <pci>][-m lowmem][-M highmem] <vm>\n"
|
||||
" -a: local apic is in XAPIC mode (default is X2APIC)\n"
|
||||
" -A: create an ACPI table\n"
|
||||
" -g: gdb port (default is %d and 0 means don't open)\n"
|
||||
" -c: # cpus (default 1)\n"
|
||||
" -p: pin vcpu 'n' to host cpu 'pincpu + n'\n"
|
||||
" -B: inject breakpoint exception on vm entry\n"
|
||||
" -H: vmexit from the guest on hlt\n"
|
||||
" -I: present an ioapic to the guest\n"
|
||||
" -P: vmexit from the guest on pause\n"
|
||||
" -e: exit on unhandled i/o access\n"
|
||||
" -h: help\n"
|
||||
" -z: guest hz (default is %d)\n"
|
||||
" -s: <slot,driver,configinfo> PCI slot config\n"
|
||||
" -S: <slot,driver,configinfo> legacy PCI slot config\n"
|
||||
" -m: lowmem in MB\n"
|
||||
" -M: highmem in MB\n"
|
||||
" -x: mux vcpus to 1 hcpu\n"
|
||||
" -t: mux vcpu timeslice hz (default %d)\n",
|
||||
progname, DEFAULT_GDB_PORT, DEFAULT_GUEST_HZ,
|
||||
DEFAULT_GUEST_TSLICE);
|
||||
exit(code);
|
||||
}
|
||||
|
||||
void *
|
||||
paddr_guest2host(uintptr_t gaddr)
|
||||
{
|
||||
if (lomem_sz == 0)
|
||||
return (NULL);
|
||||
|
||||
if (gaddr < lomem_sz) {
|
||||
return ((void *)(lomem_addr + gaddr));
|
||||
} else if (gaddr >= 4*GB && gaddr < (4*GB + himem_sz)) {
|
||||
return ((void *)(himem_addr + gaddr - 4*GB));
|
||||
} else
|
||||
return (NULL);
|
||||
}
|
||||
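A worked example of the translation above, with made-up sizes (lomem_sz = 2GB, himem_sz = 1GB): guest address 0x1000 maps to lomem_addr + 0x1000, anything from lomem_sz up to 4GB is the unbacked PCI hole and yields NULL, and guest 4GB + 0x1000 maps to himem_addr + 0x1000. The toy program below reproduces just the range checks:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define EX_GB	(1024ULL * 1024 * 1024)

static const uint64_t ex_lomem_sz = 2 * EX_GB, ex_himem_sz = 1 * EX_GB;

/* Offset into the backing region, or -1 for the unbacked hole. */
static int64_t
guest2host_offset(uint64_t gaddr)
{
	if (gaddr < ex_lomem_sz)
		return ((int64_t)gaddr);		/* lomem_addr + gaddr */
	if (gaddr >= 4 * EX_GB && gaddr < 4 * EX_GB + ex_himem_sz)
		return ((int64_t)(gaddr - 4 * EX_GB));	/* himem_addr + ... */
	return (-1);
}

int
main(void)
{
	printf("%" PRId64 " %" PRId64 " %" PRId64 "\n",
	    guest2host_offset(0x1000),			/* 4096 */
	    guest2host_offset(3 * EX_GB),		/* -1: PCI hole */
	    guest2host_offset(4 * EX_GB + 0x1000));	/* 4096 */
	return (0);
}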
|
||||
int
|
||||
fbsdrun_disable_x2apic(void)
|
||||
{
|
||||
|
||||
return (disable_x2apic);
|
||||
}
|
||||
|
||||
int
|
||||
fbsdrun_vmexit_on_pause(void)
|
||||
{
|
||||
|
||||
return (guest_vmexit_on_pause);
|
||||
}
|
||||
|
||||
int
|
||||
fbsdrun_vmexit_on_hlt(void)
|
||||
{
|
||||
|
||||
return (guest_vmexit_on_hlt);
|
||||
}
|
||||
|
||||
int
|
||||
fbsdrun_muxed(void)
|
||||
{
|
||||
|
||||
return (guest_vcpu_mux);
|
||||
}
|
||||
|
||||
static void *
|
||||
fbsdrun_start_thread(void *param)
|
||||
{
|
||||
char tname[MAXCOMLEN + 1];
|
||||
struct mt_vmm_info *mtp;
|
||||
int vcpu;
|
||||
|
||||
mtp = param;
|
||||
vcpu = mtp->mt_vcpu;
|
||||
|
||||
snprintf(tname, sizeof(tname), "%s vcpu %d", vmname, vcpu);
|
||||
pthread_set_name_np(mtp->mt_thr, tname);
|
||||
|
||||
vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);
|
||||
|
||||
/* not reached */
|
||||
exit(1);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
void
|
||||
fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (cpumask & (1 << vcpu)) {
|
||||
fprintf(stderr, "addcpu: attempting to add existing cpu %d\n",
|
||||
vcpu);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
cpumask |= 1 << vcpu;
|
||||
foundcpus++;
|
||||
|
||||
/*
|
||||
* Set up the vmexit struct to allow execution to start
|
||||
* at the given RIP
|
||||
*/
|
||||
vmexit[vcpu].rip = rip;
|
||||
vmexit[vcpu].inst_length = 0;
|
||||
|
||||
if (vcpu == BSP || !guest_vcpu_mux){
|
||||
mt_vmm_info[vcpu].mt_ctx = ctx;
|
||||
mt_vmm_info[vcpu].mt_vcpu = vcpu;
|
||||
|
||||
error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL,
|
||||
fbsdrun_start_thread, &mt_vmm_info[vcpu]);
|
||||
assert(error == 0);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
fbsdrun_get_next_cpu(int curcpu)
|
||||
{
|
||||
|
||||
/*
|
||||
* Get the next available CPU. Assumes they arrive
|
||||
* in ascending order with no gaps.
|
||||
*/
|
||||
return ((curcpu + 1) % foundcpus);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_catch_reset(void)
|
||||
{
|
||||
stats.io_reset++;
|
||||
return (VMEXIT_RESET);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_catch_inout(void)
|
||||
{
|
||||
return (VMEXIT_ABORT);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
|
||||
uint32_t eax)
|
||||
{
|
||||
#if PG_DEBUG /* put all types of debug here */
|
||||
if (eax == 0) {
|
||||
pause_noswitch = 1;
|
||||
} else if (eax == 1) {
|
||||
pause_noswitch = 0;
|
||||
} else {
|
||||
pause_noswitch = 0;
|
||||
if (eax == 5) {
|
||||
vm_set_capability(ctx, *pvcpu, VM_CAP_MTRAP_EXIT, 1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
|
||||
{
|
||||
int error;
|
||||
int bytes, port, in, out;
|
||||
uint32_t eax;
|
||||
int vcpu;
|
||||
|
||||
vcpu = *pvcpu;
|
||||
|
||||
port = vme->u.inout.port;
|
||||
bytes = vme->u.inout.bytes;
|
||||
eax = vme->u.inout.eax;
|
||||
in = vme->u.inout.in;
|
||||
out = !in;
|
||||
|
||||
/* We don't deal with these */
|
||||
if (vme->u.inout.string || vme->u.inout.rep)
|
||||
return (VMEXIT_ABORT);
|
||||
|
||||
/* Special case of guest reset */
|
||||
if (out && port == 0x64 && (uint8_t)eax == 0xFE)
|
||||
return (vmexit_catch_reset());
|
||||
|
||||
/* Extra-special case of host notifications */
|
||||
if (out && port == GUEST_NIO_PORT)
|
||||
return (vmexit_handle_notify(ctx, vme, pvcpu, eax));
|
||||
|
||||
error = emulate_inout(ctx, vcpu, in, port, bytes, &eax, strictio);
|
||||
if (error == 0 && in)
|
||||
error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax);
|
||||
|
||||
if (error == 0)
|
||||
return (VMEXIT_CONTINUE);
|
||||
else {
|
||||
fprintf(stderr, "Unhandled %s%c 0x%04x\n",
|
||||
in ? "in" : "out",
|
||||
bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port);
|
||||
return (vmexit_catch_inout());
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
|
||||
{
|
||||
fprintf(stderr, "vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code,
|
||||
*pvcpu);
|
||||
return (VMEXIT_ABORT);
|
||||
}
|
||||
|
||||
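/*
 * WRMSR emulation can indicate that execution should resume on a
 * different vcpu; when vcpus are muxed onto one thread this is reported
 * back to vm_loop() as a VMEXIT_SWITCH.
 */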
static int
|
||||
vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
|
||||
{
|
||||
int newcpu;
|
||||
int retval = VMEXIT_CONTINUE;
|
||||
|
||||
newcpu = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval);
|
||||
|
||||
if (guest_vcpu_mux && *pvcpu != newcpu) {
|
||||
retval = VMEXIT_SWITCH;
|
||||
*pvcpu = newcpu;
|
||||
}
|
||||
|
||||
return (retval);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
|
||||
{
|
||||
int newcpu;
|
||||
int retval = VMEXIT_CONTINUE;
|
||||
|
||||
newcpu = spinup_ap(ctx, *pvcpu,
|
||||
vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip);
|
||||
|
||||
if (guest_vcpu_mux && *pvcpu != newcpu) {
|
||||
retval = VMEXIT_SWITCH;
|
||||
*pvcpu = newcpu;
|
||||
}
|
||||
|
||||
return (retval);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
|
||||
fprintf(stderr, "vm exit[%d]\n", *pvcpu);
|
||||
fprintf(stderr, "\treason\t\tVMX\n");
|
||||
fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
|
||||
fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
|
||||
fprintf(stderr, "\terror\t\t%d\n", vmexit->u.vmx.error);
|
||||
fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason);
|
||||
fprintf(stderr, "\tqualification\t0x%016lx\n",
|
||||
vmexit->u.vmx.exit_qualification);
|
||||
|
||||
return (VMEXIT_ABORT);
|
||||
}
|
||||
|
||||
static int bogus_noswitch = 1;
|
||||
|
||||
static int
|
||||
vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
stats.vmexit_bogus++;
|
||||
|
||||
if (!guest_vcpu_mux || guest_ncpus == 1 || bogus_noswitch) {
|
||||
return (VMEXIT_RESTART);
|
||||
} else {
|
||||
stats.vmexit_bogus_switch++;
|
||||
vmexit->inst_length = 0;
|
||||
*pvcpu = -1;
|
||||
return (VMEXIT_SWITCH);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
stats.vmexit_hlt++;
|
||||
if (fbsdrun_muxed()) {
|
||||
*pvcpu = -1;
|
||||
return (VMEXIT_SWITCH);
|
||||
} else {
|
||||
/*
|
||||
* Just continue execution with the next instruction. We use
|
||||
* the HLT VM exit as a way to be friendly with the host
|
||||
* scheduler.
|
||||
*/
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
}
|
||||
|
||||
static int pause_noswitch;
|
||||
|
||||
static int
|
||||
vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
stats.vmexit_pause++;
|
||||
|
||||
if (fbsdrun_muxed() && !pause_noswitch) {
|
||||
*pvcpu = -1;
|
||||
return (VMEXIT_SWITCH);
|
||||
} else {
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
stats.vmexit_mtrap++;
|
||||
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
|
||||
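/*
 * Paging exits are used for accesses to emulated device memory: pass
 * the decoded instruction to the MMIO emulation code in mem.c.
 */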
static int
|
||||
vmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
int err;
|
||||
stats.vmexit_paging++;
|
||||
|
||||
err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa,
|
||||
&vmexit->u.paging.vie);
|
||||
|
||||
if (err) {
|
||||
if (err == EINVAL) {
|
||||
fprintf(stderr,
|
||||
"Failed to emulate instruction at 0x%lx\n",
|
||||
vmexit->rip);
|
||||
} else if (err == ESRCH) {
|
||||
fprintf(stderr, "Unhandled memory access to 0x%lx\n",
|
||||
vmexit->u.paging.gpa);
|
||||
}
|
||||
|
||||
return (VMEXIT_ABORT);
|
||||
}
|
||||
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
|
||||
static void
|
||||
sigalrm(int sig)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
static void
|
||||
setup_timeslice(void)
|
||||
{
|
||||
struct sigaction sa;
|
||||
struct itimerval itv;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Set up a realtime timer to generate a SIGALRM at a
|
||||
* frequency of 'guest_tslice' ticks per second.
|
||||
*/
|
||||
sigemptyset(&sa.sa_mask);
|
||||
sa.sa_flags = 0;
|
||||
sa.sa_handler = sigalrm;
|
||||
|
||||
error = sigaction(SIGALRM, &sa, NULL);
|
||||
assert(error == 0);
|
||||
|
||||
itv.it_interval.tv_sec = 0;
|
||||
itv.it_interval.tv_usec = 1000000 / guest_tslice;
|
||||
itv.it_value.tv_sec = 0;
|
||||
itv.it_value.tv_usec = 1000000 / guest_tslice;
|
||||
|
||||
error = setitimer(ITIMER_REAL, &itv, NULL);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
|
||||
[VM_EXITCODE_INOUT] = vmexit_inout,
|
||||
[VM_EXITCODE_VMX] = vmexit_vmx,
|
||||
[VM_EXITCODE_BOGUS] = vmexit_bogus,
|
||||
[VM_EXITCODE_RDMSR] = vmexit_rdmsr,
|
||||
[VM_EXITCODE_WRMSR] = vmexit_wrmsr,
|
||||
[VM_EXITCODE_MTRAP] = vmexit_mtrap,
|
||||
[VM_EXITCODE_PAGING] = vmexit_paging,
|
||||
[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
|
||||
};
|
||||
|
||||
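/*
 * Main run loop for a vcpu: run the guest, dispatch each exit to its
 * handler, and either continue, restart the same instruction, switch
 * to another vcpu (when muxed), or exit the process.
 */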
static void
|
||||
vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
|
||||
{
|
||||
int error, rc, prevcpu;
|
||||
|
||||
if (guest_vcpu_mux)
|
||||
setup_timeslice();
|
||||
|
||||
if (pincpu >= 0) {
|
||||
error = vm_set_pinning(ctx, vcpu, pincpu + vcpu);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
while (1) {
|
||||
error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]);
|
||||
if (error != 0) {
|
||||
/*
|
||||
* It is possible that 'vmmctl' or some other process
|
||||
* has transitioned the vcpu to CANNOT_RUN state right
|
||||
* before we tried to transition it to RUNNING.
|
||||
*
|
||||
* This is expected to be temporary so just retry.
|
||||
*/
|
||||
if (errno == EBUSY)
|
||||
continue;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
prevcpu = vcpu;
|
||||
rc = (*handler[vmexit[vcpu].exitcode])(ctx, &vmexit[vcpu],
|
||||
&vcpu);
|
||||
switch (rc) {
|
||||
case VMEXIT_SWITCH:
|
||||
assert(guest_vcpu_mux);
|
||||
if (vcpu == -1) {
|
||||
stats.cpu_switch_rotate++;
|
||||
vcpu = fbsdrun_get_next_cpu(prevcpu);
|
||||
} else {
|
||||
stats.cpu_switch_direct++;
|
||||
}
|
||||
/* fall through */
|
||||
case VMEXIT_CONTINUE:
|
||||
rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length;
|
||||
break;
|
||||
case VMEXIT_RESTART:
|
||||
rip = vmexit[vcpu].rip;
|
||||
break;
|
||||
case VMEXIT_RESET:
|
||||
exit(0);
|
||||
default:
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
|
||||
}
|
||||
|
||||
static int
|
||||
num_vcpus_allowed(struct vmctx *ctx)
|
||||
{
|
||||
int tmp, error;
|
||||
|
||||
error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp);
|
||||
|
||||
/*
|
||||
* The guest is allowed to spinup more than one processor only if the
|
||||
* UNRESTRICTED_GUEST capability is available.
|
||||
*/
|
||||
if (error == 0)
|
||||
return (VM_MAXCPU);
|
||||
else
|
||||
return (1);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int c, error, gdb_port, inject_bkpt, tmp, err, ioapic, bvmcons;
|
||||
int max_vcpus;
|
||||
struct vmctx *ctx;
|
||||
uint64_t rip;
|
||||
|
||||
bvmcons = 0;
|
||||
inject_bkpt = 0;
|
||||
progname = basename(argv[0]);
|
||||
gdb_port = DEFAULT_GDB_PORT;
|
||||
guest_ncpus = 1;
|
||||
ioapic = 0;
|
||||
|
||||
while ((c = getopt(argc, argv, "abehABHIPxp:g:c:z:s:S:t:n:m:M:")) != -1) {
|
||||
switch (c) {
|
||||
case 'a':
|
||||
disable_x2apic = 1;
|
||||
break;
|
||||
case 'A':
|
||||
acpi = 1;
|
||||
break;
|
||||
case 'b':
|
||||
bvmcons = 1;
|
||||
break;
|
||||
case 'B':
|
||||
inject_bkpt = 1;
|
||||
break;
|
||||
case 'x':
|
||||
guest_vcpu_mux = 1;
|
||||
break;
|
||||
case 'p':
|
||||
pincpu = atoi(optarg);
|
||||
break;
|
||||
case 'c':
|
||||
guest_ncpus = atoi(optarg);
|
||||
break;
|
||||
case 'g':
|
||||
gdb_port = atoi(optarg);
|
||||
break;
|
||||
case 'z':
|
||||
guest_hz = atoi(optarg);
|
||||
break;
|
||||
case 't':
|
||||
guest_tslice = atoi(optarg);
|
||||
break;
|
||||
case 's':
|
||||
pci_parse_slot(optarg, 0);
|
||||
break;
|
||||
case 'S':
|
||||
pci_parse_slot(optarg, 1);
|
||||
break;
|
||||
case 'm':
|
||||
lomem_sz = strtoul(optarg, NULL, 0) * MB;
|
||||
break;
|
||||
case 'M':
|
||||
himem_sz = strtoul(optarg, NULL, 0) * MB;
|
||||
break;
|
||||
case 'H':
|
||||
guest_vmexit_on_hlt = 1;
|
||||
break;
|
||||
case 'I':
|
||||
ioapic = 1;
|
||||
break;
|
||||
case 'P':
|
||||
guest_vmexit_on_pause = 1;
|
||||
break;
|
||||
case 'e':
|
||||
strictio = 1;
|
||||
break;
|
||||
case 'h':
|
||||
usage(0);
|
||||
default:
|
||||
usage(1);
|
||||
}
|
||||
}
|
||||
argc -= optind;
|
||||
argv += optind;
|
||||
|
||||
if (argc != 1)
|
||||
usage(1);
|
||||
|
||||
/* No need to mux if guest is uni-processor */
|
||||
if (guest_ncpus <= 1)
|
||||
guest_vcpu_mux = 0;
|
||||
|
||||
/* vmexit on hlt if guest is muxed */
|
||||
if (guest_vcpu_mux) {
|
||||
guest_vmexit_on_hlt = 1;
|
||||
guest_vmexit_on_pause = 1;
|
||||
}
|
||||
|
||||
vmname = argv[0];
|
||||
|
||||
ctx = vm_open(vmname);
|
||||
if (ctx == NULL) {
|
||||
perror("vm_open");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
max_vcpus = num_vcpus_allowed(ctx);
|
||||
if (guest_ncpus > max_vcpus) {
|
||||
fprintf(stderr, "%d vCPUs requested but only %d available\n",
|
||||
guest_ncpus, max_vcpus);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (fbsdrun_vmexit_on_hlt()) {
|
||||
err = vm_get_capability(ctx, BSP, VM_CAP_HALT_EXIT, &tmp);
|
||||
if (err < 0) {
|
||||
fprintf(stderr, "VM exit on HLT not supported\n");
|
||||
exit(1);
|
||||
}
|
||||
vm_set_capability(ctx, BSP, VM_CAP_HALT_EXIT, 1);
|
||||
handler[VM_EXITCODE_HLT] = vmexit_hlt;
|
||||
}
|
||||
|
||||
if (fbsdrun_vmexit_on_pause()) {
|
||||
/*
|
||||
* pause exit support required for this mode
|
||||
*/
|
||||
err = vm_get_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, &tmp);
|
||||
if (err < 0) {
|
||||
fprintf(stderr,
|
||||
"SMP mux requested, no pause support\n");
|
||||
exit(1);
|
||||
}
|
||||
vm_set_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, 1);
|
||||
handler[VM_EXITCODE_PAUSE] = vmexit_pause;
|
||||
}
|
||||
|
||||
if (fbsdrun_disable_x2apic())
|
||||
err = vm_set_x2apic_state(ctx, BSP, X2APIC_DISABLED);
|
||||
else
|
||||
err = vm_set_x2apic_state(ctx, BSP, X2APIC_ENABLED);
|
||||
|
||||
if (err) {
|
||||
fprintf(stderr, "Unable to set x2apic state (%d)\n", err);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (lomem_sz != 0) {
|
||||
lomem_addr = vm_map_memory(ctx, 0, lomem_sz);
|
||||
if (lomem_addr == (char *) MAP_FAILED) {
|
||||
lomem_sz = 0;
|
||||
} else if (himem_sz != 0) {
|
||||
himem_addr = vm_map_memory(ctx, 4*GB, himem_sz);
|
||||
if (himem_addr == (char *) MAP_FAILED) {
|
||||
lomem_sz = 0;
|
||||
himem_sz = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
init_inout();
|
||||
init_pci(ctx);
|
||||
if (ioapic)
|
||||
ioapic_init(0);
|
||||
|
||||
if (gdb_port != 0)
|
||||
init_dbgport(gdb_port);
|
||||
|
||||
if (bvmcons)
|
||||
init_bvmcons();
|
||||
|
||||
error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
|
||||
assert(error == 0);
|
||||
|
||||
if (inject_bkpt) {
|
||||
error = vm_inject_event(ctx, BSP, VM_HW_EXCEPTION, IDT_BP);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* build the guest tables, MP etc.
|
||||
*/
|
||||
mptable_build(ctx, guest_ncpus, ioapic);
|
||||
|
||||
if (acpi) {
|
||||
error = acpi_build(ctx, guest_ncpus, ioapic);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add CPU 0
|
||||
*/
|
||||
fbsdrun_addcpu(ctx, BSP, rip);
|
||||
|
||||
/*
|
||||
* Head off to the main event dispatch loop
|
||||
*/
|
||||
mevent_dispatch();
|
||||
|
||||
exit(1);
|
||||
}
|
53
usr.sbin/bhyve/bhyverun.h
Normal file
53
usr.sbin/bhyve/bhyverun.h
Normal file
@ -0,0 +1,53 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _FBSDRUN_H_
|
||||
#define _FBSDRUN_H_
|
||||
|
||||
#ifndef CTASSERT /* Allow lint to override */
|
||||
#define CTASSERT(x) _CTASSERT(x, __LINE__)
|
||||
#define _CTASSERT(x, y) __CTASSERT(x, y)
|
||||
#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1]
|
||||
#endif
|
||||
|
||||
struct vmctx;
|
||||
extern int guest_hz;
|
||||
extern int guest_tslice;
|
||||
extern int guest_ncpus;
|
||||
extern char *vmname;
|
||||
|
||||
extern u_long lomem_sz, himem_sz;
|
||||
|
||||
void *paddr_guest2host(uintptr_t);
|
||||
|
||||
void fbsdrun_addcpu(struct vmctx *ctx, int cpu, uint64_t rip);
|
||||
int fbsdrun_muxed(void);
|
||||
int fbsdrun_vmexit_on_hlt(void);
|
||||
int fbsdrun_vmexit_on_pause(void);
|
||||
int fbsdrun_disable_x2apic(void);
|
||||
#endif
|
140
usr.sbin/bhyve/consport.c
Normal file
140
usr.sbin/bhyve/consport.c
Normal file
@ -0,0 +1,140 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/select.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <termios.h>
|
||||
#include <unistd.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
#define BVM_CONSOLE_PORT 0x220
|
||||
#define BVM_CONS_SIG ('b' << 8 | 'v')
|
||||
|
||||
static struct termios tio_orig, tio_new;
|
||||
|
||||
static void
|
||||
ttyclose(void)
|
||||
{
|
||||
tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig);
|
||||
}
|
||||
|
||||
static void
|
||||
ttyopen(void)
|
||||
{
|
||||
tcgetattr(STDIN_FILENO, &tio_orig);
|
||||
|
||||
cfmakeraw(&tio_new);
|
||||
tcsetattr(STDIN_FILENO, TCSANOW, &tio_new);
|
||||
|
||||
atexit(ttyclose);
|
||||
}
|
||||
|
||||
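/*
 * Poll stdin with a zero timeout so that console reads never block.
 */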
static bool
|
||||
tty_char_available(void)
|
||||
{
|
||||
fd_set rfds;
|
||||
struct timeval tv;
|
||||
|
||||
FD_ZERO(&rfds);
|
||||
FD_SET(STDIN_FILENO, &rfds);
|
||||
tv.tv_sec = 0;
|
||||
tv.tv_usec = 0;
|
||||
if (select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv) > 0) {
|
||||
return (true);
|
||||
} else {
|
||||
return (false);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
ttyread(void)
|
||||
{
|
||||
char rb;
|
||||
|
||||
if (tty_char_available()) {
|
||||
(void) read(STDIN_FILENO, &rb, 1);
|
||||
return (rb & 0xff);
|
||||
} else {
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
ttywrite(unsigned char wb)
|
||||
{
|
||||
(void) write(STDOUT_FILENO, &wb, 1);
|
||||
}
|
||||
|
||||
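/*
 * 2-byte reads return the console signature so the guest can probe for
 * the device; 4-byte accesses transfer a single character, with -1
 * returned on reads when no input is pending.
 */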
static int
|
||||
console_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
static int opened;
|
||||
|
||||
if (bytes == 2 && in) {
|
||||
*eax = BVM_CONS_SIG;
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (bytes != 4)
|
||||
return (-1);
|
||||
|
||||
if (!opened) {
|
||||
ttyopen();
|
||||
opened = 1;
|
||||
}
|
||||
|
||||
if (in)
|
||||
*eax = ttyread();
|
||||
else
|
||||
ttywrite(*eax);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static struct inout_port consport = {
|
||||
"bvmcons",
|
||||
BVM_CONSOLE_PORT,
|
||||
IOPORT_F_INOUT,
|
||||
console_handler
|
||||
};
|
||||
|
||||
void
|
||||
init_bvmcons(void)
|
||||
{
|
||||
|
||||
register_inout(&consport);
|
||||
}
|
138
usr.sbin/bhyve/dbgport.c
Normal file
138
usr.sbin/bhyve/dbgport.c
Normal file
@ -0,0 +1,138 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "inout.h"
|
||||
#include "dbgport.h"
|
||||
|
||||
#define BVM_DBG_PORT 0x224
|
||||
#define BVM_DBG_SIG ('B' << 8 | 'V')
|
||||
|
||||
static int listen_fd, conn_fd;
|
||||
|
||||
static struct sockaddr_in sin;
|
||||
|
||||
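/*
 * Relay single characters between the guest debug port and a TCP
 * connection from gdb. The first access blocks until a client connects;
 * a failed read or write drops the connection and waits for a new one.
 */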
static int
|
||||
dbg_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
char ch;
|
||||
int nwritten, nread, printonce;
|
||||
|
||||
if (bytes == 2 && in) {
|
||||
*eax = BVM_DBG_SIG;
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (bytes != 4)
|
||||
return (-1);
|
||||
|
||||
again:
|
||||
printonce = 0;
|
||||
while (conn_fd < 0) {
|
||||
if (!printonce) {
|
||||
printf("Waiting for connection from gdb\r\n");
|
||||
printonce = 1;
|
||||
}
|
||||
conn_fd = accept(listen_fd, NULL, NULL);
|
||||
if (conn_fd >= 0)
|
||||
fcntl(conn_fd, F_SETFL, O_NONBLOCK);
|
||||
else if (errno != EINTR)
|
||||
perror("accept");
|
||||
}
|
||||
|
||||
if (in) {
|
||||
nread = read(conn_fd, &ch, 1);
|
||||
if (nread == -1 && errno == EAGAIN)
|
||||
*eax = -1;
|
||||
else if (nread == 1)
|
||||
*eax = ch;
|
||||
else {
|
||||
close(conn_fd);
|
||||
conn_fd = -1;
|
||||
goto again;
|
||||
}
|
||||
} else {
|
||||
ch = *eax;
|
||||
nwritten = write(conn_fd, &ch, 1);
|
||||
if (nwritten != 1) {
|
||||
close(conn_fd);
|
||||
conn_fd = -1;
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
static struct inout_port dbgport = {
|
||||
"bvmdbg",
|
||||
BVM_DBG_PORT,
|
||||
IOPORT_F_INOUT,
|
||||
dbg_handler
|
||||
};
|
||||
|
||||
void
|
||||
init_dbgport(int sport)
|
||||
{
|
||||
conn_fd = -1;
|
||||
|
||||
if ((listen_fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
|
||||
perror("socket");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
sin.sin_len = sizeof(sin);
|
||||
sin.sin_family = AF_INET;
|
||||
sin.sin_addr.s_addr = htonl(INADDR_ANY);
|
||||
sin.sin_port = htons(sport);
|
||||
|
||||
if (bind(listen_fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
|
||||
perror("bind");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (listen(listen_fd, 1) < 0) {
|
||||
perror("listen");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
register_inout(&dbgport);
|
||||
}
|
36
usr.sbin/bhyve/dbgport.h
Normal file
36
usr.sbin/bhyve/dbgport.h
Normal file
@ -0,0 +1,36 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _DBGPORT_H_
|
||||
#define _DBGPORT_H_
|
||||
|
||||
#define DEFAULT_GDB_PORT 6466
|
||||
|
||||
void init_dbgport(int port);
|
||||
|
||||
#endif
|
65
usr.sbin/bhyve/elcr.c
Normal file
65
usr.sbin/bhyve/elcr.c
Normal file
@ -0,0 +1,65 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
/*
|
||||
* EISA interrupt Level Control Register.
|
||||
*
|
||||
* This is a 16-bit register with one bit for each of IRQ0 through IRQ15.
|
||||
* A level triggered irq is indicated by setting the corresponding bit to '1'.
|
||||
*/
|
||||
#define ELCR_PORT 0x4d0
|
||||
|
||||
static uint8_t elcr[2] = { 0x00, 0x00 };
|
||||
|
||||
static int
|
||||
elcr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
int idx;
|
||||
|
||||
if (bytes != 1)
|
||||
return (-1);
|
||||
|
||||
idx = port - ELCR_PORT;
|
||||
|
||||
if (in)
|
||||
*eax = elcr[idx];
|
||||
else
|
||||
elcr[idx] = *eax;
|
||||
|
||||
return (0);
|
||||
}
|
||||
INOUT_PORT(elcr, ELCR_PORT + 0, IOPORT_F_INOUT, elcr_handler);
|
||||
INOUT_PORT(elcr, ELCR_PORT + 1, IOPORT_F_INOUT, elcr_handler);
|
151
usr.sbin/bhyve/inout.c
Normal file
151
usr.sbin/bhyve/inout.c
Normal file
@ -0,0 +1,151 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/linker_set.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
SET_DECLARE(inout_port_set, struct inout_port);
|
||||
|
||||
#define MAX_IOPORTS (1 << 16)
|
||||
|
||||
static struct {
|
||||
const char *name;
|
||||
int flags;
|
||||
inout_func_t handler;
|
||||
void *arg;
|
||||
} inout_handlers[MAX_IOPORTS];
|
||||
|
||||
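/*
 * Fallback for ports with no registered handler: reads return all ones,
 * as on a floating bus, and writes are silently ignored.
 */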
static int
|
||||
default_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
if (in) {
|
||||
switch (bytes) {
|
||||
case 4:
|
||||
*eax = 0xffffffff;
|
||||
break;
|
||||
case 2:
|
||||
*eax = 0xffff;
|
||||
break;
|
||||
case 1:
|
||||
*eax = 0xff;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
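/*
 * Dispatch an in/out exit to the handler registered for the port. In
 * strict mode an access to an unclaimed port is treated as an error
 * instead of being absorbed by the default handler.
 */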
int
|
||||
emulate_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, int strict)
|
||||
{
|
||||
int flags;
|
||||
uint32_t mask;
|
||||
inout_func_t handler;
|
||||
void *arg;
|
||||
|
||||
assert(port < MAX_IOPORTS);
|
||||
|
||||
handler = inout_handlers[port].handler;
|
||||
|
||||
if (strict && handler == default_inout)
|
||||
return (-1);
|
||||
|
||||
if (!in) {
|
||||
switch (bytes) {
|
||||
case 1:
|
||||
mask = 0xff;
|
||||
break;
|
||||
case 2:
|
||||
mask = 0xffff;
|
||||
break;
|
||||
default:
|
||||
mask = 0xffffffff;
|
||||
break;
|
||||
}
|
||||
*eax = *eax & mask;
|
||||
}
|
||||
|
||||
flags = inout_handlers[port].flags;
|
||||
arg = inout_handlers[port].arg;
|
||||
|
||||
if ((in && (flags & IOPORT_F_IN)) || (!in && (flags & IOPORT_F_OUT)))
|
||||
return ((*handler)(ctx, vcpu, in, port, bytes, eax, arg));
|
||||
else
|
||||
return (-1);
|
||||
}
|
||||
|
||||
void
|
||||
init_inout(void)
|
||||
{
|
||||
struct inout_port **iopp, *iop;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Set up the default handler for all ports
|
||||
*/
|
||||
for (i = 0; i < MAX_IOPORTS; i++) {
|
||||
inout_handlers[i].name = "default";
|
||||
inout_handlers[i].flags = IOPORT_F_IN | IOPORT_F_OUT;
|
||||
inout_handlers[i].handler = default_inout;
|
||||
inout_handlers[i].arg = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Overwrite with specified handlers
|
||||
*/
|
||||
SET_FOREACH(iopp, inout_port_set) {
|
||||
iop = *iopp;
|
||||
assert(iop->port < MAX_IOPORTS);
|
||||
inout_handlers[iop->port].name = iop->name;
|
||||
inout_handlers[iop->port].flags = iop->flags;
|
||||
inout_handlers[iop->port].handler = iop->handler;
|
||||
inout_handlers[iop->port].arg = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
register_inout(struct inout_port *iop)
|
||||
{
|
||||
assert(iop->port < MAX_IOPORTS);
|
||||
inout_handlers[iop->port].name = iop->name;
|
||||
inout_handlers[iop->port].flags = iop->flags;
|
||||
inout_handlers[iop->port].handler = iop->handler;
|
||||
inout_handlers[iop->port].arg = iop->arg;
|
||||
|
||||
return (0);
|
||||
}
|
67
usr.sbin/bhyve/inout.h
Normal file
67
usr.sbin/bhyve/inout.h
Normal file
@ -0,0 +1,67 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _INOUT_H_
|
||||
#define _INOUT_H_
|
||||
|
||||
#include <sys/linker_set.h>
|
||||
|
||||
struct vmctx;
|
||||
|
||||
typedef int (*inout_func_t)(struct vmctx *ctx, int vcpu, int in, int port,
|
||||
int bytes, uint32_t *eax, void *arg);
|
||||
|
||||
struct inout_port {
|
||||
const char *name;
|
||||
int port;
|
||||
int flags;
|
||||
inout_func_t handler;
|
||||
void *arg;
|
||||
};
|
||||
#define IOPORT_F_IN 0x1
|
||||
#define IOPORT_F_OUT 0x2
|
||||
#define IOPORT_F_INOUT 0x3
|
||||
|
||||
#define INOUT_PORT(name, port, flags, handler) \
|
||||
static struct inout_port __CONCAT(__inout_port, __LINE__) = { \
|
||||
#name, \
|
||||
(port), \
|
||||
(flags), \
|
||||
(handler), \
|
||||
0 \
|
||||
}; \
|
||||
DATA_SET(inout_port_set, __CONCAT(__inout_port, __LINE__))
|
||||
|
||||
void init_inout(void);
|
||||
int emulate_inout(struct vmctx *, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, int strict);
|
||||
int register_inout(struct inout_port *iop);
|
||||
|
||||
void init_bvmcons(void);
|
||||
|
||||
#endif /* _INOUT_H_ */
|
324
usr.sbin/bhyve/ioapic.c
Normal file
324
usr.sbin/bhyve/ioapic.c
Normal file
@ -0,0 +1,324 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <x86/apicreg.h>
|
||||
#include <machine/vmm.h>
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include "inout.h"
|
||||
#include "mem.h"
|
||||
#include "bhyverun.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#define IOAPIC_PADDR 0xFEC00000
|
||||
|
||||
#define IOREGSEL 0x00
|
||||
#define IOWIN 0x10
|
||||
|
||||
#define REDIR_ENTRIES 16
|
||||
#define INTR_ASSERTED(ioapic, pin) ((ioapic)->pinstate[(pin)] == true)
|
||||
|
||||
struct ioapic {
|
||||
int inited;
|
||||
uint32_t id;
|
||||
uint64_t redtbl[REDIR_ENTRIES];
|
||||
bool pinstate[REDIR_ENTRIES];
|
||||
|
||||
uintptr_t paddr; /* gpa where the ioapic is mapped */
|
||||
uint32_t ioregsel;
|
||||
struct memory_region *region;
|
||||
};
|
||||
|
||||
static struct ioapic ioapics[1]; /* only a single ioapic for now */
|
||||
|
||||
static int ioapic_region_read(struct ioapic *ioapic, uintptr_t paddr,
|
||||
int size, uint64_t *data);
|
||||
static int ioapic_region_write(struct ioapic *ioapic, uintptr_t paddr,
|
||||
int size, uint64_t data);
|
||||
static int ioapic_region_handler(struct vmctx *vm, int vcpu, int dir,
|
||||
uintptr_t paddr, int size, uint64_t *val,
|
||||
void *arg1, long arg2);
|
||||
|
||||
static void
|
||||
ioapic_set_pinstate(struct vmctx *ctx, int pin, bool newstate)
|
||||
{
|
||||
int vector, apicid, vcpu;
|
||||
uint32_t low, high;
|
||||
struct ioapic *ioapic;
|
||||
|
||||
ioapic = &ioapics[0]; /* assume a single ioapic */
|
||||
|
||||
if (pin < 0 || pin >= REDIR_ENTRIES)
|
||||
return;
|
||||
|
||||
/* Nothing to do if interrupt pin has not changed state */
|
||||
if (ioapic->pinstate[pin] == newstate)
|
||||
return;
|
||||
|
||||
ioapic->pinstate[pin] = newstate; /* record it */
|
||||
|
||||
/* Nothing to do if interrupt pin is deasserted */
|
||||
if (!INTR_ASSERTED(ioapic, pin))
|
||||
return;
|
||||
|
||||
/*
|
||||
* XXX
|
||||
* We only deal with:
|
||||
* - edge triggered interrupts
|
||||
* - physical destination mode
|
||||
* - fixed delivery mode
|
||||
*/
|
||||
low = ioapic->redtbl[pin];
|
||||
high = ioapic->redtbl[pin] >> 32;
|
||||
if ((low & IOART_INTMASK) == IOART_INTMCLR &&
|
||||
(low & IOART_TRGRMOD) == IOART_TRGREDG &&
|
||||
(low & IOART_DESTMOD) == IOART_DESTPHY &&
|
||||
(low & IOART_DELMOD) == IOART_DELFIXED) {
|
||||
vector = low & IOART_INTVEC;
|
||||
apicid = high >> APIC_ID_SHIFT;
|
||||
if (apicid != 0xff) {
|
||||
/* unicast */
|
||||
vcpu = vm_apicid2vcpu(ctx, apicid);
|
||||
vm_lapic_irq(ctx, vcpu, vector);
|
||||
} else {
|
||||
/* broadcast */
|
||||
vcpu = 0;
|
||||
while (vcpu < guest_ncpus) {
|
||||
vm_lapic_irq(ctx, vcpu, vector);
|
||||
vcpu++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ioapic_deassert_pin(struct vmctx *ctx, int pin)
|
||||
{
|
||||
ioapic_set_pinstate(ctx, pin, false);
|
||||
}
|
||||
|
||||
void
|
||||
ioapic_assert_pin(struct vmctx *ctx, int pin)
|
||||
{
|
||||
ioapic_set_pinstate(ctx, pin, true);
|
||||
}
|
||||
|
||||
void
|
||||
ioapic_init(int which)
|
||||
{
|
||||
struct mem_range memp;
|
||||
struct ioapic *ioapic;
|
||||
int error;
|
||||
int i;
|
||||
|
||||
assert(which == 0);
|
||||
|
||||
ioapic = &ioapics[which];
|
||||
assert(ioapic->inited == 0);
|
||||
|
||||
bzero(ioapic, sizeof(struct ioapic));
|
||||
|
||||
/* Initialize all redirection entries to mask all interrupts */
|
||||
for (i = 0; i < REDIR_ENTRIES; i++)
|
||||
ioapic->redtbl[i] = 0x0001000000010000UL;
|
||||
|
||||
ioapic->paddr = IOAPIC_PADDR;
|
||||
|
||||
/* Register emulated memory region */
|
||||
memp.name = "ioapic";
|
||||
memp.flags = MEM_F_RW;
|
||||
memp.handler = ioapic_region_handler;
|
||||
memp.arg1 = ioapic;
|
||||
memp.arg2 = which;
|
||||
memp.base = ioapic->paddr;
|
||||
memp.size = sizeof(struct IOAPIC);
|
||||
error = register_mem(&memp);
|
||||
|
||||
assert(error == 0);
|
||||
|
||||
ioapic->inited = 1;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
ioapic_read(struct ioapic *ioapic, uint32_t addr)
|
||||
{
|
||||
int regnum, pin, rshift;
|
||||
|
||||
assert(ioapic->inited);
|
||||
|
||||
regnum = addr & 0xff;
|
||||
switch (regnum) {
|
||||
case IOAPIC_ID:
|
||||
return (ioapic->id);
|
||||
break;
|
||||
case IOAPIC_VER:
|
||||
return ((REDIR_ENTRIES << MAXREDIRSHIFT) | 0x11);
|
||||
break;
|
||||
case IOAPIC_ARB:
|
||||
return (ioapic->id);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* redirection table entries */
|
||||
if (regnum >= IOAPIC_REDTBL &&
|
||||
regnum < IOAPIC_REDTBL + REDIR_ENTRIES * 2) {
|
||||
pin = (regnum - IOAPIC_REDTBL) / 2;
|
||||
if ((regnum - IOAPIC_REDTBL) % 2)
|
||||
rshift = 32;
|
||||
else
|
||||
rshift = 0;
|
||||
|
||||
return (ioapic->redtbl[pin] >> rshift);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
ioapic_write(struct ioapic *ioapic, uint32_t addr, uint32_t data)
|
||||
{
|
||||
int regnum, pin, lshift;
|
||||
|
||||
assert(ioapic->inited);
|
||||
|
||||
regnum = addr & 0xff;
|
||||
switch (regnum) {
|
||||
case IOAPIC_ID:
|
||||
ioapic->id = data & APIC_ID_MASK;
|
||||
break;
|
||||
case IOAPIC_VER:
|
||||
case IOAPIC_ARB:
|
||||
/* readonly */
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* redirection table entries */
|
||||
if (regnum >= IOAPIC_REDTBL &&
|
||||
regnum < IOAPIC_REDTBL + REDIR_ENTRIES * 2) {
|
||||
pin = (regnum - IOAPIC_REDTBL) / 2;
|
||||
if ((regnum - IOAPIC_REDTBL) % 2)
|
||||
lshift = 32;
|
||||
else
|
||||
lshift = 0;
|
||||
|
||||
ioapic->redtbl[pin] &= ~((uint64_t)0xffffffff << lshift);
|
||||
ioapic->redtbl[pin] |= ((uint64_t)data << lshift);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
ioapic_region_read(struct ioapic *ioapic, uintptr_t paddr, int size,
|
||||
uint64_t *data)
|
||||
{
|
||||
int offset;
|
||||
|
||||
offset = paddr - ioapic->paddr;
|
||||
|
||||
/*
|
||||
* The IOAPIC specification allows 32-bit wide accesses to the
|
||||
* IOREGSEL (offset 0) and IOWIN (offset 16) registers.
|
||||
*/
|
||||
if (size != 4 || (offset != IOREGSEL && offset != IOWIN)) {
|
||||
#if 1
|
||||
printf("invalid access to ioapic%d: size %d, offset %d\n",
|
||||
(int)(ioapic - ioapics), size, offset);
|
||||
#endif
|
||||
*data = 0;
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (offset == IOREGSEL)
|
||||
*data = ioapic->ioregsel;
|
||||
else
|
||||
*data = ioapic_read(ioapic, ioapic->ioregsel);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
ioapic_region_write(struct ioapic *ioapic, uintptr_t paddr, int size,
|
||||
uint64_t data)
|
||||
{
|
||||
int offset;
|
||||
|
||||
offset = paddr - ioapic->paddr;
|
||||
|
||||
/*
|
||||
* The ioapic specification allows 32-bit wide accesses to the
|
||||
* IOREGSEL (offset 0) and IOWIN (offset 16) registers.
|
||||
*/
|
||||
if (size != 4 || (offset != IOREGSEL && offset != IOWIN)) {
|
||||
#if 1
|
||||
printf("invalid access to ioapic%d: size %d, offset %d\n",
|
||||
(int)(ioapic - ioapics), size, offset);
|
||||
#endif
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (offset == IOREGSEL)
|
||||
ioapic->ioregsel = data;
|
||||
else
|
||||
ioapic_write(ioapic, ioapic->ioregsel, data);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
ioapic_region_handler(struct vmctx *vm, int vcpu, int dir, uintptr_t paddr,
|
||||
int size, uint64_t *val, void *arg1, long arg2)
|
||||
{
|
||||
struct ioapic *ioapic;
|
||||
int which;
|
||||
|
||||
ioapic = arg1;
|
||||
which = arg2;
|
||||
|
||||
assert(ioapic == &ioapics[which]);
|
||||
|
||||
if (dir == MEM_F_READ)
|
||||
ioapic_region_read(ioapic, paddr, size, val);
|
||||
else
|
||||
ioapic_region_write(ioapic, paddr, size, *val);
|
||||
|
||||
return (0);
|
||||
}
|
38
usr.sbin/bhyve/ioapic.h
Normal file
38
usr.sbin/bhyve/ioapic.h
Normal file
@ -0,0 +1,38 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _IOAPIC_H_
|
||||
#define _IOAPIC_H_
|
||||
|
||||
struct vmctx;
|
||||
|
||||
void ioapic_init(int num);
|
||||
void ioapic_deassert_pin(struct vmctx *ctx, int pin);
|
||||
void ioapic_assert_pin(struct vmctx *ctx, int pin);
|
||||
|
||||
#endif
|
218
usr.sbin/bhyve/mem.c
Normal file
218
usr.sbin/bhyve/mem.c
Normal file
@ -0,0 +1,218 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Memory ranges are represented with an RB tree. On insertion, the range
|
||||
* is checked for overlaps. On lookup, the key's base and limit are both set
|
||||
* to the lookup address, so the comparator matches any range containing it.
|
||||
*
|
||||
* It is assumed that all setup of ranges takes place in single-threaded
|
||||
* mode before vCPUs have been started. As such, no locks are used on the
|
||||
* RB tree. If this is no longer the case, then a r/w lock could be used,
|
||||
* with readers on the lookup and a writer if the tree needs to be changed
|
||||
* (and per vCPU caches flushed)
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/tree.h>
|
||||
#include <sys/errno.h>
|
||||
#include <machine/vmm.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "mem.h"
|
||||
|
||||
struct mmio_rb_range {
|
||||
RB_ENTRY(mmio_rb_range) mr_link; /* RB tree links */
|
||||
struct mem_range mr_param;
|
||||
uint64_t mr_base;
|
||||
uint64_t mr_end;
|
||||
};
|
||||
|
||||
struct mmio_rb_tree;
|
||||
RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
|
||||
|
||||
RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rbroot;
|
||||
|
||||
/*
|
||||
* Per-vCPU cache. Since most accesses from a vCPU will be to
|
||||
* consecutive addresses in a range, it makes sense to cache the
|
||||
* result of a lookup.
|
||||
*/
|
||||
static struct mmio_rb_range *mmio_hint[VM_MAXCPU];
|
||||
|
||||
static int
|
||||
mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b)
|
||||
{
|
||||
if (a->mr_end < b->mr_base)
|
||||
return (-1);
|
||||
else if (a->mr_base > b->mr_end)
|
||||
return (1);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
mmio_rb_lookup(uint64_t addr, struct mmio_rb_range **entry)
|
||||
{
|
||||
struct mmio_rb_range find, *res;
|
||||
|
||||
find.mr_base = find.mr_end = addr;
|
||||
|
||||
res = RB_FIND(mmio_rb_tree, &mmio_rbroot, &find);
|
||||
|
||||
if (res != NULL) {
|
||||
*entry = res;
|
||||
return (0);
|
||||
}
|
||||
|
||||
return (ENOENT);
|
||||
}
|
||||
|
||||
static int
|
||||
mmio_rb_add(struct mmio_rb_range *new)
|
||||
{
|
||||
struct mmio_rb_range *overlap;
|
||||
|
||||
overlap = RB_INSERT(mmio_rb_tree, &mmio_rbroot, new);
|
||||
|
||||
if (overlap != NULL) {
|
||||
#ifdef RB_DEBUG
|
||||
printf("overlap detected: new %lx:%lx, tree %lx:%lx\n",
|
||||
new->mr_base, new->mr_end,
|
||||
overlap->mr_base, overlap->mr_end);
|
||||
#endif
|
||||
|
||||
return (EEXIST);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static void
|
||||
mmio_rb_dump(void)
|
||||
{
|
||||
struct mmio_rb_range *np;
|
||||
|
||||
RB_FOREACH(np, mmio_rb_tree, &mmio_rbroot) {
|
||||
printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end,
|
||||
np->mr_param.name);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
|
||||
|
||||
static int
|
||||
mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg)
|
||||
{
|
||||
int error;
|
||||
struct mem_range *mr = arg;
|
||||
|
||||
error = (*mr->handler)(ctx, vcpu, MEM_F_READ, gpa, size,
|
||||
rval, mr->arg1, mr->arg2);
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg)
|
||||
{
|
||||
int error;
|
||||
struct mem_range *mr = arg;
|
||||
|
||||
error = (*mr->handler)(ctx, vcpu, MEM_F_WRITE, gpa, size,
|
||||
&wval, mr->arg1, mr->arg2);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie)
|
||||
{
|
||||
struct mmio_rb_range *entry;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* First check the per-vCPU cache
|
||||
*/
|
||||
if (mmio_hint[vcpu] &&
|
||||
paddr >= mmio_hint[vcpu]->mr_base &&
|
||||
paddr <= mmio_hint[vcpu]->mr_end) {
|
||||
entry = mmio_hint[vcpu];
|
||||
} else
|
||||
entry = NULL;
|
||||
|
||||
if (entry == NULL) {
|
||||
if (mmio_rb_lookup(paddr, &entry))
|
||||
return (ESRCH);
|
||||
|
||||
/* Update the per-vCPU cache */
|
||||
mmio_hint[vcpu] = entry;
|
||||
}
|
||||
|
||||
assert(entry != NULL && entry == mmio_hint[vcpu]);
|
||||
|
||||
err = vmm_emulate_instruction(ctx, vcpu, paddr, vie,
|
||||
mem_read, mem_write, &entry->mr_param);
|
||||
return (err);
|
||||
}
|
||||
|
||||
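/*
 * Register an emulated memory range by inserting it into the RB tree;
 * fails with EEXIST if it overlaps an existing range.
 */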
int
|
||||
register_mem(struct mem_range *memp)
|
||||
{
|
||||
struct mmio_rb_range *mrp;
|
||||
int err;
|
||||
|
||||
err = 0;
|
||||
|
||||
mrp = malloc(sizeof(struct mmio_rb_range));
|
||||
|
||||
if (mrp != NULL) {
|
||||
mrp->mr_param = *memp;
|
||||
mrp->mr_base = memp->base;
|
||||
mrp->mr_end = memp->base + memp->size - 1;
|
||||
|
||||
err = mmio_rb_add(mrp);
|
||||
if (err)
|
||||
free(mrp);
|
||||
} else
|
||||
err = ENOMEM;
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
void
|
||||
init_mem(void)
|
||||
{
|
||||
|
||||
RB_INIT(&mmio_rbroot);
|
||||
}
|
57
usr.sbin/bhyve/mem.h
Normal file
57
usr.sbin/bhyve/mem.h
Normal file
@ -0,0 +1,57 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _MEM_H_
|
||||
#define _MEM_H_
|
||||
|
||||
#include <sys/linker_set.h>
|
||||
|
||||
struct vmctx;
|
||||
|
||||
typedef int (*mem_func_t)(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
|
||||
int size, uint64_t *val, void *arg1, long arg2);
|
||||
|
||||
struct mem_range {
|
||||
const char *name;
|
||||
int flags;
|
||||
mem_func_t handler;
|
||||
void *arg1;
|
||||
long arg2;
|
||||
uint64_t base;
|
||||
uint64_t size;
|
||||
};
|
||||
#define MEM_F_READ 0x1
|
||||
#define MEM_F_WRITE 0x2
|
||||
#define MEM_F_RW 0x3
|
||||
|
||||
void init_mem(void);
|
||||
int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie);
|
||||
|
||||
int register_mem(struct mem_range *memp);
|
||||
|
||||
#endif /* _MEM_H_ */
|
432
usr.sbin/bhyve/mevent.c
Normal file
432
usr.sbin/bhyve/mevent.c
Normal file
@ -0,0 +1,432 @@
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * Micro event library for FreeBSD, designed for a single i/o thread
 * using kqueue, and having events be persistent by default.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <assert.h>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

#include <pthread.h>
#include <pthread_np.h>

#include "mevent.h"

#define	MEVENT_MAX	64

#define	MEV_ENABLE	1
#define	MEV_DISABLE	2
#define	MEV_DEL_PENDING	3

extern char *vmname;

static pthread_t mevent_tid;
static int mevent_pipefd[2];
static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;

struct mevent {
	void	(*me_func)(int, enum ev_type, void *);
	int	me_fd;
	enum ev_type me_type;
	void	*me_param;
	int	me_cq;
	int	me_state;
	int	me_closefd;
	LIST_ENTRY(mevent) me_list;
};

static LIST_HEAD(listhead, mevent) global_head, change_head;

static void
mevent_qlock(void)
{
	pthread_mutex_lock(&mevent_lmutex);
}

static void
mevent_qunlock(void)
{
	pthread_mutex_unlock(&mevent_lmutex);
}

static void
mevent_pipe_read(int fd, enum ev_type type, void *param)
{
	char buf[MEVENT_MAX];
	int status;

	/*
	 * Drain the pipe read side. The fd is non-blocking so this is
	 * safe to do.
	 */
	do {
		status = read(fd, buf, sizeof(buf));
	} while (status == MEVENT_MAX);
}

static void
mevent_notify(void)
{
	char c;

	/*
	 * If calling from outside the i/o thread, write a byte on the
	 * pipe to force the i/o thread to exit the blocking kevent call.
	 */
	if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) {
		write(mevent_pipefd[1], &c, 1);
	}
}

static int
mevent_kq_filter(struct mevent *mevp)
{
	int retval;

	retval = 0;

	if (mevp->me_type == EVF_READ)
		retval = EVFILT_READ;

	if (mevp->me_type == EVF_WRITE)
		retval = EVFILT_WRITE;

	return (retval);
}

static int
mevent_kq_flags(struct mevent *mevp)
{
	int ret;

	switch (mevp->me_state) {
	case MEV_ENABLE:
		ret = EV_ADD;
		break;
	case MEV_DISABLE:
		ret = EV_DISABLE;
		break;
	case MEV_DEL_PENDING:
		ret = EV_DELETE;
		break;
	}

	return (ret);
}

static int
mevent_kq_fflags(struct mevent *mevp)
{
	/* XXX nothing yet, perhaps EV_EOF for reads ? */
	return (0);
}

static int
mevent_build(int mfd, struct kevent *kev)
{
	struct mevent *mevp, *tmpp;
	int i;

	i = 0;

	mevent_qlock();

	LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
		if (mevp->me_closefd) {
			/*
			 * A close of the file descriptor will remove the
			 * event
			 */
			close(mevp->me_fd);
		} else {
			kev[i].ident = mevp->me_fd;
			kev[i].filter = mevent_kq_filter(mevp);
			kev[i].flags = mevent_kq_flags(mevp);
			kev[i].fflags = mevent_kq_fflags(mevp);
			kev[i].data = 0;
			kev[i].udata = mevp;
			i++;
		}

		mevp->me_cq = 0;
		LIST_REMOVE(mevp, me_list);

		if (mevp->me_state == MEV_DEL_PENDING) {
			free(mevp);
		} else {
			LIST_INSERT_HEAD(&global_head, mevp, me_list);
		}

		assert(i < MEVENT_MAX);
	}

	mevent_qunlock();

	return (i);
}

static void
mevent_handle(struct kevent *kev, int numev)
{
	struct mevent *mevp;
	int i;

	for (i = 0; i < numev; i++) {
		mevp = kev[i].udata;

		/* XXX check for EV_ERROR ? */

		(*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param);
	}
}

struct mevent *
mevent_add(int fd, enum ev_type type,
	   void (*func)(int, enum ev_type, void *), void *param)
{
	struct mevent *lp, *mevp;

	if (fd < 0 || func == NULL) {
		return (NULL);
	}

	mevp = NULL;

	mevent_qlock();

	/*
	 * Verify that the fd/type tuple is not present in any list
	 */
	LIST_FOREACH(lp, &global_head, me_list) {
		if (lp->me_fd == fd && lp->me_type == type) {
			goto exit;
		}
	}

	LIST_FOREACH(lp, &change_head, me_list) {
		if (lp->me_fd == fd && lp->me_type == type) {
			goto exit;
		}
	}

	/*
	 * Allocate an entry, populate it, and add it to the change list.
	 */
	mevp = malloc(sizeof(struct mevent));
	if (mevp == NULL) {
		goto exit;
	}

	memset(mevp, 0, sizeof(struct mevent));
	mevp->me_fd = fd;
	mevp->me_type = type;
	mevp->me_func = func;
	mevp->me_param = param;

	LIST_INSERT_HEAD(&change_head, mevp, me_list);
	mevp->me_cq = 1;
	mevp->me_state = MEV_ENABLE;
	mevent_notify();

exit:
	mevent_qunlock();

	return (mevp);
}

static int
mevent_update(struct mevent *evp, int newstate)
{
	/*
	 * It's not possible to enable/disable a deleted event
	 */
	if (evp->me_state == MEV_DEL_PENDING)
		return (EINVAL);

	/*
	 * No update needed if state isn't changing
	 */
	if (evp->me_state == newstate)
		return (0);

	mevent_qlock();

	evp->me_state = newstate;

	/*
	 * Place the entry onto the changed list if not already there.
	 */
	if (evp->me_cq == 0) {
		evp->me_cq = 1;
		LIST_REMOVE(evp, me_list);
		LIST_INSERT_HEAD(&change_head, evp, me_list);
		mevent_notify();
	}

	mevent_qunlock();

	return (0);
}

int
mevent_enable(struct mevent *evp)
{

	return (mevent_update(evp, MEV_ENABLE));
}

int
mevent_disable(struct mevent *evp)
{

	return (mevent_update(evp, MEV_DISABLE));
}

static int
mevent_delete_event(struct mevent *evp, int closefd)
{
	mevent_qlock();

	/*
	 * Place the entry onto the changed list if not already there, and
	 * mark as to be deleted.
	 */
	if (evp->me_cq == 0) {
		evp->me_cq = 1;
		LIST_REMOVE(evp, me_list);
		LIST_INSERT_HEAD(&change_head, evp, me_list);
		mevent_notify();
	}
	evp->me_state = MEV_DEL_PENDING;

	if (closefd)
		evp->me_closefd = 1;

	mevent_qunlock();

	return (0);
}

int
mevent_delete(struct mevent *evp)
{

	return (mevent_delete_event(evp, 0));
}

int
mevent_delete_close(struct mevent *evp)
{

	return (mevent_delete_event(evp, 1));
}

static void
mevent_set_name(void)
{
	char tname[MAXCOMLEN + 1];

	snprintf(tname, sizeof(tname), "%s mevent", vmname);
	pthread_set_name_np(mevent_tid, tname);
}

void
mevent_dispatch(void)
{
	struct kevent changelist[MEVENT_MAX];
	struct kevent eventlist[MEVENT_MAX];
	struct mevent *pipev;
	int mfd;
	int numev;
	int ret;

	mevent_tid = pthread_self();
	mevent_set_name();

	mfd = kqueue();
	assert(mfd > 0);

	/*
	 * Open the pipe that will be used for other threads to force
	 * the blocking kqueue call to exit by writing to it. Set the
	 * descriptor to non-blocking.
	 */
	ret = pipe(mevent_pipefd);
	if (ret < 0) {
		perror("pipe");
		exit(0);
	}

	/*
	 * Add internal event handler for the pipe write fd
	 */
	pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL);
	assert(pipev != NULL);

	for (;;) {
		/*
		 * Build changelist if required.
		 * XXX the changelist can be put into the blocking call
		 * to eliminate the extra syscall. Currently better for
		 * debug.
		 */
		numev = mevent_build(mfd, changelist);
		if (numev) {
			ret = kevent(mfd, changelist, numev, NULL, 0, NULL);
			if (ret == -1) {
				perror("Error return from kevent change");
			}
		}

		/*
		 * Block awaiting events
		 */
		ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
		if (ret == -1) {
			perror("Error return from kevent monitor");
		}

		/*
		 * Handle reported events
		 */
		mevent_handle(eventlist, ret);
	}
}
49
usr.sbin/bhyve/mevent.h
Normal file
@ -0,0 +1,49 @@
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _MEVENT_H_
#define	_MEVENT_H_

enum ev_type {
	EVF_READ,
	EVF_WRITE
};

struct mevent;

struct mevent *mevent_add(int fd, enum ev_type type,
			  void (*func)(int, enum ev_type, void *),
			  void *param);
int	mevent_enable(struct mevent *evp);
int	mevent_disable(struct mevent *evp);
int	mevent_delete(struct mevent *evp);
int	mevent_delete_close(struct mevent *evp);

void	mevent_dispatch(void);

#endif	/* _MEVENT_H_ */
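For readers skimming the diff, here is a minimal sketch of how a consumer drives the mevent interface declared above. It is illustrative only and is not part of this commit; the file, the stdin_ready handler and the vmname value are made up. A persistent EVF_READ event is registered once, and the calling thread then becomes the i/o thread inside mevent_dispatch().

#include <stdio.h>
#include <unistd.h>

#include "mevent.h"

/* mevent.c declares vmname as extern, so the consumer must define it */
char *vmname = "mevent-demo";

static void
stdin_ready(int fd, enum ev_type type, void *param)
{
	char buf[128];
	ssize_t n;

	/* Events are persistent: this fires every time stdin is readable */
	n = read(fd, buf, sizeof(buf));
	if (n > 0)
		write(STDOUT_FILENO, buf, n);
}

int
main(void)
{
	/* Queue the event; the dispatch loop picks it up on its first pass */
	if (mevent_add(STDIN_FILENO, EVF_READ, stdin_ready, NULL) == NULL)
		return (1);

	mevent_dispatch();	/* never returns */
	return (0);
}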
180
usr.sbin/bhyve/mevent_test.c
Normal file
@ -0,0 +1,180 @@
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * Test program for the micro event library. Set up a simple TCP echo
 * service.
 *
 *  cc mevent_test.c mevent.c -lpthread
 */

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

#include "mevent.h"

#define TEST_PORT	4321

static pthread_mutex_t accept_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t accept_condvar = PTHREAD_COND_INITIALIZER;

#define MEVENT_ECHO

#ifdef MEVENT_ECHO
struct esync {
	pthread_mutex_t	e_mt;
	pthread_cond_t	e_cond;
};

static void
echoer_callback(int fd, enum ev_type type, void *param)
{
	struct esync *sync = param;

	pthread_mutex_lock(&sync->e_mt);
	pthread_cond_signal(&sync->e_cond);
	pthread_mutex_unlock(&sync->e_mt);
}

static void *
echoer(void *param)
{
	struct esync sync;
	struct mevent *mev;
	char buf[128];
	int fd = (int)(uintptr_t) param;
	int len;

	pthread_mutex_init(&sync.e_mt, NULL);
	pthread_cond_init(&sync.e_cond, NULL);

	pthread_mutex_lock(&sync.e_mt);

	mev = mevent_add(fd, EVF_READ, echoer_callback, &sync);
	if (mev == NULL) {
		printf("Could not allocate echoer event\n");
		exit(1);
	}

	while (!pthread_cond_wait(&sync.e_cond, &sync.e_mt)) {
		len = read(fd, buf, sizeof(buf));
		if (len > 0) {
			write(fd, buf, len);
			write(0, buf, len);
		} else {
			break;
		}
	}

	mevent_delete_close(mev);

	pthread_mutex_unlock(&sync.e_mt);
	pthread_mutex_destroy(&sync.e_mt);
	pthread_cond_destroy(&sync.e_cond);
}

#else

static void *
echoer(void *param)
{
	char buf[128];
	int fd = (int)(uintptr_t) param;
	int len;

	while ((len = read(fd, buf, sizeof(buf))) > 0) {
		write(1, buf, len);
	}
}
#endif /* MEVENT_ECHO */

static void
acceptor_callback(int fd, enum ev_type type, void *param)
{
	pthread_mutex_lock(&accept_mutex);
	pthread_cond_signal(&accept_condvar);
	pthread_mutex_unlock(&accept_mutex);
}

static void *
acceptor(void *param)
{
	struct sockaddr_in sin;
	pthread_t tid;
	int news;
	int s;

	if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
		perror("socket");
		exit(1);
	}

	sin.sin_len = sizeof(sin);
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	sin.sin_port = htons(TEST_PORT);

	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
		perror("bind");
		exit(1);
	}

	if (listen(s, 1) < 0) {
		perror("listen");
		exit(1);
	}

	(void) mevent_add(s, EVF_READ, acceptor_callback, NULL);

	pthread_mutex_lock(&accept_mutex);

	while (!pthread_cond_wait(&accept_condvar, &accept_mutex)) {
		news = accept(s, NULL, NULL);
		if (news < 0) {
			perror("accept error");
		} else {
			printf("incoming connection, spawning thread\n");
			pthread_create(&tid, NULL, echoer,
				       (void *)(uintptr_t)news);
		}
	}
}

main()
{
	pthread_t tid;

	pthread_create(&tid, NULL, acceptor, NULL);

	mevent_dispatch();
}
398
usr.sbin/bhyve/mptbl.c
Normal file
@ -0,0 +1,398 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/errno.h>
|
||||
#include <x86/mptable.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "mptbl.h"
|
||||
|
||||
#define MPTABLE_BASE 0xF0000
|
||||
|
||||
#define LAPIC_PADDR 0xFEE00000
|
||||
#define LAPIC_VERSION 16
|
||||
|
||||
#define IOAPIC_PADDR 0xFEC00000
|
||||
#define IOAPIC_VERSION 0x11
|
||||
|
||||
#define MP_SPECREV 4
|
||||
#define MPFP_SIG "_MP_"
|
||||
|
||||
/* Configuration header defines */
|
||||
#define MPCH_SIG "PCMP"
|
||||
#define MPCH_OEMID "BHyVe "
|
||||
#define MPCH_OEMID_LEN 8
|
||||
#define MPCH_PRODID "Hypervisor "
|
||||
#define MPCH_PRODID_LEN 12
|
||||
|
||||
/* Processor entry defines */
|
||||
#define MPEP_SIG_FAMILY 6 /* XXX bhyve should supply this */
|
||||
#define MPEP_SIG_MODEL 26
|
||||
#define MPEP_SIG_STEPPING 5
|
||||
#define MPEP_SIG \
|
||||
((MPEP_SIG_FAMILY << 8) | \
|
||||
(MPEP_SIG_MODEL << 4) | \
|
||||
(MPEP_SIG_STEPPING))
|
||||
|
||||
#define MPEP_FEATURES (0xBFEBFBFF) /* XXX Intel i7 */
|
||||
|
||||
/* Define processor entry struct since <x86/mptable.h> gets it wrong */
|
||||
typedef struct BPROCENTRY {
|
||||
u_char type;
|
||||
u_char apic_id;
|
||||
u_char apic_version;
|
||||
u_char cpu_flags;
|
||||
uint32_t cpu_signature;
|
||||
uint32_t feature_flags;
|
||||
uint32_t reserved1;
|
||||
uint32_t reserved2;
|
||||
} *bproc_entry_ptr;
|
||||
CTASSERT(sizeof(struct BPROCENTRY) == 20);
|
||||
|
||||
/* Bus entry defines */
|
||||
#define MPE_NUM_BUSES 2
|
||||
#define MPE_BUSNAME_LEN 6
|
||||
#define MPE_BUSNAME_ISA "ISA "
|
||||
#define MPE_BUSNAME_PCI "PCI "
|
||||
|
||||
static void *oem_tbl_start;
|
||||
static int oem_tbl_size;
|
||||
|
||||
static uint8_t
|
||||
mpt_compute_checksum(void *base, size_t len)
|
||||
{
|
||||
uint8_t *bytes;
|
||||
uint8_t sum;
|
||||
|
||||
for(bytes = base, sum = 0; len > 0; len--) {
|
||||
sum += *bytes++;
|
||||
}
|
||||
|
||||
return (256 - sum);
|
||||
}
|
||||
|
||||
static void
|
||||
mpt_build_mpfp(mpfps_t mpfp, vm_paddr_t gpa)
|
||||
{
|
||||
|
||||
memset(mpfp, 0, sizeof(*mpfp));
|
||||
memcpy(mpfp->signature, MPFP_SIG, 4);
|
||||
mpfp->pap = gpa + sizeof(*mpfp);
|
||||
mpfp->length = 1;
|
||||
mpfp->spec_rev = MP_SPECREV;
|
||||
mpfp->checksum = mpt_compute_checksum(mpfp, sizeof(*mpfp));
|
||||
}
|
||||
|
||||
static void
|
||||
mpt_build_mpch(mpcth_t mpch)
|
||||
{
|
||||
|
||||
memset(mpch, 0, sizeof(*mpch));
|
||||
memcpy(mpch->signature, MPCH_SIG, 4);
|
||||
mpch->spec_rev = MP_SPECREV;
|
||||
memcpy(mpch->oem_id, MPCH_OEMID, MPCH_OEMID_LEN);
|
||||
memcpy(mpch->product_id, MPCH_PRODID, MPCH_PRODID_LEN);
|
||||
mpch->apic_address = LAPIC_PADDR;
|
||||
}
|
||||
|
||||
static void
|
||||
mpt_build_proc_entries(bproc_entry_ptr mpep, int ncpu)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ncpu; i++) {
|
||||
memset(mpep, 0, sizeof(*mpep));
|
||||
mpep->type = MPCT_ENTRY_PROCESSOR;
|
||||
mpep->apic_id = i; // XXX
|
||||
mpep->apic_version = LAPIC_VERSION;
|
||||
mpep->cpu_flags = PROCENTRY_FLAG_EN;
|
||||
if (i == 0)
|
||||
mpep->cpu_flags |= PROCENTRY_FLAG_BP;
|
||||
mpep->cpu_signature = MPEP_SIG;
|
||||
mpep->feature_flags = MPEP_FEATURES;
|
||||
mpep++;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
mpt_build_bus_entries(bus_entry_ptr mpeb)
|
||||
{
|
||||
|
||||
memset(mpeb, 0, sizeof(*mpeb));
|
||||
mpeb->type = MPCT_ENTRY_BUS;
|
||||
mpeb->bus_id = ISA;
|
||||
memcpy(mpeb->bus_type, MPE_BUSNAME_ISA, MPE_BUSNAME_LEN);
|
||||
mpeb++;
|
||||
|
||||
memset(mpeb, 0, sizeof(*mpeb));
|
||||
mpeb->type = MPCT_ENTRY_BUS;
|
||||
mpeb->bus_id = PCI;
|
||||
memcpy(mpeb->bus_type, MPE_BUSNAME_PCI, MPE_BUSNAME_LEN);
|
||||
}
|
||||
|
||||
static void
|
||||
mpt_build_ioapic_entries(io_apic_entry_ptr mpei, int id)
|
||||
{
|
||||
|
||||
memset(mpei, 0, sizeof(*mpei));
|
||||
mpei->type = MPCT_ENTRY_IOAPIC;
|
||||
mpei->apic_id = id;
|
||||
mpei->apic_version = IOAPIC_VERSION;
|
||||
mpei->apic_flags = IOAPICENTRY_FLAG_EN;
|
||||
mpei->apic_address = IOAPIC_PADDR;
|
||||
}
|
||||
|
||||
#ifdef notyet
|
||||
static void
|
||||
mpt_build_ioint_entries(struct mpe_ioint *mpeii, int num_pins, int id)
|
||||
{
|
||||
int pin;
|
||||
|
||||
/*
|
||||
* The following config is taken from kernel mptable.c
|
||||
* mptable_parse_default_config_ints(...), for now
|
||||
* just use the default config, tweek later if needed.
|
||||
*/
|
||||
|
||||
|
||||
/* Run through all 16 pins. */
|
||||
for (pin = 0; pin < num_pins; pin++) {
|
||||
memset(mpeii, 0, sizeof(*mpeii));
|
||||
mpeii->entry_type = MP_ENTRY_IOINT;
|
||||
mpeii->src_bus_id = MPE_BUSID_ISA;
|
||||
mpeii->dst_apic_id = id;
|
||||
|
||||
/*
|
||||
* All default configs route IRQs from bus 0 to the first 16
|
||||
* pins of the first I/O APIC with an APIC ID of 2.
|
||||
*/
|
||||
mpeii->dst_apic_intin = pin;
|
||||
switch (pin) {
|
||||
case 0:
|
||||
/* Pin 0 is an ExtINT pin. */
|
||||
mpeii->intr_type = MPEII_INTR_EXTINT;
|
||||
break;
|
||||
case 2:
|
||||
/* IRQ 0 is routed to pin 2. */
|
||||
mpeii->intr_type = MPEII_INTR_INT;
|
||||
mpeii->src_bus_irq = 0;
|
||||
break;
|
||||
case 5:
|
||||
case 10:
|
||||
case 11:
|
||||
/*
|
||||
* PCI Irqs set to level triggered.
|
||||
*/
|
||||
mpeii->intr_flags = MPEII_FLAGS_TRIGMODE_LEVEL;
|
||||
mpeii->src_bus_id = MPE_BUSID_PCI;
|
||||
default:
|
||||
/* All other pins are identity mapped. */
|
||||
mpeii->intr_type = MPEII_INTR_INT;
|
||||
mpeii->src_bus_irq = pin;
|
||||
break;
|
||||
}
|
||||
mpeii++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#define COPYSTR(dest, src, bytes) \
|
||||
memcpy(dest, src, bytes); \
|
||||
str[bytes] = 0;
|
||||
|
||||
static void
|
||||
mptable_dump(struct mp_floating_pointer *mpfp, struct mp_config_hdr *mpch)
|
||||
{
|
||||
static char str[16];
|
||||
int i;
|
||||
char *cur;
|
||||
|
||||
union mpe {
|
||||
struct mpe_proc *proc;
|
||||
struct mpe_bus *bus;
|
||||
struct mpe_ioapic *ioapic;
|
||||
struct mpe_ioint *ioint;
|
||||
struct mpe_lint *lnit;
|
||||
char *p;
|
||||
};
|
||||
|
||||
union mpe mpe;
|
||||
|
||||
printf(" MP Floating Pointer :\n");
|
||||
COPYSTR(str, mpfp->signature, 4);
|
||||
printf("\tsignature:\t%s\n", str);
|
||||
printf("\tmpch paddr:\t%x\n", mpfp->mptable_paddr);
|
||||
printf("\tlength:\t%x\n", mpfp->length);
|
||||
printf("\tspecrec:\t%x\n", mpfp->specrev);
|
||||
printf("\tchecksum:\t%x\n", mpfp->checksum);
|
||||
printf("\tfeature1:\t%x\n", mpfp->feature1);
|
||||
printf("\tfeature2:\t%x\n", mpfp->feature2);
|
||||
printf("\tfeature3:\t%x\n", mpfp->feature3);
|
||||
printf("\tfeature4:\t%x\n", mpfp->feature4);
|
||||
|
||||
printf(" MP Configuration Header :\n");
|
||||
COPYSTR(str, mpch->signature, 4);
|
||||
printf(" signature: %s\n", str);
|
||||
printf(" length: %x\n", mpch->length);
|
||||
printf(" specrec: %x\n", mpch->specrev);
|
||||
printf(" checksum: %x\n", mpch->checksum);
|
||||
COPYSTR(str, mpch->oemid, MPCH_OEMID_LEN);
|
||||
printf(" oemid: %s\n", str);
|
||||
COPYSTR(str, mpch->prodid, MPCH_PRODID_LEN);
|
||||
printf(" prodid: %s\n", str);
|
||||
printf(" oem_ptr: %x\n", mpch->oem_ptr);
|
||||
printf(" oem_sz: %x\n", mpch->oem_sz);
|
||||
printf(" nr_entries: %x\n", mpch->nr_entries);
|
||||
printf(" apic paddr: %x\n", mpch->lapic_paddr);
|
||||
printf(" ext_length: %x\n", mpch->ext_length);
|
||||
printf(" ext_checksum: %x\n", mpch->ext_checksum);
|
||||
|
||||
cur = (char *)mpch + sizeof(*mpch);
|
||||
for (i = 0; i < mpch->nr_entries; i++) {
|
||||
mpe.p = cur;
|
||||
switch(*mpe.p) {
|
||||
case MP_ENTRY_PROC:
|
||||
printf(" MP Processor Entry :\n");
|
||||
printf(" lapic_id: %x\n", mpe.proc->lapic_id);
|
||||
printf(" lapic_version: %x\n", mpe.proc->lapic_version);
|
||||
printf(" proc_flags: %x\n", mpe.proc->proc_flags);
|
||||
printf(" proc_signature: %x\n", mpe.proc->proc_signature);
|
||||
printf(" feature_flags: %x\n", mpe.proc->feature_flags);
|
||||
cur += sizeof(struct mpe_proc);
|
||||
break;
|
||||
case MP_ENTRY_BUS:
|
||||
printf(" MP Bus Entry :\n");
|
||||
printf(" busid: %x\n", mpe.bus->busid);
|
||||
COPYSTR(str, mpe.bus->busname, MPE_BUSNAME_LEN);
|
||||
printf(" busname: %s\n", str);
|
||||
cur += sizeof(struct mpe_bus);
|
||||
break;
|
||||
case MP_ENTRY_IOAPIC:
|
||||
printf(" MP IOAPIC Entry :\n");
|
||||
printf(" ioapi_id: %x\n", mpe.ioapic->ioapic_id);
|
||||
printf(" ioapi_version: %x\n", mpe.ioapic->ioapic_version);
|
||||
printf(" ioapi_flags: %x\n", mpe.ioapic->ioapic_flags);
|
||||
printf(" ioapi_paddr: %x\n", mpe.ioapic->ioapic_paddr);
|
||||
cur += sizeof(struct mpe_ioapic);
|
||||
break;
|
||||
case MP_ENTRY_IOINT:
|
||||
printf(" MP IO Interrupt Entry :\n");
|
||||
printf(" intr_type: %x\n", mpe.ioint->intr_type);
|
||||
printf(" intr_flags: %x\n", mpe.ioint->intr_flags);
|
||||
printf(" src_bus_id: %x\n", mpe.ioint->src_bus_id);
|
||||
printf(" src_bus_irq: %x\n", mpe.ioint->src_bus_irq);
|
||||
printf(" dst_apic_id: %x\n", mpe.ioint->dst_apic_id);
|
||||
printf(" dst_apic_intin: %x\n", mpe.ioint->dst_apic_intin);
|
||||
cur += sizeof(struct mpe_ioint);
|
||||
break;
|
||||
case MP_ENTRY_LINT:
|
||||
printf(" MP Local Interrupt Entry :\n");
|
||||
cur += sizeof(struct mpe_lint);
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void
|
||||
mptable_add_oemtbl(void *tbl, int tblsz)
|
||||
{
|
||||
|
||||
oem_tbl_start = tbl;
|
||||
oem_tbl_size = tblsz;
|
||||
}
|
||||
|
||||
int
|
||||
mptable_build(struct vmctx *ctx, int ncpu, int ioapic)
|
||||
{
|
||||
mpcth_t mpch;
|
||||
bus_entry_ptr mpeb;
|
||||
io_apic_entry_ptr mpei;
|
||||
bproc_entry_ptr mpep;
|
||||
mpfps_t mpfp;
|
||||
char *curraddr;
|
||||
char *startaddr;
|
||||
|
||||
if (paddr_guest2host(0) == NULL) {
|
||||
printf("mptable requires mapped mem\n");
|
||||
return (ENOMEM);
|
||||
}
|
||||
|
||||
startaddr = curraddr = paddr_guest2host(MPTABLE_BASE);
|
||||
|
||||
mpfp = (mpfps_t)curraddr;
|
||||
mpt_build_mpfp(mpfp, MPTABLE_BASE);
|
||||
curraddr += sizeof(*mpfp);
|
||||
|
||||
mpch = (mpcth_t)curraddr;
|
||||
mpt_build_mpch(mpch);
|
||||
curraddr += sizeof(*mpch);
|
||||
|
||||
mpep = (bproc_entry_ptr)curraddr;
|
||||
mpt_build_proc_entries(mpep, ncpu);
|
||||
curraddr += sizeof(*mpep) * ncpu;
|
||||
mpch->entry_count += ncpu;
|
||||
|
||||
mpeb = (bus_entry_ptr) curraddr;
|
||||
mpt_build_bus_entries(mpeb);
|
||||
curraddr += sizeof(*mpeb) * MPE_NUM_BUSES;
|
||||
mpch->entry_count += MPE_NUM_BUSES;
|
||||
|
||||
if (ioapic) {
|
||||
mpei = (io_apic_entry_ptr)curraddr;
|
||||
mpt_build_ioapic_entries(mpei, ncpu + 1);
|
||||
curraddr += sizeof(*mpei);
|
||||
mpch->entry_count++;
|
||||
}
|
||||
|
||||
#ifdef notyet
|
||||
mpt_build_ioint_entries((struct mpe_ioint*)curraddr, MPEII_MAX_IRQ,
|
||||
ncpu + 1);
|
||||
curraddr += sizeof(struct mpe_ioint) * MPEII_MAX_IRQ;
|
||||
mpch->entry_count += MPEII_MAX_IRQ;
|
||||
#endif
|
||||
|
||||
if (oem_tbl_start) {
|
||||
mpch->oem_table_pointer = curraddr - startaddr + MPTABLE_BASE;
|
||||
mpch->oem_table_size = oem_tbl_size;
|
||||
memcpy(curraddr, oem_tbl_start, oem_tbl_size);
|
||||
}
|
||||
|
||||
mpch->base_table_length = curraddr - (char *)mpch;
|
||||
mpch->checksum = mpt_compute_checksum(mpch, sizeof(*mpch));
|
||||
|
||||
return (0);
|
||||
}
|
35
usr.sbin/bhyve/mptbl.h
Normal file
@ -0,0 +1,35 @@
/*-
 * Copyright (c) 2012 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _MPTBL_H_
#define _MPTBL_H_

int	mptable_build(struct vmctx *ctx, int ncpu, int ioapic);
void	mptable_add_oemtbl(void *tbl, int tblsz);

#endif /* _MPTBL_H_ */
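The checksum convention behind mpt_compute_checksum() in mptbl.c earlier in this diff is that the unsigned byte sum of a table region, including its checksum byte, is zero modulo 256; the function returns (256 - sum) for a region whose checksum field is still zero. A small standalone check of that property, illustrative only and not part of the commit:

#include <stddef.h>
#include <stdint.h>

/*
 * Return 1 if the byte sum of [base, base + len) is 0 mod 256, which is
 * what the MP floating pointer built by mptable_build() must satisfy
 * once its checksum byte has been filled in.
 */
static int
mpt_checksum_ok(const void *base, size_t len)
{
	const uint8_t *bytes = base;
	uint8_t sum = 0;

	while (len-- > 0)
		sum += *bytes++;

	return (sum == 0);
}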
1117
usr.sbin/bhyve/pci_emul.c
Normal file
File diff suppressed because it is too large
216
usr.sbin/bhyve/pci_emul.h
Normal file
@ -0,0 +1,216 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _PCI_EMUL_H_
|
||||
#define _PCI_EMUL_H_
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/queue.h>
|
||||
#include <sys/kernel.h>
|
||||
|
||||
#include <dev/pci/pcireg.h>
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#define PCI_BARMAX PCIR_MAX_BAR_0 /* BAR registers in a Type 0 header */
|
||||
#define PCIY_RESERVED 0x00
|
||||
|
||||
struct vmctx;
|
||||
struct pci_devinst;
|
||||
struct memory_region;
|
||||
|
||||
struct pci_devemu {
|
||||
char *pe_emu; /* Name of device emulation */
|
||||
|
||||
/* instance creation */
|
||||
int (*pe_init)(struct vmctx *, struct pci_devinst *,
|
||||
char *opts);
|
||||
|
||||
/* config space read/write callbacks */
|
||||
int (*pe_cfgwrite)(struct vmctx *ctx, int vcpu,
|
||||
struct pci_devinst *pi, int offset,
|
||||
int bytes, uint32_t val);
|
||||
int (*pe_cfgread)(struct vmctx *ctx, int vcpu,
|
||||
struct pci_devinst *pi, int offset,
|
||||
int bytes, uint32_t *retval);
|
||||
|
||||
/* BAR read/write callbacks */
|
||||
void (*pe_barwrite)(struct vmctx *ctx, int vcpu,
|
||||
struct pci_devinst *pi, int baridx,
|
||||
uint64_t offset, int size, uint64_t value);
|
||||
uint64_t (*pe_barread)(struct vmctx *ctx, int vcpu,
|
||||
struct pci_devinst *pi, int baridx,
|
||||
uint64_t offset, int size);
|
||||
};
|
||||
#define PCI_EMUL_SET(x) DATA_SET(pci_devemu_set, x);
|
||||
|
||||
enum pcibar_type {
|
||||
PCIBAR_NONE,
|
||||
PCIBAR_IO,
|
||||
PCIBAR_MEM32,
|
||||
PCIBAR_MEM64,
|
||||
PCIBAR_MEMHI64
|
||||
};
|
||||
|
||||
struct pcibar {
|
||||
enum pcibar_type type; /* io or memory */
|
||||
uint64_t size;
|
||||
uint64_t addr;
|
||||
};
|
||||
|
||||
#define PI_NAMESZ 40
|
||||
|
||||
struct msix_table_entry {
|
||||
uint64_t addr;
|
||||
uint32_t msg_data;
|
||||
uint32_t vector_control;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* In case the structure is modified to hold extra information, use a define
|
||||
* for the size that should be emulated.
|
||||
*/
|
||||
#define MSIX_TABLE_ENTRY_SIZE 16
|
||||
#define MAX_MSIX_TABLE_SIZE 2048
|
||||
|
||||
struct pci_devinst {
|
||||
struct pci_devemu *pi_d;
|
||||
struct vmctx *pi_vmctx;
|
||||
uint8_t pi_bus, pi_slot, pi_func;
|
||||
uint8_t pi_lintr_pin;
|
||||
char pi_name[PI_NAMESZ];
|
||||
uint16_t pi_iobase;
|
||||
int pi_bar_getsize;
|
||||
|
||||
struct {
|
||||
int enabled;
|
||||
int cpu;
|
||||
int vector;
|
||||
int msgnum;
|
||||
} pi_msi;
|
||||
|
||||
struct {
|
||||
int enabled;
|
||||
int table_bar;
|
||||
int pba_bar;
|
||||
size_t table_offset;
|
||||
size_t table_size;
|
||||
int table_count;
|
||||
size_t pba_offset;
|
||||
struct msix_table_entry table[MAX_MSIX_TABLE_SIZE];
|
||||
} pi_msix;
|
||||
|
||||
void *pi_arg; /* devemu-private data */
|
||||
|
||||
u_char pi_cfgdata[PCI_REGMAX + 1];
|
||||
struct pcibar pi_bar[PCI_BARMAX + 1];
|
||||
};
|
||||
|
||||
struct msicap {
|
||||
uint8_t capid;
|
||||
uint8_t nextptr;
|
||||
uint16_t msgctrl;
|
||||
uint32_t addrlo;
|
||||
uint32_t addrhi;
|
||||
uint16_t msgdata;
|
||||
} __packed;
|
||||
|
||||
struct msixcap {
|
||||
uint8_t capid;
|
||||
uint8_t nextptr;
|
||||
uint16_t msgctrl;
|
||||
uint32_t table_offset;
|
||||
uint32_t pba_offset;
|
||||
} __packed;
|
||||
|
||||
void init_pci(struct vmctx *ctx);
|
||||
void msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
|
||||
int bytes, uint32_t val);
|
||||
void msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
|
||||
int bytes, uint32_t val);
|
||||
void pci_callback(void);
|
||||
int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx,
|
||||
enum pcibar_type type, uint64_t size);
|
||||
int pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx,
|
||||
uint64_t hostbase, enum pcibar_type type, uint64_t size);
|
||||
int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum);
|
||||
int pci_is_legacy(struct pci_devinst *pi);
|
||||
void pci_generate_msi(struct pci_devinst *pi, int msgnum);
|
||||
void pci_generate_msix(struct pci_devinst *pi, int msgnum);
|
||||
void pci_lintr_assert(struct pci_devinst *pi);
|
||||
void pci_lintr_deassert(struct pci_devinst *pi);
|
||||
int pci_lintr_request(struct pci_devinst *pi, int ivec);
|
||||
int pci_msi_enabled(struct pci_devinst *pi);
|
||||
int pci_msix_enabled(struct pci_devinst *pi);
|
||||
int pci_msi_msgnum(struct pci_devinst *pi);
|
||||
void pci_parse_slot(char *opt, int legacy);
|
||||
void pci_populate_msicap(struct msicap *cap, int msgs, int nextptr);
|
||||
|
||||
static __inline void
|
||||
pci_set_cfgdata8(struct pci_devinst *pi, int offset, uint8_t val)
|
||||
{
|
||||
assert(offset <= PCI_REGMAX);
|
||||
*(uint8_t *)(pi->pi_cfgdata + offset) = val;
|
||||
}
|
||||
|
||||
static __inline void
|
||||
pci_set_cfgdata16(struct pci_devinst *pi, int offset, uint16_t val)
|
||||
{
|
||||
assert(offset <= (PCI_REGMAX - 1) && (offset & 1) == 0);
|
||||
*(uint16_t *)(pi->pi_cfgdata + offset) = val;
|
||||
}
|
||||
|
||||
static __inline void
|
||||
pci_set_cfgdata32(struct pci_devinst *pi, int offset, uint32_t val)
|
||||
{
|
||||
assert(offset <= (PCI_REGMAX - 3) && (offset & 3) == 0);
|
||||
*(uint32_t *)(pi->pi_cfgdata + offset) = val;
|
||||
}
|
||||
|
||||
static __inline uint8_t
|
||||
pci_get_cfgdata8(struct pci_devinst *pi, int offset)
|
||||
{
|
||||
assert(offset <= PCI_REGMAX);
|
||||
return (*(uint8_t *)(pi->pi_cfgdata + offset));
|
||||
}
|
||||
|
||||
static __inline uint16_t
|
||||
pci_get_cfgdata16(struct pci_devinst *pi, int offset)
|
||||
{
|
||||
assert(offset <= (PCI_REGMAX - 1) && (offset & 1) == 0);
|
||||
return (*(uint16_t *)(pi->pi_cfgdata + offset));
|
||||
}
|
||||
|
||||
static __inline uint32_t
|
||||
pci_get_cfgdata32(struct pci_devinst *pi, int offset)
|
||||
{
|
||||
assert(offset <= (PCI_REGMAX - 3) && (offset & 3) == 0);
|
||||
return (*(uint32_t *)(pi->pi_cfgdata + offset));
|
||||
}
|
||||
|
||||
#endif /* _PCI_EMUL_H_ */
|
52
usr.sbin/bhyve/pci_hostbridge.c
Normal file
@ -0,0 +1,52 @@
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "pci_emul.h"

static int
pci_hostbridge_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{

	/* config space */
	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x1275);	/* NetApp */
	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x1275);	/* NetApp */
	pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_BRIDGE);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_HOST);

	return (0);
}

struct pci_devemu pci_de_hostbridge = {
	.pe_emu = "hostbridge",
	.pe_init = pci_hostbridge_init,
};
PCI_EMUL_SET(pci_de_hostbridge);
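pci_hostbridge.c above is the smallest possible pci_devemu consumer: it only fills in config space. As a rough sketch of how the rest of the interface declared in pci_emul.h is meant to be used, a fuller model would also allocate a BAR in pe_init and service it through pe_barread/pe_barwrite. The device below is hypothetical and not part of this commit; the "scratch" name and the device ID are invented, and only the NetApp vendor ID is reused from the sources above.

#include <sys/cdefs.h>

#include "pci_emul.h"

/* One 8-byte I/O BAR whose contents are simply stored and read back */
static uint64_t scratch_reg;

static int
pci_scratch_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x1275);	/* NetApp */
	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001);	/* made-up device id */
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_OTHER);

	/* Ask the framework to place an 8-byte I/O BAR at index 0 */
	return (pci_emul_alloc_bar(pi, 0, PCIBAR_IO, 8));
}

static void
pci_scratch_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
		  int baridx, uint64_t offset, int size, uint64_t value)
{
	scratch_reg = value;
}

static uint64_t
pci_scratch_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
		 int baridx, uint64_t offset, int size)
{
	return (scratch_reg);
}

struct pci_devemu pci_de_scratch = {
	.pe_emu = "scratch",
	.pe_init = pci_scratch_init,
	.pe_barwrite = pci_scratch_write,
	.pe_barread = pci_scratch_read,
};
PCI_EMUL_SET(pci_de_scratch);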
724
usr.sbin/bhyve/pci_passthru.c
Normal file
@ -0,0 +1,724 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/pciio.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include <dev/io/iodev.h>
|
||||
#include <machine/iodev.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <vmmapi.h>
|
||||
#include "pci_emul.h"
|
||||
#include "mem.h"
|
||||
|
||||
#ifndef _PATH_DEVPCI
|
||||
#define _PATH_DEVPCI "/dev/pci"
|
||||
#endif
|
||||
|
||||
#ifndef _PATH_DEVIO
|
||||
#define _PATH_DEVIO "/dev/io"
|
||||
#endif
|
||||
|
||||
#define LEGACY_SUPPORT 1
|
||||
|
||||
#define MSIX_TABLE_BIR_MASK 7
|
||||
#define MSIX_TABLE_OFFSET_MASK (~MSIX_TABLE_BIR_MASK);
|
||||
#define MSIX_TABLE_COUNT(x) (((x) & 0x7FF) + 1)
|
||||
#define MSIX_CAPLEN 12
|
||||
|
||||
static int pcifd = -1;
|
||||
static int iofd = -1;
|
||||
|
||||
struct passthru_softc {
|
||||
struct pci_devinst *psc_pi;
|
||||
struct pcibar psc_bar[PCI_BARMAX + 1];
|
||||
struct {
|
||||
int capoff;
|
||||
int msgctrl;
|
||||
int emulated;
|
||||
} psc_msi;
|
||||
struct {
|
||||
int capoff;
|
||||
} psc_msix;
|
||||
struct pcisel psc_sel;
|
||||
};
|
||||
|
||||
static int
|
||||
msi_caplen(int msgctrl)
|
||||
{
|
||||
int len;
|
||||
|
||||
len = 10; /* minimum length of msi capability */
|
||||
|
||||
if (msgctrl & PCIM_MSICTRL_64BIT)
|
||||
len += 4;
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* Ignore the 'mask' and 'pending' bits in the MSI capability.
|
||||
* We'll let the guest manipulate them directly.
|
||||
*/
|
||||
if (msgctrl & PCIM_MSICTRL_VECTOR)
|
||||
len += 10;
|
||||
#endif
|
||||
|
||||
return (len);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
read_config(const struct pcisel *sel, long reg, int width)
|
||||
{
|
||||
struct pci_io pi;
|
||||
|
||||
bzero(&pi, sizeof(pi));
|
||||
pi.pi_sel = *sel;
|
||||
pi.pi_reg = reg;
|
||||
pi.pi_width = width;
|
||||
|
||||
if (ioctl(pcifd, PCIOCREAD, &pi) < 0)
|
||||
return (0); /* XXX */
|
||||
else
|
||||
return (pi.pi_data);
|
||||
}
|
||||
|
||||
static void
|
||||
write_config(const struct pcisel *sel, long reg, int width, uint32_t data)
|
||||
{
|
||||
struct pci_io pi;
|
||||
|
||||
bzero(&pi, sizeof(pi));
|
||||
pi.pi_sel = *sel;
|
||||
pi.pi_reg = reg;
|
||||
pi.pi_width = width;
|
||||
pi.pi_data = data;
|
||||
|
||||
(void)ioctl(pcifd, PCIOCWRITE, &pi); /* XXX */
|
||||
}
|
||||
|
||||
#ifdef LEGACY_SUPPORT
|
||||
static int
|
||||
passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr)
|
||||
{
|
||||
int capoff, i;
|
||||
struct msicap msicap;
|
||||
u_char *capdata;
|
||||
|
||||
pci_populate_msicap(&msicap, msgnum, nextptr);
|
||||
|
||||
/*
|
||||
* XXX
|
||||
* Copy the msi capability structure in the last 16 bytes of the
|
||||
* config space. This is wrong because it could shadow something
|
||||
* useful to the device.
|
||||
*/
|
||||
capoff = 256 - roundup(sizeof(msicap), 4);
|
||||
capdata = (u_char *)&msicap;
|
||||
for (i = 0; i < sizeof(msicap); i++)
|
||||
pci_set_cfgdata8(pi, capoff + i, capdata[i]);
|
||||
|
||||
return (capoff);
|
||||
}
|
||||
#endif /* LEGACY_SUPPORT */
|
||||
|
||||
static int
|
||||
cfginitmsi(struct passthru_softc *sc)
|
||||
{
|
||||
int ptr, capptr, cap, sts, caplen;
|
||||
uint32_t u32;
|
||||
struct pcisel sel;
|
||||
struct pci_devinst *pi;
|
||||
struct msixcap msixcap;
|
||||
uint32_t *msixcap_ptr;
|
||||
|
||||
pi = sc->psc_pi;
|
||||
sel = sc->psc_sel;
|
||||
|
||||
/*
|
||||
* Parse the capabilities and cache the location of the MSI
|
||||
* and MSI-X capabilities.
|
||||
*/
|
||||
sts = read_config(&sel, PCIR_STATUS, 2);
|
||||
if (sts & PCIM_STATUS_CAPPRESENT) {
|
||||
ptr = read_config(&sel, PCIR_CAP_PTR, 1);
|
||||
while (ptr != 0 && ptr != 0xff) {
|
||||
cap = read_config(&sel, ptr + PCICAP_ID, 1);
|
||||
if (cap == PCIY_MSI) {
|
||||
/*
|
||||
* Copy the MSI capability into the config
|
||||
* space of the emulated pci device
|
||||
*/
|
||||
sc->psc_msi.capoff = ptr;
|
||||
sc->psc_msi.msgctrl = read_config(&sel,
|
||||
ptr + 2, 2);
|
||||
sc->psc_msi.emulated = 0;
|
||||
caplen = msi_caplen(sc->psc_msi.msgctrl);
|
||||
capptr = ptr;
|
||||
while (caplen > 0) {
|
||||
u32 = read_config(&sel, capptr, 4);
|
||||
pci_set_cfgdata32(pi, capptr, u32);
|
||||
caplen -= 4;
|
||||
capptr += 4;
|
||||
}
|
||||
} else if (cap == PCIY_MSIX) {
|
||||
/*
|
||||
* Copy the MSI-X capability
|
||||
*/
|
||||
sc->psc_msix.capoff = ptr;
|
||||
caplen = 12;
|
||||
msixcap_ptr = (uint32_t*) &msixcap;
|
||||
capptr = ptr;
|
||||
while (caplen > 0) {
|
||||
u32 = read_config(&sel, capptr, 4);
|
||||
*msixcap_ptr = u32;
|
||||
pci_set_cfgdata32(pi, capptr, u32);
|
||||
caplen -= 4;
|
||||
capptr += 4;
|
||||
msixcap_ptr++;
|
||||
}
|
||||
}
|
||||
ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (sc->psc_msix.capoff != 0) {
|
||||
pi->pi_msix.pba_bar =
|
||||
msixcap.pba_offset & MSIX_TABLE_BIR_MASK;
|
||||
pi->pi_msix.pba_offset =
|
||||
msixcap.pba_offset & MSIX_TABLE_OFFSET_MASK;
|
||||
pi->pi_msix.table_bar =
|
||||
msixcap.table_offset & MSIX_TABLE_BIR_MASK;
|
||||
pi->pi_msix.table_offset =
|
||||
msixcap.table_offset & MSIX_TABLE_OFFSET_MASK;
|
||||
pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl);
|
||||
}
|
||||
|
||||
#ifdef LEGACY_SUPPORT
|
||||
/*
|
||||
* If the passthrough device does not support MSI then craft a
|
||||
* MSI capability for it. We link the new MSI capability at the
|
||||
* head of the list of capabilities.
|
||||
*/
|
||||
if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) {
|
||||
int origptr, msiptr;
|
||||
origptr = read_config(&sel, PCIR_CAP_PTR, 1);
|
||||
msiptr = passthru_add_msicap(pi, 1, origptr);
|
||||
sc->psc_msi.capoff = msiptr;
|
||||
sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2);
|
||||
sc->psc_msi.emulated = 1;
|
||||
pci_set_cfgdata8(pi, PCIR_CAP_PTR, msiptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Make sure one of the capabilities is present */
|
||||
if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0)
|
||||
return (-1);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
msix_table_read(struct passthru_softc *sc, uint64_t offset, int size)
|
||||
{
|
||||
struct pci_devinst *pi;
|
||||
struct msix_table_entry *entry;
|
||||
uint8_t *src8;
|
||||
uint16_t *src16;
|
||||
uint32_t *src32;
|
||||
uint64_t *src64;
|
||||
uint64_t data;
|
||||
size_t entry_offset;
|
||||
int index;
|
||||
|
||||
pi = sc->psc_pi;
|
||||
entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
|
||||
index = offset / MSIX_TABLE_ENTRY_SIZE;
|
||||
entry = &pi->pi_msix.table[index];
|
||||
|
||||
switch(size) {
|
||||
case 1:
|
||||
src8 = (uint8_t *)((void *)entry + entry_offset);
|
||||
data = *src8;
|
||||
break;
|
||||
case 2:
|
||||
src16 = (uint16_t *)((void *)entry + entry_offset);
|
||||
data = *src16;
|
||||
break;
|
||||
case 4:
|
||||
src32 = (uint32_t *)((void *)entry + entry_offset);
|
||||
data = *src32;
|
||||
break;
|
||||
case 8:
|
||||
src64 = (uint64_t *)((void *)entry + entry_offset);
|
||||
data = *src64;
|
||||
break;
|
||||
default:
|
||||
return (-1);
|
||||
}
|
||||
|
||||
return (data);
|
||||
}
|
||||
|
||||
static void
|
||||
msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc,
|
||||
uint64_t offset, int size, uint64_t data)
|
||||
{
|
||||
struct pci_devinst *pi;
|
||||
struct msix_table_entry *entry;
|
||||
uint32_t *dest;
|
||||
size_t entry_offset;
|
||||
uint32_t vector_control;
|
||||
int error, index;
|
||||
|
||||
pi = sc->psc_pi;
|
||||
entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
|
||||
index = offset / MSIX_TABLE_ENTRY_SIZE;
|
||||
entry = &pi->pi_msix.table[index];
|
||||
|
||||
/* Only 4 byte naturally-aligned writes are supported */
|
||||
assert(size == 4);
|
||||
assert(entry_offset % 4 == 0);
|
||||
|
||||
vector_control = entry->vector_control;
|
||||
dest = (uint32_t *)((void *)entry + entry_offset);
|
||||
*dest = data;
|
||||
/* If MSI-X hasn't been enabled, do nothing */
|
||||
if (pi->pi_msix.enabled) {
|
||||
/* If the entry is masked, don't set it up */
|
||||
if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 ||
|
||||
(vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
|
||||
error = vm_setup_msix(ctx, vcpu, sc->psc_sel.pc_bus,
|
||||
sc->psc_sel.pc_dev,
|
||||
sc->psc_sel.pc_func,
|
||||
index, entry->msg_data,
|
||||
entry->vector_control,
|
||||
entry->addr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
|
||||
{
|
||||
int idx;
|
||||
size_t table_size;
|
||||
vm_paddr_t start;
|
||||
size_t len;
|
||||
struct pci_devinst *pi = sc->psc_pi;
|
||||
|
||||
/*
|
||||
* If the MSI-X table BAR maps memory intended for
|
||||
* other uses, it is at least assured that the table
|
||||
* either resides in its own page within the region,
|
||||
* or it resides in a page shared with only the PBA.
|
||||
*/
|
||||
if (pi->pi_msix.pba_bar == pi->pi_msix.table_bar &&
|
||||
((pi->pi_msix.pba_offset - pi->pi_msix.table_offset) < 4096)) {
|
||||
/* Need to also emulate the PBA, not supported yet */
|
||||
printf("Unsupported MSI-X table and PBA in same page\n");
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* May need to split the BAR into 3 regions:
|
||||
* Before the MSI-X table, the MSI-X table, and after it
|
||||
* XXX for now, assume that the table is not in the middle
|
||||
*/
|
||||
table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE;
|
||||
pi->pi_msix.table_size = table_size;
|
||||
idx = pi->pi_msix.table_bar;
|
||||
|
||||
/* Round up to page size */
|
||||
table_size = (table_size + 0x1000) & ~0xFFF;
|
||||
if (pi->pi_msix.table_offset == 0) {
|
||||
/* Map everything after the MSI-X table */
|
||||
start = pi->pi_bar[idx].addr + table_size;
|
||||
len = pi->pi_bar[idx].size - table_size;
|
||||
} else {
|
||||
/* Map everything before the MSI-X table */
|
||||
start = pi->pi_bar[idx].addr;
|
||||
len = pi->pi_msix.table_offset;
|
||||
}
|
||||
return (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
|
||||
sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
|
||||
start, len, base + table_size));
|
||||
}
|
||||
|
||||
static int
|
||||
cfginitbar(struct vmctx *ctx, struct passthru_softc *sc)
|
||||
{
|
||||
int i, error;
|
||||
struct pci_devinst *pi;
|
||||
struct pci_bar_io bar;
|
||||
enum pcibar_type bartype;
|
||||
uint64_t base;
|
||||
|
||||
pi = sc->psc_pi;
|
||||
|
||||
/*
|
||||
* Initialize BAR registers
|
||||
*/
|
||||
for (i = 0; i <= PCI_BARMAX; i++) {
|
||||
bzero(&bar, sizeof(bar));
|
||||
bar.pbi_sel = sc->psc_sel;
|
||||
bar.pbi_reg = PCIR_BAR(i);
|
||||
|
||||
if (ioctl(pcifd, PCIOCGETBAR, &bar) < 0)
|
||||
continue;
|
||||
|
||||
if (PCI_BAR_IO(bar.pbi_base)) {
|
||||
bartype = PCIBAR_IO;
|
||||
base = bar.pbi_base & PCIM_BAR_IO_BASE;
|
||||
} else {
|
||||
switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) {
|
||||
case PCIM_BAR_MEM_64:
|
||||
bartype = PCIBAR_MEM64;
|
||||
break;
|
||||
default:
|
||||
bartype = PCIBAR_MEM32;
|
||||
break;
|
||||
}
|
||||
base = bar.pbi_base & PCIM_BAR_MEM_BASE;
|
||||
}
|
||||
|
||||
/* Cache information about the "real" BAR */
|
||||
sc->psc_bar[i].type = bartype;
|
||||
sc->psc_bar[i].size = bar.pbi_length;
|
||||
sc->psc_bar[i].addr = base;
|
||||
|
||||
/* Allocate the BAR in the guest I/O or MMIO space */
|
||||
error = pci_emul_alloc_pbar(pi, i, base, bartype,
|
||||
bar.pbi_length);
|
||||
if (error)
|
||||
return (-1);
|
||||
|
||||
/* The MSI-X table needs special handling */
|
||||
if (i == pi->pi_msix.table_bar) {
|
||||
error = init_msix_table(ctx, sc, base);
|
||||
if (error)
|
||||
return (-1);
|
||||
} else if (bartype != PCIBAR_IO) {
|
||||
/* Map the physical MMIO space in the guest MMIO space */
|
||||
error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
|
||||
sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
|
||||
pi->pi_bar[i].addr, pi->pi_bar[i].size, base);
|
||||
if (error)
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* 64-bit BAR takes up two slots so skip the next one.
|
||||
*/
|
||||
if (bartype == PCIBAR_MEM64) {
|
||||
i++;
|
||||
assert(i <= PCI_BARMAX);
|
||||
sc->psc_bar[i].type = PCIBAR_MEMHI64;
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
cfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func)
|
||||
{
|
||||
int error;
|
||||
struct passthru_softc *sc;
|
||||
|
||||
error = 1;
|
||||
sc = pi->pi_arg;
|
||||
|
||||
bzero(&sc->psc_sel, sizeof(struct pcisel));
|
||||
sc->psc_sel.pc_bus = bus;
|
||||
sc->psc_sel.pc_dev = slot;
|
||||
sc->psc_sel.pc_func = func;
|
||||
|
||||
if (cfginitmsi(sc) != 0)
|
||||
goto done;
|
||||
|
||||
if (cfginitbar(ctx, sc) != 0)
|
||||
goto done;
|
||||
|
||||
error = 0; /* success */
|
||||
done:
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
int bus, slot, func, error;
|
||||
struct passthru_softc *sc;
|
||||
|
||||
sc = NULL;
|
||||
error = 1;
|
||||
|
||||
if (pcifd < 0) {
|
||||
pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
|
||||
if (pcifd < 0)
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (iofd < 0) {
|
||||
iofd = open(_PATH_DEVIO, O_RDWR, 0);
|
||||
if (iofd < 0)
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (opts == NULL ||
|
||||
sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3)
|
||||
goto done;
|
||||
|
||||
if (vm_assign_pptdev(ctx, bus, slot, func) != 0)
|
||||
goto done;
|
||||
|
||||
sc = malloc(sizeof(struct passthru_softc));
|
||||
memset(sc, 0, sizeof(struct passthru_softc));
|
||||
|
||||
pi->pi_arg = sc;
|
||||
sc->psc_pi = pi;
|
||||
|
||||
/* initialize config space */
|
||||
if ((error = cfginit(ctx, pi, bus, slot, func)) != 0)
|
||||
goto done;
|
||||
|
||||
error = 0; /* success */
|
||||
done:
|
||||
if (error) {
|
||||
free(sc);
|
||||
vm_unassign_pptdev(ctx, bus, slot, func);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
bar_access(int coff)
|
||||
{
|
||||
if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1))
|
||||
return (1);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
msicap_access(struct passthru_softc *sc, int coff)
|
||||
{
|
||||
int caplen;
|
||||
|
||||
if (sc->psc_msi.capoff == 0)
|
||||
return (0);
|
||||
|
||||
caplen = msi_caplen(sc->psc_msi.msgctrl);
|
||||
|
||||
if (coff >= sc->psc_msi.capoff && coff < sc->psc_msi.capoff + caplen)
|
||||
return (1);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
msixcap_access(struct passthru_softc *sc, int coff)
|
||||
{
|
||||
if (sc->psc_msix.capoff == 0)
|
||||
return (0);
|
||||
|
||||
return (coff >= sc->psc_msix.capoff &&
|
||||
coff < sc->psc_msix.capoff + MSIX_CAPLEN);
|
||||
}
|
||||
|
||||
static int
|
||||
passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
|
||||
int coff, int bytes, uint32_t *rv)
|
||||
{
|
||||
struct passthru_softc *sc;
|
||||
|
||||
sc = pi->pi_arg;
|
||||
|
||||
/*
|
||||
 * PCI BARs and the MSI capability are emulated.
|
||||
*/
|
||||
if (bar_access(coff) || msicap_access(sc, coff))
|
||||
return (-1);
|
||||
|
||||
#ifdef LEGACY_SUPPORT
|
||||
/*
|
||||
* Emulate PCIR_CAP_PTR if this device does not support MSI capability
|
||||
* natively.
|
||||
*/
|
||||
if (sc->psc_msi.emulated) {
|
||||
if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4)
|
||||
return (-1);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Everything else just read from the device's config space */
|
||||
*rv = read_config(&sc->psc_sel, coff, bytes);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
|
||||
int coff, int bytes, uint32_t val)
|
||||
{
|
||||
int error, msix_table_entries, i;
|
||||
struct passthru_softc *sc;
|
||||
|
||||
sc = pi->pi_arg;
|
||||
|
||||
/*
|
||||
* PCI BARs are emulated
|
||||
*/
|
||||
if (bar_access(coff))
|
||||
return (-1);
|
||||
|
||||
/*
|
||||
* MSI capability is emulated
|
||||
*/
|
||||
if (msicap_access(sc, coff)) {
|
||||
msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val);
|
||||
|
||||
error = vm_setup_msi(ctx, vcpu, sc->psc_sel.pc_bus,
|
||||
sc->psc_sel.pc_dev, sc->psc_sel.pc_func, pi->pi_msi.cpu,
|
||||
pi->pi_msi.vector, pi->pi_msi.msgnum);
|
||||
if (error != 0) {
|
||||
printf("vm_setup_msi returned error %d\r\n", errno);
|
||||
exit(1);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (msixcap_access(sc, coff)) {
|
||||
msixcap_cfgwrite(pi, sc->psc_msix.capoff, coff, bytes, val);
|
||||
if (pi->pi_msix.enabled) {
|
||||
msix_table_entries = pi->pi_msix.table_count;
|
||||
for (i = 0; i < msix_table_entries; i++) {
|
||||
error = vm_setup_msix(ctx, vcpu, sc->psc_sel.pc_bus,
|
||||
sc->psc_sel.pc_dev,
|
||||
sc->psc_sel.pc_func, i,
|
||||
pi->pi_msix.table[i].msg_data,
|
||||
pi->pi_msix.table[i].vector_control,
|
||||
pi->pi_msix.table[i].addr);
|
||||
|
||||
if (error) {
|
||||
printf("vm_setup_msix returned error %d\r\n", errno);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
#ifdef LEGACY_SUPPORT
|
||||
/*
|
||||
* If this device does not support MSI natively then we cannot let
|
||||
* the guest disable legacy interrupts from the device. It is the
|
||||
* legacy interrupt that is triggering the virtual MSI to the guest.
|
||||
*/
|
||||
if (sc->psc_msi.emulated && pci_msi_enabled(pi)) {
|
||||
if (coff == PCIR_COMMAND && bytes == 2)
|
||||
val &= ~PCIM_CMD_INTxDIS;
|
||||
}
|
||||
#endif
|
||||
|
||||
write_config(&sc->psc_sel, coff, bytes, val);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
|
||||
uint64_t offset, int size, uint64_t value)
|
||||
{
|
||||
struct passthru_softc *sc;
|
||||
struct iodev_pio_req pio;
|
||||
|
||||
sc = pi->pi_arg;
|
||||
|
||||
if (pi->pi_msix.table_bar == baridx) {
|
||||
msix_table_write(ctx, vcpu, sc, offset, size, value);
|
||||
} else {
|
||||
assert(pi->pi_bar[baridx].type == PCIBAR_IO);
|
||||
bzero(&pio, sizeof(struct iodev_pio_req));
|
||||
pio.access = IODEV_PIO_WRITE;
|
||||
pio.port = sc->psc_bar[baridx].addr + offset;
|
||||
pio.width = size;
|
||||
pio.val = value;
|
||||
|
||||
(void)ioctl(iofd, IODEV_PIO, &pio);
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
passthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
|
||||
uint64_t offset, int size)
|
||||
{
|
||||
struct passthru_softc *sc;
|
||||
struct iodev_pio_req pio;
|
||||
uint64_t val;
|
||||
|
||||
sc = pi->pi_arg;
|
||||
|
||||
if (pi->pi_msix.table_bar == baridx) {
|
||||
val = msix_table_read(sc, offset, size);
|
||||
} else {
|
||||
assert(pi->pi_bar[baridx].type == PCIBAR_IO);
|
||||
bzero(&pio, sizeof(struct iodev_pio_req));
|
||||
pio.access = IODEV_PIO_READ;
|
||||
pio.port = sc->psc_bar[baridx].addr + offset;
|
||||
pio.width = size;
|
||||
pio.val = 0;
|
||||
|
||||
(void)ioctl(iofd, IODEV_PIO, &pio);
|
||||
|
||||
val = pio.val;
|
||||
}
|
||||
|
||||
return (val);
|
||||
}
|
||||
|
||||
struct pci_devemu passthru = {
|
||||
.pe_emu = "passthru",
|
||||
.pe_init = passthru_init,
|
||||
.pe_cfgwrite = passthru_cfgwrite,
|
||||
.pe_cfgread = passthru_cfgread,
|
||||
.pe_barwrite = passthru_write,
|
||||
.pe_barread = passthru_read,
|
||||
};
|
||||
PCI_EMUL_SET(passthru);
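
The cfginitbar() loop near the top of this file decodes each host BAR before mirroring it into the guest. The following is a minimal, self-contained sketch of that decode step, not part of the commit: it uses the standard PCI bit encodings directly instead of the <dev/pci/pcireg.h> macros (PCIM_BAR_IO_BASE, PCIM_BAR_MEM_TYPE, PCIM_BAR_MEM_64) that the commit relies on.

#include <stdint.h>
#include <stdio.h>

/*
 * Decode the low dword of a PCI BAR: bit 0 selects I/O space and,
 * for memory BARs, bits 2:1 select 32- vs. 64-bit addressing.
 * (Standard PCI encodings, mirroring what cfginitbar() does.)
 */
#define	BAR_IO		0x1
#define	BAR_MEM_TYPE	0x6
#define	BAR_MEM_64	0x4

static void
decode_bar(uint32_t lo)
{
	if (lo & BAR_IO)
		printf("I/O BAR, base 0x%x\n", (unsigned)(lo & 0xfffffffc));
	else if ((lo & BAR_MEM_TYPE) == BAR_MEM_64)
		printf("64-bit memory BAR (uses two BAR slots), "
		    "low base 0x%x\n", (unsigned)(lo & 0xfffffff0));
	else
		printf("32-bit memory BAR, base 0x%x\n",
		    (unsigned)(lo & 0xfffffff0));
}

int
main(void)
{
	decode_bar(0xc001);		/* I/O BAR at 0xc000 */
	decode_bar(0xfebf0004);		/* 64-bit memory BAR */
	return (0);
}

A 64-bit memory BAR holds its upper address bits in the following BAR register, which is why cfginitbar() bumps the loop index after allocating one and tags that next slot PCIBAR_MEMHI64.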
|
626
usr.sbin/bhyve/pci_uart.c
Normal file
@ -0,0 +1,626 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/select.h>
|
||||
#include <dev/ic/ns16550.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <termios.h>
|
||||
#include <unistd.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "pci_emul.h"
|
||||
#include "mevent.h"
|
||||
|
||||
#define COM1_BASE 0x3F8
|
||||
#define COM1_IRQ 4
|
||||
#define COM2_BASE 0x2F8
|
||||
#define COM2_IRQ 3
|
||||
|
||||
#define DEFAULT_RCLK 1843200
|
||||
#define DEFAULT_BAUD 9600
|
||||
|
||||
#define FCR_RX_MASK 0xC0
|
||||
|
||||
#define MCR_OUT1 0x04
|
||||
#define MCR_OUT2 0x08
|
||||
|
||||
#define MSR_DELTA_MASK 0x0f
|
||||
|
||||
#ifndef REG_SCR
|
||||
#define REG_SCR com_scr
|
||||
#endif
|
||||
|
||||
#define FIFOSZ 16
|
||||
|
||||
/*
|
||||
* Pick a PCI vid/did of a chip with a single uart at
|
||||
* BAR0, that most versions of FreeBSD can understand:
|
||||
* Siig CyberSerial 1-port.
|
||||
*/
|
||||
#define COM_VENDOR 0x131f
|
||||
#define COM_DEV 0x2000
|
||||
|
||||
static int pci_uart_stdio; /* stdio in use for i/o */
|
||||
|
||||
static int pci_uart_nldevs; /* number of legacy devices - 2 max */
|
||||
|
||||
static struct {
|
||||
uint64_t baddr;
|
||||
int vector;
|
||||
} pci_uart_lres[] = {
|
||||
{ COM1_BASE, COM1_IRQ},
|
||||
{ COM2_BASE, COM2_IRQ},
|
||||
{ 0, 0 }
|
||||
};
|
||||
|
||||
struct fifo {
|
||||
uint8_t buf[FIFOSZ];
|
||||
int rindex; /* index to read from */
|
||||
int windex; /* index to write to */
|
||||
int num; /* number of characters in the fifo */
|
||||
int size; /* size of the fifo */
|
||||
};
|
||||
|
||||
struct pci_uart_softc {
|
||||
struct pci_devinst *pi;
|
||||
pthread_mutex_t mtx; /* protects all softc elements */
|
||||
uint8_t data; /* Data register (R/W) */
|
||||
uint8_t ier; /* Interrupt enable register (R/W) */
|
||||
uint8_t lcr; /* Line control register (R/W) */
|
||||
uint8_t mcr; /* Modem control register (R/W) */
|
||||
uint8_t lsr; /* Line status register (R/W) */
|
||||
uint8_t msr; /* Modem status register (R/W) */
|
||||
uint8_t fcr; /* FIFO control register (W) */
|
||||
uint8_t scr; /* Scratch register (R/W) */
|
||||
|
||||
uint8_t dll; /* Baudrate divisor latch LSB */
|
||||
uint8_t dlh; /* Baudrate divisor latch MSB */
|
||||
|
||||
struct fifo rxfifo;
|
||||
|
||||
int opened;
|
||||
int stdio;
|
||||
bool thre_int_pending; /* THRE interrupt pending */
|
||||
};
|
||||
|
||||
static void pci_uart_drain(int fd, enum ev_type ev, void *arg);
|
||||
|
||||
static struct termios tio_orig, tio_new; /* I/O Terminals */
|
||||
|
||||
static void
|
||||
ttyclose(void)
|
||||
{
|
||||
tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig);
|
||||
}
|
||||
|
||||
static void
|
||||
ttyopen(void)
|
||||
{
|
||||
tcgetattr(STDIN_FILENO, &tio_orig);
|
||||
|
||||
cfmakeraw(&tio_new);
|
||||
tcsetattr(STDIN_FILENO, TCSANOW, &tio_new);
|
||||
|
||||
atexit(ttyclose);
|
||||
}
|
||||
|
||||
static bool
|
||||
tty_char_available(void)
|
||||
{
|
||||
fd_set rfds;
|
||||
struct timeval tv;
|
||||
|
||||
FD_ZERO(&rfds);
|
||||
FD_SET(STDIN_FILENO, &rfds);
|
||||
tv.tv_sec = 0;
|
||||
tv.tv_usec = 0;
|
||||
if (select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv) > 0 ) {
|
||||
return (true);
|
||||
} else {
|
||||
return (false);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
ttyread(void)
|
||||
{
|
||||
char rb;
|
||||
|
||||
if (tty_char_available()) {
|
||||
read(STDIN_FILENO, &rb, 1);
|
||||
return (rb & 0xff);
|
||||
} else {
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
ttywrite(unsigned char wb)
|
||||
{
|
||||
(void) write(STDIN_FILENO, &wb, 1);
|
||||
}
|
||||
|
||||
static void
|
||||
fifo_reset(struct fifo *fifo, int size)
|
||||
{
|
||||
bzero(fifo, sizeof(struct fifo));
|
||||
fifo->size = size;
|
||||
}
|
||||
|
||||
static int
|
||||
fifo_putchar(struct fifo *fifo, uint8_t ch)
|
||||
{
|
||||
|
||||
if (fifo->num < fifo->size) {
|
||||
fifo->buf[fifo->windex] = ch;
|
||||
fifo->windex = (fifo->windex + 1) % fifo->size;
|
||||
fifo->num++;
|
||||
return (0);
|
||||
} else
|
||||
return (-1);
|
||||
}
|
||||
|
||||
static int
|
||||
fifo_getchar(struct fifo *fifo)
|
||||
{
|
||||
int c;
|
||||
|
||||
if (fifo->num > 0) {
|
||||
c = fifo->buf[fifo->rindex];
|
||||
fifo->rindex = (fifo->rindex + 1) % fifo->size;
|
||||
fifo->num--;
|
||||
return (c);
|
||||
} else
|
||||
return (-1);
|
||||
}
|
||||
|
||||
static int
|
||||
fifo_numchars(struct fifo *fifo)
|
||||
{
|
||||
|
||||
return (fifo->num);
|
||||
}
|
||||
|
||||
static int
|
||||
fifo_available(struct fifo *fifo)
|
||||
{
|
||||
|
||||
return (fifo->num < fifo->size);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_uart_opentty(struct pci_uart_softc *sc)
|
||||
{
|
||||
struct mevent *mev;
|
||||
|
||||
assert(sc->opened == 0);
|
||||
assert(sc->stdio);
|
||||
|
||||
ttyopen();
|
||||
mev = mevent_add(STDIN_FILENO, EVF_READ, pci_uart_drain, sc);
|
||||
assert(mev);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_uart_legacy_res(uint64_t *bar, int *ivec)
|
||||
{
|
||||
if (pci_uart_lres[pci_uart_nldevs].baddr != 0) {
|
||||
*bar = pci_uart_lres[pci_uart_nldevs].baddr;
|
||||
*ivec = pci_uart_lres[pci_uart_nldevs].vector;
|
||||
pci_uart_nldevs++;
|
||||
} else {
|
||||
/* TODO: print warning ? */
|
||||
*bar = 0;
|
||||
*ivec= -1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The IIR returns a prioritized interrupt reason:
|
||||
* - receive data available
|
||||
* - transmit holding register empty
|
||||
* - modem status change
|
||||
*
|
||||
* Return an interrupt reason if one is available.
|
||||
*/
|
||||
static int
|
||||
pci_uart_intr_reason(struct pci_uart_softc *sc)
|
||||
{
|
||||
|
||||
if ((sc->lsr & LSR_OE) != 0 && (sc->ier & IER_ERLS) != 0)
|
||||
return (IIR_RLS);
|
||||
else if (fifo_numchars(&sc->rxfifo) > 0 && (sc->ier & IER_ERXRDY) != 0)
|
||||
return (IIR_RXTOUT);
|
||||
else if (sc->thre_int_pending && (sc->ier & IER_ETXRDY) != 0)
|
||||
return (IIR_TXRDY);
|
||||
else if ((sc->msr & MSR_DELTA_MASK) != 0 && (sc->ier & IER_EMSC) != 0)
|
||||
return (IIR_MLSC);
|
||||
else
|
||||
return (IIR_NOPEND);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_uart_reset(struct pci_uart_softc *sc)
|
||||
{
|
||||
uint16_t divisor;
|
||||
|
||||
divisor = DEFAULT_RCLK / DEFAULT_BAUD / 16;
|
||||
sc->dll = divisor;
|
||||
sc->dlh = divisor >> 16;
|
||||
|
||||
fifo_reset(&sc->rxfifo, 1); /* no fifo until enabled by software */
|
||||
}
|
||||
|
||||
/*
|
||||
* Toggle the COM port's intr pin depending on whether or not we have an
|
||||
* interrupt condition to report to the processor.
|
||||
*/
|
||||
static void
|
||||
pci_uart_toggle_intr(struct pci_uart_softc *sc)
|
||||
{
|
||||
uint8_t intr_reason;
|
||||
|
||||
intr_reason = pci_uart_intr_reason(sc);
|
||||
|
||||
if (intr_reason == IIR_NOPEND)
|
||||
pci_lintr_deassert(sc->pi);
|
||||
else
|
||||
pci_lintr_assert(sc->pi);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_uart_drain(int fd, enum ev_type ev, void *arg)
|
||||
{
|
||||
struct pci_uart_softc *sc;
|
||||
int ch;
|
||||
|
||||
sc = arg;
|
||||
|
||||
assert(fd == STDIN_FILENO);
|
||||
assert(ev == EVF_READ);
|
||||
|
||||
/*
|
||||
* This routine is called in the context of the mevent thread
|
||||
 * so take out the softc lock to protect against concurrent
|
||||
* access from a vCPU i/o exit
|
||||
*/
|
||||
pthread_mutex_lock(&sc->mtx);
|
||||
|
||||
if ((sc->mcr & MCR_LOOPBACK) != 0) {
|
||||
(void) ttyread();
|
||||
} else {
|
||||
while (fifo_available(&sc->rxfifo) &&
|
||||
((ch = ttyread()) != -1)) {
|
||||
fifo_putchar(&sc->rxfifo, ch);
|
||||
}
|
||||
pci_uart_toggle_intr(sc);
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&sc->mtx);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_uart_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
|
||||
int baridx, uint64_t offset, int size, uint64_t value)
|
||||
{
|
||||
struct pci_uart_softc *sc;
|
||||
int fifosz;
|
||||
uint8_t msr;
|
||||
|
||||
sc = pi->pi_arg;
|
||||
|
||||
assert(baridx == 0);
|
||||
assert(size == 1);
|
||||
|
||||
/* Open terminal */
|
||||
if (!sc->opened && sc->stdio) {
|
||||
pci_uart_opentty(sc);
|
||||
sc->opened = 1;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&sc->mtx);
|
||||
|
||||
/*
|
||||
* Take care of the special case DLAB accesses first
|
||||
*/
|
||||
if ((sc->lcr & LCR_DLAB) != 0) {
|
||||
if (offset == REG_DLL) {
|
||||
sc->dll = value;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (offset == REG_DLH) {
|
||||
sc->dlh = value;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
switch (offset) {
|
||||
case REG_DATA:
|
||||
if (sc->mcr & MCR_LOOPBACK) {
|
||||
if (fifo_putchar(&sc->rxfifo, value) != 0)
|
||||
sc->lsr |= LSR_OE;
|
||||
} else if (sc->stdio) {
|
||||
ttywrite(value);
|
||||
} /* else drop on floor */
|
||||
sc->thre_int_pending = true;
|
||||
break;
|
||||
case REG_IER:
|
||||
/*
|
||||
* Apply mask so that bits 4-7 are 0
|
||||
* Also enables bits 0-3 only if they're 1
|
||||
*/
|
||||
sc->ier = value & 0x0F;
|
||||
break;
|
||||
case REG_FCR:
|
||||
/*
|
||||
 * When moving from FIFO to 16450 mode and vice versa,
|
||||
* the FIFO contents are reset.
|
||||
*/
|
||||
if ((sc->fcr & FCR_ENABLE) ^ (value & FCR_ENABLE)) {
|
||||
fifosz = (value & FCR_ENABLE) ? FIFOSZ : 1;
|
||||
fifo_reset(&sc->rxfifo, fifosz);
|
||||
}
|
||||
|
||||
/*
|
||||
* The FCR_ENABLE bit must be '1' for the programming
|
||||
* of other FCR bits to be effective.
|
||||
*/
|
||||
if ((value & FCR_ENABLE) == 0) {
|
||||
sc->fcr = 0;
|
||||
} else {
|
||||
if ((value & FCR_RCV_RST) != 0)
|
||||
fifo_reset(&sc->rxfifo, FIFOSZ);
|
||||
|
||||
sc->fcr = value &
|
||||
(FCR_ENABLE | FCR_DMA | FCR_RX_MASK);
|
||||
}
|
||||
break;
|
||||
case REG_LCR:
|
||||
sc->lcr = value;
|
||||
break;
|
||||
case REG_MCR:
|
||||
/* Apply mask so that bits 5-7 are 0 */
|
||||
sc->mcr = value & 0x1F;
|
||||
|
||||
msr = 0;
|
||||
if (sc->mcr & MCR_LOOPBACK) {
|
||||
/*
|
||||
* In the loopback mode certain bits from the
|
||||
* MCR are reflected back into MSR
|
||||
*/
|
||||
if (sc->mcr & MCR_RTS)
|
||||
msr |= MSR_CTS;
|
||||
if (sc->mcr & MCR_DTR)
|
||||
msr |= MSR_DSR;
|
||||
if (sc->mcr & MCR_OUT1)
|
||||
msr |= MSR_RI;
|
||||
if (sc->mcr & MCR_OUT2)
|
||||
msr |= MSR_DCD;
|
||||
}
|
||||
|
||||
/*
|
||||
* Detect if there has been any change between the
|
||||
* previous and the new value of MSR. If there is
|
||||
* then assert the appropriate MSR delta bit.
|
||||
*/
|
||||
if ((msr & MSR_CTS) ^ (sc->msr & MSR_CTS))
|
||||
sc->msr |= MSR_DCTS;
|
||||
if ((msr & MSR_DSR) ^ (sc->msr & MSR_DSR))
|
||||
sc->msr |= MSR_DDSR;
|
||||
if ((msr & MSR_DCD) ^ (sc->msr & MSR_DCD))
|
||||
sc->msr |= MSR_DDCD;
|
||||
if ((sc->msr & MSR_RI) != 0 && (msr & MSR_RI) == 0)
|
||||
sc->msr |= MSR_TERI;
|
||||
|
||||
/*
|
||||
* Update the value of MSR while retaining the delta
|
||||
* bits.
|
||||
*/
|
||||
sc->msr &= MSR_DELTA_MASK;
|
||||
sc->msr |= msr;
|
||||
break;
|
||||
case REG_LSR:
|
||||
/*
|
||||
* Line status register is not meant to be written to
|
||||
* during normal operation.
|
||||
*/
|
||||
break;
|
||||
case REG_MSR:
|
||||
/*
|
||||
* As far as I can tell MSR is a read-only register.
|
||||
*/
|
||||
break;
|
||||
case REG_SCR:
|
||||
sc->scr = value;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
done:
|
||||
pci_uart_toggle_intr(sc);
|
||||
pthread_mutex_unlock(&sc->mtx);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
pci_uart_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
|
||||
int baridx, uint64_t offset, int size)
|
||||
{
|
||||
struct pci_uart_softc *sc;
|
||||
uint8_t iir, intr_reason;
|
||||
uint64_t reg;
|
||||
|
||||
sc = pi->pi_arg;
|
||||
|
||||
assert(baridx == 0);
|
||||
assert(size == 1);
|
||||
|
||||
/* Open terminal */
|
||||
if (!sc->opened && sc->stdio) {
|
||||
pci_uart_opentty(sc);
|
||||
sc->opened = 1;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&sc->mtx);
|
||||
|
||||
/*
|
||||
* Take care of the special case DLAB accesses first
|
||||
*/
|
||||
if ((sc->lcr & LCR_DLAB) != 0) {
|
||||
if (offset == REG_DLL) {
|
||||
reg = sc->dll;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (offset == REG_DLH) {
|
||||
reg = sc->dlh;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
switch (offset) {
|
||||
case REG_DATA:
|
||||
reg = fifo_getchar(&sc->rxfifo);
|
||||
break;
|
||||
case REG_IER:
|
||||
reg = sc->ier;
|
||||
break;
|
||||
case REG_IIR:
|
||||
iir = (sc->fcr & FCR_ENABLE) ? IIR_FIFO_MASK : 0;
|
||||
|
||||
intr_reason = pci_uart_intr_reason(sc);
|
||||
|
||||
/*
|
||||
* Deal with side effects of reading the IIR register
|
||||
*/
|
||||
if (intr_reason == IIR_TXRDY)
|
||||
sc->thre_int_pending = false;
|
||||
|
||||
iir |= intr_reason;
|
||||
|
||||
reg = iir;
|
||||
break;
|
||||
case REG_LCR:
|
||||
reg = sc->lcr;
|
||||
break;
|
||||
case REG_MCR:
|
||||
reg = sc->mcr;
|
||||
break;
|
||||
case REG_LSR:
|
||||
/* Transmitter is always ready for more data */
|
||||
sc->lsr |= LSR_TEMT | LSR_THRE;
|
||||
|
||||
/* Check for new receive data */
|
||||
if (fifo_numchars(&sc->rxfifo) > 0)
|
||||
sc->lsr |= LSR_RXRDY;
|
||||
else
|
||||
sc->lsr &= ~LSR_RXRDY;
|
||||
|
||||
reg = sc->lsr;
|
||||
|
||||
/* The LSR_OE bit is cleared on LSR read */
|
||||
sc->lsr &= ~LSR_OE;
|
||||
break;
|
||||
case REG_MSR:
|
||||
/*
|
||||
* MSR delta bits are cleared on read
|
||||
*/
|
||||
reg = sc->msr;
|
||||
sc->msr &= ~MSR_DELTA_MASK;
|
||||
break;
|
||||
case REG_SCR:
|
||||
reg = sc->scr;
|
||||
break;
|
||||
default:
|
||||
reg = 0xFF;
|
||||
break;
|
||||
}
|
||||
|
||||
done:
|
||||
pci_uart_toggle_intr(sc);
|
||||
pthread_mutex_unlock(&sc->mtx);
|
||||
|
||||
return (reg);
|
||||
}
|
||||
|
||||
static int
|
||||
pci_uart_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
struct pci_uart_softc *sc;
|
||||
uint64_t bar;
|
||||
int ivec;
|
||||
|
||||
sc = malloc(sizeof(struct pci_uart_softc));
|
||||
memset(sc, 0, sizeof(struct pci_uart_softc));
|
||||
|
||||
pi->pi_arg = sc;
|
||||
sc->pi = pi;
|
||||
|
||||
pthread_mutex_init(&sc->mtx, NULL);
|
||||
|
||||
/* initialize config space */
|
||||
pci_set_cfgdata16(pi, PCIR_DEVICE, COM_DEV);
|
||||
pci_set_cfgdata16(pi, PCIR_VENDOR, COM_VENDOR);
|
||||
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_SIMPLECOMM);
|
||||
if (pci_is_legacy(pi)) {
|
||||
pci_uart_legacy_res(&bar, &ivec);
|
||||
pci_emul_alloc_pbar(pi, 0, bar, PCIBAR_IO, 8);
|
||||
} else {
|
||||
ivec = -1;
|
||||
pci_emul_alloc_bar(pi, 0, PCIBAR_IO, 8);
|
||||
}
|
||||
pci_lintr_request(pi, ivec);
|
||||
|
||||
if (opts != NULL && !strcmp("stdio", opts) && !pci_uart_stdio) {
|
||||
pci_uart_stdio = 1;
|
||||
sc->stdio = 1;
|
||||
}
|
||||
|
||||
pci_uart_reset(sc);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
struct pci_devemu pci_de_com = {
|
||||
.pe_emu = "uart",
|
||||
.pe_init = pci_uart_init,
|
||||
.pe_barwrite = pci_uart_write,
|
||||
.pe_barread = pci_uart_read
|
||||
};
|
||||
PCI_EMUL_SET(pci_de_com);
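
The divisor latch programmed in pci_uart_reset() follows the usual 16550 relationship baud = rclk / (16 * divisor). A small worked sketch, not part of the commit, for the defaults used above (1.8432 MHz reference clock, 9600 baud); it splits the 16-bit divisor into low and high latch bytes with & 0xff and >> 8, and with these defaults the divisor is 12, so the high byte is zero.

#include <stdint.h>
#include <stdio.h>

#define	RCLK	1843200		/* UART reference clock, Hz */
#define	BAUD	9600

int
main(void)
{
	uint16_t divisor;
	uint8_t dll, dlh;

	/* 16550: baud = rclk / (16 * divisor) => divisor = rclk / (16 * baud) */
	divisor = RCLK / BAUD / 16;	/* 1843200 / 9600 / 16 = 12 */
	dll = divisor & 0xff;		/* low byte of the divisor latch  */
	dlh = divisor >> 8;		/* high byte of the divisor latch */

	printf("divisor=%d dll=%d dlh=%d\n", divisor, dll, dlh);
	return (0);
}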
|
534
usr.sbin/bhyve/pci_virtio_block.c
Normal file
@ -0,0 +1,534 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/linker_set.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/disk.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "pci_emul.h"
|
||||
#include "virtio.h"
|
||||
|
||||
#define VTBLK_RINGSZ 64
|
||||
|
||||
#define VTBLK_CFGSZ 28
|
||||
|
||||
#define VTBLK_R_CFG VTCFG_R_CFG0
|
||||
#define VTBLK_R_CFG_END VTBLK_R_CFG + VTBLK_CFGSZ -1
|
||||
#define VTBLK_R_MAX VTBLK_R_CFG_END
|
||||
|
||||
#define VTBLK_REGSZ VTBLK_R_MAX+1
|
||||
|
||||
#define VTBLK_MAXSEGS 32
|
||||
|
||||
#define VTBLK_S_OK 0
|
||||
#define VTBLK_S_IOERR 1
|
||||
|
||||
/*
|
||||
* Host capabilities
|
||||
*/
|
||||
#define VTBLK_S_HOSTCAPS \
|
||||
( 0x00000004 | /* host maximum request segments */ \
|
||||
0x10000000 ) /* supports indirect descriptors */
|
||||
|
||||
struct vring_hqueue {
|
||||
/* Internal state */
|
||||
uint16_t hq_size;
|
||||
uint16_t hq_cur_aidx; /* trails behind 'avail_idx' */
|
||||
|
||||
/* Host-context pointers to the queue */
|
||||
struct virtio_desc *hq_dtable;
|
||||
uint16_t *hq_avail_flags;
|
||||
uint16_t *hq_avail_idx; /* monotonically increasing */
|
||||
uint16_t *hq_avail_ring;
|
||||
|
||||
uint16_t *hq_used_flags;
|
||||
uint16_t *hq_used_idx; /* monotonically increasing */
|
||||
struct virtio_used *hq_used_ring;
|
||||
};
|
||||
|
||||
/*
|
||||
* Config space
|
||||
*/
|
||||
struct vtblk_config {
|
||||
uint64_t vbc_capacity;
|
||||
uint32_t vbc_size_max;
|
||||
uint32_t vbc_seg_max;
|
||||
uint16_t vbc_geom_c;
|
||||
uint8_t vbc_geom_h;
|
||||
uint8_t vbc_geom_s;
|
||||
uint32_t vbc_blk_size;
|
||||
uint32_t vbc_sectors_max;
|
||||
} __packed;
|
||||
CTASSERT(sizeof(struct vtblk_config) == VTBLK_CFGSZ);
|
||||
|
||||
/*
|
||||
* Fixed-size block header
|
||||
*/
|
||||
struct virtio_blk_hdr {
|
||||
#define VBH_OP_READ 0
|
||||
#define VBH_OP_WRITE 1
|
||||
uint32_t vbh_type;
|
||||
uint32_t vbh_ioprio;
|
||||
uint64_t vbh_sector;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* Debug printf
|
||||
*/
|
||||
static int pci_vtblk_debug;
|
||||
#define DPRINTF(params) if (pci_vtblk_debug) printf params
|
||||
#define WPRINTF(params) printf params
|
||||
|
||||
/*
|
||||
* Per-device softc
|
||||
*/
|
||||
struct pci_vtblk_softc {
|
||||
struct pci_devinst *vbsc_pi;
|
||||
int vbsc_fd;
|
||||
int vbsc_status;
|
||||
int vbsc_isr;
|
||||
int vbsc_lastq;
|
||||
uint32_t vbsc_features;
|
||||
uint64_t vbsc_pfn;
|
||||
struct vring_hqueue vbsc_q;
|
||||
struct vtblk_config vbsc_cfg;
|
||||
};
|
||||
|
||||
/*
|
||||
* Return the number of available descriptors in the vring taking care
|
||||
* of the 16-bit index wraparound.
|
||||
*/
|
||||
static int
|
||||
hq_num_avail(struct vring_hqueue *hq)
|
||||
{
|
||||
int ndesc;
|
||||
|
||||
if (*hq->hq_avail_idx >= hq->hq_cur_aidx)
|
||||
ndesc = *hq->hq_avail_idx - hq->hq_cur_aidx;
|
||||
else
|
||||
ndesc = UINT16_MAX - hq->hq_cur_aidx + *hq->hq_avail_idx + 1;
|
||||
|
||||
assert(ndesc >= 0 && ndesc <= hq->hq_size);
|
||||
|
||||
return (ndesc);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtblk_update_status(struct pci_vtblk_softc *sc, uint32_t value)
|
||||
{
|
||||
if (value == 0) {
|
||||
DPRINTF(("vtblk: device reset requested !\n"));
|
||||
}
|
||||
|
||||
sc->vbsc_status = value;
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vring_hqueue *hq)
|
||||
{
|
||||
struct iovec iov[VTBLK_MAXSEGS];
|
||||
struct virtio_blk_hdr *vbh;
|
||||
struct virtio_desc *vd, *vid;
|
||||
struct virtio_used *vu;
|
||||
uint8_t *status;
|
||||
int i;
|
||||
int err;
|
||||
int iolen;
|
||||
int nsegs;
|
||||
int uidx, aidx, didx;
|
||||
int writeop;
|
||||
off_t offset;
|
||||
|
||||
uidx = *hq->hq_used_idx;
|
||||
aidx = hq->hq_cur_aidx;
|
||||
didx = hq->hq_avail_ring[aidx % hq->hq_size];
|
||||
assert(didx >= 0 && didx < hq->hq_size);
|
||||
|
||||
vd = &hq->hq_dtable[didx];
|
||||
|
||||
/*
|
||||
* Verify that the descriptor is indirect, and obtain
|
||||
* the pointer to the indirect descriptor.
|
||||
* There has to be space for at least 3 descriptors
|
||||
* in the indirect descriptor array: the block header,
|
||||
* 1 or more data descriptors, and a status byte.
|
||||
*/
|
||||
assert(vd->vd_flags & VRING_DESC_F_INDIRECT);
|
||||
|
||||
nsegs = vd->vd_len / sizeof(struct virtio_desc);
|
||||
assert(nsegs >= 3);
|
||||
assert(nsegs < VTBLK_MAXSEGS + 2);
|
||||
|
||||
vid = paddr_guest2host(vd->vd_addr);
|
||||
assert((vid->vd_flags & VRING_DESC_F_INDIRECT) == 0);
|
||||
|
||||
/*
|
||||
* The first descriptor will be the read-only fixed header
|
||||
*/
|
||||
vbh = paddr_guest2host(vid[0].vd_addr);
|
||||
assert(vid[0].vd_len == sizeof(struct virtio_blk_hdr));
|
||||
assert(vid[0].vd_flags & VRING_DESC_F_NEXT);
|
||||
assert((vid[0].vd_flags & VRING_DESC_F_WRITE) == 0);
|
||||
|
||||
writeop = (vbh->vbh_type == VBH_OP_WRITE);
|
||||
|
||||
offset = vbh->vbh_sector * DEV_BSIZE;
|
||||
|
||||
/*
|
||||
* Build up the iovec based on the guest's data descriptors
|
||||
*/
|
||||
for (i = 1, iolen = 0; i < nsegs - 1; i++) {
|
||||
iov[i-1].iov_base = paddr_guest2host(vid[i].vd_addr);
|
||||
iov[i-1].iov_len = vid[i].vd_len;
|
||||
iolen += vid[i].vd_len;
|
||||
|
||||
assert(vid[i].vd_flags & VRING_DESC_F_NEXT);
|
||||
assert((vid[i].vd_flags & VRING_DESC_F_INDIRECT) == 0);
|
||||
|
||||
/*
|
||||
* - write op implies read-only descriptor,
|
||||
* - read op implies write-only descriptor,
|
||||
* therefore test the inverse of the descriptor bit
|
||||
* to the op.
|
||||
*/
|
||||
assert(((vid[i].vd_flags & VRING_DESC_F_WRITE) == 0) ==
|
||||
writeop);
|
||||
}
|
||||
|
||||
/* Lastly, get the address of the status byte */
|
||||
status = paddr_guest2host(vid[nsegs - 1].vd_addr);
|
||||
assert(vid[nsegs - 1].vd_len == 1);
|
||||
assert((vid[nsegs - 1].vd_flags & VRING_DESC_F_NEXT) == 0);
|
||||
assert(vid[nsegs - 1].vd_flags & VRING_DESC_F_WRITE);
|
||||
|
||||
DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r",
|
||||
writeop ? "write" : "read", iolen, nsegs - 2, offset));
|
||||
|
||||
if (writeop){
|
||||
err = pwritev(sc->vbsc_fd, iov, nsegs - 2, offset);
|
||||
} else {
|
||||
err = preadv(sc->vbsc_fd, iov, nsegs - 2, offset);
|
||||
}
|
||||
|
||||
*status = err < 0 ? VTBLK_S_IOERR : VTBLK_S_OK;
|
||||
|
||||
/*
|
||||
* Return the single indirect descriptor back to the host
|
||||
*/
|
||||
vu = &hq->hq_used_ring[uidx % hq->hq_size];
|
||||
vu->vu_idx = didx;
|
||||
vu->vu_tlen = 1;
|
||||
hq->hq_cur_aidx++;
|
||||
*hq->hq_used_idx += 1;
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtblk_qnotify(struct pci_vtblk_softc *sc)
|
||||
{
|
||||
struct vring_hqueue *hq = &sc->vbsc_q;
|
||||
int i;
|
||||
int ndescs;
|
||||
|
||||
/*
|
||||
* Calculate number of ring entries to process
|
||||
*/
|
||||
ndescs = hq_num_avail(hq);
|
||||
|
||||
if (ndescs == 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Run through all the entries, placing them into iovecs and
|
||||
* sending when an end-of-packet is found
|
||||
*/
|
||||
for (i = 0; i < ndescs; i++)
|
||||
pci_vtblk_proc(sc, hq);
|
||||
|
||||
/*
|
||||
* Generate an interrupt if able
|
||||
*/
|
||||
if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0 &&
|
||||
sc->vbsc_isr == 0) {
|
||||
sc->vbsc_isr = 1;
|
||||
pci_generate_msi(sc->vbsc_pi, 0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtblk_ring_init(struct pci_vtblk_softc *sc, uint64_t pfn)
|
||||
{
|
||||
struct vring_hqueue *hq;
|
||||
|
||||
sc->vbsc_pfn = pfn << VRING_PFN;
|
||||
|
||||
/*
|
||||
* Set up host pointers to the various parts of the
|
||||
* queue
|
||||
*/
|
||||
hq = &sc->vbsc_q;
|
||||
hq->hq_size = VTBLK_RINGSZ;
|
||||
|
||||
hq->hq_dtable = paddr_guest2host(pfn << VRING_PFN);
|
||||
hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size);
|
||||
hq->hq_avail_idx = hq->hq_avail_flags + 1;
|
||||
hq->hq_avail_ring = hq->hq_avail_flags + 2;
|
||||
hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring,
|
||||
VRING_ALIGN);
|
||||
hq->hq_used_idx = hq->hq_used_flags + 1;
|
||||
hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);
|
||||
|
||||
/*
|
||||
* Initialize queue indexes
|
||||
*/
|
||||
hq->hq_cur_aidx = 0;
|
||||
}
|
||||
|
||||
static int
|
||||
pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
struct stat sbuf;
|
||||
struct pci_vtblk_softc *sc;
|
||||
off_t size;
|
||||
int fd;
|
||||
int sectsz;
|
||||
|
||||
if (opts == NULL) {
|
||||
printf("virtio-block: backing device required\n");
|
||||
return (1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Access to guest memory is required. Fail if
|
||||
* memory not mapped
|
||||
*/
|
||||
if (paddr_guest2host(0) == NULL)
|
||||
return (1);
|
||||
|
||||
/*
|
||||
* The supplied backing file has to exist
|
||||
*/
|
||||
fd = open(opts, O_RDWR);
|
||||
if (fd < 0) {
|
||||
perror("Could not open backing file");
|
||||
return (1);
|
||||
}
|
||||
|
||||
if (fstat(fd, &sbuf) < 0) {
|
||||
perror("Could not stat backing file");
|
||||
close(fd);
|
||||
return (1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Deal with raw devices
|
||||
*/
|
||||
size = sbuf.st_size;
|
||||
sectsz = DEV_BSIZE;
|
||||
if (S_ISCHR(sbuf.st_mode)) {
|
||||
if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
|
||||
	    ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
|
||||
perror("Could not fetch dev blk/sector size");
|
||||
close(fd);
|
||||
return (1);
|
||||
}
|
||||
assert(size != 0);
|
||||
assert(sectsz != 0);
|
||||
}
|
||||
|
||||
sc = malloc(sizeof(struct pci_vtblk_softc));
|
||||
memset(sc, 0, sizeof(struct pci_vtblk_softc));
|
||||
|
||||
pi->pi_arg = sc;
|
||||
sc->vbsc_pi = pi;
|
||||
sc->vbsc_fd = fd;
|
||||
|
||||
/* setup virtio block config space */
|
||||
sc->vbsc_cfg.vbc_capacity = size / sectsz;
|
||||
sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS;
|
||||
sc->vbsc_cfg.vbc_blk_size = sectsz;
|
||||
sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */
|
||||
sc->vbsc_cfg.vbc_geom_c = 0; /* no geometry */
|
||||
sc->vbsc_cfg.vbc_geom_h = 0;
|
||||
sc->vbsc_cfg.vbc_geom_s = 0;
|
||||
sc->vbsc_cfg.vbc_sectors_max = 0;
|
||||
|
||||
/* initialize config space */
|
||||
pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
|
||||
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
|
||||
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
|
||||
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
|
||||
pci_emul_add_msicap(pi, 1);
|
||||
pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTBLK_REGSZ);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtblk_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
|
||||
int baridx, uint64_t offset, int size, uint64_t value)
|
||||
{
|
||||
struct pci_vtblk_softc *sc = pi->pi_arg;
|
||||
|
||||
assert(baridx == 0);
|
||||
|
||||
if (offset + size > VTBLK_REGSZ) {
|
||||
DPRINTF(("vtblk_write: 2big, offset %ld size %d\n",
|
||||
offset, size));
|
||||
return;
|
||||
}
|
||||
|
||||
switch (offset) {
|
||||
case VTCFG_R_GUESTCAP:
|
||||
assert(size == 4);
|
||||
sc->vbsc_features = value & VTBLK_S_HOSTCAPS;
|
||||
break;
|
||||
case VTCFG_R_PFN:
|
||||
assert(size == 4);
|
||||
pci_vtblk_ring_init(sc, value);
|
||||
break;
|
||||
case VTCFG_R_QSEL:
|
||||
assert(size == 2);
|
||||
sc->vbsc_lastq = value;
|
||||
break;
|
||||
case VTCFG_R_QNOTIFY:
|
||||
assert(size == 2);
|
||||
assert(value == 0);
|
||||
pci_vtblk_qnotify(sc);
|
||||
break;
|
||||
case VTCFG_R_STATUS:
|
||||
assert(size == 1);
|
||||
pci_vtblk_update_status(sc, value);
|
||||
break;
|
||||
case VTCFG_R_HOSTCAP:
|
||||
case VTCFG_R_QNUM:
|
||||
case VTCFG_R_ISR:
|
||||
case VTBLK_R_CFG ... VTBLK_R_CFG_END:
|
||||
DPRINTF(("vtblk: write to readonly reg %ld\n\r", offset));
|
||||
break;
|
||||
default:
|
||||
DPRINTF(("vtblk: unknown i/o write offset %ld\n\r", offset));
|
||||
value = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t
|
||||
pci_vtblk_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
|
||||
int baridx, uint64_t offset, int size)
|
||||
{
|
||||
struct pci_vtblk_softc *sc = pi->pi_arg;
|
||||
void *ptr;
|
||||
uint32_t value;
|
||||
|
||||
assert(baridx == 0);
|
||||
|
||||
if (offset + size > VTBLK_REGSZ) {
|
||||
DPRINTF(("vtblk_read: 2big, offset %ld size %d\n",
|
||||
offset, size));
|
||||
return (0);
|
||||
}
|
||||
|
||||
switch (offset) {
|
||||
case VTCFG_R_HOSTCAP:
|
||||
assert(size == 4);
|
||||
value = VTBLK_S_HOSTCAPS;
|
||||
break;
|
||||
case VTCFG_R_GUESTCAP:
|
||||
assert(size == 4);
|
||||
value = sc->vbsc_features; /* XXX never read ? */
|
||||
break;
|
||||
case VTCFG_R_PFN:
|
||||
assert(size == 4);
|
||||
value = sc->vbsc_pfn >> VRING_PFN;
|
||||
break;
|
||||
case VTCFG_R_QNUM:
|
||||
value = (sc->vbsc_lastq == 0) ? VTBLK_RINGSZ: 0;
|
||||
break;
|
||||
case VTCFG_R_QSEL:
|
||||
assert(size == 2);
|
||||
value = sc->vbsc_lastq; /* XXX never read ? */
|
||||
break;
|
||||
case VTCFG_R_QNOTIFY:
|
||||
assert(size == 2);
|
||||
value = 0; /* XXX never read ? */
|
||||
break;
|
||||
case VTCFG_R_STATUS:
|
||||
assert(size == 1);
|
||||
value = sc->vbsc_status;
|
||||
break;
|
||||
case VTCFG_R_ISR:
|
||||
assert(size == 1);
|
||||
value = sc->vbsc_isr;
|
||||
sc->vbsc_isr = 0; /* a read clears this flag */
|
||||
break;
|
||||
case VTBLK_R_CFG ... VTBLK_R_CFG_END:
|
||||
assert(size + offset <= (VTBLK_R_CFG_END + 1));
|
||||
ptr = (uint8_t *)&sc->vbsc_cfg + offset - VTBLK_R_CFG;
|
||||
if (size == 1) {
|
||||
value = *(uint8_t *) ptr;
|
||||
} else if (size == 2) {
|
||||
value = *(uint16_t *) ptr;
|
||||
} else {
|
||||
value = *(uint32_t *) ptr;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
DPRINTF(("vtblk: unknown i/o read offset %ld\n\r", offset));
|
||||
value = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
return (value);
|
||||
}
|
||||
|
||||
struct pci_devemu pci_de_vblk = {
|
||||
.pe_emu = "virtio-blk",
|
||||
.pe_init = pci_vtblk_init,
|
||||
.pe_barwrite = pci_vtblk_write,
|
||||
.pe_barread = pci_vtblk_read
|
||||
};
|
||||
PCI_EMUL_SET(pci_de_vblk);
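
hq_num_avail() above measures how far the guest's avail index has run ahead of the host's cursor, with both counters free-running 16-bit values that are allowed to wrap. A standalone sketch of the same arithmetic, not part of the commit, including a wrapped case:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Distance from the host cursor to the guest avail index, modulo 2^16. */
static int
num_avail(uint16_t avail_idx, uint16_t cur_aidx)
{
	if (avail_idx >= cur_aidx)
		return (avail_idx - cur_aidx);
	return (UINT16_MAX - cur_aidx + avail_idx + 1);
}

int
main(void)
{
	assert(num_avail(10, 7) == 3);		/* simple case		   */
	assert(num_avail(2, 65534) == 4);	/* wrapped: 65534,65535,0,1,2 */
	printf("ok\n");
	return (0);
}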
|
781
usr.sbin/bhyve/pci_virtio_net.c
Normal file
@ -0,0 +1,781 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/linker_set.h>
|
||||
#include <sys/select.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <md5.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "pci_emul.h"
|
||||
#include "mevent.h"
|
||||
#include "virtio.h"
|
||||
|
||||
#define VTNET_RINGSZ 256
|
||||
|
||||
#define VTNET_MAXSEGS 32
|
||||
|
||||
/*
|
||||
* PCI config-space register offsets
|
||||
*/
|
||||
#define VTNET_R_CFG0 20
|
||||
#define VTNET_R_CFG1 21
|
||||
#define VTNET_R_CFG2 22
|
||||
#define VTNET_R_CFG3 23
|
||||
#define VTNET_R_CFG4 24
|
||||
#define VTNET_R_CFG5 25
|
||||
#define VTNET_R_CFG6 26
|
||||
#define VTNET_R_CFG7 27
|
||||
#define VTNET_R_MAX 27
|
||||
|
||||
#define VTNET_REGSZ VTNET_R_MAX+1
|
||||
|
||||
/*
|
||||
* Host capabilities
|
||||
*/
|
||||
#define VTNET_S_HOSTCAPS \
|
||||
( 0x00000020 | /* host supplies MAC */ \
|
||||
0x00008000 | /* host can merge Rx buffers */ \
|
||||
0x00010000 ) /* config status available */
|
||||
|
||||
/*
|
||||
* Queue definitions.
|
||||
*/
|
||||
#define VTNET_RXQ 0
|
||||
#define VTNET_TXQ 1
|
||||
#define VTNET_CTLQ 2
|
||||
|
||||
#define VTNET_MAXQ 3
|
||||
|
||||
struct vring_hqueue {
|
||||
/* Internal state */
|
||||
uint16_t hq_size;
|
||||
uint16_t hq_cur_aidx; /* trails behind 'avail_idx' */
|
||||
|
||||
/* Host-context pointers to the queue */
|
||||
struct virtio_desc *hq_dtable;
|
||||
uint16_t *hq_avail_flags;
|
||||
uint16_t *hq_avail_idx; /* monotonically increasing */
|
||||
uint16_t *hq_avail_ring;
|
||||
|
||||
uint16_t *hq_used_flags;
|
||||
uint16_t *hq_used_idx; /* monotonically increasing */
|
||||
struct virtio_used *hq_used_ring;
|
||||
};
|
||||
|
||||
/*
|
||||
* Fixed network header size
|
||||
*/
|
||||
struct virtio_net_rxhdr {
|
||||
uint8_t vrh_flags;
|
||||
uint8_t vrh_gso_type;
|
||||
uint16_t vrh_hdr_len;
|
||||
uint16_t vrh_gso_size;
|
||||
uint16_t vrh_csum_start;
|
||||
uint16_t vrh_csum_offset;
|
||||
uint16_t vrh_bufs;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* Debug printf
|
||||
*/
|
||||
static int pci_vtnet_debug;
|
||||
#define DPRINTF(params) if (pci_vtnet_debug) printf params
|
||||
#define WPRINTF(params) printf params
|
||||
|
||||
/*
|
||||
* Per-device softc
|
||||
*/
|
||||
struct pci_vtnet_softc {
|
||||
struct pci_devinst *vsc_pi;
|
||||
pthread_mutex_t vsc_mtx;
|
||||
struct mevent *vsc_mevp;
|
||||
|
||||
int vsc_curq;
|
||||
int vsc_status;
|
||||
int vsc_isr;
|
||||
int vsc_tapfd;
|
||||
int vsc_rx_ready;
|
||||
int vsc_rxpend;
|
||||
|
||||
uint32_t vsc_features;
|
||||
uint8_t vsc_macaddr[6];
|
||||
|
||||
uint64_t vsc_pfn[VTNET_MAXQ];
|
||||
struct vring_hqueue vsc_hq[VTNET_MAXQ];
|
||||
};
|
||||
|
||||
/*
|
||||
* Return the number of available descriptors in the vring taking care
|
||||
* of the 16-bit index wraparound.
|
||||
*/
|
||||
static int
|
||||
hq_num_avail(struct vring_hqueue *hq)
|
||||
{
|
||||
int ndesc;
|
||||
|
||||
if (*hq->hq_avail_idx >= hq->hq_cur_aidx)
|
||||
ndesc = *hq->hq_avail_idx - hq->hq_cur_aidx;
|
||||
else
|
||||
ndesc = UINT16_MAX - hq->hq_cur_aidx + *hq->hq_avail_idx + 1;
|
||||
|
||||
assert(ndesc >= 0 && ndesc <= hq->hq_size);
|
||||
|
||||
return (ndesc);
|
||||
}
|
||||
|
||||
static uint16_t
|
||||
pci_vtnet_qsize(int qnum)
|
||||
{
|
||||
/* XXX no ctl queue currently */
|
||||
if (qnum == VTNET_CTLQ) {
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* XXX fixed currently. Maybe different for tx/rx/ctl */
|
||||
return (VTNET_RINGSZ);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_ring_reset(struct pci_vtnet_softc *sc, int ring)
|
||||
{
|
||||
struct vring_hqueue *hq;
|
||||
|
||||
assert(ring < VTNET_MAXQ);
|
||||
|
||||
hq = &sc->vsc_hq[ring];
|
||||
|
||||
/*
|
||||
* Reset all soft state
|
||||
*/
|
||||
hq->hq_cur_aidx = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_update_status(struct pci_vtnet_softc *sc, uint32_t value)
|
||||
{
|
||||
|
||||
if (value == 0) {
|
||||
DPRINTF(("vtnet: device reset requested !\n"));
|
||||
pci_vtnet_ring_reset(sc, VTNET_RXQ);
|
||||
pci_vtnet_ring_reset(sc, VTNET_TXQ);
|
||||
sc->vsc_rx_ready = 0;
|
||||
}
|
||||
|
||||
sc->vsc_status = value;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called to send a buffer chain out to the tap device
|
||||
*/
|
||||
static void
|
||||
pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
|
||||
int len)
|
||||
{
|
||||
char pad[60];
|
||||
|
||||
if (sc->vsc_tapfd == -1)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If the length is < 60, pad out to that and add the
|
||||
 * extra zero'd segment to the iov. The caller guarantees that
|
||||
 * an extra iov slot is always available.
|
||||
*/
|
||||
if (len < 60) {
|
||||
memset(pad, 0, 60 - len);
|
||||
iov[iovcnt].iov_base = pad;
|
||||
iov[iovcnt].iov_len = 60 - len;
|
||||
iovcnt++;
|
||||
}
|
||||
(void) writev(sc->vsc_tapfd, iov, iovcnt);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called when there is read activity on the tap file descriptor.
|
||||
* Each buffer posted by the guest is assumed to be able to contain
|
||||
* an entire ethernet frame + rx header.
|
||||
* MP note: the dummybuf is only used for discarding frames, so there
|
||||
* is no need for it to be per-vtnet or locked.
|
||||
*/
|
||||
static uint8_t dummybuf[2048];
|
||||
|
||||
static void
|
||||
pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
|
||||
{
|
||||
struct virtio_desc *vd;
|
||||
struct virtio_used *vu;
|
||||
struct vring_hqueue *hq;
|
||||
struct virtio_net_rxhdr *vrx;
|
||||
uint8_t *buf;
|
||||
int i;
|
||||
int len;
|
||||
int ndescs;
|
||||
int didx, uidx, aidx; /* descriptor, avail and used index */
|
||||
|
||||
/*
|
||||
* Should never be called without a valid tap fd
|
||||
*/
|
||||
assert(sc->vsc_tapfd != -1);
|
||||
|
||||
/*
|
||||
 * However, this may be called before the rx ring has yet
|
||||
* been set up.
|
||||
*/
|
||||
if (sc->vsc_rx_ready == 0) {
|
||||
/*
|
||||
* Drop the packet and try later.
|
||||
*/
|
||||
(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the number of available rx buffers
|
||||
*/
|
||||
hq = &sc->vsc_hq[VTNET_RXQ];
|
||||
|
||||
ndescs = hq_num_avail(hq);
|
||||
|
||||
if (ndescs == 0) {
|
||||
/*
|
||||
* Need to wait for host notification to read
|
||||
*/
|
||||
if (sc->vsc_rxpend == 0) {
|
||||
WPRINTF(("vtnet: no rx descriptors !\n"));
|
||||
sc->vsc_rxpend = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop the packet and try later
|
||||
*/
|
||||
(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
|
||||
return;
|
||||
}
|
||||
|
||||
aidx = hq->hq_cur_aidx;
|
||||
uidx = *hq->hq_used_idx;
|
||||
for (i = 0; i < ndescs; i++) {
|
||||
/*
|
||||
 * 'aidx' indexes into an array of descriptor indexes
|
||||
*/
|
||||
didx = hq->hq_avail_ring[aidx % hq->hq_size];
|
||||
assert(didx >= 0 && didx < hq->hq_size);
|
||||
|
||||
vd = &hq->hq_dtable[didx];
|
||||
|
||||
/*
|
||||
* Get a pointer to the rx header, and use the
|
||||
* data immediately following it for the packet buffer.
|
||||
*/
|
||||
vrx = (struct virtio_net_rxhdr *)paddr_guest2host(vd->vd_addr);
|
||||
buf = (uint8_t *)(vrx + 1);
|
||||
|
||||
len = read(sc->vsc_tapfd, buf,
|
||||
vd->vd_len - sizeof(struct virtio_net_rxhdr));
|
||||
|
||||
if (len < 0 && errno == EWOULDBLOCK) {
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* The only valid field in the rx packet header is the
|
||||
* number of buffers, which is always 1 without TSO
|
||||
* support.
|
||||
*/
|
||||
memset(vrx, 0, sizeof(struct virtio_net_rxhdr));
|
||||
vrx->vrh_bufs = 1;
|
||||
|
||||
/*
|
||||
* Write this descriptor into the used ring
|
||||
*/
|
||||
vu = &hq->hq_used_ring[uidx % hq->hq_size];
|
||||
vu->vu_idx = didx;
|
||||
vu->vu_tlen = len + sizeof(struct virtio_net_rxhdr);
|
||||
uidx++;
|
||||
aidx++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the used pointer, and signal an interrupt if allowed
|
||||
*/
|
||||
*hq->hq_used_idx = uidx;
|
||||
hq->hq_cur_aidx = aidx;
|
||||
|
||||
if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) {
|
||||
sc->vsc_isr |= 1;
|
||||
pci_generate_msi(sc->vsc_pi, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_tap_callback(int fd, enum ev_type type, void *param)
|
||||
{
|
||||
struct pci_vtnet_softc *sc = param;
|
||||
|
||||
pthread_mutex_lock(&sc->vsc_mtx);
|
||||
pci_vtnet_tap_rx(sc);
|
||||
pthread_mutex_unlock(&sc->vsc_mtx);
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_ping_rxq(struct pci_vtnet_softc *sc)
|
||||
{
|
||||
/*
|
||||
* A qnotify means that the rx process can now begin
|
||||
*/
|
||||
if (sc->vsc_rx_ready == 0) {
|
||||
sc->vsc_rx_ready = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the rx queue was empty, attempt to receive a
|
||||
* packet that was previously blocked due to no rx bufs
|
||||
* available
|
||||
*/
|
||||
if (sc->vsc_rxpend) {
|
||||
WPRINTF(("vtnet: rx resumed\n\r"));
|
||||
sc->vsc_rxpend = 0;
|
||||
pci_vtnet_tap_rx(sc);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vring_hqueue *hq)
|
||||
{
|
||||
struct iovec iov[VTNET_MAXSEGS + 1];
|
||||
struct virtio_desc *vd;
|
||||
struct virtio_used *vu;
|
||||
int i;
|
||||
int plen;
|
||||
int tlen;
|
||||
int uidx, aidx, didx;
|
||||
|
||||
uidx = *hq->hq_used_idx;
|
||||
aidx = hq->hq_cur_aidx;
|
||||
didx = hq->hq_avail_ring[aidx % hq->hq_size];
|
||||
assert(didx >= 0 && didx < hq->hq_size);
|
||||
|
||||
vd = &hq->hq_dtable[didx];
|
||||
|
||||
/*
|
||||
* Run through the chain of descriptors, ignoring the
|
||||
* first header descriptor. However, include the header
|
||||
* length in the total length that will be put into the
|
||||
* used queue.
|
||||
*/
|
||||
tlen = vd->vd_len;
|
||||
vd = &hq->hq_dtable[vd->vd_next];
|
||||
|
||||
for (i = 0, plen = 0;
|
||||
i < VTNET_MAXSEGS;
|
||||
i++, vd = &hq->hq_dtable[vd->vd_next]) {
|
||||
iov[i].iov_base = paddr_guest2host(vd->vd_addr);
|
||||
iov[i].iov_len = vd->vd_len;
|
||||
plen += vd->vd_len;
|
||||
tlen += vd->vd_len;
|
||||
|
||||
if ((vd->vd_flags & VRING_DESC_F_NEXT) == 0)
|
||||
break;
|
||||
}
|
||||
assert(i < VTNET_MAXSEGS);
|
||||
|
||||
DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, i + 1));
|
||||
pci_vtnet_tap_tx(sc, iov, i + 1, plen);
|
||||
|
||||
/*
|
||||
* Return this chain back to the host
|
||||
*/
|
||||
vu = &hq->hq_used_ring[uidx % hq->hq_size];
|
||||
vu->vu_idx = didx;
|
||||
vu->vu_tlen = tlen;
|
||||
hq->hq_cur_aidx = aidx + 1;
|
||||
*hq->hq_used_idx = uidx + 1;
|
||||
|
||||
/*
|
||||
* Generate an interrupt if able
|
||||
*/
|
||||
if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) {
|
||||
sc->vsc_isr |= 1;
|
||||
pci_generate_msi(sc->vsc_pi, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_ping_txq(struct pci_vtnet_softc *sc)
|
||||
{
|
||||
struct vring_hqueue *hq = &sc->vsc_hq[VTNET_TXQ];
|
||||
int i;
|
||||
int ndescs;
|
||||
|
||||
/*
|
||||
* Calculate number of ring entries to process
|
||||
*/
|
||||
ndescs = hq_num_avail(hq);
|
||||
|
||||
if (ndescs == 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Run through all the entries, placing them into iovecs and
|
||||
* sending when an end-of-packet is found
|
||||
*/
|
||||
for (i = 0; i < ndescs; i++)
|
||||
pci_vtnet_proctx(sc, hq);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_ping_ctlq(struct pci_vtnet_softc *sc)
|
||||
{
|
||||
|
||||
DPRINTF(("vtnet: control qnotify!\n\r"));
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_ring_init(struct pci_vtnet_softc *sc, uint64_t pfn)
|
||||
{
|
||||
struct vring_hqueue *hq;
|
||||
int qnum = sc->vsc_curq;
|
||||
|
||||
assert(qnum < VTNET_MAXQ);
|
||||
|
||||
sc->vsc_pfn[qnum] = pfn << VRING_PFN;
|
||||
|
||||
/*
|
||||
* Set up host pointers to the various parts of the
|
||||
* queue
|
||||
*/
|
||||
hq = &sc->vsc_hq[qnum];
|
||||
hq->hq_size = pci_vtnet_qsize(qnum);
|
||||
|
||||
hq->hq_dtable = paddr_guest2host(pfn << VRING_PFN);
|
||||
hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size);
|
||||
hq->hq_avail_idx = hq->hq_avail_flags + 1;
|
||||
hq->hq_avail_ring = hq->hq_avail_flags + 2;
|
||||
hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring,
|
||||
VRING_ALIGN);
|
||||
hq->hq_used_idx = hq->hq_used_flags + 1;
|
||||
hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);
|
||||
|
||||
/*
|
||||
* Initialize queue indexes
|
||||
*/
|
||||
hq->hq_cur_aidx = 0;
|
||||
}
|
||||
|
||||
static int
|
||||
pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
MD5_CTX mdctx;
|
||||
unsigned char digest[16];
|
||||
char nstr[80];
|
||||
struct pci_vtnet_softc *sc;
|
||||
|
||||
/*
|
||||
* Access to guest memory is required. Fail if
|
||||
* memory not mapped
|
||||
*/
|
||||
if (paddr_guest2host(0) == NULL)
|
||||
return (1);
|
||||
|
||||
sc = malloc(sizeof(struct pci_vtnet_softc));
|
||||
memset(sc, 0, sizeof(struct pci_vtnet_softc));
|
||||
|
||||
pi->pi_arg = sc;
|
||||
sc->vsc_pi = pi;
|
||||
|
||||
pthread_mutex_init(&sc->vsc_mtx, NULL);
|
||||
|
||||
/*
|
||||
* Attempt to open the tap device
|
||||
*/
|
||||
sc->vsc_tapfd = -1;
|
||||
if (opts != NULL) {
|
||||
char tbuf[80];
|
||||
|
||||
strcpy(tbuf, "/dev/");
|
||||
strlcat(tbuf, opts, sizeof(tbuf));
|
||||
|
||||
sc->vsc_tapfd = open(tbuf, O_RDWR);
|
||||
if (sc->vsc_tapfd == -1) {
|
||||
WPRINTF(("open of tap device %s failed\n", tbuf));
|
||||
} else {
|
||||
/*
|
||||
* Set non-blocking and register for read
|
||||
* notifications with the event loop
|
||||
*/
|
||||
int opt = 1;
|
||||
if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
|
||||
WPRINTF(("tap device O_NONBLOCK failed\n"));
|
||||
close(sc->vsc_tapfd);
|
||||
sc->vsc_tapfd = -1;
|
||||
}
|
||||
|
||||
sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
|
||||
EVF_READ,
|
||||
pci_vtnet_tap_callback,
|
||||
sc);
|
||||
if (sc->vsc_mevp == NULL) {
|
||||
WPRINTF(("Could not register event\n"));
|
||||
close(sc->vsc_tapfd);
|
||||
sc->vsc_tapfd = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The MAC address is the standard NetApp OUI of 00-a0-98,
|
||||
* followed by an MD5 of the vm name. The slot/func number is
|
||||
* prepended to this for slots other than 1:0, so that
|
||||
* a bootloader can netboot from the equivalent of slot 1.
|
||||
*/
|
||||
if (pi->pi_slot == 1 && pi->pi_func == 0) {
|
||||
strncpy(nstr, vmname, sizeof(nstr));
|
||||
} else {
|
||||
snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
|
||||
pi->pi_func, vmname);
|
||||
}
|
||||
|
||||
MD5Init(&mdctx);
|
||||
MD5Update(&mdctx, nstr, strlen(nstr));
|
||||
MD5Final(digest, &mdctx);
|
||||
|
||||
sc->vsc_macaddr[0] = 0x00;
|
||||
sc->vsc_macaddr[1] = 0xa0;
|
||||
sc->vsc_macaddr[2] = 0x98;
|
||||
sc->vsc_macaddr[3] = digest[0];
|
||||
sc->vsc_macaddr[4] = digest[1];
|
||||
sc->vsc_macaddr[5] = digest[2];
|
||||
|
||||
/* initialize config space */
|
||||
pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
|
||||
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
|
||||
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
|
||||
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
|
||||
pci_emul_add_msicap(pi, 1);
|
||||
pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTNET_REGSZ);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Function pointer array to handle queue notifications
|
||||
*/
|
||||
static void (*pci_vtnet_qnotify[VTNET_MAXQ])(struct pci_vtnet_softc *) = {
|
||||
pci_vtnet_ping_rxq,
|
||||
pci_vtnet_ping_txq,
|
||||
pci_vtnet_ping_ctlq
|
||||
};
|
||||
|
||||
static void
|
||||
pci_vtnet_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
|
||||
int baridx, uint64_t offset, int size, uint64_t value)
|
||||
{
|
||||
struct pci_vtnet_softc *sc = pi->pi_arg;
|
||||
void *ptr;
|
||||
|
||||
assert(baridx == 0);
|
||||
|
||||
if (offset + size > VTNET_REGSZ) {
|
||||
DPRINTF(("vtnet_write: 2big, offset %ld size %d\n",
|
||||
offset, size));
|
||||
return;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&sc->vsc_mtx);
|
||||
|
||||
switch (offset) {
|
||||
case VTCFG_R_GUESTCAP:
|
||||
assert(size == 4);
|
||||
sc->vsc_features = value & VTNET_S_HOSTCAPS;
|
||||
break;
|
||||
case VTCFG_R_PFN:
|
||||
assert(size == 4);
|
||||
pci_vtnet_ring_init(sc, value);
|
||||
break;
|
||||
case VTCFG_R_QSEL:
|
||||
assert(size == 2);
|
||||
assert(value < VTNET_MAXQ);
|
||||
sc->vsc_curq = value;
|
||||
break;
|
||||
case VTCFG_R_QNOTIFY:
|
||||
assert(size == 2);
|
||||
assert(value < VTNET_MAXQ);
|
||||
(*pci_vtnet_qnotify[value])(sc);
|
||||
break;
|
||||
case VTCFG_R_STATUS:
|
||||
assert(size == 1);
|
||||
pci_vtnet_update_status(sc, value);
|
||||
break;
|
||||
case VTNET_R_CFG0:
|
||||
case VTNET_R_CFG1:
|
||||
case VTNET_R_CFG2:
|
||||
case VTNET_R_CFG3:
|
||||
case VTNET_R_CFG4:
|
||||
case VTNET_R_CFG5:
|
||||
assert((size + offset) <= (VTNET_R_CFG5 + 1));
|
||||
ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0];
|
||||
/*
|
||||
* The driver is allowed to change the MAC address
|
||||
*/
|
||||
sc->vsc_macaddr[offset - VTNET_R_CFG0] = value;
|
||||
if (size == 1) {
|
||||
*(uint8_t *) ptr = value;
|
||||
} else if (size == 2) {
|
||||
*(uint16_t *) ptr = value;
|
||||
} else {
|
||||
*(uint32_t *) ptr = value;
|
||||
}
|
||||
break;
|
||||
case VTCFG_R_HOSTCAP:
|
||||
case VTCFG_R_QNUM:
|
||||
case VTCFG_R_ISR:
|
||||
case VTNET_R_CFG6:
|
||||
case VTNET_R_CFG7:
|
||||
DPRINTF(("vtnet: write to readonly reg %ld\n\r", offset));
|
||||
break;
|
||||
default:
|
||||
DPRINTF(("vtnet: unknown i/o write offset %ld\n\r", offset));
|
||||
value = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&sc->vsc_mtx);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
pci_vtnet_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
|
||||
int baridx, uint64_t offset, int size)
|
||||
{
|
||||
struct pci_vtnet_softc *sc = pi->pi_arg;
|
||||
void *ptr;
|
||||
uint64_t value;
|
||||
|
||||
assert(baridx == 0);
|
||||
|
||||
if (offset + size > VTNET_REGSZ) {
|
||||
DPRINTF(("vtnet_read: 2big, offset %ld size %d\n",
|
||||
offset, size));
|
||||
return (0);
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&sc->vsc_mtx);
|
||||
|
||||
switch (offset) {
|
||||
case VTCFG_R_HOSTCAP:
|
||||
assert(size == 4);
|
||||
value = VTNET_S_HOSTCAPS;
|
||||
break;
|
||||
case VTCFG_R_GUESTCAP:
|
||||
assert(size == 4);
|
||||
value = sc->vsc_features; /* XXX never read ? */
|
||||
break;
|
||||
case VTCFG_R_PFN:
|
||||
assert(size == 4);
|
||||
value = sc->vsc_pfn[sc->vsc_curq] >> VRING_PFN;
|
||||
break;
|
||||
case VTCFG_R_QNUM:
|
||||
assert(size == 2);
|
||||
value = pci_vtnet_qsize(sc->vsc_curq);
|
||||
break;
|
||||
case VTCFG_R_QSEL:
|
||||
assert(size == 2);
|
||||
value = sc->vsc_curq; /* XXX never read ? */
|
||||
break;
|
||||
case VTCFG_R_QNOTIFY:
|
||||
assert(size == 2);
|
||||
value = sc->vsc_curq; /* XXX never read ? */
|
||||
break;
|
||||
case VTCFG_R_STATUS:
|
||||
assert(size == 1);
|
||||
value = sc->vsc_status;
|
||||
break;
|
||||
case VTCFG_R_ISR:
|
||||
assert(size == 1);
|
||||
value = sc->vsc_isr;
|
||||
sc->vsc_isr = 0; /* a read clears this flag */
|
||||
break;
|
||||
case VTNET_R_CFG0:
|
||||
case VTNET_R_CFG1:
|
||||
case VTNET_R_CFG2:
|
||||
case VTNET_R_CFG3:
|
||||
case VTNET_R_CFG4:
|
||||
case VTNET_R_CFG5:
|
||||
assert((size + offset) <= (VTNET_R_CFG5 + 1));
|
||||
ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0];
|
||||
if (size == 1) {
|
||||
value = *(uint8_t *) ptr;
|
||||
} else if (size == 2) {
|
||||
value = *(uint16_t *) ptr;
|
||||
} else {
|
||||
value = *(uint32_t *) ptr;
|
||||
}
|
||||
break;
|
||||
case VTNET_R_CFG6:
|
||||
assert(size != 4);
|
||||
value = 0x01; /* XXX link always up */
|
||||
break;
|
||||
case VTNET_R_CFG7:
|
||||
assert(size == 1);
|
||||
value = 0; /* XXX link status in LSB */
|
||||
break;
|
||||
default:
|
||||
DPRINTF(("vtnet: unknown i/o read offset %ld\n\r", offset));
|
||||
value = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&sc->vsc_mtx);
|
||||
|
||||
return (value);
|
||||
}
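The CFG0..CFG5 window handled above is just the 6-byte MAC viewed through 1-, 2- or 4-byte accesses. A hypothetical sketch (not part of this commit) of what a 4-byte read at CFG0 yields for the example address 00:a0:98:11:22:33 on a little-endian host:

/* Illustrative only: multi-byte reads of the MAC config window. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

int
main(void)
{
	uint8_t macaddr[6] = { 0x00, 0xa0, 0x98, 0x11, 0x22, 0x33 };
	uint32_t value;

	/* A 4-byte read at CFG0 copies macaddr[0..3], least significant first. */
	memcpy(&value, &macaddr[0], sizeof(value));
	printf("0x%08x\n", value);	/* prints 0x1198a000 on little-endian */
	return (0);
}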
|
||||
|
||||
struct pci_devemu pci_de_vnet = {
|
||||
.pe_emu = "virtio-net",
|
||||
.pe_init = pci_vtnet_init,
|
||||
.pe_barwrite = pci_vtnet_write,
|
||||
.pe_barread = pci_vtnet_read
|
||||
};
|
||||
PCI_EMUL_SET(pci_de_vnet);
|
198
usr.sbin/bhyve/pit_8254.c
Normal file
@ -0,0 +1,198 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <machine/clock.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "inout.h"
|
||||
#include "pit_8254.h"
|
||||
|
||||
#define TIMER_SEL_MASK 0xc0
|
||||
#define TIMER_RW_MASK 0x30
|
||||
#define TIMER_MODE_MASK 0x0f
|
||||
#define TIMER_SEL_READBACK 0xc0
|
||||
|
||||
#define TIMER_DIV(freq, hz) (((freq) + (hz) / 2) / (hz))
|
||||
|
||||
#define PIT_8254_FREQ 1193182
|
||||
static const int nsecs_per_tick = 1000000000 / PIT_8254_FREQ;
|
||||
|
||||
struct counter {
|
||||
struct timeval tv; /* uptime when counter was loaded */
|
||||
uint16_t initial; /* initial counter value */
|
||||
uint8_t cr[2];
|
||||
uint8_t ol[2];
|
||||
int crbyte;
|
||||
int olbyte;
|
||||
};
|
||||
|
||||
static void
|
||||
timevalfix(struct timeval *t1)
|
||||
{
|
||||
|
||||
if (t1->tv_usec < 0) {
|
||||
t1->tv_sec--;
|
||||
t1->tv_usec += 1000000;
|
||||
}
|
||||
if (t1->tv_usec >= 1000000) {
|
||||
t1->tv_sec++;
|
||||
t1->tv_usec -= 1000000;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
timevalsub(struct timeval *t1, const struct timeval *t2)
|
||||
{
|
||||
|
||||
t1->tv_sec -= t2->tv_sec;
|
||||
t1->tv_usec -= t2->tv_usec;
|
||||
timevalfix(t1);
|
||||
}
|
||||
|
||||
static void
|
||||
latch(struct counter *c)
|
||||
{
|
||||
struct timeval tv2;
|
||||
uint16_t lval;
|
||||
uint64_t delta_nsecs, delta_ticks;
|
||||
|
||||
/* cannot latch a new value until the old one has been consumed */
|
||||
if (c->olbyte != 0)
|
||||
return;
|
||||
|
||||
if (c->initial == 0 || c->initial == 1) {
|
||||
/*
|
||||
* XXX the program that runs the VM can be stopped and
|
||||
* restarted at any time. This means that state that was
|
||||
* created by the guest is destroyed between invocations
|
||||
* of the program.
|
||||
*
|
||||
* If the counter's initial value is not programmed we
|
||||
* assume a value that would be set to generate 'guest_hz'
|
||||
* interrupts per second.
|
||||
*/
|
||||
c->initial = TIMER_DIV(PIT_8254_FREQ, guest_hz);
|
||||
gettimeofday(&c->tv, NULL);
|
||||
}
|
||||
|
||||
(void)gettimeofday(&tv2, NULL);
|
||||
timevalsub(&tv2, &c->tv);
|
||||
delta_nsecs = tv2.tv_sec * 1000000000 + tv2.tv_usec * 1000;
|
||||
delta_ticks = delta_nsecs / nsecs_per_tick;
|
||||
|
||||
lval = c->initial - delta_ticks % c->initial;
|
||||
c->olbyte = 2;
|
||||
c->ol[1] = lval; /* LSB */
|
||||
c->ol[0] = lval >> 8; /* MSB */
|
||||
}
|
||||
|
||||
static int
|
||||
pit_8254_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
int sel, rw, mode;
|
||||
uint8_t val;
|
||||
struct counter *c;
|
||||
|
||||
static struct counter counter[3];
|
||||
|
||||
if (bytes != 1)
|
||||
return (-1);
|
||||
|
||||
val = *eax;
|
||||
|
||||
if (port == TIMER_MODE) {
|
||||
assert(in == 0);
|
||||
sel = val & TIMER_SEL_MASK;
|
||||
rw = val & TIMER_RW_MASK;
|
||||
mode = val & TIMER_MODE_MASK;
|
||||
|
||||
if (sel == TIMER_SEL_READBACK)
|
||||
return (-1);
|
||||
if (rw != TIMER_LATCH && rw != TIMER_16BIT)
|
||||
return (-1);
|
||||
|
||||
if (rw != TIMER_LATCH) {
|
||||
/*
|
||||
* Counter mode is not affected when issuing a
|
||||
* latch command.
|
||||
*/
|
||||
if (mode != TIMER_RATEGEN && mode != TIMER_SQWAVE)
|
||||
return (-1);
|
||||
}
|
||||
|
||||
c = &counter[sel >> 6];
|
||||
if (rw == TIMER_LATCH)
|
||||
latch(c);
|
||||
else
|
||||
c->olbyte = 0; /* reset latch after reprogramming */
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* counter ports */
|
||||
assert(port >= TIMER_CNTR0 && port <= TIMER_CNTR2);
|
||||
c = &counter[port - TIMER_CNTR0];
|
||||
|
||||
if (in) {
|
||||
/*
|
||||
* XXX
|
||||
* The spec says that once the output latch is completely
|
||||
* read it should revert to "following" the counter. We don't
|
||||
* do this because it is hard and any reasonable OS should
|
||||
* always latch the counter before trying to read it.
|
||||
*/
|
||||
if (c->olbyte == 0)
|
||||
c->olbyte = 2;
|
||||
*eax = c->ol[--c->olbyte];
|
||||
} else {
|
||||
c->cr[c->crbyte++] = *eax;
|
||||
if (c->crbyte == 2) {
|
||||
c->crbyte = 0;
|
||||
c->initial = c->cr[0] | (uint16_t)c->cr[1] << 8;
|
||||
if (c->initial == 0)
|
||||
c->initial = 0xffff;
|
||||
gettimeofday(&c->tv, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
INOUT_PORT(8254, TIMER_MODE, IOPORT_F_OUT, pit_8254_handler);
|
||||
INOUT_PORT(8254, TIMER_CNTR0, IOPORT_F_INOUT, pit_8254_handler);
|
||||
INOUT_PORT(8254, TIMER_CNTR1, IOPORT_F_INOUT, pit_8254_handler);
|
||||
INOUT_PORT(8254, TIMER_CNTR2, IOPORT_F_INOUT, pit_8254_handler);
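A small stand-alone sketch (not part of this commit) of the latch arithmetic above, assuming a guest_hz of 100 and 2.5 ms of elapsed time purely for illustration:

/* Illustrative only: 8254 latch arithmetic with assumed example values. */
#include <stdio.h>
#include <stdint.h>

#define	PIT_8254_FREQ	1193182
#define	TIMER_DIV(freq, hz)	(((freq) + (hz) / 2) / (hz))

int
main(void)
{
	int nsecs_per_tick = 1000000000 / PIT_8254_FREQ;
	uint64_t delta_nsecs, delta_ticks;
	uint16_t initial, lval;

	initial = TIMER_DIV(PIT_8254_FREQ, 100);	/* assumed guest_hz = 100 */
	delta_nsecs = 2500000;				/* assumed 2.5 ms since counter load */
	delta_ticks = delta_nsecs / nsecs_per_tick;

	lval = initial - delta_ticks % initial;
	printf("initial %u, elapsed %ju ticks, latched %u (LSB 0x%02x, MSB 0x%02x)\n",
	    (unsigned)initial, (uintmax_t)delta_ticks, (unsigned)lval,
	    (unsigned)(lval & 0xff), (unsigned)(lval >> 8));
	return (0);
}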
|
45
usr.sbin/bhyve/pit_8254.h
Normal file
@ -0,0 +1,45 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _PIT_8254_H_
|
||||
#define _PIT_8254_H_
|
||||
|
||||
/*
|
||||
* Borrowed from amd64/include/timerreg.h because in that file it is
|
||||
* conditionally compiled for #ifdef _KERNEL only.
|
||||
*/
|
||||
|
||||
#include <dev/ic/i8253reg.h>
|
||||
|
||||
#define IO_TIMER1 0x40 /* 8253 Timer #1 */
|
||||
#define TIMER_CNTR0 (IO_TIMER1 + TIMER_REG_CNTR0)
|
||||
#define TIMER_CNTR1 (IO_TIMER1 + TIMER_REG_CNTR1)
|
||||
#define TIMER_CNTR2 (IO_TIMER1 + TIMER_REG_CNTR2)
|
||||
#define TIMER_MODE (IO_TIMER1 + TIMER_REG_MODE)
|
||||
|
||||
#endif /* _PIT_8254_H_ */
|
108
usr.sbin/bhyve/pmtmr.c
Normal file
@ -0,0 +1,108 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/time.h>
|
||||
#include <machine/cpufunc.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
#include <assert.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
/*
|
||||
* The ACPI Power Management timer is a free-running 24- or 32-bit
|
||||
* timer with a frequency of 3.579545MHz
|
||||
*
|
||||
* This implementation is 32 bits wide
|
||||
*/
|
||||
|
||||
#define IO_PMTMR 0x408 /* 4-byte i/o port for the timer */
|
||||
|
||||
#define PMTMR_FREQ 3579545 /* 3.579545MHz */
|
||||
|
||||
static pthread_mutex_t pmtmr_mtx;
|
||||
static uint64_t pmtmr_tscf;
|
||||
static uint64_t pmtmr_old;
|
||||
static uint64_t pmtmr_tsc_old;
|
||||
|
||||
static uint32_t
|
||||
pmtmr_val(void)
|
||||
{
|
||||
uint64_t pmtmr_tsc_new;
|
||||
uint64_t pmtmr_new;
|
||||
static int inited = 0;
|
||||
|
||||
if (!inited) {
|
||||
size_t len;
|
||||
uint32_t tmpf;
|
||||
|
||||
inited = 1;
|
||||
pthread_mutex_init(&pmtmr_mtx, NULL);
|
||||
len = sizeof(tmpf);
|
||||
sysctlbyname("machdep.tsc_freq", &tmpf, &len,
|
||||
NULL, 0);
|
||||
pmtmr_tscf = tmpf;
|
||||
pmtmr_tsc_old = rdtsc();
|
||||
pmtmr_old = pmtmr_tsc_old / pmtmr_tscf * PMTMR_FREQ;
|
||||
return (pmtmr_old);
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&pmtmr_mtx);
|
||||
pmtmr_tsc_new = rdtsc();
|
||||
pmtmr_new = (pmtmr_tsc_new - pmtmr_tsc_old) * PMTMR_FREQ / pmtmr_tscf +
|
||||
pmtmr_old;
|
||||
pmtmr_old = pmtmr_new;
|
||||
pmtmr_tsc_old = pmtmr_tsc_new;
|
||||
pthread_mutex_unlock(&pmtmr_mtx);
|
||||
|
||||
return (pmtmr_new);
|
||||
}
|
||||
|
||||
static int
|
||||
pmtmr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
assert(in == 1);
|
||||
|
||||
if (bytes != 4)
|
||||
return (-1);
|
||||
|
||||
*eax = pmtmr_val();
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
INOUT_PORT(pmtmr, IO_PMTMR, IOPORT_F_IN, pmtmr_handler);
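The core of pmtmr_val() above is a rescaling of a TSC delta to 3.579545 MHz ticks. A hypothetical stand-alone sketch of just that conversion (not part of this commit), assuming a 2.0 GHz TSC purely for illustration:

/* Illustrative only: convert a TSC delta into ACPI PM timer ticks. */
#include <stdio.h>
#include <stdint.h>

#define	PMTMR_FREQ	3579545		/* 3.579545 MHz */

static uint64_t
tsc_to_pmtmr(uint64_t tsc_delta, uint64_t tsc_freq)
{

	return (tsc_delta * PMTMR_FREQ / tsc_freq);
}

int
main(void)
{
	uint64_t tsc_freq = 2000000000;	/* assumed 2.0 GHz invariant TSC */

	/* One second of TSC progress yields ~3579545 PM timer ticks. */
	printf("%ju\n", (uintmax_t)tsc_to_pmtmr(tsc_freq, tsc_freq));
	return (0);
}

The committed code applies the same scaling to deltas between successive reads rather than to the absolute TSC, which keeps the 64-bit multiplication from overflowing.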
|
||||
|
51
usr.sbin/bhyve/post.c
Normal file
@ -0,0 +1,51 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
static int
|
||||
post_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
assert(in == 1);
|
||||
|
||||
if (bytes != 1)
|
||||
return (-1);
|
||||
|
||||
*eax = 0xff; /* return some garbage */
|
||||
return (0);
|
||||
}
|
||||
|
||||
INOUT_PORT(post, 0x84, IOPORT_F_IN, post_data_handler);
|
274
usr.sbin/bhyve/rtc.c
Normal file
@ -0,0 +1,274 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
#define IO_RTC 0x70
|
||||
|
||||
#define RTC_SEC 0x00 /* seconds */
|
||||
#define RTC_MIN 0x02
|
||||
#define RTC_HRS 0x04
|
||||
#define RTC_WDAY 0x06
|
||||
#define RTC_DAY 0x07
|
||||
#define RTC_MONTH 0x08
|
||||
#define RTC_YEAR 0x09
|
||||
#define RTC_CENTURY 0x32 /* current century */
|
||||
|
||||
#define RTC_STATUSA 0xA
|
||||
#define RTCSA_TUP 0x80 /* time update, don't look now */
|
||||
|
||||
#define RTC_STATUSB 0xB
|
||||
#define RTCSB_DST 0x01
|
||||
#define RTCSB_24HR 0x02
|
||||
#define RTCSB_BIN 0x04 /* 0 = BCD, 1 = Binary */
|
||||
#define RTCSB_PINTR 0x40 /* 1 = enable periodic clock interrupt */
|
||||
#define RTCSB_HALT 0x80 /* stop clock updates */
|
||||
|
||||
#define RTC_INTR 0x0c /* status register C (R) interrupt source */
|
||||
|
||||
#define RTC_STATUSD 0x0d /* status register D (R) Lost Power */
|
||||
#define RTCSD_PWR 0x80 /* clock power OK */
|
||||
|
||||
#define RTC_DIAG 0x0e
|
||||
|
||||
#define RTC_RSTCODE 0x0f
|
||||
|
||||
#define RTC_EQUIPMENT 0x14
|
||||
|
||||
static int addr;
|
||||
|
||||
/* XXX initialize these to default values as they would be from BIOS */
|
||||
static uint8_t status_a, status_b, rstcode;
|
||||
|
||||
static u_char const bin2bcd_data[] = {
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
|
||||
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
|
||||
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29,
|
||||
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
|
||||
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
|
||||
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
|
||||
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
|
||||
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
|
||||
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
|
||||
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99
|
||||
};
|
||||
#define bin2bcd(bin) (bin2bcd_data[bin])
|
||||
|
||||
#define rtcout(val) ((status_b & RTCSB_BIN) ? (val) : bin2bcd((val)))
|
||||
|
||||
static void
|
||||
timevalfix(struct timeval *t1)
|
||||
{
|
||||
|
||||
if (t1->tv_usec < 0) {
|
||||
t1->tv_sec--;
|
||||
t1->tv_usec += 1000000;
|
||||
}
|
||||
if (t1->tv_usec >= 1000000) {
|
||||
t1->tv_sec++;
|
||||
t1->tv_usec -= 1000000;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
timevalsub(struct timeval *t1, const struct timeval *t2)
|
||||
{
|
||||
|
||||
t1->tv_sec -= t2->tv_sec;
|
||||
t1->tv_usec -= t2->tv_usec;
|
||||
timevalfix(t1);
|
||||
}
|
||||
|
||||
static int
|
||||
rtc_addr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
assert(in == 0);
|
||||
|
||||
if (bytes != 1)
|
||||
return (-1);
|
||||
|
||||
switch (*eax) {
|
||||
case RTC_SEC:
|
||||
case RTC_MIN:
|
||||
case RTC_HRS:
|
||||
case RTC_WDAY:
|
||||
case RTC_DAY:
|
||||
case RTC_MONTH:
|
||||
case RTC_YEAR:
|
||||
case RTC_CENTURY:
|
||||
case RTC_STATUSA:
|
||||
case RTC_STATUSB:
|
||||
case RTC_INTR:
|
||||
case RTC_STATUSD:
|
||||
case RTC_DIAG:
|
||||
case RTC_RSTCODE:
|
||||
case RTC_EQUIPMENT:
|
||||
break;
|
||||
default:
|
||||
return (-1);
|
||||
}
|
||||
|
||||
addr = *eax;
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
rtc_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
int hour;
|
||||
time_t t;
|
||||
struct timeval cur, delta;
|
||||
|
||||
static struct timeval last;
|
||||
static struct tm tm;
|
||||
|
||||
if (bytes != 1)
|
||||
return (-1);
|
||||
|
||||
gettimeofday(&cur, NULL);
|
||||
|
||||
/*
|
||||
* Increment the cached time only once per second so we can guarantee
|
||||
* that the guest has at least one second to read the hour:min:sec
|
||||
* separately and still get a coherent view of the time.
|
||||
*/
|
||||
delta = cur;
|
||||
timevalsub(&delta, &last);
|
||||
if (delta.tv_sec >= 1 && (status_b & RTCSB_HALT) == 0) {
|
||||
t = cur.tv_sec;
|
||||
localtime_r(&t, &tm);
|
||||
last = cur;
|
||||
}
|
||||
|
||||
if (in) {
|
||||
switch (addr) {
|
||||
case RTC_SEC:
|
||||
*eax = rtcout(tm.tm_sec);
|
||||
return (0);
|
||||
case RTC_MIN:
|
||||
*eax = rtcout(tm.tm_min);
|
||||
return (0);
|
||||
case RTC_HRS:
|
||||
if (status_b & RTCSB_24HR)
|
||||
hour = tm.tm_hour;
|
||||
else
|
||||
hour = (tm.tm_hour % 12) + 1;
|
||||
|
||||
*eax = rtcout(hour);
|
||||
|
||||
/*
|
||||
* If we are representing time in the 12-hour format
|
||||
* then set the MSB to indicate PM.
|
||||
*/
|
||||
if ((status_b & RTCSB_24HR) == 0 && tm.tm_hour >= 12)
|
||||
*eax |= 0x80;
|
||||
|
||||
return (0);
|
||||
case RTC_WDAY:
|
||||
*eax = rtcout(tm.tm_wday + 1);
|
||||
return (0);
|
||||
case RTC_DAY:
|
||||
*eax = rtcout(tm.tm_mday);
|
||||
return (0);
|
||||
case RTC_MONTH:
|
||||
*eax = rtcout(tm.tm_mon + 1);
|
||||
return (0);
|
||||
case RTC_YEAR:
|
||||
*eax = rtcout(tm.tm_year % 100);
|
||||
return (0);
|
||||
case RTC_CENTURY:
|
||||
*eax = rtcout((tm.tm_year + 1900) / 100); /* tm_year counts years since 1900 */
|
||||
break;
|
||||
case RTC_STATUSA:
|
||||
*eax = status_a;
|
||||
return (0);
|
||||
case RTC_INTR:
|
||||
*eax = 0;
|
||||
return (0);
|
||||
case RTC_STATUSD:
|
||||
*eax = RTCSD_PWR;
|
||||
return (0);
|
||||
case RTC_DIAG:
|
||||
*eax = 0;
|
||||
return (0);
|
||||
case RTC_RSTCODE:
|
||||
*eax = rstcode;
|
||||
return (0);
|
||||
case RTC_EQUIPMENT:
|
||||
*eax = 0;
|
||||
return (0);
|
||||
default:
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
|
||||
switch (addr) {
|
||||
case RTC_STATUSA:
|
||||
status_a = *eax & ~RTCSA_TUP;
|
||||
break;
|
||||
case RTC_STATUSB:
|
||||
/* XXX not implemented yet XXX */
|
||||
if (*eax & RTCSB_PINTR)
|
||||
return (-1);
|
||||
status_b = *eax;
|
||||
break;
|
||||
case RTC_RSTCODE:
|
||||
rstcode = *eax;
|
||||
break;
|
||||
case RTC_SEC:
|
||||
case RTC_MIN:
|
||||
case RTC_HRS:
|
||||
case RTC_WDAY:
|
||||
case RTC_DAY:
|
||||
case RTC_MONTH:
|
||||
case RTC_YEAR:
|
||||
case RTC_CENTURY:
|
||||
/*
|
||||
* Ignore writes to the time of day registers
|
||||
*/
|
||||
break;
|
||||
default:
|
||||
return (-1);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
INOUT_PORT(rtc, IO_RTC, IOPORT_F_OUT, rtc_addr_handler);
|
||||
INOUT_PORT(rtc, IO_RTC + 1, IOPORT_F_INOUT, rtc_data_handler);
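rtcout() above re-encodes each field as packed BCD unless the guest has selected binary mode via RTCSB_BIN. A tiny hypothetical sketch of the same encoding (not part of this commit):

/* Illustrative only: BCD encoding as used by the emulated RTC registers. */
#include <stdio.h>
#include <stdint.h>

static uint8_t
bin2bcd(uint8_t bin)
{

	return ((bin / 10) << 4 | (bin % 10));
}

int
main(void)
{

	/* 59 seconds reads back as 0x59 in BCD mode, 0x3b in binary mode. */
	printf("BCD 0x%02x, binary 0x%02x\n", bin2bcd(59), 59);
	return (0);
}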
|
119
usr.sbin/bhyve/spinup_ap.c
Normal file
@ -0,0 +1,119 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "spinup_ap.h"
|
||||
|
||||
static void
|
||||
spinup_ap_realmode(struct vmctx *ctx, int newcpu, uint64_t *rip)
|
||||
{
|
||||
int vector, error;
|
||||
uint16_t cs;
|
||||
uint64_t desc_base;
|
||||
uint32_t desc_limit, desc_access;
|
||||
|
||||
vector = *rip >> PAGE_SHIFT;
|
||||
*rip = 0;
|
||||
|
||||
/*
|
||||
* Update the %cs and %rip of the guest so that it starts
|
||||
* executing real mode code at 'vector << 12'.
|
||||
*/
|
||||
error = vm_set_register(ctx, newcpu, VM_REG_GUEST_RIP, *rip);
|
||||
assert(error == 0);
|
||||
|
||||
error = vm_get_desc(ctx, newcpu, VM_REG_GUEST_CS, &desc_base,
|
||||
&desc_limit, &desc_access);
|
||||
assert(error == 0);
|
||||
|
||||
desc_base = vector << PAGE_SHIFT;
|
||||
error = vm_set_desc(ctx, newcpu, VM_REG_GUEST_CS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
assert(error == 0);
|
||||
|
||||
cs = (vector << PAGE_SHIFT) >> 4;
|
||||
error = vm_set_register(ctx, newcpu, VM_REG_GUEST_CS, cs);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
int
|
||||
spinup_ap(struct vmctx *ctx, int vcpu, int newcpu, uint64_t rip)
|
||||
{
|
||||
int error;
|
||||
|
||||
assert(newcpu != 0);
|
||||
assert(newcpu < guest_ncpus);
|
||||
|
||||
error = vcpu_reset(ctx, newcpu);
|
||||
assert(error == 0);
|
||||
|
||||
/* Set up capabilities */
|
||||
if (fbsdrun_vmexit_on_hlt()) {
|
||||
error = vm_set_capability(ctx, newcpu, VM_CAP_HALT_EXIT, 1);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
if (fbsdrun_vmexit_on_pause()) {
|
||||
error = vm_set_capability(ctx, newcpu, VM_CAP_PAUSE_EXIT, 1);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
if (fbsdrun_disable_x2apic())
|
||||
error = vm_set_x2apic_state(ctx, newcpu, X2APIC_DISABLED);
|
||||
else
|
||||
error = vm_set_x2apic_state(ctx, newcpu, X2APIC_ENABLED);
|
||||
assert(error == 0);
|
||||
|
||||
/*
|
||||
* Enable the 'unrestricted guest' mode for 'newcpu'.
|
||||
*
|
||||
* Set up the processor state in power-on 16-bit mode, with the CS:IP
|
||||
* init'd to the specified low-mem 4K page.
|
||||
*/
|
||||
error = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
|
||||
assert(error == 0);
|
||||
|
||||
spinup_ap_realmode(ctx, newcpu, &rip);
|
||||
|
||||
fbsdrun_addcpu(ctx, newcpu, rip);
|
||||
|
||||
return (newcpu);
|
||||
}
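spinup_ap_realmode() above follows the SIPI convention that the startup vector names a 4KB page: the CS base becomes vector << 12, the CS selector base >> 4, and IP is left at 0. A hypothetical worked example for vector 0x9 (chosen only for illustration, not part of this commit):

/* Illustrative only: CS base/selector derived from a SIPI startup vector. */
#include <stdio.h>
#include <stdint.h>

#define	PAGE_SHIFT	12

int
main(void)
{
	uint64_t rip = (uint64_t)0x9 << PAGE_SHIFT;	/* assumed SIPI vector 0x9 */
	int vector = rip >> PAGE_SHIFT;
	uint64_t cs_base = (uint64_t)vector << PAGE_SHIFT;
	uint16_t cs = cs_base >> 4;

	/* vector 0x9 -> CS base 0x9000, CS selector 0x900, IP 0 */
	printf("base 0x%jx, cs 0x%x, ip 0\n", (uintmax_t)cs_base, (unsigned)cs);
	return (0);
}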
|
34
usr.sbin/bhyve/spinup_ap.h
Normal file
@ -0,0 +1,34 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _SPINUP_AP_H_
|
||||
#define _SPINUP_AP_H_
|
||||
|
||||
int spinup_ap(struct vmctx *ctx, int vcpu, int newcpu, uint64_t rip);
|
||||
|
||||
#endif
|
60
usr.sbin/bhyve/uart.c
Normal file
@ -0,0 +1,60 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
#define COM1 0x3F8
|
||||
#define COM2 0x2F8
|
||||
|
||||
#define REG_IIR 2
|
||||
|
||||
static int
|
||||
com_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
assert(in);
|
||||
|
||||
if (bytes != 1)
|
||||
return (-1);
|
||||
|
||||
/*
|
||||
* COM port is not implemented so we return 0xFF for all registers
|
||||
*/
|
||||
*eax = 0xFF;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
INOUT_PORT(uart, COM1 + REG_IIR, IOPORT_F_IN, com_handler);
|
||||
INOUT_PORT(uart, COM2 + REG_IIR, IOPORT_F_IN, com_handler);
|
85
usr.sbin/bhyve/virtio.h
Normal file
@ -0,0 +1,85 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VIRTIO_H_
|
||||
#define _VIRTIO_H_
|
||||
|
||||
#define VRING_ALIGN 4096
|
||||
|
||||
#define VRING_DESC_F_NEXT (1 << 0)
|
||||
#define VRING_DESC_F_WRITE (1 << 1)
|
||||
#define VRING_DESC_F_INDIRECT (1 << 2)
|
||||
|
||||
#define VRING_AVAIL_F_NO_INTERRUPT 1
|
||||
|
||||
struct virtio_desc {
|
||||
uint64_t vd_addr;
|
||||
uint32_t vd_len;
|
||||
uint16_t vd_flags;
|
||||
uint16_t vd_next;
|
||||
} __packed;
|
||||
|
||||
struct virtio_used {
|
||||
uint32_t vu_idx;
|
||||
uint32_t vu_tlen;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* PFN register shift amount
|
||||
*/
|
||||
#define VRING_PFN 12
|
||||
|
||||
/*
|
||||
* Virtio device types
|
||||
*/
|
||||
#define VIRTIO_TYPE_NET 1
|
||||
#define VIRTIO_TYPE_BLOCK 2
|
||||
|
||||
/*
|
||||
* PCI vendor/device IDs
|
||||
*/
|
||||
#define VIRTIO_VENDOR 0x1AF4
|
||||
#define VIRTIO_DEV_NET 0x1000
|
||||
#define VIRTIO_DEV_BLOCK 0x1001
|
||||
|
||||
/*
|
||||
* PCI config space constants
|
||||
*/
|
||||
#define VTCFG_R_HOSTCAP 0
|
||||
#define VTCFG_R_GUESTCAP 4
|
||||
#define VTCFG_R_PFN 8
|
||||
#define VTCFG_R_QNUM 12
|
||||
#define VTCFG_R_QSEL 14
|
||||
#define VTCFG_R_QNOTIFY 16
|
||||
#define VTCFG_R_STATUS 18
|
||||
#define VTCFG_R_ISR 19
|
||||
#define VTCFG_R_CFG0 20 /* No MSI-X */
|
||||
#define VTCFG_R_CFG1 24 /* With MSI-X */
|
||||
#define VTCFG_R_MSIX 20
|
||||
|
||||
#endif /* _VIRTIO_H_ */
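VTCFG_R_PFN holds the vring's guest-physical page frame number: the guest writes paddr >> VRING_PFN and the device shifts left by the same amount to locate the ring. A hypothetical sketch of that round trip with an example address (not part of this commit):

/* Illustrative only: the PFN register encodes the vring's physical page. */
#include <stdio.h>
#include <stdint.h>

#define	VRING_PFN	12

int
main(void)
{
	uint64_t ring_paddr = 0x12345000;	/* assumed 4KB-aligned ring */
	uint32_t pfn = ring_paddr >> VRING_PFN;	/* value written to VTCFG_R_PFN */
	uint64_t recovered = (uint64_t)pfn << VRING_PFN;

	printf("pfn 0x%x -> ring at 0x%jx\n", pfn, (uintmax_t)recovered);
	return (0);
}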
|
48
usr.sbin/bhyve/xmsr.c
Normal file
@ -0,0 +1,48 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "xmsr.h"
|
||||
|
||||
int
|
||||
emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val)
|
||||
{
|
||||
|
||||
printf("Unknown WRMSR code %x, val %lx, cpu %d\n", code, val, vcpu);
|
||||
exit(1);
|
||||
}
|
34
usr.sbin/bhyve/xmsr.h
Normal file
@ -0,0 +1,34 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _XMSR_H_
|
||||
#define _XMSR_H_
|
||||
|
||||
int emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val);
|
||||
|
||||
#endif
|