Import of bhyve hypervisor and utilities, part 1.
vmm.ko - kernel module for VT-x, VT-d and hypervisor control; bhyve - user-space sequencer and I/O emulation; vmmctl - dump of hypervisor register state; libvmm - front-end to the vmm.ko chardev interface. bhyve was designed and implemented by Neel Natu. Thanks to the following folks from NetApp who helped to make this available: Joe CaraDonna, Peter Snyder, Jeff Heller, Sandeep Mann, Steve Miller, Brian Pawlowski.
This commit is contained in:
parent
1430f46faf
commit
d45b7f14ae
@ -102,6 +102,7 @@ SUBDIR= ${SUBDIR_ORDERED} \
|
||||
${_libusbhid} \
|
||||
${_libusb} \
|
||||
${_libvgl} \
|
||||
${_libvmmapi} \
|
||||
libwrap \
|
||||
liby \
|
||||
libz \
|
||||
@ -177,6 +178,7 @@ _libncp= libncp
|
||||
.endif
|
||||
_libsmb= libsmb
|
||||
_libvgl= libvgl
|
||||
_libvmmapi= libvmmapi
|
||||
.endif
|
||||
|
||||
.if ${MACHINE_ARCH} == "powerpc"
|
||||
|
9
lib/libvmmapi/Makefile
Normal file
9
lib/libvmmapi/Makefile
Normal file
@ -0,0 +1,9 @@
|
||||
# $FreeBSD$

# libvmmapi: user-space front-end to the vmm.ko /dev/vmm chardev interface.
LIB=	vmmapi
SRCS=	vmmapi.c vmmapi_freebsd.c mptable.c
INCS=	vmmapi.h

# Local headers (vmmapi.h, mptable.h) live next to the sources.
CFLAGS+= -I${.CURDIR}

.include <bsd.lib.mk>
|
336
lib/libvmmapi/mptable.c
Normal file
336
lib/libvmmapi/mptable.c
Normal file
@ -0,0 +1,336 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <machine/vmm.h>
|
||||
#include <machine/vmm_dev.h>
|
||||
|
||||
#include "vmmapi.h"
|
||||
#include "mptable.h"
|
||||
|
||||
/* Guest physical addresses and version numbers advertised in the MP table. */
#define LAPIC_PADDR			(0xFEE00000)
#define LAPIC_VERSION			(16)

#define IOAPIC_PADDR			(0xFEC00000)
#define IOAPIC_VERSION			(0x11)

/*
 * 'extern int errno;' is not a valid way to access errno on FreeBSD (or any
 * modern libc), where errno is a thread-local macro expanding to
 * (*__error()).  Include <errno.h> instead.
 */
#include <errno.h>
|
||||
/*
 * Compute the 8-bit checksum byte for 'len' bytes at 'base': the value
 * that, once stored into the structure's checksum slot, makes the
 * byte-wise sum of the region zero modulo 256.
 */
static uint8_t
mp_compute_checksum(void *base, size_t len)
{
	const uint8_t *p = base;
	uint8_t total = 0;
	size_t i;

	for (i = 0; i < len; i++)
		total += p[i];

	return (256 - total);
}
|
||||
|
||||
static void
|
||||
mp_build_mpfp(struct mp_floating_pointer *mpfp, vm_paddr_t mpfp_gpa)
|
||||
{
|
||||
memset(mpfp, 0, sizeof(*mpfp));
|
||||
memcpy(mpfp->signature, MPFP_SIGNATURE, MPFP_SIGNATURE_LEN);
|
||||
mpfp->mptable_paddr = mpfp_gpa + sizeof(*mpfp);
|
||||
mpfp->specrev = MP_SPECREV;
|
||||
mpfp->feature2 = 0;
|
||||
mpfp->checksum = mp_compute_checksum(mpfp, sizeof(*mpfp));
|
||||
}
|
||||
|
||||
static void
|
||||
mp_build_mpch(struct mp_config_hdr *mpch)
|
||||
{
|
||||
memset(mpch, 0, sizeof(*mpch));
|
||||
mpch->specrev = MP_SPECREV;
|
||||
memcpy(mpch->signature, MPCH_SIGNATURE, MPCH_SIGNATURE_LEN);
|
||||
memcpy(mpch->oemid, MPCH_OEMID, MPCH_OEMID_LEN);
|
||||
memcpy(mpch->prodid, MPCH_PRODID, MPCH_PRODID_LEN);
|
||||
mpch->lapic_paddr = LAPIC_PADDR;
|
||||
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
mp_build_proc_entries(struct mpe_proc *mpep, int num_proc)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_proc; i++) {
|
||||
memset(mpep, 0, sizeof(*mpep));
|
||||
mpep->entry_type = MP_ENTRY_PROC;
|
||||
mpep->lapic_id = i; // XXX
|
||||
mpep->lapic_version = LAPIC_VERSION;
|
||||
mpep->proc_flags = (i == 0)?MPEP_FLAGS_BSP:0;
|
||||
mpep->proc_flags |= MPEP_FLAGS_EN;
|
||||
mpep->proc_signature = MPEP_SIGNATURE;
|
||||
mpep->feature_flags = MPEP_FEATURES;
|
||||
mpep++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
mp_build_bus_entries(struct mpe_bus *mpeb)
|
||||
{
|
||||
memset(mpeb, 0, sizeof(*mpeb));
|
||||
mpeb->entry_type = MP_ENTRY_BUS;
|
||||
mpeb->busid = MPE_BUSID_ISA;
|
||||
memcpy(mpeb->busname, MPE_BUSNAME_ISA, MPE_BUSNAME_LEN);
|
||||
mpeb++;
|
||||
|
||||
memset(mpeb, 0, sizeof(*mpeb));
|
||||
mpeb->entry_type = MP_ENTRY_BUS;
|
||||
mpeb->busid = MPE_BUSID_PCI;
|
||||
memcpy(mpeb->busname, MPE_BUSNAME_PCI, MPE_BUSNAME_LEN);
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
mp_build_ioapic_entries(struct mpe_ioapic *mpei)
|
||||
{
|
||||
memset(mpei, 0, sizeof(*mpei));
|
||||
mpei->entry_type = MP_ENTRY_IOAPIC;
|
||||
mpei->ioapic_id = MPE_IOAPIC_ID;
|
||||
mpei->ioapic_version = IOAPIC_VERSION;
|
||||
mpei->ioapic_flags = MPE_IOAPIC_FLAG_EN;
|
||||
mpei->ioapic_paddr = IOAPIC_PADDR;
|
||||
}
|
||||
|
||||
/*
 * Emit 'num_pins' MP I/O interrupt entries starting at 'mpeii', using the
 * MP-spec default routing: ISA IRQs identity-mapped onto the first 16 pins
 * of the I/O APIC, with the special cases noted below.
 */
static void
mp_build_ioint_entries(struct mpe_ioint *mpeii, int num_pins)
{
	int pin;

	/*
	 * The following config is taken from kernel mptable.c
	 * mptable_parse_default_config_ints(...), for now 
	 * just use the default config, tweek later if needed.
	 */


	/* Run through all 16 pins. */
	for (pin = 0; pin < num_pins; pin++) {
		memset(mpeii, 0, sizeof(*mpeii));
		mpeii->entry_type = MP_ENTRY_IOINT;
		mpeii->src_bus_id = MPE_BUSID_ISA;
		mpeii->dst_apic_id = MPE_IOAPIC_ID;

		/*
		 * All default configs route IRQs from bus 0 to the first 16 pins
		 * of the first I/O APIC with an APIC ID of 2.
		 */
		mpeii->dst_apic_intin = pin;
		switch (pin) {
		case 0:
			/* Pin 0 is an ExtINT pin. */
			mpeii->intr_type = MPEII_INTR_EXTINT;
			break;
		case 2:
			/* IRQ 0 is routed to pin 2. */
			mpeii->intr_type = MPEII_INTR_INT;
			mpeii->src_bus_irq = 0;
			break;
		case 5:
		case 10:
		case 11:
			/*
			 * PCI Irqs set to level triggered.
			 */
			mpeii->intr_flags = MPEII_FLAGS_TRIGMODE_LEVEL;
			mpeii->src_bus_id = MPE_BUSID_PCI;
			/*
			 * FALLTHROUGH: PCI pins also get the identity
			 * intin/irq mapping applied in the default case.
			 */
		default:
			/* All other pins are identity mapped. */
			mpeii->intr_type = MPEII_INTR_INT;
			mpeii->src_bus_irq = pin;
			break;
		}
		mpeii++;
	}

}
|
||||
|
||||
/*
 * Copy 'bytes' bytes from 'src' into 'dest' and NUL-terminate the result.
 * The original expansion hard-coded 'str' as the buffer to terminate (it
 * only worked because every call site happened to pass a variable named
 * 'str') and, being two bare statements, was not safe inside an
 * unbraced if/else.  Use the macro argument and a do/while(0) wrapper.
 */
#define COPYSTR(dest, src, bytes)	do {			\
	memcpy((dest), (src), (bytes));				\
	((char *)(dest))[(bytes)] = '\0';			\
} while (0)
|
||||
|
||||
|
||||
/*
 * Debug helper: pretty-print the MP floating pointer, the configuration
 * header and every table entry that follows it.  Currently unused — the
 * call site in vm_build_mptable() is commented out.
 */
static void
mptable_dump(struct mp_floating_pointer *mpfp, struct mp_config_hdr *mpch)
{
	static char str[16];	/* scratch buffer for NUL-terminated copies */
	int i;
	char *cur;

	/* One pointer flavor per entry type; 'p' reads the type tag byte. */
	union mpe {
		struct mpe_proc *proc;
		struct mpe_bus *bus;
		struct mpe_ioapic *ioapic;
		struct mpe_ioint *ioint;
		struct mpe_lint *lnit;
		char *p;
	};

	union mpe mpe;

	printf(" MP Floating Pointer :\n");
	COPYSTR(str, mpfp->signature, 4);
	printf(" signature: %s\n", str);
	printf(" mpch paddr: %x\n", mpfp->mptable_paddr);
	printf(" length: %x\n", mpfp->length);
	printf(" specrec: %x\n", mpfp->specrev);
	printf(" checksum: %x\n", mpfp->checksum);
	printf(" feature1: %x\n", mpfp->feature1);
	printf(" feature2: %x\n", mpfp->feature2);
	printf(" feature3: %x\n", mpfp->feature3);
	printf(" feature4: %x\n", mpfp->feature4);

	printf(" MP Configuration Header :\n");
	COPYSTR(str, mpch->signature, 4);
	printf(" signature: %s\n", str);
	printf(" length: %x\n", mpch->length);
	printf(" specrec: %x\n", mpch->specrev);
	printf(" checksum: %x\n", mpch->checksum);
	COPYSTR(str, mpch->oemid, MPCH_OEMID_LEN);
	printf(" oemid: %s\n", str);
	COPYSTR(str, mpch->prodid, MPCH_PRODID_LEN);
	printf(" prodid: %s\n", str);
	printf(" oem_ptr: %x\n", mpch->oem_ptr);
	printf(" oem_sz: %x\n", mpch->oem_sz);
	printf(" nr_entries: %x\n", mpch->nr_entries);
	printf(" apic paddr: %x\n", mpch->lapic_paddr);
	printf(" ext_length: %x\n", mpch->ext_length);
	printf(" ext_checksum: %x\n", mpch->ext_checksum);

	/* Walk the variable-length entries that follow the header. */
	cur = (char *)mpch + sizeof(*mpch);
	for (i = 0; i < mpch->nr_entries; i++) {
		mpe.p = cur;
		/* The first byte of every entry is its type tag. */
		switch(*mpe.p) {
		case MP_ENTRY_PROC:
			printf(" MP Processor Entry :\n");
			printf(" lapic_id: %x\n", mpe.proc->lapic_id);
			printf(" lapic_version: %x\n", mpe.proc->lapic_version);
			printf(" proc_flags: %x\n", mpe.proc->proc_flags);
			printf(" proc_signature: %x\n", mpe.proc->proc_signature);
			printf(" feature_flags: %x\n", mpe.proc->feature_flags);
			cur += sizeof(struct mpe_proc);
			break;
		case MP_ENTRY_BUS:
			printf(" MP Bus Entry :\n");
			printf(" busid: %x\n", mpe.bus->busid);
			COPYSTR(str, mpe.bus->busname, MPE_BUSNAME_LEN);
			printf(" busname: %s\n", str);
			cur += sizeof(struct mpe_bus);
			break;
		case MP_ENTRY_IOAPIC:
			printf(" MP IOAPIC Entry :\n");
			printf(" ioapi_id: %x\n", mpe.ioapic->ioapic_id);
			printf(" ioapi_version: %x\n", mpe.ioapic->ioapic_version);
			printf(" ioapi_flags: %x\n", mpe.ioapic->ioapic_flags);
			printf(" ioapi_paddr: %x\n", mpe.ioapic->ioapic_paddr);
			cur += sizeof(struct mpe_ioapic);
			break;
		case MP_ENTRY_IOINT:
			printf(" MP IO Interrupt Entry :\n");
			printf(" intr_type: %x\n", mpe.ioint->intr_type);
			printf(" intr_flags: %x\n", mpe.ioint->intr_flags);
			printf(" src_bus_id: %x\n", mpe.ioint->src_bus_id);
			printf(" src_bus_irq: %x\n", mpe.ioint->src_bus_irq);
			printf(" dst_apic_id: %x\n", mpe.ioint->dst_apic_id);
			printf(" dst_apic_intin: %x\n", mpe.ioint->dst_apic_intin);
			cur += sizeof(struct mpe_ioint);
			break;
		case MP_ENTRY_LINT:
			printf(" MP Local Interrupt Entry :\n");
			cur += sizeof(struct mpe_lint);
			break;
		}
		/*
		 * NOTE(review): an unrecognized entry type leaves 'cur'
		 * unadvanced, so the loop would re-read the same bytes for
		 * the remaining iterations — add a default case if this is
		 * ever fed untrusted tables.
		 */

	}
}
|
||||
|
||||
int
|
||||
vm_build_mptable(struct vmctx *ctx, vm_paddr_t gpa, int len, int ncpu,
|
||||
void *oemp, int oemsz)
|
||||
{
|
||||
struct mp_config_hdr *mpch;
|
||||
char *mapaddr;
|
||||
char *startaddr;
|
||||
int error;
|
||||
|
||||
mapaddr = vm_map_memory(ctx, gpa, len);
|
||||
if (mapaddr == MAP_FAILED) {
|
||||
printf("%s\n", strerror(errno));
|
||||
goto err;
|
||||
}
|
||||
startaddr = mapaddr;
|
||||
|
||||
mp_build_mpfp((struct mp_floating_pointer*) mapaddr, gpa);
|
||||
mapaddr += sizeof(struct mp_floating_pointer);
|
||||
|
||||
mpch = (struct mp_config_hdr*)mapaddr;
|
||||
mp_build_mpch(mpch);
|
||||
mapaddr += sizeof(struct mp_config_hdr);
|
||||
|
||||
mp_build_proc_entries((struct mpe_proc*) mapaddr, ncpu);
|
||||
mapaddr += (sizeof(struct mpe_proc)*ncpu);
|
||||
mpch->nr_entries += ncpu;
|
||||
|
||||
mp_build_bus_entries((struct mpe_bus*)mapaddr);
|
||||
mapaddr += (sizeof(struct mpe_bus)*MPE_NUM_BUSES);
|
||||
mpch->nr_entries += MPE_NUM_BUSES;
|
||||
#if 0
|
||||
mp_build_ioapic_entries((struct mpe_ioapic*)mapaddr);
|
||||
mapaddr += sizeof(struct mpe_ioapic);
|
||||
mpch->nr_entries++;
|
||||
|
||||
mp_build_ioint_entries((struct mpe_ioint*)mapaddr, MPEII_MAX_IRQ);
|
||||
mapaddr += sizeof(struct mpe_ioint)*MPEII_MAX_IRQ;
|
||||
mpch->nr_entries += MPEII_MAX_IRQ;
|
||||
|
||||
#endif
|
||||
if (oemp) {
|
||||
mpch->oem_ptr = mapaddr - startaddr + gpa;
|
||||
mpch->oem_sz = oemsz;
|
||||
memcpy(mapaddr, oemp, oemsz);
|
||||
}
|
||||
mpch->length = (mapaddr) - ((char*) mpch);
|
||||
mpch->checksum = mp_compute_checksum(mpch, sizeof(*mpch));
|
||||
|
||||
|
||||
// mptable_dump((struct mp_floating_pointer*)startaddr, mpch);
|
||||
err:
|
||||
return (error);
|
||||
}
|
171
lib/libvmmapi/mptable.h
Normal file
171
lib/libvmmapi/mptable.h
Normal file
@ -0,0 +1,171 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _MPTABLE_h_
|
||||
#define _MPTABLE_h_
|
||||
|
||||
#define MP_SPECREV (4) // MP spec revision 1.1
|
||||
|
||||
/*
|
||||
* MP Floating Pointer Structure
|
||||
*/
|
||||
#define MPFP_SIGNATURE "_MP_"
|
||||
#define MPFP_SIGNATURE_LEN (4)
|
||||
#define MPFP_FEATURE2 (0x80) // IMCR is present
|
||||
/*
 * 16-byte MP Floating Pointer structure.  'mptable_paddr' holds the guest
 * physical address of the MP configuration table.
 */
struct mp_floating_pointer {
	uint8_t signature[MPFP_SIGNATURE_LEN];	/* "_MP_" */
	uint32_t mptable_paddr;	/* config table physical address */
	uint8_t length;		/* structure size, per spec in 16-byte units */
	uint8_t specrev;	/* MP spec revision (MP_SPECREV) */
	uint8_t checksum;	/* makes all 16 bytes sum to zero */
	uint8_t feature1;	/* default config type; 0 = config table present */
	uint8_t feature2;	/* bit 7 (MPFP_FEATURE2): IMCR present */
	uint8_t feature3;
	uint8_t feature4;
	uint8_t feature5;
};
|
||||
|
||||
|
||||
/*
|
||||
* MP Configuration Table Header
|
||||
*/
|
||||
#define MPCH_SIGNATURE "PCMP"
|
||||
#define MPCH_SIGNATURE_LEN (4)
|
||||
|
||||
#define MPCH_OEMID "NETAPP "
|
||||
#define MPCH_OEMID_LEN (8)
|
||||
#define MPCH_PRODID "vFiler "
|
||||
#define MPCH_PRODID_LEN (12)
|
||||
|
||||
/*
 * MP Configuration Table header; variable-length entries (processor, bus,
 * I/O APIC, interrupt) follow immediately after it in memory.
 */
struct mp_config_hdr {
	uint8_t signature[MPCH_SIGNATURE_LEN];	/* "PCMP" */
	uint16_t length;	/* header + entries, in bytes */
	uint8_t specrev;	/* MP spec revision */
	uint8_t checksum;	/* makes the whole base table sum to zero */
	uint8_t oemid[MPCH_OEMID_LEN];		/* space-padded, not NUL-terminated */
	uint8_t prodid[MPCH_PRODID_LEN];	/* space-padded, not NUL-terminated */
	uint32_t oem_ptr;	/* physical address of optional OEM table */
	uint16_t oem_sz;	/* OEM table size in bytes */
	uint16_t nr_entries;	/* number of entries following the header */
	uint32_t lapic_paddr;	/* local APIC physical base */
	uint16_t ext_length;	/* extended table length (unused here) */
	uint8_t ext_checksum;	/* extended table checksum (unused here) */
	uint8_t reserved;
};
|
||||
|
||||
#define MP_ENTRY_PROC (0)
|
||||
#define MP_ENTRY_BUS (1)
|
||||
#define MP_ENTRY_IOAPIC (2)
|
||||
#define MP_ENTRY_IOINT (3)
|
||||
#define MP_ENTRY_LINT (4)
|
||||
|
||||
/*
|
||||
* MP Processor Entry
|
||||
*/
|
||||
|
||||
#define MPEP_FLAGS_EN (0x1)
|
||||
#define MPEP_FLAGS_BSP (0x2)
|
||||
|
||||
#define MPEP_SIG_FAMILY (6)
|
||||
#define MPEP_SIG_MODEL (26)
|
||||
#define MPEP_SIG_STEPPING (5)
|
||||
#define MPEP_SIGNATURE ((MPEP_SIG_FAMILY << 8) | (MPEP_SIG_MODEL << 4) \
|
||||
| (MPEP_SIG_STEPPING))
|
||||
|
||||
#define MPEP_FEATURES (0xBFEBFBFF) // Value from Intel i7 CPUID
|
||||
|
||||
struct mpe_proc {
|
||||
uint8_t entry_type;
|
||||
uint8_t lapic_id;
|
||||
uint8_t lapic_version;
|
||||
uint8_t proc_flags;
|
||||
uint32_t proc_signature;
|
||||
uint32_t feature_flags;
|
||||
uint8_t reserved[8];
|
||||
};
|
||||
|
||||
/*
|
||||
* MP Bus Entry
|
||||
*/
|
||||
|
||||
#define MPE_NUM_BUSES (2)
|
||||
#define MPE_BUSNAME_LEN (6)
|
||||
#define MPE_BUSID_ISA (0)
|
||||
#define MPE_BUSID_PCI (1)
|
||||
#define MPE_BUSNAME_ISA "ISA "
|
||||
#define MPE_BUSNAME_PCI "PCI "
|
||||
struct mpe_bus {
|
||||
uint8_t entry_type;
|
||||
uint8_t busid;
|
||||
uint8_t busname[MPE_BUSNAME_LEN];
|
||||
};
|
||||
|
||||
/*
|
||||
* MP IO APIC Entry
|
||||
*/
|
||||
#define MPE_IOAPIC_ID (2)
|
||||
#define MPE_IOAPIC_FLAG_EN (1)
|
||||
struct mpe_ioapic {
|
||||
uint8_t entry_type;
|
||||
uint8_t ioapic_id;
|
||||
uint8_t ioapic_version;
|
||||
uint8_t ioapic_flags;
|
||||
uint32_t ioapic_paddr;
|
||||
|
||||
};
|
||||
|
||||
/*
|
||||
* MP IO Interrupt Assignment Entry
|
||||
*/
|
||||
#define MPEII_INTR_INT (0)
|
||||
#define MPEII_INTR_NMI (1)
|
||||
#define MPEII_INTR_SMI (2)
|
||||
#define MPEII_INTR_EXTINT (3)
|
||||
#define MPEII_PCI_IRQ_MASK (0x0c20U) /* IRQ 5,10,11 are PCI connected */
|
||||
#define MPEII_MAX_IRQ (16)
|
||||
#define MPEII_FLAGS_TRIGMODE_LEVEL (0x3)
|
||||
struct mpe_ioint {
|
||||
uint8_t entry_type;
|
||||
uint8_t intr_type;
|
||||
uint16_t intr_flags;
|
||||
uint8_t src_bus_id;
|
||||
uint8_t src_bus_irq;
|
||||
uint8_t dst_apic_id;
|
||||
uint8_t dst_apic_intin;
|
||||
};
|
||||
|
||||
/*
|
||||
* MP Local Interrupt Assignment Entry
|
||||
*/
|
||||
struct mpe_lint {
|
||||
uint8_t entry_type;
|
||||
};
|
||||
|
||||
int vm_build_mptable(struct vmctx *ctxt, vm_paddr_t gpa, int len,
|
||||
int ncpu, void *oemp, int oemsz);
|
||||
#endif /* _MPTABLE_h_ */
|
647
lib/libvmmapi/vmmapi.c
Normal file
647
lib/libvmmapi/vmmapi.c
Normal file
@ -0,0 +1,647 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <machine/specialreg.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <machine/vmm_dev.h>
|
||||
|
||||
#include "vmmapi.h"
|
||||
#include "mptable.h"
|
||||
|
||||
#ifndef CR4_VMXE
|
||||
#define CR4_VMXE (1UL << 13)
|
||||
#endif
|
||||
|
||||
#define BIOS_ROM_BASE (0xf0000)
|
||||
#define BIOS_ROM_SIZE (0x10000)
|
||||
|
||||
struct vmctx {
|
||||
int fd;
|
||||
char *name;
|
||||
};
|
||||
|
||||
#define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
|
||||
#define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))
|
||||
|
||||
/*
 * Open the vmm control device node for the VM named 'name'
 * (/dev/vmm/<name>).  Returns the file descriptor, or -1 on failure.
 */
static int
vm_device_open(const char *name)
{
	char *path;
	size_t pathlen;
	int fd;

	pathlen = strlen("/dev/vmm/") + strlen(name) + 1;
	path = malloc(pathlen);
	assert(path != NULL);
	snprintf(path, pathlen, "/dev/vmm/%s", name);

	/* Open the device file */
	fd = open(path, O_RDWR, 0);

	free(path);
	return (fd);
}
|
||||
|
||||
/*
 * Create a new VM named 'name' via the hw.vmm.create sysctl.
 * Returns 0 on success, -1 with errno set on failure (sysctlbyname
 * semantics — see the CREATE() macro above).
 */
int
vm_create(const char *name)
{

	return (CREATE((char *)name));
}
|
||||
|
||||
struct vmctx *
|
||||
vm_open(const char *name)
|
||||
{
|
||||
struct vmctx *vm;
|
||||
|
||||
vm = malloc(sizeof(struct vmctx) + strlen(name) + 1);
|
||||
assert(vm != NULL);
|
||||
|
||||
vm->fd = -1;
|
||||
vm->name = (char *)(vm + 1);
|
||||
strcpy(vm->name, name);
|
||||
|
||||
if ((vm->fd = vm_device_open(vm->name)) < 0)
|
||||
goto err;
|
||||
|
||||
return (vm);
|
||||
err:
|
||||
vm_destroy(vm);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
 * Destroy the VM represented by 'vm': tear down the kernel VM via the
 * hw.vmm.destroy sysctl, close the control device and free the context.
 *
 * NOTE(review): the DESTROY sysctl is issued unconditionally, including
 * from the vm_open() failure path — confirm that destroying a VM this
 * process merely failed to open is the intended semantic.
 */
void
vm_destroy(struct vmctx *vm)
{
	assert(vm != NULL);

	DESTROY(vm->name);
	if (vm->fd >= 0)
		close(vm->fd);
	free(vm);
}
|
||||
|
||||
int
|
||||
vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa,
|
||||
vm_paddr_t *ret_hpa, size_t *ret_len)
|
||||
{
|
||||
int error;
|
||||
struct vm_memory_segment seg;
|
||||
|
||||
bzero(&seg, sizeof(seg));
|
||||
seg.gpa = gpa;
|
||||
error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg);
|
||||
*ret_hpa = seg.hpa;
|
||||
*ret_len = seg.len;
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_setup_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **mapaddr)
|
||||
{
|
||||
int error;
|
||||
struct vm_memory_segment seg;
|
||||
|
||||
/*
|
||||
* Create and optionally map 'len' bytes of memory at guest
|
||||
* physical address 'gpa'
|
||||
*/
|
||||
bzero(&seg, sizeof(seg));
|
||||
seg.gpa = gpa;
|
||||
seg.len = len;
|
||||
error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg);
|
||||
if (error == 0 && mapaddr != NULL) {
|
||||
*mapaddr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||
ctx->fd, gpa);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
 * Map 'len' bytes of existing guest memory at guest physical address 'gpa'
 * into this process.  Returns the mapping address; errors are NOT handled
 * here — mmap's MAP_FAILED is returned unchanged, so callers must check
 * for it (vm_build_mptable() does).
 */
char *
vm_map_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len)
{

	/* Map 'len' bytes of memory at guest physical address 'gpa' */
	return ((char *)mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
		     ctx->fd, gpa));
}
|
||||
|
||||
int
|
||||
vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
|
||||
uint64_t base, uint32_t limit, uint32_t access)
|
||||
{
|
||||
int error;
|
||||
struct vm_seg_desc vmsegdesc;
|
||||
|
||||
bzero(&vmsegdesc, sizeof(vmsegdesc));
|
||||
vmsegdesc.cpuid = vcpu;
|
||||
vmsegdesc.regnum = reg;
|
||||
vmsegdesc.desc.base = base;
|
||||
vmsegdesc.desc.limit = limit;
|
||||
vmsegdesc.desc.access = access;
|
||||
|
||||
error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
|
||||
uint64_t *base, uint32_t *limit, uint32_t *access)
|
||||
{
|
||||
int error;
|
||||
struct vm_seg_desc vmsegdesc;
|
||||
|
||||
bzero(&vmsegdesc, sizeof(vmsegdesc));
|
||||
vmsegdesc.cpuid = vcpu;
|
||||
vmsegdesc.regnum = reg;
|
||||
|
||||
error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc);
|
||||
if (error == 0) {
|
||||
*base = vmsegdesc.desc.base;
|
||||
*limit = vmsegdesc.desc.limit;
|
||||
*access = vmsegdesc.desc.access;
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
|
||||
{
|
||||
int error;
|
||||
struct vm_register vmreg;
|
||||
|
||||
bzero(&vmreg, sizeof(vmreg));
|
||||
vmreg.cpuid = vcpu;
|
||||
vmreg.regnum = reg;
|
||||
vmreg.regval = val;
|
||||
|
||||
error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val)
|
||||
{
|
||||
int error;
|
||||
struct vm_register vmreg;
|
||||
|
||||
bzero(&vmreg, sizeof(vmreg));
|
||||
vmreg.cpuid = vcpu;
|
||||
vmreg.regnum = reg;
|
||||
|
||||
error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg);
|
||||
*ret_val = vmreg.regval;
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid)
|
||||
{
|
||||
int error;
|
||||
struct vm_pin vmpin;
|
||||
|
||||
bzero(&vmpin, sizeof(vmpin));
|
||||
vmpin.vm_cpuid = vcpu;
|
||||
|
||||
error = ioctl(ctx->fd, VM_GET_PINNING, &vmpin);
|
||||
*host_cpuid = vmpin.host_cpuid;
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid)
|
||||
{
|
||||
int error;
|
||||
struct vm_pin vmpin;
|
||||
|
||||
bzero(&vmpin, sizeof(vmpin));
|
||||
vmpin.vm_cpuid = vcpu;
|
||||
vmpin.host_cpuid = host_cpuid;
|
||||
|
||||
error = ioctl(ctx->fd, VM_SET_PINNING, &vmpin);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
 * Run vcpu 'vcpu' starting at guest %rip 'rip' until it exits, then copy
 * the exit information into '*vmexit'.  Returns the ioctl result.
 */
int
vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit)
{
	int error;
	struct vm_run vmrun;

	bzero(&vmrun, sizeof(vmrun));
	vmrun.cpuid = vcpu;
	vmrun.rip = rip;

	error = ioctl(ctx->fd, VM_RUN, &vmrun);
	/*
	 * NOTE(review): the exit state is copied out even if the ioctl
	 * failed, in which case it is just the zeroed request structure.
	 */
	bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit));
	return (error);
}
|
||||
|
||||
static int
|
||||
vm_inject_event_real(struct vmctx *ctx, int vcpu, enum vm_event_type type,
|
||||
int vector, int error_code, int error_code_valid)
|
||||
{
|
||||
struct vm_event ev;
|
||||
|
||||
bzero(&ev, sizeof(ev));
|
||||
ev.cpuid = vcpu;
|
||||
ev.type = type;
|
||||
ev.vector = vector;
|
||||
ev.error_code = error_code;
|
||||
ev.error_code_valid = error_code_valid;
|
||||
|
||||
return (ioctl(ctx->fd, VM_INJECT_EVENT, &ev));
|
||||
}
|
||||
|
||||
/*
 * Inject event 'type'/'vector' into vcpu 'vcpu' without an error code.
 */
int
vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type,
	int vector)
{

	return (vm_inject_event_real(ctx, vcpu, type, vector, 0, 0));
}

/*
 * Inject event 'type'/'vector' into vcpu 'vcpu' with error code
 * 'error_code'.
 */
int
vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type,
	int vector, int error_code)
{

	return (vm_inject_event_real(ctx, vcpu, type, vector, error_code, 1));
}
|
||||
|
||||
/*
 * Build the guest firmware tables (currently just the MP table) in the
 * BIOS ROM region of the guest address space.
 */
int
vm_build_tables(struct vmctx *ctxt, int ncpu, void *oemtbl, int oemtblsz)
{

	return (vm_build_mptable(ctxt, BIOS_ROM_BASE, BIOS_ROM_SIZE, ncpu,
		oemtbl, oemtblsz));
}
|
||||
|
||||
int
|
||||
vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector)
|
||||
{
|
||||
struct vm_lapic_irq vmirq;
|
||||
|
||||
bzero(&vmirq, sizeof(vmirq));
|
||||
vmirq.cpuid = vcpu;
|
||||
vmirq.vector = vector;
|
||||
|
||||
return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq));
|
||||
}
|
||||
|
||||
int
|
||||
vm_inject_nmi(struct vmctx *ctx, int vcpu)
|
||||
{
|
||||
struct vm_nmi vmnmi;
|
||||
|
||||
bzero(&vmnmi, sizeof(vmnmi));
|
||||
vmnmi.cpuid = vcpu;
|
||||
|
||||
return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi));
|
||||
}
|
||||
|
||||
int
|
||||
vm_capability_name2type(const char *capname)
|
||||
{
|
||||
int i;
|
||||
|
||||
static struct {
|
||||
const char *name;
|
||||
int type;
|
||||
} capstrmap[] = {
|
||||
{ "hlt_exit", VM_CAP_HALT_EXIT },
|
||||
{ "mtrap_exit", VM_CAP_MTRAP_EXIT },
|
||||
{ "pause_exit", VM_CAP_PAUSE_EXIT },
|
||||
{ "unrestricted_guest", VM_CAP_UNRESTRICTED_GUEST },
|
||||
{ 0 }
|
||||
};
|
||||
|
||||
for (i = 0; capstrmap[i].name != NULL && capname != NULL; i++) {
|
||||
if (strcmp(capstrmap[i].name, capname) == 0)
|
||||
return (capstrmap[i].type);
|
||||
}
|
||||
|
||||
return (-1);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
|
||||
int *retval)
|
||||
{
|
||||
int error;
|
||||
struct vm_capability vmcap;
|
||||
|
||||
bzero(&vmcap, sizeof(vmcap));
|
||||
vmcap.cpuid = vcpu;
|
||||
vmcap.captype = cap;
|
||||
|
||||
error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap);
|
||||
*retval = vmcap.capval;
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val)
|
||||
{
|
||||
struct vm_capability vmcap;
|
||||
|
||||
bzero(&vmcap, sizeof(vmcap));
|
||||
vmcap.cpuid = vcpu;
|
||||
vmcap.captype = cap;
|
||||
vmcap.capval = val;
|
||||
|
||||
return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap));
|
||||
}
|
||||
|
||||
int
|
||||
vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
|
||||
{
|
||||
struct vm_pptdev pptdev;
|
||||
|
||||
bzero(&pptdev, sizeof(pptdev));
|
||||
pptdev.bus = bus;
|
||||
pptdev.slot = slot;
|
||||
pptdev.func = func;
|
||||
|
||||
return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev));
|
||||
}
|
||||
|
||||
int
|
||||
vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
|
||||
{
|
||||
struct vm_pptdev pptdev;
|
||||
|
||||
bzero(&pptdev, sizeof(pptdev));
|
||||
pptdev.bus = bus;
|
||||
pptdev.slot = slot;
|
||||
pptdev.func = func;
|
||||
|
||||
return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev));
|
||||
}
|
||||
|
||||
int
|
||||
vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
|
||||
vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
|
||||
{
|
||||
struct vm_pptdev_mmio pptmmio;
|
||||
|
||||
bzero(&pptmmio, sizeof(pptmmio));
|
||||
pptmmio.bus = bus;
|
||||
pptmmio.slot = slot;
|
||||
pptmmio.func = func;
|
||||
pptmmio.gpa = gpa;
|
||||
pptmmio.len = len;
|
||||
pptmmio.hpa = hpa;
|
||||
|
||||
return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio));
|
||||
}
|
||||
|
||||
int
|
||||
vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
|
||||
int destcpu, int vector, int numvec)
|
||||
{
|
||||
struct vm_pptdev_msi pptmsi;
|
||||
|
||||
bzero(&pptmsi, sizeof(pptmsi));
|
||||
pptmsi.vcpu = vcpu;
|
||||
pptmsi.bus = bus;
|
||||
pptmsi.slot = slot;
|
||||
pptmsi.func = func;
|
||||
pptmsi.destcpu = destcpu;
|
||||
pptmsi.vector = vector;
|
||||
pptmsi.numvec = numvec;
|
||||
|
||||
return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
|
||||
}
|
||||
|
||||
/*
 * Fetch the statistics buffer for vcpu 'vcpu'.  On success, optionally
 * stores the sample time and entry count, and returns a pointer into a
 * static buffer (NOT thread-safe; overwritten by the next call).  Returns
 * NULL on failure.
 */
uint64_t *
vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
	     int *ret_entries)
{
	int error;

	/* Static so the returned pointer stays valid after we return. */
	static struct vm_stats vmstats;

	vmstats.cpuid = vcpu;

	error = ioctl(ctx->fd, VM_STATS, &vmstats);
	if (error == 0) {
		if (ret_entries)
			*ret_entries = vmstats.num_entries;
		if (ret_tv)
			*ret_tv = vmstats.tv;
		return (vmstats.statbuf);
	} else
		return (NULL);
}
|
||||
|
||||
const char *
|
||||
vm_get_stat_desc(struct vmctx *ctx, int index)
|
||||
{
|
||||
int error;
|
||||
|
||||
static struct vm_stat_desc statdesc;
|
||||
|
||||
statdesc.index = index;
|
||||
if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0)
|
||||
return (statdesc.desc);
|
||||
else
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* From Intel Vol 3a:
|
||||
* Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT
|
||||
*/
|
||||
int
|
||||
vcpu_reset(struct vmctx *vmctx, int vcpu)
|
||||
{
|
||||
int error;
|
||||
uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx;
|
||||
uint32_t desc_access, desc_limit;
|
||||
uint16_t sel;
|
||||
|
||||
zero = 0;
|
||||
|
||||
rflags = 0x2;
|
||||
error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
rip = 0xfff0;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0)
|
||||
goto done;
|
||||
|
||||
cr0 = CR0_NE;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0)
|
||||
goto done;
|
||||
|
||||
cr4 = CR4_VMXE;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* CS: present, r/w, accessed, 16-bit, byte granularity, usable
|
||||
*/
|
||||
desc_base = 0xffff0000;
|
||||
desc_limit = 0xffff;
|
||||
desc_access = 0x0093;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
sel = 0xf000;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0)
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity
|
||||
*/
|
||||
desc_base = 0;
|
||||
desc_limit = 0xffff;
|
||||
desc_access = 0x0093;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
sel = 0;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0)
|
||||
goto done;
|
||||
|
||||
/* General purpose registers */
|
||||
rdx = 0xf00;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0)
|
||||
goto done;
|
||||
|
||||
/* GDTR, IDTR */
|
||||
desc_base = 0;
|
||||
desc_limit = 0xffff;
|
||||
desc_access = 0;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error != 0)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error != 0)
|
||||
goto done;
|
||||
|
||||
/* TR */
|
||||
desc_base = 0;
|
||||
desc_limit = 0xffff;
|
||||
desc_access = 0x0000008b;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
sel = 0;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0)
|
||||
goto done;
|
||||
|
||||
/* LDTR */
|
||||
desc_base = 0;
|
||||
desc_limit = 0xffff;
|
||||
desc_access = 0x00000082;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base,
|
||||
desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
sel = 0;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
|
||||
goto done;
|
||||
|
||||
/* XXX cr2, debug registers */
|
||||
|
||||
error = 0;
|
||||
done:
|
||||
return (error);
|
||||
}
|
98
lib/libvmmapi/vmmapi.h
Normal file
98
lib/libvmmapi/vmmapi.h
Normal file
@ -0,0 +1,98 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMMAPI_H_
|
||||
#define _VMMAPI_H_
|
||||
|
||||
struct vmctx;
|
||||
|
||||
int vm_create(const char *name);
|
||||
struct vmctx *vm_open(const char *name);
|
||||
void vm_destroy(struct vmctx *ctx);
|
||||
int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa,
|
||||
vm_paddr_t *ret_hpa, size_t *ret_len);
|
||||
/*
|
||||
* Create a memory segment of 'len' bytes in the guest physical address space
|
||||
* at offset 'gpa'.
|
||||
*
|
||||
* If 'mapaddr' is not NULL then this region is mmap'ed into the address
|
||||
* space of the calling process. If there is an mmap error then *mapaddr
|
||||
* will be set to MAP_FAILED.
|
||||
*/
|
||||
|
||||
int vm_setup_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len,
|
||||
char **mapaddr);
|
||||
char * vm_map_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len);
|
||||
int vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
|
||||
uint64_t base, uint32_t limit, uint32_t access);
|
||||
int vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
|
||||
uint64_t *base, uint32_t *limit, uint32_t *access);
|
||||
int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val);
|
||||
int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval);
|
||||
int vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid);
|
||||
int vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid);
|
||||
int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip,
|
||||
struct vm_exit *ret_vmexit);
|
||||
int vm_build_tables(struct vmctx *ctxt, int ncpus, void *oemtbl,
|
||||
int oemtblsz);
|
||||
int vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type,
|
||||
int vector);
|
||||
int vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type,
|
||||
int vector, int error_code);
|
||||
int vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector);
|
||||
int vm_inject_nmi(struct vmctx *ctx, int vcpu);
|
||||
int vm_capability_name2type(const char *capname);
|
||||
int vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
|
||||
int *retval);
|
||||
int vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
|
||||
int val);
|
||||
int vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
|
||||
int vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
|
||||
int vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
|
||||
vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
|
||||
int vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
|
||||
int dest, int vector, int numvec);
|
||||
|
||||
/*
|
||||
* Return a pointer to the statistics buffer. Note that this is not MT-safe.
|
||||
*/
|
||||
uint64_t *vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
|
||||
int *ret_entries);
|
||||
const char *vm_get_stat_desc(struct vmctx *ctx, int index);
|
||||
|
||||
/* Reset vcpu register state */
|
||||
int vcpu_reset(struct vmctx *ctx, int vcpu);
|
||||
|
||||
/*
|
||||
* FreeBSD specific APIs
|
||||
*/
|
||||
int vm_setup_freebsd_registers(struct vmctx *ctx, int vcpu,
|
||||
uint64_t rip, uint64_t cr3, uint64_t gdtbase,
|
||||
uint64_t rsp);
|
||||
void vm_setup_freebsd_gdt(uint64_t *gdtr);
|
||||
#endif /* _VMMAPI_H_ */
|
187
lib/libvmmapi/vmmapi_freebsd.c
Normal file
187
lib/libvmmapi/vmmapi_freebsd.c
Normal file
@ -0,0 +1,187 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <machine/specialreg.h>
|
||||
#include <machine/segments.h>
|
||||
#include <machine/vmm.h>
|
||||
|
||||
#include "vmmapi.h"
|
||||
|
||||
#ifndef CR4_VMXE
|
||||
#define CR4_VMXE (1UL << 13)
|
||||
#endif
|
||||
|
||||
#define DESC_UNUSABLE 0x00010000
|
||||
|
||||
#define GUEST_NULL_SEL 0
|
||||
#define GUEST_CODE_SEL 1
|
||||
#define GUEST_DATA_SEL 2
|
||||
#define GUEST_GDTR_LIMIT (3 * 8 - 1)
|
||||
|
||||
void
|
||||
vm_setup_freebsd_gdt(uint64_t *gdtr)
|
||||
{
|
||||
gdtr[GUEST_NULL_SEL] = 0;
|
||||
gdtr[GUEST_CODE_SEL] = 0x0020980000000000;
|
||||
gdtr[GUEST_DATA_SEL] = 0x0000900000000000;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup the 'vcpu' register set such that it will begin execution at
|
||||
* 'rip' in long mode.
|
||||
*/
|
||||
int
|
||||
vm_setup_freebsd_registers(struct vmctx *vmctx, int vcpu,
|
||||
uint64_t rip, uint64_t cr3, uint64_t gdtbase,
|
||||
uint64_t rsp)
|
||||
{
|
||||
int error;
|
||||
uint64_t cr0, cr4, efer, rflags, desc_base;
|
||||
uint32_t desc_access, desc_limit;
|
||||
uint16_t gsel;
|
||||
|
||||
cr0 = CR0_PE | CR0_PG | CR0_NE;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
|
||||
goto done;
|
||||
|
||||
cr4 = CR4_PAE | CR4_VMXE;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
|
||||
goto done;
|
||||
|
||||
efer = EFER_LME | EFER_LMA;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_EFER, efer)))
|
||||
goto done;
|
||||
|
||||
rflags = 0x2;
|
||||
error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
desc_base = 0;
|
||||
desc_limit = 0;
|
||||
desc_access = 0x0000209B;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
desc_access = 0x00000093;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* XXX TR is pointing to null selector even though we set the
|
||||
* TSS segment to be usable with a base address and limit of 0.
|
||||
*/
|
||||
desc_access = 0x0000008b;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, 0, 0,
|
||||
DESC_UNUSABLE);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
gsel = GSEL(GUEST_CODE_SEL, SEL_KPL);
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, gsel)) != 0)
|
||||
goto done;
|
||||
|
||||
gsel = GSEL(GUEST_DATA_SEL, SEL_KPL);
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, gsel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, gsel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, gsel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, gsel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, gsel)) != 0)
|
||||
goto done;
|
||||
|
||||
/* XXX TR is pointing to the null selector */
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, 0)) != 0)
|
||||
goto done;
|
||||
|
||||
/* LDTR is pointing to the null selector */
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
|
||||
goto done;
|
||||
|
||||
/* entry point */
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0)
|
||||
goto done;
|
||||
|
||||
/* page table base */
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, cr3)) != 0)
|
||||
goto done;
|
||||
|
||||
desc_base = gdtbase;
|
||||
desc_limit = GUEST_GDTR_LIMIT;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR,
|
||||
desc_base, desc_limit, 0);
|
||||
if (error != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, rsp)) != 0)
|
||||
goto done;
|
||||
|
||||
error = 0;
|
||||
done:
|
||||
return (error);
|
||||
}
|
@ -155,6 +155,7 @@ LIBUSB?= ${DESTDIR}${LIBDIR}/libusb.a
|
||||
LIBUTIL?= ${DESTDIR}${LIBDIR}/libutil.a
|
||||
LIBUUTIL?= ${DESTDIR}${LIBDIR}/libuutil.a
|
||||
LIBVGL?= ${DESTDIR}${LIBDIR}/libvgl.a
|
||||
LIBVMMAPI?= ${DESTDIR}${LIBDIR}/libvmmapi.a
|
||||
LIBWRAP?= ${DESTDIR}${LIBDIR}/libwrap.a
|
||||
LIBXPG4?= ${DESTDIR}${LIBDIR}/libxpg4.a
|
||||
LIBY?= ${DESTDIR}${LIBDIR}/liby.a
|
||||
|
@ -297,6 +297,7 @@
|
||||
*/
|
||||
#define APICBASE_RESERVED 0x000006ff
|
||||
#define APICBASE_BSP 0x00000100
|
||||
#define APICBASE_X2APIC 0x00000400
|
||||
#define APICBASE_ENABLED 0x00000800
|
||||
#define APICBASE_ADDRESS 0xfffff000
|
||||
|
||||
|
268
sys/amd64/include/vmm.h
Normal file
268
sys/amd64/include/vmm.h
Normal file
@ -0,0 +1,268 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD: vmm.h 482 2011-05-09 21:22:43Z grehan $
|
||||
*/
|
||||
|
||||
#ifndef _VMM_H_
|
||||
#define _VMM_H_
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
#define VM_MAX_NAMELEN 32
|
||||
|
||||
struct vm;
|
||||
struct vm_memory_segment;
|
||||
struct seg_desc;
|
||||
struct vm_exit;
|
||||
struct vm_run;
|
||||
struct vlapic;
|
||||
|
||||
typedef int (*vmm_init_func_t)(void);
|
||||
typedef int (*vmm_cleanup_func_t)(void);
|
||||
typedef void * (*vmi_init_func_t)(struct vm *vm); /* instance specific apis */
|
||||
typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
|
||||
struct vm_exit *vmexit);
|
||||
typedef void (*vmi_cleanup_func_t)(void *vmi);
|
||||
typedef int (*vmi_mmap_func_t)(void *vmi, vm_paddr_t gpa, vm_paddr_t hpa,
|
||||
size_t length, vm_memattr_t attr,
|
||||
int prot, boolean_t superpages_ok);
|
||||
typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num,
|
||||
uint64_t *retval);
|
||||
typedef int (*vmi_set_register_t)(void *vmi, int vcpu, int num,
|
||||
uint64_t val);
|
||||
typedef int (*vmi_get_desc_t)(void *vmi, int vcpu, int num,
|
||||
struct seg_desc *desc);
|
||||
typedef int (*vmi_set_desc_t)(void *vmi, int vcpu, int num,
|
||||
struct seg_desc *desc);
|
||||
typedef int (*vmi_inject_event_t)(void *vmi, int vcpu,
|
||||
int type, int vector,
|
||||
uint32_t code, int code_valid);
|
||||
typedef int (*vmi_inject_nmi_t)(void *vmi, int vcpu);
|
||||
typedef int (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
|
||||
typedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
|
||||
|
||||
struct vmm_ops {
|
||||
vmm_init_func_t init; /* module wide initialization */
|
||||
vmm_cleanup_func_t cleanup;
|
||||
|
||||
vmi_init_func_t vminit; /* vm-specific initialization */
|
||||
vmi_run_func_t vmrun;
|
||||
vmi_cleanup_func_t vmcleanup;
|
||||
vmi_mmap_func_t vmmmap;
|
||||
vmi_get_register_t vmgetreg;
|
||||
vmi_set_register_t vmsetreg;
|
||||
vmi_get_desc_t vmgetdesc;
|
||||
vmi_set_desc_t vmsetdesc;
|
||||
vmi_inject_event_t vminject;
|
||||
vmi_inject_nmi_t vmnmi;
|
||||
vmi_get_cap_t vmgetcap;
|
||||
vmi_set_cap_t vmsetcap;
|
||||
};
|
||||
|
||||
extern struct vmm_ops vmm_ops_intel;
|
||||
extern struct vmm_ops vmm_ops_amd;
|
||||
|
||||
struct vm *vm_create(const char *name);
|
||||
void vm_destroy(struct vm *vm);
|
||||
const char *vm_name(struct vm *vm);
|
||||
int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t *ret_hpa);
|
||||
int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
|
||||
int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
|
||||
vm_paddr_t vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t size);
|
||||
int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
|
||||
struct vm_memory_segment *seg);
|
||||
int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
|
||||
int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
|
||||
int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
|
||||
struct seg_desc *ret_desc);
|
||||
int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
|
||||
struct seg_desc *desc);
|
||||
int vm_get_pinning(struct vm *vm, int vcpu, int *cpuid);
|
||||
int vm_set_pinning(struct vm *vm, int vcpu, int cpuid);
|
||||
int vm_run(struct vm *vm, struct vm_run *vmrun);
|
||||
int vm_inject_event(struct vm *vm, int vcpu, int type,
|
||||
int vector, uint32_t error_code, int error_code_valid);
|
||||
int vm_inject_nmi(struct vm *vm, int vcpu);
|
||||
uint64_t *vm_guest_msrs(struct vm *vm, int cpu);
|
||||
struct vlapic *vm_lapic(struct vm *vm, int cpu);
|
||||
int vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
|
||||
int vm_set_capability(struct vm *vm, int vcpu, int type, int val);
|
||||
void vm_activate_cpu(struct vm *vm, int vcpu);
|
||||
cpumask_t vm_active_cpus(struct vm *vm);
|
||||
|
||||
/*
|
||||
* Return 1 if device indicated by bus/slot/func is supposed to be a
|
||||
* pci passthrough device.
|
||||
*
|
||||
* Return 0 otherwise.
|
||||
*/
|
||||
int vmm_is_pptdev(int bus, int slot, int func);
|
||||
|
||||
void *vm_iommu_domain(struct vm *vm);
|
||||
|
||||
#define VCPU_STOPPED 0
|
||||
#define VCPU_RUNNING 1
|
||||
void vm_set_run_state(struct vm *vm, int vcpu, int running);
|
||||
int vm_get_run_state(struct vm *vm, int vcpu, int *hostcpu);
|
||||
|
||||
void *vcpu_stats(struct vm *vm, int vcpu);
|
||||
|
||||
static int __inline
|
||||
vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
|
||||
{
|
||||
return (vm_get_run_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
|
||||
}
|
||||
|
||||
static cpumask_t __inline
|
||||
vcpu_mask(int vcpuid)
|
||||
{
|
||||
return ((cpumask_t)1 << vcpuid);
|
||||
}
|
||||
|
||||
#endif /* KERNEL */
|
||||
|
||||
#define VM_MAXCPU 8 /* maximum virtual cpus */
|
||||
|
||||
/*
|
||||
* Identifiers for events that can be injected into the VM
|
||||
*/
|
||||
enum vm_event_type {
|
||||
VM_EVENT_NONE,
|
||||
VM_HW_INTR,
|
||||
VM_NMI,
|
||||
VM_HW_EXCEPTION,
|
||||
VM_SW_INTR,
|
||||
VM_PRIV_SW_EXCEPTION,
|
||||
VM_SW_EXCEPTION,
|
||||
VM_EVENT_MAX
|
||||
};
|
||||
|
||||
/*
|
||||
* Identifiers for architecturally defined registers.
|
||||
*/
|
||||
enum vm_reg_name {
|
||||
VM_REG_GUEST_RAX,
|
||||
VM_REG_GUEST_RBX,
|
||||
VM_REG_GUEST_RCX,
|
||||
VM_REG_GUEST_RDX,
|
||||
VM_REG_GUEST_RSI,
|
||||
VM_REG_GUEST_RDI,
|
||||
VM_REG_GUEST_RBP,
|
||||
VM_REG_GUEST_R8,
|
||||
VM_REG_GUEST_R9,
|
||||
VM_REG_GUEST_R10,
|
||||
VM_REG_GUEST_R11,
|
||||
VM_REG_GUEST_R12,
|
||||
VM_REG_GUEST_R13,
|
||||
VM_REG_GUEST_R14,
|
||||
VM_REG_GUEST_R15,
|
||||
VM_REG_GUEST_CR0,
|
||||
VM_REG_GUEST_CR3,
|
||||
VM_REG_GUEST_CR4,
|
||||
VM_REG_GUEST_DR7,
|
||||
VM_REG_GUEST_RSP,
|
||||
VM_REG_GUEST_RIP,
|
||||
VM_REG_GUEST_RFLAGS,
|
||||
VM_REG_GUEST_ES,
|
||||
VM_REG_GUEST_CS,
|
||||
VM_REG_GUEST_SS,
|
||||
VM_REG_GUEST_DS,
|
||||
VM_REG_GUEST_FS,
|
||||
VM_REG_GUEST_GS,
|
||||
VM_REG_GUEST_LDTR,
|
||||
VM_REG_GUEST_TR,
|
||||
VM_REG_GUEST_IDTR,
|
||||
VM_REG_GUEST_GDTR,
|
||||
VM_REG_GUEST_EFER,
|
||||
VM_REG_LAST
|
||||
};
|
||||
|
||||
/*
|
||||
* Identifiers for optional vmm capabilities
|
||||
*/
|
||||
enum vm_cap_type {
|
||||
VM_CAP_HALT_EXIT,
|
||||
VM_CAP_MTRAP_EXIT,
|
||||
VM_CAP_PAUSE_EXIT,
|
||||
VM_CAP_UNRESTRICTED_GUEST,
|
||||
VM_CAP_MAX
|
||||
};
|
||||
|
||||
/*
|
||||
* The 'access' field has the format specified in Table 21-2 of the Intel
|
||||
* Architecture Manual vol 3b.
|
||||
*
|
||||
* XXX The contents of the 'access' field are architecturally defined except
|
||||
* bit 16 - Segment Unusable.
|
||||
*/
|
||||
struct seg_desc {
|
||||
uint64_t base;
|
||||
uint32_t limit;
|
||||
uint32_t access;
|
||||
};
|
||||
|
||||
enum vm_exitcode {
|
||||
VM_EXITCODE_INOUT,
|
||||
VM_EXITCODE_VMX,
|
||||
VM_EXITCODE_BOGUS,
|
||||
VM_EXITCODE_RDMSR,
|
||||
VM_EXITCODE_WRMSR,
|
||||
VM_EXITCODE_HLT,
|
||||
VM_EXITCODE_MTRAP,
|
||||
VM_EXITCODE_PAUSE,
|
||||
VM_EXITCODE_MAX,
|
||||
};
|
||||
|
||||
struct vm_exit {
|
||||
enum vm_exitcode exitcode;
|
||||
int inst_length; /* 0 means unknown */
|
||||
uint64_t rip;
|
||||
union {
|
||||
struct {
|
||||
uint16_t bytes:3; /* 1 or 2 or 4 */
|
||||
uint16_t in:1; /* out is 0, in is 1 */
|
||||
uint16_t string:1;
|
||||
uint16_t rep:1;
|
||||
uint16_t port;
|
||||
uint32_t eax; /* valid for out */
|
||||
} inout;
|
||||
/*
|
||||
* VMX specific payload. Used when there is no "better"
|
||||
* exitcode to represent the VM-exit.
|
||||
*/
|
||||
struct {
|
||||
int error; /* vmx inst error */
|
||||
uint32_t exit_reason;
|
||||
uint64_t exit_qualification;
|
||||
} vmx;
|
||||
struct {
|
||||
uint32_t code; /* ecx value */
|
||||
uint64_t wval;
|
||||
} msr;
|
||||
} u;
|
||||
};
|
||||
|
||||
#endif /* _VMM_H_ */
|
191
sys/amd64/include/vmm_dev.h
Normal file
191
sys/amd64/include/vmm_dev.h
Normal file
@ -0,0 +1,191 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD: vmm_dev.h 482 2011-05-09 21:22:43Z grehan $
|
||||
*/
|
||||
|
||||
#ifndef _VMM_DEV_H_
|
||||
#define _VMM_DEV_H_
|
||||
|
||||
#ifdef _KERNEL
|
||||
void vmmdev_init(void);
|
||||
void vmmdev_cleanup(void);
|
||||
#endif
|
||||
|
||||
struct vm_memory_segment {
|
||||
vm_paddr_t hpa; /* out */
|
||||
vm_paddr_t gpa; /* in */
|
||||
size_t len; /* in */
|
||||
};
|
||||
|
||||
struct vm_register {
|
||||
int cpuid;
|
||||
int regnum; /* enum vm_reg_name */
|
||||
uint64_t regval;
|
||||
};
|
||||
|
||||
struct vm_seg_desc { /* data or code segment */
|
||||
int cpuid;
|
||||
int regnum; /* enum vm_reg_name */
|
||||
struct seg_desc desc;
|
||||
};
|
||||
|
||||
struct vm_pin {
|
||||
int vm_cpuid;
|
||||
int host_cpuid; /* -1 to unpin */
|
||||
};
|
||||
|
||||
struct vm_run {
|
||||
int cpuid;
|
||||
uint64_t rip; /* start running here */
|
||||
struct vm_exit vm_exit;
|
||||
};
|
||||
|
||||
struct vm_event {
|
||||
int cpuid;
|
||||
enum vm_event_type type;
|
||||
int vector;
|
||||
uint32_t error_code;
|
||||
int error_code_valid;
|
||||
};
|
||||
|
||||
struct vm_lapic_irq {
|
||||
int cpuid;
|
||||
int vector;
|
||||
};
|
||||
|
||||
struct vm_capability {
|
||||
int cpuid;
|
||||
enum vm_cap_type captype;
|
||||
int capval;
|
||||
int allcpus;
|
||||
};
|
||||
|
||||
struct vm_pptdev {
|
||||
int bus;
|
||||
int slot;
|
||||
int func;
|
||||
};
|
||||
|
||||
struct vm_pptdev_mmio {
|
||||
int bus;
|
||||
int slot;
|
||||
int func;
|
||||
vm_paddr_t gpa;
|
||||
vm_paddr_t hpa;
|
||||
size_t len;
|
||||
};
|
||||
|
||||
struct vm_pptdev_msi {
|
||||
int vcpu;
|
||||
int bus;
|
||||
int slot;
|
||||
int func;
|
||||
int numvec; /* 0 means disabled */
|
||||
int vector;
|
||||
int destcpu;
|
||||
};
|
||||
|
||||
struct vm_nmi {
|
||||
int cpuid;
|
||||
};
|
||||
|
||||
#define MAX_VM_STATS 64
|
||||
struct vm_stats {
|
||||
int cpuid; /* in */
|
||||
int num_entries; /* out */
|
||||
struct timeval tv;
|
||||
uint64_t statbuf[MAX_VM_STATS];
|
||||
};
|
||||
|
||||
struct vm_stat_desc {
|
||||
int index; /* in */
|
||||
char desc[128]; /* out */
|
||||
};
|
||||
|
||||
enum {
|
||||
IOCNUM_RUN,
|
||||
IOCNUM_SET_PINNING,
|
||||
IOCNUM_GET_PINNING,
|
||||
IOCNUM_MAP_MEMORY,
|
||||
IOCNUM_GET_MEMORY_SEG,
|
||||
IOCNUM_SET_REGISTER,
|
||||
IOCNUM_GET_REGISTER,
|
||||
IOCNUM_SET_SEGMENT_DESCRIPTOR,
|
||||
IOCNUM_GET_SEGMENT_DESCRIPTOR,
|
||||
IOCNUM_INJECT_EVENT,
|
||||
IOCNUM_LAPIC_IRQ,
|
||||
IOCNUM_SET_CAPABILITY,
|
||||
IOCNUM_GET_CAPABILITY,
|
||||
IOCNUM_BIND_PPTDEV,
|
||||
IOCNUM_UNBIND_PPTDEV,
|
||||
IOCNUM_MAP_PPTDEV_MMIO,
|
||||
IOCNUM_PPTDEV_MSI,
|
||||
IOCNUM_INJECT_NMI,
|
||||
IOCNUM_VM_STATS,
|
||||
IOCNUM_VM_STAT_DESC,
|
||||
};
|
||||
|
||||
#define VM_RUN \
|
||||
_IOWR('v', IOCNUM_RUN, struct vm_run)
|
||||
#define VM_SET_PINNING \
|
||||
_IOW('v', IOCNUM_SET_PINNING, struct vm_pin)
|
||||
#define VM_GET_PINNING \
|
||||
_IOWR('v', IOCNUM_GET_PINNING, struct vm_pin)
|
||||
#define VM_MAP_MEMORY \
|
||||
_IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment)
|
||||
#define VM_GET_MEMORY_SEG \
|
||||
_IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment)
|
||||
#define VM_SET_REGISTER \
|
||||
_IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
|
||||
#define VM_GET_REGISTER \
|
||||
_IOWR('v', IOCNUM_GET_REGISTER, struct vm_register)
|
||||
#define VM_SET_SEGMENT_DESCRIPTOR \
|
||||
_IOW('v', IOCNUM_SET_SEGMENT_DESCRIPTOR, struct vm_seg_desc)
|
||||
#define VM_GET_SEGMENT_DESCRIPTOR \
|
||||
_IOWR('v', IOCNUM_GET_SEGMENT_DESCRIPTOR, struct vm_seg_desc)
|
||||
#define VM_INJECT_EVENT \
|
||||
_IOW('v', IOCNUM_INJECT_EVENT, struct vm_event)
|
||||
#define VM_LAPIC_IRQ \
|
||||
_IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq)
|
||||
#define VM_SET_CAPABILITY \
|
||||
_IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability)
|
||||
#define VM_GET_CAPABILITY \
|
||||
_IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability)
|
||||
#define VM_BIND_PPTDEV \
|
||||
_IOW('v', IOCNUM_BIND_PPTDEV, struct vm_pptdev)
|
||||
#define VM_UNBIND_PPTDEV \
|
||||
_IOW('v', IOCNUM_UNBIND_PPTDEV, struct vm_pptdev)
|
||||
#define VM_MAP_PPTDEV_MMIO \
|
||||
_IOW('v', IOCNUM_MAP_PPTDEV_MMIO, struct vm_pptdev_mmio)
|
||||
#define VM_PPTDEV_MSI \
|
||||
_IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi)
|
||||
#define VM_INJECT_NMI \
|
||||
_IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi)
|
||||
#define VM_STATS \
|
||||
_IOWR('v', IOCNUM_VM_STATS, struct vm_stats)
|
||||
#define VM_STAT_DESC \
|
||||
_IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc)
|
||||
#endif
|
247
sys/amd64/vmm/amd/amdv.c
Normal file
247
sys/amd64/vmm/amd/amdv.c
Normal file
@ -0,0 +1,247 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/errno.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "io/iommu.h"
|
||||
|
||||
static int
|
||||
amdv_init(void)
|
||||
{
|
||||
|
||||
printf("amdv_init: not implemented\n");
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_cleanup(void)
|
||||
{
|
||||
|
||||
printf("amdv_cleanup: not implemented\n");
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
static void *
|
||||
amdv_vminit(struct vm *vm)
|
||||
{
|
||||
|
||||
printf("amdv_vminit: not implemented\n");
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_vmrun(void *arg, int vcpu, register_t rip, struct vm_exit *vmexit)
|
||||
{
|
||||
|
||||
printf("amdv_vmrun: not implemented\n");
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
static void
|
||||
amdv_vmcleanup(void *arg)
|
||||
{
|
||||
|
||||
printf("amdv_vmcleanup: not implemented\n");
|
||||
return;
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_vmmmap(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
|
||||
vm_memattr_t attr, int prot, boolean_t spok)
|
||||
{
|
||||
|
||||
printf("amdv_vmmmap: not implemented\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_getreg(void *arg, int vcpu, int regnum, uint64_t *retval)
|
||||
{
|
||||
|
||||
printf("amdv_getreg: not implemented\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_setreg(void *arg, int vcpu, int regnum, uint64_t val)
|
||||
{
|
||||
|
||||
printf("amdv_setreg: not implemented\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_getdesc(void *vmi, int vcpu, int num, struct seg_desc *desc)
|
||||
{
|
||||
|
||||
printf("amdv_get_desc: not implemented\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_setdesc(void *vmi, int vcpu, int num, struct seg_desc *desc)
|
||||
{
|
||||
|
||||
printf("amdv_get_desc: not implemented\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_inject_event(void *vmi, int vcpu, int type, int vector,
|
||||
uint32_t error_code, int error_code_valid)
|
||||
{
|
||||
|
||||
printf("amdv_inject_event: not implemented\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_nmi(void *arg, int vcpu)
|
||||
{
|
||||
|
||||
printf("amdv_nmi: not implemented\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_getcap(void *arg, int vcpu, int type, int *retval)
|
||||
{
|
||||
|
||||
printf("amdv_getcap: not implemented\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
static int
|
||||
amdv_setcap(void *arg, int vcpu, int type, int val)
|
||||
{
|
||||
|
||||
printf("amdv_setcap: not implemented\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
struct vmm_ops vmm_ops_amd = {
|
||||
amdv_init,
|
||||
amdv_cleanup,
|
||||
amdv_vminit,
|
||||
amdv_vmrun,
|
||||
amdv_vmcleanup,
|
||||
amdv_vmmmap,
|
||||
amdv_getreg,
|
||||
amdv_setreg,
|
||||
amdv_getdesc,
|
||||
amdv_setdesc,
|
||||
amdv_inject_event,
|
||||
amdv_nmi,
|
||||
amdv_getcap,
|
||||
amdv_setcap
|
||||
};
|
||||
|
||||
static int
|
||||
amd_iommu_init(void)
|
||||
{
|
||||
|
||||
printf("amd_iommu_init: not implemented\n");
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
static void
|
||||
amd_iommu_cleanup(void)
|
||||
{
|
||||
|
||||
printf("amd_iommu_cleanup: not implemented\n");
|
||||
}
|
||||
|
||||
static void
|
||||
amd_iommu_enable(void)
|
||||
{
|
||||
|
||||
printf("amd_iommu_enable: not implemented\n");
|
||||
}
|
||||
|
||||
static void
|
||||
amd_iommu_disable(void)
|
||||
{
|
||||
|
||||
printf("amd_iommu_disable: not implemented\n");
|
||||
}
|
||||
|
||||
static void *
|
||||
amd_iommu_create_domain(vm_paddr_t maxaddr)
|
||||
{
|
||||
|
||||
printf("amd_iommu_create_domain: not implemented\n");
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
amd_iommu_destroy_domain(void *domain)
|
||||
{
|
||||
|
||||
printf("amd_iommu_destroy_domain: not implemented\n");
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
amd_iommu_create_mapping(void *domain, vm_paddr_t gpa, vm_paddr_t hpa,
|
||||
uint64_t len)
|
||||
{
|
||||
|
||||
printf("amd_iommu_create_mapping: not implemented\n");
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
amd_iommu_add_device(void *domain, int bus, int slot, int func)
|
||||
{
|
||||
|
||||
printf("amd_iommu_add_device: not implemented\n");
|
||||
}
|
||||
|
||||
static void
|
||||
amd_iommu_remove_device(void *domain, int bus, int slot, int func)
|
||||
{
|
||||
|
||||
printf("amd_iommu_remove_device: not implemented\n");
|
||||
}
|
||||
|
||||
struct iommu_ops iommu_ops_amd = {
|
||||
amd_iommu_init,
|
||||
amd_iommu_cleanup,
|
||||
amd_iommu_enable,
|
||||
amd_iommu_disable,
|
||||
amd_iommu_create_domain,
|
||||
amd_iommu_destroy_domain,
|
||||
amd_iommu_create_mapping,
|
||||
amd_iommu_add_device,
|
||||
amd_iommu_remove_device,
|
||||
};
|
312
sys/amd64/vmm/intel/ept.c
Normal file
312
sys/amd64/vmm/intel/ept.c
Normal file
@ -0,0 +1,312 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/smp.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
|
||||
#include <machine/param.h>
|
||||
#include <machine/cpufunc.h>
|
||||
#include <machine/pmap.h>
|
||||
#include <machine/vmparam.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "vmx_cpufunc.h"
|
||||
#include "vmx_msr.h"
|
||||
#include "vmx.h"
|
||||
#include "ept.h"
|
||||
|
||||
#define EPT_PWL4(cap) ((cap) & (1UL << 6))
|
||||
#define EPT_MEMORY_TYPE_WB(cap) ((cap) & (1UL << 14))
|
||||
#define EPT_PDE_SUPERPAGE(cap) ((cap) & (1UL << 16)) /* 2MB pages */
|
||||
#define EPT_PDPTE_SUPERPAGE(cap) ((cap) & (1UL << 17)) /* 1GB pages */
|
||||
#define INVVPID_SUPPORTED(cap) ((cap) & (1UL << 32))
|
||||
#define INVEPT_SUPPORTED(cap) ((cap) & (1UL << 20))
|
||||
|
||||
#define INVVPID_ALL_TYPES_MASK 0xF0000000000UL
|
||||
#define INVVPID_ALL_TYPES_SUPPORTED(cap) \
|
||||
(((cap) & INVVPID_ALL_TYPES_MASK) == INVVPID_ALL_TYPES_MASK)
|
||||
|
||||
#define INVEPT_ALL_TYPES_MASK 0x6000000UL
|
||||
#define INVEPT_ALL_TYPES_SUPPORTED(cap) \
|
||||
(((cap) & INVEPT_ALL_TYPES_MASK) == INVEPT_ALL_TYPES_MASK)
|
||||
|
||||
#define EPT_PG_RD (1 << 0)
|
||||
#define EPT_PG_WR (1 << 1)
|
||||
#define EPT_PG_EX (1 << 2)
|
||||
#define EPT_PG_MEMORY_TYPE(x) ((x) << 3)
|
||||
#define EPT_PG_IGNORE_PAT (1 << 6)
|
||||
#define EPT_PG_SUPERPAGE (1 << 7)
|
||||
|
||||
#define EPT_ADDR_MASK ((uint64_t)-1 << 12)
|
||||
|
||||
MALLOC_DECLARE(M_VMX);
|
||||
|
||||
static uint64_t page_sizes_mask;
|
||||
|
||||
int
|
||||
ept_init(void)
|
||||
{
|
||||
int page_shift;
|
||||
uint64_t cap;
|
||||
|
||||
cap = rdmsr(MSR_VMX_EPT_VPID_CAP);
|
||||
|
||||
/*
|
||||
* Verify that:
|
||||
* - page walk length is 4 steps
|
||||
* - extended page tables can be laid out in write-back memory
|
||||
* - invvpid instruction with all possible types is supported
|
||||
* - invept instruction with all possible types is supported
|
||||
*/
|
||||
if (!EPT_PWL4(cap) ||
|
||||
!EPT_MEMORY_TYPE_WB(cap) ||
|
||||
!INVVPID_SUPPORTED(cap) ||
|
||||
!INVVPID_ALL_TYPES_SUPPORTED(cap) ||
|
||||
!INVEPT_SUPPORTED(cap) ||
|
||||
!INVEPT_ALL_TYPES_SUPPORTED(cap))
|
||||
return (EINVAL);
|
||||
|
||||
/* Set bits in 'page_sizes_mask' for each valid page size */
|
||||
page_shift = PAGE_SHIFT;
|
||||
page_sizes_mask = 1UL << page_shift; /* 4KB page */
|
||||
|
||||
page_shift += 9;
|
||||
if (EPT_PDE_SUPERPAGE(cap))
|
||||
page_sizes_mask |= 1UL << page_shift; /* 2MB superpage */
|
||||
|
||||
page_shift += 9;
|
||||
if (EPT_PDPTE_SUPERPAGE(cap))
|
||||
page_sizes_mask |= 1UL << page_shift; /* 1GB superpage */
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static size_t
|
||||
ept_create_mapping(uint64_t *ptp, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
|
||||
vm_memattr_t attr, vm_prot_t prot, boolean_t spok)
|
||||
{
|
||||
int spshift, ptpshift, ptpindex, nlevels;
|
||||
|
||||
/*
|
||||
* Compute the size of the mapping that we can accomodate.
|
||||
*
|
||||
* This is based on three factors:
|
||||
* - super page sizes supported by the processor
|
||||
* - alignment of the region starting at 'gpa' and 'hpa'
|
||||
* - length of the region 'len'
|
||||
*/
|
||||
spshift = PAGE_SHIFT;
|
||||
if (spok)
|
||||
spshift += (EPT_PWLEVELS - 1) * 9;
|
||||
while (spshift >= PAGE_SHIFT) {
|
||||
uint64_t spsize = 1UL << spshift;
|
||||
if ((page_sizes_mask & spsize) != 0 &&
|
||||
(gpa & (spsize - 1)) == 0 &&
|
||||
(hpa & (spsize - 1)) == 0 &&
|
||||
length >= spsize) {
|
||||
break;
|
||||
}
|
||||
spshift -= 9;
|
||||
}
|
||||
|
||||
if (spshift < PAGE_SHIFT) {
|
||||
panic("Invalid spshift for gpa 0x%016lx, hpa 0x%016lx, "
|
||||
"length 0x%016lx, page_sizes_mask 0x%016lx",
|
||||
gpa, hpa, length, page_sizes_mask);
|
||||
}
|
||||
|
||||
nlevels = EPT_PWLEVELS;
|
||||
while (--nlevels >= 0) {
|
||||
ptpshift = PAGE_SHIFT + nlevels * 9;
|
||||
ptpindex = (gpa >> ptpshift) & 0x1FF;
|
||||
|
||||
/* We have reached the leaf mapping */
|
||||
if (spshift >= ptpshift)
|
||||
break;
|
||||
|
||||
/*
|
||||
* We are working on a non-leaf page table page.
|
||||
*
|
||||
* Create the next level page table page if necessary and point
|
||||
* to it from the current page table.
|
||||
*/
|
||||
if (ptp[ptpindex] == 0) {
|
||||
void *nlp = malloc(PAGE_SIZE, M_VMX, M_WAITOK | M_ZERO);
|
||||
ptp[ptpindex] = vtophys(nlp);
|
||||
ptp[ptpindex] |= EPT_PG_RD | EPT_PG_WR | EPT_PG_EX;
|
||||
}
|
||||
|
||||
/* Work our way down to the next level page table page */
|
||||
ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & EPT_ADDR_MASK);
|
||||
}
|
||||
|
||||
if ((gpa & ((1UL << ptpshift) - 1)) != 0) {
|
||||
panic("ept_create_mapping: gpa 0x%016lx and ptpshift %d "
|
||||
"mismatch\n", gpa, ptpshift);
|
||||
}
|
||||
|
||||
/* Do the mapping */
|
||||
ptp[ptpindex] = hpa;
|
||||
|
||||
/* Apply the access controls */
|
||||
if (prot & VM_PROT_READ)
|
||||
ptp[ptpindex] |= EPT_PG_RD;
|
||||
if (prot & VM_PROT_WRITE)
|
||||
ptp[ptpindex] |= EPT_PG_WR;
|
||||
if (prot & VM_PROT_EXECUTE)
|
||||
ptp[ptpindex] |= EPT_PG_EX;
|
||||
|
||||
/*
|
||||
* XXX should we enforce this memory type by setting the ignore PAT
|
||||
* bit to 1.
|
||||
*/
|
||||
ptp[ptpindex] |= EPT_PG_MEMORY_TYPE(attr);
|
||||
|
||||
if (nlevels > 0)
|
||||
ptp[ptpindex] |= EPT_PG_SUPERPAGE;
|
||||
|
||||
return (1UL << ptpshift);
|
||||
}
|
||||
|
||||
static void
|
||||
ept_free_pt_entry(pt_entry_t pte)
|
||||
{
|
||||
if (pte == 0)
|
||||
return;
|
||||
|
||||
/* sanity check */
|
||||
if ((pte & EPT_PG_SUPERPAGE) != 0)
|
||||
panic("ept_free_pt_entry: pte cannot have superpage bit");
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static void
|
||||
ept_free_pd_entry(pd_entry_t pde)
|
||||
{
|
||||
pt_entry_t *pt;
|
||||
int i;
|
||||
|
||||
if (pde == 0)
|
||||
return;
|
||||
|
||||
if ((pde & EPT_PG_SUPERPAGE) == 0) {
|
||||
pt = (pt_entry_t *)PHYS_TO_DMAP(pde & EPT_ADDR_MASK);
|
||||
for (i = 0; i < NPTEPG; i++)
|
||||
ept_free_pt_entry(pt[i]);
|
||||
free(pt, M_VMX); /* free the page table page */
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
ept_free_pdp_entry(pdp_entry_t pdpe)
|
||||
{
|
||||
pd_entry_t *pd;
|
||||
int i;
|
||||
|
||||
if (pdpe == 0)
|
||||
return;
|
||||
|
||||
if ((pdpe & EPT_PG_SUPERPAGE) == 0) {
|
||||
pd = (pd_entry_t *)PHYS_TO_DMAP(pdpe & EPT_ADDR_MASK);
|
||||
for (i = 0; i < NPDEPG; i++)
|
||||
ept_free_pd_entry(pd[i]);
|
||||
free(pd, M_VMX); /* free the page directory page */
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
ept_free_pml4_entry(pml4_entry_t pml4e)
|
||||
{
|
||||
pdp_entry_t *pdp;
|
||||
int i;
|
||||
|
||||
if (pml4e == 0)
|
||||
return;
|
||||
|
||||
if ((pml4e & EPT_PG_SUPERPAGE) == 0) {
|
||||
pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4e & EPT_ADDR_MASK);
|
||||
for (i = 0; i < NPDPEPG; i++)
|
||||
ept_free_pdp_entry(pdp[i]);
|
||||
free(pdp, M_VMX); /* free the page directory ptr page */
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ept_vmcleanup(struct vmx *vmx)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NPML4EPG; i++)
|
||||
ept_free_pml4_entry(vmx->pml4ept[i]);
|
||||
}
|
||||
|
||||
int
|
||||
ept_vmmmap(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t len,
|
||||
vm_memattr_t attr, int prot, boolean_t spok)
|
||||
{
|
||||
size_t n;
|
||||
struct vmx *vmx = arg;
|
||||
|
||||
while (len > 0) {
|
||||
n = ept_create_mapping(vmx->pml4ept, gpa, hpa, len, attr,
|
||||
prot, spok);
|
||||
len -= n;
|
||||
gpa += n;
|
||||
hpa += n;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
invept_single_context(void *arg)
|
||||
{
|
||||
struct invept_desc desc = *(struct invept_desc *)arg;
|
||||
|
||||
invept(INVEPT_TYPE_SINGLE_CONTEXT, desc);
|
||||
}
|
||||
|
||||
void
|
||||
ept_invalidate_mappings(u_long pml4ept)
|
||||
{
|
||||
struct invept_desc invept_desc = { 0 };
|
||||
|
||||
invept_desc.eptp = EPTP(pml4ept);
|
||||
|
||||
smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc);
|
||||
}
|
42
sys/amd64/vmm/intel/ept.h
Normal file
42
sys/amd64/vmm/intel/ept.h
Normal file
@ -0,0 +1,42 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _EPT_H_
|
||||
#define _EPT_H_
|
||||
|
||||
struct vmx;
|
||||
|
||||
#define EPT_PWLEVELS 4 /* page walk levels */
|
||||
#define EPTP(pml4) ((pml4) | (EPT_PWLEVELS - 1) << 3 | PAT_WRITE_BACK)
|
||||
|
||||
int ept_init(void);
|
||||
int ept_vmmmap(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
|
||||
vm_memattr_t attr, int prot, boolean_t allow_superpage_mappings);
|
||||
void ept_invalidate_mappings(u_long ept_pml4);
|
||||
void ept_vmcleanup(struct vmx *vmx);
|
||||
#endif
|
451
sys/amd64/vmm/intel/vmcs.c
Normal file
451
sys/amd64/vmm/intel/vmcs.c
Normal file
@ -0,0 +1,451 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/pcpu.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
|
||||
#include <machine/segments.h>
|
||||
#include <machine/pmap.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "vmcs.h"
|
||||
#include "vmx_cpufunc.h"
|
||||
#include "ept.h"
|
||||
#include "vmx.h"
|
||||
|
||||
static uint64_t
|
||||
vmcs_fix_regval(uint32_t encoding, uint64_t val)
|
||||
{
|
||||
|
||||
switch (encoding) {
|
||||
case VMCS_GUEST_CR0:
|
||||
val = vmx_fix_cr0(val);
|
||||
break;
|
||||
case VMCS_GUEST_CR4:
|
||||
val = vmx_fix_cr4(val);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return (val);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
vmcs_field_encoding(int ident)
|
||||
{
|
||||
switch (ident) {
|
||||
case VM_REG_GUEST_CR0:
|
||||
return (VMCS_GUEST_CR0);
|
||||
case VM_REG_GUEST_CR3:
|
||||
return (VMCS_GUEST_CR3);
|
||||
case VM_REG_GUEST_CR4:
|
||||
return (VMCS_GUEST_CR4);
|
||||
case VM_REG_GUEST_DR7:
|
||||
return (VMCS_GUEST_DR7);
|
||||
case VM_REG_GUEST_RSP:
|
||||
return (VMCS_GUEST_RSP);
|
||||
case VM_REG_GUEST_RIP:
|
||||
return (VMCS_GUEST_RIP);
|
||||
case VM_REG_GUEST_RFLAGS:
|
||||
return (VMCS_GUEST_RFLAGS);
|
||||
case VM_REG_GUEST_ES:
|
||||
return (VMCS_GUEST_ES_SELECTOR);
|
||||
case VM_REG_GUEST_CS:
|
||||
return (VMCS_GUEST_CS_SELECTOR);
|
||||
case VM_REG_GUEST_SS:
|
||||
return (VMCS_GUEST_SS_SELECTOR);
|
||||
case VM_REG_GUEST_DS:
|
||||
return (VMCS_GUEST_DS_SELECTOR);
|
||||
case VM_REG_GUEST_FS:
|
||||
return (VMCS_GUEST_FS_SELECTOR);
|
||||
case VM_REG_GUEST_GS:
|
||||
return (VMCS_GUEST_GS_SELECTOR);
|
||||
case VM_REG_GUEST_TR:
|
||||
return (VMCS_GUEST_TR_SELECTOR);
|
||||
case VM_REG_GUEST_LDTR:
|
||||
return (VMCS_GUEST_LDTR_SELECTOR);
|
||||
case VM_REG_GUEST_EFER:
|
||||
return (VMCS_GUEST_IA32_EFER);
|
||||
default:
|
||||
return (-1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static int
|
||||
vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
|
||||
{
|
||||
|
||||
switch (seg) {
|
||||
case VM_REG_GUEST_ES:
|
||||
*base = VMCS_GUEST_ES_BASE;
|
||||
*lim = VMCS_GUEST_ES_LIMIT;
|
||||
*acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
|
||||
break;
|
||||
case VM_REG_GUEST_CS:
|
||||
*base = VMCS_GUEST_CS_BASE;
|
||||
*lim = VMCS_GUEST_CS_LIMIT;
|
||||
*acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
|
||||
break;
|
||||
case VM_REG_GUEST_SS:
|
||||
*base = VMCS_GUEST_SS_BASE;
|
||||
*lim = VMCS_GUEST_SS_LIMIT;
|
||||
*acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
|
||||
break;
|
||||
case VM_REG_GUEST_DS:
|
||||
*base = VMCS_GUEST_DS_BASE;
|
||||
*lim = VMCS_GUEST_DS_LIMIT;
|
||||
*acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
|
||||
break;
|
||||
case VM_REG_GUEST_FS:
|
||||
*base = VMCS_GUEST_FS_BASE;
|
||||
*lim = VMCS_GUEST_FS_LIMIT;
|
||||
*acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
|
||||
break;
|
||||
case VM_REG_GUEST_GS:
|
||||
*base = VMCS_GUEST_GS_BASE;
|
||||
*lim = VMCS_GUEST_GS_LIMIT;
|
||||
*acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
|
||||
break;
|
||||
case VM_REG_GUEST_TR:
|
||||
*base = VMCS_GUEST_TR_BASE;
|
||||
*lim = VMCS_GUEST_TR_LIMIT;
|
||||
*acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
|
||||
break;
|
||||
case VM_REG_GUEST_LDTR:
|
||||
*base = VMCS_GUEST_LDTR_BASE;
|
||||
*lim = VMCS_GUEST_LDTR_LIMIT;
|
||||
*acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
|
||||
break;
|
||||
case VM_REG_GUEST_IDTR:
|
||||
*base = VMCS_GUEST_IDTR_BASE;
|
||||
*lim = VMCS_GUEST_IDTR_LIMIT;
|
||||
*acc = VMCS_INVALID_ENCODING;
|
||||
break;
|
||||
case VM_REG_GUEST_GDTR:
|
||||
*base = VMCS_GUEST_GDTR_BASE;
|
||||
*lim = VMCS_GUEST_GDTR_LIMIT;
|
||||
*acc = VMCS_INVALID_ENCODING;
|
||||
break;
|
||||
default:
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
vmcs_getreg(struct vmcs *vmcs, int ident, uint64_t *retval)
|
||||
{
|
||||
int error;
|
||||
uint32_t encoding;
|
||||
|
||||
/*
|
||||
* If we need to get at vmx-specific state in the VMCS we can bypass
|
||||
* the translation of 'ident' to 'encoding' by simply setting the
|
||||
* sign bit. As it so happens the upper 16 bits are reserved (i.e
|
||||
* set to 0) in the encodings for the VMCS so we are free to use the
|
||||
* sign bit.
|
||||
*/
|
||||
if (ident < 0)
|
||||
encoding = ident & 0x7fffffff;
|
||||
else
|
||||
encoding = vmcs_field_encoding(ident);
|
||||
|
||||
if (encoding == (uint32_t)-1)
|
||||
return (EINVAL);
|
||||
|
||||
VMPTRLD(vmcs);
|
||||
error = vmread(encoding, retval);
|
||||
VMCLEAR(vmcs);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vmcs_setreg(struct vmcs *vmcs, int ident, uint64_t val)
|
||||
{
|
||||
int error;
|
||||
uint32_t encoding;
|
||||
|
||||
if (ident < 0)
|
||||
encoding = ident & 0x7fffffff;
|
||||
else
|
||||
encoding = vmcs_field_encoding(ident);
|
||||
|
||||
if (encoding == (uint32_t)-1)
|
||||
return (EINVAL);
|
||||
|
||||
val = vmcs_fix_regval(encoding, val);
|
||||
|
||||
VMPTRLD(vmcs);
|
||||
error = vmwrite(encoding, val);
|
||||
VMCLEAR(vmcs);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vmcs_setdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
|
||||
{
|
||||
int error;
|
||||
uint32_t base, limit, access;
|
||||
|
||||
error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
|
||||
if (error != 0)
|
||||
panic("vmcs_setdesc: invalid segment register %d", seg);
|
||||
|
||||
VMPTRLD(vmcs);
|
||||
if ((error = vmwrite(base, desc->base)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vmwrite(limit, desc->limit)) != 0)
|
||||
goto done;
|
||||
|
||||
if (access != VMCS_INVALID_ENCODING) {
|
||||
if ((error = vmwrite(access, desc->access)) != 0)
|
||||
goto done;
|
||||
}
|
||||
done:
|
||||
VMCLEAR(vmcs);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vmcs_getdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
|
||||
{
|
||||
int error;
|
||||
uint32_t base, limit, access;
|
||||
uint64_t u64;
|
||||
|
||||
error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
|
||||
if (error != 0)
|
||||
panic("vmcs_getdesc: invalid segment register %d", seg);
|
||||
|
||||
VMPTRLD(vmcs);
|
||||
if ((error = vmread(base, &u64)) != 0)
|
||||
goto done;
|
||||
desc->base = u64;
|
||||
|
||||
if ((error = vmread(limit, &u64)) != 0)
|
||||
goto done;
|
||||
desc->limit = u64;
|
||||
|
||||
if (access != VMCS_INVALID_ENCODING) {
|
||||
if ((error = vmread(access, &u64)) != 0)
|
||||
goto done;
|
||||
desc->access = u64;
|
||||
}
|
||||
done:
|
||||
VMCLEAR(vmcs);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
|
||||
{
|
||||
int error;
|
||||
|
||||
VMPTRLD(vmcs);
|
||||
|
||||
/*
|
||||
* Guest MSRs are saved in the VM-exit MSR-store area.
|
||||
* Guest MSRs are loaded from the VM-entry MSR-load area.
|
||||
* Both areas point to the same location in memory.
|
||||
*/
|
||||
if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
|
||||
goto done;
|
||||
if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
|
||||
goto done;
|
||||
if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
|
||||
goto done;
|
||||
|
||||
error = 0;
|
||||
done:
|
||||
VMCLEAR(vmcs);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vmcs_set_defaults(struct vmcs *vmcs,
|
||||
u_long host_rip, u_long host_rsp, u_long ept_pml4,
|
||||
uint32_t pinbased_ctls, uint32_t procbased_ctls,
|
||||
uint32_t procbased_ctls2, uint32_t exit_ctls,
|
||||
uint32_t entry_ctls, u_long msr_bitmap, uint16_t vpid)
|
||||
{
|
||||
int error, codesel, datasel, tsssel;
|
||||
u_long cr0, cr4, efer;
|
||||
uint64_t eptp, pat;
|
||||
uint32_t exc_bitmap;
|
||||
|
||||
codesel = GSEL(GCODE_SEL, SEL_KPL);
|
||||
datasel = GSEL(GDATA_SEL, SEL_KPL);
|
||||
tsssel = GSEL(GPROC0_SEL, SEL_KPL);
|
||||
|
||||
/*
|
||||
* Make sure we have a "current" VMCS to work with.
|
||||
*/
|
||||
VMPTRLD(vmcs);
|
||||
|
||||
/*
|
||||
* Load the VMX controls
|
||||
*/
|
||||
if ((error = vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls)) != 0)
|
||||
goto done;
|
||||
if ((error = vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls)) != 0)
|
||||
goto done;
|
||||
if ((error = vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2)) != 0)
|
||||
goto done;
|
||||
if ((error = vmwrite(VMCS_EXIT_CTLS, exit_ctls)) != 0)
|
||||
goto done;
|
||||
if ((error = vmwrite(VMCS_ENTRY_CTLS, entry_ctls)) != 0)
|
||||
goto done;
|
||||
|
||||
/* Guest state */
|
||||
|
||||
/* Initialize guest IA32_PAT MSR with the default value */
|
||||
pat = PAT_VALUE(0, PAT_WRITE_BACK) |
|
||||
PAT_VALUE(1, PAT_WRITE_THROUGH) |
|
||||
PAT_VALUE(2, PAT_UNCACHED) |
|
||||
PAT_VALUE(3, PAT_UNCACHEABLE) |
|
||||
PAT_VALUE(4, PAT_WRITE_BACK) |
|
||||
PAT_VALUE(5, PAT_WRITE_THROUGH) |
|
||||
PAT_VALUE(6, PAT_UNCACHED) |
|
||||
PAT_VALUE(7, PAT_UNCACHEABLE);
|
||||
if ((error = vmwrite(VMCS_GUEST_IA32_PAT, pat)) != 0)
|
||||
goto done;
|
||||
|
||||
/* Host state */
|
||||
|
||||
/* Initialize host IA32_PAT MSR */
|
||||
pat = rdmsr(MSR_PAT);
|
||||
if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
|
||||
goto done;
|
||||
|
||||
/* Load the IA32_EFER MSR */
|
||||
efer = rdmsr(MSR_EFER);
|
||||
if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
|
||||
goto done;
|
||||
|
||||
/* Load the control registers */
|
||||
cr0 = rcr0();
|
||||
if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
|
||||
goto done;
|
||||
|
||||
cr4 = rcr4();
|
||||
if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
|
||||
goto done;
|
||||
|
||||
/* Load the segment selectors */
|
||||
if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* Load the Base-Address for %fs and idtr.
|
||||
*
|
||||
* Note that we exclude %gs, tss and gdtr here because their base
|
||||
* address is pcpu specific.
|
||||
*/
|
||||
if ((error = vmwrite(VMCS_HOST_FS_BASE, 0)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vmwrite(VMCS_HOST_IDTR_BASE, r_idt.rd_base)) != 0)
|
||||
goto done;
|
||||
|
||||
/* instruction pointer */
|
||||
if ((error = vmwrite(VMCS_HOST_RIP, host_rip)) != 0)
|
||||
goto done;
|
||||
|
||||
/* stack pointer */
|
||||
if ((error = vmwrite(VMCS_HOST_RSP, host_rsp)) != 0)
|
||||
goto done;
|
||||
|
||||
/* eptp */
|
||||
eptp = EPTP(ept_pml4);
|
||||
if ((error = vmwrite(VMCS_EPTP, eptp)) != 0)
|
||||
goto done;
|
||||
|
||||
/* vpid */
|
||||
if ((error = vmwrite(VMCS_VPID, vpid)) != 0)
|
||||
goto done;
|
||||
|
||||
/* msr bitmap */
|
||||
if ((error = vmwrite(VMCS_MSR_BITMAP, msr_bitmap)) != 0)
|
||||
goto done;
|
||||
|
||||
/* exception bitmap */
|
||||
exc_bitmap = 1 << IDT_MC;
|
||||
if ((error = vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap)) != 0)
|
||||
goto done;
|
||||
|
||||
/* link pointer */
|
||||
if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
|
||||
goto done;
|
||||
done:
|
||||
VMCLEAR(vmcs);
|
||||
return (error);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
vmcs_read(uint32_t encoding)
|
||||
{
|
||||
int error;
|
||||
uint64_t val;
|
||||
|
||||
error = vmread(encoding, &val);
|
||||
if (error != 0)
|
||||
panic("vmcs_read(%u) error %d", encoding, error);
|
||||
|
||||
return (val);
|
||||
}
|
324
sys/amd64/vmm/intel/vmcs.h
Normal file
324
sys/amd64/vmm/intel/vmcs.h
Normal file
@ -0,0 +1,324 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMCS_H_
|
||||
#define _VMCS_H_
|
||||
|
||||
#ifdef _KERNEL
|
||||
struct vmcs {
|
||||
uint32_t identifier;
|
||||
uint32_t abort_code;
|
||||
char _impl_specific[PAGE_SIZE - sizeof(uint32_t) * 2];
|
||||
};
|
||||
CTASSERT(sizeof(struct vmcs) == PAGE_SIZE);
|
||||
|
||||
/* MSR save region is composed of an array of 'struct msr_entry' */
|
||||
struct msr_entry {
|
||||
uint32_t index;
|
||||
uint32_t reserved;
|
||||
uint64_t val;
|
||||
|
||||
};
|
||||
|
||||
int vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count);
|
||||
int vmcs_set_defaults(struct vmcs *vmcs, u_long host_rip, u_long host_rsp,
|
||||
u_long ept_pml4,
|
||||
uint32_t pinbased_ctls, uint32_t procbased_ctls,
|
||||
uint32_t procbased_ctls2, uint32_t exit_ctls,
|
||||
uint32_t entry_ctls, u_long msr_bitmap,
|
||||
uint16_t vpid);
|
||||
int vmcs_getreg(struct vmcs *vmcs, int ident, uint64_t *retval);
|
||||
int vmcs_setreg(struct vmcs *vmcs, int ident, uint64_t val);
|
||||
int vmcs_getdesc(struct vmcs *vmcs, int ident,
|
||||
struct seg_desc *desc);
|
||||
int vmcs_setdesc(struct vmcs *vmcs, int ident,
|
||||
struct seg_desc *desc);
|
||||
uint64_t vmcs_read(uint32_t encoding);
|
||||
|
||||
#define vmexit_instruction_length() vmcs_read(VMCS_EXIT_INSTRUCTION_LENGTH)
|
||||
#define vmcs_guest_rip() vmcs_read(VMCS_GUEST_RIP)
|
||||
#define vmcs_instruction_error() vmcs_read(VMCS_INSTRUCTION_ERROR)
|
||||
#define vmcs_exit_reason() (vmcs_read(VMCS_EXIT_REASON) & 0xffff)
|
||||
#define vmcs_exit_qualification() vmcs_read(VMCS_EXIT_QUALIFICATION)
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
/*
 * Tag 'encoding' as a raw VMCS field encoding by setting bit 31.
 * NOTE(review): presumably used to multiplex VMCS encodings with other
 * register identifiers in the vmcs_getreg()/vmcs_setreg() interface --
 * confirm against vmcs.c.
 */
#define	VMCS_IDENT(encoding)	((encoding) | 0x80000000)
/*
 * VMCS field encodings from Appendix H, Intel Architecture Manual Vol3B.
 */
#define	VMCS_INVALID_ENCODING	0xffffffff

/* 16-bit control fields */
#define	VMCS_VPID			0x00000000

/* 16-bit guest-state fields */
#define	VMCS_GUEST_ES_SELECTOR		0x00000800
#define	VMCS_GUEST_CS_SELECTOR		0x00000802
#define	VMCS_GUEST_SS_SELECTOR		0x00000804
#define	VMCS_GUEST_DS_SELECTOR		0x00000806
#define	VMCS_GUEST_FS_SELECTOR		0x00000808
#define	VMCS_GUEST_GS_SELECTOR		0x0000080A
#define	VMCS_GUEST_LDTR_SELECTOR	0x0000080C
#define	VMCS_GUEST_TR_SELECTOR		0x0000080E

/* 16-bit host-state fields */
#define	VMCS_HOST_ES_SELECTOR		0x00000C00
#define	VMCS_HOST_CS_SELECTOR		0x00000C02
#define	VMCS_HOST_SS_SELECTOR		0x00000C04
#define	VMCS_HOST_DS_SELECTOR		0x00000C06
#define	VMCS_HOST_FS_SELECTOR		0x00000C08
#define	VMCS_HOST_GS_SELECTOR		0x00000C0A
#define	VMCS_HOST_TR_SELECTOR		0x00000C0C

/* 64-bit control fields */
#define	VMCS_IO_BITMAP_A		0x00002000
#define	VMCS_IO_BITMAP_B		0x00002002
#define	VMCS_MSR_BITMAP			0x00002004
#define	VMCS_EXIT_MSR_STORE		0x00002006
#define	VMCS_EXIT_MSR_LOAD		0x00002008
#define	VMCS_ENTRY_MSR_LOAD		0x0000200A
#define	VMCS_EXECUTIVE_VMCS		0x0000200C
#define	VMCS_TSC_OFFSET			0x00002010
#define	VMCS_VIRTUAL_APIC		0x00002012
#define	VMCS_APIC_ACCESS		0x00002014
#define	VMCS_EPTP			0x0000201A

/* 64-bit read-only fields */
#define	VMCS_GUEST_PHYSICAL_ADDRESS	0x00002400

/* 64-bit guest-state fields */
#define	VMCS_LINK_POINTER		0x00002800
#define	VMCS_GUEST_IA32_DEBUGCTL	0x00002802
#define	VMCS_GUEST_IA32_PAT		0x00002804
#define	VMCS_GUEST_IA32_EFER		0x00002806
#define	VMCS_GUEST_IA32_PERF_GLOBAL_CTRL 0x00002808
#define	VMCS_GUEST_PDPTE0		0x0000280A
#define	VMCS_GUEST_PDPTE1		0x0000280C
#define	VMCS_GUEST_PDPTE2		0x0000280E
#define	VMCS_GUEST_PDPTE3		0x00002810

/* 64-bit host-state fields */
#define	VMCS_HOST_IA32_PAT		0x00002C00
#define	VMCS_HOST_IA32_EFER		0x00002C02
#define	VMCS_HOST_IA32_PERF_GLOBAL_CTRL	0x00002C04

/* 32-bit control fields */
#define	VMCS_PIN_BASED_CTLS		0x00004000
#define	VMCS_PRI_PROC_BASED_CTLS	0x00004002
#define	VMCS_EXCEPTION_BITMAP		0x00004004
#define	VMCS_PF_ERROR_MASK		0x00004006
#define	VMCS_PF_ERROR_MATCH		0x00004008
#define	VMCS_CR3_TARGET_COUNT		0x0000400A
#define	VMCS_EXIT_CTLS			0x0000400C
#define	VMCS_EXIT_MSR_STORE_COUNT	0x0000400E
#define	VMCS_EXIT_MSR_LOAD_COUNT	0x00004010
#define	VMCS_ENTRY_CTLS			0x00004012
#define	VMCS_ENTRY_MSR_LOAD_COUNT	0x00004014
#define	VMCS_ENTRY_INTR_INFO		0x00004016
#define	VMCS_ENTRY_EXCEPTION_ERROR	0x00004018
#define	VMCS_ENTRY_INST_LENGTH		0x0000401A
#define	VMCS_TPR_THRESHOLD		0x0000401C
#define	VMCS_SEC_PROC_BASED_CTLS	0x0000401E
#define	VMCS_PLE_GAP			0x00004020
#define	VMCS_PLE_WINDOW			0x00004022

/* 32-bit read-only data fields */
#define	VMCS_INSTRUCTION_ERROR		0x00004400
#define	VMCS_EXIT_REASON		0x00004402
#define	VMCS_EXIT_INTERRUPTION_INFO	0x00004404
#define	VMCS_EXIT_INTERRUPTION_ERROR	0x00004406
#define	VMCS_IDT_VECTORING_INFO		0x00004408
#define	VMCS_IDT_VECTORING_ERROR	0x0000440A
#define	VMCS_EXIT_INSTRUCTION_LENGTH	0x0000440C
#define	VMCS_EXIT_INSTRUCTION_INFO	0x0000440E

/* 32-bit guest-state fields */
#define	VMCS_GUEST_ES_LIMIT		0x00004800
#define	VMCS_GUEST_CS_LIMIT		0x00004802
#define	VMCS_GUEST_SS_LIMIT		0x00004804
#define	VMCS_GUEST_DS_LIMIT		0x00004806
#define	VMCS_GUEST_FS_LIMIT		0x00004808
#define	VMCS_GUEST_GS_LIMIT		0x0000480A
#define	VMCS_GUEST_LDTR_LIMIT		0x0000480C
#define	VMCS_GUEST_TR_LIMIT		0x0000480E
#define	VMCS_GUEST_GDTR_LIMIT		0x00004810
#define	VMCS_GUEST_IDTR_LIMIT		0x00004812
#define	VMCS_GUEST_ES_ACCESS_RIGHTS	0x00004814
#define	VMCS_GUEST_CS_ACCESS_RIGHTS	0x00004816
#define	VMCS_GUEST_SS_ACCESS_RIGHTS	0x00004818
#define	VMCS_GUEST_DS_ACCESS_RIGHTS	0x0000481A
#define	VMCS_GUEST_FS_ACCESS_RIGHTS	0x0000481C
#define	VMCS_GUEST_GS_ACCESS_RIGHTS	0x0000481E
#define	VMCS_GUEST_LDTR_ACCESS_RIGHTS	0x00004820
#define	VMCS_GUEST_TR_ACCESS_RIGHTS	0x00004822
#define	VMCS_GUEST_INTERRUPTIBILITY	0x00004824
#define	VMCS_GUEST_ACTIVITY		0x00004826
#define	VMCS_GUEST_SMBASE		0x00004828
#define	VMCS_GUEST_IA32_SYSENTER_CS	0x0000482A
#define	VMCS_PREEMPTION_TIMER_VALUE	0x0000482E

/* 32-bit host state fields */
#define	VMCS_HOST_IA32_SYSENTER_CS	0x00004C00

/* Natural Width control fields */
#define	VMCS_CR0_MASK			0x00006000
#define	VMCS_CR4_MASK			0x00006002
#define	VMCS_CR0_SHADOW			0x00006004
#define	VMCS_CR4_SHADOW			0x00006006
#define	VMCS_CR3_TARGET0		0x00006008
#define	VMCS_CR3_TARGET1		0x0000600A
#define	VMCS_CR3_TARGET2		0x0000600C
#define	VMCS_CR3_TARGET3		0x0000600E

/* Natural Width read-only fields */
#define	VMCS_EXIT_QUALIFICATION		0x00006400
#define	VMCS_IO_RCX			0x00006402
#define	VMCS_IO_RSI			0x00006404
#define	VMCS_IO_RDI			0x00006406
#define	VMCS_IO_RIP			0x00006408
#define	VMCS_GUEST_LINEAR_ADDRESS	0x0000640A

/* Natural Width guest-state fields */
#define	VMCS_GUEST_CR0			0x00006800
#define	VMCS_GUEST_CR3			0x00006802
#define	VMCS_GUEST_CR4			0x00006804
#define	VMCS_GUEST_ES_BASE		0x00006806
#define	VMCS_GUEST_CS_BASE		0x00006808
#define	VMCS_GUEST_SS_BASE		0x0000680A
#define	VMCS_GUEST_DS_BASE		0x0000680C
#define	VMCS_GUEST_FS_BASE		0x0000680E
#define	VMCS_GUEST_GS_BASE		0x00006810
#define	VMCS_GUEST_LDTR_BASE		0x00006812
#define	VMCS_GUEST_TR_BASE		0x00006814
#define	VMCS_GUEST_GDTR_BASE		0x00006816
#define	VMCS_GUEST_IDTR_BASE		0x00006818
#define	VMCS_GUEST_DR7			0x0000681A
#define	VMCS_GUEST_RSP			0x0000681C
#define	VMCS_GUEST_RIP			0x0000681E
#define	VMCS_GUEST_RFLAGS		0x00006820
#define	VMCS_GUEST_PENDING_DBG_EXCEPTIONS 0x00006822
#define	VMCS_GUEST_IA32_SYSENTER_ESP	0x00006824
#define	VMCS_GUEST_IA32_SYSENTER_EIP	0x00006826

/* Natural Width host-state fields */
#define	VMCS_HOST_CR0			0x00006C00
#define	VMCS_HOST_CR3			0x00006C02
#define	VMCS_HOST_CR4			0x00006C04
#define	VMCS_HOST_FS_BASE		0x00006C06
#define	VMCS_HOST_GS_BASE		0x00006C08
#define	VMCS_HOST_TR_BASE		0x00006C0A
#define	VMCS_HOST_GDTR_BASE		0x00006C0C
#define	VMCS_HOST_IDTR_BASE		0x00006C0E
#define	VMCS_HOST_IA32_SYSENTER_ESP	0x00006C10
#define	VMCS_HOST_IA32_SYSENTER_EIP	0x00006C12
#define	VMCS_HOST_RSP			0x00006C14
#define	VMCS_HOST_RIP			0x00006C16	/* hex case normalized (was 0x00006c16) */

/*
 * VM instruction error numbers
 */
#define	VMRESUME_WITH_NON_LAUNCHED_VMCS	5

/*
 * VMCS exit reasons
 */
#define	EXIT_REASON_EXCEPTION		0
#define	EXIT_REASON_EXT_INTR		1
#define	EXIT_REASON_TRIPLE_FAULT	2
#define	EXIT_REASON_INIT		3
#define	EXIT_REASON_SIPI		4
#define	EXIT_REASON_IO_SMI		5
#define	EXIT_REASON_SMI			6
#define	EXIT_REASON_INTR_WINDOW		7
#define	EXIT_REASON_NMI_WINDOW		8
#define	EXIT_REASON_TASK_SWITCH		9
#define	EXIT_REASON_CPUID		10
#define	EXIT_REASON_GETSEC		11
#define	EXIT_REASON_HLT			12
#define	EXIT_REASON_INVD		13
#define	EXIT_REASON_INVLPG		14
#define	EXIT_REASON_RDPMC		15
#define	EXIT_REASON_RDTSC		16
#define	EXIT_REASON_RSM			17
#define	EXIT_REASON_VMCALL		18
#define	EXIT_REASON_VMCLEAR		19
#define	EXIT_REASON_VMLAUNCH		20
#define	EXIT_REASON_VMPTRLD		21
#define	EXIT_REASON_VMPTRST		22
#define	EXIT_REASON_VMREAD		23
#define	EXIT_REASON_VMRESUME		24
#define	EXIT_REASON_VMWRITE		25
#define	EXIT_REASON_VMXOFF		26
#define	EXIT_REASON_VMXON		27
#define	EXIT_REASON_CR_ACCESS		28
#define	EXIT_REASON_DR_ACCESS		29
#define	EXIT_REASON_INOUT		30
#define	EXIT_REASON_RDMSR		31
#define	EXIT_REASON_WRMSR		32
#define	EXIT_REASON_INVAL_VMCS		33
#define	EXIT_REASON_INVAL_MSR		34
#define	EXIT_REASON_MWAIT		36
#define	EXIT_REASON_MTF			37
#define	EXIT_REASON_MONITOR		39
#define	EXIT_REASON_PAUSE		40
#define	EXIT_REASON_MCE			41
#define	EXIT_REASON_TPR			43
#define	EXIT_REASON_APIC		44
#define	EXIT_REASON_GDTR_IDTR		46
#define	EXIT_REASON_LDTR_TR		47
#define	EXIT_REASON_EPT_FAULT		48
#define	EXIT_REASON_EPT_MISCONFIG	49
#define	EXIT_REASON_INVEPT		50
#define	EXIT_REASON_RDTSCP		51
#define	EXIT_REASON_VMX_PREEMPT		52
#define	EXIT_REASON_INVVPID		53
#define	EXIT_REASON_WBINVD		54
#define	EXIT_REASON_XSETBV		55

/*
 * VMCS interrupt information fields
 */
/* 1U: left-shifting 1 into the sign bit of 'int' is undefined behavior. */
#define	VMCS_INTERRUPTION_INFO_VALID	(1U << 31)
#define	VMCS_INTERRUPTION_INFO_HW_INTR	(0 << 8)
#define	VMCS_INTERRUPTION_INFO_NMI	(2 << 8)

/*
 * VMCS Guest interruptibility field
 */
#define	VMCS_INTERRUPTIBILITY_STI_BLOCKING	(1 << 0)
#define	VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING	(1 << 1)
#define	VMCS_INTERRUPTIBILITY_SMI_BLOCKING	(1 << 2)
#define	VMCS_INTERRUPTIBILITY_NMI_BLOCKING	(1 << 3)

/*
 * Exit qualification for EXIT_REASON_INVAL_VMCS
 */
#define	EXIT_QUAL_NMI_WHILE_STI_BLOCKING	3
|
||||
|
||||
#endif
|
1673
sys/amd64/vmm/intel/vmx.c
Normal file
1673
sys/amd64/vmm/intel/vmx.c
Normal file
File diff suppressed because it is too large
Load Diff
115
sys/amd64/vmm/intel/vmx.h
Normal file
115
sys/amd64/vmm/intel/vmx.h
Normal file
@ -0,0 +1,115 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMX_H_
#define _VMX_H_

#include "vmcs.h"

/* Size of the per-vcpu guest MSR save area (entries, not bytes). */
#define GUEST_MSR_MAX_ENTRIES	64			/* arbitrary */

/*
 * Register context saved/restored around a guest entry/exit.
 *
 * NOTE(review): the field offsets are exported as VMXCTX_* symbols by
 * vmx_genassym.c, presumably for use by the low-level assembly entry/exit
 * code -- do not reorder fields without regenerating those offsets.
 */
struct vmxctx {
	register_t	guest_rdi;		/* Guest state */
	register_t	guest_rsi;
	register_t	guest_rdx;
	register_t	guest_rcx;
	register_t	guest_r8;
	register_t	guest_r9;
	register_t	guest_rax;
	register_t	guest_rbx;
	register_t	guest_rbp;
	register_t	guest_r10;
	register_t	guest_r11;
	register_t	guest_r12;
	register_t	guest_r13;
	register_t	guest_r14;
	register_t	guest_r15;
	register_t	guest_cr2;

	register_t	host_r15;		/* Host state */
	register_t	host_r14;
	register_t	host_r13;
	register_t	host_r12;
	register_t	host_rbp;
	register_t	host_rsp;
	register_t	host_rbx;
	register_t	host_rip;
	/*
	 * XXX todo debug registers and fpu state
	 */

	int		launch_error;	/* result of the last vmlaunch/vmresume */
};

/* Per-vcpu VMX capability override state. */
struct vmxcap {
	int		set;
	uint32_t	proc_ctls;	/* primary processor-based controls */
};

struct vmxstate {
	int		request_nmi;
	int		lastcpu;	/* host cpu that this 'vcpu' last ran on */
	uint16_t	vpid;
};

/* virtual machine softc */
struct vmx {
	pml4_entry_t	pml4ept[NPML4EPG];	/* EPT top-level page table */
	struct vmcs	vmcs[VM_MAXCPU];	/* one vmcs per virtual cpu */
	char		msr_bitmap[PAGE_SIZE];
	struct msr_entry guest_msrs[VM_MAXCPU][GUEST_MSR_MAX_ENTRIES];
	struct vmxctx	ctx[VM_MAXCPU];
	struct vmxcap	cap[VM_MAXCPU];
	struct vmxstate	state[VM_MAXCPU];
	struct vm	*vm;			/* back pointer to the vm */
};
/* The first three members are handed to hardware and must be page aligned. */
CTASSERT((offsetof(struct vmx, pml4ept) & PAGE_MASK) == 0);
CTASSERT((offsetof(struct vmx, vmcs) & PAGE_MASK) == 0);
CTASSERT((offsetof(struct vmx, msr_bitmap) & PAGE_MASK) == 0);
CTASSERT((offsetof(struct vmx, guest_msrs) & 15) == 0);

#define	VMX_RETURN_DIRECT	0
#define	VMX_RETURN_LONGJMP	1
#define	VMX_RETURN_VMRESUME	2
#define	VMX_RETURN_VMLAUNCH	3
/*
 * vmx_setjmp() returns:
 * - 0 when it returns directly
 * - 1 when it returns from vmx_longjmp
 * - 2 when it returns from vmx_resume (which would only be in the error case)
 * - 3 when it returns from vmx_launch (which would only be in the error case)
 */
int	vmx_setjmp(struct vmxctx *ctx);
void	vmx_longjmp(void);			/* returns via vmx_setjmp */
void	vmx_launch(struct vmxctx *ctx) __dead2;	/* may return via vmx_setjmp */
void	vmx_resume(struct vmxctx *ctx) __dead2;	/* may return via vmx_setjmp */

/*
 * Adjust cr0/cr4 to values legal while in VMX operation.
 * NOTE(review): presumably applies the IA32_VMX_CR{0,4}_FIXED* MSR
 * constraints -- confirm against the implementation in vmx.c.
 */
u_long	vmx_fix_cr0(u_long cr0);
u_long	vmx_fix_cr4(u_long cr4);

#endif
|
92
sys/amd64/vmm/intel/vmx_controls.h
Normal file
92
sys/amd64/vmm/intel/vmx_controls.h
Normal file
@ -0,0 +1,92 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMX_CONTROLS_H_
#define _VMX_CONTROLS_H_

/*
 * Bit definitions for the VMX pin-based/processor-based execution controls
 * and the VM-exit/VM-entry controls (Intel Architecture Manual Vol 3B,
 * "VM-Execution Control Fields").
 */

/* Pin-Based VM-Execution Controls */
#define	PINBASED_EXTINT_EXITING		(1 << 0)
#define	PINBASED_NMI_EXITING		(1 << 3)
#define	PINBASED_VIRTUAL_NMI		(1 << 5)
#define	PINBASED_PREMPTION_TIMER	(1 << 6)
/*
 * Correctly spelled alias; the misspelled PINBASED_PREMPTION_TIMER is
 * retained for backward compatibility with existing users.
 */
#define	PINBASED_PREEMPTION_TIMER	PINBASED_PREMPTION_TIMER

/* Primary Processor-Based VM-Execution Controls */
#define	PROCBASED_INT_WINDOW_EXITING	(1 << 2)
#define	PROCBASED_TSC_OFFSET		(1 << 3)
#define	PROCBASED_HLT_EXITING		(1 << 7)
#define	PROCBASED_INVLPG_EXITING	(1 << 9)
#define	PROCBASED_MWAIT_EXITING		(1 << 10)
#define	PROCBASED_RDPMC_EXITING		(1 << 11)
#define	PROCBASED_RDTSC_EXITING		(1 << 12)
#define	PROCBASED_CR3_LOAD_EXITING	(1 << 15)
#define	PROCBASED_CR3_STORE_EXITING	(1 << 16)
#define	PROCBASED_CR8_LOAD_EXITING	(1 << 19)
#define	PROCBASED_CR8_STORE_EXITING	(1 << 20)
#define	PROCBASED_USE_TPR_SHADOW	(1 << 21)
#define	PROCBASED_NMI_WINDOW_EXITING	(1 << 22)
#define	PROCBASED_MOV_DR_EXITING	(1 << 23)
#define	PROCBASED_IO_EXITING		(1 << 24)
#define	PROCBASED_IO_BITMAPS		(1 << 25)
#define	PROCBASED_MTF			(1 << 27)
#define	PROCBASED_MSR_BITMAPS		(1 << 28)
#define	PROCBASED_MONITOR_EXITING	(1 << 29)
#define	PROCBASED_PAUSE_EXITING		(1 << 30)
/* 1U: left-shifting 1 into the sign bit of 'int' is undefined behavior. */
#define	PROCBASED_SECONDARY_CONTROLS	(1U << 31)

/* Secondary Processor-Based VM-Execution Controls */
#define	PROCBASED2_VIRTUALIZE_APIC	(1 << 0)
#define	PROCBASED2_ENABLE_EPT		(1 << 1)
#define	PROCBASED2_DESC_TABLE_EXITING	(1 << 2)
#define	PROCBASED2_ENABLE_RDTSCP	(1 << 3)
#define	PROCBASED2_VIRTUALIZE_X2APIC	(1 << 4)
#define	PROCBASED2_ENABLE_VPID		(1 << 5)
#define	PROCBASED2_WBINVD_EXITING	(1 << 6)
#define	PROCBASED2_UNRESTRICTED_GUEST	(1 << 7)
#define	PROCBASED2_PAUSE_LOOP_EXITING	(1 << 10)

/* VM Exit Controls */
#define	VM_EXIT_SAVE_DEBUG_CONTROLS	(1 << 2)
#define	VM_EXIT_HOST_LMA		(1 << 9)
#define	VM_EXIT_LOAD_PERF_GLOBAL_CTRL	(1 << 12)
#define	VM_EXIT_ACKNOWLEDGE_INTERRUPT	(1 << 15)
#define	VM_EXIT_SAVE_PAT		(1 << 18)
#define	VM_EXIT_LOAD_PAT		(1 << 19)
#define	VM_EXIT_SAVE_EFER		(1 << 20)
#define	VM_EXIT_LOAD_EFER		(1 << 21)
#define	VM_EXIT_SAVE_PREEMPTION_TIMER	(1 << 22)

/* VM Entry Controls */
#define	VM_ENTRY_LOAD_DEBUG_CONTROLS	(1 << 2)
#define	VM_ENTRY_GUEST_LMA		(1 << 9)
#define	VM_ENTRY_INTO_SMM		(1 << 10)
#define	VM_ENTRY_DEACTIVATE_DUAL_MONITOR (1 << 11)
#define	VM_ENTRY_LOAD_PERF_GLOBAL_CTRL	(1 << 13)
#define	VM_ENTRY_LOAD_PAT		(1 << 14)
#define	VM_ENTRY_LOAD_EFER		(1 << 15)

#endif
|
199
sys/amd64/vmm/intel/vmx_cpufunc.h
Normal file
199
sys/amd64/vmm/intel/vmx_cpufunc.h
Normal file
@ -0,0 +1,199 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMX_CPUFUNC_H_
#define _VMX_CPUFUNC_H_

struct vmcs;

/*
 * Section 5.2 "Conventions" from Intel Architecture Manual 2B.
 *
 *			error
 *	VMsucceed	  0
 *	VMFailInvalid	  1
 *	VMFailValid	  2	see also VMCS VM-Instruction Error Field
 */
#define	VM_SUCCESS		0
#define	VM_FAIL_INVALID		1
#define	VM_FAIL_VALID		2
/*
 * Translate the CPU flags left behind by a VMX instruction into the error
 * convention above: CF set -> VMFailInvalid (1), ZF set -> VMFailValid (2),
 * neither -> VMsucceed (0).
 *
 * NOTE(review): the flags are sampled in a *separate* asm statement from the
 * VMX instruction itself, so this macro must be invoked immediately after it
 * with no intervening code that could clobber the flags.
 */
#define	VMX_SET_ERROR_CODE(varname)					\
	do {								\
	__asm __volatile("	jnc 1f;"				\
			 "	mov $1, %0;"	/* CF: error = 1 */	\
			 "	jmp 3f;"				\
			 "1:	jnz 2f;"				\
			 "	mov $2, %0;"	/* ZF: error = 2 */	\
			 "	jmp 3f;"				\
			 "2:	mov $0, %0;"				\
			 "3:	nop"					\
			 :"=r" (varname));				\
	} while (0)

/* returns 0 on success and non-zero on failure */
static __inline int
vmxon(char *region)
{
	int error;
	uint64_t addr;

	/* The vmxon instruction operates on the physical address. */
	addr = vtophys(region);
	__asm __volatile("vmxon %0" : : "m" (*(uint64_t *)&addr) : "memory");
	VMX_SET_ERROR_CODE(error);
	return (error);
}

/* returns 0 on success and non-zero on failure */
static __inline int
vmclear(struct vmcs *vmcs)
{
	int error;
	uint64_t addr;

	addr = vtophys(vmcs);
	__asm __volatile("vmclear %0" : : "m" (*(uint64_t *)&addr) : "memory");
	VMX_SET_ERROR_CODE(error);
	return (error);
}

/* Leave VMX operation on this cpu. */
static __inline void
vmxoff(void)
{
	__asm __volatile("vmxoff");
}

/*
 * Store the current-VMCS pointer into *addr.
 * NOTE(review): *addr is written by the instruction but is declared as an
 * input operand; the "memory" clobber is what forces the compiler to treat
 * it as modified -- confirm intended.
 */
static __inline void
vmptrst(uint64_t *addr)
{
	__asm __volatile("vmptrst %0" : : "m" (*addr) : "memory");
}

/* Make 'vmcs' the current VMCS; returns 0 on success, non-zero on failure. */
static __inline int
vmptrld(struct vmcs *vmcs)
{
	int error;
	uint64_t addr;

	addr = vtophys(vmcs);
	__asm __volatile("vmptrld %0" : : "m" (*(uint64_t *)&addr) : "memory");
	VMX_SET_ERROR_CODE(error);
	return (error);
}

/* Write 'val' to the field 'reg' of the current VMCS. */
static __inline int
vmwrite(uint64_t reg, uint64_t val)
{
	int error;

	__asm __volatile("vmwrite %0, %1" : : "r" (val), "r" (reg) : "memory");

	VMX_SET_ERROR_CODE(error);

	return (error);
}

/* Read the field 'r' of the current VMCS into *addr. */
static __inline int
vmread(uint64_t r, uint64_t *addr)
{
	int error;

	__asm __volatile("vmread %0, %1" : : "r" (r), "m" (*addr) : "memory");

	VMX_SET_ERROR_CODE(error);

	return (error);
}

/*
 * Panic-on-failure wrapper around vmclear().
 * Pairs with VMPTRLD() below: VMPTRLD() enters a critical section before
 * loading the VMCS and VMCLEAR() exits it after clearing it, keeping the
 * thread on this cpu while its VMCS is the current one.
 */
static void __inline
VMCLEAR(struct vmcs *vmcs)
{
	int err;

	err = vmclear(vmcs);
	if (err != 0)
		panic("%s: vmclear(%p) error %d", __func__, vmcs, err);

	critical_exit();
}

/* Panic-on-failure wrapper around vmptrld(); see VMCLEAR() for pairing. */
static void __inline
VMPTRLD(struct vmcs *vmcs)
{
	int err;

	critical_enter();

	err = vmptrld(vmcs);
	if (err != 0)
		panic("%s: vmptrld(%p) error %d", __func__, vmcs, err);
}

/* invvpid operand types (Intel SDM: INVVPID instruction). */
#define	INVVPID_TYPE_ADDRESS		0UL
#define	INVVPID_TYPE_SINGLE_CONTEXT	1UL
#define	INVVPID_TYPE_ALL_CONTEXTS	2UL

/* In-memory descriptor operand for the invvpid instruction. */
struct invvpid_desc {
	uint16_t	vpid;
	uint16_t	_res1;
	uint32_t	_res2;
	uint64_t	linear_addr;
};
CTASSERT(sizeof(struct invvpid_desc) == 16);

/* Invalidate VPID-tagged TLB mappings; panics on failure. */
static void __inline
invvpid(uint64_t type, struct invvpid_desc desc)
{
	int error;

	__asm __volatile("invvpid %0, %1" :: "m" (desc), "r" (type) : "memory");

	VMX_SET_ERROR_CODE(error);
	if (error)
		panic("invvpid error %d", error);
}

/* invept operand types (Intel SDM: INVEPT instruction). */
#define	INVEPT_TYPE_SINGLE_CONTEXT	1UL
#define	INVEPT_TYPE_ALL_CONTEXTS	2UL
/* In-memory descriptor operand for the invept instruction. */
struct invept_desc {
	uint64_t	eptp;
	uint64_t	_res;
};
CTASSERT(sizeof(struct invept_desc) == 16);

/* Invalidate EPT-derived TLB mappings; panics on failure. */
static void __inline
invept(uint64_t type, struct invept_desc desc)
{
	int error;

	__asm __volatile("invept %0, %1" :: "m" (desc), "r" (type) : "memory");

	VMX_SET_ERROR_CODE(error);
	if (error)
		panic("invept error %d", error);
}
#endif	/* _VMX_CPUFUNC_H_ */
|
81
sys/amd64/vmm/intel/vmx_genassym.c
Normal file
81
sys/amd64/vmm/intel/vmx_genassym.c
Normal file
@ -0,0 +1,81 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/assym.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
|
||||
#include <machine/pmap.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "vmx.h"
|
||||
#include "vmx_cpufunc.h"
|
||||
|
||||
/*
 * Export 'struct vmxctx' field offsets and the VMX return/error constants
 * as assembler symbols, presumably for the low-level guest entry/exit
 * assembly code (NOTE(review): confirm the consuming .S file).
 */
ASSYM(VMXCTX_GUEST_RDI, offsetof(struct vmxctx, guest_rdi));
ASSYM(VMXCTX_GUEST_RSI, offsetof(struct vmxctx, guest_rsi));
ASSYM(VMXCTX_GUEST_RDX, offsetof(struct vmxctx, guest_rdx));
ASSYM(VMXCTX_GUEST_RCX, offsetof(struct vmxctx, guest_rcx));
ASSYM(VMXCTX_GUEST_R8, offsetof(struct vmxctx, guest_r8));
ASSYM(VMXCTX_GUEST_R9, offsetof(struct vmxctx, guest_r9));
ASSYM(VMXCTX_GUEST_RAX, offsetof(struct vmxctx, guest_rax));
ASSYM(VMXCTX_GUEST_RBX, offsetof(struct vmxctx, guest_rbx));
ASSYM(VMXCTX_GUEST_RBP, offsetof(struct vmxctx, guest_rbp));
ASSYM(VMXCTX_GUEST_R10, offsetof(struct vmxctx, guest_r10));
ASSYM(VMXCTX_GUEST_R11, offsetof(struct vmxctx, guest_r11));
ASSYM(VMXCTX_GUEST_R12, offsetof(struct vmxctx, guest_r12));
ASSYM(VMXCTX_GUEST_R13, offsetof(struct vmxctx, guest_r13));
ASSYM(VMXCTX_GUEST_R14, offsetof(struct vmxctx, guest_r14));
ASSYM(VMXCTX_GUEST_R15, offsetof(struct vmxctx, guest_r15));
ASSYM(VMXCTX_GUEST_CR2, offsetof(struct vmxctx, guest_cr2));

ASSYM(VMXCTX_HOST_R15, offsetof(struct vmxctx, host_r15));
ASSYM(VMXCTX_HOST_R14, offsetof(struct vmxctx, host_r14));
ASSYM(VMXCTX_HOST_R13, offsetof(struct vmxctx, host_r13));
ASSYM(VMXCTX_HOST_R12, offsetof(struct vmxctx, host_r12));
ASSYM(VMXCTX_HOST_RBP, offsetof(struct vmxctx, host_rbp));
ASSYM(VMXCTX_HOST_RSP, offsetof(struct vmxctx, host_rsp));
ASSYM(VMXCTX_HOST_RBX, offsetof(struct vmxctx, host_rbx));
ASSYM(VMXCTX_HOST_RIP, offsetof(struct vmxctx, host_rip));

ASSYM(VMXCTX_LAUNCH_ERROR, offsetof(struct vmxctx, launch_error));

/* Error convention of the VMX instructions (see vmx_cpufunc.h). */
ASSYM(VM_SUCCESS, VM_SUCCESS);
ASSYM(VM_FAIL_INVALID, VM_FAIL_INVALID);
ASSYM(VM_FAIL_VALID, VM_FAIL_VALID);

/* vmx_setjmp() return values (see vmx.h). */
ASSYM(VMX_RETURN_DIRECT, VMX_RETURN_DIRECT);
ASSYM(VMX_RETURN_LONGJMP, VMX_RETURN_LONGJMP);
ASSYM(VMX_RETURN_VMRESUME, VMX_RETURN_VMRESUME);
ASSYM(VMX_RETURN_VMLAUNCH, VMX_RETURN_VMLAUNCH);
172
sys/amd64/vmm/intel/vmx_msr.c
Normal file
172
sys/amd64/vmm/intel/vmx_msr.c
Normal file
@ -0,0 +1,172 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
|
||||
#include <machine/cpufunc.h>
|
||||
|
||||
#include "vmx_msr.h"
|
||||
|
||||
static boolean_t
|
||||
vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
|
||||
{
|
||||
|
||||
if (msr_val & (1UL << (bitpos + 32)))
|
||||
return (TRUE);
|
||||
else
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
|
||||
{
|
||||
|
||||
if ((msr_val & (1UL << bitpos)) == 0)
|
||||
return (TRUE);
|
||||
else
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
vmx_revision(void)
|
||||
{
|
||||
|
||||
return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate a bitmask to be used for the VMCS execution control fields.
|
||||
*
|
||||
* The caller specifies what bits should be set to one in 'ones_mask'
|
||||
* and what bits should be set to zero in 'zeros_mask'. The don't-care
|
||||
* bits are set to the default value. The default values are obtained
|
||||
* based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
|
||||
* VMX Capabilities".
|
||||
*
|
||||
* Returns zero on success and non-zero on error.
|
||||
*/
|
||||
int
|
||||
vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
|
||||
uint32_t zeros_mask, uint32_t *retval)
|
||||
{
|
||||
int i;
|
||||
uint64_t val, trueval;
|
||||
boolean_t true_ctls_avail, one_allowed, zero_allowed;
|
||||
|
||||
/* We cannot ask the same bit to be set to both '1' and '0' */
|
||||
if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
|
||||
return (EINVAL);
|
||||
|
||||
if (rdmsr(MSR_VMX_BASIC) & (1UL << 55))
|
||||
true_ctls_avail = TRUE;
|
||||
else
|
||||
true_ctls_avail = FALSE;
|
||||
|
||||
val = rdmsr(ctl_reg);
|
||||
if (true_ctls_avail)
|
||||
trueval = rdmsr(true_ctl_reg); /* step c */
|
||||
else
|
||||
trueval = val; /* step a */
|
||||
|
||||
for (i = 0; i < 32; i++) {
|
||||
one_allowed = vmx_ctl_allows_one_setting(trueval, i);
|
||||
zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);
|
||||
|
||||
KASSERT(one_allowed || zero_allowed,
|
||||
("invalid zero/one setting for bit %d of ctl 0x%0x, "
|
||||
"truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));
|
||||
|
||||
if (zero_allowed && !one_allowed) { /* b(i),c(i) */
|
||||
if (ones_mask & (1 << i))
|
||||
return (EINVAL);
|
||||
*retval &= ~(1 << i);
|
||||
} else if (one_allowed && !zero_allowed) { /* b(i),c(i) */
|
||||
if (zeros_mask & (1 << i))
|
||||
return (EINVAL);
|
||||
*retval |= 1 << i;
|
||||
} else {
|
||||
if (zeros_mask & (1 << i)) /* b(ii),c(ii) */
|
||||
*retval &= ~(1 << i);
|
||||
else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
|
||||
*retval |= 1 << i;
|
||||
else if (!true_ctls_avail)
|
||||
*retval &= ~(1 << i); /* b(iii) */
|
||||
else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
|
||||
*retval &= ~(1 << i);
|
||||
else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
|
||||
*retval |= 1 << i;
|
||||
else {
|
||||
panic("vmx_set_ctlreg: unable to determine "
|
||||
"correct value of ctl bit %d for msr "
|
||||
"0x%0x and true msr 0x%0x", i, ctl_reg,
|
||||
true_ctl_reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
msr_bitmap_initialize(char *bitmap)
|
||||
{
|
||||
|
||||
memset(bitmap, 0xff, PAGE_SIZE);
|
||||
}
|
||||
|
||||
int
|
||||
msr_bitmap_change_access(char *bitmap, u_int msr, int access)
|
||||
{
|
||||
int byte, bit;
|
||||
|
||||
if (msr >= 0x00000000 && msr <= 0x00001FFF)
|
||||
byte = msr / 8;
|
||||
else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
|
||||
byte = 1024 + (msr - 0xC0000000) / 8;
|
||||
else
|
||||
return (EINVAL);
|
||||
|
||||
bit = msr & 0x7;
|
||||
|
||||
if (access & MSR_BITMAP_ACCESS_READ)
|
||||
bitmap[byte] &= ~(1 << bit);
|
||||
else
|
||||
bitmap[byte] |= 1 << bit;
|
||||
|
||||
byte += 2048;
|
||||
if (access & MSR_BITMAP_ACCESS_WRITE)
|
||||
bitmap[byte] &= ~(1 << bit);
|
||||
else
|
||||
bitmap[byte] |= 1 << bit;
|
||||
|
||||
return (0);
|
||||
}
|
78
sys/amd64/vmm/intel/vmx_msr.h
Normal file
78
sys/amd64/vmm/intel/vmx_msr.h
Normal file
@ -0,0 +1,78 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMX_MSR_H_
|
||||
#define _VMX_MSR_H_
|
||||
|
||||
#define MSR_VMX_BASIC 0x480
|
||||
#define MSR_VMX_EPT_VPID_CAP 0x48C
|
||||
|
||||
#define MSR_VMX_PROCBASED_CTLS 0x482
|
||||
#define MSR_VMX_TRUE_PROCBASED_CTLS 0x48E
|
||||
|
||||
#define MSR_VMX_PINBASED_CTLS 0x481
|
||||
#define MSR_VMX_TRUE_PINBASED_CTLS 0x48D
|
||||
|
||||
#define MSR_VMX_PROCBASED_CTLS2 0x48B
|
||||
|
||||
#define MSR_VMX_EXIT_CTLS 0x483
|
||||
#define MSR_VMX_TRUE_EXIT_CTLS 0x48f
|
||||
|
||||
#define MSR_VMX_ENTRY_CTLS 0x484
|
||||
#define MSR_VMX_TRUE_ENTRY_CTLS 0x490
|
||||
|
||||
#define MSR_VMX_CR0_FIXED0 0x486
|
||||
#define MSR_VMX_CR0_FIXED1 0x487
|
||||
|
||||
#define MSR_VMX_CR4_FIXED0 0x488
|
||||
#define MSR_VMX_CR4_FIXED1 0x489
|
||||
|
||||
uint32_t vmx_revision(void);
|
||||
|
||||
int vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
|
||||
uint32_t zeros_mask, uint32_t *retval);
|
||||
|
||||
/*
|
||||
* According to Section 21.10.4 "Software Access to Related Structures",
|
||||
* changes to data structures pointed to by the VMCS must be made only when
|
||||
* there is no logical processor with a current VMCS that points to the
|
||||
* data structure.
|
||||
*
|
||||
* This pretty much limits us to configuring the MSR bitmap before VMCS
|
||||
* initialization for SMP VMs. Unless of course we do it the hard way - which
|
||||
* would involve some form of synchronization between the vcpus to vmclear
|
||||
* all VMCSs' that point to the bitmap.
|
||||
*/
|
||||
#define MSR_BITMAP_ACCESS_NONE 0x0
|
||||
#define MSR_BITMAP_ACCESS_READ 0x1
|
||||
#define MSR_BITMAP_ACCESS_WRITE 0x2
|
||||
#define MSR_BITMAP_ACCESS_RW (MSR_BITMAP_ACCESS_READ|MSR_BITMAP_ACCESS_WRITE)
|
||||
void msr_bitmap_initialize(char *bitmap);
|
||||
int msr_bitmap_change_access(char *bitmap, u_int msr, int access);
|
||||
|
||||
#endif
|
204
sys/amd64/vmm/intel/vmx_support.S
Normal file
204
sys/amd64/vmm/intel/vmx_support.S
Normal file
@ -0,0 +1,204 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <machine/asmacros.h>
|
||||
|
||||
#include "vmx_assym.s"
|
||||
|
||||
/*
|
||||
* Assumes that %rdi holds a pointer to the 'vmxctx'
|
||||
*/
|
||||
#define VMX_GUEST_RESTORE \
|
||||
/* \
|
||||
* Make sure that interrupts are disabled before restoring CR2. \
|
||||
* Otherwise there could be a page fault during the interrupt \
|
||||
* handler execution that would end up trashing CR2. \
|
||||
*/ \
|
||||
cli; \
|
||||
movq VMXCTX_GUEST_CR2(%rdi),%rsi; \
|
||||
movq %rsi,%cr2; \
|
||||
movq VMXCTX_GUEST_RSI(%rdi),%rsi; \
|
||||
movq VMXCTX_GUEST_RDX(%rdi),%rdx; \
|
||||
movq VMXCTX_GUEST_RCX(%rdi),%rcx; \
|
||||
movq VMXCTX_GUEST_R8(%rdi),%r8; \
|
||||
movq VMXCTX_GUEST_R9(%rdi),%r9; \
|
||||
movq VMXCTX_GUEST_RAX(%rdi),%rax; \
|
||||
movq VMXCTX_GUEST_RBX(%rdi),%rbx; \
|
||||
movq VMXCTX_GUEST_RBP(%rdi),%rbp; \
|
||||
movq VMXCTX_GUEST_R10(%rdi),%r10; \
|
||||
movq VMXCTX_GUEST_R11(%rdi),%r11; \
|
||||
movq VMXCTX_GUEST_R12(%rdi),%r12; \
|
||||
movq VMXCTX_GUEST_R13(%rdi),%r13; \
|
||||
movq VMXCTX_GUEST_R14(%rdi),%r14; \
|
||||
movq VMXCTX_GUEST_R15(%rdi),%r15; \
|
||||
movq VMXCTX_GUEST_RDI(%rdi),%rdi; /* restore rdi the last */
|
||||
|
||||
#define VM_INSTRUCTION_ERROR(reg) \
|
||||
jnc 1f; \
|
||||
movl $VM_FAIL_INVALID,reg; /* CF is set */ \
|
||||
jmp 3f; \
|
||||
1: jnz 2f; \
|
||||
movl $VM_FAIL_VALID,reg; /* ZF is set */ \
|
||||
jmp 3f; \
|
||||
2: movl $VM_SUCCESS,reg; \
|
||||
3: movl reg,VMXCTX_LAUNCH_ERROR(%rsp)
|
||||
|
||||
.text
|
||||
/*
|
||||
* int vmx_setjmp(ctxp)
|
||||
* %rdi = ctxp
|
||||
*
|
||||
* Return value is '0' when it returns directly from here.
|
||||
* Return value is '1' when it returns after a vm exit through vmx_longjmp.
|
||||
*/
|
||||
ENTRY(vmx_setjmp)
|
||||
movq (%rsp),%rax /* return address */
|
||||
movq %r15,VMXCTX_HOST_R15(%rdi)
|
||||
movq %r14,VMXCTX_HOST_R14(%rdi)
|
||||
movq %r13,VMXCTX_HOST_R13(%rdi)
|
||||
movq %r12,VMXCTX_HOST_R12(%rdi)
|
||||
movq %rbp,VMXCTX_HOST_RBP(%rdi)
|
||||
movq %rsp,VMXCTX_HOST_RSP(%rdi)
|
||||
movq %rbx,VMXCTX_HOST_RBX(%rdi)
|
||||
movq %rax,VMXCTX_HOST_RIP(%rdi)
|
||||
|
||||
/*
|
||||
* XXX save host debug registers
|
||||
*/
|
||||
movl $VMX_RETURN_DIRECT,%eax
|
||||
ret
|
||||
END(vmx_setjmp)
|
||||
|
||||
/*
|
||||
* void vmx_return(struct vmxctx *ctxp, int retval)
|
||||
* %rdi = ctxp
|
||||
* %rsi = retval
|
||||
* Return to vmm context through vmx_setjmp() with a value of 'retval'.
|
||||
*/
|
||||
ENTRY(vmx_return)
|
||||
/* Restore host context. */
|
||||
movq VMXCTX_HOST_R15(%rdi),%r15
|
||||
movq VMXCTX_HOST_R14(%rdi),%r14
|
||||
movq VMXCTX_HOST_R13(%rdi),%r13
|
||||
movq VMXCTX_HOST_R12(%rdi),%r12
|
||||
movq VMXCTX_HOST_RBP(%rdi),%rbp
|
||||
movq VMXCTX_HOST_RSP(%rdi),%rsp
|
||||
movq VMXCTX_HOST_RBX(%rdi),%rbx
|
||||
movq VMXCTX_HOST_RIP(%rdi),%rax
|
||||
movq %rax,(%rsp) /* return address */
|
||||
|
||||
/*
|
||||
* XXX restore host debug registers
|
||||
*/
|
||||
movl %esi,%eax
|
||||
ret
|
||||
END(vmx_return)
|
||||
|
||||
/*
|
||||
* void vmx_longjmp(void)
|
||||
* %rsp points to the struct vmxctx
|
||||
*/
|
||||
ENTRY(vmx_longjmp)
|
||||
/*
|
||||
* Save guest state that is not automatically saved in the vmcs.
|
||||
*/
|
||||
movq %rdi,VMXCTX_GUEST_RDI(%rsp)
|
||||
movq %rsi,VMXCTX_GUEST_RSI(%rsp)
|
||||
movq %rdx,VMXCTX_GUEST_RDX(%rsp)
|
||||
movq %rcx,VMXCTX_GUEST_RCX(%rsp)
|
||||
movq %r8,VMXCTX_GUEST_R8(%rsp)
|
||||
movq %r9,VMXCTX_GUEST_R9(%rsp)
|
||||
movq %rax,VMXCTX_GUEST_RAX(%rsp)
|
||||
movq %rbx,VMXCTX_GUEST_RBX(%rsp)
|
||||
movq %rbp,VMXCTX_GUEST_RBP(%rsp)
|
||||
movq %r10,VMXCTX_GUEST_R10(%rsp)
|
||||
movq %r11,VMXCTX_GUEST_R11(%rsp)
|
||||
movq %r12,VMXCTX_GUEST_R12(%rsp)
|
||||
movq %r13,VMXCTX_GUEST_R13(%rsp)
|
||||
movq %r14,VMXCTX_GUEST_R14(%rsp)
|
||||
movq %r15,VMXCTX_GUEST_R15(%rsp)
|
||||
|
||||
movq %cr2,%rdi
|
||||
movq %rdi,VMXCTX_GUEST_CR2(%rsp)
|
||||
|
||||
movq %rsp,%rdi
|
||||
movq $VMX_RETURN_LONGJMP,%rsi
|
||||
callq vmx_return
|
||||
END(vmx_longjmp)
|
||||
|
||||
/*
|
||||
* void vmx_resume(struct vmxctx *ctxp)
|
||||
* %rdi = ctxp
|
||||
*
|
||||
* Although the return type is a 'void' this function may return indirectly
|
||||
* through vmx_setjmp() with a return value of 2.
|
||||
*/
|
||||
ENTRY(vmx_resume)
|
||||
/*
|
||||
* Restore guest state that is not automatically loaded from the vmcs.
|
||||
*/
|
||||
VMX_GUEST_RESTORE
|
||||
|
||||
vmresume
|
||||
|
||||
/*
|
||||
* Capture the reason why vmresume failed.
|
||||
*/
|
||||
VM_INSTRUCTION_ERROR(%eax)
|
||||
|
||||
/* Return via vmx_setjmp with return value of VMX_RETURN_VMRESUME */
|
||||
movq %rsp,%rdi
|
||||
movq $VMX_RETURN_VMRESUME,%rsi
|
||||
callq vmx_return
|
||||
END(vmx_resume)
|
||||
|
||||
/*
|
||||
* void vmx_launch(struct vmxctx *ctxp)
|
||||
* %rdi = ctxp
|
||||
*
|
||||
* Although the return type is a 'void' this function may return indirectly
|
||||
* through vmx_setjmp() with a return value of 3.
|
||||
*/
|
||||
ENTRY(vmx_launch)
|
||||
/*
|
||||
* Restore guest state that is not automatically loaded from the vmcs.
|
||||
*/
|
||||
VMX_GUEST_RESTORE
|
||||
|
||||
vmlaunch
|
||||
|
||||
/*
|
||||
* Capture the reason why vmlaunch failed.
|
||||
*/
|
||||
VM_INSTRUCTION_ERROR(%eax)
|
||||
|
||||
/* Return via vmx_setjmp with return value of VMX_RETURN_VMLAUNCH */
|
||||
movq %rsp,%rdi
|
||||
movq $VMX_RETURN_VMLAUNCH,%rsi
|
||||
callq vmx_return
|
||||
END(vmx_launch)
|
637
sys/amd64/vmm/intel/vtd.c
Normal file
637
sys/amd64/vmm/intel/vtd.c
Normal file
@ -0,0 +1,637 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/malloc.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
|
||||
#include <dev/pci/pcireg.h>
|
||||
|
||||
#include <machine/pmap.h>
|
||||
#include <machine/vmparam.h>
|
||||
#include <machine/pci_cfgreg.h>
|
||||
|
||||
#include "io/iommu.h"
|
||||
|
||||
/*
|
||||
* Documented in the "Intel Virtualization Technology for Directed I/O",
|
||||
* Architecture Spec, September 2008.
|
||||
*/
|
||||
|
||||
/* Section 10.4 "Register Descriptions" */
|
||||
struct vtdmap {
|
||||
volatile uint32_t version;
|
||||
volatile uint32_t res0;
|
||||
volatile uint64_t cap;
|
||||
volatile uint64_t ext_cap;
|
||||
volatile uint32_t gcr;
|
||||
volatile uint32_t gsr;
|
||||
volatile uint64_t rta;
|
||||
volatile uint64_t ccr;
|
||||
};
|
||||
|
||||
#define VTD_CAP_SAGAW(cap) (((cap) >> 8) & 0x1F)
|
||||
#define VTD_CAP_ND(cap) ((cap) & 0x7)
|
||||
#define VTD_CAP_CM(cap) (((cap) >> 7) & 0x1)
|
||||
#define VTD_CAP_SPS(cap) (((cap) >> 34) & 0xF)
|
||||
#define VTD_CAP_RWBF(cap) (((cap) >> 4) & 0x1)
|
||||
|
||||
#define VTD_ECAP_DI(ecap) (((ecap) >> 2) & 0x1)
|
||||
#define VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1)
|
||||
#define VTD_ECAP_IRO(ecap) (((ecap) >> 8) & 0x3FF)
|
||||
|
||||
#define VTD_GCR_WBF (1 << 27)
|
||||
#define VTD_GCR_SRTP (1 << 30)
|
||||
#define VTD_GCR_TE (1 << 31)
|
||||
|
||||
#define VTD_GSR_WBFS (1 << 27)
|
||||
#define VTD_GSR_RTPS (1 << 30)
|
||||
#define VTD_GSR_TES (1 << 31)
|
||||
|
||||
#define VTD_CCR_ICC (1UL << 63) /* invalidate context cache */
|
||||
#define VTD_CCR_CIRG_GLOBAL (1UL << 61) /* global invalidation */
|
||||
|
||||
#define VTD_IIR_IVT (1UL << 63) /* invalidation IOTLB */
|
||||
#define VTD_IIR_IIRG_GLOBAL (1ULL << 60) /* global IOTLB invalidation */
|
||||
#define VTD_IIR_IIRG_DOMAIN (2ULL << 60) /* domain IOTLB invalidation */
|
||||
#define VTD_IIR_IIRG_PAGE (3ULL << 60) /* page IOTLB invalidation */
|
||||
#define VTD_IIR_DRAIN_READS (1ULL << 49) /* drain pending DMA reads */
|
||||
#define VTD_IIR_DRAIN_WRITES (1ULL << 48) /* drain pending DMA writes */
|
||||
#define VTD_IIR_DOMAIN_P 32
|
||||
|
||||
#define VTD_ROOT_PRESENT 0x1
|
||||
#define VTD_CTX_PRESENT 0x1
|
||||
#define VTD_CTX_TT_ALL (1UL << 2)
|
||||
|
||||
#define VTD_PTE_RD (1UL << 0)
|
||||
#define VTD_PTE_WR (1UL << 1)
|
||||
#define VTD_PTE_SUPERPAGE (1UL << 7)
|
||||
#define VTD_PTE_ADDR_M (0x000FFFFFFFFFF000UL)
|
||||
|
||||
struct domain {
|
||||
uint64_t *ptp; /* first level page table page */
|
||||
int pt_levels; /* number of page table levels */
|
||||
int addrwidth; /* 'AW' field in context entry */
|
||||
int spsmask; /* supported super page sizes */
|
||||
u_int id; /* domain id */
|
||||
vm_paddr_t maxaddr; /* highest address to be mapped */
|
||||
SLIST_ENTRY(domain) next;
|
||||
};
|
||||
|
||||
static SLIST_HEAD(, domain) domhead;
|
||||
|
||||
#define DRHD_MAX_UNITS 8
|
||||
static int drhd_num;
|
||||
static struct vtdmap *vtdmaps[DRHD_MAX_UNITS];
|
||||
static int max_domains;
|
||||
typedef int (*drhd_ident_func_t)(void);
|
||||
|
||||
static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
|
||||
static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
|
||||
|
||||
static MALLOC_DEFINE(M_VTD, "vtd", "vtd");
|
||||
|
||||
/*
|
||||
* Config space register definitions from the "Intel 5520 and 5500" datasheet.
|
||||
*/
|
||||
static int
|
||||
tylersburg_vtd_ident(void)
|
||||
{
|
||||
int units, nlbus;
|
||||
uint16_t did, vid;
|
||||
uint32_t miscsts, vtbar;
|
||||
|
||||
const int bus = 0;
|
||||
const int slot = 20;
|
||||
const int func = 0;
|
||||
|
||||
units = 0;
|
||||
|
||||
vid = pci_cfgregread(bus, slot, func, PCIR_VENDOR, 2);
|
||||
did = pci_cfgregread(bus, slot, func, PCIR_DEVICE, 2);
|
||||
if (vid != 0x8086 || did != 0x342E)
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* Check if this is a dual IOH configuration.
|
||||
*/
|
||||
miscsts = pci_cfgregread(bus, slot, func, 0x9C, 4);
|
||||
if (miscsts & (1 << 25))
|
||||
nlbus = pci_cfgregread(bus, slot, func, 0x160, 1);
|
||||
else
|
||||
nlbus = -1;
|
||||
|
||||
vtbar = pci_cfgregread(bus, slot, func, 0x180, 4);
|
||||
if (vtbar & 0x1) {
|
||||
vtdmaps[units++] = (struct vtdmap *)
|
||||
PHYS_TO_DMAP(vtbar & 0xffffe000);
|
||||
} else if (bootverbose)
|
||||
printf("VT-d unit in legacy IOH is disabled!\n");
|
||||
|
||||
if (nlbus != -1) {
|
||||
vtbar = pci_cfgregread(nlbus, slot, func, 0x180, 4);
|
||||
if (vtbar & 0x1) {
|
||||
vtdmaps[units++] = (struct vtdmap *)
|
||||
PHYS_TO_DMAP(vtbar & 0xffffe000);
|
||||
} else if (bootverbose)
|
||||
printf("VT-d unit in non-legacy IOH is disabled!\n");
|
||||
}
|
||||
done:
|
||||
return (units);
|
||||
}
|
||||
|
||||
static drhd_ident_func_t drhd_ident_funcs[] = {
|
||||
tylersburg_vtd_ident,
|
||||
NULL
|
||||
};
|
||||
|
||||
static int
|
||||
vtd_max_domains(struct vtdmap *vtdmap)
|
||||
{
|
||||
int nd;
|
||||
|
||||
nd = VTD_CAP_ND(vtdmap->cap);
|
||||
|
||||
switch (nd) {
|
||||
case 0:
|
||||
return (16);
|
||||
case 1:
|
||||
return (64);
|
||||
case 2:
|
||||
return (256);
|
||||
case 3:
|
||||
return (1024);
|
||||
case 4:
|
||||
return (4 * 1024);
|
||||
case 5:
|
||||
return (16 * 1024);
|
||||
case 6:
|
||||
return (64 * 1024);
|
||||
default:
|
||||
panic("vtd_max_domains: invalid value of nd (0x%0x)", nd);
|
||||
}
|
||||
}
|
||||
|
||||
static u_int
|
||||
domain_id(void)
|
||||
{
|
||||
u_int id;
|
||||
struct domain *dom;
|
||||
|
||||
/* Skip domain id 0 - it is reserved when Caching Mode field is set */
|
||||
for (id = 1; id < max_domains; id++) {
|
||||
SLIST_FOREACH(dom, &domhead, next) {
|
||||
if (dom->id == id)
|
||||
break;
|
||||
}
|
||||
if (dom == NULL)
|
||||
break; /* found it */
|
||||
}
|
||||
|
||||
if (id >= max_domains)
|
||||
panic("domain ids exhausted");
|
||||
|
||||
return (id);
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_wbflush(struct vtdmap *vtdmap)
|
||||
{
|
||||
|
||||
if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0)
|
||||
pmap_invalidate_cache();
|
||||
|
||||
if (VTD_CAP_RWBF(vtdmap->cap)) {
|
||||
vtdmap->gcr = VTD_GCR_WBF;
|
||||
while ((vtdmap->gsr & VTD_GSR_WBFS) != 0)
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_ctx_global_invalidate(struct vtdmap *vtdmap)
|
||||
{
|
||||
|
||||
vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL;
|
||||
while ((vtdmap->ccr & VTD_CCR_ICC) != 0)
|
||||
;
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
|
||||
{
|
||||
int offset;
|
||||
volatile uint64_t *iotlb_reg, val;
|
||||
|
||||
vtd_wbflush(vtdmap);
|
||||
|
||||
offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
|
||||
iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8);
|
||||
|
||||
*iotlb_reg = VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
|
||||
VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;
|
||||
|
||||
while (1) {
|
||||
val = *iotlb_reg;
|
||||
if ((val & VTD_IIR_IVT) == 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_translation_enable(struct vtdmap *vtdmap)
|
||||
{
|
||||
|
||||
vtdmap->gcr = VTD_GCR_TE;
|
||||
while ((vtdmap->gsr & VTD_GSR_TES) == 0)
|
||||
;
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_translation_disable(struct vtdmap *vtdmap)
|
||||
{
|
||||
|
||||
vtdmap->gcr = 0;
|
||||
while ((vtdmap->gsr & VTD_GSR_TES) != 0)
|
||||
;
|
||||
}
|
||||
|
||||
static int
|
||||
vtd_init(void)
|
||||
{
|
||||
int i, units;
|
||||
struct vtdmap *vtdmap;
|
||||
vm_paddr_t ctx_paddr;
|
||||
|
||||
for (i = 0; drhd_ident_funcs[i] != NULL; i++) {
|
||||
units = (*drhd_ident_funcs[i])();
|
||||
if (units > 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (units <= 0)
|
||||
return (ENXIO);
|
||||
|
||||
drhd_num = units;
|
||||
vtdmap = vtdmaps[0];
|
||||
|
||||
if (VTD_CAP_CM(vtdmap->cap) != 0)
|
||||
panic("vtd_init: invalid caching mode");
|
||||
|
||||
max_domains = vtd_max_domains(vtdmap);
|
||||
|
||||
/*
|
||||
* Set up the root-table to point to the context-entry tables
|
||||
*/
|
||||
for (i = 0; i < 256; i++) {
|
||||
ctx_paddr = vtophys(ctx_tables[i]);
|
||||
if (ctx_paddr & PAGE_MASK)
|
||||
panic("ctx table (0x%0lx) not page aligned", ctx_paddr);
|
||||
|
||||
root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_cleanup(void)
|
||||
{
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_enable(void)
|
||||
{
|
||||
int i;
|
||||
struct vtdmap *vtdmap;
|
||||
|
||||
for (i = 0; i < drhd_num; i++) {
|
||||
vtdmap = vtdmaps[i];
|
||||
vtd_wbflush(vtdmap);
|
||||
|
||||
/* Update the root table address */
|
||||
vtdmap->rta = vtophys(root_table);
|
||||
vtdmap->gcr = VTD_GCR_SRTP;
|
||||
while ((vtdmap->gsr & VTD_GSR_RTPS) == 0)
|
||||
;
|
||||
|
||||
vtd_ctx_global_invalidate(vtdmap);
|
||||
vtd_iotlb_global_invalidate(vtdmap);
|
||||
|
||||
vtd_translation_enable(vtdmap);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_disable(void)
|
||||
{
|
||||
int i;
|
||||
struct vtdmap *vtdmap;
|
||||
|
||||
for (i = 0; i < drhd_num; i++) {
|
||||
vtdmap = vtdmaps[i];
|
||||
vtd_translation_disable(vtdmap);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_add_device(void *arg, int bus, int slot, int func)
|
||||
{
|
||||
int idx;
|
||||
uint64_t *ctxp;
|
||||
struct domain *dom = arg;
|
||||
vm_paddr_t pt_paddr;
|
||||
struct vtdmap *vtdmap;
|
||||
|
||||
if (bus < 0 || bus > PCI_BUSMAX ||
|
||||
slot < 0 || slot > PCI_SLOTMAX ||
|
||||
func < 0 || func > PCI_FUNCMAX)
|
||||
panic("vtd_add_device: invalid bsf %d/%d/%d", bus, slot, func);
|
||||
|
||||
vtdmap = vtdmaps[0];
|
||||
ctxp = ctx_tables[bus];
|
||||
pt_paddr = vtophys(dom->ptp);
|
||||
idx = (slot << 3 | func) * 2;
|
||||
|
||||
if (ctxp[idx] & VTD_CTX_PRESENT) {
|
||||
panic("vtd_add_device: device %d/%d/%d is already owned by "
|
||||
"domain %d", bus, slot, func,
|
||||
(uint16_t)(ctxp[idx + 1] >> 8));
|
||||
}
|
||||
|
||||
/*
|
||||
* Order is important. The 'present' bit is set only after all fields
|
||||
* of the context pointer are initialized.
|
||||
*/
|
||||
ctxp[idx + 1] = dom->addrwidth | (dom->id << 8);
|
||||
|
||||
if (VTD_ECAP_DI(vtdmap->ext_cap))
|
||||
ctxp[idx] = VTD_CTX_TT_ALL;
|
||||
else
|
||||
ctxp[idx] = 0;
|
||||
|
||||
ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT;
|
||||
|
||||
/*
|
||||
* 'Not Present' entries are not cached in either the Context Cache
|
||||
* or in the IOTLB, so there is no need to invalidate either of them.
|
||||
*/
|
||||
}
|
||||
|
||||
static void
|
||||
vtd_remove_device(void *arg, int bus, int slot, int func)
|
||||
{
|
||||
int i, idx;
|
||||
uint64_t *ctxp;
|
||||
struct vtdmap *vtdmap;
|
||||
|
||||
if (bus < 0 || bus > PCI_BUSMAX ||
|
||||
slot < 0 || slot > PCI_SLOTMAX ||
|
||||
func < 0 || func > PCI_FUNCMAX)
|
||||
panic("vtd_add_device: invalid bsf %d/%d/%d", bus, slot, func);
|
||||
|
||||
ctxp = ctx_tables[bus];
|
||||
idx = (slot << 3 | func) * 2;
|
||||
|
||||
/*
|
||||
* Order is important. The 'present' bit is must be cleared first.
|
||||
*/
|
||||
ctxp[idx] = 0;
|
||||
ctxp[idx + 1] = 0;
|
||||
|
||||
/*
|
||||
* Invalidate the Context Cache and the IOTLB.
|
||||
*
|
||||
* XXX use device-selective invalidation for Context Cache
|
||||
* XXX use domain-selective invalidation for IOTLB
|
||||
*/
|
||||
for (i = 0; i < drhd_num; i++) {
|
||||
vtdmap = vtdmaps[i];
|
||||
vtd_ctx_global_invalidate(vtdmap);
|
||||
vtd_iotlb_global_invalidate(vtdmap);
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
|
||||
{
|
||||
struct domain *dom;
|
||||
int i, spshift, ptpshift, ptpindex, nlevels;
|
||||
uint64_t spsize, *ptp;
|
||||
|
||||
dom = arg;
|
||||
ptpindex = 0;
|
||||
ptpshift = 0;
|
||||
|
||||
if (gpa & PAGE_MASK)
|
||||
panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa);
|
||||
|
||||
if (hpa & PAGE_MASK)
|
||||
panic("vtd_create_mapping: unaligned hpa 0x%0lx", hpa);
|
||||
|
||||
if (len & PAGE_MASK)
|
||||
panic("vtd_create_mapping: unaligned len 0x%0lx", len);
|
||||
|
||||
/*
|
||||
* Compute the size of the mapping that we can accomodate.
|
||||
*
|
||||
* This is based on three factors:
|
||||
* - supported super page size
|
||||
* - alignment of the region starting at 'gpa' and 'hpa'
|
||||
* - length of the region 'len'
|
||||
*/
|
||||
spshift = 48;
|
||||
for (i = 3; i >= 0; i--) {
|
||||
spsize = 1UL << spshift;
|
||||
if ((dom->spsmask & (1 << i)) != 0 &&
|
||||
(gpa & (spsize - 1)) == 0 &&
|
||||
(hpa & (spsize - 1)) == 0 &&
|
||||
(len >= spsize)) {
|
||||
break;
|
||||
}
|
||||
spshift -= 9;
|
||||
}
|
||||
|
||||
ptp = dom->ptp;
|
||||
nlevels = dom->pt_levels;
|
||||
while (--nlevels >= 0) {
|
||||
ptpshift = 12 + nlevels * 9;
|
||||
ptpindex = (gpa >> ptpshift) & 0x1FF;
|
||||
|
||||
/* We have reached the leaf mapping */
|
||||
if (spshift >= ptpshift) {
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* We are working on a non-leaf page table page.
|
||||
*
|
||||
* Create a downstream page table page if necessary and point
|
||||
* to it from the current page table.
|
||||
*/
|
||||
if (ptp[ptpindex] == 0) {
|
||||
void *nlp = malloc(PAGE_SIZE, M_VTD, M_WAITOK | M_ZERO);
|
||||
ptp[ptpindex] = vtophys(nlp)| VTD_PTE_RD | VTD_PTE_WR;
|
||||
}
|
||||
|
||||
ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M);
|
||||
}
|
||||
|
||||
if ((gpa & ((1UL << ptpshift) - 1)) != 0)
|
||||
panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift);
|
||||
|
||||
/*
|
||||
* Create a 'gpa' -> 'hpa' mapping
|
||||
*/
|
||||
ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR;
|
||||
|
||||
if (nlevels > 0)
|
||||
ptp[ptpindex] |= VTD_PTE_SUPERPAGE;
|
||||
|
||||
return (1UL << ptpshift);
|
||||
}
|
||||
|
||||
/*
 * Allocate and initialize a VT-d translation domain capable of mapping
 * guest physical addresses in the range [0, maxaddr).
 *
 * The new domain is linked onto the global 'domhead' list and returned
 * as an opaque pointer.  Panics if no DMA remapping hardware is present
 * or if the hardware cannot support the required guest address width.
 */
static void *
vtd_create_domain(vm_paddr_t maxaddr)
{
	struct domain *dom;
	vm_paddr_t addr;
	int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth;
	struct vtdmap *vtdmap;

	if (drhd_num <= 0)
		panic("vtd_create_domain: no dma remapping hardware available");

	/* NOTE(review): capabilities are taken from the first remapping
	 * unit only — assumes all units are homogeneous; confirm. */
	vtdmap = vtdmaps[0];

	/*
	 * Calculate AGAW.
	 * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
	 */
	/* Guest Address Width: smallest power-of-2 width covering maxaddr. */
	addr = 0;
	for (gaw = 0; addr < maxaddr; gaw++)
		addr = 1ULL << gaw;

	/* Round GAW up to the next (12 + 9*N) boundary, capped at 64 bits. */
	res = (gaw - 12) % 9;
	if (res == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - res;

	if (agaw > 64)
		agaw = 64;

	/*
	 * Select the smallest Supported AGAW and the corresponding number
	 * of page table levels.
	 */
	/* Start at 2-level/30-bit; each loop step adds one level (+9 bits). */
	pt_levels = 2;
	sagaw = 30;
	addrwidth = 0;
	tmp = VTD_CAP_SAGAW(vtdmap->cap);
	for (i = 0; i < 5; i++) {
		if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
			break;
		pt_levels++;
		addrwidth++;
		sagaw += 9;
		if (sagaw > 64)
			sagaw = 64;
	}

	if (i >= 5) {
		panic("vtd_create_domain: SAGAW 0x%lx does not support AGAW %d",
		      VTD_CAP_SAGAW(vtdmap->cap), agaw);
	}

	dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
	dom->pt_levels = pt_levels;
	dom->addrwidth = addrwidth;
	dom->spsmask = VTD_CAP_SPS(vtdmap->cap);	/* supported super pages */
	dom->id = domain_id();
	dom->maxaddr = maxaddr;
	/* Root page table page; the hardware requires page alignment. */
	dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK);
	if ((uintptr_t)dom->ptp & PAGE_MASK)
		panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);

	SLIST_INSERT_HEAD(&domhead, dom, next);

	return (dom);
}
|
||||
|
||||
/*
 * Recursively free a VT-d page table page hierarchy rooted at 'ptp'.
 *
 * 'level' is the number of levels remaining below and including this
 * page (1 == leaf).  Entries that are unmapped (neither RD nor WR set)
 * or that are superpage leaf mappings do not point at a downstream page
 * table page and are skipped.  Each page is zeroed before being freed
 * so stale translations cannot survive if the memory is recycled.
 */
static void
vtd_free_ptp(uint64_t *ptp, int level)
{
	int i;
	uint64_t *nlp;

	if (level > 1) {
		for (i = 0; i < 512; i++) {
			if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0)
				continue;
			if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0)
				continue;
			nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M);
			vtd_free_ptp(nlp, level - 1);
		}
	}

	bzero(ptp, PAGE_SIZE);
	free(ptp, M_VTD);
}
|
||||
|
||||
static void
|
||||
vtd_destroy_domain(void *arg)
|
||||
{
|
||||
struct domain *dom;
|
||||
|
||||
dom = arg;
|
||||
|
||||
SLIST_REMOVE(&domhead, dom, domain, next);
|
||||
vtd_free_ptp(dom->ptp, dom->pt_levels);
|
||||
free(dom, M_VTD);
|
||||
}
|
||||
|
||||
struct iommu_ops iommu_ops_intel = {
|
||||
vtd_init,
|
||||
vtd_cleanup,
|
||||
vtd_enable,
|
||||
vtd_disable,
|
||||
vtd_create_domain,
|
||||
vtd_destroy_domain,
|
||||
vtd_create_mapping,
|
||||
vtd_add_device,
|
||||
vtd_remove_device,
|
||||
};
|
230
sys/amd64/vmm/io/iommu.c
Normal file
230
sys/amd64/vmm/io/iommu.c
Normal file
@ -0,0 +1,230 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/bus.h>
|
||||
|
||||
#include <dev/pci/pcivar.h>
|
||||
#include <dev/pci/pcireg.h>
|
||||
|
||||
#include <machine/md_var.h>
|
||||
|
||||
#include "vmm_util.h"
|
||||
#include "iommu.h"
|
||||
|
||||
static boolean_t iommu_avail;
|
||||
static struct iommu_ops *ops;
|
||||
static void *host_domain;
|
||||
|
||||
/*
 * Thin dispatch wrappers around the vendor-specific 'ops' vector.
 *
 * Except for IOMMU_INIT(), every wrapper also requires 'iommu_avail'
 * to be set, i.e. the vendor init routine must have completed
 * successfully before any other operation is forwarded.
 */
static __inline int
IOMMU_INIT(void)
{
	if (ops != NULL)
		return ((*ops->init)());
	else
		return (ENXIO);
}

static __inline void
IOMMU_CLEANUP(void)
{
	if (ops != NULL && iommu_avail)
		(*ops->cleanup)();
}

/* Returns NULL when no iommu is available or the backend fails. */
static __inline void *
IOMMU_CREATE_DOMAIN(vm_paddr_t maxaddr)
{

	if (ops != NULL && iommu_avail)
		return ((*ops->create_domain)(maxaddr));
	else
		return (NULL);
}

static __inline void
IOMMU_DESTROY_DOMAIN(void *dom)
{

	if (ops != NULL && iommu_avail)
		(*ops->destroy_domain)(dom);
}

/*
 * Returns the number of bytes actually mapped.  When no iommu is
 * available the entire request is reported as mapped so that callers
 * (e.g. iommu_create_mapping()) still make forward progress.
 */
static __inline uint64_t
IOMMU_CREATE_MAPPING(void *domain, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
{

	if (ops != NULL && iommu_avail)
		return ((*ops->create_mapping)(domain, gpa, hpa, len));
	else
		return (len);	/* XXX */
}

static __inline void
IOMMU_ADD_DEVICE(void *domain, int bus, int slot, int func)
{

	if (ops != NULL && iommu_avail)
		(*ops->add_device)(domain, bus, slot, func);
}

static __inline void
IOMMU_REMOVE_DEVICE(void *domain, int bus, int slot, int func)
{

	if (ops != NULL && iommu_avail)
		(*ops->remove_device)(domain, bus, slot, func);
}

/* Turn DMA remapping on for the whole platform. */
static __inline void
IOMMU_ENABLE(void)
{

	if (ops != NULL && iommu_avail)
		(*ops->enable)();
}

/* Turn DMA remapping off for the whole platform. */
static __inline void
IOMMU_DISABLE(void)
{

	if (ops != NULL && iommu_avail)
		(*ops->disable)();
}
|
||||
|
||||
/*
 * Select the vendor-specific iommu backend, initialize it, and place
 * every PCI device that is not destined for passthrough into a 1:1
 * mapped "host" domain.
 *
 * Silently returns (leaving 'iommu_avail' false) when no backend exists
 * for this CPU vendor or the backend fails to initialize; all later
 * iommu_*() calls then degrade to no-ops.
 */
void
iommu_init(void)
{
	int error, bus, slot, func;
	vm_paddr_t maxaddr;
	const char *name;
	device_t dev;

	if (vmm_is_intel())
		ops = &iommu_ops_intel;
	else if (vmm_is_amd())
		ops = &iommu_ops_amd;
	else
		ops = NULL;

	error = IOMMU_INIT();
	if (error)
		return;

	iommu_avail = TRUE;

	/*
	 * Create a domain for the devices owned by the host
	 */
	maxaddr = ptoa(Maxmem);
	host_domain = IOMMU_CREATE_DOMAIN(maxaddr);
	if (host_domain == NULL)
		panic("iommu_init: unable to create a host domain");

	/*
	 * Create 1:1 mappings from '0' to 'Maxmem' for devices assigned to
	 * the host
	 */
	iommu_create_mapping(host_domain, 0, 0, maxaddr);

	/* NOTE(review): only PCI domain 0 is enumerated here — confirm
	 * that is sufficient on the supported platforms. */
	for (bus = 0; bus <= PCI_BUSMAX; bus++) {
		for (slot = 0; slot <= PCI_SLOTMAX; slot++) {
			for (func = 0; func <= PCI_FUNCMAX; func++) {
				dev = pci_find_dbsf(0, bus, slot, func);
				if (dev == NULL)
					continue;

				/* skip passthrough devices */
				name = device_get_name(dev);
				if (name != NULL && strcmp(name, "ppt") == 0)
					continue;

				/* everything else belongs to the host domain */
				iommu_add_device(host_domain, bus, slot, func);
			}
		}
	}
	IOMMU_ENABLE();

}
|
||||
|
||||
/*
 * Module unload path: disable DMA remapping, destroy the host domain,
 * then let the backend release its own resources.  Order matters:
 * remapping must be off before the host domain's tables are freed.
 */
void
iommu_cleanup(void)
{
	IOMMU_DISABLE();
	IOMMU_DESTROY_DOMAIN(host_domain);
	IOMMU_CLEANUP();
}
|
||||
|
||||
/*
 * Public wrappers around the per-vendor iommu ops.  Safe to call even
 * when no iommu is available: they degrade to no-ops (or NULL).
 */
void *
iommu_create_domain(vm_paddr_t maxaddr)
{

	return (IOMMU_CREATE_DOMAIN(maxaddr));
}

void
iommu_destroy_domain(void *dom)
{

	IOMMU_DESTROY_DOMAIN(dom);
}
|
||||
|
||||
/*
 * Map [gpa, gpa+len) to [hpa, hpa+len) in domain 'dom', letting the
 * backend choose the largest page size it can use for each chunk.
 *
 * NOTE(review): forward progress relies on IOMMU_CREATE_MAPPING()
 * always returning a non-zero mapped length; a zero return would make
 * this loop spin forever — confirm the backends guarantee this.
 */
void
iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa, size_t len)
{
	uint64_t mapped, remaining;

	remaining = len;

	while (remaining > 0) {
		mapped = IOMMU_CREATE_MAPPING(dom, gpa, hpa, remaining);
		gpa += mapped;
		hpa += mapped;
		remaining -= mapped;
	}
}
|
||||
|
||||
/* Attach PCI function bus/slot/func to iommu domain 'dom' (no-op
 * when no iommu is available). */
void
iommu_add_device(void *dom, int bus, int slot, int func)
{

	IOMMU_ADD_DEVICE(dom, bus, slot, func);
}

/* Detach PCI function bus/slot/func from iommu domain 'dom' (no-op
 * when no iommu is available). */
void
iommu_remove_device(void *dom, int bus, int slot, int func)
{

	IOMMU_REMOVE_DEVICE(dom, bus, slot, func);
}
|
67
sys/amd64/vmm/io/iommu.h
Normal file
67
sys/amd64/vmm/io/iommu.h
Normal file
@ -0,0 +1,67 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _IO_IOMMU_H_
|
||||
#define _IO_IOMMU_H_
|
||||
|
||||
typedef int (*iommu_init_func_t)(void);
|
||||
typedef void (*iommu_cleanup_func_t)(void);
|
||||
typedef void (*iommu_enable_func_t)(void);
|
||||
typedef void (*iommu_disable_func_t)(void);
|
||||
typedef void *(*iommu_create_domain_t)(vm_paddr_t maxaddr);
|
||||
typedef void (*iommu_destroy_domain_t)(void *domain);
|
||||
typedef uint64_t (*iommu_create_mapping_t)(void *domain, vm_paddr_t gpa,
|
||||
vm_paddr_t hpa, uint64_t len);
|
||||
typedef void (*iommu_add_device_t)(void *domain, int bus, int slot, int func);
|
||||
typedef void (*iommu_remove_device_t)(void *dom, int bus, int slot, int func);
|
||||
|
||||
struct iommu_ops {
|
||||
iommu_init_func_t init; /* module wide */
|
||||
iommu_cleanup_func_t cleanup;
|
||||
iommu_enable_func_t enable;
|
||||
iommu_disable_func_t disable;
|
||||
|
||||
iommu_create_domain_t create_domain; /* domain-specific */
|
||||
iommu_destroy_domain_t destroy_domain;
|
||||
iommu_create_mapping_t create_mapping;
|
||||
iommu_add_device_t add_device;
|
||||
iommu_remove_device_t remove_device;
|
||||
};
|
||||
|
||||
extern struct iommu_ops iommu_ops_intel;
|
||||
extern struct iommu_ops iommu_ops_amd;
|
||||
|
||||
void iommu_init(void);
|
||||
void iommu_cleanup(void);
|
||||
void *iommu_create_domain(vm_paddr_t maxaddr);
|
||||
void iommu_destroy_domain(void *dom);
|
||||
void iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa,
|
||||
size_t len);
|
||||
void iommu_add_device(void *dom, int bus, int slot, int func);
|
||||
void iommu_remove_device(void *dom, int bus, int slot, int func);
|
||||
#endif
|
449
sys/amd64/vmm/io/ppt.c
Normal file
449
sys/amd64/vmm/io/ppt.c
Normal file
@ -0,0 +1,449 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/module.h>
|
||||
#include <sys/bus.h>
|
||||
#include <sys/pciio.h>
|
||||
#include <sys/rman.h>
|
||||
|
||||
#include <dev/pci/pcivar.h>
|
||||
#include <dev/pci/pcireg.h>
|
||||
|
||||
#include <machine/resource.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <machine/vmm_dev.h>
|
||||
|
||||
#include "vmm_lapic.h"
|
||||
#include "vmm_ktr.h"
|
||||
|
||||
#include "iommu.h"
|
||||
#include "ppt.h"
|
||||
|
||||
#define MAX_PPTDEVS (sizeof(pptdevs) / sizeof(pptdevs[0]))
|
||||
#define MAX_MMIOSEGS (PCIR_MAX_BAR_0 + 1)
|
||||
#define MAX_MSIMSGS 32
|
||||
|
||||
struct pptintr_arg { /* pptintr(pptintr_arg) */
|
||||
struct pptdev *pptdev;
|
||||
int msg;
|
||||
};
|
||||
|
||||
static struct pptdev {
|
||||
device_t dev;
|
||||
struct vm *vm; /* owner of this device */
|
||||
struct vm_memory_segment mmio[MAX_MMIOSEGS];
|
||||
struct {
|
||||
int num_msgs; /* guest state */
|
||||
int vector;
|
||||
int vcpu;
|
||||
|
||||
int startrid; /* host state */
|
||||
struct resource *res[MAX_MSIMSGS];
|
||||
void *cookie[MAX_MSIMSGS];
|
||||
struct pptintr_arg arg[MAX_MSIMSGS];
|
||||
} msi;
|
||||
} pptdevs[32];
|
||||
|
||||
static int num_pptdevs;
|
||||
|
||||
/*
 * Probe method for the 'ppt' driver.
 *
 * Claims a PCI device (returns 0) only when the administrator has
 * marked it for passthrough AND it is a normal endpoint device (header
 * type 0, i.e. not a bridge); otherwise returns ENXIO so another
 * driver can attach.
 */
static int
ppt_probe(device_t dev)
{
	int bus, slot, func;
	struct pci_devinfo *dinfo;

	dinfo = (struct pci_devinfo *)device_get_ivars(dev);

	bus = pci_get_bus(dev);
	slot = pci_get_slot(dev);
	func = pci_get_function(dev);

	/*
	 * To qualify as a pci passthrough device a device must:
	 * - be allowed by administrator to be used in this role
	 * - be an endpoint device
	 */
	if (vmm_is_pptdev(bus, slot, func) &&
	    (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL)
		return (0);
	else
		return (ENXIO);
}
|
||||
|
||||
/*
 * Attach method: record the device in the global pptdevs[] table.
 * Fails with ENXIO once MAX_PPTDEVS devices have been attached.
 */
static int
ppt_attach(device_t dev)
{
	int n;

	if (num_pptdevs >= MAX_PPTDEVS) {
		printf("ppt_attach: maximum number of pci passthrough devices "
		       "exceeded\n");
		return (ENXIO);
	}

	n = num_pptdevs++;
	pptdevs[n].dev = dev;

	if (bootverbose)
		device_printf(dev, "attached\n");

	return (0);
}

/*
 * Detach method.  Note that the pptdevs[] slot is NOT reclaimed, so a
 * detach/re-attach cycle consumes an additional slot.
 */
static int
ppt_detach(device_t dev)
{
	/*
	 * XXX check whether there are any pci passthrough devices assigned
	 * to guests before we allow this driver to detach.
	 */

	return (0);
}
|
||||
|
||||
/* newbus glue: register the 'ppt' driver on the pci bus. */
static device_method_t ppt_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, ppt_probe),
	DEVMETHOD(device_attach, ppt_attach),
	DEVMETHOD(device_detach, ppt_detach),
	{0, 0}
};

static devclass_t ppt_devclass;
DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0);
DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
|
||||
|
||||
static struct pptdev *
|
||||
ppt_find(int bus, int slot, int func)
|
||||
{
|
||||
device_t dev;
|
||||
int i, b, s, f;
|
||||
|
||||
for (i = 0; i < num_pptdevs; i++) {
|
||||
dev = pptdevs[i].dev;
|
||||
b = pci_get_bus(dev);
|
||||
s = pci_get_slot(dev);
|
||||
f = pci_get_function(dev);
|
||||
if (bus == b && slot == s && func == f)
|
||||
return (&pptdevs[i]);
|
||||
}
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
 * Release all guest MMIO mappings established for 'ppt' via
 * ppt_map_mmio() and clear the per-device bookkeeping.
 */
static void
ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
{
	int i;
	struct vm_memory_segment *seg;

	for (i = 0; i < MAX_MMIOSEGS; i++) {
		seg = &ppt->mmio[i];
		if (seg->len == 0)
			continue;		/* unused slot */
		(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
		bzero(seg, sizeof(struct vm_memory_segment));
	}
}

/*
 * Tear down the host-side interrupt plumbing set up by ppt_setup_msi():
 * detach the handlers, release the IRQ resources and, for MSI (startrid
 * == 1, as opposed to a legacy INTx with startrid == 0), release the
 * MSI vectors back to the host.
 */
static void
ppt_teardown_msi(struct pptdev *ppt)
{
	int i, rid;
	void *cookie;
	struct resource *res;

	if (ppt->msi.num_msgs == 0)
		return;

	for (i = 0; i < ppt->msi.num_msgs; i++) {
		rid = ppt->msi.startrid + i;
		res = ppt->msi.res[i];
		cookie = ppt->msi.cookie[i];

		if (cookie != NULL)
			bus_teardown_intr(ppt->dev, res, cookie);

		if (res != NULL)
			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);

		ppt->msi.res[i] = NULL;
		ppt->msi.cookie[i] = NULL;
	}

	if (ppt->msi.startrid == 1)
		pci_release_msi(ppt->dev);

	ppt->msi.num_msgs = 0;
}
|
||||
|
||||
/*
 * Assign ownership of passthrough device <bus,slot,func> to 'vm' and
 * attach it to the VM's iommu domain.
 *
 * Returns 0 on success, EBUSY if the device is owned by another VM, or
 * ENOENT if it is not a passthrough device.  Re-assigning a device the
 * VM already owns is a harmless no-op.
 */
int
ppt_assign_device(struct vm *vm, int bus, int slot, int func)
{
	struct pptdev *ppt;

	ppt = ppt_find(bus, slot, func);
	if (ppt != NULL) {
		/*
		 * If this device is owned by a different VM then we
		 * cannot change its owner.
		 */
		if (ppt->vm != NULL && ppt->vm != vm)
			return (EBUSY);

		ppt->vm = vm;
		iommu_add_device(vm_iommu_domain(vm), bus, slot, func);
		return (0);
	}
	return (ENOENT);
}

/*
 * Release the device from 'vm': unmap its guest MMIO ranges, tear down
 * MSI state, detach it from the VM's iommu domain and clear ownership.
 *
 * Returns 0 on success, EBUSY if the device is not owned by 'vm', or
 * ENOENT if it is not a passthrough device.
 */
int
ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
{
	struct pptdev *ppt;

	ppt = ppt_find(bus, slot, func);
	if (ppt != NULL) {
		/*
		 * If this device is not owned by this 'vm' then bail out.
		 */
		if (ppt->vm != vm)
			return (EBUSY);
		ppt_unmap_mmio(vm, ppt);
		ppt_teardown_msi(ppt);
		iommu_remove_device(vm_iommu_domain(vm), bus, slot, func);
		ppt->vm = NULL;
		return (0);
	}
	return (ENOENT);
}
|
||||
|
||||
int
|
||||
ppt_unassign_all(struct vm *vm)
|
||||
{
|
||||
int i, bus, slot, func;
|
||||
device_t dev;
|
||||
|
||||
for (i = 0; i < num_pptdevs; i++) {
|
||||
if (pptdevs[i].vm == vm) {
|
||||
dev = pptdevs[i].dev;
|
||||
bus = pci_get_bus(dev);
|
||||
slot = pci_get_slot(dev);
|
||||
func = pci_get_function(dev);
|
||||
ppt_unassign_device(vm, bus, slot, func);
|
||||
}
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Map guest physical range [gpa, gpa+len) of 'vm' to host physical
 * address 'hpa' for the given passthrough device, recording the
 * mapping in the first free slot of ppt->mmio[].
 *
 * Returns 0 on success; EBUSY if the device is not owned by 'vm';
 * ENOSPC if all MAX_MMIOSEGS slots are in use; ENOENT if the device is
 * not a passthrough device; or the error from vm_map_mmio().
 */
int
ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
	     vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	int i, error;
	struct vm_memory_segment *seg;
	struct pptdev *ppt;

	ppt = ppt_find(bus, slot, func);
	if (ppt != NULL) {
		if (ppt->vm != vm)
			return (EBUSY);

		for (i = 0; i < MAX_MMIOSEGS; i++) {
			seg = &ppt->mmio[i];
			/* a zero length marks a free slot */
			if (seg->len == 0) {
				error = vm_map_mmio(vm, gpa, len, hpa);
				if (error == 0) {
					seg->gpa = gpa;
					seg->len = len;
					seg->hpa = hpa;
				}
				return (error);
			}
		}
		return (ENOSPC);
	}
	return (ENOENT);
}
|
||||
|
||||
/*
 * Interrupt filter installed on each host MSI/INTx resource of a
 * passthrough device.  Forwards the host interrupt to the guest as a
 * local APIC interrupt on the vcpu chosen by ppt_setup_msi().
 */
static int
pptintr(void *arg)
{
	int vec;
	struct pptdev *ppt;
	struct pptintr_arg *pptarg;

	pptarg = arg;
	ppt = pptarg->pptdev;
	/* MSI message N is delivered as guest vector 'vector' + N */
	vec = ppt->msi.vector + pptarg->msg;

	if (ppt->vm != NULL)
		(void) lapic_set_intr(ppt->vm, ppt->msi.vcpu, vec);
	else {
		/*
		 * XXX
		 * This is not expected to happen - panic?
		 */
	}

	/*
	 * For legacy interrupts give other filters a chance in case
	 * the interrupt was not generated by the passthrough device.
	 */
	if (ppt->msi.startrid == 0)
		return (FILTER_STRAY);
	else
		return (FILTER_HANDLED);
}
|
||||
|
||||
/*
|
||||
* XXX
|
||||
* When we try to free the MSI resource the kernel will bind the thread to
|
||||
* the host cpu was originally handling the MSI. The function freeing the
|
||||
* MSI vector (apic_free_vector()) will panic the kernel if the thread
|
||||
* is already bound to a cpu.
|
||||
*
|
||||
* So, we temporarily unbind the vcpu thread before freeing the MSI resource.
|
||||
*/
|
||||
/*
 * XXX
 * When we try to free the MSI resource the kernel will bind the thread to
 * the host cpu was originally handling the MSI. The function freeing the
 * MSI vector (apic_free_vector()) will panic the kernel if the thread
 * is already bound to a cpu.
 *
 * So, we temporarily unbind the vcpu thread before freeing the MSI resource.
 *
 * NOTE(review): this is a function despite the macro-style upper-case
 * name — confirm whether the naming is intentional.
 */
static void
PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt)
{
	int pincpu = -1;

	vm_get_pinning(vm, vcpu, &pincpu);

	if (pincpu >= 0)
		vm_set_pinning(vm, vcpu, -1);	/* temporarily unbind */

	ppt_teardown_msi(ppt);

	if (pincpu >= 0)
		vm_set_pinning(vm, vcpu, pincpu);	/* restore pinning */
}
|
||||
|
||||
/*
 * Configure MSI (or legacy INTx) interrupt forwarding for a
 * passthrough device: host interrupts are caught by pptintr() and
 * injected into vcpu 'destcpu' starting at guest vector 'vector'.
 *
 * Any previously allocated interrupt resources are released first, so
 * calling with numvec == 0 simply tears down an existing setup.
 *
 * Returns 0 on success or EINVAL/ENOENT/EBUSY/ENOSPC/ENXIO (or a
 * pci_alloc_msi() error) on failure.
 */
int
ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
	      int destcpu, int vector, int numvec)
{
	int i, rid, flags;
	int msi_count, startrid, error, tmp;
	struct pptdev *ppt;

	if ((destcpu >= VM_MAXCPU || destcpu < 0) ||
	    (vector < 0 || vector > 255) ||
	    (numvec < 0 || numvec > MAX_MSIMSGS))
		return (EINVAL);

	ppt = ppt_find(bus, slot, func);
	if (ppt == NULL)
		return (ENOENT);
	if (ppt->vm != vm)		/* Make sure we own this device */
		return (EBUSY);

	/* Free any allocated resources */
	PPT_TEARDOWN_MSI(vm, vcpu, ppt);

	if (numvec == 0)		/* nothing more to do */
		return (0);

	flags = RF_ACTIVE;
	msi_count = pci_msi_count(ppt->dev);
	if (msi_count == 0) {
		startrid = 0;		/* legacy interrupt */
		msi_count = 1;
		flags |= RF_SHAREABLE;	/* INTx lines may be shared */
	} else
		startrid = 1;		/* MSI */

	/*
	 * The device must be capable of supporting the number of vectors
	 * the guest wants to allocate.
	 */
	if (numvec > msi_count)
		return (EINVAL);

	/*
	 * Make sure that we can allocate all the MSI vectors that are needed
	 * by the guest.
	 */
	if (startrid == 1) {
		tmp = numvec;
		error = pci_alloc_msi(ppt->dev, &tmp);
		if (error)
			return (error);
		else if (tmp != numvec) {
			/* partial grant is not acceptable */
			pci_release_msi(ppt->dev);
			return (ENOSPC);
		} else {
			/* success */
		}
	}

	ppt->msi.vector = vector;
	ppt->msi.vcpu = destcpu;
	ppt->msi.startrid = startrid;

	/*
	 * Allocate the irq resource and attach it to the interrupt handler.
	 */
	for (i = 0; i < numvec; i++) {
		/* updated incrementally so a partial setup can be torn down */
		ppt->msi.num_msgs = i + 1;
		ppt->msi.cookie[i] = NULL;

		rid = startrid + i;
		ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
							 &rid, flags);
		if (ppt->msi.res[i] == NULL)
			break;

		ppt->msi.arg[i].pptdev = ppt;
		ppt->msi.arg[i].msg = i;

		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
				       INTR_TYPE_NET | INTR_MPSAFE | INTR_FAST,
				       pptintr, NULL, &ppt->msi.arg[i],
				       &ppt->msi.cookie[i]);
		if (error != 0)
			break;
	}

	if (i < numvec) {
		/* partial failure: release everything allocated so far */
		PPT_TEARDOWN_MSI(vm, vcpu, ppt);
		return (ENXIO);
	}

	return (0);
}
|
40
sys/amd64/vmm/io/ppt.h
Normal file
40
sys/amd64/vmm/io/ppt.h
Normal file
@ -0,0 +1,40 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _IO_PPT_H_
|
||||
#define _IO_PPT_H_
|
||||
|
||||
int ppt_assign_device(struct vm *vm, int bus, int slot, int func);
|
||||
int ppt_unassign_device(struct vm *vm, int bus, int slot, int func);
|
||||
int ppt_unassign_all(struct vm *vm);
|
||||
int ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
|
||||
vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
|
||||
int ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
|
||||
int destcpu, int vector, int numvec);
|
||||
|
||||
#endif
|
270
sys/amd64/vmm/io/vdev.c
Normal file
270
sys/amd64/vmm/io/vdev.c
Normal file
@ -0,0 +1,270 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/malloc.h>
|
||||
|
||||
#include "vdev.h"
|
||||
|
||||
struct vdev {
|
||||
SLIST_ENTRY(vdev) entry;
|
||||
struct vdev_ops *ops;
|
||||
void *dev;
|
||||
};
|
||||
static SLIST_HEAD(, vdev) vdev_head;
|
||||
static int vdev_count;
|
||||
|
||||
struct vdev_region {
|
||||
SLIST_ENTRY(vdev_region) entry;
|
||||
struct vdev_ops *ops;
|
||||
void *dev;
|
||||
struct io_region *io;
|
||||
};
|
||||
static SLIST_HEAD(, vdev_region) region_head;
|
||||
static int region_count;
|
||||
|
||||
static MALLOC_DEFINE(M_VDEV, "vdev", "vdev");
|
||||
|
||||
#define VDEV_INIT (0)
|
||||
#define VDEV_RESET (1)
|
||||
#define VDEV_HALT (2)
|
||||
|
||||
// static const char* vdev_event_str[] = {"VDEV_INIT", "VDEV_RESET", "VDEV_HALT"};
|
||||
|
||||
static int
|
||||
vdev_system_event(int event)
|
||||
{
|
||||
struct vdev *vd;
|
||||
int rc;
|
||||
|
||||
// TODO: locking
|
||||
SLIST_FOREACH(vd, &vdev_head, entry) {
|
||||
// printf("%s : %s Device %s\n", __func__, vdev_event_str[event], vd->ops->name);
|
||||
switch (event) {
|
||||
case VDEV_INIT:
|
||||
rc = vd->ops->init(vd->dev);
|
||||
break;
|
||||
case VDEV_RESET:
|
||||
rc = vd->ops->reset(vd->dev);
|
||||
break;
|
||||
case VDEV_HALT:
|
||||
rc = vd->ops->halt(vd->dev);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (rc) {
|
||||
printf("vdev %s init failed rc=%d\n",
|
||||
vd->ops->name, rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
vdev_init(void)
|
||||
{
|
||||
return vdev_system_event(VDEV_INIT);
|
||||
}
|
||||
|
||||
int
|
||||
vdev_reset(void)
|
||||
{
|
||||
return vdev_system_event(VDEV_RESET);
|
||||
}
|
||||
|
||||
int
|
||||
vdev_halt(void)
|
||||
{
|
||||
return vdev_system_event(VDEV_HALT);
|
||||
}
|
||||
|
||||
void
|
||||
vdev_vm_init(void)
|
||||
{
|
||||
SLIST_INIT(&vdev_head);
|
||||
vdev_count = 0;
|
||||
|
||||
SLIST_INIT(®ion_head);
|
||||
region_count = 0;
|
||||
}
|
||||
/*
 * Free every vdev registered for this VM instance.
 *
 * NOTE(review): only vdev_head is drained; entries on region_head are
 * left allocated — confirm all regions are explicitly unregistered
 * before this runs, otherwise they leak.
 */
void
vdev_vm_cleanup(void)
{
	struct vdev *vd;

	// TODO: locking
	while (!SLIST_EMPTY(&vdev_head)) {
		vd = SLIST_FIRST(&vdev_head);
		SLIST_REMOVE_HEAD(&vdev_head, entry);
		free(vd, M_VDEV);
		vdev_count--;
	}
}
|
||||
|
||||
int
|
||||
vdev_register(struct vdev_ops *ops, void *dev)
|
||||
{
|
||||
struct vdev *vd;
|
||||
vd = malloc(sizeof(*vd), M_VDEV, M_WAITOK | M_ZERO);
|
||||
vd->ops = ops;
|
||||
vd->dev = dev;
|
||||
|
||||
// TODO: locking
|
||||
SLIST_INSERT_HEAD(&vdev_head, vd, entry);
|
||||
vdev_count++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
vdev_unregister(void *dev)
|
||||
{
|
||||
struct vdev *vd, *found;
|
||||
|
||||
found = NULL;
|
||||
// TODO: locking
|
||||
SLIST_FOREACH(vd, &vdev_head, entry) {
|
||||
if (vd->dev == dev) {
|
||||
found = vd;
|
||||
}
|
||||
}
|
||||
|
||||
if (found) {
|
||||
SLIST_REMOVE(&vdev_head, found, vdev, entry);
|
||||
free(found, M_VDEV);
|
||||
}
|
||||
}
|
||||
|
||||
#define IN_RANGE(val, start, end) \
|
||||
(((val) >= (start)) && ((val) < (end)))
|
||||
|
||||
static struct vdev_region*
|
||||
vdev_find_region(struct io_region *io, void *dev)
|
||||
{
|
||||
struct vdev_region *region, *found;
|
||||
uint64_t region_base;
|
||||
uint64_t region_end;
|
||||
|
||||
found = NULL;
|
||||
|
||||
// TODO: locking
|
||||
// FIXME: we should verify we are in the context the current
|
||||
// vcpu here as well.
|
||||
SLIST_FOREACH(region, ®ion_head, entry) {
|
||||
region_base = region->io->base;
|
||||
region_end = region_base + region->io->len;
|
||||
if (IN_RANGE(io->base, region_base, region_end) &&
|
||||
IN_RANGE(io->base+io->len, region_base, region_end+1) &&
|
||||
(dev && dev == region->dev)) {
|
||||
found = region;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
int
|
||||
vdev_register_region(struct vdev_ops *ops, void *dev, struct io_region *io)
|
||||
{
|
||||
struct vdev_region *region;
|
||||
|
||||
region = vdev_find_region(io, dev);
|
||||
if (region) {
|
||||
return -EEXIST;
|
||||
}
|
||||
|
||||
region = malloc(sizeof(*region), M_VDEV, M_WAITOK | M_ZERO);
|
||||
region->io = io;
|
||||
region->ops = ops;
|
||||
region->dev = dev;
|
||||
|
||||
// TODO: locking
|
||||
SLIST_INSERT_HEAD(®ion_head, region, entry);
|
||||
region_count++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
vdev_unregister_region(void *dev, struct io_region *io)
|
||||
{
|
||||
struct vdev_region *region;
|
||||
|
||||
region = vdev_find_region(io, dev);
|
||||
|
||||
if (region) {
|
||||
SLIST_REMOVE(®ion_head, region, vdev_region, entry);
|
||||
free(region, M_VDEV);
|
||||
region_count--;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_memrw(uint64_t gpa, opsize_t size, uint64_t *data, int read)
|
||||
{
|
||||
struct vdev_region *region;
|
||||
struct io_region io;
|
||||
region_attr_t attr;
|
||||
int rc;
|
||||
|
||||
io.base = gpa;
|
||||
io.len = size;
|
||||
|
||||
region = vdev_find_region(&io, NULL);
|
||||
if (!region)
|
||||
return -EINVAL;
|
||||
|
||||
attr = (read) ? MMIO_READ : MMIO_WRITE;
|
||||
if (!(region->io->attr & attr))
|
||||
return -EPERM;
|
||||
|
||||
if (read)
|
||||
rc = region->ops->memread(region->dev, gpa, size, data);
|
||||
else
|
||||
rc = region->ops->memwrite(region->dev, gpa, size, *data);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int
|
||||
vdev_memread(uint64_t gpa, opsize_t size, uint64_t *data)
|
||||
{
|
||||
return vdev_memrw(gpa, size, data, 1);
|
||||
}
|
||||
|
||||
int
|
||||
vdev_memwrite(uint64_t gpa, opsize_t size, uint64_t data)
|
||||
{
|
||||
return vdev_memrw(gpa, size, &data, 0);
|
||||
}
|
84
sys/amd64/vmm/io/vdev.h
Normal file
84
sys/amd64/vmm/io/vdev.h
Normal file
@ -0,0 +1,84 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VDEV_H_
#define	_VDEV_H_

/* Access width of a single MMIO operation, in bytes. */
typedef enum {
	BYTE	= 1,
	WORD	= 2,
	DWORD	= 4,
	QWORD	= 8,
} opsize_t;

/* Permitted access directions for an MMIO region (bitmask). */
typedef enum {
	MMIO_READ	= 1,
	MMIO_WRITE	= 2,
} region_attr_t;

/* A guest-physical MMIO window claimed by a virtual device. */
struct io_region {
	uint64_t	base;
	uint64_t	len;
	region_attr_t	attr;
	int		vcpu;
};

/* Virtual device lifecycle and MMIO callbacks. */
typedef int (*vdev_init_t)(void *dev);
typedef int (*vdev_reset_t)(void *dev);
typedef int (*vdev_halt_t)(void *dev);
typedef int (*vdev_memread_t)(void *dev, uint64_t gpa, opsize_t size,
    uint64_t *data);
typedef int (*vdev_memwrite_t)(void *dev, uint64_t gpa, opsize_t size,
    uint64_t data);

struct vdev_ops {
	const char	*name;
	vdev_init_t	init;
	vdev_reset_t	reset;
	vdev_halt_t	halt;
	vdev_memread_t	memread;
	vdev_memwrite_t	memwrite;
};

void	vdev_vm_init(void);
void	vdev_vm_cleanup(void);

int	vdev_register(struct vdev_ops *ops, void *dev);
void	vdev_unregister(void *dev);

int	vdev_register_region(struct vdev_ops *ops, void *dev,
	    struct io_region *io);
void	vdev_unregister_region(void *dev, struct io_region *io);

int	vdev_init(void);
int	vdev_reset(void);
int	vdev_halt(void);
int	vdev_memread(uint64_t gpa, opsize_t size, uint64_t *data);
int	vdev_memwrite(uint64_t gpa, opsize_t size, uint64_t data);

#endif	/* _VDEV_H_ */
|
||||
|
812
sys/amd64/vmm/io/vlapic.c
Normal file
812
sys/amd64/vmm/io/vlapic.c
Normal file
@ -0,0 +1,812 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/systm.h>
|
||||
|
||||
#include <machine/clock.h>
|
||||
#include <machine/apicreg.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
|
||||
#include "vmm_lapic.h"
|
||||
#include "vmm_ktr.h"
|
||||
#include "vdev.h"
|
||||
#include "vlapic.h"
|
||||
|
||||
#define VLAPIC_CTR0(vlapic, format) \
|
||||
VMM_CTR0((vlapic)->vm, (vlapic)->vcpuid, format)
|
||||
|
||||
#define VLAPIC_CTR1(vlapic, format, p1) \
|
||||
VMM_CTR1((vlapic)->vm, (vlapic)->vcpuid, format, p1)
|
||||
|
||||
#define VLAPIC_CTR_IRR(vlapic, msg) \
|
||||
do { \
|
||||
uint32_t *irrptr = &(vlapic)->apic.irr0; \
|
||||
irrptr[0] = irrptr[0]; /* silence compiler */ \
|
||||
VLAPIC_CTR1((vlapic), msg " irr0 0x%08x", irrptr[0 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " irr1 0x%08x", irrptr[1 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " irr2 0x%08x", irrptr[2 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " irr3 0x%08x", irrptr[3 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " irr4 0x%08x", irrptr[4 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " irr5 0x%08x", irrptr[5 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " irr6 0x%08x", irrptr[6 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " irr7 0x%08x", irrptr[7 << 2]); \
|
||||
} while (0)
|
||||
|
||||
#define VLAPIC_CTR_ISR(vlapic, msg) \
|
||||
do { \
|
||||
uint32_t *isrptr = &(vlapic)->apic.isr0; \
|
||||
isrptr[0] = isrptr[0]; /* silence compiler */ \
|
||||
VLAPIC_CTR1((vlapic), msg " isr0 0x%08x", isrptr[0 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " isr1 0x%08x", isrptr[1 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " isr2 0x%08x", isrptr[2 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " isr3 0x%08x", isrptr[3 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " isr4 0x%08x", isrptr[4 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " isr5 0x%08x", isrptr[5 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " isr6 0x%08x", isrptr[6 << 2]); \
|
||||
VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]); \
|
||||
} while (0)
|
||||
|
||||
static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic");
|
||||
|
||||
#define PRIO(x) ((x) >> 4)
|
||||
|
||||
#define VLAPIC_VERSION (16)
|
||||
#define VLAPIC_MAXLVT_ENTRIES (5)
|
||||
|
||||
struct vlapic {
|
||||
struct vm *vm;
|
||||
int vcpuid;
|
||||
|
||||
struct io_region *mmio;
|
||||
struct vdev_ops *ops;
|
||||
struct LAPIC apic;
|
||||
|
||||
int esr_update;
|
||||
|
||||
int divisor;
|
||||
int ccr_ticks;
|
||||
|
||||
/*
|
||||
* The 'isrvec_stk' is a stack of vectors injected by the local apic.
|
||||
* A vector is popped from the stack when the processor does an EOI.
|
||||
* The vector on the top of the stack is used to compute the
|
||||
* Processor Priority in conjunction with the TPR.
|
||||
*/
|
||||
uint8_t isrvec_stk[ISRVEC_STK_SIZE];
|
||||
int isrvec_stk_top;
|
||||
};
|
||||
|
||||
static void
|
||||
vlapic_mask_lvts(uint32_t *lvts, int num_lvt)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < num_lvt; i++) {
|
||||
*lvts |= APIC_LVT_M;
|
||||
lvts += 4;
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
/* Debug helper (compiled out): pretty-print a single LVT register. */
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif
|
||||
|
||||
static uint64_t
|
||||
vlapic_get_ccr(struct vlapic *vlapic)
|
||||
{
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
return lapic->ccr_timer;
|
||||
}
|
||||
|
||||
static void
|
||||
vlapic_update_errors(struct vlapic *vlapic)
|
||||
{
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
lapic->esr = 0; // XXX
|
||||
}
|
||||
|
||||
static void
|
||||
vlapic_init_ipi(struct vlapic *vlapic)
|
||||
{
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
lapic->version = VLAPIC_VERSION;
|
||||
lapic->version |= (VLAPIC_MAXLVT_ENTRIES < MAXLVTSHIFT);
|
||||
lapic->dfr = 0xffffffff;
|
||||
lapic->svr = APIC_SVR_VECTOR;
|
||||
vlapic_mask_lvts(&lapic->lvt_timer, VLAPIC_MAXLVT_ENTRIES+1);
|
||||
}
|
||||
|
||||
static int
|
||||
vlapic_op_reset(void* dev)
|
||||
{
|
||||
struct vlapic *vlapic = (struct vlapic*)dev;
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
|
||||
memset(lapic, 0, sizeof(*lapic));
|
||||
lapic->id = vlapic->vcpuid << 24;
|
||||
lapic->apr = vlapic->vcpuid;
|
||||
vlapic_init_ipi(vlapic);
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
static int
|
||||
vlapic_op_init(void* dev)
|
||||
{
|
||||
struct vlapic *vlapic = (struct vlapic*)dev;
|
||||
vdev_register_region(vlapic->ops, vlapic, vlapic->mmio);
|
||||
return vlapic_op_reset(dev);
|
||||
}
|
||||
|
||||
static int
|
||||
vlapic_op_halt(void* dev)
|
||||
{
|
||||
struct vlapic *vlapic = (struct vlapic*)dev;
|
||||
vdev_unregister_region(vlapic, vlapic->mmio);
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
vlapic_set_intr_ready(struct vlapic *vlapic, int vector)
|
||||
{
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
uint32_t *irrptr;
|
||||
int idx;
|
||||
|
||||
if (vector < 0 || vector >= 256)
|
||||
panic("vlapic_set_intr_ready: invalid vector %d\n", vector);
|
||||
|
||||
idx = (vector / 32) * 4;
|
||||
irrptr = &lapic->irr0;
|
||||
atomic_set_int(&irrptr[idx], 1 << (vector % 32));
|
||||
VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
|
||||
}
|
||||
|
||||
#define VLAPIC_BUS_FREQ tsc_freq
|
||||
#define VLAPIC_DCR(x) ((x->dcr_timer & 0x8) >> 1)|(x->dcr_timer & 0x3)
|
||||
|
||||
static int
|
||||
vlapic_timer_divisor(uint32_t dcr)
|
||||
{
|
||||
switch (dcr & 0xB) {
|
||||
case APIC_TDCR_2:
|
||||
return (2);
|
||||
case APIC_TDCR_4:
|
||||
return (4);
|
||||
case APIC_TDCR_8:
|
||||
return (8);
|
||||
case APIC_TDCR_16:
|
||||
return (16);
|
||||
case APIC_TDCR_32:
|
||||
return (32);
|
||||
case APIC_TDCR_64:
|
||||
return (64);
|
||||
case APIC_TDCR_128:
|
||||
return (128);
|
||||
default:
|
||||
panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vlapic_start_timer(struct vlapic *vlapic, uint32_t elapsed)
|
||||
{
|
||||
uint32_t icr_timer;
|
||||
|
||||
icr_timer = vlapic->apic.icr_timer;
|
||||
|
||||
vlapic->ccr_ticks = ticks;
|
||||
if (elapsed < icr_timer)
|
||||
vlapic->apic.ccr_timer = icr_timer - elapsed;
|
||||
else {
|
||||
/*
|
||||
* This can happen when the guest is trying to run its local
|
||||
* apic timer higher that the setting of 'hz' in the host.
|
||||
*
|
||||
* We deal with this by running the guest local apic timer
|
||||
* at the rate of the host's 'hz' setting.
|
||||
*/
|
||||
vlapic->apic.ccr_timer = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static __inline uint32_t *
|
||||
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
|
||||
{
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
int i;
|
||||
|
||||
if (offset < APIC_OFFSET_TIMER_LVT || offset > APIC_OFFSET_ERROR_LVT) {
|
||||
panic("vlapic_get_lvt: invalid LVT\n");
|
||||
}
|
||||
i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
|
||||
return ((&lapic->lvt_timer) + i);;
|
||||
}
|
||||
|
||||
#if 1
|
||||
static void
|
||||
dump_isrvec_stk(struct vlapic *vlapic)
|
||||
{
|
||||
int i;
|
||||
uint32_t *isrptr;
|
||||
|
||||
isrptr = &vlapic->apic.isr0;
|
||||
for (i = 0; i < 8; i++)
|
||||
printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);
|
||||
|
||||
for (i = 0; i <= vlapic->isrvec_stk_top; i++)
|
||||
printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Algorithm adopted from section "Interrupt, Task and Processor Priority"
|
||||
* in Intel Architecture Manual Vol 3a.
|
||||
*/
|
||||
static void
|
||||
vlapic_update_ppr(struct vlapic *vlapic)
|
||||
{
|
||||
int isrvec, tpr, ppr;
|
||||
|
||||
/*
|
||||
* Note that the value on the stack at index 0 is always 0.
|
||||
*
|
||||
* This is a placeholder for the value of ISRV when none of the
|
||||
* bits is set in the ISRx registers.
|
||||
*/
|
||||
isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
|
||||
tpr = vlapic->apic.tpr;
|
||||
|
||||
#if 1
|
||||
{
|
||||
int i, lastprio, curprio, vector, idx;
|
||||
uint32_t *isrptr;
|
||||
|
||||
if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
|
||||
panic("isrvec_stk is corrupted: %d", isrvec);
|
||||
|
||||
/*
|
||||
* Make sure that the priority of the nested interrupts is
|
||||
* always increasing.
|
||||
*/
|
||||
lastprio = -1;
|
||||
for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
|
||||
curprio = PRIO(vlapic->isrvec_stk[i]);
|
||||
if (curprio <= lastprio) {
|
||||
dump_isrvec_stk(vlapic);
|
||||
panic("isrvec_stk does not satisfy invariant");
|
||||
}
|
||||
lastprio = curprio;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure that each bit set in the ISRx registers has a
|
||||
* corresponding entry on the isrvec stack.
|
||||
*/
|
||||
i = 1;
|
||||
isrptr = &vlapic->apic.isr0;
|
||||
for (vector = 0; vector < 256; vector++) {
|
||||
idx = (vector / 32) * 4;
|
||||
if (isrptr[idx] & (1 << (vector % 32))) {
|
||||
if (i > vlapic->isrvec_stk_top ||
|
||||
vlapic->isrvec_stk[i] != vector) {
|
||||
dump_isrvec_stk(vlapic);
|
||||
panic("ISR and isrvec_stk out of sync");
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (PRIO(tpr) >= PRIO(isrvec))
|
||||
ppr = tpr;
|
||||
else
|
||||
ppr = isrvec & 0xf0;
|
||||
|
||||
vlapic->apic.ppr = ppr;
|
||||
VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
|
||||
}
|
||||
|
||||
static void
|
||||
vlapic_process_eoi(struct vlapic *vlapic)
|
||||
{
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
uint32_t *isrptr;
|
||||
int i, idx, bitpos;
|
||||
|
||||
isrptr = &lapic->isr0;
|
||||
|
||||
/*
|
||||
* The x86 architecture reserves the the first 32 vectors for use
|
||||
* by the processor.
|
||||
*/
|
||||
for (i = 7; i > 0; i--) {
|
||||
idx = i * 4;
|
||||
bitpos = fls(isrptr[idx]);
|
||||
if (bitpos != 0) {
|
||||
if (vlapic->isrvec_stk_top <= 0) {
|
||||
panic("invalid vlapic isrvec_stk_top %d",
|
||||
vlapic->isrvec_stk_top);
|
||||
}
|
||||
isrptr[idx] &= ~(1 << (bitpos - 1));
|
||||
VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
|
||||
vlapic->isrvec_stk_top--;
|
||||
vlapic_update_ppr(vlapic);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Extract the 'mask' bits from an LVT register value. */
static __inline int
vlapic_get_lvt_field(uint32_t *lvt, uint32_t mask)
{

	return (*lvt & mask);
}
|
||||
|
||||
static __inline int
|
||||
vlapic_periodic_timer(struct vlapic *vlapic)
|
||||
{
|
||||
uint32_t *lvt;
|
||||
|
||||
lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
|
||||
|
||||
return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
|
||||
}
|
||||
|
||||
static void
|
||||
vlapic_fire_timer(struct vlapic *vlapic)
|
||||
{
|
||||
int vector;
|
||||
uint32_t *lvt;
|
||||
|
||||
lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
|
||||
|
||||
if (!vlapic_get_lvt_field(lvt, APIC_LVTT_M)) {
|
||||
vector = vlapic_get_lvt_field(lvt,APIC_LVTT_VECTOR);
|
||||
vlapic_set_intr_ready(vlapic, vector);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
lapic_process_icr(struct vlapic *vlapic, uint64_t icrval)
|
||||
{
|
||||
int i;
|
||||
cpumask_t dmask, thiscpumask;
|
||||
uint32_t dest, vec, mode;
|
||||
|
||||
thiscpumask = vcpu_mask(vlapic->vcpuid);
|
||||
|
||||
dmask = 0;
|
||||
dest = icrval >> 32;
|
||||
vec = icrval & APIC_VECTOR_MASK;
|
||||
mode = icrval & APIC_DELMODE_MASK;
|
||||
|
||||
if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
|
||||
switch (icrval & APIC_DEST_MASK) {
|
||||
case APIC_DEST_DESTFLD:
|
||||
dmask = vcpu_mask(dest);
|
||||
break;
|
||||
case APIC_DEST_SELF:
|
||||
dmask = thiscpumask;
|
||||
break;
|
||||
case APIC_DEST_ALLISELF:
|
||||
dmask = vm_active_cpus(vlapic->vm);
|
||||
break;
|
||||
case APIC_DEST_ALLESELF:
|
||||
dmask = vm_active_cpus(vlapic->vm) & ~thiscpumask;
|
||||
break;
|
||||
}
|
||||
|
||||
for (i = 0; i < VM_MAXCPU; i++) {
|
||||
if (dmask & vcpu_mask(i)) {
|
||||
if (mode == APIC_DELMODE_FIXED)
|
||||
lapic_set_intr(vlapic->vm, i, vec);
|
||||
else
|
||||
vm_inject_nmi(vlapic->vm, i);
|
||||
}
|
||||
}
|
||||
|
||||
return (0); /* handled completely in the kernel */
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX this assumes that the startup IPI always succeeds
|
||||
*/
|
||||
if (mode == APIC_DELMODE_STARTUP)
|
||||
vm_activate_cpu(vlapic->vm, dest);
|
||||
|
||||
/*
|
||||
* This will cause a return to userland.
|
||||
*/
|
||||
return (1);
|
||||
}
|
||||
|
||||
int
|
||||
vlapic_pending_intr(struct vlapic *vlapic)
|
||||
{
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
int idx, i, bitpos, vector;
|
||||
uint32_t *irrptr, val;
|
||||
|
||||
irrptr = &lapic->irr0;
|
||||
|
||||
/*
|
||||
* The x86 architecture reserves the the first 32 vectors for use
|
||||
* by the processor.
|
||||
*/
|
||||
for (i = 7; i > 0; i--) {
|
||||
idx = i * 4;
|
||||
val = atomic_load_acq_int(&irrptr[idx]);
|
||||
bitpos = fls(val);
|
||||
if (bitpos != 0) {
|
||||
vector = i * 32 + (bitpos - 1);
|
||||
if (PRIO(vector) > PRIO(lapic->ppr)) {
|
||||
VLAPIC_CTR1(vlapic, "pending intr %d", vector);
|
||||
return (vector);
|
||||
} else
|
||||
break;
|
||||
}
|
||||
}
|
||||
VLAPIC_CTR0(vlapic, "no pending intr");
|
||||
return (-1);
|
||||
}
|
||||
|
||||
void
|
||||
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
|
||||
{
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
uint32_t *irrptr, *isrptr;
|
||||
int idx, stk_top;
|
||||
|
||||
/*
|
||||
* clear the ready bit for vector being accepted in irr
|
||||
* and set the vector as in service in isr.
|
||||
*/
|
||||
idx = (vector / 32) * 4;
|
||||
|
||||
irrptr = &lapic->irr0;
|
||||
atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
|
||||
VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");
|
||||
|
||||
isrptr = &lapic->isr0;
|
||||
isrptr[idx] |= 1 << (vector % 32);
|
||||
VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");
|
||||
|
||||
/*
|
||||
* Update the PPR
|
||||
*/
|
||||
vlapic->isrvec_stk_top++;
|
||||
|
||||
stk_top = vlapic->isrvec_stk_top;
|
||||
if (stk_top >= ISRVEC_STK_SIZE)
|
||||
panic("isrvec_stk_top overflow %d", stk_top);
|
||||
|
||||
vlapic->isrvec_stk[stk_top] = vector;
|
||||
vlapic_update_ppr(vlapic);
|
||||
}
|
||||
|
||||
int
|
||||
vlapic_op_mem_read(void* dev, uint64_t gpa, opsize_t size, uint64_t *data)
|
||||
{
|
||||
struct vlapic *vlapic = (struct vlapic*)dev;
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
uint64_t offset = gpa & ~(PAGE_SIZE);
|
||||
uint32_t *reg;
|
||||
int i;
|
||||
|
||||
if (offset > sizeof(*lapic)) {
|
||||
*data = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
offset &= ~3;
|
||||
switch(offset)
|
||||
{
|
||||
case APIC_OFFSET_ID:
|
||||
*data = lapic->id;
|
||||
break;
|
||||
case APIC_OFFSET_VER:
|
||||
*data = lapic->version;
|
||||
break;
|
||||
case APIC_OFFSET_TPR:
|
||||
*data = lapic->tpr;
|
||||
break;
|
||||
case APIC_OFFSET_APR:
|
||||
*data = lapic->apr;
|
||||
break;
|
||||
case APIC_OFFSET_PPR:
|
||||
*data = lapic->ppr;
|
||||
break;
|
||||
case APIC_OFFSET_EOI:
|
||||
*data = lapic->eoi;
|
||||
break;
|
||||
case APIC_OFFSET_LDR:
|
||||
*data = lapic->ldr;
|
||||
break;
|
||||
case APIC_OFFSET_DFR:
|
||||
*data = lapic->dfr;
|
||||
break;
|
||||
case APIC_OFFSET_SVR:
|
||||
*data = lapic->svr;
|
||||
break;
|
||||
case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
|
||||
i = (offset - APIC_OFFSET_ISR0) >> 2;
|
||||
reg = &lapic->isr0;
|
||||
*data = *(reg + i);
|
||||
break;
|
||||
case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
|
||||
i = (offset - APIC_OFFSET_TMR0) >> 2;
|
||||
reg = &lapic->tmr0;
|
||||
*data = *(reg + i);
|
||||
break;
|
||||
case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
|
||||
i = (offset - APIC_OFFSET_IRR0) >> 2;
|
||||
reg = &lapic->irr0;
|
||||
*data = atomic_load_acq_int(reg + i);
|
||||
break;
|
||||
case APIC_OFFSET_ESR:
|
||||
*data = lapic->esr;
|
||||
break;
|
||||
case APIC_OFFSET_ICR_LOW:
|
||||
*data = lapic->icr_lo;
|
||||
break;
|
||||
case APIC_OFFSET_ICR_HI:
|
||||
*data = lapic->icr_hi;
|
||||
break;
|
||||
case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
|
||||
reg = vlapic_get_lvt(vlapic, offset);
|
||||
*data = *(reg);
|
||||
break;
|
||||
case APIC_OFFSET_ICR:
|
||||
*data = lapic->icr_timer;
|
||||
break;
|
||||
case APIC_OFFSET_CCR:
|
||||
*data = vlapic_get_ccr(vlapic);
|
||||
break;
|
||||
case APIC_OFFSET_DCR:
|
||||
*data = lapic->dcr_timer;
|
||||
break;
|
||||
case APIC_OFFSET_RRR:
|
||||
default:
|
||||
*data = 0;
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
vlapic_op_mem_write(void* dev, uint64_t gpa, opsize_t size, uint64_t data)
|
||||
{
|
||||
struct vlapic *vlapic = (struct vlapic*)dev;
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
uint64_t offset = gpa & ~(PAGE_SIZE);
|
||||
uint32_t *reg;
|
||||
int retval;
|
||||
|
||||
if (offset > sizeof(*lapic)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
retval = 0;
|
||||
offset &= ~3;
|
||||
switch(offset)
|
||||
{
|
||||
case APIC_OFFSET_ID:
|
||||
lapic->id = data;
|
||||
break;
|
||||
case APIC_OFFSET_TPR:
|
||||
lapic->tpr = data & 0xff;
|
||||
vlapic_update_ppr(vlapic);
|
||||
break;
|
||||
case APIC_OFFSET_EOI:
|
||||
vlapic_process_eoi(vlapic);
|
||||
break;
|
||||
case APIC_OFFSET_LDR:
|
||||
break;
|
||||
case APIC_OFFSET_DFR:
|
||||
break;
|
||||
case APIC_OFFSET_SVR:
|
||||
lapic->svr = data;
|
||||
break;
|
||||
case APIC_OFFSET_ICR_LOW:
|
||||
retval = lapic_process_icr(vlapic, data);
|
||||
break;
|
||||
case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
|
||||
reg = vlapic_get_lvt(vlapic, offset);
|
||||
if (!(lapic->svr & APIC_SVR_ENABLE)) {
|
||||
data |= APIC_LVT_M;
|
||||
}
|
||||
*reg = data;
|
||||
// vlapic_dump_lvt(offset, reg);
|
||||
break;
|
||||
case APIC_OFFSET_ICR:
|
||||
lapic->icr_timer = data;
|
||||
vlapic_start_timer(vlapic, 0);
|
||||
break;
|
||||
|
||||
case APIC_OFFSET_DCR:
|
||||
lapic->dcr_timer = data;
|
||||
vlapic->divisor = vlapic_timer_divisor(data);
|
||||
break;
|
||||
|
||||
case APIC_OFFSET_ESR:
|
||||
vlapic_update_errors(vlapic);
|
||||
break;
|
||||
case APIC_OFFSET_VER:
|
||||
case APIC_OFFSET_APR:
|
||||
case APIC_OFFSET_PPR:
|
||||
case APIC_OFFSET_RRR:
|
||||
case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
|
||||
case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
|
||||
case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
|
||||
case APIC_OFFSET_CCR:
|
||||
default:
|
||||
// Read only.
|
||||
break;
|
||||
}
|
||||
|
||||
return (retval);
|
||||
}
|
||||
|
||||
void
|
||||
vlapic_timer_tick(struct vlapic *vlapic)
|
||||
{
|
||||
int curticks, delta, periodic;
|
||||
uint32_t ccr;
|
||||
uint32_t decrement, remainder;
|
||||
|
||||
curticks = ticks;
|
||||
|
||||
/* Common case */
|
||||
delta = curticks - vlapic->ccr_ticks;
|
||||
if (delta == 0)
|
||||
return;
|
||||
|
||||
/* Local APIC timer is disabled */
|
||||
if (vlapic->apic.icr_timer == 0)
|
||||
return;
|
||||
|
||||
/* One-shot mode and timer has already counted down to zero */
|
||||
periodic = vlapic_periodic_timer(vlapic);
|
||||
if (!periodic && vlapic->apic.ccr_timer == 0)
|
||||
return;
|
||||
/*
|
||||
* The 'curticks' and 'ccr_ticks' are out of sync by more than
|
||||
* 2^31 ticks. We deal with this by restarting the timer.
|
||||
*/
|
||||
if (delta < 0) {
|
||||
vlapic_start_timer(vlapic, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
ccr = vlapic->apic.ccr_timer;
|
||||
decrement = (VLAPIC_BUS_FREQ / vlapic->divisor) / hz;
|
||||
while (delta-- > 0) {
|
||||
if (ccr <= decrement) {
|
||||
remainder = decrement - ccr;
|
||||
vlapic_fire_timer(vlapic);
|
||||
if (periodic) {
|
||||
vlapic_start_timer(vlapic, remainder);
|
||||
ccr = vlapic->apic.ccr_timer;
|
||||
} else {
|
||||
/*
|
||||
* One-shot timer has counted down to zero.
|
||||
*/
|
||||
ccr = 0;
|
||||
break;
|
||||
}
|
||||
} else
|
||||
ccr -= decrement;
|
||||
}
|
||||
|
||||
vlapic->ccr_ticks = curticks;
|
||||
vlapic->apic.ccr_timer = ccr;
|
||||
}
|
||||
|
||||
struct vdev_ops vlapic_dev_ops = {
|
||||
.name = "vlapic",
|
||||
.init = vlapic_op_init,
|
||||
.reset = vlapic_op_reset,
|
||||
.halt = vlapic_op_halt,
|
||||
.memread = vlapic_op_mem_read,
|
||||
.memwrite = vlapic_op_mem_write,
|
||||
};
|
||||
static struct io_region vlapic_mmio[VM_MAXCPU];
|
||||
|
||||
struct vlapic *
|
||||
vlapic_init(struct vm *vm, int vcpuid)
|
||||
{
|
||||
struct vlapic *vlapic;
|
||||
|
||||
vlapic = malloc(sizeof(struct vlapic), M_VLAPIC, M_WAITOK | M_ZERO);
|
||||
vlapic->vm = vm;
|
||||
vlapic->vcpuid = vcpuid;
|
||||
vlapic->ops = &vlapic_dev_ops;
|
||||
|
||||
vlapic->mmio = vlapic_mmio + vcpuid;
|
||||
vlapic->mmio->base = DEFAULT_APIC_BASE;
|
||||
vlapic->mmio->len = PAGE_SIZE;
|
||||
vlapic->mmio->attr = MMIO_READ|MMIO_WRITE;
|
||||
vlapic->mmio->vcpu = vcpuid;
|
||||
|
||||
vdev_register(&vlapic_dev_ops, vlapic);
|
||||
|
||||
vlapic_op_init(vlapic);
|
||||
|
||||
return (vlapic);
|
||||
}
|
||||
|
||||
void
|
||||
vlapic_cleanup(struct vlapic *vlapic)
|
||||
{
|
||||
vdev_unregister(vlapic);
|
||||
free(vlapic, M_VLAPIC);
|
||||
}
|
105
sys/amd64/vmm/io/vlapic.h
Normal file
105
sys/amd64/vmm/io/vlapic.h
Normal file
@ -0,0 +1,105 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VLAPIC_H_
|
||||
#define _VLAPIC_H_
|
||||
|
||||
#include "vdev.h"
|
||||
|
||||
struct vm;
|
||||
|
||||
/*
|
||||
* Map of APIC Registers: Offset Description Access
|
||||
*/
|
||||
#define APIC_OFFSET_ID 0x20 // Local APIC ID R/W
|
||||
#define APIC_OFFSET_VER 0x30 // Local APIC Version R
|
||||
#define APIC_OFFSET_TPR 0x80 // Task Priority Register R/W
|
||||
#define APIC_OFFSET_APR 0x90 // Arbitration Priority Register R
|
||||
#define APIC_OFFSET_PPR 0xA0 // Processor Priority Register R
|
||||
#define APIC_OFFSET_EOI 0xB0 // EOI Register W
|
||||
#define APIC_OFFSET_RRR 0xC0 // Remote read R
|
||||
#define APIC_OFFSET_LDR 0xD0 // Logical Destination R/W
|
||||
#define APIC_OFFSET_DFR 0xE0 // Destination Format Register 0..27 R; 28..31 R/W
|
||||
#define APIC_OFFSET_SVR 0xF0 // Spurious Interrupt Vector Reg. 0..3 R; 4..9 R/W
|
||||
#define APIC_OFFSET_ISR0 0x100 // ISR 000-031 R
|
||||
#define APIC_OFFSET_ISR1 0x110 // ISR 032-063 R
|
||||
#define APIC_OFFSET_ISR2 0x120 // ISR 064-095 R
|
||||
#define APIC_OFFSET_ISR3 0x130 // ISR 095-128 R
|
||||
#define APIC_OFFSET_ISR4 0x140 // ISR 128-159 R
|
||||
#define APIC_OFFSET_ISR5 0x150 // ISR 160-191 R
|
||||
#define APIC_OFFSET_ISR6 0x160 // ISR 192-223 R
|
||||
#define APIC_OFFSET_ISR7 0x170 // ISR 224-255 R
|
||||
#define APIC_OFFSET_TMR0 0x180 // TMR 000-031 R
|
||||
#define APIC_OFFSET_TMR1 0x190 // TMR 032-063 R
|
||||
#define APIC_OFFSET_TMR2 0x1A0 // TMR 064-095 R
|
||||
#define APIC_OFFSET_TMR3 0x1B0 // TMR 095-128 R
|
||||
#define APIC_OFFSET_TMR4 0x1C0 // TMR 128-159 R
|
||||
#define APIC_OFFSET_TMR5 0x1D0 // TMR 160-191 R
|
||||
#define APIC_OFFSET_TMR6 0x1E0 // TMR 192-223 R
|
||||
#define APIC_OFFSET_TMR7 0x1F0 // TMR 224-255 R
|
||||
#define APIC_OFFSET_IRR0 0x200 // IRR 000-031 R
|
||||
#define APIC_OFFSET_IRR1 0x210 // IRR 032-063 R
|
||||
#define APIC_OFFSET_IRR2 0x220 // IRR 064-095 R
|
||||
#define APIC_OFFSET_IRR3 0x230 // IRR 095-128 R
|
||||
#define APIC_OFFSET_IRR4 0x240 // IRR 128-159 R
|
||||
#define APIC_OFFSET_IRR5 0x250 // IRR 160-191 R
|
||||
#define APIC_OFFSET_IRR6 0x260 // IRR 192-223 R
|
||||
#define APIC_OFFSET_IRR7 0x270 // IRR 224-255 R
|
||||
#define APIC_OFFSET_ESR 0x280 // Error Status Register R
|
||||
#define APIC_OFFSET_ICR_LOW 0x300 // Interrupt Command Reg. (0-31) R/W
|
||||
#define APIC_OFFSET_ICR_HI 0x310 // Interrupt Command Reg. (32-63) R/W
|
||||
#define APIC_OFFSET_TIMER_LVT 0x320 // Local Vector Table (Timer) R/W
|
||||
#define APIC_OFFSET_THERM_LVT 0x330 // Local Vector Table (Thermal) R/W (PIV+)
|
||||
#define APIC_OFFSET_PERF_LVT 0x340 // Local Vector Table (Performance) R/W (P6+)
|
||||
#define APIC_OFFSET_LINT0_LVT 0x350 // Local Vector Table (LINT0) R/W
|
||||
#define APIC_OFFSET_LINT1_LVT 0x360 // Local Vector Table (LINT1) R/W
|
||||
#define APIC_OFFSET_ERROR_LVT 0x370 // Local Vector Table (ERROR) R/W
|
||||
#define APIC_OFFSET_ICR 0x380 // Initial Count Reg. for Timer R/W
|
||||
#define APIC_OFFSET_CCR 0x390 // Current Count of Timer R
|
||||
#define APIC_OFFSET_DCR 0x3E0 // Timer Divide Configuration Reg. R/W
|
||||
|
||||
/*
|
||||
* 16 priority levels with at most one vector injected per level.
|
||||
*/
|
||||
#define ISRVEC_STK_SIZE (16 + 1)
|
||||
|
||||
struct vlapic *vlapic_init(struct vm *vm, int vcpuid);
|
||||
void vlapic_cleanup(struct vlapic *vlapic);
|
||||
|
||||
int vlapic_op_mem_write(void* dev, uint64_t gpa,
|
||||
opsize_t size, uint64_t data);
|
||||
|
||||
int vlapic_op_mem_read(void* dev, uint64_t gpa,
|
||||
opsize_t size, uint64_t *data);
|
||||
|
||||
int vlapic_pending_intr(struct vlapic *vlapic);
|
||||
void vlapic_intr_accepted(struct vlapic *vlapic, int vector);
|
||||
void vlapic_set_intr_ready(struct vlapic *vlapic, int vector);
|
||||
void vlapic_timer_tick(struct vlapic *vlapic);
|
||||
|
||||
#endif /* _VLAPIC_H_ */
|
737
sys/amd64/vmm/vmm.c
Normal file
737
sys/amd64/vmm/vmm.c
Normal file
@ -0,0 +1,737 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/module.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/pcpu.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/sched.h>
|
||||
#include <sys/smp.h>
|
||||
#include <sys/systm.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
|
||||
#include <machine/vm.h>
|
||||
#include <machine/pcb.h>
|
||||
#include <machine/apicreg.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "vmm_mem.h"
|
||||
#include "vmm_util.h"
|
||||
#include <machine/vmm_dev.h>
|
||||
#include "vlapic.h"
|
||||
#include "vmm_msr.h"
|
||||
#include "vmm_ipi.h"
|
||||
#include "vmm_stat.h"
|
||||
|
||||
#include "io/ppt.h"
|
||||
#include "io/iommu.h"
|
||||
|
||||
struct vlapic;
|
||||
|
||||
struct vcpu {
|
||||
int flags;
|
||||
int pincpu; /* host cpuid this vcpu is bound to */
|
||||
int hostcpu; /* host cpuid this vcpu last ran on */
|
||||
uint64_t guest_msrs[VMM_MSR_NUM];
|
||||
struct vlapic *vlapic;
|
||||
int vcpuid;
|
||||
struct savefpu savefpu; /* guest fpu state */
|
||||
void *stats;
|
||||
};
|
||||
#define VCPU_F_PINNED 0x0001
|
||||
#define VCPU_F_RUNNING 0x0002
|
||||
|
||||
#define VCPU_PINCPU(vm, vcpuid) \
|
||||
((vm->vcpu[vcpuid].flags & VCPU_F_PINNED) ? vm->vcpu[vcpuid].pincpu : -1)
|
||||
|
||||
#define VCPU_UNPIN(vm, vcpuid) (vm->vcpu[vcpuid].flags &= ~VCPU_F_PINNED)
|
||||
|
||||
#define VCPU_PIN(vm, vcpuid, host_cpuid) \
|
||||
do { \
|
||||
vm->vcpu[vcpuid].flags |= VCPU_F_PINNED; \
|
||||
vm->vcpu[vcpuid].pincpu = host_cpuid; \
|
||||
} while(0)
|
||||
|
||||
#define VM_MAX_MEMORY_SEGMENTS 2
|
||||
|
||||
struct vm {
|
||||
void *cookie; /* processor-specific data */
|
||||
void *iommu; /* iommu-specific data */
|
||||
struct vcpu vcpu[VM_MAXCPU];
|
||||
int num_mem_segs;
|
||||
struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS];
|
||||
char name[VM_MAX_NAMELEN];
|
||||
|
||||
/*
|
||||
* Mask of active vcpus.
|
||||
* An active vcpu is one that has been started implicitly (BSP) or
|
||||
* explicitly (AP) by sending it a startup ipi.
|
||||
*/
|
||||
cpumask_t active_cpus;
|
||||
};
|
||||
|
||||
static struct vmm_ops *ops;
|
||||
#define VMM_INIT() (ops != NULL ? (*ops->init)() : 0)
|
||||
#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0)
|
||||
|
||||
#define VMINIT(vm) (ops != NULL ? (*ops->vminit)(vm): NULL)
|
||||
#define VMRUN(vmi, vcpu, rip, vmexit) \
|
||||
(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, vmexit) : ENXIO)
|
||||
#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
|
||||
#define VMMMAP(vmi, gpa, hpa, len, attr, prot, spm) \
|
||||
(ops != NULL ? (*ops->vmmmap)(vmi, gpa, hpa, len, attr, prot, spm) : ENXIO)
|
||||
#define VMGETREG(vmi, vcpu, num, retval) \
|
||||
(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
|
||||
#define VMSETREG(vmi, vcpu, num, val) \
|
||||
(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
|
||||
#define VMGETDESC(vmi, vcpu, num, desc) \
|
||||
(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
|
||||
#define VMSETDESC(vmi, vcpu, num, desc) \
|
||||
(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
|
||||
#define VMINJECT(vmi, vcpu, type, vec, ec, ecv) \
|
||||
(ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO)
|
||||
#define VMNMI(vmi, vcpu) \
|
||||
(ops != NULL ? (*ops->vmnmi)(vmi, vcpu) : ENXIO)
|
||||
#define VMGETCAP(vmi, vcpu, num, retval) \
|
||||
(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
|
||||
#define VMSETCAP(vmi, vcpu, num, val) \
|
||||
(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
|
||||
|
||||
#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
|
||||
#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
|
||||
#define fpu_start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
|
||||
: : "n" (CR0_TS) : "ax")
|
||||
#define fpu_stop_emulating() __asm("clts")
|
||||
|
||||
static MALLOC_DEFINE(M_VM, "vm", "vm");
|
||||
CTASSERT(VMM_MSR_NUM <= 64); /* msr_mask can keep track of up to 64 msrs */
|
||||
|
||||
/* statistics */
|
||||
static VMM_STAT_DEFINE(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
|
||||
|
||||
static void
|
||||
vcpu_cleanup(struct vcpu *vcpu)
|
||||
{
|
||||
vlapic_cleanup(vcpu->vlapic);
|
||||
vmm_stat_free(vcpu->stats);
|
||||
}
|
||||
|
||||
static void
|
||||
vcpu_init(struct vm *vm, uint32_t vcpu_id)
|
||||
{
|
||||
struct vcpu *vcpu;
|
||||
|
||||
vcpu = &vm->vcpu[vcpu_id];
|
||||
|
||||
vcpu->hostcpu = -1;
|
||||
vcpu->vcpuid = vcpu_id;
|
||||
vcpu->vlapic = vlapic_init(vm, vcpu_id);
|
||||
fpugetregs(curthread, &vcpu->savefpu);
|
||||
vcpu->stats = vmm_stat_alloc();
|
||||
}
|
||||
|
||||
static int
|
||||
vmm_init(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
vmm_ipi_init();
|
||||
|
||||
error = vmm_mem_init();
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
if (vmm_is_intel())
|
||||
ops = &vmm_ops_intel;
|
||||
else if (vmm_is_amd())
|
||||
ops = &vmm_ops_amd;
|
||||
else
|
||||
return (ENXIO);
|
||||
|
||||
vmm_msr_init();
|
||||
|
||||
return (VMM_INIT());
|
||||
}
|
||||
|
||||
static int
|
||||
vmm_handler(module_t mod, int what, void *arg)
|
||||
{
|
||||
int error;
|
||||
|
||||
switch (what) {
|
||||
case MOD_LOAD:
|
||||
vmmdev_init();
|
||||
iommu_init();
|
||||
error = vmm_init();
|
||||
break;
|
||||
case MOD_UNLOAD:
|
||||
vmmdev_cleanup();
|
||||
iommu_cleanup();
|
||||
vmm_ipi_cleanup();
|
||||
error = VMM_CLEANUP();
|
||||
break;
|
||||
default:
|
||||
error = 0;
|
||||
break;
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
static moduledata_t vmm_kmod = {
|
||||
"vmm",
|
||||
vmm_handler,
|
||||
NULL
|
||||
};
|
||||
|
||||
/*
|
||||
* Execute the module load handler after the pci passthru driver has had
|
||||
* a chance to claim devices. We need this information at the time we do
|
||||
* iommu initialization.
|
||||
*/
|
||||
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_CONFIGURE + 1, SI_ORDER_ANY);
|
||||
MODULE_VERSION(vmm, 1);
|
||||
|
||||
SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
|
||||
|
||||
struct vm *
|
||||
vm_create(const char *name)
|
||||
{
|
||||
int i;
|
||||
struct vm *vm;
|
||||
vm_paddr_t maxaddr;
|
||||
|
||||
const int BSP = 0;
|
||||
|
||||
if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
|
||||
return (NULL);
|
||||
|
||||
vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
|
||||
strcpy(vm->name, name);
|
||||
vm->cookie = VMINIT(vm);
|
||||
|
||||
for (i = 0; i < VM_MAXCPU; i++) {
|
||||
vcpu_init(vm, i);
|
||||
guest_msrs_init(vm, i);
|
||||
}
|
||||
|
||||
maxaddr = vmm_mem_maxaddr();
|
||||
vm->iommu = iommu_create_domain(maxaddr);
|
||||
vm_activate_cpu(vm, BSP);
|
||||
|
||||
return (vm);
|
||||
}
|
||||
|
||||
void
|
||||
vm_destroy(struct vm *vm)
|
||||
{
|
||||
int i;
|
||||
|
||||
ppt_unassign_all(vm);
|
||||
|
||||
for (i = 0; i < vm->num_mem_segs; i++)
|
||||
vmm_mem_free(vm->mem_segs[i].hpa, vm->mem_segs[i].len);
|
||||
|
||||
for (i = 0; i < VM_MAXCPU; i++)
|
||||
vcpu_cleanup(&vm->vcpu[i]);
|
||||
|
||||
iommu_destroy_domain(vm->iommu);
|
||||
|
||||
VMCLEANUP(vm->cookie);
|
||||
|
||||
free(vm, M_VM);
|
||||
}
|
||||
|
||||
const char *
|
||||
vm_name(struct vm *vm)
|
||||
{
|
||||
return (vm->name);
|
||||
}
|
||||
|
||||
int
|
||||
vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
|
||||
{
|
||||
const boolean_t spok = TRUE; /* superpage mappings are ok */
|
||||
|
||||
return (VMMMAP(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE,
|
||||
VM_PROT_RW, spok));
|
||||
}
|
||||
|
||||
int
|
||||
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
|
||||
{
|
||||
const boolean_t spok = TRUE; /* superpage mappings are ok */
|
||||
|
||||
return (VMMMAP(vm->cookie, gpa, 0, len, VM_MEMATTR_UNCACHEABLE,
|
||||
VM_PROT_NONE, spok));
|
||||
}
|
||||
|
||||
int
|
||||
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t *ret_hpa)
|
||||
{
|
||||
int error;
|
||||
vm_paddr_t hpa;
|
||||
|
||||
const boolean_t spok = TRUE; /* superpage mappings are ok */
|
||||
|
||||
/*
|
||||
* find the hpa if already it was already vm_malloc'd.
|
||||
*/
|
||||
hpa = vm_gpa2hpa(vm, gpa, len);
|
||||
if (hpa != ((vm_paddr_t)-1))
|
||||
goto out;
|
||||
|
||||
if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
|
||||
return (E2BIG);
|
||||
|
||||
hpa = vmm_mem_alloc(len);
|
||||
if (hpa == 0)
|
||||
return (ENOMEM);
|
||||
|
||||
error = VMMMAP(vm->cookie, gpa, hpa, len, VM_MEMATTR_WRITE_BACK,
|
||||
VM_PROT_ALL, spok);
|
||||
if (error) {
|
||||
vmm_mem_free(hpa, len);
|
||||
return (error);
|
||||
}
|
||||
|
||||
iommu_create_mapping(vm->iommu, gpa, hpa, len);
|
||||
|
||||
vm->mem_segs[vm->num_mem_segs].gpa = gpa;
|
||||
vm->mem_segs[vm->num_mem_segs].hpa = hpa;
|
||||
vm->mem_segs[vm->num_mem_segs].len = len;
|
||||
vm->num_mem_segs++;
|
||||
out:
|
||||
*ret_hpa = hpa;
|
||||
return (0);
|
||||
}
|
||||
|
||||
vm_paddr_t
|
||||
vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len)
|
||||
{
|
||||
int i;
|
||||
vm_paddr_t gpabase, gpalimit, hpabase;
|
||||
|
||||
for (i = 0; i < vm->num_mem_segs; i++) {
|
||||
hpabase = vm->mem_segs[i].hpa;
|
||||
gpabase = vm->mem_segs[i].gpa;
|
||||
gpalimit = gpabase + vm->mem_segs[i].len;
|
||||
if (gpa >= gpabase && gpa + len <= gpalimit)
|
||||
return ((gpa - gpabase) + hpabase);
|
||||
}
|
||||
return ((vm_paddr_t)-1);
|
||||
}
|
||||
|
||||
int
|
||||
vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
|
||||
struct vm_memory_segment *seg)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < vm->num_mem_segs; i++) {
|
||||
if (gpabase == vm->mem_segs[i].gpa) {
|
||||
*seg = vm->mem_segs[i];
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
return (-1);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
|
||||
{
|
||||
|
||||
if (vcpu < 0 || vcpu >= VM_MAXCPU)
|
||||
return (EINVAL);
|
||||
|
||||
if (reg >= VM_REG_LAST)
|
||||
return (EINVAL);
|
||||
|
||||
return (VMGETREG(vm->cookie, vcpu, reg, retval));
|
||||
}
|
||||
|
||||
int
|
||||
vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
|
||||
{
|
||||
|
||||
if (vcpu < 0 || vcpu >= VM_MAXCPU)
|
||||
return (EINVAL);
|
||||
|
||||
if (reg >= VM_REG_LAST)
|
||||
return (EINVAL);
|
||||
|
||||
return (VMSETREG(vm->cookie, vcpu, reg, val));
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
is_descriptor_table(int reg)
|
||||
{
|
||||
|
||||
switch (reg) {
|
||||
case VM_REG_GUEST_IDTR:
|
||||
case VM_REG_GUEST_GDTR:
|
||||
return (TRUE);
|
||||
default:
|
||||
return (FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
is_segment_register(int reg)
|
||||
{
|
||||
|
||||
switch (reg) {
|
||||
case VM_REG_GUEST_ES:
|
||||
case VM_REG_GUEST_CS:
|
||||
case VM_REG_GUEST_SS:
|
||||
case VM_REG_GUEST_DS:
|
||||
case VM_REG_GUEST_FS:
|
||||
case VM_REG_GUEST_GS:
|
||||
case VM_REG_GUEST_TR:
|
||||
case VM_REG_GUEST_LDTR:
|
||||
return (TRUE);
|
||||
default:
|
||||
return (FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
|
||||
struct seg_desc *desc)
|
||||
{
|
||||
|
||||
if (vcpu < 0 || vcpu >= VM_MAXCPU)
|
||||
return (EINVAL);
|
||||
|
||||
if (!is_segment_register(reg) && !is_descriptor_table(reg))
|
||||
return (EINVAL);
|
||||
|
||||
return (VMGETDESC(vm->cookie, vcpu, reg, desc));
|
||||
}
|
||||
|
||||
int
|
||||
vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
|
||||
struct seg_desc *desc)
|
||||
{
|
||||
if (vcpu < 0 || vcpu >= VM_MAXCPU)
|
||||
return (EINVAL);
|
||||
|
||||
if (!is_segment_register(reg) && !is_descriptor_table(reg))
|
||||
return (EINVAL);
|
||||
|
||||
return (VMSETDESC(vm->cookie, vcpu, reg, desc));
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_pinning(struct vm *vm, int vcpuid, int *cpuid)
|
||||
{
|
||||
|
||||
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
|
||||
return (EINVAL);
|
||||
|
||||
*cpuid = VCPU_PINCPU(vm, vcpuid);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
vm_set_pinning(struct vm *vm, int vcpuid, int host_cpuid)
|
||||
{
|
||||
struct thread *td;
|
||||
|
||||
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
|
||||
return (EINVAL);
|
||||
|
||||
td = curthread; /* XXXSMP only safe when muxing vcpus */
|
||||
|
||||
/* unpin */
|
||||
if (host_cpuid < 0) {
|
||||
VCPU_UNPIN(vm, vcpuid);
|
||||
thread_lock(td);
|
||||
sched_unbind(td);
|
||||
thread_unlock(td);
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (CPU_ABSENT(host_cpuid))
|
||||
return (EINVAL);
|
||||
|
||||
/*
|
||||
* XXX we should check that 'host_cpuid' has not already been pinned
|
||||
* by another vm.
|
||||
*/
|
||||
thread_lock(td);
|
||||
sched_bind(td, host_cpuid);
|
||||
thread_unlock(td);
|
||||
VCPU_PIN(vm, vcpuid, host_cpuid);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
restore_guest_fpustate(struct vcpu *vcpu)
|
||||
{
|
||||
register_t s;
|
||||
|
||||
s = intr_disable();
|
||||
fpu_stop_emulating();
|
||||
fxrstor(&vcpu->savefpu);
|
||||
fpu_start_emulating();
|
||||
intr_restore(s);
|
||||
}
|
||||
|
||||
static void
|
||||
save_guest_fpustate(struct vcpu *vcpu)
|
||||
{
|
||||
register_t s;
|
||||
|
||||
s = intr_disable();
|
||||
fpu_stop_emulating();
|
||||
fxsave(&vcpu->savefpu);
|
||||
fpu_start_emulating();
|
||||
intr_restore(s);
|
||||
}
|
||||
|
||||
int
|
||||
vm_run(struct vm *vm, struct vm_run *vmrun)
|
||||
{
|
||||
int error, vcpuid;
|
||||
struct vcpu *vcpu;
|
||||
struct pcb *pcb;
|
||||
uint64_t tscval;
|
||||
|
||||
vcpuid = vmrun->cpuid;
|
||||
|
||||
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
|
||||
return (EINVAL);
|
||||
|
||||
vcpu = &vm->vcpu[vcpuid];
|
||||
|
||||
critical_enter();
|
||||
|
||||
tscval = rdtsc();
|
||||
|
||||
pcb = PCPU_GET(curpcb);
|
||||
pcb->pcb_full_iret = 1;
|
||||
|
||||
vcpu->hostcpu = curcpu;
|
||||
|
||||
fpuexit(curthread);
|
||||
restore_guest_msrs(vm, vcpuid);
|
||||
restore_guest_fpustate(vcpu);
|
||||
error = VMRUN(vm->cookie, vcpuid, vmrun->rip, &vmrun->vm_exit);
|
||||
save_guest_fpustate(vcpu);
|
||||
restore_host_msrs(vm, vcpuid);
|
||||
|
||||
vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
|
||||
|
||||
critical_exit();
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_inject_event(struct vm *vm, int vcpuid, int type,
|
||||
int vector, uint32_t code, int code_valid)
|
||||
{
|
||||
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
|
||||
return (EINVAL);
|
||||
|
||||
if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0)
|
||||
return (EINVAL);
|
||||
|
||||
if (vector < 0 || vector > 255)
|
||||
return (EINVAL);
|
||||
|
||||
return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid));
|
||||
}
|
||||
|
||||
int
|
||||
vm_inject_nmi(struct vm *vm, int vcpu)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (vcpu < 0 || vcpu >= VM_MAXCPU)
|
||||
return (EINVAL);
|
||||
|
||||
error = VMNMI(vm->cookie, vcpu);
|
||||
vm_interrupt_hostcpu(vm, vcpu);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
|
||||
{
|
||||
if (vcpu < 0 || vcpu >= VM_MAXCPU)
|
||||
return (EINVAL);
|
||||
|
||||
if (type < 0 || type >= VM_CAP_MAX)
|
||||
return (EINVAL);
|
||||
|
||||
return (VMGETCAP(vm->cookie, vcpu, type, retval));
|
||||
}
|
||||
|
||||
int
|
||||
vm_set_capability(struct vm *vm, int vcpu, int type, int val)
|
||||
{
|
||||
if (vcpu < 0 || vcpu >= VM_MAXCPU)
|
||||
return (EINVAL);
|
||||
|
||||
if (type < 0 || type >= VM_CAP_MAX)
|
||||
return (EINVAL);
|
||||
|
||||
return (VMSETCAP(vm->cookie, vcpu, type, val));
|
||||
}
|
||||
|
||||
uint64_t *
|
||||
vm_guest_msrs(struct vm *vm, int cpu)
|
||||
{
|
||||
return (vm->vcpu[cpu].guest_msrs);
|
||||
}
|
||||
|
||||
struct vlapic *
|
||||
vm_lapic(struct vm *vm, int cpu)
|
||||
{
|
||||
return (vm->vcpu[cpu].vlapic);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
vmm_is_pptdev(int bus, int slot, int func)
|
||||
{
|
||||
int found, b, s, f, n;
|
||||
char *val, *cp, *cp2;
|
||||
|
||||
/*
|
||||
* setenv pptdevs "1/2/3 4/5/6 7/8/9 10/11/12"
|
||||
*/
|
||||
found = 0;
|
||||
cp = val = getenv("pptdevs");
|
||||
while (cp != NULL && *cp != '\0') {
|
||||
if ((cp2 = strchr(cp, ' ')) != NULL)
|
||||
*cp2 = '\0';
|
||||
|
||||
n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
|
||||
if (n == 3 && bus == b && slot == s && func == f) {
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (cp2 != NULL)
|
||||
*cp2++ = ' ';
|
||||
|
||||
cp = cp2;
|
||||
}
|
||||
freeenv(val);
|
||||
return (found);
|
||||
}
|
||||
|
||||
void *
|
||||
vm_iommu_domain(struct vm *vm)
|
||||
{
|
||||
|
||||
return (vm->iommu);
|
||||
}
|
||||
|
||||
void
|
||||
vm_set_run_state(struct vm *vm, int vcpuid, int state)
|
||||
{
|
||||
struct vcpu *vcpu;
|
||||
|
||||
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
|
||||
panic("vm_set_run_state: invalid vcpuid %d", vcpuid);
|
||||
|
||||
vcpu = &vm->vcpu[vcpuid];
|
||||
|
||||
if (state == VCPU_RUNNING) {
|
||||
if (vcpu->flags & VCPU_F_RUNNING) {
|
||||
panic("vm_set_run_state: %s[%d] is already running",
|
||||
vm_name(vm), vcpuid);
|
||||
}
|
||||
vcpu->flags |= VCPU_F_RUNNING;
|
||||
} else {
|
||||
if ((vcpu->flags & VCPU_F_RUNNING) == 0) {
|
||||
panic("vm_set_run_state: %s[%d] is already stopped",
|
||||
vm_name(vm), vcpuid);
|
||||
}
|
||||
vcpu->flags &= ~VCPU_F_RUNNING;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_run_state(struct vm *vm, int vcpuid, int *cpuptr)
|
||||
{
|
||||
int retval, hostcpu;
|
||||
struct vcpu *vcpu;
|
||||
|
||||
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
|
||||
panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
|
||||
|
||||
vcpu = &vm->vcpu[vcpuid];
|
||||
if (vcpu->flags & VCPU_F_RUNNING) {
|
||||
retval = VCPU_RUNNING;
|
||||
hostcpu = vcpu->hostcpu;
|
||||
} else {
|
||||
retval = VCPU_STOPPED;
|
||||
hostcpu = -1;
|
||||
}
|
||||
|
||||
if (cpuptr)
|
||||
*cpuptr = hostcpu;
|
||||
|
||||
return (retval);
|
||||
}
|
||||
|
||||
void
|
||||
vm_activate_cpu(struct vm *vm, int vcpuid)
|
||||
{
|
||||
|
||||
if (vcpuid >= 0 && vcpuid < VM_MAXCPU)
|
||||
vm->active_cpus |= vcpu_mask(vcpuid);
|
||||
}
|
||||
|
||||
cpumask_t
|
||||
vm_active_cpus(struct vm *vm)
|
||||
{
|
||||
|
||||
return (vm->active_cpus);
|
||||
}
|
||||
|
||||
void *
|
||||
vcpu_stats(struct vm *vm, int vcpuid)
|
||||
{
|
||||
|
||||
return (vm->vcpu[vcpuid].stats);
|
||||
}
|
468
sys/amd64/vmm/vmm_dev.c
Normal file
468
sys/amd64/vmm/vmm_dev.c
Normal file
@ -0,0 +1,468 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/queue.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/libkern.h>
|
||||
#include <sys/ioccom.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
|
||||
#include <machine/pmap.h>
|
||||
#include <machine/vmparam.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "vmm_lapic.h"
|
||||
#include "vmm_stat.h"
|
||||
#include "io/ppt.h"
|
||||
#include <machine/vmm_dev.h>
|
||||
|
||||
struct vmmdev_softc {
|
||||
struct vm *vm; /* vm instance cookie */
|
||||
struct cdev *cdev;
|
||||
SLIST_ENTRY(vmmdev_softc) link;
|
||||
};
|
||||
static SLIST_HEAD(, vmmdev_softc) head;
|
||||
|
||||
static struct mtx vmmdev_mtx;
|
||||
|
||||
static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
|
||||
|
||||
SYSCTL_DECL(_hw_vmm);
|
||||
|
||||
static struct vmmdev_softc *
|
||||
vmmdev_lookup(const char *name)
|
||||
{
|
||||
struct vmmdev_softc *sc;
|
||||
|
||||
#ifdef notyet /* XXX kernel is not compiled with invariants */
|
||||
mtx_assert(&vmmdev_mtx, MA_OWNED);
|
||||
#endif
|
||||
|
||||
SLIST_FOREACH(sc, &head, link) {
|
||||
if (strcmp(name, vm_name(sc->vm)) == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
return (sc);
|
||||
}
|
||||
|
||||
static struct vmmdev_softc *
|
||||
vmmdev_lookup2(struct cdev *cdev)
|
||||
{
|
||||
struct vmmdev_softc *sc;
|
||||
|
||||
#ifdef notyet /* XXX kernel is not compiled with invariants */
|
||||
mtx_assert(&vmmdev_mtx, MA_OWNED);
|
||||
#endif
|
||||
|
||||
SLIST_FOREACH(sc, &head, link) {
|
||||
if (sc->cdev == cdev)
|
||||
break;
|
||||
}
|
||||
|
||||
return (sc);
|
||||
}
|
||||
|
||||
static int
|
||||
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
|
||||
{
|
||||
int error, off, c;
|
||||
vm_paddr_t hpa, gpa;
|
||||
struct vmmdev_softc *sc;
|
||||
|
||||
static char zerobuf[PAGE_SIZE];
|
||||
|
||||
error = 0;
|
||||
mtx_lock(&vmmdev_mtx);
|
||||
sc = vmmdev_lookup2(cdev);
|
||||
|
||||
while (uio->uio_resid > 0 && error == 0) {
|
||||
gpa = uio->uio_offset;
|
||||
off = gpa & PAGE_MASK;
|
||||
c = min(uio->uio_resid, PAGE_SIZE - off);
|
||||
|
||||
/*
|
||||
* The VM has a hole in its physical memory map. If we want to
|
||||
* use 'dd' to inspect memory beyond the hole we need to
|
||||
* provide bogus data for memory that lies in the hole.
|
||||
*
|
||||
* Since this device does not support lseek(2), dd(1) will
|
||||
* read(2) blocks of data to simulate the lseek(2).
|
||||
*/
|
||||
hpa = vm_gpa2hpa(sc->vm, gpa, c);
|
||||
if (hpa == (vm_paddr_t)-1) {
|
||||
if (uio->uio_rw == UIO_READ)
|
||||
error = uiomove(zerobuf, c, uio);
|
||||
else
|
||||
error = EFAULT;
|
||||
} else
|
||||
error = uiomove((void *)PHYS_TO_DMAP(hpa), c, uio);
|
||||
}
|
||||
|
||||
mtx_unlock(&vmmdev_mtx);
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
|
||||
struct thread *td)
|
||||
{
|
||||
int error, vcpu;
|
||||
struct vmmdev_softc *sc;
|
||||
struct vm_memory_segment *seg;
|
||||
struct vm_register *vmreg;
|
||||
struct vm_seg_desc* vmsegdesc;
|
||||
struct vm_pin *vmpin;
|
||||
struct vm_run *vmrun;
|
||||
struct vm_event *vmevent;
|
||||
struct vm_lapic_irq *vmirq;
|
||||
struct vm_capability *vmcap;
|
||||
struct vm_pptdev *pptdev;
|
||||
struct vm_pptdev_mmio *pptmmio;
|
||||
struct vm_pptdev_msi *pptmsi;
|
||||
struct vm_nmi *vmnmi;
|
||||
struct vm_stats *vmstats;
|
||||
struct vm_stat_desc *statdesc;
|
||||
|
||||
mtx_lock(&vmmdev_mtx);
|
||||
sc = vmmdev_lookup2(cdev);
|
||||
if (sc == NULL) {
|
||||
mtx_unlock(&vmmdev_mtx);
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
/*
|
||||
* Some VMM ioctls can operate only on vcpus that are not running.
|
||||
*/
|
||||
switch (cmd) {
|
||||
case VM_RUN:
|
||||
case VM_SET_PINNING:
|
||||
case VM_GET_REGISTER:
|
||||
case VM_SET_REGISTER:
|
||||
case VM_GET_SEGMENT_DESCRIPTOR:
|
||||
case VM_SET_SEGMENT_DESCRIPTOR:
|
||||
case VM_INJECT_EVENT:
|
||||
case VM_GET_CAPABILITY:
|
||||
case VM_SET_CAPABILITY:
|
||||
case VM_PPTDEV_MSI:
|
||||
/*
|
||||
* XXX fragile, handle with care
|
||||
* Assumes that the first field of the ioctl data is the vcpu.
|
||||
*/
|
||||
vcpu = *(int *)data;
|
||||
if (vcpu < 0 || vcpu >= VM_MAXCPU) {
|
||||
error = EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (vcpu_is_running(sc->vm, vcpu, NULL)) {
|
||||
error = EBUSY;
|
||||
goto done;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch(cmd) {
|
||||
case VM_RUN:
|
||||
vmrun = (struct vm_run *)data;
|
||||
|
||||
vm_set_run_state(sc->vm, vmrun->cpuid, VCPU_RUNNING);
|
||||
mtx_unlock(&vmmdev_mtx);
|
||||
|
||||
error = vm_run(sc->vm, vmrun);
|
||||
|
||||
mtx_lock(&vmmdev_mtx);
|
||||
vm_set_run_state(sc->vm, vmrun->cpuid, VCPU_STOPPED);
|
||||
break;
|
||||
case VM_STAT_DESC: {
|
||||
const char *desc;
|
||||
statdesc = (struct vm_stat_desc *)data;
|
||||
desc = vmm_stat_desc(statdesc->index);
|
||||
if (desc != NULL) {
|
||||
error = 0;
|
||||
strlcpy(statdesc->desc, desc, sizeof(statdesc->desc));
|
||||
} else
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
case VM_STATS: {
|
||||
CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_TYPES);
|
||||
vmstats = (struct vm_stats *)data;
|
||||
getmicrotime(&vmstats->tv);
|
||||
error = vmm_stat_copy(sc->vm, vmstats->cpuid,
|
||||
&vmstats->num_entries, vmstats->statbuf);
|
||||
break;
|
||||
}
|
||||
case VM_PPTDEV_MSI:
|
||||
pptmsi = (struct vm_pptdev_msi *)data;
|
||||
error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
|
||||
pptmsi->bus, pptmsi->slot, pptmsi->func,
|
||||
pptmsi->destcpu, pptmsi->vector,
|
||||
pptmsi->numvec);
|
||||
break;
|
||||
case VM_MAP_PPTDEV_MMIO:
|
||||
pptmmio = (struct vm_pptdev_mmio *)data;
|
||||
error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
|
||||
pptmmio->func, pptmmio->gpa, pptmmio->len,
|
||||
pptmmio->hpa);
|
||||
break;
|
||||
case VM_BIND_PPTDEV:
|
||||
pptdev = (struct vm_pptdev *)data;
|
||||
error = ppt_assign_device(sc->vm, pptdev->bus, pptdev->slot,
|
||||
pptdev->func);
|
||||
break;
|
||||
case VM_UNBIND_PPTDEV:
|
||||
pptdev = (struct vm_pptdev *)data;
|
||||
error = ppt_unassign_device(sc->vm, pptdev->bus, pptdev->slot,
|
||||
pptdev->func);
|
||||
break;
|
||||
case VM_INJECT_EVENT:
|
||||
vmevent = (struct vm_event *)data;
|
||||
error = vm_inject_event(sc->vm, vmevent->cpuid, vmevent->type,
|
||||
vmevent->vector,
|
||||
vmevent->error_code,
|
||||
vmevent->error_code_valid);
|
||||
break;
|
||||
case VM_INJECT_NMI:
|
||||
vmnmi = (struct vm_nmi *)data;
|
||||
error = vm_inject_nmi(sc->vm, vmnmi->cpuid);
|
||||
break;
|
||||
case VM_LAPIC_IRQ:
|
||||
vmirq = (struct vm_lapic_irq *)data;
|
||||
error = lapic_set_intr(sc->vm, vmirq->cpuid, vmirq->vector);
|
||||
break;
|
||||
case VM_SET_PINNING:
|
||||
vmpin = (struct vm_pin *)data;
|
||||
error = vm_set_pinning(sc->vm, vmpin->vm_cpuid,
|
||||
vmpin->host_cpuid);
|
||||
break;
|
||||
case VM_GET_PINNING:
|
||||
vmpin = (struct vm_pin *)data;
|
||||
error = vm_get_pinning(sc->vm, vmpin->vm_cpuid,
|
||||
&vmpin->host_cpuid);
|
||||
break;
|
||||
case VM_MAP_MEMORY:
|
||||
seg = (struct vm_memory_segment *)data;
|
||||
error = vm_malloc(sc->vm, seg->gpa, seg->len, &seg->hpa);
|
||||
break;
|
||||
case VM_GET_MEMORY_SEG:
|
||||
seg = (struct vm_memory_segment *)data;
|
||||
seg->hpa = seg->len = 0;
|
||||
(void)vm_gpabase2memseg(sc->vm, seg->gpa, seg);
|
||||
error = 0;
|
||||
break;
|
||||
case VM_GET_REGISTER:
|
||||
vmreg = (struct vm_register *)data;
|
||||
error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
|
||||
&vmreg->regval);
|
||||
break;
|
||||
case VM_SET_REGISTER:
|
||||
vmreg = (struct vm_register *)data;
|
||||
error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
|
||||
vmreg->regval);
|
||||
break;
|
||||
case VM_SET_SEGMENT_DESCRIPTOR:
|
||||
vmsegdesc = (struct vm_seg_desc *)data;
|
||||
error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid,
|
||||
vmsegdesc->regnum,
|
||||
&vmsegdesc->desc);
|
||||
break;
|
||||
case VM_GET_SEGMENT_DESCRIPTOR:
|
||||
vmsegdesc = (struct vm_seg_desc *)data;
|
||||
error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid,
|
||||
vmsegdesc->regnum,
|
||||
&vmsegdesc->desc);
|
||||
break;
|
||||
case VM_GET_CAPABILITY:
|
||||
vmcap = (struct vm_capability *)data;
|
||||
error = vm_get_capability(sc->vm, vmcap->cpuid,
|
||||
vmcap->captype,
|
||||
&vmcap->capval);
|
||||
break;
|
||||
case VM_SET_CAPABILITY:
|
||||
vmcap = (struct vm_capability *)data;
|
||||
error = vm_set_capability(sc->vm, vmcap->cpuid,
|
||||
vmcap->captype,
|
||||
vmcap->capval);
|
||||
break;
|
||||
default:
|
||||
error = ENOTTY;
|
||||
break;
|
||||
}
|
||||
done:
|
||||
mtx_unlock(&vmmdev_mtx);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
vmmdev_mmap(struct cdev *cdev, vm_offset_t offset, vm_paddr_t *paddr, int nprot)
|
||||
{
|
||||
int error;
|
||||
struct vmmdev_softc *sc;
|
||||
|
||||
error = -1;
|
||||
mtx_lock(&vmmdev_mtx);
|
||||
|
||||
sc = vmmdev_lookup2(cdev);
|
||||
if (sc != NULL && (nprot & PROT_EXEC) == 0) {
|
||||
*paddr = vm_gpa2hpa(sc->vm, (vm_paddr_t)offset, PAGE_SIZE);
|
||||
if (*paddr != (vm_paddr_t)-1)
|
||||
error = 0;
|
||||
}
|
||||
|
||||
mtx_unlock(&vmmdev_mtx);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
 * Tear down one VM control device: unlink the softc from the global
 * list, remove the /dev/vmm/<name> node, destroy the underlying virtual
 * machine and free the softc.  Caller must hold vmmdev_mtx (the assert
 * is compiled out, see below).
 */
static void
vmmdev_destroy(struct vmmdev_softc *sc)
{

#ifdef notyet	/* XXX kernel is not compiled with invariants */
	mtx_assert(&vmmdev_mtx, MA_OWNED);
#endif

	/*
	 * XXX must stop virtual machine instances that may be still
	 * running and cleanup their state.
	 */
	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
	/* NOTE(review): destroy_dev() can sleep and is called with the
	 * non-sleepable vmmdev_mtx held -- confirm this is safe here. */
	destroy_dev(sc->cdev);
	vm_destroy(sc->vm);
	free(sc, M_VMMDEV);
}
|
||||
|
||||
/*
 * Sysctl handler for 'hw.vmm.destroy'.  The string written to the
 * sysctl names the virtual machine to destroy.  Returns EINVAL if no
 * VM with that name exists.
 */
static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	int error;
	char buf[VM_MAX_NAMELEN];
	struct vmmdev_softc *sc;

	/* "beavis" is only a placeholder value returned on reads */
	strlcpy(buf, "beavis", sizeof(buf));
	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	/* newptr == NULL means this was a read, not a write: nothing to do */
	if (error != 0 || req->newptr == NULL)
		return (error);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	if (sc == NULL) {
		mtx_unlock(&vmmdev_mtx);
		return (EINVAL);
	}
	vmmdev_destroy(sc);
	mtx_unlock(&vmmdev_mtx);
	return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
	    NULL, 0, sysctl_vmm_destroy, "A", NULL);
|
||||
|
||||
/*
 * Character device switch for the /dev/vmm/<name> nodes.  There are no
 * open/close methods; each entry point looks up its softc from the cdev
 * (via si_drv1 / vmmdev_lookup2) on every call.
 */
static struct cdevsw vmmdevsw = {
	.d_name		= "vmmdev",
	.d_version	= D_VERSION,
	.d_ioctl	= vmmdev_ioctl,
	.d_mmap		= vmmdev_mmap,
	.d_read		= vmmdev_rw,
	.d_write	= vmmdev_rw,
};
|
||||
|
||||
static int
|
||||
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
int error;
|
||||
struct vm *vm;
|
||||
struct vmmdev_softc *sc;
|
||||
char buf[VM_MAX_NAMELEN];
|
||||
|
||||
strlcpy(buf, "beavis", sizeof(buf));
|
||||
error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
|
||||
if (error != 0 || req->newptr == NULL)
|
||||
return (error);
|
||||
|
||||
mtx_lock(&vmmdev_mtx);
|
||||
|
||||
sc = vmmdev_lookup(buf);
|
||||
if (sc != NULL) {
|
||||
mtx_unlock(&vmmdev_mtx);
|
||||
return (EEXIST);
|
||||
}
|
||||
|
||||
vm = vm_create(buf);
|
||||
if (vm == NULL) {
|
||||
mtx_unlock(&vmmdev_mtx);
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
|
||||
sc->vm = vm;
|
||||
sc->cdev = make_dev(&vmmdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
|
||||
"vmm/%s", buf);
|
||||
sc->cdev->si_drv1 = sc;
|
||||
SLIST_INSERT_HEAD(&head, sc, link);
|
||||
|
||||
mtx_unlock(&vmmdev_mtx);
|
||||
return (0);
|
||||
}
|
||||
SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW,
|
||||
NULL, 0, sysctl_vmm_create, "A", NULL);
|
||||
|
||||
/*
 * One-time initialization of the vmm device layer: set up the mutex
 * that protects the softc list and per-device state.
 */
void
vmmdev_init(void)
{
	mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
}
|
||||
|
||||
/*
 * Destroy all remaining VM control devices (module unload path).
 */
void
vmmdev_cleanup(void)
{
	struct vmmdev_softc *sc, *sc2;

	mtx_lock(&vmmdev_mtx);

	/* _SAFE variant because vmmdev_destroy() unlinks 'sc' from the list */
	SLIST_FOREACH_SAFE(sc, &head, link, sc2)
		vmmdev_destroy(sc);

	mtx_unlock(&vmmdev_mtx);
}
|
103
sys/amd64/vmm/vmm_ipi.c
Normal file
103
sys/amd64/vmm/vmm_ipi.c
Normal file
@ -0,0 +1,103 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/bus.h>
|
||||
|
||||
#include <machine/intr_machdep.h>
|
||||
#include <machine/apicvar.h>
|
||||
#include <machine/segments.h>
|
||||
#include <machine/md_var.h>
|
||||
#include <machine/smp.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "vmm_ipi.h"
|
||||
|
||||
/* IDT handlers: 'rsvd' marks unused vectors, 'justreturn' only EOIs */
extern inthand_t IDTVEC(rsvd), IDTVEC(justreturn);

/*
 * The default is to use the IPI_AST to interrupt a vcpu.
 */
static int ipinum = IPI_AST;

/* vmm_ipi_init() searches the IDT downward from this vector */
CTASSERT(APIC_SPURIOUS_INT == 255);
|
||||
|
||||
/*
 * Claim an unused IDT vector for a lightweight "just return" IPI used
 * to kick vcpus out of guest context.  Falls back to IPI_AST if no
 * reserved vector is found.
 */
void
vmm_ipi_init(void)
{
	int idx;
	uintptr_t func;
	struct gate_descriptor *ip;

	/*
	 * Search backwards from the highest IDT vector available for use
	 * as our IPI vector. We install the 'justreturn' handler at that
	 * vector and use it to interrupt the vcpus.
	 *
	 * We do this because the IPI_AST is heavyweight and saves all
	 * registers in the trapframe. This is overkill for our use case
	 * which is simply to EOI the interrupt and return.
	 */
	idx = APIC_SPURIOUS_INT;
	while (--idx >= APIC_IPI_INTS) {
		ip = &idt[idx];
		/* reassemble the handler address from the gate descriptor */
		func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
		if (func == (uintptr_t)&IDTVEC(rsvd)) {
			/* vector is unused: install our handler there */
			ipinum = idx;
			setidt(ipinum, IDTVEC(justreturn), SDT_SYSIGT,
			       SEL_KPL, 0);
			break;
		}
	}

	if (ipinum != IPI_AST && bootverbose) {
		printf("vmm_ipi_init: installing ipi handler to interrupt "
		       "vcpus at vector %d\n", ipinum);
	}
}
|
||||
|
||||
/*
 * Release the IDT vector claimed by vmm_ipi_init() by restoring the
 * 'rsvd' handler.  Nothing to undo if we fell back to IPI_AST.
 */
void
vmm_ipi_cleanup(void)
{
	if (ipinum != IPI_AST)
		setidt(ipinum, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
}
|
||||
|
||||
/*
 * Send an IPI to the host cpu that 'vcpu' is currently running on so
 * it breaks out of guest context.  No-op if the vcpu is not running or
 * is running on the calling cpu.
 */
void
vm_interrupt_hostcpu(struct vm *vm, int vcpu)
{
	int hostcpu;

	if (vcpu_is_running(vm, vcpu, &hostcpu) && hostcpu != curcpu)
		ipi_selected((cpumask_t)1 << hostcpu, ipinum);
}
|
38
sys/amd64/vmm/vmm_ipi.h
Normal file
38
sys/amd64/vmm/vmm_ipi.h
Normal file
@ -0,0 +1,38 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMM_IPI_H_
#define _VMM_IPI_H_

struct vm;

/* Claim/release the lightweight IPI vector used to interrupt vcpus. */
void vmm_ipi_init(void);
void vmm_ipi_cleanup(void);
/* Kick the host cpu on which 'vcpu' is currently executing. */
void vm_interrupt_hostcpu(struct vm *vm, int vcpu);

#endif
|
51
sys/amd64/vmm/vmm_ktr.h
Normal file
51
sys/amd64/vmm/vmm_ktr.h
Normal file
@ -0,0 +1,51 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMM_KTR_H_
#define _VMM_KTR_H_

#include <sys/ktr.h>
#include <sys/pcpu.h>

/* VMM trace records are logged under the generic KTR class */
#define KTR_VMM	KTR_GEN

/*
 * CTRn wrappers that prefix every trace record with the vm name, the
 * vcpu id and the host cpu ('curcpu') that logged the event.  The
 * parameter count of the underlying CTRn is three higher than the
 * VMM_CTRn name suggests because of those three extra fields.
 */
#define VMM_CTR0(vm, vcpuid, format) \
CTR3(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu)

#define VMM_CTR1(vm, vcpuid, format, p1) \
CTR4(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu, \
     (p1))

#define VMM_CTR2(vm, vcpuid, format, p1, p2) \
CTR5(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu, \
     (p1), (p2))

#define VMM_CTR3(vm, vcpuid, format, p1, p2, p3) \
CTR6(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu, \
     (p1), (p2), (p3))
#endif
|
121
sys/amd64/vmm/vmm_lapic.c
Normal file
121
sys/amd64/vmm/vmm_lapic.c
Normal file
@ -0,0 +1,121 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "vmm_ipi.h"
|
||||
#include "vmm_lapic.h"
|
||||
#include "vlapic.h"
|
||||
|
||||
int
|
||||
lapic_write(struct vm *vm, int cpu, u_int offset, uint64_t val)
|
||||
{
|
||||
int handled;
|
||||
|
||||
struct vlapic *vlapic;
|
||||
|
||||
vlapic = vm_lapic(vm, cpu);
|
||||
|
||||
if (vlapic_op_mem_write(vlapic, offset, DWORD, val) == 0)
|
||||
handled = 1;
|
||||
else
|
||||
handled = 0;
|
||||
|
||||
return (handled);
|
||||
}
|
||||
|
||||
int
|
||||
lapic_read(struct vm *vm, int cpu, u_int offset, uint64_t *rv)
|
||||
{
|
||||
int handled;
|
||||
|
||||
struct vlapic *vlapic;
|
||||
|
||||
vlapic = vm_lapic(vm, cpu);
|
||||
|
||||
if (vlapic_op_mem_read(vlapic, offset, DWORD, rv) == 0)
|
||||
handled = 1;
|
||||
else
|
||||
handled = 0;
|
||||
|
||||
return (handled);
|
||||
}
|
||||
|
||||
/*
 * Return the highest-priority deliverable vector pending in vcpu
 * 'cpu's virtual APIC, as reported by vlapic_pending_intr().
 */
int
lapic_pending_intr(struct vm *vm, int cpu)
{

	return (vlapic_pending_intr(vm_lapic(vm, cpu)));
}
|
||||
|
||||
/*
 * Notify vcpu 'cpu's virtual APIC that the guest has accepted
 * interrupt 'vector'.
 */
void
lapic_intr_accepted(struct vm *vm, int cpu, int vector)
{

	vlapic_intr_accepted(vm_lapic(vm, cpu), vector);
}
|
||||
|
||||
int
|
||||
lapic_set_intr(struct vm *vm, int cpu, int vector)
|
||||
{
|
||||
struct vlapic *vlapic;
|
||||
|
||||
if (cpu < 0 || cpu >= VM_MAXCPU)
|
||||
return (EINVAL);
|
||||
|
||||
if (vector < 32 || vector > 255)
|
||||
return (EINVAL);
|
||||
|
||||
vlapic = vm_lapic(vm, cpu);
|
||||
vlapic_set_intr_ready(vlapic, vector);
|
||||
|
||||
vm_interrupt_hostcpu(vm, cpu);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Advance the virtual APIC timer of vcpu 'cpu' by one tick.
 */
void
lapic_timer_tick(struct vm *vm, int cpu)
{

	vlapic_timer_tick(vm_lapic(vm, cpu));
}
|
64
sys/amd64/vmm/vmm_lapic.h
Normal file
64
sys/amd64/vmm/vmm_lapic.h
Normal file
@ -0,0 +1,64 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMM_LAPIC_H_
#define _VMM_LAPIC_H_

struct vm;

/*
 * Handle a guest access to the local APIC of vcpu 'cpu'.  Both return
 * 1 if the access was handled and 0 otherwise.
 */
int lapic_write(struct vm *vm, int cpu, u_int offset, uint64_t val);
int lapic_read(struct vm *vm, int cpu, u_int offset, uint64_t *retval);
/* Advance the virtual APIC timer of vcpu 'cpu' by one tick. */
void lapic_timer_tick(struct vm *vm, int cpu);

/*
 * Returns a vector between 32 and 255 if an interrupt is pending in the
 * IRR that can be delivered based on the current state of ISR and TPR.
 *
 * Note that the vector does not automatically transition to the ISR as a
 * result of calling this function.
 *
 * Returns -1 if there is no eligible vector that can be delivered to the
 * guest at this time.
 */
int lapic_pending_intr(struct vm *vm, int cpu);

/*
 * Transition 'vector' from IRR to ISR. This function is called with the
 * vector returned by 'lapic_pending_intr()' when the guest is able to
 * accept this interrupt (i.e. RFLAGS.IF = 1 and no conditions exist that
 * block interrupt delivery).
 */
void lapic_intr_accepted(struct vm *vm, int cpu, int vector);

/*
 * Signals to the LAPIC that an interrupt at 'vector' needs to be generated
 * to the 'cpu', the state is recorded in IRR.
 */
int lapic_set_intr(struct vm *vm, int cpu, int vector);

#endif
|
413
sys/amd64/vmm/vmm_mem.c
Normal file
413
sys/amd64/vmm/vmm_mem.c
Normal file
@ -0,0 +1,413 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/linker.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/kernel.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
|
||||
#include <machine/md_var.h>
|
||||
#include <machine/metadata.h>
|
||||
#include <machine/pc/bios.h>
|
||||
#include <machine/vmparam.h>
|
||||
#include <machine/pmap.h>
|
||||
|
||||
#include "vmm_util.h"
|
||||
#include "vmm_mem.h"
|
||||
|
||||
static MALLOC_DEFINE(M_VMM_MEM, "vmm memory", "vmm memory");

#define	MB	(1024 * 1024)
#define	GB	(1024 * MB)

/* Maximum number of entries in the free-segment array below */
#define	VMM_MEM_MAXSEGS	64

/*
 * Free list of physical memory segments hidden from FreeBSD (see
 * vmm_mem_steal_memory), kept sorted by base address.
 * protected by vmm_mem_mtx
 */
static struct {
	vm_paddr_t	base;
	vm_size_t	length;
} vmm_mem_avail[VMM_MEM_MAXSEGS];

/* number of valid entries in vmm_mem_avail[] */
static int vmm_mem_nsegs;

/* highest physical address of any stolen segment; NOTE(review): not
 * read within this file -- presumably consumed elsewhere, verify */
static vm_paddr_t maxaddr;

static struct mtx vmm_mem_mtx;
|
||||
|
||||
/*
|
||||
* Steal any memory that was deliberately hidden from FreeBSD either by
|
||||
* the use of MAXMEM kernel config option or the hw.physmem loader tunable.
|
||||
*/
|
||||
/*
 * Walk the BIOS SMAP supplied by the loader and collect, into
 * vmm_mem_avail[], every 2MB-aligned chunk of usable RAM that lies
 * above what FreeBSD itself manages (ptoa(Maxmem)).  Also tracks the
 * highest such address in 'maxaddr'.  Returns 0 on success or ENOSPC
 * if more than VMM_MEM_MAXSEGS segments are found.
 */
static int
vmm_mem_steal_memory(void)
{
	int nsegs;
	caddr_t kmdp;
	uint32_t smapsize;
	uint64_t base, length;
	struct bios_smap *smapbase, *smap, *smapend;

	/*
	 * Borrowed from hammer_time() and getmemsize() in machdep.c
	 */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");

	smapbase = (struct bios_smap *)preload_search_info(kmdp,
		MODINFO_METADATA | MODINFOMD_SMAP);
	if (smapbase == NULL)
		panic("No BIOS smap info from loader!");

	/* the metadata word preceding the array holds its byte size */
	smapsize = *((uint32_t *)smapbase - 1);
	smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);

	nsegs = 0;
	for (smap = smapbase; smap < smapend; smap++) {
		/*
		 * XXX
		 * Assuming non-overlapping, monotonically increasing
		 * memory segments.
		 */
		if (smap->type != SMAP_TYPE_MEMORY)
			continue;
		if (smap->length == 0)
			break;

		/* shrink the segment to 2MB (NBPDR) alignment */
		base = roundup(smap->base, NBPDR);
		length = rounddown(smap->length, NBPDR);

		/* Skip this segment if FreeBSD is using all of it. */
		if (base + length <= ptoa(Maxmem))
			continue;

		/*
		 * If FreeBSD is using part of this segment then adjust
		 * 'base' and 'length' accordingly.
		 */
		if (base < ptoa(Maxmem)) {
			uint64_t used;
			used = roundup(ptoa(Maxmem), NBPDR) - base;
			base += used;
			length -= used;
		}

		if (length == 0)
			continue;

		vmm_mem_avail[nsegs].base = base;
		vmm_mem_avail[nsegs].length = length;

		if (base + length > maxaddr)
			maxaddr = base + length;

		/* debug output, disabled by the '0 &&' */
		if (0 && bootverbose) {
			printf("vmm_mem_populate: index %d, base 0x%0lx, "
			       "length %ld\n",
			       nsegs, vmm_mem_avail[nsegs].base,
			       vmm_mem_avail[nsegs].length);
		}

		nsegs++;
		if (nsegs >= VMM_MEM_MAXSEGS) {
			printf("vmm_mem_populate: maximum number of vmm memory "
			       "segments reached!\n");
			return (ENOSPC);
		}
	}

	vmm_mem_nsegs = nsegs;

	return (0);
}
|
||||
|
||||
/*
 * Install direct-map (PHYS_TO_DMAP) translations for the physical range
 * [start, end) using superpages, so the hypervisor can access the
 * stolen memory through the kernel's direct map.  Uses 1GB pages when
 * supported (currently disabled, see below) and 2MB pages otherwise.
 * Panics if an existing mapping conflicts with the one required.
 */
static void
vmm_mem_direct_map(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t addr, remaining;
	int pdpi, pdi, superpage_size;
	pml4_entry_t *pml4p;
	pdp_entry_t *pdp;
	pd_entry_t *pd;
	uint64_t page_attr_bits;

	/* one PML4 slot covers the whole direct map used here */
	if (end >= NBPML4)
		panic("Cannot map memory beyond %ldGB", NBPML4 / GB);

	/* XXX FreeBSD 8.1 does not use 1G superpages in the direct map */
	if (0 && vmm_supports_1G_pages())
		superpage_size = NBPDP;
	else
		superpage_size = NBPDR;

	/*
	 * Get the page directory pointer page that contains the direct
	 * map address mappings.
	 */
	pml4p = kernel_pmap->pm_pml4;
	pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4p[DMPML4I] & ~PAGE_MASK);

	page_attr_bits = PG_RW | PG_V | PG_PS | PG_G;
	addr = start;
	while (addr < end) {
		remaining = end - addr;
		pdpi = addr / NBPDP;
		/* take the 1GB-page path only for aligned, large-enough runs */
		if (superpage_size == NBPDP &&
		    remaining >= NBPDP &&
		    addr % NBPDP == 0) {
			/*
			 * If there isn't a mapping for this address then
			 * create one but if there is one already make sure
			 * it matches what we expect it to be.
			 */
			if (pdp[pdpi] == 0) {
				pdp[pdpi] = addr | page_attr_bits;
				if (0 && bootverbose) {
					printf("vmm_mem_populate: mapping "
					       "0x%lx with 1GB page at "
					       "pdpi %d\n", addr, pdpi);
				}
			} else {
				pdp_entry_t pdpe = pdp[pdpi];
				if ((pdpe & ~PAGE_MASK) != addr ||
				    (pdpe & page_attr_bits) != page_attr_bits) {
					panic("An invalid mapping 0x%016lx "
					      "already exists for 0x%016lx\n",
					      pdpe, addr);
				}
			}
			addr += NBPDP;
		} else {
			/* 2MB-page path: needs a page directory per 1GB slot */
			if (remaining < NBPDR) {
				panic("vmm_mem_populate: remaining (%ld) must "
				      "be greater than NBPDR (%d)\n",
				      remaining, NBPDR);
			}
			if (pdp[pdpi] == 0) {
				/*
				 * XXX we lose this memory forever because
				 * we do not keep track of the virtual address
				 * that would be required to free this page.
				 */
				pd = malloc(PAGE_SIZE, M_VMM_MEM,
					    M_WAITOK | M_ZERO);
				/* relies on malloc returning a page-aligned
				 * allocation for PAGE_SIZE requests */
				if ((uintptr_t)pd & PAGE_MASK) {
					panic("vmm_mem_populate: page directory"
					      "page not aligned on %d "
					      "boundary\n", PAGE_SIZE);
				}
				pdp[pdpi] = vtophys(pd);
				pdp[pdpi] |= PG_RW | PG_V | PG_U;
				if (0 && bootverbose) {
					printf("Creating page directory "
					       "at pdp index %d for 0x%016lx\n",
					       pdpi, addr);
				}
			}
			pdi = (addr % NBPDP) / NBPDR;
			pd = (pd_entry_t *)PHYS_TO_DMAP(pdp[pdpi] & ~PAGE_MASK);

			/*
			 * Create a new mapping if one doesn't already exist
			 * or validate it if it does.
			 */
			if (pd[pdi] == 0) {
				pd[pdi] = addr | page_attr_bits;
				if (0 && bootverbose) {
					printf("vmm_mem_populate: mapping "
					       "0x%lx with 2MB page at "
					       "pdpi %d, pdi %d\n",
					       addr, pdpi, pdi);
				}
			} else {
				pd_entry_t pde = pd[pdi];
				if ((pde & ~PAGE_MASK) != addr ||
				    (pde & page_attr_bits) != page_attr_bits) {
					panic("An invalid mapping 0x%016lx "
					      "already exists for 0x%016lx\n",
					      pde, addr);
				}
			}
			addr += NBPDR;
		}
	}
}
|
||||
|
||||
/*
 * Build the hypervisor memory pool: steal hidden physical memory into
 * vmm_mem_avail[] and then enter direct-map translations for every
 * stolen segment.  Returns 0 on success or an errno from
 * vmm_mem_steal_memory().
 */
static int
vmm_mem_populate(void)
{
	int seg, error;
	vm_paddr_t start, end;

	/* populate the vmm_mem_avail[] array */
	error = vmm_mem_steal_memory();
	if (error)
		return (error);

	/*
	 * Now map the memory that was hidden from FreeBSD in
	 * the direct map VA space.
	 */
	for (seg = 0; seg < vmm_mem_nsegs; seg++) {
		start = vmm_mem_avail[seg].base;
		end = start + vmm_mem_avail[seg].length;
		/* segments are 2MB-aligned by construction; verify anyway */
		if ((start & PDRMASK) != 0 || (end & PDRMASK) != 0) {
			panic("start (0x%016lx) and end (0x%016lx) must be "
			      "aligned on a %dMB boundary\n",
			      start, end, NBPDR / MB);
		}
		vmm_mem_direct_map(start, end);
	}

	return (0);
}
|
||||
|
||||
int
|
||||
vmm_mem_init(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
mtx_init(&vmm_mem_mtx, "vmm_mem_mtx", NULL, MTX_DEF);
|
||||
|
||||
error = vmm_mem_populate();
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * First-fit allocator over vmm_mem_avail[]: carve 'size' bytes (which
 * must be a multiple of NBPDR, i.e. 2MB) off the front of the first
 * segment large enough.  Returns the physical base address, or 0 if no
 * segment can satisfy the request (0 doubles as the failure value since
 * the pool never contains physical address zero).
 */
vm_paddr_t
vmm_mem_alloc(size_t size)
{
	int i;
	vm_paddr_t addr;

	if ((size & PDRMASK) != 0) {
		panic("vmm_mem_alloc: size 0x%0lx must be "
		      "aligned on a 0x%0x boundary\n", size, NBPDR);
	}

	addr = 0;

	mtx_lock(&vmm_mem_mtx);
	for (i = 0; i < vmm_mem_nsegs; i++) {
		if (vmm_mem_avail[i].length >= size) {
			addr = vmm_mem_avail[i].base;
			vmm_mem_avail[i].base += size;
			vmm_mem_avail[i].length -= size;
			/* remove a zero length segment */
			if (vmm_mem_avail[i].length == 0) {
				memmove(&vmm_mem_avail[i],
					&vmm_mem_avail[i + 1],
					(vmm_mem_nsegs - (i + 1)) *
					sizeof(vmm_mem_avail[0]));
				vmm_mem_nsegs--;
			}
			break;
		}
	}
	mtx_unlock(&vmm_mem_mtx);

	return (addr);
}
|
||||
|
||||
void
|
||||
vmm_mem_free(vm_paddr_t base, size_t length)
|
||||
{
|
||||
int i;
|
||||
|
||||
if ((base & PDRMASK) != 0 || (length & PDRMASK) != 0) {
|
||||
panic("vmm_mem_free: base 0x%0lx and length 0x%0lx must be "
|
||||
"aligned on a 0x%0x boundary\n", base, length, NBPDR);
|
||||
}
|
||||
|
||||
mtx_lock(&vmm_mem_mtx);
|
||||
|
||||
for (i = 0; i < vmm_mem_nsegs; i++) {
|
||||
if (vmm_mem_avail[i].base > base)
|
||||
break;
|
||||
}
|
||||
|
||||
if (vmm_mem_nsegs >= VMM_MEM_MAXSEGS)
|
||||
panic("vmm_mem_free: cannot free any more segments");
|
||||
|
||||
/* Create a new segment at index 'i' */
|
||||
memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i],
|
||||
(vmm_mem_nsegs - i) * sizeof(vmm_mem_avail[0]));
|
||||
|
||||
vmm_mem_avail[i].base = base;
|
||||
vmm_mem_avail[i].length = length;
|
||||
|
||||
vmm_mem_nsegs++;
|
||||
|
||||
coalesce_some_more:
|
||||
for (i = 0; i < vmm_mem_nsegs - 1; i++) {
|
||||
if (vmm_mem_avail[i].base + vmm_mem_avail[i].length ==
|
||||
vmm_mem_avail[i + 1].base) {
|
||||
vmm_mem_avail[i].length += vmm_mem_avail[i + 1].length;
|
||||
memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i + 2],
|
||||
(vmm_mem_nsegs - (i + 2)) * sizeof(vmm_mem_avail[0]));
|
||||
vmm_mem_nsegs--;
|
||||
goto coalesce_some_more;
|
||||
}
|
||||
}
|
||||
|
||||
mtx_unlock(&vmm_mem_mtx);
|
||||
}
|
||||
|
||||
vm_paddr_t
|
||||
vmm_mem_maxaddr(void)
|
||||
{
|
||||
|
||||
return (maxaddr);
|
||||
}
|
||||
|
||||
void
|
||||
vmm_mem_dump(void)
|
||||
{
|
||||
int i;
|
||||
vm_paddr_t base;
|
||||
vm_size_t length;
|
||||
|
||||
mtx_lock(&vmm_mem_mtx);
|
||||
for (i = 0; i < vmm_mem_nsegs; i++) {
|
||||
base = vmm_mem_avail[i].base;
|
||||
length = vmm_mem_avail[i].length;
|
||||
printf("%-4d0x%016lx 0x%016lx\n", i, base, base + length);
|
||||
}
|
||||
mtx_unlock(&vmm_mem_mtx);
|
||||
}
|
38
sys/amd64/vmm/vmm_mem.h
Normal file
38
sys/amd64/vmm/vmm_mem.h
Normal file
@ -0,0 +1,38 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMM_MEM_H_
|
||||
#define _VMM_MEM_H_
|
||||
|
||||
int vmm_mem_init(void);
|
||||
vm_paddr_t vmm_mem_alloc(size_t size);
|
||||
void vmm_mem_free(vm_paddr_t start, size_t size);
|
||||
vm_paddr_t vmm_mem_maxaddr(void);
|
||||
void vmm_mem_dump(void);
|
||||
|
||||
#endif
|
264
sys/amd64/vmm/vmm_msr.c
Normal file
264
sys/amd64/vmm/vmm_msr.c
Normal file
@ -0,0 +1,264 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
|
||||
#include <machine/specialreg.h>
|
||||
#include <machine/apicreg.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "vmm_lapic.h"
|
||||
#include "vmm_msr.h"
|
||||
|
||||
#define VMM_MSR_F_EMULATE 0x01
|
||||
#define VMM_MSR_F_READONLY 0x02
|
||||
|
||||
struct vmm_msr {
|
||||
int num;
|
||||
int flags;
|
||||
uint64_t hostval;
|
||||
};
|
||||
|
||||
static struct vmm_msr vmm_msr[] = {
|
||||
{ MSR_LSTAR, 0 },
|
||||
{ MSR_CSTAR, 0 },
|
||||
{ MSR_STAR, 0 },
|
||||
{ MSR_SF_MASK, 0 },
|
||||
{ MSR_APICBASE, VMM_MSR_F_EMULATE },
|
||||
{ MSR_BIOS_SIGN,VMM_MSR_F_EMULATE },
|
||||
{ MSR_MCG_CAP, VMM_MSR_F_EMULATE | VMM_MSR_F_READONLY },
|
||||
};
|
||||
|
||||
#define vmm_msr_num (sizeof(vmm_msr) / sizeof(vmm_msr[0]))
|
||||
CTASSERT(VMM_MSR_NUM >= vmm_msr_num);
|
||||
|
||||
#define readonly_msr(idx) \
|
||||
((vmm_msr[(idx)].flags & VMM_MSR_F_READONLY) != 0)
|
||||
|
||||
#define emulated_msr(idx) \
|
||||
((vmm_msr[(idx)].flags & VMM_MSR_F_EMULATE) != 0)
|
||||
|
||||
void
|
||||
vmm_msr_init(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < vmm_msr_num; i++) {
|
||||
if (emulated_msr(i))
|
||||
continue;
|
||||
/*
|
||||
* XXX this assumes that the value of the host msr does not
|
||||
* change after we have cached it.
|
||||
*/
|
||||
vmm_msr[i].hostval = rdmsr(vmm_msr[i].num);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
guest_msrs_init(struct vm *vm, int cpu)
|
||||
{
|
||||
int i;
|
||||
uint64_t *guest_msrs;
|
||||
|
||||
guest_msrs = vm_guest_msrs(vm, cpu);
|
||||
|
||||
for (i = 0; i < vmm_msr_num; i++) {
|
||||
switch (vmm_msr[i].num) {
|
||||
case MSR_LSTAR:
|
||||
case MSR_CSTAR:
|
||||
case MSR_STAR:
|
||||
case MSR_SF_MASK:
|
||||
case MSR_BIOS_SIGN:
|
||||
case MSR_MCG_CAP:
|
||||
guest_msrs[i] = 0;
|
||||
break;
|
||||
case MSR_APICBASE:
|
||||
guest_msrs[i] = DEFAULT_APIC_BASE | APICBASE_ENABLED |
|
||||
APICBASE_X2APIC;
|
||||
if (cpu == 0)
|
||||
guest_msrs[i] |= APICBASE_BSP;
|
||||
break;
|
||||
default:
|
||||
panic("guest_msrs_init: missing initialization for msr "
|
||||
"0x%0x", vmm_msr[i].num);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
x2apic_msr(u_int num)
|
||||
{
|
||||
|
||||
if (num >= 0x800 && num <= 0xBFF)
|
||||
return (TRUE);
|
||||
else
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
static u_int
|
||||
x2apic_msr_to_regoff(u_int msr)
|
||||
{
|
||||
|
||||
return ((msr - 0x800) << 4);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
x2apic_msr_id(u_int num)
|
||||
{
|
||||
return (num == 0x802);
|
||||
}
|
||||
|
||||
static int
|
||||
msr_num_to_idx(u_int num)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < vmm_msr_num; i++)
|
||||
if (vmm_msr[i].num == num)
|
||||
return (i);
|
||||
|
||||
return (-1);
|
||||
}
|
||||
|
||||
int
|
||||
emulate_wrmsr(struct vm *vm, int cpu, u_int num, uint64_t val)
|
||||
{
|
||||
int handled, idx;
|
||||
uint64_t *guest_msrs;
|
||||
|
||||
handled = 0;
|
||||
|
||||
if (x2apic_msr(num))
|
||||
return (lapic_write(vm, cpu, x2apic_msr_to_regoff(num), val));
|
||||
|
||||
idx = msr_num_to_idx(num);
|
||||
if (idx < 0)
|
||||
goto done;
|
||||
|
||||
if (!readonly_msr(idx)) {
|
||||
guest_msrs = vm_guest_msrs(vm, cpu);
|
||||
|
||||
/* Stash the value */
|
||||
guest_msrs[idx] = val;
|
||||
|
||||
/* Update processor state for non-emulated MSRs */
|
||||
if (!emulated_msr(idx))
|
||||
wrmsr(vmm_msr[idx].num, val);
|
||||
}
|
||||
|
||||
handled = 1;
|
||||
done:
|
||||
return (handled);
|
||||
}
|
||||
|
||||
int
|
||||
emulate_rdmsr(struct vm *vm, int cpu, u_int num)
|
||||
{
|
||||
int error, handled, idx;
|
||||
uint32_t eax, edx;
|
||||
uint64_t result, *guest_msrs;
|
||||
|
||||
handled = 0;
|
||||
|
||||
if (x2apic_msr(num)) {
|
||||
handled = lapic_read(vm, cpu, x2apic_msr_to_regoff(num),
|
||||
&result);
|
||||
/*
|
||||
* The version ID needs to be massaged
|
||||
*/
|
||||
if (x2apic_msr_id(num)) {
|
||||
result = result >> 24;
|
||||
}
|
||||
goto done;
|
||||
}
|
||||
|
||||
idx = msr_num_to_idx(num);
|
||||
if (idx < 0)
|
||||
goto done;
|
||||
|
||||
guest_msrs = vm_guest_msrs(vm, cpu);
|
||||
result = guest_msrs[idx];
|
||||
|
||||
/*
|
||||
* If this is not an emulated msr register make sure that the processor
|
||||
* state matches our cached state.
|
||||
*/
|
||||
if (!emulated_msr(idx) && (rdmsr(num) != result)) {
|
||||
panic("emulate_rdmsr: msr 0x%0x has inconsistent cached "
|
||||
"(0x%016lx) and actual (0x%016lx) values", num,
|
||||
result, rdmsr(num));
|
||||
}
|
||||
|
||||
handled = 1;
|
||||
|
||||
done:
|
||||
if (handled) {
|
||||
eax = result;
|
||||
edx = result >> 32;
|
||||
error = vm_set_register(vm, cpu, VM_REG_GUEST_RAX, eax);
|
||||
if (error)
|
||||
panic("vm_set_register(rax) error %d", error);
|
||||
error = vm_set_register(vm, cpu, VM_REG_GUEST_RDX, edx);
|
||||
if (error)
|
||||
panic("vm_set_register(rdx) error %d", error);
|
||||
}
|
||||
return (handled);
|
||||
}
|
||||
|
||||
void
|
||||
restore_guest_msrs(struct vm *vm, int cpu)
|
||||
{
|
||||
int i;
|
||||
uint64_t *guest_msrs;
|
||||
|
||||
guest_msrs = vm_guest_msrs(vm, cpu);
|
||||
|
||||
for (i = 0; i < vmm_msr_num; i++) {
|
||||
if (emulated_msr(i))
|
||||
continue;
|
||||
else
|
||||
wrmsr(vmm_msr[i].num, guest_msrs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
restore_host_msrs(struct vm *vm, int cpu)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < vmm_msr_num; i++) {
|
||||
if (emulated_msr(i))
|
||||
continue;
|
||||
else
|
||||
wrmsr(vmm_msr[i].num, vmm_msr[i].hostval);
|
||||
}
|
||||
}
|
42
sys/amd64/vmm/vmm_msr.h
Normal file
42
sys/amd64/vmm/vmm_msr.h
Normal file
@ -0,0 +1,42 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMM_MSR_H_
|
||||
#define _VMM_MSR_H_
|
||||
|
||||
#define VMM_MSR_NUM 16
|
||||
struct vm;
|
||||
|
||||
void vmm_msr_init(void);
|
||||
int emulate_wrmsr(struct vm *vm, int vcpu, u_int msr, uint64_t val);
|
||||
int emulate_rdmsr(struct vm *vm, int vcpu, u_int msr);
|
||||
void guest_msrs_init(struct vm *vm, int cpu);
|
||||
void restore_host_msrs(struct vm *vm, int cpu);
|
||||
void restore_guest_msrs(struct vm *vm, int cpu);
|
||||
|
||||
#endif
|
103
sys/amd64/vmm/vmm_stat.c
Normal file
103
sys/amd64/vmm/vmm_stat.c
Normal file
@ -0,0 +1,103 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/malloc.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include "vmm_stat.h"
|
||||
|
||||
static int vstnum;
|
||||
static struct vmm_stat_type *vsttab[MAX_VMM_STAT_TYPES];
|
||||
|
||||
static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat");
|
||||
|
||||
void
|
||||
vmm_stat_init(void *arg)
|
||||
{
|
||||
struct vmm_stat_type *vst = arg;
|
||||
|
||||
/* We require all stats to identify themselves with a description */
|
||||
if (vst->desc == NULL)
|
||||
return;
|
||||
|
||||
if (vstnum >= MAX_VMM_STAT_TYPES) {
|
||||
printf("Cannot accomodate vmm stat type \"%s\"!\n", vst->desc);
|
||||
return;
|
||||
}
|
||||
|
||||
vst->index = vstnum;
|
||||
vsttab[vstnum++] = vst;
|
||||
}
|
||||
|
||||
int
|
||||
vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf)
|
||||
{
|
||||
int i;
|
||||
uint64_t *stats;
|
||||
|
||||
if (vcpu < 0 || vcpu >= VM_MAXCPU)
|
||||
return (EINVAL);
|
||||
|
||||
stats = vcpu_stats(vm, vcpu);
|
||||
for (i = 0; i < vstnum; i++)
|
||||
buf[i] = stats[i];
|
||||
*num_stats = vstnum;
|
||||
return (0);
|
||||
}
|
||||
|
||||
void *
|
||||
vmm_stat_alloc(void)
|
||||
{
|
||||
u_long size;
|
||||
|
||||
size = vstnum * sizeof(uint64_t);
|
||||
|
||||
return (malloc(size, M_VMM_STAT, M_ZERO | M_WAITOK));
|
||||
}
|
||||
|
||||
void
|
||||
vmm_stat_free(void *vp)
|
||||
{
|
||||
free(vp, M_VMM_STAT);
|
||||
}
|
||||
|
||||
const char *
|
||||
vmm_stat_desc(int index)
|
||||
{
|
||||
|
||||
if (index >= 0 && index < vstnum)
|
||||
return (vsttab[index]->desc);
|
||||
else
|
||||
return (NULL);
|
||||
}
|
71
sys/amd64/vmm/vmm_stat.h
Normal file
71
sys/amd64/vmm/vmm_stat.h
Normal file
@ -0,0 +1,71 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMM_STAT_H_
|
||||
#define _VMM_STAT_H_
|
||||
|
||||
struct vm;
|
||||
|
||||
#define MAX_VMM_STAT_TYPES 64 /* arbitrary */
|
||||
|
||||
struct vmm_stat_type {
|
||||
const char *desc; /* description of statistic */
|
||||
int index; /* position in the stats buffer */
|
||||
};
|
||||
|
||||
void vmm_stat_init(void *arg);
|
||||
|
||||
#define VMM_STAT_DEFINE(type, desc) \
|
||||
struct vmm_stat_type type[1] = { \
|
||||
{ desc, -1 } \
|
||||
}; \
|
||||
SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_init, type)
|
||||
|
||||
void *vmm_stat_alloc(void);
|
||||
void vmm_stat_free(void *vp);
|
||||
|
||||
/*
|
||||
* 'buf' should be at least fit 'MAX_VMM_STAT_TYPES' entries
|
||||
*/
|
||||
int vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf);
|
||||
const char *vmm_stat_desc(int index);
|
||||
|
||||
static void __inline
|
||||
vmm_stat_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t x)
|
||||
{
|
||||
#ifdef VMM_KEEP_STATS
|
||||
uint64_t *stats = vcpu_stats(vm, vcpu);
|
||||
if (vst->index >= 0)
|
||||
stats[vst->index] += x;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
42
sys/amd64/vmm/vmm_support.S
Normal file
42
sys/amd64/vmm/vmm_support.S
Normal file
@ -0,0 +1,42 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#define LOCORE
|
||||
|
||||
#include <machine/asmacros.h>
|
||||
|
||||
#define LA_EOI 0xB0
|
||||
|
||||
.text
|
||||
SUPERALIGN_TEXT
|
||||
IDTVEC(justreturn)
|
||||
pushq %rax
|
||||
movq lapic, %rax
|
||||
movl $0, LA_EOI(%rax)
|
||||
popq %rax
|
||||
iretq
|
111
sys/amd64/vmm/vmm_util.c
Normal file
111
sys/amd64/vmm/vmm_util.c
Normal file
@ -0,0 +1,111 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/libkern.h>
|
||||
|
||||
#include <machine/md_var.h>
|
||||
|
||||
#include "vmm_util.h"
|
||||
|
||||
boolean_t
|
||||
vmm_is_intel(void)
|
||||
{
|
||||
|
||||
if (strcmp(cpu_vendor, "GenuineIntel") == 0)
|
||||
return (TRUE);
|
||||
else
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
vmm_is_amd(void)
|
||||
{
|
||||
if (strcmp(cpu_vendor, "AuthenticAMD") == 0)
|
||||
return (TRUE);
|
||||
else
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
vmm_supports_1G_pages(void)
|
||||
{
|
||||
unsigned int regs[4];
|
||||
|
||||
/*
|
||||
* CPUID.80000001:EDX[bit 26] = 1 indicates support for 1GB pages
|
||||
*
|
||||
* Both Intel and AMD support this bit.
|
||||
*/
|
||||
if (cpu_exthigh >= 0x80000001) {
|
||||
do_cpuid(0x80000001, regs);
|
||||
if (regs[3] & (1 << 26))
|
||||
return (TRUE);
|
||||
}
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
#include <sys/proc.h>
|
||||
#include <machine/frame.h>
|
||||
#define DUMP_REG(x) printf(#x "\t\t0x%016lx\n", (long)(tf->tf_ ## x))
|
||||
#define DUMP_SEG(x) printf(#x "\t\t0x%04x\n", (unsigned)(tf->tf_ ## x))
|
||||
void
|
||||
dump_trapframe(struct trapframe *tf)
|
||||
{
|
||||
DUMP_REG(rdi);
|
||||
DUMP_REG(rsi);
|
||||
DUMP_REG(rdx);
|
||||
DUMP_REG(rcx);
|
||||
DUMP_REG(r8);
|
||||
DUMP_REG(r9);
|
||||
DUMP_REG(rax);
|
||||
DUMP_REG(rbx);
|
||||
DUMP_REG(rbp);
|
||||
DUMP_REG(r10);
|
||||
DUMP_REG(r11);
|
||||
DUMP_REG(r12);
|
||||
DUMP_REG(r13);
|
||||
DUMP_REG(r14);
|
||||
DUMP_REG(r15);
|
||||
DUMP_REG(trapno);
|
||||
DUMP_REG(addr);
|
||||
DUMP_REG(flags);
|
||||
DUMP_REG(err);
|
||||
DUMP_REG(rip);
|
||||
DUMP_REG(rflags);
|
||||
DUMP_REG(rsp);
|
||||
DUMP_SEG(cs);
|
||||
DUMP_SEG(ss);
|
||||
DUMP_SEG(fs);
|
||||
DUMP_SEG(gs);
|
||||
DUMP_SEG(es);
|
||||
DUMP_SEG(ds);
|
||||
}
|
40
sys/amd64/vmm/vmm_util.h
Normal file
40
sys/amd64/vmm/vmm_util.h
Normal file
@ -0,0 +1,40 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMM_UTIL_H_
|
||||
#define _VMM_UTIL_H_
|
||||
|
||||
struct trapframe;
|
||||
|
||||
boolean_t vmm_is_intel(void);
|
||||
boolean_t vmm_is_amd(void);
|
||||
boolean_t vmm_supports_1G_pages(void);
|
||||
|
||||
void dump_trapframe(struct trapframe *tf);
|
||||
|
||||
#endif
|
113
sys/amd64/vmm/x86.c
Normal file
113
sys/amd64/vmm/x86.c
Normal file
@ -0,0 +1,113 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <machine/cpufunc.h>
|
||||
#include <machine/specialreg.h>
|
||||
|
||||
#include "x86.h"
|
||||
|
||||
int
|
||||
x86_emulate_cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
|
||||
{
|
||||
unsigned int func, regs[4];
|
||||
|
||||
func = *eax;
|
||||
|
||||
cpuid_count(*eax, *ecx, regs);
|
||||
|
||||
switch(func) {
|
||||
case CPUID_0000_0000:
|
||||
case CPUID_0000_0002:
|
||||
case CPUID_0000_0003:
|
||||
case CPUID_0000_0004:
|
||||
case CPUID_0000_000A:
|
||||
break;
|
||||
|
||||
case CPUID_8000_0000:
|
||||
case CPUID_8000_0001:
|
||||
case CPUID_8000_0002:
|
||||
case CPUID_8000_0003:
|
||||
case CPUID_8000_0004:
|
||||
case CPUID_8000_0006:
|
||||
case CPUID_8000_0007:
|
||||
case CPUID_8000_0008:
|
||||
|
||||
break;
|
||||
|
||||
case CPUID_0000_0001:
|
||||
/*
|
||||
* Override the APIC ID only in ebx
|
||||
*/
|
||||
regs[1] &= ~(CPUID_0000_0001_APICID_MASK);
|
||||
/*
|
||||
* XXX fixme for MP case, set apicid properly for cpu.
|
||||
*/
|
||||
regs[1] |= (0 << CPUID_0000_0001_APICID_SHIFT);
|
||||
|
||||
/*
|
||||
* Don't expose VMX capability.
|
||||
* Advertise x2APIC capability.
|
||||
*/
|
||||
regs[2] &= ~CPUID_0000_0001_FEAT0_VMX;
|
||||
regs[2] |= CPUID2_X2APIC;
|
||||
|
||||
/*
|
||||
* Machine check handling is done in the host.
|
||||
* Hide MTRR capability.
|
||||
*/
|
||||
regs[3] &= ~(CPUID_MCA | CPUID_MCE | CPUID_MTRR);
|
||||
|
||||
break;
|
||||
|
||||
case CPUID_0000_000B:
|
||||
/*
|
||||
* XXXSMP fixme
|
||||
* Processor topology enumeration
|
||||
*/
|
||||
regs[0] = 0;
|
||||
regs[1] = 0;
|
||||
regs[2] = *ecx & 0xff;
|
||||
regs[3] = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
return (0);
|
||||
}
|
||||
|
||||
*eax = regs[0];
|
||||
*ebx = regs[1];
|
||||
*ecx = regs[2];
|
||||
*edx = regs[3];
|
||||
return (1);
|
||||
}
|
||||
|
62
sys/amd64/vmm/x86.h
Normal file
62
sys/amd64/vmm/x86.h
Normal file
@ -0,0 +1,62 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _X86_H_
|
||||
#define _X86_H_
|
||||
|
||||
#define CPUID_0000_0000 (0x0)
|
||||
#define CPUID_0000_0001 (0x1)
|
||||
#define CPUID_0000_0002 (0x2)
|
||||
#define CPUID_0000_0003 (0x3)
|
||||
#define CPUID_0000_0004 (0x4)
|
||||
#define CPUID_0000_000A (0xA)
|
||||
#define CPUID_0000_000B (0xB)
|
||||
#define CPUID_8000_0000 (0x80000000)
|
||||
#define CPUID_8000_0001 (0x80000001)
|
||||
#define CPUID_8000_0002 (0x80000002)
|
||||
#define CPUID_8000_0003 (0x80000003)
|
||||
#define CPUID_8000_0004 (0x80000004)
|
||||
#define CPUID_8000_0006 (0x80000006)
|
||||
#define CPUID_8000_0007 (0x80000007)
|
||||
#define CPUID_8000_0008 (0x80000008)
|
||||
|
||||
/*
|
||||
* CPUID instruction Fn0000_0001:
|
||||
*/
|
||||
#define CPUID_0000_0001_APICID_MASK (0xff<<24)
|
||||
#define CPUID_0000_0001_APICID_SHIFT 24
|
||||
|
||||
/*
|
||||
* CPUID instruction Fn0000_0001 ECX
|
||||
*/
|
||||
#define CPUID_0000_0001_FEAT0_VMX (1<<5)
|
||||
|
||||
int x86_emulate_cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
|
||||
uint32_t *edx);
|
||||
|
||||
#endif
|
@ -290,6 +290,7 @@ SUBDIR= ${_3dfx} \
|
||||
${_vesa} \
|
||||
vge \
|
||||
vkbd \
|
||||
${_vmm} \
|
||||
${_vpo} \
|
||||
vr \
|
||||
vx \
|
||||
@ -557,6 +558,7 @@ _sppp= sppp
|
||||
_tmpfs= tmpfs
|
||||
_twa= twa
|
||||
_vesa= vesa
|
||||
_vmm= vmm
|
||||
_x86bios= x86bios
|
||||
_wi= wi
|
||||
_wpi= wpi
|
||||
|
66
sys/modules/vmm/Makefile
Normal file
66
sys/modules/vmm/Makefile
Normal file
@ -0,0 +1,66 @@
|
||||
# $FreeBSD$
|
||||
|
||||
# *REQUIRES* binutils 2.20.1 for VT-x instructions
|
||||
AS= /usr/local/bin/as
|
||||
LD= /usr/local/bin/ld
|
||||
CFLAGS+= -B /usr/local/bin
|
||||
|
||||
KMOD= vmm
|
||||
|
||||
SRCS= device_if.h bus_if.h pci_if.h
|
||||
|
||||
CFLAGS+= -DVMM_KEEP_STATS
|
||||
CFLAGS+= -DOLD_BINUTILS
|
||||
CFLAGS+= -I${.CURDIR}/../../amd64/vmm
|
||||
CFLAGS+= -I${.CURDIR}/../../amd64/vmm/io
|
||||
CFLAGS+= -I${.CURDIR}/../../amd64/vmm/intel
|
||||
|
||||
# generic vmm support
|
||||
.PATH: ${.CURDIR}/../../amd64/vmm
|
||||
SRCS+= vmm.c \
|
||||
vmm_dev.c \
|
||||
vmm_ipi.c \
|
||||
vmm_lapic.c \
|
||||
vmm_mem.c \
|
||||
vmm_msr.c \
|
||||
vmm_stat.c \
|
||||
vmm_util.c \
|
||||
x86.c \
|
||||
vmm_support.S
|
||||
|
||||
.PATH: ${.CURDIR}/../../amd64/vmm/io
|
||||
SRCS+= iommu.c \
|
||||
ppt.c \
|
||||
vdev.c \
|
||||
vlapic.c
|
||||
|
||||
# intel-specific files
|
||||
.PATH: ${.CURDIR}/../../amd64/vmm/intel
|
||||
SRCS+= ept.c \
|
||||
vmcs.c \
|
||||
vmx_msr.c \
|
||||
vmx.c \
|
||||
vtd.c
|
||||
|
||||
# amd-specific files
|
||||
.PATH: ${.CURDIR}/../../amd64/vmm/amd
|
||||
SRCS+= amdv.c
|
||||
|
||||
OBJS= vmx_support.o
|
||||
|
||||
CLEANFILES= vmx_assym.s vmx_genassym.o
|
||||
|
||||
vmx_assym.s: vmx_genassym.o
|
||||
.if exists(@)
|
||||
vmx_assym.s: @/kern/genassym.sh
|
||||
.endif
|
||||
sh @/kern/genassym.sh vmx_genassym.o > ${.TARGET}
|
||||
|
||||
vmx_support.o: vmx_support.S vmx_assym.s
|
||||
${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
|
||||
${.IMPSRC} -o ${.TARGET}
|
||||
|
||||
vmx_genassym.o: vmx_genassym.c @ machine
|
||||
${CC} -c ${CFLAGS:N-fno-common} ${.IMPSRC}
|
||||
|
||||
.include <bsd.kmod.mk>
|
@ -19,6 +19,7 @@ SUBDIR= ${_ac} \
|
||||
${_auditd} \
|
||||
${_auditreduce} \
|
||||
${_authpf} \
|
||||
${_bhyve} \
|
||||
${_bluetooth} \
|
||||
${_boot0cfg} \
|
||||
${_boot98cfg} \
|
||||
@ -194,6 +195,7 @@ SUBDIR= ${_ac} \
|
||||
${_usbdevs} \
|
||||
${_usbconfig} \
|
||||
${_vidcontrol} \
|
||||
${_vmmctl} \
|
||||
vipw \
|
||||
wake \
|
||||
watch \
|
||||
@ -477,6 +479,7 @@ _boot98cfg= boot98cfg
|
||||
_acpi= acpi
|
||||
.endif
|
||||
_asf= asf
|
||||
_bhyve= bhyve
|
||||
_boot0cfg= boot0cfg
|
||||
.if ${MK_TOOLCHAIN} != "no"
|
||||
_btxld= btxld
|
||||
@ -494,6 +497,7 @@ _ndiscvt= ndiscvt
|
||||
.endif
|
||||
_sicontrol= sicontrol
|
||||
_spkrtest= spkrtest
|
||||
_vmmctl= vmmctl
|
||||
_zzz= zzz
|
||||
.endif
|
||||
|
||||
|
18
usr.sbin/bhyve/Makefile
Normal file
18
usr.sbin/bhyve/Makefile
Normal file
@ -0,0 +1,18 @@
|
||||
#
|
||||
# $FreeBSD$
|
||||
#
|
||||
|
||||
PROG= bhyve
|
||||
|
||||
SRCS= atpic.c consport.c dbgport.c elcr.c fbsdrun.c inout.c mevent.c
|
||||
SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c
|
||||
SRCS+= pci_virtio_net.c pit_8254.c post.c rtc.c uart.c xmsr.c
|
||||
|
||||
NO_MAN=
|
||||
|
||||
DPADD= ${LIBVMMAPI} ${LIBMD} ${LIBPTHREAD}
|
||||
LDADD= -lvmmapi -lmd -lpthread
|
||||
|
||||
CFLAGS+= -I${.CURDIR}/../../sys
|
||||
|
||||
.include <bsd.prog.mk>
|
68
usr.sbin/bhyve/atpic.c
Normal file
68
usr.sbin/bhyve/atpic.c
Normal file
@ -0,0 +1,68 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
/*
|
||||
* FreeBSD only writes to the 8259 interrupt controllers to put them in a
|
||||
* shutdown state.
|
||||
*
|
||||
* So, we just ignore the writes.
|
||||
*/
|
||||
|
||||
#define IO_ICU1 0x20
|
||||
#define IO_ICU2 0xA0
|
||||
#define ICU_IMR_OFFSET 1
|
||||
|
||||
static int
|
||||
atpic_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
if (bytes != 1)
|
||||
return (-1);
|
||||
|
||||
if (in)
|
||||
return (-1);
|
||||
|
||||
/* Pretend all writes to the 8259 are alright */
|
||||
return (0);
|
||||
}
|
||||
|
||||
INOUT_PORT(atpic, IO_ICU1, IOPORT_F_INOUT, atpic_handler);
|
||||
INOUT_PORT(atpic, IO_ICU1 + ICU_IMR_OFFSET, IOPORT_F_INOUT, atpic_handler);
|
||||
INOUT_PORT(atpic, IO_ICU2, IOPORT_F_INOUT, atpic_handler);
|
||||
INOUT_PORT(atpic, IO_ICU2 + ICU_IMR_OFFSET, IOPORT_F_INOUT, atpic_handler);
|
121
usr.sbin/bhyve/consport.c
Normal file
121
usr.sbin/bhyve/consport.c
Normal file
@ -0,0 +1,121 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/select.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <termios.h>
|
||||
#include <unistd.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
#define BVM_CONSOLE_PORT 0x220
|
||||
|
||||
static struct termios tio_orig, tio_new;
|
||||
|
||||
static void
|
||||
ttyclose(void)
|
||||
{
|
||||
tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig);
|
||||
}
|
||||
|
||||
static void
|
||||
ttyopen(void)
|
||||
{
|
||||
tcgetattr(STDIN_FILENO, &tio_orig);
|
||||
|
||||
cfmakeraw(&tio_new);
|
||||
tcsetattr(STDIN_FILENO, TCSANOW, &tio_new);
|
||||
|
||||
atexit(ttyclose);
|
||||
}
|
||||
|
||||
static bool
|
||||
tty_char_available(void)
|
||||
{
|
||||
fd_set rfds;
|
||||
struct timeval tv;
|
||||
|
||||
FD_ZERO(&rfds);
|
||||
FD_SET(STDIN_FILENO, &rfds);
|
||||
tv.tv_sec = 0;
|
||||
tv.tv_usec = 0;
|
||||
if (select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv) > 0) {
|
||||
return (true);
|
||||
} else {
|
||||
return (false);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
ttyread(void)
|
||||
{
|
||||
char rb;
|
||||
|
||||
if (tty_char_available()) {
|
||||
read(STDIN_FILENO, &rb, 1);
|
||||
return (rb & 0xff);
|
||||
} else {
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
ttywrite(unsigned char wb)
|
||||
{
|
||||
(void) write(STDOUT_FILENO, &wb, 1);
|
||||
}
|
||||
|
||||
static int
|
||||
console_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
static int opened;
|
||||
|
||||
if (bytes != 4)
|
||||
return (-1);
|
||||
|
||||
if (!opened) {
|
||||
ttyopen();
|
||||
opened = 1;
|
||||
}
|
||||
|
||||
if (in)
|
||||
*eax = ttyread();
|
||||
else
|
||||
ttywrite(*eax);
|
||||
|
||||
return (0);
|
||||
}
|
||||
INOUT_PORT(console, BVM_CONSOLE_PORT, IOPORT_F_INOUT, console_handler);
|
124
usr.sbin/bhyve/dbgport.c
Normal file
124
usr.sbin/bhyve/dbgport.c
Normal file
@ -0,0 +1,124 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
#define BVM_DBG_PORT 0x224
|
||||
|
||||
static int listen_fd, conn_fd;
|
||||
|
||||
static struct sockaddr_in sin;
|
||||
|
||||
void
|
||||
init_dbgport(int sport)
|
||||
{
|
||||
conn_fd = -1;
|
||||
|
||||
if ((listen_fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
|
||||
perror("socket");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
sin.sin_len = sizeof(sin);
|
||||
sin.sin_family = AF_INET;
|
||||
sin.sin_addr.s_addr = htonl(INADDR_ANY);
|
||||
sin.sin_port = htons(sport);
|
||||
|
||||
if (bind(listen_fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
|
||||
perror("bind");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (listen(listen_fd, 1) < 0) {
|
||||
perror("listen");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
dbg_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
char ch;
|
||||
int nwritten, nread, printonce;
|
||||
|
||||
if (bytes != 4)
|
||||
return (-1);
|
||||
|
||||
again:
|
||||
printonce = 0;
|
||||
while (conn_fd < 0) {
|
||||
if (!printonce) {
|
||||
printf("Waiting for connection from gdb\r\n");
|
||||
printonce = 1;
|
||||
}
|
||||
conn_fd = accept(listen_fd, NULL, NULL);
|
||||
if (conn_fd >= 0)
|
||||
fcntl(conn_fd, F_SETFL, O_NONBLOCK);
|
||||
else if (errno != EINTR)
|
||||
perror("accept");
|
||||
}
|
||||
|
||||
if (in) {
|
||||
nread = read(conn_fd, &ch, 1);
|
||||
if (nread == -1 && errno == EAGAIN)
|
||||
*eax = -1;
|
||||
else if (nread == 1)
|
||||
*eax = ch;
|
||||
else {
|
||||
close(conn_fd);
|
||||
conn_fd = -1;
|
||||
goto again;
|
||||
}
|
||||
} else {
|
||||
ch = *eax;
|
||||
nwritten = write(conn_fd, &ch, 1);
|
||||
if (nwritten != 1) {
|
||||
close(conn_fd);
|
||||
conn_fd = -1;
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
INOUT_PORT(dbg, BVM_DBG_PORT, IOPORT_F_INOUT, dbg_handler);
|
36
usr.sbin/bhyve/dbgport.h
Normal file
36
usr.sbin/bhyve/dbgport.h
Normal file
@ -0,0 +1,36 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _DBGPORT_H_
|
||||
#define _DBGPORT_H_
|
||||
|
||||
#define DEFAULT_GDB_PORT 6466
|
||||
|
||||
void init_dbgport(int port);
|
||||
|
||||
#endif
|
65
usr.sbin/bhyve/elcr.c
Normal file
65
usr.sbin/bhyve/elcr.c
Normal file
@ -0,0 +1,65 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
/*
|
||||
* EISA interrupt Level Control Register.
|
||||
*
|
||||
* This is a 16-bit register with one bit for each of the IRQ0 through IRQ15.
|
||||
* A level triggered irq is indicated by setting the corresponding bit to '1'.
|
||||
*/
|
||||
#define ELCR_PORT 0x4d0
|
||||
|
||||
static uint8_t elcr[2] = { 0x00, 0x00 };
|
||||
|
||||
static int
|
||||
elcr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
int idx;
|
||||
|
||||
if (bytes != 1)
|
||||
return (-1);
|
||||
|
||||
idx = port - ELCR_PORT;
|
||||
|
||||
if (in)
|
||||
*eax = elcr[idx];
|
||||
else
|
||||
elcr[idx] = *eax;
|
||||
|
||||
return (0);
|
||||
}
|
||||
INOUT_PORT(elcr, ELCR_PORT + 0, IOPORT_F_INOUT, elcr_handler);
|
||||
INOUT_PORT(elcr, ELCR_PORT + 1, IOPORT_F_INOUT, elcr_handler);
|
650
usr.sbin/bhyve/fbsdrun.c
Normal file
650
usr.sbin/bhyve/fbsdrun.c
Normal file
@ -0,0 +1,650 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <machine/segments.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <libgen.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <signal.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include "fbsdrun.h"
|
||||
#include "inout.h"
|
||||
#include "dbgport.h"
|
||||
#include "mevent.h"
|
||||
#include "pci_emul.h"
|
||||
#include "xmsr.h"
|
||||
|
||||
#define DEFAULT_GUEST_HZ 100
|
||||
#define DEFAULT_GUEST_TSLICE 200
|
||||
|
||||
#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */
|
||||
|
||||
#define VMEXIT_SWITCH 0 /* force vcpu switch in mux mode */
|
||||
#define VMEXIT_CONTINUE 1 /* continue from next instruction */
|
||||
#define VMEXIT_RESTART 2 /* restart current instruction */
|
||||
#define VMEXIT_ABORT 3 /* abort the vm run loop */
|
||||
#define VMEXIT_RESET 4 /* guest machine has reset */
|
||||
|
||||
#define MB (1024UL * 1024)
|
||||
#define GB (1024UL * MB)
|
||||
|
||||
typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
|
||||
|
||||
int guest_tslice = DEFAULT_GUEST_TSLICE;
|
||||
int guest_hz = DEFAULT_GUEST_HZ;
|
||||
char *vmname;
|
||||
|
||||
u_long lomem_sz;
|
||||
u_long himem_sz;
|
||||
|
||||
int guest_ncpus;
|
||||
|
||||
static int pincpu = -1;
|
||||
static int guest_vcpu_mux;
|
||||
static int guest_vmexit_on_hlt, guest_vmexit_on_pause;
|
||||
|
||||
static int foundcpus;
|
||||
|
||||
static char *lomem_addr;
|
||||
static char *himem_addr;
|
||||
|
||||
static char *progname;
|
||||
static const int BSP = 0;
|
||||
|
||||
static int cpumask;
|
||||
|
||||
static void *oem_tbl_start;
|
||||
static int oem_tbl_size;
|
||||
|
||||
static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);
|
||||
|
||||
struct vm_exit vmexit[VM_MAXCPU];
|
||||
|
||||
struct fbsdstats {
|
||||
uint64_t vmexit_bogus;
|
||||
uint64_t vmexit_bogus_switch;
|
||||
uint64_t vmexit_hlt;
|
||||
uint64_t vmexit_pause;
|
||||
uint64_t vmexit_mtrap;
|
||||
uint64_t cpu_switch_rotate;
|
||||
uint64_t cpu_switch_direct;
|
||||
int io_reset;
|
||||
} stats;
|
||||
|
||||
struct mt_vmm_info {
|
||||
pthread_t mt_thr;
|
||||
struct vmctx *mt_ctx;
|
||||
int mt_vcpu;
|
||||
} mt_vmm_info[VM_MAXCPU];
|
||||
|
||||
static void
|
||||
usage(int code)
|
||||
{
|
||||
|
||||
fprintf(stderr,
|
||||
"Usage: %s [-hBHP][-g <gdb port>][-z <hz>][-s <pci>][-p pincpu]"
|
||||
"[-n <pci>][-m lowmem][-M highmem] <vm>\n"
|
||||
" -g: gdb port (default is %d and 0 means don't open)\n"
|
||||
" -c: # cpus (default 1)\n"
|
||||
" -p: pin vcpu 'n' to host cpu 'pincpu + n'\n"
|
||||
" -B: inject breakpoint exception on vm entry\n"
|
||||
" -H: vmexit from the guest on hlt\n"
|
||||
" -P: vmexit from the guest on pause\n"
|
||||
" -h: help\n"
|
||||
" -z: guest hz (default is %d)\n"
|
||||
" -s: <slot,driver,configinfo> PCI slot config\n"
|
||||
" -n: <slot,name> PCI slot naming\n"
|
||||
" -m: lowmem in MB\n"
|
||||
" -M: highmem in MB\n"
|
||||
" -x: mux vcpus to 1 hcpu\n"
|
||||
" -t: mux vcpu timeslice hz (default %d)\n",
|
||||
progname, DEFAULT_GDB_PORT, DEFAULT_GUEST_HZ,
|
||||
DEFAULT_GUEST_TSLICE);
|
||||
exit(code);
|
||||
}
|
||||
|
||||
void *
|
||||
paddr_guest2host(uintptr_t gaddr)
|
||||
{
|
||||
if (lomem_sz == 0)
|
||||
return (NULL);
|
||||
|
||||
if (gaddr < lomem_sz) {
|
||||
return ((void *)(lomem_addr + gaddr));
|
||||
} else if (gaddr >= 4*GB && gaddr < (4*GB + himem_sz)) {
|
||||
return ((void *)(himem_addr + gaddr - 4*GB));
|
||||
} else
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
void
|
||||
fbsdrun_add_oemtbl(void *tbl, int tblsz)
|
||||
{
|
||||
oem_tbl_start = tbl;
|
||||
oem_tbl_size = tblsz;
|
||||
}
|
||||
|
||||
int
|
||||
fbsdrun_vmexit_on_pause(void)
|
||||
{
|
||||
|
||||
return (guest_vmexit_on_pause);
|
||||
}
|
||||
|
||||
int
|
||||
fbsdrun_vmexit_on_hlt(void)
|
||||
{
|
||||
|
||||
return (guest_vmexit_on_hlt);
|
||||
}
|
||||
|
||||
int
|
||||
fbsdrun_muxed(void)
|
||||
{
|
||||
|
||||
return (guest_vcpu_mux);
|
||||
}
|
||||
|
||||
void *
|
||||
fbsdrun_start_thread(void *param)
|
||||
{
|
||||
int vcpu;
|
||||
struct mt_vmm_info *mtp = param;
|
||||
|
||||
vcpu = mtp->mt_vcpu;
|
||||
vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);
|
||||
|
||||
/* not reached */
|
||||
exit(1);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
void
|
||||
fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (cpumask & (1 << vcpu)) {
|
||||
printf("addcpu: attempting to add existing cpu %d\n", vcpu);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
cpumask |= 1 << vcpu;
|
||||
foundcpus++;
|
||||
|
||||
/*
|
||||
* Set up the vmexit struct to allow execution to start
|
||||
* at the given RIP
|
||||
*/
|
||||
vmexit[vcpu].rip = rip;
|
||||
vmexit[vcpu].inst_length = 0;
|
||||
|
||||
if (vcpu == BSP || !guest_vcpu_mux){
|
||||
mt_vmm_info[vcpu].mt_ctx = ctx;
|
||||
mt_vmm_info[vcpu].mt_vcpu = vcpu;
|
||||
|
||||
error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL,
|
||||
fbsdrun_start_thread, &mt_vmm_info[vcpu]);
|
||||
assert(error == 0);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
fbsdrun_get_next_cpu(int curcpu)
|
||||
{
|
||||
|
||||
/*
|
||||
* Get the next available CPU. Assumes they arrive
|
||||
* in ascending order with no gaps.
|
||||
*/
|
||||
return ((curcpu + 1) % foundcpus);
|
||||
}
|
||||
|
||||
int
|
||||
vmexit_catch_reset(void)
|
||||
{
|
||||
stats.io_reset++;
|
||||
return (VMEXIT_RESET);
|
||||
}
|
||||
|
||||
int
|
||||
vmexit_catch_inout(void)
|
||||
{
|
||||
return (VMEXIT_ABORT);
|
||||
}
|
||||
|
||||
int
|
||||
vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
|
||||
uint32_t eax)
|
||||
{
|
||||
#if PG_DEBUG /* put all types of debug here */
|
||||
if (eax == 0) {
|
||||
pause_noswitch = 1;
|
||||
} else if (eax == 1) {
|
||||
pause_noswitch = 0;
|
||||
} else {
|
||||
pause_noswitch = 0;
|
||||
if (eax == 5) {
|
||||
vm_set_capability(ctx, *pvcpu, VM_CAP_MTRAP_EXIT, 1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
|
||||
{
|
||||
int error;
|
||||
int bytes, port, in, out;
|
||||
uint32_t eax;
|
||||
int vcpu;
|
||||
|
||||
vcpu = *pvcpu;
|
||||
|
||||
port = vme->u.inout.port;
|
||||
bytes = vme->u.inout.bytes;
|
||||
eax = vme->u.inout.eax;
|
||||
in = vme->u.inout.in;
|
||||
out = !in;
|
||||
|
||||
/* We don't deal with these */
|
||||
if (vme->u.inout.string || vme->u.inout.rep)
|
||||
return (VMEXIT_ABORT);
|
||||
|
||||
/* Special case of guest reset */
|
||||
if (out && port == 0x64 && (uint8_t)eax == 0xFE)
|
||||
return (vmexit_catch_reset());
|
||||
|
||||
/* Extra-special case of host notifications */
|
||||
if (out && port == GUEST_NIO_PORT)
|
||||
return (vmexit_handle_notify(ctx, vme, pvcpu, eax));
|
||||
|
||||
error = emulate_inout(ctx, vcpu, in, port, bytes, &eax);
|
||||
if (error == 0 && in)
|
||||
error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax);
|
||||
|
||||
if (error == 0)
|
||||
return (VMEXIT_CONTINUE);
|
||||
else {
|
||||
fprintf(stderr, "Unhandled %s%c 0x%04x\n",
|
||||
in ? "in" : "out",
|
||||
bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port);
|
||||
return (vmexit_catch_inout());
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
|
||||
{
|
||||
printf("vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code, *pvcpu);
|
||||
return (VMEXIT_ABORT);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
|
||||
{
|
||||
int newcpu;
|
||||
int retval = VMEXIT_CONTINUE;
|
||||
|
||||
newcpu = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code,vme->u.msr.wval);
|
||||
|
||||
if (guest_vcpu_mux && *pvcpu != newcpu) {
|
||||
retval = VMEXIT_SWITCH;
|
||||
*pvcpu = newcpu;
|
||||
}
|
||||
|
||||
return (retval);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
|
||||
printf("vm exit[%d]\n", *pvcpu);
|
||||
printf("\treason\t\tVMX\n");
|
||||
printf("\trip\t\t0x%016lx\n", vmexit->rip);
|
||||
printf("\tinst_length\t%d\n", vmexit->inst_length);
|
||||
printf("\terror\t\t%d\n", vmexit->u.vmx.error);
|
||||
printf("\texit_reason\t%u\n", vmexit->u.vmx.exit_reason);
|
||||
printf("\tqualification\t0x%016lx\n", vmexit->u.vmx.exit_qualification);
|
||||
|
||||
return (VMEXIT_ABORT);
|
||||
}
|
||||
|
||||
static int bogus_noswitch = 1;
|
||||
|
||||
static int
|
||||
vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
stats.vmexit_bogus++;
|
||||
|
||||
if (!guest_vcpu_mux || guest_ncpus == 1 || bogus_noswitch) {
|
||||
return (VMEXIT_RESTART);
|
||||
} else {
|
||||
stats.vmexit_bogus_switch++;
|
||||
vmexit->inst_length = 0;
|
||||
*pvcpu = -1;
|
||||
return (VMEXIT_SWITCH);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
stats.vmexit_hlt++;
|
||||
if (fbsdrun_muxed()) {
|
||||
*pvcpu = -1;
|
||||
return (VMEXIT_SWITCH);
|
||||
} else {
|
||||
/*
|
||||
* Just continue execution with the next instruction. We use
|
||||
* the HLT VM exit as a way to be friendly with the host
|
||||
* scheduler.
|
||||
*/
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
}
|
||||
|
||||
static int pause_noswitch;
|
||||
|
||||
static int
|
||||
vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
stats.vmexit_pause++;
|
||||
|
||||
if (fbsdrun_muxed() && !pause_noswitch) {
|
||||
*pvcpu = -1;
|
||||
return (VMEXIT_SWITCH);
|
||||
} else {
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
stats.vmexit_mtrap++;
|
||||
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
|
||||
static void
|
||||
sigalrm(int sig)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
static void
|
||||
setup_timeslice(void)
|
||||
{
|
||||
struct sigaction sa;
|
||||
struct itimerval itv;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Setup a realtime timer to generate a SIGALRM at a
|
||||
* frequency of 'guest_tslice' ticks per second.
|
||||
*/
|
||||
sigemptyset(&sa.sa_mask);
|
||||
sa.sa_flags = 0;
|
||||
sa.sa_handler = sigalrm;
|
||||
|
||||
error = sigaction(SIGALRM, &sa, NULL);
|
||||
assert(error == 0);
|
||||
|
||||
itv.it_interval.tv_sec = 0;
|
||||
itv.it_interval.tv_usec = 1000000 / guest_tslice;
|
||||
itv.it_value.tv_sec = 0;
|
||||
itv.it_value.tv_usec = 1000000 / guest_tslice;
|
||||
|
||||
error = setitimer(ITIMER_REAL, &itv, NULL);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
|
||||
[VM_EXITCODE_INOUT] = vmexit_inout,
|
||||
[VM_EXITCODE_VMX] = vmexit_vmx,
|
||||
[VM_EXITCODE_BOGUS] = vmexit_bogus,
|
||||
[VM_EXITCODE_RDMSR] = vmexit_rdmsr,
|
||||
[VM_EXITCODE_WRMSR] = vmexit_wrmsr,
|
||||
[VM_EXITCODE_MTRAP] = vmexit_mtrap,
|
||||
};
|
||||
|
||||
static void
|
||||
vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
|
||||
{
|
||||
int error, rc, prevcpu;
|
||||
|
||||
if (guest_vcpu_mux)
|
||||
setup_timeslice();
|
||||
|
||||
if (pincpu >= 0) {
|
||||
error = vm_set_pinning(ctx, vcpu, pincpu + vcpu);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
while (1) {
|
||||
error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]);
|
||||
if (error != 0)
|
||||
break;
|
||||
|
||||
prevcpu = vcpu;
|
||||
rc = (*handler[vmexit[vcpu].exitcode])(ctx, &vmexit[vcpu],
|
||||
&vcpu);
|
||||
switch (rc) {
|
||||
case VMEXIT_SWITCH:
|
||||
assert(guest_vcpu_mux);
|
||||
if (vcpu == -1) {
|
||||
stats.cpu_switch_rotate++;
|
||||
vcpu = fbsdrun_get_next_cpu(prevcpu);
|
||||
} else {
|
||||
stats.cpu_switch_direct++;
|
||||
}
|
||||
/* fall through */
|
||||
case VMEXIT_CONTINUE:
|
||||
rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length;
|
||||
break;
|
||||
case VMEXIT_RESTART:
|
||||
rip = vmexit[vcpu].rip;
|
||||
break;
|
||||
case VMEXIT_RESET:
|
||||
exit(0);
|
||||
default:
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int c, error, gdb_port, inject_bkpt, tmp, err;
|
||||
struct vmctx *ctx;
|
||||
uint64_t rip;
|
||||
|
||||
inject_bkpt = 0;
|
||||
progname = basename(argv[0]);
|
||||
gdb_port = DEFAULT_GDB_PORT;
|
||||
guest_ncpus = 1;
|
||||
|
||||
while ((c = getopt(argc, argv, "hBHPxp:g:c:z:s:n:m:M:")) != -1) {
|
||||
switch (c) {
|
||||
case 'B':
|
||||
inject_bkpt = 1;
|
||||
break;
|
||||
case 'x':
|
||||
guest_vcpu_mux = 1;
|
||||
break;
|
||||
case 'p':
|
||||
pincpu = atoi(optarg);
|
||||
break;
|
||||
case 'c':
|
||||
guest_ncpus = atoi(optarg);
|
||||
break;
|
||||
case 'g':
|
||||
gdb_port = atoi(optarg);
|
||||
break;
|
||||
case 'z':
|
||||
guest_hz = atoi(optarg);
|
||||
break;
|
||||
case 't':
|
||||
guest_tslice = atoi(optarg);
|
||||
break;
|
||||
case 's':
|
||||
pci_parse_slot(optarg);
|
||||
break;
|
||||
case 'n':
|
||||
pci_parse_name(optarg);
|
||||
break;
|
||||
case 'm':
|
||||
lomem_sz = strtoul(optarg, NULL, 0) * MB;
|
||||
break;
|
||||
case 'M':
|
||||
himem_sz = strtoul(optarg, NULL, 0) * MB;
|
||||
break;
|
||||
case 'H':
|
||||
guest_vmexit_on_hlt = 1;
|
||||
break;
|
||||
case 'P':
|
||||
guest_vmexit_on_pause = 1;
|
||||
break;
|
||||
case 'h':
|
||||
usage(0);
|
||||
default:
|
||||
usage(1);
|
||||
}
|
||||
}
|
||||
argc -= optind;
|
||||
argv += optind;
|
||||
|
||||
if (argc != 1)
|
||||
usage(1);
|
||||
|
||||
/* No need to mux if guest is uni-processor */
|
||||
if (guest_ncpus <= 1)
|
||||
guest_vcpu_mux = 0;
|
||||
|
||||
/* vmexit on hlt if guest is muxed */
|
||||
if (guest_vcpu_mux) {
|
||||
guest_vmexit_on_hlt = 1;
|
||||
guest_vmexit_on_pause = 1;
|
||||
}
|
||||
|
||||
vmname = argv[0];
|
||||
|
||||
ctx = vm_open(vmname);
|
||||
if (ctx == NULL) {
|
||||
perror("vm_open");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (fbsdrun_vmexit_on_hlt()) {
|
||||
err = vm_get_capability(ctx, BSP, VM_CAP_HALT_EXIT, &tmp);
|
||||
if (err < 0) {
|
||||
printf("VM exit on HLT not supported\n");
|
||||
exit(1);
|
||||
}
|
||||
vm_set_capability(ctx, BSP, VM_CAP_HALT_EXIT, 1);
|
||||
handler[VM_EXITCODE_HLT] = vmexit_hlt;
|
||||
}
|
||||
|
||||
if (fbsdrun_vmexit_on_pause()) {
|
||||
/*
|
||||
* pause exit support required for this mode
|
||||
*/
|
||||
err = vm_get_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, &tmp);
|
||||
if (err < 0) {
|
||||
printf("SMP mux requested, no pause support\n");
|
||||
exit(1);
|
||||
}
|
||||
vm_set_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, 1);
|
||||
handler[VM_EXITCODE_PAUSE] = vmexit_pause;
|
||||
}
|
||||
|
||||
if (lomem_sz != 0) {
|
||||
lomem_addr = vm_map_memory(ctx, 0, lomem_sz);
|
||||
if (lomem_addr == (char *) MAP_FAILED) {
|
||||
lomem_sz = 0;
|
||||
} else if (himem_sz != 0) {
|
||||
himem_addr = vm_map_memory(ctx, 4*GB, himem_sz);
|
||||
if (himem_addr == (char *) MAP_FAILED) {
|
||||
lomem_sz = 0;
|
||||
himem_sz = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
init_inout();
|
||||
init_pci(ctx);
|
||||
|
||||
if (gdb_port != 0)
|
||||
init_dbgport(gdb_port);
|
||||
|
||||
error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
|
||||
assert(error == 0);
|
||||
|
||||
if (inject_bkpt) {
|
||||
error = vm_inject_event(ctx, BSP, VM_HW_EXCEPTION, IDT_BP);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* build the guest tables, MP etc.
|
||||
*/
|
||||
vm_build_tables(ctx, guest_ncpus, oem_tbl_start, oem_tbl_size);
|
||||
|
||||
/*
|
||||
* Add CPU 0
|
||||
*/
|
||||
fbsdrun_addcpu(ctx, BSP, rip);
|
||||
|
||||
/*
|
||||
* Head off to the main event dispatch loop
|
||||
*/
|
||||
mevent_dispatch();
|
||||
|
||||
exit(1);
|
||||
}
|
53
usr.sbin/bhyve/fbsdrun.h
Normal file
53
usr.sbin/bhyve/fbsdrun.h
Normal file
@ -0,0 +1,53 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _FBSDRUN_H_
|
||||
#define _FBSDRUN_H_
|
||||
|
||||
#ifndef CTASSERT /* Allow lint to override */
|
||||
#define CTASSERT(x) _CTASSERT(x, __LINE__)
|
||||
#define _CTASSERT(x, y) __CTASSERT(x, y)
|
||||
#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1]
|
||||
#endif
|
||||
|
||||
struct vmctx;
|
||||
extern int guest_hz;
|
||||
extern int guest_tslice;
|
||||
extern int guest_ncpus;
|
||||
extern char *vmname;
|
||||
|
||||
extern u_long lomem_sz, himem_sz;
|
||||
|
||||
void *paddr_guest2host(uintptr_t);
|
||||
|
||||
void fbsdrun_addcpu(struct vmctx *ctx, int cpu, uint64_t rip);
|
||||
void fbsdrun_add_oemtbl(void *tbl, int tblsz);
|
||||
int fbsdrun_muxed(void);
|
||||
int fbsdrun_vmexit_on_hlt(void);
|
||||
int fbsdrun_vmexit_on_pause(void);
|
||||
#endif
|
98
usr.sbin/bhyve/inout.c
Normal file
98
usr.sbin/bhyve/inout.c
Normal file
@ -0,0 +1,98 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/linker_set.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
SET_DECLARE(inout_port_set, struct inout_port);
|
||||
|
||||
#define MAX_IOPORTS (1 << 16)
|
||||
|
||||
static struct {
|
||||
const char *name;
|
||||
int flags;
|
||||
inout_func_t handler;
|
||||
void *arg;
|
||||
} inout_handlers[MAX_IOPORTS];
|
||||
|
||||
int
|
||||
emulate_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax)
|
||||
{
|
||||
int flags;
|
||||
inout_func_t handler;
|
||||
void *arg;
|
||||
|
||||
assert(port < MAX_IOPORTS);
|
||||
|
||||
if ((handler = inout_handlers[port].handler) == NULL)
|
||||
return (-1);
|
||||
|
||||
flags = inout_handlers[port].flags;
|
||||
arg = inout_handlers[port].arg;
|
||||
|
||||
if ((in && (flags & IOPORT_F_IN)) || (!in && (flags & IOPORT_F_OUT)))
|
||||
return ((*handler)(ctx, vcpu, in, port, bytes, eax, arg));
|
||||
else
|
||||
return (-1);
|
||||
}
|
||||
|
||||
void
|
||||
init_inout(void)
|
||||
{
|
||||
struct inout_port **iopp, *iop;
|
||||
|
||||
SET_FOREACH(iopp, inout_port_set) {
|
||||
iop = *iopp;
|
||||
assert(iop->port < MAX_IOPORTS);
|
||||
inout_handlers[iop->port].name = iop->name;
|
||||
inout_handlers[iop->port].flags = iop->flags;
|
||||
inout_handlers[iop->port].handler = iop->handler;
|
||||
inout_handlers[iop->port].arg = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
register_inout(struct inout_port *iop)
|
||||
{
|
||||
assert(iop->port < MAX_IOPORTS);
|
||||
inout_handlers[iop->port].name = iop->name;
|
||||
inout_handlers[iop->port].flags = iop->flags;
|
||||
inout_handlers[iop->port].handler = iop->handler;
|
||||
inout_handlers[iop->port].arg = iop->arg;
|
||||
|
||||
return (0);
|
||||
}
|
64
usr.sbin/bhyve/inout.h
Normal file
64
usr.sbin/bhyve/inout.h
Normal file
@ -0,0 +1,64 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _INOUT_H_
|
||||
#define _INOUT_H_
|
||||
|
||||
#include <sys/linker_set.h>
|
||||
|
||||
struct vmctx;
|
||||
|
||||
typedef int (*inout_func_t)(struct vmctx *ctx, int vcpu, int in, int port,
|
||||
int bytes, uint32_t *eax, void *arg);
|
||||
|
||||
struct inout_port {
|
||||
const char *name;
|
||||
int port;
|
||||
int flags;
|
||||
inout_func_t handler;
|
||||
void *arg;
|
||||
};
|
||||
#define IOPORT_F_IN 0x1
|
||||
#define IOPORT_F_OUT 0x2
|
||||
#define IOPORT_F_INOUT 0x3
|
||||
|
||||
#define INOUT_PORT(name, port, flags, handler) \
|
||||
static struct inout_port __CONCAT(__inout_port, __LINE__) = { \
|
||||
#name, \
|
||||
(port), \
|
||||
(flags), \
|
||||
(handler) \
|
||||
}; \
|
||||
DATA_SET(inout_port_set, __CONCAT(__inout_port, __LINE__))
|
||||
|
||||
void init_inout(void);
|
||||
int emulate_inout(struct vmctx *, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax);
|
||||
int register_inout(struct inout_port *iop);
|
||||
|
||||
#endif /* _INOUT_H_ */
|
419
usr.sbin/bhyve/mevent.c
Normal file
419
usr.sbin/bhyve/mevent.c
Normal file
@ -0,0 +1,419 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Micro event library for FreeBSD, designed for a single i/o thread
|
||||
* using kqueue, and having events be persistent by default.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/event.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <pthread.h>
|
||||
|
||||
#include "mevent.h"
|
||||
|
||||
#define MEVENT_MAX 64
|
||||
|
||||
#define MEV_ENABLE 1
|
||||
#define MEV_DISABLE 2
|
||||
#define MEV_DEL_PENDING 3
|
||||
|
||||
static pthread_t mevent_tid;
|
||||
static int mevent_pipefd[2];
|
||||
static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
struct mevent {
|
||||
void (*me_func)(int, enum ev_type, void *);
|
||||
int me_fd;
|
||||
enum ev_type me_type;
|
||||
void *me_param;
|
||||
int me_cq;
|
||||
int me_state;
|
||||
int me_closefd;
|
||||
LIST_ENTRY(mevent) me_list;
|
||||
};
|
||||
|
||||
static LIST_HEAD(listhead, mevent) global_head, change_head;
|
||||
|
||||
static void
|
||||
mevent_qlock(void)
|
||||
{
|
||||
pthread_mutex_lock(&mevent_lmutex);
|
||||
}
|
||||
|
||||
static void
|
||||
mevent_qunlock(void)
|
||||
{
|
||||
pthread_mutex_unlock(&mevent_lmutex);
|
||||
}
|
||||
|
||||
static void
|
||||
mevent_pipe_read(int fd, enum ev_type type, void *param)
|
||||
{
|
||||
char buf[MEVENT_MAX];
|
||||
int status;
|
||||
|
||||
/*
|
||||
* Drain the pipe read side. The fd is non-blocking so this is
|
||||
* safe to do.
|
||||
*/
|
||||
do {
|
||||
status = read(fd, buf, sizeof(buf));
|
||||
} while (status == MEVENT_MAX);
|
||||
}
|
||||
|
||||
static void
|
||||
mevent_notify(void)
|
||||
{
|
||||
char c;
|
||||
|
||||
/*
|
||||
* If calling from outside the i/o thread, write a byte on the
|
||||
* pipe to force the i/o thread to exit the blocking kevent call.
|
||||
*/
|
||||
if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) {
|
||||
write(mevent_pipefd[1], &c, 1);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Map an mevent type to the corresponding kqueue filter.
 * Returns 0 for any type with no mapping.
 */
static int
mevent_kq_filter(struct mevent *mevp)
{

	switch (mevp->me_type) {
	case EVF_READ:
		return (EVFILT_READ);
	case EVF_WRITE:
		return (EVFILT_WRITE);
	default:
		return (0);
	}
}
|
||||
|
||||
/*
 * Map an mevent state to the kqueue flags for its changelist entry.
 */
static int
mevent_kq_flags(struct mevent *mevp)
{
	int ret;

	switch (mevp->me_state) {
	case MEV_ENABLE:
		ret = EV_ADD;
		break;
	case MEV_DISABLE:
		ret = EV_DISABLE;
		break;
	case MEV_DEL_PENDING:
		ret = EV_DELETE;
		break;
	default:
		/*
		 * Bug fix: the switch had no default, so a corrupted
		 * state would have returned an uninitialized 'ret'
		 * (undefined behavior).  All valid states are covered
		 * above, so this is unreachable.
		 */
		assert(0);
		ret = 0;
		break;
	}

	return (ret);
}
|
||||
|
||||
/*
 * Filter-specific flags for the changelist entry.
 * XXX nothing yet, perhaps EV_EOF for reads ?
 */
static int
mevent_kq_fflags(struct mevent *mevp)
{

	return (0);
}
|
||||
|
||||
/*
 * Convert the pending change list into a kevent changelist array.
 *
 * Entries flagged 'closefd' have their descriptor closed instead (the
 * close implicitly removes any kqueue registration).  Entries marked
 * for deletion are freed; all others are moved (back) onto the global
 * list.  Returns the number of kevent entries written to 'kev'.
 */
static int
mevent_build(int mfd, struct kevent *kev)
{
	struct mevent *mevp, *tmpp;
	int i;

	i = 0;

	mevent_qlock();

	LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
		if (mevp->me_closefd) {
			/*
			 * A close of the file descriptor will remove the
			 * event
			 */
			close(mevp->me_fd);
		} else {
			/*
			 * Bug fix: the capacity assert previously ran
			 * after the increment, so a full-but-valid
			 * changelist of exactly MEVENT_MAX entries would
			 * trip it.  Check before writing the slot.
			 */
			assert(i < MEVENT_MAX);
			kev[i].ident = mevp->me_fd;
			kev[i].filter = mevent_kq_filter(mevp);
			kev[i].flags = mevent_kq_flags(mevp);
			kev[i].fflags = mevent_kq_fflags(mevp);
			kev[i].data = 0;
			kev[i].udata = mevp;
			i++;
		}

		mevp->me_cq = 0;
		LIST_REMOVE(mevp, me_list);

		if (mevp->me_state == MEV_DEL_PENDING) {
			free(mevp);
		} else {
			LIST_INSERT_HEAD(&global_head, mevp, me_list);
		}
	}

	mevent_qunlock();

	return (i);
}
|
||||
|
||||
/*
 * Invoke the registered callback for each kevent returned by the
 * blocking monitor call.
 */
static void
mevent_handle(struct kevent *kev, int numev)
{
	int i;

	for (i = 0; i < numev; i++) {
		struct mevent *mevp = kev[i].udata;

		/* XXX check for EV_ERROR ? */

		(*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param);
	}
}
|
||||
|
||||
struct mevent *
|
||||
mevent_add(int fd, enum ev_type type,
|
||||
void (*func)(int, enum ev_type, void *), void *param)
|
||||
{
|
||||
struct mevent *lp, *mevp;
|
||||
|
||||
if (fd < 0 || func == NULL) {
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
mevp = NULL;
|
||||
|
||||
mevent_qlock();
|
||||
|
||||
/*
|
||||
* Verify that the fd/type tuple is not present in any list
|
||||
*/
|
||||
LIST_FOREACH(lp, &global_head, me_list) {
|
||||
if (lp->me_fd == fd && lp->me_type == type) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
LIST_FOREACH(lp, &change_head, me_list) {
|
||||
if (lp->me_fd == fd && lp->me_type == type) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate an entry, populate it, and add it to the change list.
|
||||
*/
|
||||
mevp = malloc(sizeof(struct mevent));
|
||||
if (mevp == NULL) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
memset(mevp, 0, sizeof(struct mevent));
|
||||
mevp->me_fd = fd;
|
||||
mevp->me_type = type;
|
||||
mevp->me_func = func;
|
||||
mevp->me_param = param;
|
||||
|
||||
LIST_INSERT_HEAD(&change_head, mevp, me_list);
|
||||
mevp->me_cq = 1;
|
||||
mevp->me_state = MEV_ENABLE;
|
||||
mevent_notify();
|
||||
|
||||
exit:
|
||||
mevent_qunlock();
|
||||
|
||||
return (mevp);
|
||||
}
|
||||
|
||||
static int
|
||||
mevent_update(struct mevent *evp, int newstate)
|
||||
{
|
||||
/*
|
||||
* It's not possible to enable/disable a deleted event
|
||||
*/
|
||||
if (evp->me_state == MEV_DEL_PENDING)
|
||||
return (EINVAL);
|
||||
|
||||
/*
|
||||
* No update needed if state isn't changing
|
||||
*/
|
||||
if (evp->me_state == newstate)
|
||||
return (0);
|
||||
|
||||
mevent_qlock();
|
||||
|
||||
evp->me_state = newstate;
|
||||
|
||||
/*
|
||||
* Place the entry onto the changed list if not already there.
|
||||
*/
|
||||
if (evp->me_cq == 0) {
|
||||
evp->me_cq = 1;
|
||||
LIST_REMOVE(evp, me_list);
|
||||
LIST_INSERT_HEAD(&change_head, evp, me_list);
|
||||
mevent_notify();
|
||||
}
|
||||
|
||||
mevent_qunlock();
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
mevent_enable(struct mevent *evp)
|
||||
{
|
||||
|
||||
return (mevent_update(evp, MEV_ENABLE));
|
||||
}
|
||||
|
||||
int
|
||||
mevent_disable(struct mevent *evp)
|
||||
{
|
||||
|
||||
return (mevent_update(evp, MEV_DISABLE));
|
||||
}
|
||||
|
||||
static int
|
||||
mevent_delete_event(struct mevent *evp, int closefd)
|
||||
{
|
||||
mevent_qlock();
|
||||
|
||||
/*
|
||||
* Place the entry onto the changed list if not already there, and
|
||||
* mark as to be deleted.
|
||||
*/
|
||||
if (evp->me_cq == 0) {
|
||||
evp->me_cq = 1;
|
||||
LIST_REMOVE(evp, me_list);
|
||||
LIST_INSERT_HEAD(&change_head, evp, me_list);
|
||||
mevent_notify();
|
||||
}
|
||||
evp->me_state = MEV_DEL_PENDING;
|
||||
|
||||
if (closefd)
|
||||
evp->me_closefd = 1;
|
||||
|
||||
mevent_qunlock();
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* Delete 'evp', leaving its file descriptor open. */
int
mevent_delete(struct mevent *evp)
{

	return (mevent_delete_event(evp, 0));
}
|
||||
|
||||
/* Delete 'evp' and close its file descriptor. */
int
mevent_delete_close(struct mevent *evp)
{

	return (mevent_delete_event(evp, 1));
}
|
||||
|
||||
/*
 * Main event loop: register pending changes with the kqueue, block
 * awaiting events, and dispatch callbacks.  Never returns.
 */
void
mevent_dispatch(void)
{
	struct kevent changelist[MEVENT_MAX];
	struct kevent eventlist[MEVENT_MAX];
	struct mevent *pipev;
	int mfd;
	int numev;
	int ret;

	mevent_tid = pthread_self();

	mfd = kqueue();
	assert(mfd > 0);

	/*
	 * Open the pipe that will be used for other threads to force
	 * the blocking kqueue call to exit by writing to it.
	 * NOTE(review): the original comment also said the descriptor
	 * is set non-blocking, but no fcntl(O_NONBLOCK) is done here —
	 * confirm whether one should be added.
	 */
	ret = pipe(mevent_pipefd);
	if (ret < 0) {
		perror("pipe");
		/* Bug fix: this fatal error path used exit(0). */
		exit(1);
	}

	/*
	 * Add internal event handler for the pipe read fd
	 */
	pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL);
	assert(pipev != NULL);

	for (;;) {
		/*
		 * Build changelist if required.
		 * XXX the changelist can be put into the blocking call
		 * to eliminate the extra syscall. Currently better for
		 * debug.
		 */
		numev = mevent_build(mfd, changelist);
		if (numev) {
			ret = kevent(mfd, changelist, numev, NULL, 0, NULL);
			if (ret == -1) {
				perror("Error return from kevent change");
			}
		}

		/*
		 * Block awaiting events
		 */
		ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
		if (ret == -1) {
			perror("Error return from kevent monitor");
		}

		/*
		 * Handle reported events
		 */
		mevent_handle(eventlist, ret);
	}
}
|
49
usr.sbin/bhyve/mevent.h
Normal file
49
usr.sbin/bhyve/mevent.h
Normal file
@ -0,0 +1,49 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _MEVENT_H_
|
||||
#define _MEVENT_H_
|
||||
|
||||
enum ev_type {
|
||||
EVF_READ,
|
||||
EVF_WRITE
|
||||
};
|
||||
|
||||
struct mevent;
|
||||
|
||||
struct mevent *mevent_add(int fd, enum ev_type type,
|
||||
void (*func)(int, enum ev_type, void *),
|
||||
void *param);
|
||||
int mevent_enable(struct mevent *evp);
|
||||
int mevent_disable(struct mevent *evp);
|
||||
int mevent_delete(struct mevent *evp);
|
||||
int mevent_delete_close(struct mevent *evp);
|
||||
|
||||
void mevent_dispatch(void);
|
||||
|
||||
#endif /* _MEVENT_H_ */
|
180
usr.sbin/bhyve/mevent_test.c
Normal file
180
usr.sbin/bhyve/mevent_test.c
Normal file
@ -0,0 +1,180 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Test program for the micro event library. Set up a simple TCP echo
|
||||
* service.
|
||||
*
|
||||
* cc mevent_test.c mevent.c -lpthread
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "mevent.h"
|
||||
|
||||
#define TEST_PORT 4321
|
||||
|
||||
static pthread_mutex_t accept_mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
static pthread_cond_t accept_condvar = PTHREAD_COND_INITIALIZER;
|
||||
|
||||
#define MEVENT_ECHO
|
||||
|
||||
#ifdef MEVENT_ECHO
|
||||
struct esync {
|
||||
pthread_mutex_t e_mt;
|
||||
pthread_cond_t e_cond;
|
||||
};
|
||||
|
||||
static void
|
||||
echoer_callback(int fd, enum ev_type type, void *param)
|
||||
{
|
||||
struct esync *sync = param;
|
||||
|
||||
pthread_mutex_lock(&sync->e_mt);
|
||||
pthread_cond_signal(&sync->e_cond);
|
||||
pthread_mutex_unlock(&sync->e_mt);
|
||||
}
|
||||
|
||||
static void *
|
||||
echoer(void *param)
|
||||
{
|
||||
struct esync sync;
|
||||
struct mevent *mev;
|
||||
char buf[128];
|
||||
int fd = (int)(uintptr_t) param;
|
||||
int len;
|
||||
|
||||
pthread_mutex_init(&sync.e_mt, NULL);
|
||||
pthread_cond_init(&sync.e_cond, NULL);
|
||||
|
||||
pthread_mutex_lock(&sync.e_mt);
|
||||
|
||||
mev = mevent_add(fd, EVF_READ, echoer_callback, &sync);
|
||||
if (mev == NULL) {
|
||||
printf("Could not allocate echoer event\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
while (!pthread_cond_wait(&sync.e_cond, &sync.e_mt)) {
|
||||
len = read(fd, buf, sizeof(buf));
|
||||
if (len > 0) {
|
||||
write(fd, buf, len);
|
||||
write(0, buf, len);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
mevent_delete_close(mev);
|
||||
|
||||
pthread_mutex_unlock(&sync.e_mt);
|
||||
pthread_mutex_destroy(&sync.e_mt);
|
||||
pthread_cond_destroy(&sync.e_cond);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/*
 * Simple consumer used when MEVENT_ECHO is not defined: copy socket
 * data to stdout until EOF.
 */
static void *
echoer(void *param)
{
	char buf[128];
	int fd = (int)(uintptr_t) param;
	int len;

	while ((len = read(fd, buf, sizeof(buf))) > 0) {
		write(1, buf, len);
	}

	/* Bug fix: missing return from a 'void *' thread routine. */
	return (NULL);
}
|
||||
#endif /* MEVENT_ECHO */
|
||||
|
||||
static void
|
||||
acceptor_callback(int fd, enum ev_type type, void *param)
|
||||
{
|
||||
pthread_mutex_lock(&accept_mutex);
|
||||
pthread_cond_signal(&accept_condvar);
|
||||
pthread_mutex_unlock(&accept_mutex);
|
||||
}
|
||||
|
||||
static void *
|
||||
acceptor(void *param)
|
||||
{
|
||||
struct sockaddr_in sin;
|
||||
pthread_t tid;
|
||||
int news;
|
||||
int s;
|
||||
|
||||
if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
|
||||
perror("socket");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
sin.sin_len = sizeof(sin);
|
||||
sin.sin_family = AF_INET;
|
||||
sin.sin_addr.s_addr = htonl(INADDR_ANY);
|
||||
sin.sin_port = htons(TEST_PORT);
|
||||
|
||||
if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
|
||||
perror("bind");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (listen(s, 1) < 0) {
|
||||
perror("listen");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
(void) mevent_add(s, EVF_READ, acceptor_callback, NULL);
|
||||
|
||||
pthread_mutex_lock(&accept_mutex);
|
||||
|
||||
while (!pthread_cond_wait(&accept_condvar, &accept_mutex)) {
|
||||
news = accept(s, NULL, NULL);
|
||||
if (news < 0) {
|
||||
perror("accept error");
|
||||
} else {
|
||||
printf("incoming connection, spawning thread\n");
|
||||
pthread_create(&tid, NULL, echoer,
|
||||
(void *)(uintptr_t)news);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
main()
|
||||
{
|
||||
pthread_t tid;
|
||||
|
||||
pthread_create(&tid, NULL, acceptor, NULL);
|
||||
|
||||
mevent_dispatch();
|
||||
}
|
976
usr.sbin/bhyve/pci_emul.c
Normal file
976
usr.sbin/bhyve/pci_emul.c
Normal file
@ -0,0 +1,976 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/linker_set.h>
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include "fbsdrun.h"
|
||||
#include "inout.h"
|
||||
#include "pci_emul.h"
|
||||
|
||||
/* PCI configuration mechanism #1 I/O ports */
#define	CONF1_ADDR_PORT	0x0cf8
#define	CONF1_DATA_PORT	0x0cfc

/*
 * Write 'val' to config space of 'pi' at offset 'off' with an access
 * width of 'b' bytes (1, 2 or 4).
 */
#define CFGWRITE(pi,off,val,b)						\
do {									\
	if ((b) == 1) {							\
		pci_set_cfgdata8((pi),(off),(val));			\
	} else if ((b) == 2) {						\
		pci_set_cfgdata16((pi),(off),(val));			\
	} else {							\
		pci_set_cfgdata32((pi),(off),(val));			\
	}								\
} while (0)

#define	MAXSLOTS	32

/* Per-slot emulation state, indexed by virtual PCI slot number 0..31. */
static struct slotinfo {
	char	*si_name;		/* emulation name, e.g. "virtio-net" */
	char	*si_param;		/* optional device config string */
	struct pci_devinst *si_devi;	/* instance, set after successful init */
	int	si_titled;		/* non-zero if naming info was supplied */
	int	si_pslot;		/* physical slot number for mptable name */
	char	si_prefix;		/* optional name prefix character */
	char	si_suffix;		/* name suffix character */
} pci_slotinfo[MAXSLOTS];
|
||||
|
||||
/*
 * NetApp specific:
 * struct used to build an in-core OEM table to supply device names
 * to driver instances
 */
static struct mptable_pci_devnames {
#define	MPT_HDR_BASE	0
#define	MPT_HDR_NAME	2
	uint16_t  md_hdrtype;		/* MPT_HDR_BASE for the table header */
	uint16_t  md_entries;		/* number of valid md_slotinfo entries */
	uint16_t  md_cksum;		/* checksum; currently left at zero */
	uint16_t  md_pad;
#define	MPT_NTAP_SIG	\
	((uint32_t)(('P' << 24) | ('A' << 16) | ('T' << 8) | 'N'))
	uint32_t  md_sig;		/* "NTAP" signature */
	uint32_t  md_rsvd;
	/* One entry per named slot */
	struct mptable_pci_slotinfo {
		uint16_t mds_type;	/* MPT_HDR_NAME */
		uint16_t mds_phys_slot;
		uint8_t  mds_bus;
		uint8_t  mds_slot;
		uint8_t  mds_func;
		uint8_t  mds_pad;
		uint16_t mds_vid;	/* PCI vendor id of the instance */
		uint16_t mds_did;	/* PCI device id of the instance */
		uint8_t  mds_suffix[4];
		uint8_t  mds_prefix[4];
		uint32_t mds_rsvd[3];
	} md_slotinfo[MAXSLOTS];
} pci_devnames;
|
||||
|
||||
/* Linker set of all registered device emulations (see PCI_EMUL_SET). */
SET_DECLARE(pci_devemu_set, struct pci_devemu);

/* Next free address in each PCI resource region (bump allocators). */
static uint64_t pci_emul_iobase;
static uint64_t pci_emul_membase32;
static uint64_t pci_emul_membase64;

#define	PCI_EMUL_IOBASE		0x2000
#define	PCI_EMUL_IOLIMIT	0x10000

/* 32-bit MMIO region starts above guest low memory */
#define	PCI_EMUL_MEMBASE32	(lomem_sz)
#define	PCI_EMUL_MEMLIMIT32	0xE0000000		/* 3.5GB */

#define	PCI_EMUL_MEMBASE64	0xD000000000UL
#define	PCI_EMUL_MEMLIMIT64	0xFD00000000UL

static int pci_emul_devices;	/* count of successfully created instances */
static int devname_elems;	/* entries used in the pci_devnames OEM table */
||||
|
||||
/*
|
||||
* I/O access
|
||||
*/
|
||||
|
||||
/*
|
||||
* Slot options are in the form:
|
||||
*
|
||||
* <slot>,<emul>[,<config>]
|
||||
*
|
||||
* slot is 0..31
|
||||
* emul is a string describing the type of PCI device e.g. virtio-net
|
||||
* config is an optional string, depending on the device, that can be
|
||||
* used for configuration.
|
||||
* Examples are:
|
||||
* 1,virtio-net,tap0
|
||||
* 3,dummy
|
||||
*/
|
||||
/*
 * Report a malformed slot option and release the caller's private copy
 * of the option string.
 */
static void
pci_parse_slot_usage(char *aopt)
{
	fprintf(stdout, "Invalid PCI slot info field \"%s\"\n", aopt);
	free(aopt);
}
|
||||
|
||||
void
|
||||
pci_parse_slot(char *opt)
|
||||
{
|
||||
char *slot, *emul, *config;
|
||||
char *str, *cpy;
|
||||
int snum;
|
||||
|
||||
str = cpy = strdup(opt);
|
||||
config = NULL;
|
||||
|
||||
slot = strsep(&str, ",");
|
||||
emul = strsep(&str, ",");
|
||||
if (str != NULL) {
|
||||
config = strsep(&str, ",");
|
||||
}
|
||||
|
||||
if (emul == NULL) {
|
||||
pci_parse_slot_usage(cpy);
|
||||
return;
|
||||
}
|
||||
|
||||
snum = 255;
|
||||
snum = atoi(slot);
|
||||
if (snum < 0 || snum >= MAXSLOTS) {
|
||||
pci_parse_slot_usage(cpy);
|
||||
} else {
|
||||
pci_slotinfo[snum].si_name = emul;
|
||||
pci_slotinfo[snum].si_param = config;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
* PCI MPTable names are of the form:
|
||||
*
|
||||
* <slot>,[prefix]<digit><suffix>
|
||||
*
|
||||
* .. with <prefix> an alphabetic char, <digit> a 1 or 2-digit string,
|
||||
* and <suffix> a single char.
|
||||
*
|
||||
* Examples:
|
||||
* 1,e0c
|
||||
* 4,e0P
|
||||
* 6,43a
|
||||
* 7,0f
|
||||
* 10,1
|
||||
* 12,e0M
|
||||
* 2,12a
|
||||
*
|
||||
* Note that this is NetApp-specific, but is ignored on other o/s's.
|
||||
*/
|
||||
/* Report a malformed slot-name option; the string is not consumed. */
static void
pci_parse_name_usage(char *aopt)
{
	fprintf(stdout, "Invalid PCI slot name field \"%s\"\n", aopt);
}
|
||||
|
||||
void
|
||||
pci_parse_name(char *opt)
|
||||
{
|
||||
char csnum[4];
|
||||
char *namestr;
|
||||
char *slotend;
|
||||
char prefix, suffix;
|
||||
int i;
|
||||
int pslot;
|
||||
int snum;
|
||||
|
||||
pslot = -1;
|
||||
prefix = suffix = 0;
|
||||
slotend = strchr(opt, ',');
|
||||
|
||||
/*
|
||||
* A comma must be present, and can't be the first character
|
||||
* or no slot would be present. Also, the slot number can't be
|
||||
* more than 2 characters.
|
||||
*/
|
||||
if (slotend == NULL || slotend == opt || (slotend - opt > 2)) {
|
||||
pci_parse_name_usage(opt);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < (slotend - opt); i++) {
|
||||
csnum[i] = opt[i];
|
||||
}
|
||||
csnum[i] = '\0';
|
||||
|
||||
snum = 255;
|
||||
snum = atoi(csnum);
|
||||
if (snum < 0 || snum >= MAXSLOTS) {
|
||||
pci_parse_name_usage(opt);
|
||||
return;
|
||||
}
|
||||
|
||||
namestr = slotend + 1;
|
||||
|
||||
if (strlen(namestr) > 3) {
|
||||
pci_parse_name_usage(opt);
|
||||
return;
|
||||
}
|
||||
|
||||
if (isalpha(*namestr)) {
|
||||
prefix = *namestr++;
|
||||
}
|
||||
|
||||
if (!isdigit(*namestr)) {
|
||||
pci_parse_name_usage(opt);
|
||||
} else {
|
||||
pslot = *namestr++ - '0';
|
||||
if (isnumber(*namestr)) {
|
||||
pslot = 10*pslot + *namestr++ - '0';
|
||||
|
||||
}
|
||||
if (isalpha(*namestr) && *(namestr + 1) == 0) {
|
||||
suffix = *namestr;
|
||||
pci_slotinfo[snum].si_titled = 1;
|
||||
pci_slotinfo[snum].si_pslot = pslot;
|
||||
pci_slotinfo[snum].si_prefix = prefix;
|
||||
pci_slotinfo[snum].si_suffix = suffix;
|
||||
|
||||
} else {
|
||||
pci_parse_name_usage(opt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pci_add_mptable_name(struct slotinfo *si)
|
||||
{
|
||||
struct mptable_pci_slotinfo *ms;
|
||||
|
||||
/*
|
||||
* If naming information has been supplied for this slot, populate
|
||||
* the next available mptable OEM entry
|
||||
*/
|
||||
if (si->si_titled) {
|
||||
ms = &pci_devnames.md_slotinfo[devname_elems];
|
||||
|
||||
ms->mds_type = MPT_HDR_NAME;
|
||||
ms->mds_phys_slot = si->si_pslot;
|
||||
ms->mds_bus = si->si_devi->pi_bus;
|
||||
ms->mds_slot = si->si_devi->pi_slot;
|
||||
ms->mds_func = si->si_devi->pi_func;
|
||||
ms->mds_vid = pci_get_cfgdata16(si->si_devi, PCIR_VENDOR);
|
||||
ms->mds_did = pci_get_cfgdata16(si->si_devi, PCIR_DEVICE);
|
||||
ms->mds_suffix[0] = si->si_suffix;
|
||||
ms->mds_prefix[0] = si->si_prefix;
|
||||
|
||||
devname_elems++;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pci_finish_mptable_names(void)
|
||||
{
|
||||
int size;
|
||||
|
||||
if (devname_elems) {
|
||||
pci_devnames.md_hdrtype = MPT_HDR_BASE;
|
||||
pci_devnames.md_entries = devname_elems;
|
||||
pci_devnames.md_cksum = 0; /* XXX */
|
||||
pci_devnames.md_sig = MPT_NTAP_SIG;
|
||||
|
||||
size = (uintptr_t)&pci_devnames.md_slotinfo[devname_elems] -
|
||||
(uintptr_t)&pci_devnames;
|
||||
|
||||
fbsdrun_add_oemtbl(&pci_devnames, size);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
pci_emul_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
struct pci_devinst *pdi = arg;
|
||||
struct pci_devemu *pe = pdi->pi_d;
|
||||
int offset, i;
|
||||
|
||||
for (i = 0; i <= PCI_BARMAX; i++) {
|
||||
if (pdi->pi_bar[i].type == PCIBAR_IO &&
|
||||
port >= pdi->pi_bar[i].addr &&
|
||||
port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) {
|
||||
offset = port - pdi->pi_bar[i].addr;
|
||||
if (in)
|
||||
*eax = (*pe->pe_ior)(pdi, i, offset, bytes);
|
||||
else
|
||||
(*pe->pe_iow)(pdi, i, offset, bytes, *eax);
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/*
 * Bump-allocate a naturally-aligned region of 'size' bytes from the
 * resource region tracked by '*baseptr', not extending past 'limit'.
 * On success the allocated address is stored in '*addr', '*baseptr' is
 * advanced past the allocation, and 0 is returned; otherwise -1.
 */
static int
pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
			uint64_t *addr)
{
	uint64_t base;

	assert(size > 0 && (size & (size - 1)) == 0); /* must be a power of 2 */

	/* round *baseptr up to a multiple of 'size' (open-coded roundup2) */
	base = (*baseptr + size - 1) & ~(size - 1);

	/*
	 * The original 'base + size <= limit' check could wrap around
	 * for allocations near UINT64_MAX and spuriously succeed; guard
	 * against overflow in both the round-up and the limit check.
	 */
	if (base >= *baseptr && size <= limit && base <= limit - size) {
		*addr = base;
		*baseptr = base + size;
		return (0);
	}
	return (-1);
}
|
||||
|
||||
/*
 * Allocate BAR 'idx' of 'size' bytes for device 'pdi' from the region
 * implied by 'type', program the BAR register(s) in config space and,
 * for I/O BARs, register port handlers. 'hostbase' is only consulted
 * for the peer-to-peer DMA special case below. Returns 0 on success or
 * the pci_emul_alloc_resource() error on exhaustion.
 */
int
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, uint64_t hostbase,
		   enum pcibar_type type, uint64_t size)
{
	int i, error;
	uint64_t *baseptr, limit, addr, mask, lobits, bar;
	struct inout_port iop;

	assert(idx >= 0 && idx <= PCI_BARMAX);

	if ((size & (size - 1)) != 0)
		size = 1UL << flsl(size);	/* round up to a power of 2 */

	/* Select the allocator, limit and BAR low bits for this type. */
	switch (type) {
	case PCIBAR_NONE:
		baseptr = NULL;
		addr = mask = lobits = 0;
		break;
	case PCIBAR_IO:
		baseptr = &pci_emul_iobase;
		limit = PCI_EMUL_IOLIMIT;
		mask = PCIM_BAR_IO_BASE;
		lobits = PCIM_BAR_IO_SPACE;
		break;
	case PCIBAR_MEM64:
		/*
		 * XXX
		 * Some drivers do not work well if the 64-bit BAR is allocated
		 * above 4GB. Allow for this by allocating small requests under
		 * 4GB unless then allocation size is larger than some arbitrary
		 * number (32MB currently).
		 */
		if (size > 32 * 1024 * 1024) {
			/*
			 * XXX special case for device requiring peer-peer DMA
			 */
			if (size == 0x100000000UL)
				baseptr = &hostbase;
			else
				baseptr = &pci_emul_membase64;
			limit = PCI_EMUL_MEMLIMIT64;
			mask = PCIM_BAR_MEM_BASE;
			lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
				 PCIM_BAR_MEM_PREFETCH;
			break;
		}
		/* fallthrough: small 64-bit BARs are placed under 4GB */
	case PCIBAR_MEM32:
		baseptr = &pci_emul_membase32;
		limit = PCI_EMUL_MEMLIMIT32;
		mask = PCIM_BAR_MEM_BASE;
		lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
		break;
	default:
		printf("pci_emul_alloc_base: invalid bar type %d\n", type);
		assert(0);
	}

	if (baseptr != NULL) {
		error = pci_emul_alloc_resource(baseptr, limit, size, &addr);
		if (error != 0)
			return (error);
	}

	pdi->pi_bar[idx].type = type;
	pdi->pi_bar[idx].addr = addr;
	pdi->pi_bar[idx].size = size;

	/* Initialize the BAR register in config space */
	bar = (addr & mask) | lobits;
	pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar);

	/* A 64-bit BAR consumes the next BAR slot for its high dword. */
	if (type == PCIBAR_MEM64) {
		assert(idx + 1 <= PCI_BARMAX);
		pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64;
		pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
	}

	/* add a handler to intercept accesses to the I/O bar */
	if (type == PCIBAR_IO) {
		iop.name = pdi->pi_name;
		iop.flags = IOPORT_F_INOUT;
		iop.handler = pci_emul_handler;
		iop.arg = pdi;

		for (i = 0; i < size; i++) {
			iop.port = addr + i;
			register_inout(&iop);
		}
	}

	return (0);
}
|
||||
|
||||
/* First config-space offset usable for capabilities. */
#define	CAP_START_OFFSET	0x40
/*
 * Append a capability of 'caplen' bytes (in 'capdata', whose first byte
 * is the capability ID) to the device's capability list, creating the
 * list if necessary. A PCIY_RESERVED (0x00) entry serves as the
 * end-of-list marker. Returns -1 if the capability does not fit in
 * config space, 0 on success.
 */
static int
pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen)
{
	int i, capoff, capid, reallen;
	uint16_t sts;

	/* end-of-list marker copied after each appended capability */
	static u_char endofcap[4] = {
		PCIY_RESERVED, 0, 0, 0
	};

	assert(caplen > 0 && capdata[0] != PCIY_RESERVED);

	reallen = roundup2(caplen, 4);		/* dword aligned */

	sts = pci_get_cfgdata16(pi, PCIR_STATUS);
	if ((sts & PCIM_STATUS_CAPPRESENT) == 0) {
		/* first capability: start the list */
		capoff = CAP_START_OFFSET;
		pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff);
		pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT);
	} else {
		/* walk the chain to the current end-of-list marker */
		capoff = pci_get_cfgdata8(pi, PCIR_CAP_PTR);
		while (1) {
			assert((capoff & 0x3) == 0);
			capid = pci_get_cfgdata8(pi, capoff);
			if (capid == PCIY_RESERVED)
				break;
			capoff = pci_get_cfgdata8(pi, capoff + 1);
		}
	}

	/* Check if we have enough space */
	if (capoff + reallen + sizeof(endofcap) > PCI_REGMAX + 1)
		return (-1);

	/* Copy the capability */
	for (i = 0; i < caplen; i++)
		pci_set_cfgdata8(pi, capoff + i, capdata[i]);

	/* Set the next capability pointer */
	pci_set_cfgdata8(pi, capoff + 1, capoff + reallen);

	/* Copy of the reserved capability which serves as the end marker */
	for (i = 0; i < sizeof(endofcap); i++)
		pci_set_cfgdata8(pi, capoff + reallen + i, endofcap[i]);

	return (0);
}
|
||||
|
||||
static struct pci_devemu *
|
||||
pci_emul_finddev(char *name)
|
||||
{
|
||||
struct pci_devemu **pdpp, *pdp;
|
||||
|
||||
SET_FOREACH(pdpp, pci_devemu_set) {
|
||||
pdp = *pdpp;
|
||||
if (!strcmp(pdp->pe_emu, name)) {
|
||||
return (pdp);
|
||||
}
|
||||
}
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int slot, char *params)
|
||||
{
|
||||
struct pci_devinst *pdi;
|
||||
pdi = malloc(sizeof(struct pci_devinst));
|
||||
bzero(pdi, sizeof(*pdi));
|
||||
|
||||
pdi->pi_vmctx = ctx;
|
||||
pdi->pi_bus = 0;
|
||||
pdi->pi_slot = slot;
|
||||
pdi->pi_func = 0;
|
||||
pdi->pi_d = pde;
|
||||
snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot);
|
||||
|
||||
/* Disable legacy interrupts */
|
||||
pci_set_cfgdata8(pdi, PCIR_INTLINE, 255);
|
||||
pci_set_cfgdata8(pdi, PCIR_INTPIN, 0);
|
||||
|
||||
pci_set_cfgdata8(pdi, PCIR_COMMAND,
|
||||
PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
|
||||
|
||||
if ((*pde->pe_init)(ctx, pdi, params) != 0) {
|
||||
free(pdi);
|
||||
} else {
|
||||
pci_emul_devices++;
|
||||
pci_slotinfo[slot].si_devi = pdi;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr)
|
||||
{
|
||||
int mmc;
|
||||
|
||||
CTASSERT(sizeof(struct msicap) == 14);
|
||||
|
||||
/* Number of msi messages must be a power of 2 between 1 and 32 */
|
||||
assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32);
|
||||
mmc = ffs(msgnum) - 1;
|
||||
|
||||
bzero(msicap, sizeof(struct msicap));
|
||||
msicap->capid = PCIY_MSI;
|
||||
msicap->nextptr = nextptr;
|
||||
msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1);
|
||||
}
|
||||
|
||||
int
|
||||
pci_emul_add_msicap(struct pci_devinst *pi, int msgnum)
|
||||
{
|
||||
struct msicap msicap;
|
||||
|
||||
pci_populate_msicap(&msicap, msgnum, 0);
|
||||
|
||||
return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap)));
|
||||
}
|
||||
|
||||
/*
 * Handle a config-space write that lands inside the MSI capability at
 * 'capoff'. Writes to the message-control word update the cached MSI
 * state (enabled flag, target cpu, vector, message count) before the
 * value is committed to config space.
 */
void
msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
		int bytes, uint32_t val)
{
	uint16_t msgctrl, rwmask, msgdata, mme;
	uint32_t addrlo;

	/*
	 * If guest is writing to the message control register make sure
	 * we do not overwrite read-only fields.
	 */
	if ((offset - capoff) == 2 && bytes == 2) {
		rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE;
		msgctrl = pci_get_cfgdata16(pi, offset);
		msgctrl &= ~rwmask;
		msgctrl |= val & rwmask;
		val = msgctrl;

		/* message data follows a 64- or 32-bit address field */
		addrlo = pci_get_cfgdata32(pi, capoff + 4);
		if (msgctrl & PCIM_MSICTRL_64BIT)
			msgdata = pci_get_cfgdata16(pi, capoff + 12);
		else
			msgdata = pci_get_cfgdata16(pi, capoff + 8);

		/*
		 * XXX check delivery mode, destination mode etc
		 */
		mme = msgctrl & PCIM_MSICTRL_MME_MASK;
		pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0;
		if (pi->pi_msi.enabled) {
			/* destination APIC id lives in addr bits 12-19 */
			pi->pi_msi.cpu = (addrlo >> 12) & 0xff;
			pi->pi_msi.vector = msgdata & 0xff;
			pi->pi_msi.msgnum = 1 << (mme >> 4);
		} else {
			pi->pi_msi.cpu = 0;
			pi->pi_msi.vector = 0;
			pi->pi_msi.msgnum = 0;
		}
	}

	CFGWRITE(pi, offset, val, bytes);
}
|
||||
|
||||
/*
 * This function assumes that 'coff' is in the capabilities region of the
 * config space.
 */
static void
pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val)
{
	int capid;
	uint8_t capoff, nextoff;

	/* Do not allow un-aligned writes */
	if ((offset & (bytes - 1)) != 0)
		return;

	/* Find the capability that we want to update */
	capoff = CAP_START_OFFSET;
	while (1) {
		capid = pci_get_cfgdata8(pi, capoff);
		if (capid == PCIY_RESERVED)
			break;	/* hit the end-of-list marker */

		nextoff = pci_get_cfgdata8(pi, capoff + 1);
		if (offset >= capoff && offset < nextoff)
			break;	/* 'offset' lies within this capability */

		capoff = nextoff;
	}
	assert(offset >= capoff);

	/*
	 * Capability ID and Next Capability Pointer are readonly
	 */
	if (offset == capoff || offset == capoff + 1)
		return;

	/* dispatch to a capability-specific write handler */
	switch (capid) {
	case PCIY_MSI:
		msicap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	default:
		break;
	}
}
|
||||
|
||||
static int
|
||||
pci_emul_iscap(struct pci_devinst *pi, int offset)
|
||||
{
|
||||
int found;
|
||||
uint16_t sts;
|
||||
uint8_t capid, lastoff;
|
||||
|
||||
found = 0;
|
||||
sts = pci_get_cfgdata16(pi, PCIR_STATUS);
|
||||
if ((sts & PCIM_STATUS_CAPPRESENT) != 0) {
|
||||
lastoff = pci_get_cfgdata8(pi, PCIR_CAP_PTR);
|
||||
while (1) {
|
||||
assert((lastoff & 0x3) == 0);
|
||||
capid = pci_get_cfgdata8(pi, lastoff);
|
||||
if (capid == PCIY_RESERVED)
|
||||
break;
|
||||
lastoff = pci_get_cfgdata8(pi, lastoff + 1);
|
||||
}
|
||||
if (offset >= CAP_START_OFFSET && offset <= lastoff)
|
||||
found = 1;
|
||||
}
|
||||
return (found);
|
||||
}
|
||||
|
||||
void
|
||||
init_pci(struct vmctx *ctx)
|
||||
{
|
||||
struct pci_devemu *pde;
|
||||
struct slotinfo *si;
|
||||
int i;
|
||||
|
||||
pci_emul_iobase = PCI_EMUL_IOBASE;
|
||||
pci_emul_membase32 = PCI_EMUL_MEMBASE32;
|
||||
pci_emul_membase64 = PCI_EMUL_MEMBASE64;
|
||||
|
||||
si = pci_slotinfo;
|
||||
|
||||
for (i = 0; i < MAXSLOTS; i++, si++) {
|
||||
if (si->si_name != NULL) {
|
||||
pde = pci_emul_finddev(si->si_name);
|
||||
if (pde != NULL) {
|
||||
pci_emul_init(ctx, pde, i, si->si_param);
|
||||
pci_add_mptable_name(si);
|
||||
}
|
||||
}
|
||||
}
|
||||
pci_finish_mptable_names();
|
||||
}
|
||||
|
||||
/* Return non-zero if the guest has enabled MSI for this device. */
int
pci_msi_enabled(struct pci_devinst *pi)
{
	return (pi->pi_msi.enabled);
}
|
||||
|
||||
int
|
||||
pci_msi_msgnum(struct pci_devinst *pi)
|
||||
{
|
||||
if (pi->pi_msi.enabled)
|
||||
return (pi->pi_msi.msgnum);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
pci_generate_msi(struct pci_devinst *pi, int msg)
|
||||
{
|
||||
|
||||
if (pci_msi_enabled(pi) && msg < pci_msi_msgnum(pi)) {
|
||||
vm_lapic_irq(pi->pi_vmctx,
|
||||
pi->pi_msi.cpu,
|
||||
pi->pi_msi.vector + msg);
|
||||
}
|
||||
}
|
||||
|
||||
/* Currently latched config-mechanism-1 address (from port 0xcf8). */
static int cfgbus, cfgslot, cfgfunc, cfgoff;

/*
 * Handler for writes to the config address port: decode and latch the
 * bus/slot/function/register selected by the guest. Only full 32-bit
 * writes are accepted.
 */
static int
pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
		 uint32_t *eax, void *arg)
{
	uint32_t x;

	assert(!in);	/* registered for output only */

	if (bytes != 4)
		return (-1);

	x = *eax;
	cfgoff = x & PCI_REGMAX;
	cfgfunc = (x >> 8) & PCI_FUNCMAX;
	cfgslot = (x >> 11) & PCI_SLOTMAX;
	cfgbus = (x >> 16) & PCI_BUSMAX;

	return (0);
}
INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_OUT, pci_emul_cfgaddr);
|
||||
|
||||
/*
 * Handler for accesses to the config data ports (0xcfc-0xcff): perform
 * the config-space read or write for the device selected by the latched
 * address. Reads of absent devices return all-ones. Writes to BAR
 * registers are intercepted to implement BAR sizing/relocation, and
 * writes into the capability region are routed to pci_emul_capwrite().
 */
static int
pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
		 uint32_t *eax, void *arg)
{
	struct pci_devinst *pi;
	struct pci_devemu *pe;
	int coff, idx;
	uint64_t mask, bar;

	assert(bytes == 1 || bytes == 2 || bytes == 4);

	pi = pci_slotinfo[cfgslot].si_devi;
	/* sub-dword accesses address within the latched register */
	coff = cfgoff + (port - CONF1_DATA_PORT);

#if 0
	printf("pcicfg-%s from 0x%0x of %d bytes (%d/%d/%d)\n\r",
	    in ? "read" : "write", coff, bytes, cfgbus, cfgslot, cfgfunc);
#endif

	/* only function 0 of populated slots is emulated */
	if (pi == NULL || cfgfunc != 0) {
		if (in)
			*eax = 0xffffffff;
		return (0);
	}

	pe = pi->pi_d;

	/*
	 * Config read
	 */
	if (in) {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgread != NULL &&
		    (*pe->pe_cfgread)(ctx, vcpu, pi, coff, bytes, eax) == 0)
			return (0);

		if (bytes == 1)
			*eax = pci_get_cfgdata8(pi, coff);
		else if (bytes == 2)
			*eax = pci_get_cfgdata16(pi, coff);
		else
			*eax = pci_get_cfgdata32(pi, coff);
	} else {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgwrite != NULL &&
		    (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0)
			return (0);

		/*
		 * Special handling for write to BAR registers
		 */
		if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) {
			/*
			 * Ignore writes to BAR registers that are not
			 * 4-byte aligned.
			 */
			if (bytes != 4 || (coff & 0x3) != 0)
				return (0);
			idx = (coff - PCIR_BAR(0)) / 4;
			/*
			 * Mask the written value to the BAR's natural
			 * alignment and re-apply the read-only low bits;
			 * this implements the standard BAR sizing probe.
			 */
			switch (pi->pi_bar[idx].type) {
			case PCIBAR_NONE:
				bar = 0;
				break;
			case PCIBAR_IO:
				mask = ~(pi->pi_bar[idx].size - 1);
				mask &= PCIM_BAR_IO_BASE;
				bar = (*eax & mask) | PCIM_BAR_IO_SPACE;
				break;
			case PCIBAR_MEM32:
				mask = ~(pi->pi_bar[idx].size - 1);
				mask &= PCIM_BAR_MEM_BASE;
				bar = *eax & mask;
				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
				break;
			case PCIBAR_MEM64:
				mask = ~(pi->pi_bar[idx].size - 1);
				mask &= PCIM_BAR_MEM_BASE;
				bar = *eax & mask;
				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
				       PCIM_BAR_MEM_PREFETCH;
				break;
			case PCIBAR_MEMHI64:
				/* high dword of the preceding 64-bit BAR */
				mask = ~(pi->pi_bar[idx - 1].size - 1);
				mask &= PCIM_BAR_MEM_BASE;
				bar = ((uint64_t)*eax << 32) & mask;
				bar = bar >> 32;
				break;
			default:
				assert(0);
			}
			pci_set_cfgdata32(pi, coff, bar);
		} else if (pci_emul_iscap(pi, coff)) {
			pci_emul_capwrite(pi, coff, bytes, *eax);
		} else {
			CFGWRITE(pi, coff, *eax, bytes);
		}
	}

	return (0);
}

INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);
|
||||
|
||||
/*
 * I/O ports to configure PCI IRQ routing. We ignore all writes to it.
 */
static int
pci_irq_port_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
		     uint32_t *eax, void *arg)
{
	assert(in == 0);	/* registered for output only */
	return (0);
}
INOUT_PORT(pci_irq, 0xC00, IOPORT_F_OUT, pci_irq_port_handler);
INOUT_PORT(pci_irq, 0xC01, IOPORT_F_OUT, pci_irq_port_handler);
|
||||
|
||||
#define PCI_EMUL_TEST
#ifdef PCI_EMUL_TEST
/*
 * Define a dummy test device
 */
#define	DREGSZ	20
/* Soft state for the dummy device: a small register file backing BAR 0. */
struct pci_emul_dsoftc {
	uint8_t	 regs[DREGSZ];
};

/* Number of MSI messages advertised by the dummy device. */
#define	PCI_EMUL_MSGS	4
|
||||
int
|
||||
pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
int error;
|
||||
struct pci_emul_dsoftc *sc;
|
||||
|
||||
sc = malloc(sizeof(struct pci_emul_dsoftc));
|
||||
memset(sc, 0, sizeof(struct pci_emul_dsoftc));
|
||||
|
||||
pi->pi_arg = sc;
|
||||
|
||||
pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001);
|
||||
pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD);
|
||||
pci_set_cfgdata8(pi, PCIR_CLASS, 0x02);
|
||||
|
||||
error = pci_emul_alloc_bar(pi, 0, 0, PCIBAR_IO, DREGSZ);
|
||||
assert(error == 0);
|
||||
|
||||
error = pci_emul_add_msicap(pi, PCI_EMUL_MSGS);
|
||||
assert(error == 0);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
pci_emul_diow(struct pci_devinst *pi, int baridx, int offset, int size,
|
||||
uint32_t value)
|
||||
{
|
||||
int i;
|
||||
struct pci_emul_dsoftc *sc = pi->pi_arg;
|
||||
|
||||
if (offset + size > DREGSZ) {
|
||||
printf("diow: too large, offset %d size %d\n", offset, size);
|
||||
return;
|
||||
}
|
||||
|
||||
if (size == 1) {
|
||||
sc->regs[offset] = value & 0xff;
|
||||
} else if (size == 2) {
|
||||
*(uint16_t *)&sc->regs[offset] = value & 0xffff;
|
||||
} else {
|
||||
*(uint32_t *)&sc->regs[offset] = value;
|
||||
}
|
||||
|
||||
/*
|
||||
* Special magic value to generate an interrupt
|
||||
*/
|
||||
if (offset == 4 && size == 4 && pci_msi_enabled(pi))
|
||||
pci_generate_msi(pi, value % pci_msi_msgnum(pi));
|
||||
|
||||
if (value == 0xabcdef) {
|
||||
for (i = 0; i < pci_msi_msgnum(pi); i++)
|
||||
pci_generate_msi(pi, i);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t
|
||||
pci_emul_dior(struct pci_devinst *pi, int baridx, int offset, int size)
|
||||
{
|
||||
struct pci_emul_dsoftc *sc = pi->pi_arg;
|
||||
uint32_t value;
|
||||
|
||||
if (offset + size > DREGSZ) {
|
||||
printf("dior: too large, offset %d size %d\n", offset, size);
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (size == 1) {
|
||||
value = sc->regs[offset];
|
||||
} else if (size == 2) {
|
||||
value = *(uint16_t *) &sc->regs[offset];
|
||||
} else {
|
||||
value = *(uint32_t *) &sc->regs[offset];
|
||||
}
|
||||
|
||||
return (value);
|
||||
}
|
||||
|
||||
/* Registration of the dummy test device with the emulation linker set. */
struct pci_devemu pci_dummy = {
	.pe_emu = "dummy",
	.pe_init = pci_emul_dinit,
	.pe_iow = pci_emul_diow,
	.pe_ior = pci_emul_dior
};
PCI_EMUL_SET(pci_dummy);

#endif /* PCI_EMUL_TEST */
|
171
usr.sbin/bhyve/pci_emul.h
Normal file
171
usr.sbin/bhyve/pci_emul.h
Normal file
@ -0,0 +1,171 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _PCI_EMUL_H_
|
||||
#define _PCI_EMUL_H_
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/queue.h>
|
||||
#include <sys/kernel.h>
|
||||
|
||||
#include <dev/pci/pcireg.h>
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#define	PCI_BARMAX	PCIR_MAX_BAR_0	/* BAR registers in a Type 0 header */
#define	PCIY_RESERVED	0x00		/* capability-list end marker */

struct vmctx;
struct pci_devinst;

/*
 * Interface implemented by every PCI device emulation; instances are
 * registered in the pci_devemu_set linker set via PCI_EMUL_SET().
 */
struct pci_devemu {
	char	*pe_emu;		/* Name of device emulation */

	/* instance creation */
	int	(*pe_init)(struct vmctx *, struct pci_devinst *, char *opts);

	/* config space read/write callbacks */
	int	(*pe_cfgwrite)(struct vmctx *ctx, int vcpu,
			       struct pci_devinst *pi, int offset,
			       int bytes, uint32_t val);
	int	(*pe_cfgread)(struct vmctx *ctx, int vcpu,
			      struct pci_devinst *pi, int offset,
			      int bytes, uint32_t *retval);

	/* I/O space read/write callbacks */
	void	(*pe_iow)(struct pci_devinst *pi, int baridx,
			  int offset, int size, uint32_t value);
	uint32_t (*pe_ior)(struct pci_devinst *pi, int baridx,
			   int offset, int size);
};
#define PCI_EMUL_SET(x)	DATA_SET(pci_devemu_set, x);
|
||||
|
||||
/* BAR flavors; PCIBAR_MEMHI64 marks the high dword of a 64-bit BAR. */
enum pcibar_type {
	PCIBAR_NONE,
	PCIBAR_IO,
	PCIBAR_MEM32,
	PCIBAR_MEM64,
	PCIBAR_MEMHI64
};

/* Allocated state of a single BAR. */
struct pcibar {
	enum pcibar_type	type;		/* io or memory */
	uint64_t		size;
	uint64_t		addr;
};
||||
|
||||
#define PI_NAMESZ	40

/* Per-device instance state shared by the framework and emulations. */
struct pci_devinst {
	struct pci_devemu *pi_d;	/* backing emulation */
	struct vmctx *pi_vmctx;
	uint8_t	  pi_bus, pi_slot, pi_func;
	char	  pi_name[PI_NAMESZ];	/* "<emu>-pci-<slot>" */
	uint16_t  pi_iobase;
	int	  pi_bar_getsize;

	/* cached MSI state decoded from the capability by msicap_cfgwrite */
	struct {
		int	enabled;
		int	cpu;
		int	vector;
		int	msgnum;
	} pi_msi;

	void	  *pi_arg;		/* devemu-private data */

	u_char	  pi_cfgdata[PCI_REGMAX + 1];	/* shadow config space */
	struct pcibar pi_bar[PCI_BARMAX + 1];
};
|
||||
|
||||
/* Layout of a 64-bit-capable MSI capability (14 bytes, packed). */
struct msicap {
	uint8_t		capid;		/* PCIY_MSI */
	uint8_t		nextptr;	/* next capability offset, 0 = end */
	uint16_t	msgctrl;
	uint32_t	addrlo;
	uint32_t	addrhi;
	uint16_t	msgdata;
} __packed;
|
||||
|
||||
/* Framework entry points (see pci_emul.c for per-function details). */
void	init_pci(struct vmctx *ctx);
void	pci_parse_slot(char *opt);
void	pci_parse_name(char *opt);
void	pci_callback(void);
int	pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, uint64_t hostbase,
			   enum pcibar_type type, uint64_t size);
int	pci_emul_add_msicap(struct pci_devinst *pi, int msgnum);
void	msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
			int bytes, uint32_t val);

/* MSI helpers used by device emulations. */
void	pci_generate_msi(struct pci_devinst *pi, int msgnum);
int	pci_msi_enabled(struct pci_devinst *pi);
int	pci_msi_msgnum(struct pci_devinst *pi);
void	pci_populate_msicap(struct msicap *cap, int msgs, int nextptr);
||||
static __inline void
|
||||
pci_set_cfgdata8(struct pci_devinst *pi, int offset, uint8_t val)
|
||||
{
|
||||
assert(offset <= PCI_REGMAX);
|
||||
*(uint8_t *)(pi->pi_cfgdata + offset) = val;
|
||||
}
|
||||
|
||||
static __inline void
|
||||
pci_set_cfgdata16(struct pci_devinst *pi, int offset, uint16_t val)
|
||||
{
|
||||
assert(offset <= (PCI_REGMAX - 1) && (offset & 1) == 0);
|
||||
*(uint16_t *)(pi->pi_cfgdata + offset) = val;
|
||||
}
|
||||
|
||||
static __inline void
|
||||
pci_set_cfgdata32(struct pci_devinst *pi, int offset, uint32_t val)
|
||||
{
|
||||
assert(offset <= (PCI_REGMAX - 3) && (offset & 3) == 0);
|
||||
*(uint32_t *)(pi->pi_cfgdata + offset) = val;
|
||||
}
|
||||
|
||||
static __inline uint8_t
|
||||
pci_get_cfgdata8(struct pci_devinst *pi, int offset)
|
||||
{
|
||||
assert(offset <= PCI_REGMAX);
|
||||
return (*(uint8_t *)(pi->pi_cfgdata + offset));
|
||||
}
|
||||
|
||||
static __inline uint16_t
|
||||
pci_get_cfgdata16(struct pci_devinst *pi, int offset)
|
||||
{
|
||||
assert(offset <= (PCI_REGMAX - 1) && (offset & 1) == 0);
|
||||
return (*(uint16_t *)(pi->pi_cfgdata + offset));
|
||||
}
|
||||
|
||||
static __inline uint32_t
|
||||
pci_get_cfgdata32(struct pci_devinst *pi, int offset)
|
||||
{
|
||||
assert(offset <= (PCI_REGMAX - 3) && (offset & 3) == 0);
|
||||
return (*(uint32_t *)(pi->pi_cfgdata + offset));
|
||||
}
|
||||
|
||||
#endif /* _PCI_EMUL_H_ */
|
52
usr.sbin/bhyve/pci_hostbridge.c
Normal file
52
usr.sbin/bhyve/pci_hostbridge.c
Normal file
@ -0,0 +1,52 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include "pci_emul.h"
|
||||
|
||||
static int
|
||||
pci_hostbridge_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
|
||||
/* config space */
|
||||
pci_set_cfgdata16(pi, PCIR_VENDOR, 0x1275); /* NetApp */
|
||||
pci_set_cfgdata16(pi, PCIR_DEVICE, 0x1275); /* NetApp */
|
||||
pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_BRIDGE);
|
||||
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
|
||||
pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_HOST);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* Host bridge emulation: config-space only, no i/o or BAR callbacks. */
struct pci_devemu pci_de_hostbridge = {
	.pe_emu = "hostbridge",
	.pe_init = pci_hostbridge_init,
};
PCI_EMUL_SET(pci_de_hostbridge);
|
508
usr.sbin/bhyve/pci_passthru.c
Normal file
508
usr.sbin/bhyve/pci_passthru.c
Normal file
@ -0,0 +1,508 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/pciio.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include <dev/io/iodev.h>
|
||||
#include <machine/iodev.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <vmmapi.h>
|
||||
#include "pci_emul.h"
|
||||
|
||||
#ifndef _PATH_DEVPCI
|
||||
#define _PATH_DEVPCI "/dev/pci"
|
||||
#endif
|
||||
|
||||
#ifndef _PATH_DEVIO
|
||||
#define _PATH_DEVIO "/dev/io"
|
||||
#endif
|
||||
|
||||
#define LEGACY_SUPPORT 1
|
||||
|
||||
static int pcifd = -1;
|
||||
static int iofd = -1;
|
||||
|
||||
/* Per-device state for a passed-through PCI function. */
struct passthru_softc {
	struct pci_devinst *psc_pi;		/* emulated device instance */
	struct pcibar psc_bar[PCI_BARMAX + 1];	/* cached "real" BAR info */
	struct {
		int		capoff;		/* MSI capability offset, 0 if none */
		int		msgctrl;	/* cached MSI message control word */
		int		emulated;	/* 1 if capability synthesized by us */
	} psc_msi;
	struct pcisel psc_sel;			/* host bus/slot/func selector */
};
|
||||
|
||||
static int
|
||||
msi_caplen(int msgctrl)
|
||||
{
|
||||
int len;
|
||||
|
||||
len = 10; /* minimum length of msi capability */
|
||||
|
||||
if (msgctrl & PCIM_MSICTRL_64BIT)
|
||||
len += 4;
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* Ignore the 'mask' and 'pending' bits in the MSI capability.
|
||||
* We'll let the guest manipulate them directly.
|
||||
*/
|
||||
if (msgctrl & PCIM_MSICTRL_VECTOR)
|
||||
len += 10;
|
||||
#endif
|
||||
|
||||
return (len);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
read_config(const struct pcisel *sel, long reg, int width)
|
||||
{
|
||||
struct pci_io pi;
|
||||
|
||||
bzero(&pi, sizeof(pi));
|
||||
pi.pi_sel = *sel;
|
||||
pi.pi_reg = reg;
|
||||
pi.pi_width = width;
|
||||
|
||||
if (ioctl(pcifd, PCIOCREAD, &pi) < 0)
|
||||
return (0); /* XXX */
|
||||
else
|
||||
return (pi.pi_data);
|
||||
}
|
||||
|
||||
static void
|
||||
write_config(const struct pcisel *sel, long reg, int width, uint32_t data)
|
||||
{
|
||||
struct pci_io pi;
|
||||
|
||||
bzero(&pi, sizeof(pi));
|
||||
pi.pi_sel = *sel;
|
||||
pi.pi_reg = reg;
|
||||
pi.pi_width = width;
|
||||
pi.pi_data = data;
|
||||
|
||||
(void)ioctl(pcifd, PCIOCWRITE, &pi); /* XXX */
|
||||
}
|
||||
|
||||
#ifdef LEGACY_SUPPORT
|
||||
/*
 * Synthesize an MSI capability (with 'msgnum' messages, linked to
 * 'nextptr') and copy it into the top of the emulated config space.
 * Returns the config-space offset of the new capability.
 */
static int
passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr)
{
	int capoff, i;
	struct msicap msicap;
	u_char *capdata;

	pci_populate_msicap(&msicap, msgnum, nextptr);

	/*
	 * XXX
	 * Copy the msi capability structure in the last 16 bytes of the
	 * config space. This is wrong because it could shadow something
	 * useful to the device.
	 */
	capoff = 256 - roundup(sizeof(msicap), 4);
	capdata = (u_char *)&msicap;
	/* byte-wise copy of the packed capability structure */
	for (i = 0; i < sizeof(msicap); i++)
		pci_set_cfgdata8(pi, capoff + i, capdata[i]);

	return (capoff);
}
|
||||
#endif /* LEGACY_SUPPORT */
|
||||
|
||||
/*
 * Locate the device's MSI capability and mirror it into the emulated
 * config space.  If the device has capabilities but no MSI capability,
 * synthesize one (LEGACY_SUPPORT).  Returns 0 on success, -1 if no MSI
 * capability could be found or created ("MSI or bust").
 */
static int
cfginitmsi(struct passthru_softc *sc)
{
	int ptr, cap, sts, caplen;
	uint32_t u32;
	struct pcisel sel;
	struct pci_devinst *pi;

	pi = sc->psc_pi;
	sel = sc->psc_sel;

	/*
	 * Parse the capabilities and cache the location of the MSI
	 * capability.
	 */
	sts = read_config(&sel, PCIR_STATUS, 2);
	if (sts & PCIM_STATUS_CAPPRESENT) {
		/* walk the capability linked list; 0/0xff terminate it */
		ptr = read_config(&sel, PCIR_CAP_PTR, 1);
		while (ptr != 0 && ptr != 0xff) {
			cap = read_config(&sel, ptr + PCICAP_ID, 1);
			if (cap == PCIY_MSI) {
				/*
				 * Copy the MSI capability into the config
				 * space of the emulated pci device
				 */
				sc->psc_msi.capoff = ptr;
				sc->psc_msi.msgctrl = read_config(&sel,
								  ptr + 2, 2);
				sc->psc_msi.emulated = 0;
				caplen = msi_caplen(sc->psc_msi.msgctrl);
				/* copy 4 bytes at a time into the shadow */
				while (caplen > 0) {
					u32 = read_config(&sel, ptr, 4);
					pci_set_cfgdata32(pi, ptr, u32);
					caplen -= 4;
					ptr += 4;
				}
				break;
			}
			ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1);
		}
	}

#ifdef LEGACY_SUPPORT
	/*
	 * If the passthrough device does not support MSI then craft a
	 * MSI capability for it. We link the new MSI capability at the
	 * head of the list of capabilities.
	 */
	if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) {
		int origptr, msiptr;
		origptr = read_config(&sel, PCIR_CAP_PTR, 1);
		msiptr = passthru_add_msicap(pi, 1, origptr);
		sc->psc_msi.capoff = msiptr;
		sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2);
		sc->psc_msi.emulated = 1;
		pci_set_cfgdata8(pi, PCIR_CAP_PTR, msiptr);
	}
#endif

	if (sc->psc_msi.capoff == 0)	/* MSI or bust */
		return (-1);
	else
		return (0);
}
|
||||
|
||||
/*
 * Discover the host device's BARs, allocate matching guest BARs, and
 * map memory BARs into the guest's MMIO space.  Returns 0 on success,
 * -1 on any allocation or mapping failure.
 */
static int
cfginitbar(struct vmctx *ctx, struct passthru_softc *sc)
{
	int i, error;
	struct pci_devinst *pi;
	struct pci_bar_io bar;
	enum pcibar_type bartype;
	uint64_t base;

	pi = sc->psc_pi;

	/*
	 * Initialize BAR registers
	 */
	for (i = 0; i <= PCI_BARMAX; i++) {
		bzero(&bar, sizeof(bar));
		bar.pbi_sel = sc->psc_sel;
		bar.pbi_reg = PCIR_BAR(i);

		/* unimplemented BARs are simply skipped */
		if (ioctl(pcifd, PCIOCGETBAR, &bar) < 0)
			continue;

		if (PCI_BAR_IO(bar.pbi_base)) {
			bartype = PCIBAR_IO;
			base = bar.pbi_base & PCIM_BAR_IO_BASE;
		} else {
			switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) {
			case PCIM_BAR_MEM_64:
				bartype = PCIBAR_MEM64;
				break;
			default:
				/* 32-bit and below-1M types are treated alike */
				bartype = PCIBAR_MEM32;
				break;
			}
			base = bar.pbi_base & PCIM_BAR_MEM_BASE;
		}

		/* Cache information about the "real" BAR */
		sc->psc_bar[i].type = bartype;
		sc->psc_bar[i].size = bar.pbi_length;
		sc->psc_bar[i].addr = base;

		/* Allocate the BAR in the guest I/O or MMIO space */
		error = pci_emul_alloc_bar(pi, i, base, bartype,
					   bar.pbi_length);
		if (error)
			return (-1);

		/*
		 * Map the physical MMIO space in the guest MMIO space
		 */
		if (bartype != PCIBAR_IO) {
			error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
				sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
				pi->pi_bar[i].addr, pi->pi_bar[i].size, base);
			if (error)
				return (-1);
		}

		/*
		 * 64-bit BAR takes up two slots so skip the next one.
		 */
		if (bartype == PCIBAR_MEM64) {
			i++;
			assert(i <= PCI_BARMAX);
			sc->psc_bar[i].type = PCIBAR_MEMHI64;
		}
	}
	return (0);
}
|
||||
|
||||
static int
|
||||
cfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func)
|
||||
{
|
||||
int error;
|
||||
struct passthru_softc *sc;
|
||||
|
||||
error = 1;
|
||||
sc = pi->pi_arg;
|
||||
|
||||
bzero(&sc->psc_sel, sizeof(struct pcisel));
|
||||
sc->psc_sel.pc_bus = bus;
|
||||
sc->psc_sel.pc_dev = slot;
|
||||
sc->psc_sel.pc_func = func;
|
||||
|
||||
if (cfginitbar(ctx, sc) != 0)
|
||||
goto done;
|
||||
|
||||
if (cfginitmsi(sc) != 0)
|
||||
goto done;
|
||||
|
||||
error = 0; /* success */
|
||||
done:
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
int bus, slot, func, error;
|
||||
struct passthru_softc *sc;
|
||||
|
||||
sc = NULL;
|
||||
error = 1;
|
||||
|
||||
if (pcifd < 0) {
|
||||
pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
|
||||
if (pcifd < 0)
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (iofd < 0) {
|
||||
iofd = open(_PATH_DEVIO, O_RDWR, 0);
|
||||
if (iofd < 0)
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (opts == NULL || sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3)
|
||||
goto done;
|
||||
|
||||
if (vm_assign_pptdev(ctx, bus, slot, func) != 0)
|
||||
goto done;
|
||||
|
||||
sc = malloc(sizeof(struct passthru_softc));
|
||||
memset(sc, 0, sizeof(struct passthru_softc));
|
||||
|
||||
pi->pi_arg = sc;
|
||||
sc->psc_pi = pi;
|
||||
|
||||
/* initialize config space */
|
||||
if (cfginit(ctx, pi, bus, slot, func) != 0)
|
||||
goto done;
|
||||
|
||||
error = 0; /* success */
|
||||
done:
|
||||
if (error) {
|
||||
free(sc);
|
||||
vm_unassign_pptdev(ctx, bus, slot, func);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
bar_access(int coff)
|
||||
{
|
||||
if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1))
|
||||
return (1);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
msicap_access(struct passthru_softc *sc, int coff)
|
||||
{
|
||||
int caplen;
|
||||
|
||||
if (sc->psc_msi.capoff == 0)
|
||||
return (0);
|
||||
|
||||
caplen = msi_caplen(sc->psc_msi.msgctrl);
|
||||
|
||||
if (coff >= sc->psc_msi.capoff && coff < sc->psc_msi.capoff + caplen)
|
||||
return (1);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff,
|
||||
int bytes, uint32_t *rv)
|
||||
{
|
||||
struct passthru_softc *sc;
|
||||
|
||||
sc = pi->pi_arg;
|
||||
|
||||
/*
|
||||
* PCI BARs and MSI capability is emulated.
|
||||
*/
|
||||
if (bar_access(coff) || msicap_access(sc, coff))
|
||||
return (-1);
|
||||
|
||||
#ifdef LEGACY_SUPPORT
|
||||
/*
|
||||
* Emulate PCIR_CAP_PTR if this device does not support MSI capability
|
||||
* natively.
|
||||
*/
|
||||
if (sc->psc_msi.emulated) {
|
||||
if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4)
|
||||
return (-1);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Everything else just read from the device's config space */
|
||||
*rv = read_config(&sc->psc_sel, coff, bytes);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff,
|
||||
int bytes, uint32_t val)
|
||||
{
|
||||
int error;
|
||||
struct passthru_softc *sc;
|
||||
|
||||
sc = pi->pi_arg;
|
||||
|
||||
/*
|
||||
* PCI BARs are emulated
|
||||
*/
|
||||
if (bar_access(coff))
|
||||
return (-1);
|
||||
|
||||
/*
|
||||
* MSI capability is emulated
|
||||
*/
|
||||
if (msicap_access(sc, coff)) {
|
||||
msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val);
|
||||
|
||||
error = vm_setup_msi(ctx, vcpu, sc->psc_sel.pc_bus,
|
||||
sc->psc_sel.pc_dev, sc->psc_sel.pc_func, pi->pi_msi.cpu,
|
||||
pi->pi_msi.vector, pi->pi_msi.msgnum);
|
||||
if (error != 0) {
|
||||
printf("vm_setup_msi returned error %d\r\n", errno);
|
||||
exit(1);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
#ifdef LEGACY_SUPPORT
|
||||
/*
|
||||
* If this device does not support MSI natively then we cannot let
|
||||
* the guest disable legacy interrupts from the device. It is the
|
||||
* legacy interrupt that is triggering the virtual MSI to the guest.
|
||||
*/
|
||||
if (sc->psc_msi.emulated && pci_msi_enabled(pi)) {
|
||||
if (coff == PCIR_COMMAND && bytes == 2)
|
||||
val &= ~PCIM_CMD_INTxDIS;
|
||||
}
|
||||
#endif
|
||||
|
||||
write_config(&sc->psc_sel, coff, bytes, val);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
passthru_iow(struct pci_devinst *pi, int baridx, int offset, int size,
|
||||
uint32_t value)
|
||||
{
|
||||
struct passthru_softc *sc;
|
||||
struct iodev_pio_req pio;
|
||||
|
||||
sc = pi->pi_arg;
|
||||
|
||||
bzero(&pio, sizeof(struct iodev_pio_req));
|
||||
pio.access = IODEV_PIO_WRITE;
|
||||
pio.port = sc->psc_bar[baridx].addr + offset;
|
||||
pio.width = size;
|
||||
pio.val = value;
|
||||
|
||||
(void)ioctl(iofd, IODEV_PIO, &pio);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
passthru_ior(struct pci_devinst *pi, int baridx, int offset, int size)
|
||||
{
|
||||
struct passthru_softc *sc;
|
||||
struct iodev_pio_req pio;
|
||||
|
||||
sc = pi->pi_arg;
|
||||
|
||||
bzero(&pio, sizeof(struct iodev_pio_req));
|
||||
pio.access = IODEV_PIO_READ;
|
||||
pio.port = sc->psc_bar[baridx].addr + offset;
|
||||
pio.width = size;
|
||||
pio.val = 0;
|
||||
|
||||
(void)ioctl(iofd, IODEV_PIO, &pio);
|
||||
|
||||
return (pio.val);
|
||||
}
|
||||
|
||||
/* PCI passthrough emulation registration. */
struct pci_devemu passthru = {
	.pe_emu = "passthru",
	.pe_init = passthru_init,
	.pe_cfgwrite = passthru_cfgwrite,
	.pe_cfgread = passthru_cfgread,
	.pe_iow = passthru_iow,
	.pe_ior = passthru_ior,
};
PCI_EMUL_SET(passthru);
|
502
usr.sbin/bhyve/pci_virtio_block.c
Normal file
502
usr.sbin/bhyve/pci_virtio_block.c
Normal file
@ -0,0 +1,502 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/linker_set.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "fbsdrun.h"
|
||||
#include "pci_emul.h"
|
||||
#include "virtio.h"
|
||||
|
||||
#define VTBLK_RINGSZ	64

#define VTBLK_CFGSZ	28

/*
 * Register layout: device-specific config immediately follows the
 * generic virtio registers.  Expression macros are fully parenthesized
 * so they expand safely in any context (CERT PRE01/PRE02-C).
 */
#define VTBLK_R_CFG	VTCFG_R_CFG0
#define VTBLK_R_CFG_END	(VTBLK_R_CFG + VTBLK_CFGSZ - 1)
#define VTBLK_R_MAX	VTBLK_R_CFG_END

#define VTBLK_REGSZ	(VTBLK_R_MAX + 1)

#define VTBLK_MAXSEGS	32

/* virtio-blk request status codes */
#define VTBLK_S_OK	0
#define VTBLK_S_IOERR	1

/*
 * Host capabilities
 */
#define VTBLK_S_HOSTCAPS      \
	( 0x00000004 |	/* host maximum request segments */ \
	0x10000000 )	/* supports indirect descriptors */
|
||||
|
||||
/* Host-side view of one guest virtqueue. */
struct vring_hqueue {
	/* Internal state */
	uint16_t	hq_size;
	uint16_t	hq_cur_aidx;		/* trails behind 'avail_idx' */

	/* Host-context pointers to the queue */
	struct virtio_desc *hq_dtable;		/* descriptor table */
	uint16_t	*hq_avail_flags;
	uint16_t	*hq_avail_idx;		/* monotonically increasing */
	uint16_t	*hq_avail_ring;

	uint16_t	*hq_used_flags;
	uint16_t	*hq_used_idx;		/* monotonically increasing */
	struct virtio_used *hq_used_ring;
};
|
||||
|
||||
/*
|
||||
* Config space
|
||||
*/
|
||||
/*
 * Config space
 * Device-specific virtio-blk configuration; layout must match the
 * guest-visible format exactly (size pinned by the CTASSERT below).
 */
struct vtblk_config {
	uint64_t	vbc_capacity;	/* capacity in 512-byte sectors */
	uint32_t	vbc_size_max;
	uint32_t	vbc_seg_max;
	uint16_t	vbc_geom_c;	/* cylinders */
	uint8_t		vbc_geom_h;	/* heads */
	uint8_t		vbc_geom_s;	/* sectors per track */
	uint32_t	vbc_blk_size;
	uint32_t	vbc_sectors_max;
} __packed;
CTASSERT(sizeof(struct vtblk_config) == VTBLK_CFGSZ);
|
||||
|
||||
/*
|
||||
* Fixed-size block header
|
||||
*/
|
||||
/*
 * Fixed-size block header
 * First descriptor of every virtio-blk request chain.
 */
struct virtio_blk_hdr {
#define	VBH_OP_READ	0
#define	VBH_OP_WRITE	1
	uint32_t	vbh_type;	/* VBH_OP_READ or VBH_OP_WRITE */
	uint32_t	vbh_ioprio;
	uint64_t	vbh_sector;	/* starting sector of the transfer */
} __packed;
|
||||
|
||||
/*
|
||||
* Debug printf
|
||||
*/
|
||||
/*
 * Debug printf.  DPRINTF is wrapped in do/while(0) so it behaves as a
 * single statement and cannot capture a following 'else' (the original
 * bare-if expansion had a dangling-else hazard).
 */
static int pci_vtblk_debug;
#define DPRINTF(params) do {			\
	if (pci_vtblk_debug)			\
		printf params;			\
} while (0)
#define WPRINTF(params) printf params
|
||||
|
||||
/*
|
||||
* Per-device softc
|
||||
*/
|
||||
/*
 * Per-device softc
 */
struct pci_vtblk_softc {
	struct pci_devinst *vbsc_pi;	/* backing emulated PCI device */
	int		vbsc_fd;	/* fd of the backing file */
	int		vbsc_status;	/* virtio device status register */
	int		vbsc_isr;	/* interrupt status; cleared on read */
	int		vbsc_lastq;	/* last queue selected via QSEL */
	uint32_t	vbsc_features;	/* features acked by the guest */
	uint64_t	vbsc_pfn;	/* guest-physical base of the ring */
	struct vring_hqueue vbsc_q;	/* the single request queue */
	struct vtblk_config vbsc_cfg;	/* device-specific config space */
};
|
||||
|
||||
/*
|
||||
* Return the number of available descriptors in the vring taking care
|
||||
* of the 16-bit index wraparound.
|
||||
*/
|
||||
/*
 * Return the number of available descriptors in the vring taking care
 * of the 16-bit index wraparound.
 */
static int
hq_num_avail(struct vring_hqueue *hq)
{
	int ndesc;

	/*
	 * NOTE(review): *hq->hq_avail_idx lives in guest-shared memory and
	 * is read twice here; the guest may advance it between reads.
	 */
	if (*hq->hq_avail_idx >= hq->hq_cur_aidx)
		ndesc = *hq->hq_avail_idx - hq->hq_cur_aidx;
	else
		ndesc = UINT16_MAX - hq->hq_cur_aidx + *hq->hq_avail_idx + 1;

	assert(ndesc >= 0 && ndesc <= hq->hq_size);

	return (ndesc);
}
|
||||
|
||||
/*
 * Update the virtio status register.  A write of 0 signals a guest
 * device-reset request (only logged here; no other reset handling).
 */
static void
pci_vtblk_update_status(struct pci_vtblk_softc *sc, uint32_t value)
{
	if (value == 0) {
		DPRINTF(("vtblk: device reset requested !\n"));
	}

	sc->vbsc_status = value;
}
|
||||
|
||||
/*
 * Process one request from the available ring: parse the indirect
 * descriptor chain (header, data segments, status byte), issue the
 * read/write against the backing file, store the completion status,
 * and return the chain on the used ring.  Malformed guest requests
 * terminate the process via assert().
 */
static void
pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vring_hqueue *hq)
{
	struct iovec iov[VTBLK_MAXSEGS];
	struct virtio_blk_hdr *vbh;
	struct virtio_desc *vd, *vid;
	struct virtio_used *vu;
	uint8_t *status;
	int i;
	int err;
	int iolen;
	int nsegs;
	int uidx, aidx, didx;
	int writeop;
	off_t offset;

	uidx = *hq->hq_used_idx;
	aidx = hq->hq_cur_aidx;
	didx = hq->hq_avail_ring[aidx % hq->hq_size];
	assert(didx >= 0 && didx < hq->hq_size);

	vd = &hq->hq_dtable[didx];

	/*
	 * Verify that the descriptor is indirect, and obtain
	 * the pointer to the indirect descriptor.
	 * There has to be space for at least 3 descriptors
	 * in the indirect descriptor array: the block header,
	 * 1 or more data descriptors, and a status byte.
	 */
	assert(vd->vd_flags & VRING_DESC_F_INDIRECT);

	nsegs = vd->vd_len / sizeof(struct virtio_desc);
	assert(nsegs >= 3);
	assert(nsegs < VTBLK_MAXSEGS + 2);

	vid = paddr_guest2host(vd->vd_addr);
	assert((vid->vd_flags & VRING_DESC_F_INDIRECT) == 0);

	/*
	 * The first descriptor will be the read-only fixed header
	 */
	vbh = paddr_guest2host(vid[0].vd_addr);
	assert(vid[0].vd_len == sizeof(struct virtio_blk_hdr));
	assert(vid[0].vd_flags & VRING_DESC_F_NEXT);
	assert((vid[0].vd_flags & VRING_DESC_F_WRITE) == 0);

	writeop = (vbh->vbh_type == VBH_OP_WRITE);

	/* sector number -> byte offset into the backing file */
	offset = vbh->vbh_sector * DEV_BSIZE;

	/*
	 * Build up the iovec based on the guest's data descriptors
	 */
	for (i = 1, iolen = 0; i < nsegs - 1; i++) {
		iov[i-1].iov_base = paddr_guest2host(vid[i].vd_addr);
		iov[i-1].iov_len = vid[i].vd_len;
		iolen += vid[i].vd_len;

		assert(vid[i].vd_flags & VRING_DESC_F_NEXT);
		assert((vid[i].vd_flags & VRING_DESC_F_INDIRECT) == 0);

		/*
		 * - write op implies read-only descriptor,
		 * - read op implies write-only descriptor,
		 * therefore test the inverse of the descriptor bit
		 * to the op.
		 */
		assert(((vid[i].vd_flags & VRING_DESC_F_WRITE) == 0) ==
		       writeop);
	}

	/* Lastly, get the address of the status byte */
	status = paddr_guest2host(vid[nsegs - 1].vd_addr);
	assert(vid[nsegs - 1].vd_len == 1);
	assert((vid[nsegs - 1].vd_flags & VRING_DESC_F_NEXT) == 0);
	assert(vid[nsegs - 1].vd_flags & VRING_DESC_F_WRITE);

	DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r",
		 writeop ? "write" : "read", iolen, nsegs - 2, offset));

	if (writeop){
		err = pwritev(sc->vbsc_fd, iov, nsegs - 2, offset);
	} else {
		err = preadv(sc->vbsc_fd, iov, nsegs - 2, offset);
	}

	/* NOTE(review): a short read/write (err < iolen) still reports OK */
	*status = err < 0 ? VTBLK_S_IOERR : VTBLK_S_OK;

	/*
	 * Return the single indirect descriptor back to the host
	 */
	vu = &hq->hq_used_ring[uidx % hq->hq_size];
	vu->vu_idx = didx;
	vu->vu_tlen = 1;
	hq->hq_cur_aidx++;
	*hq->hq_used_idx += 1;
}
|
||||
|
||||
static void
|
||||
pci_vtblk_qnotify(struct pci_vtblk_softc *sc)
|
||||
{
|
||||
struct vring_hqueue *hq = &sc->vbsc_q;
|
||||
int i;
|
||||
int ndescs;
|
||||
|
||||
/*
|
||||
* Calculate number of ring entries to process
|
||||
*/
|
||||
ndescs = hq_num_avail(hq);
|
||||
|
||||
if (ndescs == 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Run through all the entries, placing them into iovecs and
|
||||
* sending when an end-of-packet is found
|
||||
*/
|
||||
for (i = 0; i < ndescs; i++)
|
||||
pci_vtblk_proc(sc, hq);
|
||||
|
||||
/*
|
||||
* Generate an interrupt if able
|
||||
*/
|
||||
if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0 &&
|
||||
sc->vbsc_isr == 0) {
|
||||
sc->vbsc_isr = 1;
|
||||
pci_generate_msi(sc->vbsc_pi, 0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtblk_ring_init(struct pci_vtblk_softc *sc, uint64_t pfn)
|
||||
{
|
||||
struct vring_hqueue *hq;
|
||||
|
||||
sc->vbsc_pfn = pfn << VRING_PFN;
|
||||
|
||||
/*
|
||||
* Set up host pointers to the various parts of the
|
||||
* queue
|
||||
*/
|
||||
hq = &sc->vbsc_q;
|
||||
hq->hq_size = VTBLK_RINGSZ;
|
||||
|
||||
hq->hq_dtable = paddr_guest2host(pfn << VRING_PFN);
|
||||
hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size);
|
||||
hq->hq_avail_idx = hq->hq_avail_flags + 1;
|
||||
hq->hq_avail_ring = hq->hq_avail_flags + 2;
|
||||
hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring,
|
||||
VRING_ALIGN);
|
||||
hq->hq_used_idx = hq->hq_used_flags + 1;
|
||||
hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);
|
||||
|
||||
/*
|
||||
* Initialize queue indexes
|
||||
*/
|
||||
hq->hq_cur_aidx = 0;
|
||||
}
|
||||
|
||||
static int
|
||||
pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
struct stat sbuf;
|
||||
struct pci_vtblk_softc *sc;
|
||||
int fd;
|
||||
|
||||
if (opts == NULL) {
|
||||
printf("virtio-block: backing device required\n");
|
||||
return (1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Access to guest memory is required. Fail if
|
||||
* memory not mapped
|
||||
*/
|
||||
if (paddr_guest2host(0) == NULL)
|
||||
return (1);
|
||||
|
||||
/*
|
||||
* The supplied backing file has to exist
|
||||
*/
|
||||
fd = open(opts, O_RDWR);
|
||||
if (fd < 0) {
|
||||
perror("Could not open backing file");
|
||||
return (1);
|
||||
}
|
||||
|
||||
if (fstat(fd, &sbuf) < 0) {
|
||||
perror("Could not stat backing file");
|
||||
close(fd);
|
||||
return (1);
|
||||
}
|
||||
|
||||
sc = malloc(sizeof(struct pci_vtblk_softc));
|
||||
memset(sc, 0, sizeof(struct pci_vtblk_softc));
|
||||
|
||||
pi->pi_arg = sc;
|
||||
sc->vbsc_pi = pi;
|
||||
sc->vbsc_fd = fd;
|
||||
|
||||
/* setup virtio block config space */
|
||||
sc->vbsc_cfg.vbc_capacity = sbuf.st_size / DEV_BSIZE;
|
||||
sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS;
|
||||
sc->vbsc_cfg.vbc_blk_size = DEV_BSIZE;
|
||||
sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */
|
||||
sc->vbsc_cfg.vbc_geom_c = 0; /* no geometry */
|
||||
sc->vbsc_cfg.vbc_geom_h = 0;
|
||||
sc->vbsc_cfg.vbc_geom_s = 0;
|
||||
sc->vbsc_cfg.vbc_sectors_max = 0;
|
||||
|
||||
/* initialize config space */
|
||||
pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
|
||||
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
|
||||
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
|
||||
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
|
||||
pci_emul_alloc_bar(pi, 0, 0, PCIBAR_IO, VTBLK_REGSZ);
|
||||
pci_emul_add_msicap(pi, 1);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Handle a guest write to the device's i/o BAR.  Writes to read-only
 * or unknown registers are logged and ignored.  Register-size
 * mismatches trip an assert.
 */
static void
pci_vtblk_write(struct pci_devinst *pi, int baridx, int offset, int size,
		uint32_t value)
{
	struct pci_vtblk_softc *sc = pi->pi_arg;

	/* reject accesses that run past the register window */
	if (offset + size > VTBLK_REGSZ) {
		DPRINTF(("vtblk_write: 2big, offset %d size %d\n",
			 offset, size));
		return;
	}

	switch (offset) {
	case VTCFG_R_GUESTCAP:
		/* guest feature negotiation; mask to what we offer */
		assert(size == 4);
		sc->vbsc_features = value & VTBLK_S_HOSTCAPS;
		break;
	case VTCFG_R_PFN:
		/* guest supplied the ring's page frame number */
		assert(size == 4);
		pci_vtblk_ring_init(sc, value);
		break;
	case VTCFG_R_QSEL:
		assert(size == 2);
		sc->vbsc_lastq = value;
		break;
	case VTCFG_R_QNOTIFY:
		/* only queue 0 exists */
		assert(size == 2);
		assert(value == 0);
		pci_vtblk_qnotify(sc);
		break;
	case VTCFG_R_STATUS:
		assert(size == 1);
		pci_vtblk_update_status(sc, value);
		break;
	case VTCFG_R_HOSTCAP:
	case VTCFG_R_QNUM:
	case VTCFG_R_ISR:
	case VTBLK_R_CFG ... VTBLK_R_CFG_END:
		DPRINTF(("vtblk: write to readonly reg %d\n\r", offset));
		break;
	default:
		DPRINTF(("vtblk: unknown i/o write offset %d\n\r", offset));
		value = 0;
		break;
	}
}
|
||||
|
||||
/*
 * Handle a guest read from the device's i/o BAR.  Returns the register
 * value, or 0 for out-of-range or unknown offsets.
 * NOTE(review): not declared 'static', unlike its pci_vtblk_write
 * sibling -- confirm whether external linkage is intended.
 */
uint32_t
pci_vtblk_read(struct pci_devinst *pi, int baridx, int offset, int size)
{
	struct pci_vtblk_softc *sc = pi->pi_arg;
	uint32_t value;

	/* reject accesses that run past the register window */
	if (offset + size > VTBLK_REGSZ) {
		DPRINTF(("vtblk_read: 2big, offset %d size %d\n",
			 offset, size));
		return (0);
	}

	switch (offset) {
	case VTCFG_R_HOSTCAP:
		assert(size == 4);
		value = VTBLK_S_HOSTCAPS;
		break;
	case VTCFG_R_GUESTCAP:
		assert(size == 4);
		value = sc->vbsc_features; /* XXX never read ? */
		break;
	case VTCFG_R_PFN:
		assert(size == 4);
		value = sc->vbsc_pfn >> VRING_PFN;
		break;
	case VTCFG_R_QNUM:
		/* only queue 0 has a non-zero size */
		value = (sc->vbsc_lastq == 0) ? VTBLK_RINGSZ: 0;
		break;
	case VTCFG_R_QSEL:
		assert(size == 2);
		value = sc->vbsc_lastq; /* XXX never read ? */
		break;
	case VTCFG_R_QNOTIFY:
		assert(size == 2);
		value = 0; /* XXX never read ? */
		break;
	case VTCFG_R_STATUS:
		assert(size == 1);
		value = sc->vbsc_status;
		break;
	case VTCFG_R_ISR:
		assert(size == 1);
		value = sc->vbsc_isr;
		sc->vbsc_isr = 0;	/* a read clears this flag */
		break;
	case VTBLK_R_CFG ... VTBLK_R_CFG_END:
		/* byte-wise access into the device-specific config */
		assert(size == 1);
		value = *((uint8_t *)&sc->vbsc_cfg + offset - VTBLK_R_CFG);
		break;
	default:
		DPRINTF(("vtblk: unknown i/o read offset %d\n\r", offset));
		value = 0;
		break;
	}

	return (value);
}
|
||||
|
||||
/*
 * Device emulation registration for the virtio block device.  The
 * PCI_EMUL_SET linker-set entry makes the device discoverable by the
 * emulation framework under the name "virtio-blk".
 */
struct pci_devemu pci_de_vblk = {
	.pe_emu = "virtio-blk",
	.pe_init = pci_vtblk_init,
	.pe_iow = pci_vtblk_write,
	.pe_ior = pci_vtblk_read,
};
PCI_EMUL_SET(pci_de_vblk);
|
739
usr.sbin/bhyve/pci_virtio_net.c
Normal file
739
usr.sbin/bhyve/pci_virtio_net.c
Normal file
@ -0,0 +1,739 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/linker_set.h>
|
||||
#include <sys/select.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <md5.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "fbsdrun.h"
|
||||
#include "pci_emul.h"
|
||||
#include "mevent.h"
|
||||
#include "virtio.h"
|
||||
|
||||
#define VTNET_RINGSZ 256
|
||||
|
||||
#define VTNET_MAXSEGS 32
|
||||
|
||||
/*
|
||||
* PCI config-space register offsets
|
||||
*/
|
||||
#define VTNET_R_CFG0 20
|
||||
#define VTNET_R_CFG1 21
|
||||
#define VTNET_R_CFG2 22
|
||||
#define VTNET_R_CFG3 23
|
||||
#define VTNET_R_CFG4 24
|
||||
#define VTNET_R_CFG5 25
|
||||
#define VTNET_R_CFG6 26
|
||||
#define VTNET_R_CFG7 27
|
||||
#define VTNET_R_MAX 27
|
||||
|
||||
#define VTNET_REGSZ VTNET_R_MAX+1
|
||||
|
||||
/*
|
||||
* Host capabilities
|
||||
*/
|
||||
#define VTNET_S_HOSTCAPS \
|
||||
( 0x00000020 | /* host supplies MAC */ \
|
||||
0x00008000 | /* host can merge Rx buffers */ \
|
||||
0x00010000 ) /* config status available */
|
||||
|
||||
/*
|
||||
* Queue definitions.
|
||||
*/
|
||||
#define VTNET_RXQ 0
|
||||
#define VTNET_TXQ 1
|
||||
#define VTNET_CTLQ 2
|
||||
|
||||
#define VTNET_MAXQ 3
|
||||
|
||||
/*
 * Host-side view of one virtio ring: internal bookkeeping plus
 * host-virtual pointers into the guest-physical ring memory that was
 * set up by pci_vtnet_ring_init().
 */
struct vring_hqueue {
	/* Internal state */
	uint16_t	hq_size;	/* number of descriptors in the ring */
	uint16_t	hq_cur_aidx;	/* trails behind 'avail_idx' */

	/* Host-context pointers to the queue */
	struct virtio_desc *hq_dtable;		/* descriptor table */
	uint16_t	*hq_avail_flags;
	uint16_t	*hq_avail_idx;		/* monotonically increasing */
	uint16_t	*hq_avail_ring;

	uint16_t	*hq_used_flags;
	uint16_t	*hq_used_idx;		/* monotonically increasing */
	struct virtio_used *hq_used_ring;
};
|
||||
|
||||
/*
|
||||
* Fixed network header size
|
||||
*/
|
||||
/*
 * Fixed network header size
 *
 * Prepended to every received frame handed to the guest.  Only
 * vrh_bufs is filled in here (always 1, since mergeable rx buffers are
 * not used for more than one buffer per packet and TSO is not
 * supported); the remaining fields are zeroed.
 */
struct virtio_net_rxhdr {
	uint8_t		vrh_flags;
	uint8_t		vrh_gso_type;
	uint16_t	vrh_hdr_len;
	uint16_t	vrh_gso_size;
	uint16_t	vrh_csum_start;
	uint16_t	vrh_csum_offset;
	uint16_t	vrh_bufs;
} __packed;
|
||||
|
||||
/*
|
||||
* Debug printf
|
||||
*/
|
||||
static int pci_vtnet_debug;
|
||||
#define DPRINTF(params) if (pci_vtnet_debug) printf params
|
||||
#define WPRINTF(params) printf params
|
||||
|
||||
/*
|
||||
* Per-device softc
|
||||
*/
|
||||
/*
 * Per-device softc
 *
 * vsc_mtx serializes the register read/write paths against the tap
 * read callback, which runs from the mevent thread.
 */
struct pci_vtnet_softc {
	struct pci_devinst *vsc_pi;	/* back pointer to PCI instance */
	pthread_mutex_t vsc_mtx;
	struct mevent	*vsc_mevp;	/* tap read event registration */

	int		vsc_curq;	/* queue selected via VTCFG_R_QSEL */
	int		vsc_status;
	int		vsc_isr;
	int		vsc_tapfd;	/* -1 if no tap device attached */
	int		vsc_rx_ready;	/* rx ring has been kicked once */
	int		vsc_rxpend;	/* rx stalled: no descriptors */

	uint32_t	vsc_features;	/* features accepted by the guest */
	uint8_t		vsc_macaddr[6];

	uint64_t	vsc_pfn[VTNET_MAXQ];
	struct vring_hqueue vsc_hq[VTNET_MAXQ];
};
|
||||
|
||||
/*
|
||||
* Return the number of available descriptors in the vring taking care
|
||||
* of the 16-bit index wraparound.
|
||||
*/
|
||||
static int
|
||||
hq_num_avail(struct vring_hqueue *hq)
|
||||
{
|
||||
int ndesc;
|
||||
|
||||
if (*hq->hq_avail_idx >= hq->hq_cur_aidx)
|
||||
ndesc = *hq->hq_avail_idx - hq->hq_cur_aidx;
|
||||
else
|
||||
ndesc = UINT16_MAX - hq->hq_cur_aidx + *hq->hq_avail_idx + 1;
|
||||
|
||||
assert(ndesc >= 0 && ndesc <= hq->hq_size);
|
||||
|
||||
return (ndesc);
|
||||
}
|
||||
|
||||
static uint16_t
|
||||
pci_vtnet_qsize(int qnum)
|
||||
{
|
||||
/* XXX no ctl queue currently */
|
||||
if (qnum == VTNET_CTLQ) {
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* XXX fixed currently. Maybe different for tx/rx/ctl */
|
||||
return (VTNET_RINGSZ);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_update_status(struct pci_vtnet_softc *sc, uint32_t value)
|
||||
{
|
||||
if (value == 0) {
|
||||
DPRINTF(("vtnet: device reset requested !\n"));
|
||||
}
|
||||
|
||||
sc->vsc_status = value;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called to send a buffer chain out to the tap device
|
||||
*/
|
||||
/*
 * Called to send a buffer chain out to the tap device
 *
 * 'iov'/'iovcnt' describe the guest packet payload and 'len' is its
 * total length.  The write is best-effort: a short or failed writev()
 * is deliberately ignored, mirroring what real hardware does with a
 * dropped frame.
 */
static void
pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
		 int len)
{
	char pad[60];

	if (sc->vsc_tapfd == -1)
		return;

	/*
	 * If the length is < 60, pad out to that and add the
	 * extra zero'd segment to the iov. It is guaranteed that
	 * there is always an extra iov available by the caller.
	 */
	if (len < 60) {
		memset(pad, 0, 60 - len);
		iov[iovcnt].iov_base = pad;
		iov[iovcnt].iov_len = 60 - len;
		iovcnt++;
	}
	(void) writev(sc->vsc_tapfd, iov, iovcnt);
}
|
||||
|
||||
/*
|
||||
* Called when there is read activity on the tap file descriptor.
|
||||
* Each buffer posted by the guest is assumed to be able to contain
|
||||
* an entire ethernet frame + rx header.
|
||||
* MP note: the dummybuf is only used for discarding frames, so there
|
||||
* is no need for it to be per-vtnet or locked.
|
||||
*/
|
||||
static uint8_t dummybuf[2048];

/*
 * Drain one or more frames from the tap device into posted guest rx
 * buffers.  Each guest buffer is assumed large enough to hold an
 * entire ethernet frame plus the rx header.  Frames arriving before
 * the rx ring is ready, or when no descriptors are available, are
 * read into dummybuf and dropped so the tap fd does not stay readable.
 *
 * Caller must hold vsc_mtx (see pci_vtnet_tap_callback).
 */
static void
pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
{
	struct virtio_desc *vd;
	struct virtio_used *vu;
	struct vring_hqueue *hq;
	struct virtio_net_rxhdr *vrx;
	uint8_t *buf;
	int i;
	int len;
	int ndescs;
	int didx, uidx, aidx;	/* descriptor, avail and used index */

	/*
	 * Should never be called without a valid tap fd
	 */
	assert(sc->vsc_tapfd != -1);

	/*
	 * But, will be called when the rx ring hasn't yet
	 * been set up.
	 */
	if (sc->vsc_rx_ready == 0) {
		/*
		 * Drop the packet and try later.
		 */
		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
		return;
	}

	/*
	 * Calculate the number of available rx buffers
	 */
	hq = &sc->vsc_hq[VTNET_RXQ];

	ndescs = hq_num_avail(hq);

	if (ndescs == 0) {
		/*
		 * Need to wait for host notification to read
		 */
		if (sc->vsc_rxpend == 0) {
			WPRINTF(("vtnet: no rx descriptors !\n"));
			sc->vsc_rxpend = 1;
		}

		/*
		 * Drop the packet and try later
		 */
		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
		return;
	}

	aidx = hq->hq_cur_aidx;
	uidx = *hq->hq_used_idx;
	for (i = 0; i < ndescs; i++) {
		/*
		 * 'aidx' indexes into the an array of descriptor indexes
		 */
		didx = hq->hq_avail_ring[aidx % hq->hq_size];
		assert(didx >= 0 && didx < hq->hq_size);

		vd = &hq->hq_dtable[didx];

		/*
		 * Get a pointer to the rx header, and use the
		 * data immediately following it for the packet buffer.
		 */
		vrx = (struct virtio_net_rxhdr *)paddr_guest2host(vd->vd_addr);
		buf = (uint8_t *)(vrx + 1);

		len = read(sc->vsc_tapfd, buf,
			   vd->vd_len - sizeof(struct virtio_net_rxhdr));

		/* Non-blocking fd: stop once no more frames are queued */
		if (len < 0 && errno == EWOULDBLOCK) {
			break;
		}

		/*
		 * The only valid field in the rx packet header is the
		 * number of buffers, which is always 1 without TSO
		 * support.
		 */
		memset(vrx, 0, sizeof(struct virtio_net_rxhdr));
		vrx->vrh_bufs = 1;

		/*
		 * Write this descriptor into the used ring
		 */
		vu = &hq->hq_used_ring[uidx % hq->hq_size];
		vu->vu_idx = didx;
		vu->vu_tlen = len + sizeof(struct virtio_net_rxhdr);
		uidx++;
		aidx++;
	}

	/*
	 * Update the used pointer, and signal an interrupt if allowed
	 */
	*hq->hq_used_idx = uidx;
	hq->hq_cur_aidx = aidx;

	if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) {
		sc->vsc_isr |= 1;
		pci_generate_msi(sc->vsc_pi, 0);
	}
}
|
||||
|
||||
static void
|
||||
pci_vtnet_tap_callback(int fd, enum ev_type type, void *param)
|
||||
{
|
||||
struct pci_vtnet_softc *sc = param;
|
||||
|
||||
pthread_mutex_lock(&sc->vsc_mtx);
|
||||
pci_vtnet_tap_rx(sc);
|
||||
pthread_mutex_unlock(&sc->vsc_mtx);
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_ping_rxq(struct pci_vtnet_softc *sc)
|
||||
{
|
||||
/*
|
||||
* A qnotify means that the rx process can now begin
|
||||
*/
|
||||
if (sc->vsc_rx_ready == 0) {
|
||||
sc->vsc_rx_ready = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the rx queue was empty, attempt to receive a
|
||||
* packet that was previously blocked due to no rx bufs
|
||||
* available
|
||||
*/
|
||||
if (sc->vsc_rxpend) {
|
||||
WPRINTF(("vtnet: rx resumed\n\r"));
|
||||
sc->vsc_rxpend = 0;
|
||||
pci_vtnet_tap_rx(sc);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Transmit one packet: gather the guest's descriptor chain for the
 * next available tx slot into an iovec, hand it to the tap device,
 * and return the chain on the used ring (interrupting if permitted).
 *
 * NOTE(review): the loop assumes each chain starts with a header
 * descriptor followed by at least one payload descriptor, and trusts
 * vd_next from the guest — confirm against the virtio spec/callers.
 */
static void
pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vring_hqueue *hq)
{
	struct iovec iov[VTNET_MAXSEGS + 1];
	struct virtio_desc *vd;
	struct virtio_used *vu;
	int i;
	int plen;	/* payload length, excludes the header descriptor */
	int tlen;	/* total chain length reported on the used ring */
	int uidx, aidx, didx;

	uidx = *hq->hq_used_idx;
	aidx = hq->hq_cur_aidx;
	didx = hq->hq_avail_ring[aidx % hq->hq_size];
	assert(didx >= 0 && didx < hq->hq_size);

	vd = &hq->hq_dtable[didx];

	/*
	 * Run through the chain of descriptors, ignoring the
	 * first header descriptor. However, include the header
	 * length in the total length that will be put into the
	 * used queue.
	 */
	tlen = vd->vd_len;
	vd = &hq->hq_dtable[vd->vd_next];

	for (i = 0, plen = 0;
	     i < VTNET_MAXSEGS;
	     i++, vd = &hq->hq_dtable[vd->vd_next]) {
		iov[i].iov_base = paddr_guest2host(vd->vd_addr);
		iov[i].iov_len = vd->vd_len;
		plen += vd->vd_len;
		tlen += vd->vd_len;

		/* Last descriptor in the chain has no NEXT flag */
		if ((vd->vd_flags & VRING_DESC_F_NEXT) == 0)
			break;
	}
	assert(i < VTNET_MAXSEGS);

	DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, i + 1));
	pci_vtnet_tap_tx(sc, iov, i + 1, plen);

	/*
	 * Return this chain back to the host
	 */
	vu = &hq->hq_used_ring[uidx % hq->hq_size];
	vu->vu_idx = didx;
	vu->vu_tlen = tlen;
	hq->hq_cur_aidx = aidx + 1;
	*hq->hq_used_idx = uidx + 1;

	/*
	 * Generate an interrupt if able
	 */
	if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) {
		sc->vsc_isr |= 1;
		pci_generate_msi(sc->vsc_pi, 0);
	}
}
|
||||
|
||||
static void
|
||||
pci_vtnet_ping_txq(struct pci_vtnet_softc *sc)
|
||||
{
|
||||
struct vring_hqueue *hq = &sc->vsc_hq[VTNET_TXQ];
|
||||
int i;
|
||||
int ndescs;
|
||||
|
||||
/*
|
||||
* Calculate number of ring entries to process
|
||||
*/
|
||||
ndescs = hq_num_avail(hq);
|
||||
|
||||
if (ndescs == 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Run through all the entries, placing them into iovecs and
|
||||
* sending when an end-of-packet is found
|
||||
*/
|
||||
for (i = 0; i < ndescs; i++)
|
||||
pci_vtnet_proctx(sc, hq);
|
||||
}
|
||||
|
||||
/*
 * Guest kicked the (unimplemented) control queue; just log it.
 */
static void
pci_vtnet_ping_ctlq(struct pci_vtnet_softc *sc)
{

	DPRINTF(("vtnet: control qnotify!\n\r"));
}
|
||||
|
||||
/*
 * Set up host mappings for the currently selected queue given the
 * guest page frame number written to VTCFG_R_PFN.  The legacy virtio
 * ring layout is: descriptor table, then avail ring (flags, idx,
 * ring[]), then — after alignment to VRING_ALIGN — the used ring
 * (flags, idx, ring[]).
 */
static void
pci_vtnet_ring_init(struct pci_vtnet_softc *sc, uint64_t pfn)
{
	struct vring_hqueue *hq;
	int qnum = sc->vsc_curq;

	assert(qnum < VTNET_MAXQ);

	sc->vsc_pfn[qnum] = pfn << VRING_PFN;

	/*
	 * Set up host pointers to the various parts of the
	 * queue
	 */
	hq = &sc->vsc_hq[qnum];
	hq->hq_size = pci_vtnet_qsize(qnum);

	hq->hq_dtable = paddr_guest2host(pfn << VRING_PFN);
	hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size);
	hq->hq_avail_idx = hq->hq_avail_flags + 1;
	hq->hq_avail_ring = hq->hq_avail_flags + 2;
	hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring,
						 VRING_ALIGN);
	hq->hq_used_idx = hq->hq_used_flags + 1;
	hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);

	/*
	 * Initialize queue indexes
	 */
	hq->hq_cur_aidx = 0;
}
|
||||
|
||||
static int
|
||||
pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
MD5_CTX mdctx;
|
||||
unsigned char digest[16];
|
||||
char nstr[80];
|
||||
struct pci_vtnet_softc *sc;
|
||||
|
||||
/*
|
||||
* Access to guest memory is required. Fail if
|
||||
* memory not mapped
|
||||
*/
|
||||
if (paddr_guest2host(0) == NULL)
|
||||
return (1);
|
||||
|
||||
sc = malloc(sizeof(struct pci_vtnet_softc));
|
||||
memset(sc, 0, sizeof(struct pci_vtnet_softc));
|
||||
|
||||
pi->pi_arg = sc;
|
||||
sc->vsc_pi = pi;
|
||||
|
||||
pthread_mutex_init(&sc->vsc_mtx, NULL);
|
||||
|
||||
/*
|
||||
* Attempt to open the tap device
|
||||
*/
|
||||
sc->vsc_tapfd = -1;
|
||||
if (opts != NULL) {
|
||||
char tbuf[80];
|
||||
|
||||
strcpy(tbuf, "/dev/");
|
||||
strncat(tbuf, opts, sizeof(tbuf) - strlen(tbuf));
|
||||
|
||||
sc->vsc_tapfd = open(tbuf, O_RDWR);
|
||||
if (sc->vsc_tapfd == -1) {
|
||||
WPRINTF(("open of tap device %s failed\n", tbuf));
|
||||
} else {
|
||||
/*
|
||||
* Set non-blocking and register for read
|
||||
* notifications with the event loop
|
||||
*/
|
||||
int opt = 1;
|
||||
if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
|
||||
WPRINTF(("tap device O_NONBLOCK failed\n"));
|
||||
close(sc->vsc_tapfd);
|
||||
sc->vsc_tapfd = -1;
|
||||
}
|
||||
|
||||
sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
|
||||
EVF_READ,
|
||||
pci_vtnet_tap_callback,
|
||||
sc);
|
||||
if (sc->vsc_mevp == NULL) {
|
||||
WPRINTF(("Could not register event\n"));
|
||||
close(sc->vsc_tapfd);
|
||||
sc->vsc_tapfd = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The MAC address is the standard NetApp OUI of 00-a0-98,
|
||||
* followed by an MD5 of the vm name. The slot number is
|
||||
* prepended to this for slots other than 1, so that
|
||||
* CFE can netboot from the equivalent of slot 1.
|
||||
*/
|
||||
if (pi->pi_slot == 1) {
|
||||
strncpy(nstr, vmname, sizeof(nstr));
|
||||
} else {
|
||||
snprintf(nstr, sizeof(nstr), "%d-%s", pi->pi_slot, vmname);
|
||||
}
|
||||
|
||||
MD5Init(&mdctx);
|
||||
MD5Update(&mdctx, nstr, strlen(nstr));
|
||||
MD5Final(digest, &mdctx);
|
||||
|
||||
sc->vsc_macaddr[0] = 0x00;
|
||||
sc->vsc_macaddr[1] = 0xa0;
|
||||
sc->vsc_macaddr[2] = 0x98;
|
||||
sc->vsc_macaddr[3] = digest[0];
|
||||
sc->vsc_macaddr[4] = digest[1];
|
||||
sc->vsc_macaddr[5] = digest[2];
|
||||
|
||||
/* initialize config space */
|
||||
pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
|
||||
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
|
||||
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
|
||||
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
|
||||
pci_emul_alloc_bar(pi, 0, 0, PCIBAR_IO, VTNET_REGSZ);
|
||||
pci_emul_add_msicap(pi, 1);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Function pointer array to handle queue notifications
|
||||
*/
|
||||
/*
 * Function pointer array to handle queue notifications, indexed by
 * queue number (VTNET_RXQ, VTNET_TXQ, VTNET_CTLQ).
 */
static void (*pci_vtnet_qnotify[VTNET_MAXQ])(struct pci_vtnet_softc *) = {
	pci_vtnet_ping_rxq,
	pci_vtnet_ping_txq,
	pci_vtnet_ping_ctlq
};
|
||||
|
||||
static void
|
||||
pci_vtnet_write(struct pci_devinst *pi, int baridx, int offset, int size,
|
||||
uint32_t value)
|
||||
{
|
||||
struct pci_vtnet_softc *sc = pi->pi_arg;
|
||||
|
||||
if (offset + size > VTNET_REGSZ) {
|
||||
DPRINTF(("vtnet_write: 2big, offset %d size %d\n",
|
||||
offset, size));
|
||||
return;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&sc->vsc_mtx);
|
||||
|
||||
switch (offset) {
|
||||
case VTCFG_R_GUESTCAP:
|
||||
assert(size == 4);
|
||||
sc->vsc_features = value & VTNET_S_HOSTCAPS;
|
||||
break;
|
||||
case VTCFG_R_PFN:
|
||||
assert(size == 4);
|
||||
pci_vtnet_ring_init(sc, value);
|
||||
break;
|
||||
case VTCFG_R_QSEL:
|
||||
assert(size == 2);
|
||||
assert(value < VTNET_MAXQ);
|
||||
sc->vsc_curq = value;
|
||||
break;
|
||||
case VTCFG_R_QNOTIFY:
|
||||
assert(size == 2);
|
||||
assert(value < VTNET_MAXQ);
|
||||
(*pci_vtnet_qnotify[value])(sc);
|
||||
break;
|
||||
case VTCFG_R_STATUS:
|
||||
assert(size == 1);
|
||||
pci_vtnet_update_status(sc, value);
|
||||
break;
|
||||
case VTNET_R_CFG0:
|
||||
case VTNET_R_CFG1:
|
||||
case VTNET_R_CFG2:
|
||||
case VTNET_R_CFG3:
|
||||
case VTNET_R_CFG4:
|
||||
case VTNET_R_CFG5:
|
||||
/*
|
||||
* The driver is allowed to change the MAC address
|
||||
*/
|
||||
assert(size == 1);
|
||||
sc->vsc_macaddr[offset - VTNET_R_CFG0] = value;
|
||||
break;
|
||||
case VTCFG_R_HOSTCAP:
|
||||
case VTCFG_R_QNUM:
|
||||
case VTCFG_R_ISR:
|
||||
case VTNET_R_CFG6:
|
||||
case VTNET_R_CFG7:
|
||||
DPRINTF(("vtnet: write to readonly reg %d\n\r", offset));
|
||||
break;
|
||||
default:
|
||||
DPRINTF(("vtnet: unknown i/o write offset %d\n\r", offset));
|
||||
value = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&sc->vsc_mtx);
|
||||
}
|
||||
|
||||
/*
 * Handle a guest read from the virtio-net I/O register space and
 * return the register value.  Out-of-range accesses read as 0.
 * vsc_mtx is taken to serialize against the tap read callback.
 */
uint32_t
pci_vtnet_read(struct pci_devinst *pi, int baridx, int offset, int size)
{
	struct pci_vtnet_softc *sc = pi->pi_arg;
	uint32_t value;

	if (offset + size > VTNET_REGSZ) {
		DPRINTF(("vtnet_read: 2big, offset %d size %d\n",
			 offset, size));
		return (0);
	}

	pthread_mutex_lock(&sc->vsc_mtx);

	switch (offset) {
	case VTCFG_R_HOSTCAP:
		assert(size == 4);
		value = VTNET_S_HOSTCAPS;
		break;
	case VTCFG_R_GUESTCAP:
		assert(size == 4);
		value = sc->vsc_features; /* XXX never read ? */
		break;
	case VTCFG_R_PFN:
		/* PFN of the queue selected via VTCFG_R_QSEL */
		assert(size == 4);
		value = sc->vsc_pfn[sc->vsc_curq] >> VRING_PFN;
		break;
	case VTCFG_R_QNUM:
		assert(size == 2);
		value = pci_vtnet_qsize(sc->vsc_curq);
		break;
	case VTCFG_R_QSEL:
		assert(size == 2);
		value = sc->vsc_curq;  /* XXX never read ? */
		break;
	case VTCFG_R_QNOTIFY:
		assert(size == 2);
		value = sc->vsc_curq;  /* XXX never read ? */
		break;
	case VTCFG_R_STATUS:
		assert(size == 1);
		value = sc->vsc_status;
		break;
	case VTCFG_R_ISR:
		assert(size == 1);
		value = sc->vsc_isr;
		sc->vsc_isr = 0;	/* a read clears this flag */
		break;
	case VTNET_R_CFG0:
	case VTNET_R_CFG1:
	case VTNET_R_CFG2:
	case VTNET_R_CFG3:
	case VTNET_R_CFG4:
	case VTNET_R_CFG5:
		/* Byte-wise read of the MAC address */
		assert(size == 1);
		value = sc->vsc_macaddr[offset - VTNET_R_CFG0];
		break;
	case VTNET_R_CFG6:
		assert(size == 1);
		value = 0x01;	/* XXX link always up */
		break;
	case VTNET_R_CFG7:
		assert(size == 1);
		value = 0;	/* link status is in the LSB */
		break;
	default:
		DPRINTF(("vtnet: unknown i/o read offset %d\n\r", offset));
		value = 0;
		break;
	}

	pthread_mutex_unlock(&sc->vsc_mtx);

	return (value);
}
|
||||
|
||||
/*
 * Device emulation registration for the virtio network device.  The
 * PCI_EMUL_SET linker-set entry makes the device discoverable by the
 * emulation framework under the name "virtio-net".
 */
struct pci_devemu pci_de_vnet = {
	.pe_emu = "virtio-net",
	.pe_init = pci_vtnet_init,
	.pe_iow = pci_vtnet_write,
	.pe_ior = pci_vtnet_read,
};
PCI_EMUL_SET(pci_de_vnet);
|
196
usr.sbin/bhyve/pit_8254.c
Normal file
196
usr.sbin/bhyve/pit_8254.c
Normal file
@ -0,0 +1,196 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <machine/clock.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "fbsdrun.h"
|
||||
#include "inout.h"
|
||||
#include "pit_8254.h"
|
||||
|
||||
#define TIMER_SEL_MASK 0xc0
|
||||
#define TIMER_RW_MASK 0x30
|
||||
#define TIMER_MODE_MASK 0x0f
|
||||
#define TIMER_SEL_READBACK 0xc0
|
||||
|
||||
#define TIMER_DIV(freq, hz) (((freq) + (hz) / 2) / (hz))
|
||||
|
||||
#define PIT_8254_FREQ 1193182
|
||||
static const int nsecs_per_tick = 1000000000 / PIT_8254_FREQ;
|
||||
|
||||
struct counter {
|
||||
struct timeval tv; /* uptime when counter was loaded */
|
||||
uint16_t initial; /* initial counter value */
|
||||
uint8_t cr[2];
|
||||
uint8_t ol[2];
|
||||
int crbyte;
|
||||
int olbyte;
|
||||
};
|
||||
|
||||
/*
 * Normalize a timeval so that 0 <= tv_usec < 1000000.  Callers only
 * ever over- or under-shoot by less than one second, so a single
 * adjustment suffices and the branches are mutually exclusive.
 */
static void
timevalfix(struct timeval *t1)
{

	if (t1->tv_usec < 0) {
		t1->tv_sec -= 1;
		t1->tv_usec += 1000000;
	} else if (t1->tv_usec >= 1000000) {
		t1->tv_sec += 1;
		t1->tv_usec -= 1000000;
	}
}
|
||||
|
||||
/*
 * t1 -= t2, with the result normalized by timevalfix().
 */
static void
timevalsub(struct timeval *t1, const struct timeval *t2)
{

	t1->tv_sec -= t2->tv_sec;
	t1->tv_usec -= t2->tv_usec;
	timevalfix(t1);
}
|
||||
|
||||
static void
|
||||
latch(struct counter *c)
|
||||
{
|
||||
struct timeval tv2;
|
||||
uint16_t lval;
|
||||
uint64_t delta_nsecs, delta_ticks;
|
||||
|
||||
/* cannot latch a new value until the old one has been consumed */
|
||||
if (c->olbyte != 0)
|
||||
return;
|
||||
|
||||
if (c->initial == 0 || c->initial == 1) {
|
||||
/*
|
||||
* XXX the program that runs the VM can be stopped and
|
||||
* restarted at any time. This means that state that was
|
||||
* created by the guest is destroyed between invocations
|
||||
* of the program.
|
||||
*
|
||||
* If the counter's initial value is not programmed we
|
||||
* assume a value that would be set to generate 'guest_hz'
|
||||
* interrupts per second.
|
||||
*/
|
||||
c->initial = TIMER_DIV(PIT_8254_FREQ, guest_hz);
|
||||
gettimeofday(&c->tv, NULL);
|
||||
}
|
||||
|
||||
(void)gettimeofday(&tv2, NULL);
|
||||
timevalsub(&tv2, &c->tv);
|
||||
delta_nsecs = tv2.tv_sec * 1000000000 + tv2.tv_usec * 1000;
|
||||
delta_ticks = delta_nsecs / nsecs_per_tick;
|
||||
|
||||
lval = c->initial - delta_ticks % c->initial;
|
||||
c->olbyte = 2;
|
||||
c->ol[1] = lval; /* LSB */
|
||||
c->ol[0] = lval >> 8; /* MSB */
|
||||
}
|
||||
|
||||
/*
 * I/O port handler for the 8254 PIT: the mode/command port and the
 * three counter data ports.  Returns 0 on success, -1 for unsupported
 * accesses.  Counter state is kept in a function-local static array,
 * so all PIT state lives here.
 *
 * Only 16-bit (lo/hi) read/write and latch commands, in rate-generator
 * or square-wave mode, are supported; readback is rejected.
 */
static int
pit_8254_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
		 uint32_t *eax, void *arg)
{
	int sel, rw, mode;
	uint8_t val;
	struct counter *c;

	static struct counter counter[3];

	if (bytes != 1)
		return (-1);

	val = *eax;

	if (port == TIMER_MODE) {
		/* Command port is write-only */
		assert(in == 0);
		sel = val & TIMER_SEL_MASK;
		rw = val & TIMER_RW_MASK;
		mode = val & TIMER_MODE_MASK;

		if (sel == TIMER_SEL_READBACK)
			return (-1);
		if (rw != TIMER_LATCH && rw != TIMER_16BIT)
			return (-1);

		if (rw != TIMER_LATCH) {
			/*
			 * Counter mode is not affected when issuing a
			 * latch command.
			 */
			if (mode != TIMER_RATEGEN && mode != TIMER_SQWAVE)
				return (-1);
		}

		/* Top two bits of the command select the counter */
		c = &counter[sel >> 6];
		if (rw == TIMER_LATCH)
			latch(c);
		else
			c->olbyte = 0;	/* reset latch after reprogramming */

		return (0);
	}

	/* counter ports */
	assert(port >= TIMER_CNTR0 && port <= TIMER_CNTR2);
	c = &counter[port - TIMER_CNTR0];

	if (in) {
		/*
		 * XXX
		 * The spec says that once the output latch is completely
		 * read it should revert to "following" the counter. We don't
		 * do this because it is hard and any reasonable OS should
		 * always latch the counter before trying to read it.
		 */
		if (c->olbyte == 0)
			c->olbyte = 2;
		*eax = c->ol[--c->olbyte];
	} else {
		/* Accumulate LSB then MSB; second byte loads the counter */
		c->cr[c->crbyte++] = *eax;
		if (c->crbyte == 2) {
			c->crbyte = 0;
			c->initial = c->cr[0] | (uint16_t)c->cr[1] << 8;
			gettimeofday(&c->tv, NULL);
		}
	}

	return (0);
}
|
||||
|
||||
/*
 * Register the PIT handler: the mode port is write-only, the three
 * counter data ports are bidirectional.
 */
INOUT_PORT(8254, TIMER_MODE, IOPORT_F_OUT, pit_8254_handler);
INOUT_PORT(8254, TIMER_CNTR0, IOPORT_F_INOUT, pit_8254_handler);
INOUT_PORT(8254, TIMER_CNTR1, IOPORT_F_INOUT, pit_8254_handler);
INOUT_PORT(8254, TIMER_CNTR2, IOPORT_F_INOUT, pit_8254_handler);
|
45
usr.sbin/bhyve/pit_8254.h
Normal file
45
usr.sbin/bhyve/pit_8254.h
Normal file
@ -0,0 +1,45 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _PIT_8254_H_
#define	_PIT_8254_H_

/*
 * Borrowed from amd64/include/timerreg.h because in that file it is
 * conditionally compiled for #ifdef _KERNEL only.
 */

#include <dev/ic/i8253reg.h>

/* Legacy PC/AT i8254 I/O port assignments */
#define	IO_TIMER1	0x40		/* 8253 Timer #1 */
#define	TIMER_CNTR0	(IO_TIMER1 + TIMER_REG_CNTR0)
#define	TIMER_CNTR1	(IO_TIMER1 + TIMER_REG_CNTR1)
#define	TIMER_CNTR2	(IO_TIMER1 + TIMER_REG_CNTR2)
#define	TIMER_MODE	(IO_TIMER1 + TIMER_REG_MODE)

#endif	/* _PIT_8254_H_ */
|
51
usr.sbin/bhyve/post.c
Normal file
51
usr.sbin/bhyve/post.c
Normal file
@ -0,0 +1,51 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
/*
 * Read handler for the POST diagnostic byte port.  No POST hardware is
 * emulated, so a one-byte read always returns 0xff; any other access
 * width is rejected.
 */
static int
post_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
		  uint32_t *eax, void *arg)
{
	assert(in == 1);

	if (bytes == 1) {
		*eax = 0xff;	/* return some garbage */
		return (0);
	}

	return (-1);
}
|
||||
|
||||
INOUT_PORT(post, 0x84, IOPORT_F_IN, post_data_handler);
|
268
usr.sbin/bhyve/rtc.c
Normal file
268
usr.sbin/bhyve/rtc.c
Normal file
@ -0,0 +1,268 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
#define	IO_RTC		0x70	/* RTC address (index) i/o port */

/* CMOS register indices latched through the address port */
#define	RTC_SEC		0x00	/* seconds */
#define	RTC_MIN		0x02
#define	RTC_HRS		0x04
#define	RTC_WDAY	0x06
#define	RTC_DAY		0x07
#define	RTC_MONTH	0x08
#define	RTC_YEAR	0x09
#define	RTC_CENTURY	0x32	/* current century */

#define	RTC_STATUSA	0xA
#define	RTCSA_TUP	0x80	/* time update, don't look now */

#define	RTC_STATUSB	0xB
#define	RTCSB_DST	0x01
#define	RTCSB_24HR	0x02
#define	RTCSB_BIN	0x04	/* 0 = BCD, 1 = Binary */
#define	RTCSB_PINTR	0x40	/* 1 = enable periodic clock interrupt */
#define	RTCSB_HALT	0x80	/* stop clock updates */

#define	RTC_INTR	0x0c	/* status register C (R) interrupt source */

#define	RTC_STATUSD	0x0d	/* status register D (R) Lost Power */
#define	RTCSD_PWR	0x80	/* clock power OK */

#define	RTC_DIAG	0x0e

#define	RTC_RSTCODE	0x0f

/* Currently selected CMOS register, latched by a write to port 0x70 */
static int addr;

/* XXX initialize these to default values as they would be from BIOS */
static uint8_t status_a, status_b, rstcode;

/* Lookup table converting binary 0-99 to packed BCD */
static u_char const bin2bcd_data[] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29,
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99
};
#define	bin2bcd(bin)	(bin2bcd_data[bin])

/* Encode a register value in the format the guest selected: raw binary
 * if RTCSB_BIN is set, otherwise BCD. */
#define	rtcout(val)	((status_b & RTCSB_BIN) ? (val) : bin2bcd((val)))
|
||||
|
||||
/*
 * Normalize a timeval so that 0 <= tv_usec < 1000000, carrying
 * into/borrowing from tv_sec as needed (single corrective pass).
 */
static void
timevalfix(struct timeval *t1)
{

	if (t1->tv_usec < 0) {
		t1->tv_sec -= 1;
		t1->tv_usec += 1000000;
	}
	if (t1->tv_usec >= 1000000) {
		t1->tv_sec += 1;
		t1->tv_usec -= 1000000;
	}
}

/*
 * t1 -= t2, leaving the result in normalized form.
 */
static void
timevalsub(struct timeval *t1, const struct timeval *t2)
{

	t1->tv_sec -= t2->tv_sec;
	t1->tv_usec -= t2->tv_usec;
	timevalfix(t1);
}
|
||||
|
||||
static int
|
||||
rtc_addr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
assert(in == 0);
|
||||
|
||||
if (bytes != 1)
|
||||
return (-1);
|
||||
|
||||
switch (*eax) {
|
||||
case RTC_SEC:
|
||||
case RTC_MIN:
|
||||
case RTC_HRS:
|
||||
case RTC_WDAY:
|
||||
case RTC_DAY:
|
||||
case RTC_MONTH:
|
||||
case RTC_YEAR:
|
||||
case RTC_CENTURY:
|
||||
case RTC_STATUSA:
|
||||
case RTC_STATUSB:
|
||||
case RTC_INTR:
|
||||
case RTC_STATUSD:
|
||||
case RTC_DIAG:
|
||||
case RTC_RSTCODE:
|
||||
break;
|
||||
default:
|
||||
return (-1);
|
||||
}
|
||||
|
||||
addr = *eax;
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
rtc_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
int hour;
|
||||
time_t t;
|
||||
struct timeval cur, delta;
|
||||
|
||||
static struct timeval last;
|
||||
static struct tm tm;
|
||||
|
||||
if (bytes != 1)
|
||||
return (-1);
|
||||
|
||||
gettimeofday(&cur, NULL);
|
||||
|
||||
/*
|
||||
* Increment the cached time only once per second so we can guarantee
|
||||
* that the guest has at least one second to read the hour:min:sec
|
||||
* separately and still get a coherent view of the time.
|
||||
*/
|
||||
delta = cur;
|
||||
timevalsub(&delta, &last);
|
||||
if (delta.tv_sec >= 1 && (status_b & RTCSB_HALT) == 0) {
|
||||
t = cur.tv_sec;
|
||||
localtime_r(&t, &tm);
|
||||
last = cur;
|
||||
}
|
||||
|
||||
if (in) {
|
||||
switch (addr) {
|
||||
case RTC_SEC:
|
||||
*eax = rtcout(tm.tm_sec);
|
||||
return (0);
|
||||
case RTC_MIN:
|
||||
*eax = rtcout(tm.tm_min);
|
||||
return (0);
|
||||
case RTC_HRS:
|
||||
if (status_b & RTCSB_24HR)
|
||||
hour = tm.tm_hour;
|
||||
else
|
||||
hour = (tm.tm_hour % 12) + 1;
|
||||
|
||||
*eax = rtcout(hour);
|
||||
|
||||
/*
|
||||
* If we are representing time in the 12-hour format
|
||||
* then set the MSB to indicate PM.
|
||||
*/
|
||||
if ((status_b & RTCSB_24HR) == 0 && tm.tm_hour >= 12)
|
||||
*eax |= 0x80;
|
||||
|
||||
return (0);
|
||||
case RTC_WDAY:
|
||||
*eax = rtcout(tm.tm_wday + 1);
|
||||
return (0);
|
||||
case RTC_DAY:
|
||||
*eax = rtcout(tm.tm_mday);
|
||||
return (0);
|
||||
case RTC_MONTH:
|
||||
*eax = rtcout(tm.tm_mon + 1);
|
||||
return (0);
|
||||
case RTC_YEAR:
|
||||
*eax = rtcout(tm.tm_year % 100);
|
||||
return (0);
|
||||
case RTC_CENTURY:
|
||||
*eax = rtcout(tm.tm_year / 100);
|
||||
break;
|
||||
case RTC_STATUSA:
|
||||
*eax = status_a;
|
||||
return (0);
|
||||
case RTC_INTR:
|
||||
*eax = 0;
|
||||
return (0);
|
||||
case RTC_STATUSD:
|
||||
*eax = RTCSD_PWR;
|
||||
return (0);
|
||||
case RTC_DIAG:
|
||||
*eax = 0;
|
||||
return (0);
|
||||
case RTC_RSTCODE:
|
||||
*eax = rstcode;
|
||||
return (0);
|
||||
default:
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
|
||||
switch (addr) {
|
||||
case RTC_STATUSA:
|
||||
status_a = *eax & ~RTCSA_TUP;
|
||||
break;
|
||||
case RTC_STATUSB:
|
||||
/* XXX not implemented yet XXX */
|
||||
if (*eax & RTCSB_PINTR)
|
||||
return (-1);
|
||||
status_b = *eax;
|
||||
break;
|
||||
case RTC_RSTCODE:
|
||||
rstcode = *eax;
|
||||
break;
|
||||
case RTC_SEC:
|
||||
case RTC_MIN:
|
||||
case RTC_HRS:
|
||||
case RTC_WDAY:
|
||||
case RTC_DAY:
|
||||
case RTC_MONTH:
|
||||
case RTC_YEAR:
|
||||
case RTC_CENTURY:
|
||||
/*
|
||||
* Ignore writes to the time of day registers
|
||||
*/
|
||||
break;
|
||||
default:
|
||||
return (-1);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
INOUT_PORT(rtc, IO_RTC, IOPORT_F_OUT, rtc_addr_handler);
|
||||
INOUT_PORT(rtc, IO_RTC + 1, IOPORT_F_INOUT, rtc_data_handler);
|
60
usr.sbin/bhyve/uart.c
Normal file
60
usr.sbin/bhyve/uart.c
Normal file
@ -0,0 +1,60 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
#define COM1 0x3F8
|
||||
#define COM2 0x2F8
|
||||
|
||||
#define REG_IIR 2
|
||||
|
||||
/*
 * Read handler for the COM1/COM2 interrupt-identification registers.
 * No UART is emulated, so every one-byte read returns 0xFF; other
 * access widths are rejected.
 */
static int
com_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
	    uint32_t *eax, void *arg)
{
	assert(in);

	if (bytes == 1) {
		/*
		 * COM port is not implemented so we return 0xFF for all
		 * registers
		 */
		*eax = 0xFF;
		return (0);
	}

	return (-1);
}
|
||||
|
||||
INOUT_PORT(uart, COM1 + REG_IIR, IOPORT_F_IN, com_handler);
|
||||
INOUT_PORT(uart, COM2 + REG_IIR, IOPORT_F_IN, com_handler);
|
85
usr.sbin/bhyve/virtio.h
Normal file
85
usr.sbin/bhyve/virtio.h
Normal file
@ -0,0 +1,85 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VIRTIO_H_
#define	_VIRTIO_H_

/* Guest-physical alignment required for a virtqueue ring */
#define	VRING_ALIGN	4096

/* Descriptor flags (vd_flags) */
#define	VRING_DESC_F_NEXT	(1 << 0)	/* chained via vd_next */
#define	VRING_DESC_F_WRITE	(1 << 1)	/* device writes this buffer */
#define	VRING_DESC_F_INDIRECT	(1 << 2)	/* buffer holds descriptors */

/* Available-ring flag: guest does not need completion interrupts */
#define	VRING_AVAIL_F_NO_INTERRUPT	1

/*
 * Virtqueue buffer descriptor, shared with the guest; layout is fixed
 * by the virtio spec, hence __packed.
 */
struct virtio_desc {
	uint64_t	vd_addr;	/* guest-physical buffer address */
	uint32_t	vd_len;		/* buffer length in bytes */
	uint16_t	vd_flags;	/* VRING_DESC_F_* */
	uint16_t	vd_next;	/* next descriptor index if F_NEXT */
} __packed;

/* Used-ring element reported back to the guest */
struct virtio_used {
	uint32_t	vu_idx;		/* head index of completed chain */
	uint32_t	vu_tlen;	/* total bytes written by device */
} __packed;

/*
 * PFN register shift amount
 */
#define	VRING_PFN		12

/*
 * Virtio device types
 */
#define	VIRTIO_TYPE_NET		1
#define	VIRTIO_TYPE_BLOCK	2

/*
 * PCI vendor/device IDs
 */
#define	VIRTIO_VENDOR		0x1AF4
#define	VIRTIO_DEV_NET		0x1000
#define	VIRTIO_DEV_BLOCK	0x1001

/*
 * PCI config space constants
 */
#define	VTCFG_R_HOSTCAP		0
#define	VTCFG_R_GUESTCAP	4
#define	VTCFG_R_PFN		8
#define	VTCFG_R_QNUM		12
#define	VTCFG_R_QSEL		14
#define	VTCFG_R_QNOTIFY		16
#define	VTCFG_R_STATUS		18
#define	VTCFG_R_ISR		19
#define	VTCFG_R_CFG0		20	/* No MSI-X */
#define	VTCFG_R_CFG1		24	/* With MSI-X */
#define	VTCFG_R_MSIX		20

#endif	/* _VIRTIO_H_ */
|
261
usr.sbin/bhyve/xmsr.c
Normal file
261
usr.sbin/bhyve/xmsr.c
Normal file
@ -0,0 +1,261 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <machine/apicreg.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <machine/vmm.h>
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include "fbsdrun.h"
|
||||
#include "xmsr.h"
|
||||
|
||||
/*
 * Trampoline for hypervisor direct 64-bit jump.  Field offsets within
 * the guest's low-mem 4K page:
 *
 *   0 - signature for guest->host verification
 *   8 - kernel virtual address of trampoline
 *  16 - instruction virtual address
 *  24 - stack pointer virtual address
 *  32 - CR3, physical address of kernel page table
 *  40 - 24-byte area for null/code/data GDT entries
 */
#define	MP_V64T_SIG	0xcafebabecafebabeULL
struct mp_v64tramp {
	uint64_t	mt_sig;		/* MP_V64T_SIG */
	uint64_t	mt_virt;	/* KVA of trampoline */
	uint64_t	mt_eip;		/* 64-bit entry point */
	uint64_t	mt_rsp;		/* initial stack pointer */
	uint64_t	mt_cr3;		/* phys addr of kernel page table */
	uint64_t	mt_gdtr[3];	/* null/code/data GDT entries */
};

/*
 * CPU 0 is considered to be the BSP and is set to the RUNNING state.
 * All other CPUs are set up in the INIT state.
 */
#define	BSP	0
enum cpu_bstate {
	CPU_S_INIT,		/* powered on, awaiting INIT IPI */
	CPU_S_SIPI,		/* INIT seen, awaiting startup IPI */
	CPU_S_RUNNING		/* handed off to the main loop */
} static cpu_b[VM_MAXCPU] = { [BSP] = CPU_S_RUNNING };

static void spinup_ap(struct vmctx *, int, int, uint64_t *);
static void spinup_ap_direct64(struct vmctx *, int, uintptr_t, uint64_t *);
|
||||
|
||||
int
|
||||
emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val)
|
||||
{
|
||||
int dest;
|
||||
int mode;
|
||||
int thiscpu;
|
||||
int vec;
|
||||
int error, retval;
|
||||
uint64_t rip;
|
||||
|
||||
retval = vcpu;
|
||||
thiscpu = 1 << vcpu;
|
||||
|
||||
/*
|
||||
* The only MSR value handled is the x2apic CR register
|
||||
*/
|
||||
if (code != 0x830) {
|
||||
printf("Unknown WRMSR code %x, val %lx, cpu %d\n",
|
||||
code, val, vcpu);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* The value written to the MSR will generate an IPI to
|
||||
* a set of CPUs. If this is a SIPI, create the initial
|
||||
* state for the CPU and switch to it. Otherwise, inject
|
||||
* an interrupt for the destination CPU(s), and request
|
||||
* a switch to the next available one by returning -1
|
||||
*/
|
||||
dest = val >> 32;
|
||||
vec = val & APIC_VECTOR_MASK;
|
||||
mode = val & APIC_DELMODE_MASK;
|
||||
|
||||
switch (mode) {
|
||||
case APIC_DELMODE_INIT:
|
||||
assert(dest != 0);
|
||||
assert(dest < guest_ncpus);
|
||||
|
||||
/*
|
||||
* Ignore legacy de-assert INITs in x2apic mode
|
||||
*/
|
||||
if ((val & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) {
|
||||
break;
|
||||
}
|
||||
assert(cpu_b[dest] == CPU_S_INIT);
|
||||
|
||||
/*
|
||||
* Move CPU to wait-for-SIPI state
|
||||
*/
|
||||
error = vcpu_reset(ctx, dest);
|
||||
assert(error == 0);
|
||||
|
||||
cpu_b[dest] = CPU_S_SIPI;
|
||||
break;
|
||||
|
||||
case APIC_DELMODE_STARTUP:
|
||||
assert(dest != 0);
|
||||
assert(dest < guest_ncpus);
|
||||
/*
|
||||
* Ignore SIPIs in any state other than wait-for-SIPI
|
||||
*/
|
||||
if (cpu_b[dest] != CPU_S_SIPI) {
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Bring up the AP and signal the main loop that it is
|
||||
* available and to switch to it.
|
||||
*/
|
||||
spinup_ap(ctx, dest, vec, &rip);
|
||||
cpu_b[dest] = CPU_S_RUNNING;
|
||||
fbsdrun_addcpu(ctx, dest, rip);
|
||||
retval = dest;
|
||||
break;
|
||||
|
||||
default:
|
||||
printf("APIC delivery mode %lx not supported!\n",
|
||||
val & APIC_DELMODE_MASK);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return (retval);
|
||||
}
|
||||
|
||||
/*
 * There are 2 startup modes possible here:
 *  - if the CPU supports 'unrestricted guest' mode, the spinup can
 *    set up the processor state in power-on 16-bit mode, with the CS:IP
 *    init'd to the specified low-mem 4K page.
 *  - if the guest has requested a 64-bit trampoline in the low-mem 4K
 *    page by placing in the specified signature, set up the register
 *    state using register state in the signature. Note that this
 *    requires accessing guest physical memory to read the signature
 *    while 'unrestricted mode' does not.
 */
static void
spinup_ap(struct vmctx *ctx, int newcpu, int vector, uint64_t *rip)
{
	int error;
	uint16_t cs;
	uint64_t desc_base;
	uint32_t desc_limit, desc_access;

	/* Mirror the exit-on-hlt/pause settings of the existing vcpus */
	if (fbsdrun_vmexit_on_hlt()) {
		error = vm_set_capability(ctx, newcpu, VM_CAP_HALT_EXIT, 1);
		assert(error == 0);
	}

	if (fbsdrun_vmexit_on_pause()) {
		error = vm_set_capability(ctx, newcpu, VM_CAP_PAUSE_EXIT, 1);
		assert(error == 0);
	}

	error = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
	if (error) {
		/*
		 * If the guest does not support real-mode execution then
		 * we will bring up the AP directly in 64-bit mode.
		 */
		spinup_ap_direct64(ctx, newcpu, vector << PAGE_SHIFT, rip);
	} else {
		/*
		 * Update the %cs and %rip of the guest so that it starts
		 * executing real mode code at 'vector << 12'.
		 */
		*rip = 0;
		error = vm_set_register(ctx, newcpu, VM_REG_GUEST_RIP, *rip);
		assert(error == 0);

		/* Keep the existing limit/access, change only the base */
		error = vm_get_desc(ctx, newcpu, VM_REG_GUEST_CS, &desc_base,
		    &desc_limit, &desc_access);
		assert(error == 0);

		desc_base = vector << PAGE_SHIFT;
		error = vm_set_desc(ctx, newcpu, VM_REG_GUEST_CS,
		    desc_base, desc_limit, desc_access);
		assert(error == 0);

		/* Real-mode selector corresponding to the segment base */
		cs = (vector << PAGE_SHIFT) >> 4;
		error = vm_set_register(ctx, newcpu, VM_REG_GUEST_CS, cs);
		assert(error == 0);
	}
}
|
||||
|
||||
/*
 * Bring up an AP directly in 64-bit mode using the trampoline structure
 * the guest placed in the low-mem page at 'gaddr'.  On success '*rip'
 * is set to the trampoline's entry point; on failure the process exits.
 */
static void
spinup_ap_direct64(struct vmctx *ctx, int newcpu, uintptr_t gaddr,
    uint64_t *rip)
{
	struct mp_v64tramp *mvt;
	char *errstr;
	int error;
	uint64_t gdtbase;

	mvt = paddr_guest2host(gaddr);

	/* The guest must have published a valid trampoline */
	assert(mvt->mt_sig == MP_V64T_SIG);

	/*
	 * Set up the 3-entry GDT using memory supplied in the
	 * guest's trampoline structure.
	 */
	vm_setup_freebsd_gdt(mvt->mt_gdtr);

/* Record the failing step and bail out through the common error path */
#define CHECK_ERROR(msg) \
	if (error != 0) { \
		errstr = msg; \
		goto err_exit; \
	}

	/* entry point */
	*rip = mvt->mt_eip;

	/* Get the guest virtual address of the GDT */
	gdtbase = mvt->mt_virt + __offsetof(struct mp_v64tramp, mt_gdtr);

	error = vm_setup_freebsd_registers(ctx, newcpu, mvt->mt_eip,
	    mvt->mt_cr3, gdtbase, mvt->mt_rsp);
	CHECK_ERROR("vm_setup_freebsd_registers");

	return;
err_exit:
	printf("spinup_ap_direct64: machine state error: %s", errstr);
	exit(1);
}
|
34
usr.sbin/bhyve/xmsr.h
Normal file
34
usr.sbin/bhyve/xmsr.h
Normal file
@ -0,0 +1,34 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _XMSR_H_
|
||||
#define _XMSR_H_
|
||||
|
||||
int emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val);
|
||||
|
||||
#endif
|
15
usr.sbin/vmmctl/Makefile
Normal file
15
usr.sbin/vmmctl/Makefile
Normal file
@ -0,0 +1,15 @@
|
||||
#
# $FreeBSD$
#
# Build vmmctl, the hypervisor register-state dump/control utility,
# linked against libvmmapi.

PROG=	vmmctl
SRCS=	vmmctl.c

NO_MAN=

DPADD=	${LIBVMMAPI}
LDADD=	-lvmmapi

# Pick up the vmm ioctl/register definitions from the kernel source
CFLAGS+= -I${.CURDIR}/../../sys/amd64/vmm

.include <bsd.prog.mk>
|
75
usr.sbin/vmmctl/sample.sh
Executable file
75
usr.sbin/vmmctl/sample.sh
Executable file
@ -0,0 +1,75 @@
|
||||
#!/bin/sh

# $FreeBSD$

# Sample vmmctl session: create a VM, program an initial long-mode
# machine state (control registers, segment descriptors and selectors,
# GDTR), read each value back, then destroy the VM.  Several values are
# placeholders that should ultimately come from the loader (see the XXX
# notes below).

VMMCTL="sudo ./vmmctl"
VMNAME=sample

${VMMCTL} --vm=${VMNAME} --create
${VMMCTL} --vm=${VMNAME} --set-lowmem=128 --set-highmem=256
${VMMCTL} --vm=${VMNAME} --get-lowmem --get-highmem

CR0_PE=$((1 << 0))
CR0_PG=$((1 << 31))
CR0=$(($CR0_PE | $CR0_PG))
${VMMCTL} --vm=${VMNAME} --set-cr0=${CR0} --get-cr0

# XXX this is bogus: the value of %cr3 should come from the loader
CR3=0
${VMMCTL} --vm=${VMNAME} --set-cr3=${CR3} --get-cr3

CR4_PAE=$((1 << 5))
CR4=$((${CR4_PAE}))
${VMMCTL} --vm=${VMNAME} --set-cr4=${CR4} --get-cr4

DR7=0x00000400	# Table 9-1 from Intel Architecture Manual 3A
${VMMCTL} --vm=${VMNAME} --set-dr7=${DR7} --get-dr7

#
# XXX the values of rsp and rip are bogus and should come from the loader.
#
RSP=0xa5a5a5a5
RIP=0x0000bfbfbfbf0000
RFLAGS=0x2
${VMMCTL} --vm=${VMNAME} --set-rsp=${RSP} --get-rsp
${VMMCTL} --vm=${VMNAME} --set-rip=${RIP} --get-rip
${VMMCTL} --vm=${VMNAME} --set-rflags=${RFLAGS} --get-rflags

#
# Set "hidden" state of %cs descriptor to indicate long mode code segment.
#
# Note that this should match the contents of the entry pointed to by the
# segment selector in the GDTR.
#
${VMMCTL} --vm=${VMNAME} --set-desc-cs --desc-access=0x00002098 --get-desc-cs

# Set "hidden" state of all data descriptors to indicate a usable segment.
# The only useful fields are the "Present" and "Descriptor Type" bits.
${VMMCTL} --vm=${VMNAME} --set-desc-ds --desc-access=0x00000090 --get-desc-ds
${VMMCTL} --vm=${VMNAME} --set-desc-es --desc-access=0x00000090 --get-desc-es
${VMMCTL} --vm=${VMNAME} --set-desc-fs --desc-access=0x00000090 --get-desc-fs
${VMMCTL} --vm=${VMNAME} --set-desc-gs --desc-access=0x00000090 --get-desc-gs
${VMMCTL} --vm=${VMNAME} --set-desc-ss --desc-access=0x00000090 --get-desc-ss

#
# Set the code segment selector to point to entry at offset 8 in the GDTR.
#
${VMMCTL} --vm=${VMNAME} --set-cs=0x0008 --get-cs

# Set all the remaining data segment selectors to point to entry at offset
# 16 in the GDTR.
${VMMCTL} --vm=${VMNAME} --set-ds=0x0010 --get-ds
${VMMCTL} --vm=${VMNAME} --set-es=0x0010 --get-es
${VMMCTL} --vm=${VMNAME} --set-fs=0x0010 --get-fs
${VMMCTL} --vm=${VMNAME} --set-gs=0x0010 --get-gs
${VMMCTL} --vm=${VMNAME} --set-ss=0x0010 --get-ss

# XXX the value of the GDTR should come from the loader.
# Set the GDTR
GDTR_BASE=0xffff0000
GDTR_LIMIT=0x10
${VMMCTL} --vm=${VMNAME} --set-desc-gdtr --desc-base=${GDTR_BASE} --desc-limit=${GDTR_LIMIT} --get-desc-gdtr

${VMMCTL} --vm=${VMNAME} --set-pinning=0 --get-pinning
${VMMCTL} --vm=${VMNAME} --set-pinning=-1 --get-pinning

${VMMCTL} --vm=${VMNAME} --destroy
|
1485
usr.sbin/vmmctl/vmmctl.c
Normal file
1485
usr.sbin/vmmctl/vmmctl.c
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user