First cut to port bhyve, vmmctl, and libvmmapi to HEAD.
This commit is contained in:
commit
b67e81db43
@ -110,6 +110,7 @@ SUBDIR= ${SUBDIR_ORDERED} \
|
||||
${_libusbhid} \
|
||||
${_libusb} \
|
||||
${_libvgl} \
|
||||
${_libvmmapi} \
|
||||
libwrap \
|
||||
liby \
|
||||
libz \
|
||||
@ -197,6 +198,7 @@ _libsmb= libsmb
|
||||
.if ${MK_NCP} != "no"
|
||||
_libncp= libncp
|
||||
.endif
|
||||
_libvmmapi= libvmmapi
|
||||
.endif
|
||||
|
||||
.if ${MACHINE_CPUARCH} == "powerpc"
|
||||
|
11
lib/libvmmapi/Makefile
Normal file
11
lib/libvmmapi/Makefile
Normal file
@ -0,0 +1,11 @@
|
||||
# $FreeBSD$
|
||||
|
||||
LIB= vmmapi
|
||||
SRCS= vmmapi.c vmmapi_freebsd.c mptable.c
|
||||
INCS= vmmapi.h
|
||||
|
||||
WARNS?= 2
|
||||
|
||||
CFLAGS+= -I${.CURDIR}
|
||||
|
||||
.include <bsd.lib.mk>
|
338
lib/libvmmapi/mptable.c
Normal file
338
lib/libvmmapi/mptable.c
Normal file
@ -0,0 +1,338 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <machine/vmm.h>
|
||||
#include <machine/vmm_dev.h>
|
||||
|
||||
#include "vmmapi.h"
|
||||
#include "mptable.h"
|
||||
|
||||
#define LAPIC_PADDR (0xFEE00000)
|
||||
#define LAPIC_VERSION (16)
|
||||
|
||||
#define IOAPIC_PADDR (0xFEC00000)
|
||||
#define IOAPIC_VERSION (0x11)
|
||||
|
||||
/*
 * errno must come from <errno.h>: it may be implemented as a macro rather
 * than a plain 'int' object, so it must not be declared by hand.
 */
#include <errno.h>
|
||||
|
||||
/*
 * Return the checksum byte for the 'len' bytes starting at 'base': the
 * 8-bit value which, added to the byte-wise sum of the region, gives zero
 * modulo 256.
 */
static uint8_t
mp_compute_checksum(void *base, size_t len)
{
	const uint8_t *p = base;
	uint8_t total = 0;
	size_t i;

	for (i = 0; i < len; i++)
		total += p[i];

	/* The result is truncated to 8 bits by the return type. */
	return (256 - total);
}
|
||||
|
||||
static void
|
||||
mp_build_mpfp(struct mp_floating_pointer *mpfp, vm_paddr_t mpfp_gpa)
|
||||
{
|
||||
memset(mpfp, 0, sizeof(*mpfp));
|
||||
memcpy(mpfp->signature, MPFP_SIGNATURE, MPFP_SIGNATURE_LEN);
|
||||
mpfp->mptable_paddr = mpfp_gpa + sizeof(*mpfp);
|
||||
mpfp->specrev = MP_SPECREV;
|
||||
mpfp->feature2 = 0;
|
||||
mpfp->checksum = mp_compute_checksum(mpfp, sizeof(*mpfp));
|
||||
}
|
||||
|
||||
static void
|
||||
mp_build_mpch(struct mp_config_hdr *mpch)
|
||||
{
|
||||
memset(mpch, 0, sizeof(*mpch));
|
||||
mpch->specrev = MP_SPECREV;
|
||||
memcpy(mpch->signature, MPCH_SIGNATURE, MPCH_SIGNATURE_LEN);
|
||||
memcpy(mpch->oemid, MPCH_OEMID, MPCH_OEMID_LEN);
|
||||
memcpy(mpch->prodid, MPCH_PRODID, MPCH_PRODID_LEN);
|
||||
mpch->lapic_paddr = LAPIC_PADDR;
|
||||
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
mp_build_proc_entries(struct mpe_proc *mpep, int num_proc)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_proc; i++) {
|
||||
memset(mpep, 0, sizeof(*mpep));
|
||||
mpep->entry_type = MP_ENTRY_PROC;
|
||||
mpep->lapic_id = i; // XXX
|
||||
mpep->lapic_version = LAPIC_VERSION;
|
||||
mpep->proc_flags = (i == 0)?MPEP_FLAGS_BSP:0;
|
||||
mpep->proc_flags |= MPEP_FLAGS_EN;
|
||||
mpep->proc_signature = MPEP_SIGNATURE;
|
||||
mpep->feature_flags = MPEP_FEATURES;
|
||||
mpep++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
mp_build_bus_entries(struct mpe_bus *mpeb)
|
||||
{
|
||||
memset(mpeb, 0, sizeof(*mpeb));
|
||||
mpeb->entry_type = MP_ENTRY_BUS;
|
||||
mpeb->busid = MPE_BUSID_ISA;
|
||||
memcpy(mpeb->busname, MPE_BUSNAME_ISA, MPE_BUSNAME_LEN);
|
||||
mpeb++;
|
||||
|
||||
memset(mpeb, 0, sizeof(*mpeb));
|
||||
mpeb->entry_type = MP_ENTRY_BUS;
|
||||
mpeb->busid = MPE_BUSID_PCI;
|
||||
memcpy(mpeb->busname, MPE_BUSNAME_PCI, MPE_BUSNAME_LEN);
|
||||
|
||||
}
|
||||
|
||||
#ifdef notyet
|
||||
static void
|
||||
mp_build_ioapic_entries(struct mpe_ioapic *mpei)
|
||||
{
|
||||
memset(mpei, 0, sizeof(*mpei));
|
||||
mpei->entry_type = MP_ENTRY_IOAPIC;
|
||||
mpei->ioapic_id = MPE_IOAPIC_ID;
|
||||
mpei->ioapic_version = IOAPIC_VERSION;
|
||||
mpei->ioapic_flags = MPE_IOAPIC_FLAG_EN;
|
||||
mpei->ioapic_paddr = IOAPIC_PADDR;
|
||||
}
|
||||
|
||||
static void
|
||||
mp_build_ioint_entries(struct mpe_ioint *mpeii, int num_pins)
|
||||
{
|
||||
int pin;
|
||||
|
||||
/*
|
||||
* The following config is taken from kernel mptable.c
|
||||
* mptable_parse_default_config_ints(...), for now
|
||||
* just use the default config, tweek later if needed.
|
||||
*/
|
||||
|
||||
|
||||
/* Run through all 16 pins. */
|
||||
for (pin = 0; pin < num_pins; pin++) {
|
||||
memset(mpeii, 0, sizeof(*mpeii));
|
||||
mpeii->entry_type = MP_ENTRY_IOINT;
|
||||
mpeii->src_bus_id = MPE_BUSID_ISA;
|
||||
mpeii->dst_apic_id = MPE_IOAPIC_ID;
|
||||
|
||||
/*
|
||||
* All default configs route IRQs from bus 0 to the first 16 pins
|
||||
* of the first I/O APIC with an APIC ID of 2.
|
||||
*/
|
||||
mpeii->dst_apic_intin = pin;
|
||||
switch (pin) {
|
||||
case 0:
|
||||
/* Pin 0 is an ExtINT pin. */
|
||||
mpeii->intr_type = MPEII_INTR_EXTINT;
|
||||
break;
|
||||
case 2:
|
||||
/* IRQ 0 is routed to pin 2. */
|
||||
mpeii->intr_type = MPEII_INTR_INT;
|
||||
mpeii->src_bus_irq = 0;
|
||||
break;
|
||||
case 5:
|
||||
case 10:
|
||||
case 11:
|
||||
/*
|
||||
* PCI Irqs set to level triggered.
|
||||
*/
|
||||
mpeii->intr_flags = MPEII_FLAGS_TRIGMODE_LEVEL;
|
||||
mpeii->src_bus_id = MPE_BUSID_PCI;
|
||||
default:
|
||||
/* All other pins are identity mapped. */
|
||||
mpeii->intr_type = MPEII_INTR_INT;
|
||||
mpeii->src_bus_irq = pin;
|
||||
break;
|
||||
}
|
||||
mpeii++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
 * Copy 'bytes' bytes of the unterminated field 'src' into 'dest' and
 * NUL-terminate the result.  'dest' must have room for bytes + 1 chars.
 * Wrapped in do/while so that it behaves as a single statement.
 */
#define COPYSTR(dest, src, bytes)					\
	do {								\
		memcpy((dest), (src), (bytes));				\
		(dest)[(bytes)] = 0;					\
	} while (0)
|
||||
|
||||
|
||||
static void
|
||||
mptable_dump(struct mp_floating_pointer *mpfp, struct mp_config_hdr *mpch)
|
||||
{
|
||||
static char str[16];
|
||||
int i;
|
||||
char *cur;
|
||||
|
||||
union mpe {
|
||||
struct mpe_proc *proc;
|
||||
struct mpe_bus *bus;
|
||||
struct mpe_ioapic *ioapic;
|
||||
struct mpe_ioint *ioint;
|
||||
struct mpe_lint *lnit;
|
||||
char *p;
|
||||
};
|
||||
|
||||
union mpe mpe;
|
||||
|
||||
printf(" MP Floating Pointer :\n");
|
||||
COPYSTR(str, mpfp->signature, 4);
|
||||
printf(" signature: %s\n", str);
|
||||
printf(" mpch paddr: %x\n", mpfp->mptable_paddr);
|
||||
printf(" length: %x\n", mpfp->length);
|
||||
printf(" specrec: %x\n", mpfp->specrev);
|
||||
printf(" checksum: %x\n", mpfp->checksum);
|
||||
printf(" feature1: %x\n", mpfp->feature1);
|
||||
printf(" feature2: %x\n", mpfp->feature2);
|
||||
printf(" feature3: %x\n", mpfp->feature3);
|
||||
printf(" feature4: %x\n", mpfp->feature4);
|
||||
|
||||
printf(" MP Configuration Header :\n");
|
||||
COPYSTR(str, mpch->signature, 4);
|
||||
printf(" signature: %s\n", str);
|
||||
printf(" length: %x\n", mpch->length);
|
||||
printf(" specrec: %x\n", mpch->specrev);
|
||||
printf(" checksum: %x\n", mpch->checksum);
|
||||
COPYSTR(str, mpch->oemid, MPCH_OEMID_LEN);
|
||||
printf(" oemid: %s\n", str);
|
||||
COPYSTR(str, mpch->prodid, MPCH_PRODID_LEN);
|
||||
printf(" prodid: %s\n", str);
|
||||
printf(" oem_ptr: %x\n", mpch->oem_ptr);
|
||||
printf(" oem_sz: %x\n", mpch->oem_sz);
|
||||
printf(" nr_entries: %x\n", mpch->nr_entries);
|
||||
printf(" apic paddr: %x\n", mpch->lapic_paddr);
|
||||
printf(" ext_length: %x\n", mpch->ext_length);
|
||||
printf(" ext_checksum: %x\n", mpch->ext_checksum);
|
||||
|
||||
cur = (char *)mpch + sizeof(*mpch);
|
||||
for (i = 0; i < mpch->nr_entries; i++) {
|
||||
mpe.p = cur;
|
||||
switch(*mpe.p) {
|
||||
case MP_ENTRY_PROC:
|
||||
printf(" MP Processor Entry :\n");
|
||||
printf(" lapic_id: %x\n", mpe.proc->lapic_id);
|
||||
printf(" lapic_version: %x\n", mpe.proc->lapic_version);
|
||||
printf(" proc_flags: %x\n", mpe.proc->proc_flags);
|
||||
printf(" proc_signature: %x\n", mpe.proc->proc_signature);
|
||||
printf(" feature_flags: %x\n", mpe.proc->feature_flags);
|
||||
cur += sizeof(struct mpe_proc);
|
||||
break;
|
||||
case MP_ENTRY_BUS:
|
||||
printf(" MP Bus Entry :\n");
|
||||
printf(" busid: %x\n", mpe.bus->busid);
|
||||
COPYSTR(str, mpe.bus->busname, MPE_BUSNAME_LEN);
|
||||
printf(" busname: %s\n", str);
|
||||
cur += sizeof(struct mpe_bus);
|
||||
break;
|
||||
case MP_ENTRY_IOAPIC:
|
||||
printf(" MP IOAPIC Entry :\n");
|
||||
printf(" ioapi_id: %x\n", mpe.ioapic->ioapic_id);
|
||||
printf(" ioapi_version: %x\n", mpe.ioapic->ioapic_version);
|
||||
printf(" ioapi_flags: %x\n", mpe.ioapic->ioapic_flags);
|
||||
printf(" ioapi_paddr: %x\n", mpe.ioapic->ioapic_paddr);
|
||||
cur += sizeof(struct mpe_ioapic);
|
||||
break;
|
||||
case MP_ENTRY_IOINT:
|
||||
printf(" MP IO Interrupt Entry :\n");
|
||||
printf(" intr_type: %x\n", mpe.ioint->intr_type);
|
||||
printf(" intr_flags: %x\n", mpe.ioint->intr_flags);
|
||||
printf(" src_bus_id: %x\n", mpe.ioint->src_bus_id);
|
||||
printf(" src_bus_irq: %x\n", mpe.ioint->src_bus_irq);
|
||||
printf(" dst_apic_id: %x\n", mpe.ioint->dst_apic_id);
|
||||
printf(" dst_apic_intin: %x\n", mpe.ioint->dst_apic_intin);
|
||||
cur += sizeof(struct mpe_ioint);
|
||||
break;
|
||||
case MP_ENTRY_LINT:
|
||||
printf(" MP Local Interrupt Entry :\n");
|
||||
cur += sizeof(struct mpe_lint);
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
int
|
||||
vm_build_mptable(struct vmctx *ctx, vm_paddr_t gpa, int len, int ncpu,
|
||||
void *oemp, int oemsz)
|
||||
{
|
||||
struct mp_config_hdr *mpch;
|
||||
char *mapaddr;
|
||||
char *startaddr;
|
||||
int error;
|
||||
|
||||
mapaddr = vm_map_memory(ctx, gpa, len);
|
||||
if (mapaddr == MAP_FAILED) {
|
||||
printf("%s\n", strerror(errno));
|
||||
goto err;
|
||||
}
|
||||
startaddr = mapaddr;
|
||||
|
||||
mp_build_mpfp((struct mp_floating_pointer*) mapaddr, gpa);
|
||||
mapaddr += sizeof(struct mp_floating_pointer);
|
||||
|
||||
mpch = (struct mp_config_hdr*)mapaddr;
|
||||
mp_build_mpch(mpch);
|
||||
mapaddr += sizeof(struct mp_config_hdr);
|
||||
|
||||
mp_build_proc_entries((struct mpe_proc*) mapaddr, ncpu);
|
||||
mapaddr += (sizeof(struct mpe_proc)*ncpu);
|
||||
mpch->nr_entries += ncpu;
|
||||
|
||||
mp_build_bus_entries((struct mpe_bus*)mapaddr);
|
||||
mapaddr += (sizeof(struct mpe_bus)*MPE_NUM_BUSES);
|
||||
mpch->nr_entries += MPE_NUM_BUSES;
|
||||
#if 0
|
||||
mp_build_ioapic_entries((struct mpe_ioapic*)mapaddr);
|
||||
mapaddr += sizeof(struct mpe_ioapic);
|
||||
mpch->nr_entries++;
|
||||
|
||||
mp_build_ioint_entries((struct mpe_ioint*)mapaddr, MPEII_MAX_IRQ);
|
||||
mapaddr += sizeof(struct mpe_ioint)*MPEII_MAX_IRQ;
|
||||
mpch->nr_entries += MPEII_MAX_IRQ;
|
||||
|
||||
#endif
|
||||
if (oemp) {
|
||||
mpch->oem_ptr = mapaddr - startaddr + gpa;
|
||||
mpch->oem_sz = oemsz;
|
||||
memcpy(mapaddr, oemp, oemsz);
|
||||
}
|
||||
mpch->length = (mapaddr) - ((char*) mpch);
|
||||
mpch->checksum = mp_compute_checksum(mpch, sizeof(*mpch));
|
||||
|
||||
|
||||
// mptable_dump((struct mp_floating_pointer*)startaddr, mpch);
|
||||
err:
|
||||
return (error);
|
||||
}
|
171
lib/libvmmapi/mptable.h
Normal file
171
lib/libvmmapi/mptable.h
Normal file
@ -0,0 +1,171 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _MPTABLE_h_
|
||||
#define _MPTABLE_h_
|
||||
|
||||
#define MP_SPECREV (4) // MP spec revision 1.4
|
||||
|
||||
/*
|
||||
* MP Floating Pointer Structure
|
||||
*/
|
||||
#define MPFP_SIGNATURE		"_MP_"
#define MPFP_SIGNATURE_LEN	(4)
#define MPFP_FEATURE2		(0x80)	/* IMCR is present */

/*
 * In-memory layout of the MP floating pointer structure.  All members are
 * naturally aligned, so the structure is 16 bytes with no padding.
 */
struct mp_floating_pointer {
	uint8_t		signature[MPFP_SIGNATURE_LEN];	/* MPFP_SIGNATURE, no NUL */
	uint32_t	mptable_paddr;	/* address of the configuration table */
	uint8_t		length;
	uint8_t		specrev;	/* MP_SPECREV */
	uint8_t		checksum;	/* makes the structure's bytes sum to 0 */
	uint8_t		feature1;
	uint8_t		feature2;
	uint8_t		feature3;
	uint8_t		feature4;
	uint8_t		feature5;
};
|
||||
|
||||
|
||||
/*
|
||||
* MP Configuration Table Header
|
||||
*/
|
||||
#define MPCH_SIGNATURE "PCMP"
|
||||
#define MPCH_SIGNATURE_LEN (4)
|
||||
|
||||
#define MPCH_OEMID "NETAPP "
|
||||
#define MPCH_OEMID_LEN (8)
|
||||
#define MPCH_PRODID "vFiler "
|
||||
#define MPCH_PRODID_LEN (12)
|
||||
|
||||
struct mp_config_hdr {
|
||||
uint8_t signature[MPCH_SIGNATURE_LEN];
|
||||
uint16_t length;
|
||||
uint8_t specrev;
|
||||
uint8_t checksum;
|
||||
uint8_t oemid[MPCH_OEMID_LEN];
|
||||
uint8_t prodid[MPCH_PRODID_LEN];
|
||||
uint32_t oem_ptr;
|
||||
uint16_t oem_sz;
|
||||
uint16_t nr_entries;
|
||||
uint32_t lapic_paddr;
|
||||
uint16_t ext_length;
|
||||
uint8_t ext_checksum;
|
||||
uint8_t reserved;
|
||||
};
|
||||
|
||||
#define MP_ENTRY_PROC (0)
|
||||
#define MP_ENTRY_BUS (1)
|
||||
#define MP_ENTRY_IOAPIC (2)
|
||||
#define MP_ENTRY_IOINT (3)
|
||||
#define MP_ENTRY_LINT (4)
|
||||
|
||||
/*
|
||||
* MP Processor Entry
|
||||
*/
|
||||
|
||||
#define MPEP_FLAGS_EN (0x1)
|
||||
#define MPEP_FLAGS_BSP (0x2)
|
||||
|
||||
#define MPEP_SIG_FAMILY (6)
|
||||
#define MPEP_SIG_MODEL (26)
|
||||
#define MPEP_SIG_STEPPING (5)
|
||||
#define MPEP_SIGNATURE ((MPEP_SIG_FAMILY << 8) | (MPEP_SIG_MODEL << 4) \
|
||||
| (MPEP_SIG_STEPPING))
|
||||
|
||||
#define MPEP_FEATURES (0xBFEBFBFF) // Value from Intel i7 CPUID
|
||||
|
||||
/*
 * Processor entry of the MP configuration table (20 bytes).
 */
struct mpe_proc {
	uint8_t		entry_type;	/* MP_ENTRY_PROC */
	uint8_t		lapic_id;
	uint8_t		lapic_version;
	uint8_t		proc_flags;	/* MPEP_FLAGS_EN, MPEP_FLAGS_BSP */
	uint32_t	proc_signature;
	uint32_t	feature_flags;
	uint8_t		reserved[8];
};
|
||||
|
||||
/*
|
||||
* MP Bus Entry
|
||||
*/
|
||||
|
||||
#define MPE_NUM_BUSES (2)
|
||||
#define MPE_BUSNAME_LEN (6)
|
||||
#define MPE_BUSID_ISA (0)
|
||||
#define MPE_BUSID_PCI (1)
|
||||
#define MPE_BUSNAME_ISA "ISA "
|
||||
#define MPE_BUSNAME_PCI "PCI "
|
||||
struct mpe_bus {
|
||||
uint8_t entry_type;
|
||||
uint8_t busid;
|
||||
uint8_t busname[MPE_BUSNAME_LEN];
|
||||
};
|
||||
|
||||
/*
|
||||
* MP IO APIC Entry
|
||||
*/
|
||||
#define MPE_IOAPIC_ID (2)
|
||||
#define MPE_IOAPIC_FLAG_EN (1)
|
||||
/*
 * I/O APIC entry of the MP configuration table (8 bytes).
 */
struct mpe_ioapic {
	uint8_t		entry_type;	/* MP_ENTRY_IOAPIC */
	uint8_t		ioapic_id;
	uint8_t		ioapic_version;
	uint8_t		ioapic_flags;	/* MPE_IOAPIC_FLAG_EN */
	uint32_t	ioapic_paddr;
};
|
||||
|
||||
/*
|
||||
* MP IO Interrupt Assignment Entry
|
||||
*/
|
||||
#define MPEII_INTR_INT (0)
|
||||
#define MPEII_INTR_NMI (1)
|
||||
#define MPEII_INTR_SMI (2)
|
||||
#define MPEII_INTR_EXTINT (3)
|
||||
#define MPEII_PCI_IRQ_MASK (0x0c20U) /* IRQ 5,10,11 are PCI connected */
|
||||
#define MPEII_MAX_IRQ (16)
|
||||
#define MPEII_FLAGS_TRIGMODE_LEVEL (0x3)
|
||||
/*
 * I/O interrupt assignment entry of the MP configuration table (8 bytes):
 * routes one source-bus IRQ to one input pin of an I/O APIC.
 */
struct mpe_ioint {
	uint8_t		entry_type;	/* MP_ENTRY_IOINT */
	uint8_t		intr_type;	/* one of MPEII_INTR_* */
	uint16_t	intr_flags;	/* e.g. MPEII_FLAGS_TRIGMODE_LEVEL */
	uint8_t		src_bus_id;
	uint8_t		src_bus_irq;
	uint8_t		dst_apic_id;
	uint8_t		dst_apic_intin;
};
|
||||
|
||||
/*
|
||||
* MP Local Interrupt Assignment Entry
|
||||
*/
|
||||
/*
 * Local interrupt assignment entry of the MP configuration table.
 *
 * The entry is 8 bytes long and mirrors the I/O interrupt entry, with a
 * local APIC LINTIN# as the destination.  The full layout is declared so
 * that sizeof(struct mpe_lint) is the real stride of an entry when a table
 * is walked.
 */
struct mpe_lint {
	uint8_t		entry_type;	/* MP_ENTRY_LINT */
	uint8_t		intr_type;
	uint16_t	intr_flags;
	uint8_t		src_bus_id;
	uint8_t		src_bus_irq;
	uint8_t		dst_apic_id;
	uint8_t		dst_apic_lintin;
};
|
||||
|
||||
int vm_build_mptable(struct vmctx *ctxt, vm_paddr_t gpa, int len,
|
||||
int ncpu, void *oemp, int oemsz);
|
||||
#endif /* _MPTABLE_h_ */
|
645
lib/libvmmapi/vmmapi.c
Normal file
645
lib/libvmmapi/vmmapi.c
Normal file
@ -0,0 +1,645 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <machine/specialreg.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <machine/vmm_dev.h>
|
||||
|
||||
#include "vmmapi.h"
|
||||
#include "mptable.h"
|
||||
|
||||
#ifndef CR4_VMXE
|
||||
#define CR4_VMXE (1UL << 13)
|
||||
#endif
|
||||
|
||||
#define BIOS_ROM_BASE (0xf0000)
|
||||
#define BIOS_ROM_SIZE (0x10000)
|
||||
|
||||
/*
 * Handle for one virtual machine, as returned by vm_open().
 */
struct vmctx {
	int	fd;	/* descriptor for /dev/vmm/<name>, or -1 */
	char	*name;	/* VM name; vm_open() stores it right after the struct */
};
|
||||
|
||||
#define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
|
||||
#define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))
|
||||
|
||||
/*
 * Open the device node /dev/vmm/<name> for reading and writing.
 *
 * Returns the descriptor from open(2), i.e. a negative value on failure.
 * Aborts through assert() if the path buffer cannot be allocated.
 */
static int
vm_device_open(const char *name)
{
	char *path;
	int pathlen, vmfd;

	/* Room for the directory prefix, the VM name and the NUL. */
	pathlen = strlen("/dev/vmm/") + strlen(name) + 1;
	path = malloc(pathlen);
	assert(path != NULL);
	snprintf(path, pathlen, "/dev/vmm/%s", name);

	vmfd = open(path, O_RDWR, 0);
	free(path);

	return (vmfd);
}
|
||||
|
||||
/*
 * Ask the vmm driver to create a virtual machine called 'name' by writing
 * the name to the hw.vmm.create sysctl.  Returns sysctlbyname()'s result.
 */
int
vm_create(const char *name)
{
	/* CREATE() hands the name to sysctlbyname() as the new value. */
	char *vmname = (char *)name;

	return (CREATE(vmname));
}
|
||||
|
||||
struct vmctx *
|
||||
vm_open(const char *name)
|
||||
{
|
||||
struct vmctx *vm;
|
||||
|
||||
vm = malloc(sizeof(struct vmctx) + strlen(name) + 1);
|
||||
assert(vm != NULL);
|
||||
|
||||
vm->fd = -1;
|
||||
vm->name = (char *)(vm + 1);
|
||||
strcpy(vm->name, name);
|
||||
|
||||
if ((vm->fd = vm_device_open(vm->name)) < 0)
|
||||
goto err;
|
||||
|
||||
return (vm);
|
||||
err:
|
||||
vm_destroy(vm);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
void
|
||||
vm_destroy(struct vmctx *vm)
|
||||
{
|
||||
assert(vm != NULL);
|
||||
|
||||
DESTROY(vm->name);
|
||||
if (vm->fd >= 0)
|
||||
close(vm->fd);
|
||||
free(vm);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa,
|
||||
vm_paddr_t *ret_hpa, size_t *ret_len)
|
||||
{
|
||||
int error;
|
||||
struct vm_memory_segment seg;
|
||||
|
||||
bzero(&seg, sizeof(seg));
|
||||
seg.gpa = gpa;
|
||||
error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg);
|
||||
*ret_hpa = seg.hpa;
|
||||
*ret_len = seg.len;
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_setup_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **mapaddr)
|
||||
{
|
||||
int error;
|
||||
struct vm_memory_segment seg;
|
||||
|
||||
/*
|
||||
* Create and optionally map 'len' bytes of memory at guest
|
||||
* physical address 'gpa'
|
||||
*/
|
||||
bzero(&seg, sizeof(seg));
|
||||
seg.gpa = gpa;
|
||||
seg.len = len;
|
||||
error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg);
|
||||
if (error == 0 && mapaddr != NULL) {
|
||||
*mapaddr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||
ctx->fd, gpa);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
char *
|
||||
vm_map_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len)
|
||||
{
|
||||
|
||||
/* Map 'len' bytes of memory at guest physical address 'gpa' */
|
||||
return ((char *)mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||
ctx->fd, gpa));
|
||||
}
|
||||
|
||||
int
|
||||
vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
|
||||
uint64_t base, uint32_t limit, uint32_t access)
|
||||
{
|
||||
int error;
|
||||
struct vm_seg_desc vmsegdesc;
|
||||
|
||||
bzero(&vmsegdesc, sizeof(vmsegdesc));
|
||||
vmsegdesc.cpuid = vcpu;
|
||||
vmsegdesc.regnum = reg;
|
||||
vmsegdesc.desc.base = base;
|
||||
vmsegdesc.desc.limit = limit;
|
||||
vmsegdesc.desc.access = access;
|
||||
|
||||
error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
|
||||
uint64_t *base, uint32_t *limit, uint32_t *access)
|
||||
{
|
||||
int error;
|
||||
struct vm_seg_desc vmsegdesc;
|
||||
|
||||
bzero(&vmsegdesc, sizeof(vmsegdesc));
|
||||
vmsegdesc.cpuid = vcpu;
|
||||
vmsegdesc.regnum = reg;
|
||||
|
||||
error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc);
|
||||
if (error == 0) {
|
||||
*base = vmsegdesc.desc.base;
|
||||
*limit = vmsegdesc.desc.limit;
|
||||
*access = vmsegdesc.desc.access;
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
|
||||
{
|
||||
int error;
|
||||
struct vm_register vmreg;
|
||||
|
||||
bzero(&vmreg, sizeof(vmreg));
|
||||
vmreg.cpuid = vcpu;
|
||||
vmreg.regnum = reg;
|
||||
vmreg.regval = val;
|
||||
|
||||
error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val)
|
||||
{
|
||||
int error;
|
||||
struct vm_register vmreg;
|
||||
|
||||
bzero(&vmreg, sizeof(vmreg));
|
||||
vmreg.cpuid = vcpu;
|
||||
vmreg.regnum = reg;
|
||||
|
||||
error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg);
|
||||
*ret_val = vmreg.regval;
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid)
|
||||
{
|
||||
int error;
|
||||
struct vm_pin vmpin;
|
||||
|
||||
bzero(&vmpin, sizeof(vmpin));
|
||||
vmpin.vm_cpuid = vcpu;
|
||||
|
||||
error = ioctl(ctx->fd, VM_GET_PINNING, &vmpin);
|
||||
*host_cpuid = vmpin.host_cpuid;
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid)
|
||||
{
|
||||
int error;
|
||||
struct vm_pin vmpin;
|
||||
|
||||
bzero(&vmpin, sizeof(vmpin));
|
||||
vmpin.vm_cpuid = vcpu;
|
||||
vmpin.host_cpuid = host_cpuid;
|
||||
|
||||
error = ioctl(ctx->fd, VM_SET_PINNING, &vmpin);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit)
|
||||
{
|
||||
int error;
|
||||
struct vm_run vmrun;
|
||||
|
||||
bzero(&vmrun, sizeof(vmrun));
|
||||
vmrun.cpuid = vcpu;
|
||||
vmrun.rip = rip;
|
||||
|
||||
error = ioctl(ctx->fd, VM_RUN, &vmrun);
|
||||
bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit));
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
vm_inject_event_real(struct vmctx *ctx, int vcpu, enum vm_event_type type,
|
||||
int vector, int error_code, int error_code_valid)
|
||||
{
|
||||
struct vm_event ev;
|
||||
|
||||
bzero(&ev, sizeof(ev));
|
||||
ev.cpuid = vcpu;
|
||||
ev.type = type;
|
||||
ev.vector = vector;
|
||||
ev.error_code = error_code;
|
||||
ev.error_code_valid = error_code_valid;
|
||||
|
||||
return (ioctl(ctx->fd, VM_INJECT_EVENT, &ev));
|
||||
}
|
||||
|
||||
int
|
||||
vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type,
|
||||
int vector)
|
||||
{
|
||||
|
||||
return (vm_inject_event_real(ctx, vcpu, type, vector, 0, 0));
|
||||
}
|
||||
|
||||
int
|
||||
vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type,
|
||||
int vector, int error_code)
|
||||
{
|
||||
|
||||
return (vm_inject_event_real(ctx, vcpu, type, vector, error_code, 1));
|
||||
}
|
||||
|
||||
int
|
||||
vm_build_tables(struct vmctx *ctxt, int ncpu, void *oemtbl, int oemtblsz)
|
||||
{
|
||||
|
||||
return (vm_build_mptable(ctxt, BIOS_ROM_BASE, BIOS_ROM_SIZE, ncpu,
|
||||
oemtbl, oemtblsz));
|
||||
}
|
||||
|
||||
int
|
||||
vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector)
|
||||
{
|
||||
struct vm_lapic_irq vmirq;
|
||||
|
||||
bzero(&vmirq, sizeof(vmirq));
|
||||
vmirq.cpuid = vcpu;
|
||||
vmirq.vector = vector;
|
||||
|
||||
return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq));
|
||||
}
|
||||
|
||||
int
|
||||
vm_inject_nmi(struct vmctx *ctx, int vcpu)
|
||||
{
|
||||
struct vm_nmi vmnmi;
|
||||
|
||||
bzero(&vmnmi, sizeof(vmnmi));
|
||||
vmnmi.cpuid = vcpu;
|
||||
|
||||
return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi));
|
||||
}
|
||||
|
||||
int
|
||||
vm_capability_name2type(const char *capname)
|
||||
{
|
||||
int i;
|
||||
|
||||
static struct {
|
||||
const char *name;
|
||||
int type;
|
||||
} capstrmap[] = {
|
||||
{ "hlt_exit", VM_CAP_HALT_EXIT },
|
||||
{ "mtrap_exit", VM_CAP_MTRAP_EXIT },
|
||||
{ "pause_exit", VM_CAP_PAUSE_EXIT },
|
||||
{ "unrestricted_guest", VM_CAP_UNRESTRICTED_GUEST },
|
||||
{ 0 }
|
||||
};
|
||||
|
||||
for (i = 0; capstrmap[i].name != NULL && capname != NULL; i++) {
|
||||
if (strcmp(capstrmap[i].name, capname) == 0)
|
||||
return (capstrmap[i].type);
|
||||
}
|
||||
|
||||
return (-1);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
|
||||
int *retval)
|
||||
{
|
||||
int error;
|
||||
struct vm_capability vmcap;
|
||||
|
||||
bzero(&vmcap, sizeof(vmcap));
|
||||
vmcap.cpuid = vcpu;
|
||||
vmcap.captype = cap;
|
||||
|
||||
error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap);
|
||||
*retval = vmcap.capval;
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val)
|
||||
{
|
||||
struct vm_capability vmcap;
|
||||
|
||||
bzero(&vmcap, sizeof(vmcap));
|
||||
vmcap.cpuid = vcpu;
|
||||
vmcap.captype = cap;
|
||||
vmcap.capval = val;
|
||||
|
||||
return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap));
|
||||
}
|
||||
|
||||
int
|
||||
vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
|
||||
{
|
||||
struct vm_pptdev pptdev;
|
||||
|
||||
bzero(&pptdev, sizeof(pptdev));
|
||||
pptdev.bus = bus;
|
||||
pptdev.slot = slot;
|
||||
pptdev.func = func;
|
||||
|
||||
return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev));
|
||||
}
|
||||
|
||||
int
|
||||
vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
|
||||
{
|
||||
struct vm_pptdev pptdev;
|
||||
|
||||
bzero(&pptdev, sizeof(pptdev));
|
||||
pptdev.bus = bus;
|
||||
pptdev.slot = slot;
|
||||
pptdev.func = func;
|
||||
|
||||
return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev));
|
||||
}
|
||||
|
||||
int
|
||||
vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
|
||||
vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
|
||||
{
|
||||
struct vm_pptdev_mmio pptmmio;
|
||||
|
||||
bzero(&pptmmio, sizeof(pptmmio));
|
||||
pptmmio.bus = bus;
|
||||
pptmmio.slot = slot;
|
||||
pptmmio.func = func;
|
||||
pptmmio.gpa = gpa;
|
||||
pptmmio.len = len;
|
||||
pptmmio.hpa = hpa;
|
||||
|
||||
return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio));
|
||||
}
|
||||
|
||||
int
|
||||
vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
|
||||
int destcpu, int vector, int numvec)
|
||||
{
|
||||
struct vm_pptdev_msi pptmsi;
|
||||
|
||||
bzero(&pptmsi, sizeof(pptmsi));
|
||||
pptmsi.vcpu = vcpu;
|
||||
pptmsi.bus = bus;
|
||||
pptmsi.slot = slot;
|
||||
pptmsi.func = func;
|
||||
pptmsi.destcpu = destcpu;
|
||||
pptmsi.vector = vector;
|
||||
pptmsi.numvec = numvec;
|
||||
|
||||
return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
|
||||
}
|
||||
|
||||
uint64_t *
|
||||
vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
|
||||
int *ret_entries)
|
||||
{
|
||||
int error;
|
||||
|
||||
static struct vm_stats vmstats;
|
||||
|
||||
vmstats.cpuid = vcpu;
|
||||
|
||||
error = ioctl(ctx->fd, VM_STATS, &vmstats);
|
||||
if (error == 0) {
|
||||
if (ret_entries)
|
||||
*ret_entries = vmstats.num_entries;
|
||||
if (ret_tv)
|
||||
*ret_tv = vmstats.tv;
|
||||
return (vmstats.statbuf);
|
||||
} else
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
const char *
|
||||
vm_get_stat_desc(struct vmctx *ctx, int index)
|
||||
{
|
||||
static struct vm_stat_desc statdesc;
|
||||
|
||||
statdesc.index = index;
|
||||
if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0)
|
||||
return (statdesc.desc);
|
||||
else
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* From Intel Vol 3a:
|
||||
* Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT
|
||||
*/
|
||||
int
|
||||
vcpu_reset(struct vmctx *vmctx, int vcpu)
|
||||
{
|
||||
int error;
|
||||
uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx;
|
||||
uint32_t desc_access, desc_limit;
|
||||
uint16_t sel;
|
||||
|
||||
zero = 0;
|
||||
|
||||
rflags = 0x2;
|
||||
error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
rip = 0xfff0;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0)
|
||||
goto done;
|
||||
|
||||
cr0 = CR0_NE;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0)
|
||||
goto done;
|
||||
|
||||
cr4 = CR4_VMXE;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* CS: present, r/w, accessed, 16-bit, byte granularity, usable
|
||||
*/
|
||||
desc_base = 0xffff0000;
|
||||
desc_limit = 0xffff;
|
||||
desc_access = 0x0093;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
sel = 0xf000;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0)
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity
|
||||
*/
|
||||
desc_base = 0;
|
||||
desc_limit = 0xffff;
|
||||
desc_access = 0x0093;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
sel = 0;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0)
|
||||
goto done;
|
||||
|
||||
/* General purpose registers */
|
||||
rdx = 0xf00;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0)
|
||||
goto done;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0)
|
||||
goto done;
|
||||
|
||||
/* GDTR, IDTR */
|
||||
desc_base = 0;
|
||||
desc_limit = 0xffff;
|
||||
desc_access = 0;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error != 0)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error != 0)
|
||||
goto done;
|
||||
|
||||
/* TR */
|
||||
desc_base = 0;
|
||||
desc_limit = 0xffff;
|
||||
desc_access = 0x0000008b;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
sel = 0;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0)
|
||||
goto done;
|
||||
|
||||
/* LDTR */
|
||||
desc_base = 0;
|
||||
desc_limit = 0xffff;
|
||||
desc_access = 0x00000082;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base,
|
||||
desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
sel = 0;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
|
||||
goto done;
|
||||
|
||||
/* XXX cr2, debug registers */
|
||||
|
||||
error = 0;
|
||||
done:
|
||||
return (error);
|
||||
}
|
98
lib/libvmmapi/vmmapi.h
Normal file
98
lib/libvmmapi/vmmapi.h
Normal file
@ -0,0 +1,98 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMMAPI_H_
|
||||
#define _VMMAPI_H_
|
||||
|
||||
struct vmctx;
|
||||
|
||||
int vm_create(const char *name);
|
||||
struct vmctx *vm_open(const char *name);
|
||||
void vm_destroy(struct vmctx *ctx);
|
||||
int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa,
|
||||
vm_paddr_t *ret_hpa, size_t *ret_len);
|
||||
/*
|
||||
* Create a memory segment of 'len' bytes in the guest physical address space
|
||||
* at offset 'gpa'.
|
||||
*
|
||||
* If 'mapaddr' is not NULL then this region is mmap'ed into the address
|
||||
* space of the calling process. If there is an mmap error then *mapaddr
|
||||
* will be set to MAP_FAILED.
|
||||
*/
|
||||
|
||||
int vm_setup_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len,
|
||||
char **mapaddr);
|
||||
char * vm_map_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len);
|
||||
int vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
|
||||
uint64_t base, uint32_t limit, uint32_t access);
|
||||
int vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
|
||||
uint64_t *base, uint32_t *limit, uint32_t *access);
|
||||
int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val);
|
||||
int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval);
|
||||
int vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid);
|
||||
int vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid);
|
||||
int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip,
|
||||
struct vm_exit *ret_vmexit);
|
||||
int vm_build_tables(struct vmctx *ctxt, int ncpus, void *oemtbl,
|
||||
int oemtblsz);
|
||||
int vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type,
|
||||
int vector);
|
||||
int vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type,
|
||||
int vector, int error_code);
|
||||
int vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector);
|
||||
int vm_inject_nmi(struct vmctx *ctx, int vcpu);
|
||||
int vm_capability_name2type(const char *capname);
|
||||
int vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
|
||||
int *retval);
|
||||
int vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
|
||||
int val);
|
||||
int vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
|
||||
int vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
|
||||
int vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
|
||||
vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
|
||||
int vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
|
||||
int dest, int vector, int numvec);
|
||||
|
||||
/*
|
||||
* Return a pointer to the statistics buffer. Note that this is not MT-safe.
|
||||
*/
|
||||
uint64_t *vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
|
||||
int *ret_entries);
|
||||
const char *vm_get_stat_desc(struct vmctx *ctx, int index);
|
||||
|
||||
/* Reset vcpu register state */
|
||||
int vcpu_reset(struct vmctx *ctx, int vcpu);
|
||||
|
||||
/*
|
||||
* FreeBSD specific APIs
|
||||
*/
|
||||
int vm_setup_freebsd_registers(struct vmctx *ctx, int vcpu,
|
||||
uint64_t rip, uint64_t cr3, uint64_t gdtbase,
|
||||
uint64_t rsp);
|
||||
void vm_setup_freebsd_gdt(uint64_t *gdtr);
|
||||
#endif /* _VMMAPI_H_ */
|
187
lib/libvmmapi/vmmapi_freebsd.c
Normal file
187
lib/libvmmapi/vmmapi_freebsd.c
Normal file
@ -0,0 +1,187 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <machine/specialreg.h>
|
||||
#include <machine/segments.h>
|
||||
#include <machine/vmm.h>
|
||||
|
||||
#include "vmmapi.h"
|
||||
|
||||
#ifndef CR4_VMXE
|
||||
#define CR4_VMXE (1UL << 13)
|
||||
#endif
|
||||
|
||||
#define DESC_UNUSABLE 0x00010000
|
||||
|
||||
#define	GUEST_NULL_SEL		0
#define	GUEST_CODE_SEL		1
#define	GUEST_DATA_SEL		2
#define	GUEST_GDTR_LIMIT	(3 * 8 - 1)

/*
 * Fill in the guest GDT at 'gdtr' with its three 8-byte entries: the null
 * descriptor, the code descriptor and the data descriptor, at the indices
 * named by GUEST_NULL_SEL, GUEST_CODE_SEL and GUEST_DATA_SEL.
 *
 * 'gdtr' must have room for at least three uint64_t entries.
 */
void
vm_setup_freebsd_gdt(uint64_t *gdtr)
{
	static const uint64_t entries[] = {
		[GUEST_NULL_SEL] = 0,
		[GUEST_CODE_SEL] = 0x0020980000000000,
		[GUEST_DATA_SEL] = 0x0000900000000000
	};
	unsigned int i;

	for (i = 0; i < sizeof(entries) / sizeof(entries[0]); i++)
		gdtr[i] = entries[i];
}
|
||||
|
||||
/*
|
||||
* Setup the 'vcpu' register set such that it will begin execution at
|
||||
* 'rip' in long mode.
|
||||
*/
|
||||
int
|
||||
vm_setup_freebsd_registers(struct vmctx *vmctx, int vcpu,
|
||||
uint64_t rip, uint64_t cr3, uint64_t gdtbase,
|
||||
uint64_t rsp)
|
||||
{
|
||||
int error;
|
||||
uint64_t cr0, cr4, efer, rflags, desc_base;
|
||||
uint32_t desc_access, desc_limit;
|
||||
uint16_t gsel;
|
||||
|
||||
cr0 = CR0_PE | CR0_PG | CR0_NE;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
|
||||
goto done;
|
||||
|
||||
cr4 = CR4_PAE | CR4_VMXE;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
|
||||
goto done;
|
||||
|
||||
efer = EFER_LME | EFER_LMA;
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_EFER, efer)))
|
||||
goto done;
|
||||
|
||||
rflags = 0x2;
|
||||
error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
desc_base = 0;
|
||||
desc_limit = 0;
|
||||
desc_access = 0x0000209B;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
desc_access = 0x00000093;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* XXX TR is pointing to null selector even though we set the
|
||||
* TSS segment to be usable with a base address and limit of 0.
|
||||
*/
|
||||
desc_access = 0x0000008b;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, 0, 0,
|
||||
DESC_UNUSABLE);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
gsel = GSEL(GUEST_CODE_SEL, SEL_KPL);
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, gsel)) != 0)
|
||||
goto done;
|
||||
|
||||
gsel = GSEL(GUEST_DATA_SEL, SEL_KPL);
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, gsel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, gsel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, gsel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, gsel)) != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, gsel)) != 0)
|
||||
goto done;
|
||||
|
||||
/* XXX TR is pointing to the null selector */
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, 0)) != 0)
|
||||
goto done;
|
||||
|
||||
/* LDTR is pointing to the null selector */
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
|
||||
goto done;
|
||||
|
||||
/* entry point */
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0)
|
||||
goto done;
|
||||
|
||||
/* page table base */
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, cr3)) != 0)
|
||||
goto done;
|
||||
|
||||
desc_base = gdtbase;
|
||||
desc_limit = GUEST_GDTR_LIMIT;
|
||||
error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR,
|
||||
desc_base, desc_limit, 0);
|
||||
if (error != 0)
|
||||
goto done;
|
||||
|
||||
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, rsp)) != 0)
|
||||
goto done;
|
||||
|
||||
error = 0;
|
||||
done:
|
||||
return (error);
|
||||
}
|
@ -160,6 +160,7 @@ LIBULOG?= ${DESTDIR}${LIBDIR}/libulog.a
|
||||
LIBUTIL?= ${DESTDIR}${LIBDIR}/libutil.a
|
||||
LIBUUTIL?= ${DESTDIR}${LIBDIR}/libuutil.a
|
||||
LIBVGL?= ${DESTDIR}${LIBDIR}/libvgl.a
|
||||
LIBVMMAPI?= ${DESTDIR}${LIBDIR}/libvmmapi.a
|
||||
LIBWRAP?= ${DESTDIR}${LIBDIR}/libwrap.a
|
||||
LIBXPG4?= ${DESTDIR}${LIBDIR}/libxpg4.a
|
||||
LIBY?= ${DESTDIR}${LIBDIR}/liby.a
|
||||
|
@ -10,6 +10,7 @@ SUBDIR+= acpi
|
||||
SUBDIR+= apm
|
||||
.endif
|
||||
SUBDIR+= asf
|
||||
SUBDIR+= bhyve
|
||||
SUBDIR+= boot0cfg
|
||||
.if ${MK_TOOLCHAIN} != "no"
|
||||
SUBDIR+= btxld
|
||||
@ -30,4 +31,5 @@ SUBDIR+= spkrtest
|
||||
.if ${MK_SYSINSTALL} != "no"
|
||||
SUBDIR+= sade
|
||||
.endif
|
||||
SUBDIR+= vmmctl
|
||||
SUBDIR+= zzz
|
||||
|
20
usr.sbin/bhyve/Makefile
Normal file
20
usr.sbin/bhyve/Makefile
Normal file
@ -0,0 +1,20 @@
|
||||
#
|
||||
# $FreeBSD$
|
||||
#
|
||||
|
||||
PROG= bhyve
|
||||
|
||||
SRCS= atpic.c consport.c dbgport.c elcr.c fbsdrun.c inout.c mevent.c
|
||||
SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c
|
||||
SRCS+= pci_virtio_net.c pit_8254.c post.c rtc.c uart.c xmsr.c
|
||||
|
||||
NO_MAN=
|
||||
|
||||
DPADD= ${LIBVMMAPI} ${LIBMD} ${LIBPTHREAD}
|
||||
LDADD= -lvmmapi -lmd -lpthread
|
||||
|
||||
WARNS?= 2
|
||||
|
||||
CFLAGS+= -I${.CURDIR}/../../sys
|
||||
|
||||
.include <bsd.prog.mk>
|
68
usr.sbin/bhyve/atpic.c
Normal file
68
usr.sbin/bhyve/atpic.c
Normal file
@ -0,0 +1,68 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
/*
|
||||
* FreeBSD only writes to the 8259 interrupt controllers to put them in a
|
||||
* shutdown state.
|
||||
*
|
||||
* So, we just ignore the writes.
|
||||
*/
|
||||
|
||||
#define IO_ICU1 0x20
|
||||
#define IO_ICU2 0xA0
|
||||
#define ICU_IMR_OFFSET 1
|
||||
|
||||
/*
 * I/O port handler for the 8259 ports registered below.
 *
 * Single-byte writes are accepted and their data discarded; reads and
 * accesses of any other width are rejected.
 *
 * Returns 0 if the access was accepted and -1 otherwise.
 */
static int
atpic_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{
	const int is_byte_write = (bytes == 1 && !in);

	/* Pretend all writes to the 8259 are alright */
	return (is_byte_write ? 0 : -1);
}
|
||||
|
||||
INOUT_PORT(atpic, IO_ICU1, IOPORT_F_INOUT, atpic_handler);
|
||||
INOUT_PORT(atpic, IO_ICU1 + ICU_IMR_OFFSET, IOPORT_F_INOUT, atpic_handler);
|
||||
INOUT_PORT(atpic, IO_ICU2, IOPORT_F_INOUT, atpic_handler);
|
||||
INOUT_PORT(atpic, IO_ICU2 + ICU_IMR_OFFSET, IOPORT_F_INOUT, atpic_handler);
|
121
usr.sbin/bhyve/consport.c
Normal file
121
usr.sbin/bhyve/consport.c
Normal file
@ -0,0 +1,121 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/select.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <termios.h>
|
||||
#include <unistd.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
#define BVM_CONSOLE_PORT 0x220
|
||||
|
||||
/* Terminal settings saved by ttyopen() and the raw settings derived from them. */
static struct termios tio_orig, tio_new;

/*
 * atexit(3) hook: restore the terminal settings that ttyopen() saved.
 */
static void
ttyclose(void)
{
	tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig);
}

/*
 * Switch stdin to raw mode and arrange for the previous settings to be
 * restored when the process exits.
 *
 * If the current settings cannot be read (for example stdin is not a
 * terminal) nothing is changed and no exit hook is installed.
 */
static void
ttyopen(void)
{
	if (tcgetattr(STDIN_FILENO, &tio_orig) != 0)
		return;

	/*
	 * cfmakeraw() only adjusts the flags that define raw mode, so it
	 * has to start from the current settings to keep everything else
	 * (speed, control characters, ...) intact.
	 */
	tio_new = tio_orig;
	cfmakeraw(&tio_new);
	tcsetattr(STDIN_FILENO, TCSANOW, &tio_new);

	atexit(ttyclose);
}
|
||||
|
||||
/*
 * Poll stdin without blocking.
 *
 * Returns true if select(2) reports stdin as readable, and false if it does
 * not or if select(2) fails.
 */
static bool
tty_char_available(void)
{
	struct timeval no_wait;
	fd_set readable;
	int nready;

	/* A zero timeout turns select() into a poll. */
	no_wait.tv_sec = 0;
	no_wait.tv_usec = 0;

	FD_ZERO(&readable);
	FD_SET(STDIN_FILENO, &readable);

	nready = select(STDIN_FILENO + 1, &readable, NULL, NULL, &no_wait);
	return (nready > 0);
}
|
||||
|
||||
/*
 * Read one character from stdin without blocking.
 *
 * Returns the character as a value in the range 0..255, or -1 if no
 * character is available or the read does not deliver exactly one byte
 * (end of file or error).
 */
static int
ttyread(void)
{
	unsigned char rb;

	if (!tty_char_available())
		return (-1);

	/*
	 * stdin can be reported readable at end of file or on error too;
	 * only hand back a byte that was actually read.
	 */
	if (read(STDIN_FILENO, &rb, 1) != 1)
		return (-1);

	return (rb);
}
|
||||
|
||||
/*
 * Write the single byte 'wb' to stdout.  The result of write(2) is
 * deliberately ignored.
 */
static void
ttywrite(unsigned char wb)
{
	const unsigned char out = wb;

	(void) write(STDOUT_FILENO, &out, sizeof(out));
}
|
||||
|
||||
/*
 * I/O port handler for the console port.
 *
 * Only 4-byte accesses are accepted.  The terminal is put into raw mode on
 * the first accepted access.  A read stores the result of ttyread() in
 * '*eax' (a character, or -1 if none is pending); a write passes '*eax' to
 * ttywrite().
 *
 * Returns 0 if the access was handled and -1 if it had the wrong width.
 */
static int
console_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{
	static int tty_ready;

	if (bytes != 4)
		return (-1);

	/* One-time terminal setup. */
	if (tty_ready == 0) {
		ttyopen();
		tty_ready = 1;
	}

	if (!in) {
		ttywrite(*eax);
		return (0);
	}

	*eax = ttyread();
	return (0);
}
|
||||
INOUT_PORT(console, BVM_CONSOLE_PORT, IOPORT_F_INOUT, console_handler);
|
125
usr.sbin/bhyve/dbgport.c
Normal file
125
usr.sbin/bhyve/dbgport.c
Normal file
@ -0,0 +1,125 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "inout.h"
|
||||
#include "dbgport.h"
|
||||
|
||||
#define BVM_DBG_PORT 0x224
|
||||
|
||||
static int listen_fd, conn_fd;
|
||||
|
||||
static struct sockaddr_in sin;
|
||||
|
||||
void
|
||||
init_dbgport(int sport)
|
||||
{
|
||||
conn_fd = -1;
|
||||
|
||||
if ((listen_fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
|
||||
perror("socket");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
sin.sin_len = sizeof(sin);
|
||||
sin.sin_family = AF_INET;
|
||||
sin.sin_addr.s_addr = htonl(INADDR_ANY);
|
||||
sin.sin_port = htons(sport);
|
||||
|
||||
if (bind(listen_fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
|
||||
perror("bind");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (listen(listen_fd, 1) < 0) {
|
||||
perror("listen");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
dbg_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
char ch;
|
||||
int nwritten, nread, printonce;
|
||||
|
||||
if (bytes != 4)
|
||||
return (-1);
|
||||
|
||||
again:
|
||||
printonce = 0;
|
||||
while (conn_fd < 0) {
|
||||
if (!printonce) {
|
||||
printf("Waiting for connection from gdb\r\n");
|
||||
printonce = 1;
|
||||
}
|
||||
conn_fd = accept(listen_fd, NULL, NULL);
|
||||
if (conn_fd >= 0)
|
||||
fcntl(conn_fd, F_SETFL, O_NONBLOCK);
|
||||
else if (errno != EINTR)
|
||||
perror("accept");
|
||||
}
|
||||
|
||||
if (in) {
|
||||
nread = read(conn_fd, &ch, 1);
|
||||
if (nread == -1 && errno == EAGAIN)
|
||||
*eax = -1;
|
||||
else if (nread == 1)
|
||||
*eax = ch;
|
||||
else {
|
||||
close(conn_fd);
|
||||
conn_fd = -1;
|
||||
goto again;
|
||||
}
|
||||
} else {
|
||||
ch = *eax;
|
||||
nwritten = write(conn_fd, &ch, 1);
|
||||
if (nwritten != 1) {
|
||||
close(conn_fd);
|
||||
conn_fd = -1;
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
INOUT_PORT(dbg, BVM_DBG_PORT, IOPORT_F_INOUT, dbg_handler);
|
36
usr.sbin/bhyve/dbgport.h
Normal file
36
usr.sbin/bhyve/dbgport.h
Normal file
@ -0,0 +1,36 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _DBGPORT_H_
|
||||
#define _DBGPORT_H_
|
||||
|
||||
#define DEFAULT_GDB_PORT 6466
|
||||
|
||||
void init_dbgport(int port);
|
||||
|
||||
#endif
|
65
usr.sbin/bhyve/elcr.c
Normal file
65
usr.sbin/bhyve/elcr.c
Normal file
@ -0,0 +1,65 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
/*
 * EISA interrupt Level Control Register.
 *
 * This is a 16-bit register with one bit for each of the IRQ0 through IRQ15.
 * A level triggered irq is indicated by setting the corresponding bit to '1'.
 */
#define	ELCR_PORT	0x4d0

/* Register contents, one byte per I/O port starting at ELCR_PORT. */
static uint8_t elcr[2] = { 0x00, 0x00 };

/*
 * I/O port handler for the two ELCR ports.
 *
 * Only single-byte accesses to ELCR_PORT and ELCR_PORT + 1 are accepted.
 * A read stores the selected register byte in '*eax'; a write stores the
 * low byte of '*eax' in the register.
 *
 * Returns 0 if the access was handled and -1 if the width or the port is
 * not valid.
 */
static int
elcr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{
	int idx;

	if (bytes != 1)
		return (-1);

	/* Never index outside the register array, whatever port we get. */
	idx = port - ELCR_PORT;
	if (idx < 0 || idx >= (int)(sizeof(elcr) / sizeof(elcr[0])))
		return (-1);

	if (in)
		*eax = elcr[idx];
	else
		elcr[idx] = *eax;

	return (0);
}
|
||||
INOUT_PORT(elcr, ELCR_PORT + 0, IOPORT_F_INOUT, elcr_handler);
|
||||
INOUT_PORT(elcr, ELCR_PORT + 1, IOPORT_F_INOUT, elcr_handler);
|
650
usr.sbin/bhyve/fbsdrun.c
Normal file
650
usr.sbin/bhyve/fbsdrun.c
Normal file
@ -0,0 +1,650 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <machine/segments.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <libgen.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <signal.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include "fbsdrun.h"
|
||||
#include "inout.h"
|
||||
#include "dbgport.h"
|
||||
#include "mevent.h"
|
||||
#include "pci_emul.h"
|
||||
#include "xmsr.h"
|
||||
|
||||
#define DEFAULT_GUEST_HZ 100
|
||||
#define DEFAULT_GUEST_TSLICE 200
|
||||
|
||||
#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */
|
||||
|
||||
#define VMEXIT_SWITCH 0 /* force vcpu switch in mux mode */
|
||||
#define VMEXIT_CONTINUE 1 /* continue from next instruction */
|
||||
#define VMEXIT_RESTART 2 /* restart current instruction */
|
||||
#define VMEXIT_ABORT 3 /* abort the vm run loop */
|
||||
#define VMEXIT_RESET 4 /* guest machine has reset */
|
||||
|
||||
#define MB (1024UL * 1024)
|
||||
#define GB (1024UL * MB)
|
||||
|
||||
typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
|
||||
|
||||
int guest_tslice = DEFAULT_GUEST_TSLICE;
|
||||
int guest_hz = DEFAULT_GUEST_HZ;
|
||||
char *vmname;
|
||||
|
||||
u_long lomem_sz;
|
||||
u_long himem_sz;
|
||||
|
||||
int guest_ncpus;
|
||||
|
||||
static int pincpu = -1;
|
||||
static int guest_vcpu_mux;
|
||||
static int guest_vmexit_on_hlt, guest_vmexit_on_pause;
|
||||
|
||||
static int foundcpus;
|
||||
|
||||
static char *lomem_addr;
|
||||
static char *himem_addr;
|
||||
|
||||
static char *progname;
|
||||
static const int BSP = 0;
|
||||
|
||||
static int cpumask;
|
||||
|
||||
static void *oem_tbl_start;
|
||||
static int oem_tbl_size;
|
||||
|
||||
static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);
|
||||
|
||||
struct vm_exit vmexit[VM_MAXCPU];
|
||||
|
||||
struct fbsdstats {
|
||||
uint64_t vmexit_bogus;
|
||||
uint64_t vmexit_bogus_switch;
|
||||
uint64_t vmexit_hlt;
|
||||
uint64_t vmexit_pause;
|
||||
uint64_t vmexit_mtrap;
|
||||
uint64_t cpu_switch_rotate;
|
||||
uint64_t cpu_switch_direct;
|
||||
int io_reset;
|
||||
} stats;
|
||||
|
||||
struct mt_vmm_info {
|
||||
pthread_t mt_thr;
|
||||
struct vmctx *mt_ctx;
|
||||
int mt_vcpu;
|
||||
} mt_vmm_info[VM_MAXCPU];
|
||||
|
||||
static void
|
||||
usage(int code)
|
||||
{
|
||||
|
||||
fprintf(stderr,
|
||||
"Usage: %s [-hBHP][-g <gdb port>][-z <hz>][-s <pci>][-p pincpu]"
|
||||
"[-n <pci>][-m lowmem][-M highmem] <vm>\n"
|
||||
" -g: gdb port (default is %d and 0 means don't open)\n"
|
||||
" -c: # cpus (default 1)\n"
|
||||
" -p: pin vcpu 'n' to host cpu 'pincpu + n'\n"
|
||||
" -B: inject breakpoint exception on vm entry\n"
|
||||
" -H: vmexit from the guest on hlt\n"
|
||||
" -P: vmexit from the guest on pause\n"
|
||||
" -h: help\n"
|
||||
" -z: guest hz (default is %d)\n"
|
||||
" -s: <slot,driver,configinfo> PCI slot config\n"
|
||||
" -n: <slot,name> PCI slot naming\n"
|
||||
" -m: lowmem in MB\n"
|
||||
" -M: highmem in MB\n"
|
||||
" -x: mux vcpus to 1 hcpu\n"
|
||||
" -t: mux vcpu timeslice hz (default %d)\n",
|
||||
progname, DEFAULT_GDB_PORT, DEFAULT_GUEST_HZ,
|
||||
DEFAULT_GUEST_TSLICE);
|
||||
exit(code);
|
||||
}
|
||||
|
||||
void *
|
||||
paddr_guest2host(uintptr_t gaddr)
|
||||
{
|
||||
if (lomem_sz == 0)
|
||||
return (NULL);
|
||||
|
||||
if (gaddr < lomem_sz) {
|
||||
return ((void *)(lomem_addr + gaddr));
|
||||
} else if (gaddr >= 4*GB && gaddr < (4*GB + himem_sz)) {
|
||||
return ((void *)(himem_addr + gaddr - 4*GB));
|
||||
} else
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
void
|
||||
fbsdrun_add_oemtbl(void *tbl, int tblsz)
|
||||
{
|
||||
oem_tbl_start = tbl;
|
||||
oem_tbl_size = tblsz;
|
||||
}
|
||||
|
||||
int
|
||||
fbsdrun_vmexit_on_pause(void)
|
||||
{
|
||||
|
||||
return (guest_vmexit_on_pause);
|
||||
}
|
||||
|
||||
int
|
||||
fbsdrun_vmexit_on_hlt(void)
|
||||
{
|
||||
|
||||
return (guest_vmexit_on_hlt);
|
||||
}
|
||||
|
||||
int
|
||||
fbsdrun_muxed(void)
|
||||
{
|
||||
|
||||
return (guest_vcpu_mux);
|
||||
}
|
||||
|
||||
static void *
|
||||
fbsdrun_start_thread(void *param)
|
||||
{
|
||||
int vcpu;
|
||||
struct mt_vmm_info *mtp = param;
|
||||
|
||||
vcpu = mtp->mt_vcpu;
|
||||
vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);
|
||||
|
||||
/* not reached */
|
||||
exit(1);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
void
|
||||
fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (cpumask & (1 << vcpu)) {
|
||||
printf("addcpu: attempting to add existing cpu %d\n", vcpu);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
cpumask |= 1 << vcpu;
|
||||
foundcpus++;
|
||||
|
||||
/*
|
||||
* Set up the vmexit struct to allow execution to start
|
||||
* at the given RIP
|
||||
*/
|
||||
vmexit[vcpu].rip = rip;
|
||||
vmexit[vcpu].inst_length = 0;
|
||||
|
||||
if (vcpu == BSP || !guest_vcpu_mux){
|
||||
mt_vmm_info[vcpu].mt_ctx = ctx;
|
||||
mt_vmm_info[vcpu].mt_vcpu = vcpu;
|
||||
|
||||
error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL,
|
||||
fbsdrun_start_thread, &mt_vmm_info[vcpu]);
|
||||
assert(error == 0);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
fbsdrun_get_next_cpu(int curcpu)
|
||||
{
|
||||
|
||||
/*
|
||||
* Get the next available CPU. Assumes they arrive
|
||||
* in ascending order with no gaps.
|
||||
*/
|
||||
return ((curcpu + 1) % foundcpus);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_catch_reset(void)
|
||||
{
|
||||
stats.io_reset++;
|
||||
return (VMEXIT_RESET);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_catch_inout(void)
|
||||
{
|
||||
return (VMEXIT_ABORT);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
|
||||
uint32_t eax)
|
||||
{
|
||||
#if PG_DEBUG /* put all types of debug here */
|
||||
if (eax == 0) {
|
||||
pause_noswitch = 1;
|
||||
} else if (eax == 1) {
|
||||
pause_noswitch = 0;
|
||||
} else {
|
||||
pause_noswitch = 0;
|
||||
if (eax == 5) {
|
||||
vm_set_capability(ctx, *pvcpu, VM_CAP_MTRAP_EXIT, 1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
|
||||
{
|
||||
int error;
|
||||
int bytes, port, in, out;
|
||||
uint32_t eax;
|
||||
int vcpu;
|
||||
|
||||
vcpu = *pvcpu;
|
||||
|
||||
port = vme->u.inout.port;
|
||||
bytes = vme->u.inout.bytes;
|
||||
eax = vme->u.inout.eax;
|
||||
in = vme->u.inout.in;
|
||||
out = !in;
|
||||
|
||||
/* We don't deal with these */
|
||||
if (vme->u.inout.string || vme->u.inout.rep)
|
||||
return (VMEXIT_ABORT);
|
||||
|
||||
/* Special case of guest reset */
|
||||
if (out && port == 0x64 && (uint8_t)eax == 0xFE)
|
||||
return (vmexit_catch_reset());
|
||||
|
||||
/* Extra-special case of host notifications */
|
||||
if (out && port == GUEST_NIO_PORT)
|
||||
return (vmexit_handle_notify(ctx, vme, pvcpu, eax));
|
||||
|
||||
error = emulate_inout(ctx, vcpu, in, port, bytes, &eax);
|
||||
if (error == 0 && in)
|
||||
error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax);
|
||||
|
||||
if (error == 0)
|
||||
return (VMEXIT_CONTINUE);
|
||||
else {
|
||||
fprintf(stderr, "Unhandled %s%c 0x%04x\n",
|
||||
in ? "in" : "out",
|
||||
bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port);
|
||||
return (vmexit_catch_inout());
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
|
||||
{
|
||||
printf("vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code, *pvcpu);
|
||||
return (VMEXIT_ABORT);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
|
||||
{
|
||||
int newcpu;
|
||||
int retval = VMEXIT_CONTINUE;
|
||||
|
||||
newcpu = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code,vme->u.msr.wval);
|
||||
|
||||
if (guest_vcpu_mux && *pvcpu != newcpu) {
|
||||
retval = VMEXIT_SWITCH;
|
||||
*pvcpu = newcpu;
|
||||
}
|
||||
|
||||
return (retval);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
|
||||
printf("vm exit[%d]\n", *pvcpu);
|
||||
printf("\treason\t\tVMX\n");
|
||||
printf("\trip\t\t0x%016lx\n", vmexit->rip);
|
||||
printf("\tinst_length\t%d\n", vmexit->inst_length);
|
||||
printf("\terror\t\t%d\n", vmexit->u.vmx.error);
|
||||
printf("\texit_reason\t%u\n", vmexit->u.vmx.exit_reason);
|
||||
printf("\tqualification\t0x%016lx\n", vmexit->u.vmx.exit_qualification);
|
||||
|
||||
return (VMEXIT_ABORT);
|
||||
}
|
||||
|
||||
static int bogus_noswitch = 1;
|
||||
|
||||
static int
|
||||
vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
stats.vmexit_bogus++;
|
||||
|
||||
if (!guest_vcpu_mux || guest_ncpus == 1 || bogus_noswitch) {
|
||||
return (VMEXIT_RESTART);
|
||||
} else {
|
||||
stats.vmexit_bogus_switch++;
|
||||
vmexit->inst_length = 0;
|
||||
*pvcpu = -1;
|
||||
return (VMEXIT_SWITCH);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
stats.vmexit_hlt++;
|
||||
if (fbsdrun_muxed()) {
|
||||
*pvcpu = -1;
|
||||
return (VMEXIT_SWITCH);
|
||||
} else {
|
||||
/*
|
||||
* Just continue execution with the next instruction. We use
|
||||
* the HLT VM exit as a way to be friendly with the host
|
||||
* scheduler.
|
||||
*/
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
}
|
||||
|
||||
static int pause_noswitch;
|
||||
|
||||
static int
|
||||
vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
stats.vmexit_pause++;
|
||||
|
||||
if (fbsdrun_muxed() && !pause_noswitch) {
|
||||
*pvcpu = -1;
|
||||
return (VMEXIT_SWITCH);
|
||||
} else {
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
stats.vmexit_mtrap++;
|
||||
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
|
||||
/*
 * SIGALRM handler installed by setup_timeslice().  Intentionally
 * empty.
 */
static void
sigalrm(int sig)
{
	(void)sig;
}
|
||||
|
||||
static void
|
||||
setup_timeslice(void)
|
||||
{
|
||||
struct sigaction sa;
|
||||
struct itimerval itv;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Setup a realtime timer to generate a SIGALRM at a
|
||||
* frequency of 'guest_tslice' ticks per second.
|
||||
*/
|
||||
sigemptyset(&sa.sa_mask);
|
||||
sa.sa_flags = 0;
|
||||
sa.sa_handler = sigalrm;
|
||||
|
||||
error = sigaction(SIGALRM, &sa, NULL);
|
||||
assert(error == 0);
|
||||
|
||||
itv.it_interval.tv_sec = 0;
|
||||
itv.it_interval.tv_usec = 1000000 / guest_tslice;
|
||||
itv.it_value.tv_sec = 0;
|
||||
itv.it_value.tv_usec = 1000000 / guest_tslice;
|
||||
|
||||
error = setitimer(ITIMER_REAL, &itv, NULL);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
|
||||
[VM_EXITCODE_INOUT] = vmexit_inout,
|
||||
[VM_EXITCODE_VMX] = vmexit_vmx,
|
||||
[VM_EXITCODE_BOGUS] = vmexit_bogus,
|
||||
[VM_EXITCODE_RDMSR] = vmexit_rdmsr,
|
||||
[VM_EXITCODE_WRMSR] = vmexit_wrmsr,
|
||||
[VM_EXITCODE_MTRAP] = vmexit_mtrap,
|
||||
};
|
||||
|
||||
static void
|
||||
vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
|
||||
{
|
||||
int error, rc, prevcpu;
|
||||
|
||||
if (guest_vcpu_mux)
|
||||
setup_timeslice();
|
||||
|
||||
if (pincpu >= 0) {
|
||||
error = vm_set_pinning(ctx, vcpu, pincpu + vcpu);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
while (1) {
|
||||
error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]);
|
||||
if (error != 0)
|
||||
break;
|
||||
|
||||
prevcpu = vcpu;
|
||||
rc = (*handler[vmexit[vcpu].exitcode])(ctx, &vmexit[vcpu],
|
||||
&vcpu);
|
||||
switch (rc) {
|
||||
case VMEXIT_SWITCH:
|
||||
assert(guest_vcpu_mux);
|
||||
if (vcpu == -1) {
|
||||
stats.cpu_switch_rotate++;
|
||||
vcpu = fbsdrun_get_next_cpu(prevcpu);
|
||||
} else {
|
||||
stats.cpu_switch_direct++;
|
||||
}
|
||||
/* fall through */
|
||||
case VMEXIT_CONTINUE:
|
||||
rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length;
|
||||
break;
|
||||
case VMEXIT_RESTART:
|
||||
rip = vmexit[vcpu].rip;
|
||||
break;
|
||||
case VMEXIT_RESET:
|
||||
exit(0);
|
||||
default:
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int c, error, gdb_port, inject_bkpt, tmp, err;
|
||||
struct vmctx *ctx;
|
||||
uint64_t rip;
|
||||
|
||||
inject_bkpt = 0;
|
||||
progname = basename(argv[0]);
|
||||
gdb_port = DEFAULT_GDB_PORT;
|
||||
guest_ncpus = 1;
|
||||
|
||||
while ((c = getopt(argc, argv, "hBHPxp:g:c:z:s:n:m:M:")) != -1) {
|
||||
switch (c) {
|
||||
case 'B':
|
||||
inject_bkpt = 1;
|
||||
break;
|
||||
case 'x':
|
||||
guest_vcpu_mux = 1;
|
||||
break;
|
||||
case 'p':
|
||||
pincpu = atoi(optarg);
|
||||
break;
|
||||
case 'c':
|
||||
guest_ncpus = atoi(optarg);
|
||||
break;
|
||||
case 'g':
|
||||
gdb_port = atoi(optarg);
|
||||
break;
|
||||
case 'z':
|
||||
guest_hz = atoi(optarg);
|
||||
break;
|
||||
case 't':
|
||||
guest_tslice = atoi(optarg);
|
||||
break;
|
||||
case 's':
|
||||
pci_parse_slot(optarg);
|
||||
break;
|
||||
case 'n':
|
||||
pci_parse_name(optarg);
|
||||
break;
|
||||
case 'm':
|
||||
lomem_sz = strtoul(optarg, NULL, 0) * MB;
|
||||
break;
|
||||
case 'M':
|
||||
himem_sz = strtoul(optarg, NULL, 0) * MB;
|
||||
break;
|
||||
case 'H':
|
||||
guest_vmexit_on_hlt = 1;
|
||||
break;
|
||||
case 'P':
|
||||
guest_vmexit_on_pause = 1;
|
||||
break;
|
||||
case 'h':
|
||||
usage(0);
|
||||
default:
|
||||
usage(1);
|
||||
}
|
||||
}
|
||||
argc -= optind;
|
||||
argv += optind;
|
||||
|
||||
if (argc != 1)
|
||||
usage(1);
|
||||
|
||||
/* No need to mux if guest is uni-processor */
|
||||
if (guest_ncpus <= 1)
|
||||
guest_vcpu_mux = 0;
|
||||
|
||||
/* vmexit on hlt if guest is muxed */
|
||||
if (guest_vcpu_mux) {
|
||||
guest_vmexit_on_hlt = 1;
|
||||
guest_vmexit_on_pause = 1;
|
||||
}
|
||||
|
||||
vmname = argv[0];
|
||||
|
||||
ctx = vm_open(vmname);
|
||||
if (ctx == NULL) {
|
||||
perror("vm_open");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (fbsdrun_vmexit_on_hlt()) {
|
||||
err = vm_get_capability(ctx, BSP, VM_CAP_HALT_EXIT, &tmp);
|
||||
if (err < 0) {
|
||||
printf("VM exit on HLT not supported\n");
|
||||
exit(1);
|
||||
}
|
||||
vm_set_capability(ctx, BSP, VM_CAP_HALT_EXIT, 1);
|
||||
handler[VM_EXITCODE_HLT] = vmexit_hlt;
|
||||
}
|
||||
|
||||
if (fbsdrun_vmexit_on_pause()) {
|
||||
/*
|
||||
* pause exit support required for this mode
|
||||
*/
|
||||
err = vm_get_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, &tmp);
|
||||
if (err < 0) {
|
||||
printf("SMP mux requested, no pause support\n");
|
||||
exit(1);
|
||||
}
|
||||
vm_set_capability(ctx, BSP, VM_CAP_PAUSE_EXIT, 1);
|
||||
handler[VM_EXITCODE_PAUSE] = vmexit_pause;
|
||||
}
|
||||
|
||||
if (lomem_sz != 0) {
|
||||
lomem_addr = vm_map_memory(ctx, 0, lomem_sz);
|
||||
if (lomem_addr == (char *) MAP_FAILED) {
|
||||
lomem_sz = 0;
|
||||
} else if (himem_sz != 0) {
|
||||
himem_addr = vm_map_memory(ctx, 4*GB, himem_sz);
|
||||
if (himem_addr == (char *) MAP_FAILED) {
|
||||
lomem_sz = 0;
|
||||
himem_sz = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
init_inout();
|
||||
init_pci(ctx);
|
||||
|
||||
if (gdb_port != 0)
|
||||
init_dbgport(gdb_port);
|
||||
|
||||
error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
|
||||
assert(error == 0);
|
||||
|
||||
if (inject_bkpt) {
|
||||
error = vm_inject_event(ctx, BSP, VM_HW_EXCEPTION, IDT_BP);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* build the guest tables, MP etc.
|
||||
*/
|
||||
vm_build_tables(ctx, guest_ncpus, oem_tbl_start, oem_tbl_size);
|
||||
|
||||
/*
|
||||
* Add CPU 0
|
||||
*/
|
||||
fbsdrun_addcpu(ctx, BSP, rip);
|
||||
|
||||
/*
|
||||
* Head off to the main event dispatch loop
|
||||
*/
|
||||
mevent_dispatch();
|
||||
|
||||
exit(1);
|
||||
}
|
53
usr.sbin/bhyve/fbsdrun.h
Normal file
53
usr.sbin/bhyve/fbsdrun.h
Normal file
@ -0,0 +1,53 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _FBSDRUN_H_
|
||||
#define _FBSDRUN_H_
|
||||
|
||||
#ifndef CTASSERT /* Allow lint to override */
|
||||
#define CTASSERT(x) _CTASSERT(x, __LINE__)
|
||||
#define _CTASSERT(x, y) __CTASSERT(x, y)
|
||||
#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1]
|
||||
#endif
|
||||
|
||||
struct vmctx;
|
||||
extern int guest_hz;
|
||||
extern int guest_tslice;
|
||||
extern int guest_ncpus;
|
||||
extern char *vmname;
|
||||
|
||||
extern u_long lomem_sz, himem_sz;
|
||||
|
||||
void *paddr_guest2host(uintptr_t);
|
||||
|
||||
void fbsdrun_addcpu(struct vmctx *ctx, int cpu, uint64_t rip);
|
||||
void fbsdrun_add_oemtbl(void *tbl, int tblsz);
|
||||
int fbsdrun_muxed(void);
|
||||
int fbsdrun_vmexit_on_hlt(void);
|
||||
int fbsdrun_vmexit_on_pause(void);
|
||||
#endif
|
98
usr.sbin/bhyve/inout.c
Normal file
98
usr.sbin/bhyve/inout.c
Normal file
@ -0,0 +1,98 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/linker_set.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
SET_DECLARE(inout_port_set, struct inout_port);
|
||||
|
||||
#define MAX_IOPORTS (1 << 16)
|
||||
|
||||
static struct {
|
||||
const char *name;
|
||||
int flags;
|
||||
inout_func_t handler;
|
||||
void *arg;
|
||||
} inout_handlers[MAX_IOPORTS];
|
||||
|
||||
int
|
||||
emulate_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax)
|
||||
{
|
||||
int flags;
|
||||
inout_func_t handler;
|
||||
void *arg;
|
||||
|
||||
assert(port < MAX_IOPORTS);
|
||||
|
||||
if ((handler = inout_handlers[port].handler) == NULL)
|
||||
return (-1);
|
||||
|
||||
flags = inout_handlers[port].flags;
|
||||
arg = inout_handlers[port].arg;
|
||||
|
||||
if ((in && (flags & IOPORT_F_IN)) || (!in && (flags & IOPORT_F_OUT)))
|
||||
return ((*handler)(ctx, vcpu, in, port, bytes, eax, arg));
|
||||
else
|
||||
return (-1);
|
||||
}
|
||||
|
||||
void
|
||||
init_inout(void)
|
||||
{
|
||||
struct inout_port **iopp, *iop;
|
||||
|
||||
SET_FOREACH(iopp, inout_port_set) {
|
||||
iop = *iopp;
|
||||
assert(iop->port < MAX_IOPORTS);
|
||||
inout_handlers[iop->port].name = iop->name;
|
||||
inout_handlers[iop->port].flags = iop->flags;
|
||||
inout_handlers[iop->port].handler = iop->handler;
|
||||
inout_handlers[iop->port].arg = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
register_inout(struct inout_port *iop)
|
||||
{
|
||||
assert(iop->port < MAX_IOPORTS);
|
||||
inout_handlers[iop->port].name = iop->name;
|
||||
inout_handlers[iop->port].flags = iop->flags;
|
||||
inout_handlers[iop->port].handler = iop->handler;
|
||||
inout_handlers[iop->port].arg = iop->arg;
|
||||
|
||||
return (0);
|
||||
}
|
65
usr.sbin/bhyve/inout.h
Normal file
65
usr.sbin/bhyve/inout.h
Normal file
@ -0,0 +1,65 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _INOUT_H_
|
||||
#define _INOUT_H_
|
||||
|
||||
#include <sys/linker_set.h>
|
||||
|
||||
struct vmctx;
|
||||
|
||||
typedef int (*inout_func_t)(struct vmctx *ctx, int vcpu, int in, int port,
|
||||
int bytes, uint32_t *eax, void *arg);
|
||||
|
||||
struct inout_port {
|
||||
const char *name;
|
||||
int port;
|
||||
int flags;
|
||||
inout_func_t handler;
|
||||
void *arg;
|
||||
};
|
||||
#define IOPORT_F_IN 0x1
|
||||
#define IOPORT_F_OUT 0x2
|
||||
#define IOPORT_F_INOUT 0x3
|
||||
|
||||
#define INOUT_PORT(name, port, flags, handler) \
|
||||
static struct inout_port __CONCAT(__inout_port, __LINE__) = { \
|
||||
#name, \
|
||||
(port), \
|
||||
(flags), \
|
||||
(handler), \
|
||||
0 \
|
||||
}; \
|
||||
DATA_SET(inout_port_set, __CONCAT(__inout_port, __LINE__))
|
||||
|
||||
void init_inout(void);
|
||||
int emulate_inout(struct vmctx *, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax);
|
||||
int register_inout(struct inout_port *iop);
|
||||
|
||||
#endif /* _INOUT_H_ */
|
419
usr.sbin/bhyve/mevent.c
Normal file
419
usr.sbin/bhyve/mevent.c
Normal file
@ -0,0 +1,419 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Micro event library for FreeBSD, designed for a single i/o thread
|
||||
* using kqueue, and having events be persistent by default.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/event.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <pthread.h>
|
||||
|
||||
#include "mevent.h"
|
||||
|
||||
#define MEVENT_MAX 64
|
||||
|
||||
#define MEV_ENABLE 1
|
||||
#define MEV_DISABLE 2
|
||||
#define MEV_DEL_PENDING 3
|
||||
|
||||
static pthread_t mevent_tid;
|
||||
static int mevent_pipefd[2];
|
||||
static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
struct mevent {
|
||||
void (*me_func)(int, enum ev_type, void *);
|
||||
int me_fd;
|
||||
enum ev_type me_type;
|
||||
void *me_param;
|
||||
int me_cq;
|
||||
int me_state;
|
||||
int me_closefd;
|
||||
LIST_ENTRY(mevent) me_list;
|
||||
};
|
||||
|
||||
static LIST_HEAD(listhead, mevent) global_head, change_head;
|
||||
|
||||
static void
|
||||
mevent_qlock(void)
|
||||
{
|
||||
pthread_mutex_lock(&mevent_lmutex);
|
||||
}
|
||||
|
||||
static void
|
||||
mevent_qunlock(void)
|
||||
{
|
||||
pthread_mutex_unlock(&mevent_lmutex);
|
||||
}
|
||||
|
||||
static void
|
||||
mevent_pipe_read(int fd, enum ev_type type, void *param)
|
||||
{
|
||||
char buf[MEVENT_MAX];
|
||||
int status;
|
||||
|
||||
/*
|
||||
* Drain the pipe read side. The fd is non-blocking so this is
|
||||
* safe to do.
|
||||
*/
|
||||
do {
|
||||
status = read(fd, buf, sizeof(buf));
|
||||
} while (status == MEVENT_MAX);
|
||||
}
|
||||
|
||||
static void
|
||||
mevent_notify(void)
|
||||
{
|
||||
char c;
|
||||
|
||||
/*
|
||||
* If calling from outside the i/o thread, write a byte on the
|
||||
* pipe to force the i/o thread to exit the blocking kevent call.
|
||||
*/
|
||||
if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) {
|
||||
write(mevent_pipefd[1], &c, 1);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
mevent_kq_filter(struct mevent *mevp)
|
||||
{
|
||||
int retval;
|
||||
|
||||
retval = 0;
|
||||
|
||||
if (mevp->me_type == EVF_READ)
|
||||
retval = EVFILT_READ;
|
||||
|
||||
if (mevp->me_type == EVF_WRITE)
|
||||
retval = EVFILT_WRITE;
|
||||
|
||||
return (retval);
|
||||
}
|
||||
|
||||
static int
|
||||
mevent_kq_flags(struct mevent *mevp)
|
||||
{
|
||||
int ret;
|
||||
|
||||
switch (mevp->me_state) {
|
||||
case MEV_ENABLE:
|
||||
ret = EV_ADD;
|
||||
break;
|
||||
case MEV_DISABLE:
|
||||
ret = EV_DISABLE;
|
||||
break;
|
||||
case MEV_DEL_PENDING:
|
||||
ret = EV_DELETE;
|
||||
break;
|
||||
}
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
 * Filter-specific kevent flags for 'mevp'.  None are used at present.
 */
static int
mevent_kq_fflags(struct mevent *mevp)
{
	/* XXX nothing yet, perhaps EV_EOF for reads ? */
	(void)mevp;
	return (0);
}
|
||||
|
||||
static int
|
||||
mevent_build(int mfd, struct kevent *kev)
|
||||
{
|
||||
struct mevent *mevp, *tmpp;
|
||||
int i;
|
||||
|
||||
i = 0;
|
||||
|
||||
mevent_qlock();
|
||||
|
||||
LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
|
||||
if (mevp->me_closefd) {
|
||||
/*
|
||||
* A close of the file descriptor will remove the
|
||||
* event
|
||||
*/
|
||||
close(mevp->me_fd);
|
||||
} else {
|
||||
kev[i].ident = mevp->me_fd;
|
||||
kev[i].filter = mevent_kq_filter(mevp);
|
||||
kev[i].flags = mevent_kq_flags(mevp);
|
||||
kev[i].fflags = mevent_kq_fflags(mevp);
|
||||
kev[i].data = 0;
|
||||
kev[i].udata = mevp;
|
||||
i++;
|
||||
}
|
||||
|
||||
mevp->me_cq = 0;
|
||||
LIST_REMOVE(mevp, me_list);
|
||||
|
||||
if (mevp->me_state == MEV_DEL_PENDING) {
|
||||
free(mevp);
|
||||
} else {
|
||||
LIST_INSERT_HEAD(&global_head, mevp, me_list);
|
||||
}
|
||||
|
||||
assert(i < MEVENT_MAX);
|
||||
}
|
||||
|
||||
mevent_qunlock();
|
||||
|
||||
return (i);
|
||||
}
|
||||
|
||||
/*
 * Invoke the registered callback for each of the 'numev' events in 'kev'.
 * The mevent is recovered from the kevent's udata field.  A non-positive
 * 'numev' results in no callbacks.
 */
static void
mevent_handle(struct kevent *kev, int numev)
{
	struct mevent *ev;
	int idx;

	idx = 0;
	while (idx < numev) {
		ev = kev[idx].udata;

		/* XXX check for EV_ERROR ? */

		ev->me_func(ev->me_fd, ev->me_type, ev->me_param);
		idx++;
	}
}
|
||||
|
||||
struct mevent *
|
||||
mevent_add(int fd, enum ev_type type,
|
||||
void (*func)(int, enum ev_type, void *), void *param)
|
||||
{
|
||||
struct mevent *lp, *mevp;
|
||||
|
||||
if (fd < 0 || func == NULL) {
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
mevp = NULL;
|
||||
|
||||
mevent_qlock();
|
||||
|
||||
/*
|
||||
* Verify that the fd/type tuple is not present in any list
|
||||
*/
|
||||
LIST_FOREACH(lp, &global_head, me_list) {
|
||||
if (lp->me_fd == fd && lp->me_type == type) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
LIST_FOREACH(lp, &change_head, me_list) {
|
||||
if (lp->me_fd == fd && lp->me_type == type) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate an entry, populate it, and add it to the change list.
|
||||
*/
|
||||
mevp = malloc(sizeof(struct mevent));
|
||||
if (mevp == NULL) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
memset(mevp, 0, sizeof(struct mevent));
|
||||
mevp->me_fd = fd;
|
||||
mevp->me_type = type;
|
||||
mevp->me_func = func;
|
||||
mevp->me_param = param;
|
||||
|
||||
LIST_INSERT_HEAD(&change_head, mevp, me_list);
|
||||
mevp->me_cq = 1;
|
||||
mevp->me_state = MEV_ENABLE;
|
||||
mevent_notify();
|
||||
|
||||
exit:
|
||||
mevent_qunlock();
|
||||
|
||||
return (mevp);
|
||||
}
|
||||
|
||||
static int
|
||||
mevent_update(struct mevent *evp, int newstate)
|
||||
{
|
||||
/*
|
||||
* It's not possible to enable/disable a deleted event
|
||||
*/
|
||||
if (evp->me_state == MEV_DEL_PENDING)
|
||||
return (EINVAL);
|
||||
|
||||
/*
|
||||
* No update needed if state isn't changing
|
||||
*/
|
||||
if (evp->me_state == newstate)
|
||||
return (0);
|
||||
|
||||
mevent_qlock();
|
||||
|
||||
evp->me_state = newstate;
|
||||
|
||||
/*
|
||||
* Place the entry onto the changed list if not already there.
|
||||
*/
|
||||
if (evp->me_cq == 0) {
|
||||
evp->me_cq = 1;
|
||||
LIST_REMOVE(evp, me_list);
|
||||
LIST_INSERT_HEAD(&change_head, evp, me_list);
|
||||
mevent_notify();
|
||||
}
|
||||
|
||||
mevent_qunlock();
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
mevent_enable(struct mevent *evp)
|
||||
{
|
||||
|
||||
return (mevent_update(evp, MEV_ENABLE));
|
||||
}
|
||||
|
||||
int
|
||||
mevent_disable(struct mevent *evp)
|
||||
{
|
||||
|
||||
return (mevent_update(evp, MEV_DISABLE));
|
||||
}
|
||||
|
||||
static int
|
||||
mevent_delete_event(struct mevent *evp, int closefd)
|
||||
{
|
||||
mevent_qlock();
|
||||
|
||||
/*
|
||||
* Place the entry onto the changed list if not already there, and
|
||||
* mark as to be deleted.
|
||||
*/
|
||||
if (evp->me_cq == 0) {
|
||||
evp->me_cq = 1;
|
||||
LIST_REMOVE(evp, me_list);
|
||||
LIST_INSERT_HEAD(&change_head, evp, me_list);
|
||||
mevent_notify();
|
||||
}
|
||||
evp->me_state = MEV_DEL_PENDING;
|
||||
|
||||
if (closefd)
|
||||
evp->me_closefd = 1;
|
||||
|
||||
mevent_qunlock();
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Schedule 'evp' for deletion, leaving its file descriptor open.
 */
int
mevent_delete(struct mevent *evp)
{
	const int closefd = 0;

	return (mevent_delete_event(evp, closefd));
}
|
||||
|
||||
/*
 * Schedule 'evp' for deletion and have its file descriptor closed when
 * the deletion is processed.
 */
int
mevent_delete_close(struct mevent *evp)
{
	const int closefd = 1;

	return (mevent_delete_event(evp, closefd));
}
|
||||
|
||||
/*
 * Event loop: never returns.
 *
 * Records the calling thread in mevent_tid, creates the kqueue and the
 * notification pipe, registers the pipe's read end as an internal event,
 * and then loops forever: submit queued changes, block in kevent(2), and
 * run the callbacks for whatever fired.
 */
void
mevent_dispatch(void)
{
	struct kevent changelist[MEVENT_MAX];
	struct kevent eventlist[MEVENT_MAX];
	struct mevent *pipev;
	int mfd;
	int numev;
	int ret;

	mevent_tid = pthread_self();

	/* kqueue() returns -1 on failure; descriptor 0 is a valid result. */
	mfd = kqueue();
	assert(mfd >= 0);

	/*
	 * Open the pipe that will be used for other threads to force
	 * the blocking kqueue call to exit by writing to it.
	 * NOTE(review): this function does not change the blocking mode
	 * of either pipe descriptor.
	 */
	ret = pipe(mevent_pipefd);
	if (ret < 0) {
		perror("pipe");
		/* Failure must not be reported as a successful exit. */
		exit(1);
	}

	/*
	 * Add internal event handler for the pipe read fd
	 */
	pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL);
	assert(pipev != NULL);

	for (;;) {
		/*
		 * Build changelist if required.
		 * XXX the changelist can be put into the blocking call
		 * to eliminate the extra syscall. Currently better for
		 * debug.
		 */
		numev = mevent_build(mfd, changelist);
		if (numev) {
			ret = kevent(mfd, changelist, numev, NULL, 0, NULL);
			if (ret == -1) {
				perror("Error return from kevent change");
			}
		}

		/*
		 * Block awaiting events
		 */
		ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
		if (ret == -1) {
			perror("Error return from kevent monitor");
			/* Nothing was returned; go around again. */
			continue;
		}

		/*
		 * Handle reported events
		 */
		mevent_handle(eventlist, ret);
	}
}
|
49
usr.sbin/bhyve/mevent.h
Normal file
49
usr.sbin/bhyve/mevent.h
Normal file
@ -0,0 +1,49 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef	_MEVENT_H_
#define	_MEVENT_H_

/* Kind of descriptor readiness an event is registered for. */
enum ev_type {
	EVF_READ,
	EVF_WRITE
};

/* Opaque event handle; the layout is private to mevent.c. */
struct mevent;

/*
 * Register 'func' to be called with (fd, type, param) when 'fd' is ready.
 * Returns NULL on bad arguments, a duplicate fd/type pair, or allocation
 * failure.
 */
struct mevent *mevent_add(int fd, enum ev_type type,
		    void (*func)(int, enum ev_type, void *),
		    void *param);

/* Enable/disable an event; EINVAL if it is already pending deletion. */
int	mevent_enable(struct mevent *evp);
int	mevent_disable(struct mevent *evp);

/* Schedule an event for removal; the _close variant also closes the fd. */
int	mevent_delete(struct mevent *evp);
int	mevent_delete_close(struct mevent *evp);

/* Run the event loop in the calling thread. */
void	mevent_dispatch(void);

#endif	/* _MEVENT_H_ */
|
180
usr.sbin/bhyve/mevent_test.c
Normal file
180
usr.sbin/bhyve/mevent_test.c
Normal file
@ -0,0 +1,180 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Test program for the micro event library. Set up a simple TCP echo
|
||||
* service.
|
||||
*
|
||||
* cc mevent_test.c mevent.c -lpthread
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "mevent.h"
|
||||
|
||||
#define TEST_PORT 4321
|
||||
|
||||
static pthread_mutex_t accept_mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
static pthread_cond_t accept_condvar = PTHREAD_COND_INITIALIZER;
|
||||
|
||||
#define MEVENT_ECHO
|
||||
|
||||
#ifdef MEVENT_ECHO
|
||||
/*
 * Per-connection rendezvous between the event callback and the echoer
 * thread: the callback signals e_cond while holding e_mt.
 */
struct esync {
	pthread_mutex_t	e_mt;
	pthread_cond_t	e_cond;
};
|
||||
|
||||
static void
|
||||
echoer_callback(int fd, enum ev_type type, void *param)
|
||||
{
|
||||
struct esync *sync = param;
|
||||
|
||||
pthread_mutex_lock(&sync->e_mt);
|
||||
pthread_cond_signal(&sync->e_cond);
|
||||
pthread_mutex_unlock(&sync->e_mt);
|
||||
}
|
||||
|
||||
static void *
|
||||
echoer(void *param)
|
||||
{
|
||||
struct esync sync;
|
||||
struct mevent *mev;
|
||||
char buf[128];
|
||||
int fd = (int)(uintptr_t) param;
|
||||
int len;
|
||||
|
||||
pthread_mutex_init(&sync.e_mt, NULL);
|
||||
pthread_cond_init(&sync.e_cond, NULL);
|
||||
|
||||
pthread_mutex_lock(&sync.e_mt);
|
||||
|
||||
mev = mevent_add(fd, EVF_READ, echoer_callback, &sync);
|
||||
if (mev == NULL) {
|
||||
printf("Could not allocate echoer event\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
while (!pthread_cond_wait(&sync.e_cond, &sync.e_mt)) {
|
||||
len = read(fd, buf, sizeof(buf));
|
||||
if (len > 0) {
|
||||
write(fd, buf, len);
|
||||
write(0, buf, len);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
mevent_delete_close(mev);
|
||||
|
||||
pthread_mutex_unlock(&sync.e_mt);
|
||||
pthread_mutex_destroy(&sync.e_mt);
|
||||
pthread_cond_destroy(&sync.e_cond);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/*
 * Per-connection thread (plain blocking variant).  'param' carries the
 * connected socket descriptor; everything read from it is copied to
 * standard output until EOF or a read error.  Returns NULL.
 */
static void *
echoer(void *param)
{
	char buf[128];
	int fd = (int)(uintptr_t) param;
	int len;

	while ((len = read(fd, buf, sizeof(buf))) > 0) {
		write(1, buf, len);
	}

	/* A void * thread function must return a value. */
	return (NULL);
}
|
||||
#endif /* MEVENT_ECHO */
|
||||
|
||||
static void
|
||||
acceptor_callback(int fd, enum ev_type type, void *param)
|
||||
{
|
||||
pthread_mutex_lock(&accept_mutex);
|
||||
pthread_cond_signal(&accept_condvar);
|
||||
pthread_mutex_unlock(&accept_mutex);
|
||||
}
|
||||
|
||||
static void *
|
||||
acceptor(void *param)
|
||||
{
|
||||
struct sockaddr_in sin;
|
||||
pthread_t tid;
|
||||
int news;
|
||||
int s;
|
||||
|
||||
if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
|
||||
perror("socket");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
sin.sin_len = sizeof(sin);
|
||||
sin.sin_family = AF_INET;
|
||||
sin.sin_addr.s_addr = htonl(INADDR_ANY);
|
||||
sin.sin_port = htons(TEST_PORT);
|
||||
|
||||
if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
|
||||
perror("bind");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (listen(s, 1) < 0) {
|
||||
perror("listen");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
(void) mevent_add(s, EVF_READ, acceptor_callback, NULL);
|
||||
|
||||
pthread_mutex_lock(&accept_mutex);
|
||||
|
||||
while (!pthread_cond_wait(&accept_condvar, &accept_mutex)) {
|
||||
news = accept(s, NULL, NULL);
|
||||
if (news < 0) {
|
||||
perror("accept error");
|
||||
} else {
|
||||
printf("incoming connection, spawning thread\n");
|
||||
pthread_create(&tid, NULL, echoer,
|
||||
(void *)(uintptr_t)news);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
main()
|
||||
{
|
||||
pthread_t tid;
|
||||
|
||||
pthread_create(&tid, NULL, acceptor, NULL);
|
||||
|
||||
mevent_dispatch();
|
||||
}
|
976
usr.sbin/bhyve/pci_emul.c
Normal file
976
usr.sbin/bhyve/pci_emul.c
Normal file
@ -0,0 +1,976 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/linker_set.h>
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include "fbsdrun.h"
|
||||
#include "inout.h"
|
||||
#include "pci_emul.h"
|
||||
|
||||
#define	CONF1_ADDR_PORT	0x0cf8
#define	CONF1_DATA_PORT	0x0cfc

/*
 * Store 'val' into the config space of 'pi' at 'off' using a 1-, 2- or
 * (for any other 'b') 4-byte access.
 */
#define	CFGWRITE(pi,off,val,b)						\
do {									\
	switch (b) {							\
	case 1:								\
		pci_set_cfgdata8((pi),(off),(val));			\
		break;							\
	case 2:								\
		pci_set_cfgdata16((pi),(off),(val));			\
		break;							\
	default:							\
		pci_set_cfgdata32((pi),(off),(val));			\
		break;							\
	}								\
} while (0)

/* Number of entries in the slot table. */
#define	MAXSLOTS	32
|
||||
|
||||
/*
 * Per-slot configuration collected from the command line, indexed by
 * slot number.
 */
static struct slotinfo {
	char	*si_name;		/* emulation name from pci_parse_slot() */
	char	*si_param;		/* optional config string, or NULL */
	struct pci_devinst *si_devi;	/* instance set by pci_emul_init() */
	int	si_titled;		/* naming info given via pci_parse_name() */
	int	si_pslot;		/* physical slot number from the name */
	char	si_prefix;		/* name prefix character, 0 if none */
	char	si_suffix;		/* name suffix character */
} pci_slotinfo[MAXSLOTS];
|
||||
|
||||
/*
|
||||
* NetApp specific:
|
||||
* struct used to build an in-core OEM table to supply device names
|
||||
* to driver instances
|
||||
*/
|
||||
static struct mptable_pci_devnames {
|
||||
#define MPT_HDR_BASE 0
|
||||
#define MPT_HDR_NAME 2
|
||||
uint16_t md_hdrtype;
|
||||
uint16_t md_entries;
|
||||
uint16_t md_cksum;
|
||||
uint16_t md_pad;
|
||||
#define MPT_NTAP_SIG \
|
||||
((uint32_t)(('P' << 24) | ('A' << 16) | ('T' << 8) | 'N'))
|
||||
uint32_t md_sig;
|
||||
uint32_t md_rsvd;
|
||||
struct mptable_pci_slotinfo {
|
||||
uint16_t mds_type;
|
||||
uint16_t mds_phys_slot;
|
||||
uint8_t mds_bus;
|
||||
uint8_t mds_slot;
|
||||
uint8_t mds_func;
|
||||
uint8_t mds_pad;
|
||||
uint16_t mds_vid;
|
||||
uint16_t mds_did;
|
||||
uint8_t mds_suffix[4];
|
||||
uint8_t mds_prefix[4];
|
||||
uint32_t mds_rsvd[3];
|
||||
} md_slotinfo[MAXSLOTS];
|
||||
} pci_devnames;
|
||||
|
||||
/* Linker set searched by name to find a PCI device emulation. */
SET_DECLARE(pci_devemu_set, struct pci_devemu);

/*
 * Next candidate address in each BAR window.  Seeded by init_pci() and
 * advanced by every successful allocation.
 */
static uint64_t pci_emul_iobase;
static uint64_t pci_emul_membase32;
static uint64_t pci_emul_membase64;

/* I/O port window used for I/O BARs. */
#define	PCI_EMUL_IOBASE		0x2000
#define	PCI_EMUL_IOLIMIT	0x10000

/* 32-bit memory BAR window. */
#define	PCI_EMUL_MEMBASE32	(lomem_sz)
#define	PCI_EMUL_MEMLIMIT32	0xE0000000		/* 3.5GB */

/* 64-bit memory BAR window. */
#define	PCI_EMUL_MEMBASE64	0xD000000000UL
#define	PCI_EMUL_MEMLIMIT64	0xFD00000000UL

/* Count of successfully initialized device instances. */
static int pci_emul_devices;
/* Number of pci_devnames.md_slotinfo[] entries filled in so far. */
static int devname_elems;
|
||||
|
||||
/*
|
||||
* I/O access
|
||||
*/
|
||||
|
||||
/*
|
||||
* Slot options are in the form:
|
||||
*
|
||||
* <slot>,<emul>[,<config>]
|
||||
*
|
||||
* slot is 0..31
|
||||
* emul is a string describing the type of PCI device e.g. virtio-net
|
||||
* config is an optional string, depending on the device, that can be
|
||||
* used for configuration.
|
||||
* Examples are:
|
||||
* 1,virtio-net,tap0
|
||||
* 3,dummy
|
||||
*/
|
||||
/*
 * Report a malformed slot option and release 'aopt', which must be a
 * heap-allocated string owned by the caller; it is invalid afterwards.
 */
static void
pci_parse_slot_usage(char *aopt)
{

	(void)printf("Invalid PCI slot info field \"%s\"\n", aopt);
	free(aopt);
}
|
||||
|
||||
void
|
||||
pci_parse_slot(char *opt)
|
||||
{
|
||||
char *slot, *emul, *config;
|
||||
char *str, *cpy;
|
||||
int snum;
|
||||
|
||||
str = cpy = strdup(opt);
|
||||
config = NULL;
|
||||
|
||||
slot = strsep(&str, ",");
|
||||
emul = strsep(&str, ",");
|
||||
if (str != NULL) {
|
||||
config = strsep(&str, ",");
|
||||
}
|
||||
|
||||
if (emul == NULL) {
|
||||
pci_parse_slot_usage(cpy);
|
||||
return;
|
||||
}
|
||||
|
||||
snum = 255;
|
||||
snum = atoi(slot);
|
||||
if (snum < 0 || snum >= MAXSLOTS) {
|
||||
pci_parse_slot_usage(cpy);
|
||||
} else {
|
||||
pci_slotinfo[snum].si_name = emul;
|
||||
pci_slotinfo[snum].si_param = config;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
* PCI MPTable names are of the form:
|
||||
*
|
||||
* <slot>,[prefix]<digit><suffix>
|
||||
*
|
||||
* .. with <prefix> an alphabetic char, <digit> a 1 or 2-digit string,
|
||||
* and <suffix> a single char.
|
||||
*
|
||||
* Examples:
|
||||
* 1,e0c
|
||||
* 4,e0P
|
||||
* 6,43a
|
||||
* 7,0f
|
||||
* 10,1
|
||||
* 12,e0M
|
||||
* 2,12a
|
||||
*
|
||||
* Note that this is NetApp-specific, but is ignored on other o/s's.
|
||||
*/
|
||||
/*
 * Report a malformed slot-name option.  'aopt' is only printed; it is
 * neither modified nor freed.
 */
static void
pci_parse_name_usage(char *aopt)
{

	(void)printf("Invalid PCI slot name field \"%s\"\n", aopt);
}
|
||||
|
||||
void
|
||||
pci_parse_name(char *opt)
|
||||
{
|
||||
char csnum[4];
|
||||
char *namestr;
|
||||
char *slotend;
|
||||
char prefix, suffix;
|
||||
int i;
|
||||
int pslot;
|
||||
int snum;
|
||||
|
||||
pslot = -1;
|
||||
prefix = suffix = 0;
|
||||
slotend = strchr(opt, ',');
|
||||
|
||||
/*
|
||||
* A comma must be present, and can't be the first character
|
||||
* or no slot would be present. Also, the slot number can't be
|
||||
* more than 2 characters.
|
||||
*/
|
||||
if (slotend == NULL || slotend == opt || (slotend - opt > 2)) {
|
||||
pci_parse_name_usage(opt);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < (slotend - opt); i++) {
|
||||
csnum[i] = opt[i];
|
||||
}
|
||||
csnum[i] = '\0';
|
||||
|
||||
snum = 255;
|
||||
snum = atoi(csnum);
|
||||
if (snum < 0 || snum >= MAXSLOTS) {
|
||||
pci_parse_name_usage(opt);
|
||||
return;
|
||||
}
|
||||
|
||||
namestr = slotend + 1;
|
||||
|
||||
if (strlen(namestr) > 3) {
|
||||
pci_parse_name_usage(opt);
|
||||
return;
|
||||
}
|
||||
|
||||
if (isalpha(*namestr)) {
|
||||
prefix = *namestr++;
|
||||
}
|
||||
|
||||
if (!isdigit(*namestr)) {
|
||||
pci_parse_name_usage(opt);
|
||||
} else {
|
||||
pslot = *namestr++ - '0';
|
||||
if (isnumber(*namestr)) {
|
||||
pslot = 10*pslot + *namestr++ - '0';
|
||||
|
||||
}
|
||||
if (isalpha(*namestr) && *(namestr + 1) == 0) {
|
||||
suffix = *namestr;
|
||||
pci_slotinfo[snum].si_titled = 1;
|
||||
pci_slotinfo[snum].si_pslot = pslot;
|
||||
pci_slotinfo[snum].si_prefix = prefix;
|
||||
pci_slotinfo[snum].si_suffix = suffix;
|
||||
|
||||
} else {
|
||||
pci_parse_name_usage(opt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pci_add_mptable_name(struct slotinfo *si)
|
||||
{
|
||||
struct mptable_pci_slotinfo *ms;
|
||||
|
||||
/*
|
||||
* If naming information has been supplied for this slot, populate
|
||||
* the next available mptable OEM entry
|
||||
*/
|
||||
if (si->si_titled) {
|
||||
ms = &pci_devnames.md_slotinfo[devname_elems];
|
||||
|
||||
ms->mds_type = MPT_HDR_NAME;
|
||||
ms->mds_phys_slot = si->si_pslot;
|
||||
ms->mds_bus = si->si_devi->pi_bus;
|
||||
ms->mds_slot = si->si_devi->pi_slot;
|
||||
ms->mds_func = si->si_devi->pi_func;
|
||||
ms->mds_vid = pci_get_cfgdata16(si->si_devi, PCIR_VENDOR);
|
||||
ms->mds_did = pci_get_cfgdata16(si->si_devi, PCIR_DEVICE);
|
||||
ms->mds_suffix[0] = si->si_suffix;
|
||||
ms->mds_prefix[0] = si->si_prefix;
|
||||
|
||||
devname_elems++;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pci_finish_mptable_names(void)
|
||||
{
|
||||
int size;
|
||||
|
||||
if (devname_elems) {
|
||||
pci_devnames.md_hdrtype = MPT_HDR_BASE;
|
||||
pci_devnames.md_entries = devname_elems;
|
||||
pci_devnames.md_cksum = 0; /* XXX */
|
||||
pci_devnames.md_sig = MPT_NTAP_SIG;
|
||||
|
||||
size = (uintptr_t)&pci_devnames.md_slotinfo[devname_elems] -
|
||||
(uintptr_t)&pci_devnames;
|
||||
|
||||
fbsdrun_add_oemtbl(&pci_devnames, size);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
pci_emul_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
struct pci_devinst *pdi = arg;
|
||||
struct pci_devemu *pe = pdi->pi_d;
|
||||
int offset, i;
|
||||
|
||||
for (i = 0; i <= PCI_BARMAX; i++) {
|
||||
if (pdi->pi_bar[i].type == PCIBAR_IO &&
|
||||
port >= pdi->pi_bar[i].addr &&
|
||||
port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) {
|
||||
offset = port - pdi->pi_bar[i].addr;
|
||||
if (in)
|
||||
*eax = (*pe->pe_ior)(pdi, i, offset, bytes);
|
||||
else
|
||||
(*pe->pe_iow)(pdi, i, offset, bytes, *eax);
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/*
 * Carve 'size' bytes, naturally aligned to 'size', out of the window that
 * starts at *baseptr and ends at 'limit' (exclusive).
 *
 * 'size' must be a non-zero power of 2.  On success the allocated address
 * is stored in *addr, *baseptr is advanced past the allocation and 0 is
 * returned.  On failure -1 is returned and neither *baseptr nor *addr is
 * modified.  Arithmetic that would wrap around 64 bits is treated as
 * "does not fit" rather than yielding a bogus low address.
 */
static int
pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
    uint64_t *addr)
{
	uint64_t base, mask, sum;

	/* must be a (non-zero) power of 2 */
	assert(size != 0 && (size & (size - 1)) == 0);

	mask = size - 1;

	/* Round up to the alignment, refusing to wrap. */
	sum = *baseptr + mask;
	if (sum < *baseptr)
		return (-1);
	base = sum & ~mask;

	/* Equivalent to base + size <= limit, without overflow. */
	if (base > limit || size > limit - base)
		return (-1);

	*addr = base;
	*baseptr = base + size;
	return (0);
}
|
||||
|
||||
int
|
||||
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, uint64_t hostbase,
|
||||
enum pcibar_type type, uint64_t size)
|
||||
{
|
||||
int i, error;
|
||||
uint64_t *baseptr, limit, addr, mask, lobits, bar;
|
||||
struct inout_port iop;
|
||||
|
||||
assert(idx >= 0 && idx <= PCI_BARMAX);
|
||||
|
||||
if ((size & (size - 1)) != 0)
|
||||
size = 1UL << flsl(size); /* round up to a power of 2 */
|
||||
|
||||
switch (type) {
|
||||
case PCIBAR_NONE:
|
||||
baseptr = NULL;
|
||||
addr = mask = lobits = 0;
|
||||
break;
|
||||
case PCIBAR_IO:
|
||||
baseptr = &pci_emul_iobase;
|
||||
limit = PCI_EMUL_IOLIMIT;
|
||||
mask = PCIM_BAR_IO_BASE;
|
||||
lobits = PCIM_BAR_IO_SPACE;
|
||||
break;
|
||||
case PCIBAR_MEM64:
|
||||
/*
|
||||
* XXX
|
||||
* Some drivers do not work well if the 64-bit BAR is allocated
|
||||
* above 4GB. Allow for this by allocating small requests under
|
||||
* 4GB unless then allocation size is larger than some arbitrary
|
||||
* number (32MB currently).
|
||||
*/
|
||||
if (size > 32 * 1024 * 1024) {
|
||||
/*
|
||||
* XXX special case for device requiring peer-peer DMA
|
||||
*/
|
||||
if (size == 0x100000000UL)
|
||||
baseptr = &hostbase;
|
||||
else
|
||||
baseptr = &pci_emul_membase64;
|
||||
limit = PCI_EMUL_MEMLIMIT64;
|
||||
mask = PCIM_BAR_MEM_BASE;
|
||||
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
|
||||
PCIM_BAR_MEM_PREFETCH;
|
||||
break;
|
||||
}
|
||||
/* fallthrough */
|
||||
case PCIBAR_MEM32:
|
||||
baseptr = &pci_emul_membase32;
|
||||
limit = PCI_EMUL_MEMLIMIT32;
|
||||
mask = PCIM_BAR_MEM_BASE;
|
||||
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
|
||||
break;
|
||||
default:
|
||||
printf("pci_emul_alloc_base: invalid bar type %d\n", type);
|
||||
assert(0);
|
||||
}
|
||||
|
||||
if (baseptr != NULL) {
|
||||
error = pci_emul_alloc_resource(baseptr, limit, size, &addr);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
}
|
||||
|
||||
pdi->pi_bar[idx].type = type;
|
||||
pdi->pi_bar[idx].addr = addr;
|
||||
pdi->pi_bar[idx].size = size;
|
||||
|
||||
/* Initialize the BAR register in config space */
|
||||
bar = (addr & mask) | lobits;
|
||||
pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar);
|
||||
|
||||
if (type == PCIBAR_MEM64) {
|
||||
assert(idx + 1 <= PCI_BARMAX);
|
||||
pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64;
|
||||
pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
|
||||
}
|
||||
|
||||
/* add a handler to intercept accesses to the I/O bar */
|
||||
if (type == PCIBAR_IO) {
|
||||
iop.name = pdi->pi_name;
|
||||
iop.flags = IOPORT_F_INOUT;
|
||||
iop.handler = pci_emul_handler;
|
||||
iop.arg = pdi;
|
||||
|
||||
for (i = 0; i < size; i++) {
|
||||
iop.port = addr + i;
|
||||
register_inout(&iop);
|
||||
}
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
#define CAP_START_OFFSET 0x40
|
||||
static int
|
||||
pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen)
|
||||
{
|
||||
int i, capoff, capid, reallen;
|
||||
uint16_t sts;
|
||||
|
||||
static u_char endofcap[4] = {
|
||||
PCIY_RESERVED, 0, 0, 0
|
||||
};
|
||||
|
||||
assert(caplen > 0 && capdata[0] != PCIY_RESERVED);
|
||||
|
||||
reallen = roundup2(caplen, 4); /* dword aligned */
|
||||
|
||||
sts = pci_get_cfgdata16(pi, PCIR_STATUS);
|
||||
if ((sts & PCIM_STATUS_CAPPRESENT) == 0) {
|
||||
capoff = CAP_START_OFFSET;
|
||||
pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff);
|
||||
pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT);
|
||||
} else {
|
||||
capoff = pci_get_cfgdata8(pi, PCIR_CAP_PTR);
|
||||
while (1) {
|
||||
assert((capoff & 0x3) == 0);
|
||||
capid = pci_get_cfgdata8(pi, capoff);
|
||||
if (capid == PCIY_RESERVED)
|
||||
break;
|
||||
capoff = pci_get_cfgdata8(pi, capoff + 1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Check if we have enough space */
|
||||
if (capoff + reallen + sizeof(endofcap) > PCI_REGMAX + 1)
|
||||
return (-1);
|
||||
|
||||
/* Copy the capability */
|
||||
for (i = 0; i < caplen; i++)
|
||||
pci_set_cfgdata8(pi, capoff + i, capdata[i]);
|
||||
|
||||
/* Set the next capability pointer */
|
||||
pci_set_cfgdata8(pi, capoff + 1, capoff + reallen);
|
||||
|
||||
/* Copy of the reserved capability which serves as the end marker */
|
||||
for (i = 0; i < sizeof(endofcap); i++)
|
||||
pci_set_cfgdata8(pi, capoff + reallen + i, endofcap[i]);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static struct pci_devemu *
|
||||
pci_emul_finddev(char *name)
|
||||
{
|
||||
struct pci_devemu **pdpp, *pdp;
|
||||
|
||||
SET_FOREACH(pdpp, pci_devemu_set) {
|
||||
pdp = *pdpp;
|
||||
if (!strcmp(pdp->pe_emu, name)) {
|
||||
return (pdp);
|
||||
}
|
||||
}
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int slot, char *params)
|
||||
{
|
||||
struct pci_devinst *pdi;
|
||||
pdi = malloc(sizeof(struct pci_devinst));
|
||||
bzero(pdi, sizeof(*pdi));
|
||||
|
||||
pdi->pi_vmctx = ctx;
|
||||
pdi->pi_bus = 0;
|
||||
pdi->pi_slot = slot;
|
||||
pdi->pi_func = 0;
|
||||
pdi->pi_d = pde;
|
||||
snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot);
|
||||
|
||||
/* Disable legacy interrupts */
|
||||
pci_set_cfgdata8(pdi, PCIR_INTLINE, 255);
|
||||
pci_set_cfgdata8(pdi, PCIR_INTPIN, 0);
|
||||
|
||||
pci_set_cfgdata8(pdi, PCIR_COMMAND,
|
||||
PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
|
||||
|
||||
if ((*pde->pe_init)(ctx, pdi, params) != 0) {
|
||||
free(pdi);
|
||||
} else {
|
||||
pci_emul_devices++;
|
||||
pci_slotinfo[slot].si_devi = pdi;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr)
|
||||
{
|
||||
int mmc;
|
||||
|
||||
CTASSERT(sizeof(struct msicap) == 14);
|
||||
|
||||
/* Number of msi messages must be a power of 2 between 1 and 32 */
|
||||
assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32);
|
||||
mmc = ffs(msgnum) - 1;
|
||||
|
||||
bzero(msicap, sizeof(struct msicap));
|
||||
msicap->capid = PCIY_MSI;
|
||||
msicap->nextptr = nextptr;
|
||||
msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1);
|
||||
}
|
||||
|
||||
int
|
||||
pci_emul_add_msicap(struct pci_devinst *pi, int msgnum)
|
||||
{
|
||||
struct msicap msicap;
|
||||
|
||||
pci_populate_msicap(&msicap, msgnum, 0);
|
||||
|
||||
return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap)));
|
||||
}
|
||||
|
||||
void
|
||||
msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
|
||||
int bytes, uint32_t val)
|
||||
{
|
||||
uint16_t msgctrl, rwmask, msgdata, mme;
|
||||
uint32_t addrlo;
|
||||
|
||||
/*
|
||||
* If guest is writing to the message control register make sure
|
||||
* we do not overwrite read-only fields.
|
||||
*/
|
||||
if ((offset - capoff) == 2 && bytes == 2) {
|
||||
rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE;
|
||||
msgctrl = pci_get_cfgdata16(pi, offset);
|
||||
msgctrl &= ~rwmask;
|
||||
msgctrl |= val & rwmask;
|
||||
val = msgctrl;
|
||||
|
||||
addrlo = pci_get_cfgdata32(pi, capoff + 4);
|
||||
if (msgctrl & PCIM_MSICTRL_64BIT)
|
||||
msgdata = pci_get_cfgdata16(pi, capoff + 12);
|
||||
else
|
||||
msgdata = pci_get_cfgdata16(pi, capoff + 8);
|
||||
|
||||
/*
|
||||
* XXX check delivery mode, destination mode etc
|
||||
*/
|
||||
mme = msgctrl & PCIM_MSICTRL_MME_MASK;
|
||||
pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0;
|
||||
if (pi->pi_msi.enabled) {
|
||||
pi->pi_msi.cpu = (addrlo >> 12) & 0xff;
|
||||
pi->pi_msi.vector = msgdata & 0xff;
|
||||
pi->pi_msi.msgnum = 1 << (mme >> 4);
|
||||
} else {
|
||||
pi->pi_msi.cpu = 0;
|
||||
pi->pi_msi.vector = 0;
|
||||
pi->pi_msi.msgnum = 0;
|
||||
}
|
||||
}
|
||||
|
||||
CFGWRITE(pi, offset, val, bytes);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function assumes that 'coff' is in the capabilities region of the
|
||||
* config space.
|
||||
*/
|
||||
static void
|
||||
pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val)
|
||||
{
|
||||
int capid;
|
||||
uint8_t capoff, nextoff;
|
||||
|
||||
/* Do not allow un-aligned writes */
|
||||
if ((offset & (bytes - 1)) != 0)
|
||||
return;
|
||||
|
||||
/* Find the capability that we want to update */
|
||||
capoff = CAP_START_OFFSET;
|
||||
while (1) {
|
||||
capid = pci_get_cfgdata8(pi, capoff);
|
||||
if (capid == PCIY_RESERVED)
|
||||
break;
|
||||
|
||||
nextoff = pci_get_cfgdata8(pi, capoff + 1);
|
||||
if (offset >= capoff && offset < nextoff)
|
||||
break;
|
||||
|
||||
capoff = nextoff;
|
||||
}
|
||||
assert(offset >= capoff);
|
||||
|
||||
/*
|
||||
* Capability ID and Next Capability Pointer are readonly
|
||||
*/
|
||||
if (offset == capoff || offset == capoff + 1)
|
||||
return;
|
||||
|
||||
switch (capid) {
|
||||
case PCIY_MSI:
|
||||
msicap_cfgwrite(pi, capoff, offset, bytes, val);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
pci_emul_iscap(struct pci_devinst *pi, int offset)
|
||||
{
|
||||
int found;
|
||||
uint16_t sts;
|
||||
uint8_t capid, lastoff;
|
||||
|
||||
found = 0;
|
||||
sts = pci_get_cfgdata16(pi, PCIR_STATUS);
|
||||
if ((sts & PCIM_STATUS_CAPPRESENT) != 0) {
|
||||
lastoff = pci_get_cfgdata8(pi, PCIR_CAP_PTR);
|
||||
while (1) {
|
||||
assert((lastoff & 0x3) == 0);
|
||||
capid = pci_get_cfgdata8(pi, lastoff);
|
||||
if (capid == PCIY_RESERVED)
|
||||
break;
|
||||
lastoff = pci_get_cfgdata8(pi, lastoff + 1);
|
||||
}
|
||||
if (offset >= CAP_START_OFFSET && offset <= lastoff)
|
||||
found = 1;
|
||||
}
|
||||
return (found);
|
||||
}
|
||||
|
||||
void
|
||||
init_pci(struct vmctx *ctx)
|
||||
{
|
||||
struct pci_devemu *pde;
|
||||
struct slotinfo *si;
|
||||
int i;
|
||||
|
||||
pci_emul_iobase = PCI_EMUL_IOBASE;
|
||||
pci_emul_membase32 = PCI_EMUL_MEMBASE32;
|
||||
pci_emul_membase64 = PCI_EMUL_MEMBASE64;
|
||||
|
||||
si = pci_slotinfo;
|
||||
|
||||
for (i = 0; i < MAXSLOTS; i++, si++) {
|
||||
if (si->si_name != NULL) {
|
||||
pde = pci_emul_finddev(si->si_name);
|
||||
if (pde != NULL) {
|
||||
pci_emul_init(ctx, pde, i, si->si_param);
|
||||
pci_add_mptable_name(si);
|
||||
}
|
||||
}
|
||||
}
|
||||
pci_finish_mptable_names();
|
||||
}
|
||||
|
||||
int
|
||||
pci_msi_enabled(struct pci_devinst *pi)
|
||||
{
|
||||
return (pi->pi_msi.enabled);
|
||||
}
|
||||
|
||||
int
|
||||
pci_msi_msgnum(struct pci_devinst *pi)
|
||||
{
|
||||
if (pi->pi_msi.enabled)
|
||||
return (pi->pi_msi.msgnum);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
pci_generate_msi(struct pci_devinst *pi, int msg)
|
||||
{
|
||||
|
||||
if (pci_msi_enabled(pi) && msg < pci_msi_msgnum(pi)) {
|
||||
vm_lapic_irq(pi->pi_vmctx,
|
||||
pi->pi_msi.cpu,
|
||||
pi->pi_msi.vector + msg);
|
||||
}
|
||||
}
|
||||
|
||||
static int cfgbus, cfgslot, cfgfunc, cfgoff;
|
||||
|
||||
static int
|
||||
pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
uint32_t x;
|
||||
|
||||
assert(!in);
|
||||
|
||||
if (bytes != 4)
|
||||
return (-1);
|
||||
|
||||
x = *eax;
|
||||
cfgoff = x & PCI_REGMAX;
|
||||
cfgfunc = (x >> 8) & PCI_FUNCMAX;
|
||||
cfgslot = (x >> 11) & PCI_SLOTMAX;
|
||||
cfgbus = (x >> 16) & PCI_BUSMAX;
|
||||
|
||||
return (0);
|
||||
}
|
||||
INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_OUT, pci_emul_cfgaddr);
|
||||
|
||||
static int
|
||||
pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
struct pci_devinst *pi;
|
||||
struct pci_devemu *pe;
|
||||
int coff, idx;
|
||||
uint64_t mask, bar;
|
||||
|
||||
assert(bytes == 1 || bytes == 2 || bytes == 4);
|
||||
|
||||
pi = pci_slotinfo[cfgslot].si_devi;
|
||||
coff = cfgoff + (port - CONF1_DATA_PORT);
|
||||
|
||||
#if 0
|
||||
printf("pcicfg-%s from 0x%0x of %d bytes (%d/%d/%d)\n\r",
|
||||
in ? "read" : "write", coff, bytes, cfgbus, cfgslot, cfgfunc);
|
||||
#endif
|
||||
|
||||
if (pi == NULL || cfgfunc != 0) {
|
||||
if (in)
|
||||
*eax = 0xffffffff;
|
||||
return (0);
|
||||
}
|
||||
|
||||
pe = pi->pi_d;
|
||||
|
||||
/*
|
||||
* Config read
|
||||
*/
|
||||
if (in) {
|
||||
/* Let the device emulation override the default handler */
|
||||
if (pe->pe_cfgread != NULL &&
|
||||
(*pe->pe_cfgread)(ctx, vcpu, pi, coff, bytes, eax) == 0)
|
||||
return (0);
|
||||
|
||||
if (bytes == 1)
|
||||
*eax = pci_get_cfgdata8(pi, coff);
|
||||
else if (bytes == 2)
|
||||
*eax = pci_get_cfgdata16(pi, coff);
|
||||
else
|
||||
*eax = pci_get_cfgdata32(pi, coff);
|
||||
} else {
|
||||
/* Let the device emulation override the default handler */
|
||||
if (pe->pe_cfgwrite != NULL &&
|
||||
(*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* Special handling for write to BAR registers
|
||||
*/
|
||||
if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) {
|
||||
/*
|
||||
* Ignore writes to BAR registers that are not
|
||||
* 4-byte aligned.
|
||||
*/
|
||||
if (bytes != 4 || (coff & 0x3) != 0)
|
||||
return (0);
|
||||
idx = (coff - PCIR_BAR(0)) / 4;
|
||||
switch (pi->pi_bar[idx].type) {
|
||||
case PCIBAR_NONE:
|
||||
bar = 0;
|
||||
break;
|
||||
case PCIBAR_IO:
|
||||
mask = ~(pi->pi_bar[idx].size - 1);
|
||||
mask &= PCIM_BAR_IO_BASE;
|
||||
bar = (*eax & mask) | PCIM_BAR_IO_SPACE;
|
||||
break;
|
||||
case PCIBAR_MEM32:
|
||||
mask = ~(pi->pi_bar[idx].size - 1);
|
||||
mask &= PCIM_BAR_MEM_BASE;
|
||||
bar = *eax & mask;
|
||||
bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
|
||||
break;
|
||||
case PCIBAR_MEM64:
|
||||
mask = ~(pi->pi_bar[idx].size - 1);
|
||||
mask &= PCIM_BAR_MEM_BASE;
|
||||
bar = *eax & mask;
|
||||
bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
|
||||
PCIM_BAR_MEM_PREFETCH;
|
||||
break;
|
||||
case PCIBAR_MEMHI64:
|
||||
mask = ~(pi->pi_bar[idx - 1].size - 1);
|
||||
mask &= PCIM_BAR_MEM_BASE;
|
||||
bar = ((uint64_t)*eax << 32) & mask;
|
||||
bar = bar >> 32;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
pci_set_cfgdata32(pi, coff, bar);
|
||||
} else if (pci_emul_iscap(pi, coff)) {
|
||||
pci_emul_capwrite(pi, coff, bytes, *eax);
|
||||
} else {
|
||||
CFGWRITE(pi, coff, *eax, bytes);
|
||||
}
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata);
|
||||
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata);
|
||||
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata);
|
||||
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);
|
||||
|
||||
/*
|
||||
* I/O ports to configure PCI IRQ routing. We ignore all writes to it.
|
||||
*/
|
||||
static int
|
||||
pci_irq_port_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
assert(in == 0);
|
||||
return (0);
|
||||
}
|
||||
INOUT_PORT(pci_irq, 0xC00, IOPORT_F_OUT, pci_irq_port_handler);
|
||||
INOUT_PORT(pci_irq, 0xC01, IOPORT_F_OUT, pci_irq_port_handler);
|
||||
|
||||
#define PCI_EMUL_TEST
|
||||
#ifdef PCI_EMUL_TEST
|
||||
/*
 * Dummy test device: a small bank of byte registers exposed through an
 * I/O BAR.
 */
#define DREGSZ	20		/* size of the register bank in bytes */

struct pci_emul_dsoftc {
	uint8_t	regs[DREGSZ];	/* backing store for the I/O registers */
};
|
||||
|
||||
#define PCI_EMUL_MSGS 4
|
||||
|
||||
static int
|
||||
pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
int error;
|
||||
struct pci_emul_dsoftc *sc;
|
||||
|
||||
sc = malloc(sizeof(struct pci_emul_dsoftc));
|
||||
memset(sc, 0, sizeof(struct pci_emul_dsoftc));
|
||||
|
||||
pi->pi_arg = sc;
|
||||
|
||||
pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001);
|
||||
pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD);
|
||||
pci_set_cfgdata8(pi, PCIR_CLASS, 0x02);
|
||||
|
||||
error = pci_emul_alloc_bar(pi, 0, 0, PCIBAR_IO, DREGSZ);
|
||||
assert(error == 0);
|
||||
|
||||
error = pci_emul_add_msicap(pi, PCI_EMUL_MSGS);
|
||||
assert(error == 0);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_emul_diow(struct pci_devinst *pi, int baridx, int offset, int size,
|
||||
uint32_t value)
|
||||
{
|
||||
int i;
|
||||
struct pci_emul_dsoftc *sc = pi->pi_arg;
|
||||
|
||||
if (offset + size > DREGSZ) {
|
||||
printf("diow: too large, offset %d size %d\n", offset, size);
|
||||
return;
|
||||
}
|
||||
|
||||
if (size == 1) {
|
||||
sc->regs[offset] = value & 0xff;
|
||||
} else if (size == 2) {
|
||||
*(uint16_t *)&sc->regs[offset] = value & 0xffff;
|
||||
} else {
|
||||
*(uint32_t *)&sc->regs[offset] = value;
|
||||
}
|
||||
|
||||
/*
|
||||
* Special magic value to generate an interrupt
|
||||
*/
|
||||
if (offset == 4 && size == 4 && pci_msi_enabled(pi))
|
||||
pci_generate_msi(pi, value % pci_msi_msgnum(pi));
|
||||
|
||||
if (value == 0xabcdef) {
|
||||
for (i = 0; i < pci_msi_msgnum(pi); i++)
|
||||
pci_generate_msi(pi, i);
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
pci_emul_dior(struct pci_devinst *pi, int baridx, int offset, int size)
|
||||
{
|
||||
struct pci_emul_dsoftc *sc = pi->pi_arg;
|
||||
uint32_t value;
|
||||
|
||||
if (offset + size > DREGSZ) {
|
||||
printf("dior: too large, offset %d size %d\n", offset, size);
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (size == 1) {
|
||||
value = sc->regs[offset];
|
||||
} else if (size == 2) {
|
||||
value = *(uint16_t *) &sc->regs[offset];
|
||||
} else {
|
||||
value = *(uint32_t *) &sc->regs[offset];
|
||||
}
|
||||
|
||||
return (value);
|
||||
}
|
||||
|
||||
struct pci_devemu pci_dummy = {
|
||||
.pe_emu = "dummy",
|
||||
.pe_init = pci_emul_dinit,
|
||||
.pe_iow = pci_emul_diow,
|
||||
.pe_ior = pci_emul_dior
|
||||
};
|
||||
PCI_EMUL_SET(pci_dummy);
|
||||
|
||||
#endif /* PCI_EMUL_TEST */
|
171
usr.sbin/bhyve/pci_emul.h
Normal file
171
usr.sbin/bhyve/pci_emul.h
Normal file
@ -0,0 +1,171 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _PCI_EMUL_H_
|
||||
#define _PCI_EMUL_H_
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/queue.h>
|
||||
#include <sys/kernel.h>
|
||||
|
||||
#include <dev/pci/pcireg.h>
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#define PCI_BARMAX PCIR_MAX_BAR_0 /* BAR registers in a Type 0 header */
|
||||
#define PCIY_RESERVED 0x00
|
||||
|
||||
struct vmctx;
|
||||
struct pci_devinst;
|
||||
|
||||
/*
 * Descriptor of a PCI device emulation: its name and the callbacks the
 * generic PCI code invokes on its behalf.
 */
struct pci_devemu {
	/* Name of device emulation */
	char		*pe_emu;

	/* Instance creation */
	int		(*pe_init)(struct vmctx *, struct pci_devinst *,
				   char *opts);

	/*
	 * Optional config space write/read callbacks.  A return value of 0
	 * means the access was handled; otherwise the default handler runs.
	 */
	int		(*pe_cfgwrite)(struct vmctx *ctx, int vcpu,
				       struct pci_devinst *pi, int offset,
				       int bytes, uint32_t val);
	int		(*pe_cfgread)(struct vmctx *ctx, int vcpu,
				      struct pci_devinst *pi, int offset,
				      int bytes, uint32_t *retval);

	/* I/O space write/read callbacks */
	void		(*pe_iow)(struct pci_devinst *pi, int baridx,
				  int offset, int size, uint32_t value);
	uint32_t	(*pe_ior)(struct pci_devinst *pi, int baridx,
				  int offset, int size);
};
|
||||
#define PCI_EMUL_SET(x) DATA_SET(pci_devemu_set, x);
|
||||
|
||||
/* Kind of resource decoded by a BAR slot. */
enum pcibar_type {
	PCIBAR_NONE,		/* slot is not in use */
	PCIBAR_IO,		/* I/O port range */
	PCIBAR_MEM32,		/* 32-bit memory range */
	PCIBAR_MEM64,		/* low half of a 64-bit memory range */
	PCIBAR_MEMHI64		/* high half of a 64-bit memory range */
};
|
||||
|
||||
struct pcibar {
|
||||
enum pcibar_type type; /* io or memory */
|
||||
uint64_t size;
|
||||
uint64_t addr;
|
||||
};
|
||||
|
||||
#define PI_NAMESZ 40
|
||||
|
||||
struct pci_devinst {
|
||||
struct pci_devemu *pi_d;
|
||||
struct vmctx *pi_vmctx;
|
||||
uint8_t pi_bus, pi_slot, pi_func;
|
||||
char pi_name[PI_NAMESZ];
|
||||
uint16_t pi_iobase;
|
||||
int pi_bar_getsize;
|
||||
|
||||
struct {
|
||||
int enabled;
|
||||
int cpu;
|
||||
int vector;
|
||||
int msgnum;
|
||||
} pi_msi;
|
||||
|
||||
void *pi_arg; /* devemu-private data */
|
||||
|
||||
u_char pi_cfgdata[PCI_REGMAX + 1];
|
||||
struct pcibar pi_bar[PCI_BARMAX + 1];
|
||||
};
|
||||
|
||||
/*
 * Byte-exact image of an MSI capability with a 64-bit message address;
 * packed so it can be copied into config space as-is.
 */
struct msicap {
	uint8_t		capid;		/* capability ID */
	uint8_t		nextptr;	/* offset of the next capability */
	uint16_t	msgctrl;	/* message control */
	uint32_t	addrlo;		/* message address, low 32 bits */
	uint32_t	addrhi;		/* message address, high 32 bits */
	uint16_t	msgdata;	/* message data */
} __packed;
|
||||
|
||||
void init_pci(struct vmctx *ctx);
|
||||
void pci_parse_slot(char *opt);
|
||||
void pci_parse_name(char *opt);
|
||||
void pci_callback(void);
|
||||
int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, uint64_t hostbase,
|
||||
enum pcibar_type type, uint64_t size);
|
||||
int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum);
|
||||
void msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
|
||||
int bytes, uint32_t val);
|
||||
|
||||
void pci_generate_msi(struct pci_devinst *pi, int msgnum);
|
||||
int pci_msi_enabled(struct pci_devinst *pi);
|
||||
int pci_msi_msgnum(struct pci_devinst *pi);
|
||||
void pci_populate_msicap(struct msicap *cap, int msgs, int nextptr);
|
||||
|
||||
static __inline void
|
||||
pci_set_cfgdata8(struct pci_devinst *pi, int offset, uint8_t val)
|
||||
{
|
||||
assert(offset <= PCI_REGMAX);
|
||||
*(uint8_t *)(pi->pi_cfgdata + offset) = val;
|
||||
}
|
||||
|
||||
static __inline void
|
||||
pci_set_cfgdata16(struct pci_devinst *pi, int offset, uint16_t val)
|
||||
{
|
||||
assert(offset <= (PCI_REGMAX - 1) && (offset & 1) == 0);
|
||||
*(uint16_t *)(pi->pi_cfgdata + offset) = val;
|
||||
}
|
||||
|
||||
static __inline void
|
||||
pci_set_cfgdata32(struct pci_devinst *pi, int offset, uint32_t val)
|
||||
{
|
||||
assert(offset <= (PCI_REGMAX - 3) && (offset & 3) == 0);
|
||||
*(uint32_t *)(pi->pi_cfgdata + offset) = val;
|
||||
}
|
||||
|
||||
static __inline uint8_t
|
||||
pci_get_cfgdata8(struct pci_devinst *pi, int offset)
|
||||
{
|
||||
assert(offset <= PCI_REGMAX);
|
||||
return (*(uint8_t *)(pi->pi_cfgdata + offset));
|
||||
}
|
||||
|
||||
static __inline uint16_t
|
||||
pci_get_cfgdata16(struct pci_devinst *pi, int offset)
|
||||
{
|
||||
assert(offset <= (PCI_REGMAX - 1) && (offset & 1) == 0);
|
||||
return (*(uint16_t *)(pi->pi_cfgdata + offset));
|
||||
}
|
||||
|
||||
static __inline uint32_t
|
||||
pci_get_cfgdata32(struct pci_devinst *pi, int offset)
|
||||
{
|
||||
assert(offset <= (PCI_REGMAX - 3) && (offset & 3) == 0);
|
||||
return (*(uint32_t *)(pi->pi_cfgdata + offset));
|
||||
}
|
||||
|
||||
#endif /* _PCI_EMUL_H_ */
|
52
usr.sbin/bhyve/pci_hostbridge.c
Normal file
52
usr.sbin/bhyve/pci_hostbridge.c
Normal file
@ -0,0 +1,52 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include "pci_emul.h"
|
||||
|
||||
static int
|
||||
pci_hostbridge_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
|
||||
/* config space */
|
||||
pci_set_cfgdata16(pi, PCIR_VENDOR, 0x1275); /* NetApp */
|
||||
pci_set_cfgdata16(pi, PCIR_DEVICE, 0x1275); /* NetApp */
|
||||
pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_BRIDGE);
|
||||
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
|
||||
pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_HOST);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
struct pci_devemu pci_de_hostbridge = {
|
||||
.pe_emu = "hostbridge",
|
||||
.pe_init = pci_hostbridge_init,
|
||||
};
|
||||
PCI_EMUL_SET(pci_de_hostbridge);
|
508
usr.sbin/bhyve/pci_passthru.c
Normal file
508
usr.sbin/bhyve/pci_passthru.c
Normal file
@ -0,0 +1,508 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/pciio.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include <dev/io/iodev.h>
|
||||
#include <machine/iodev.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <vmmapi.h>
|
||||
#include "pci_emul.h"
|
||||
|
||||
#ifndef _PATH_DEVPCI
|
||||
#define _PATH_DEVPCI "/dev/pci"
|
||||
#endif
|
||||
|
||||
#ifndef _PATH_DEVIO
|
||||
#define _PATH_DEVIO "/dev/io"
|
||||
#endif
|
||||
|
||||
#define LEGACY_SUPPORT 1
|
||||
|
||||
static int pcifd = -1;
|
||||
static int iofd = -1;
|
||||
|
||||
struct passthru_softc {
|
||||
struct pci_devinst *psc_pi;
|
||||
struct pcibar psc_bar[PCI_BARMAX + 1];
|
||||
struct {
|
||||
int capoff;
|
||||
int msgctrl;
|
||||
int emulated;
|
||||
} psc_msi;
|
||||
struct pcisel psc_sel;
|
||||
};
|
||||
|
||||
static int
|
||||
msi_caplen(int msgctrl)
|
||||
{
|
||||
int len;
|
||||
|
||||
len = 10; /* minimum length of msi capability */
|
||||
|
||||
if (msgctrl & PCIM_MSICTRL_64BIT)
|
||||
len += 4;
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* Ignore the 'mask' and 'pending' bits in the MSI capability.
|
||||
* We'll let the guest manipulate them directly.
|
||||
*/
|
||||
if (msgctrl & PCIM_MSICTRL_VECTOR)
|
||||
len += 10;
|
||||
#endif
|
||||
|
||||
return (len);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
read_config(const struct pcisel *sel, long reg, int width)
|
||||
{
|
||||
struct pci_io pi;
|
||||
|
||||
bzero(&pi, sizeof(pi));
|
||||
pi.pi_sel = *sel;
|
||||
pi.pi_reg = reg;
|
||||
pi.pi_width = width;
|
||||
|
||||
if (ioctl(pcifd, PCIOCREAD, &pi) < 0)
|
||||
return (0); /* XXX */
|
||||
else
|
||||
return (pi.pi_data);
|
||||
}
|
||||
|
||||
static void
|
||||
write_config(const struct pcisel *sel, long reg, int width, uint32_t data)
|
||||
{
|
||||
struct pci_io pi;
|
||||
|
||||
bzero(&pi, sizeof(pi));
|
||||
pi.pi_sel = *sel;
|
||||
pi.pi_reg = reg;
|
||||
pi.pi_width = width;
|
||||
pi.pi_data = data;
|
||||
|
||||
(void)ioctl(pcifd, PCIOCWRITE, &pi); /* XXX */
|
||||
}
|
||||
|
||||
#ifdef LEGACY_SUPPORT
|
||||
static int
|
||||
passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr)
|
||||
{
|
||||
int capoff, i;
|
||||
struct msicap msicap;
|
||||
u_char *capdata;
|
||||
|
||||
pci_populate_msicap(&msicap, msgnum, nextptr);
|
||||
|
||||
/*
|
||||
* XXX
|
||||
* Copy the msi capability structure in the last 16 bytes of the
|
||||
* config space. This is wrong because it could shadow something
|
||||
* useful to the device.
|
||||
*/
|
||||
capoff = 256 - roundup(sizeof(msicap), 4);
|
||||
capdata = (u_char *)&msicap;
|
||||
for (i = 0; i < sizeof(msicap); i++)
|
||||
pci_set_cfgdata8(pi, capoff + i, capdata[i]);
|
||||
|
||||
return (capoff);
|
||||
}
|
||||
#endif /* LEGACY_SUPPORT */
|
||||
|
||||
static int
|
||||
cfginitmsi(struct passthru_softc *sc)
|
||||
{
|
||||
int ptr, cap, sts, caplen;
|
||||
uint32_t u32;
|
||||
struct pcisel sel;
|
||||
struct pci_devinst *pi;
|
||||
|
||||
pi = sc->psc_pi;
|
||||
sel = sc->psc_sel;
|
||||
|
||||
/*
|
||||
* Parse the capabilities and cache the location of the MSI
|
||||
* capability.
|
||||
*/
|
||||
sts = read_config(&sel, PCIR_STATUS, 2);
|
||||
if (sts & PCIM_STATUS_CAPPRESENT) {
|
||||
ptr = read_config(&sel, PCIR_CAP_PTR, 1);
|
||||
while (ptr != 0 && ptr != 0xff) {
|
||||
cap = read_config(&sel, ptr + PCICAP_ID, 1);
|
||||
if (cap == PCIY_MSI) {
|
||||
/*
|
||||
* Copy the MSI capability into the config
|
||||
* space of the emulated pci device
|
||||
*/
|
||||
sc->psc_msi.capoff = ptr;
|
||||
sc->psc_msi.msgctrl = read_config(&sel,
|
||||
ptr + 2, 2);
|
||||
sc->psc_msi.emulated = 0;
|
||||
caplen = msi_caplen(sc->psc_msi.msgctrl);
|
||||
while (caplen > 0) {
|
||||
u32 = read_config(&sel, ptr, 4);
|
||||
pci_set_cfgdata32(pi, ptr, u32);
|
||||
caplen -= 4;
|
||||
ptr += 4;
|
||||
}
|
||||
break;
|
||||
}
|
||||
ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef LEGACY_SUPPORT
|
||||
/*
|
||||
* If the passthrough device does not support MSI then craft a
|
||||
* MSI capability for it. We link the new MSI capability at the
|
||||
* head of the list of capabilities.
|
||||
*/
|
||||
if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) {
|
||||
int origptr, msiptr;
|
||||
origptr = read_config(&sel, PCIR_CAP_PTR, 1);
|
||||
msiptr = passthru_add_msicap(pi, 1, origptr);
|
||||
sc->psc_msi.capoff = msiptr;
|
||||
sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2);
|
||||
sc->psc_msi.emulated = 1;
|
||||
pci_set_cfgdata8(pi, PCIR_CAP_PTR, msiptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (sc->psc_msi.capoff == 0) /* MSI or bust */
|
||||
return (-1);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
cfginitbar(struct vmctx *ctx, struct passthru_softc *sc)
|
||||
{
|
||||
int i, error;
|
||||
struct pci_devinst *pi;
|
||||
struct pci_bar_io bar;
|
||||
enum pcibar_type bartype;
|
||||
uint64_t base;
|
||||
|
||||
pi = sc->psc_pi;
|
||||
|
||||
/*
|
||||
* Initialize BAR registers
|
||||
*/
|
||||
for (i = 0; i <= PCI_BARMAX; i++) {
|
||||
bzero(&bar, sizeof(bar));
|
||||
bar.pbi_sel = sc->psc_sel;
|
||||
bar.pbi_reg = PCIR_BAR(i);
|
||||
|
||||
if (ioctl(pcifd, PCIOCGETBAR, &bar) < 0)
|
||||
continue;
|
||||
|
||||
if (PCI_BAR_IO(bar.pbi_base)) {
|
||||
bartype = PCIBAR_IO;
|
||||
base = bar.pbi_base & PCIM_BAR_IO_BASE;
|
||||
} else {
|
||||
switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) {
|
||||
case PCIM_BAR_MEM_64:
|
||||
bartype = PCIBAR_MEM64;
|
||||
break;
|
||||
default:
|
||||
bartype = PCIBAR_MEM32;
|
||||
break;
|
||||
}
|
||||
base = bar.pbi_base & PCIM_BAR_MEM_BASE;
|
||||
}
|
||||
|
||||
/* Cache information about the "real" BAR */
|
||||
sc->psc_bar[i].type = bartype;
|
||||
sc->psc_bar[i].size = bar.pbi_length;
|
||||
sc->psc_bar[i].addr = base;
|
||||
|
||||
/* Allocate the BAR in the guest I/O or MMIO space */
|
||||
error = pci_emul_alloc_bar(pi, i, base, bartype,
|
||||
bar.pbi_length);
|
||||
if (error)
|
||||
return (-1);
|
||||
|
||||
/*
|
||||
* Map the physical MMIO space in the guest MMIO space
|
||||
*/
|
||||
if (bartype != PCIBAR_IO) {
|
||||
error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
|
||||
sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
|
||||
pi->pi_bar[i].addr, pi->pi_bar[i].size, base);
|
||||
if (error)
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* 64-bit BAR takes up two slots so skip the next one.
|
||||
*/
|
||||
if (bartype == PCIBAR_MEM64) {
|
||||
i++;
|
||||
assert(i <= PCI_BARMAX);
|
||||
sc->psc_bar[i].type = PCIBAR_MEMHI64;
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
cfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func)
|
||||
{
|
||||
int error;
|
||||
struct passthru_softc *sc;
|
||||
|
||||
error = 1;
|
||||
sc = pi->pi_arg;
|
||||
|
||||
bzero(&sc->psc_sel, sizeof(struct pcisel));
|
||||
sc->psc_sel.pc_bus = bus;
|
||||
sc->psc_sel.pc_dev = slot;
|
||||
sc->psc_sel.pc_func = func;
|
||||
|
||||
if (cfginitbar(ctx, sc) != 0)
|
||||
goto done;
|
||||
|
||||
if (cfginitmsi(sc) != 0)
|
||||
goto done;
|
||||
|
||||
error = 0; /* success */
|
||||
done:
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
int bus, slot, func, error;
|
||||
struct passthru_softc *sc;
|
||||
|
||||
sc = NULL;
|
||||
error = 1;
|
||||
|
||||
if (pcifd < 0) {
|
||||
pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
|
||||
if (pcifd < 0)
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (iofd < 0) {
|
||||
iofd = open(_PATH_DEVIO, O_RDWR, 0);
|
||||
if (iofd < 0)
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (opts == NULL || sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3)
|
||||
goto done;
|
||||
|
||||
if (vm_assign_pptdev(ctx, bus, slot, func) != 0)
|
||||
goto done;
|
||||
|
||||
sc = malloc(sizeof(struct passthru_softc));
|
||||
memset(sc, 0, sizeof(struct passthru_softc));
|
||||
|
||||
pi->pi_arg = sc;
|
||||
sc->psc_pi = pi;
|
||||
|
||||
/* initialize config space */
|
||||
if (cfginit(ctx, pi, bus, slot, func) != 0)
|
||||
goto done;
|
||||
|
||||
error = 0; /* success */
|
||||
done:
|
||||
if (error) {
|
||||
free(sc);
|
||||
vm_unassign_pptdev(ctx, bus, slot, func);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
bar_access(int coff)
|
||||
{
|
||||
if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1))
|
||||
return (1);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
msicap_access(struct passthru_softc *sc, int coff)
|
||||
{
|
||||
int caplen;
|
||||
|
||||
if (sc->psc_msi.capoff == 0)
|
||||
return (0);
|
||||
|
||||
caplen = msi_caplen(sc->psc_msi.msgctrl);
|
||||
|
||||
if (coff >= sc->psc_msi.capoff && coff < sc->psc_msi.capoff + caplen)
|
||||
return (1);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff,
|
||||
int bytes, uint32_t *rv)
|
||||
{
|
||||
struct passthru_softc *sc;
|
||||
|
||||
sc = pi->pi_arg;
|
||||
|
||||
/*
|
||||
* PCI BARs and MSI capability is emulated.
|
||||
*/
|
||||
if (bar_access(coff) || msicap_access(sc, coff))
|
||||
return (-1);
|
||||
|
||||
#ifdef LEGACY_SUPPORT
|
||||
/*
|
||||
* Emulate PCIR_CAP_PTR if this device does not support MSI capability
|
||||
* natively.
|
||||
*/
|
||||
if (sc->psc_msi.emulated) {
|
||||
if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4)
|
||||
return (-1);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Everything else just read from the device's config space */
|
||||
*rv = read_config(&sc->psc_sel, coff, bytes);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff,
|
||||
int bytes, uint32_t val)
|
||||
{
|
||||
int error;
|
||||
struct passthru_softc *sc;
|
||||
|
||||
sc = pi->pi_arg;
|
||||
|
||||
/*
|
||||
* PCI BARs are emulated
|
||||
*/
|
||||
if (bar_access(coff))
|
||||
return (-1);
|
||||
|
||||
/*
|
||||
* MSI capability is emulated
|
||||
*/
|
||||
if (msicap_access(sc, coff)) {
|
||||
msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val);
|
||||
|
||||
error = vm_setup_msi(ctx, vcpu, sc->psc_sel.pc_bus,
|
||||
sc->psc_sel.pc_dev, sc->psc_sel.pc_func, pi->pi_msi.cpu,
|
||||
pi->pi_msi.vector, pi->pi_msi.msgnum);
|
||||
if (error != 0) {
|
||||
printf("vm_setup_msi returned error %d\r\n", errno);
|
||||
exit(1);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
#ifdef LEGACY_SUPPORT
|
||||
/*
|
||||
* If this device does not support MSI natively then we cannot let
|
||||
* the guest disable legacy interrupts from the device. It is the
|
||||
* legacy interrupt that is triggering the virtual MSI to the guest.
|
||||
*/
|
||||
if (sc->psc_msi.emulated && pci_msi_enabled(pi)) {
|
||||
if (coff == PCIR_COMMAND && bytes == 2)
|
||||
val &= ~PCIM_CMD_INTxDIS;
|
||||
}
|
||||
#endif
|
||||
|
||||
write_config(&sc->psc_sel, coff, bytes, val);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
passthru_iow(struct pci_devinst *pi, int baridx, int offset, int size,
|
||||
uint32_t value)
|
||||
{
|
||||
struct passthru_softc *sc;
|
||||
struct iodev_pio_req pio;
|
||||
|
||||
sc = pi->pi_arg;
|
||||
|
||||
bzero(&pio, sizeof(struct iodev_pio_req));
|
||||
pio.access = IODEV_PIO_WRITE;
|
||||
pio.port = sc->psc_bar[baridx].addr + offset;
|
||||
pio.width = size;
|
||||
pio.val = value;
|
||||
|
||||
(void)ioctl(iofd, IODEV_PIO, &pio);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
passthru_ior(struct pci_devinst *pi, int baridx, int offset, int size)
|
||||
{
|
||||
struct passthru_softc *sc;
|
||||
struct iodev_pio_req pio;
|
||||
|
||||
sc = pi->pi_arg;
|
||||
|
||||
bzero(&pio, sizeof(struct iodev_pio_req));
|
||||
pio.access = IODEV_PIO_READ;
|
||||
pio.port = sc->psc_bar[baridx].addr + offset;
|
||||
pio.width = size;
|
||||
pio.val = 0;
|
||||
|
||||
(void)ioctl(iofd, IODEV_PIO, &pio);
|
||||
|
||||
return (pio.val);
|
||||
}
|
||||
|
||||
struct pci_devemu passthru = {
|
||||
.pe_emu = "passthru",
|
||||
.pe_init = passthru_init,
|
||||
.pe_cfgwrite = passthru_cfgwrite,
|
||||
.pe_cfgread = passthru_cfgread,
|
||||
.pe_iow = passthru_iow,
|
||||
.pe_ior = passthru_ior,
|
||||
};
|
||||
PCI_EMUL_SET(passthru);
|
502
usr.sbin/bhyve/pci_virtio_block.c
Normal file
502
usr.sbin/bhyve/pci_virtio_block.c
Normal file
@ -0,0 +1,502 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/linker_set.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "fbsdrun.h"
|
||||
#include "pci_emul.h"
|
||||
#include "virtio.h"
|
||||
|
||||
/* Number of entries in the request virtqueue */
#define VTBLK_RINGSZ	64

/* Size in bytes of the device-specific configuration area */
#define VTBLK_CFGSZ	28

/*
 * Register window layout.  The device-specific config bytes occupy
 * [VTBLK_R_CFG, VTBLK_R_CFG_END]; VTBLK_REGSZ is the size of the whole
 * I/O BAR.  Every arithmetic expansion is parenthesized so the macros
 * keep their value when used inside a larger expression.
 */
#define VTBLK_R_CFG	VTCFG_R_CFG0
#define VTBLK_R_CFG_END	(VTBLK_R_CFG + VTBLK_CFGSZ - 1)
#define VTBLK_R_MAX	VTBLK_R_CFG_END

#define VTBLK_REGSZ	(VTBLK_R_MAX + 1)

/* Maximum number of data segments accepted in one request */
#define VTBLK_MAXSEGS	32

/* Values written to the request status byte */
#define VTBLK_S_OK	0
#define VTBLK_S_IOERR	1

/*
 * Host capabilities
 */
#define VTBLK_S_HOSTCAPS      \
  ( 0x00000004 |	/* host maximum request segments */ \
    0x10000000 )	/* supports indirect descriptors */
|
||||
|
||||
/*
 * Host-side view of one virtqueue: bookkeeping plus host pointers to
 * the descriptor table, avail ring and used ring.
 */
struct vring_hqueue {
	/* Internal state */
	uint16_t	hq_size;	/* number of ring entries */
	uint16_t	hq_cur_aidx;	/* trails behind 'avail_idx' */

	/* Host-context pointers: descriptor table and avail ring */
	struct virtio_desc *hq_dtable;
	uint16_t	*hq_avail_flags;
	uint16_t	*hq_avail_idx;	/* monotonically increasing */
	uint16_t	*hq_avail_ring;

	/* Host-context pointers: used ring */
	uint16_t	*hq_used_flags;
	uint16_t	*hq_used_idx;	/* monotonically increasing */
	struct virtio_used *hq_used_ring;
};
|
||||
|
||||
/*
|
||||
* Config space
|
||||
*/
|
||||
struct vtblk_config {
|
||||
uint64_t vbc_capacity;
|
||||
uint32_t vbc_size_max;
|
||||
uint32_t vbc_seg_max;
|
||||
uint16_t vbc_geom_c;
|
||||
uint8_t vbc_geom_h;
|
||||
uint8_t vbc_geom_s;
|
||||
uint32_t vbc_blk_size;
|
||||
uint32_t vbc_sectors_max;
|
||||
} __packed;
|
||||
CTASSERT(sizeof(struct vtblk_config) == VTBLK_CFGSZ);
|
||||
|
||||
/*
 * Fixed-size header found at the start of every block request.
 * vbh_type selects the operation; vbh_sector is the starting sector.
 */
#define VBH_OP_READ	0
#define VBH_OP_WRITE	1

struct virtio_blk_hdr {
	uint32_t	vbh_type;	/* VBH_OP_READ or VBH_OP_WRITE */
	uint32_t	vbh_ioprio;
	uint64_t	vbh_sector;
} __packed;
|
||||
|
||||
/*
 * Debug printf
 *
 * Both macros take a fully parenthesized printf argument list, e.g.
 * DPRINTF(("x %d\n", x)).  DPRINTF prints only when pci_vtblk_debug is
 * non-zero.  It is wrapped in do/while(0) so that it behaves as a
 * single statement and cannot capture the 'else' of an enclosing 'if'.
 */
static int pci_vtblk_debug;
#define DPRINTF(params) do { if (pci_vtblk_debug) printf params; } while (0)
#define WPRINTF(params) printf params
|
||||
|
||||
/*
|
||||
* Per-device softc
|
||||
*/
|
||||
struct pci_vtblk_softc {
|
||||
struct pci_devinst *vbsc_pi;
|
||||
int vbsc_fd;
|
||||
int vbsc_status;
|
||||
int vbsc_isr;
|
||||
int vbsc_lastq;
|
||||
uint32_t vbsc_features;
|
||||
uint64_t vbsc_pfn;
|
||||
struct vring_hqueue vbsc_q;
|
||||
struct vtblk_config vbsc_cfg;
|
||||
};
|
||||
|
||||
/*
|
||||
* Return the number of available descriptors in the vring taking care
|
||||
* of the 16-bit index wraparound.
|
||||
*/
|
||||
static int
|
||||
hq_num_avail(struct vring_hqueue *hq)
|
||||
{
|
||||
int ndesc;
|
||||
|
||||
if (*hq->hq_avail_idx >= hq->hq_cur_aidx)
|
||||
ndesc = *hq->hq_avail_idx - hq->hq_cur_aidx;
|
||||
else
|
||||
ndesc = UINT16_MAX - hq->hq_cur_aidx + *hq->hq_avail_idx + 1;
|
||||
|
||||
assert(ndesc >= 0 && ndesc <= hq->hq_size);
|
||||
|
||||
return (ndesc);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtblk_update_status(struct pci_vtblk_softc *sc, uint32_t value)
|
||||
{
|
||||
if (value == 0) {
|
||||
DPRINTF(("vtblk: device reset requested !\n"));
|
||||
}
|
||||
|
||||
sc->vbsc_status = value;
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vring_hqueue *hq)
|
||||
{
|
||||
struct iovec iov[VTBLK_MAXSEGS];
|
||||
struct virtio_blk_hdr *vbh;
|
||||
struct virtio_desc *vd, *vid;
|
||||
struct virtio_used *vu;
|
||||
uint8_t *status;
|
||||
int i;
|
||||
int err;
|
||||
int iolen;
|
||||
int nsegs;
|
||||
int uidx, aidx, didx;
|
||||
int writeop;
|
||||
off_t offset;
|
||||
|
||||
uidx = *hq->hq_used_idx;
|
||||
aidx = hq->hq_cur_aidx;
|
||||
didx = hq->hq_avail_ring[aidx % hq->hq_size];
|
||||
assert(didx >= 0 && didx < hq->hq_size);
|
||||
|
||||
vd = &hq->hq_dtable[didx];
|
||||
|
||||
/*
|
||||
* Verify that the descriptor is indirect, and obtain
|
||||
* the pointer to the indirect descriptor.
|
||||
* There has to be space for at least 3 descriptors
|
||||
* in the indirect descriptor array: the block header,
|
||||
* 1 or more data descriptors, and a status byte.
|
||||
*/
|
||||
assert(vd->vd_flags & VRING_DESC_F_INDIRECT);
|
||||
|
||||
nsegs = vd->vd_len / sizeof(struct virtio_desc);
|
||||
assert(nsegs >= 3);
|
||||
assert(nsegs < VTBLK_MAXSEGS + 2);
|
||||
|
||||
vid = paddr_guest2host(vd->vd_addr);
|
||||
assert((vid->vd_flags & VRING_DESC_F_INDIRECT) == 0);
|
||||
|
||||
/*
|
||||
* The first descriptor will be the read-only fixed header
|
||||
*/
|
||||
vbh = paddr_guest2host(vid[0].vd_addr);
|
||||
assert(vid[0].vd_len == sizeof(struct virtio_blk_hdr));
|
||||
assert(vid[0].vd_flags & VRING_DESC_F_NEXT);
|
||||
assert((vid[0].vd_flags & VRING_DESC_F_WRITE) == 0);
|
||||
|
||||
writeop = (vbh->vbh_type == VBH_OP_WRITE);
|
||||
|
||||
offset = vbh->vbh_sector * DEV_BSIZE;
|
||||
|
||||
/*
|
||||
* Build up the iovec based on the guest's data descriptors
|
||||
*/
|
||||
for (i = 1, iolen = 0; i < nsegs - 1; i++) {
|
||||
iov[i-1].iov_base = paddr_guest2host(vid[i].vd_addr);
|
||||
iov[i-1].iov_len = vid[i].vd_len;
|
||||
iolen += vid[i].vd_len;
|
||||
|
||||
assert(vid[i].vd_flags & VRING_DESC_F_NEXT);
|
||||
assert((vid[i].vd_flags & VRING_DESC_F_INDIRECT) == 0);
|
||||
|
||||
/*
|
||||
* - write op implies read-only descriptor,
|
||||
* - read op implies write-only descriptor,
|
||||
* therefore test the inverse of the descriptor bit
|
||||
* to the op.
|
||||
*/
|
||||
assert(((vid[i].vd_flags & VRING_DESC_F_WRITE) == 0) ==
|
||||
writeop);
|
||||
}
|
||||
|
||||
/* Lastly, get the address of the status byte */
|
||||
status = paddr_guest2host(vid[nsegs - 1].vd_addr);
|
||||
assert(vid[nsegs - 1].vd_len == 1);
|
||||
assert((vid[nsegs - 1].vd_flags & VRING_DESC_F_NEXT) == 0);
|
||||
assert(vid[nsegs - 1].vd_flags & VRING_DESC_F_WRITE);
|
||||
|
||||
DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r",
|
||||
writeop ? "write" : "read", iolen, nsegs - 2, offset));
|
||||
|
||||
if (writeop){
|
||||
err = pwritev(sc->vbsc_fd, iov, nsegs - 2, offset);
|
||||
} else {
|
||||
err = preadv(sc->vbsc_fd, iov, nsegs - 2, offset);
|
||||
}
|
||||
|
||||
*status = err < 0 ? VTBLK_S_IOERR : VTBLK_S_OK;
|
||||
|
||||
/*
|
||||
* Return the single indirect descriptor back to the host
|
||||
*/
|
||||
vu = &hq->hq_used_ring[uidx % hq->hq_size];
|
||||
vu->vu_idx = didx;
|
||||
vu->vu_tlen = 1;
|
||||
hq->hq_cur_aidx++;
|
||||
*hq->hq_used_idx += 1;
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtblk_qnotify(struct pci_vtblk_softc *sc)
|
||||
{
|
||||
struct vring_hqueue *hq = &sc->vbsc_q;
|
||||
int i;
|
||||
int ndescs;
|
||||
|
||||
/*
|
||||
* Calculate number of ring entries to process
|
||||
*/
|
||||
ndescs = hq_num_avail(hq);
|
||||
|
||||
if (ndescs == 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Run through all the entries, placing them into iovecs and
|
||||
* sending when an end-of-packet is found
|
||||
*/
|
||||
for (i = 0; i < ndescs; i++)
|
||||
pci_vtblk_proc(sc, hq);
|
||||
|
||||
/*
|
||||
* Generate an interrupt if able
|
||||
*/
|
||||
if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0 &&
|
||||
sc->vbsc_isr == 0) {
|
||||
sc->vbsc_isr = 1;
|
||||
pci_generate_msi(sc->vbsc_pi, 0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtblk_ring_init(struct pci_vtblk_softc *sc, uint64_t pfn)
|
||||
{
|
||||
struct vring_hqueue *hq;
|
||||
|
||||
sc->vbsc_pfn = pfn << VRING_PFN;
|
||||
|
||||
/*
|
||||
* Set up host pointers to the various parts of the
|
||||
* queue
|
||||
*/
|
||||
hq = &sc->vbsc_q;
|
||||
hq->hq_size = VTBLK_RINGSZ;
|
||||
|
||||
hq->hq_dtable = paddr_guest2host(pfn << VRING_PFN);
|
||||
hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size);
|
||||
hq->hq_avail_idx = hq->hq_avail_flags + 1;
|
||||
hq->hq_avail_ring = hq->hq_avail_flags + 2;
|
||||
hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring,
|
||||
VRING_ALIGN);
|
||||
hq->hq_used_idx = hq->hq_used_flags + 1;
|
||||
hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);
|
||||
|
||||
/*
|
||||
* Initialize queue indexes
|
||||
*/
|
||||
hq->hq_cur_aidx = 0;
|
||||
}
|
||||
|
||||
static int
|
||||
pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
struct stat sbuf;
|
||||
struct pci_vtblk_softc *sc;
|
||||
int fd;
|
||||
|
||||
if (opts == NULL) {
|
||||
printf("virtio-block: backing device required\n");
|
||||
return (1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Access to guest memory is required. Fail if
|
||||
* memory not mapped
|
||||
*/
|
||||
if (paddr_guest2host(0) == NULL)
|
||||
return (1);
|
||||
|
||||
/*
|
||||
* The supplied backing file has to exist
|
||||
*/
|
||||
fd = open(opts, O_RDWR);
|
||||
if (fd < 0) {
|
||||
perror("Could not open backing file");
|
||||
return (1);
|
||||
}
|
||||
|
||||
if (fstat(fd, &sbuf) < 0) {
|
||||
perror("Could not stat backing file");
|
||||
close(fd);
|
||||
return (1);
|
||||
}
|
||||
|
||||
sc = malloc(sizeof(struct pci_vtblk_softc));
|
||||
memset(sc, 0, sizeof(struct pci_vtblk_softc));
|
||||
|
||||
pi->pi_arg = sc;
|
||||
sc->vbsc_pi = pi;
|
||||
sc->vbsc_fd = fd;
|
||||
|
||||
/* setup virtio block config space */
|
||||
sc->vbsc_cfg.vbc_capacity = sbuf.st_size / DEV_BSIZE;
|
||||
sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS;
|
||||
sc->vbsc_cfg.vbc_blk_size = DEV_BSIZE;
|
||||
sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */
|
||||
sc->vbsc_cfg.vbc_geom_c = 0; /* no geometry */
|
||||
sc->vbsc_cfg.vbc_geom_h = 0;
|
||||
sc->vbsc_cfg.vbc_geom_s = 0;
|
||||
sc->vbsc_cfg.vbc_sectors_max = 0;
|
||||
|
||||
/* initialize config space */
|
||||
pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
|
||||
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
|
||||
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
|
||||
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
|
||||
pci_emul_alloc_bar(pi, 0, 0, PCIBAR_IO, VTBLK_REGSZ);
|
||||
pci_emul_add_msicap(pi, 1);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtblk_write(struct pci_devinst *pi, int baridx, int offset, int size,
|
||||
uint32_t value)
|
||||
{
|
||||
struct pci_vtblk_softc *sc = pi->pi_arg;
|
||||
|
||||
if (offset + size > VTBLK_REGSZ) {
|
||||
DPRINTF(("vtblk_write: 2big, offset %d size %d\n",
|
||||
offset, size));
|
||||
return;
|
||||
}
|
||||
|
||||
switch (offset) {
|
||||
case VTCFG_R_GUESTCAP:
|
||||
assert(size == 4);
|
||||
sc->vbsc_features = value & VTBLK_S_HOSTCAPS;
|
||||
break;
|
||||
case VTCFG_R_PFN:
|
||||
assert(size == 4);
|
||||
pci_vtblk_ring_init(sc, value);
|
||||
break;
|
||||
case VTCFG_R_QSEL:
|
||||
assert(size == 2);
|
||||
sc->vbsc_lastq = value;
|
||||
break;
|
||||
case VTCFG_R_QNOTIFY:
|
||||
assert(size == 2);
|
||||
assert(value == 0);
|
||||
pci_vtblk_qnotify(sc);
|
||||
break;
|
||||
case VTCFG_R_STATUS:
|
||||
assert(size == 1);
|
||||
pci_vtblk_update_status(sc, value);
|
||||
break;
|
||||
case VTCFG_R_HOSTCAP:
|
||||
case VTCFG_R_QNUM:
|
||||
case VTCFG_R_ISR:
|
||||
case VTBLK_R_CFG ... VTBLK_R_CFG_END:
|
||||
DPRINTF(("vtblk: write to readonly reg %d\n\r", offset));
|
||||
break;
|
||||
default:
|
||||
DPRINTF(("vtblk: unknown i/o write offset %d\n\r", offset));
|
||||
value = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t
|
||||
pci_vtblk_read(struct pci_devinst *pi, int baridx, int offset, int size)
|
||||
{
|
||||
struct pci_vtblk_softc *sc = pi->pi_arg;
|
||||
uint32_t value;
|
||||
|
||||
if (offset + size > VTBLK_REGSZ) {
|
||||
DPRINTF(("vtblk_read: 2big, offset %d size %d\n",
|
||||
offset, size));
|
||||
return (0);
|
||||
}
|
||||
|
||||
switch (offset) {
|
||||
case VTCFG_R_HOSTCAP:
|
||||
assert(size == 4);
|
||||
value = VTBLK_S_HOSTCAPS;
|
||||
break;
|
||||
case VTCFG_R_GUESTCAP:
|
||||
assert(size == 4);
|
||||
value = sc->vbsc_features; /* XXX never read ? */
|
||||
break;
|
||||
case VTCFG_R_PFN:
|
||||
assert(size == 4);
|
||||
value = sc->vbsc_pfn >> VRING_PFN;
|
||||
break;
|
||||
case VTCFG_R_QNUM:
|
||||
value = (sc->vbsc_lastq == 0) ? VTBLK_RINGSZ: 0;
|
||||
break;
|
||||
case VTCFG_R_QSEL:
|
||||
assert(size == 2);
|
||||
value = sc->vbsc_lastq; /* XXX never read ? */
|
||||
break;
|
||||
case VTCFG_R_QNOTIFY:
|
||||
assert(size == 2);
|
||||
value = 0; /* XXX never read ? */
|
||||
break;
|
||||
case VTCFG_R_STATUS:
|
||||
assert(size == 1);
|
||||
value = sc->vbsc_status;
|
||||
break;
|
||||
case VTCFG_R_ISR:
|
||||
assert(size == 1);
|
||||
value = sc->vbsc_isr;
|
||||
sc->vbsc_isr = 0; /* a read clears this flag */
|
||||
break;
|
||||
case VTBLK_R_CFG ... VTBLK_R_CFG_END:
|
||||
assert(size == 1);
|
||||
value = *((uint8_t *)&sc->vbsc_cfg + offset - VTBLK_R_CFG);
|
||||
break;
|
||||
default:
|
||||
DPRINTF(("vtblk: unknown i/o read offset %d\n\r", offset));
|
||||
value = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
return (value);
|
||||
}
|
||||
|
||||
struct pci_devemu pci_de_vblk = {
|
||||
.pe_emu = "virtio-blk",
|
||||
.pe_init = pci_vtblk_init,
|
||||
.pe_iow = pci_vtblk_write,
|
||||
.pe_ior = pci_vtblk_read,
|
||||
};
|
||||
PCI_EMUL_SET(pci_de_vblk);
|
739
usr.sbin/bhyve/pci_virtio_net.c
Normal file
739
usr.sbin/bhyve/pci_virtio_net.c
Normal file
@ -0,0 +1,739 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/linker_set.h>
|
||||
#include <sys/select.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <md5.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "fbsdrun.h"
|
||||
#include "pci_emul.h"
|
||||
#include "mevent.h"
|
||||
#include "virtio.h"
|
||||
|
||||
/* Number of entries in the rx and tx virtqueues */
#define VTNET_RINGSZ	256

/* Maximum number of data segments in one transmitted chain */
#define VTNET_MAXSEGS	32

/*
 * PCI config-space register offsets
 */
#define VTNET_R_CFG0	       20
#define VTNET_R_CFG1	       21
#define VTNET_R_CFG2	       22
#define VTNET_R_CFG3	       23
#define VTNET_R_CFG4	       24
#define VTNET_R_CFG5	       25
#define VTNET_R_CFG6	       26
#define VTNET_R_CFG7	       27
#define VTNET_R_MAX	       27

/*
 * Size of the I/O BAR.  Parenthesized so the macro keeps its value
 * when used inside a larger expression.
 */
#define VTNET_REGSZ		(VTNET_R_MAX + 1)

/*
 * Host capabilities
 */
#define VTNET_S_HOSTCAPS      \
  ( 0x00000020 |	/* host supplies MAC */ \
    0x00008000 |	/* host can merge Rx buffers */ \
    0x00010000 )	/* config status available */

/*
 * Queue definitions.
 */
#define VTNET_RXQ	0
#define VTNET_TXQ	1
#define VTNET_CTLQ	2

#define VTNET_MAXQ	3
|
||||
|
||||
/*
 * Host-side view of one virtqueue: bookkeeping plus host pointers to
 * the descriptor table, avail ring and used ring.
 */
struct vring_hqueue {
	/* Internal state */
	uint16_t	hq_size;	/* number of ring entries */
	uint16_t	hq_cur_aidx;	/* trails behind 'avail_idx' */

	/* Host-context pointers: descriptor table and avail ring */
	struct virtio_desc *hq_dtable;
	uint16_t	*hq_avail_flags;
	uint16_t	*hq_avail_idx;	/* monotonically increasing */
	uint16_t	*hq_avail_ring;

	/* Host-context pointers: used ring */
	uint16_t	*hq_used_flags;
	uint16_t	*hq_used_idx;	/* monotonically increasing */
	struct virtio_used *hq_used_ring;
};
|
||||
|
||||
/*
 * Fixed-size header that precedes every received frame in the guest
 * buffer.  The receive path in this file zeroes it and sets only
 * vrh_bufs.
 */
struct virtio_net_rxhdr {
	uint8_t		vrh_flags;
	uint8_t		vrh_gso_type;
	uint16_t	vrh_hdr_len;
	uint16_t	vrh_gso_size;
	uint16_t	vrh_csum_start;
	uint16_t	vrh_csum_offset;
	uint16_t	vrh_bufs;	/* number of buffers used by the frame */
} __packed;
|
||||
|
||||
/*
 * Debug printf
 *
 * Both macros take a fully parenthesized printf argument list, e.g.
 * DPRINTF(("x %d\n", x)).  DPRINTF prints only when pci_vtnet_debug is
 * non-zero.  It is wrapped in do/while(0) so that it behaves as a
 * single statement and cannot capture the 'else' of an enclosing 'if'.
 */
static int pci_vtnet_debug;
#define DPRINTF(params) do { if (pci_vtnet_debug) printf params; } while (0)
#define WPRINTF(params) printf params
|
||||
|
||||
/*
|
||||
* Per-device softc
|
||||
*/
|
||||
struct pci_vtnet_softc {
|
||||
struct pci_devinst *vsc_pi;
|
||||
pthread_mutex_t vsc_mtx;
|
||||
struct mevent *vsc_mevp;
|
||||
|
||||
int vsc_curq;
|
||||
int vsc_status;
|
||||
int vsc_isr;
|
||||
int vsc_tapfd;
|
||||
int vsc_rx_ready;
|
||||
int vsc_rxpend;
|
||||
|
||||
uint32_t vsc_features;
|
||||
uint8_t vsc_macaddr[6];
|
||||
|
||||
uint64_t vsc_pfn[VTNET_MAXQ];
|
||||
struct vring_hqueue vsc_hq[VTNET_MAXQ];
|
||||
};
|
||||
|
||||
/*
|
||||
* Return the number of available descriptors in the vring taking care
|
||||
* of the 16-bit index wraparound.
|
||||
*/
|
||||
static int
|
||||
hq_num_avail(struct vring_hqueue *hq)
|
||||
{
|
||||
int ndesc;
|
||||
|
||||
if (*hq->hq_avail_idx >= hq->hq_cur_aidx)
|
||||
ndesc = *hq->hq_avail_idx - hq->hq_cur_aidx;
|
||||
else
|
||||
ndesc = UINT16_MAX - hq->hq_cur_aidx + *hq->hq_avail_idx + 1;
|
||||
|
||||
assert(ndesc >= 0 && ndesc <= hq->hq_size);
|
||||
|
||||
return (ndesc);
|
||||
}
|
||||
|
||||
static uint16_t
|
||||
pci_vtnet_qsize(int qnum)
|
||||
{
|
||||
/* XXX no ctl queue currently */
|
||||
if (qnum == VTNET_CTLQ) {
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* XXX fixed currently. Maybe different for tx/rx/ctl */
|
||||
return (VTNET_RINGSZ);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_update_status(struct pci_vtnet_softc *sc, uint32_t value)
|
||||
{
|
||||
if (value == 0) {
|
||||
DPRINTF(("vtnet: device reset requested !\n"));
|
||||
}
|
||||
|
||||
sc->vsc_status = value;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called to send a buffer chain out to the tap device
|
||||
*/
|
||||
static void
|
||||
pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
|
||||
int len)
|
||||
{
|
||||
char pad[60];
|
||||
|
||||
if (sc->vsc_tapfd == -1)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If the length is < 60, pad out to that and add the
|
||||
* extra zero'd segment to the iov. It is guaranteed that
|
||||
* there is always an extra iov available by the caller.
|
||||
*/
|
||||
if (len < 60) {
|
||||
memset(pad, 0, 60 - len);
|
||||
iov[iovcnt].iov_base = pad;
|
||||
iov[iovcnt].iov_len = 60 - len;
|
||||
iovcnt++;
|
||||
}
|
||||
(void) writev(sc->vsc_tapfd, iov, iovcnt);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called when there is read activity on the tap file descriptor.
|
||||
* Each buffer posted by the guest is assumed to be able to contain
|
||||
* an entire ethernet frame + rx header.
|
||||
* MP note: the dummybuf is only used for discarding frames, so there
|
||||
* is no need for it to be per-vtnet or locked.
|
||||
*/
|
||||
static uint8_t dummybuf[2048];
|
||||
|
||||
static void
|
||||
pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
|
||||
{
|
||||
struct virtio_desc *vd;
|
||||
struct virtio_used *vu;
|
||||
struct vring_hqueue *hq;
|
||||
struct virtio_net_rxhdr *vrx;
|
||||
uint8_t *buf;
|
||||
int i;
|
||||
int len;
|
||||
int ndescs;
|
||||
int didx, uidx, aidx; /* descriptor, avail and used index */
|
||||
|
||||
/*
|
||||
* Should never be called without a valid tap fd
|
||||
*/
|
||||
assert(sc->vsc_tapfd != -1);
|
||||
|
||||
/*
|
||||
* But, will be called when the rx ring hasn't yet
|
||||
* been set up.
|
||||
*/
|
||||
if (sc->vsc_rx_ready == 0) {
|
||||
/*
|
||||
* Drop the packet and try later.
|
||||
*/
|
||||
(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the number of available rx buffers
|
||||
*/
|
||||
hq = &sc->vsc_hq[VTNET_RXQ];
|
||||
|
||||
ndescs = hq_num_avail(hq);
|
||||
|
||||
if (ndescs == 0) {
|
||||
/*
|
||||
* Need to wait for host notification to read
|
||||
*/
|
||||
if (sc->vsc_rxpend == 0) {
|
||||
WPRINTF(("vtnet: no rx descriptors !\n"));
|
||||
sc->vsc_rxpend = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop the packet and try later
|
||||
*/
|
||||
(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
|
||||
return;
|
||||
}
|
||||
|
||||
aidx = hq->hq_cur_aidx;
|
||||
uidx = *hq->hq_used_idx;
|
||||
for (i = 0; i < ndescs; i++) {
|
||||
/*
|
||||
* 'aidx' indexes into the an array of descriptor indexes
|
||||
*/
|
||||
didx = hq->hq_avail_ring[aidx % hq->hq_size];
|
||||
assert(didx >= 0 && didx < hq->hq_size);
|
||||
|
||||
vd = &hq->hq_dtable[didx];
|
||||
|
||||
/*
|
||||
* Get a pointer to the rx header, and use the
|
||||
* data immediately following it for the packet buffer.
|
||||
*/
|
||||
vrx = (struct virtio_net_rxhdr *)paddr_guest2host(vd->vd_addr);
|
||||
buf = (uint8_t *)(vrx + 1);
|
||||
|
||||
len = read(sc->vsc_tapfd, buf,
|
||||
vd->vd_len - sizeof(struct virtio_net_rxhdr));
|
||||
|
||||
if (len < 0 && errno == EWOULDBLOCK) {
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* The only valid field in the rx packet header is the
|
||||
* number of buffers, which is always 1 without TSO
|
||||
* support.
|
||||
*/
|
||||
memset(vrx, 0, sizeof(struct virtio_net_rxhdr));
|
||||
vrx->vrh_bufs = 1;
|
||||
|
||||
/*
|
||||
* Write this descriptor into the used ring
|
||||
*/
|
||||
vu = &hq->hq_used_ring[uidx % hq->hq_size];
|
||||
vu->vu_idx = didx;
|
||||
vu->vu_tlen = len + sizeof(struct virtio_net_rxhdr);
|
||||
uidx++;
|
||||
aidx++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the used pointer, and signal an interrupt if allowed
|
||||
*/
|
||||
*hq->hq_used_idx = uidx;
|
||||
hq->hq_cur_aidx = aidx;
|
||||
|
||||
if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) {
|
||||
sc->vsc_isr |= 1;
|
||||
pci_generate_msi(sc->vsc_pi, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_tap_callback(int fd, enum ev_type type, void *param)
|
||||
{
|
||||
struct pci_vtnet_softc *sc = param;
|
||||
|
||||
pthread_mutex_lock(&sc->vsc_mtx);
|
||||
pci_vtnet_tap_rx(sc);
|
||||
pthread_mutex_unlock(&sc->vsc_mtx);
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_ping_rxq(struct pci_vtnet_softc *sc)
|
||||
{
|
||||
/*
|
||||
* A qnotify means that the rx process can now begin
|
||||
*/
|
||||
if (sc->vsc_rx_ready == 0) {
|
||||
sc->vsc_rx_ready = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the rx queue was empty, attempt to receive a
|
||||
* packet that was previously blocked due to no rx bufs
|
||||
* available
|
||||
*/
|
||||
if (sc->vsc_rxpend) {
|
||||
WPRINTF(("vtnet: rx resumed\n\r"));
|
||||
sc->vsc_rxpend = 0;
|
||||
pci_vtnet_tap_rx(sc);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vring_hqueue *hq)
|
||||
{
|
||||
struct iovec iov[VTNET_MAXSEGS + 1];
|
||||
struct virtio_desc *vd;
|
||||
struct virtio_used *vu;
|
||||
int i;
|
||||
int plen;
|
||||
int tlen;
|
||||
int uidx, aidx, didx;
|
||||
|
||||
uidx = *hq->hq_used_idx;
|
||||
aidx = hq->hq_cur_aidx;
|
||||
didx = hq->hq_avail_ring[aidx % hq->hq_size];
|
||||
assert(didx >= 0 && didx < hq->hq_size);
|
||||
|
||||
vd = &hq->hq_dtable[didx];
|
||||
|
||||
/*
|
||||
* Run through the chain of descriptors, ignoring the
|
||||
* first header descriptor. However, include the header
|
||||
* length in the total length that will be put into the
|
||||
* used queue.
|
||||
*/
|
||||
tlen = vd->vd_len;
|
||||
vd = &hq->hq_dtable[vd->vd_next];
|
||||
|
||||
for (i = 0, plen = 0;
|
||||
i < VTNET_MAXSEGS;
|
||||
i++, vd = &hq->hq_dtable[vd->vd_next]) {
|
||||
iov[i].iov_base = paddr_guest2host(vd->vd_addr);
|
||||
iov[i].iov_len = vd->vd_len;
|
||||
plen += vd->vd_len;
|
||||
tlen += vd->vd_len;
|
||||
|
||||
if ((vd->vd_flags & VRING_DESC_F_NEXT) == 0)
|
||||
break;
|
||||
}
|
||||
assert(i < VTNET_MAXSEGS);
|
||||
|
||||
DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, i + 1));
|
||||
pci_vtnet_tap_tx(sc, iov, i + 1, plen);
|
||||
|
||||
/*
|
||||
* Return this chain back to the host
|
||||
*/
|
||||
vu = &hq->hq_used_ring[uidx % hq->hq_size];
|
||||
vu->vu_idx = didx;
|
||||
vu->vu_tlen = tlen;
|
||||
hq->hq_cur_aidx = aidx + 1;
|
||||
*hq->hq_used_idx = uidx + 1;
|
||||
|
||||
/*
|
||||
* Generate an interrupt if able
|
||||
*/
|
||||
if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) {
|
||||
sc->vsc_isr |= 1;
|
||||
pci_generate_msi(sc->vsc_pi, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_ping_txq(struct pci_vtnet_softc *sc)
|
||||
{
|
||||
struct vring_hqueue *hq = &sc->vsc_hq[VTNET_TXQ];
|
||||
int i;
|
||||
int ndescs;
|
||||
|
||||
/*
|
||||
* Calculate number of ring entries to process
|
||||
*/
|
||||
ndescs = hq_num_avail(hq);
|
||||
|
||||
if (ndescs == 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Run through all the entries, placing them into iovecs and
|
||||
* sending when an end-of-packet is found
|
||||
*/
|
||||
for (i = 0; i < ndescs; i++)
|
||||
pci_vtnet_proctx(sc, hq);
|
||||
}
|
||||
|
||||
/*
 * Notify handler for the control queue.  The control queue is not
 * implemented; the notify is only logged when debugging is enabled.
 */
static void
pci_vtnet_ping_ctlq(struct pci_vtnet_softc *sc)
{
	DPRINTF(("vtnet: control qnotify!\n\r"));
}
|
||||
|
||||
static void
|
||||
pci_vtnet_ring_init(struct pci_vtnet_softc *sc, uint64_t pfn)
|
||||
{
|
||||
struct vring_hqueue *hq;
|
||||
int qnum = sc->vsc_curq;
|
||||
|
||||
assert(qnum < VTNET_MAXQ);
|
||||
|
||||
sc->vsc_pfn[qnum] = pfn << VRING_PFN;
|
||||
|
||||
/*
|
||||
* Set up host pointers to the various parts of the
|
||||
* queue
|
||||
*/
|
||||
hq = &sc->vsc_hq[qnum];
|
||||
hq->hq_size = pci_vtnet_qsize(qnum);
|
||||
|
||||
hq->hq_dtable = paddr_guest2host(pfn << VRING_PFN);
|
||||
hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size);
|
||||
hq->hq_avail_idx = hq->hq_avail_flags + 1;
|
||||
hq->hq_avail_ring = hq->hq_avail_flags + 2;
|
||||
hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring,
|
||||
VRING_ALIGN);
|
||||
hq->hq_used_idx = hq->hq_used_flags + 1;
|
||||
hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);
|
||||
|
||||
/*
|
||||
* Initialize queue indexes
|
||||
*/
|
||||
hq->hq_cur_aidx = 0;
|
||||
}
|
||||
|
||||
static int
|
||||
pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
MD5_CTX mdctx;
|
||||
unsigned char digest[16];
|
||||
char nstr[80];
|
||||
struct pci_vtnet_softc *sc;
|
||||
|
||||
/*
|
||||
* Access to guest memory is required. Fail if
|
||||
* memory not mapped
|
||||
*/
|
||||
if (paddr_guest2host(0) == NULL)
|
||||
return (1);
|
||||
|
||||
sc = malloc(sizeof(struct pci_vtnet_softc));
|
||||
memset(sc, 0, sizeof(struct pci_vtnet_softc));
|
||||
|
||||
pi->pi_arg = sc;
|
||||
sc->vsc_pi = pi;
|
||||
|
||||
pthread_mutex_init(&sc->vsc_mtx, NULL);
|
||||
|
||||
/*
|
||||
* Attempt to open the tap device
|
||||
*/
|
||||
sc->vsc_tapfd = -1;
|
||||
if (opts != NULL) {
|
||||
char tbuf[80];
|
||||
|
||||
strcpy(tbuf, "/dev/");
|
||||
strncat(tbuf, opts, sizeof(tbuf) - strlen(tbuf));
|
||||
|
||||
sc->vsc_tapfd = open(tbuf, O_RDWR);
|
||||
if (sc->vsc_tapfd == -1) {
|
||||
WPRINTF(("open of tap device %s failed\n", tbuf));
|
||||
} else {
|
||||
/*
|
||||
* Set non-blocking and register for read
|
||||
* notifications with the event loop
|
||||
*/
|
||||
int opt = 1;
|
||||
if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
|
||||
WPRINTF(("tap device O_NONBLOCK failed\n"));
|
||||
close(sc->vsc_tapfd);
|
||||
sc->vsc_tapfd = -1;
|
||||
}
|
||||
|
||||
sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
|
||||
EVF_READ,
|
||||
pci_vtnet_tap_callback,
|
||||
sc);
|
||||
if (sc->vsc_mevp == NULL) {
|
||||
WPRINTF(("Could not register event\n"));
|
||||
close(sc->vsc_tapfd);
|
||||
sc->vsc_tapfd = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The MAC address is the standard NetApp OUI of 00-a0-98,
|
||||
* followed by an MD5 of the vm name. The slot number is
|
||||
* prepended to this for slots other than 1, so that
|
||||
* CFE can netboot from the equivalent of slot 1.
|
||||
*/
|
||||
if (pi->pi_slot == 1) {
|
||||
strncpy(nstr, vmname, sizeof(nstr));
|
||||
} else {
|
||||
snprintf(nstr, sizeof(nstr), "%d-%s", pi->pi_slot, vmname);
|
||||
}
|
||||
|
||||
MD5Init(&mdctx);
|
||||
MD5Update(&mdctx, nstr, strlen(nstr));
|
||||
MD5Final(digest, &mdctx);
|
||||
|
||||
sc->vsc_macaddr[0] = 0x00;
|
||||
sc->vsc_macaddr[1] = 0xa0;
|
||||
sc->vsc_macaddr[2] = 0x98;
|
||||
sc->vsc_macaddr[3] = digest[0];
|
||||
sc->vsc_macaddr[4] = digest[1];
|
||||
sc->vsc_macaddr[5] = digest[2];
|
||||
|
||||
/* initialize config space */
|
||||
pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
|
||||
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
|
||||
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
|
||||
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
|
||||
pci_emul_alloc_bar(pi, 0, 0, PCIBAR_IO, VTNET_REGSZ);
|
||||
pci_emul_add_msicap(pi, 1);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Function pointer array to handle queue notifications
|
||||
*/
|
||||
static void (*pci_vtnet_qnotify[VTNET_MAXQ])(struct pci_vtnet_softc *) = {
|
||||
pci_vtnet_ping_rxq,
|
||||
pci_vtnet_ping_txq,
|
||||
pci_vtnet_ping_ctlq
|
||||
};
|
||||
|
||||
static void
|
||||
pci_vtnet_write(struct pci_devinst *pi, int baridx, int offset, int size,
|
||||
uint32_t value)
|
||||
{
|
||||
struct pci_vtnet_softc *sc = pi->pi_arg;
|
||||
|
||||
if (offset + size > VTNET_REGSZ) {
|
||||
DPRINTF(("vtnet_write: 2big, offset %d size %d\n",
|
||||
offset, size));
|
||||
return;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&sc->vsc_mtx);
|
||||
|
||||
switch (offset) {
|
||||
case VTCFG_R_GUESTCAP:
|
||||
assert(size == 4);
|
||||
sc->vsc_features = value & VTNET_S_HOSTCAPS;
|
||||
break;
|
||||
case VTCFG_R_PFN:
|
||||
assert(size == 4);
|
||||
pci_vtnet_ring_init(sc, value);
|
||||
break;
|
||||
case VTCFG_R_QSEL:
|
||||
assert(size == 2);
|
||||
assert(value < VTNET_MAXQ);
|
||||
sc->vsc_curq = value;
|
||||
break;
|
||||
case VTCFG_R_QNOTIFY:
|
||||
assert(size == 2);
|
||||
assert(value < VTNET_MAXQ);
|
||||
(*pci_vtnet_qnotify[value])(sc);
|
||||
break;
|
||||
case VTCFG_R_STATUS:
|
||||
assert(size == 1);
|
||||
pci_vtnet_update_status(sc, value);
|
||||
break;
|
||||
case VTNET_R_CFG0:
|
||||
case VTNET_R_CFG1:
|
||||
case VTNET_R_CFG2:
|
||||
case VTNET_R_CFG3:
|
||||
case VTNET_R_CFG4:
|
||||
case VTNET_R_CFG5:
|
||||
/*
|
||||
* The driver is allowed to change the MAC address
|
||||
*/
|
||||
assert(size == 1);
|
||||
sc->vsc_macaddr[offset - VTNET_R_CFG0] = value;
|
||||
break;
|
||||
case VTCFG_R_HOSTCAP:
|
||||
case VTCFG_R_QNUM:
|
||||
case VTCFG_R_ISR:
|
||||
case VTNET_R_CFG6:
|
||||
case VTNET_R_CFG7:
|
||||
DPRINTF(("vtnet: write to readonly reg %d\n\r", offset));
|
||||
break;
|
||||
default:
|
||||
DPRINTF(("vtnet: unknown i/o write offset %d\n\r", offset));
|
||||
value = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&sc->vsc_mtx);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
pci_vtnet_read(struct pci_devinst *pi, int baridx, int offset, int size)
|
||||
{
|
||||
struct pci_vtnet_softc *sc = pi->pi_arg;
|
||||
uint32_t value;
|
||||
|
||||
if (offset + size > VTNET_REGSZ) {
|
||||
DPRINTF(("vtnet_read: 2big, offset %d size %d\n",
|
||||
offset, size));
|
||||
return (0);
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&sc->vsc_mtx);
|
||||
|
||||
switch (offset) {
|
||||
case VTCFG_R_HOSTCAP:
|
||||
assert(size == 4);
|
||||
value = VTNET_S_HOSTCAPS;
|
||||
break;
|
||||
case VTCFG_R_GUESTCAP:
|
||||
assert(size == 4);
|
||||
value = sc->vsc_features; /* XXX never read ? */
|
||||
break;
|
||||
case VTCFG_R_PFN:
|
||||
assert(size == 4);
|
||||
value = sc->vsc_pfn[sc->vsc_curq] >> VRING_PFN;
|
||||
break;
|
||||
case VTCFG_R_QNUM:
|
||||
assert(size == 2);
|
||||
value = pci_vtnet_qsize(sc->vsc_curq);
|
||||
break;
|
||||
case VTCFG_R_QSEL:
|
||||
assert(size == 2);
|
||||
value = sc->vsc_curq; /* XXX never read ? */
|
||||
break;
|
||||
case VTCFG_R_QNOTIFY:
|
||||
assert(size == 2);
|
||||
value = sc->vsc_curq; /* XXX never read ? */
|
||||
break;
|
||||
case VTCFG_R_STATUS:
|
||||
assert(size == 1);
|
||||
value = sc->vsc_status;
|
||||
break;
|
||||
case VTCFG_R_ISR:
|
||||
assert(size == 1);
|
||||
value = sc->vsc_isr;
|
||||
sc->vsc_isr = 0; /* a read clears this flag */
|
||||
break;
|
||||
case VTNET_R_CFG0:
|
||||
case VTNET_R_CFG1:
|
||||
case VTNET_R_CFG2:
|
||||
case VTNET_R_CFG3:
|
||||
case VTNET_R_CFG4:
|
||||
case VTNET_R_CFG5:
|
||||
assert(size == 1);
|
||||
value = sc->vsc_macaddr[offset - VTNET_R_CFG0];
|
||||
break;
|
||||
case VTNET_R_CFG6:
|
||||
assert(size == 1);
|
||||
value = 0x01; /* XXX link always up */
|
||||
break;
|
||||
case VTNET_R_CFG7:
|
||||
assert(size == 1);
|
||||
value = 0; /* link status is in the LSB */
|
||||
break;
|
||||
default:
|
||||
DPRINTF(("vtnet: unknown i/o read offset %d\n\r", offset));
|
||||
value = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&sc->vsc_mtx);
|
||||
|
||||
return (value);
|
||||
}
|
||||
|
||||
struct pci_devemu pci_de_vnet = {
|
||||
.pe_emu = "virtio-net",
|
||||
.pe_init = pci_vtnet_init,
|
||||
.pe_iow = pci_vtnet_write,
|
||||
.pe_ior = pci_vtnet_read,
|
||||
};
|
||||
PCI_EMUL_SET(pci_de_vnet);
|
196
usr.sbin/bhyve/pit_8254.c
Normal file
196
usr.sbin/bhyve/pit_8254.c
Normal file
@ -0,0 +1,196 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <machine/clock.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "fbsdrun.h"
|
||||
#include "inout.h"
|
||||
#include "pit_8254.h"
|
||||
|
||||
/* Fields of the control byte written to the TIMER_MODE port */
#define TIMER_SEL_MASK 0xc0
|
||||
#define TIMER_RW_MASK 0x30
|
||||
#define TIMER_MODE_MASK 0x0f
|
||||
/* Counter-select value that the handler rejects (read-back command) */
#define TIMER_SEL_READBACK 0xc0
|
||||
|
||||
/* freq / hz, rounded to the nearest integer */
#define TIMER_DIV(freq, hz) (((freq) + (hz) / 2) / (hz))
|
||||
|
||||
/* Input clock of the 8254 in Hz */
#define PIT_8254_FREQ 1193182
|
||||
/* Integer division: this evaluates to 838, the fractional part is dropped */
static const int nsecs_per_tick = 1000000000 / PIT_8254_FREQ;
|
||||
|
||||
/*
 * Emulation state for one 8254 counter channel.
 */
struct counter {
|
||||
struct timeval tv; /* gettimeofday() time when 'initial' was loaded */
|
||||
uint16_t initial; /* initial counter value */
|
||||
uint8_t cr[2]; /* count register bytes: cr[0] is the LSB, cr[1] the MSB */
|
||||
uint8_t ol[2]; /* output latch bytes: ol[1] is the LSB, ol[0] the MSB */
|
||||
int crbyte; /* index of the next cr[] byte a guest write will fill */
|
||||
int olbyte; /* number of latched ol[] bytes not yet read; 0 = no latch */
|
||||
};
|
||||
|
||||
/*
 * Normalize a timeval after one add/subtract step: carry a negative or
 * overflowing tv_usec into tv_sec.  At most one second is carried.
 */
static void
timevalfix(struct timeval *t1)
{
	const long usec_per_sec = 1000000;

	if (t1->tv_usec < 0) {
		t1->tv_usec += usec_per_sec;
		t1->tv_sec -= 1;
	} else if (t1->tv_usec >= usec_per_sec) {
		t1->tv_usec -= usec_per_sec;
		t1->tv_sec += 1;
	}
}
|
||||
|
||||
/*
 * t1 -= t2, with the result normalized by timevalfix().
 */
static void
timevalsub(struct timeval *t1, const struct timeval *t2)
{
	t1->tv_usec -= t2->tv_usec;
	t1->tv_sec -= t2->tv_sec;
	timevalfix(t1);
}
|
||||
|
||||
static void
|
||||
latch(struct counter *c)
|
||||
{
|
||||
struct timeval tv2;
|
||||
uint16_t lval;
|
||||
uint64_t delta_nsecs, delta_ticks;
|
||||
|
||||
/* cannot latch a new value until the old one has been consumed */
|
||||
if (c->olbyte != 0)
|
||||
return;
|
||||
|
||||
if (c->initial == 0 || c->initial == 1) {
|
||||
/*
|
||||
* XXX the program that runs the VM can be stopped and
|
||||
* restarted at any time. This means that state that was
|
||||
* created by the guest is destroyed between invocations
|
||||
* of the program.
|
||||
*
|
||||
* If the counter's initial value is not programmed we
|
||||
* assume a value that would be set to generate 'guest_hz'
|
||||
* interrupts per second.
|
||||
*/
|
||||
c->initial = TIMER_DIV(PIT_8254_FREQ, guest_hz);
|
||||
gettimeofday(&c->tv, NULL);
|
||||
}
|
||||
|
||||
(void)gettimeofday(&tv2, NULL);
|
||||
timevalsub(&tv2, &c->tv);
|
||||
delta_nsecs = tv2.tv_sec * 1000000000 + tv2.tv_usec * 1000;
|
||||
delta_ticks = delta_nsecs / nsecs_per_tick;
|
||||
|
||||
lval = c->initial - delta_ticks % c->initial;
|
||||
c->olbyte = 2;
|
||||
c->ol[1] = lval; /* LSB */
|
||||
c->ol[0] = lval >> 8; /* MSB */
|
||||
}
|
||||
|
||||
static int
|
||||
pit_8254_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
int sel, rw, mode;
|
||||
uint8_t val;
|
||||
struct counter *c;
|
||||
|
||||
static struct counter counter[3];
|
||||
|
||||
if (bytes != 1)
|
||||
return (-1);
|
||||
|
||||
val = *eax;
|
||||
|
||||
if (port == TIMER_MODE) {
|
||||
assert(in == 0);
|
||||
sel = val & TIMER_SEL_MASK;
|
||||
rw = val & TIMER_RW_MASK;
|
||||
mode = val & TIMER_MODE_MASK;
|
||||
|
||||
if (sel == TIMER_SEL_READBACK)
|
||||
return (-1);
|
||||
if (rw != TIMER_LATCH && rw != TIMER_16BIT)
|
||||
return (-1);
|
||||
|
||||
if (rw != TIMER_LATCH) {
|
||||
/*
|
||||
* Counter mode is not affected when issuing a
|
||||
* latch command.
|
||||
*/
|
||||
if (mode != TIMER_RATEGEN && mode != TIMER_SQWAVE)
|
||||
return (-1);
|
||||
}
|
||||
|
||||
c = &counter[sel >> 6];
|
||||
if (rw == TIMER_LATCH)
|
||||
latch(c);
|
||||
else
|
||||
c->olbyte = 0; /* reset latch after reprogramming */
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* counter ports */
|
||||
assert(port >= TIMER_CNTR0 && port <= TIMER_CNTR2);
|
||||
c = &counter[port - TIMER_CNTR0];
|
||||
|
||||
if (in) {
|
||||
/*
|
||||
* XXX
|
||||
* The spec says that once the output latch is completely
|
||||
* read it should revert to "following" the counter. We don't
|
||||
* do this because it is hard and any reasonable OS should
|
||||
* always latch the counter before trying to read it.
|
||||
*/
|
||||
if (c->olbyte == 0)
|
||||
c->olbyte = 2;
|
||||
*eax = c->ol[--c->olbyte];
|
||||
} else {
|
||||
c->cr[c->crbyte++] = *eax;
|
||||
if (c->crbyte == 2) {
|
||||
c->crbyte = 0;
|
||||
c->initial = c->cr[0] | (uint16_t)c->cr[1] << 8;
|
||||
gettimeofday(&c->tv, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
INOUT_PORT(8254, TIMER_MODE, IOPORT_F_OUT, pit_8254_handler);
|
||||
INOUT_PORT(8254, TIMER_CNTR0, IOPORT_F_INOUT, pit_8254_handler);
|
||||
INOUT_PORT(8254, TIMER_CNTR1, IOPORT_F_INOUT, pit_8254_handler);
|
||||
INOUT_PORT(8254, TIMER_CNTR2, IOPORT_F_INOUT, pit_8254_handler);
|
45
usr.sbin/bhyve/pit_8254.h
Normal file
45
usr.sbin/bhyve/pit_8254.h
Normal file
@ -0,0 +1,45 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _PIT_8254_H_
|
||||
#define _PIT_8254_H_
|
||||
|
||||
/*
|
||||
* Borrowed from amd64/include/timerreg.h because in that file it is
|
||||
* conditionally compiled for #ifdef _KERNEL only.
|
||||
*/
|
||||
|
||||
#include <dev/ic/i8253reg.h>
|
||||
|
||||
#define IO_TIMER1 0x40 /* 8253 Timer #1 */
|
||||
#define TIMER_CNTR0 (IO_TIMER1 + TIMER_REG_CNTR0)
|
||||
#define TIMER_CNTR1 (IO_TIMER1 + TIMER_REG_CNTR1)
|
||||
#define TIMER_CNTR2 (IO_TIMER1 + TIMER_REG_CNTR2)
|
||||
#define TIMER_MODE (IO_TIMER1 + TIMER_REG_MODE)
|
||||
|
||||
#endif /* _PIT_8254_H_ */
|
51
usr.sbin/bhyve/post.c
Normal file
51
usr.sbin/bhyve/post.c
Normal file
@ -0,0 +1,51 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
/*
 * Read handler for the POST data port.  Single-byte reads return 0xff;
 * any other access width is rejected with -1.
 */
static int
post_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
		  uint32_t *eax, void *arg)
{
	assert(in == 1);

	if (bytes == 1) {
		*eax = 0xff;		/* return some garbage */
		return (0);
	}

	return (-1);
}
|
||||
|
||||
INOUT_PORT(post, 0x84, IOPORT_F_IN, post_data_handler);
|
268
usr.sbin/bhyve/rtc.c
Normal file
268
usr.sbin/bhyve/rtc.c
Normal file
@ -0,0 +1,268 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
#define IO_RTC 0x70
|
||||
|
||||
#define RTC_SEC 0x00 /* seconds */
|
||||
#define RTC_MIN 0x02
|
||||
#define RTC_HRS 0x04
|
||||
#define RTC_WDAY 0x06
|
||||
#define RTC_DAY 0x07
|
||||
#define RTC_MONTH 0x08
|
||||
#define RTC_YEAR 0x09
|
||||
#define RTC_CENTURY 0x32 /* current century */
|
||||
|
||||
#define RTC_STATUSA 0xA
|
||||
#define RTCSA_TUP 0x80 /* time update, don't look now */
|
||||
|
||||
#define RTC_STATUSB 0xB
|
||||
#define RTCSB_DST 0x01
|
||||
#define RTCSB_24HR 0x02
|
||||
#define RTCSB_BIN 0x04 /* 0 = BCD, 1 = Binary */
|
||||
#define RTCSB_PINTR 0x40 /* 1 = enable periodic clock interrupt */
|
||||
#define RTCSB_HALT 0x80 /* stop clock updates */
|
||||
|
||||
#define RTC_INTR 0x0c /* status register C (R) interrupt source */
|
||||
|
||||
#define RTC_STATUSD 0x0d /* status register D (R) Lost Power */
|
||||
#define RTCSD_PWR 0x80 /* clock power OK */
|
||||
|
||||
#define RTC_DIAG 0x0e
|
||||
|
||||
#define RTC_RSTCODE 0x0f
|
||||
|
||||
/* Register index selected by the last accepted write to the address port */
static int addr;
|
||||
|
||||
/* XXX initialize these to default values as they would be from BIOS */
|
||||
/* Guest-written values of status registers A and B and of the reset code */
static uint8_t status_a, status_b, rstcode;
|
||||
|
||||
/* Lookup table: entry N (0..99) is N encoded as two packed BCD digits */
static u_char const bin2bcd_data[] = {
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
|
||||
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
|
||||
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29,
|
||||
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
|
||||
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
|
||||
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
|
||||
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
|
||||
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
|
||||
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
|
||||
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99
|
||||
};
|
||||
/* No range check: 'bin' must be 0..99 */
#define bin2bcd(bin) (bin2bcd_data[bin])
|
||||
|
||||
/* Encode a value for the guest: binary if RTCSB_BIN is set in status_b, else BCD */
#define rtcout(val) ((status_b & RTCSB_BIN) ? (val) : bin2bcd((val)))
|
||||
|
||||
/*
 * Bring tv_usec back towards [0, 1000000) by carrying a single second
 * to or from tv_sec.
 */
static void
timevalfix(struct timeval *t1)
{
	const long usec_per_sec = 1000000;

	if (t1->tv_usec < 0) {
		t1->tv_usec += usec_per_sec;
		t1->tv_sec -= 1;
	} else if (t1->tv_usec >= usec_per_sec) {
		t1->tv_usec -= usec_per_sec;
		t1->tv_sec += 1;
	}
}
|
||||
|
||||
/*
 * Subtract t2 from t1 in place and normalize the result with timevalfix().
 */
static void
timevalsub(struct timeval *t1, const struct timeval *t2)
{
	t1->tv_usec -= t2->tv_usec;
	t1->tv_sec -= t2->tv_sec;
	timevalfix(t1);
}
|
||||
|
||||
static int
|
||||
rtc_addr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
assert(in == 0);
|
||||
|
||||
if (bytes != 1)
|
||||
return (-1);
|
||||
|
||||
switch (*eax) {
|
||||
case RTC_SEC:
|
||||
case RTC_MIN:
|
||||
case RTC_HRS:
|
||||
case RTC_WDAY:
|
||||
case RTC_DAY:
|
||||
case RTC_MONTH:
|
||||
case RTC_YEAR:
|
||||
case RTC_CENTURY:
|
||||
case RTC_STATUSA:
|
||||
case RTC_STATUSB:
|
||||
case RTC_INTR:
|
||||
case RTC_STATUSD:
|
||||
case RTC_DIAG:
|
||||
case RTC_RSTCODE:
|
||||
break;
|
||||
default:
|
||||
return (-1);
|
||||
}
|
||||
|
||||
addr = *eax;
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
rtc_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
int hour;
|
||||
time_t t;
|
||||
struct timeval cur, delta;
|
||||
|
||||
static struct timeval last;
|
||||
static struct tm tm;
|
||||
|
||||
if (bytes != 1)
|
||||
return (-1);
|
||||
|
||||
gettimeofday(&cur, NULL);
|
||||
|
||||
/*
|
||||
* Increment the cached time only once per second so we can guarantee
|
||||
* that the guest has at least one second to read the hour:min:sec
|
||||
* separately and still get a coherent view of the time.
|
||||
*/
|
||||
delta = cur;
|
||||
timevalsub(&delta, &last);
|
||||
if (delta.tv_sec >= 1 && (status_b & RTCSB_HALT) == 0) {
|
||||
t = cur.tv_sec;
|
||||
localtime_r(&t, &tm);
|
||||
last = cur;
|
||||
}
|
||||
|
||||
if (in) {
|
||||
switch (addr) {
|
||||
case RTC_SEC:
|
||||
*eax = rtcout(tm.tm_sec);
|
||||
return (0);
|
||||
case RTC_MIN:
|
||||
*eax = rtcout(tm.tm_min);
|
||||
return (0);
|
||||
case RTC_HRS:
|
||||
if (status_b & RTCSB_24HR)
|
||||
hour = tm.tm_hour;
|
||||
else
|
||||
hour = (tm.tm_hour % 12) + 1;
|
||||
|
||||
*eax = rtcout(hour);
|
||||
|
||||
/*
|
||||
* If we are representing time in the 12-hour format
|
||||
* then set the MSB to indicate PM.
|
||||
*/
|
||||
if ((status_b & RTCSB_24HR) == 0 && tm.tm_hour >= 12)
|
||||
*eax |= 0x80;
|
||||
|
||||
return (0);
|
||||
case RTC_WDAY:
|
||||
*eax = rtcout(tm.tm_wday + 1);
|
||||
return (0);
|
||||
case RTC_DAY:
|
||||
*eax = rtcout(tm.tm_mday);
|
||||
return (0);
|
||||
case RTC_MONTH:
|
||||
*eax = rtcout(tm.tm_mon + 1);
|
||||
return (0);
|
||||
case RTC_YEAR:
|
||||
*eax = rtcout(tm.tm_year % 100);
|
||||
return (0);
|
||||
case RTC_CENTURY:
|
||||
*eax = rtcout(tm.tm_year / 100);
|
||||
break;
|
||||
case RTC_STATUSA:
|
||||
*eax = status_a;
|
||||
return (0);
|
||||
case RTC_INTR:
|
||||
*eax = 0;
|
||||
return (0);
|
||||
case RTC_STATUSD:
|
||||
*eax = RTCSD_PWR;
|
||||
return (0);
|
||||
case RTC_DIAG:
|
||||
*eax = 0;
|
||||
return (0);
|
||||
case RTC_RSTCODE:
|
||||
*eax = rstcode;
|
||||
return (0);
|
||||
default:
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
|
||||
switch (addr) {
|
||||
case RTC_STATUSA:
|
||||
status_a = *eax & ~RTCSA_TUP;
|
||||
break;
|
||||
case RTC_STATUSB:
|
||||
/* XXX not implemented yet XXX */
|
||||
if (*eax & RTCSB_PINTR)
|
||||
return (-1);
|
||||
status_b = *eax;
|
||||
break;
|
||||
case RTC_RSTCODE:
|
||||
rstcode = *eax;
|
||||
break;
|
||||
case RTC_SEC:
|
||||
case RTC_MIN:
|
||||
case RTC_HRS:
|
||||
case RTC_WDAY:
|
||||
case RTC_DAY:
|
||||
case RTC_MONTH:
|
||||
case RTC_YEAR:
|
||||
case RTC_CENTURY:
|
||||
/*
|
||||
* Ignore writes to the time of day registers
|
||||
*/
|
||||
break;
|
||||
default:
|
||||
return (-1);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
INOUT_PORT(rtc, IO_RTC, IOPORT_F_OUT, rtc_addr_handler);
|
||||
INOUT_PORT(rtc, IO_RTC + 1, IOPORT_F_INOUT, rtc_data_handler);
|
60
usr.sbin/bhyve/uart.c
Normal file
60
usr.sbin/bhyve/uart.c
Normal file
@ -0,0 +1,60 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "inout.h"
|
||||
|
||||
#define COM1 0x3F8
|
||||
#define COM2 0x2F8
|
||||
|
||||
#define REG_IIR 2
|
||||
|
||||
/*
 * Read handler for the COM port registers registered below.  The UART is
 * not emulated: a single-byte read returns 0xFF and any other access
 * width is rejected with -1.
 */
static int
com_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
	    uint32_t *eax, void *arg)
{
	assert(in);

	if (bytes == 1) {
		/*
		 * COM port is not implemented so we return 0xFF for all
		 * registers
		 */
		*eax = 0xFF;
		return (0);
	}

	return (-1);
}
|
||||
|
||||
INOUT_PORT(uart, COM1 + REG_IIR, IOPORT_F_IN, com_handler);
|
||||
INOUT_PORT(uart, COM2 + REG_IIR, IOPORT_F_IN, com_handler);
|
85
usr.sbin/bhyve/virtio.h
Normal file
85
usr.sbin/bhyve/virtio.h
Normal file
@ -0,0 +1,85 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VIRTIO_H_
|
||||
#define _VIRTIO_H_
|
||||
|
||||
#define VRING_ALIGN 4096
|
||||
|
||||
#define VRING_DESC_F_NEXT (1 << 0)
|
||||
#define VRING_DESC_F_WRITE (1 << 1)
|
||||
#define VRING_DESC_F_INDIRECT (1 << 2)
|
||||
|
||||
#define VRING_AVAIL_F_NO_INTERRUPT 1
|
||||
|
||||
/*
 * One descriptor of a virtio ring (16 bytes, packed).
 * NOTE(review): field meanings below are inferred from the names and the
 * VRING_DESC_F_* flags above - confirm against the virtio specification.
 */
struct virtio_desc {
|
||||
uint64_t vd_addr; /* presumably the guest address of the buffer */
|
||||
uint32_t vd_len; /* presumably the buffer length in bytes */
|
||||
uint16_t vd_flags; /* presumably VRING_DESC_F_* bits */
|
||||
uint16_t vd_next; /* presumably the next descriptor index when chained */
|
||||
} __packed;
|
||||
|
||||
/*
 * One entry of a virtio used ring (8 bytes, packed).
 * NOTE(review): field meanings are inferred from the names - confirm
 * against the virtio specification.
 */
struct virtio_used {
|
||||
uint32_t vu_idx; /* presumably the head descriptor index of the chain */
|
||||
uint32_t vu_tlen; /* presumably the total length written to the chain */
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* PFN register shift amount
|
||||
*/
|
||||
#define VRING_PFN 12
|
||||
|
||||
/*
|
||||
* Virtio device types
|
||||
*/
|
||||
#define VIRTIO_TYPE_NET 1
|
||||
#define VIRTIO_TYPE_BLOCK 2
|
||||
|
||||
/*
|
||||
* PCI vendor/device IDs
|
||||
*/
|
||||
#define VIRTIO_VENDOR 0x1AF4
|
||||
#define VIRTIO_DEV_NET 0x1000
|
||||
#define VIRTIO_DEV_BLOCK 0x1001
|
||||
|
||||
/*
|
||||
 * Virtio register offsets within the device's I/O BAR
|
||||
*/
|
||||
#define VTCFG_R_HOSTCAP 0
|
||||
#define VTCFG_R_GUESTCAP 4
|
||||
#define VTCFG_R_PFN 8
|
||||
#define VTCFG_R_QNUM 12
|
||||
#define VTCFG_R_QSEL 14
|
||||
#define VTCFG_R_QNOTIFY 16
|
||||
#define VTCFG_R_STATUS 18
|
||||
#define VTCFG_R_ISR 19
|
||||
#define VTCFG_R_CFG0 20 /* No MSI-X */
|
||||
#define VTCFG_R_CFG1 24 /* With MSI-X */
|
||||
#define VTCFG_R_MSIX 20
|
||||
|
||||
#endif /* _VIRTIO_H_ */
|
261
usr.sbin/bhyve/xmsr.c
Normal file
261
usr.sbin/bhyve/xmsr.c
Normal file
@ -0,0 +1,261 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <x86/apicreg.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <machine/vmm.h>
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include "fbsdrun.h"
|
||||
#include "xmsr.h"
|
||||
|
||||
/*
|
||||
* Trampoline for hypervisor direct 64-bit jump.
|
||||
*
|
||||
* 0 - signature for guest->host verification
|
||||
* 8 - kernel virtual address of trampoline
|
||||
* 16 - instruction virtual address
|
||||
* 24 - stack pointer virtual address
|
||||
* 32 - CR3, physical address of kernel page table
|
||||
* 40 - 24-byte area for null/code/data GDT entries
|
||||
*/
|
||||
#define MP_V64T_SIG 0xcafebabecafebabeULL
|
||||
/* In-memory layout of the trampoline; one 64-bit slot per field */
struct mp_v64tramp {
|
||||
uint64_t mt_sig; /* offset 0: signature, see MP_V64T_SIG */
|
||||
uint64_t mt_virt; /* offset 8: kernel virtual address of trampoline */
|
||||
uint64_t mt_eip; /* offset 16: instruction virtual address */
|
||||
uint64_t mt_rsp; /* offset 24: stack pointer virtual address */
|
||||
uint64_t mt_cr3; /* offset 32: physical address of kernel page table */
|
||||
uint64_t mt_gdtr[3]; /* offset 40: null/code/data GDT entries */
|
||||
};
|
||||
|
||||
/*
 * Per-vcpu boot state.  CPU 0 is the BSP and starts out RUNNING; every
 * other entry is zero-initialized, i.e. starts in the INIT state.
 */
#define	BSP	0

enum cpu_bstate {
	CPU_S_INIT,
	CPU_S_SIPI,
	CPU_S_RUNNING
};

static enum cpu_bstate cpu_b[VM_MAXCPU] = { [BSP] = CPU_S_RUNNING };
|
||||
|
||||
static void spinup_ap(struct vmctx *, int, int, uint64_t *);
|
||||
static void spinup_ap_direct64(struct vmctx *, int, uintptr_t, uint64_t *);
|
||||
|
||||
int
|
||||
emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val)
|
||||
{
|
||||
int dest;
|
||||
int mode;
|
||||
int thiscpu;
|
||||
int vec;
|
||||
int error, retval;
|
||||
uint64_t rip;
|
||||
|
||||
retval = vcpu;
|
||||
thiscpu = 1 << vcpu;
|
||||
|
||||
/*
|
||||
* The only MSR value handled is the x2apic CR register
|
||||
*/
|
||||
if (code != 0x830) {
|
||||
printf("Unknown WRMSR code %x, val %lx, cpu %d\n",
|
||||
code, val, vcpu);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* The value written to the MSR will generate an IPI to
|
||||
* a set of CPUs. If this is a SIPI, create the initial
|
||||
* state for the CPU and switch to it. Otherwise, inject
|
||||
* an interrupt for the destination CPU(s), and request
|
||||
* a switch to the next available one by returning -1
|
||||
*/
|
||||
dest = val >> 32;
|
||||
vec = val & APIC_VECTOR_MASK;
|
||||
mode = val & APIC_DELMODE_MASK;
|
||||
|
||||
switch (mode) {
|
||||
case APIC_DELMODE_INIT:
|
||||
assert(dest != 0);
|
||||
assert(dest < guest_ncpus);
|
||||
|
||||
/*
|
||||
* Ignore legacy de-assert INITs in x2apic mode
|
||||
*/
|
||||
if ((val & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) {
|
||||
break;
|
||||
}
|
||||
assert(cpu_b[dest] == CPU_S_INIT);
|
||||
|
||||
/*
|
||||
* Move CPU to wait-for-SIPI state
|
||||
*/
|
||||
error = vcpu_reset(ctx, dest);
|
||||
assert(error == 0);
|
||||
|
||||
cpu_b[dest] = CPU_S_SIPI;
|
||||
break;
|
||||
|
||||
case APIC_DELMODE_STARTUP:
|
||||
assert(dest != 0);
|
||||
assert(dest < guest_ncpus);
|
||||
/*
|
||||
* Ignore SIPIs in any state other than wait-for-SIPI
|
||||
*/
|
||||
if (cpu_b[dest] != CPU_S_SIPI) {
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Bring up the AP and signal the main loop that it is
|
||||
* available and to switch to it.
|
||||
*/
|
||||
spinup_ap(ctx, dest, vec, &rip);
|
||||
cpu_b[dest] = CPU_S_RUNNING;
|
||||
fbsdrun_addcpu(ctx, dest, rip);
|
||||
retval = dest;
|
||||
break;
|
||||
|
||||
default:
|
||||
printf("APIC delivery mode %lx not supported!\n",
|
||||
val & APIC_DELMODE_MASK);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return (retval);
|
||||
}
|
||||
|
||||
/*
|
||||
* There are 2 startup modes possible here:
|
||||
* - if the CPU supports 'unrestricted guest' mode, the spinup can
|
||||
* set up the processor state in power-on 16-bit mode, with the CS:IP
|
||||
* init'd to the specified low-mem 4K page.
|
||||
* - if the guest has requested a 64-bit trampoline in the low-mem 4K
|
||||
* page by placing in the specified signature, set up the register
|
||||
* state using register state in the signature. Note that this
|
||||
* requires accessing guest physical memory to read the signature
|
||||
* while 'unrestricted mode' does not.
|
||||
*/
|
||||
static void
|
||||
spinup_ap(struct vmctx *ctx, int newcpu, int vector, uint64_t *rip)
|
||||
{
|
||||
int error;
|
||||
uint16_t cs;
|
||||
uint64_t desc_base;
|
||||
uint32_t desc_limit, desc_access;
|
||||
|
||||
if (fbsdrun_vmexit_on_hlt()) {
|
||||
error = vm_set_capability(ctx, newcpu, VM_CAP_HALT_EXIT, 1);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
if (fbsdrun_vmexit_on_pause()) {
|
||||
error = vm_set_capability(ctx, newcpu, VM_CAP_PAUSE_EXIT, 1);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
error = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
|
||||
if (error) {
|
||||
/*
|
||||
* If the guest does not support real-mode execution then
|
||||
* we will bring up the AP directly in 64-bit mode.
|
||||
*/
|
||||
spinup_ap_direct64(ctx, newcpu, vector << PAGE_SHIFT, rip);
|
||||
} else {
|
||||
/*
|
||||
* Update the %cs and %rip of the guest so that it starts
|
||||
* executing real mode code at at 'vector << 12'.
|
||||
*/
|
||||
*rip = 0;
|
||||
error = vm_set_register(ctx, newcpu, VM_REG_GUEST_RIP, *rip);
|
||||
assert(error == 0);
|
||||
|
||||
error = vm_get_desc(ctx, newcpu, VM_REG_GUEST_CS, &desc_base,
|
||||
&desc_limit, &desc_access);
|
||||
assert(error == 0);
|
||||
|
||||
desc_base = vector << PAGE_SHIFT;
|
||||
error = vm_set_desc(ctx, newcpu, VM_REG_GUEST_CS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
assert(error == 0);
|
||||
|
||||
cs = (vector << PAGE_SHIFT) >> 4;
|
||||
error = vm_set_register(ctx, newcpu, VM_REG_GUEST_CS, cs);
|
||||
assert(error == 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
spinup_ap_direct64(struct vmctx *ctx, int newcpu, uintptr_t gaddr,
|
||||
uint64_t *rip)
|
||||
{
|
||||
struct mp_v64tramp *mvt;
|
||||
char *errstr;
|
||||
int error;
|
||||
uint64_t gdtbase;
|
||||
|
||||
mvt = paddr_guest2host(gaddr);
|
||||
|
||||
assert(mvt->mt_sig == MP_V64T_SIG);
|
||||
|
||||
/*
|
||||
* Set up the 3-entry GDT using memory supplied in the
|
||||
* guest's trampoline structure.
|
||||
*/
|
||||
vm_setup_freebsd_gdt(mvt->mt_gdtr);
|
||||
|
||||
#define CHECK_ERROR(msg) \
|
||||
if (error != 0) { \
|
||||
errstr = msg; \
|
||||
goto err_exit; \
|
||||
}
|
||||
|
||||
/* entry point */
|
||||
*rip = mvt->mt_eip;
|
||||
|
||||
/* Get the guest virtual address of the GDT */
|
||||
gdtbase = mvt->mt_virt + __offsetof(struct mp_v64tramp, mt_gdtr);
|
||||
|
||||
error = vm_setup_freebsd_registers(ctx, newcpu, mvt->mt_eip,
|
||||
mvt->mt_cr3, gdtbase, mvt->mt_rsp);
|
||||
CHECK_ERROR("vm_setup_freebsd_registers");
|
||||
|
||||
return;
|
||||
err_exit:
|
||||
printf("spinup_ap_direct64: machine state error: %s", errstr);
|
||||
exit(1);
|
||||
}
|
34
usr.sbin/bhyve/xmsr.h
Normal file
34
usr.sbin/bhyve/xmsr.h
Normal file
@ -0,0 +1,34 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef	_XMSR_H_
#define	_XMSR_H_

/* uint32_t and uint64_t are used in the prototype below. */
#include <stdint.h>

/*
 * Declared at file scope so the prototype below refers to the same type
 * as every includer, whatever the order of the #include lines.
 */
struct vmctx;

/*
 * Emulate a guest WRMSR of 'val' to MSR 'code' executed on 'vcpu'.
 * Returns a vcpu id.
 */
int	emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val);

#endif	/* _XMSR_H_ */
|
17
usr.sbin/vmmctl/Makefile
Normal file
17
usr.sbin/vmmctl/Makefile
Normal file
@ -0,0 +1,17 @@
|
||||
#
# $FreeBSD$
#

# The vmmctl program is built from a single source file.
PROG=	vmmctl
SRCS=	vmmctl.c

# No manual page is installed for this program.
NO_MAN=

WARNS?=	3

# Add sys/amd64/vmm to the header search path.
CFLAGS+= -I${.CURDIR}/../../sys/amd64/vmm

# Link against libvmmapi.
DPADD=	${LIBVMMAPI}
LDADD=	-lvmmapi

.include <bsd.prog.mk>
|
75
usr.sbin/vmmctl/sample.sh
Executable file
75
usr.sbin/vmmctl/sample.sh
Executable file
@ -0,0 +1,75 @@
|
||||
#!/bin/sh

# $FreeBSD$

# Sample vmmctl session: create a VM, set and read back a handful of
# registers, descriptors and the vcpu pinning, then destroy the VM.

VMMCTL="sudo ./vmmctl"
VMNAME=sample

# Run vmmctl against the sample VM with the given options.
vm()
{
	${VMMCTL} --vm=${VMNAME} "$@"
}

vm --create
vm --set-lowmem=128 --set-highmem=256
vm --get-lowmem --get-highmem

CR0_PE=$((1 << 0))
CR0_PG=$((1 << 31))
CR0=$((CR0_PE | CR0_PG))
vm --set-cr0=${CR0} --get-cr0

# XXX this is bogus the value of %cr3 should come from the loader
CR3=0
vm --set-cr3=${CR3} --get-cr3

CR4_PAE=$((1 << 5))
CR4=$((CR4_PAE))
vm --set-cr4=${CR4} --get-cr4

DR7=0x00000400	# Table 9-1 from Intel Architecture Manual 3A
vm --set-dr7=${DR7} --get-dr7

#
# XXX the values of rsp and rip are bogus and should come from the loader.
#
RSP=0xa5a5a5a5
RIP=0x0000bfbfbfbf0000
RFLAGS=0x2
vm --set-rsp=${RSP} --get-rsp
vm --set-rip=${RIP} --get-rip
vm --set-rflags=${RFLAGS} --get-rflags

# Set "hidden" state of %cs descriptor to indicate long mode code segment.
#
# Note that this should match the contents of the entry pointed to by the
# segment selector in the GDTR.
#
vm --set-desc-cs --desc-access=0x00002098 --get-desc-cs

# Set "hidden" state of all data descriptors to indicate a usable segment.
# The only useful fields are the "Present" and "Descriptor Type" bits.
for seg in ds es fs gs ss; do
	vm --set-desc-${seg} --desc-access=0x00000090 --get-desc-${seg}
done

#
# Set the code segment selector to point to entry at offset 8 in the GDTR.
#
vm --set-cs=0x0008 --get-cs

# Set all the remaining data segment selectors to point to entry at offset
# 16 in the GDTR.
for seg in ds es fs gs ss; do
	vm --set-${seg}=0x0010 --get-${seg}
done

# XXX the value of the GDTR should come from the loader.
# Set the GDTR
GDTR_BASE=0xffff0000
GDTR_LIMIT=0x10
vm --set-desc-gdtr --desc-base=${GDTR_BASE} --desc-limit=${GDTR_LIMIT} --get-desc-gdtr

vm --set-pinning=0 --get-pinning
vm --set-pinning=-1 --get-pinning

vm --destroy
|
1485
usr.sbin/vmmctl/vmmctl.c
Normal file
1485
usr.sbin/vmmctl/vmmctl.c
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user