2016-05-27 06:22:24 +00:00
|
|
|
/*-
|
2017-11-27 15:37:16 +00:00
|
|
|
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
|
|
|
*
|
2016-05-27 06:22:24 +00:00
|
|
|
* Copyright (c) 2011 NetApp, Inc.
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
* $FreeBSD$
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
2017-02-14 13:35:59 +00:00
|
|
|
#ifndef WITHOUT_CAPSICUM
|
|
|
|
#include <sys/capsicum.h>
|
|
|
|
#endif
|
2016-05-27 06:22:24 +00:00
|
|
|
#include <sys/mman.h>
|
|
|
|
#include <sys/time.h>
|
|
|
|
|
|
|
|
#include <machine/atomic.h>
|
|
|
|
#include <machine/segments.h>
|
|
|
|
|
2017-02-14 13:35:59 +00:00
|
|
|
#ifndef WITHOUT_CAPSICUM
|
|
|
|
#include <capsicum_helpers.h>
|
|
|
|
#endif
|
2016-05-27 06:22:24 +00:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <err.h>
|
2017-02-14 13:35:59 +00:00
|
|
|
#include <errno.h>
|
2016-05-27 06:22:24 +00:00
|
|
|
#include <libgen.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <assert.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <pthread.h>
|
|
|
|
#include <pthread_np.h>
|
|
|
|
#include <sysexits.h>
|
|
|
|
#include <stdbool.h>
|
Add the ability to control the CPU topology of created VMs
from userland without the need to use sysctls, it allows the old
sysctls to continue to function, but deprecates them at
FreeBSD_version 1200060 (Relnotes for deprecate).
The command line of bhyve is maintained in a backwards compatible way.
The API of libvmmapi is maintained in a backwards compatible way.
The sysctl's are maintained in a backwards compatible way.
Added command option looks like:
bhyve -c [[cpus=]n][,sockets=n][,cores=n][,threads=n][,maxcpus=n]
The optional parts can be specified in any order, but only a single
integer invokes the backwards compatible parse. [,maxcpus=n] is
hidden by #ifdef until kernel support is added, though the api
is put in place.
bhyvectl --get-cpu-topology option added.
Reviewed by: grehan (maintainer, earlier version),
Reviewed by: bcr (manpages)
Approved by: bde (mentor), phk (mentor)
Tested by: Oleg Ginzburg <olevole@olevole.ru> (cbsd)
MFC after: 1 week
Relnotes: Y
Differential Revision: https://reviews.freebsd.org/D9930
2018-04-08 19:24:49 +00:00
|
|
|
#include <stdint.h>
|
2016-05-27 06:22:24 +00:00
|
|
|
|
|
|
|
#include <machine/vmm.h>
|
2017-02-14 13:35:59 +00:00
|
|
|
#ifndef WITHOUT_CAPSICUM
|
|
|
|
#include <machine/vmm_dev.h>
|
|
|
|
#endif
|
2016-05-27 06:22:24 +00:00
|
|
|
#include <vmmapi.h>
|
|
|
|
|
|
|
|
#include "bhyverun.h"
|
|
|
|
#include "acpi.h"
|
Initial bhyve native graphics support.
This adds emulations for a raw framebuffer device, PS2 keyboard/mouse,
XHCI USB controller and a USB tablet.
A simple VNC server is provided for keyboard/mouse input, and graphics
output.
A VGA emulation is included, but is currently disconnected until an
additional bhyve change to block out VGA memory is committed.
Credits:
- raw framebuffer, VNC server, XHCI controller, USB bus/device emulation
and UEFI f/w support by Leon Dang
- VGA, console/g, initial VNC server by tychon@
- PS2 keyboard/mouse jointly done by tychon@ and Leon Dang
- hypervisor framebuffer mem support by neel@
Tested by: Michael Dexter, in a number of revisions of this code.
With the appropriate UEFI image, FreeBSD, Windows and Linux guests can
be installed and run in graphics mode using the UEFI/GOP framebuffer.
2016-05-27 06:30:35 +00:00
|
|
|
#include "atkbdc.h"
|
2016-05-27 06:22:24 +00:00
|
|
|
#include "inout.h"
|
|
|
|
#include "dbgport.h"
|
|
|
|
#include "fwctl.h"
|
2018-05-01 15:17:46 +00:00
|
|
|
#include "gdb.h"
|
2016-05-27 06:22:24 +00:00
|
|
|
#include "ioapic.h"
|
|
|
|
#include "mem.h"
|
|
|
|
#include "mevent.h"
|
|
|
|
#include "mptbl.h"
|
|
|
|
#include "pci_emul.h"
|
|
|
|
#include "pci_irq.h"
|
|
|
|
#include "pci_lpc.h"
|
|
|
|
#include "smbiostbl.h"
|
|
|
|
#include "xmsr.h"
|
|
|
|
#include "spinup_ap.h"
|
|
|
|
#include "rtc.h"
|
|
|
|
|
|
|
|
#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */
|
|
|
|
|
|
|
|
#define MB (1024UL * 1024)
|
|
|
|
#define GB (1024UL * MB)
|
|
|
|
|
|
|
|
typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
|
|
|
|
extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu);
|
|
|
|
|
|
|
|
char *vmname;
|
|
|
|
|
|
|
|
int guest_ncpus;
|
Add the ability to control the CPU topology of created VMs
from userland without the need to use sysctls, it allows the old
sysctls to continue to function, but deprecates them at
FreeBSD_version 1200060 (Relnotes for deprecate).
The command line of bhyve is maintained in a backwards compatible way.
The API of libvmmapi is maintained in a backwards compatible way.
The sysctl's are maintained in a backwards compatible way.
Added command option looks like:
bhyve -c [[cpus=]n][,sockets=n][,cores=n][,threads=n][,maxcpus=n]
The optional parts can be specified in any order, but only a single
integer invokes the backwards compatible parse. [,maxcpus=n] is
hidden by #ifdef until kernel support is added, though the api
is put in place.
bhyvectl --get-cpu-topology option added.
Reviewed by: grehan (maintainer, earlier version),
Reviewed by: bcr (manpages)
Approved by: bde (mentor), phk (mentor)
Tested by: Oleg Ginzburg <olevole@olevole.ru> (cbsd)
MFC after: 1 week
Relnotes: Y
Differential Revision: https://reviews.freebsd.org/D9930
2018-04-08 19:24:49 +00:00
|
|
|
uint16_t cores, maxcpus, sockets, threads;
|
|
|
|
|
2016-05-27 06:22:24 +00:00
|
|
|
char *guest_uuid_str;
|
|
|
|
|
|
|
|
static int guest_vmexit_on_hlt, guest_vmexit_on_pause;
|
|
|
|
static int virtio_msix = 1;
|
|
|
|
static int x2apic_mode = 0; /* default is xAPIC */
|
|
|
|
|
|
|
|
static int strictio;
|
|
|
|
static int strictmsr = 1;
|
|
|
|
|
|
|
|
static int acpi;
|
|
|
|
|
|
|
|
static char *progname;
|
|
|
|
static const int BSP = 0;
|
|
|
|
|
|
|
|
static cpuset_t cpumask;
|
|
|
|
|
|
|
|
static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);
|
|
|
|
|
|
|
|
static struct vm_exit vmexit[VM_MAXCPU];
|
|
|
|
|
|
|
|
/*
 * Event counters.  The vmexit_* members are bumped by the like-named
 * VM-exit handlers in this file.
 */
struct bhyvestats {
	uint64_t	vmexit_bogus;		/* vmexit_bogus() calls */
	uint64_t	vmexit_reqidle;		/* vmexit_reqidle() calls */
	uint64_t	vmexit_hlt;		/* vmexit_hlt() calls */
	uint64_t	vmexit_pause;		/* vmexit_pause() calls */
	uint64_t	vmexit_mtrap;		/* vmexit_mtrap() calls */
	uint64_t	vmexit_inst_emul;	/* vmexit_inst_emul() calls */
	uint64_t	cpu_switch_rotate;
	uint64_t	cpu_switch_direct;
} stats;
|
|
|
|
|
|
|
|
/*
 * Per-vCPU thread bookkeeping, indexed by vCPU id.  Each slot is filled in
 * by fbsdrun_addcpu() and handed to fbsdrun_start_thread() as its argument.
 */
struct mt_vmm_info {
	pthread_t	mt_thr;		/* thread created for this vCPU */
	struct vmctx	*mt_ctx;	/* VM context passed to vm_loop() */
	int		mt_vcpu;	/* vCPU id served by the thread */
} mt_vmm_info[VM_MAXCPU];
|
|
|
|
|
|
|
|
/*
 * Host CPU sets requested per vCPU.  An entry stays NULL until
 * pincpu_parse() allocates it for that vCPU.
 */
static cpuset_t *vcpumap[VM_MAXCPU] = { NULL };
|
|
|
|
|
|
|
|
static void
|
|
|
|
usage(int code)
|
|
|
|
{
|
|
|
|
|
|
|
|
fprintf(stderr,
|
Add the ability to control the CPU topology of created VMs
from userland without the need to use sysctls, it allows the old
sysctls to continue to function, but deprecates them at
FreeBSD_version 1200060 (Relnotes for deprecate).
The command line of bhyve is maintained in a backwards compatible way.
The API of libvmmapi is maintained in a backwards compatible way.
The sysctl's are maintained in a backwards compatible way.
Added command option looks like:
bhyve -c [[cpus=]n][,sockets=n][,cores=n][,threads=n][,maxcpus=n]
The optional parts can be specified in any order, but only a single
integer invokes the backwards compatible parse. [,maxcpus=n] is
hidden by #ifdef until kernel support is added, though the api
is put in place.
bhyvectl --get-cpu-topology option added.
Reviewed by: grehan (maintainer, earlier version),
Reviewed by: bcr (manpages)
Approved by: bde (mentor), phk (mentor)
Tested by: Oleg Ginzburg <olevole@olevole.ru> (cbsd)
MFC after: 1 week
Relnotes: Y
Differential Revision: https://reviews.freebsd.org/D9930
2018-04-08 19:24:49 +00:00
|
|
|
"Usage: %s [-abehuwxACHPSWY]\n"
|
|
|
|
" %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n"
|
|
|
|
" %*s [-g <gdb port>] [-l <lpc>]\n"
|
2016-05-27 06:22:24 +00:00
|
|
|
" %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n"
|
|
|
|
" -a: local apic is in xAPIC mode (deprecated)\n"
|
|
|
|
" -A: create ACPI tables\n"
|
2018-05-17 12:18:41 +00:00
|
|
|
" -c: number of cpus and/or topology specification\n"
|
2016-05-27 06:22:24 +00:00
|
|
|
" -C: include guest memory in core file\n"
|
|
|
|
" -e: exit on unhandled I/O access\n"
|
|
|
|
" -g: gdb port\n"
|
|
|
|
" -h: help\n"
|
|
|
|
" -H: vmexit from the guest on hlt\n"
|
|
|
|
" -l: LPC device configuration\n"
|
|
|
|
" -m: memory size in MB\n"
|
|
|
|
" -p: pin 'vcpu' to 'hostcpu'\n"
|
|
|
|
" -P: vmexit from the guest on pause\n"
|
|
|
|
" -s: <slot,driver,configinfo> PCI slot config\n"
|
|
|
|
" -S: guest memory cannot be swapped\n"
|
|
|
|
" -u: RTC keeps UTC time\n"
|
|
|
|
" -U: uuid\n"
|
|
|
|
" -w: ignore unimplemented MSRs\n"
|
|
|
|
" -W: force virtio to use single-vector MSI\n"
|
|
|
|
" -x: local apic is in x2APIC mode\n"
|
|
|
|
" -Y: disable MPtable generation\n",
|
Add the ability to control the CPU topology of created VMs
from userland without the need to use sysctls, it allows the old
sysctls to continue to function, but deprecates them at
FreeBSD_version 1200060 (Relnotes for deprecate).
The command line of bhyve is maintained in a backwards compatible way.
The API of libvmmapi is maintained in a backwards compatible way.
The sysctl's are maintained in a backwards compatible way.
Added command option looks like:
bhyve -c [[cpus=]n][,sockets=n][,cores=n][,threads=n][,maxcpus=n]
The optional parts can be specified in any order, but only a single
integer invokes the backwards compatible parse. [,maxcpus=n] is
hidden by #ifdef until kernel support is added, though the api
is put in place.
bhyvectl --get-cpu-topology option added.
Reviewed by: grehan (maintainer, earlier version),
Reviewed by: bcr (manpages)
Approved by: bde (mentor), phk (mentor)
Tested by: Oleg Ginzburg <olevole@olevole.ru> (cbsd)
MFC after: 1 week
Relnotes: Y
Differential Revision: https://reviews.freebsd.org/D9930
2018-04-08 19:24:49 +00:00
|
|
|
progname, (int)strlen(progname), "", (int)strlen(progname), "",
|
|
|
|
(int)strlen(progname), "");
|
2016-05-27 06:22:24 +00:00
|
|
|
|
|
|
|
exit(code);
|
|
|
|
}
|
|
|
|
|
Add the ability to control the CPU topology of created VMs
from userland without the need to use sysctls, it allows the old
sysctls to continue to function, but deprecates them at
FreeBSD_version 1200060 (Relnotes for deprecate).
The command line of bhyve is maintained in a backwards compatible way.
The API of libvmmapi is maintained in a backwards compatible way.
The sysctl's are maintained in a backwards compatible way.
Added command option looks like:
bhyve -c [[cpus=]n][,sockets=n][,cores=n][,threads=n][,maxcpus=n]
The optional parts can be specified in any order, but only a single
integer invokes the backwards compatible parse. [,maxcpus=n] is
hidden by #ifdef until kernel support is added, though the api
is put in place.
bhyvectl --get-cpu-topology option added.
Reviewed by: grehan (maintainer, earlier version),
Reviewed by: bcr (manpages)
Approved by: bde (mentor), phk (mentor)
Tested by: Oleg Ginzburg <olevole@olevole.ru> (cbsd)
MFC after: 1 week
Relnotes: Y
Differential Revision: https://reviews.freebsd.org/D9930
2018-04-08 19:24:49 +00:00
|
|
|
/*
|
|
|
|
* XXX This parser is known to have the following issues:
|
|
|
|
* 1. It accepts null key=value tokens ",,".
|
|
|
|
* 2. It accepts whitespace after = and before value.
|
|
|
|
* 3. Values out of range of INT are silently wrapped.
|
|
|
|
* 4. It doesn't check non-final values.
|
|
|
|
* 5. The apparently bogus limits of UINT16_MAX are for future expansion.
|
|
|
|
*
|
|
|
|
* The acceptance of a null specification ('-c ""') is by design to match the
|
|
|
|
* manual page syntax specification, this results in a topology of 1 vCPU.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
topology_parse(const char *opt)
|
|
|
|
{
|
|
|
|
uint64_t ncpus;
|
|
|
|
int c, chk, n, s, t, tmp;
|
|
|
|
char *cp, *str;
|
|
|
|
bool ns, scts;
|
|
|
|
|
|
|
|
c = 1, n = 1, s = 1, t = 1;
|
|
|
|
ns = false, scts = false;
|
|
|
|
str = strdup(opt);
|
2018-05-25 02:07:05 +00:00
|
|
|
assert(str != NULL);
|
Add the ability to control the CPU topology of created VMs
from userland without the need to use sysctls, it allows the old
sysctls to continue to function, but deprecates them at
FreeBSD_version 1200060 (Relnotes for deprecate).
The command line of bhyve is maintained in a backwards compatible way.
The API of libvmmapi is maintained in a backwards compatible way.
The sysctl's are maintained in a backwards compatible way.
Added command option looks like:
bhyve -c [[cpus=]n][,sockets=n][,cores=n][,threads=n][,maxcpus=n]
The optional parts can be specified in any order, but only a single
integer invokes the backwards compatible parse. [,maxcpus=n] is
hidden by #ifdef until kernel support is added, though the api
is put in place.
bhyvectl --get-cpu-topology option added.
Reviewed by: grehan (maintainer, earlier version),
Reviewed by: bcr (manpages)
Approved by: bde (mentor), phk (mentor)
Tested by: Oleg Ginzburg <olevole@olevole.ru> (cbsd)
MFC after: 1 week
Relnotes: Y
Differential Revision: https://reviews.freebsd.org/D9930
2018-04-08 19:24:49 +00:00
|
|
|
|
|
|
|
while ((cp = strsep(&str, ",")) != NULL) {
|
|
|
|
if (sscanf(cp, "%i%n", &tmp, &chk) == 1) {
|
|
|
|
n = tmp;
|
|
|
|
ns = true;
|
|
|
|
} else if (sscanf(cp, "cpus=%i%n", &tmp, &chk) == 1) {
|
|
|
|
n = tmp;
|
|
|
|
ns = true;
|
|
|
|
} else if (sscanf(cp, "sockets=%i%n", &tmp, &chk) == 1) {
|
|
|
|
s = tmp;
|
|
|
|
scts = true;
|
|
|
|
} else if (sscanf(cp, "cores=%i%n", &tmp, &chk) == 1) {
|
|
|
|
c = tmp;
|
|
|
|
scts = true;
|
|
|
|
} else if (sscanf(cp, "threads=%i%n", &tmp, &chk) == 1) {
|
|
|
|
t = tmp;
|
|
|
|
scts = true;
|
|
|
|
#ifdef notyet /* Do not expose this until vmm.ko implements it */
|
|
|
|
} else if (sscanf(cp, "maxcpus=%i%n", &tmp, &chk) == 1) {
|
|
|
|
m = tmp;
|
|
|
|
#endif
|
|
|
|
/* Skip the empty argument case from -c "" */
|
|
|
|
} else if (cp[0] == '\0')
|
|
|
|
continue;
|
|
|
|
else
|
2018-05-25 02:07:05 +00:00
|
|
|
goto out;
|
Add the ability to control the CPU topology of created VMs
from userland without the need to use sysctls, it allows the old
sysctls to continue to function, but deprecates them at
FreeBSD_version 1200060 (Relnotes for deprecate).
The command line of bhyve is maintained in a backwards compatible way.
The API of libvmmapi is maintained in a backwards compatible way.
The sysctl's are maintained in a backwards compatible way.
Added command option looks like:
bhyve -c [[cpus=]n][,sockets=n][,cores=n][,threads=n][,maxcpus=n]
The optional parts can be specified in any order, but only a single
integer invokes the backwards compatible parse. [,maxcpus=n] is
hidden by #ifdef until kernel support is added, though the api
is put in place.
bhyvectl --get-cpu-topology option added.
Reviewed by: grehan (maintainer, earlier version),
Reviewed by: bcr (manpages)
Approved by: bde (mentor), phk (mentor)
Tested by: Oleg Ginzburg <olevole@olevole.ru> (cbsd)
MFC after: 1 week
Relnotes: Y
Differential Revision: https://reviews.freebsd.org/D9930
2018-04-08 19:24:49 +00:00
|
|
|
/* Any trailing garbage causes an error */
|
|
|
|
if (cp[chk] != '\0')
|
2018-05-25 02:07:05 +00:00
|
|
|
goto out;
|
Add the ability to control the CPU topology of created VMs
from userland without the need to use sysctls, it allows the old
sysctls to continue to function, but deprecates them at
FreeBSD_version 1200060 (Relnotes for deprecate).
The command line of bhyve is maintained in a backwards compatible way.
The API of libvmmapi is maintained in a backwards compatible way.
The sysctl's are maintained in a backwards compatible way.
Added command option looks like:
bhyve -c [[cpus=]n][,sockets=n][,cores=n][,threads=n][,maxcpus=n]
The optional parts can be specified in any order, but only a single
integer invokes the backwards compatible parse. [,maxcpus=n] is
hidden by #ifdef until kernel support is added, though the api
is put in place.
bhyvectl --get-cpu-topology option added.
Reviewed by: grehan (maintainer, earlier version),
Reviewed by: bcr (manpages)
Approved by: bde (mentor), phk (mentor)
Tested by: Oleg Ginzburg <olevole@olevole.ru> (cbsd)
MFC after: 1 week
Relnotes: Y
Differential Revision: https://reviews.freebsd.org/D9930
2018-04-08 19:24:49 +00:00
|
|
|
}
|
2018-05-25 02:07:05 +00:00
|
|
|
free(str);
|
|
|
|
|
Add the ability to control the CPU topology of created VMs
from userland without the need to use sysctls, it allows the old
sysctls to continue to function, but deprecates them at
FreeBSD_version 1200060 (Relnotes for deprecate).
The command line of bhyve is maintained in a backwards compatible way.
The API of libvmmapi is maintained in a backwards compatible way.
The sysctl's are maintained in a backwards compatible way.
Added command option looks like:
bhyve -c [[cpus=]n][,sockets=n][,cores=n][,threads=n][,maxcpus=n]
The optional parts can be specified in any order, but only a single
integer invokes the backwards compatible parse. [,maxcpus=n] is
hidden by #ifdef until kernel support is added, though the api
is put in place.
bhyvectl --get-cpu-topology option added.
Reviewed by: grehan (maintainer, earlier version),
Reviewed by: bcr (manpages)
Approved by: bde (mentor), phk (mentor)
Tested by: Oleg Ginzburg <olevole@olevole.ru> (cbsd)
MFC after: 1 week
Relnotes: Y
Differential Revision: https://reviews.freebsd.org/D9930
2018-04-08 19:24:49 +00:00
|
|
|
/*
|
|
|
|
* Range check 1 <= n <= UINT16_MAX all values
|
|
|
|
*/
|
|
|
|
if (n < 1 || s < 1 || c < 1 || t < 1 ||
|
|
|
|
n > UINT16_MAX || s > UINT16_MAX || c > UINT16_MAX ||
|
|
|
|
t > UINT16_MAX)
|
|
|
|
return (-1);
|
|
|
|
|
|
|
|
/* If only the cpus was specified, use that as sockets */
|
|
|
|
if (!scts)
|
|
|
|
s = n;
|
|
|
|
/*
|
|
|
|
* Compute sockets * cores * threads avoiding overflow
|
|
|
|
* The range check above insures these are 16 bit values
|
|
|
|
* If n was specified check it against computed ncpus
|
|
|
|
*/
|
|
|
|
ncpus = (uint64_t)s * c * t;
|
|
|
|
if (ncpus > UINT16_MAX || (ns && n != ncpus))
|
|
|
|
return (-1);
|
|
|
|
|
|
|
|
guest_ncpus = ncpus;
|
|
|
|
sockets = s;
|
|
|
|
cores = c;
|
|
|
|
threads = t;
|
|
|
|
return(0);
|
2018-05-25 02:07:05 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
free(str);
|
|
|
|
return (-1);
|
Add the ability to control the CPU topology of created VMs
from userland without the need to use sysctls, it allows the old
sysctls to continue to function, but deprecates them at
FreeBSD_version 1200060 (Relnotes for deprecate).
The command line of bhyve is maintained in a backwards compatible way.
The API of libvmmapi is maintained in a backwards compatible way.
The sysctl's are maintained in a backwards compatible way.
Added command option looks like:
bhyve -c [[cpus=]n][,sockets=n][,cores=n][,threads=n][,maxcpus=n]
The optional parts can be specified in any order, but only a single
integer invokes the backwards compatible parse. [,maxcpus=n] is
hidden by #ifdef until kernel support is added, though the api
is put in place.
bhyvectl --get-cpu-topology option added.
Reviewed by: grehan (maintainer, earlier version),
Reviewed by: bcr (manpages)
Approved by: bde (mentor), phk (mentor)
Tested by: Oleg Ginzburg <olevole@olevole.ru> (cbsd)
MFC after: 1 week
Relnotes: Y
Differential Revision: https://reviews.freebsd.org/D9930
2018-04-08 19:24:49 +00:00
|
|
|
}
|
|
|
|
|
2016-05-27 06:22:24 +00:00
|
|
|
static int
|
|
|
|
pincpu_parse(const char *opt)
|
|
|
|
{
|
|
|
|
int vcpu, pcpu;
|
|
|
|
|
|
|
|
if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) {
|
|
|
|
fprintf(stderr, "invalid format: %s\n", opt);
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (vcpu < 0 || vcpu >= VM_MAXCPU) {
|
|
|
|
fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n",
|
|
|
|
vcpu, VM_MAXCPU - 1);
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pcpu < 0 || pcpu >= CPU_SETSIZE) {
|
|
|
|
fprintf(stderr, "hostcpu '%d' outside valid range from "
|
|
|
|
"0 to %d\n", pcpu, CPU_SETSIZE - 1);
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (vcpumap[vcpu] == NULL) {
|
|
|
|
if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) {
|
|
|
|
perror("malloc");
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
CPU_ZERO(vcpumap[vcpu]);
|
|
|
|
}
|
|
|
|
CPU_SET(pcpu, vcpumap[vcpu]);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Inject exception 'vector' into 'vcpu' through vm_inject_exception(),
 * always asking for the instruction to be restarted.  'arg' is the
 * struct vmctx pointer passed as void *.  A failed injection trips an
 * assertion.
 */
void
vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid,
    int errcode)
{
	struct vmctx *ctx = arg;
	const int restart_instruction = 1;
	int error;

	error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode,
	    restart_instruction);
	assert(error == 0);
}
|
|
|
|
|
|
|
|
/*
 * Thin wrapper: hand the guest address 'gaddr' and length 'len' to
 * vm_map_gpa() and return whatever it returns.
 */
void *
paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len)
{
	void *hostaddr;

	hostaddr = vm_map_gpa(ctx, gaddr, len);
	return (hostaddr);
}
|
|
|
|
|
|
|
|
int
|
|
|
|
fbsdrun_vmexit_on_pause(void)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (guest_vmexit_on_pause);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
fbsdrun_vmexit_on_hlt(void)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (guest_vmexit_on_hlt);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
fbsdrun_virtio_msix(void)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (virtio_msix);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *
|
|
|
|
fbsdrun_start_thread(void *param)
|
|
|
|
{
|
|
|
|
char tname[MAXCOMLEN + 1];
|
|
|
|
struct mt_vmm_info *mtp;
|
|
|
|
int vcpu;
|
|
|
|
|
|
|
|
mtp = param;
|
|
|
|
vcpu = mtp->mt_vcpu;
|
|
|
|
|
|
|
|
snprintf(tname, sizeof(tname), "vcpu %d", vcpu);
|
|
|
|
pthread_set_name_np(mtp->mt_thr, tname);
|
|
|
|
|
2018-05-01 15:17:46 +00:00
|
|
|
gdb_cpu_add(vcpu);
|
|
|
|
|
2016-05-27 06:22:24 +00:00
|
|
|
vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);
|
|
|
|
|
|
|
|
/* not reached */
|
|
|
|
exit(1);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
assert(fromcpu == BSP);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The 'newcpu' must be activated in the context of 'fromcpu'. If
|
|
|
|
* vm_activate_cpu() is delayed until newcpu's pthread starts running
|
|
|
|
* then vmm.ko is out-of-sync with bhyve and this can create a race
|
|
|
|
* with vm_suspend().
|
|
|
|
*/
|
|
|
|
error = vm_activate_cpu(ctx, newcpu);
|
|
|
|
if (error != 0)
|
|
|
|
err(EX_OSERR, "could not activate CPU %d", newcpu);
|
|
|
|
|
|
|
|
CPU_SET_ATOMIC(newcpu, &cpumask);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set up the vmexit struct to allow execution to start
|
|
|
|
* at the given RIP
|
|
|
|
*/
|
|
|
|
vmexit[newcpu].rip = rip;
|
|
|
|
vmexit[newcpu].inst_length = 0;
|
|
|
|
|
|
|
|
mt_vmm_info[newcpu].mt_ctx = ctx;
|
|
|
|
mt_vmm_info[newcpu].mt_vcpu = newcpu;
|
|
|
|
|
|
|
|
error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL,
|
|
|
|
fbsdrun_start_thread, &mt_vmm_info[newcpu]);
|
|
|
|
assert(error == 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
fbsdrun_deletecpu(struct vmctx *ctx, int vcpu)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (!CPU_ISSET(vcpu, &cpumask)) {
|
|
|
|
fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
CPU_CLR_ATOMIC(vcpu, &cpumask);
|
|
|
|
return (CPU_EMPTY(&cpumask));
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
|
|
|
|
uint32_t eax)
|
|
|
|
{
|
|
|
|
#if BHYVE_DEBUG
|
|
|
|
/*
|
|
|
|
* put guest-driven debug here
|
|
|
|
*/
|
|
|
|
#endif
|
|
|
|
return (VMEXIT_CONTINUE);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
int bytes, port, in, out;
|
|
|
|
int vcpu;
|
|
|
|
|
|
|
|
vcpu = *pvcpu;
|
|
|
|
|
|
|
|
port = vme->u.inout.port;
|
|
|
|
bytes = vme->u.inout.bytes;
|
|
|
|
in = vme->u.inout.in;
|
|
|
|
out = !in;
|
|
|
|
|
|
|
|
/* Extra-special case of host notifications */
|
|
|
|
if (out && port == GUEST_NIO_PORT) {
|
|
|
|
error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
error = emulate_inout(ctx, vcpu, vme, strictio);
|
|
|
|
if (error) {
|
|
|
|
fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n",
|
|
|
|
in ? "in" : "out",
|
|
|
|
bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
|
|
|
|
port, vmexit->rip);
|
|
|
|
return (VMEXIT_ABORT);
|
|
|
|
} else {
|
|
|
|
return (VMEXIT_CONTINUE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
|
|
|
|
{
|
|
|
|
uint64_t val;
|
|
|
|
uint32_t eax, edx;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
val = 0;
|
|
|
|
error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val);
|
|
|
|
if (error != 0) {
|
|
|
|
fprintf(stderr, "rdmsr to register %#x on vcpu %d\n",
|
|
|
|
vme->u.msr.code, *pvcpu);
|
|
|
|
if (strictmsr) {
|
|
|
|
vm_inject_gp(ctx, *pvcpu);
|
|
|
|
return (VMEXIT_CONTINUE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
eax = val;
|
|
|
|
error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax);
|
|
|
|
assert(error == 0);
|
|
|
|
|
|
|
|
edx = val >> 32;
|
|
|
|
error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx);
|
|
|
|
assert(error == 0);
|
|
|
|
|
|
|
|
return (VMEXIT_CONTINUE);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval);
|
|
|
|
if (error != 0) {
|
|
|
|
fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n",
|
|
|
|
vme->u.msr.code, vme->u.msr.wval, *pvcpu);
|
|
|
|
if (strictmsr) {
|
|
|
|
vm_inject_gp(ctx, *pvcpu);
|
|
|
|
return (VMEXIT_CONTINUE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return (VMEXIT_CONTINUE);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
|
|
|
|
{
|
|
|
|
|
2016-07-06 04:56:45 +00:00
|
|
|
(void)spinup_ap(ctx, *pvcpu,
|
|
|
|
vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip);
|
2016-05-27 06:22:24 +00:00
|
|
|
|
2016-07-06 04:56:45 +00:00
|
|
|
return (VMEXIT_CONTINUE);
|
2016-05-27 06:22:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Enables the extra EPT-misconfiguration dump in vmexit_vmx(). */
#define	DEBUG_EPT_MISCONFIG
#ifdef DEBUG_EPT_MISCONFIG
/* Exit reason that vmexit_vmx() checks before dumping EPT state. */
#define	EXIT_REASON_EPT_MISCONFIG	49
#define	VMCS_GUEST_PHYSICAL_ADDRESS	0x00002400
/* Sets bit 31 on a VMCS field number; used as a vm_get_register() id. */
#define	VMCS_IDENT(x)			((x) | 0x80000000)

/* Scratch storage filled and printed by vmexit_vmx(). */
static uint64_t ept_misconfig_gpa;
static uint64_t ept_misconfig_pte[4];
static int ept_misconfig_ptenum;
#endif
|
|
|
|
|
|
|
|
static int
|
|
|
|
vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
|
|
|
{
|
|
|
|
|
|
|
|
fprintf(stderr, "vm exit[%d]\n", *pvcpu);
|
|
|
|
fprintf(stderr, "\treason\t\tVMX\n");
|
|
|
|
fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
|
|
|
|
fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
|
|
|
|
fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status);
|
|
|
|
fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason);
|
|
|
|
fprintf(stderr, "\tqualification\t0x%016lx\n",
|
|
|
|
vmexit->u.vmx.exit_qualification);
|
|
|
|
fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type);
|
|
|
|
fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error);
|
|
|
|
#ifdef DEBUG_EPT_MISCONFIG
|
|
|
|
if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) {
|
|
|
|
vm_get_register(ctx, *pvcpu,
|
|
|
|
VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS),
|
|
|
|
&ept_misconfig_gpa);
|
|
|
|
vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte,
|
|
|
|
&ept_misconfig_ptenum);
|
|
|
|
fprintf(stderr, "\tEPT misconfiguration:\n");
|
|
|
|
fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa);
|
|
|
|
fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n",
|
|
|
|
ept_misconfig_ptenum, ept_misconfig_pte[0],
|
|
|
|
ept_misconfig_pte[1], ept_misconfig_pte[2],
|
|
|
|
ept_misconfig_pte[3]);
|
|
|
|
}
|
|
|
|
#endif /* DEBUG_EPT_MISCONFIG */
|
|
|
|
return (VMEXIT_ABORT);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
vmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
|
|
|
{
|
|
|
|
|
|
|
|
fprintf(stderr, "vm exit[%d]\n", *pvcpu);
|
|
|
|
fprintf(stderr, "\treason\t\tSVM\n");
|
|
|
|
fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
|
|
|
|
fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
|
|
|
|
fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode);
|
|
|
|
fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1);
|
|
|
|
fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2);
|
|
|
|
return (VMEXIT_ABORT);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
|
|
|
{
|
|
|
|
|
|
|
|
assert(vmexit->inst_length == 0);
|
|
|
|
|
|
|
|
stats.vmexit_bogus++;
|
|
|
|
|
|
|
|
return (VMEXIT_CONTINUE);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
|
|
|
{
|
|
|
|
|
|
|
|
assert(vmexit->inst_length == 0);
|
|
|
|
|
|
|
|
stats.vmexit_reqidle++;
|
|
|
|
|
|
|
|
return (VMEXIT_CONTINUE);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
|
|
|
{
|
|
|
|
|
|
|
|
stats.vmexit_hlt++;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Just continue execution with the next instruction. We use
|
|
|
|
* the HLT VM exit as a way to be friendly with the host
|
|
|
|
* scheduler.
|
|
|
|
*/
|
|
|
|
return (VMEXIT_CONTINUE);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
|
|
|
{
|
|
|
|
|
|
|
|
stats.vmexit_pause++;
|
|
|
|
|
|
|
|
return (VMEXIT_CONTINUE);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
|
|
|
{
|
|
|
|
|
|
|
|
assert(vmexit->inst_length == 0);
|
|
|
|
|
|
|
|
stats.vmexit_mtrap++;
|
|
|
|
|
2018-05-01 15:17:46 +00:00
|
|
|
gdb_cpu_mtrap(*pvcpu);
|
|
|
|
|
2016-05-27 06:22:24 +00:00
|
|
|
return (VMEXIT_CONTINUE);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
|
|
|
{
|
|
|
|
int err, i;
|
|
|
|
struct vie *vie;
|
|
|
|
|
|
|
|
stats.vmexit_inst_emul++;
|
|
|
|
|
|
|
|
vie = &vmexit->u.inst_emul.vie;
|
|
|
|
err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa,
|
|
|
|
vie, &vmexit->u.inst_emul.paging);
|
|
|
|
|
|
|
|
if (err) {
|
|
|
|
if (err == ESRCH) {
|
|
|
|
fprintf(stderr, "Unhandled memory access to 0x%lx\n",
|
|
|
|
vmexit->u.inst_emul.gpa);
|
|
|
|
}
|
|
|
|
|
|
|
|
fprintf(stderr, "Failed to emulate instruction [");
|
|
|
|
for (i = 0; i < vie->num_valid; i++) {
|
|
|
|
fprintf(stderr, "0x%02x%s", vie->inst[i],
|
|
|
|
i != (vie->num_valid - 1) ? " " : "");
|
|
|
|
}
|
|
|
|
fprintf(stderr, "] at 0x%lx\n", vmexit->rip);
|
|
|
|
return (VMEXIT_ABORT);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (VMEXIT_CONTINUE);
|
|
|
|
}
|
|
|
|
|
|
|
|
static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER;
|
|
|
|
static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER;
|
|
|
|
|
|
|
|
static int
|
|
|
|
vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
|
|
|
{
|
|
|
|
enum vm_suspend_how how;
|
|
|
|
|
|
|
|
how = vmexit->u.suspended.how;
|
|
|
|
|
|
|
|
fbsdrun_deletecpu(ctx, *pvcpu);
|
|
|
|
|
|
|
|
if (*pvcpu != BSP) {
|
|
|
|
pthread_mutex_lock(&resetcpu_mtx);
|
|
|
|
pthread_cond_signal(&resetcpu_cond);
|
|
|
|
pthread_mutex_unlock(&resetcpu_mtx);
|
|
|
|
pthread_exit(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
pthread_mutex_lock(&resetcpu_mtx);
|
|
|
|
while (!CPU_EMPTY(&cpumask)) {
|
|
|
|
pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx);
|
|
|
|
}
|
|
|
|
pthread_mutex_unlock(&resetcpu_mtx);
|
|
|
|
|
|
|
|
switch (how) {
|
|
|
|
case VM_SUSPEND_RESET:
|
|
|
|
exit(0);
|
|
|
|
case VM_SUSPEND_POWEROFF:
|
|
|
|
exit(1);
|
|
|
|
case VM_SUSPEND_HALT:
|
|
|
|
exit(2);
|
|
|
|
case VM_SUSPEND_TRIPLEFAULT:
|
|
|
|
exit(3);
|
|
|
|
default:
|
|
|
|
fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how);
|
|
|
|
exit(100);
|
|
|
|
}
|
|
|
|
return (0); /* NOTREACHED */
|
|
|
|
}
|
|
|
|
|
2018-05-01 15:17:46 +00:00
|
|
|
static int
|
|
|
|
vmexit_debug(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
|
|
|
{
|
|
|
|
|
|
|
|
gdb_cpu_suspend(*pvcpu);
|
|
|
|
return (VMEXIT_CONTINUE);
|
|
|
|
}
|
|
|
|
|
2016-05-27 06:22:24 +00:00
|
|
|
static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
|
|
|
|
[VM_EXITCODE_INOUT] = vmexit_inout,
|
|
|
|
[VM_EXITCODE_INOUT_STR] = vmexit_inout,
|
|
|
|
[VM_EXITCODE_VMX] = vmexit_vmx,
|
|
|
|
[VM_EXITCODE_SVM] = vmexit_svm,
|
|
|
|
[VM_EXITCODE_BOGUS] = vmexit_bogus,
|
|
|
|
[VM_EXITCODE_REQIDLE] = vmexit_reqidle,
|
|
|
|
[VM_EXITCODE_RDMSR] = vmexit_rdmsr,
|
|
|
|
[VM_EXITCODE_WRMSR] = vmexit_wrmsr,
|
|
|
|
[VM_EXITCODE_MTRAP] = vmexit_mtrap,
|
|
|
|
[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
|
|
|
|
[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
|
|
|
|
[VM_EXITCODE_SUSPENDED] = vmexit_suspend,
|
|
|
|
[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
|
2018-05-01 15:17:46 +00:00
|
|
|
[VM_EXITCODE_DEBUG] = vmexit_debug,
|
2016-05-27 06:22:24 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
static void
|
|
|
|
vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
|
|
|
|
{
|
|
|
|
int error, rc;
|
|
|
|
enum vm_exitcode exitcode;
|
|
|
|
cpuset_t active_cpus;
|
|
|
|
|
|
|
|
if (vcpumap[vcpu] != NULL) {
|
|
|
|
error = pthread_setaffinity_np(pthread_self(),
|
|
|
|
sizeof(cpuset_t), vcpumap[vcpu]);
|
|
|
|
assert(error == 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
error = vm_active_cpus(ctx, &active_cpus);
|
|
|
|
assert(CPU_ISSET(vcpu, &active_cpus));
|
|
|
|
|
|
|
|
error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip);
|
|
|
|
assert(error == 0);
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
error = vm_run(ctx, vcpu, &vmexit[vcpu]);
|
|
|
|
if (error != 0)
|
|
|
|
break;
|
|
|
|
|
|
|
|
exitcode = vmexit[vcpu].exitcode;
|
|
|
|
if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) {
|
|
|
|
fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n",
|
|
|
|
exitcode);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu);
|
|
|
|
|
|
|
|
switch (rc) {
|
|
|
|
case VMEXIT_CONTINUE:
|
|
|
|
break;
|
|
|
|
case VMEXIT_ABORT:
|
|
|
|
abort();
|
|
|
|
default:
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
num_vcpus_allowed(struct vmctx *ctx)
|
|
|
|
{
|
|
|
|
int tmp, error;
|
|
|
|
|
|
|
|
error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The guest is allowed to spinup more than one processor only if the
|
|
|
|
* UNRESTRICTED_GUEST capability is available.
|
|
|
|
*/
|
|
|
|
if (error == 0)
|
|
|
|
return (VM_MAXCPU);
|
|
|
|
else
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
fbsdrun_set_capabilities(struct vmctx *ctx, int cpu)
|
|
|
|
{
|
|
|
|
int err, tmp;
|
|
|
|
|
|
|
|
if (fbsdrun_vmexit_on_hlt()) {
|
|
|
|
err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp);
|
|
|
|
if (err < 0) {
|
|
|
|
fprintf(stderr, "VM exit on HLT not supported\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1);
|
|
|
|
if (cpu == BSP)
|
|
|
|
handler[VM_EXITCODE_HLT] = vmexit_hlt;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (fbsdrun_vmexit_on_pause()) {
|
|
|
|
/*
|
|
|
|
* pause exit support required for this mode
|
|
|
|
*/
|
|
|
|
err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp);
|
|
|
|
if (err < 0) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"SMP mux requested, no pause support\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1);
|
|
|
|
if (cpu == BSP)
|
|
|
|
handler[VM_EXITCODE_PAUSE] = vmexit_pause;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (x2apic_mode)
|
|
|
|
err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED);
|
|
|
|
else
|
|
|
|
err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED);
|
|
|
|
|
|
|
|
if (err) {
|
|
|
|
fprintf(stderr, "Unable to set x2apic state (%d)\n", err);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct vmctx *
|
|
|
|
do_open(const char *vmname)
|
|
|
|
{
|
|
|
|
struct vmctx *ctx;
|
|
|
|
int error;
|
|
|
|
bool reinit, romboot;
|
2017-02-14 13:35:59 +00:00
|
|
|
#ifndef WITHOUT_CAPSICUM
|
|
|
|
cap_rights_t rights;
|
|
|
|
const cap_ioctl_t *cmds;
|
|
|
|
size_t ncmds;
|
|
|
|
#endif
|
2016-05-27 06:22:24 +00:00
|
|
|
|
|
|
|
reinit = romboot = false;
|
|
|
|
|
|
|
|
if (lpc_bootrom())
|
|
|
|
romboot = true;
|
|
|
|
|
|
|
|
error = vm_create(vmname);
|
|
|
|
if (error) {
|
|
|
|
if (errno == EEXIST) {
|
|
|
|
if (romboot) {
|
|
|
|
reinit = true;
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* The virtual machine has been setup by the
|
|
|
|
* userspace bootloader.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
perror("vm_create");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (!romboot) {
|
|
|
|
/*
|
|
|
|
* If the virtual machine was just created then a
|
|
|
|
* bootrom must be configured to boot it.
|
|
|
|
*/
|
|
|
|
fprintf(stderr, "virtual machine cannot be booted\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx = vm_open(vmname);
|
|
|
|
if (ctx == NULL) {
|
|
|
|
perror("vm_open");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2017-02-14 13:35:59 +00:00
|
|
|
#ifndef WITHOUT_CAPSICUM
|
|
|
|
cap_rights_init(&rights, CAP_IOCTL, CAP_MMAP_RW);
|
|
|
|
if (cap_rights_limit(vm_get_device_fd(ctx), &rights) == -1 &&
|
|
|
|
errno != ENOSYS)
|
|
|
|
errx(EX_OSERR, "Unable to apply rights for sandbox");
|
|
|
|
vm_get_ioctls(&ncmds);
|
|
|
|
cmds = vm_get_ioctls(NULL);
|
|
|
|
if (cmds == NULL)
|
|
|
|
errx(EX_OSERR, "out of memory");
|
|
|
|
if (cap_ioctls_limit(vm_get_device_fd(ctx), cmds, ncmds) == -1 &&
|
|
|
|
errno != ENOSYS)
|
|
|
|
errx(EX_OSERR, "Unable to apply rights for sandbox");
|
|
|
|
free((cap_ioctl_t *)cmds);
|
|
|
|
#endif
|
|
|
|
|
2016-05-27 06:22:24 +00:00
|
|
|
if (reinit) {
|
|
|
|
error = vm_reinit(ctx);
|
|
|
|
if (error) {
|
|
|
|
perror("vm_reinit");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
}
|
Add the ability to control the CPU topology of created VMs
from userland without the need to use sysctls, it allows the old
sysctls to continue to function, but deprecates them at
FreeBSD_version 1200060 (Relnotes for deprecate).
The command line of bhyve is maintained in a backwards compatible way.
The API of libvmmapi is maintained in a backwards compatible way.
The sysctl's are maintained in a backwards compatible way.
Added command option looks like:
bhyve -c [[cpus=]n][,sockets=n][,cores=n][,threads=n][,maxcpus=n]
The optional parts can be specified in any order, but only a single
integer invokes the backwards compatible parse. [,maxcpus=n] is
hidden by #ifdef until kernel support is added, though the api
is put in place.
bhyvectl --get-cpu-topology option added.
Reviewed by: grehan (maintainer, earlier version),
Reviewed by: bcr (manpages)
Approved by: bde (mentor), phk (mentor)
Tested by: Oleg Ginzburg <olevole@olevole.ru> (cbsd)
MFC after: 1 week
Relnotes: Y
Differential Revision: https://reviews.freebsd.org/D9930
2018-04-08 19:24:49 +00:00
|
|
|
error = vm_set_topology(ctx, sockets, cores, threads, maxcpus);
|
|
|
|
if (error)
|
|
|
|
errx(EX_OSERR, "vm_set_topology");
|
2016-05-27 06:22:24 +00:00
|
|
|
return (ctx);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
main(int argc, char *argv[])
|
|
|
|
{
|
2018-05-01 15:17:46 +00:00
|
|
|
int c, error, dbg_port, gdb_port, err, bvmcons;
|
2016-05-27 06:22:24 +00:00
|
|
|
int max_vcpus, mptgen, memflags;
|
|
|
|
int rtc_localtime;
|
2018-05-01 15:17:46 +00:00
|
|
|
bool gdb_stop;
|
2016-05-27 06:22:24 +00:00
|
|
|
struct vmctx *ctx;
|
|
|
|
uint64_t rip;
|
|
|
|
size_t memsize;
|
|
|
|
char *optstr;
|
|
|
|
|
|
|
|
bvmcons = 0;
|
|
|
|
progname = basename(argv[0]);
|
2018-05-01 15:17:46 +00:00
|
|
|
dbg_port = 0;
|
2016-05-27 06:22:24 +00:00
|
|
|
gdb_port = 0;
|
2018-05-01 15:17:46 +00:00
|
|
|
gdb_stop = false;
|
2016-05-27 06:22:24 +00:00
|
|
|
guest_ncpus = 1;
|
Add the ability to control the CPU topology of created VMs
from userland without the need to use sysctls, it allows the old
sysctls to continue to function, but deprecates them at
FreeBSD_version 1200060 (Relnotes for deprecate).
The command line of bhyve is maintained in a backwards compatible way.
The API of libvmmapi is maintained in a backwards compatible way.
The sysctl's are maintained in a backwards compatible way.
Added command option looks like:
bhyve -c [[cpus=]n][,sockets=n][,cores=n][,threads=n][,maxcpus=n]
The optional parts can be specified in any order, but only a single
integer invokes the backwards compatible parse. [,maxcpus=n] is
hidden by #ifdef until kernel support is added, though the api
is put in place.
bhyvectl --get-cpu-topology option added.
Reviewed by: grehan (maintainer, earlier version),
Reviewed by: bcr (manpages)
Approved by: bde (mentor), phk (mentor)
Tested by: Oleg Ginzburg <olevole@olevole.ru> (cbsd)
MFC after: 1 week
Relnotes: Y
Differential Revision: https://reviews.freebsd.org/D9930
2018-04-08 19:24:49 +00:00
|
|
|
sockets = cores = threads = 1;
|
|
|
|
maxcpus = 0;
|
2016-05-27 06:22:24 +00:00
|
|
|
memsize = 256 * MB;
|
|
|
|
mptgen = 1;
|
|
|
|
rtc_localtime = 1;
|
|
|
|
memflags = 0;
|
|
|
|
|
2018-05-01 15:17:46 +00:00
|
|
|
optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:U:";
|
2016-05-27 06:22:24 +00:00
|
|
|
while ((c = getopt(argc, argv, optstr)) != -1) {
|
|
|
|
switch (c) {
|
|
|
|
case 'a':
|
|
|
|
x2apic_mode = 0;
|
|
|
|
break;
|
|
|
|
case 'A':
|
|
|
|
acpi = 1;
|
|
|
|
break;
|
|
|
|
case 'b':
|
|
|
|
bvmcons = 1;
|
|
|
|
break;
|
|
|
|
case 'p':
|
|
|
|
if (pincpu_parse(optarg) != 0) {
|
|
|
|
errx(EX_USAGE, "invalid vcpu pinning "
|
|
|
|
"configuration '%s'", optarg);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 'c':
|
Add the ability to control the CPU topology of created VMs
from userland without the need to use sysctls, it allows the old
sysctls to continue to function, but deprecates them at
FreeBSD_version 1200060 (Relnotes for deprecate).
The command line of bhyve is maintained in a backwards compatible way.
The API of libvmmapi is maintained in a backwards compatible way.
The sysctl's are maintained in a backwards compatible way.
Added command option looks like:
bhyve -c [[cpus=]n][,sockets=n][,cores=n][,threads=n][,maxcpus=n]
The optional parts can be specified in any order, but only a single
integer invokes the backwards compatible parse. [,maxcpus=n] is
hidden by #ifdef until kernel support is added, though the api
is put in place.
bhyvectl --get-cpu-topology option added.
Reviewed by: grehan (maintainer, earlier version),
Reviewed by: bcr (manpages)
Approved by: bde (mentor), phk (mentor)
Tested by: Oleg Ginzburg <olevole@olevole.ru> (cbsd)
MFC after: 1 week
Relnotes: Y
Differential Revision: https://reviews.freebsd.org/D9930
2018-04-08 19:24:49 +00:00
|
|
|
if (topology_parse(optarg) != 0) {
|
|
|
|
errx(EX_USAGE, "invalid cpu topology "
|
|
|
|
"'%s'", optarg);
|
|
|
|
}
|
2016-05-27 06:22:24 +00:00
|
|
|
break;
|
|
|
|
case 'C':
|
|
|
|
memflags |= VM_MEM_F_INCORE;
|
|
|
|
break;
|
|
|
|
case 'g':
|
2018-05-01 15:17:46 +00:00
|
|
|
dbg_port = atoi(optarg);
|
|
|
|
break;
|
|
|
|
case 'G':
|
|
|
|
if (optarg[0] == 'w') {
|
|
|
|
gdb_stop = true;
|
|
|
|
optarg++;
|
|
|
|
}
|
2016-05-27 06:22:24 +00:00
|
|
|
gdb_port = atoi(optarg);
|
|
|
|
break;
|
|
|
|
case 'l':
|
|
|
|
if (lpc_device_parse(optarg) != 0) {
|
|
|
|
errx(EX_USAGE, "invalid lpc device "
|
|
|
|
"configuration '%s'", optarg);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 's':
|
|
|
|
if (pci_parse_slot(optarg) != 0)
|
|
|
|
exit(1);
|
|
|
|
else
|
|
|
|
break;
|
|
|
|
case 'S':
|
|
|
|
memflags |= VM_MEM_F_WIRED;
|
|
|
|
break;
|
|
|
|
case 'm':
|
|
|
|
error = vm_parse_memsize(optarg, &memsize);
|
|
|
|
if (error)
|
|
|
|
errx(EX_USAGE, "invalid memsize '%s'", optarg);
|
|
|
|
break;
|
|
|
|
case 'H':
|
|
|
|
guest_vmexit_on_hlt = 1;
|
|
|
|
break;
|
|
|
|
case 'I':
|
|
|
|
/*
|
|
|
|
* The "-I" option was used to add an ioapic to the
|
|
|
|
* virtual machine.
|
|
|
|
*
|
|
|
|
* An ioapic is now provided unconditionally for each
|
|
|
|
* virtual machine and this option is now deprecated.
|
|
|
|
*/
|
|
|
|
break;
|
|
|
|
case 'P':
|
|
|
|
guest_vmexit_on_pause = 1;
|
|
|
|
break;
|
|
|
|
case 'e':
|
|
|
|
strictio = 1;
|
|
|
|
break;
|
|
|
|
case 'u':
|
|
|
|
rtc_localtime = 0;
|
|
|
|
break;
|
|
|
|
case 'U':
|
|
|
|
guest_uuid_str = optarg;
|
|
|
|
break;
|
|
|
|
case 'w':
|
|
|
|
strictmsr = 0;
|
|
|
|
break;
|
|
|
|
case 'W':
|
|
|
|
virtio_msix = 0;
|
|
|
|
break;
|
|
|
|
case 'x':
|
|
|
|
x2apic_mode = 1;
|
|
|
|
break;
|
|
|
|
case 'Y':
|
|
|
|
mptgen = 0;
|
|
|
|
break;
|
|
|
|
case 'h':
|
|
|
|
usage(0);
|
|
|
|
default:
|
|
|
|
usage(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
argc -= optind;
|
|
|
|
argv += optind;
|
|
|
|
|
|
|
|
if (argc != 1)
|
|
|
|
usage(1);
|
|
|
|
|
|
|
|
vmname = argv[0];
|
|
|
|
ctx = do_open(vmname);
|
|
|
|
|
|
|
|
max_vcpus = num_vcpus_allowed(ctx);
|
|
|
|
if (guest_ncpus > max_vcpus) {
|
|
|
|
fprintf(stderr, "%d vCPUs requested but only %d available\n",
|
|
|
|
guest_ncpus, max_vcpus);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
fbsdrun_set_capabilities(ctx, BSP);
|
|
|
|
|
|
|
|
vm_set_memflags(ctx, memflags);
|
|
|
|
err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
|
|
|
|
if (err) {
|
|
|
|
fprintf(stderr, "Unable to setup memory (%d)\n", errno);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
error = init_msr();
|
|
|
|
if (error) {
|
|
|
|
fprintf(stderr, "init_msr error %d", error);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
init_mem();
|
|
|
|
init_inout();
|
Initial bhyve native graphics support.
This adds emulations for a raw framebuffer device, PS2 keyboard/mouse,
XHCI USB controller and a USB tablet.
A simple VNC server is provided for keyboard/mouse input, and graphics
output.
A VGA emulation is included, but is currently disconnected until an
additional bhyve change to block out VGA memory is committed.
Credits:
- raw framebuffer, VNC server, XHCI controller, USB bus/device emulation
and UEFI f/w support by Leon Dang
- VGA, console/g, initial VNC server by tychon@
- PS2 keyboard/mouse jointly done by tychon@ and Leon Dang
- hypervisor framebuffer mem support by neel@
Tested by: Michael Dexter, in a number of revisions of this code.
With the appropriate UEFI image, FreeBSD, Windows and Linux guests can
installed and run in graphics mode using the UEFI/GOP framebuffer.
2016-05-27 06:30:35 +00:00
|
|
|
atkbdc_init(ctx);
|
2016-05-27 06:22:24 +00:00
|
|
|
pci_irq_init(ctx);
|
|
|
|
ioapic_init(ctx);
|
|
|
|
|
|
|
|
rtc_init(ctx, rtc_localtime);
|
|
|
|
sci_init(ctx);
|
|
|
|
|
|
|
|
/*
|
2016-11-08 23:59:41 +00:00
|
|
|
* Exit if a device emulation finds an error in its initilization
|
2016-05-27 06:22:24 +00:00
|
|
|
*/
|
|
|
|
if (init_pci(ctx) != 0)
|
|
|
|
exit(1);
|
|
|
|
|
2018-05-01 15:17:46 +00:00
|
|
|
if (dbg_port != 0)
|
|
|
|
init_dbgport(dbg_port);
|
|
|
|
|
2016-05-27 06:22:24 +00:00
|
|
|
if (gdb_port != 0)
|
2018-05-01 15:17:46 +00:00
|
|
|
init_gdb(ctx, gdb_port, gdb_stop);
|
2016-05-27 06:22:24 +00:00
|
|
|
|
|
|
|
if (bvmcons)
|
|
|
|
init_bvmcons();
|
|
|
|
|
|
|
|
if (lpc_bootrom()) {
|
|
|
|
if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) {
|
|
|
|
fprintf(stderr, "ROM boot failed: unrestricted guest "
|
|
|
|
"capability not available\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
error = vcpu_reset(ctx, BSP);
|
|
|
|
assert(error == 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
|
|
|
|
assert(error == 0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* build the guest tables, MP etc.
|
|
|
|
*/
|
|
|
|
if (mptgen) {
|
|
|
|
error = mptable_build(ctx, guest_ncpus);
|
|
|
|
if (error)
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
error = smbios_build(ctx);
|
|
|
|
assert(error == 0);
|
|
|
|
|
|
|
|
if (acpi) {
|
|
|
|
error = acpi_build(ctx, guest_ncpus);
|
|
|
|
assert(error == 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (lpc_bootrom())
|
|
|
|
fwctl_init();
|
|
|
|
|
2017-02-14 13:35:59 +00:00
|
|
|
#ifndef WITHOUT_CAPSICUM
|
|
|
|
caph_cache_catpages();
|
|
|
|
|
|
|
|
if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1)
|
|
|
|
errx(EX_OSERR, "Unable to apply rights for sandbox");
|
|
|
|
|
|
|
|
if (cap_enter() == -1 && errno != ENOSYS)
|
|
|
|
errx(EX_OSERR, "cap_enter() failed");
|
|
|
|
#endif
|
|
|
|
|
2016-05-27 06:22:24 +00:00
|
|
|
/*
|
|
|
|
* Change the proc title to include the VM name.
|
|
|
|
*/
|
|
|
|
setproctitle("%s", vmname);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Add CPU 0
|
|
|
|
*/
|
|
|
|
fbsdrun_addcpu(ctx, BSP, BSP, rip);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Head off to the main event dispatch loop
|
|
|
|
*/
|
|
|
|
mevent_dispatch();
|
|
|
|
|
|
|
|
exit(1);
|
|
|
|
}
|