2011-05-13 04:54:01 +00:00
|
|
|
/*-
|
2023-05-10 15:40:58 +00:00
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
2017-11-27 15:37:16 +00:00
|
|
|
*
|
2011-05-13 04:54:01 +00:00
|
|
|
* Copyright (c) 2011 NetApp, Inc.
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
* $FreeBSD$
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/linker_set.h>
|
2022-03-10 10:28:06 +00:00
|
|
|
#include <sys/mman.h>
|
2011-05-13 04:54:01 +00:00
|
|
|
|
|
|
|
#include <ctype.h>
|
2021-11-22 16:13:09 +00:00
|
|
|
#include <err.h>
|
2016-07-06 16:02:15 +00:00
|
|
|
#include <errno.h>
|
2014-01-29 14:56:48 +00:00
|
|
|
#include <pthread.h>
|
2011-05-13 04:54:01 +00:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <strings.h>
|
|
|
|
#include <assert.h>
|
2013-04-10 02:12:39 +00:00
|
|
|
#include <stdbool.h>
|
2021-11-22 15:24:47 +00:00
|
|
|
#include <sysexits.h>
|
2011-05-13 04:54:01 +00:00
|
|
|
|
|
|
|
#include <machine/vmm.h>
|
Initial support for bhyve save and restore.
Save and restore (also known as suspend and resume) permits a snapshot
to be taken of a guest's state that can later be resumed. In the
current implementation, bhyve(8) creates a UNIX domain socket that is
used by bhyvectl(8) to send a request to save a snapshot (and
optionally exit after the snapshot has been taken). A snapshot
currently consists of two files: the first holds a copy of guest RAM,
and the second file holds other guest state such as vCPU register
values and device model state.
To resume a guest, bhyve(8) must be started with a matching pair of
command line arguments to instantiate the same set of device models as
well as a pointer to the saved snapshot.
While the current implementation is useful for several use cases, it
has a few limitations. The file format for saving the guest state is
tied to the ABI of internal bhyve structures and is not
self-describing (in that it does not communicate the set of device
models present in the system). In addition, the state saved for some
device models closely matches the internal data structures which might
prove a challenge for compatibility of snapshot files across a range
of bhyve versions. The file format also does not currently support
versioning of individual chunks of state. As a result, the current
file format is not a fixed binary format and future revisions to save
and restore will break binary compatibility of snapshot files. The
goal is to move to a more flexible format that adds versioning,
etc. and at that point to commit to providing a reasonable level of
compatibility. As a result, the current implementation is not enabled
by default. It can be enabled via the WITH_BHYVE_SNAPSHOT=yes option
for userland builds, and the kernel option BHYVE_SNAPSHOT.
Submitted by: Mihai Tiganus, Flavius Anton, Darius Mihai
Submitted by: Elena Mihailescu, Mihai Carabas, Sergiu Weisz
Relnotes: yes
Sponsored by: University Politehnica of Bucharest
Sponsored by: Matthew Grooms (student scholarships)
Sponsored by: iXsystems
Differential Revision: https://reviews.freebsd.org/D19495
2020-05-05 00:02:04 +00:00
|
|
|
#include <machine/vmm_snapshot.h>
|
2011-05-13 04:54:01 +00:00
|
|
|
#include <vmmapi.h>
|
|
|
|
|
2014-01-02 21:26:59 +00:00
|
|
|
#include "acpi.h"
|
2012-12-13 01:58:11 +00:00
|
|
|
#include "bhyverun.h"
|
2019-06-26 20:30:41 +00:00
|
|
|
#include "config.h"
|
2020-01-08 22:55:22 +00:00
|
|
|
#include "debug.h"
|
2011-05-13 04:54:01 +00:00
|
|
|
#include "inout.h"
|
2014-01-29 14:56:48 +00:00
|
|
|
#include "ioapic.h"
|
2012-10-19 18:11:17 +00:00
|
|
|
#include "mem.h"
|
2011-05-13 04:54:01 +00:00
|
|
|
#include "pci_emul.h"
|
2014-05-15 14:16:55 +00:00
|
|
|
#include "pci_irq.h"
|
2014-01-02 21:26:59 +00:00
|
|
|
#include "pci_lpc.h"
|
2023-02-06 10:43:49 +00:00
|
|
|
#include "pci_passthru.h"
|
2021-08-16 07:47:53 +00:00
|
|
|
#include "qemu_fwcfg.h"
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2019-05-31 18:00:44 +00:00
|
|
|
#define CONF1_ADDR_PORT 0x0cf8
|
|
|
|
#define CONF1_DATA_PORT 0x0cfc
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2013-06-28 05:01:25 +00:00
|
|
|
#define CONF1_ENABLE 0x80000000ul
|
|
|
|
|
2014-02-14 21:34:08 +00:00
|
|
|
#define MAXBUSES (PCI_BUSMAX + 1)
|
2012-08-06 06:51:27 +00:00
|
|
|
#define MAXSLOTS (PCI_SLOTMAX + 1)
|
|
|
|
#define MAXFUNCS (PCI_FUNCMAX + 1)
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2021-11-22 15:22:48 +00:00
|
|
|
#define GB (1024 * 1024 * 1024UL)
|
|
|
|
|
2014-01-29 14:56:48 +00:00
|
|
|
/* Per-function bookkeeping for an emulated PCI device. */
struct funcinfo {
	nvlist_t *fi_config;		/* config node ("pci.<bus>.<slot>.<func>") */
	struct pci_devemu *fi_pde;	/* device model backing this function */
	struct pci_devinst *fi_devi;	/* device instance once created */
};
|
|
|
|
|
|
|
|
/* State of a single legacy INTx interrupt pin. */
struct intxinfo {
	int ii_count;		/* pin usage count (see pci_lintr_* below) */
	int ii_pirq_pin;	/* PIRQ routing for the pin */
	int ii_ioapic_irq;	/* I/O APIC IRQ routing for the pin */
};
|
|
|
|
|
|
|
|
/* Per-slot state: the four INTx pins (INTA#..INTD#) and the functions. */
struct slotinfo {
	struct intxinfo si_intpins[4];
	struct funcinfo si_funcs[MAXFUNCS];
};
|
|
|
|
|
|
|
|
/* Per-bus resource windows and per-slot state. */
struct businfo {
	uint16_t iobase, iolimit;		/* I/O window */
	uint32_t membase32, memlimit32;		/* mmio window below 4GB */
	uint64_t membase64, memlimit64;		/* mmio window above 4GB */
	struct slotinfo slotinfo[MAXSLOTS];
};
|
|
|
|
|
|
|
|
static struct businfo *pci_businfo[MAXBUSES];
|
2011-05-13 04:54:01 +00:00
|
|
|
|
|
|
|
SET_DECLARE(pci_devemu_set, struct pci_devemu);
|
|
|
|
|
|
|
|
static uint64_t pci_emul_iobase;
|
2022-03-10 10:28:06 +00:00
|
|
|
static uint8_t *pci_emul_rombase;
|
|
|
|
static uint64_t pci_emul_romoffset;
|
|
|
|
static uint8_t *pci_emul_romlim;
|
2011-05-13 04:54:01 +00:00
|
|
|
static uint64_t pci_emul_membase32;
|
|
|
|
static uint64_t pci_emul_membase64;
|
2020-11-12 00:46:53 +00:00
|
|
|
static uint64_t pci_emul_memlim64;
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2022-01-03 13:16:59 +00:00
|
|
|
/*
 * A pending BAR allocation.  BARs are not assigned an address at
 * creation time; they are queued on 'pci_bars' (kept sorted by size,
 * largest first -- see pci_emul_alloc_bar) and assigned addresses in
 * one pass once all devices are initialized.
 */
struct pci_bar_allocation {
	TAILQ_ENTRY(pci_bar_allocation) chain;
	struct pci_devinst *pdi;	/* owning device */
	int idx;			/* BAR register index */
	enum pcibar_type type;		/* I/O, MEM32, MEM64, ROM, ... */
	uint64_t size;			/* rounded-up power-of-2 size */
};

static TAILQ_HEAD(pci_bar_list, pci_bar_allocation) pci_bars =
    TAILQ_HEAD_INITIALIZER(pci_bars);
|
2022-01-03 13:16:59 +00:00
|
|
|
|
2021-08-16 07:47:53 +00:00
|
|
|
/*
 * Devices with an explicit boot order.
 * NOTE(review): presumably populated from a per-device boot-index
 * option -- confirm against the (not visible) list-insertion code.
 */
struct boot_device {
	TAILQ_ENTRY(boot_device) boot_device_chain;
	struct pci_devinst *pdi;	/* device to boot */
	int bootindex;			/* requested position in boot order */
};

static TAILQ_HEAD(boot_list, boot_device) boot_devices = TAILQ_HEAD_INITIALIZER(
    boot_devices);
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
#define PCI_EMUL_IOBASE 0x2000
|
|
|
|
#define PCI_EMUL_IOLIMIT 0x10000
|
|
|
|
|
2022-03-10 10:28:06 +00:00
|
|
|
#define PCI_EMUL_ROMSIZE 0x10000000
|
|
|
|
|
2014-08-08 03:49:01 +00:00
|
|
|
#define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */
|
|
|
|
#define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */
|
|
|
|
SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE);
|
|
|
|
|
2021-11-22 15:24:47 +00:00
|
|
|
/*
|
|
|
|
* OVMF always uses 0xC0000000 as base address for 32 bit PCI MMIO. Don't
|
|
|
|
* change this address without changing it in OVMF.
|
|
|
|
*/
|
|
|
|
#define PCI_EMUL_MEMBASE32 0xC0000000
|
2014-08-08 03:49:01 +00:00
|
|
|
#define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE
|
2021-11-22 15:22:48 +00:00
|
|
|
#define PCI_EMUL_MEMSIZE64 (32*GB)
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2019-06-26 20:30:41 +00:00
|
|
|
static struct pci_devemu *pci_emul_finddev(const char *name);
|
2014-08-08 03:49:01 +00:00
|
|
|
static void pci_lintr_route(struct pci_devinst *pi);
|
|
|
|
static void pci_lintr_update(struct pci_devinst *pi);
|
2023-01-19 18:30:18 +00:00
|
|
|
static void pci_cfgrw(int in, int bus, int slot, int func, int coff,
|
|
|
|
int bytes, uint32_t *val);
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2015-04-24 19:15:38 +00:00
|
|
|
static __inline void
|
|
|
|
CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (bytes == 1)
|
|
|
|
pci_set_cfgdata8(pi, coff, val);
|
|
|
|
else if (bytes == 2)
|
|
|
|
pci_set_cfgdata16(pi, coff, val);
|
|
|
|
else
|
|
|
|
pci_set_cfgdata32(pi, coff, val);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __inline uint32_t
|
|
|
|
CFGREAD(struct pci_devinst *pi, int coff, int bytes)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (bytes == 1)
|
|
|
|
return (pci_get_cfgdata8(pi, coff));
|
|
|
|
else if (bytes == 2)
|
|
|
|
return (pci_get_cfgdata16(pi, coff));
|
|
|
|
else
|
|
|
|
return (pci_get_cfgdata32(pi, coff));
|
|
|
|
}
|
|
|
|
|
2022-04-01 08:18:52 +00:00
|
|
|
static int
|
|
|
|
is_pcir_bar(int coff)
|
|
|
|
{
|
|
|
|
return (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1));
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
is_pcir_bios(int coff)
|
|
|
|
{
|
|
|
|
return (coff >= PCIR_BIOS && coff < PCIR_BIOS + 4);
|
|
|
|
}
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
/*
|
|
|
|
* I/O access
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Slot options are in the form:
|
|
|
|
*
|
2014-02-14 21:34:08 +00:00
|
|
|
* <bus>:<slot>:<func>,<emul>[,<config>]
|
2012-08-06 06:51:27 +00:00
|
|
|
* <slot>[:<func>],<emul>[,<config>]
|
2011-05-13 04:54:01 +00:00
|
|
|
*
|
|
|
|
* slot is 0..31
|
2012-08-06 06:51:27 +00:00
|
|
|
* func is 0..7
|
2011-05-13 04:54:01 +00:00
|
|
|
* emul is a string describing the type of PCI device e.g. virtio-net
|
|
|
|
* config is an optional string, depending on the device, that can be
|
|
|
|
* used for configuration.
|
|
|
|
* Examples are:
|
|
|
|
* 1,virtio-net,tap0
|
2012-08-06 06:51:27 +00:00
|
|
|
* 3:0,dummy
|
2011-05-13 04:54:01 +00:00
|
|
|
*/
|
|
|
|
/*
 * Report a malformed PCI slot option string; 'aopt' is the offending
 * option as supplied on the command line.
 */
static void
pci_parse_slot_usage(char *aopt)
{

	EPRINTLN("Invalid PCI slot info field \"%s\"", aopt);
}
|
|
|
|
|
2019-06-26 20:30:41 +00:00
|
|
|
/*
|
|
|
|
* Helper function to parse a list of comma-separated options where
|
|
|
|
* each option is formatted as "name[=value]". If no value is
|
|
|
|
* provided, the option is treated as a boolean and is given a value
|
|
|
|
* of true.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
pci_parse_legacy_config(nvlist_t *nvl, const char *opt)
|
|
|
|
{
|
|
|
|
char *config, *name, *tofree, *value;
|
|
|
|
|
|
|
|
if (opt == NULL)
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
config = tofree = strdup(opt);
|
|
|
|
while ((name = strsep(&config, ",")) != NULL) {
|
|
|
|
value = strchr(name, '=');
|
|
|
|
if (value != NULL) {
|
|
|
|
*value = '\0';
|
|
|
|
value++;
|
|
|
|
set_config_value_node(nvl, name, value);
|
|
|
|
} else
|
|
|
|
set_config_bool_node(nvl, name, true);
|
|
|
|
}
|
|
|
|
free(tofree);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* PCI device configuration is stored in MIBs that encode the device's
|
|
|
|
* location:
|
|
|
|
*
|
|
|
|
* pci.<bus>.<slot>.<func>
|
|
|
|
*
|
|
|
|
* Where "bus", "slot", and "func" are all decimal values without
|
|
|
|
* leading zeroes. Each valid device must have a "device" node which
|
|
|
|
* identifies the driver model of the device.
|
|
|
|
*
|
|
|
|
* Device backends can provide a parser for the "config" string. If
|
|
|
|
* a custom parser is not provided, pci_parse_legacy_config() is used
|
|
|
|
* to parse the string.
|
|
|
|
*/
|
2013-04-26 02:24:50 +00:00
|
|
|
/*
 * Parse one command-line slot option of the form
 * [<bus>:[<slot>:]]<slot-or-func-spec>,<emul>[,<config>] and record it
 * as a "pci.<bus>.<slot>.<func>" config node.  Returns 0 on success,
 * -1 (or the device's legacy-config parser result) on failure.
 */
int
pci_parse_slot(char *opt)
{
	char node_name[sizeof("pci.XXX.XX.X")];
	struct pci_devemu *pde;
	char *emul, *config, *str, *cp;
	int error, bnum, snum, fnum;
	nvlist_t *nvl;

	error = -1;
	str = strdup(opt);

	/* Split "<location>,<emul>[,<config>]" in place. */
	emul = config = NULL;
	if ((cp = strchr(str, ',')) != NULL) {
		*cp = '\0';
		emul = cp + 1;
		if ((cp = strchr(emul, ',')) != NULL) {
			*cp = '\0';
			config = cp + 1;
		}
	} else {
		/* An emulation name is mandatory. */
		pci_parse_slot_usage(opt);
		goto done;
	}

	/*
	 * Try the most specific location format first; each fallback
	 * defaults the fields the shorter format omits.  The order of
	 * these sscanf() calls matters: a partial match still stores
	 * the leading fields.
	 */
	/* <bus>:<slot>:<func> */
	if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) {
		bnum = 0;
		/* <slot>:<func> */
		if (sscanf(str, "%d:%d", &snum, &fnum) != 2) {
			fnum = 0;
			/* <slot> */
			if (sscanf(str, "%d", &snum) != 1) {
				snum = -1;	/* force the range check to fail */
			}
		}
	}

	if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS ||
	    fnum < 0 || fnum >= MAXFUNCS) {
		pci_parse_slot_usage(opt);
		goto done;
	}

	pde = pci_emul_finddev(emul);
	if (pde == NULL) {
		EPRINTLN("pci slot %d:%d:%d: unknown device \"%s\"", bnum, snum,
		    fnum, emul);
		goto done;
	}

	/* Refuse to configure the same bus/slot/function twice. */
	snprintf(node_name, sizeof(node_name), "pci.%d.%d.%d", bnum, snum,
	    fnum);
	nvl = find_config_node(node_name);
	if (nvl != NULL) {
		EPRINTLN("pci slot %d:%d:%d already occupied!", bnum, snum,
		    fnum);
		goto done;
	}
	nvl = create_config_node(node_name);
	/* Prefer the canonical alias name if the model defines one. */
	if (pde->pe_alias != NULL)
		set_config_value_node(nvl, "device", pde->pe_alias);
	else
		set_config_value_node(nvl, "device", pde->pe_emu);

	/* Let the device model parse 'config' if it has a custom parser. */
	if (pde->pe_legacy_config != NULL)
		error = pde->pe_legacy_config(nvl, config);
	else
		error = pci_parse_legacy_config(nvl, config);
done:
	free(str);
	return (error);
}
|
|
|
|
|
2018-08-22 20:23:08 +00:00
|
|
|
void
|
2022-08-16 17:10:58 +00:00
|
|
|
pci_print_supported_devices(void)
|
2018-08-22 20:23:08 +00:00
|
|
|
{
|
|
|
|
struct pci_devemu **pdpp, *pdp;
|
|
|
|
|
|
|
|
SET_FOREACH(pdpp, pci_devemu_set) {
|
|
|
|
pdp = *pdpp;
|
|
|
|
printf("%s\n", pdp->pe_emu);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-02-06 10:43:49 +00:00
|
|
|
uint32_t
|
|
|
|
pci_config_read_reg(const struct pcisel *const host_sel, nvlist_t *nvl,
|
|
|
|
const uint32_t reg, const uint8_t size, const uint32_t def)
|
|
|
|
{
|
|
|
|
const char *config;
|
|
|
|
const nvlist_t *pci_regs;
|
|
|
|
|
|
|
|
assert(size == 1 || size == 2 || size == 4);
|
|
|
|
|
|
|
|
pci_regs = find_relative_config_node(nvl, "pcireg");
|
|
|
|
if (pci_regs == NULL) {
|
|
|
|
return def;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (reg) {
|
|
|
|
case PCIR_DEVICE:
|
|
|
|
config = get_config_value_node(pci_regs, "device");
|
|
|
|
break;
|
|
|
|
case PCIR_VENDOR:
|
|
|
|
config = get_config_value_node(pci_regs, "vendor");
|
|
|
|
break;
|
|
|
|
case PCIR_REVID:
|
|
|
|
config = get_config_value_node(pci_regs, "revid");
|
|
|
|
break;
|
|
|
|
case PCIR_SUBVEND_0:
|
|
|
|
config = get_config_value_node(pci_regs, "subvendor");
|
|
|
|
break;
|
|
|
|
case PCIR_SUBDEV_0:
|
|
|
|
config = get_config_value_node(pci_regs, "subdevice");
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (config == NULL) {
|
|
|
|
return def;
|
|
|
|
} else if (host_sel != NULL && strcmp(config, "host") == 0) {
|
|
|
|
return read_config(host_sel, reg, size);
|
|
|
|
} else {
|
|
|
|
return strtol(config, NULL, 16);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-01-30 04:30:36 +00:00
|
|
|
static int
|
|
|
|
pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (offset < pi->pi_msix.pba_offset)
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) {
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size,
|
|
|
|
uint64_t value)
|
|
|
|
{
|
|
|
|
int msix_entry_offset;
|
|
|
|
int tab_index;
|
|
|
|
char *dest;
|
|
|
|
|
|
|
|
/* support only 4 or 8 byte writes */
|
|
|
|
if (size != 4 && size != 8)
|
|
|
|
return (-1);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Return if table index is beyond what device supports
|
|
|
|
*/
|
|
|
|
tab_index = offset / MSIX_TABLE_ENTRY_SIZE;
|
|
|
|
if (tab_index >= pi->pi_msix.table_count)
|
|
|
|
return (-1);
|
|
|
|
|
|
|
|
msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
|
|
|
|
|
|
|
|
/* support only aligned writes */
|
|
|
|
if ((msix_entry_offset % size) != 0)
|
|
|
|
return (-1);
|
|
|
|
|
|
|
|
dest = (char *)(pi->pi_msix.table + tab_index);
|
|
|
|
dest += msix_entry_offset;
|
|
|
|
|
|
|
|
if (size == 4)
|
|
|
|
*((uint32_t *)dest) = value;
|
|
|
|
else
|
|
|
|
*((uint64_t *)dest) = value;
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t
|
|
|
|
pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size)
|
|
|
|
{
|
|
|
|
char *dest;
|
|
|
|
int msix_entry_offset;
|
|
|
|
int tab_index;
|
|
|
|
uint64_t retval = ~0;
|
|
|
|
|
2013-08-27 16:50:48 +00:00
|
|
|
/*
|
|
|
|
* The PCI standard only allows 4 and 8 byte accesses to the MSI-X
|
2015-10-21 05:37:09 +00:00
|
|
|
* table but we also allow 1 byte access to accommodate reads from
|
2013-08-27 16:50:48 +00:00
|
|
|
* ddb.
|
|
|
|
*/
|
|
|
|
if (size != 1 && size != 4 && size != 8)
|
2013-01-30 04:30:36 +00:00
|
|
|
return (retval);
|
|
|
|
|
|
|
|
msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
|
|
|
|
|
|
|
|
/* support only aligned reads */
|
|
|
|
if ((msix_entry_offset % size) != 0) {
|
|
|
|
return (retval);
|
|
|
|
}
|
|
|
|
|
|
|
|
tab_index = offset / MSIX_TABLE_ENTRY_SIZE;
|
|
|
|
|
|
|
|
if (tab_index < pi->pi_msix.table_count) {
|
|
|
|
/* valid MSI-X Table access */
|
|
|
|
dest = (char *)(pi->pi_msix.table + tab_index);
|
|
|
|
dest += msix_entry_offset;
|
|
|
|
|
2013-08-27 16:50:48 +00:00
|
|
|
if (size == 1)
|
|
|
|
retval = *((uint8_t *)dest);
|
|
|
|
else if (size == 4)
|
2013-01-30 04:30:36 +00:00
|
|
|
retval = *((uint32_t *)dest);
|
|
|
|
else
|
|
|
|
retval = *((uint64_t *)dest);
|
|
|
|
} else if (pci_valid_pba_offset(pi, offset)) {
|
|
|
|
/* return 0 for PBA access */
|
|
|
|
retval = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return (retval);
|
|
|
|
}
|
|
|
|
|
2013-02-01 02:41:47 +00:00
|
|
|
int
|
|
|
|
pci_msix_table_bar(struct pci_devinst *pi)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (pi->pi_msix.table != NULL)
|
|
|
|
return (pi->pi_msix.table_bar);
|
|
|
|
else
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
pci_msix_pba_bar(struct pci_devinst *pi)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (pi->pi_msix.table != NULL)
|
|
|
|
return (pi->pi_msix.pba_bar);
|
|
|
|
else
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
static int
|
2023-01-19 18:30:18 +00:00
|
|
|
pci_emul_io_handler(struct vmctx *ctx __unused, int in, int port,
|
2022-12-09 18:35:28 +00:00
|
|
|
int bytes, uint32_t *eax, void *arg)
|
2011-05-13 04:54:01 +00:00
|
|
|
{
|
|
|
|
struct pci_devinst *pdi = arg;
|
|
|
|
struct pci_devemu *pe = pdi->pi_d;
|
2012-10-19 18:11:17 +00:00
|
|
|
uint64_t offset;
|
|
|
|
int i;
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2022-10-23 14:32:45 +00:00
|
|
|
assert(port >= 0);
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
for (i = 0; i <= PCI_BARMAX; i++) {
|
|
|
|
if (pdi->pi_bar[i].type == PCIBAR_IO &&
|
2022-10-23 14:32:45 +00:00
|
|
|
(uint64_t)port >= pdi->pi_bar[i].addr &&
|
|
|
|
(uint64_t)port + bytes <=
|
|
|
|
pdi->pi_bar[i].addr + pdi->pi_bar[i].size) {
|
2011-05-13 04:54:01 +00:00
|
|
|
offset = port - pdi->pi_bar[i].addr;
|
|
|
|
if (in)
|
2023-01-19 18:30:18 +00:00
|
|
|
*eax = (*pe->pe_barread)(pdi, i,
|
2012-10-19 18:11:17 +00:00
|
|
|
offset, bytes);
|
2011-05-13 04:54:01 +00:00
|
|
|
else
|
2023-01-19 18:30:18 +00:00
|
|
|
(*pe->pe_barwrite)(pdi, i, offset,
|
2012-10-19 18:11:17 +00:00
|
|
|
bytes, *eax);
|
2011-05-13 04:54:01 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
|
2012-10-19 18:11:17 +00:00
|
|
|
static int
|
2023-03-24 18:49:06 +00:00
|
|
|
pci_emul_mem_handler(struct vcpu *vcpu __unused, int dir,
|
2022-12-09 18:35:28 +00:00
|
|
|
uint64_t addr, int size, uint64_t *val, void *arg1, long arg2)
|
2012-10-19 18:11:17 +00:00
|
|
|
{
|
|
|
|
struct pci_devinst *pdi = arg1;
|
|
|
|
struct pci_devemu *pe = pdi->pi_d;
|
|
|
|
uint64_t offset;
|
|
|
|
int bidx = (int) arg2;
|
|
|
|
|
|
|
|
assert(bidx <= PCI_BARMAX);
|
|
|
|
assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 ||
|
|
|
|
pdi->pi_bar[bidx].type == PCIBAR_MEM64);
|
|
|
|
assert(addr >= pdi->pi_bar[bidx].addr &&
|
|
|
|
addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size);
|
|
|
|
|
|
|
|
offset = addr - pdi->pi_bar[bidx].addr;
|
|
|
|
|
2014-06-06 16:18:37 +00:00
|
|
|
if (dir == MEM_F_WRITE) {
|
2014-06-09 19:55:50 +00:00
|
|
|
if (size == 8) {
|
2023-01-19 18:30:18 +00:00
|
|
|
(*pe->pe_barwrite)(pdi, bidx, offset,
|
2014-06-06 16:18:37 +00:00
|
|
|
4, *val & 0xffffffff);
|
2023-01-19 18:30:18 +00:00
|
|
|
(*pe->pe_barwrite)(pdi, bidx, offset + 4,
|
2014-06-06 16:18:37 +00:00
|
|
|
4, *val >> 32);
|
|
|
|
} else {
|
2023-01-19 18:30:18 +00:00
|
|
|
(*pe->pe_barwrite)(pdi, bidx, offset,
|
2014-06-06 16:18:37 +00:00
|
|
|
size, *val);
|
|
|
|
}
|
|
|
|
} else {
|
2014-06-09 19:55:50 +00:00
|
|
|
if (size == 8) {
|
2023-01-19 18:30:18 +00:00
|
|
|
*val = (*pe->pe_barread)(pdi, bidx,
|
2014-06-06 16:18:37 +00:00
|
|
|
offset, 4);
|
2023-01-19 18:30:18 +00:00
|
|
|
*val |= (*pe->pe_barread)(pdi, bidx,
|
2014-06-06 16:18:37 +00:00
|
|
|
offset + 4, 4) << 32;
|
|
|
|
} else {
|
2023-01-19 18:30:18 +00:00
|
|
|
*val = (*pe->pe_barread)(pdi, bidx,
|
2014-06-06 16:18:37 +00:00
|
|
|
offset, size);
|
|
|
|
}
|
|
|
|
}
|
2012-10-19 18:11:17 +00:00
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
/*
 * Allocate 'size' bytes from the region [*baseptr, limit), aligned to
 * 'size' (which must be a power of 2).  On success the allocated
 * address is stored in '*addr', '*baseptr' is advanced past the
 * allocation, and 0 is returned.  Returns -1 if the region is
 * exhausted.
 *
 * Fix: the old "base + size <= limit" test (and roundup2 itself) could
 * wrap around at the top of the uint64_t range and falsely succeed;
 * the checks below are overflow-safe.
 */
static int
pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
    uint64_t *addr)
{
	uint64_t base;

	assert((size & (size - 1)) == 0);	/* must be a power of 2 */

	/* Round the current base up to the required alignment. */
	base = (*baseptr + size - 1) & ~(size - 1);

	/*
	 * Fail if the round-up wrapped, or if the allocation would not
	 * fit below 'limit' (phrased without computing base + size,
	 * which could itself overflow).
	 */
	if (base < *baseptr || size > limit || base > limit - size)
		return (-1);

	*addr = base;
	*baseptr = base + size;
	return (0);
}
|
|
|
|
|
2013-04-10 02:12:39 +00:00
|
|
|
/*
 * Register (or unregister) the MMIO or I/O region associated with the BAR
 * register 'idx' of an emulated pci device.
 */
static void
modify_bar_registration(struct pci_devinst *pi, int idx, int registration)
{
	struct pci_devemu *pe;
	int error;
	struct inout_port iop;
	struct mem_range mr;

	pe = pi->pi_d;
	switch (pi->pi_bar[idx].type) {
	case PCIBAR_IO:
		/* I/O BAR: (un)hook the port range with the inout layer. */
		bzero(&iop, sizeof(struct inout_port));
		iop.name = pi->pi_name;
		iop.port = pi->pi_bar[idx].addr;
		iop.size = pi->pi_bar[idx].size;
		if (registration) {
			iop.flags = IOPORT_F_INOUT;
			iop.handler = pci_emul_io_handler;
			iop.arg = pi;
			error = register_inout(&iop);
		} else
			error = unregister_inout(&iop);
		/* notify the device model of the address change, if it cares */
		if (pe->pe_baraddr != NULL)
			(*pe->pe_baraddr)(pi, idx, registration,
			    pi->pi_bar[idx].addr);
		break;
	case PCIBAR_MEM32:
	case PCIBAR_MEM64:
		/* Memory BAR: (un)hook the range with the MMIO layer. */
		bzero(&mr, sizeof(struct mem_range));
		mr.name = pi->pi_name;
		mr.base = pi->pi_bar[idx].addr;
		mr.size = pi->pi_bar[idx].size;
		if (registration) {
			mr.flags = MEM_F_RW;
			mr.handler = pci_emul_mem_handler;
			mr.arg1 = pi;
			mr.arg2 = idx;	/* BAR index, recovered in the handler */
			error = register_mem(&mr);
		} else
			error = unregister_mem(&mr);
		if (pe->pe_baraddr != NULL)
			(*pe->pe_baraddr)(pi, idx, registration,
			    pi->pi_bar[idx].addr);
		break;
	case PCIBAR_ROM:
		/* No trap is installed for the ROM; only notify the model. */
		error = 0;
		if (pe->pe_baraddr != NULL)
			(*pe->pe_baraddr)(pi, idx, registration,
			    pi->pi_bar[idx].addr);
		break;
	default:
		error = EINVAL;
		break;
	}
	assert(error == 0);
}
|
|
|
|
|
|
|
|
/* Stop decoding the region currently mapped by BAR 'idx' of 'pi'. */
static void
unregister_bar(struct pci_devinst *pi, int idx)
{

	modify_bar_registration(pi, idx, 0);
}
|
|
|
|
|
|
|
|
/* Start decoding the region currently mapped by BAR 'idx' of 'pi'. */
static void
register_bar(struct pci_devinst *pi, int idx)
{

	modify_bar_registration(pi, idx, 1);
}
|
|
|
|
|
2022-03-10 10:28:06 +00:00
|
|
|
/* Is the ROM enabled for the emulated pci device? */
|
|
|
|
static int
|
|
|
|
romen(struct pci_devinst *pi)
|
|
|
|
{
|
|
|
|
return (pi->pi_bar[PCI_ROM_IDX].lobits & PCIM_BIOS_ENABLE) ==
|
|
|
|
PCIM_BIOS_ENABLE;
|
|
|
|
}
|
|
|
|
|
2013-04-10 02:12:39 +00:00
|
|
|
/* Are we decoding i/o port accesses for the emulated pci device? */
|
|
|
|
static int
|
|
|
|
porten(struct pci_devinst *pi)
|
|
|
|
{
|
|
|
|
uint16_t cmd;
|
|
|
|
|
|
|
|
cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
|
|
|
|
|
|
|
|
return (cmd & PCIM_CMD_PORTEN);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Are we decoding memory accesses for the emulated pci device? */
|
|
|
|
static int
|
|
|
|
memen(struct pci_devinst *pi)
|
|
|
|
{
|
|
|
|
uint16_t cmd;
|
|
|
|
|
|
|
|
cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
|
|
|
|
|
|
|
|
return (cmd & PCIM_CMD_MEMEN);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Update the MMIO or I/O address that is decoded by the BAR register.
|
|
|
|
*
|
|
|
|
* If the pci device has enabled the address space decoding then intercept
|
|
|
|
* the address range decoded by the BAR register.
|
|
|
|
*/
|
|
|
|
static void
|
2019-05-31 18:00:44 +00:00
|
|
|
update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type)
|
2013-04-10 02:12:39 +00:00
|
|
|
{
|
|
|
|
int decode;
|
|
|
|
|
|
|
|
if (pi->pi_bar[idx].type == PCIBAR_IO)
|
|
|
|
decode = porten(pi);
|
|
|
|
else
|
|
|
|
decode = memen(pi);
|
|
|
|
|
|
|
|
if (decode)
|
|
|
|
unregister_bar(pi, idx);
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case PCIBAR_IO:
|
|
|
|
case PCIBAR_MEM32:
|
|
|
|
pi->pi_bar[idx].addr = addr;
|
|
|
|
break;
|
|
|
|
case PCIBAR_MEM64:
|
|
|
|
pi->pi_bar[idx].addr &= ~0xffffffffUL;
|
|
|
|
pi->pi_bar[idx].addr |= addr;
|
|
|
|
break;
|
|
|
|
case PCIBAR_MEMHI64:
|
|
|
|
pi->pi_bar[idx].addr &= 0xffffffff;
|
|
|
|
pi->pi_bar[idx].addr |= addr;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
assert(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (decode)
|
|
|
|
register_bar(pi, idx);
|
|
|
|
}
|
|
|
|
|
2012-10-19 18:11:17 +00:00
|
|
|
/*
 * Record a BAR of the given 'type' and 'size' for device 'pdi'.  The
 * BAR is not assigned an address here; it is queued on the size-sorted
 * 'pci_bars' list and assigned later in one pass.  The size is rounded
 * up to a power of 2 and to the PCI-mandated minimum for its type.
 * Always returns 0.
 */
int
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
    uint64_t size)
{
	assert((type == PCIBAR_ROM) || (idx >= 0 && idx <= PCI_BARMAX));
	assert((type != PCIBAR_ROM) || (idx == PCI_ROM_IDX));

	if ((size & (size - 1)) != 0)
		size = 1UL << flsl(size);	/* round up to a power of 2 */

	/* Enforce minimum BAR sizes required by the PCI standard */
	if (type == PCIBAR_IO) {
		if (size < 4)
			size = 4;
	} else if (type == PCIBAR_ROM) {
		if (size < ~PCIM_BIOS_ADDR_MASK + 1)
			size = ~PCIM_BIOS_ADDR_MASK + 1;
	} else {
		if (size < 16)
			size = 16;
	}

	/*
	 * To reduce fragmentation of the MMIO space, we allocate the BARs by
	 * size. Therefore, don't allocate the BAR yet. We create a list of all
	 * BAR allocation which is sorted by BAR size. When all PCI devices are
	 * initialized, we will assign an address to the BARs.
	 */

	/* create a new list entry (allocation assumed to succeed) */
	struct pci_bar_allocation *const new_bar = malloc(sizeof(*new_bar));
	memset(new_bar, 0, sizeof(*new_bar));
	new_bar->pdi = pdi;
	new_bar->idx = idx;
	new_bar->type = type;
	new_bar->size = size;

	/*
	 * Search for a BAR which size is lower than the size of our newly
	 * allocated BAR.  The list is kept in descending size order, so
	 * the first smaller entry marks the insertion point.
	 */
	struct pci_bar_allocation *bar = NULL;
	TAILQ_FOREACH(bar, &pci_bars, chain) {
		if (bar->size < size) {
			break;
		}
	}

	if (bar == NULL) {
		/*
		 * Either the list is empty or new BAR is the smallest BAR of
		 * the list. Append it to the end of our list.
		 */
		TAILQ_INSERT_TAIL(&pci_bars, new_bar, chain);
	} else {
		/*
		 * The found BAR is smaller than our new BAR. For that reason,
		 * insert our new BAR before the found BAR.
		 */
		TAILQ_INSERT_BEFORE(bar, new_bar, chain);
	}

	/*
	 * pci_passthru devices synchronize their physical and virtual command
	 * register on init. For that reason, the virtual cmd reg should be
	 * updated as early as possible.
	 */
	uint16_t enbit = 0;
	switch (type) {
	case PCIBAR_IO:
		enbit = PCIM_CMD_PORTEN;
		break;
	case PCIBAR_MEM64:
	case PCIBAR_MEM32:
		enbit = PCIM_CMD_MEMEN;
		break;
	default:
		enbit = 0;
		break;
	}

	/* Enable the matching decode bit in the command register now. */
	const uint16_t cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND);
	pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit);

	return (0);
}
|
|
|
|
|
|
|
|
/*
 * Assign a guest-physical address to BAR 'idx' of device 'pdi' and program
 * the BAR register(s) in emulated config space.
 *
 * The address is carved out of the resource window selected by 'type'
 * (I/O port space, 32-bit MMIO, or 64-bit MMIO).  PCIBAR_NONE and
 * PCIBAR_ROM consume no resources here.  Returns 0 on success or the
 * error from pci_emul_alloc_resource() if the window is exhausted.
 */
static int
pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx,
    const enum pcibar_type type, const uint64_t size)
{
	int error;
	uint64_t *baseptr, limit, addr, mask, lobits, bar;

	/* Select the allocation window and the BAR low-bit encoding. */
	switch (type) {
	case PCIBAR_NONE:
		baseptr = NULL;
		addr = mask = lobits = 0;
		break;
	case PCIBAR_IO:
		baseptr = &pci_emul_iobase;
		limit = PCI_EMUL_IOLIMIT;
		mask = PCIM_BAR_IO_BASE;
		lobits = PCIM_BAR_IO_SPACE;
		break;
	case PCIBAR_MEM64:
		/*
		 * XXX
		 * Some drivers do not work well if the 64-bit BAR is allocated
		 * above 4GB. Allow for this by allocating small requests under
		 * 4GB unless the allocation size is larger than some arbitrary
		 * number (128MB currently).
		 */
		if (size > 128 * 1024 * 1024) {
			baseptr = &pci_emul_membase64;
			limit = pci_emul_memlim64;
			mask = PCIM_BAR_MEM_BASE;
			lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
			    PCIM_BAR_MEM_PREFETCH;
		} else {
			/* Small 64-bit BARs are placed below 4GB. */
			baseptr = &pci_emul_membase32;
			limit = PCI_EMUL_MEMLIMIT32;
			mask = PCIM_BAR_MEM_BASE;
			lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
		}
		break;
	case PCIBAR_MEM32:
		baseptr = &pci_emul_membase32;
		limit = PCI_EMUL_MEMLIMIT32;
		mask = PCIM_BAR_MEM_BASE;
		lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
		break;
	case PCIBAR_ROM:
		/* do not claim memory for ROM. OVMF will do it for us. */
		baseptr = NULL;
		limit = 0;
		mask = PCIM_BIOS_ADDR_MASK;
		lobits = 0;
		break;
	default:
		printf("pci_emul_alloc_base: invalid bar type %d\n", type);
		assert(0);
	}

	if (baseptr != NULL) {
		error = pci_emul_alloc_resource(baseptr, limit, size, &addr);
		if (error != 0)
			return (error);
	} else {
		addr = 0;
	}

	pdi->pi_bar[idx].type = type;
	pdi->pi_bar[idx].addr = addr;
	pdi->pi_bar[idx].size = size;
	/*
	 * passthru devices are using same lobits as physical device they set
	 * this property
	 */
	if (pdi->pi_bar[idx].lobits != 0) {
		lobits = pdi->pi_bar[idx].lobits;
	} else {
		pdi->pi_bar[idx].lobits = lobits;
	}

	/* Initialize the BAR register in config space */
	bar = (addr & mask) | lobits;
	pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar);

	/* A 64-bit BAR also consumes the next BAR slot for its high dword. */
	if (type == PCIBAR_MEM64) {
		assert(idx + 1 <= PCI_BARMAX);
		pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64;
		pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
	}

	/* ROM BARs are decoded by firmware, so don't register a handler. */
	if (type != PCIBAR_ROM) {
		register_bar(pdi, idx);
	}

	return (0);
}
|
|
|
|
|
|
|
|
/*
 * Allocate space for a device's option ROM out of the shared ROM segment.
 *
 * The backing devmem segment is created lazily on the first call.  On
 * success, *addr points at the host mapping for this device's ROM and
 * pdi->pi_romoffset records its offset within the segment.  Returns 0 on
 * success, -1 if the segment cannot be created or is full.
 */
int
pci_emul_alloc_rom(struct pci_devinst *const pdi, const uint64_t size,
    void **const addr)
{
	/* allocate ROM space once on first call */
	if (pci_emul_rombase == 0) {
		pci_emul_rombase = vm_create_devmem(pdi->pi_vmctx, VM_PCIROM,
		    "pcirom", PCI_EMUL_ROMSIZE);
		if (pci_emul_rombase == MAP_FAILED) {
			warnx("%s: failed to create rom segment", __func__);
			return (-1);
		}
		pci_emul_romlim = pci_emul_rombase + PCI_EMUL_ROMSIZE;
		pci_emul_romoffset = 0;
	}

	/*
	 * ROM size should be a power of 2 and greater than 2 KB.
	 * NOTE(review): 1UL << flsl(size) rounds up to the next power of
	 * two, but doubles sizes that are already powers of two — confirm
	 * this over-allocation is intentional.
	 */
	const uint64_t rom_size = MAX(1UL << flsl(size),
	    ~PCIM_BIOS_ADDR_MASK + 1);

	/* check if ROM fits into ROM space */
	if (pci_emul_romoffset + rom_size > PCI_EMUL_ROMSIZE) {
		warnx("%s: no space left in rom segment:", __func__);
		warnx("%16lu bytes left",
		    PCI_EMUL_ROMSIZE - pci_emul_romoffset);
		warnx("%16lu bytes required by %d/%d/%d", rom_size, pdi->pi_bus,
		    pdi->pi_slot, pdi->pi_func);
		return (-1);
	}

	/* allocate ROM BAR */
	const int error = pci_emul_alloc_bar(pdi, PCI_ROM_IDX, PCIBAR_ROM,
	    rom_size);
	if (error)
		return error;

	/* return address */
	*addr = pci_emul_rombase + pci_emul_romoffset;

	/* save offset into ROM Space */
	pdi->pi_romoffset = pci_emul_romoffset;

	/* increase offset for next ROM */
	pci_emul_romoffset += rom_size;

	return (0);
}
|
|
|
|
|
2021-08-16 07:47:53 +00:00
|
|
|
int
|
|
|
|
pci_emul_add_boot_device(struct pci_devinst *pi, int bootindex)
|
|
|
|
{
|
|
|
|
struct boot_device *new_device, *device;
|
|
|
|
|
|
|
|
/* don't permit a negative bootindex */
|
|
|
|
if (bootindex < 0) {
|
|
|
|
errx(4, "Invalid bootindex %d for %s", bootindex, pi->pi_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* alloc new boot device */
|
|
|
|
new_device = calloc(1, sizeof(struct boot_device));
|
|
|
|
if (new_device == NULL) {
|
|
|
|
return (ENOMEM);
|
|
|
|
}
|
|
|
|
new_device->pdi = pi;
|
|
|
|
new_device->bootindex = bootindex;
|
|
|
|
|
|
|
|
/* search for boot device with higher boot index */
|
|
|
|
TAILQ_FOREACH(device, &boot_devices, boot_device_chain) {
|
|
|
|
if (device->bootindex == bootindex) {
|
|
|
|
errx(4,
|
|
|
|
"Could not set bootindex %d for %s. Bootindex already occupied by %s",
|
|
|
|
bootindex, pi->pi_name, device->pdi->pi_name);
|
|
|
|
} else if (device->bootindex > bootindex) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* add boot device to queue */
|
|
|
|
if (device == NULL) {
|
|
|
|
TAILQ_INSERT_TAIL(&boot_devices, new_device, boot_device_chain);
|
|
|
|
} else {
|
|
|
|
TAILQ_INSERT_BEFORE(device, new_device, boot_device_chain);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
#define CAP_START_OFFSET 0x40
|
|
|
|
/*
 * Append a capability structure to the device's config-space capability
 * list.
 *
 * 'capdata'/'caplen' are copied verbatim starting at the next free,
 * dword-aligned offset (CAP_START_OFFSET for the first capability).  The
 * previous capability's "next" pointer (or PCIR_CAP_PTR and the status
 * CAPPRESENT bit for the first one) is updated to link it in.  Returns 0
 * on success, -1 if config space has no room.
 */
static int
pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen)
{
	int i, capoff, reallen;
	uint16_t sts;

	assert(caplen > 0);

	reallen = roundup2(caplen, 4);		/* dword aligned */

	/* Place after the last capability, or at the start of the region. */
	sts = pci_get_cfgdata16(pi, PCIR_STATUS);
	if ((sts & PCIM_STATUS_CAPPRESENT) == 0)
		capoff = CAP_START_OFFSET;
	else
		capoff = pi->pi_capend + 1;

	/* Check if we have enough space */
	if (capoff + reallen > PCI_REGMAX + 1)
		return (-1);

	/* Set the previous capability pointer */
	if ((sts & PCIM_STATUS_CAPPRESENT) == 0) {
		pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff);
		pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT);
	} else
		pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff);

	/* Copy the capability */
	for (i = 0; i < caplen; i++)
		pci_set_cfgdata8(pi, capoff + i, capdata[i]);

	/* Set the next capability pointer */
	pci_set_cfgdata8(pi, capoff + 1, 0);

	pi->pi_prevcap = capoff;
	pi->pi_capend = capoff + reallen - 1;
	return (0);
}
|
|
|
|
|
|
|
|
/*
 * Look up a device emulation by name in the linker set of registered
 * emulations.  Returns the matching pci_devemu, or NULL if none matches.
 */
static struct pci_devemu *
pci_emul_finddev(const char *name)
{
	struct pci_devemu **iter;

	SET_FOREACH(iter, pci_devemu_set) {
		if (strcmp((*iter)->pe_emu, name) == 0)
			return (*iter);
	}

	return (NULL);
}
|
|
|
|
|
2013-07-04 05:35:56 +00:00
|
|
|
static int
|
2014-02-14 21:34:08 +00:00
|
|
|
pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot,
|
|
|
|
int func, struct funcinfo *fi)
|
2011-05-13 04:54:01 +00:00
|
|
|
{
|
|
|
|
struct pci_devinst *pdi;
|
2013-07-04 05:35:56 +00:00
|
|
|
int err;
|
|
|
|
|
2014-04-22 18:55:21 +00:00
|
|
|
pdi = calloc(1, sizeof(struct pci_devinst));
|
2011-05-13 04:54:01 +00:00
|
|
|
|
|
|
|
pdi->pi_vmctx = ctx;
|
2014-02-14 21:34:08 +00:00
|
|
|
pdi->pi_bus = bus;
|
2011-05-13 04:54:01 +00:00
|
|
|
pdi->pi_slot = slot;
|
2012-08-06 06:51:27 +00:00
|
|
|
pdi->pi_func = func;
|
2014-01-29 14:56:48 +00:00
|
|
|
pthread_mutex_init(&pdi->pi_lintr.lock, NULL);
|
|
|
|
pdi->pi_lintr.pin = 0;
|
|
|
|
pdi->pi_lintr.state = IDLE;
|
2014-05-15 14:16:55 +00:00
|
|
|
pdi->pi_lintr.pirq_pin = 0;
|
2014-01-29 14:56:48 +00:00
|
|
|
pdi->pi_lintr.ioapic_irq = 0;
|
2011-05-13 04:54:01 +00:00
|
|
|
pdi->pi_d = pde;
|
2023-05-15 14:28:45 +00:00
|
|
|
snprintf(pdi->pi_name, PI_NAMESZ, "%s@pci.%d.%d.%d", pde->pe_emu, bus,
|
|
|
|
slot, func);
|
2011-05-13 04:54:01 +00:00
|
|
|
|
|
|
|
/* Disable legacy interrupts */
|
|
|
|
pci_set_cfgdata8(pdi, PCIR_INTLINE, 255);
|
|
|
|
pci_set_cfgdata8(pdi, PCIR_INTPIN, 0);
|
|
|
|
|
2019-06-07 15:48:12 +00:00
|
|
|
pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN);
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2023-01-19 18:30:18 +00:00
|
|
|
err = (*pde->pe_init)(pdi, fi->fi_config);
|
2014-02-14 21:34:08 +00:00
|
|
|
if (err == 0)
|
|
|
|
fi->fi_devi = pdi;
|
|
|
|
else
|
2011-05-13 04:54:01 +00:00
|
|
|
free(pdi);
|
2013-07-04 05:35:56 +00:00
|
|
|
|
|
|
|
return (err);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr)
|
|
|
|
{
|
|
|
|
int mmc;
|
|
|
|
|
|
|
|
/* Number of msi messages must be a power of 2 between 1 and 32 */
|
|
|
|
assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32);
|
|
|
|
mmc = ffs(msgnum) - 1;
|
|
|
|
|
|
|
|
bzero(msicap, sizeof(struct msicap));
|
|
|
|
msicap->capid = PCIY_MSI;
|
|
|
|
msicap->nextptr = nextptr;
|
|
|
|
msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Add an MSI capability advertising 'msgnum' messages to the device's
 * capability list.  Returns the result of pci_emul_add_capability().
 */
int
pci_emul_add_msicap(struct pci_devinst *pi, int msgnum)
{
	struct msicap msicap;

	pci_populate_msicap(&msicap, msgnum, 0);

	return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap)));
}
|
|
|
|
|
2013-01-30 04:30:36 +00:00
|
|
|
/*
 * Fill in an MSI-X capability structure for a table of 'msgnum' entries
 * living in BAR 'barnum'.  'msix_tab_size' is the 4K-aligned size of the
 * MSI-X table; the PBA is placed immediately after it in the same BAR.
 */
static void
pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum,
    uint32_t msix_tab_size)
{

	assert(msix_tab_size % 4096 == 0);

	bzero(msixcap, sizeof(struct msixcap));
	msixcap->capid = PCIY_MSIX;

	/*
	 * Message Control Register, all fields set to
	 * zero except for the Table Size.
	 * Note: Table size N is encoded as N-1
	 */
	msixcap->msgctrl = msgnum - 1;

	/*
	 * MSI-X BAR setup:
	 * - MSI-X table start at offset 0
	 * - PBA table starts at a 4K aligned offset after the MSI-X table
	 */
	msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK;
	msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK);
}
|
|
|
|
|
|
|
|
/*
 * Allocate and initialize the in-memory MSI-X table with 'table_entries'
 * entries, all initially masked.
 */
static void
pci_msix_table_init(struct pci_devinst *pi, int table_entries)
{
	int i, table_size;

	assert(table_entries > 0);
	assert(table_entries <= MAX_MSIX_TABLE_ENTRIES);

	table_size = table_entries * MSIX_TABLE_ENTRY_SIZE;
	/* NOTE(review): calloc result is not checked before use below. */
	pi->pi_msix.table = calloc(1, table_size);

	/* set mask bit of vector control register */
	for (i = 0; i < table_entries; i++)
		pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK;
}
|
|
|
|
|
|
|
|
/*
 * Add an MSI-X capability with 'msgnum' table entries to the device, and
 * allocate a 32-bit memory BAR 'barnum' holding the MSI-X table followed
 * by the PBA.  Returns the result of pci_emul_add_capability().
 */
int
pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum)
{
	uint32_t tab_size;
	struct msixcap msixcap;

	assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES);
	assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0);

	tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE;

	/* Align table size to nearest 4K */
	tab_size = roundup2(tab_size, 4096);

	/* Record the MSI-X layout: table at offset 0, PBA right after. */
	pi->pi_msix.table_bar = barnum;
	pi->pi_msix.pba_bar = barnum;
	pi->pi_msix.table_offset = 0;
	pi->pi_msix.table_count = msgnum;
	pi->pi_msix.pba_offset = tab_size;
	pi->pi_msix.pba_size = PBA_SIZE(msgnum);

	pci_msix_table_init(pi, msgnum);

	pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size);

	/* allocate memory for MSI-X Table and PBA */
	pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32,
	    tab_size + pi->pi_msix.pba_size);

	return (pci_emul_add_capability(pi, (u_char *)&msixcap,
	    sizeof(msixcap)));
}
|
|
|
|
|
2020-05-25 06:25:31 +00:00
|
|
|
/*
 * Config-space write handler for the MSI-X capability.
 *
 * Writes to the Message Control register preserve its read-only bits and
 * refresh the cached enable/function-mask state; all writes are then
 * committed to config space.
 */
static void
msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
    int bytes, uint32_t val)
{
	uint16_t msgctrl, rwmask;
	int off;

	off = offset - capoff;
	/* Message Control Register */
	if (off == 2 && bytes == 2) {
		/* Only the enable and function-mask bits are writable. */
		rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK;
		msgctrl = pci_get_cfgdata16(pi, offset);
		msgctrl &= ~rwmask;
		msgctrl |= val & rwmask;
		val = msgctrl;

		pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE;
		pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK;
		pci_lintr_update(pi);
	}

	CFGWRITE(pi, offset, val, bytes);
}
|
|
|
|
|
2020-05-25 06:25:31 +00:00
|
|
|
/*
 * Config-space write handler for the MSI capability.
 *
 * Writes to the Message Control register preserve its read-only bits.
 * After committing the write, the cached MSI state (enabled, address,
 * data, max message count) is refreshed from config space and the legacy
 * INTx state is updated accordingly.
 */
static void
msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
    int bytes, uint32_t val)
{
	uint16_t msgctrl, rwmask, msgdata, mme;
	uint32_t addrlo;

	/*
	 * If guest is writing to the message control register make sure
	 * we do not overwrite read-only fields.
	 */
	if ((offset - capoff) == 2 && bytes == 2) {
		rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE;
		msgctrl = pci_get_cfgdata16(pi, offset);
		msgctrl &= ~rwmask;
		msgctrl |= val & rwmask;
		val = msgctrl;
	}
	CFGWRITE(pi, offset, val, bytes);

	/* Re-read the (possibly updated) MSI registers. */
	msgctrl = pci_get_cfgdata16(pi, capoff + 2);
	addrlo = pci_get_cfgdata32(pi, capoff + 4);
	/* The data register offset depends on 64-bit address capability. */
	if (msgctrl & PCIM_MSICTRL_64BIT)
		msgdata = pci_get_cfgdata16(pi, capoff + 12);
	else
		msgdata = pci_get_cfgdata16(pi, capoff + 8);

	mme = msgctrl & PCIM_MSICTRL_MME_MASK;
	pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0;
	if (pi->pi_msi.enabled) {
		pi->pi_msi.addr = addrlo;
		pi->pi_msi.msg_data = msgdata;
		pi->pi_msi.maxmsgnum = 1 << (mme >> 4);
	} else {
		pi->pi_msi.maxmsgnum = 0;
	}
	pci_lintr_update(pi);
}
|
|
|
|
|
2022-10-08 15:22:38 +00:00
|
|
|
/*
 * Config-space write handler for the PCI Express capability.  Currently
 * forwards all writes unmodified.
 */
static void
pciecap_cfgwrite(struct pci_devinst *pi, int capoff __unused, int offset,
    int bytes, uint32_t val)
{

	/* XXX don't write to the readonly parts */
	CFGWRITE(pi, offset, val, bytes);
}
|
|
|
|
|
|
|
|
#define PCIECAP_VERSION 0x2
|
|
|
|
/*
 * Add a PCI Express capability of the given device/port 'type' to the
 * device's capability list.  Returns the result of
 * pci_emul_add_capability().
 */
int
pci_emul_add_pciecap(struct pci_devinst *pi, int type)
{
	int err;
	struct pciecap pciecap;

	bzero(&pciecap, sizeof(pciecap));

	/*
	 * Use the integrated endpoint type for endpoints on a root complex bus.
	 *
	 * NB: bhyve currently only supports a single PCI bus that is the root
	 * complex bus, so all endpoints are integrated.
	 */
	if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0))
		type = PCIEM_TYPE_ROOT_INT_EP;

	pciecap.capid = PCIY_EXPRESS;
	pciecap.pcie_capabilities = PCIECAP_VERSION | type;
	/* Integrated endpoints have no PCIe link registers. */
	if (type != PCIEM_TYPE_ROOT_INT_EP) {
		pciecap.link_capabilities = 0x411;	/* gen1, x1 */
		pciecap.link_status = 0x11;	/* gen1, x1 */
	}

	err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap));
	return (err);
}
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
/*
 * This function assumes that 'coff' is in the capabilities region of the
 * config space. A capoff parameter of zero will force a search for the
 * offset and type.
 */
void
pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val,
    uint8_t capoff, int capid)
{
	uint8_t nextoff;

	/* Do not allow un-aligned writes */
	if ((offset & (bytes - 1)) != 0)
		return;

	if (capoff == 0) {
		/* Find the capability that we want to update */
		capoff = CAP_START_OFFSET;
		while (1) {
			nextoff = pci_get_cfgdata8(pi, capoff + 1);
			if (nextoff == 0)
				break;
			if (offset >= capoff && offset < nextoff)
				break;

			capoff = nextoff;
		}
		assert(offset >= capoff);
		/* The capability ID is the first byte of the capability. */
		capid = pci_get_cfgdata8(pi, capoff);
	}

	/*
	 * Capability ID and Next Capability Pointer are readonly.
	 * However, some o/s's do 4-byte writes that include these.
	 * For this case, trim the write back to 2 bytes and adjust
	 * the data.
	 */
	if (offset == capoff || offset == capoff + 1) {
		if (offset == capoff && bytes == 4) {
			bytes = 2;
			offset += 2;
			val >>= 16;
		} else
			return;
	}

	/* Dispatch to the handler for this capability type, if any. */
	switch (capid) {
	case PCIY_MSI:
		msicap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	case PCIY_MSIX:
		msixcap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	case PCIY_EXPRESS:
		pciecap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	default:
		break;
	}
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
pci_emul_iscap(struct pci_devinst *pi, int offset)
|
|
|
|
{
|
|
|
|
uint16_t sts;
|
|
|
|
|
|
|
|
sts = pci_get_cfgdata16(pi, PCIR_STATUS);
|
|
|
|
if ((sts & PCIM_STATUS_CAPPRESENT) != 0) {
|
2014-02-18 03:00:20 +00:00
|
|
|
if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend)
|
|
|
|
return (1);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
2014-02-18 03:00:20 +00:00
|
|
|
return (0);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
2013-02-22 00:46:32 +00:00
|
|
|
static int
|
2023-03-24 18:49:06 +00:00
|
|
|
pci_emul_fallback_handler(struct vcpu *vcpu __unused, int dir,
|
|
|
|
uint64_t addr __unused, int size __unused, uint64_t *val,
|
2022-10-08 15:22:38 +00:00
|
|
|
void *arg1 __unused, long arg2 __unused)
|
2013-02-22 00:46:32 +00:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Ignore writes; return 0xff's for reads. The mem read code
|
|
|
|
* will take care of truncating to the correct size.
|
|
|
|
*/
|
|
|
|
if (dir == MEM_F_READ) {
|
|
|
|
*val = 0xffffffffffffffff;
|
|
|
|
}
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2014-08-08 03:49:01 +00:00
|
|
|
/*
 * Memory handler for the PCI extended config (ECAM) window.  Decodes the
 * bus/slot/function/register from the address and forwards the access to
 * pci_cfgrw().
 */
static int
pci_emul_ecfg_handler(struct vcpu *vcpu __unused, int dir, uint64_t addr,
    int bytes, uint64_t *val, void *arg1 __unused, long arg2 __unused)
{
	int bus, slot, func, coff, in;

	/* ECAM layout: bus[27:20] slot[19:15] func[14:12] register[11:0]. */
	coff = addr & 0xfff;
	func = (addr >> 12) & 0x7;
	slot = (addr >> 15) & 0x1f;
	bus = (addr >> 20) & 0xff;
	in = (dir == MEM_F_READ);
	/* Default to all-ones in case no device claims the access. */
	if (in)
		*val = ~0UL;
	pci_cfgrw(in, bus, slot, func, coff, bytes, (uint32_t *)val);
	return (0);
}
|
|
|
|
|
|
|
|
/* Return the guest-physical base address of the PCI ECAM window. */
uint64_t
pci_ecfg_base(void)
{

	return (PCI_EMUL_ECFG_BASE);
}
|
|
|
|
|
2021-08-16 07:47:53 +00:00
|
|
|
static int
|
|
|
|
init_bootorder(void)
|
|
|
|
{
|
|
|
|
struct boot_device *device;
|
|
|
|
FILE *fp;
|
|
|
|
char *bootorder;
|
|
|
|
size_t bootorder_len;
|
|
|
|
|
|
|
|
if (TAILQ_EMPTY(&boot_devices))
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
fp = open_memstream(&bootorder, &bootorder_len);
|
|
|
|
TAILQ_FOREACH(device, &boot_devices, boot_device_chain) {
|
|
|
|
fprintf(fp, "/pci@i0cf8/pci@%d,%d\n",
|
|
|
|
device->pdi->pi_slot, device->pdi->pi_func);
|
|
|
|
}
|
|
|
|
fclose(fp);
|
|
|
|
|
|
|
|
return (qemu_fwcfg_add_file("bootorder", bootorder_len, bootorder));
|
|
|
|
}
|
|
|
|
|
2014-02-14 21:34:08 +00:00
|
|
|
#define BUSIO_ROUNDUP 32
|
2022-01-03 13:19:39 +00:00
|
|
|
#define BUSMEM32_ROUNDUP (1024 * 1024)
|
|
|
|
#define BUSMEM64_ROUNDUP (512 * 1024 * 1024)
|
2014-02-14 21:34:08 +00:00
|
|
|
|
2013-07-04 05:35:56 +00:00
|
|
|
int
|
2011-05-13 04:54:01 +00:00
|
|
|
init_pci(struct vmctx *ctx)
|
|
|
|
{
|
2019-06-26 20:30:41 +00:00
|
|
|
char node_name[sizeof("pci.XXX.XX.X")];
|
2014-08-08 03:49:01 +00:00
|
|
|
struct mem_range mr;
|
2011-05-13 04:54:01 +00:00
|
|
|
struct pci_devemu *pde;
|
2014-02-14 21:34:08 +00:00
|
|
|
struct businfo *bi;
|
|
|
|
struct slotinfo *si;
|
2014-01-29 14:56:48 +00:00
|
|
|
struct funcinfo *fi;
|
2019-06-26 20:30:41 +00:00
|
|
|
nvlist_t *nvl;
|
|
|
|
const char *emul;
|
2013-04-17 02:03:12 +00:00
|
|
|
size_t lowmem;
|
2021-11-22 15:22:48 +00:00
|
|
|
int bus, slot, func;
|
|
|
|
int error;
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2021-11-22 15:24:47 +00:00
|
|
|
if (vm_get_lowmem_limit(ctx) > PCI_EMUL_MEMBASE32)
|
|
|
|
errx(EX_OSERR, "Invalid lowmem limit");
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
pci_emul_iobase = PCI_EMUL_IOBASE;
|
2021-11-22 15:24:47 +00:00
|
|
|
pci_emul_membase32 = PCI_EMUL_MEMBASE32;
|
2020-11-12 00:46:53 +00:00
|
|
|
|
2021-11-22 15:22:48 +00:00
|
|
|
pci_emul_membase64 = 4*GB + vm_get_highmem_size(ctx);
|
|
|
|
pci_emul_membase64 = roundup2(pci_emul_membase64, PCI_EMUL_MEMSIZE64);
|
|
|
|
pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64;
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2021-08-16 07:47:53 +00:00
|
|
|
TAILQ_INIT(&boot_devices);
|
|
|
|
|
2014-02-14 21:34:08 +00:00
|
|
|
for (bus = 0; bus < MAXBUSES; bus++) {
|
2019-06-26 20:30:41 +00:00
|
|
|
snprintf(node_name, sizeof(node_name), "pci.%d", bus);
|
|
|
|
nvl = find_config_node(node_name);
|
|
|
|
if (nvl == NULL)
|
2014-02-14 21:34:08 +00:00
|
|
|
continue;
|
2019-06-26 20:30:41 +00:00
|
|
|
pci_businfo[bus] = calloc(1, sizeof(struct businfo));
|
|
|
|
bi = pci_businfo[bus];
|
|
|
|
|
2019-05-31 18:00:44 +00:00
|
|
|
/*
|
2014-02-14 21:34:08 +00:00
|
|
|
* Keep track of the i/o and memory resources allocated to
|
|
|
|
* this bus.
|
|
|
|
*/
|
|
|
|
bi->iobase = pci_emul_iobase;
|
|
|
|
bi->membase32 = pci_emul_membase32;
|
|
|
|
bi->membase64 = pci_emul_membase64;
|
|
|
|
|
2022-01-03 13:16:59 +00:00
|
|
|
/* first run: init devices */
|
2014-02-14 21:34:08 +00:00
|
|
|
for (slot = 0; slot < MAXSLOTS; slot++) {
|
|
|
|
si = &bi->slotinfo[slot];
|
|
|
|
for (func = 0; func < MAXFUNCS; func++) {
|
|
|
|
fi = &si->si_funcs[func];
|
2019-06-26 20:30:41 +00:00
|
|
|
snprintf(node_name, sizeof(node_name),
|
|
|
|
"pci.%d.%d.%d", bus, slot, func);
|
|
|
|
nvl = find_config_node(node_name);
|
|
|
|
if (nvl == NULL)
|
2014-02-14 21:34:08 +00:00
|
|
|
continue;
|
2019-06-26 20:30:41 +00:00
|
|
|
|
|
|
|
fi->fi_config = nvl;
|
|
|
|
emul = get_config_value_node(nvl, "device");
|
|
|
|
if (emul == NULL) {
|
|
|
|
EPRINTLN("pci slot %d:%d:%d: missing "
|
|
|
|
"\"device\" value", bus, slot, func);
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
|
|
|
pde = pci_emul_finddev(emul);
|
|
|
|
if (pde == NULL) {
|
|
|
|
EPRINTLN("pci slot %d:%d:%d: unknown "
|
|
|
|
"device \"%s\"", bus, slot, func,
|
|
|
|
emul);
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
|
|
|
if (pde->pe_alias != NULL) {
|
|
|
|
EPRINTLN("pci slot %d:%d:%d: legacy "
|
|
|
|
"device \"%s\", use \"%s\" instead",
|
|
|
|
bus, slot, func, emul,
|
|
|
|
pde->pe_alias);
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
|
|
|
fi->fi_pde = pde;
|
2014-02-14 21:34:08 +00:00
|
|
|
error = pci_emul_init(ctx, pde, bus, slot,
|
|
|
|
func, fi);
|
2013-07-04 05:35:56 +00:00
|
|
|
if (error)
|
|
|
|
return (error);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
}
|
2014-02-14 21:34:08 +00:00
|
|
|
|
2022-01-03 13:16:59 +00:00
|
|
|
/* second run: assign BARs and free list */
|
|
|
|
struct pci_bar_allocation *bar;
|
|
|
|
struct pci_bar_allocation *bar_tmp;
|
|
|
|
TAILQ_FOREACH_SAFE(bar, &pci_bars, chain, bar_tmp) {
|
|
|
|
pci_emul_assign_bar(bar->pdi, bar->idx, bar->type,
|
|
|
|
bar->size);
|
|
|
|
free(bar);
|
|
|
|
}
|
|
|
|
TAILQ_INIT(&pci_bars);
|
|
|
|
|
2014-02-14 21:34:08 +00:00
|
|
|
/*
|
|
|
|
* Add some slop to the I/O and memory resources decoded by
|
|
|
|
* this bus to give a guest some flexibility if it wants to
|
|
|
|
* reprogram the BARs.
|
|
|
|
*/
|
|
|
|
pci_emul_iobase += BUSIO_ROUNDUP;
|
|
|
|
pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP);
|
|
|
|
bi->iolimit = pci_emul_iobase;
|
|
|
|
|
2022-01-03 13:19:39 +00:00
|
|
|
pci_emul_membase32 += BUSMEM32_ROUNDUP;
|
2014-02-14 21:34:08 +00:00
|
|
|
pci_emul_membase32 = roundup2(pci_emul_membase32,
|
2022-01-03 13:19:39 +00:00
|
|
|
BUSMEM32_ROUNDUP);
|
2014-02-14 21:34:08 +00:00
|
|
|
bi->memlimit32 = pci_emul_membase32;
|
|
|
|
|
2022-01-03 13:19:39 +00:00
|
|
|
pci_emul_membase64 += BUSMEM64_ROUNDUP;
|
2014-02-14 21:34:08 +00:00
|
|
|
pci_emul_membase64 = roundup2(pci_emul_membase64,
|
2022-01-03 13:19:39 +00:00
|
|
|
BUSMEM64_ROUNDUP);
|
2014-02-14 21:34:08 +00:00
|
|
|
bi->memlimit64 = pci_emul_membase64;
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
2012-05-03 03:11:27 +00:00
|
|
|
|
2014-05-15 14:16:55 +00:00
|
|
|
/*
|
|
|
|
* PCI backends are initialized before routing INTx interrupts
|
|
|
|
* so that LPC devices are able to reserve ISA IRQs before
|
|
|
|
* routing PIRQ pins.
|
|
|
|
*/
|
|
|
|
for (bus = 0; bus < MAXBUSES; bus++) {
|
|
|
|
if ((bi = pci_businfo[bus]) == NULL)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
for (slot = 0; slot < MAXSLOTS; slot++) {
|
|
|
|
si = &bi->slotinfo[slot];
|
|
|
|
for (func = 0; func < MAXFUNCS; func++) {
|
|
|
|
fi = &si->si_funcs[func];
|
|
|
|
if (fi->fi_devi == NULL)
|
|
|
|
continue;
|
|
|
|
pci_lintr_route(fi->fi_devi);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
lpc_pirq_routed();
|
|
|
|
|
2021-08-16 07:47:53 +00:00
|
|
|
if ((error = init_bootorder()) != 0) {
|
|
|
|
warnx("%s: Unable to init bootorder", __func__);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2013-02-22 00:46:32 +00:00
|
|
|
/*
|
2013-04-17 02:03:12 +00:00
|
|
|
* The guest physical memory map looks like the following:
|
|
|
|
* [0, lowmem) guest system memory
|
2021-11-22 15:24:47 +00:00
|
|
|
* [lowmem, 0xC0000000) memory hole (may be absent)
|
|
|
|
* [0xC0000000, 0xE0000000) PCI hole (32-bit BAR allocation)
|
2014-08-08 03:49:01 +00:00
|
|
|
* [0xE0000000, 0xF0000000) PCI extended config window
|
|
|
|
* [0xF0000000, 4GB) LAPIC, IOAPIC, HPET, firmware
|
2013-04-17 02:03:12 +00:00
|
|
|
* [4GB, 4GB + highmem)
|
2014-08-08 03:49:01 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
2013-04-17 02:03:12 +00:00
|
|
|
* Accesses to memory addresses that are not allocated to system
|
|
|
|
* memory or PCI devices return 0xff's.
|
2013-02-22 00:46:32 +00:00
|
|
|
*/
|
2014-06-24 02:02:51 +00:00
|
|
|
lowmem = vm_get_lowmem_size(ctx);
|
2014-08-08 03:49:01 +00:00
|
|
|
bzero(&mr, sizeof(struct mem_range));
|
|
|
|
mr.name = "PCI hole";
|
|
|
|
mr.flags = MEM_F_RW | MEM_F_IMMUTABLE;
|
|
|
|
mr.base = lowmem;
|
|
|
|
mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem;
|
|
|
|
mr.handler = pci_emul_fallback_handler;
|
|
|
|
error = register_mem_fallback(&mr);
|
|
|
|
assert(error == 0);
|
2013-04-17 02:03:12 +00:00
|
|
|
|
2014-08-08 03:49:01 +00:00
|
|
|
/* PCI extended config space */
|
|
|
|
bzero(&mr, sizeof(struct mem_range));
|
|
|
|
mr.name = "PCI ECFG";
|
|
|
|
mr.flags = MEM_F_RW | MEM_F_IMMUTABLE;
|
|
|
|
mr.base = PCI_EMUL_ECFG_BASE;
|
|
|
|
mr.size = PCI_EMUL_ECFG_SIZE;
|
|
|
|
mr.handler = pci_emul_ecfg_handler;
|
|
|
|
error = register_mem(&mr);
|
2013-02-22 00:46:32 +00:00
|
|
|
assert(error == 0);
|
2013-07-04 05:35:56 +00:00
|
|
|
|
|
|
|
return (0);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
2014-01-29 14:56:48 +00:00
|
|
|
/*
 * Emit one ACPI _PRT entry package (APIC mode) into the DSDT for the
 * given slot/pin, routing to the fixed I/O APIC interrupt 'ioapic_irq'.
 */
static void
pci_apic_prt_entry(int bus __unused, int slot, int pin, int pirq_pin __unused,
    int ioapic_irq, void *arg __unused)
{

	dsdt_line(" Package ()");
	dsdt_line(" {");
	dsdt_line(" 0x%X,", slot << 16 | 0xffff);
	dsdt_line(" 0x%02X,", pin - 1);
	dsdt_line(" Zero,");
	dsdt_line(" 0x%X", ioapic_irq);
	dsdt_line(" },");
}
|
|
|
|
|
|
|
|
/*
 * Emit one ACPI _PRT entry package (PIC mode) into the DSDT for the
 * given slot/pin, referencing the named PIRQ link device for 'pirq_pin'.
 * Entries whose PIRQ pin has no link device name are skipped.
 */
static void
pci_pirq_prt_entry(int bus __unused, int slot, int pin, int pirq_pin,
    int ioapic_irq __unused, void *arg __unused)
{
	char *name;

	name = lpc_pirq_name(pirq_pin);
	if (name == NULL)
		return;
	dsdt_line(" Package ()");
	dsdt_line(" {");
	dsdt_line(" 0x%X,", slot << 16 | 0xffff);
	dsdt_line(" 0x%02X,", pin - 1);
	dsdt_line(" %s,", name);
	dsdt_line(" 0x00");
	dsdt_line(" },");
	/* lpc_pirq_name() returns allocated storage; release it. */
	free(name);
}
|
|
|
|
|
2014-02-14 21:34:08 +00:00
|
|
|
/*
 * A bhyve virtual machine has a flat PCI hierarchy with a root port
 * corresponding to each PCI bus.
 *
 * Emits the ACPI Device() node for one bus: _HID/_BBN, the _CRS
 * resource template (bus number, I/O windows, 32- and 64-bit MMIO
 * windows), the PPRT/APRT routing packages and a _PRT method that
 * selects between them based on PICM, and finally each function's own
 * pe_write_dsdt() contribution.
 */
static void
pci_bus_write_dsdt(int bus)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct pci_devinst *pi;
	int count, func, slot;

	/*
	 * If there are no devices on this 'bus' then just return.
	 */
	if ((bi = pci_businfo[bus]) == NULL) {
		/*
		 * Bus 0 is special because it decodes the I/O ports used
		 * for PCI config space access even if there are no devices
		 * on it.
		 */
		if (bus != 0)
			return;
	}

	dsdt_line(" Device (PC%02X)", bus);
	dsdt_line(" {");
	dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))");
	dsdt_line(" Method (_BBN, 0, NotSerialized)");
	dsdt_line(" {");
	dsdt_line(" Return (0x%08X)", bus);
	dsdt_line(" }");
	dsdt_line(" Name (_CRS, ResourceTemplate ()");
	dsdt_line(" {");
	dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, "
	    "MaxFixed, PosDecode,");
	dsdt_line(" 0x0000, // Granularity");
	dsdt_line(" 0x%04X, // Range Minimum", bus);
	dsdt_line(" 0x%04X, // Range Maximum", bus);
	dsdt_line(" 0x0000, // Translation Offset");
	dsdt_line(" 0x0001, // Length");
	dsdt_line(" ,, )");

	if (bus == 0) {
		/* Bus 0 owns the 0xCF8 config-address port. */
		dsdt_indent(3);
		dsdt_fixed_ioport(0xCF8, 8);
		dsdt_unindent(3);

		/* Legacy I/O below the config ports: 0x0000-0x0CF7. */
		dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, "
		    "PosDecode, EntireRange,");
		dsdt_line(" 0x0000, // Granularity");
		dsdt_line(" 0x0000, // Range Minimum");
		dsdt_line(" 0x0CF7, // Range Maximum");
		dsdt_line(" 0x0000, // Translation Offset");
		dsdt_line(" 0x0CF8, // Length");
		dsdt_line(" ,, , TypeStatic)");

		/* I/O above the config ports, up to the emulated BAR base. */
		dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, "
		    "PosDecode, EntireRange,");
		dsdt_line(" 0x0000, // Granularity");
		dsdt_line(" 0x0D00, // Range Minimum");
		dsdt_line(" 0x%04X, // Range Maximum",
		    PCI_EMUL_IOBASE - 1);
		dsdt_line(" 0x0000, // Translation Offset");
		dsdt_line(" 0x%04X, // Length",
		    PCI_EMUL_IOBASE - 0x0D00);
		dsdt_line(" ,, , TypeStatic)");

		/* Empty bus 0: close _CRS and skip the per-device windows. */
		if (bi == NULL) {
			dsdt_line(" })");
			goto done;
		}
	}
	assert(bi != NULL);

	/* i/o window */
	dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, "
	    "PosDecode, EntireRange,");
	dsdt_line(" 0x0000, // Granularity");
	dsdt_line(" 0x%04X, // Range Minimum", bi->iobase);
	dsdt_line(" 0x%04X, // Range Maximum",
	    bi->iolimit - 1);
	dsdt_line(" 0x0000, // Translation Offset");
	dsdt_line(" 0x%04X, // Length",
	    bi->iolimit - bi->iobase);
	dsdt_line(" ,, , TypeStatic)");

	/* mmio window (32-bit) */
	dsdt_line(" DWordMemory (ResourceProducer, PosDecode, "
	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
	dsdt_line(" 0x00000000, // Granularity");
	dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32);
	dsdt_line(" 0x%08X, // Range Maximum\n",
	    bi->memlimit32 - 1);
	dsdt_line(" 0x00000000, // Translation Offset");
	dsdt_line(" 0x%08X, // Length\n",
	    bi->memlimit32 - bi->membase32);
	dsdt_line(" ,, , AddressRangeMemory, TypeStatic)");

	/* mmio window (64-bit) */
	dsdt_line(" QWordMemory (ResourceProducer, PosDecode, "
	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
	dsdt_line(" 0x0000000000000000, // Granularity");
	dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64);
	dsdt_line(" 0x%016lX, // Range Maximum\n",
	    bi->memlimit64 - 1);
	dsdt_line(" 0x0000000000000000, // Translation Offset");
	dsdt_line(" 0x%016lX, // Length\n",
	    bi->memlimit64 - bi->membase64);
	dsdt_line(" ,, , AddressRangeMemory, TypeStatic)");
	dsdt_line(" })");

	/* Emit routing tables only if some device uses a legacy intpin. */
	count = pci_count_lintr(bus);
	if (count != 0) {
		dsdt_indent(2);
		dsdt_line("Name (PPRT, Package ()");
		dsdt_line("{");
		pci_walk_lintr(bus, pci_pirq_prt_entry, NULL);
		dsdt_line("})");
		dsdt_line("Name (APRT, Package ()");
		dsdt_line("{");
		pci_walk_lintr(bus, pci_apic_prt_entry, NULL);
		dsdt_line("})");
		/* _PRT: APIC table when PICM is set, 8259 table otherwise. */
		dsdt_line("Method (_PRT, 0, NotSerialized)");
		dsdt_line("{");
		dsdt_line(" If (PICM)");
		dsdt_line(" {");
		dsdt_line(" Return (APRT)");
		dsdt_line(" }");
		dsdt_line(" Else");
		dsdt_line(" {");
		dsdt_line(" Return (PPRT)");
		dsdt_line(" }");
		dsdt_line("}");
		dsdt_unindent(2);
	}

	/* Let each present function append its own DSDT content. */
	dsdt_indent(2);
	for (slot = 0; slot < MAXSLOTS; slot++) {
		si = &bi->slotinfo[slot];
		for (func = 0; func < MAXFUNCS; func++) {
			pi = si->si_funcs[func].fi_devi;
			if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL)
				pi->pi_d->pe_write_dsdt(pi);
		}
	}
	dsdt_unindent(2);
done:
	dsdt_line(" }");
}
|
|
|
|
|
2014-02-14 21:34:08 +00:00
|
|
|
/*
 * Emit the PCI portion of the guest DSDT: the PICM variable and _PIC
 * method (records the interrupt model chosen by the OS), then a
 * _SB scope containing one node per bus via pci_bus_write_dsdt().
 */
void
pci_write_dsdt(void)
{
	int bus;

	dsdt_indent(1);
	dsdt_line("Name (PICM, 0x00)");
	dsdt_line("Method (_PIC, 1, NotSerialized)");
	dsdt_line("{");
	dsdt_line(" Store (Arg0, PICM)");
	dsdt_line("}");
	dsdt_line("");
	dsdt_line("Scope (_SB)");
	dsdt_line("{");
	for (bus = 0; bus < MAXBUSES; bus++)
		pci_bus_write_dsdt(bus);
	dsdt_line("}");
	dsdt_unindent(1);
}
|
|
|
|
|
2014-05-02 04:51:31 +00:00
|
|
|
int
|
|
|
|
pci_bus_configured(int bus)
|
|
|
|
{
|
|
|
|
assert(bus >= 0 && bus < MAXBUSES);
|
|
|
|
return (pci_businfo[bus] != NULL);
|
|
|
|
}
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
/*
 * Return non-zero if MSI has been enabled for this device (via its
 * MSI capability).
 */
int
pci_msi_enabled(struct pci_devinst *pi)
{
	return (pi->pi_msi.enabled);
}
|
|
|
|
|
|
|
|
int
|
2013-12-16 19:59:31 +00:00
|
|
|
pci_msi_maxmsgnum(struct pci_devinst *pi)
|
2011-05-13 04:54:01 +00:00
|
|
|
{
|
|
|
|
if (pi->pi_msi.enabled)
|
2013-12-16 19:59:31 +00:00
|
|
|
return (pi->pi_msi.maxmsgnum);
|
2011-05-13 04:54:01 +00:00
|
|
|
else
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2013-01-30 04:30:36 +00:00
|
|
|
/*
 * Return non-zero if MSI-X should be used for this device.  Note that
 * MSI-X is reported disabled whenever MSI is enabled — the code treats
 * plain MSI as taking precedence.
 */
int
pci_msix_enabled(struct pci_devinst *pi)
{

	return (pi->pi_msix.enabled && !pi->pi_msi.enabled);
}
|
|
|
|
|
|
|
|
void
|
|
|
|
pci_generate_msix(struct pci_devinst *pi, int index)
|
|
|
|
{
|
|
|
|
struct msix_table_entry *mte;
|
|
|
|
|
|
|
|
if (!pci_msix_enabled(pi))
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (pi->pi_msix.function_mask)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (index >= pi->pi_msix.table_count)
|
|
|
|
return;
|
|
|
|
|
|
|
|
mte = &pi->pi_msix.table[index];
|
|
|
|
if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
|
|
|
|
/* XXX Set PBA bit if interrupt is disabled */
|
2013-12-16 19:59:31 +00:00
|
|
|
vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data);
|
2013-01-30 04:30:36 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
void
|
2013-12-16 19:59:31 +00:00
|
|
|
pci_generate_msi(struct pci_devinst *pi, int index)
|
2011-05-13 04:54:01 +00:00
|
|
|
{
|
|
|
|
|
2013-12-16 19:59:31 +00:00
|
|
|
if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) {
|
|
|
|
vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr,
|
|
|
|
pi->pi_msi.msg_data + index);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-01-29 14:56:48 +00:00
|
|
|
static bool
|
|
|
|
pci_lintr_permitted(struct pci_devinst *pi)
|
|
|
|
{
|
|
|
|
uint16_t cmd;
|
|
|
|
|
|
|
|
cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
|
|
|
|
return (!(pi->pi_msi.enabled || pi->pi_msix.enabled ||
|
|
|
|
(cmd & PCIM_CMD_INTxDIS)));
|
|
|
|
}
|
|
|
|
|
2014-05-15 14:16:55 +00:00
|
|
|
void
|
2014-01-29 14:56:48 +00:00
|
|
|
pci_lintr_request(struct pci_devinst *pi)
|
2012-05-03 03:11:27 +00:00
|
|
|
{
|
2014-02-14 21:34:08 +00:00
|
|
|
struct businfo *bi;
|
2014-01-29 14:56:48 +00:00
|
|
|
struct slotinfo *si;
|
2014-05-15 14:16:55 +00:00
|
|
|
int bestpin, bestcount, pin;
|
2013-10-29 00:18:11 +00:00
|
|
|
|
2014-02-14 21:34:08 +00:00
|
|
|
bi = pci_businfo[pi->pi_bus];
|
|
|
|
assert(bi != NULL);
|
|
|
|
|
2014-01-29 14:56:48 +00:00
|
|
|
/*
|
2014-05-15 14:16:55 +00:00
|
|
|
* Just allocate a pin from our slot. The pin will be
|
|
|
|
* assigned IRQs later when interrupts are routed.
|
2014-01-29 14:56:48 +00:00
|
|
|
*/
|
2014-02-14 21:34:08 +00:00
|
|
|
si = &bi->slotinfo[pi->pi_slot];
|
2014-01-29 14:56:48 +00:00
|
|
|
bestpin = 0;
|
|
|
|
bestcount = si->si_intpins[0].ii_count;
|
|
|
|
for (pin = 1; pin < 4; pin++) {
|
|
|
|
if (si->si_intpins[pin].ii_count < bestcount) {
|
|
|
|
bestpin = pin;
|
|
|
|
bestcount = si->si_intpins[pin].ii_count;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
si->si_intpins[bestpin].ii_count++;
|
|
|
|
pi->pi_lintr.pin = bestpin + 1;
|
|
|
|
pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1);
|
2014-05-15 14:16:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Bind the device's previously-requested INTx pin to concrete IRQ
 * resources: an I/O APIC input and an 8259 PIRQ link.  Allocations are
 * shared per slot intpin — only the first device on a pin allocates;
 * later devices reuse the cached values.  No-op for devices without a
 * requested pin.
 */
static void
pci_lintr_route(struct pci_devinst *pi)
{
	struct businfo *bi;
	struct intxinfo *ii;

	if (pi->pi_lintr.pin == 0)
		return;

	bi = pci_businfo[pi->pi_bus];
	assert(bi != NULL);
	ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1];

	/*
	 * Attempt to allocate an I/O APIC pin for this intpin if one
	 * is not yet assigned.
	 */
	if (ii->ii_ioapic_irq == 0)
		ii->ii_ioapic_irq = ioapic_pci_alloc_irq(pi);
	assert(ii->ii_ioapic_irq > 0);

	/*
	 * Attempt to allocate a PIRQ pin for this intpin if one is
	 * not yet assigned.
	 */
	if (ii->ii_pirq_pin == 0)
		ii->ii_pirq_pin = pirq_alloc_pin(pi);
	assert(ii->ii_pirq_pin > 0);

	/* Publish the routing in device state and the INTLINE register. */
	pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq;
	pi->pi_lintr.pirq_pin = ii->ii_pirq_pin;
	pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin));
}
|
|
|
|
|
|
|
|
/*
 * Assert the device's legacy INTx interrupt.  If assertion is not
 * currently permitted (MSI/MSI-X enabled or INTx disabled — see
 * pci_lintr_permitted()) the assertion is latched as PENDING and
 * replayed later by pci_lintr_update().  State transitions are
 * serialized by pi_lintr.lock.
 */
void
pci_lintr_assert(struct pci_devinst *pi)
{

	assert(pi->pi_lintr.pin > 0);

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == IDLE) {
		if (pci_lintr_permitted(pi)) {
			pi->pi_lintr.state = ASSERTED;
			pci_irq_assert(pi);
		} else
			pi->pi_lintr.state = PENDING;
	}
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}
|
|
|
|
|
|
|
|
/*
 * Deassert the device's legacy INTx interrupt.  A pin that was only
 * PENDING (never actually asserted to the guest) is simply returned to
 * IDLE without touching the interrupt controllers.
 */
void
pci_lintr_deassert(struct pci_devinst *pi)
{

	assert(pi->pi_lintr.pin > 0);

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == ASSERTED) {
		pi->pi_lintr.state = IDLE;
		pci_irq_deassert(pi);
	} else if (pi->pi_lintr.state == PENDING)
		pi->pi_lintr.state = IDLE;
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}
|
|
|
|
|
|
|
|
/*
 * Reconcile the INTx line with the current permission state after a
 * config-space change (e.g. command register or MSI enable): drop an
 * assertion that is no longer permitted (leaving it PENDING), or
 * replay a PENDING assertion that has become permitted.
 */
static void
pci_lintr_update(struct pci_devinst *pi)
{

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) {
		pci_irq_deassert(pi);
		pi->pi_lintr.state = PENDING;
	} else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) {
		pi->pi_lintr.state = ASSERTED;
		pci_irq_assert(pi);
	}
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}
|
|
|
|
|
|
|
|
int
|
2014-02-14 21:34:08 +00:00
|
|
|
pci_count_lintr(int bus)
|
2014-01-29 14:56:48 +00:00
|
|
|
{
|
|
|
|
int count, slot, pin;
|
2014-02-14 21:34:08 +00:00
|
|
|
struct slotinfo *slotinfo;
|
2014-01-29 14:56:48 +00:00
|
|
|
|
|
|
|
count = 0;
|
2014-02-14 21:34:08 +00:00
|
|
|
if (pci_businfo[bus] != NULL) {
|
|
|
|
for (slot = 0; slot < MAXSLOTS; slot++) {
|
|
|
|
slotinfo = &pci_businfo[bus]->slotinfo[slot];
|
|
|
|
for (pin = 0; pin < 4; pin++) {
|
|
|
|
if (slotinfo->si_intpins[pin].ii_count != 0)
|
|
|
|
count++;
|
|
|
|
}
|
2014-01-29 14:56:48 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return (count);
|
|
|
|
}
|
2013-11-23 03:56:03 +00:00
|
|
|
|
2014-01-29 14:56:48 +00:00
|
|
|
void
|
2014-02-14 21:34:08 +00:00
|
|
|
pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg)
|
2014-01-29 14:56:48 +00:00
|
|
|
{
|
2014-02-14 21:34:08 +00:00
|
|
|
struct businfo *bi;
|
|
|
|
struct slotinfo *si;
|
2014-01-29 14:56:48 +00:00
|
|
|
struct intxinfo *ii;
|
|
|
|
int slot, pin;
|
|
|
|
|
2014-02-14 21:34:08 +00:00
|
|
|
if ((bi = pci_businfo[bus]) == NULL)
|
|
|
|
return;
|
|
|
|
|
2014-01-29 14:56:48 +00:00
|
|
|
for (slot = 0; slot < MAXSLOTS; slot++) {
|
2014-02-14 21:34:08 +00:00
|
|
|
si = &bi->slotinfo[slot];
|
2014-01-29 14:56:48 +00:00
|
|
|
for (pin = 0; pin < 4; pin++) {
|
2014-02-14 21:34:08 +00:00
|
|
|
ii = &si->si_intpins[pin];
|
2014-01-29 14:56:48 +00:00
|
|
|
if (ii->ii_count != 0)
|
2014-05-15 14:16:55 +00:00
|
|
|
cb(bus, slot, pin + 1, ii->ii_pirq_pin,
|
|
|
|
ii->ii_ioapic_irq, arg);
|
2014-01-29 14:56:48 +00:00
|
|
|
}
|
2013-11-23 03:56:03 +00:00
|
|
|
}
|
2012-05-03 03:11:27 +00:00
|
|
|
}
|
|
|
|
|
2012-08-06 06:51:27 +00:00
|
|
|
/*
|
|
|
|
* Return 1 if the emulated device in 'slot' is a multi-function device.
|
|
|
|
* Return 0 otherwise.
|
|
|
|
*/
|
|
|
|
static int
|
2014-02-14 21:34:08 +00:00
|
|
|
pci_emul_is_mfdev(int bus, int slot)
|
2012-08-06 06:51:27 +00:00
|
|
|
{
|
2014-02-14 21:34:08 +00:00
|
|
|
struct businfo *bi;
|
|
|
|
struct slotinfo *si;
|
2012-08-06 06:51:27 +00:00
|
|
|
int f, numfuncs;
|
2012-05-03 03:11:27 +00:00
|
|
|
|
2012-08-06 06:51:27 +00:00
|
|
|
numfuncs = 0;
|
2014-02-14 21:34:08 +00:00
|
|
|
if ((bi = pci_businfo[bus]) != NULL) {
|
|
|
|
si = &bi->slotinfo[slot];
|
|
|
|
for (f = 0; f < MAXFUNCS; f++) {
|
|
|
|
if (si->si_funcs[f].fi_devi != NULL) {
|
|
|
|
numfuncs++;
|
|
|
|
}
|
2012-08-06 06:51:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return (numfuncs > 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on
|
|
|
|
* whether or not is a multi-function being emulated in the pci 'slot'.
|
|
|
|
*/
|
|
|
|
static void
|
2014-02-14 21:34:08 +00:00
|
|
|
pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv)
|
2012-08-06 06:51:27 +00:00
|
|
|
{
|
|
|
|
int mfdev;
|
|
|
|
|
|
|
|
if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) {
|
2014-02-14 21:34:08 +00:00
|
|
|
mfdev = pci_emul_is_mfdev(bus, slot);
|
2012-08-06 06:51:27 +00:00
|
|
|
switch (bytes) {
|
|
|
|
case 1:
|
|
|
|
case 2:
|
|
|
|
*rv &= ~PCIM_MFDEV;
|
|
|
|
if (mfdev) {
|
|
|
|
*rv |= PCIM_MFDEV;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 4:
|
|
|
|
*rv &= ~(PCIM_MFDEV << 16);
|
|
|
|
if (mfdev) {
|
|
|
|
*rv |= (PCIM_MFDEV << 16);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2012-05-03 03:11:27 +00:00
|
|
|
|
2019-06-07 15:53:27 +00:00
|
|
|
/*
 * Update device state in response to changes to the PCI command
 * register: re-register or unregister BAR decodings whose enable bit
 * (PORTEN for I/O, MEMEN for MMIO/ROM) flipped, then re-evaluate the
 * INTx line, whose permission also depends on the command register.
 *
 * 'old' is the command register value before the write.
 */
void
pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old)
{
	int i;
	uint16_t changed, new;

	new = pci_get_cfgdata16(pi, PCIR_COMMAND);
	changed = old ^ new;	/* bits that actually flipped */

	/*
	 * If the MMIO or I/O address space decoding has changed then
	 * register/unregister all BARs that decode that address space.
	 */
	for (i = 0; i <= PCI_BARMAX_WITH_ROM; i++) {
		switch (pi->pi_bar[i].type) {
		case PCIBAR_NONE:
		case PCIBAR_MEMHI64:
			/* MEMHI64 is handled via its MEM64 partner. */
			break;
		case PCIBAR_IO:
			/* I/O address space decoding changed? */
			if (changed & PCIM_CMD_PORTEN) {
				if (new & PCIM_CMD_PORTEN)
					register_bar(pi, i);
				else
					unregister_bar(pi, i);
			}
			break;
		case PCIBAR_ROM:
			/* skip (un-)register of ROM if it is disabled */
			if (!romen(pi))
				break;
			/* fallthrough */
		case PCIBAR_MEM32:
		case PCIBAR_MEM64:
			/* MMIO address space decoding changed? */
			if (changed & PCIM_CMD_MEMEN) {
				if (new & PCIM_CMD_MEMEN)
					register_bar(pi, i);
				else
					unregister_bar(pi, i);
			}
			break;
		default:
			assert(0);
		}
	}

	/*
	 * If INTx has been unmasked and is pending, assert the
	 * interrupt.
	 */
	pci_lintr_update(pi);
}
|
2013-04-10 02:12:39 +00:00
|
|
|
|
2019-06-07 15:53:27 +00:00
|
|
|
/*
 * Handle a guest write that targets the command/status registers:
 * merge the writable bits of 'new' with the read-only bits of the
 * current value, commit the result, and propagate any command-register
 * side effects via pci_emul_cmd_changed().
 */
static void
pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes)
{
	int rshift;
	uint32_t cmd, old, readonly;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);	/* stash old value */

	/*
	 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3.
	 *
	 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are
	 * 'write 1 to clear'. However these bits are not set to '1' by
	 * any device emulation so it is simpler to treat them as readonly.
	 */
	rshift = (coff & 0x3) * 8;	/* align mask to the accessed bytes */
	readonly = 0xFFFFF880 >> rshift;

	old = CFGREAD(pi, coff, bytes);
	new &= ~readonly;
	new |= (old & readonly);
	CFGWRITE(pi, coff, new, bytes);		/* update config */

	pci_emul_cmd_changed(pi, cmd);
}
|
|
|
|
|
2014-08-08 03:49:01 +00:00
|
|
|
/*
 * Core config-space access dispatcher for bus/slot/func at register
 * offset 'coff'.  'in' selects read (result stored through valp) vs
 * write (value read from valp).  Reads may be overridden by the device
 * model's pe_cfgread; writes are routed to BAR/ROM handling, capability
 * writes, command/status handling, or raw config storage.
 */
static void
pci_cfgrw(int in, int bus, int slot, int func, int coff, int bytes,
    uint32_t *valp)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct pci_devinst *pi;
	struct pci_devemu *pe;
	int idx, needcfg;
	uint64_t addr, bar, mask;

	if ((bi = pci_businfo[bus]) != NULL) {
		si = &bi->slotinfo[slot];
		pi = si->si_funcs[func].fi_devi;
	} else
		pi = NULL;

	/*
	 * Just return if there is no device at this slot:func or if the
	 * guest is doing an un-aligned access.
	 */
	if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) ||
	    (coff & (bytes - 1)) != 0) {
		if (in)
			*valp = 0xffffffff;
		return;
	}

	/*
	 * Ignore all writes beyond the standard config space and return all
	 * ones on reads.
	 */
	if (coff >= PCI_REGMAX + 1) {
		if (in) {
			*valp = 0xffffffff;
			/*
			 * Extended capabilities begin at offset 256 in config
			 * space. Absence of extended capabilities is signaled
			 * with all 0s in the extended capability header at
			 * offset 256.
			 */
			if (coff <= PCI_REGMAX + 4)
				*valp = 0x00000000;
		}
		return;
	}

	pe = pi->pi_d;

	/*
	 * Config read
	 */
	if (in) {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgread != NULL) {
			needcfg = pe->pe_cfgread(pi, coff, bytes, valp);
		} else {
			needcfg = 1;
		}

		/* Fall back to the stored config data. */
		if (needcfg)
			*valp = CFGREAD(pi, coff, bytes);

		pci_emul_hdrtype_fixup(bus, slot, coff, bytes, valp);
	} else {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgwrite != NULL &&
		    (*pe->pe_cfgwrite)(pi, coff, bytes, *valp) == 0)
			return;

		/*
		 * Special handling for write to BAR and ROM registers
		 */
		if (is_pcir_bar(coff) || is_pcir_bios(coff)) {
			/*
			 * Ignore writes to BAR registers that are not
			 * 4-byte aligned.
			 */
			if (bytes != 4 || (coff & 0x3) != 0)
				return;

			if (is_pcir_bar(coff)) {
				idx = (coff - PCIR_BAR(0)) / 4;
			} else if (is_pcir_bios(coff)) {
				idx = PCI_ROM_IDX;
			} else {
				errx(4, "%s: invalid BAR offset %d", __func__,
				    coff);
			}

			/*
			 * BARs are naturally aligned to their size, so the
			 * size mask also strips the low flag bits the guest
			 * may have echoed back after a sizing probe.
			 */
			mask = ~(pi->pi_bar[idx].size - 1);
			switch (pi->pi_bar[idx].type) {
			case PCIBAR_NONE:
				pi->pi_bar[idx].addr = bar = 0;
				break;
			case PCIBAR_IO:
				addr = *valp & mask;
				addr &= 0xffff;	/* I/O space is 16 bits */
				bar = addr | pi->pi_bar[idx].lobits;
				/*
				 * Register the new BAR value for interception
				 */
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_IO);
				}
				break;
			case PCIBAR_MEM32:
				addr = bar = *valp & mask;
				bar |= pi->pi_bar[idx].lobits;
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_MEM32);
				}
				break;
			case PCIBAR_MEM64:
				/* Low dword of a 64-bit BAR. */
				addr = bar = *valp & mask;
				bar |= pi->pi_bar[idx].lobits;
				if (addr != (uint32_t)pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_MEM64);
				}
				break;
			case PCIBAR_MEMHI64:
				/*
				 * High dword: the size/address state lives
				 * in the preceding MEM64 entry (idx - 1).
				 */
				mask = ~(pi->pi_bar[idx - 1].size - 1);
				addr = ((uint64_t)*valp << 32) & mask;
				bar = addr >> 32;
				if (bar != pi->pi_bar[idx - 1].addr >> 32) {
					update_bar_address(pi, addr, idx - 1,
					    PCIBAR_MEMHI64);
				}
				break;
			case PCIBAR_ROM:
				addr = bar = *valp & mask;
				if (memen(pi) && romen(pi)) {
					unregister_bar(pi, idx);
				}
				pi->pi_bar[idx].addr = addr;
				pi->pi_bar[idx].lobits = *valp &
				    PCIM_BIOS_ENABLE;
				/* romen could have changed its value */
				if (memen(pi) && romen(pi)) {
					register_bar(pi, idx);
				}
				bar |= pi->pi_bar[idx].lobits;
				break;
			default:
				assert(0);
			}
			pci_set_cfgdata32(pi, coff, bar);

		} else if (pci_emul_iscap(pi, coff)) {
			pci_emul_capwrite(pi, coff, bytes, *valp, 0, 0);
		} else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) {
			pci_emul_cmdsts_write(pi, coff, *valp, bytes);
		} else {
			CFGWRITE(pi, coff, *valp, bytes);
		}
	}
}
|
|
|
|
|
|
|
|
/* Latched state of the CONF1 (0xCF8) config-address register. */
static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff;
|
|
|
|
|
|
|
|
static int
|
2022-12-09 18:35:44 +00:00
|
|
|
pci_emul_cfgaddr(struct vmctx *ctx __unused, int in,
|
2022-10-08 15:22:38 +00:00
|
|
|
int port __unused, int bytes, uint32_t *eax, void *arg __unused)
|
2014-08-08 03:49:01 +00:00
|
|
|
{
|
|
|
|
uint32_t x;
|
|
|
|
|
|
|
|
if (bytes != 4) {
|
|
|
|
if (in)
|
|
|
|
*eax = (bytes == 2) ? 0xffff : 0xff;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (in) {
|
|
|
|
x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff;
|
|
|
|
if (cfgenable)
|
|
|
|
x |= CONF1_ENABLE;
|
|
|
|
*eax = x;
|
|
|
|
} else {
|
|
|
|
x = *eax;
|
|
|
|
cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE;
|
2021-10-15 07:25:54 +00:00
|
|
|
cfgoff = (x & PCI_REGMAX) & ~0x03;
|
2014-08-08 03:49:01 +00:00
|
|
|
cfgfunc = (x >> 8) & PCI_FUNCMAX;
|
|
|
|
cfgslot = (x >> 11) & PCI_SLOTMAX;
|
|
|
|
cfgbus = (x >> 16) & PCI_BUSMAX;
|
|
|
|
}
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
/* Register the handler for the CONF1 address port. */
INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr);
|
|
|
|
|
|
|
|
static int
|
2023-01-19 18:30:18 +00:00
|
|
|
pci_emul_cfgdata(struct vmctx *ctx __unused, int in, int port,
|
2022-12-09 18:35:28 +00:00
|
|
|
int bytes, uint32_t *eax, void *arg __unused)
|
2014-08-08 03:49:01 +00:00
|
|
|
{
|
|
|
|
int coff;
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2014-08-08 03:49:01 +00:00
|
|
|
assert(bytes == 1 || bytes == 2 || bytes == 4);
|
|
|
|
|
|
|
|
coff = cfgoff + (port - CONF1_DATA_PORT);
|
|
|
|
if (cfgenable) {
|
2023-01-19 18:30:18 +00:00
|
|
|
pci_cfgrw(in, cfgbus, cfgslot, cfgfunc, coff, bytes, eax);
|
2014-08-08 03:49:01 +00:00
|
|
|
} else {
|
|
|
|
/* Ignore accesses to cfgdata if not enabled by cfgaddr */
|
|
|
|
if (in)
|
|
|
|
*eax = 0xffffffff;
|
|
|
|
}
|
2011-05-13 04:54:01 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Register all four byte lanes of the CONF1 data window. */
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);
|
|
|
|
|
Initial support for bhyve save and restore.
Save and restore (also known as suspend and resume) permits a snapshot
to be taken of a guest's state that can later be resumed. In the
current implementation, bhyve(8) creates a UNIX domain socket that is
used by bhyvectl(8) to send a request to save a snapshot (and
optionally exit after the snapshot has been taken). A snapshot
currently consists of two files: the first holds a copy of guest RAM,
and the second file holds other guest state such as vCPU register
values and device model state.
To resume a guest, bhyve(8) must be started with a matching pair of
command line arguments to instantiate the same set of device models as
well as a pointer to the saved snapshot.
While the current implementation is useful for several uses cases, it
has a few limitations. The file format for saving the guest state is
tied to the ABI of internal bhyve structures and is not
self-describing (in that it does not communicate the set of device
models present in the system). In addition, the state saved for some
device models closely matches the internal data structures which might
prove a challenge for compatibility of snapshot files across a range
of bhyve versions. The file format also does not currently support
versioning of individual chunks of state. As a result, the current
file format is not a fixed binary format and future revisions to save
and restore will break binary compatiblity of snapshot files. The
goal is to move to a more flexible format that adds versioning,
etc. and at that point to commit to providing a reasonable level of
compatibility. As a result, the current implementation is not enabled
by default. It can be enabled via the WITH_BHYVE_SNAPSHOT=yes option
for userland builds, and the kernel option BHYVE_SHAPSHOT.
Submitted by: Mihai Tiganus, Flavius Anton, Darius Mihai
Submitted by: Elena Mihailescu, Mihai Carabas, Sergiu Weisz
Relnotes: yes
Sponsored by: University Politehnica of Bucharest
Sponsored by: Matthew Grooms (student scholarships)
Sponsored by: iXsystems
Differential Revision: https://reviews.freebsd.org/D19495
2020-05-05 00:02:04 +00:00
|
|
|
#ifdef BHYVE_SNAPSHOT
|
|
|
|
/*
 * Saves/restores PCI device emulated state. Returns 0 on success.
 *
 * NOTE(review): the order of the SNAPSHOT_* invocations below defines the
 * layout of this chunk in the snapshot file; reordering them breaks
 * compatibility with previously written snapshots.
 */
static int
pci_snapshot_pci_dev(struct vm_snapshot_meta *meta)
{
	struct pci_devinst *pi;
	int i;
	int ret;

	pi = meta->dev_data;

	/* MSI capability state. */
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.enabled, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.addr, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.msg_data, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.maxmsgnum, meta, ret, done);

	/* MSI-X capability state; the table entries are handled below. */
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.enabled, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_bar, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_bar, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_offset, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_count, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_offset, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_size, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.function_mask, meta, ret, done);

	/* Raw PCI configuration space. */
	SNAPSHOT_BUF_OR_LEAVE(pi->pi_cfgdata, sizeof(pi->pi_cfgdata),
	    meta, ret, done);

	/* Type, size and address of every BAR slot. */
	for (i = 0; i < (int)nitems(pi->pi_bar); i++) {
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].type, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].size, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].addr, meta, ret, done);
	}

	/* Restore MSI-X table. */
	for (i = 0; i < pi->pi_msix.table_count; i++) {
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].addr,
		    meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].msg_data,
		    meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].vector_control,
		    meta, ret, done);
	}

done:
	return (ret);
}
|
|
|
|
|
|
|
|
int
|
|
|
|
pci_snapshot(struct vm_snapshot_meta *meta)
|
|
|
|
{
|
|
|
|
struct pci_devemu *pde;
|
|
|
|
struct pci_devinst *pdi;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
assert(meta->dev_name != NULL);
|
|
|
|
|
2023-05-15 14:29:04 +00:00
|
|
|
pdi = meta->dev_data;
|
|
|
|
pde = pdi->pi_d;
|
Initial support for bhyve save and restore.
Save and restore (also known as suspend and resume) permits a snapshot
to be taken of a guest's state that can later be resumed. In the
current implementation, bhyve(8) creates a UNIX domain socket that is
used by bhyvectl(8) to send a request to save a snapshot (and
optionally exit after the snapshot has been taken). A snapshot
currently consists of two files: the first holds a copy of guest RAM,
and the second file holds other guest state such as vCPU register
values and device model state.
To resume a guest, bhyve(8) must be started with a matching pair of
command line arguments to instantiate the same set of device models as
well as a pointer to the saved snapshot.
While the current implementation is useful for several uses cases, it
has a few limitations. The file format for saving the guest state is
tied to the ABI of internal bhyve structures and is not
self-describing (in that it does not communicate the set of device
models present in the system). In addition, the state saved for some
device models closely matches the internal data structures which might
prove a challenge for compatibility of snapshot files across a range
of bhyve versions. The file format also does not currently support
versioning of individual chunks of state. As a result, the current
file format is not a fixed binary format and future revisions to save
and restore will break binary compatiblity of snapshot files. The
goal is to move to a more flexible format that adds versioning,
etc. and at that point to commit to providing a reasonable level of
compatibility. As a result, the current implementation is not enabled
by default. It can be enabled via the WITH_BHYVE_SNAPSHOT=yes option
for userland builds, and the kernel option BHYVE_SHAPSHOT.
Submitted by: Mihai Tiganus, Flavius Anton, Darius Mihai
Submitted by: Elena Mihailescu, Mihai Carabas, Sergiu Weisz
Relnotes: yes
Sponsored by: University Politehnica of Bucharest
Sponsored by: Matthew Grooms (student scholarships)
Sponsored by: iXsystems
Differential Revision: https://reviews.freebsd.org/D19495
2020-05-05 00:02:04 +00:00
|
|
|
|
2023-05-15 14:29:04 +00:00
|
|
|
if (pde->pe_snapshot == NULL)
|
|
|
|
return (ENOTSUP);
|
Initial support for bhyve save and restore.
Save and restore (also known as suspend and resume) permits a snapshot
to be taken of a guest's state that can later be resumed. In the
current implementation, bhyve(8) creates a UNIX domain socket that is
used by bhyvectl(8) to send a request to save a snapshot (and
optionally exit after the snapshot has been taken). A snapshot
currently consists of two files: the first holds a copy of guest RAM,
and the second file holds other guest state such as vCPU register
values and device model state.
To resume a guest, bhyve(8) must be started with a matching pair of
command line arguments to instantiate the same set of device models as
well as a pointer to the saved snapshot.
While the current implementation is useful for several uses cases, it
has a few limitations. The file format for saving the guest state is
tied to the ABI of internal bhyve structures and is not
self-describing (in that it does not communicate the set of device
models present in the system). In addition, the state saved for some
device models closely matches the internal data structures which might
prove a challenge for compatibility of snapshot files across a range
of bhyve versions. The file format also does not currently support
versioning of individual chunks of state. As a result, the current
file format is not a fixed binary format and future revisions to save
and restore will break binary compatiblity of snapshot files. The
goal is to move to a more flexible format that adds versioning,
etc. and at that point to commit to providing a reasonable level of
compatibility. As a result, the current implementation is not enabled
by default. It can be enabled via the WITH_BHYVE_SNAPSHOT=yes option
for userland builds, and the kernel option BHYVE_SHAPSHOT.
Submitted by: Mihai Tiganus, Flavius Anton, Darius Mihai
Submitted by: Elena Mihailescu, Mihai Carabas, Sergiu Weisz
Relnotes: yes
Sponsored by: University Politehnica of Bucharest
Sponsored by: Matthew Grooms (student scholarships)
Sponsored by: iXsystems
Differential Revision: https://reviews.freebsd.org/D19495
2020-05-05 00:02:04 +00:00
|
|
|
|
|
|
|
ret = pci_snapshot_pci_dev(meta);
|
2023-05-15 14:29:04 +00:00
|
|
|
if (ret == 0)
|
|
|
|
ret = (*pde->pe_snapshot)(meta);
|
Initial support for bhyve save and restore.
Save and restore (also known as suspend and resume) permits a snapshot
to be taken of a guest's state that can later be resumed. In the
current implementation, bhyve(8) creates a UNIX domain socket that is
used by bhyvectl(8) to send a request to save a snapshot (and
optionally exit after the snapshot has been taken). A snapshot
currently consists of two files: the first holds a copy of guest RAM,
and the second file holds other guest state such as vCPU register
values and device model state.
To resume a guest, bhyve(8) must be started with a matching pair of
command line arguments to instantiate the same set of device models as
well as a pointer to the saved snapshot.
While the current implementation is useful for several uses cases, it
has a few limitations. The file format for saving the guest state is
tied to the ABI of internal bhyve structures and is not
self-describing (in that it does not communicate the set of device
models present in the system). In addition, the state saved for some
device models closely matches the internal data structures which might
prove a challenge for compatibility of snapshot files across a range
of bhyve versions. The file format also does not currently support
versioning of individual chunks of state. As a result, the current
file format is not a fixed binary format and future revisions to save
and restore will break binary compatiblity of snapshot files. The
goal is to move to a more flexible format that adds versioning,
etc. and at that point to commit to providing a reasonable level of
compatibility. As a result, the current implementation is not enabled
by default. It can be enabled via the WITH_BHYVE_SNAPSHOT=yes option
for userland builds, and the kernel option BHYVE_SHAPSHOT.
Submitted by: Mihai Tiganus, Flavius Anton, Darius Mihai
Submitted by: Elena Mihailescu, Mihai Carabas, Sergiu Weisz
Relnotes: yes
Sponsored by: University Politehnica of Bucharest
Sponsored by: Matthew Grooms (student scholarships)
Sponsored by: iXsystems
Differential Revision: https://reviews.freebsd.org/D19495
2020-05-05 00:02:04 +00:00
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2023-05-15 14:29:04 +00:00
|
|
|
pci_pause(struct pci_devinst *pdi)
|
Initial support for bhyve save and restore.
Save and restore (also known as suspend and resume) permits a snapshot
to be taken of a guest's state that can later be resumed. In the
current implementation, bhyve(8) creates a UNIX domain socket that is
used by bhyvectl(8) to send a request to save a snapshot (and
optionally exit after the snapshot has been taken). A snapshot
currently consists of two files: the first holds a copy of guest RAM,
and the second file holds other guest state such as vCPU register
values and device model state.
To resume a guest, bhyve(8) must be started with a matching pair of
command line arguments to instantiate the same set of device models as
well as a pointer to the saved snapshot.
While the current implementation is useful for several uses cases, it
has a few limitations. The file format for saving the guest state is
tied to the ABI of internal bhyve structures and is not
self-describing (in that it does not communicate the set of device
models present in the system). In addition, the state saved for some
device models closely matches the internal data structures which might
prove a challenge for compatibility of snapshot files across a range
of bhyve versions. The file format also does not currently support
versioning of individual chunks of state. As a result, the current
file format is not a fixed binary format and future revisions to save
and restore will break binary compatiblity of snapshot files. The
goal is to move to a more flexible format that adds versioning,
etc. and at that point to commit to providing a reasonable level of
compatibility. As a result, the current implementation is not enabled
by default. It can be enabled via the WITH_BHYVE_SNAPSHOT=yes option
for userland builds, and the kernel option BHYVE_SHAPSHOT.
Submitted by: Mihai Tiganus, Flavius Anton, Darius Mihai
Submitted by: Elena Mihailescu, Mihai Carabas, Sergiu Weisz
Relnotes: yes
Sponsored by: University Politehnica of Bucharest
Sponsored by: Matthew Grooms (student scholarships)
Sponsored by: iXsystems
Differential Revision: https://reviews.freebsd.org/D19495
2020-05-05 00:02:04 +00:00
|
|
|
{
|
2023-05-15 14:29:04 +00:00
|
|
|
struct pci_devemu *pde = pdi->pi_d;
|
Initial support for bhyve save and restore.
Save and restore (also known as suspend and resume) permits a snapshot
to be taken of a guest's state that can later be resumed. In the
current implementation, bhyve(8) creates a UNIX domain socket that is
used by bhyvectl(8) to send a request to save a snapshot (and
optionally exit after the snapshot has been taken). A snapshot
currently consists of two files: the first holds a copy of guest RAM,
and the second file holds other guest state such as vCPU register
values and device model state.
To resume a guest, bhyve(8) must be started with a matching pair of
command line arguments to instantiate the same set of device models as
well as a pointer to the saved snapshot.
While the current implementation is useful for several uses cases, it
has a few limitations. The file format for saving the guest state is
tied to the ABI of internal bhyve structures and is not
self-describing (in that it does not communicate the set of device
models present in the system). In addition, the state saved for some
device models closely matches the internal data structures which might
prove a challenge for compatibility of snapshot files across a range
of bhyve versions. The file format also does not currently support
versioning of individual chunks of state. As a result, the current
file format is not a fixed binary format and future revisions to save
and restore will break binary compatiblity of snapshot files. The
goal is to move to a more flexible format that adds versioning,
etc. and at that point to commit to providing a reasonable level of
compatibility. As a result, the current implementation is not enabled
by default. It can be enabled via the WITH_BHYVE_SNAPSHOT=yes option
for userland builds, and the kernel option BHYVE_SHAPSHOT.
Submitted by: Mihai Tiganus, Flavius Anton, Darius Mihai
Submitted by: Elena Mihailescu, Mihai Carabas, Sergiu Weisz
Relnotes: yes
Sponsored by: University Politehnica of Bucharest
Sponsored by: Matthew Grooms (student scholarships)
Sponsored by: iXsystems
Differential Revision: https://reviews.freebsd.org/D19495
2020-05-05 00:02:04 +00:00
|
|
|
|
|
|
|
if (pde->pe_pause == NULL) {
|
|
|
|
/* The pause/resume functionality is optional. */
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2023-01-19 18:30:18 +00:00
|
|
|
return (*pde->pe_pause)(pdi);
|
Initial support for bhyve save and restore.
Save and restore (also known as suspend and resume) permits a snapshot
to be taken of a guest's state that can later be resumed. In the
current implementation, bhyve(8) creates a UNIX domain socket that is
used by bhyvectl(8) to send a request to save a snapshot (and
optionally exit after the snapshot has been taken). A snapshot
currently consists of two files: the first holds a copy of guest RAM,
and the second file holds other guest state such as vCPU register
values and device model state.
To resume a guest, bhyve(8) must be started with a matching pair of
command line arguments to instantiate the same set of device models as
well as a pointer to the saved snapshot.
While the current implementation is useful for several uses cases, it
has a few limitations. The file format for saving the guest state is
tied to the ABI of internal bhyve structures and is not
self-describing (in that it does not communicate the set of device
models present in the system). In addition, the state saved for some
device models closely matches the internal data structures which might
prove a challenge for compatibility of snapshot files across a range
of bhyve versions. The file format also does not currently support
versioning of individual chunks of state. As a result, the current
file format is not a fixed binary format and future revisions to save
and restore will break binary compatiblity of snapshot files. The
goal is to move to a more flexible format that adds versioning,
etc. and at that point to commit to providing a reasonable level of
compatibility. As a result, the current implementation is not enabled
by default. It can be enabled via the WITH_BHYVE_SNAPSHOT=yes option
for userland builds, and the kernel option BHYVE_SHAPSHOT.
Submitted by: Mihai Tiganus, Flavius Anton, Darius Mihai
Submitted by: Elena Mihailescu, Mihai Carabas, Sergiu Weisz
Relnotes: yes
Sponsored by: University Politehnica of Bucharest
Sponsored by: Matthew Grooms (student scholarships)
Sponsored by: iXsystems
Differential Revision: https://reviews.freebsd.org/D19495
2020-05-05 00:02:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2023-05-15 14:29:04 +00:00
|
|
|
pci_resume(struct pci_devinst *pdi)
|
Initial support for bhyve save and restore.
Save and restore (also known as suspend and resume) permits a snapshot
to be taken of a guest's state that can later be resumed. In the
current implementation, bhyve(8) creates a UNIX domain socket that is
used by bhyvectl(8) to send a request to save a snapshot (and
optionally exit after the snapshot has been taken). A snapshot
currently consists of two files: the first holds a copy of guest RAM,
and the second file holds other guest state such as vCPU register
values and device model state.
To resume a guest, bhyve(8) must be started with a matching pair of
command line arguments to instantiate the same set of device models as
well as a pointer to the saved snapshot.
While the current implementation is useful for several uses cases, it
has a few limitations. The file format for saving the guest state is
tied to the ABI of internal bhyve structures and is not
self-describing (in that it does not communicate the set of device
models present in the system). In addition, the state saved for some
device models closely matches the internal data structures which might
prove a challenge for compatibility of snapshot files across a range
of bhyve versions. The file format also does not currently support
versioning of individual chunks of state. As a result, the current
file format is not a fixed binary format and future revisions to save
and restore will break binary compatiblity of snapshot files. The
goal is to move to a more flexible format that adds versioning,
etc. and at that point to commit to providing a reasonable level of
compatibility. As a result, the current implementation is not enabled
by default. It can be enabled via the WITH_BHYVE_SNAPSHOT=yes option
for userland builds, and the kernel option BHYVE_SHAPSHOT.
Submitted by: Mihai Tiganus, Flavius Anton, Darius Mihai
Submitted by: Elena Mihailescu, Mihai Carabas, Sergiu Weisz
Relnotes: yes
Sponsored by: University Politehnica of Bucharest
Sponsored by: Matthew Grooms (student scholarships)
Sponsored by: iXsystems
Differential Revision: https://reviews.freebsd.org/D19495
2020-05-05 00:02:04 +00:00
|
|
|
{
|
2023-05-15 14:29:04 +00:00
|
|
|
struct pci_devemu *pde = pdi->pi_d;
|
Initial support for bhyve save and restore.
Save and restore (also known as suspend and resume) permits a snapshot
to be taken of a guest's state that can later be resumed. In the
current implementation, bhyve(8) creates a UNIX domain socket that is
used by bhyvectl(8) to send a request to save a snapshot (and
optionally exit after the snapshot has been taken). A snapshot
currently consists of two files: the first holds a copy of guest RAM,
and the second file holds other guest state such as vCPU register
values and device model state.
To resume a guest, bhyve(8) must be started with a matching pair of
command line arguments to instantiate the same set of device models as
well as a pointer to the saved snapshot.
While the current implementation is useful for several uses cases, it
has a few limitations. The file format for saving the guest state is
tied to the ABI of internal bhyve structures and is not
self-describing (in that it does not communicate the set of device
models present in the system). In addition, the state saved for some
device models closely matches the internal data structures which might
prove a challenge for compatibility of snapshot files across a range
of bhyve versions. The file format also does not currently support
versioning of individual chunks of state. As a result, the current
file format is not a fixed binary format and future revisions to save
and restore will break binary compatiblity of snapshot files. The
goal is to move to a more flexible format that adds versioning,
etc. and at that point to commit to providing a reasonable level of
compatibility. As a result, the current implementation is not enabled
by default. It can be enabled via the WITH_BHYVE_SNAPSHOT=yes option
for userland builds, and the kernel option BHYVE_SHAPSHOT.
Submitted by: Mihai Tiganus, Flavius Anton, Darius Mihai
Submitted by: Elena Mihailescu, Mihai Carabas, Sergiu Weisz
Relnotes: yes
Sponsored by: University Politehnica of Bucharest
Sponsored by: Matthew Grooms (student scholarships)
Sponsored by: iXsystems
Differential Revision: https://reviews.freebsd.org/D19495
2020-05-05 00:02:04 +00:00
|
|
|
|
|
|
|
if (pde->pe_resume == NULL) {
|
|
|
|
/* The pause/resume functionality is optional. */
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2023-01-19 18:30:18 +00:00
|
|
|
return (*pde->pe_resume)(pdi);
|
Initial support for bhyve save and restore.
Save and restore (also known as suspend and resume) permits a snapshot
to be taken of a guest's state that can later be resumed. In the
current implementation, bhyve(8) creates a UNIX domain socket that is
used by bhyvectl(8) to send a request to save a snapshot (and
optionally exit after the snapshot has been taken). A snapshot
currently consists of two files: the first holds a copy of guest RAM,
and the second file holds other guest state such as vCPU register
values and device model state.
To resume a guest, bhyve(8) must be started with a matching pair of
command line arguments to instantiate the same set of device models as
well as a pointer to the saved snapshot.
While the current implementation is useful for several uses cases, it
has a few limitations. The file format for saving the guest state is
tied to the ABI of internal bhyve structures and is not
self-describing (in that it does not communicate the set of device
models present in the system). In addition, the state saved for some
device models closely matches the internal data structures which might
prove a challenge for compatibility of snapshot files across a range
of bhyve versions. The file format also does not currently support
versioning of individual chunks of state. As a result, the current
file format is not a fixed binary format and future revisions to save
and restore will break binary compatiblity of snapshot files. The
goal is to move to a more flexible format that adds versioning,
etc. and at that point to commit to providing a reasonable level of
compatibility. As a result, the current implementation is not enabled
by default. It can be enabled via the WITH_BHYVE_SNAPSHOT=yes option
for userland builds, and the kernel option BHYVE_SHAPSHOT.
Submitted by: Mihai Tiganus, Flavius Anton, Darius Mihai
Submitted by: Elena Mihailescu, Mihai Carabas, Sergiu Weisz
Relnotes: yes
Sponsored by: University Politehnica of Bucharest
Sponsored by: Matthew Grooms (student scholarships)
Sponsored by: iXsystems
Differential Revision: https://reviews.freebsd.org/D19495
2020-05-05 00:02:04 +00:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
#define PCI_EMUL_TEST
#ifdef PCI_EMUL_TEST
/*
 * Define a dummy test device
 */
#define DIOSZ	8	/* size of the test device's I/O port BAR (BAR 0) */
#define DMEMSZ	4096	/* size of each of its two MEM32 BARs (BARs 1, 2) */
/* Per-instance soft state for the dummy test device. */
struct pci_emul_dsoftc {
	uint8_t   ioregs[DIOSZ];	/* backing store for BAR 0 */
	uint8_t	  memregs[2][DMEMSZ];	/* backing store for BARs 1 and 2 */
};

#define	PCI_EMUL_MSI_MSGS	 4	/* MSI vectors advertised */
#define	PCI_EMUL_MSIX_MSGS	16	/* MSI-X vectors advertised */
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2011-05-15 04:03:11 +00:00
|
|
|
static int
|
2023-01-19 18:30:18 +00:00
|
|
|
pci_emul_dinit(struct pci_devinst *pi, nvlist_t *nvl __unused)
|
2011-05-13 04:54:01 +00:00
|
|
|
{
|
|
|
|
int error;
|
|
|
|
struct pci_emul_dsoftc *sc;
|
|
|
|
|
2014-04-22 18:55:21 +00:00
|
|
|
sc = calloc(1, sizeof(struct pci_emul_dsoftc));
|
2011-05-13 04:54:01 +00:00
|
|
|
|
|
|
|
pi->pi_arg = sc;
|
|
|
|
|
|
|
|
pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001);
|
|
|
|
pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD);
|
|
|
|
pci_set_cfgdata8(pi, PCIR_CLASS, 0x02);
|
|
|
|
|
2012-10-19 18:11:17 +00:00
|
|
|
error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS);
|
|
|
|
assert(error == 0);
|
|
|
|
|
|
|
|
error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ);
|
2011-05-13 04:54:01 +00:00
|
|
|
assert(error == 0);
|
|
|
|
|
2012-10-19 18:11:17 +00:00
|
|
|
error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ);
|
2011-05-13 04:54:01 +00:00
|
|
|
assert(error == 0);
|
|
|
|
|
2015-05-02 03:25:24 +00:00
|
|
|
error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ);
|
|
|
|
assert(error == 0);
|
|
|
|
|
2011-05-13 04:54:01 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2011-05-15 04:03:11 +00:00
|
|
|
static void
|
2023-01-19 18:30:18 +00:00
|
|
|
pci_emul_diow(struct pci_devinst *pi, int baridx, uint64_t offset, int size,
|
2022-10-08 15:22:38 +00:00
|
|
|
uint64_t value)
|
2011-05-13 04:54:01 +00:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct pci_emul_dsoftc *sc = pi->pi_arg;
|
|
|
|
|
2012-10-19 18:11:17 +00:00
|
|
|
if (baridx == 0) {
|
|
|
|
if (offset + size > DIOSZ) {
|
|
|
|
printf("diow: iow too large, offset %ld size %d\n",
|
|
|
|
offset, size);
|
|
|
|
return;
|
|
|
|
}
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2012-10-19 18:11:17 +00:00
|
|
|
if (size == 1) {
|
|
|
|
sc->ioregs[offset] = value & 0xff;
|
|
|
|
} else if (size == 2) {
|
|
|
|
*(uint16_t *)&sc->ioregs[offset] = value & 0xffff;
|
|
|
|
} else if (size == 4) {
|
|
|
|
*(uint32_t *)&sc->ioregs[offset] = value;
|
|
|
|
} else {
|
|
|
|
printf("diow: iow unknown size %d\n", size);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Special magic value to generate an interrupt
|
|
|
|
*/
|
|
|
|
if (offset == 4 && size == 4 && pci_msi_enabled(pi))
|
2013-12-16 19:59:31 +00:00
|
|
|
pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi));
|
2012-10-19 18:11:17 +00:00
|
|
|
|
|
|
|
if (value == 0xabcdef) {
|
2013-12-16 19:59:31 +00:00
|
|
|
for (i = 0; i < pci_msi_maxmsgnum(pi); i++)
|
2012-10-19 18:11:17 +00:00
|
|
|
pci_generate_msi(pi, i);
|
|
|
|
}
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
2015-05-02 03:25:24 +00:00
|
|
|
if (baridx == 1 || baridx == 2) {
|
2012-10-19 18:11:17 +00:00
|
|
|
if (offset + size > DMEMSZ) {
|
|
|
|
printf("diow: memw too large, offset %ld size %d\n",
|
|
|
|
offset, size);
|
|
|
|
return;
|
|
|
|
}
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2015-05-02 03:25:24 +00:00
|
|
|
i = baridx - 1; /* 'memregs' index */
|
|
|
|
|
2012-10-19 18:11:17 +00:00
|
|
|
if (size == 1) {
|
2015-05-02 03:25:24 +00:00
|
|
|
sc->memregs[i][offset] = value;
|
2012-10-19 18:11:17 +00:00
|
|
|
} else if (size == 2) {
|
2015-05-02 03:25:24 +00:00
|
|
|
*(uint16_t *)&sc->memregs[i][offset] = value;
|
2012-10-19 18:11:17 +00:00
|
|
|
} else if (size == 4) {
|
2015-05-02 03:25:24 +00:00
|
|
|
*(uint32_t *)&sc->memregs[i][offset] = value;
|
2012-10-19 18:11:17 +00:00
|
|
|
} else if (size == 8) {
|
2015-05-02 03:25:24 +00:00
|
|
|
*(uint64_t *)&sc->memregs[i][offset] = value;
|
2012-10-19 18:11:17 +00:00
|
|
|
} else {
|
|
|
|
printf("diow: memw unknown size %d\n", size);
|
|
|
|
}
|
2021-12-26 07:52:38 +00:00
|
|
|
|
2012-10-19 18:11:17 +00:00
|
|
|
/*
|
|
|
|
* magic interrupt ??
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
2016-05-13 14:59:02 +00:00
|
|
|
if (baridx > 2 || baridx < 0) {
|
2012-10-19 18:11:17 +00:00
|
|
|
printf("diow: unknown bar idx %d\n", baridx);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-10-19 18:11:17 +00:00
|
|
|
static uint64_t
|
2023-01-19 18:30:18 +00:00
|
|
|
pci_emul_dior(struct pci_devinst *pi, int baridx, uint64_t offset, int size)
|
2011-05-13 04:54:01 +00:00
|
|
|
{
|
|
|
|
struct pci_emul_dsoftc *sc = pi->pi_arg;
|
|
|
|
uint32_t value;
|
2015-05-02 03:25:24 +00:00
|
|
|
int i;
|
2011-05-13 04:54:01 +00:00
|
|
|
|
2012-10-19 18:11:17 +00:00
|
|
|
if (baridx == 0) {
|
|
|
|
if (offset + size > DIOSZ) {
|
|
|
|
printf("dior: ior too large, offset %ld size %d\n",
|
|
|
|
offset, size);
|
|
|
|
return (0);
|
|
|
|
}
|
2021-12-26 07:52:38 +00:00
|
|
|
|
2016-03-31 19:07:03 +00:00
|
|
|
value = 0;
|
2012-10-19 18:11:17 +00:00
|
|
|
if (size == 1) {
|
|
|
|
value = sc->ioregs[offset];
|
|
|
|
} else if (size == 2) {
|
|
|
|
value = *(uint16_t *) &sc->ioregs[offset];
|
|
|
|
} else if (size == 4) {
|
|
|
|
value = *(uint32_t *) &sc->ioregs[offset];
|
|
|
|
} else {
|
|
|
|
printf("dior: ior unknown size %d\n", size);
|
|
|
|
}
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
2015-05-02 03:25:24 +00:00
|
|
|
|
|
|
|
if (baridx == 1 || baridx == 2) {
|
2012-10-19 18:11:17 +00:00
|
|
|
if (offset + size > DMEMSZ) {
|
|
|
|
printf("dior: memr too large, offset %ld size %d\n",
|
|
|
|
offset, size);
|
|
|
|
return (0);
|
|
|
|
}
|
2021-12-26 07:52:38 +00:00
|
|
|
|
2015-05-02 03:25:24 +00:00
|
|
|
i = baridx - 1; /* 'memregs' index */
|
|
|
|
|
2012-10-19 18:11:17 +00:00
|
|
|
if (size == 1) {
|
2015-05-02 03:25:24 +00:00
|
|
|
value = sc->memregs[i][offset];
|
2012-10-19 18:11:17 +00:00
|
|
|
} else if (size == 2) {
|
2015-05-02 03:25:24 +00:00
|
|
|
value = *(uint16_t *) &sc->memregs[i][offset];
|
2012-10-19 18:11:17 +00:00
|
|
|
} else if (size == 4) {
|
2015-05-02 03:25:24 +00:00
|
|
|
value = *(uint32_t *) &sc->memregs[i][offset];
|
2012-10-19 18:11:17 +00:00
|
|
|
} else if (size == 8) {
|
2015-05-02 03:25:24 +00:00
|
|
|
value = *(uint64_t *) &sc->memregs[i][offset];
|
2012-10-19 18:11:17 +00:00
|
|
|
} else {
|
|
|
|
printf("dior: ior unknown size %d\n", size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2016-05-13 14:59:02 +00:00
|
|
|
if (baridx > 2 || baridx < 0) {
|
2012-10-19 18:11:17 +00:00
|
|
|
printf("dior: unknown bar idx %d\n", baridx);
|
|
|
|
return (0);
|
2011-05-13 04:54:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return (value);
|
|
|
|
}
|
|
|
|
|
Initial support for bhyve save and restore.
Save and restore (also known as suspend and resume) permits a snapshot
to be taken of a guest's state that can later be resumed. In the
current implementation, bhyve(8) creates a UNIX domain socket that is
used by bhyvectl(8) to send a request to save a snapshot (and
optionally exit after the snapshot has been taken). A snapshot
currently consists of two files: the first holds a copy of guest RAM,
and the second file holds other guest state such as vCPU register
values and device model state.
To resume a guest, bhyve(8) must be started with a matching pair of
command line arguments to instantiate the same set of device models as
well as a pointer to the saved snapshot.
While the current implementation is useful for several uses cases, it
has a few limitations. The file format for saving the guest state is
tied to the ABI of internal bhyve structures and is not
self-describing (in that it does not communicate the set of device
models present in the system). In addition, the state saved for some
device models closely matches the internal data structures which might
prove a challenge for compatibility of snapshot files across a range
of bhyve versions. The file format also does not currently support
versioning of individual chunks of state. As a result, the current
file format is not a fixed binary format and future revisions to save
and restore will break binary compatiblity of snapshot files. The
goal is to move to a more flexible format that adds versioning,
etc. and at that point to commit to providing a reasonable level of
compatibility. As a result, the current implementation is not enabled
by default. It can be enabled via the WITH_BHYVE_SNAPSHOT=yes option
for userland builds, and the kernel option BHYVE_SHAPSHOT.
Submitted by: Mihai Tiganus, Flavius Anton, Darius Mihai
Submitted by: Elena Mihailescu, Mihai Carabas, Sergiu Weisz
Relnotes: yes
Sponsored by: University Politehnica of Bucharest
Sponsored by: Matthew Grooms (student scholarships)
Sponsored by: iXsystems
Differential Revision: https://reviews.freebsd.org/D19495
2020-05-05 00:02:04 +00:00
|
|
|
#ifdef BHYVE_SNAPSHOT
|
2023-05-15 14:29:04 +00:00
|
|
|
struct pci_devinst *
|
|
|
|
pci_next(const struct pci_devinst *cursor)
|
|
|
|
{
|
|
|
|
unsigned bus = 0, slot = 0, func = 0;
|
|
|
|
struct businfo *bi;
|
|
|
|
struct slotinfo *si;
|
|
|
|
struct funcinfo *fi;
|
|
|
|
|
|
|
|
bus = cursor ? cursor->pi_bus : 0;
|
|
|
|
slot = cursor ? cursor->pi_slot : 0;
|
|
|
|
func = cursor ? (cursor->pi_func + 1) : 0;
|
|
|
|
|
|
|
|
for (; bus < MAXBUSES; bus++) {
|
|
|
|
if ((bi = pci_businfo[bus]) == NULL)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (slot >= MAXSLOTS)
|
|
|
|
slot = 0;
|
|
|
|
|
|
|
|
for (; slot < MAXSLOTS; slot++) {
|
|
|
|
si = &bi->slotinfo[slot];
|
|
|
|
if (func >= MAXFUNCS)
|
|
|
|
func = 0;
|
|
|
|
for (; func < MAXFUNCS; func++) {
|
|
|
|
fi = &si->si_funcs[func];
|
|
|
|
if (fi->fi_devi == NULL)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
return (fi->fi_devi);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
2022-10-22 17:41:33 +00:00
|
|
|
/*
 * Snapshot callback for the dummy test device.  The device keeps no
 * state beyond its register arrays that the snapshot framework asks it
 * to save here, so this is a no-op that reports success.
 */
static int
pci_emul_snapshot(struct vm_snapshot_meta *meta __unused)
{
	return (0);
}
|
|
|
|
#endif
|
|
|
|
|
2022-08-16 17:12:32 +00:00
|
|
|
/*
 * Device-model dispatch table for the "dummy" test device, wiring the
 * init/BAR-write/BAR-read (and, when built with snapshot support,
 * snapshot) handlers defined above.  PCI_EMUL_SET presumably adds it
 * to the linker set of available PCI emulations — confirm against the
 * macro's definition.
 */
static const struct pci_devemu pci_dummy = {
	.pe_emu = "dummy",
	.pe_init = pci_emul_dinit,
	.pe_barwrite = pci_emul_diow,
	.pe_barread = pci_emul_dior,
#ifdef BHYVE_SNAPSHOT
	.pe_snapshot = pci_emul_snapshot,
#endif
};
PCI_EMUL_SET(pci_dummy);
|
|
|
|
|
|
|
|
#endif /* PCI_EMUL_TEST */
|