freebsd-dev/usr.sbin/bhyve/pci_emul.h
John Baldwin 621b509048 Refactor configuration management in bhyve.
Replace the existing ad-hoc configuration via various global variables
with a small database of key-value pairs.  The database supports
heirarchical keys using a MIB-like syntax to name the path to a given
key.  Values are always stored as strings.  The API used to manage
configuation values does include wrappers to handling boolean values.
Other values use non-string types require parsing by consumers.

The configuration values are stored in a tree using nvlists.  Leaf
nodes hold string values.  Configuration values are permitted to
reference other configuration values using '%(name)'.  This permits
constructing template configurations.

All existing command line arguments now set configuration values.  For
devices, the "-s" option parses its option argument to generate a list
of key-value pairs for the given device.

A new '-o' command line option permits setting an individual
configuration variable.  The key name is always given as a full path
of dot-separated components.

A new '-k' command line option parses a simple configuration file.
This configuration file holds a flat list of 'key=value' lines where
the 'key' is the full path of a configuration variable.  Lines
starting with a '#' are comments.

In general, bhyve starts by parsing command line options in sequence
and applying those settings to configuration values.  Once this is
complete, bhyve then begins initializing its state based on the
configuration values.  This means that subsequent configuration
options or files may override or supplement previously given settings.

A special 'config.dump' configuration value can be set to true to help
debug configuration issues.  When this value is set, bhyve will print
out the configuration variables as a flat list of 'key=value' lines.

Most command line argments map to a single configuration variable,
e.g.  '-w' sets the 'x86.strictmsr' value to false.  A few command
line arguments have less obvious effects:

- Multiple '-p' options append their values (as a comma-seperated
  list) to "vcpu.N.cpuset" values (where N is a decimal vcpu number).

- For '-s' options, a pci.<bus>.<slot>.<function> node is created.
  The first argument to '-s' (the device type) is used as the value of
  a "device" variable.  Additional comma-separated arguments are then
  parsed into 'key=value' pairs and used to set additional variables
  under the device node.  A PCI device emulation driver can provide
  its own hook to override the parsing of the additonal '-s' arguments
  after the device type.

  After the configuration phase as completed, the init_pci hook
  then walks the "pci.<bus>.<slot>.<func>" nodes.  It uses the
  "device" value to find the device model to use.  The device
  model's init routine is passed a reference to its nvlist node
  in the configuration tree which it can query for specific
  variables.

  The result is that a lot of the string parsing is removed from
  the device models and centralized.  In addition, adding a new
  variable just requires teaching the model to look for the new
  variable.

- For '-l' options, a similar model is used where the string is
  parsed into values that are later read during initialization.
  One key note here is that the serial ports use the commonly
  used lowercase names from existing documentation and examples
  (e.g. "lpc.com1") instead of the uppercase names previously
  used internally in bhyve.

Reviewed by:	grehan
MFC after:	3 months
Differential Revision:	https://reviews.freebsd.org/D26035
2021-03-18 16:30:26 -07:00

305 lines
8.6 KiB
C

/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _PCI_EMUL_H_
#define _PCI_EMUL_H_
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/kernel.h>
#include <sys/nv.h>
#include <sys/_pthreadtypes.h>
#include <dev/pci/pcireg.h>
#include <assert.h>
#define PCI_BARMAX PCIR_MAX_BAR_0 /* BAR registers in a Type 0 header */
struct vmctx;
struct pci_devinst;
struct memory_region;
struct vm_snapshot_meta;
struct pci_devemu {
char *pe_emu; /* Name of device emulation */
/* instance creation */
int (*pe_init)(struct vmctx *, struct pci_devinst *,
nvlist_t *);
int (*pe_legacy_config)(nvlist_t *, const char *);
const char *pe_alias;
/* ACPI DSDT enumeration */
void (*pe_write_dsdt)(struct pci_devinst *);
/* config space read/write callbacks */
int (*pe_cfgwrite)(struct vmctx *ctx, int vcpu,
struct pci_devinst *pi, int offset,
int bytes, uint32_t val);
int (*pe_cfgread)(struct vmctx *ctx, int vcpu,
struct pci_devinst *pi, int offset,
int bytes, uint32_t *retval);
/* BAR read/write callbacks */
void (*pe_barwrite)(struct vmctx *ctx, int vcpu,
struct pci_devinst *pi, int baridx,
uint64_t offset, int size, uint64_t value);
uint64_t (*pe_barread)(struct vmctx *ctx, int vcpu,
struct pci_devinst *pi, int baridx,
uint64_t offset, int size);
/* Save/restore device state */
int (*pe_snapshot)(struct vm_snapshot_meta *meta);
int (*pe_pause)(struct vmctx *ctx, struct pci_devinst *pi);
int (*pe_resume)(struct vmctx *ctx, struct pci_devinst *pi);
};
#define PCI_EMUL_SET(x) DATA_SET(pci_devemu_set, x);
enum pcibar_type {
PCIBAR_NONE,
PCIBAR_IO,
PCIBAR_MEM32,
PCIBAR_MEM64,
PCIBAR_MEMHI64
};
struct pcibar {
enum pcibar_type type; /* io or memory */
uint64_t size;
uint64_t addr;
};
#define PI_NAMESZ 40
struct msix_table_entry {
uint64_t addr;
uint32_t msg_data;
uint32_t vector_control;
} __packed;
/*
* In case the structure is modified to hold extra information, use a define
* for the size that should be emulated.
*/
#define MSIX_TABLE_ENTRY_SIZE 16
#define MAX_MSIX_TABLE_ENTRIES 2048
#define PBA_SIZE(msgnum) (roundup2((msgnum), 64) / 8)
enum lintr_stat {
IDLE,
ASSERTED,
PENDING
};
struct pci_devinst {
struct pci_devemu *pi_d;
struct vmctx *pi_vmctx;
uint8_t pi_bus, pi_slot, pi_func;
char pi_name[PI_NAMESZ];
int pi_bar_getsize;
int pi_prevcap;
int pi_capend;
struct {
int8_t pin;
enum lintr_stat state;
int pirq_pin;
int ioapic_irq;
pthread_mutex_t lock;
} pi_lintr;
struct {
int enabled;
uint64_t addr;
uint64_t msg_data;
int maxmsgnum;
} pi_msi;
struct {
int enabled;
int table_bar;
int pba_bar;
uint32_t table_offset;
int table_count;
uint32_t pba_offset;
int pba_size;
int function_mask;
struct msix_table_entry *table; /* allocated at runtime */
void *pba_page;
int pba_page_offset;
} pi_msix;
void *pi_arg; /* devemu-private data */
u_char pi_cfgdata[PCI_REGMAX + 1];
struct pcibar pi_bar[PCI_BARMAX + 1];
};
struct msicap {
uint8_t capid;
uint8_t nextptr;
uint16_t msgctrl;
uint32_t addrlo;
uint32_t addrhi;
uint16_t msgdata;
} __packed;
static_assert(sizeof(struct msicap) == 14, "compile-time assertion failed");
struct msixcap {
uint8_t capid;
uint8_t nextptr;
uint16_t msgctrl;
uint32_t table_info; /* bar index and offset within it */
uint32_t pba_info; /* bar index and offset within it */
} __packed;
static_assert(sizeof(struct msixcap) == 12, "compile-time assertion failed");
struct pciecap {
uint8_t capid;
uint8_t nextptr;
uint16_t pcie_capabilities;
uint32_t dev_capabilities; /* all devices */
uint16_t dev_control;
uint16_t dev_status;
uint32_t link_capabilities; /* devices with links */
uint16_t link_control;
uint16_t link_status;
uint32_t slot_capabilities; /* ports with slots */
uint16_t slot_control;
uint16_t slot_status;
uint16_t root_control; /* root ports */
uint16_t root_capabilities;
uint32_t root_status;
uint32_t dev_capabilities2; /* all devices */
uint16_t dev_control2;
uint16_t dev_status2;
uint32_t link_capabilities2; /* devices with links */
uint16_t link_control2;
uint16_t link_status2;
uint32_t slot_capabilities2; /* ports with slots */
uint16_t slot_control2;
uint16_t slot_status2;
} __packed;
static_assert(sizeof(struct pciecap) == 60, "compile-time assertion failed");
typedef void (*pci_lintr_cb)(int b, int s, int pin, int pirq_pin,
int ioapic_irq, void *arg);
int init_pci(struct vmctx *ctx);
void pci_callback(void);
int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx,
enum pcibar_type type, uint64_t size);
int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum);
int pci_emul_add_pciecap(struct pci_devinst *pi, int pcie_device_type);
void pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes,
uint32_t val, uint8_t capoff, int capid);
void pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old);
void pci_generate_msi(struct pci_devinst *pi, int msgnum);
void pci_generate_msix(struct pci_devinst *pi, int msgnum);
void pci_lintr_assert(struct pci_devinst *pi);
void pci_lintr_deassert(struct pci_devinst *pi);
void pci_lintr_request(struct pci_devinst *pi);
int pci_msi_enabled(struct pci_devinst *pi);
int pci_msix_enabled(struct pci_devinst *pi);
int pci_msix_table_bar(struct pci_devinst *pi);
int pci_msix_pba_bar(struct pci_devinst *pi);
int pci_msi_maxmsgnum(struct pci_devinst *pi);
int pci_parse_legacy_config(nvlist_t *nvl, const char *opt);
int pci_parse_slot(char *opt);
void pci_print_supported_devices();
void pci_populate_msicap(struct msicap *cap, int msgs, int nextptr);
int pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum);
int pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size,
uint64_t value);
uint64_t pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size);
int pci_count_lintr(int bus);
void pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg);
void pci_write_dsdt(void);
uint64_t pci_ecfg_base(void);
int pci_bus_configured(int bus);
#ifdef BHYVE_SNAPSHOT
int pci_snapshot(struct vm_snapshot_meta *meta);
int pci_pause(struct vmctx *ctx, const char *dev_name);
int pci_resume(struct vmctx *ctx, const char *dev_name);
#endif
static __inline void
pci_set_cfgdata8(struct pci_devinst *pi, int offset, uint8_t val)
{
assert(offset <= PCI_REGMAX);
*(uint8_t *)(pi->pi_cfgdata + offset) = val;
}
static __inline void
pci_set_cfgdata16(struct pci_devinst *pi, int offset, uint16_t val)
{
assert(offset <= (PCI_REGMAX - 1) && (offset & 1) == 0);
*(uint16_t *)(pi->pi_cfgdata + offset) = val;
}
static __inline void
pci_set_cfgdata32(struct pci_devinst *pi, int offset, uint32_t val)
{
assert(offset <= (PCI_REGMAX - 3) && (offset & 3) == 0);
*(uint32_t *)(pi->pi_cfgdata + offset) = val;
}
static __inline uint8_t
pci_get_cfgdata8(struct pci_devinst *pi, int offset)
{
assert(offset <= PCI_REGMAX);
return (*(uint8_t *)(pi->pi_cfgdata + offset));
}
static __inline uint16_t
pci_get_cfgdata16(struct pci_devinst *pi, int offset)
{
assert(offset <= (PCI_REGMAX - 1) && (offset & 1) == 0);
return (*(uint16_t *)(pi->pi_cfgdata + offset));
}
static __inline uint32_t
pci_get_cfgdata32(struct pci_devinst *pi, int offset)
{
assert(offset <= (PCI_REGMAX - 3) && (offset & 3) == 0);
return (*(uint32_t *)(pi->pi_cfgdata + offset));
}
#endif /* _PCI_EMUL_H_ */