Create branch for bhyve graphics import.
This commit is contained in:
commit
361da73864
55
Makefile
Normal file
55
Makefile
Normal file
@ -0,0 +1,55 @@
|
||||
#
|
||||
# $FreeBSD$
|
||||
#
|
||||
|
||||
PROG= bhyve
|
||||
PACKAGE= bhyve
|
||||
|
||||
DEBUG_FLAGS= -g -O0
|
||||
|
||||
MAN= bhyve.8
|
||||
|
||||
BHYVE_SYSDIR?=${SRCTOP}
|
||||
|
||||
SRCS= \
|
||||
atkbdc.c \
|
||||
acpi.c \
|
||||
bhyverun.c \
|
||||
block_if.c \
|
||||
bootrom.c \
|
||||
consport.c \
|
||||
dbgport.c \
|
||||
fwctl.c \
|
||||
inout.c \
|
||||
ioapic.c \
|
||||
mem.c \
|
||||
mevent.c \
|
||||
mptbl.c \
|
||||
pci_ahci.c \
|
||||
pci_emul.c \
|
||||
pci_hostbridge.c \
|
||||
pci_irq.c \
|
||||
pci_lpc.c \
|
||||
pci_passthru.c \
|
||||
pci_virtio_block.c \
|
||||
pci_virtio_net.c \
|
||||
pci_virtio_rnd.c \
|
||||
pci_uart.c \
|
||||
pm.c \
|
||||
post.c \
|
||||
rtc.c \
|
||||
smbiostbl.c \
|
||||
task_switch.c \
|
||||
uart_emul.c \
|
||||
virtio.c \
|
||||
xmsr.c \
|
||||
spinup_ap.c
|
||||
|
||||
.PATH: ${BHYVE_SYSDIR}/sys/amd64/vmm
|
||||
SRCS+= vmm_instruction_emul.c
|
||||
|
||||
LIBADD= vmmapi md pthread
|
||||
|
||||
WARNS?= 2
|
||||
|
||||
.include <bsd.prog.mk>
|
22
Makefile.depend
Normal file
22
Makefile.depend
Normal file
@ -0,0 +1,22 @@
|
||||
# $FreeBSD$
|
||||
# Autogenerated - do NOT edit!
|
||||
|
||||
DIRDEPS = \
|
||||
gnu/lib/csu \
|
||||
gnu/lib/libgcc \
|
||||
include \
|
||||
include/xlocale \
|
||||
lib/${CSU_DIR} \
|
||||
lib/libc \
|
||||
lib/libcompiler_rt \
|
||||
lib/libmd \
|
||||
lib/libthr \
|
||||
lib/libutil \
|
||||
lib/libvmmapi \
|
||||
|
||||
|
||||
.include <dirdeps.mk>
|
||||
|
||||
.if ${DEP_RELDIR} == ${_DEP_RELDIR}
|
||||
# local dependencies - needed for -jN in clean tree
|
||||
.endif
|
54
acpi.h
Normal file
54
acpi.h
Normal file
@ -0,0 +1,54 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _ACPI_H_
|
||||
#define _ACPI_H_
|
||||
|
||||
#define SCI_INT 9
|
||||
|
||||
#define SMI_CMD 0xb2
|
||||
#define BHYVE_ACPI_ENABLE 0xa0
|
||||
#define BHYVE_ACPI_DISABLE 0xa1
|
||||
|
||||
#define PM1A_EVT_ADDR 0x400
|
||||
#define PM1A_CNT_ADDR 0x404
|
||||
|
||||
#define IO_PMTMR 0x408 /* 4-byte i/o port for the timer */
|
||||
|
||||
struct vmctx;
|
||||
|
||||
int acpi_build(struct vmctx *ctx, int ncpu);
|
||||
void dsdt_line(const char *fmt, ...);
|
||||
void dsdt_fixed_ioport(uint16_t iobase, uint16_t length);
|
||||
void dsdt_fixed_irq(uint8_t irq);
|
||||
void dsdt_fixed_mem32(uint32_t base, uint32_t length);
|
||||
void dsdt_indent(int levels);
|
||||
void dsdt_unindent(int levels);
|
||||
void sci_init(struct vmctx *ctx);
|
||||
|
||||
#endif /* _ACPI_H_ */
|
322
ahci.h
Normal file
322
ahci.h
Normal file
@ -0,0 +1,322 @@
|
||||
/*-
|
||||
* Copyright (c) 1998 - 2008 Søren Schmidt <sos@FreeBSD.org>
|
||||
* Copyright (c) 2009-2012 Alexander Motin <mav@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer,
|
||||
* without modification, immediately at the beginning of the file.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _AHCI_H_
|
||||
#define _AHCI_H_
|
||||
|
||||
/* ATA register defines */
|
||||
#define ATA_DATA 0 /* (RW) data */
|
||||
|
||||
#define ATA_FEATURE 1 /* (W) feature */
|
||||
#define ATA_F_DMA 0x01 /* enable DMA */
|
||||
#define ATA_F_OVL 0x02 /* enable overlap */
|
||||
|
||||
#define ATA_COUNT 2 /* (W) sector count */
|
||||
|
||||
#define ATA_SECTOR 3 /* (RW) sector # */
|
||||
#define ATA_CYL_LSB 4 /* (RW) cylinder# LSB */
|
||||
#define ATA_CYL_MSB 5 /* (RW) cylinder# MSB */
|
||||
#define ATA_DRIVE 6 /* (W) Sector/Drive/Head */
|
||||
#define ATA_D_LBA 0x40 /* use LBA addressing */
|
||||
#define ATA_D_IBM 0xa0 /* 512 byte sectors, ECC */
|
||||
|
||||
#define ATA_COMMAND 7 /* (W) command */
|
||||
|
||||
#define ATA_ERROR 8 /* (R) error */
|
||||
#define ATA_E_ILI 0x01 /* illegal length */
|
||||
#define ATA_E_NM 0x02 /* no media */
|
||||
#define ATA_E_ABORT 0x04 /* command aborted */
|
||||
#define ATA_E_MCR 0x08 /* media change request */
|
||||
#define ATA_E_IDNF 0x10 /* ID not found */
|
||||
#define ATA_E_MC 0x20 /* media changed */
|
||||
#define ATA_E_UNC 0x40 /* uncorrectable data */
|
||||
#define ATA_E_ICRC 0x80 /* UDMA crc error */
|
||||
#define ATA_E_ATAPI_SENSE_MASK 0xf0 /* ATAPI sense key mask */
|
||||
|
||||
#define ATA_IREASON 9 /* (R) interrupt reason */
|
||||
#define ATA_I_CMD 0x01 /* cmd (1) | data (0) */
|
||||
#define ATA_I_IN 0x02 /* read (1) | write (0) */
|
||||
#define ATA_I_RELEASE 0x04 /* released bus (1) */
|
||||
#define ATA_I_TAGMASK 0xf8 /* tag mask */
|
||||
|
||||
#define ATA_STATUS 10 /* (R) status */
|
||||
#define ATA_ALTSTAT 11 /* (R) alternate status */
|
||||
#define ATA_S_ERROR 0x01 /* error */
|
||||
#define ATA_S_INDEX 0x02 /* index */
|
||||
#define ATA_S_CORR 0x04 /* data corrected */
|
||||
#define ATA_S_DRQ 0x08 /* data request */
|
||||
#define ATA_S_DSC 0x10 /* drive seek completed */
|
||||
#define ATA_S_SERVICE 0x10 /* drive needs service */
|
||||
#define ATA_S_DWF 0x20 /* drive write fault */
|
||||
#define ATA_S_DMA 0x20 /* DMA ready */
|
||||
#define ATA_S_READY 0x40 /* drive ready */
|
||||
#define ATA_S_BUSY 0x80 /* busy */
|
||||
|
||||
#define ATA_CONTROL 12 /* (W) control */
|
||||
#define ATA_A_IDS 0x02 /* disable interrupts */
|
||||
#define ATA_A_RESET 0x04 /* RESET controller */
|
||||
#define ATA_A_4BIT 0x08 /* 4 head bits */
|
||||
#define ATA_A_HOB 0x80 /* High Order Byte enable */
|
||||
|
||||
/* SATA register defines */
|
||||
#define ATA_SSTATUS 13
|
||||
#define ATA_SS_DET_MASK 0x0000000f
|
||||
#define ATA_SS_DET_NO_DEVICE 0x00000000
|
||||
#define ATA_SS_DET_DEV_PRESENT 0x00000001
|
||||
#define ATA_SS_DET_PHY_ONLINE 0x00000003
|
||||
#define ATA_SS_DET_PHY_OFFLINE 0x00000004
|
||||
|
||||
#define ATA_SS_SPD_MASK 0x000000f0
|
||||
#define ATA_SS_SPD_NO_SPEED 0x00000000
|
||||
#define ATA_SS_SPD_GEN1 0x00000010
|
||||
#define ATA_SS_SPD_GEN2 0x00000020
|
||||
#define ATA_SS_SPD_GEN3 0x00000030
|
||||
|
||||
#define ATA_SS_IPM_MASK 0x00000f00
|
||||
#define ATA_SS_IPM_NO_DEVICE 0x00000000
|
||||
#define ATA_SS_IPM_ACTIVE 0x00000100
|
||||
#define ATA_SS_IPM_PARTIAL 0x00000200
|
||||
#define ATA_SS_IPM_SLUMBER 0x00000600
|
||||
#define ATA_SS_IPM_DEVSLEEP 0x00000800
|
||||
|
||||
#define ATA_SERROR 14
|
||||
#define ATA_SE_DATA_CORRECTED 0x00000001
|
||||
#define ATA_SE_COMM_CORRECTED 0x00000002
|
||||
#define ATA_SE_DATA_ERR 0x00000100
|
||||
#define ATA_SE_COMM_ERR 0x00000200
|
||||
#define ATA_SE_PROT_ERR 0x00000400
|
||||
#define ATA_SE_HOST_ERR 0x00000800
|
||||
#define ATA_SE_PHY_CHANGED 0x00010000
|
||||
#define ATA_SE_PHY_IERROR 0x00020000
|
||||
#define ATA_SE_COMM_WAKE 0x00040000
|
||||
#define ATA_SE_DECODE_ERR 0x00080000
|
||||
#define ATA_SE_PARITY_ERR 0x00100000
|
||||
#define ATA_SE_CRC_ERR 0x00200000
|
||||
#define ATA_SE_HANDSHAKE_ERR 0x00400000
|
||||
#define ATA_SE_LINKSEQ_ERR 0x00800000
|
||||
#define ATA_SE_TRANSPORT_ERR 0x01000000
|
||||
#define ATA_SE_UNKNOWN_FIS 0x02000000
|
||||
#define ATA_SE_EXCHANGED 0x04000000
|
||||
|
||||
#define ATA_SCONTROL 15
|
||||
#define ATA_SC_DET_MASK 0x0000000f
|
||||
#define ATA_SC_DET_IDLE 0x00000000
|
||||
#define ATA_SC_DET_RESET 0x00000001
|
||||
#define ATA_SC_DET_DISABLE 0x00000004
|
||||
|
||||
#define ATA_SC_SPD_MASK 0x000000f0
|
||||
#define ATA_SC_SPD_NO_SPEED 0x00000000
|
||||
#define ATA_SC_SPD_SPEED_GEN1 0x00000010
|
||||
#define ATA_SC_SPD_SPEED_GEN2 0x00000020
|
||||
#define ATA_SC_SPD_SPEED_GEN3 0x00000030
|
||||
|
||||
#define ATA_SC_IPM_MASK 0x00000f00
|
||||
#define ATA_SC_IPM_NONE 0x00000000
|
||||
#define ATA_SC_IPM_DIS_PARTIAL 0x00000100
|
||||
#define ATA_SC_IPM_DIS_SLUMBER 0x00000200
|
||||
#define ATA_SC_IPM_DIS_DEVSLEEP 0x00000400
|
||||
|
||||
#define ATA_SACTIVE 16
|
||||
|
||||
#define AHCI_MAX_PORTS 32
|
||||
#define AHCI_MAX_SLOTS 32
|
||||
#define AHCI_MAX_IRQS 16
|
||||
|
||||
/* SATA AHCI v1.0 register defines */
|
||||
#define AHCI_CAP 0x00
|
||||
#define AHCI_CAP_NPMASK 0x0000001f
|
||||
#define AHCI_CAP_SXS 0x00000020
|
||||
#define AHCI_CAP_EMS 0x00000040
|
||||
#define AHCI_CAP_CCCS 0x00000080
|
||||
#define AHCI_CAP_NCS 0x00001F00
|
||||
#define AHCI_CAP_NCS_SHIFT 8
|
||||
#define AHCI_CAP_PSC 0x00002000
|
||||
#define AHCI_CAP_SSC 0x00004000
|
||||
#define AHCI_CAP_PMD 0x00008000
|
||||
#define AHCI_CAP_FBSS 0x00010000
|
||||
#define AHCI_CAP_SPM 0x00020000
|
||||
#define AHCI_CAP_SAM 0x00040000 /* Supports AHCI mode only: CAP bit 18 */
|
||||
#define AHCI_CAP_ISS 0x00F00000
|
||||
#define AHCI_CAP_ISS_SHIFT 20
|
||||
#define AHCI_CAP_SCLO 0x01000000
|
||||
#define AHCI_CAP_SAL 0x02000000
|
||||
#define AHCI_CAP_SALP 0x04000000
|
||||
#define AHCI_CAP_SSS 0x08000000
|
||||
#define AHCI_CAP_SMPS 0x10000000
|
||||
#define AHCI_CAP_SSNTF 0x20000000
|
||||
#define AHCI_CAP_SNCQ 0x40000000
|
||||
#define AHCI_CAP_64BIT 0x80000000
|
||||
|
||||
#define AHCI_GHC 0x04
|
||||
#define AHCI_GHC_AE 0x80000000
|
||||
#define AHCI_GHC_MRSM 0x00000004
|
||||
#define AHCI_GHC_IE 0x00000002
|
||||
#define AHCI_GHC_HR 0x00000001
|
||||
|
||||
#define AHCI_IS 0x08
|
||||
#define AHCI_PI 0x0c
|
||||
#define AHCI_VS 0x10
|
||||
|
||||
#define AHCI_CCCC 0x14
|
||||
#define AHCI_CCCC_TV_MASK 0xffff0000
|
||||
#define AHCI_CCCC_TV_SHIFT 16
|
||||
#define AHCI_CCCC_CC_MASK 0x0000ff00
|
||||
#define AHCI_CCCC_CC_SHIFT 8
|
||||
#define AHCI_CCCC_INT_MASK 0x000000f8
|
||||
#define AHCI_CCCC_INT_SHIFT 3
|
||||
#define AHCI_CCCC_EN 0x00000001
|
||||
#define AHCI_CCCP 0x18
|
||||
|
||||
#define AHCI_EM_LOC 0x1C
|
||||
#define AHCI_EM_CTL 0x20
|
||||
#define AHCI_EM_MR 0x00000001
|
||||
#define AHCI_EM_TM 0x00000100
|
||||
#define AHCI_EM_RST 0x00000200
|
||||
#define AHCI_EM_LED 0x00010000
|
||||
#define AHCI_EM_SAFTE 0x00020000
|
||||
#define AHCI_EM_SES2 0x00040000
|
||||
#define AHCI_EM_SGPIO 0x00080000
|
||||
#define AHCI_EM_SMB 0x01000000
|
||||
#define AHCI_EM_XMT 0x02000000
|
||||
#define AHCI_EM_ALHD 0x04000000
|
||||
#define AHCI_EM_PM 0x08000000
|
||||
|
||||
#define AHCI_CAP2 0x24
|
||||
#define AHCI_CAP2_BOH 0x00000001
|
||||
#define AHCI_CAP2_NVMP 0x00000002
|
||||
#define AHCI_CAP2_APST 0x00000004
|
||||
#define AHCI_CAP2_SDS 0x00000008
|
||||
#define AHCI_CAP2_SADM 0x00000010
|
||||
#define AHCI_CAP2_DESO 0x00000020
|
||||
|
||||
#define AHCI_OFFSET 0x100
|
||||
#define AHCI_STEP 0x80
|
||||
|
||||
#define AHCI_P_CLB 0x00
|
||||
#define AHCI_P_CLBU 0x04
|
||||
#define AHCI_P_FB 0x08
|
||||
#define AHCI_P_FBU 0x0c
|
||||
#define AHCI_P_IS 0x10
|
||||
#define AHCI_P_IE 0x14
|
||||
#define AHCI_P_IX_DHR 0x00000001
|
||||
#define AHCI_P_IX_PS 0x00000002
|
||||
#define AHCI_P_IX_DS 0x00000004
|
||||
#define AHCI_P_IX_SDB 0x00000008
|
||||
#define AHCI_P_IX_UF 0x00000010
|
||||
#define AHCI_P_IX_DP 0x00000020
|
||||
#define AHCI_P_IX_PC 0x00000040
|
||||
#define AHCI_P_IX_MP 0x00000080
|
||||
|
||||
#define AHCI_P_IX_PRC 0x00400000
|
||||
#define AHCI_P_IX_IPM 0x00800000
|
||||
#define AHCI_P_IX_OF 0x01000000
|
||||
#define AHCI_P_IX_INF 0x04000000
|
||||
#define AHCI_P_IX_IF 0x08000000
|
||||
#define AHCI_P_IX_HBD 0x10000000
|
||||
#define AHCI_P_IX_HBF 0x20000000
|
||||
#define AHCI_P_IX_TFE 0x40000000
|
||||
#define AHCI_P_IX_CPD 0x80000000
|
||||
|
||||
#define AHCI_P_CMD 0x18
|
||||
#define AHCI_P_CMD_ST 0x00000001
|
||||
#define AHCI_P_CMD_SUD 0x00000002
|
||||
#define AHCI_P_CMD_POD 0x00000004
|
||||
#define AHCI_P_CMD_CLO 0x00000008
|
||||
#define AHCI_P_CMD_FRE 0x00000010
|
||||
#define AHCI_P_CMD_CCS_MASK 0x00001f00
|
||||
#define AHCI_P_CMD_CCS_SHIFT 8
|
||||
#define AHCI_P_CMD_ISS 0x00002000
|
||||
#define AHCI_P_CMD_FR 0x00004000
|
||||
#define AHCI_P_CMD_CR 0x00008000
|
||||
#define AHCI_P_CMD_CPS 0x00010000
|
||||
#define AHCI_P_CMD_PMA 0x00020000
|
||||
#define AHCI_P_CMD_HPCP 0x00040000
|
||||
#define AHCI_P_CMD_MPSP 0x00080000
|
||||
#define AHCI_P_CMD_CPD 0x00100000
|
||||
#define AHCI_P_CMD_ESP 0x00200000
|
||||
#define AHCI_P_CMD_FBSCP 0x00400000
|
||||
#define AHCI_P_CMD_APSTE 0x00800000
|
||||
#define AHCI_P_CMD_ATAPI 0x01000000
|
||||
#define AHCI_P_CMD_DLAE 0x02000000
|
||||
#define AHCI_P_CMD_ALPE 0x04000000
|
||||
#define AHCI_P_CMD_ASP 0x08000000
|
||||
#define AHCI_P_CMD_ICC_MASK 0xf0000000
|
||||
#define AHCI_P_CMD_NOOP 0x00000000
|
||||
#define AHCI_P_CMD_ACTIVE 0x10000000
|
||||
#define AHCI_P_CMD_PARTIAL 0x20000000
|
||||
#define AHCI_P_CMD_SLUMBER 0x60000000
|
||||
#define AHCI_P_CMD_DEVSLEEP 0x80000000
|
||||
|
||||
#define AHCI_P_TFD 0x20
|
||||
#define AHCI_P_SIG 0x24
|
||||
#define AHCI_P_SSTS 0x28
|
||||
#define AHCI_P_SCTL 0x2c
|
||||
#define AHCI_P_SERR 0x30
|
||||
#define AHCI_P_SACT 0x34
|
||||
#define AHCI_P_CI 0x38
|
||||
#define AHCI_P_SNTF 0x3C
|
||||
#define AHCI_P_FBS 0x40
|
||||
#define AHCI_P_FBS_EN 0x00000001
|
||||
#define AHCI_P_FBS_DEC 0x00000002
|
||||
#define AHCI_P_FBS_SDE 0x00000004
|
||||
#define AHCI_P_FBS_DEV 0x00000f00
|
||||
#define AHCI_P_FBS_DEV_SHIFT 8
|
||||
#define AHCI_P_FBS_ADO 0x0000f000
|
||||
#define AHCI_P_FBS_ADO_SHIFT 12
|
||||
#define AHCI_P_FBS_DWE 0x000f0000
|
||||
#define AHCI_P_FBS_DWE_SHIFT 16
|
||||
#define AHCI_P_DEVSLP 0x44
|
||||
#define AHCI_P_DEVSLP_ADSE 0x00000001
|
||||
#define AHCI_P_DEVSLP_DSP 0x00000002
|
||||
#define AHCI_P_DEVSLP_DETO 0x000003fc
|
||||
#define AHCI_P_DEVSLP_DETO_SHIFT 2
|
||||
#define AHCI_P_DEVSLP_MDAT 0x00007c00
|
||||
#define AHCI_P_DEVSLP_MDAT_SHIFT 10
|
||||
#define AHCI_P_DEVSLP_DITO 0x01ff8000
|
||||
#define AHCI_P_DEVSLP_DITO_SHIFT 15
|
||||
#define AHCI_P_DEVSLP_DM 0x0e000000
|
||||
#define AHCI_P_DEVSLP_DM_SHIFT 25
|
||||
|
||||
/* Just to be sure, if building as module. */
|
||||
#if MAXPHYS < 512 * 1024
|
||||
#undef MAXPHYS
|
||||
#define MAXPHYS (512 * 1024) /* parenthesized so the expansion is safe inside larger expressions */
|
||||
#endif
|
||||
/* Pessimistic prognosis on number of required S/G entries */
|
||||
#define AHCI_SG_ENTRIES (roundup(btoc(MAXPHYS) + 1, 8))
|
||||
/* Command list. 32 commands. First, 1Kbyte aligned. */
|
||||
#define AHCI_CL_OFFSET 0
|
||||
#define AHCI_CL_SIZE 32
|
||||
/* Command tables. Up to 32 commands, Each, 128byte aligned. */
|
||||
#define AHCI_CT_OFFSET (AHCI_CL_OFFSET + AHCI_CL_SIZE * AHCI_MAX_SLOTS)
|
||||
#define AHCI_CT_SIZE (128 + AHCI_SG_ENTRIES * 16)
|
||||
/* Total main work area. */
|
||||
#define AHCI_WORK_SIZE (AHCI_CT_OFFSET + AHCI_CT_SIZE * ch->numslots)
|
||||
|
||||
#endif /* _AHCI_H_ */
|
90
atkbdc.c
Normal file
90
atkbdc.c
Normal file
@ -0,0 +1,90 @@
|
||||
/*-
|
||||
* Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "inout.h"
|
||||
#include "pci_lpc.h"
|
||||
|
||||
#define KBD_DATA_PORT 0x60
|
||||
|
||||
#define KBD_STS_CTL_PORT 0x64
|
||||
#define KBD_SYS_FLAG 0x4
|
||||
|
||||
#define KBDC_RESET 0xfe
|
||||
|
||||
/*
 * I/O port handler registered for the keyboard controller data port.
 *
 * Only single-byte accesses are accepted; any other access width is
 * rejected with -1 and *eax is left untouched.  For an accepted access
 * *eax is cleared and 0 is returned, whatever the direction of the access.
 */
static int
atkbdc_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{
	if (bytes == 1) {
		*eax = 0;
		return (0);
	}

	return (-1);
}
|
||||
|
||||
static int
|
||||
atkbdc_sts_ctl_handler(struct vmctx *ctx, int vcpu, int in, int port,
|
||||
int bytes, uint32_t *eax, void *arg)
|
||||
{
|
||||
int error, retval;
|
||||
|
||||
if (bytes != 1)
|
||||
return (-1);
|
||||
|
||||
retval = 0;
|
||||
if (in) {
|
||||
*eax = KBD_SYS_FLAG; /* system passed POST */
|
||||
} else {
|
||||
switch (*eax) {
|
||||
case KBDC_RESET: /* Pulse "reset" line. */
|
||||
error = vm_suspend(ctx, VM_SUSPEND_RESET);
|
||||
assert(error == 0 || errno == EALREADY);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return (retval);
|
||||
}
|
||||
|
||||
/* Register the data-port handler under the same "atkbdc" name as the status/command port. */
INOUT_PORT(atkbdc, KBD_DATA_PORT, IOPORT_F_INOUT, atkbdc_data_handler);
|
||||
SYSRES_IO(KBD_DATA_PORT, 1);
|
||||
INOUT_PORT(atkbdc, KBD_STS_CTL_PORT, IOPORT_F_INOUT,
|
||||
atkbdc_sts_ctl_handler);
|
||||
SYSRES_IO(KBD_STS_CTL_PORT, 1);
|
373
bhyve.8
Normal file
373
bhyve.8
Normal file
@ -0,0 +1,373 @@
|
||||
.\" Copyright (c) 2013 Peter Grehan
|
||||
.\" All rights reserved.
|
||||
.\"
|
||||
.\" Redistribution and use in source and binary forms, with or without
|
||||
.\" modification, are permitted provided that the following conditions
|
||||
.\" are met:
|
||||
.\" 1. Redistributions of source code must retain the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer.
|
||||
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer in the
|
||||
.\" documentation and/or other materials provided with the distribution.
|
||||
.\"
|
||||
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd April 18, 2016
|
||||
.Dt BHYVE 8
|
||||
.Os
|
||||
.Sh NAME
|
||||
.Nm bhyve
|
||||
.Nd "run a guest operating system inside a virtual machine"
|
||||
.Sh SYNOPSIS
|
||||
.Nm
|
||||
.Op Fl abehuwxACHPSWY
|
||||
.Op Fl c Ar numcpus
|
||||
.Op Fl g Ar gdbport
|
||||
.Op Fl l Ar lpcdev Ns Op , Ns Ar conf
|
||||
.Op Fl m Ar size Ns Op Ar K|k|M|m|G|g|T|t
|
||||
.Op Fl p Ar vcpu:hostcpu
|
||||
.Op Fl s Ar slot,emulation Ns Op , Ns Ar conf
|
||||
.Op Fl U Ar uuid
|
||||
.Ar vmname
|
||||
.Sh DESCRIPTION
|
||||
.Nm
|
||||
is a hypervisor that runs guest operating systems inside a
|
||||
virtual machine.
|
||||
.Pp
|
||||
Parameters such as the number of virtual CPUs, amount of guest memory, and
|
||||
I/O connectivity can be specified with command-line parameters.
|
||||
.Pp
|
||||
The guest operating system must be loaded with
|
||||
.Xr bhyveload 8
|
||||
or a similar boot loader before running
|
||||
.Nm .
|
||||
.Pp
|
||||
.Nm
|
||||
runs until the guest operating system reboots or an unhandled hypervisor
|
||||
exit is detected.
|
||||
.Sh OPTIONS
|
||||
.Bl -tag -width 10n
|
||||
.It Fl a
|
||||
The guest's local APIC is configured in xAPIC mode.
|
||||
The xAPIC mode is the default setting so this option is redundant.
|
||||
It will be deprecated in a future version.
|
||||
.It Fl A
|
||||
Generate ACPI tables.
|
||||
Required for
|
||||
.Fx Ns /amd64
|
||||
guests.
|
||||
.It Fl b
|
||||
Enable a low-level console device supported by
|
||||
.Fx
|
||||
kernels compiled with
|
||||
.Cd "device bvmconsole" .
|
||||
This option will be deprecated in a future version.
|
||||
.It Fl c Ar numcpus
|
||||
Number of guest virtual CPUs.
|
||||
The default is 1 and the maximum is 16.
|
||||
.It Fl C
|
||||
Include guest memory in core file.
|
||||
.It Fl e
|
||||
Force
|
||||
.Nm
|
||||
to exit when a guest issues an access to an I/O port that is not emulated.
|
||||
This is intended for debug purposes.
|
||||
.It Fl g Ar gdbport
|
||||
For
|
||||
.Fx
|
||||
kernels compiled with
|
||||
.Cd "device bvmdebug" ,
|
||||
allow a remote kernel kgdb to be relayed to the guest kernel gdb stub
|
||||
via a local IPv4 address and this port.
|
||||
This option will be deprecated in a future version.
|
||||
.It Fl h
|
||||
Print help message and exit.
|
||||
.It Fl H
|
||||
Yield the virtual CPU thread when a HLT instruction is detected.
|
||||
If this option is not specified, virtual CPUs will use 100% of a host CPU.
|
||||
.It Fl l Ar lpcdev Ns Op , Ns Ar conf
|
||||
Allow devices behind the LPC PCI-ISA bridge to be configured.
|
||||
The only supported devices are the TTY-class devices
|
||||
.Ar com1
|
||||
and
|
||||
.Ar com2
|
||||
and the boot ROM device
|
||||
.Ar bootrom .
|
||||
.It Fl m Ar size Ns Op Ar K|k|M|m|G|g|T|t
|
||||
Guest physical memory size.
|
||||
This must be the same size that was given to
|
||||
.Xr bhyveload 8 .
|
||||
.Pp
|
||||
The size argument may be suffixed with one of K, M, G or T (either upper
|
||||
or lower case) to indicate a multiple of kilobytes, megabytes, gigabytes,
|
||||
or terabytes.
|
||||
If no suffix is given, the value is assumed to be in megabytes.
|
||||
.It Fl p Ar vcpu:hostcpu
|
||||
Pin guest's virtual CPU
|
||||
.Em vcpu
|
||||
to
|
||||
.Em hostcpu .
|
||||
.It Fl P
|
||||
Force the guest virtual CPU to exit when a PAUSE instruction is detected.
|
||||
.It Fl s Ar slot,emulation Ns Op , Ns Ar conf
|
||||
Configure a virtual PCI slot and function.
|
||||
.Pp
|
||||
.Nm
|
||||
provides PCI bus emulation and virtual devices that can be attached to
|
||||
slots on the bus.
|
||||
There are 32 available slots, with the option of providing up to 8 functions
|
||||
per slot.
|
||||
.Bl -tag -width 10n
|
||||
.It Ar slot
|
||||
.Ar pcislot[:function]
|
||||
.Ar bus:pcislot:function
|
||||
.Pp
|
||||
The
|
||||
.Ar pcislot
|
||||
value is 0 to 31.
|
||||
The optional
|
||||
.Ar function
|
||||
value is 0 to 7.
|
||||
The optional
|
||||
.Ar bus
|
||||
value is 0 to 255.
|
||||
If not specified, the
|
||||
.Ar function
|
||||
value defaults to 0.
|
||||
If not specified, the
|
||||
.Ar bus
|
||||
value defaults to 0.
|
||||
.It Ar emulation
|
||||
.Bl -tag -width 10n
|
||||
.It Li hostbridge | Li amd_hostbridge
|
||||
.Pp
|
||||
Provide a simple host bridge.
|
||||
This is usually configured at slot 0, and is required by most guest
|
||||
operating systems.
|
||||
The
|
||||
.Li amd_hostbridge
|
||||
emulation is identical but uses a PCI vendor ID of
|
||||
.Li AMD .
|
||||
.It Li passthru
|
||||
PCI pass-through device.
|
||||
.It Li virtio-net
|
||||
Virtio network interface.
|
||||
.It Li virtio-blk
|
||||
Virtio block storage interface.
|
||||
.It Li virtio-rnd
|
||||
Virtio RNG interface.
|
||||
.It Li ahci-cd
|
||||
AHCI controller attached to an ATAPI CD/DVD.
|
||||
.It Li ahci-hd
|
||||
AHCI controller attached to a SATA hard-drive.
|
||||
.It Li uart
|
||||
PCI 16550 serial device.
|
||||
.It Li lpc
|
||||
LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports and a boot ROM.
|
||||
The LPC bridge emulation can only be configured on bus 0.
|
||||
.El
|
||||
.It Op Ar conf
|
||||
This optional parameter describes the backend for device emulations.
|
||||
If
|
||||
.Ar conf
|
||||
is not specified, the device emulation has no backend and can be
|
||||
considered unconnected.
|
||||
.Pp
|
||||
Network devices:
|
||||
.Bl -tag -width 10n
|
||||
.It Ar tapN Ns Op , Ns Ar mac=xx:xx:xx:xx:xx:xx
|
||||
.It Ar vmnetN Ns Op , Ns Ar mac=xx:xx:xx:xx:xx:xx
|
||||
.Pp
|
||||
If
|
||||
.Ar mac
|
||||
is not specified, the MAC address is derived from a fixed OUI and the
|
||||
remaining bytes from an MD5 hash of the slot and function numbers and
|
||||
the device name.
|
||||
.Pp
|
||||
The MAC address is an ASCII string in
|
||||
.Xr ethers 5
|
||||
format.
|
||||
.El
|
||||
.Pp
|
||||
Block storage devices:
|
||||
.Bl -tag -width 10n
|
||||
.It Pa /filename Ns Oo , Ns Ar block-device-options Oc
|
||||
.It Pa /dev/xxx Ns Oo , Ns Ar block-device-options Oc
|
||||
.El
|
||||
.Pp
|
||||
The
|
||||
.Ar block-device-options
|
||||
are:
|
||||
.Bl -tag -width 8n
|
||||
.It Li nocache
|
||||
Open the file with
|
||||
.Dv O_DIRECT .
|
||||
.It Li direct
|
||||
Open the file using
|
||||
.Dv O_SYNC .
|
||||
.It Li ro
|
||||
Force the file to be opened read-only.
|
||||
.It Li sectorsize= Ns Ar logical Ns Oo / Ns Ar physical Oc
|
||||
Specify the logical and physical sector sizes of the emulated disk.
|
||||
The physical sector size is optional and is equal to the logical sector size
|
||||
if not explicitly specified.
|
||||
.El
|
||||
.Pp
|
||||
TTY devices:
|
||||
.Bl -tag -width 10n
|
||||
.It Li stdio
|
||||
Connect the serial port to the standard input and output of
|
||||
the
|
||||
.Nm
|
||||
process.
|
||||
.It Pa /dev/xxx
|
||||
Use the host TTY device for serial port I/O.
|
||||
.El
|
||||
.Pp
|
||||
Boot ROM device:
|
||||
.Bl -tag -width 10n
|
||||
.It Pa romfile
|
||||
Map
|
||||
.Ar romfile
|
||||
in the guest address space reserved for boot firmware.
|
||||
.El
|
||||
.Pp
|
||||
Pass-through devices:
|
||||
.Bl -tag -width 10n
|
||||
.It Ns Ar slot Ns / Ns Ar bus Ns / Ns Ar function
|
||||
Connect to a PCI device on the host at the selector described by
|
||||
.Ar slot ,
|
||||
.Ar bus ,
|
||||
and
|
||||
.Ar function
|
||||
numbers.
|
||||
.El
|
||||
.Pp
|
||||
Guest memory must be wired using the
|
||||
.Fl S
|
||||
option when a pass-through device is configured.
|
||||
.Pp
|
||||
The host device must have been reserved at boot-time using the
|
||||
.Va pptdev
|
||||
loader variable as described in
|
||||
.Xr vmm 4 .
|
||||
.El
|
||||
.It Fl S
|
||||
Wire guest memory.
|
||||
.It Fl u
|
||||
RTC keeps UTC time.
|
||||
.It Fl U Ar uuid
|
||||
Set the universally unique identifier
|
||||
.Pq UUID
|
||||
in the guest's System Management BIOS System Information structure.
|
||||
By default a UUID is generated from the host's hostname and
|
||||
.Ar vmname .
|
||||
.It Fl w
|
||||
Ignore accesses to unimplemented Model Specific Registers (MSRs).
|
||||
This is intended for debug purposes.
|
||||
.It Fl W
|
||||
Force virtio PCI device emulations to use MSI interrupts instead of MSI-X
|
||||
interrupts.
|
||||
.It Fl x
|
||||
The guest's local APIC is configured in x2APIC mode.
|
||||
.It Fl Y
|
||||
Disable MPtable generation.
|
||||
.It Ar vmname
|
||||
Alphanumeric name of the guest.
|
||||
This should be the same as that created by
|
||||
.Xr bhyveload 8 .
|
||||
.El
|
||||
.Sh SIGNAL HANDLING
|
||||
.Nm
|
||||
deals with the following signals:
|
||||
.Pp
|
||||
.Bl -tag -width indent -compact
|
||||
.It SIGTERM
|
||||
Trigger ACPI poweroff for a VM.
|
||||
.El
|
||||
.Sh EXIT STATUS
|
||||
Exit status indicates how the VM was terminated:
|
||||
.Pp
|
||||
.Bl -tag -width indent -compact
|
||||
.It 0
|
||||
rebooted
|
||||
.It 1
|
||||
powered off
|
||||
.It 2
|
||||
halted
|
||||
.It 3
|
||||
triple fault
|
||||
.El
|
||||
.Sh EXAMPLES
|
||||
The guest operating system must have been loaded with
|
||||
.Xr bhyveload 8
|
||||
or a similar boot loader before
|
||||
.Xr bhyve 4
|
||||
can be run.
|
||||
.Pp
|
||||
To run a virtual machine with 1GB of memory, two virtual CPUs, a virtio
|
||||
block device backed by the
|
||||
.Pa /my/image
|
||||
filesystem image, and a serial port for the console:
|
||||
.Bd -literal -offset indent
|
||||
bhyve -c 2 -s 0,hostbridge -s 1,lpc -s 2,virtio-blk,/my/image \\
|
||||
-l com1,stdio -A -H -P -m 1G vm1
|
||||
.Ed
|
||||
.Pp
|
||||
Run a 24GB single-CPU virtual machine with three network ports, one of which
|
||||
has a MAC address specified:
|
||||
.Bd -literal -offset indent
|
||||
bhyve -s 0,hostbridge -s 1,lpc -s 2:0,virtio-net,tap0 \\
|
||||
-s 2:1,virtio-net,tap1 \\
|
||||
-s 2:2,virtio-net,tap2,mac=00:be:fa:76:45:00 \\
|
||||
-s 3,virtio-blk,/my/image -l com1,stdio \\
|
||||
-A -H -P -m 24G bigvm
|
||||
.Ed
|
||||
.Pp
|
||||
Run an 8GB quad-CPU virtual machine with 8 AHCI SATA disks, an AHCI ATAPI
|
||||
CD-ROM, a single virtio network port, an AMD hostbridge, and the console
|
||||
port connected to an
|
||||
.Xr nmdm 4
|
||||
null-modem device.
|
||||
.Bd -literal -offset indent
|
||||
bhyve -c 4 \\
|
||||
-s 0,amd_hostbridge -s 1,lpc \\
|
||||
-s 1:0,ahci-hd,/images/disk.1 \\
|
||||
-s 1:1,ahci-hd,/images/disk.2 \\
|
||||
-s 1:2,ahci-hd,/images/disk.3 \\
|
||||
-s 1:3,ahci-hd,/images/disk.4 \\
|
||||
-s 1:4,ahci-hd,/images/disk.5 \\
|
||||
-s 1:5,ahci-hd,/images/disk.6 \\
|
||||
-s 1:6,ahci-hd,/images/disk.7 \\
|
||||
-s 1:7,ahci-hd,/images/disk.8 \\
|
||||
-s 2,ahci-cd,/images/install.iso \\
|
||||
-s 3,virtio-net,tap0 \\
|
||||
-l com1,/dev/nmdm0A \\
|
||||
-A -H -P -m 8G vm1
|
||||
.Ed
|
||||
.Sh SEE ALSO
|
||||
.Xr bhyve 4 ,
|
||||
.Xr nmdm 4 ,
|
||||
.Xr vmm 4 ,
|
||||
.Xr ethers 5 ,
|
||||
.Xr bhyvectl 8 ,
|
||||
.Xr bhyveload 8
|
||||
.Sh HISTORY
|
||||
.Nm
|
||||
first appeared in
|
||||
.Fx 10.0 .
|
||||
.Sh AUTHORS
|
||||
.An Neel Natu Aq Mt neel@freebsd.org
|
||||
.An Peter Grehan Aq Mt grehan@freebsd.org
|
971
bhyverun.c
Normal file
971
bhyverun.c
Normal file
@ -0,0 +1,971 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <machine/atomic.h>
|
||||
#include <machine/segments.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <err.h>
|
||||
#include <libgen.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <pthread.h>
|
||||
#include <pthread_np.h>
|
||||
#include <sysexits.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "acpi.h"
|
||||
#include "inout.h"
|
||||
#include "dbgport.h"
|
||||
#include "fwctl.h"
|
||||
#include "ioapic.h"
|
||||
#include "mem.h"
|
||||
#include "mevent.h"
|
||||
#include "mptbl.h"
|
||||
#include "pci_emul.h"
|
||||
#include "pci_irq.h"
|
||||
#include "pci_lpc.h"
|
||||
#include "smbiostbl.h"
|
||||
#include "xmsr.h"
|
||||
#include "spinup_ap.h"
|
||||
#include "rtc.h"
|
||||
|
||||
#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */
|
||||
|
||||
#define MB (1024UL * 1024)
|
||||
#define GB (1024UL * MB)
|
||||
|
||||
typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
|
||||
extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu);
|
||||
|
||||
char *vmname;
|
||||
|
||||
int guest_ncpus;
|
||||
char *guest_uuid_str;
|
||||
|
||||
static int guest_vmexit_on_hlt, guest_vmexit_on_pause;
|
||||
static int virtio_msix = 1;
|
||||
static int x2apic_mode = 0; /* default is xAPIC */
|
||||
|
||||
static int strictio;
|
||||
static int strictmsr = 1;
|
||||
|
||||
static int acpi;
|
||||
|
||||
static char *progname;
|
||||
static const int BSP = 0;
|
||||
|
||||
static cpuset_t cpumask;
|
||||
|
||||
static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);
|
||||
|
||||
static struct vm_exit vmexit[VM_MAXCPU];
|
||||
|
||||
/*
 * Event counters.  The vmexit_* members are bumped by the VM-exit handlers
 * of the same name in this file.
 */
struct bhyvestats {
	uint64_t	vmexit_bogus;
	uint64_t	vmexit_reqidle;
	uint64_t	vmexit_hlt;
	uint64_t	vmexit_pause;
	uint64_t	vmexit_mtrap;
	uint64_t	vmexit_inst_emul;
	uint64_t	cpu_switch_rotate;
	uint64_t	cpu_switch_direct;
} stats;
|
||||
|
||||
/*
 * Per-vCPU thread bookkeeping.  fbsdrun_addcpu() fills in one slot and
 * passes it to the new thread as its start argument.
 */
struct mt_vmm_info {
	pthread_t	mt_thr;		/* thread created for this vCPU */
	struct vmctx	*mt_ctx;	/* VM context passed to vm_loop() */
	int		mt_vcpu;	/* vCPU index */
} mt_vmm_info[VM_MAXCPU];
|
||||
|
||||
/*
 * Host CPU sets requested with -p, indexed by vCPU.  A NULL entry means
 * no pinning was requested for that vCPU.
 */
static cpuset_t *vcpumap[VM_MAXCPU] = { NULL };
|
||||
|
||||
static void
|
||||
usage(int code)
|
||||
{
|
||||
|
||||
fprintf(stderr,
|
||||
"Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
|
||||
" %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n"
|
||||
" -a: local apic is in xAPIC mode (deprecated)\n"
|
||||
" -A: create ACPI tables\n"
|
||||
" -c: # cpus (default 1)\n"
|
||||
" -C: include guest memory in core file\n"
|
||||
" -e: exit on unhandled I/O access\n"
|
||||
" -g: gdb port\n"
|
||||
" -h: help\n"
|
||||
" -H: vmexit from the guest on hlt\n"
|
||||
" -l: LPC device configuration\n"
|
||||
" -m: memory size in MB\n"
|
||||
" -p: pin 'vcpu' to 'hostcpu'\n"
|
||||
" -P: vmexit from the guest on pause\n"
|
||||
" -s: <slot,driver,configinfo> PCI slot config\n"
|
||||
" -S: guest memory cannot be swapped\n"
|
||||
" -u: RTC keeps UTC time\n"
|
||||
" -U: uuid\n"
|
||||
" -w: ignore unimplemented MSRs\n"
|
||||
" -W: force virtio to use single-vector MSI\n"
|
||||
" -x: local apic is in x2APIC mode\n"
|
||||
" -Y: disable MPtable generation\n",
|
||||
progname, (int)strlen(progname), "");
|
||||
|
||||
exit(code);
|
||||
}
|
||||
|
||||
static int
|
||||
pincpu_parse(const char *opt)
|
||||
{
|
||||
int vcpu, pcpu;
|
||||
|
||||
if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) {
|
||||
fprintf(stderr, "invalid format: %s\n", opt);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
if (vcpu < 0 || vcpu >= VM_MAXCPU) {
|
||||
fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n",
|
||||
vcpu, VM_MAXCPU - 1);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
if (pcpu < 0 || pcpu >= CPU_SETSIZE) {
|
||||
fprintf(stderr, "hostcpu '%d' outside valid range from "
|
||||
"0 to %d\n", pcpu, CPU_SETSIZE - 1);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
if (vcpumap[vcpu] == NULL) {
|
||||
if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) {
|
||||
perror("malloc");
|
||||
return (-1);
|
||||
}
|
||||
CPU_ZERO(vcpumap[vcpu]);
|
||||
}
|
||||
CPU_SET(pcpu, vcpumap[vcpu]);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Inject exception 'vector' into 'vcpu', asking for the faulting
 * instruction to be restarted.  'arg' is the struct vmctx pointer.
 * A failure from vm_inject_exception() trips an assertion.
 */
void
vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid,
    int errcode)
{
	struct vmctx *ctx = arg;
	const int restart_instruction = 1;
	int error;

	error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode,
	    restart_instruction);
	assert(error == 0);
}
|
||||
|
||||
/*
 * Thin wrapper around vm_map_gpa(): returns whatever that call yields for
 * the guest physical range [gaddr, gaddr + len).
 */
void *
paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len)
{
	void *haddr;

	haddr = vm_map_gpa(ctx, gaddr, len);
	return (haddr);
}
|
||||
|
||||
int
|
||||
fbsdrun_vmexit_on_pause(void)
|
||||
{
|
||||
|
||||
return (guest_vmexit_on_pause);
|
||||
}
|
||||
|
||||
int
|
||||
fbsdrun_vmexit_on_hlt(void)
|
||||
{
|
||||
|
||||
return (guest_vmexit_on_hlt);
|
||||
}
|
||||
|
||||
int
|
||||
fbsdrun_virtio_msix(void)
|
||||
{
|
||||
|
||||
return (virtio_msix);
|
||||
}
|
||||
|
||||
static void *
|
||||
fbsdrun_start_thread(void *param)
|
||||
{
|
||||
char tname[MAXCOMLEN + 1];
|
||||
struct mt_vmm_info *mtp;
|
||||
int vcpu;
|
||||
|
||||
mtp = param;
|
||||
vcpu = mtp->mt_vcpu;
|
||||
|
||||
snprintf(tname, sizeof(tname), "vcpu %d", vcpu);
|
||||
pthread_set_name_np(mtp->mt_thr, tname);
|
||||
|
||||
vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);
|
||||
|
||||
/* not reached */
|
||||
exit(1);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
void
|
||||
fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip)
|
||||
{
|
||||
int error;
|
||||
|
||||
assert(fromcpu == BSP);
|
||||
|
||||
/*
|
||||
* The 'newcpu' must be activated in the context of 'fromcpu'. If
|
||||
* vm_activate_cpu() is delayed until newcpu's pthread starts running
|
||||
* then vmm.ko is out-of-sync with bhyve and this can create a race
|
||||
* with vm_suspend().
|
||||
*/
|
||||
error = vm_activate_cpu(ctx, newcpu);
|
||||
if (error != 0)
|
||||
err(EX_OSERR, "could not activate CPU %d", newcpu);
|
||||
|
||||
CPU_SET_ATOMIC(newcpu, &cpumask);
|
||||
|
||||
/*
|
||||
* Set up the vmexit struct to allow execution to start
|
||||
* at the given RIP
|
||||
*/
|
||||
vmexit[newcpu].rip = rip;
|
||||
vmexit[newcpu].inst_length = 0;
|
||||
|
||||
mt_vmm_info[newcpu].mt_ctx = ctx;
|
||||
mt_vmm_info[newcpu].mt_vcpu = newcpu;
|
||||
|
||||
error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL,
|
||||
fbsdrun_start_thread, &mt_vmm_info[newcpu]);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
static int
|
||||
fbsdrun_deletecpu(struct vmctx *ctx, int vcpu)
|
||||
{
|
||||
|
||||
if (!CPU_ISSET(vcpu, &cpumask)) {
|
||||
fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
CPU_CLR_ATOMIC(vcpu, &cpumask);
|
||||
return (CPU_EMPTY(&cpumask));
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
|
||||
uint32_t eax)
|
||||
{
|
||||
#if BHYVE_DEBUG
|
||||
/*
|
||||
* put guest-driven debug here
|
||||
*/
|
||||
#endif
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
|
||||
{
|
||||
int error;
|
||||
int bytes, port, in, out;
|
||||
int vcpu;
|
||||
|
||||
vcpu = *pvcpu;
|
||||
|
||||
port = vme->u.inout.port;
|
||||
bytes = vme->u.inout.bytes;
|
||||
in = vme->u.inout.in;
|
||||
out = !in;
|
||||
|
||||
/* Extra-special case of host notifications */
|
||||
if (out && port == GUEST_NIO_PORT) {
|
||||
error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax);
|
||||
return (error);
|
||||
}
|
||||
|
||||
error = emulate_inout(ctx, vcpu, vme, strictio);
|
||||
if (error) {
|
||||
fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n",
|
||||
in ? "in" : "out",
|
||||
bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
|
||||
port, vmexit->rip);
|
||||
return (VMEXIT_ABORT);
|
||||
} else {
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
|
||||
{
|
||||
uint64_t val;
|
||||
uint32_t eax, edx;
|
||||
int error;
|
||||
|
||||
val = 0;
|
||||
error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val);
|
||||
if (error != 0) {
|
||||
fprintf(stderr, "rdmsr to register %#x on vcpu %d\n",
|
||||
vme->u.msr.code, *pvcpu);
|
||||
if (strictmsr) {
|
||||
vm_inject_gp(ctx, *pvcpu);
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
}
|
||||
|
||||
eax = val;
|
||||
error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax);
|
||||
assert(error == 0);
|
||||
|
||||
edx = val >> 32;
|
||||
error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx);
|
||||
assert(error == 0);
|
||||
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
|
||||
{
|
||||
int error;
|
||||
|
||||
error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval);
|
||||
if (error != 0) {
|
||||
fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n",
|
||||
vme->u.msr.code, vme->u.msr.wval, *pvcpu);
|
||||
if (strictmsr) {
|
||||
vm_inject_gp(ctx, *pvcpu);
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
}
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
|
||||
{
|
||||
int newcpu;
|
||||
int retval = VMEXIT_CONTINUE;
|
||||
|
||||
newcpu = spinup_ap(ctx, *pvcpu,
|
||||
vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip);
|
||||
|
||||
return (retval);
|
||||
}
|
||||
|
||||
/*
 * Extra diagnostics for EPT misconfiguration exits; vmexit_vmx() uses
 * these definitions when DEBUG_EPT_MISCONFIG is defined.
 */
#define	DEBUG_EPT_MISCONFIG
#ifdef DEBUG_EPT_MISCONFIG
#define	EXIT_REASON_EPT_MISCONFIG	49
#define	VMCS_GUEST_PHYSICAL_ADDRESS	0x00002400
#define	VMCS_IDENT(x)			((x) | 0x80000000)

/* Scratch storage filled in by vmexit_vmx() before it prints the report. */
static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4];
static int ept_misconfig_ptenum;
#endif
|
||||
|
||||
static int
|
||||
vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
|
||||
fprintf(stderr, "vm exit[%d]\n", *pvcpu);
|
||||
fprintf(stderr, "\treason\t\tVMX\n");
|
||||
fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
|
||||
fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
|
||||
fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status);
|
||||
fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason);
|
||||
fprintf(stderr, "\tqualification\t0x%016lx\n",
|
||||
vmexit->u.vmx.exit_qualification);
|
||||
fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type);
|
||||
fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error);
|
||||
#ifdef DEBUG_EPT_MISCONFIG
|
||||
if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) {
|
||||
vm_get_register(ctx, *pvcpu,
|
||||
VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS),
|
||||
&ept_misconfig_gpa);
|
||||
vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte,
|
||||
&ept_misconfig_ptenum);
|
||||
fprintf(stderr, "\tEPT misconfiguration:\n");
|
||||
fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa);
|
||||
fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n",
|
||||
ept_misconfig_ptenum, ept_misconfig_pte[0],
|
||||
ept_misconfig_pte[1], ept_misconfig_pte[2],
|
||||
ept_misconfig_pte[3]);
|
||||
}
|
||||
#endif /* DEBUG_EPT_MISCONFIG */
|
||||
return (VMEXIT_ABORT);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
|
||||
fprintf(stderr, "vm exit[%d]\n", *pvcpu);
|
||||
fprintf(stderr, "\treason\t\tSVM\n");
|
||||
fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
|
||||
fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
|
||||
fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode);
|
||||
fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1);
|
||||
fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2);
|
||||
return (VMEXIT_ABORT);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
|
||||
assert(vmexit->inst_length == 0);
|
||||
|
||||
stats.vmexit_bogus++;
|
||||
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
|
||||
assert(vmexit->inst_length == 0);
|
||||
|
||||
stats.vmexit_reqidle++;
|
||||
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
|
||||
stats.vmexit_hlt++;
|
||||
|
||||
/*
|
||||
* Just continue execution with the next instruction. We use
|
||||
* the HLT VM exit as a way to be friendly with the host
|
||||
* scheduler.
|
||||
*/
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
|
||||
stats.vmexit_pause++;
|
||||
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
|
||||
assert(vmexit->inst_length == 0);
|
||||
|
||||
stats.vmexit_mtrap++;
|
||||
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
|
||||
static int
|
||||
vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
int err, i;
|
||||
struct vie *vie;
|
||||
|
||||
stats.vmexit_inst_emul++;
|
||||
|
||||
vie = &vmexit->u.inst_emul.vie;
|
||||
err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa,
|
||||
vie, &vmexit->u.inst_emul.paging);
|
||||
|
||||
if (err) {
|
||||
if (err == ESRCH) {
|
||||
fprintf(stderr, "Unhandled memory access to 0x%lx\n",
|
||||
vmexit->u.inst_emul.gpa);
|
||||
}
|
||||
|
||||
fprintf(stderr, "Failed to emulate instruction [");
|
||||
for (i = 0; i < vie->num_valid; i++) {
|
||||
fprintf(stderr, "0x%02x%s", vie->inst[i],
|
||||
i != (vie->num_valid - 1) ? " " : "");
|
||||
}
|
||||
fprintf(stderr, "] at 0x%lx\n", vmexit->rip);
|
||||
return (VMEXIT_ABORT);
|
||||
}
|
||||
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
||||
|
||||
/*
 * Used by vmexit_suspend(): the BSP waits on the condition variable until
 * every other vCPU thread has removed itself from 'cpumask'.
 */
static pthread_mutex_t	resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	resetcpu_cond = PTHREAD_COND_INITIALIZER;
|
||||
|
||||
static int
|
||||
vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
enum vm_suspend_how how;
|
||||
|
||||
how = vmexit->u.suspended.how;
|
||||
|
||||
fbsdrun_deletecpu(ctx, *pvcpu);
|
||||
|
||||
if (*pvcpu != BSP) {
|
||||
pthread_mutex_lock(&resetcpu_mtx);
|
||||
pthread_cond_signal(&resetcpu_cond);
|
||||
pthread_mutex_unlock(&resetcpu_mtx);
|
||||
pthread_exit(NULL);
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&resetcpu_mtx);
|
||||
while (!CPU_EMPTY(&cpumask)) {
|
||||
pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx);
|
||||
}
|
||||
pthread_mutex_unlock(&resetcpu_mtx);
|
||||
|
||||
switch (how) {
|
||||
case VM_SUSPEND_RESET:
|
||||
exit(0);
|
||||
case VM_SUSPEND_POWEROFF:
|
||||
exit(1);
|
||||
case VM_SUSPEND_HALT:
|
||||
exit(2);
|
||||
case VM_SUSPEND_TRIPLEFAULT:
|
||||
exit(3);
|
||||
default:
|
||||
fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how);
|
||||
exit(100);
|
||||
}
|
||||
return (0); /* NOTREACHED */
|
||||
}
|
||||
|
||||
static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
|
||||
[VM_EXITCODE_INOUT] = vmexit_inout,
|
||||
[VM_EXITCODE_INOUT_STR] = vmexit_inout,
|
||||
[VM_EXITCODE_VMX] = vmexit_vmx,
|
||||
[VM_EXITCODE_SVM] = vmexit_svm,
|
||||
[VM_EXITCODE_BOGUS] = vmexit_bogus,
|
||||
[VM_EXITCODE_REQIDLE] = vmexit_reqidle,
|
||||
[VM_EXITCODE_RDMSR] = vmexit_rdmsr,
|
||||
[VM_EXITCODE_WRMSR] = vmexit_wrmsr,
|
||||
[VM_EXITCODE_MTRAP] = vmexit_mtrap,
|
||||
[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
|
||||
[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
|
||||
[VM_EXITCODE_SUSPENDED] = vmexit_suspend,
|
||||
[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
|
||||
};
|
||||
|
||||
static void
|
||||
vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
|
||||
{
|
||||
int error, rc;
|
||||
enum vm_exitcode exitcode;
|
||||
cpuset_t active_cpus;
|
||||
|
||||
if (vcpumap[vcpu] != NULL) {
|
||||
error = pthread_setaffinity_np(pthread_self(),
|
||||
sizeof(cpuset_t), vcpumap[vcpu]);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
error = vm_active_cpus(ctx, &active_cpus);
|
||||
assert(CPU_ISSET(vcpu, &active_cpus));
|
||||
|
||||
error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip);
|
||||
assert(error == 0);
|
||||
|
||||
while (1) {
|
||||
error = vm_run(ctx, vcpu, &vmexit[vcpu]);
|
||||
if (error != 0)
|
||||
break;
|
||||
|
||||
exitcode = vmexit[vcpu].exitcode;
|
||||
if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) {
|
||||
fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n",
|
||||
exitcode);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu);
|
||||
|
||||
switch (rc) {
|
||||
case VMEXIT_CONTINUE:
|
||||
break;
|
||||
case VMEXIT_ABORT:
|
||||
abort();
|
||||
default:
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
|
||||
}
|
||||
|
||||
static int
|
||||
num_vcpus_allowed(struct vmctx *ctx)
|
||||
{
|
||||
int tmp, error;
|
||||
|
||||
error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp);
|
||||
|
||||
/*
|
||||
* The guest is allowed to spinup more than one processor only if the
|
||||
* UNRESTRICTED_GUEST capability is available.
|
||||
*/
|
||||
if (error == 0)
|
||||
return (VM_MAXCPU);
|
||||
else
|
||||
return (1);
|
||||
}
|
||||
|
||||
void
|
||||
fbsdrun_set_capabilities(struct vmctx *ctx, int cpu)
|
||||
{
|
||||
int err, tmp;
|
||||
|
||||
if (fbsdrun_vmexit_on_hlt()) {
|
||||
err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp);
|
||||
if (err < 0) {
|
||||
fprintf(stderr, "VM exit on HLT not supported\n");
|
||||
exit(1);
|
||||
}
|
||||
vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1);
|
||||
if (cpu == BSP)
|
||||
handler[VM_EXITCODE_HLT] = vmexit_hlt;
|
||||
}
|
||||
|
||||
if (fbsdrun_vmexit_on_pause()) {
|
||||
/*
|
||||
* pause exit support required for this mode
|
||||
*/
|
||||
err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp);
|
||||
if (err < 0) {
|
||||
fprintf(stderr,
|
||||
"SMP mux requested, no pause support\n");
|
||||
exit(1);
|
||||
}
|
||||
vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1);
|
||||
if (cpu == BSP)
|
||||
handler[VM_EXITCODE_PAUSE] = vmexit_pause;
|
||||
}
|
||||
|
||||
if (x2apic_mode)
|
||||
err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED);
|
||||
else
|
||||
err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED);
|
||||
|
||||
if (err) {
|
||||
fprintf(stderr, "Unable to set x2apic state (%d)\n", err);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1);
|
||||
}
|
||||
|
||||
/*
 * Create (if necessary) and open the virtual machine 'vmname'.
 *
 *  - VM already exists, boot ROM configured: the VM is opened and
 *    reinitialised with vm_reinit().
 *  - VM already exists, no boot ROM: it is taken to have been set up by
 *    the userspace bootloader and is opened as is.
 *  - VM newly created, no boot ROM: nothing can boot it, so exit.
 *
 * Any other failure is reported with perror() and ends the process.
 * Returns the open VM context.
 */
static struct vmctx *
do_open(const char *vmname)
{
	struct vmctx *ctx;
	bool romboot, reinit;

	romboot = lpc_bootrom() ? true : false;
	reinit = false;

	if (vm_create(vmname) != 0) {
		if (errno != EEXIST) {
			perror("vm_create");
			exit(1);
		}
		/*
		 * The VM exists already.  Without a boot ROM it has been
		 * set up by the userspace bootloader and is left alone.
		 */
		reinit = romboot;
	} else if (!romboot) {
		/*
		 * If the virtual machine was just created then a
		 * bootrom must be configured to boot it.
		 */
		fprintf(stderr, "virtual machine cannot be booted\n");
		exit(1);
	}

	ctx = vm_open(vmname);
	if (ctx == NULL) {
		perror("vm_open");
		exit(1);
	}

	if (reinit && vm_reinit(ctx) != 0) {
		perror("vm_reinit");
		exit(1);
	}

	return (ctx);
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int c, error, gdb_port, err, bvmcons;
|
||||
int max_vcpus, mptgen, memflags;
|
||||
int rtc_localtime;
|
||||
struct vmctx *ctx;
|
||||
uint64_t rip;
|
||||
size_t memsize;
|
||||
char *optstr;
|
||||
|
||||
bvmcons = 0;
|
||||
progname = basename(argv[0]);
|
||||
gdb_port = 0;
|
||||
guest_ncpus = 1;
|
||||
memsize = 256 * MB;
|
||||
mptgen = 1;
|
||||
rtc_localtime = 1;
|
||||
memflags = 0;
|
||||
|
||||
optstr = "abehuwxACHIPSWYp:g:c:s:m:l:U:";
|
||||
while ((c = getopt(argc, argv, optstr)) != -1) {
|
||||
switch (c) {
|
||||
case 'a':
|
||||
x2apic_mode = 0;
|
||||
break;
|
||||
case 'A':
|
||||
acpi = 1;
|
||||
break;
|
||||
case 'b':
|
||||
bvmcons = 1;
|
||||
break;
|
||||
case 'p':
|
||||
if (pincpu_parse(optarg) != 0) {
|
||||
errx(EX_USAGE, "invalid vcpu pinning "
|
||||
"configuration '%s'", optarg);
|
||||
}
|
||||
break;
|
||||
case 'c':
|
||||
guest_ncpus = atoi(optarg);
|
||||
break;
|
||||
case 'C':
|
||||
memflags |= VM_MEM_F_INCORE;
|
||||
break;
|
||||
case 'g':
|
||||
gdb_port = atoi(optarg);
|
||||
break;
|
||||
case 'l':
|
||||
if (lpc_device_parse(optarg) != 0) {
|
||||
errx(EX_USAGE, "invalid lpc device "
|
||||
"configuration '%s'", optarg);
|
||||
}
|
||||
break;
|
||||
case 's':
|
||||
if (pci_parse_slot(optarg) != 0)
|
||||
exit(1);
|
||||
else
|
||||
break;
|
||||
case 'S':
|
||||
memflags |= VM_MEM_F_WIRED;
|
||||
break;
|
||||
case 'm':
|
||||
error = vm_parse_memsize(optarg, &memsize);
|
||||
if (error)
|
||||
errx(EX_USAGE, "invalid memsize '%s'", optarg);
|
||||
break;
|
||||
case 'H':
|
||||
guest_vmexit_on_hlt = 1;
|
||||
break;
|
||||
case 'I':
|
||||
/*
|
||||
* The "-I" option was used to add an ioapic to the
|
||||
* virtual machine.
|
||||
*
|
||||
* An ioapic is now provided unconditionally for each
|
||||
* virtual machine and this option is now deprecated.
|
||||
*/
|
||||
break;
|
||||
case 'P':
|
||||
guest_vmexit_on_pause = 1;
|
||||
break;
|
||||
case 'e':
|
||||
strictio = 1;
|
||||
break;
|
||||
case 'u':
|
||||
rtc_localtime = 0;
|
||||
break;
|
||||
case 'U':
|
||||
guest_uuid_str = optarg;
|
||||
break;
|
||||
case 'w':
|
||||
strictmsr = 0;
|
||||
break;
|
||||
case 'W':
|
||||
virtio_msix = 0;
|
||||
break;
|
||||
case 'x':
|
||||
x2apic_mode = 1;
|
||||
break;
|
||||
case 'Y':
|
||||
mptgen = 0;
|
||||
break;
|
||||
case 'h':
|
||||
usage(0);
|
||||
default:
|
||||
usage(1);
|
||||
}
|
||||
}
|
||||
argc -= optind;
|
||||
argv += optind;
|
||||
|
||||
if (argc != 1)
|
||||
usage(1);
|
||||
|
||||
vmname = argv[0];
|
||||
ctx = do_open(vmname);
|
||||
|
||||
if (guest_ncpus < 1) {
|
||||
fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
max_vcpus = num_vcpus_allowed(ctx);
|
||||
if (guest_ncpus > max_vcpus) {
|
||||
fprintf(stderr, "%d vCPUs requested but only %d available\n",
|
||||
guest_ncpus, max_vcpus);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
fbsdrun_set_capabilities(ctx, BSP);
|
||||
|
||||
vm_set_memflags(ctx, memflags);
|
||||
err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
|
||||
if (err) {
|
||||
fprintf(stderr, "Unable to setup memory (%d)\n", errno);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
error = init_msr();
|
||||
if (error) {
|
||||
fprintf(stderr, "init_msr error %d", error);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
init_mem();
|
||||
init_inout();
|
||||
pci_irq_init(ctx);
|
||||
ioapic_init(ctx);
|
||||
|
||||
rtc_init(ctx, rtc_localtime);
|
||||
sci_init(ctx);
|
||||
|
||||
/*
|
||||
* Exit if a device emulation finds an error in it's initilization
|
||||
*/
|
||||
if (init_pci(ctx) != 0)
|
||||
exit(1);
|
||||
|
||||
if (gdb_port != 0)
|
||||
init_dbgport(gdb_port);
|
||||
|
||||
if (bvmcons)
|
||||
init_bvmcons();
|
||||
|
||||
if (lpc_bootrom()) {
|
||||
if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) {
|
||||
fprintf(stderr, "ROM boot failed: unrestricted guest "
|
||||
"capability not available\n");
|
||||
exit(1);
|
||||
}
|
||||
error = vcpu_reset(ctx, BSP);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
|
||||
assert(error == 0);
|
||||
|
||||
/*
|
||||
* build the guest tables, MP etc.
|
||||
*/
|
||||
if (mptgen) {
|
||||
error = mptable_build(ctx, guest_ncpus);
|
||||
if (error)
|
||||
exit(1);
|
||||
}
|
||||
|
||||
error = smbios_build(ctx);
|
||||
assert(error == 0);
|
||||
|
||||
if (acpi) {
|
||||
error = acpi_build(ctx, guest_ncpus);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
if (lpc_bootrom())
|
||||
fwctl_init();
|
||||
|
||||
/*
|
||||
* Change the proc title to include the VM name.
|
||||
*/
|
||||
setproctitle("%s", vmname);
|
||||
|
||||
/*
|
||||
* Add CPU 0
|
||||
*/
|
||||
fbsdrun_addcpu(ctx, BSP, BSP, rip);
|
||||
|
||||
/*
|
||||
* Head off to the main event dispatch loop
|
||||
*/
|
||||
mevent_dispatch();
|
||||
|
||||
exit(1);
|
||||
}
|
55
bhyverun.h
Normal file
55
bhyverun.h
Normal file
@ -0,0 +1,55 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _FBSDRUN_H_
#define	_FBSDRUN_H_

/*
 * Compile-time assertion: expands to a typedef of a char array whose size
 * is -1, and therefore a compile error, when 'x' is false.  __LINE__ makes
 * the typedef name unique per use.
 */
#ifndef CTASSERT		/* Allow lint to override */
#define	CTASSERT(x)		_CTASSERT(x, __LINE__)
#define	_CTASSERT(x, y)		__CTASSERT(x, y)
#define	__CTASSERT(x, y)	typedef char __assert ## y[(x) ? 1 : -1]
#endif

/* Return values of the VM-exit handlers. */
#define	VMEXIT_CONTINUE		(0)
#define	VMEXIT_ABORT		(-1)

struct vmctx;

extern int guest_ncpus;
extern char *guest_uuid_str;
extern char *vmname;

void	*paddr_guest2host(struct vmctx *ctx, uintptr_t addr, size_t len);

void	fbsdrun_set_capabilities(struct vmctx *ctx, int cpu);
void	fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu,
	    uint64_t rip);
int	fbsdrun_muxed(void);
int	fbsdrun_vmexit_on_hlt(void);
int	fbsdrun_vmexit_on_pause(void);
int	fbsdrun_disable_x2apic(void);
int	fbsdrun_virtio_msix(void);

#endif	/* _FBSDRUN_H_ */
|
820
block_if.c
Normal file
820
block_if.c
Normal file
@ -0,0 +1,820 @@
|
||||
/*-
|
||||
* Copyright (c) 2013 Peter Grehan <grehan@freebsd.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/queue.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/disk.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <pthread.h>
|
||||
#include <pthread_np.h>
|
||||
#include <signal.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <machine/atomic.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "mevent.h"
|
||||
#include "block_if.h"
|
||||
|
||||
/* Signature value for a block interface context. */
#define	BLOCKIF_SIG	0xb109b109

/* Worker threads per context, and the size of the request array. */
#define	BLOCKIF_NUMTHR	8
#define	BLOCKIF_MAXREQ	(64 + BLOCKIF_NUMTHR)
|
||||
|
||||
/* Kind of operation carried by a queued block request. */
enum blockop {
	BOP_READ,
	BOP_WRITE,
	BOP_FLUSH,
	BOP_DELETE
};
|
||||
|
||||
/*
 * State of a request element.  BST_FREE is what blockif_enqueue() expects
 * of an element taken from the free queue.
 */
enum blockstat {
	BST_FREE,
	BST_BLOCK,
	BST_PEND,
	BST_BUSY,
	BST_DONE
};
|
||||
|
||||
struct blockif_elem {
|
||||
TAILQ_ENTRY(blockif_elem) be_link;
|
||||
struct blockif_req *be_req;
|
||||
enum blockop be_op;
|
||||
enum blockstat be_status;
|
||||
pthread_t be_tid;
|
||||
off_t be_block;
|
||||
};
|
||||
|
||||
struct blockif_ctxt {
|
||||
int bc_magic;
|
||||
int bc_fd;
|
||||
int bc_ischr;
|
||||
int bc_isgeom;
|
||||
int bc_candelete;
|
||||
int bc_rdonly;
|
||||
off_t bc_size;
|
||||
int bc_sectsz;
|
||||
int bc_psectsz;
|
||||
int bc_psectoff;
|
||||
int bc_closing;
|
||||
pthread_t bc_btid[BLOCKIF_NUMTHR];
|
||||
pthread_mutex_t bc_mtx;
|
||||
pthread_cond_t bc_cond;
|
||||
|
||||
/* Request elements and free/pending/busy queues */
|
||||
TAILQ_HEAD(, blockif_elem) bc_freeq;
|
||||
TAILQ_HEAD(, blockif_elem) bc_pendq;
|
||||
TAILQ_HEAD(, blockif_elem) bc_busyq;
|
||||
struct blockif_elem bc_reqs[BLOCKIF_MAXREQ];
|
||||
};
|
||||
|
||||
/* Ensures blockif_init() runs once, on the first blockif_open() call. */
static pthread_once_t blockif_once = PTHREAD_ONCE_INIT;

/*
 * Rendezvous record used by blockif_cancel(): the canceller pushes one of
 * these on the global list and sleeps on bse_cond until the SIGCONT handler
 * clears bse_pending.
 */
struct blockif_sig_elem {
	pthread_mutex_t		bse_mtx;	/* protects bse_pending */
	pthread_cond_t		bse_cond;	/* signalled by the handler */
	int			bse_pending;	/* non-zero until handled */
	struct blockif_sig_elem	*bse_next;	/* next record on the list */
};

/* Head of the singly-linked list of outstanding records (updated with CAS). */
static struct blockif_sig_elem *blockif_bse_head;
|
||||
|
||||
static int
|
||||
blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq,
|
||||
enum blockop op)
|
||||
{
|
||||
struct blockif_elem *be, *tbe;
|
||||
off_t off;
|
||||
int i;
|
||||
|
||||
be = TAILQ_FIRST(&bc->bc_freeq);
|
||||
assert(be != NULL);
|
||||
assert(be->be_status == BST_FREE);
|
||||
TAILQ_REMOVE(&bc->bc_freeq, be, be_link);
|
||||
be->be_req = breq;
|
||||
be->be_op = op;
|
||||
switch (op) {
|
||||
case BOP_READ:
|
||||
case BOP_WRITE:
|
||||
case BOP_DELETE:
|
||||
off = breq->br_offset;
|
||||
for (i = 0; i < breq->br_iovcnt; i++)
|
||||
off += breq->br_iov[i].iov_len;
|
||||
break;
|
||||
default:
|
||||
off = OFF_MAX;
|
||||
}
|
||||
be->be_block = off;
|
||||
TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
|
||||
if (tbe->be_block == breq->br_offset)
|
||||
break;
|
||||
}
|
||||
if (tbe == NULL) {
|
||||
TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) {
|
||||
if (tbe->be_block == breq->br_offset)
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (tbe == NULL)
|
||||
be->be_status = BST_PEND;
|
||||
else
|
||||
be->be_status = BST_BLOCK;
|
||||
TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link);
|
||||
return (be->be_status == BST_PEND);
|
||||
}
|
||||
|
||||
static int
|
||||
blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep)
|
||||
{
|
||||
struct blockif_elem *be;
|
||||
|
||||
TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
|
||||
if (be->be_status == BST_PEND)
|
||||
break;
|
||||
assert(be->be_status == BST_BLOCK);
|
||||
}
|
||||
if (be == NULL)
|
||||
return (0);
|
||||
TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
|
||||
be->be_status = BST_BUSY;
|
||||
be->be_tid = t;
|
||||
TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link);
|
||||
*bep = be;
|
||||
return (1);
|
||||
}
|
||||
|
||||
static void
|
||||
blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
|
||||
{
|
||||
struct blockif_elem *tbe;
|
||||
|
||||
if (be->be_status == BST_DONE || be->be_status == BST_BUSY)
|
||||
TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
|
||||
else
|
||||
TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
|
||||
TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
|
||||
if (tbe->be_req->br_offset == be->be_block)
|
||||
tbe->be_status = BST_PEND;
|
||||
}
|
||||
be->be_tid = 0;
|
||||
be->be_status = BST_FREE;
|
||||
be->be_req = NULL;
|
||||
TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
|
||||
}
|
||||
|
||||
/*
 * Execute the I/O described by element 'be' and invoke the request's
 * completion callback with the resulting errno value (0 on success).
 *
 * 'buf' is the calling worker's bounce buffer (MAXPHYS bytes, or NULL; see
 * blockif_thr()).  It is used only for reads and writes with more than one
 * iovec: data is staged through it in chunks of at most MAXPHYS bytes and
 * copied to/from the iovecs.  Otherwise preadv()/pwritev() is used directly.
 *
 * br_resid is decremented by the number of bytes transferred.
 */
static void
blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
{
	struct blockif_req *br;
	off_t arg[2];
	ssize_t clen, len, off, boff, voff;
	int i, err;

	br = be->be_req;
	/* A single iovec never needs the bounce buffer. */
	if (br->br_iovcnt <= 1)
		buf = NULL;
	err = 0;
	switch (be->be_op) {
	case BOP_READ:
		if (buf == NULL) {
			if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
				   br->br_offset)) < 0)
				err = errno;
			else
				br->br_resid -= len;
			break;
		}
		/*
		 * Bounce path: i/voff track the current iovec and the offset
		 * within it, off the offset within the request, boff the
		 * offset within the bounce buffer.
		 */
		i = 0;
		off = voff = 0;
		while (br->br_resid > 0) {
			len = MIN(br->br_resid, MAXPHYS);
			/* NOTE(review): a short (non-negative) read is treated as 'len' bytes. */
			if (pread(bc->bc_fd, buf, len, br->br_offset +
			    off) < 0) {
				err = errno;
				break;
			}
			boff = 0;
			/* Scatter the chunk across the iovecs. */
			do {
				clen = MIN(len - boff, br->br_iov[i].iov_len -
				    voff);
				memcpy(br->br_iov[i].iov_base + voff,
				    buf + boff, clen);
				if (clen < br->br_iov[i].iov_len - voff)
					voff += clen;
				else {
					/* iovec filled; move to the next one */
					i++;
					voff = 0;
				}
				boff += clen;
			} while (boff < len);
			off += len;
			br->br_resid -= len;
		}
		break;
	case BOP_WRITE:
		if (bc->bc_rdonly) {
			err = EROFS;
			break;
		}
		if (buf == NULL) {
			if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
				    br->br_offset)) < 0)
				err = errno;
			else
				br->br_resid -= len;
			break;
		}
		/* Bounce path: gather from the iovecs, then write each chunk. */
		i = 0;
		off = voff = 0;
		while (br->br_resid > 0) {
			len = MIN(br->br_resid, MAXPHYS);
			boff = 0;
			do {
				clen = MIN(len - boff, br->br_iov[i].iov_len -
				    voff);
				memcpy(buf + boff,
				    br->br_iov[i].iov_base + voff, clen);
				if (clen < br->br_iov[i].iov_len - voff)
					voff += clen;
				else {
					/* iovec drained; move to the next one */
					i++;
					voff = 0;
				}
				boff += clen;
			} while (boff < len);
			/* NOTE(review): a short (non-negative) write is treated as 'len' bytes. */
			if (pwrite(bc->bc_fd, buf, len, br->br_offset +
			    off) < 0) {
				err = errno;
				break;
			}
			off += len;
			br->br_resid -= len;
		}
		break;
	case BOP_FLUSH:
		if (bc->bc_ischr) {
			if (ioctl(bc->bc_fd, DIOCGFLUSH))
				err = errno;
		} else if (fsync(bc->bc_fd))
			err = errno;
		break;
	case BOP_DELETE:
		if (!bc->bc_candelete)
			err = EOPNOTSUPP;
		else if (bc->bc_rdonly)
			err = EROFS;
		else if (bc->bc_ischr) {
			/* DIOCGDELETE takes { offset, length }. */
			arg[0] = br->br_offset;
			arg[1] = br->br_resid;
			if (ioctl(bc->bc_fd, DIOCGDELETE, arg))
				err = errno;
			else
				br->br_resid = 0;
		} else
			err = EOPNOTSUPP;
		break;
	default:
		err = EINVAL;
		break;
	}

	/* Mark finished before the callback so blockif_cancel() stops waiting. */
	be->be_status = BST_DONE;

	(*br->br_callback)(br, err);
}
|
||||
|
||||
static void *
|
||||
blockif_thr(void *arg)
|
||||
{
|
||||
struct blockif_ctxt *bc;
|
||||
struct blockif_elem *be;
|
||||
pthread_t t;
|
||||
uint8_t *buf;
|
||||
|
||||
bc = arg;
|
||||
if (bc->bc_isgeom)
|
||||
buf = malloc(MAXPHYS);
|
||||
else
|
||||
buf = NULL;
|
||||
t = pthread_self();
|
||||
|
||||
pthread_mutex_lock(&bc->bc_mtx);
|
||||
for (;;) {
|
||||
while (blockif_dequeue(bc, t, &be)) {
|
||||
pthread_mutex_unlock(&bc->bc_mtx);
|
||||
blockif_proc(bc, be, buf);
|
||||
pthread_mutex_lock(&bc->bc_mtx);
|
||||
blockif_complete(bc, be);
|
||||
}
|
||||
/* Check ctxt status here to see if exit requested */
|
||||
if (bc->bc_closing)
|
||||
break;
|
||||
pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
|
||||
}
|
||||
pthread_mutex_unlock(&bc->bc_mtx);
|
||||
|
||||
if (buf)
|
||||
free(buf);
|
||||
pthread_exit(NULL);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
 * mevent callback registered for SIGCONT by blockif_init().
 *
 * Pops every record off the global blockif_bse_head list (using
 * compare-and-set on the head pointer) and wakes the blockif_cancel() caller
 * sleeping on each one by clearing bse_pending and signalling bse_cond.
 * The arguments are unused.
 */
static void
blockif_sigcont_handler(int signal, enum ev_type type, void *arg)
{
	struct blockif_sig_elem *bse;

	for (;;) {
		/*
		 * Process the entire list even if not intended for
		 * this thread.
		 */
		do {
			bse = blockif_bse_head;
			if (bse == NULL)
				return;
		} while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
					    (uintptr_t)bse,
					    (uintptr_t)bse->bse_next));

		/* Hand the record back to its waiter. */
		pthread_mutex_lock(&bse->bse_mtx);
		bse->bse_pending = 0;
		pthread_cond_signal(&bse->bse_cond);
		pthread_mutex_unlock(&bse->bse_mtx);
	}
}
|
||||
|
||||
static void
|
||||
blockif_init(void)
|
||||
{
|
||||
mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL);
|
||||
(void) signal(SIGCONT, SIG_IGN);
|
||||
}
|
||||
|
||||
struct blockif_ctxt *
|
||||
blockif_open(const char *optstr, const char *ident)
|
||||
{
|
||||
char tname[MAXCOMLEN + 1];
|
||||
char name[MAXPATHLEN];
|
||||
char *nopt, *xopts, *cp;
|
||||
struct blockif_ctxt *bc;
|
||||
struct stat sbuf;
|
||||
struct diocgattr_arg arg;
|
||||
off_t size, psectsz, psectoff;
|
||||
int extra, fd, i, sectsz;
|
||||
int nocache, sync, ro, candelete, geom, ssopt, pssopt;
|
||||
|
||||
pthread_once(&blockif_once, blockif_init);
|
||||
|
||||
fd = -1;
|
||||
ssopt = 0;
|
||||
nocache = 0;
|
||||
sync = 0;
|
||||
ro = 0;
|
||||
|
||||
/*
|
||||
* The first element in the optstring is always a pathname.
|
||||
* Optional elements follow
|
||||
*/
|
||||
nopt = xopts = strdup(optstr);
|
||||
while (xopts != NULL) {
|
||||
cp = strsep(&xopts, ",");
|
||||
if (cp == nopt) /* file or device pathname */
|
||||
continue;
|
||||
else if (!strcmp(cp, "nocache"))
|
||||
nocache = 1;
|
||||
else if (!strcmp(cp, "sync") || !strcmp(cp, "direct"))
|
||||
sync = 1;
|
||||
else if (!strcmp(cp, "ro"))
|
||||
ro = 1;
|
||||
else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2)
|
||||
;
|
||||
else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1)
|
||||
pssopt = ssopt;
|
||||
else {
|
||||
fprintf(stderr, "Invalid device option \"%s\"\n", cp);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
extra = 0;
|
||||
if (nocache)
|
||||
extra |= O_DIRECT;
|
||||
if (sync)
|
||||
extra |= O_SYNC;
|
||||
|
||||
fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra);
|
||||
if (fd < 0 && !ro) {
|
||||
/* Attempt a r/w fail with a r/o open */
|
||||
fd = open(nopt, O_RDONLY | extra);
|
||||
ro = 1;
|
||||
}
|
||||
|
||||
if (fd < 0) {
|
||||
perror("Could not open backing file");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (fstat(fd, &sbuf) < 0) {
|
||||
perror("Could not stat backing file");
|
||||
goto err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Deal with raw devices
|
||||
*/
|
||||
size = sbuf.st_size;
|
||||
sectsz = DEV_BSIZE;
|
||||
psectsz = psectoff = 0;
|
||||
candelete = geom = 0;
|
||||
if (S_ISCHR(sbuf.st_mode)) {
|
||||
if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
|
||||
ioctl(fd, DIOCGSECTORSIZE, §sz)) {
|
||||
perror("Could not fetch dev blk/sector size");
|
||||
goto err;
|
||||
}
|
||||
assert(size != 0);
|
||||
assert(sectsz != 0);
|
||||
if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0)
|
||||
ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff);
|
||||
strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
|
||||
arg.len = sizeof(arg.value.i);
|
||||
if (ioctl(fd, DIOCGATTR, &arg) == 0)
|
||||
candelete = arg.value.i;
|
||||
if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0)
|
||||
geom = 1;
|
||||
} else
|
||||
psectsz = sbuf.st_blksize;
|
||||
|
||||
if (ssopt != 0) {
|
||||
if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 ||
|
||||
ssopt > pssopt) {
|
||||
fprintf(stderr, "Invalid sector size %d/%d\n",
|
||||
ssopt, pssopt);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Some backend drivers (e.g. cd0, ada0) require that the I/O
|
||||
* size be a multiple of the device's sector size.
|
||||
*
|
||||
* Validate that the emulated sector size complies with this
|
||||
* requirement.
|
||||
*/
|
||||
if (S_ISCHR(sbuf.st_mode)) {
|
||||
if (ssopt < sectsz || (ssopt % sectsz) != 0) {
|
||||
fprintf(stderr, "Sector size %d incompatible "
|
||||
"with underlying device sector size %d\n",
|
||||
ssopt, sectsz);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
sectsz = ssopt;
|
||||
psectsz = pssopt;
|
||||
psectoff = 0;
|
||||
}
|
||||
|
||||
bc = calloc(1, sizeof(struct blockif_ctxt));
|
||||
if (bc == NULL) {
|
||||
perror("calloc");
|
||||
goto err;
|
||||
}
|
||||
|
||||
bc->bc_magic = BLOCKIF_SIG;
|
||||
bc->bc_fd = fd;
|
||||
bc->bc_ischr = S_ISCHR(sbuf.st_mode);
|
||||
bc->bc_isgeom = geom;
|
||||
bc->bc_candelete = candelete;
|
||||
bc->bc_rdonly = ro;
|
||||
bc->bc_size = size;
|
||||
bc->bc_sectsz = sectsz;
|
||||
bc->bc_psectsz = psectsz;
|
||||
bc->bc_psectoff = psectoff;
|
||||
pthread_mutex_init(&bc->bc_mtx, NULL);
|
||||
pthread_cond_init(&bc->bc_cond, NULL);
|
||||
TAILQ_INIT(&bc->bc_freeq);
|
||||
TAILQ_INIT(&bc->bc_pendq);
|
||||
TAILQ_INIT(&bc->bc_busyq);
|
||||
for (i = 0; i < BLOCKIF_MAXREQ; i++) {
|
||||
bc->bc_reqs[i].be_status = BST_FREE;
|
||||
TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link);
|
||||
}
|
||||
|
||||
for (i = 0; i < BLOCKIF_NUMTHR; i++) {
|
||||
pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc);
|
||||
snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i);
|
||||
pthread_set_name_np(bc->bc_btid[i], tname);
|
||||
}
|
||||
|
||||
return (bc);
|
||||
err:
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
static int
|
||||
blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
|
||||
enum blockop op)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = 0;
|
||||
|
||||
pthread_mutex_lock(&bc->bc_mtx);
|
||||
if (!TAILQ_EMPTY(&bc->bc_freeq)) {
|
||||
/*
|
||||
* Enqueue and inform the block i/o thread
|
||||
* that there is work available
|
||||
*/
|
||||
if (blockif_enqueue(bc, breq, op))
|
||||
pthread_cond_signal(&bc->bc_cond);
|
||||
} else {
|
||||
/*
|
||||
* Callers are not allowed to enqueue more than
|
||||
* the specified blockif queue limit. Return an
|
||||
* error to indicate that the queue length has been
|
||||
* exceeded.
|
||||
*/
|
||||
err = E2BIG;
|
||||
}
|
||||
pthread_mutex_unlock(&bc->bc_mtx);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
return (blockif_request(bc, breq, BOP_READ));
|
||||
}
|
||||
|
||||
int
|
||||
blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
return (blockif_request(bc, breq, BOP_WRITE));
|
||||
}
|
||||
|
||||
int
|
||||
blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
return (blockif_request(bc, breq, BOP_FLUSH));
|
||||
}
|
||||
|
||||
int
|
||||
blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
return (blockif_request(bc, breq, BOP_DELETE));
|
||||
}
|
||||
|
||||
/*
 * Try to cancel a previously submitted request.
 *
 * Returns 0 if the request was still on the pending queue and has been
 * removed (its callback is not invoked), EINVAL if the request is on neither
 * the pending nor the busy queue, and EBUSY if it was being processed by a
 * worker, in which case the worker is sent SIGCONT until the element leaves
 * the BST_BUSY state.
 */
int
blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
{
	struct blockif_elem *be;

	assert(bc->bc_magic == BLOCKIF_SIG);

	pthread_mutex_lock(&bc->bc_mtx);
	/*
	 * Check pending requests.
	 */
	TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
		if (be->be_req == breq)
			break;
	}
	if (be != NULL) {
		/*
		 * Found it.
		 */
		blockif_complete(bc, be);
		pthread_mutex_unlock(&bc->bc_mtx);

		return (0);
	}

	/*
	 * Check in-flight requests.
	 */
	TAILQ_FOREACH(be, &bc->bc_busyq, be_link) {
		if (be->be_req == breq)
			break;
	}
	if (be == NULL) {
		/*
		 * Didn't find it.
		 */
		pthread_mutex_unlock(&bc->bc_mtx);
		return (EINVAL);
	}

	/*
	 * Interrupt the processing thread to force it to return
	 * prematurely via its normal callback path.
	 */
	while (be->be_status == BST_BUSY) {
		struct blockif_sig_elem bse, *old_head;

		pthread_mutex_init(&bse.bse_mtx, NULL);
		pthread_cond_init(&bse.bse_cond, NULL);

		bse.bse_pending = 1;

		/* Push the on-stack record onto the global list with CAS. */
		do {
			old_head = blockif_bse_head;
			bse.bse_next = old_head;
		} while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
					    (uintptr_t)old_head,
					    (uintptr_t)&bse));

		pthread_kill(be->be_tid, SIGCONT);

		/* Wait for the SIGCONT handler to acknowledge the record. */
		pthread_mutex_lock(&bse.bse_mtx);
		while (bse.bse_pending)
			pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx);
		pthread_mutex_unlock(&bse.bse_mtx);
	}

	pthread_mutex_unlock(&bc->bc_mtx);

	/*
	 * The processing thread has been interrupted.  Since it's not
	 * clear if the callback has been invoked yet, return EBUSY.
	 */
	return (EBUSY);
}
|
||||
|
||||
int
|
||||
blockif_close(struct blockif_ctxt *bc)
|
||||
{
|
||||
void *jval;
|
||||
int i;
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
|
||||
/*
|
||||
* Stop the block i/o thread
|
||||
*/
|
||||
pthread_mutex_lock(&bc->bc_mtx);
|
||||
bc->bc_closing = 1;
|
||||
pthread_mutex_unlock(&bc->bc_mtx);
|
||||
pthread_cond_broadcast(&bc->bc_cond);
|
||||
for (i = 0; i < BLOCKIF_NUMTHR; i++)
|
||||
pthread_join(bc->bc_btid[i], &jval);
|
||||
|
||||
/* XXX Cancel queued i/o's ??? */
|
||||
|
||||
/*
|
||||
* Release resources
|
||||
*/
|
||||
bc->bc_magic = 0;
|
||||
close(bc->bc_fd);
|
||||
free(bc);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return virtual C/H/S values for a given block. Use the algorithm
|
||||
* outlined in the VHD specification to calculate values.
|
||||
*/
|
||||
void
|
||||
blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s)
|
||||
{
|
||||
off_t sectors; /* total sectors of the block dev */
|
||||
off_t hcyl; /* cylinders times heads */
|
||||
uint16_t secpt; /* sectors per track */
|
||||
uint8_t heads;
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
|
||||
sectors = bc->bc_size / bc->bc_sectsz;
|
||||
|
||||
/* Clamp the size to the largest possible with CHS */
|
||||
if (sectors > 65535UL*16*255)
|
||||
sectors = 65535UL*16*255;
|
||||
|
||||
if (sectors >= 65536UL*16*63) {
|
||||
secpt = 255;
|
||||
heads = 16;
|
||||
hcyl = sectors / secpt;
|
||||
} else {
|
||||
secpt = 17;
|
||||
hcyl = sectors / secpt;
|
||||
heads = (hcyl + 1023) / 1024;
|
||||
|
||||
if (heads < 4)
|
||||
heads = 4;
|
||||
|
||||
if (hcyl >= (heads * 1024) || heads > 16) {
|
||||
secpt = 31;
|
||||
heads = 16;
|
||||
hcyl = sectors / secpt;
|
||||
}
|
||||
if (hcyl >= (heads * 1024)) {
|
||||
secpt = 63;
|
||||
heads = 16;
|
||||
hcyl = sectors / secpt;
|
||||
}
|
||||
}
|
||||
|
||||
*c = hcyl / heads;
|
||||
*h = heads;
|
||||
*s = secpt;
|
||||
}
|
||||
|
||||
/*
|
||||
* Accessors
|
||||
*/
|
||||
off_t
|
||||
blockif_size(struct blockif_ctxt *bc)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
return (bc->bc_size);
|
||||
}
|
||||
|
||||
int
|
||||
blockif_sectsz(struct blockif_ctxt *bc)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
return (bc->bc_sectsz);
|
||||
}
|
||||
|
||||
void
|
||||
blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
*size = bc->bc_psectsz;
|
||||
*off = bc->bc_psectoff;
|
||||
}
|
||||
|
||||
int
|
||||
blockif_queuesz(struct blockif_ctxt *bc)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
return (BLOCKIF_MAXREQ - 1);
|
||||
}
|
||||
|
||||
int
|
||||
blockif_is_ro(struct blockif_ctxt *bc)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
return (bc->bc_rdonly);
|
||||
}
|
||||
|
||||
int
|
||||
blockif_candelete(struct blockif_ctxt *bc)
|
||||
{
|
||||
|
||||
assert(bc->bc_magic == BLOCKIF_SIG);
|
||||
return (bc->bc_candelete);
|
||||
}
|
70
block_if.h
Normal file
70
block_if.h
Normal file
@ -0,0 +1,70 @@
|
||||
/*-
|
||||
* Copyright (c) 2013 Peter Grehan <grehan@freebsd.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*
|
||||
* The block API to be used by bhyve block-device emulations. The routines
|
||||
* are thread safe, with no assumptions about the context of the completion
|
||||
* callback - it may occur in the caller's context, or asynchronously in
|
||||
* another thread.
|
||||
*/
|
||||
|
||||
#ifndef _BLOCK_IF_H_
|
||||
#define _BLOCK_IF_H_
|
||||
|
||||
#include <sys/uio.h>
|
||||
#include <sys/unistd.h>
|
||||
|
||||
#define BLOCKIF_IOV_MAX		33	/* not practical to be IOV_MAX */

/*
 * A single block I/O request.  The caller fills it in and passes it to
 * blockif_read/write/flush/delete; br_callback is invoked on completion
 * with an errno-style result (0 on success).
 */
struct blockif_req {
	struct iovec	br_iov[BLOCKIF_IOV_MAX];	/* data segments */
	int		br_iovcnt;	/* number of br_iov entries in use */
	off_t		br_offset;	/* starting byte offset */
	ssize_t		br_resid;	/* bytes remaining; reduced as I/O runs */
	void		(*br_callback)(struct blockif_req *req, int err);
	void		*br_param;	/* opaque; not touched by block_if.c */
};
|
||||
|
||||
/* Opaque context; defined in block_if.c. */
struct blockif_ctxt;

/* Open a backing store; returns NULL on failure. */
struct blockif_ctxt *blockif_open(const char *optstr, const char *ident);

/* Geometry and capability accessors. */
off_t	blockif_size(struct blockif_ctxt *bc);
void	blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h,
    uint8_t *s);
int	blockif_sectsz(struct blockif_ctxt *bc);
void	blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off);
int	blockif_queuesz(struct blockif_ctxt *bc);
int	blockif_is_ro(struct blockif_ctxt *bc);
int	blockif_candelete(struct blockif_ctxt *bc);

/* Request submission; each returns 0 or an errno value. */
int	blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq);
int	blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq);
int	blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq);
int	blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq);

/* Cancellation and teardown. */
int	blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq);
int	blockif_close(struct blockif_ctxt *bc);
|
||||
|
||||
#endif /* _BLOCK_IF_H_ */
|
111
bootrom.c
Normal file
111
bootrom.c
Normal file
@ -0,0 +1,111 @@
|
||||
/*-
|
||||
* Copyright (c) 2015 Neel Natu <neel@freebsd.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include <vmmapi.h>
|
||||
#include "bhyverun.h"
|
||||
#include "bootrom.h"
|
||||
|
||||
#define MAX_BOOTROM_SIZE (16 * 1024 * 1024) /* 16 MB */
|
||||
|
||||
int
|
||||
bootrom_init(struct vmctx *ctx, const char *romfile)
|
||||
{
|
||||
struct stat sbuf;
|
||||
vm_paddr_t gpa;
|
||||
ssize_t rlen;
|
||||
char *ptr;
|
||||
int fd, i, rv, prot;
|
||||
|
||||
rv = -1;
|
||||
fd = open(romfile, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
fprintf(stderr, "Error opening bootrom \"%s\": %s\n",
|
||||
romfile, strerror(errno));
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (fstat(fd, &sbuf) < 0) {
|
||||
fprintf(stderr, "Could not fstat bootrom file \"%s\": %s\n",
|
||||
romfile, strerror(errno));
|
||||
goto done;
|
||||
}
|
||||
|
||||
/*
|
||||
* Limit bootrom size to 16MB so it doesn't encroach into reserved
|
||||
* MMIO space (e.g. APIC, HPET, MSI).
|
||||
*/
|
||||
if (sbuf.st_size > MAX_BOOTROM_SIZE || sbuf.st_size < PAGE_SIZE) {
|
||||
fprintf(stderr, "Invalid bootrom size %ld\n", sbuf.st_size);
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (sbuf.st_size & PAGE_MASK) {
|
||||
fprintf(stderr, "Bootrom size %ld is not a multiple of the "
|
||||
"page size\n", sbuf.st_size);
|
||||
goto done;
|
||||
}
|
||||
|
||||
ptr = vm_create_devmem(ctx, VM_BOOTROM, "bootrom", sbuf.st_size);
|
||||
if (ptr == MAP_FAILED)
|
||||
goto done;
|
||||
|
||||
/* Map the bootrom into the guest address space */
|
||||
prot = PROT_READ | PROT_EXEC;
|
||||
gpa = (1ULL << 32) - sbuf.st_size;
|
||||
if (vm_mmap_memseg(ctx, gpa, VM_BOOTROM, 0, sbuf.st_size, prot) != 0)
|
||||
goto done;
|
||||
|
||||
/* Read 'romfile' into the guest address space */
|
||||
for (i = 0; i < sbuf.st_size / PAGE_SIZE; i++) {
|
||||
rlen = read(fd, ptr + i * PAGE_SIZE, PAGE_SIZE);
|
||||
if (rlen != PAGE_SIZE) {
|
||||
fprintf(stderr, "Incomplete read of page %d of bootrom "
|
||||
"file %s: %ld bytes\n", i, romfile, rlen);
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
rv = 0;
|
||||
done:
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
return (rv);
|
||||
}
|
38
bootrom.h
Normal file
38
bootrom.h
Normal file
@ -0,0 +1,38 @@
|
||||
/*-
|
||||
* Copyright (c) 2015 Neel Natu <neel@freebsd.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _BOOTROM_H_
|
||||
#define _BOOTROM_H_
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
struct vmctx;

/* Load 'romfile' as the guest boot ROM; returns 0 on success, -1 on error. */
int	bootrom_init(struct vmctx *ctx, const char *romfile);
|
||||
|
||||
#endif
|
153
consport.c
Normal file
153
consport.c
Normal file
@ -0,0 +1,153 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/select.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <termios.h>
|
||||
#include <unistd.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "inout.h"
|
||||
#include "pci_lpc.h"
|
||||
|
||||
/* I/O port at which the bvm console is registered. */
#define BVM_CONSOLE_PORT	0x220
/* Signature returned for 2-byte reads of the port: the characters "bv". */
#define BVM_CONS_SIG		('b' << 8 | 'v')

/* Terminal settings saved at open (restored at exit) and the raw settings. */
static struct termios tio_orig, tio_new;
|
||||
|
||||
static void
|
||||
ttyclose(void)
|
||||
{
|
||||
tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig);
|
||||
}
|
||||
|
||||
static void
|
||||
ttyopen(void)
|
||||
{
|
||||
tcgetattr(STDIN_FILENO, &tio_orig);
|
||||
|
||||
cfmakeraw(&tio_new);
|
||||
tcsetattr(STDIN_FILENO, TCSANOW, &tio_new);
|
||||
|
||||
atexit(ttyclose);
|
||||
}
|
||||
|
||||
/*
 * Poll stdin without blocking (zero select() timeout).
 * Returns true when select() reports the descriptor readable.
 */
static bool
tty_char_available(void)
{
	struct timeval nowait = { 0, 0 };
	fd_set readable;

	FD_ZERO(&readable);
	FD_SET(STDIN_FILENO, &readable);
	return (select(STDIN_FILENO + 1, &readable, NULL, NULL, &nowait) > 0);
}
|
||||
|
||||
/*
 * Read one character from stdin without blocking.
 *
 * Returns the character (0-255), or -1 when no input is available or the
 * read does not deliver exactly one byte (EOF or error), so a stale or
 * uninitialised byte is never handed to the guest.
 */
static int
ttyread(void)
{
	char rb;

	if (tty_char_available() && read(STDIN_FILENO, &rb, 1) == 1)
		return (rb & 0xff);

	return (-1);
}
|
||||
|
||||
/* Write one byte to stdout; the result of write() is deliberately ignored. */
static void
ttywrite(unsigned char wb)
{
	const unsigned char out = wb;

	(void) write(STDOUT_FILENO, &out, 1);
}
|
||||
|
||||
static int
|
||||
console_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
static int opened;
|
||||
|
||||
if (bytes == 2 && in) {
|
||||
*eax = BVM_CONS_SIG;
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Guests might probe this port to look for old ISA devices
|
||||
* using single-byte reads. Return 0xff for those.
|
||||
*/
|
||||
if (bytes == 1 && in) {
|
||||
*eax = 0xff;
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (bytes != 4)
|
||||
return (-1);
|
||||
|
||||
if (!opened) {
|
||||
ttyopen();
|
||||
opened = 1;
|
||||
}
|
||||
|
||||
if (in)
|
||||
*eax = ttyread();
|
||||
else
|
||||
ttywrite(*eax);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
SYSRES_IO(BVM_CONSOLE_PORT, 4);
|
||||
|
||||
static struct inout_port consport = {
|
||||
"bvmcons",
|
||||
BVM_CONSOLE_PORT,
|
||||
1,
|
||||
IOPORT_F_INOUT,
|
||||
console_handler
|
||||
};
|
||||
|
||||
void
|
||||
init_bvmcons(void)
|
||||
{
|
||||
|
||||
register_inout(&consport);
|
||||
}
|
151
dbgport.c
Normal file
151
dbgport.c
Normal file
@ -0,0 +1,151 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "inout.h"
|
||||
#include "dbgport.h"
|
||||
#include "pci_lpc.h"
|
||||
|
||||
#define BVM_DBG_PORT 0x224
|
||||
#define BVM_DBG_SIG ('B' << 8 | 'V')
|
||||
|
||||
static int listen_fd, conn_fd;
|
||||
|
||||
static struct sockaddr_in sin;
|
||||
|
||||
static int
|
||||
dbg_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
char ch;
|
||||
int nwritten, nread, printonce;
|
||||
|
||||
if (bytes == 2 && in) {
|
||||
*eax = BVM_DBG_SIG;
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (bytes != 4)
|
||||
return (-1);
|
||||
|
||||
again:
|
||||
printonce = 0;
|
||||
while (conn_fd < 0) {
|
||||
if (!printonce) {
|
||||
printf("Waiting for connection from gdb\r\n");
|
||||
printonce = 1;
|
||||
}
|
||||
conn_fd = accept(listen_fd, NULL, NULL);
|
||||
if (conn_fd >= 0)
|
||||
fcntl(conn_fd, F_SETFL, O_NONBLOCK);
|
||||
else if (errno != EINTR)
|
||||
perror("accept");
|
||||
}
|
||||
|
||||
if (in) {
|
||||
nread = read(conn_fd, &ch, 1);
|
||||
if (nread == -1 && errno == EAGAIN)
|
||||
*eax = -1;
|
||||
else if (nread == 1)
|
||||
*eax = ch;
|
||||
else {
|
||||
close(conn_fd);
|
||||
conn_fd = -1;
|
||||
goto again;
|
||||
}
|
||||
} else {
|
||||
ch = *eax;
|
||||
nwritten = write(conn_fd, &ch, 1);
|
||||
if (nwritten != 1) {
|
||||
close(conn_fd);
|
||||
conn_fd = -1;
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
static struct inout_port dbgport = {
|
||||
"bvmdbg",
|
||||
BVM_DBG_PORT,
|
||||
1,
|
||||
IOPORT_F_INOUT,
|
||||
dbg_handler
|
||||
};
|
||||
|
||||
SYSRES_IO(BVM_DBG_PORT, 4);
|
||||
|
||||
void
|
||||
init_dbgport(int sport)
|
||||
{
|
||||
int reuse;
|
||||
|
||||
conn_fd = -1;
|
||||
|
||||
if ((listen_fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
|
||||
perror("socket");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
sin.sin_len = sizeof(sin);
|
||||
sin.sin_family = AF_INET;
|
||||
sin.sin_addr.s_addr = htonl(INADDR_ANY);
|
||||
sin.sin_port = htons(sport);
|
||||
|
||||
reuse = 1;
|
||||
if (setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &reuse,
|
||||
sizeof(reuse)) < 0) {
|
||||
perror("setsockopt");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (bind(listen_fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
|
||||
perror("bind");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (listen(listen_fd, 1) < 0) {
|
||||
perror("listen");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
register_inout(&dbgport);
|
||||
}
|
34
dbgport.h
Normal file
34
dbgport.h
Normal file
@ -0,0 +1,34 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _DBGPORT_H_
|
||||
#define _DBGPORT_H_
|
||||
|
||||
void init_dbgport(int port);
|
||||
|
||||
#endif
|
549
fwctl.c
Normal file
549
fwctl.c
Normal file
@ -0,0 +1,549 @@
|
||||
/*-
|
||||
* Copyright (c) 2015 Peter Grehan <grehan@freebsd.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*
|
||||
 * Guest firmware interface. Uses I/O ports 0x510/0x511 as QEMU does,
|
||||
* but with a request/response messaging protocol.
|
||||
*/
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "inout.h"
|
||||
#include "fwctl.h"
|
||||
|
||||
/*
|
||||
* Messaging protocol base operations
|
||||
*/
|
||||
#define OP_NULL 1
|
||||
#define OP_ECHO 2
|
||||
#define OP_GET 3
|
||||
#define OP_GET_LEN 4
|
||||
#define OP_SET 5
|
||||
#define OP_MAX OP_SET
|
||||
|
||||
/* I/O ports */
|
||||
#define FWCTL_OUT 0x510
|
||||
#define FWCTL_IN 0x511
|
||||
|
||||
/*
|
||||
* Back-end state-machine
|
||||
*/
|
||||
enum state {
	DORMANT,	/* initial value, before fwctl_init() runs */
	IDENT_WAIT,	/* waiting for a 16-bit write of 0 */
	IDENT_SEND,	/* signature bytes are being read back */
	REQ,		/* 32-bit writes are consumed as a request */
	RESP		/* 32-bit reads drain the response */
};

/* Current state of the back-end state machine. */
enum state be_state = DORMANT;
|
||||
|
||||
static uint8_t sig[] = { 'B', 'H', 'Y', 'V' };
|
||||
static u_int ident_idx;
|
||||
|
||||
struct op_info {
|
||||
int op;
|
||||
int (*op_start)(int len);
|
||||
void (*op_data)(uint32_t data, int len);
|
||||
int (*op_result)(struct iovec **data);
|
||||
void (*op_done)(struct iovec *data);
|
||||
};
|
||||
static struct op_info *ops[OP_MAX+1];
|
||||
|
||||
/*
 * Build the final, partial 32-bit word of a response.
 *
 * Copies the first 'len' bytes at 'data' into a zeroed word, in memory
 * order, and returns that word.  'len' is clamped to the size of the
 * word so an oversized length can never write past the staging buffer.
 */
static uint32_t
fwctl_send_rest(uint32_t *data, size_t len)
{
	union {
		uint8_t c[4];
		uint32_t w;
	} u;
	const uint8_t *cdata;
	size_t i;

	cdata = (const uint8_t *)data;
	u.w = 0;

	if (len > sizeof(u.c))
		len = sizeof(u.c);

	for (i = 0; i < len; i++)
		u.c[i] = cdata[i];

	return (u.w);
}
|
||||
|
||||
/*
 * Error op: a placeholder operation that discards all request data and
 * reports an error code in the response.
 */
static int errop_code;

/* Record the error code that errop_result() will report. */
static void
errop_set(int err)
{

	errop_code = err;
}

/*
 * Start hook: the length is not examined.  Resets the stored code to
 * ENOENT and returns it.
 */
static int
errop_start(int len)
{
	const int code = ENOENT;

	errop_code = code;

	return (code);
}

/* Data hook: every request word is dropped. */
static void
errop_data(uint32_t data, int len)
{

	/* ignore */
}

/* Result hook: no payload is produced; returns the stored error code. */
static int
errop_result(struct iovec **data)
{
	const int code = errop_code;

	*data = NULL;

	return (code);
}

/* Done hook: nothing to release. */
static void
errop_done(struct iovec *data)
{

	/* assert data is NULL */
}
|
||||
|
||||
static struct op_info errop_info = {
|
||||
.op_start = errop_start,
|
||||
.op_data = errop_data,
|
||||
.op_result = errop_result,
|
||||
.op_done = errop_done
|
||||
};
|
||||
|
||||
/* OID search */
|
||||
SET_DECLARE(ctl_set, struct ctl);
|
||||
|
||||
CTL_NODE("hw.ncpu", &guest_ncpus, sizeof(guest_ncpus));
|
||||
|
||||
static struct ctl *
|
||||
ctl_locate(const char *str, int maxlen)
|
||||
{
|
||||
struct ctl *cp, **cpp;
|
||||
|
||||
SET_FOREACH(cpp, ctl_set) {
|
||||
cp = *cpp;
|
||||
if (!strncmp(str, cp->c_oid, maxlen))
|
||||
return (cp);
|
||||
}
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
 * OID get / get-len operations.
 *
 * The guest sends the OID name as a sequence of 32-bit words, which are
 * accumulated in fget_str; fget_cnt is the number of bytes stored so far.
 */
#define FGET_STRSZ	80
static struct iovec fget_biov[2];
static char fget_str[FGET_STRSZ];
static struct {
	size_t f_sz;
	uint32_t f_data[1024];
} fget_buf;
static int fget_cnt;
static size_t fget_size;

/*
 * Start hook: validate the announced request length and reset the name
 * buffer.  The length originates from a guest-supplied 32-bit value, so
 * it may arrive here as a negative int; both negative and oversized
 * lengths are rejected with E2BIG.  Returns 0 on success.
 */
static int
fget_start(int len)
{

	if (len < 0 || len > FGET_STRSZ)
		return (E2BIG);

	fget_cnt = 0;

	return (0);
}

/*
 * Data hook: append one 32-bit word of the OID name.  Words that would
 * not fit in fget_str are dropped, so a misbehaving guest cannot write
 * past the buffer.  memcpy() is used because fget_str is a char array
 * and must not be accessed through a uint32_t pointer.
 */
static void
fget_data(uint32_t data, int len)
{

	if ((size_t)fget_cnt + sizeof(data) > sizeof(fget_str))
		return;

	memcpy(&fget_str[fget_cnt], &data, sizeof(data));
	fget_cnt += sizeof(data);
}
|
||||
|
||||
static int
|
||||
fget_result(struct iovec **data, int val)
|
||||
{
|
||||
struct ctl *cp;
|
||||
int err;
|
||||
|
||||
err = 0;
|
||||
|
||||
/* Locate the OID */
|
||||
cp = ctl_locate(fget_str, fget_cnt);
|
||||
if (cp == NULL) {
|
||||
*data = NULL;
|
||||
err = ENOENT;
|
||||
} else {
|
||||
if (val) {
|
||||
/* For now, copy the len/data into a buffer */
|
||||
memset(&fget_buf, 0, sizeof(fget_buf));
|
||||
fget_buf.f_sz = cp->c_len;
|
||||
memcpy(fget_buf.f_data, cp->c_data, cp->c_len);
|
||||
fget_biov[0].iov_base = (char *)&fget_buf;
|
||||
fget_biov[0].iov_len = sizeof(fget_buf.f_sz) +
|
||||
cp->c_len;
|
||||
} else {
|
||||
fget_size = cp->c_len;
|
||||
fget_biov[0].iov_base = (char *)&fget_size;
|
||||
fget_biov[0].iov_len = sizeof(fget_size);
|
||||
}
|
||||
|
||||
fget_biov[1].iov_base = NULL;
|
||||
fget_biov[1].iov_len = 0;
|
||||
*data = fget_biov;
|
||||
}
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
/* Done hook for the get operations: the response lives in static storage. */
static void
fget_done(struct iovec *data)
{

	/* nothing needs to be freed */
}

/* Result hook for get-len: respond with the length only. */
static int
fget_len_result(struct iovec **data)
{
	const int want_value = 0;

	return (fget_result(data, want_value));
}

/* Result hook for get: respond with the length followed by the value. */
static int
fget_val_result(struct iovec **data)
{
	const int want_value = 1;

	return (fget_result(data, want_value));
}
|
||||
|
||||
static struct op_info fgetlen_info = {
|
||||
.op_start = fget_start,
|
||||
.op_data = fget_data,
|
||||
.op_result = fget_len_result,
|
||||
.op_done = fget_done
|
||||
};
|
||||
|
||||
static struct op_info fgetval_info = {
|
||||
.op_start = fget_start,
|
||||
.op_data = fget_data,
|
||||
.op_result = fget_val_result,
|
||||
.op_done = fget_done
|
||||
};
|
||||
|
||||
static struct req_info {
|
||||
int req_error;
|
||||
u_int req_count;
|
||||
uint32_t req_size;
|
||||
uint32_t req_type;
|
||||
uint32_t req_txid;
|
||||
struct op_info *req_op;
|
||||
int resp_error;
|
||||
int resp_count;
|
||||
int resp_size;
|
||||
int resp_off;
|
||||
struct iovec *resp_biov;
|
||||
} rinfo;
|
||||
|
||||
static void
|
||||
fwctl_response_done(void)
|
||||
{
|
||||
|
||||
(*rinfo.req_op->op_done)(rinfo.resp_biov);
|
||||
|
||||
/* reinit the req data struct */
|
||||
memset(&rinfo, 0, sizeof(rinfo));
|
||||
}
|
||||
|
||||
static void
|
||||
fwctl_request_done(void)
|
||||
{
|
||||
|
||||
rinfo.resp_error = (*rinfo.req_op->op_result)(&rinfo.resp_biov);
|
||||
|
||||
/* XXX only a single vector supported at the moment */
|
||||
rinfo.resp_off = 0;
|
||||
if (rinfo.resp_biov == NULL) {
|
||||
rinfo.resp_size = 0;
|
||||
} else {
|
||||
rinfo.resp_size = rinfo.resp_biov[0].iov_len;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
fwctl_request_start(void)
|
||||
{
|
||||
int err;
|
||||
|
||||
/* Data size doesn't include header */
|
||||
rinfo.req_size -= 12;
|
||||
|
||||
rinfo.req_op = &errop_info;
|
||||
if (rinfo.req_type <= OP_MAX && ops[rinfo.req_type] != NULL)
|
||||
rinfo.req_op = ops[rinfo.req_type];
|
||||
|
||||
err = (*rinfo.req_op->op_start)(rinfo.req_size);
|
||||
|
||||
if (err) {
|
||||
errop_set(err);
|
||||
rinfo.req_op = &errop_info;
|
||||
}
|
||||
|
||||
/* Catch case of zero-length message here */
|
||||
if (rinfo.req_size == 0) {
|
||||
fwctl_request_done();
|
||||
return (1);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
fwctl_request_data(uint32_t value)
|
||||
{
|
||||
int remlen;
|
||||
|
||||
/* Make sure remaining size is >= 0 */
|
||||
rinfo.req_size -= sizeof(uint32_t);
|
||||
remlen = MAX(rinfo.req_size, 0);
|
||||
|
||||
(*rinfo.req_op->op_data)(value, remlen);
|
||||
|
||||
if (rinfo.req_size < sizeof(uint32_t)) {
|
||||
fwctl_request_done();
|
||||
return (1);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
fwctl_request(uint32_t value)
|
||||
{
|
||||
|
||||
int ret;
|
||||
|
||||
ret = 0;
|
||||
|
||||
switch (rinfo.req_count) {
|
||||
case 0:
|
||||
/* Verify size */
|
||||
if (value < 12) {
|
||||
printf("msg size error");
|
||||
exit(1);
|
||||
}
|
||||
rinfo.req_size = value;
|
||||
rinfo.req_count = 1;
|
||||
break;
|
||||
case 1:
|
||||
rinfo.req_type = value;
|
||||
rinfo.req_count++;
|
||||
break;
|
||||
case 2:
|
||||
rinfo.req_txid = value;
|
||||
rinfo.req_count++;
|
||||
ret = fwctl_request_start();
|
||||
break;
|
||||
default:
|
||||
ret = fwctl_request_data(value);
|
||||
break;
|
||||
}
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static int
|
||||
fwctl_response(uint32_t *retval)
|
||||
{
|
||||
uint32_t *dp;
|
||||
int remlen;
|
||||
|
||||
switch(rinfo.resp_count) {
|
||||
case 0:
|
||||
/* 4 x u32 header len + data */
|
||||
*retval = 4*sizeof(uint32_t) +
|
||||
roundup(rinfo.resp_size, sizeof(uint32_t));
|
||||
rinfo.resp_count++;
|
||||
break;
|
||||
case 1:
|
||||
*retval = rinfo.req_type;
|
||||
rinfo.resp_count++;
|
||||
break;
|
||||
case 2:
|
||||
*retval = rinfo.req_txid;
|
||||
rinfo.resp_count++;
|
||||
break;
|
||||
case 3:
|
||||
*retval = rinfo.resp_error;
|
||||
rinfo.resp_count++;
|
||||
break;
|
||||
default:
|
||||
remlen = rinfo.resp_size - rinfo.resp_off;
|
||||
dp = (uint32_t *)
|
||||
((uint8_t *)rinfo.resp_biov->iov_base + rinfo.resp_off);
|
||||
if (remlen >= sizeof(uint32_t)) {
|
||||
*retval = *dp;
|
||||
} else if (remlen > 0) {
|
||||
*retval = fwctl_send_rest(dp, remlen);
|
||||
}
|
||||
rinfo.resp_off += sizeof(uint32_t);
|
||||
break;
|
||||
}
|
||||
|
||||
if (rinfo.resp_count > 3 &&
|
||||
rinfo.resp_size - rinfo.resp_off <= 0) {
|
||||
fwctl_response_done();
|
||||
return (1);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* i/o port handling.
|
||||
*/
|
||||
static uint8_t
|
||||
fwctl_inb(void)
|
||||
{
|
||||
uint8_t retval;
|
||||
|
||||
retval = 0xff;
|
||||
|
||||
switch (be_state) {
|
||||
case IDENT_SEND:
|
||||
retval = sig[ident_idx++];
|
||||
if (ident_idx >= sizeof(sig))
|
||||
be_state = REQ;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return (retval);
|
||||
}
|
||||
|
||||
static void
|
||||
fwctl_outw(uint16_t val)
|
||||
{
|
||||
switch (be_state) {
|
||||
case IDENT_WAIT:
|
||||
if (val == 0) {
|
||||
be_state = IDENT_SEND;
|
||||
ident_idx = 0;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
/* ignore */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
fwctl_inl(void)
|
||||
{
|
||||
uint32_t retval;
|
||||
|
||||
switch (be_state) {
|
||||
case RESP:
|
||||
if (fwctl_response(&retval))
|
||||
be_state = REQ;
|
||||
break;
|
||||
default:
|
||||
retval = 0xffffffff;
|
||||
break;
|
||||
}
|
||||
|
||||
return (retval);
|
||||
}
|
||||
|
||||
static void
|
||||
fwctl_outl(uint32_t val)
|
||||
{
|
||||
|
||||
switch (be_state) {
|
||||
case REQ:
|
||||
if (fwctl_request(val))
|
||||
be_state = RESP;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
 * I/O port handler shared by both fwctl ports; dispatches on direction
 * and access width.  Reads of an unsupported width return 0xffff and
 * writes of an unsupported width are ignored.  Always returns 0.
 */
static int
fwctl_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{

	if (in) {
		switch (bytes) {
		case 1:
			*eax = fwctl_inb();
			break;
		case 4:
			*eax = fwctl_inl();
			break;
		default:
			*eax = 0xffff;
			break;
		}
	} else {
		switch (bytes) {
		case 2:
			fwctl_outw(*eax);
			break;
		case 4:
			fwctl_outl(*eax);
			break;
		default:
			break;
		}
	}

	return (0);
}
|
||||
INOUT_PORT(fwctl_wreg, FWCTL_OUT, IOPORT_F_INOUT, fwctl_handler);
|
||||
INOUT_PORT(fwctl_rreg, FWCTL_IN, IOPORT_F_IN, fwctl_handler);
|
||||
|
||||
void
|
||||
fwctl_init(void)
|
||||
{
|
||||
|
||||
ops[OP_GET_LEN] = &fgetlen_info;
|
||||
ops[OP_GET] = &fgetval_info;
|
||||
|
||||
be_state = IDENT_WAIT;
|
||||
}
|
54
fwctl.h
Normal file
54
fwctl.h
Normal file
@ -0,0 +1,54 @@
|
||||
/*-
|
||||
* Copyright (c) 2015 Peter Grehan <grehan@freebsd.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _FWCTL_H_
|
||||
#define _FWCTL_H_
|
||||
|
||||
#include <sys/linker_set.h>
|
||||
|
||||
/*
|
||||
* Linker set api for export of information to guest firmware via
|
||||
* a sysctl-like OID interface
|
||||
*/
|
||||
struct ctl {
	const char	*c_oid;		/* OID name, e.g. "hw.ncpu" */
	const void	*c_data;	/* start of the exported value */
	const int	c_len;		/* size of the value in bytes */
};
|
||||
|
||||
#define CTL_NODE(oid, data, len) \
|
||||
static struct ctl __CONCAT(__ctl, __LINE__) = { \
|
||||
oid, \
|
||||
(data), \
|
||||
(len), \
|
||||
}; \
|
||||
DATA_SET(ctl_set, __CONCAT(__ctl, __LINE__))
|
||||
|
||||
void fwctl_init(void);
|
||||
|
||||
#endif /* _FWCTL_H_ */
|
297
inout.c
Normal file
297
inout.c
Normal file
@ -0,0 +1,297 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/linker_set.h>
|
||||
#include <sys/_iovec.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <x86/psl.h>
|
||||
#include <x86/segments.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <machine/vmm_instruction_emul.h>
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "inout.h"
|
||||
|
||||
SET_DECLARE(inout_port_set, struct inout_port);
|
||||
|
||||
#define MAX_IOPORTS (1 << 16)
|
||||
|
||||
#define VERIFY_IOPORT(port, size) \
|
||||
assert((port) >= 0 && (size) > 0 && ((port) + (size)) <= MAX_IOPORTS)
|
||||
|
||||
static struct {
|
||||
const char *name;
|
||||
int flags;
|
||||
inout_func_t handler;
|
||||
void *arg;
|
||||
} inout_handlers[MAX_IOPORTS];
|
||||
|
||||
/*
 * Handler for ports nothing else has claimed.  Reads of 1, 2 or 4 bytes
 * return all-ones of that width; other read widths leave *eax alone and
 * writes are discarded.  Always returns 0.
 */
static int
default_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{

	if (!in)
		return (0);

	if (bytes == 4)
		*eax = 0xffffffff;
	else if (bytes == 2)
		*eax = 0xffff;
	else if (bytes == 1)
		*eax = 0xff;

	return (0);
}
|
||||
|
||||
static void
|
||||
register_default_iohandler(int start, int size)
|
||||
{
|
||||
struct inout_port iop;
|
||||
|
||||
VERIFY_IOPORT(start, size);
|
||||
|
||||
bzero(&iop, sizeof(iop));
|
||||
iop.name = "default";
|
||||
iop.port = start;
|
||||
iop.size = size;
|
||||
iop.flags = IOPORT_F_INOUT | IOPORT_F_DEFAULT;
|
||||
iop.handler = default_inout;
|
||||
|
||||
register_inout(&iop);
|
||||
}
|
||||
|
||||
int
|
||||
emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
|
||||
{
|
||||
int addrsize, bytes, flags, in, port, prot, rep;
|
||||
uint32_t eax, val;
|
||||
inout_func_t handler;
|
||||
void *arg;
|
||||
int error, fault, retval;
|
||||
enum vm_reg_name idxreg;
|
||||
uint64_t gla, index, iterations, count;
|
||||
struct vm_inout_str *vis;
|
||||
struct iovec iov[2];
|
||||
|
||||
bytes = vmexit->u.inout.bytes;
|
||||
in = vmexit->u.inout.in;
|
||||
port = vmexit->u.inout.port;
|
||||
|
||||
assert(port < MAX_IOPORTS);
|
||||
assert(bytes == 1 || bytes == 2 || bytes == 4);
|
||||
|
||||
handler = inout_handlers[port].handler;
|
||||
|
||||
if (strict && handler == default_inout)
|
||||
return (-1);
|
||||
|
||||
flags = inout_handlers[port].flags;
|
||||
arg = inout_handlers[port].arg;
|
||||
|
||||
if (in) {
|
||||
if (!(flags & IOPORT_F_IN))
|
||||
return (-1);
|
||||
} else {
|
||||
if (!(flags & IOPORT_F_OUT))
|
||||
return (-1);
|
||||
}
|
||||
|
||||
retval = 0;
|
||||
if (vmexit->u.inout.string) {
|
||||
vis = &vmexit->u.inout_str;
|
||||
rep = vis->inout.rep;
|
||||
addrsize = vis->addrsize;
|
||||
prot = in ? PROT_WRITE : PROT_READ;
|
||||
assert(addrsize == 2 || addrsize == 4 || addrsize == 8);
|
||||
|
||||
/* Index register */
|
||||
idxreg = in ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI;
|
||||
index = vis->index & vie_size2mask(addrsize);
|
||||
|
||||
/* Count register */
|
||||
count = vis->count & vie_size2mask(addrsize);
|
||||
|
||||
/* Limit number of back-to-back in/out emulations to 16 */
|
||||
iterations = MIN(count, 16);
|
||||
while (iterations > 0) {
|
||||
assert(retval == 0);
|
||||
if (vie_calculate_gla(vis->paging.cpu_mode,
|
||||
vis->seg_name, &vis->seg_desc, index, bytes,
|
||||
addrsize, prot, &gla)) {
|
||||
vm_inject_gp(ctx, vcpu);
|
||||
break;
|
||||
}
|
||||
|
||||
error = vm_copy_setup(ctx, vcpu, &vis->paging, gla,
|
||||
bytes, prot, iov, nitems(iov), &fault);
|
||||
if (error) {
|
||||
retval = -1; /* Unrecoverable error */
|
||||
break;
|
||||
} else if (fault) {
|
||||
retval = 0; /* Resume guest to handle fault */
|
||||
break;
|
||||
}
|
||||
|
||||
if (vie_alignment_check(vis->paging.cpl, bytes,
|
||||
vis->cr0, vis->rflags, gla)) {
|
||||
vm_inject_ac(ctx, vcpu, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
val = 0;
|
||||
if (!in)
|
||||
vm_copyin(ctx, vcpu, iov, &val, bytes);
|
||||
|
||||
retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
|
||||
if (retval != 0)
|
||||
break;
|
||||
|
||||
if (in)
|
||||
vm_copyout(ctx, vcpu, &val, iov, bytes);
|
||||
|
||||
/* Update index */
|
||||
if (vis->rflags & PSL_D)
|
||||
index -= bytes;
|
||||
else
|
||||
index += bytes;
|
||||
|
||||
count--;
|
||||
iterations--;
|
||||
}
|
||||
|
||||
/* Update index register */
|
||||
error = vie_update_register(ctx, vcpu, idxreg, index, addrsize);
|
||||
assert(error == 0);
|
||||
|
||||
/*
|
||||
* Update count register only if the instruction had a repeat
|
||||
* prefix.
|
||||
*/
|
||||
if (rep) {
|
||||
error = vie_update_register(ctx, vcpu, VM_REG_GUEST_RCX,
|
||||
count, addrsize);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
/* Restart the instruction if more iterations remain */
|
||||
if (retval == 0 && count != 0) {
|
||||
error = vm_restart_instruction(ctx, vcpu);
|
||||
assert(error == 0);
|
||||
}
|
||||
} else {
|
||||
eax = vmexit->u.inout.eax;
|
||||
val = eax & vie_size2mask(bytes);
|
||||
retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
|
||||
if (retval == 0 && in) {
|
||||
eax &= ~vie_size2mask(bytes);
|
||||
eax |= val & vie_size2mask(bytes);
|
||||
error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX,
|
||||
eax);
|
||||
assert(error == 0);
|
||||
}
|
||||
}
|
||||
return (retval);
|
||||
}
|
||||
|
||||
void
|
||||
init_inout(void)
|
||||
{
|
||||
struct inout_port **iopp, *iop;
|
||||
|
||||
/*
|
||||
* Set up the default handler for all ports
|
||||
*/
|
||||
register_default_iohandler(0, MAX_IOPORTS);
|
||||
|
||||
/*
|
||||
* Overwrite with specified handlers
|
||||
*/
|
||||
SET_FOREACH(iopp, inout_port_set) {
|
||||
iop = *iopp;
|
||||
assert(iop->port < MAX_IOPORTS);
|
||||
inout_handlers[iop->port].name = iop->name;
|
||||
inout_handlers[iop->port].flags = iop->flags;
|
||||
inout_handlers[iop->port].handler = iop->handler;
|
||||
inout_handlers[iop->port].arg = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
register_inout(struct inout_port *iop)
|
||||
{
|
||||
int i;
|
||||
|
||||
VERIFY_IOPORT(iop->port, iop->size);
|
||||
|
||||
/*
|
||||
* Verify that the new registration is not overwriting an already
|
||||
* allocated i/o range.
|
||||
*/
|
||||
if ((iop->flags & IOPORT_F_DEFAULT) == 0) {
|
||||
for (i = iop->port; i < iop->port + iop->size; i++) {
|
||||
if ((inout_handlers[i].flags & IOPORT_F_DEFAULT) == 0)
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = iop->port; i < iop->port + iop->size; i++) {
|
||||
inout_handlers[i].name = iop->name;
|
||||
inout_handlers[i].flags = iop->flags;
|
||||
inout_handlers[i].handler = iop->handler;
|
||||
inout_handlers[i].arg = iop->arg;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
unregister_inout(struct inout_port *iop)
|
||||
{
|
||||
|
||||
VERIFY_IOPORT(iop->port, iop->size);
|
||||
assert(inout_handlers[iop->port].name == iop->name);
|
||||
|
||||
register_default_iohandler(iop->port, iop->size);
|
||||
|
||||
return (0);
|
||||
}
|
79
inout.h
Normal file
79
inout.h
Normal file
@ -0,0 +1,79 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _INOUT_H_
|
||||
#define _INOUT_H_
|
||||
|
||||
#include <sys/linker_set.h>
|
||||
|
||||
struct vmctx;
|
||||
struct vm_exit;
|
||||
|
||||
/*
|
||||
* inout emulation handlers return 0 on success and -1 on failure.
|
||||
*/
|
||||
typedef int (*inout_func_t)(struct vmctx *ctx, int vcpu, int in, int port,
|
||||
int bytes, uint32_t *eax, void *arg);
|
||||
|
||||
/*
 * Description of one I/O port registration.
 *
 *   name    - identifying string (INOUT_PORT stores the stringified macro
 *             argument here)
 *   port    - first port number of the range
 *   size    - number of ports in the range (INOUT_PORT always uses 1)
 *   flags   - IOPORT_F_* bits
 *   handler - emulation callback of type inout_func_t
 *   arg     - opaque pointer; presumably forwarded as the handler's 'arg'
 *             parameter -- confirm in inout.c
 */
struct inout_port {
|
||||
const char *name;
|
||||
int port;
|
||||
int size;
|
||||
int flags;
|
||||
inout_func_t handler;
|
||||
void *arg;
|
||||
};
|
||||
#define IOPORT_F_IN 0x1
|
||||
#define IOPORT_F_OUT 0x2
|
||||
#define IOPORT_F_INOUT (IOPORT_F_IN | IOPORT_F_OUT)
|
||||
|
||||
/*
|
||||
* The following flags are used internally and must not be used by
|
||||
* device models.
|
||||
*/
|
||||
#define IOPORT_F_DEFAULT 0x80000000 /* claimed by default handler */
|
||||
|
||||
/*
 * Statically declare a single-port handler and add it to the
 * 'inout_port_set' linker set.
 *
 * The generated struct inout_port has: name = the stringified 'name'
 * argument, port = 'port', size = 1, flags = 'flags', handler = 'handler'
 * and arg = 0.  The variable name is built from __LINE__, so two uses on
 * the same source line would presumably collide (depends on how __CONCAT
 * expands its arguments -- confirm).
 */
#define INOUT_PORT(name, port, flags, handler) \
|
||||
static struct inout_port __CONCAT(__inout_port, __LINE__) = { \
|
||||
#name, \
|
||||
(port), \
|
||||
1, \
|
||||
(flags), \
|
||||
(handler), \
|
||||
0 \
|
||||
}; \
|
||||
DATA_SET(inout_port_set, __CONCAT(__inout_port, __LINE__))
|
||||
|
||||
void init_inout(void);
|
||||
int emulate_inout(struct vmctx *, int vcpu, struct vm_exit *vmexit,
|
||||
int strict);
|
||||
int register_inout(struct inout_port *iop);
|
||||
int unregister_inout(struct inout_port *iop);
|
||||
void init_bvmcons(void);
|
||||
|
||||
#endif /* _INOUT_H_ */
|
74
ioapic.c
Normal file
74
ioapic.c
Normal file
@ -0,0 +1,74 @@
|
||||
/*-
|
||||
* Copyright (c) 2014 Hudson River Trading LLC
|
||||
* Written by: John H. Baldwin <jhb@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include "ioapic.h"
|
||||
|
||||
/*
|
||||
* Assign PCI INTx interrupts to I/O APIC pins in a round-robin
|
||||
* fashion. Note that we have no idea what the HPET is using, but the
|
||||
* HPET is also programmable whereas this is intended for hardwired
|
||||
* PCI interrupts.
|
||||
*
|
||||
* This assumes a single I/O APIC where pins >= 16 are permitted for
|
||||
* PCI devices.
|
||||
*/
|
||||
static int pci_pins;
|
||||
|
||||
void
|
||||
ioapic_init(struct vmctx *ctx)
|
||||
{
|
||||
|
||||
if (vm_ioapic_pincount(ctx, &pci_pins) < 0) {
|
||||
pci_pins = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Ignore the first 16 pins. */
|
||||
if (pci_pins <= 16) {
|
||||
pci_pins = 0;
|
||||
return;
|
||||
}
|
||||
pci_pins -= 16;
|
||||
}
|
||||
|
||||
int
|
||||
ioapic_pci_alloc_irq(void)
|
||||
{
|
||||
static int last_pin;
|
||||
|
||||
if (pci_pins == 0)
|
||||
return (-1);
|
||||
return (16 + (last_pin++ % pci_pins));
|
||||
}
|
39
ioapic.h
Normal file
39
ioapic.h
Normal file
@ -0,0 +1,39 @@
|
||||
/*-
|
||||
* Copyright (c) 2014 Hudson River Trading LLC
|
||||
* Written by: John H. Baldwin <jhb@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _IOAPIC_H_
|
||||
#define _IOAPIC_H_
|
||||
|
||||
/*
|
||||
* Allocate a PCI IRQ from the I/O APIC.
|
||||
*/
|
||||
void ioapic_init(struct vmctx *ctx);
|
||||
int ioapic_pci_alloc_irq(void);
|
||||
|
||||
#endif
|
291
mem.c
Normal file
291
mem.c
Normal file
@ -0,0 +1,291 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Memory ranges are represented with an RB tree. On insertion, the range
|
||||
* is checked for overlaps. On lookup, the key has the same base and limit
|
||||
* so it can be searched within the range.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/tree.h>
|
||||
#include <sys/errno.h>
|
||||
#include <machine/vmm.h>
|
||||
#include <machine/vmm_instruction_emul.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "mem.h"
|
||||
|
||||
/*
 * One node of the MMIO range tree.
 *
 * 'mr_param' is a by-value copy of the mem_range supplied at registration.
 * 'mr_base' and 'mr_end' are the inclusive bounds of the range: the
 * registration code stores base + size - 1 in 'mr_end', and lookups use a
 * key whose base and end are both the address being searched for.
 */
struct mmio_rb_range {
|
||||
RB_ENTRY(mmio_rb_range) mr_link; /* RB tree links */
|
||||
struct mem_range mr_param;
|
||||
uint64_t mr_base;
|
||||
uint64_t mr_end;
|
||||
};
|
||||
|
||||
struct mmio_rb_tree;
|
||||
RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
|
||||
|
||||
RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rb_root, mmio_rb_fallback;
|
||||
|
||||
/*
|
||||
* Per-vCPU cache. Since most accesses from a vCPU will be to
|
||||
* consecutive addresses in a range, it makes sense to cache the
|
||||
* result of a lookup.
|
||||
*/
|
||||
static struct mmio_rb_range *mmio_hint[VM_MAXCPU];
|
||||
|
||||
static pthread_rwlock_t mmio_rwlock;
|
||||
|
||||
static int
|
||||
mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b)
|
||||
{
|
||||
if (a->mr_end < b->mr_base)
|
||||
return (-1);
|
||||
else if (a->mr_base > b->mr_end)
|
||||
return (1);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
mmio_rb_lookup(struct mmio_rb_tree *rbt, uint64_t addr,
|
||||
struct mmio_rb_range **entry)
|
||||
{
|
||||
struct mmio_rb_range find, *res;
|
||||
|
||||
find.mr_base = find.mr_end = addr;
|
||||
|
||||
res = RB_FIND(mmio_rb_tree, rbt, &find);
|
||||
|
||||
if (res != NULL) {
|
||||
*entry = res;
|
||||
return (0);
|
||||
}
|
||||
|
||||
return (ENOENT);
|
||||
}
|
||||
|
||||
static int
|
||||
mmio_rb_add(struct mmio_rb_tree *rbt, struct mmio_rb_range *new)
|
||||
{
|
||||
struct mmio_rb_range *overlap;
|
||||
|
||||
overlap = RB_INSERT(mmio_rb_tree, rbt, new);
|
||||
|
||||
if (overlap != NULL) {
|
||||
#ifdef RB_DEBUG
|
||||
printf("overlap detected: new %lx:%lx, tree %lx:%lx\n",
|
||||
new->mr_base, new->mr_end,
|
||||
overlap->mr_base, overlap->mr_end);
|
||||
#endif
|
||||
|
||||
return (EEXIST);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
#if 0
/*
 * Debug helper (compiled out): print every range in 'rbt' as
 * "base:end, name" while holding the read lock.
 */
static void
mmio_rb_dump(struct mmio_rb_tree *rbt)
{
	struct mmio_rb_range *rng;

	pthread_rwlock_rdlock(&mmio_rwlock);
	RB_FOREACH(rng, mmio_rb_tree, rbt) {
		printf(" %lx:%lx, %s\n", rng->mr_base, rng->mr_end,
		    rng->mr_param.name);
	}
	pthread_rwlock_unlock(&mmio_rwlock);
}
#endif
|
||||
|
||||
RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
|
||||
|
||||
static int
|
||||
mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg)
|
||||
{
|
||||
int error;
|
||||
struct mem_range *mr = arg;
|
||||
|
||||
error = (*mr->handler)(ctx, vcpu, MEM_F_READ, gpa, size,
|
||||
rval, mr->arg1, mr->arg2);
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg)
|
||||
{
|
||||
int error;
|
||||
struct mem_range *mr = arg;
|
||||
|
||||
error = (*mr->handler)(ctx, vcpu, MEM_F_WRITE, gpa, size,
|
||||
&wval, mr->arg1, mr->arg2);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie,
|
||||
struct vm_guest_paging *paging)
|
||||
|
||||
{
|
||||
struct mmio_rb_range *entry;
|
||||
int err, immutable;
|
||||
|
||||
pthread_rwlock_rdlock(&mmio_rwlock);
|
||||
/*
|
||||
* First check the per-vCPU cache
|
||||
*/
|
||||
if (mmio_hint[vcpu] &&
|
||||
paddr >= mmio_hint[vcpu]->mr_base &&
|
||||
paddr <= mmio_hint[vcpu]->mr_end) {
|
||||
entry = mmio_hint[vcpu];
|
||||
} else
|
||||
entry = NULL;
|
||||
|
||||
if (entry == NULL) {
|
||||
if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) {
|
||||
/* Update the per-vCPU cache */
|
||||
mmio_hint[vcpu] = entry;
|
||||
} else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) {
|
||||
pthread_rwlock_unlock(&mmio_rwlock);
|
||||
return (ESRCH);
|
||||
}
|
||||
}
|
||||
|
||||
assert(entry != NULL);
|
||||
|
||||
/*
|
||||
* An 'immutable' memory range is guaranteed to be never removed
|
||||
* so there is no need to hold 'mmio_rwlock' while calling the
|
||||
* handler.
|
||||
*
|
||||
* XXX writes to the PCIR_COMMAND register can cause register_mem()
|
||||
* to be called. If the guest is using PCI extended config space
|
||||
* to modify the PCIR_COMMAND register then register_mem() can
|
||||
* deadlock on 'mmio_rwlock'. However by registering the extended
|
||||
* config space window as 'immutable' the deadlock can be avoided.
|
||||
*/
|
||||
immutable = (entry->mr_param.flags & MEM_F_IMMUTABLE);
|
||||
if (immutable)
|
||||
pthread_rwlock_unlock(&mmio_rwlock);
|
||||
|
||||
err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, paging,
|
||||
mem_read, mem_write, &entry->mr_param);
|
||||
|
||||
if (!immutable)
|
||||
pthread_rwlock_unlock(&mmio_rwlock);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp)
|
||||
{
|
||||
struct mmio_rb_range *entry, *mrp;
|
||||
int err;
|
||||
|
||||
err = 0;
|
||||
|
||||
mrp = malloc(sizeof(struct mmio_rb_range));
|
||||
|
||||
if (mrp != NULL) {
|
||||
mrp->mr_param = *memp;
|
||||
mrp->mr_base = memp->base;
|
||||
mrp->mr_end = memp->base + memp->size - 1;
|
||||
pthread_rwlock_wrlock(&mmio_rwlock);
|
||||
if (mmio_rb_lookup(rbt, memp->base, &entry) != 0)
|
||||
err = mmio_rb_add(rbt, mrp);
|
||||
pthread_rwlock_unlock(&mmio_rwlock);
|
||||
if (err)
|
||||
free(mrp);
|
||||
} else
|
||||
err = ENOMEM;
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
register_mem(struct mem_range *memp)
|
||||
{
|
||||
|
||||
return (register_mem_int(&mmio_rb_root, memp));
|
||||
}
|
||||
|
||||
int
|
||||
register_mem_fallback(struct mem_range *memp)
|
||||
{
|
||||
|
||||
return (register_mem_int(&mmio_rb_fallback, memp));
|
||||
}
|
||||
|
||||
int
|
||||
unregister_mem(struct mem_range *memp)
|
||||
{
|
||||
struct mem_range *mr;
|
||||
struct mmio_rb_range *entry = NULL;
|
||||
int err, i;
|
||||
|
||||
pthread_rwlock_wrlock(&mmio_rwlock);
|
||||
err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry);
|
||||
if (err == 0) {
|
||||
mr = &entry->mr_param;
|
||||
assert(mr->name == memp->name);
|
||||
assert(mr->base == memp->base && mr->size == memp->size);
|
||||
assert((mr->flags & MEM_F_IMMUTABLE) == 0);
|
||||
RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry);
|
||||
|
||||
/* flush Per-vCPU cache */
|
||||
for (i=0; i < VM_MAXCPU; i++) {
|
||||
if (mmio_hint[i] == entry)
|
||||
mmio_hint[i] = NULL;
|
||||
}
|
||||
}
|
||||
pthread_rwlock_unlock(&mmio_rwlock);
|
||||
|
||||
if (entry)
|
||||
free(entry);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
void
|
||||
init_mem(void)
|
||||
{
|
||||
|
||||
RB_INIT(&mmio_rb_root);
|
||||
RB_INIT(&mmio_rb_fallback);
|
||||
pthread_rwlock_init(&mmio_rwlock, NULL);
|
||||
}
|
61
mem.h
Normal file
61
mem.h
Normal file
@ -0,0 +1,61 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _MEM_H_
|
||||
#define _MEM_H_
|
||||
|
||||
#include <sys/linker_set.h>
|
||||
|
||||
struct vmctx;
|
||||
|
||||
typedef int (*mem_func_t)(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
|
||||
int size, uint64_t *val, void *arg1, long arg2);
|
||||
|
||||
/*
 * Description of one emulated memory (MMIO) region.
 *
 *   name    - identifying string
 *   flags   - MEM_F_* bits
 *   handler - callback of type mem_func_t invoked for accesses
 *   arg1    - opaque pointer, passed to the handler as 'arg1'
 *   arg2    - opaque integer, passed to the handler as 'arg2'
 *   base    - first guest physical address of the region
 *   size    - length of the region in bytes (presumably; confirm units)
 */
struct mem_range {
|
||||
const char *name;
|
||||
int flags;
|
||||
mem_func_t handler;
|
||||
void *arg1;
|
||||
long arg2;
|
||||
uint64_t base;
|
||||
uint64_t size;
|
||||
};
|
||||
#define MEM_F_READ 0x1
|
||||
#define MEM_F_WRITE 0x2
|
||||
#define MEM_F_RW 0x3
|
||||
#define MEM_F_IMMUTABLE 0x4 /* mem_range cannot be unregistered */
|
||||
|
||||
void init_mem(void);
|
||||
int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie,
|
||||
struct vm_guest_paging *paging);
|
||||
|
||||
int register_mem(struct mem_range *memp);
|
||||
int register_mem_fallback(struct mem_range *memp);
|
||||
int unregister_mem(struct mem_range *memp);
|
||||
|
||||
#endif /* _MEM_H_ */
|
456
mevent.c
Normal file
456
mevent.c
Normal file
@ -0,0 +1,456 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Micro event library for FreeBSD, designed for a single i/o thread
|
||||
* using kqueue, and having events be persistent by default.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

#include <pthread.h>
#include <pthread_np.h>

#include "mevent.h"
|
||||
|
||||
#define MEVENT_MAX 64
|
||||
|
||||
#define MEV_ADD 1
|
||||
#define MEV_ENABLE 2
|
||||
#define MEV_DISABLE 3
|
||||
#define MEV_DEL_PENDING 4
|
||||
|
||||
extern char *vmname;
|
||||
|
||||
static pthread_t mevent_tid;
|
||||
static int mevent_timid = 43;
|
||||
static int mevent_pipefd[2];
|
||||
static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
/*
 * One registered event.
 *
 *   me_func    - callback invoked as me_func(me_fd, me_type, me_param)
 *   me_fd      - file descriptor for non-timer events; for timers the same
 *                slot is accessed through the me_msecs alias and holds the
 *                value passed as 'fd' to mevent_add()
 *   me_timid   - kevent ident used for timer events
 *   me_type    - which kind of event this is (enum ev_type)
 *   me_param   - opaque pointer handed back to the callback
 *   me_cq      - non-zero while the entry sits on the change list
 *   me_state   - one of the MEV_* states
 *   me_closefd - when set, the change-list pass closes me_fd instead of
 *                queueing a kevent change
 *   me_list    - linkage on either the global list or the change list
 */
struct mevent {
|
||||
void (*me_func)(int, enum ev_type, void *);
|
||||
#define me_msecs me_fd
|
||||
int me_fd;
|
||||
int me_timid;
|
||||
enum ev_type me_type;
|
||||
void *me_param;
|
||||
int me_cq;
|
||||
int me_state;
|
||||
int me_closefd;
|
||||
LIST_ENTRY(mevent) me_list;
|
||||
};
|
||||
|
||||
static LIST_HEAD(listhead, mevent) global_head, change_head;
|
||||
|
||||
static void
|
||||
mevent_qlock(void)
|
||||
{
|
||||
pthread_mutex_lock(&mevent_lmutex);
|
||||
}
|
||||
|
||||
static void
|
||||
mevent_qunlock(void)
|
||||
{
|
||||
pthread_mutex_unlock(&mevent_lmutex);
|
||||
}
|
||||
|
||||
static void
|
||||
mevent_pipe_read(int fd, enum ev_type type, void *param)
|
||||
{
|
||||
char buf[MEVENT_MAX];
|
||||
int status;
|
||||
|
||||
/*
|
||||
* Drain the pipe read side. The fd is non-blocking so this is
|
||||
* safe to do.
|
||||
*/
|
||||
do {
|
||||
status = read(fd, buf, sizeof(buf));
|
||||
} while (status == MEVENT_MAX);
|
||||
}
|
||||
|
||||
static void
|
||||
mevent_notify(void)
|
||||
{
|
||||
char c;
|
||||
|
||||
/*
|
||||
* If calling from outside the i/o thread, write a byte on the
|
||||
* pipe to force the i/o thread to exit the blocking kevent call.
|
||||
*/
|
||||
if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) {
|
||||
write(mevent_pipefd[1], &c, 1);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
mevent_kq_filter(struct mevent *mevp)
|
||||
{
|
||||
int retval;
|
||||
|
||||
retval = 0;
|
||||
|
||||
if (mevp->me_type == EVF_READ)
|
||||
retval = EVFILT_READ;
|
||||
|
||||
if (mevp->me_type == EVF_WRITE)
|
||||
retval = EVFILT_WRITE;
|
||||
|
||||
if (mevp->me_type == EVF_TIMER)
|
||||
retval = EVFILT_TIMER;
|
||||
|
||||
if (mevp->me_type == EVF_SIGNAL)
|
||||
retval = EVFILT_SIGNAL;
|
||||
|
||||
return (retval);
|
||||
}
|
||||
|
||||
static int
|
||||
mevent_kq_flags(struct mevent *mevp)
|
||||
{
|
||||
int ret;
|
||||
|
||||
switch (mevp->me_state) {
|
||||
case MEV_ADD:
|
||||
ret = EV_ADD; /* implicitly enabled */
|
||||
break;
|
||||
case MEV_ENABLE:
|
||||
ret = EV_ENABLE;
|
||||
break;
|
||||
case MEV_DISABLE:
|
||||
ret = EV_DISABLE;
|
||||
break;
|
||||
case MEV_DEL_PENDING:
|
||||
ret = EV_DELETE;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
 * Filter-specific flags for an event's kevent entry.  None are used at
 * present, so the event is not inspected and 0 is always returned.
 */
static int
mevent_kq_fflags(struct mevent *mevp)
{
	/* XXX nothing yet, perhaps EV_EOF for reads ? */
	const int no_fflags = 0;

	return (no_fflags);
}
|
||||
|
||||
static int
|
||||
mevent_build(int mfd, struct kevent *kev)
|
||||
{
|
||||
struct mevent *mevp, *tmpp;
|
||||
int i;
|
||||
|
||||
i = 0;
|
||||
|
||||
mevent_qlock();
|
||||
|
||||
LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
|
||||
if (mevp->me_closefd) {
|
||||
/*
|
||||
* A close of the file descriptor will remove the
|
||||
* event
|
||||
*/
|
||||
close(mevp->me_fd);
|
||||
} else {
|
||||
if (mevp->me_type == EVF_TIMER) {
|
||||
kev[i].ident = mevp->me_timid;
|
||||
kev[i].data = mevp->me_msecs;
|
||||
} else {
|
||||
kev[i].ident = mevp->me_fd;
|
||||
kev[i].data = 0;
|
||||
}
|
||||
kev[i].filter = mevent_kq_filter(mevp);
|
||||
kev[i].flags = mevent_kq_flags(mevp);
|
||||
kev[i].fflags = mevent_kq_fflags(mevp);
|
||||
kev[i].udata = mevp;
|
||||
i++;
|
||||
}
|
||||
|
||||
mevp->me_cq = 0;
|
||||
LIST_REMOVE(mevp, me_list);
|
||||
|
||||
if (mevp->me_state == MEV_DEL_PENDING) {
|
||||
free(mevp);
|
||||
} else {
|
||||
LIST_INSERT_HEAD(&global_head, mevp, me_list);
|
||||
}
|
||||
|
||||
assert(i < MEVENT_MAX);
|
||||
}
|
||||
|
||||
mevent_qunlock();
|
||||
|
||||
return (i);
|
||||
}
|
||||
|
||||
static void
|
||||
mevent_handle(struct kevent *kev, int numev)
|
||||
{
|
||||
struct mevent *mevp;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < numev; i++) {
|
||||
mevp = kev[i].udata;
|
||||
|
||||
/* XXX check for EV_ERROR ? */
|
||||
|
||||
(*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param);
|
||||
}
|
||||
}
|
||||
|
||||
struct mevent *
|
||||
mevent_add(int tfd, enum ev_type type,
|
||||
void (*func)(int, enum ev_type, void *), void *param)
|
||||
{
|
||||
struct mevent *lp, *mevp;
|
||||
|
||||
if (tfd < 0 || func == NULL) {
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
mevp = NULL;
|
||||
|
||||
mevent_qlock();
|
||||
|
||||
/*
|
||||
* Verify that the fd/type tuple is not present in any list
|
||||
*/
|
||||
LIST_FOREACH(lp, &global_head, me_list) {
|
||||
if (type != EVF_TIMER && lp->me_fd == tfd &&
|
||||
lp->me_type == type) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
LIST_FOREACH(lp, &change_head, me_list) {
|
||||
if (type != EVF_TIMER && lp->me_fd == tfd &&
|
||||
lp->me_type == type) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate an entry, populate it, and add it to the change list.
|
||||
*/
|
||||
mevp = calloc(1, sizeof(struct mevent));
|
||||
if (mevp == NULL) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (type == EVF_TIMER) {
|
||||
mevp->me_msecs = tfd;
|
||||
mevp->me_timid = mevent_timid++;
|
||||
} else
|
||||
mevp->me_fd = tfd;
|
||||
mevp->me_type = type;
|
||||
mevp->me_func = func;
|
||||
mevp->me_param = param;
|
||||
|
||||
LIST_INSERT_HEAD(&change_head, mevp, me_list);
|
||||
mevp->me_cq = 1;
|
||||
mevp->me_state = MEV_ADD;
|
||||
mevent_notify();
|
||||
|
||||
exit:
|
||||
mevent_qunlock();
|
||||
|
||||
return (mevp);
|
||||
}
|
||||
|
||||
static int
|
||||
mevent_update(struct mevent *evp, int newstate)
|
||||
{
|
||||
/*
|
||||
* It's not possible to enable/disable a deleted event
|
||||
*/
|
||||
if (evp->me_state == MEV_DEL_PENDING)
|
||||
return (EINVAL);
|
||||
|
||||
/*
|
||||
* No update needed if state isn't changing
|
||||
*/
|
||||
if (evp->me_state == newstate)
|
||||
return (0);
|
||||
|
||||
mevent_qlock();
|
||||
|
||||
evp->me_state = newstate;
|
||||
|
||||
/*
|
||||
* Place the entry onto the changed list if not already there.
|
||||
*/
|
||||
if (evp->me_cq == 0) {
|
||||
evp->me_cq = 1;
|
||||
LIST_REMOVE(evp, me_list);
|
||||
LIST_INSERT_HEAD(&change_head, evp, me_list);
|
||||
mevent_notify();
|
||||
}
|
||||
|
||||
mevent_qunlock();
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
mevent_enable(struct mevent *evp)
|
||||
{
|
||||
|
||||
return (mevent_update(evp, MEV_ENABLE));
|
||||
}
|
||||
|
||||
int
|
||||
mevent_disable(struct mevent *evp)
|
||||
{
|
||||
|
||||
return (mevent_update(evp, MEV_DISABLE));
|
||||
}
|
||||
|
||||
static int
|
||||
mevent_delete_event(struct mevent *evp, int closefd)
|
||||
{
|
||||
mevent_qlock();
|
||||
|
||||
/*
|
||||
* Place the entry onto the changed list if not already there, and
|
||||
* mark as to be deleted.
|
||||
*/
|
||||
if (evp->me_cq == 0) {
|
||||
evp->me_cq = 1;
|
||||
LIST_REMOVE(evp, me_list);
|
||||
LIST_INSERT_HEAD(&change_head, evp, me_list);
|
||||
mevent_notify();
|
||||
}
|
||||
evp->me_state = MEV_DEL_PENDING;
|
||||
|
||||
if (closefd)
|
||||
evp->me_closefd = 1;
|
||||
|
||||
mevent_qunlock();
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Schedule 'evp' for deletion, leaving its descriptor open.
 * Returns the result of mevent_delete_event() (always 0).
 */
int
mevent_delete(struct mevent *evp)
{
	const int close_it = 0;

	return (mevent_delete_event(evp, close_it));
}
|
||||
|
||||
/*
 * Schedule 'evp' for deletion and have its descriptor closed as part of
 * the deletion.  Returns the result of mevent_delete_event() (always 0).
 */
int
mevent_delete_close(struct mevent *evp)
{
	const int close_it = 1;

	return (mevent_delete_event(evp, close_it));
}
|
||||
|
||||
static void
|
||||
mevent_set_name(void)
|
||||
{
|
||||
|
||||
pthread_set_name_np(mevent_tid, "mevent");
|
||||
}
|
||||
|
||||
void
|
||||
mevent_dispatch(void)
|
||||
{
|
||||
struct kevent changelist[MEVENT_MAX];
|
||||
struct kevent eventlist[MEVENT_MAX];
|
||||
struct mevent *pipev;
|
||||
int mfd;
|
||||
int numev;
|
||||
int ret;
|
||||
|
||||
mevent_tid = pthread_self();
|
||||
mevent_set_name();
|
||||
|
||||
mfd = kqueue();
|
||||
assert(mfd > 0);
|
||||
|
||||
/*
|
||||
* Open the pipe that will be used for other threads to force
|
||||
* the blocking kqueue call to exit by writing to it. Set the
|
||||
* descriptor to non-blocking.
|
||||
*/
|
||||
ret = pipe(mevent_pipefd);
|
||||
if (ret < 0) {
|
||||
perror("pipe");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add internal event handler for the pipe write fd
|
||||
*/
|
||||
pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL);
|
||||
assert(pipev != NULL);
|
||||
|
||||
for (;;) {
|
||||
/*
|
||||
* Build changelist if required.
|
||||
* XXX the changelist can be put into the blocking call
|
||||
* to eliminate the extra syscall. Currently better for
|
||||
* debug.
|
||||
*/
|
||||
numev = mevent_build(mfd, changelist);
|
||||
if (numev) {
|
||||
ret = kevent(mfd, changelist, numev, NULL, 0, NULL);
|
||||
if (ret == -1) {
|
||||
perror("Error return from kevent change");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Block awaiting events
|
||||
*/
|
||||
ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
|
||||
if (ret == -1 && errno != EINTR) {
|
||||
perror("Error return from kevent monitor");
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle reported events
|
||||
*/
|
||||
mevent_handle(eventlist, ret);
|
||||
}
|
||||
}
|
51
mevent.h
Normal file
51
mevent.h
Normal file
@ -0,0 +1,51 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _MEVENT_H_
|
||||
#define _MEVENT_H_
|
||||
|
||||
/*
 * Kinds of event that can be registered with mevent_add().
 *
 * For EVF_TIMER the 'fd' argument of mevent_add() is not a descriptor but
 * the timer value (presumably milliseconds, going by the implementation's
 * me_msecs field name -- confirm against kqueue's EVFILT_TIMER units).
 */
enum ev_type {
|
||||
EVF_READ,
|
||||
EVF_WRITE,
|
||||
EVF_TIMER,
|
||||
EVF_SIGNAL
|
||||
};
|
||||
|
||||
struct mevent;
|
||||
|
||||
struct mevent *mevent_add(int fd, enum ev_type type,
|
||||
void (*func)(int, enum ev_type, void *),
|
||||
void *param);
|
||||
int mevent_enable(struct mevent *evp);
|
||||
int mevent_disable(struct mevent *evp);
|
||||
int mevent_delete(struct mevent *evp);
|
||||
int mevent_delete_close(struct mevent *evp);
|
||||
|
||||
void mevent_dispatch(void);
|
||||
|
||||
#endif /* _MEVENT_H_ */
|
256
mevent_test.c
Normal file
256
mevent_test.c
Normal file
@ -0,0 +1,256 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Test program for the micro event library. Set up a simple TCP echo
|
||||
* service.
|
||||
*
|
||||
* cc mevent_test.c mevent.c -lpthread
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stdint.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <machine/cpufunc.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "mevent.h"
|
||||
|
||||
/* TCP port the acceptor thread binds and listens on. */
#define TEST_PORT 4321

/* Signalled by acceptor_callback() to wake the acceptor thread. */
static pthread_mutex_t accept_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t accept_condvar = PTHREAD_COND_INITIALIZER;

/* Timer event handle: set in acceptor(), deleted in timer_callback(). */
static struct mevent *tevp;

/* NOTE(review): not referenced in this file; presumably needed by mevent.c. */
char *vmname = "test vm";


/* Selects the mevent-driven echoer() implementation below. */
#define MEVENT_ECHO

/* Number of timer events to capture */
#define TEVSZ 4096
/* TSC value recorded at each timer callback. */
uint64_t tevbuf[TEVSZ];
|
||||
|
||||
static void
|
||||
timer_print(void)
|
||||
{
|
||||
uint64_t min, max, diff, sum, tsc_freq;
|
||||
size_t len;
|
||||
int j;
|
||||
|
||||
min = UINT64_MAX;
|
||||
max = 0;
|
||||
sum = 0;
|
||||
|
||||
len = sizeof(tsc_freq);
|
||||
sysctlbyname("machdep.tsc_freq", &tsc_freq, &len, NULL, 0);
|
||||
|
||||
for (j = 1; j < TEVSZ; j++) {
|
||||
/* Convert a tsc diff into microseconds */
|
||||
diff = (tevbuf[j] - tevbuf[j-1]) * 1000000 / tsc_freq;
|
||||
sum += diff;
|
||||
if (min > diff)
|
||||
min = diff;
|
||||
if (max < diff)
|
||||
max = diff;
|
||||
}
|
||||
|
||||
printf("timers done: usecs, min %ld, max %ld, mean %ld\n", min, max,
|
||||
sum/(TEVSZ - 1));
|
||||
}
|
||||
|
||||
static void
|
||||
timer_callback(int fd, enum ev_type type, void *param)
|
||||
{
|
||||
static int i;
|
||||
|
||||
if (i >= TEVSZ)
|
||||
abort();
|
||||
|
||||
tevbuf[i++] = rdtsc();
|
||||
|
||||
if (i == TEVSZ) {
|
||||
mevent_delete(tevp);
|
||||
timer_print();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef MEVENT_ECHO
|
||||
/*
 * Per-connection wakeup channel: echoer_callback() signals e_cond under
 * e_mt and the echoer thread waits on it.
 */
struct esync {
	pthread_mutex_t e_mt;
	pthread_cond_t e_cond;
};
|
||||
|
||||
static void
|
||||
echoer_callback(int fd, enum ev_type type, void *param)
|
||||
{
|
||||
struct esync *sync = param;
|
||||
|
||||
pthread_mutex_lock(&sync->e_mt);
|
||||
pthread_cond_signal(&sync->e_cond);
|
||||
pthread_mutex_unlock(&sync->e_mt);
|
||||
}
|
||||
|
||||
static void *
|
||||
echoer(void *param)
|
||||
{
|
||||
struct esync sync;
|
||||
struct mevent *mev;
|
||||
char buf[128];
|
||||
int fd = (int)(uintptr_t) param;
|
||||
int len;
|
||||
|
||||
pthread_mutex_init(&sync.e_mt, NULL);
|
||||
pthread_cond_init(&sync.e_cond, NULL);
|
||||
|
||||
pthread_mutex_lock(&sync.e_mt);
|
||||
|
||||
mev = mevent_add(fd, EVF_READ, echoer_callback, &sync);
|
||||
if (mev == NULL) {
|
||||
printf("Could not allocate echoer event\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
while (!pthread_cond_wait(&sync.e_cond, &sync.e_mt)) {
|
||||
len = read(fd, buf, sizeof(buf));
|
||||
if (len > 0) {
|
||||
write(fd, buf, len);
|
||||
write(0, buf, len);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
mevent_delete_close(mev);
|
||||
|
||||
pthread_mutex_unlock(&sync.e_mt);
|
||||
pthread_mutex_destroy(&sync.e_mt);
|
||||
pthread_cond_destroy(&sync.e_cond);
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/*
 * Echo thread (plain blocking variant). param carries the connected socket
 * fd; everything read from it is copied to file descriptor 1 until read()
 * returns zero or an error. Always returns NULL.
 */
static void *
echoer(void *param)
{
	char buf[128];
	int fd = (int)(uintptr_t) param;
	int len;

	for (;;) {
		len = read(fd, buf, sizeof(buf));
		if (len <= 0)
			break;
		write(1, buf, len);
	}

	return (NULL);
}
|
||||
#endif /* MEVENT_ECHO */
|
||||
|
||||
static void
|
||||
acceptor_callback(int fd, enum ev_type type, void *param)
|
||||
{
|
||||
pthread_mutex_lock(&accept_mutex);
|
||||
pthread_cond_signal(&accept_condvar);
|
||||
pthread_mutex_unlock(&accept_mutex);
|
||||
}
|
||||
|
||||
static void *
|
||||
acceptor(void *param)
|
||||
{
|
||||
struct sockaddr_in sin;
|
||||
pthread_t tid;
|
||||
int news;
|
||||
int s;
|
||||
static int first;
|
||||
|
||||
if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
|
||||
perror("socket");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
sin.sin_len = sizeof(sin);
|
||||
sin.sin_family = AF_INET;
|
||||
sin.sin_addr.s_addr = htonl(INADDR_ANY);
|
||||
sin.sin_port = htons(TEST_PORT);
|
||||
|
||||
if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
|
||||
perror("bind");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (listen(s, 1) < 0) {
|
||||
perror("listen");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
(void) mevent_add(s, EVF_READ, acceptor_callback, NULL);
|
||||
|
||||
pthread_mutex_lock(&accept_mutex);
|
||||
|
||||
while (!pthread_cond_wait(&accept_condvar, &accept_mutex)) {
|
||||
news = accept(s, NULL, NULL);
|
||||
if (news < 0) {
|
||||
perror("accept error");
|
||||
} else {
|
||||
static int first = 1;
|
||||
|
||||
if (first) {
|
||||
/*
|
||||
* Start a timer
|
||||
*/
|
||||
first = 0;
|
||||
tevp = mevent_add(1, EVF_TIMER, timer_callback,
|
||||
NULL);
|
||||
}
|
||||
|
||||
printf("incoming connection, spawning thread\n");
|
||||
pthread_create(&tid, NULL, echoer,
|
||||
(void *)(uintptr_t)news);
|
||||
}
|
||||
}
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
main()
|
||||
{
|
||||
pthread_t tid;
|
||||
|
||||
pthread_create(&tid, NULL, acceptor, NULL);
|
||||
|
||||
mevent_dispatch();
|
||||
}
|
377
mptbl.c
Normal file
377
mptbl.c
Normal file
@ -0,0 +1,377 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/errno.h>
|
||||
#include <x86/mptable.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "acpi.h"
|
||||
#include "bhyverun.h"
|
||||
#include "mptbl.h"
|
||||
#include "pci_emul.h"
|
||||
|
||||
/* Guest physical address at which the MP floating pointer is placed. */
#define MPTABLE_BASE		0xF0000

/* floating pointer length + maximum length of configuration table */
#define MPTABLE_MAX_LENGTH	(65536 + 16)

#define LAPIC_PADDR		0xFEE00000
#define LAPIC_VERSION		16

#define IOAPIC_PADDR		0xFEC00000
#define IOAPIC_VERSION		0x11

#define MP_SPECREV		4
#define MPFP_SIG		"_MP_"

/*
 * Configuration header defines.
 *
 * The ID strings are copied with memcpy() using the matching *_LEN value,
 * so each literal must be space-padded to exactly that many characters;
 * a shorter literal would make memcpy() read past its end.
 */
#define MPCH_SIG		"PCMP"
#define MPCH_OEMID		"BHyVe   "
#define MPCH_OEMID_LEN		8
#define MPCH_PRODID		"Hypervisor  "
#define MPCH_PRODID_LEN		12

/* Processor entry defines */
#define MPEP_SIG_FAMILY		6	/* XXX bhyve should supply this */
#define MPEP_SIG_MODEL		26
#define MPEP_SIG_STEPPING	5
#define MPEP_SIG			\
	((MPEP_SIG_FAMILY << 8) |	\
	 (MPEP_SIG_MODEL << 4)	|	\
	 (MPEP_SIG_STEPPING))

#define MPEP_FEATURES		(0xBFEBFBFF)	/* XXX Intel i7 */

/* Number of local intr entries */
#define MPEII_NUM_LOCAL_IRQ	2

/* Bus entry defines; bus names are space-padded to MPE_BUSNAME_LEN. */
#define MPE_NUM_BUSES		2
#define MPE_BUSNAME_LEN		6
#define MPE_BUSNAME_ISA		"ISA   "
#define MPE_BUSNAME_PCI		"PCI   "
|
||||
|
||||
/*
 * Optional OEM table registered through mptable_add_oemtbl(); it is copied
 * after the base table by mptable_build() when oem_tbl_start is non-NULL.
 */
static void *oem_tbl_start;
static int oem_tbl_size;
|
||||
|
||||
/*
 * Compute the 8-bit checksum of len bytes starting at base: the value
 * that, added to the byte-wise sum of the region, gives zero modulo 256.
 * The region is only read. A zero-length region yields 0.
 */
static uint8_t
mpt_compute_checksum(const void *base, size_t len)
{
	const uint8_t *bytes = base;
	uint8_t sum = 0;

	while (len-- > 0)
		sum += *bytes++;

	/* Two's-complement negate modulo 256. */
	return ((uint8_t)(0 - sum));
}
|
||||
|
||||
static void
|
||||
mpt_build_mpfp(mpfps_t mpfp, vm_paddr_t gpa)
|
||||
{
|
||||
|
||||
memset(mpfp, 0, sizeof(*mpfp));
|
||||
memcpy(mpfp->signature, MPFP_SIG, 4);
|
||||
mpfp->pap = gpa + sizeof(*mpfp);
|
||||
mpfp->length = 1;
|
||||
mpfp->spec_rev = MP_SPECREV;
|
||||
mpfp->checksum = mpt_compute_checksum(mpfp, sizeof(*mpfp));
|
||||
}
|
||||
|
||||
static void
|
||||
mpt_build_mpch(mpcth_t mpch)
|
||||
{
|
||||
|
||||
memset(mpch, 0, sizeof(*mpch));
|
||||
memcpy(mpch->signature, MPCH_SIG, 4);
|
||||
mpch->spec_rev = MP_SPECREV;
|
||||
memcpy(mpch->oem_id, MPCH_OEMID, MPCH_OEMID_LEN);
|
||||
memcpy(mpch->product_id, MPCH_PRODID, MPCH_PRODID_LEN);
|
||||
mpch->apic_address = LAPIC_PADDR;
|
||||
}
|
||||
|
||||
static void
|
||||
mpt_build_proc_entries(proc_entry_ptr mpep, int ncpu)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ncpu; i++) {
|
||||
memset(mpep, 0, sizeof(*mpep));
|
||||
mpep->type = MPCT_ENTRY_PROCESSOR;
|
||||
mpep->apic_id = i; // XXX
|
||||
mpep->apic_version = LAPIC_VERSION;
|
||||
mpep->cpu_flags = PROCENTRY_FLAG_EN;
|
||||
if (i == 0)
|
||||
mpep->cpu_flags |= PROCENTRY_FLAG_BP;
|
||||
mpep->cpu_signature = MPEP_SIG;
|
||||
mpep->feature_flags = MPEP_FEATURES;
|
||||
mpep++;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
mpt_build_localint_entries(int_entry_ptr mpie)
|
||||
{
|
||||
|
||||
/* Hardcode LINT0 as ExtINT on all CPUs. */
|
||||
memset(mpie, 0, sizeof(*mpie));
|
||||
mpie->type = MPCT_ENTRY_LOCAL_INT;
|
||||
mpie->int_type = INTENTRY_TYPE_EXTINT;
|
||||
mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM |
|
||||
INTENTRY_FLAGS_TRIGGER_CONFORM;
|
||||
mpie->dst_apic_id = 0xff;
|
||||
mpie->dst_apic_int = 0;
|
||||
mpie++;
|
||||
|
||||
/* Hardcode LINT1 as NMI on all CPUs. */
|
||||
memset(mpie, 0, sizeof(*mpie));
|
||||
mpie->type = MPCT_ENTRY_LOCAL_INT;
|
||||
mpie->int_type = INTENTRY_TYPE_NMI;
|
||||
mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM |
|
||||
INTENTRY_FLAGS_TRIGGER_CONFORM;
|
||||
mpie->dst_apic_id = 0xff;
|
||||
mpie->dst_apic_int = 1;
|
||||
}
|
||||
|
||||
static void
|
||||
mpt_build_bus_entries(bus_entry_ptr mpeb)
|
||||
{
|
||||
|
||||
memset(mpeb, 0, sizeof(*mpeb));
|
||||
mpeb->type = MPCT_ENTRY_BUS;
|
||||
mpeb->bus_id = 0;
|
||||
memcpy(mpeb->bus_type, MPE_BUSNAME_PCI, MPE_BUSNAME_LEN);
|
||||
mpeb++;
|
||||
|
||||
memset(mpeb, 0, sizeof(*mpeb));
|
||||
mpeb->type = MPCT_ENTRY_BUS;
|
||||
mpeb->bus_id = 1;
|
||||
memcpy(mpeb->bus_type, MPE_BUSNAME_ISA, MPE_BUSNAME_LEN);
|
||||
}
|
||||
|
||||
static void
|
||||
mpt_build_ioapic_entries(io_apic_entry_ptr mpei, int id)
|
||||
{
|
||||
|
||||
memset(mpei, 0, sizeof(*mpei));
|
||||
mpei->type = MPCT_ENTRY_IOAPIC;
|
||||
mpei->apic_id = id;
|
||||
mpei->apic_version = IOAPIC_VERSION;
|
||||
mpei->apic_flags = IOAPICENTRY_FLAG_EN;
|
||||
mpei->apic_address = IOAPIC_PADDR;
|
||||
}
|
||||
|
||||
static int
|
||||
mpt_count_ioint_entries(void)
|
||||
{
|
||||
int bus, count;
|
||||
|
||||
count = 0;
|
||||
for (bus = 0; bus <= PCI_BUSMAX; bus++)
|
||||
count += pci_count_lintr(bus);
|
||||
|
||||
/*
|
||||
* Always include entries for the first 16 pins along with a entry
|
||||
* for each active PCI INTx pin.
|
||||
*/
|
||||
return (16 + count);
|
||||
}
|
||||
|
||||
static void
|
||||
mpt_generate_pci_int(int bus, int slot, int pin, int pirq_pin, int ioapic_irq,
|
||||
void *arg)
|
||||
{
|
||||
int_entry_ptr *mpiep, mpie;
|
||||
|
||||
mpiep = arg;
|
||||
mpie = *mpiep;
|
||||
memset(mpie, 0, sizeof(*mpie));
|
||||
|
||||
/*
|
||||
* This is always after another I/O interrupt entry, so cheat
|
||||
* and fetch the I/O APIC ID from the prior entry.
|
||||
*/
|
||||
mpie->type = MPCT_ENTRY_INT;
|
||||
mpie->int_type = INTENTRY_TYPE_INT;
|
||||
mpie->src_bus_id = bus;
|
||||
mpie->src_bus_irq = slot << 2 | (pin - 1);
|
||||
mpie->dst_apic_id = mpie[-1].dst_apic_id;
|
||||
mpie->dst_apic_int = ioapic_irq;
|
||||
|
||||
*mpiep = mpie + 1;
|
||||
}
|
||||
|
||||
static void
|
||||
mpt_build_ioint_entries(int_entry_ptr mpie, int id)
|
||||
{
|
||||
int pin, bus;
|
||||
|
||||
/*
|
||||
* The following config is taken from kernel mptable.c
|
||||
* mptable_parse_default_config_ints(...), for now
|
||||
* just use the default config, tweek later if needed.
|
||||
*/
|
||||
|
||||
/* First, generate the first 16 pins. */
|
||||
for (pin = 0; pin < 16; pin++) {
|
||||
memset(mpie, 0, sizeof(*mpie));
|
||||
mpie->type = MPCT_ENTRY_INT;
|
||||
mpie->src_bus_id = 1;
|
||||
mpie->dst_apic_id = id;
|
||||
|
||||
/*
|
||||
* All default configs route IRQs from bus 0 to the first 16
|
||||
* pins of the first I/O APIC with an APIC ID of 2.
|
||||
*/
|
||||
mpie->dst_apic_int = pin;
|
||||
switch (pin) {
|
||||
case 0:
|
||||
/* Pin 0 is an ExtINT pin. */
|
||||
mpie->int_type = INTENTRY_TYPE_EXTINT;
|
||||
break;
|
||||
case 2:
|
||||
/* IRQ 0 is routed to pin 2. */
|
||||
mpie->int_type = INTENTRY_TYPE_INT;
|
||||
mpie->src_bus_irq = 0;
|
||||
break;
|
||||
case SCI_INT:
|
||||
/* ACPI SCI is level triggered and active-lo. */
|
||||
mpie->int_flags = INTENTRY_FLAGS_POLARITY_ACTIVELO |
|
||||
INTENTRY_FLAGS_TRIGGER_LEVEL;
|
||||
mpie->int_type = INTENTRY_TYPE_INT;
|
||||
mpie->src_bus_irq = SCI_INT;
|
||||
break;
|
||||
default:
|
||||
/* All other pins are identity mapped. */
|
||||
mpie->int_type = INTENTRY_TYPE_INT;
|
||||
mpie->src_bus_irq = pin;
|
||||
break;
|
||||
}
|
||||
mpie++;
|
||||
}
|
||||
|
||||
/* Next, generate entries for any PCI INTx interrupts. */
|
||||
for (bus = 0; bus <= PCI_BUSMAX; bus++)
|
||||
pci_walk_lintr(bus, mpt_generate_pci_int, &mpie);
|
||||
}
|
||||
|
||||
void
|
||||
mptable_add_oemtbl(void *tbl, int tblsz)
|
||||
{
|
||||
|
||||
oem_tbl_start = tbl;
|
||||
oem_tbl_size = tblsz;
|
||||
}
|
||||
|
||||
int
|
||||
mptable_build(struct vmctx *ctx, int ncpu)
|
||||
{
|
||||
mpcth_t mpch;
|
||||
bus_entry_ptr mpeb;
|
||||
io_apic_entry_ptr mpei;
|
||||
proc_entry_ptr mpep;
|
||||
mpfps_t mpfp;
|
||||
int_entry_ptr mpie;
|
||||
int ioints, bus;
|
||||
char *curraddr;
|
||||
char *startaddr;
|
||||
|
||||
startaddr = paddr_guest2host(ctx, MPTABLE_BASE, MPTABLE_MAX_LENGTH);
|
||||
if (startaddr == NULL) {
|
||||
fprintf(stderr, "mptable requires mapped mem\n");
|
||||
return (ENOMEM);
|
||||
}
|
||||
|
||||
/*
|
||||
* There is no way to advertise multiple PCI hierarchies via MPtable
|
||||
* so require that there is no PCI hierarchy with a non-zero bus
|
||||
* number.
|
||||
*/
|
||||
for (bus = 1; bus <= PCI_BUSMAX; bus++) {
|
||||
if (pci_bus_configured(bus)) {
|
||||
fprintf(stderr, "MPtable is incompatible with "
|
||||
"multiple PCI hierarchies.\r\n");
|
||||
fprintf(stderr, "MPtable generation can be disabled "
|
||||
"by passing the -Y option to bhyve(8).\r\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
}
|
||||
|
||||
curraddr = startaddr;
|
||||
mpfp = (mpfps_t)curraddr;
|
||||
mpt_build_mpfp(mpfp, MPTABLE_BASE);
|
||||
curraddr += sizeof(*mpfp);
|
||||
|
||||
mpch = (mpcth_t)curraddr;
|
||||
mpt_build_mpch(mpch);
|
||||
curraddr += sizeof(*mpch);
|
||||
|
||||
mpep = (proc_entry_ptr)curraddr;
|
||||
mpt_build_proc_entries(mpep, ncpu);
|
||||
curraddr += sizeof(*mpep) * ncpu;
|
||||
mpch->entry_count += ncpu;
|
||||
|
||||
mpeb = (bus_entry_ptr) curraddr;
|
||||
mpt_build_bus_entries(mpeb);
|
||||
curraddr += sizeof(*mpeb) * MPE_NUM_BUSES;
|
||||
mpch->entry_count += MPE_NUM_BUSES;
|
||||
|
||||
mpei = (io_apic_entry_ptr)curraddr;
|
||||
mpt_build_ioapic_entries(mpei, 0);
|
||||
curraddr += sizeof(*mpei);
|
||||
mpch->entry_count++;
|
||||
|
||||
mpie = (int_entry_ptr) curraddr;
|
||||
ioints = mpt_count_ioint_entries();
|
||||
mpt_build_ioint_entries(mpie, 0);
|
||||
curraddr += sizeof(*mpie) * ioints;
|
||||
mpch->entry_count += ioints;
|
||||
|
||||
mpie = (int_entry_ptr)curraddr;
|
||||
mpt_build_localint_entries(mpie);
|
||||
curraddr += sizeof(*mpie) * MPEII_NUM_LOCAL_IRQ;
|
||||
mpch->entry_count += MPEII_NUM_LOCAL_IRQ;
|
||||
|
||||
if (oem_tbl_start) {
|
||||
mpch->oem_table_pointer = curraddr - startaddr + MPTABLE_BASE;
|
||||
mpch->oem_table_size = oem_tbl_size;
|
||||
memcpy(curraddr, oem_tbl_start, oem_tbl_size);
|
||||
}
|
||||
|
||||
mpch->base_table_length = curraddr - (char *)mpch;
|
||||
mpch->checksum = mpt_compute_checksum(mpch, mpch->base_table_length);
|
||||
|
||||
return (0);
|
||||
}
|
35
mptbl.h
Normal file
35
mptbl.h
Normal file
@ -0,0 +1,35 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _MPTBL_H_
|
||||
#define _MPTBL_H_
|
||||
|
||||
/*
 * Declared at file scope so that this header is self-contained: without it
 * the struct tag in the prototype below would have prototype scope only.
 */
struct vmctx;

/* Build the guest MP table for ncpu CPUs; returns 0 or an errno value. */
int	mptable_build(struct vmctx *ctx, int ncpu);
/* Register an OEM table of tblsz bytes to be appended by mptable_build(). */
void	mptable_add_oemtbl(void *tbl, int tblsz);
|
||||
|
||||
#endif /* _MPTBL_H_ */
|
2347
pci_ahci.c
Normal file
2347
pci_ahci.c
Normal file
File diff suppressed because it is too large
Load Diff
2108
pci_emul.c
Normal file
2108
pci_emul.c
Normal file
File diff suppressed because it is too large
Load Diff
285
pci_emul.h
Normal file
285
pci_emul.h
Normal file
@ -0,0 +1,285 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _PCI_EMUL_H_
|
||||
#define _PCI_EMUL_H_
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/queue.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/_pthreadtypes.h>
|
||||
|
||||
#include <dev/pci/pcireg.h>
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#define	PCI_BARMAX	PCIR_MAX_BAR_0	/* BAR registers in a Type 0 header */

/* Forward declarations for types used only through pointers below. */
struct vmctx;
struct pci_devinst;
struct memory_region;
|
||||
|
||||
/*
 * Description of one PCI device emulation: its name plus the callbacks the
 * PCI emulation core invokes. The host bridge emulations in this tree set
 * only pe_emu and pe_init, leaving the remaining callbacks NULL.
 */
struct pci_devemu {
	char      *pe_emu;		/* Name of device emulation */

	/* instance creation */
	int       (*pe_init)(struct vmctx *, struct pci_devinst *,
			     char *opts);

	/* ACPI DSDT enumeration */
	void	(*pe_write_dsdt)(struct pci_devinst *);

	/* config space read/write callbacks */
	int	(*pe_cfgwrite)(struct vmctx *ctx, int vcpu,
			       struct pci_devinst *pi, int offset,
			       int bytes, uint32_t val);
	int	(*pe_cfgread)(struct vmctx *ctx, int vcpu,
			      struct pci_devinst *pi, int offset,
			      int bytes, uint32_t *retval);

	/* BAR read/write callbacks */
	void      (*pe_barwrite)(struct vmctx *ctx, int vcpu,
				 struct pci_devinst *pi, int baridx,
				 uint64_t offset, int size, uint64_t value);
	uint64_t  (*pe_barread)(struct vmctx *ctx, int vcpu,
				struct pci_devinst *pi, int baridx,
				uint64_t offset, int size);
};
/*
 * Add a struct pci_devemu to the pci_devemu_set linker set.
 * NOTE(review): the expansion already ends in ';', so the usual
 * "PCI_EMUL_SET(x);" yields an extra empty declaration.
 */
#define PCI_EMUL_SET(x)   DATA_SET(pci_devemu_set, x);
|
||||
|
||||
/* Kind of resource behind a BAR; PCIBAR_NONE is the zero value. */
enum pcibar_type {
	PCIBAR_NONE,
	PCIBAR_IO,
	PCIBAR_MEM32,
	PCIBAR_MEM64,
	PCIBAR_MEMHI64
};
|
||||
|
||||
/* State kept for one BAR of an emulated device. */
struct pcibar {
	enum pcibar_type	type;		/* io or memory */
	uint64_t		size;
	uint64_t		addr;
};
|
||||
|
||||
/* Size of the pi_name buffer in struct pci_devinst. */
#define PI_NAMESZ	40

/* One MSI-X table entry; packed so that it is exactly 16 bytes. */
struct msix_table_entry {
	uint64_t	addr;
	uint32_t	msg_data;
	uint32_t	vector_control;
} __packed;

/* 
 * In case the structure is modified to hold extra information, use a define
 * for the size that should be emulated.
 */
#define	MSIX_TABLE_ENTRY_SIZE	16
#define MAX_MSIX_TABLE_ENTRIES	2048
/* Bytes needed for one bit per message, rounded up to a multiple of 64 bits. */
#define	PBA_SIZE(msgnum)	(roundup2((msgnum), 64) / 8)
|
||||
|
||||
/* State of a device's legacy INTx interrupt (pi_lintr.state). */
enum lintr_stat {
	IDLE,
	ASSERTED,
	PENDING
};
|
||||
|
||||
/*
 * One instance of an emulated PCI device: its emulation, its location
 * (bus/slot/function), its interrupt state and its config space and BARs.
 */
struct pci_devinst {
	struct pci_devemu *pi_d;
	struct vmctx *pi_vmctx;
	uint8_t	  pi_bus, pi_slot, pi_func;
	char	  pi_name[PI_NAMESZ];
	int	  pi_bar_getsize;
	int	  pi_prevcap;
	int	  pi_capend;

	/* Legacy INTx interrupt state. */
	struct {
		int8_t    	pin;
		enum lintr_stat	state;
		int		pirq_pin;
		int	  	ioapic_irq;
		pthread_mutex_t	lock;
	} pi_lintr;

	/* MSI state. */
	struct {
		int		enabled;
		uint64_t	addr;
		uint64_t	msg_data;
		int		maxmsgnum;
	} pi_msi;

	/* MSI-X state. */
	struct {
		int	enabled;
		int	table_bar;
		int	pba_bar;
		uint32_t table_offset;
		int	table_count;
		uint32_t pba_offset;
		int	pba_size;
		int	function_mask; 	
		struct msix_table_entry *table;	/* allocated at runtime */
		void	*pba_page;
		int	pba_page_offset;
	} pi_msix;

	void      *pi_arg;		/* devemu-private data */

	/* Raw config space, accessed via pci_{get,set}_cfgdata*(). */
	u_char	  pi_cfgdata[PCI_REGMAX + 1];
	struct pcibar pi_bar[PCI_BARMAX + 1];
};
|
||||
|
||||
/* Packed layout of an MSI capability with a 64-bit message address. */
struct msicap {
	uint8_t		capid;
	uint8_t		nextptr;
	uint16_t	msgctrl;
	uint32_t	addrlo;
	uint32_t	addrhi;
	uint16_t	msgdata;
} __packed;
|
||||
|
||||
/* Packed layout of an MSI-X capability. */
struct msixcap {
	uint8_t		capid;
	uint8_t		nextptr;
	uint16_t	msgctrl;
	uint32_t	table_info;	/* bar index and offset within it */
	uint32_t	pba_info;	/* bar index and offset within it */
} __packed;
|
||||
|
||||
/*
 * Packed layout of a PCI Express capability. The register groups are
 * annotated with the kind of device each applies to.
 */
struct pciecap {
	uint8_t		capid;
	uint8_t		nextptr;
	uint16_t	pcie_capabilities;

	uint32_t	dev_capabilities;	/* all devices */
	uint16_t	dev_control;
	uint16_t	dev_status;

	uint32_t	link_capabilities;	/* devices with links */
	uint16_t	link_control;
	uint16_t	link_status;

	uint32_t	slot_capabilities;	/* ports with slots */
	uint16_t	slot_control;
	uint16_t	slot_status;

	uint16_t	root_control;		/* root ports */
	uint16_t	root_capabilities;
	uint32_t	root_status;

	uint32_t	dev_capabilities2;	/* all devices */
	uint16_t	dev_control2;
	uint16_t	dev_status2;

	uint32_t	link_capabilities2;	/* devices with links */
	uint16_t	link_control2;
	uint16_t	link_status2;

	uint32_t	slot_capabilities2;	/* ports with slots */
	uint16_t	slot_control2;
	uint16_t	slot_status2;
} __packed;
|
||||
|
||||
/*
 * Callback type for pci_walk_lintr(): receives the bus, slot and pin of a
 * legacy interrupt, its PIRQ pin and I/O APIC IRQ, and the caller's arg.
 */
typedef void (*pci_lintr_cb)(int b, int s, int pin, int pirq_pin,
    int ioapic_irq, void *arg);

/* Entry points exported by the PCI emulation core (pci_emul.c). */
int	init_pci(struct vmctx *ctx);
void	msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
	    int bytes, uint32_t val);
void	msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
	    int bytes, uint32_t val);
void	pci_callback(void);
int	pci_emul_alloc_bar(struct pci_devinst *pdi, int idx,
	    enum pcibar_type type, uint64_t size);
int	pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx,
	    uint64_t hostbase, enum pcibar_type type, uint64_t size);
int	pci_emul_add_msicap(struct pci_devinst *pi, int msgnum);
int	pci_emul_add_pciecap(struct pci_devinst *pi, int pcie_device_type);
void	pci_generate_msi(struct pci_devinst *pi, int msgnum);
void	pci_generate_msix(struct pci_devinst *pi, int msgnum);
void	pci_lintr_assert(struct pci_devinst *pi);
void	pci_lintr_deassert(struct pci_devinst *pi);
void	pci_lintr_request(struct pci_devinst *pi);
int	pci_msi_enabled(struct pci_devinst *pi);
int	pci_msix_enabled(struct pci_devinst *pi);
int	pci_msix_table_bar(struct pci_devinst *pi);
int	pci_msix_pba_bar(struct pci_devinst *pi);
int	pci_msi_msgnum(struct pci_devinst *pi);
int	pci_parse_slot(char *opt);
void	pci_populate_msicap(struct msicap *cap, int msgs, int nextptr);
int	pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum);
int	pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size,
			     uint64_t value);
uint64_t pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size);
int	pci_count_lintr(int bus);
void	pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg);
void	pci_write_dsdt(void);
uint64_t pci_ecfg_base(void);
int	pci_bus_configured(int bus);
|
||||
|
||||
static __inline void
|
||||
pci_set_cfgdata8(struct pci_devinst *pi, int offset, uint8_t val)
|
||||
{
|
||||
assert(offset <= PCI_REGMAX);
|
||||
*(uint8_t *)(pi->pi_cfgdata + offset) = val;
|
||||
}
|
||||
|
||||
static __inline void
|
||||
pci_set_cfgdata16(struct pci_devinst *pi, int offset, uint16_t val)
|
||||
{
|
||||
assert(offset <= (PCI_REGMAX - 1) && (offset & 1) == 0);
|
||||
*(uint16_t *)(pi->pi_cfgdata + offset) = val;
|
||||
}
|
||||
|
||||
static __inline void
|
||||
pci_set_cfgdata32(struct pci_devinst *pi, int offset, uint32_t val)
|
||||
{
|
||||
assert(offset <= (PCI_REGMAX - 3) && (offset & 3) == 0);
|
||||
*(uint32_t *)(pi->pi_cfgdata + offset) = val;
|
||||
}
|
||||
|
||||
static __inline uint8_t
|
||||
pci_get_cfgdata8(struct pci_devinst *pi, int offset)
|
||||
{
|
||||
assert(offset <= PCI_REGMAX);
|
||||
return (*(uint8_t *)(pi->pi_cfgdata + offset));
|
||||
}
|
||||
|
||||
static __inline uint16_t
|
||||
pci_get_cfgdata16(struct pci_devinst *pi, int offset)
|
||||
{
|
||||
assert(offset <= (PCI_REGMAX - 1) && (offset & 1) == 0);
|
||||
return (*(uint16_t *)(pi->pi_cfgdata + offset));
|
||||
}
|
||||
|
||||
static __inline uint32_t
|
||||
pci_get_cfgdata32(struct pci_devinst *pi, int offset)
|
||||
{
|
||||
assert(offset <= (PCI_REGMAX - 3) && (offset & 3) == 0);
|
||||
return (*(uint32_t *)(pi->pi_cfgdata + offset));
|
||||
}
|
||||
|
||||
#endif /* _PCI_EMUL_H_ */
|
70
pci_hostbridge.c
Normal file
70
pci_hostbridge.c
Normal file
@ -0,0 +1,70 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include "pci_emul.h"
|
||||
|
||||
static int
|
||||
pci_hostbridge_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
|
||||
/* config space */
|
||||
pci_set_cfgdata16(pi, PCIR_VENDOR, 0x1275); /* NetApp */
|
||||
pci_set_cfgdata16(pi, PCIR_DEVICE, 0x1275); /* NetApp */
|
||||
pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_NORMAL);
|
||||
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
|
||||
pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_HOST);
|
||||
|
||||
pci_emul_add_pciecap(pi, PCIEM_TYPE_ROOT_PORT);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
pci_amd_hostbridge_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
(void) pci_hostbridge_init(ctx, pi, opts);
|
||||
pci_set_cfgdata16(pi, PCIR_VENDOR, 0x1022); /* AMD */
|
||||
pci_set_cfgdata16(pi, PCIR_DEVICE, 0x7432); /* made up */
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
struct pci_devemu pci_de_amd_hostbridge = {
|
||||
.pe_emu = "amd_hostbridge",
|
||||
.pe_init = pci_amd_hostbridge_init,
|
||||
};
|
||||
PCI_EMUL_SET(pci_de_amd_hostbridge);
|
||||
|
||||
struct pci_devemu pci_de_hostbridge = {
|
||||
.pe_emu = "hostbridge",
|
||||
.pe_init = pci_hostbridge_init,
|
||||
};
|
||||
PCI_EMUL_SET(pci_de_hostbridge);
|
346
pci_irq.c
Normal file
346
pci_irq.c
Normal file
@ -0,0 +1,346 @@
|
||||
/*-
|
||||
* Copyright (c) 2014 Hudson River Trading LLC
|
||||
* Written by: John H. Baldwin <jhb@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <machine/vmm.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <pthread.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include "acpi.h"
|
||||
#include "inout.h"
|
||||
#include "pci_emul.h"
|
||||
#include "pci_irq.h"
|
||||
#include "pci_lpc.h"
|
||||
|
||||
/*
|
||||
* Implement an 8 pin PCI interrupt router compatible with the router
|
||||
* present on Intel's ICH10 chip.
|
||||
*/
|
||||
|
||||
/* Fields in each PIRQ register. */
|
||||
#define PIRQ_DIS 0x80
|
||||
#define PIRQ_IRQ 0x0f
|
||||
|
||||
/* Only IRQs 3-7, 9-12, and 14-15 are permitted. */
|
||||
#define PERMITTED_IRQS 0xdef8
|
||||
#define IRQ_PERMITTED(irq) (((1U << (irq)) & PERMITTED_IRQS) != 0)
|
||||
|
||||
/* IRQ count to disable an IRQ. */
|
||||
#define IRQ_DISABLED 0xff
|
||||
|
||||
static struct pirq {
|
||||
uint8_t reg;
|
||||
int use_count;
|
||||
int active_count;
|
||||
pthread_mutex_t lock;
|
||||
} pirqs[8];
|
||||
|
||||
static u_char irq_counts[16];
|
||||
static int pirq_cold = 1;
|
||||
|
||||
/*
|
||||
* Returns true if this pin is enabled with a valid IRQ. Setting the
|
||||
* register to a reserved IRQ causes interrupts to not be asserted as
|
||||
* if the pin was disabled.
|
||||
*/
|
||||
static bool
|
||||
pirq_valid_irq(int reg)
|
||||
{
|
||||
|
||||
if (reg & PIRQ_DIS)
|
||||
return (false);
|
||||
return (IRQ_PERMITTED(reg & PIRQ_IRQ));
|
||||
}
|
||||
|
||||
uint8_t
|
||||
pirq_read(int pin)
|
||||
{
|
||||
|
||||
assert(pin > 0 && pin <= nitems(pirqs));
|
||||
return (pirqs[pin - 1].reg);
|
||||
}
|
||||
|
||||
void
|
||||
pirq_write(struct vmctx *ctx, int pin, uint8_t val)
|
||||
{
|
||||
struct pirq *pirq;
|
||||
|
||||
assert(pin > 0 && pin <= nitems(pirqs));
|
||||
pirq = &pirqs[pin - 1];
|
||||
pthread_mutex_lock(&pirq->lock);
|
||||
if (pirq->reg != (val & (PIRQ_DIS | PIRQ_IRQ))) {
|
||||
if (pirq->active_count != 0 && pirq_valid_irq(pirq->reg))
|
||||
vm_isa_deassert_irq(ctx, pirq->reg & PIRQ_IRQ, -1);
|
||||
pirq->reg = val & (PIRQ_DIS | PIRQ_IRQ);
|
||||
if (pirq->active_count != 0 && pirq_valid_irq(pirq->reg))
|
||||
vm_isa_assert_irq(ctx, pirq->reg & PIRQ_IRQ, -1);
|
||||
}
|
||||
pthread_mutex_unlock(&pirq->lock);
|
||||
}
|
||||
|
||||
void
|
||||
pci_irq_reserve(int irq)
|
||||
{
|
||||
|
||||
assert(irq >= 0 && irq < nitems(irq_counts));
|
||||
assert(pirq_cold);
|
||||
assert(irq_counts[irq] == 0 || irq_counts[irq] == IRQ_DISABLED);
|
||||
irq_counts[irq] = IRQ_DISABLED;
|
||||
}
|
||||
|
||||
void
|
||||
pci_irq_use(int irq)
|
||||
{
|
||||
|
||||
assert(irq >= 0 && irq < nitems(irq_counts));
|
||||
assert(pirq_cold);
|
||||
assert(irq_counts[irq] != IRQ_DISABLED);
|
||||
irq_counts[irq]++;
|
||||
}
|
||||
|
||||
void
|
||||
pci_irq_init(struct vmctx *ctx)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nitems(pirqs); i++) {
|
||||
pirqs[i].reg = PIRQ_DIS;
|
||||
pirqs[i].use_count = 0;
|
||||
pirqs[i].active_count = 0;
|
||||
pthread_mutex_init(&pirqs[i].lock, NULL);
|
||||
}
|
||||
for (i = 0; i < nitems(irq_counts); i++) {
|
||||
if (IRQ_PERMITTED(i))
|
||||
irq_counts[i] = 0;
|
||||
else
|
||||
irq_counts[i] = IRQ_DISABLED;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
pci_irq_assert(struct pci_devinst *pi)
|
||||
{
|
||||
struct pirq *pirq;
|
||||
|
||||
if (pi->pi_lintr.pirq_pin > 0) {
|
||||
assert(pi->pi_lintr.pirq_pin <= nitems(pirqs));
|
||||
pirq = &pirqs[pi->pi_lintr.pirq_pin - 1];
|
||||
pthread_mutex_lock(&pirq->lock);
|
||||
pirq->active_count++;
|
||||
if (pirq->active_count == 1 && pirq_valid_irq(pirq->reg)) {
|
||||
vm_isa_assert_irq(pi->pi_vmctx, pirq->reg & PIRQ_IRQ,
|
||||
pi->pi_lintr.ioapic_irq);
|
||||
pthread_mutex_unlock(&pirq->lock);
|
||||
return;
|
||||
}
|
||||
pthread_mutex_unlock(&pirq->lock);
|
||||
}
|
||||
vm_ioapic_assert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq);
|
||||
}
|
||||
|
||||
void
|
||||
pci_irq_deassert(struct pci_devinst *pi)
|
||||
{
|
||||
struct pirq *pirq;
|
||||
|
||||
if (pi->pi_lintr.pirq_pin > 0) {
|
||||
assert(pi->pi_lintr.pirq_pin <= nitems(pirqs));
|
||||
pirq = &pirqs[pi->pi_lintr.pirq_pin - 1];
|
||||
pthread_mutex_lock(&pirq->lock);
|
||||
pirq->active_count--;
|
||||
if (pirq->active_count == 0 && pirq_valid_irq(pirq->reg)) {
|
||||
vm_isa_deassert_irq(pi->pi_vmctx, pirq->reg & PIRQ_IRQ,
|
||||
pi->pi_lintr.ioapic_irq);
|
||||
pthread_mutex_unlock(&pirq->lock);
|
||||
return;
|
||||
}
|
||||
pthread_mutex_unlock(&pirq->lock);
|
||||
}
|
||||
vm_ioapic_deassert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq);
|
||||
}
|
||||
|
||||
int
|
||||
pirq_alloc_pin(struct vmctx *ctx)
|
||||
{
|
||||
int best_count, best_irq, best_pin, irq, pin;
|
||||
|
||||
pirq_cold = 0;
|
||||
|
||||
/* First, find the least-used PIRQ pin. */
|
||||
best_pin = 0;
|
||||
best_count = pirqs[0].use_count;
|
||||
for (pin = 1; pin < nitems(pirqs); pin++) {
|
||||
if (pirqs[pin].use_count < best_count) {
|
||||
best_pin = pin;
|
||||
best_count = pirqs[pin].use_count;
|
||||
}
|
||||
}
|
||||
pirqs[best_pin].use_count++;
|
||||
|
||||
/* Second, route this pin to an IRQ. */
|
||||
if (pirqs[best_pin].reg == PIRQ_DIS) {
|
||||
best_irq = -1;
|
||||
best_count = 0;
|
||||
for (irq = 0; irq < nitems(irq_counts); irq++) {
|
||||
if (irq_counts[irq] == IRQ_DISABLED)
|
||||
continue;
|
||||
if (best_irq == -1 || irq_counts[irq] < best_count) {
|
||||
best_irq = irq;
|
||||
best_count = irq_counts[irq];
|
||||
}
|
||||
}
|
||||
assert(best_irq >= 0);
|
||||
irq_counts[best_irq]++;
|
||||
pirqs[best_pin].reg = best_irq;
|
||||
vm_isa_set_irq_trigger(ctx, best_irq, LEVEL_TRIGGER);
|
||||
}
|
||||
|
||||
return (best_pin + 1);
|
||||
}
|
||||
|
||||
int
|
||||
pirq_irq(int pin)
|
||||
{
|
||||
assert(pin > 0 && pin <= nitems(pirqs));
|
||||
return (pirqs[pin - 1].reg & PIRQ_IRQ);
|
||||
}
|
||||
|
||||
/* XXX: Generate $PIR table. */
|
||||
|
||||
static void
|
||||
pirq_dsdt(void)
|
||||
{
|
||||
char *irq_prs, *old;
|
||||
int irq, pin;
|
||||
|
||||
irq_prs = NULL;
|
||||
for (irq = 0; irq < nitems(irq_counts); irq++) {
|
||||
if (!IRQ_PERMITTED(irq))
|
||||
continue;
|
||||
if (irq_prs == NULL)
|
||||
asprintf(&irq_prs, "%d", irq);
|
||||
else {
|
||||
old = irq_prs;
|
||||
asprintf(&irq_prs, "%s,%d", old, irq);
|
||||
free(old);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* A helper method to validate a link register's value. This
|
||||
* duplicates pirq_valid_irq().
|
||||
*/
|
||||
dsdt_line("");
|
||||
dsdt_line("Method (PIRV, 1, NotSerialized)");
|
||||
dsdt_line("{");
|
||||
dsdt_line(" If (And (Arg0, 0x%02X))", PIRQ_DIS);
|
||||
dsdt_line(" {");
|
||||
dsdt_line(" Return (0x00)");
|
||||
dsdt_line(" }");
|
||||
dsdt_line(" And (Arg0, 0x%02X, Local0)", PIRQ_IRQ);
|
||||
dsdt_line(" If (LLess (Local0, 0x03))");
|
||||
dsdt_line(" {");
|
||||
dsdt_line(" Return (0x00)");
|
||||
dsdt_line(" }");
|
||||
dsdt_line(" If (LEqual (Local0, 0x08))");
|
||||
dsdt_line(" {");
|
||||
dsdt_line(" Return (0x00)");
|
||||
dsdt_line(" }");
|
||||
dsdt_line(" If (LEqual (Local0, 0x0D))");
|
||||
dsdt_line(" {");
|
||||
dsdt_line(" Return (0x00)");
|
||||
dsdt_line(" }");
|
||||
dsdt_line(" Return (0x01)");
|
||||
dsdt_line("}");
|
||||
|
||||
for (pin = 0; pin < nitems(pirqs); pin++) {
|
||||
dsdt_line("");
|
||||
dsdt_line("Device (LNK%c)", 'A' + pin);
|
||||
dsdt_line("{");
|
||||
dsdt_line(" Name (_HID, EisaId (\"PNP0C0F\"))");
|
||||
dsdt_line(" Name (_UID, 0x%02X)", pin + 1);
|
||||
dsdt_line(" Method (_STA, 0, NotSerialized)");
|
||||
dsdt_line(" {");
|
||||
dsdt_line(" If (PIRV (PIR%c))", 'A' + pin);
|
||||
dsdt_line(" {");
|
||||
dsdt_line(" Return (0x0B)");
|
||||
dsdt_line(" }");
|
||||
dsdt_line(" Else");
|
||||
dsdt_line(" {");
|
||||
dsdt_line(" Return (0x09)");
|
||||
dsdt_line(" }");
|
||||
dsdt_line(" }");
|
||||
dsdt_line(" Name (_PRS, ResourceTemplate ()");
|
||||
dsdt_line(" {");
|
||||
dsdt_line(" IRQ (Level, ActiveLow, Shared, )");
|
||||
dsdt_line(" {%s}", irq_prs);
|
||||
dsdt_line(" })");
|
||||
dsdt_line(" Name (CB%02X, ResourceTemplate ()", pin + 1);
|
||||
dsdt_line(" {");
|
||||
dsdt_line(" IRQ (Level, ActiveLow, Shared, )");
|
||||
dsdt_line(" {}");
|
||||
dsdt_line(" })");
|
||||
dsdt_line(" CreateWordField (CB%02X, 0x01, CIR%c)",
|
||||
pin + 1, 'A' + pin);
|
||||
dsdt_line(" Method (_CRS, 0, NotSerialized)");
|
||||
dsdt_line(" {");
|
||||
dsdt_line(" And (PIR%c, 0x%02X, Local0)", 'A' + pin,
|
||||
PIRQ_DIS | PIRQ_IRQ);
|
||||
dsdt_line(" If (PIRV (Local0))");
|
||||
dsdt_line(" {");
|
||||
dsdt_line(" ShiftLeft (0x01, Local0, CIR%c)", 'A' + pin);
|
||||
dsdt_line(" }");
|
||||
dsdt_line(" Else");
|
||||
dsdt_line(" {");
|
||||
dsdt_line(" Store (0x00, CIR%c)", 'A' + pin);
|
||||
dsdt_line(" }");
|
||||
dsdt_line(" Return (CB%02X)", pin + 1);
|
||||
dsdt_line(" }");
|
||||
dsdt_line(" Method (_DIS, 0, NotSerialized)");
|
||||
dsdt_line(" {");
|
||||
dsdt_line(" Store (0x80, PIR%c)", 'A' + pin);
|
||||
dsdt_line(" }");
|
||||
dsdt_line(" Method (_SRS, 1, NotSerialized)");
|
||||
dsdt_line(" {");
|
||||
dsdt_line(" CreateWordField (Arg0, 0x01, SIR%c)", 'A' + pin);
|
||||
dsdt_line(" FindSetRightBit (SIR%c, Local0)", 'A' + pin);
|
||||
dsdt_line(" Store (Decrement (Local0), PIR%c)", 'A' + pin);
|
||||
dsdt_line(" }");
|
||||
dsdt_line("}");
|
||||
}
|
||||
free(irq_prs);
|
||||
}
|
||||
LPC_DSDT(pirq_dsdt);
|
45
pci_irq.h
Normal file
45
pci_irq.h
Normal file
@ -0,0 +1,45 @@
|
||||
/*-
|
||||
* Copyright (c) 2014 Hudson River Trading LLC
|
||||
* Written by: John H. Baldwin <jhb@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef __PCI_IRQ_H__
|
||||
#define __PCI_IRQ_H__
|
||||
|
||||
/*
 * Forward declarations.  Both structures are only used through
 * pointers here; declaring them at file scope keeps the prototypes
 * below compatible with their definitions regardless of which headers
 * the includer pulled in first.
 */
struct pci_devinst;
struct vmctx;

/* Assert / deassert the legacy interrupt of a PCI device instance. */
void	pci_irq_assert(struct pci_devinst *pi);
void	pci_irq_deassert(struct pci_devinst *pi);

/* Reset the router state; reserve an IRQ or count one more user of it. */
void	pci_irq_init(struct vmctx *ctx);
void	pci_irq_reserve(int irq);
void	pci_irq_use(int irq);

/* PIRQ pins are numbered from 1. */
int	pirq_alloc_pin(struct vmctx *ctx);
int	pirq_irq(int pin);
uint8_t	pirq_read(int pin);
void	pirq_write(struct vmctx *ctx, int pin, uint8_t val);
|
||||
|
||||
#endif
|
450
pci_lpc.c
Normal file
450
pci_lpc.c
Normal file
@ -0,0 +1,450 @@
|
||||
/*-
|
||||
* Copyright (c) 2013 Neel Natu <neel@freebsd.org>
|
||||
* Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <machine/vmm.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include "acpi.h"
|
||||
#include "bootrom.h"
|
||||
#include "inout.h"
|
||||
#include "pci_emul.h"
|
||||
#include "pci_irq.h"
|
||||
#include "pci_lpc.h"
|
||||
#include "uart_emul.h"
|
||||
|
||||
#define IO_ICU1 0x20
|
||||
#define IO_ICU2 0xA0
|
||||
|
||||
SET_DECLARE(lpc_dsdt_set, struct lpc_dsdt);
|
||||
SET_DECLARE(lpc_sysres_set, struct lpc_sysres);
|
||||
|
||||
#define ELCR_PORT 0x4d0
|
||||
SYSRES_IO(ELCR_PORT, 2);
|
||||
|
||||
#define IO_TIMER1_PORT 0x40
|
||||
|
||||
#define NMISC_PORT 0x61
|
||||
SYSRES_IO(NMISC_PORT, 1);
|
||||
|
||||
static struct pci_devinst *lpc_bridge;
|
||||
|
||||
static const char *romfile;
|
||||
|
||||
#define LPC_UART_NUM 2
|
||||
static struct lpc_uart_softc {
|
||||
struct uart_softc *uart_softc;
|
||||
const char *opts;
|
||||
int iobase;
|
||||
int irq;
|
||||
int enabled;
|
||||
} lpc_uart_softc[LPC_UART_NUM];
|
||||
|
||||
static const char *lpc_uart_names[LPC_UART_NUM] = { "COM1", "COM2" };
|
||||
|
||||
/*
|
||||
* LPC device configuration is in the following form:
|
||||
* <lpc_device_name>[,<options>]
|
||||
* For e.g. "com1,stdio" or "bootrom,/var/romfile"
|
||||
*/
|
||||
int
|
||||
lpc_device_parse(const char *opts)
|
||||
{
|
||||
int unit, error;
|
||||
char *str, *cpy, *lpcdev;
|
||||
|
||||
error = -1;
|
||||
str = cpy = strdup(opts);
|
||||
lpcdev = strsep(&str, ",");
|
||||
if (lpcdev != NULL) {
|
||||
if (strcasecmp(lpcdev, "bootrom") == 0) {
|
||||
romfile = str;
|
||||
error = 0;
|
||||
goto done;
|
||||
}
|
||||
for (unit = 0; unit < LPC_UART_NUM; unit++) {
|
||||
if (strcasecmp(lpcdev, lpc_uart_names[unit]) == 0) {
|
||||
lpc_uart_softc[unit].opts = str;
|
||||
error = 0;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
if (error)
|
||||
free(cpy);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
const char *
|
||||
lpc_bootrom(void)
|
||||
{
|
||||
|
||||
return (romfile);
|
||||
}
|
||||
|
||||
static void
|
||||
lpc_uart_intr_assert(void *arg)
|
||||
{
|
||||
struct lpc_uart_softc *sc = arg;
|
||||
|
||||
assert(sc->irq >= 0);
|
||||
|
||||
vm_isa_pulse_irq(lpc_bridge->pi_vmctx, sc->irq, sc->irq);
|
||||
}
|
||||
|
||||
/*
 * Interrupt-deassert callback handed to uart_init().  Intentionally a
 * no-op: the COM devices on the LPC bus generate edge triggered
 * interrupts, so there is nothing to lower here.
 */
static void
lpc_uart_intr_deassert(void *arg)
{
	(void)arg;
}
|
||||
|
||||
static int
|
||||
lpc_uart_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
int offset;
|
||||
struct lpc_uart_softc *sc = arg;
|
||||
|
||||
offset = port - sc->iobase;
|
||||
|
||||
switch (bytes) {
|
||||
case 1:
|
||||
if (in)
|
||||
*eax = uart_read(sc->uart_softc, offset);
|
||||
else
|
||||
uart_write(sc->uart_softc, offset, *eax);
|
||||
break;
|
||||
case 2:
|
||||
if (in) {
|
||||
*eax = uart_read(sc->uart_softc, offset);
|
||||
*eax |= uart_read(sc->uart_softc, offset + 1) << 8;
|
||||
} else {
|
||||
uart_write(sc->uart_softc, offset, *eax);
|
||||
uart_write(sc->uart_softc, offset + 1, *eax >> 8);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return (-1);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
lpc_init(struct vmctx *ctx)
|
||||
{
|
||||
struct lpc_uart_softc *sc;
|
||||
struct inout_port iop;
|
||||
const char *name;
|
||||
int unit, error;
|
||||
|
||||
if (romfile != NULL) {
|
||||
error = bootrom_init(ctx, romfile);
|
||||
if (error)
|
||||
return (error);
|
||||
}
|
||||
|
||||
/* COM1 and COM2 */
|
||||
for (unit = 0; unit < LPC_UART_NUM; unit++) {
|
||||
sc = &lpc_uart_softc[unit];
|
||||
name = lpc_uart_names[unit];
|
||||
|
||||
if (uart_legacy_alloc(unit, &sc->iobase, &sc->irq) != 0) {
|
||||
fprintf(stderr, "Unable to allocate resources for "
|
||||
"LPC device %s\n", name);
|
||||
return (-1);
|
||||
}
|
||||
pci_irq_reserve(sc->irq);
|
||||
|
||||
sc->uart_softc = uart_init(lpc_uart_intr_assert,
|
||||
lpc_uart_intr_deassert, sc);
|
||||
|
||||
if (uart_set_backend(sc->uart_softc, sc->opts) != 0) {
|
||||
fprintf(stderr, "Unable to initialize backend '%s' "
|
||||
"for LPC device %s\n", sc->opts, name);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
bzero(&iop, sizeof(struct inout_port));
|
||||
iop.name = name;
|
||||
iop.port = sc->iobase;
|
||||
iop.size = UART_IO_BAR_SIZE;
|
||||
iop.flags = IOPORT_F_INOUT;
|
||||
iop.handler = lpc_uart_io_handler;
|
||||
iop.arg = sc;
|
||||
|
||||
error = register_inout(&iop);
|
||||
assert(error == 0);
|
||||
sc->enabled = 1;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_lpc_write_dsdt(struct pci_devinst *pi)
|
||||
{
|
||||
struct lpc_dsdt **ldpp, *ldp;
|
||||
|
||||
dsdt_line("");
|
||||
dsdt_line("Device (ISA)");
|
||||
dsdt_line("{");
|
||||
dsdt_line(" Name (_ADR, 0x%04X%04X)", pi->pi_slot, pi->pi_func);
|
||||
dsdt_line(" OperationRegion (LPCR, PCI_Config, 0x00, 0x100)");
|
||||
dsdt_line(" Field (LPCR, AnyAcc, NoLock, Preserve)");
|
||||
dsdt_line(" {");
|
||||
dsdt_line(" Offset (0x60),");
|
||||
dsdt_line(" PIRA, 8,");
|
||||
dsdt_line(" PIRB, 8,");
|
||||
dsdt_line(" PIRC, 8,");
|
||||
dsdt_line(" PIRD, 8,");
|
||||
dsdt_line(" Offset (0x68),");
|
||||
dsdt_line(" PIRE, 8,");
|
||||
dsdt_line(" PIRF, 8,");
|
||||
dsdt_line(" PIRG, 8,");
|
||||
dsdt_line(" PIRH, 8");
|
||||
dsdt_line(" }");
|
||||
dsdt_line("");
|
||||
|
||||
dsdt_indent(1);
|
||||
SET_FOREACH(ldpp, lpc_dsdt_set) {
|
||||
ldp = *ldpp;
|
||||
ldp->handler();
|
||||
}
|
||||
|
||||
dsdt_line("");
|
||||
dsdt_line("Device (PIC)");
|
||||
dsdt_line("{");
|
||||
dsdt_line(" Name (_HID, EisaId (\"PNP0000\"))");
|
||||
dsdt_line(" Name (_CRS, ResourceTemplate ()");
|
||||
dsdt_line(" {");
|
||||
dsdt_indent(2);
|
||||
dsdt_fixed_ioport(IO_ICU1, 2);
|
||||
dsdt_fixed_ioport(IO_ICU2, 2);
|
||||
dsdt_fixed_irq(2);
|
||||
dsdt_unindent(2);
|
||||
dsdt_line(" })");
|
||||
dsdt_line("}");
|
||||
|
||||
dsdt_line("");
|
||||
dsdt_line("Device (TIMR)");
|
||||
dsdt_line("{");
|
||||
dsdt_line(" Name (_HID, EisaId (\"PNP0100\"))");
|
||||
dsdt_line(" Name (_CRS, ResourceTemplate ()");
|
||||
dsdt_line(" {");
|
||||
dsdt_indent(2);
|
||||
dsdt_fixed_ioport(IO_TIMER1_PORT, 4);
|
||||
dsdt_fixed_irq(0);
|
||||
dsdt_unindent(2);
|
||||
dsdt_line(" })");
|
||||
dsdt_line("}");
|
||||
dsdt_unindent(1);
|
||||
|
||||
dsdt_line("}");
|
||||
}
|
||||
|
||||
static void
|
||||
pci_lpc_sysres_dsdt(void)
|
||||
{
|
||||
struct lpc_sysres **lspp, *lsp;
|
||||
|
||||
dsdt_line("");
|
||||
dsdt_line("Device (SIO)");
|
||||
dsdt_line("{");
|
||||
dsdt_line(" Name (_HID, EisaId (\"PNP0C02\"))");
|
||||
dsdt_line(" Name (_CRS, ResourceTemplate ()");
|
||||
dsdt_line(" {");
|
||||
|
||||
dsdt_indent(2);
|
||||
SET_FOREACH(lspp, lpc_sysres_set) {
|
||||
lsp = *lspp;
|
||||
switch (lsp->type) {
|
||||
case LPC_SYSRES_IO:
|
||||
dsdt_fixed_ioport(lsp->base, lsp->length);
|
||||
break;
|
||||
case LPC_SYSRES_MEM:
|
||||
dsdt_fixed_mem32(lsp->base, lsp->length);
|
||||
break;
|
||||
}
|
||||
}
|
||||
dsdt_unindent(2);
|
||||
|
||||
dsdt_line(" })");
|
||||
dsdt_line("}");
|
||||
}
|
||||
LPC_DSDT(pci_lpc_sysres_dsdt);
|
||||
|
||||
static void
|
||||
pci_lpc_uart_dsdt(void)
|
||||
{
|
||||
struct lpc_uart_softc *sc;
|
||||
int unit;
|
||||
|
||||
for (unit = 0; unit < LPC_UART_NUM; unit++) {
|
||||
sc = &lpc_uart_softc[unit];
|
||||
if (!sc->enabled)
|
||||
continue;
|
||||
dsdt_line("");
|
||||
dsdt_line("Device (%s)", lpc_uart_names[unit]);
|
||||
dsdt_line("{");
|
||||
dsdt_line(" Name (_HID, EisaId (\"PNP0501\"))");
|
||||
dsdt_line(" Name (_UID, %d)", unit + 1);
|
||||
dsdt_line(" Name (_CRS, ResourceTemplate ()");
|
||||
dsdt_line(" {");
|
||||
dsdt_indent(2);
|
||||
dsdt_fixed_ioport(sc->iobase, UART_IO_BAR_SIZE);
|
||||
dsdt_fixed_irq(sc->irq);
|
||||
dsdt_unindent(2);
|
||||
dsdt_line(" })");
|
||||
dsdt_line("}");
|
||||
}
|
||||
}
|
||||
LPC_DSDT(pci_lpc_uart_dsdt);
|
||||
|
||||
/*
 * Config-space write hook of the LPC bridge.
 *
 * Single-byte writes to offsets 0x60-0x63 and 0x68-0x6b address the
 * PIRQ registers of pins 1-4 and 5-8 respectively.  Such a write is
 * passed to the interrupt router, the value the router ends up holding
 * is stored in config space, and 0 is returned.  Every other write
 * returns -1.
 */
static int
pci_lpc_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
    int coff, int bytes, uint32_t val)
{
	int pin;

	if (bytes != 1)
		return (-1);

	if (coff >= 0x60 && coff <= 0x63)
		pin = coff - 0x60 + 1;
	else if (coff >= 0x68 && coff <= 0x6b)
		pin = coff - 0x68 + 5;
	else
		return (-1);

	pirq_write(ctx, pin, val);
	pci_set_cfgdata8(pi, coff, pirq_read(pin));
	return (0);
}
|
||||
|
||||
/* BAR write hook of the LPC bridge.  Writes are ignored. */
static void
pci_lpc_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
    int baridx, uint64_t offset, int size, uint64_t value)
{
	/* Nothing to do. */
}
|
||||
|
||||
/* BAR read hook of the LPC bridge.  Every read returns 0. */
static uint64_t
pci_lpc_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
    int baridx, uint64_t offset, int size)
{
	const uint64_t value = 0;

	return (value);
}
|
||||
|
||||
#define LPC_DEV 0x7000
|
||||
#define LPC_VENDOR 0x8086
|
||||
|
||||
static int
|
||||
pci_lpc_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
|
||||
/*
|
||||
* Do not allow more than one LPC bridge to be configured.
|
||||
*/
|
||||
if (lpc_bridge != NULL) {
|
||||
fprintf(stderr, "Only one LPC bridge is allowed.\n");
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Enforce that the LPC can only be configured on bus 0. This
|
||||
* simplifies the ACPI DSDT because it can provide a decode for
|
||||
* all legacy i/o ports behind bus 0.
|
||||
*/
|
||||
if (pi->pi_bus != 0) {
|
||||
fprintf(stderr, "LPC bridge can be present only on bus 0.\n");
|
||||
return (-1);
|
||||
}
|
||||
|
||||
if (lpc_init(ctx) != 0)
|
||||
return (-1);
|
||||
|
||||
/* initialize config space */
|
||||
pci_set_cfgdata16(pi, PCIR_DEVICE, LPC_DEV);
|
||||
pci_set_cfgdata16(pi, PCIR_VENDOR, LPC_VENDOR);
|
||||
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
|
||||
pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_ISA);
|
||||
|
||||
lpc_bridge = pi;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
char *
|
||||
lpc_pirq_name(int pin)
|
||||
{
|
||||
char *name;
|
||||
|
||||
if (lpc_bridge == NULL)
|
||||
return (NULL);
|
||||
asprintf(&name, "\\_SB.PC00.ISA.LNK%c,", 'A' + pin - 1);
|
||||
return (name);
|
||||
}
|
||||
|
||||
void
|
||||
lpc_pirq_routed(void)
|
||||
{
|
||||
int pin;
|
||||
|
||||
if (lpc_bridge == NULL)
|
||||
return;
|
||||
|
||||
for (pin = 0; pin < 4; pin++)
|
||||
pci_set_cfgdata8(lpc_bridge, 0x60 + pin, pirq_read(pin + 1));
|
||||
for (pin = 0; pin < 4; pin++)
|
||||
pci_set_cfgdata8(lpc_bridge, 0x68 + pin, pirq_read(pin + 5));
|
||||
}
|
||||
|
||||
struct pci_devemu pci_de_lpc = {
|
||||
.pe_emu = "lpc",
|
||||
.pe_init = pci_lpc_init,
|
||||
.pe_write_dsdt = pci_lpc_write_dsdt,
|
||||
.pe_cfgwrite = pci_lpc_cfgwrite,
|
||||
.pe_barwrite = pci_lpc_write,
|
||||
.pe_barread = pci_lpc_read
|
||||
};
|
||||
PCI_EMUL_SET(pci_de_lpc);
|
73
pci_lpc.h
Normal file
73
pci_lpc.h
Normal file
@ -0,0 +1,73 @@
|
||||
/*-
|
||||
* Copyright (c) 2013 Neel Natu <neel@freebsd.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _LPC_H_
|
||||
#define _LPC_H_
|
||||
|
||||
#include <sys/linker_set.h>
|
||||
|
||||
/* Callback that emits a DSDT fragment for a device behind the LPC bridge. */
typedef void (*lpc_write_dsdt_t)(void);

/* One entry of the lpc_dsdt_set linker set. */
struct lpc_dsdt {
	lpc_write_dsdt_t handler;
};

/*
 * Register 'fn' in lpc_dsdt_set.  The generated variable name contains
 * the line number, so at most one registration may appear per source
 * line.
 */
#define	LPC_DSDT(fn)							\
	static struct lpc_dsdt __CONCAT(__lpc_dsdt, __LINE__) = {	\
		.handler = (fn),					\
	};								\
	DATA_SET(lpc_dsdt_set, __CONCAT(__lpc_dsdt, __LINE__))
|
||||
|
||||
/* Kind of system resource: an I/O port range or a memory range. */
enum lpc_sysres_type {
	LPC_SYSRES_IO,
	LPC_SYSRES_MEM
};

/* One entry of the lpc_sysres_set linker set. */
struct lpc_sysres {
	enum lpc_sysres_type type;
	uint32_t base;
	uint32_t length;
};

/*
 * Register a system resource in lpc_sysres_set.  The generated
 * variable name contains the line number, so at most one registration
 * may appear per source line.
 */
#define	LPC_SYSRES(kind, start, len)					\
	static struct lpc_sysres __CONCAT(__lpc_sysres, __LINE__) = {	\
		.type = (kind),						\
		.base = (start),					\
		.length = (len)						\
	};								\
	DATA_SET(lpc_sysres_set, __CONCAT(__lpc_sysres, __LINE__))

/* Shorthands for the two resource kinds. */
#define	SYSRES_IO(start, len)	LPC_SYSRES(LPC_SYSRES_IO, start, len)
#define	SYSRES_MEM(start, len)	LPC_SYSRES(LPC_SYSRES_MEM, start, len)
|
||||
|
||||
/* Parse an LPC device configuration string; returns 0 or -1. */
int	lpc_device_parse(const char *opt);

/* Option string recorded for the "bootrom" device, or NULL. */
const char *lpc_bootrom(void);

/* Allocated link-device name for a PIRQ pin, or NULL without a bridge. */
char	*lpc_pirq_name(int pin);

/* Copy the PIRQ register values into the bridge's config space. */
void	lpc_pirq_routed(void);
|
||||
|
||||
#endif
|
897
pci_passthru.c
Normal file
897
pci_passthru.c
Normal file
@ -0,0 +1,897 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/pciio.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include <dev/io/iodev.h>
|
||||
#include <dev/pci/pcireg.h>
|
||||
|
||||
#include <machine/iodev.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <err.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <vmmapi.h>
|
||||
#include "pci_emul.h"
|
||||
#include "mem.h"
|
||||
|
||||
#ifndef _PATH_DEVPCI
|
||||
#define _PATH_DEVPCI "/dev/pci"
|
||||
#endif
|
||||
|
||||
#ifndef _PATH_DEVIO
|
||||
#define _PATH_DEVIO "/dev/io"
|
||||
#endif
|
||||
|
||||
#ifndef _PATH_MEM
|
||||
#define _PATH_MEM "/dev/mem"
|
||||
#endif
|
||||
|
||||
#define LEGACY_SUPPORT 1
|
||||
|
||||
#define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1)
|
||||
#define MSIX_CAPLEN 12
|
||||
|
||||
static int pcifd = -1;
|
||||
static int iofd = -1;
|
||||
static int memfd = -1;
|
||||
|
||||
struct passthru_softc {
|
||||
struct pci_devinst *psc_pi;
|
||||
struct pcibar psc_bar[PCI_BARMAX + 1];
|
||||
struct {
|
||||
int capoff;
|
||||
int msgctrl;
|
||||
int emulated;
|
||||
} psc_msi;
|
||||
struct {
|
||||
int capoff;
|
||||
} psc_msix;
|
||||
struct pcisel psc_sel;
|
||||
};
|
||||
|
||||
static int
|
||||
msi_caplen(int msgctrl)
|
||||
{
|
||||
int len;
|
||||
|
||||
len = 10; /* minimum length of msi capability */
|
||||
|
||||
if (msgctrl & PCIM_MSICTRL_64BIT)
|
||||
len += 4;
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* Ignore the 'mask' and 'pending' bits in the MSI capability.
|
||||
* We'll let the guest manipulate them directly.
|
||||
*/
|
||||
if (msgctrl & PCIM_MSICTRL_VECTOR)
|
||||
len += 10;
|
||||
#endif
|
||||
|
||||
return (len);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
read_config(const struct pcisel *sel, long reg, int width)
|
||||
{
|
||||
struct pci_io pi;
|
||||
|
||||
bzero(&pi, sizeof(pi));
|
||||
pi.pi_sel = *sel;
|
||||
pi.pi_reg = reg;
|
||||
pi.pi_width = width;
|
||||
|
||||
if (ioctl(pcifd, PCIOCREAD, &pi) < 0)
|
||||
return (0); /* XXX */
|
||||
else
|
||||
return (pi.pi_data);
|
||||
}
|
||||
|
||||
static void
|
||||
write_config(const struct pcisel *sel, long reg, int width, uint32_t data)
|
||||
{
|
||||
struct pci_io pi;
|
||||
|
||||
bzero(&pi, sizeof(pi));
|
||||
pi.pi_sel = *sel;
|
||||
pi.pi_reg = reg;
|
||||
pi.pi_width = width;
|
||||
pi.pi_data = data;
|
||||
|
||||
(void)ioctl(pcifd, PCIOCWRITE, &pi); /* XXX */
|
||||
}
|
||||
|
||||
#ifdef LEGACY_SUPPORT
|
||||
static int
|
||||
passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr)
|
||||
{
|
||||
int capoff, i;
|
||||
struct msicap msicap;
|
||||
u_char *capdata;
|
||||
|
||||
pci_populate_msicap(&msicap, msgnum, nextptr);
|
||||
|
||||
/*
|
||||
* XXX
|
||||
* Copy the msi capability structure in the last 16 bytes of the
|
||||
* config space. This is wrong because it could shadow something
|
||||
* useful to the device.
|
||||
*/
|
||||
capoff = 256 - roundup(sizeof(msicap), 4);
|
||||
capdata = (u_char *)&msicap;
|
||||
for (i = 0; i < sizeof(msicap); i++)
|
||||
pci_set_cfgdata8(pi, capoff + i, capdata[i]);
|
||||
|
||||
return (capoff);
|
||||
}
|
||||
#endif /* LEGACY_SUPPORT */
|
||||
|
||||
static int
|
||||
cfginitmsi(struct passthru_softc *sc)
|
||||
{
|
||||
int i, ptr, capptr, cap, sts, caplen, table_size;
|
||||
uint32_t u32;
|
||||
struct pcisel sel;
|
||||
struct pci_devinst *pi;
|
||||
struct msixcap msixcap;
|
||||
uint32_t *msixcap_ptr;
|
||||
|
||||
pi = sc->psc_pi;
|
||||
sel = sc->psc_sel;
|
||||
|
||||
/*
|
||||
* Parse the capabilities and cache the location of the MSI
|
||||
* and MSI-X capabilities.
|
||||
*/
|
||||
sts = read_config(&sel, PCIR_STATUS, 2);
|
||||
if (sts & PCIM_STATUS_CAPPRESENT) {
|
||||
ptr = read_config(&sel, PCIR_CAP_PTR, 1);
|
||||
while (ptr != 0 && ptr != 0xff) {
|
||||
cap = read_config(&sel, ptr + PCICAP_ID, 1);
|
||||
if (cap == PCIY_MSI) {
|
||||
/*
|
||||
* Copy the MSI capability into the config
|
||||
* space of the emulated pci device
|
||||
*/
|
||||
sc->psc_msi.capoff = ptr;
|
||||
sc->psc_msi.msgctrl = read_config(&sel,
|
||||
ptr + 2, 2);
|
||||
sc->psc_msi.emulated = 0;
|
||||
caplen = msi_caplen(sc->psc_msi.msgctrl);
|
||||
capptr = ptr;
|
||||
while (caplen > 0) {
|
||||
u32 = read_config(&sel, capptr, 4);
|
||||
pci_set_cfgdata32(pi, capptr, u32);
|
||||
caplen -= 4;
|
||||
capptr += 4;
|
||||
}
|
||||
} else if (cap == PCIY_MSIX) {
|
||||
/*
|
||||
* Copy the MSI-X capability
|
||||
*/
|
||||
sc->psc_msix.capoff = ptr;
|
||||
caplen = 12;
|
||||
msixcap_ptr = (uint32_t*) &msixcap;
|
||||
capptr = ptr;
|
||||
while (caplen > 0) {
|
||||
u32 = read_config(&sel, capptr, 4);
|
||||
*msixcap_ptr = u32;
|
||||
pci_set_cfgdata32(pi, capptr, u32);
|
||||
caplen -= 4;
|
||||
capptr += 4;
|
||||
msixcap_ptr++;
|
||||
}
|
||||
}
|
||||
ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (sc->psc_msix.capoff != 0) {
|
||||
pi->pi_msix.pba_bar =
|
||||
msixcap.pba_info & PCIM_MSIX_BIR_MASK;
|
||||
pi->pi_msix.pba_offset =
|
||||
msixcap.pba_info & ~PCIM_MSIX_BIR_MASK;
|
||||
pi->pi_msix.table_bar =
|
||||
msixcap.table_info & PCIM_MSIX_BIR_MASK;
|
||||
pi->pi_msix.table_offset =
|
||||
msixcap.table_info & ~PCIM_MSIX_BIR_MASK;
|
||||
pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl);
|
||||
pi->pi_msix.pba_size = PBA_SIZE(pi->pi_msix.table_count);
|
||||
|
||||
/* Allocate the emulated MSI-X table array */
|
||||
table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE;
|
||||
pi->pi_msix.table = calloc(1, table_size);
|
||||
|
||||
/* Mask all table entries */
|
||||
for (i = 0; i < pi->pi_msix.table_count; i++) {
|
||||
pi->pi_msix.table[i].vector_control |=
|
||||
PCIM_MSIX_VCTRL_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef LEGACY_SUPPORT
|
||||
/*
|
||||
* If the passthrough device does not support MSI then craft a
|
||||
* MSI capability for it. We link the new MSI capability at the
|
||||
* head of the list of capabilities.
|
||||
*/
|
||||
if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) {
|
||||
int origptr, msiptr;
|
||||
origptr = read_config(&sel, PCIR_CAP_PTR, 1);
|
||||
msiptr = passthru_add_msicap(pi, 1, origptr);
|
||||
sc->psc_msi.capoff = msiptr;
|
||||
sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2);
|
||||
sc->psc_msi.emulated = 1;
|
||||
pci_set_cfgdata8(pi, PCIR_CAP_PTR, msiptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Make sure one of the capabilities is present */
|
||||
if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0)
|
||||
return (-1);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
msix_table_read(struct passthru_softc *sc, uint64_t offset, int size)
|
||||
{
|
||||
struct pci_devinst *pi;
|
||||
struct msix_table_entry *entry;
|
||||
uint8_t *src8;
|
||||
uint16_t *src16;
|
||||
uint32_t *src32;
|
||||
uint64_t *src64;
|
||||
uint64_t data;
|
||||
size_t entry_offset;
|
||||
int index;
|
||||
|
||||
pi = sc->psc_pi;
|
||||
if (offset >= pi->pi_msix.pba_offset &&
|
||||
offset < pi->pi_msix.pba_offset + pi->pi_msix.pba_size) {
|
||||
switch(size) {
|
||||
case 1:
|
||||
src8 = (uint8_t *)(pi->pi_msix.pba_page + offset -
|
||||
pi->pi_msix.pba_page_offset);
|
||||
data = *src8;
|
||||
break;
|
||||
case 2:
|
||||
src16 = (uint16_t *)(pi->pi_msix.pba_page + offset -
|
||||
pi->pi_msix.pba_page_offset);
|
||||
data = *src16;
|
||||
break;
|
||||
case 4:
|
||||
src32 = (uint32_t *)(pi->pi_msix.pba_page + offset -
|
||||
pi->pi_msix.pba_page_offset);
|
||||
data = *src32;
|
||||
break;
|
||||
case 8:
|
||||
src64 = (uint64_t *)(pi->pi_msix.pba_page + offset -
|
||||
pi->pi_msix.pba_page_offset);
|
||||
data = *src64;
|
||||
break;
|
||||
default:
|
||||
return (-1);
|
||||
}
|
||||
return (data);
|
||||
}
|
||||
|
||||
if (offset < pi->pi_msix.table_offset)
|
||||
return (-1);
|
||||
|
||||
offset -= pi->pi_msix.table_offset;
|
||||
index = offset / MSIX_TABLE_ENTRY_SIZE;
|
||||
if (index >= pi->pi_msix.table_count)
|
||||
return (-1);
|
||||
|
||||
entry = &pi->pi_msix.table[index];
|
||||
entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
|
||||
|
||||
switch(size) {
|
||||
case 1:
|
||||
src8 = (uint8_t *)((void *)entry + entry_offset);
|
||||
data = *src8;
|
||||
break;
|
||||
case 2:
|
||||
src16 = (uint16_t *)((void *)entry + entry_offset);
|
||||
data = *src16;
|
||||
break;
|
||||
case 4:
|
||||
src32 = (uint32_t *)((void *)entry + entry_offset);
|
||||
data = *src32;
|
||||
break;
|
||||
case 8:
|
||||
src64 = (uint64_t *)((void *)entry + entry_offset);
|
||||
data = *src64;
|
||||
break;
|
||||
default:
|
||||
return (-1);
|
||||
}
|
||||
|
||||
return (data);
|
||||
}
|
||||
|
||||
static void
|
||||
msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc,
|
||||
uint64_t offset, int size, uint64_t data)
|
||||
{
|
||||
struct pci_devinst *pi;
|
||||
struct msix_table_entry *entry;
|
||||
uint8_t *dest8;
|
||||
uint16_t *dest16;
|
||||
uint32_t *dest32;
|
||||
uint64_t *dest64;
|
||||
size_t entry_offset;
|
||||
uint32_t vector_control;
|
||||
int error, index;
|
||||
|
||||
pi = sc->psc_pi;
|
||||
if (offset >= pi->pi_msix.pba_offset &&
|
||||
offset < pi->pi_msix.pba_offset + pi->pi_msix.pba_size) {
|
||||
switch(size) {
|
||||
case 1:
|
||||
dest8 = (uint8_t *)(pi->pi_msix.pba_page + offset -
|
||||
pi->pi_msix.pba_page_offset);
|
||||
*dest8 = data;
|
||||
break;
|
||||
case 2:
|
||||
dest16 = (uint16_t *)(pi->pi_msix.pba_page + offset -
|
||||
pi->pi_msix.pba_page_offset);
|
||||
*dest16 = data;
|
||||
break;
|
||||
case 4:
|
||||
dest32 = (uint32_t *)(pi->pi_msix.pba_page + offset -
|
||||
pi->pi_msix.pba_page_offset);
|
||||
*dest32 = data;
|
||||
break;
|
||||
case 8:
|
||||
dest64 = (uint64_t *)(pi->pi_msix.pba_page + offset -
|
||||
pi->pi_msix.pba_page_offset);
|
||||
*dest64 = data;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (offset < pi->pi_msix.table_offset)
|
||||
return;
|
||||
|
||||
offset -= pi->pi_msix.table_offset;
|
||||
index = offset / MSIX_TABLE_ENTRY_SIZE;
|
||||
if (index >= pi->pi_msix.table_count)
|
||||
return;
|
||||
|
||||
entry = &pi->pi_msix.table[index];
|
||||
entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
|
||||
|
||||
/* Only 4 byte naturally-aligned writes are supported */
|
||||
assert(size == 4);
|
||||
assert(entry_offset % 4 == 0);
|
||||
|
||||
vector_control = entry->vector_control;
|
||||
dest32 = (uint32_t *)((void *)entry + entry_offset);
|
||||
*dest32 = data;
|
||||
/* If MSI-X hasn't been enabled, do nothing */
|
||||
if (pi->pi_msix.enabled) {
|
||||
/* If the entry is masked, don't set it up */
|
||||
if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 ||
|
||||
(vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
|
||||
error = vm_setup_pptdev_msix(ctx, vcpu,
|
||||
sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
|
||||
sc->psc_sel.pc_func, index, entry->addr,
|
||||
entry->msg_data, entry->vector_control);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
|
||||
{
|
||||
int b, s, f;
|
||||
int error, idx;
|
||||
size_t len, remaining;
|
||||
uint32_t table_size, table_offset;
|
||||
uint32_t pba_size, pba_offset;
|
||||
vm_paddr_t start;
|
||||
struct pci_devinst *pi = sc->psc_pi;
|
||||
|
||||
assert(pci_msix_table_bar(pi) >= 0 && pci_msix_pba_bar(pi) >= 0);
|
||||
|
||||
b = sc->psc_sel.pc_bus;
|
||||
s = sc->psc_sel.pc_dev;
|
||||
f = sc->psc_sel.pc_func;
|
||||
|
||||
/*
|
||||
* If the MSI-X table BAR maps memory intended for
|
||||
* other uses, it is at least assured that the table
|
||||
* either resides in its own page within the region,
|
||||
* or it resides in a page shared with only the PBA.
|
||||
*/
|
||||
table_offset = rounddown2(pi->pi_msix.table_offset, 4096);
|
||||
|
||||
table_size = pi->pi_msix.table_offset - table_offset;
|
||||
table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE;
|
||||
table_size = roundup2(table_size, 4096);
|
||||
|
||||
idx = pi->pi_msix.table_bar;
|
||||
start = pi->pi_bar[idx].addr;
|
||||
remaining = pi->pi_bar[idx].size;
|
||||
|
||||
if (pi->pi_msix.pba_bar == pi->pi_msix.table_bar) {
|
||||
pba_offset = pi->pi_msix.pba_offset;
|
||||
pba_size = pi->pi_msix.pba_size;
|
||||
if (pba_offset >= table_offset + table_size ||
|
||||
table_offset >= pba_offset + pba_size) {
|
||||
/*
|
||||
* If the PBA does not share a page with the MSI-x
|
||||
* tables, no PBA emulation is required.
|
||||
*/
|
||||
pi->pi_msix.pba_page = NULL;
|
||||
pi->pi_msix.pba_page_offset = 0;
|
||||
} else {
|
||||
/*
|
||||
* The PBA overlaps with either the first or last
|
||||
* page of the MSI-X table region. Map the
|
||||
* appropriate page.
|
||||
*/
|
||||
if (pba_offset <= table_offset)
|
||||
pi->pi_msix.pba_page_offset = table_offset;
|
||||
else
|
||||
pi->pi_msix.pba_page_offset = table_offset +
|
||||
table_size - 4096;
|
||||
pi->pi_msix.pba_page = mmap(NULL, 4096, PROT_READ |
|
||||
PROT_WRITE, MAP_SHARED, memfd, start +
|
||||
pi->pi_msix.pba_page_offset);
|
||||
if (pi->pi_msix.pba_page == MAP_FAILED) {
|
||||
warn(
|
||||
"Failed to map PBA page for MSI-X on %d/%d/%d",
|
||||
b, s, f);
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Map everything before the MSI-X table */
|
||||
if (table_offset > 0) {
|
||||
len = table_offset;
|
||||
error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
base += len;
|
||||
start += len;
|
||||
remaining -= len;
|
||||
}
|
||||
|
||||
/* Skip the MSI-X table */
|
||||
base += table_size;
|
||||
start += table_size;
|
||||
remaining -= table_size;
|
||||
|
||||
/* Map everything beyond the end of the MSI-X table */
|
||||
if (remaining > 0) {
|
||||
len = remaining;
|
||||
error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base);
|
||||
if (error)
|
||||
return (error);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
cfginitbar(struct vmctx *ctx, struct passthru_softc *sc)
|
||||
{
|
||||
int i, error;
|
||||
struct pci_devinst *pi;
|
||||
struct pci_bar_io bar;
|
||||
enum pcibar_type bartype;
|
||||
uint64_t base, size;
|
||||
|
||||
pi = sc->psc_pi;
|
||||
|
||||
/*
|
||||
* Initialize BAR registers
|
||||
*/
|
||||
for (i = 0; i <= PCI_BARMAX; i++) {
|
||||
bzero(&bar, sizeof(bar));
|
||||
bar.pbi_sel = sc->psc_sel;
|
||||
bar.pbi_reg = PCIR_BAR(i);
|
||||
|
||||
if (ioctl(pcifd, PCIOCGETBAR, &bar) < 0)
|
||||
continue;
|
||||
|
||||
if (PCI_BAR_IO(bar.pbi_base)) {
|
||||
bartype = PCIBAR_IO;
|
||||
base = bar.pbi_base & PCIM_BAR_IO_BASE;
|
||||
} else {
|
||||
switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) {
|
||||
case PCIM_BAR_MEM_64:
|
||||
bartype = PCIBAR_MEM64;
|
||||
break;
|
||||
default:
|
||||
bartype = PCIBAR_MEM32;
|
||||
break;
|
||||
}
|
||||
base = bar.pbi_base & PCIM_BAR_MEM_BASE;
|
||||
}
|
||||
size = bar.pbi_length;
|
||||
|
||||
if (bartype != PCIBAR_IO) {
|
||||
if (((base | size) & PAGE_MASK) != 0) {
|
||||
warnx("passthru device %d/%d/%d BAR %d: "
|
||||
"base %#lx or size %#lx not page aligned\n",
|
||||
sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
|
||||
sc->psc_sel.pc_func, i, base, size);
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Cache information about the "real" BAR */
|
||||
sc->psc_bar[i].type = bartype;
|
||||
sc->psc_bar[i].size = size;
|
||||
sc->psc_bar[i].addr = base;
|
||||
|
||||
/* Allocate the BAR in the guest I/O or MMIO space */
|
||||
error = pci_emul_alloc_pbar(pi, i, base, bartype, size);
|
||||
if (error)
|
||||
return (-1);
|
||||
|
||||
/* The MSI-X table needs special handling */
|
||||
if (i == pci_msix_table_bar(pi)) {
|
||||
error = init_msix_table(ctx, sc, base);
|
||||
if (error)
|
||||
return (-1);
|
||||
} else if (bartype != PCIBAR_IO) {
|
||||
/* Map the physical BAR in the guest MMIO space */
|
||||
error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
|
||||
sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
|
||||
pi->pi_bar[i].addr, pi->pi_bar[i].size, base);
|
||||
if (error)
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* 64-bit BAR takes up two slots so skip the next one.
|
||||
*/
|
||||
if (bartype == PCIBAR_MEM64) {
|
||||
i++;
|
||||
assert(i <= PCI_BARMAX);
|
||||
sc->psc_bar[i].type = PCIBAR_MEMHI64;
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
cfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func)
|
||||
{
|
||||
int error;
|
||||
struct passthru_softc *sc;
|
||||
|
||||
error = 1;
|
||||
sc = pi->pi_arg;
|
||||
|
||||
bzero(&sc->psc_sel, sizeof(struct pcisel));
|
||||
sc->psc_sel.pc_bus = bus;
|
||||
sc->psc_sel.pc_dev = slot;
|
||||
sc->psc_sel.pc_func = func;
|
||||
|
||||
if (cfginitmsi(sc) != 0) {
|
||||
warnx("failed to initialize MSI for PCI %d/%d/%d",
|
||||
bus, slot, func);
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (cfginitbar(ctx, sc) != 0) {
|
||||
warnx("failed to initialize BARs for PCI %d/%d/%d",
|
||||
bus, slot, func);
|
||||
goto done;
|
||||
}
|
||||
|
||||
error = 0; /* success */
|
||||
done:
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
int bus, slot, func, error, memflags;
|
||||
struct passthru_softc *sc;
|
||||
|
||||
sc = NULL;
|
||||
error = 1;
|
||||
|
||||
memflags = vm_get_memflags(ctx);
|
||||
if (!(memflags & VM_MEM_F_WIRED)) {
|
||||
warnx("passthru requires guest memory to be wired");
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (pcifd < 0) {
|
||||
pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
|
||||
if (pcifd < 0) {
|
||||
warn("failed to open %s", _PATH_DEVPCI);
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
if (iofd < 0) {
|
||||
iofd = open(_PATH_DEVIO, O_RDWR, 0);
|
||||
if (iofd < 0) {
|
||||
warn("failed to open %s", _PATH_DEVIO);
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
if (memfd < 0) {
|
||||
memfd = open(_PATH_MEM, O_RDWR, 0);
|
||||
if (memfd < 0) {
|
||||
warn("failed to open %s", _PATH_MEM);
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
if (opts == NULL ||
|
||||
sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3) {
|
||||
warnx("invalid passthru options");
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (vm_assign_pptdev(ctx, bus, slot, func) != 0) {
|
||||
warnx("PCI device at %d/%d/%d is not using the ppt(4) driver",
|
||||
bus, slot, func);
|
||||
goto done;
|
||||
}
|
||||
|
||||
sc = calloc(1, sizeof(struct passthru_softc));
|
||||
|
||||
pi->pi_arg = sc;
|
||||
sc->psc_pi = pi;
|
||||
|
||||
/* initialize config space */
|
||||
if ((error = cfginit(ctx, pi, bus, slot, func)) != 0)
|
||||
goto done;
|
||||
|
||||
error = 0; /* success */
|
||||
done:
|
||||
if (error) {
|
||||
free(sc);
|
||||
vm_unassign_pptdev(ctx, bus, slot, func);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
bar_access(int coff)
|
||||
{
|
||||
if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1))
|
||||
return (1);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
msicap_access(struct passthru_softc *sc, int coff)
|
||||
{
|
||||
int caplen;
|
||||
|
||||
if (sc->psc_msi.capoff == 0)
|
||||
return (0);
|
||||
|
||||
caplen = msi_caplen(sc->psc_msi.msgctrl);
|
||||
|
||||
if (coff >= sc->psc_msi.capoff && coff < sc->psc_msi.capoff + caplen)
|
||||
return (1);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
msixcap_access(struct passthru_softc *sc, int coff)
|
||||
{
|
||||
if (sc->psc_msix.capoff == 0)
|
||||
return (0);
|
||||
|
||||
return (coff >= sc->psc_msix.capoff &&
|
||||
coff < sc->psc_msix.capoff + MSIX_CAPLEN);
|
||||
}
|
||||
|
||||
static int
|
||||
passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
|
||||
int coff, int bytes, uint32_t *rv)
|
||||
{
|
||||
struct passthru_softc *sc;
|
||||
|
||||
sc = pi->pi_arg;
|
||||
|
||||
/*
|
||||
* PCI BARs and MSI capability is emulated.
|
||||
*/
|
||||
if (bar_access(coff) || msicap_access(sc, coff))
|
||||
return (-1);
|
||||
|
||||
#ifdef LEGACY_SUPPORT
|
||||
/*
|
||||
* Emulate PCIR_CAP_PTR if this device does not support MSI capability
|
||||
* natively.
|
||||
*/
|
||||
if (sc->psc_msi.emulated) {
|
||||
if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4)
|
||||
return (-1);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Everything else just read from the device's config space */
|
||||
*rv = read_config(&sc->psc_sel, coff, bytes);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
|
||||
int coff, int bytes, uint32_t val)
|
||||
{
|
||||
int error, msix_table_entries, i;
|
||||
struct passthru_softc *sc;
|
||||
|
||||
sc = pi->pi_arg;
|
||||
|
||||
/*
|
||||
* PCI BARs are emulated
|
||||
*/
|
||||
if (bar_access(coff))
|
||||
return (-1);
|
||||
|
||||
/*
|
||||
* MSI capability is emulated
|
||||
*/
|
||||
if (msicap_access(sc, coff)) {
|
||||
msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val);
|
||||
|
||||
error = vm_setup_pptdev_msi(ctx, vcpu, sc->psc_sel.pc_bus,
|
||||
sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
|
||||
pi->pi_msi.addr, pi->pi_msi.msg_data,
|
||||
pi->pi_msi.maxmsgnum);
|
||||
if (error != 0)
|
||||
err(1, "vm_setup_pptdev_msi");
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (msixcap_access(sc, coff)) {
|
||||
msixcap_cfgwrite(pi, sc->psc_msix.capoff, coff, bytes, val);
|
||||
if (pi->pi_msix.enabled) {
|
||||
msix_table_entries = pi->pi_msix.table_count;
|
||||
for (i = 0; i < msix_table_entries; i++) {
|
||||
error = vm_setup_pptdev_msix(ctx, vcpu,
|
||||
sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
|
||||
sc->psc_sel.pc_func, i,
|
||||
pi->pi_msix.table[i].addr,
|
||||
pi->pi_msix.table[i].msg_data,
|
||||
pi->pi_msix.table[i].vector_control);
|
||||
|
||||
if (error)
|
||||
err(1, "vm_setup_pptdev_msix");
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
#ifdef LEGACY_SUPPORT
|
||||
/*
|
||||
* If this device does not support MSI natively then we cannot let
|
||||
* the guest disable legacy interrupts from the device. It is the
|
||||
* legacy interrupt that is triggering the virtual MSI to the guest.
|
||||
*/
|
||||
if (sc->psc_msi.emulated && pci_msi_enabled(pi)) {
|
||||
if (coff == PCIR_COMMAND && bytes == 2)
|
||||
val &= ~PCIM_CMD_INTxDIS;
|
||||
}
|
||||
#endif
|
||||
|
||||
write_config(&sc->psc_sel, coff, bytes, val);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
|
||||
uint64_t offset, int size, uint64_t value)
|
||||
{
|
||||
struct passthru_softc *sc;
|
||||
struct iodev_pio_req pio;
|
||||
|
||||
sc = pi->pi_arg;
|
||||
|
||||
if (baridx == pci_msix_table_bar(pi)) {
|
||||
msix_table_write(ctx, vcpu, sc, offset, size, value);
|
||||
} else {
|
||||
assert(pi->pi_bar[baridx].type == PCIBAR_IO);
|
||||
bzero(&pio, sizeof(struct iodev_pio_req));
|
||||
pio.access = IODEV_PIO_WRITE;
|
||||
pio.port = sc->psc_bar[baridx].addr + offset;
|
||||
pio.width = size;
|
||||
pio.val = value;
|
||||
|
||||
(void)ioctl(iofd, IODEV_PIO, &pio);
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
passthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
|
||||
uint64_t offset, int size)
|
||||
{
|
||||
struct passthru_softc *sc;
|
||||
struct iodev_pio_req pio;
|
||||
uint64_t val;
|
||||
|
||||
sc = pi->pi_arg;
|
||||
|
||||
if (baridx == pci_msix_table_bar(pi)) {
|
||||
val = msix_table_read(sc, offset, size);
|
||||
} else {
|
||||
assert(pi->pi_bar[baridx].type == PCIBAR_IO);
|
||||
bzero(&pio, sizeof(struct iodev_pio_req));
|
||||
pio.access = IODEV_PIO_READ;
|
||||
pio.port = sc->psc_bar[baridx].addr + offset;
|
||||
pio.width = size;
|
||||
pio.val = 0;
|
||||
|
||||
(void)ioctl(iofd, IODEV_PIO, &pio);
|
||||
|
||||
val = pio.val;
|
||||
}
|
||||
|
||||
return (val);
|
||||
}
|
||||
|
||||
struct pci_devemu passthru = {
|
||||
.pe_emu = "passthru",
|
||||
.pe_init = passthru_init,
|
||||
.pe_cfgwrite = passthru_cfgwrite,
|
||||
.pe_cfgread = passthru_cfgread,
|
||||
.pe_barwrite = passthru_write,
|
||||
.pe_barread = passthru_read,
|
||||
};
|
||||
PCI_EMUL_SET(passthru);
|
119
pci_uart.c
Normal file
119
pci_uart.c
Normal file
@ -0,0 +1,119 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "pci_emul.h"
|
||||
#include "uart_emul.h"
|
||||
|
||||
/*
|
||||
* Pick a PCI vid/did of a chip with a single uart at
|
||||
* BAR0, that most versions of FreeBSD can understand:
|
||||
* Siig CyberSerial 1-port.
|
||||
*/
|
||||
#define COM_VENDOR 0x131f
|
||||
#define COM_DEV 0x2000
|
||||
|
||||
/*
 * Interrupt-assert callback handed to uart_init(): 'arg' is the PCI
 * device instance whose legacy interrupt is asserted.
 */
static void
pci_uart_intr_assert(void *arg)
{
	struct pci_devinst *devinst;

	devinst = arg;
	pci_lintr_assert(devinst);
}
|
||||
|
||||
/*
 * Interrupt-deassert callback handed to uart_init(): 'arg' is the PCI
 * device instance whose legacy interrupt is deasserted.
 */
static void
pci_uart_intr_deassert(void *arg)
{
	struct pci_devinst *devinst;

	devinst = arg;
	pci_lintr_deassert(devinst);
}
|
||||
|
||||
static void
|
||||
pci_uart_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
|
||||
int baridx, uint64_t offset, int size, uint64_t value)
|
||||
{
|
||||
|
||||
assert(baridx == 0);
|
||||
assert(size == 1);
|
||||
|
||||
uart_write(pi->pi_arg, offset, value);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
pci_uart_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
|
||||
int baridx, uint64_t offset, int size)
|
||||
{
|
||||
uint8_t val;
|
||||
|
||||
assert(baridx == 0);
|
||||
assert(size == 1);
|
||||
|
||||
val = uart_read(pi->pi_arg, offset);
|
||||
return (val);
|
||||
}
|
||||
|
||||
static int
|
||||
pci_uart_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
struct uart_softc *sc;
|
||||
|
||||
pci_emul_alloc_bar(pi, 0, PCIBAR_IO, UART_IO_BAR_SIZE);
|
||||
pci_lintr_request(pi);
|
||||
|
||||
/* initialize config space */
|
||||
pci_set_cfgdata16(pi, PCIR_DEVICE, COM_DEV);
|
||||
pci_set_cfgdata16(pi, PCIR_VENDOR, COM_VENDOR);
|
||||
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_SIMPLECOMM);
|
||||
|
||||
sc = uart_init(pci_uart_intr_assert, pci_uart_intr_deassert, pi);
|
||||
pi->pi_arg = sc;
|
||||
|
||||
if (uart_set_backend(sc, opts) != 0) {
|
||||
fprintf(stderr, "Unable to initialize backend '%s' for "
|
||||
"pci uart at %d:%d\n", opts, pi->pi_slot, pi->pi_func);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
struct pci_devemu pci_de_com = {
|
||||
.pe_emu = "uart",
|
||||
.pe_init = pci_uart_init,
|
||||
.pe_barwrite = pci_uart_write,
|
||||
.pe_barread = pci_uart_read
|
||||
};
|
||||
PCI_EMUL_SET(pci_de_com);
|
410
pci_virtio_block.c
Normal file
410
pci_virtio_block.c
Normal file
@ -0,0 +1,410 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/linker_set.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/disk.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <pthread.h>
|
||||
#include <md5.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "pci_emul.h"
|
||||
#include "virtio.h"
|
||||
#include "block_if.h"
|
||||
|
||||
#define VTBLK_RINGSZ 64
|
||||
|
||||
#define VTBLK_S_OK 0
|
||||
#define VTBLK_S_IOERR 1
|
||||
#define VTBLK_S_UNSUPP 2
|
||||
|
||||
#define VTBLK_BLK_ID_BYTES 20
|
||||
|
||||
/* Capability bits */
|
||||
#define VTBLK_F_SEG_MAX (1 << 2) /* Maximum request segments */
|
||||
#define VTBLK_F_BLK_SIZE (1 << 6) /* cfg block size valid */
|
||||
#define VTBLK_F_FLUSH (1 << 9) /* Cache flush support */
|
||||
#define VTBLK_F_TOPOLOGY (1 << 10) /* Optimal I/O alignment */
|
||||
|
||||
/*
|
||||
* Host capabilities
|
||||
*/
|
||||
#define VTBLK_S_HOSTCAPS \
|
||||
( VTBLK_F_SEG_MAX | \
|
||||
VTBLK_F_BLK_SIZE | \
|
||||
VTBLK_F_FLUSH | \
|
||||
VTBLK_F_TOPOLOGY | \
|
||||
VIRTIO_RING_F_INDIRECT_DESC ) /* indirect descriptors */
|
||||
|
||||
/*
|
||||
* Config space "registers"
|
||||
*/
|
||||
/*
 * Layout of the device config space "registers".  The structure is
 * packed, and its size is what vtblk_vi_consts reports as the config
 * register size.
 */
struct vtblk_config {
	uint64_t	vbc_capacity;
	uint32_t	vbc_size_max;
	uint32_t	vbc_seg_max;
	/* Disk geometry. */
	struct {
		uint16_t cylinders;
		uint8_t heads;
		uint8_t sectors;
	} vbc_geometry;
	uint32_t	vbc_blk_size;
	/* I/O topology. */
	struct {
		uint8_t physical_block_exp;
		uint8_t alignment_offset;
		uint16_t min_io_size;
		uint32_t opt_io_size;
	} vbc_topology;
	uint8_t		vbc_writeback;
} __packed;
|
||||
|
||||
/*
|
||||
* Fixed-size block header
|
||||
*/
|
||||
/*
 * Fixed-size header of a block request.  The VBH_OP_* values and
 * VBH_FLAG_BARRIER below apply to vbh_type.
 */
struct virtio_blk_hdr {
#define	VBH_OP_READ		0
#define	VBH_OP_WRITE		1
#define	VBH_OP_FLUSH		4
#define	VBH_OP_FLUSH_OUT	5
#define	VBH_OP_IDENT		8
#define	VBH_FLAG_BARRIER	0x80000000	/* OR'ed into vbh_type */
	uint32_t	vbh_type;	/* operation, see VBH_OP_* */
	uint32_t	vbh_ioprio;
	uint64_t	vbh_sector;
} __packed;
|
||||
|
||||
/*
 * Debug printf.
 *
 * DPRINTF prints only when pci_vtblk_debug is non-zero; WPRINTF always
 * prints.  Both take a fully parenthesised printf argument list, e.g.
 * DPRINTF(("x = %d\n", x)).
 *
 * DPRINTF is wrapped in do { } while (0) so that it behaves as a single
 * statement: with a bare "if" expansion, an "else" following the macro in
 * the caller would silently bind to the macro's hidden "if".
 */
static int pci_vtblk_debug;
#define DPRINTF(params) do { if (pci_vtblk_debug) printf params; } while (0)
#define WPRINTF(params) printf params
|
||||
|
||||
struct pci_vtblk_ioreq {
|
||||
struct blockif_req io_req;
|
||||
struct pci_vtblk_softc *io_sc;
|
||||
uint8_t *io_status;
|
||||
uint16_t io_idx;
|
||||
};
|
||||
|
||||
/*
|
||||
* Per-device softc
|
||||
*/
|
||||
struct pci_vtblk_softc {
|
||||
struct virtio_softc vbsc_vs;
|
||||
pthread_mutex_t vsc_mtx;
|
||||
struct vqueue_info vbsc_vq;
|
||||
struct vtblk_config vbsc_cfg;
|
||||
struct blockif_ctxt *bc;
|
||||
char vbsc_ident[VTBLK_BLK_ID_BYTES];
|
||||
struct pci_vtblk_ioreq vbsc_ios[VTBLK_RINGSZ];
|
||||
};
|
||||
|
||||
static void pci_vtblk_reset(void *);
|
||||
static void pci_vtblk_notify(void *, struct vqueue_info *);
|
||||
static int pci_vtblk_cfgread(void *, int, int, uint32_t *);
|
||||
static int pci_vtblk_cfgwrite(void *, int, int, uint32_t);
|
||||
|
||||
static struct virtio_consts vtblk_vi_consts = {
|
||||
"vtblk", /* our name */
|
||||
1, /* we support 1 virtqueue */
|
||||
sizeof(struct vtblk_config), /* config reg size */
|
||||
pci_vtblk_reset, /* reset */
|
||||
pci_vtblk_notify, /* device-wide qnotify */
|
||||
pci_vtblk_cfgread, /* read PCI config */
|
||||
pci_vtblk_cfgwrite, /* write PCI config */
|
||||
NULL, /* apply negotiated features */
|
||||
VTBLK_S_HOSTCAPS, /* our capabilities */
|
||||
};
|
||||
|
||||
static void
|
||||
pci_vtblk_reset(void *vsc)
|
||||
{
|
||||
struct pci_vtblk_softc *sc = vsc;
|
||||
|
||||
DPRINTF(("vtblk: device reset requested !\n"));
|
||||
vi_reset_dev(&sc->vbsc_vs);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtblk_done(struct blockif_req *br, int err)
|
||||
{
|
||||
struct pci_vtblk_ioreq *io = br->br_param;
|
||||
struct pci_vtblk_softc *sc = io->io_sc;
|
||||
|
||||
/* convert errno into a virtio block error return */
|
||||
if (err == EOPNOTSUPP || err == ENOSYS)
|
||||
*io->io_status = VTBLK_S_UNSUPP;
|
||||
else if (err != 0)
|
||||
*io->io_status = VTBLK_S_IOERR;
|
||||
else
|
||||
*io->io_status = VTBLK_S_OK;
|
||||
|
||||
/*
|
||||
* Return the descriptor back to the host.
|
||||
* We wrote 1 byte (our status) to host.
|
||||
*/
|
||||
pthread_mutex_lock(&sc->vsc_mtx);
|
||||
vq_relchain(&sc->vbsc_vq, io->io_idx, 1);
|
||||
vq_endchains(&sc->vbsc_vq, 0);
|
||||
pthread_mutex_unlock(&sc->vsc_mtx);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
|
||||
{
|
||||
struct virtio_blk_hdr *vbh;
|
||||
struct pci_vtblk_ioreq *io;
|
||||
int i, n;
|
||||
int err;
|
||||
ssize_t iolen;
|
||||
int writeop, type;
|
||||
struct iovec iov[BLOCKIF_IOV_MAX + 2];
|
||||
uint16_t idx, flags[BLOCKIF_IOV_MAX + 2];
|
||||
|
||||
n = vq_getchain(vq, &idx, iov, BLOCKIF_IOV_MAX + 2, flags);
|
||||
|
||||
/*
|
||||
* The first descriptor will be the read-only fixed header,
|
||||
* and the last is for status (hence +2 above and below).
|
||||
* The remaining iov's are the actual data I/O vectors.
|
||||
*
|
||||
* XXX - note - this fails on crash dump, which does a
|
||||
* VIRTIO_BLK_T_FLUSH with a zero transfer length
|
||||
*/
|
||||
assert(n >= 2 && n <= BLOCKIF_IOV_MAX + 2);
|
||||
|
||||
io = &sc->vbsc_ios[idx];
|
||||
assert((flags[0] & VRING_DESC_F_WRITE) == 0);
|
||||
assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr));
|
||||
vbh = iov[0].iov_base;
|
||||
memcpy(&io->io_req.br_iov, &iov[1], sizeof(struct iovec) * (n - 2));
|
||||
io->io_req.br_iovcnt = n - 2;
|
||||
io->io_req.br_offset = vbh->vbh_sector * DEV_BSIZE;
|
||||
io->io_status = iov[--n].iov_base;
|
||||
assert(iov[n].iov_len == 1);
|
||||
assert(flags[n] & VRING_DESC_F_WRITE);
|
||||
|
||||
/*
|
||||
* XXX
|
||||
* The guest should not be setting the BARRIER flag because
|
||||
* we don't advertise the capability.
|
||||
*/
|
||||
type = vbh->vbh_type & ~VBH_FLAG_BARRIER;
|
||||
writeop = (type == VBH_OP_WRITE);
|
||||
|
||||
iolen = 0;
|
||||
for (i = 1; i < n; i++) {
|
||||
/*
|
||||
* - write op implies read-only descriptor,
|
||||
* - read/ident op implies write-only descriptor,
|
||||
* therefore test the inverse of the descriptor bit
|
||||
* to the op.
|
||||
*/
|
||||
assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop);
|
||||
iolen += iov[i].iov_len;
|
||||
}
|
||||
io->io_req.br_resid = iolen;
|
||||
|
||||
DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld\n\r",
|
||||
writeop ? "write" : "read/ident", iolen, i - 1,
|
||||
io->io_req.br_offset));
|
||||
|
||||
switch (type) {
|
||||
case VBH_OP_READ:
|
||||
err = blockif_read(sc->bc, &io->io_req);
|
||||
break;
|
||||
case VBH_OP_WRITE:
|
||||
err = blockif_write(sc->bc, &io->io_req);
|
||||
break;
|
||||
case VBH_OP_FLUSH:
|
||||
case VBH_OP_FLUSH_OUT:
|
||||
err = blockif_flush(sc->bc, &io->io_req);
|
||||
break;
|
||||
case VBH_OP_IDENT:
|
||||
/* Assume a single buffer */
|
||||
/* S/n equal to buffer is not zero-terminated. */
|
||||
memset(iov[1].iov_base, 0, iov[1].iov_len);
|
||||
strncpy(iov[1].iov_base, sc->vbsc_ident,
|
||||
MIN(iov[1].iov_len, sizeof(sc->vbsc_ident)));
|
||||
pci_vtblk_done(&io->io_req, 0);
|
||||
return;
|
||||
default:
|
||||
pci_vtblk_done(&io->io_req, EOPNOTSUPP);
|
||||
return;
|
||||
}
|
||||
assert(err == 0);
|
||||
}
|
||||
|
||||
/*
 * Queue notify handler: processes requests one chain at a time for as long
 * as the queue reports available descriptors.
 */
static void
pci_vtblk_notify(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtblk_softc *softc;

	softc = vsc;
	for (; vq_has_descs(vq); ) {
		pci_vtblk_proc(softc, vq);
	}
}
|
||||
|
||||
static int
|
||||
pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
char bident[sizeof("XX:X:X")];
|
||||
struct blockif_ctxt *bctxt;
|
||||
MD5_CTX mdctx;
|
||||
u_char digest[16];
|
||||
struct pci_vtblk_softc *sc;
|
||||
off_t size;
|
||||
int i, sectsz, sts, sto;
|
||||
|
||||
if (opts == NULL) {
|
||||
printf("virtio-block: backing device required\n");
|
||||
return (1);
|
||||
}
|
||||
|
||||
/*
|
||||
* The supplied backing file has to exist
|
||||
*/
|
||||
snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
|
||||
bctxt = blockif_open(opts, bident);
|
||||
if (bctxt == NULL) {
|
||||
perror("Could not open backing file");
|
||||
return (1);
|
||||
}
|
||||
|
||||
size = blockif_size(bctxt);
|
||||
sectsz = blockif_sectsz(bctxt);
|
||||
blockif_psectsz(bctxt, &sts, &sto);
|
||||
|
||||
sc = calloc(1, sizeof(struct pci_vtblk_softc));
|
||||
sc->bc = bctxt;
|
||||
for (i = 0; i < VTBLK_RINGSZ; i++) {
|
||||
struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i];
|
||||
io->io_req.br_callback = pci_vtblk_done;
|
||||
io->io_req.br_param = io;
|
||||
io->io_sc = sc;
|
||||
io->io_idx = i;
|
||||
}
|
||||
|
||||
pthread_mutex_init(&sc->vsc_mtx, NULL);
|
||||
|
||||
/* init virtio softc and virtqueues */
|
||||
vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq);
|
||||
sc->vbsc_vs.vs_mtx = &sc->vsc_mtx;
|
||||
|
||||
sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ;
|
||||
/* sc->vbsc_vq.vq_notify = we have no per-queue notify */
|
||||
|
||||
/*
|
||||
* Create an identifier for the backing file. Use parts of the
|
||||
* md5 sum of the filename
|
||||
*/
|
||||
MD5Init(&mdctx);
|
||||
MD5Update(&mdctx, opts, strlen(opts));
|
||||
MD5Final(digest, &mdctx);
|
||||
sprintf(sc->vbsc_ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X",
|
||||
digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);
|
||||
|
||||
/* setup virtio block config space */
|
||||
sc->vbsc_cfg.vbc_capacity = size / DEV_BSIZE; /* 512-byte units */
|
||||
sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */
|
||||
sc->vbsc_cfg.vbc_seg_max = BLOCKIF_IOV_MAX;
|
||||
sc->vbsc_cfg.vbc_geometry.cylinders = 0; /* no geometry */
|
||||
sc->vbsc_cfg.vbc_geometry.heads = 0;
|
||||
sc->vbsc_cfg.vbc_geometry.sectors = 0;
|
||||
sc->vbsc_cfg.vbc_blk_size = sectsz;
|
||||
sc->vbsc_cfg.vbc_topology.physical_block_exp =
|
||||
(sts > sectsz) ? (ffsll(sts / sectsz) - 1) : 0;
|
||||
sc->vbsc_cfg.vbc_topology.alignment_offset =
|
||||
(sto != 0) ? ((sts - sto) / sectsz) : 0;
|
||||
sc->vbsc_cfg.vbc_topology.min_io_size = 0;
|
||||
sc->vbsc_cfg.vbc_topology.opt_io_size = 0;
|
||||
sc->vbsc_cfg.vbc_writeback = 0;
|
||||
|
||||
/*
|
||||
* Should we move some of this into virtio.c? Could
|
||||
* have the device, class, and subdev_0 as fields in
|
||||
* the virtio constants structure.
|
||||
*/
|
||||
pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
|
||||
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
|
||||
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
|
||||
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
|
||||
pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
|
||||
|
||||
if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) {
|
||||
blockif_close(sc->bc);
|
||||
free(sc);
|
||||
return (1);
|
||||
}
|
||||
vi_set_io_bar(&sc->vbsc_vs, 0);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Config space write handler.  Nothing is written: the attempt is logged
 * when debugging is enabled and 1 is returned.
 */
static int
pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value)
{
	int rv = 1;

	DPRINTF(("vtblk: write to readonly reg %d\n\r", offset));
	return (rv);
}
|
||||
|
||||
static int
|
||||
pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval)
|
||||
{
|
||||
struct pci_vtblk_softc *sc = vsc;
|
||||
void *ptr;
|
||||
|
||||
/* our caller has already verified offset and size */
|
||||
ptr = (uint8_t *)&sc->vbsc_cfg + offset;
|
||||
memcpy(retval, ptr, size);
|
||||
return (0);
|
||||
}
|
||||
|
||||
struct pci_devemu pci_de_vblk = {
|
||||
.pe_emu = "virtio-blk",
|
||||
.pe_init = pci_vtblk_init,
|
||||
.pe_barwrite = vi_pci_write,
|
||||
.pe_barread = vi_pci_read
|
||||
};
|
||||
PCI_EMUL_SET(pci_de_vblk);
|
976
pci_virtio_net.c
Normal file
976
pci_virtio_net.c
Normal file
@ -0,0 +1,976 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/linker_set.h>
|
||||
#include <sys/select.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <machine/atomic.h>
|
||||
#include <net/ethernet.h>
|
||||
#ifndef NETMAP_WITH_LIBS
|
||||
#define NETMAP_WITH_LIBS
|
||||
#endif
|
||||
#include <net/netmap_user.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <md5.h>
|
||||
#include <pthread.h>
|
||||
#include <pthread_np.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "pci_emul.h"
|
||||
#include "mevent.h"
|
||||
#include "virtio.h"
|
||||
|
||||
/* Queue size used for the virtqueues. */
#define VTNET_RINGSZ	1024

/* Largest descriptor chain, in segments, fetched in one vq_getchain() call. */
#define VTNET_MAXSEGS	256

/*
 * Feature bits.  Only the subset collected in VTNET_S_HOSTCAPS below is
 * actually offered by this device model.
 */
#define	VIRTIO_NET_F_CSUM	(1 <<  0) /* host handles partial cksum */
#define	VIRTIO_NET_F_GUEST_CSUM	(1 <<  1) /* guest handles partial cksum */
#define	VIRTIO_NET_F_MAC	(1 <<  5) /* host supplies MAC */
#define	VIRTIO_NET_F_GSO_DEPREC	(1 <<  6) /* deprecated: host handles GSO */
#define	VIRTIO_NET_F_GUEST_TSO4	(1 <<  7) /* guest can rcv TSOv4 */
#define	VIRTIO_NET_F_GUEST_TSO6	(1 <<  8) /* guest can rcv TSOv6 */
#define	VIRTIO_NET_F_GUEST_ECN	(1 <<  9) /* guest can rcv TSO with ECN */
#define	VIRTIO_NET_F_GUEST_UFO	(1 << 10) /* guest can rcv UFO */
#define	VIRTIO_NET_F_HOST_TSO4	(1 << 11) /* host can rcv TSOv4 */
#define	VIRTIO_NET_F_HOST_TSO6	(1 << 12) /* host can rcv TSOv6 */
#define	VIRTIO_NET_F_HOST_ECN	(1 << 13) /* host can rcv TSO with ECN */
#define	VIRTIO_NET_F_HOST_UFO	(1 << 14) /* host can rcv UFO */
#define	VIRTIO_NET_F_MRG_RXBUF	(1 << 15) /* host can merge RX buffers */
#define	VIRTIO_NET_F_STATUS	(1 << 16) /* config status field available */
#define	VIRTIO_NET_F_CTRL_VQ	(1 << 17) /* control channel available */
#define	VIRTIO_NET_F_CTRL_RX	(1 << 18) /* control channel RX mode support */
#define	VIRTIO_NET_F_CTRL_VLAN	(1 << 19) /* control channel VLAN filtering */
#define	VIRTIO_NET_F_GUEST_ANNOUNCE \
				(1 << 21) /* guest can send gratuitous pkts */

/* Features advertised to the guest. */
#define VTNET_S_HOSTCAPS      \
  ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \
    VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC)
|
||||
|
||||
/*
 * Device-specific configuration area ("registers") of the network device.
 * Packed so that the in-memory layout has no padding.
 */
struct virtio_net_config {
	uint8_t  mac[6];	/* MAC address */
	uint16_t status;	/* status word */
} __packed;
|
||||
|
||||
/*
 * Virtqueue indices.  VTNET_MAXQ counts all three, but only the first
 * VTNET_MAXQ - 1 (rx and tx) are instantiated by this file.
 */
#define VTNET_RXQ	0
#define VTNET_TXQ	1
#define VTNET_CTLQ	2	/* NB: not yet supported */

#define VTNET_MAXQ	3
|
||||
|
||||
/*
 * Fixed-size header placed in front of every received frame.  The rx paths
 * in this file zero it and, when merged rx buffers are in use, set
 * vrh_bufs to 1; no other field is filled in.
 */
struct virtio_net_rxhdr {
	uint8_t		vrh_flags;
	uint8_t		vrh_gso_type;
	uint16_t	vrh_hdr_len;
	uint16_t	vrh_gso_size;
	uint16_t	vrh_csum_start;
	uint16_t	vrh_csum_offset;
	uint16_t	vrh_bufs;	/* number of buffers used for the frame */
} __packed;
|
||||
|
||||
/*
 * Debug printf.
 *
 * DPRINTF prints only when pci_vtnet_debug is non-zero; WPRINTF always
 * prints.  Both take a fully parenthesised printf argument list, e.g.
 * DPRINTF(("x = %d\n", x)).
 *
 * DPRINTF is wrapped in do { } while (0) so that it behaves as a single
 * statement: with a bare "if" expansion, an "else" following the macro in
 * the caller would silently bind to the macro's hidden "if".
 */
static int pci_vtnet_debug;
#define DPRINTF(params) do { if (pci_vtnet_debug) printf params; } while (0)
#define WPRINTF(params) printf params
|
||||
|
||||
/*
|
||||
* Per-device softc
|
||||
*/
|
||||
struct pci_vtnet_softc {
|
||||
struct virtio_softc vsc_vs;
|
||||
struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
|
||||
pthread_mutex_t vsc_mtx;
|
||||
struct mevent *vsc_mevp;
|
||||
|
||||
int vsc_tapfd;
|
||||
struct nm_desc *vsc_nmd;
|
||||
|
||||
int vsc_rx_ready;
|
||||
volatile int resetting; /* set and checked outside lock */
|
||||
|
||||
uint64_t vsc_features; /* negotiated features */
|
||||
|
||||
struct virtio_net_config vsc_config;
|
||||
|
||||
pthread_mutex_t rx_mtx;
|
||||
int rx_in_progress;
|
||||
int rx_vhdrlen;
|
||||
int rx_merge; /* merged rx bufs in use */
|
||||
|
||||
pthread_t tx_tid;
|
||||
pthread_mutex_t tx_mtx;
|
||||
pthread_cond_t tx_cond;
|
||||
int tx_in_progress;
|
||||
|
||||
void (*pci_vtnet_rx)(struct pci_vtnet_softc *sc);
|
||||
void (*pci_vtnet_tx)(struct pci_vtnet_softc *sc, struct iovec *iov,
|
||||
int iovcnt, int len);
|
||||
};
|
||||
|
||||
static void pci_vtnet_reset(void *);
|
||||
/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
|
||||
static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
|
||||
static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
|
||||
static void pci_vtnet_neg_features(void *, uint64_t);
|
||||
|
||||
static struct virtio_consts vtnet_vi_consts = {
|
||||
"vtnet", /* our name */
|
||||
VTNET_MAXQ - 1, /* we currently support 2 virtqueues */
|
||||
sizeof(struct virtio_net_config), /* config reg size */
|
||||
pci_vtnet_reset, /* reset */
|
||||
NULL, /* device-wide qnotify -- not used */
|
||||
pci_vtnet_cfgread, /* read PCI config */
|
||||
pci_vtnet_cfgwrite, /* write PCI config */
|
||||
pci_vtnet_neg_features, /* apply negotiated features */
|
||||
VTNET_S_HOSTCAPS, /* our capabilities */
|
||||
};
|
||||
|
||||
/*
|
||||
* If the transmit thread is active then stall until it is done.
|
||||
*/
|
||||
static void
|
||||
pci_vtnet_txwait(struct pci_vtnet_softc *sc)
|
||||
{
|
||||
|
||||
pthread_mutex_lock(&sc->tx_mtx);
|
||||
while (sc->tx_in_progress) {
|
||||
pthread_mutex_unlock(&sc->tx_mtx);
|
||||
usleep(10000);
|
||||
pthread_mutex_lock(&sc->tx_mtx);
|
||||
}
|
||||
pthread_mutex_unlock(&sc->tx_mtx);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the receive thread is active then stall until it is done.
|
||||
*/
|
||||
static void
|
||||
pci_vtnet_rxwait(struct pci_vtnet_softc *sc)
|
||||
{
|
||||
|
||||
pthread_mutex_lock(&sc->rx_mtx);
|
||||
while (sc->rx_in_progress) {
|
||||
pthread_mutex_unlock(&sc->rx_mtx);
|
||||
usleep(10000);
|
||||
pthread_mutex_lock(&sc->rx_mtx);
|
||||
}
|
||||
pthread_mutex_unlock(&sc->rx_mtx);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_reset(void *vsc)
|
||||
{
|
||||
struct pci_vtnet_softc *sc = vsc;
|
||||
|
||||
DPRINTF(("vtnet: device reset requested !\n"));
|
||||
|
||||
sc->resetting = 1;
|
||||
|
||||
/*
|
||||
* Wait for the transmit and receive threads to finish their
|
||||
* processing.
|
||||
*/
|
||||
pci_vtnet_txwait(sc);
|
||||
pci_vtnet_rxwait(sc);
|
||||
|
||||
sc->vsc_rx_ready = 0;
|
||||
sc->rx_merge = 1;
|
||||
sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
|
||||
|
||||
/* now reset rings, MSI-X vectors, and negotiated capabilities */
|
||||
vi_reset_dev(&sc->vsc_vs);
|
||||
|
||||
sc->resetting = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called to send a buffer chain out to the tap device
|
||||
*/
|
||||
static void
|
||||
pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
|
||||
int len)
|
||||
{
|
||||
static char pad[60]; /* all zero bytes */
|
||||
|
||||
if (sc->vsc_tapfd == -1)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If the length is < 60, pad out to that and add the
|
||||
* extra zero'd segment to the iov. It is guaranteed that
|
||||
* there is always an extra iov available by the caller.
|
||||
*/
|
||||
if (len < 60) {
|
||||
iov[iovcnt].iov_base = pad;
|
||||
iov[iovcnt].iov_len = 60 - len;
|
||||
iovcnt++;
|
||||
}
|
||||
(void) writev(sc->vsc_tapfd, iov, iovcnt);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called when there is read activity on the tap file descriptor.
|
||||
* Each buffer posted by the guest is assumed to be able to contain
|
||||
* an entire ethernet frame + rx header.
|
||||
* MP note: the dummybuf is only used for discarding frames, so there
|
||||
* is no need for it to be per-vtnet or locked.
|
||||
*/
|
||||
/* Shared scratch buffer that the rx paths read dropped frames into. */
static uint8_t dummybuf[2048];
|
||||
|
||||
/*
 * Strip the first `tlen` bytes from the front of an iovec array.
 *
 * iov  - the array; iov[0] is modified in place.
 * niov - in/out segment count; decremented when iov[0] is used up entirely.
 * tlen - bytes to strip; must fit inside iov[0] (asserted).
 *
 * Returns the first iovec that still holds data: &iov[0] with its base
 * advanced, or &iov[1] when the first segment was consumed exactly.
 */
static __inline struct iovec *
rx_iov_trim(struct iovec *iov, int *niov, int tlen)
{
	struct iovec *first = &iov[0];

	/* XXX short-cut: assume first segment is >= tlen */
	assert(first->iov_len >= tlen);

	first->iov_len -= tlen;
	if (first->iov_len != 0) {
		first->iov_base = (char *)first->iov_base + tlen;
		return (first);
	}

	/* First segment fully consumed: there has to be another one. */
	assert(*niov > 1);
	*niov -= 1;
	return (&iov[1]);
}
|
||||
|
||||
static void
|
||||
pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
|
||||
{
|
||||
struct iovec iov[VTNET_MAXSEGS], *riov;
|
||||
struct vqueue_info *vq;
|
||||
void *vrx;
|
||||
int len, n;
|
||||
uint16_t idx;
|
||||
|
||||
/*
|
||||
* Should never be called without a valid tap fd
|
||||
*/
|
||||
assert(sc->vsc_tapfd != -1);
|
||||
|
||||
/*
|
||||
* But, will be called when the rx ring hasn't yet
|
||||
* been set up or the guest is resetting the device.
|
||||
*/
|
||||
if (!sc->vsc_rx_ready || sc->resetting) {
|
||||
/*
|
||||
* Drop the packet and try later.
|
||||
*/
|
||||
(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for available rx buffers
|
||||
*/
|
||||
vq = &sc->vsc_queues[VTNET_RXQ];
|
||||
if (!vq_has_descs(vq)) {
|
||||
/*
|
||||
* Drop the packet and try later. Interrupt on
|
||||
* empty, if that's negotiated.
|
||||
*/
|
||||
(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
|
||||
vq_endchains(vq, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
do {
|
||||
/*
|
||||
* Get descriptor chain.
|
||||
*/
|
||||
n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
|
||||
assert(n >= 1 && n <= VTNET_MAXSEGS);
|
||||
|
||||
/*
|
||||
* Get a pointer to the rx header, and use the
|
||||
* data immediately following it for the packet buffer.
|
||||
*/
|
||||
vrx = iov[0].iov_base;
|
||||
riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
|
||||
|
||||
len = readv(sc->vsc_tapfd, riov, n);
|
||||
|
||||
if (len < 0 && errno == EWOULDBLOCK) {
|
||||
/*
|
||||
* No more packets, but still some avail ring
|
||||
* entries. Interrupt if needed/appropriate.
|
||||
*/
|
||||
vq_retchain(vq);
|
||||
vq_endchains(vq, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* The only valid field in the rx packet header is the
|
||||
* number of buffers if merged rx bufs were negotiated.
|
||||
*/
|
||||
memset(vrx, 0, sc->rx_vhdrlen);
|
||||
|
||||
if (sc->rx_merge) {
|
||||
struct virtio_net_rxhdr *vrxh;
|
||||
|
||||
vrxh = vrx;
|
||||
vrxh->vrh_bufs = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Release this chain and handle more chains.
|
||||
*/
|
||||
vq_relchain(vq, idx, len + sc->rx_vhdrlen);
|
||||
} while (vq_has_descs(vq));
|
||||
|
||||
/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
|
||||
vq_endchains(vq, 1);
|
||||
}
|
||||
|
||||
static __inline int
|
||||
pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
|
||||
{
|
||||
int r, i;
|
||||
int len = 0;
|
||||
|
||||
for (r = nmd->cur_tx_ring; ; ) {
|
||||
struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r);
|
||||
uint32_t cur, idx;
|
||||
char *buf;
|
||||
|
||||
if (nm_ring_empty(ring)) {
|
||||
r++;
|
||||
if (r > nmd->last_tx_ring)
|
||||
r = nmd->first_tx_ring;
|
||||
if (r == nmd->cur_tx_ring)
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
cur = ring->cur;
|
||||
idx = ring->slot[cur].buf_idx;
|
||||
buf = NETMAP_BUF(ring, idx);
|
||||
|
||||
for (i = 0; i < iovcnt; i++) {
|
||||
if (len + iov[i].iov_len > 2048)
|
||||
break;
|
||||
memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len);
|
||||
len += iov[i].iov_len;
|
||||
}
|
||||
ring->slot[cur].len = len;
|
||||
ring->head = ring->cur = nm_ring_next(ring, cur);
|
||||
nmd->cur_tx_ring = r;
|
||||
ioctl(nmd->fd, NIOCTXSYNC, NULL);
|
||||
break;
|
||||
}
|
||||
|
||||
return (len);
|
||||
}
|
||||
|
||||
static __inline int
|
||||
pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
|
||||
{
|
||||
int len = 0;
|
||||
int i = 0;
|
||||
int r;
|
||||
|
||||
for (r = nmd->cur_rx_ring; ; ) {
|
||||
struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, r);
|
||||
uint32_t cur, idx;
|
||||
char *buf;
|
||||
size_t left;
|
||||
|
||||
if (nm_ring_empty(ring)) {
|
||||
r++;
|
||||
if (r > nmd->last_rx_ring)
|
||||
r = nmd->first_rx_ring;
|
||||
if (r == nmd->cur_rx_ring)
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
cur = ring->cur;
|
||||
idx = ring->slot[cur].buf_idx;
|
||||
buf = NETMAP_BUF(ring, idx);
|
||||
left = ring->slot[cur].len;
|
||||
|
||||
for (i = 0; i < iovcnt && left > 0; i++) {
|
||||
if (iov[i].iov_len > left)
|
||||
iov[i].iov_len = left;
|
||||
memcpy(iov[i].iov_base, &buf[len], iov[i].iov_len);
|
||||
len += iov[i].iov_len;
|
||||
left -= iov[i].iov_len;
|
||||
}
|
||||
ring->head = ring->cur = nm_ring_next(ring, cur);
|
||||
nmd->cur_rx_ring = r;
|
||||
ioctl(nmd->fd, NIOCRXSYNC, NULL);
|
||||
break;
|
||||
}
|
||||
for (; i < iovcnt; i++)
|
||||
iov[i].iov_len = 0;
|
||||
|
||||
return (len);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called to send a buffer chain out to the vale port
|
||||
*/
|
||||
static void
|
||||
pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
|
||||
int len)
|
||||
{
|
||||
static char pad[60]; /* all zero bytes */
|
||||
|
||||
if (sc->vsc_nmd == NULL)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If the length is < 60, pad out to that and add the
|
||||
* extra zero'd segment to the iov. It is guaranteed that
|
||||
* there is always an extra iov available by the caller.
|
||||
*/
|
||||
if (len < 60) {
|
||||
iov[iovcnt].iov_base = pad;
|
||||
iov[iovcnt].iov_len = 60 - len;
|
||||
iovcnt++;
|
||||
}
|
||||
(void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc)
|
||||
{
|
||||
struct iovec iov[VTNET_MAXSEGS], *riov;
|
||||
struct vqueue_info *vq;
|
||||
void *vrx;
|
||||
int len, n;
|
||||
uint16_t idx;
|
||||
|
||||
/*
|
||||
* Should never be called without a valid netmap descriptor
|
||||
*/
|
||||
assert(sc->vsc_nmd != NULL);
|
||||
|
||||
/*
|
||||
* But, will be called when the rx ring hasn't yet
|
||||
* been set up or the guest is resetting the device.
|
||||
*/
|
||||
if (!sc->vsc_rx_ready || sc->resetting) {
|
||||
/*
|
||||
* Drop the packet and try later.
|
||||
*/
|
||||
(void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for available rx buffers
|
||||
*/
|
||||
vq = &sc->vsc_queues[VTNET_RXQ];
|
||||
if (!vq_has_descs(vq)) {
|
||||
/*
|
||||
* Drop the packet and try later. Interrupt on
|
||||
* empty, if that's negotiated.
|
||||
*/
|
||||
(void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
|
||||
vq_endchains(vq, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
do {
|
||||
/*
|
||||
* Get descriptor chain.
|
||||
*/
|
||||
n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
|
||||
assert(n >= 1 && n <= VTNET_MAXSEGS);
|
||||
|
||||
/*
|
||||
* Get a pointer to the rx header, and use the
|
||||
* data immediately following it for the packet buffer.
|
||||
*/
|
||||
vrx = iov[0].iov_base;
|
||||
riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
|
||||
|
||||
len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n);
|
||||
|
||||
if (len == 0) {
|
||||
/*
|
||||
* No more packets, but still some avail ring
|
||||
* entries. Interrupt if needed/appropriate.
|
||||
*/
|
||||
vq_retchain(vq);
|
||||
vq_endchains(vq, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* The only valid field in the rx packet header is the
|
||||
* number of buffers if merged rx bufs were negotiated.
|
||||
*/
|
||||
memset(vrx, 0, sc->rx_vhdrlen);
|
||||
|
||||
if (sc->rx_merge) {
|
||||
struct virtio_net_rxhdr *vrxh;
|
||||
|
||||
vrxh = vrx;
|
||||
vrxh->vrh_bufs = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Release this chain and handle more chains.
|
||||
*/
|
||||
vq_relchain(vq, idx, len + sc->rx_vhdrlen);
|
||||
} while (vq_has_descs(vq));
|
||||
|
||||
/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
|
||||
vq_endchains(vq, 1);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_rx_callback(int fd, enum ev_type type, void *param)
|
||||
{
|
||||
struct pci_vtnet_softc *sc = param;
|
||||
|
||||
pthread_mutex_lock(&sc->rx_mtx);
|
||||
sc->rx_in_progress = 1;
|
||||
sc->pci_vtnet_rx(sc);
|
||||
sc->rx_in_progress = 0;
|
||||
pthread_mutex_unlock(&sc->rx_mtx);
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
|
||||
{
|
||||
struct pci_vtnet_softc *sc = vsc;
|
||||
|
||||
/*
|
||||
* A qnotify means that the rx process can now begin
|
||||
*/
|
||||
if (sc->vsc_rx_ready == 0) {
|
||||
sc->vsc_rx_ready = 1;
|
||||
vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
|
||||
{
|
||||
struct iovec iov[VTNET_MAXSEGS + 1];
|
||||
int i, n;
|
||||
int plen, tlen;
|
||||
uint16_t idx;
|
||||
|
||||
/*
|
||||
* Obtain chain of descriptors. The first one is
|
||||
* really the header descriptor, so we need to sum
|
||||
* up two lengths: packet length and transfer length.
|
||||
*/
|
||||
n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
|
||||
assert(n >= 1 && n <= VTNET_MAXSEGS);
|
||||
plen = 0;
|
||||
tlen = iov[0].iov_len;
|
||||
for (i = 1; i < n; i++) {
|
||||
plen += iov[i].iov_len;
|
||||
tlen += iov[i].iov_len;
|
||||
}
|
||||
|
||||
DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n));
|
||||
sc->pci_vtnet_tx(sc, &iov[1], n - 1, plen);
|
||||
|
||||
/* chain is processed, release it and set tlen */
|
||||
vq_relchain(vq, idx, tlen);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
|
||||
{
|
||||
struct pci_vtnet_softc *sc = vsc;
|
||||
|
||||
/*
|
||||
* Any ring entries to process?
|
||||
*/
|
||||
if (!vq_has_descs(vq))
|
||||
return;
|
||||
|
||||
/* Signal the tx thread for processing */
|
||||
pthread_mutex_lock(&sc->tx_mtx);
|
||||
vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
|
||||
if (sc->tx_in_progress == 0)
|
||||
pthread_cond_signal(&sc->tx_cond);
|
||||
pthread_mutex_unlock(&sc->tx_mtx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Thread which will handle processing of TX desc
|
||||
*/
|
||||
static void *
|
||||
pci_vtnet_tx_thread(void *param)
|
||||
{
|
||||
struct pci_vtnet_softc *sc = param;
|
||||
struct vqueue_info *vq;
|
||||
int error;
|
||||
|
||||
vq = &sc->vsc_queues[VTNET_TXQ];
|
||||
|
||||
/*
|
||||
* Let us wait till the tx queue pointers get initialised &
|
||||
* first tx signaled
|
||||
*/
|
||||
pthread_mutex_lock(&sc->tx_mtx);
|
||||
error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
|
||||
assert(error == 0);
|
||||
|
||||
for (;;) {
|
||||
/* note - tx mutex is locked here */
|
||||
while (sc->resetting || !vq_has_descs(vq)) {
|
||||
vq->vq_used->vu_flags &= ~VRING_USED_F_NO_NOTIFY;
|
||||
mb();
|
||||
if (!sc->resetting && vq_has_descs(vq))
|
||||
break;
|
||||
|
||||
sc->tx_in_progress = 0;
|
||||
error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
|
||||
assert(error == 0);
|
||||
}
|
||||
vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
|
||||
sc->tx_in_progress = 1;
|
||||
pthread_mutex_unlock(&sc->tx_mtx);
|
||||
|
||||
do {
|
||||
/*
|
||||
* Run through entries, placing them into
|
||||
* iovecs and sending when an end-of-packet
|
||||
* is found
|
||||
*/
|
||||
pci_vtnet_proctx(sc, vq);
|
||||
} while (vq_has_descs(vq));
|
||||
|
||||
/*
|
||||
* Generate an interrupt if needed.
|
||||
*/
|
||||
vq_endchains(vq, 1);
|
||||
|
||||
pthread_mutex_lock(&sc->tx_mtx);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef notyet
/*
 * Notify handler for the control queue; only logs the notification.
 * Compiled out until control queue support exists.
 */
static void
pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
{
	DPRINTF(("vtnet: control qnotify!\n\r"));
}
#endif
|
||||
|
||||
static int
|
||||
pci_vtnet_parsemac(char *mac_str, uint8_t *mac_addr)
|
||||
{
|
||||
struct ether_addr *ea;
|
||||
char *tmpstr;
|
||||
char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
|
||||
|
||||
tmpstr = strsep(&mac_str,"=");
|
||||
|
||||
if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
|
||||
ea = ether_aton(mac_str);
|
||||
|
||||
if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
|
||||
memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
|
||||
fprintf(stderr, "Invalid MAC %s\n", mac_str);
|
||||
return (EINVAL);
|
||||
} else
|
||||
memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_tap_setup(struct pci_vtnet_softc *sc, char *devname)
|
||||
{
|
||||
char tbuf[80];
|
||||
|
||||
strcpy(tbuf, "/dev/");
|
||||
strlcat(tbuf, devname, sizeof(tbuf));
|
||||
|
||||
sc->pci_vtnet_rx = pci_vtnet_tap_rx;
|
||||
sc->pci_vtnet_tx = pci_vtnet_tap_tx;
|
||||
|
||||
sc->vsc_tapfd = open(tbuf, O_RDWR);
|
||||
if (sc->vsc_tapfd == -1) {
|
||||
WPRINTF(("open of tap device %s failed\n", tbuf));
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set non-blocking and register for read
|
||||
* notifications with the event loop
|
||||
*/
|
||||
int opt = 1;
|
||||
if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
|
||||
WPRINTF(("tap device O_NONBLOCK failed\n"));
|
||||
close(sc->vsc_tapfd);
|
||||
sc->vsc_tapfd = -1;
|
||||
}
|
||||
|
||||
sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
|
||||
EVF_READ,
|
||||
pci_vtnet_rx_callback,
|
||||
sc);
|
||||
if (sc->vsc_mevp == NULL) {
|
||||
WPRINTF(("Could not register event\n"));
|
||||
close(sc->vsc_tapfd);
|
||||
sc->vsc_tapfd = -1;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_netmap_setup(struct pci_vtnet_softc *sc, char *ifname)
|
||||
{
|
||||
sc->pci_vtnet_rx = pci_vtnet_netmap_rx;
|
||||
sc->pci_vtnet_tx = pci_vtnet_netmap_tx;
|
||||
|
||||
sc->vsc_nmd = nm_open(ifname, NULL, 0, 0);
|
||||
if (sc->vsc_nmd == NULL) {
|
||||
WPRINTF(("open of netmap device %s failed\n", ifname));
|
||||
return;
|
||||
}
|
||||
|
||||
sc->vsc_mevp = mevent_add(sc->vsc_nmd->fd,
|
||||
EVF_READ,
|
||||
pci_vtnet_rx_callback,
|
||||
sc);
|
||||
if (sc->vsc_mevp == NULL) {
|
||||
WPRINTF(("Could not register event\n"));
|
||||
nm_close(sc->vsc_nmd);
|
||||
sc->vsc_nmd = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
MD5_CTX mdctx;
|
||||
unsigned char digest[16];
|
||||
char nstr[80];
|
||||
char tname[MAXCOMLEN + 1];
|
||||
struct pci_vtnet_softc *sc;
|
||||
char *devname;
|
||||
char *vtopts;
|
||||
int mac_provided;
|
||||
|
||||
sc = calloc(1, sizeof(struct pci_vtnet_softc));
|
||||
|
||||
pthread_mutex_init(&sc->vsc_mtx, NULL);
|
||||
|
||||
vi_softc_linkup(&sc->vsc_vs, &vtnet_vi_consts, sc, pi, sc->vsc_queues);
|
||||
sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
|
||||
|
||||
sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
|
||||
sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
|
||||
sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
|
||||
sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq;
|
||||
#ifdef notyet
|
||||
sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ;
|
||||
sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Attempt to open the tap device and read the MAC address
|
||||
* if specified
|
||||
*/
|
||||
mac_provided = 0;
|
||||
sc->vsc_tapfd = -1;
|
||||
sc->vsc_nmd = NULL;
|
||||
if (opts != NULL) {
|
||||
int err;
|
||||
|
||||
devname = vtopts = strdup(opts);
|
||||
(void) strsep(&vtopts, ",");
|
||||
|
||||
if (vtopts != NULL) {
|
||||
err = pci_vtnet_parsemac(vtopts, sc->vsc_config.mac);
|
||||
if (err != 0) {
|
||||
free(devname);
|
||||
return (err);
|
||||
}
|
||||
mac_provided = 1;
|
||||
}
|
||||
|
||||
if (strncmp(devname, "vale", 4) == 0)
|
||||
pci_vtnet_netmap_setup(sc, devname);
|
||||
if (strncmp(devname, "tap", 3) == 0 ||
|
||||
strncmp(devname, "vmnet", 5) == 0)
|
||||
pci_vtnet_tap_setup(sc, devname);
|
||||
|
||||
free(devname);
|
||||
}
|
||||
|
||||
/*
|
||||
* The default MAC address is the standard NetApp OUI of 00-a0-98,
|
||||
* followed by an MD5 of the PCI slot/func number and dev name
|
||||
*/
|
||||
if (!mac_provided) {
|
||||
snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
|
||||
pi->pi_func, vmname);
|
||||
|
||||
MD5Init(&mdctx);
|
||||
MD5Update(&mdctx, nstr, strlen(nstr));
|
||||
MD5Final(digest, &mdctx);
|
||||
|
||||
sc->vsc_config.mac[0] = 0x00;
|
||||
sc->vsc_config.mac[1] = 0xa0;
|
||||
sc->vsc_config.mac[2] = 0x98;
|
||||
sc->vsc_config.mac[3] = digest[0];
|
||||
sc->vsc_config.mac[4] = digest[1];
|
||||
sc->vsc_config.mac[5] = digest[2];
|
||||
}
|
||||
|
||||
/* initialize config space */
|
||||
pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
|
||||
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
|
||||
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
|
||||
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
|
||||
pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
|
||||
|
||||
/* Link is up if we managed to open tap device or vale port. */
|
||||
sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0 ||
|
||||
sc->vsc_nmd != NULL);
|
||||
|
||||
/* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
|
||||
if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix()))
|
||||
return (1);
|
||||
|
||||
/* use BAR 0 to map config regs in IO space */
|
||||
vi_set_io_bar(&sc->vsc_vs, 0);
|
||||
|
||||
sc->resetting = 0;
|
||||
|
||||
sc->rx_merge = 1;
|
||||
sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
|
||||
sc->rx_in_progress = 0;
|
||||
pthread_mutex_init(&sc->rx_mtx, NULL);
|
||||
|
||||
/*
|
||||
* Initialize tx semaphore & spawn TX processing thread.
|
||||
* As of now, only one thread for TX desc processing is
|
||||
* spawned.
|
||||
*/
|
||||
sc->tx_in_progress = 0;
|
||||
pthread_mutex_init(&sc->tx_mtx, NULL);
|
||||
pthread_cond_init(&sc->tx_cond, NULL);
|
||||
pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
|
||||
snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot,
|
||||
pi->pi_func);
|
||||
pthread_set_name_np(sc->tx_tid, tname);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
|
||||
{
|
||||
struct pci_vtnet_softc *sc = vsc;
|
||||
void *ptr;
|
||||
|
||||
if (offset < 6) {
|
||||
assert(offset + size <= 6);
|
||||
/*
|
||||
* The driver is allowed to change the MAC address
|
||||
*/
|
||||
ptr = &sc->vsc_config.mac[offset];
|
||||
memcpy(ptr, &value, size);
|
||||
} else {
|
||||
/* silently ignore other writes */
|
||||
DPRINTF(("vtnet: write to readonly reg %d\n\r", offset));
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval)
|
||||
{
|
||||
struct pci_vtnet_softc *sc = vsc;
|
||||
void *ptr;
|
||||
|
||||
ptr = (uint8_t *)&sc->vsc_config + offset;
|
||||
memcpy(retval, ptr, size);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
|
||||
{
|
||||
struct pci_vtnet_softc *sc = vsc;
|
||||
|
||||
sc->vsc_features = negotiated_features;
|
||||
|
||||
if (!(sc->vsc_features & VIRTIO_NET_F_MRG_RXBUF)) {
|
||||
sc->rx_merge = 0;
|
||||
/* non-merge rx header is 2 bytes shorter */
|
||||
sc->rx_vhdrlen -= 2;
|
||||
}
|
||||
}
|
||||
|
||||
struct pci_devemu pci_de_vnet = {
|
||||
.pe_emu = "virtio-net",
|
||||
.pe_init = pci_vtnet_init,
|
||||
.pe_barwrite = vi_pci_write,
|
||||
.pe_barread = vi_pci_read
|
||||
};
|
||||
PCI_EMUL_SET(pci_de_vnet);
|
189
pci_virtio_rnd.c
Normal file
189
pci_virtio_rnd.c
Normal file
@ -0,0 +1,189 @@
|
||||
/*-
|
||||
* Copyright (c) 2014 Nahanni Systems Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer
|
||||
* in this position and unchanged.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* virtio entropy device emulation.
|
||||
* Randomness is sourced from /dev/random which does not block
|
||||
* once it has been seeded at bootup.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/linker_set.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "pci_emul.h"
|
||||
#include "virtio.h"
|
||||
|
||||
#define VTRND_RINGSZ 64
|
||||
|
||||
|
||||
static int pci_vtrnd_debug;
|
||||
#define DPRINTF(params) if (pci_vtrnd_debug) printf params
|
||||
#define WPRINTF(params) printf params
|
||||
|
||||
/*
|
||||
* Per-device softc
|
||||
*/
|
||||
struct pci_vtrnd_softc {
|
||||
struct virtio_softc vrsc_vs;
|
||||
struct vqueue_info vrsc_vq;
|
||||
pthread_mutex_t vrsc_mtx;
|
||||
uint64_t vrsc_cfg;
|
||||
int vrsc_fd;
|
||||
};
|
||||
|
||||
static void pci_vtrnd_reset(void *);
|
||||
static void pci_vtrnd_notify(void *, struct vqueue_info *);
|
||||
|
||||
static struct virtio_consts vtrnd_vi_consts = {
|
||||
"vtrnd", /* our name */
|
||||
1, /* we support 1 virtqueue */
|
||||
0, /* config reg size */
|
||||
pci_vtrnd_reset, /* reset */
|
||||
pci_vtrnd_notify, /* device-wide qnotify */
|
||||
NULL, /* read virtio config */
|
||||
NULL, /* write virtio config */
|
||||
NULL, /* apply negotiated features */
|
||||
0, /* our capabilities */
|
||||
};
|
||||
|
||||
|
||||
static void
|
||||
pci_vtrnd_reset(void *vsc)
|
||||
{
|
||||
struct pci_vtrnd_softc *sc;
|
||||
|
||||
sc = vsc;
|
||||
|
||||
DPRINTF(("vtrnd: device reset requested !\n"));
|
||||
vi_reset_dev(&sc->vrsc_vs);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
pci_vtrnd_notify(void *vsc, struct vqueue_info *vq)
|
||||
{
|
||||
struct iovec iov;
|
||||
struct pci_vtrnd_softc *sc;
|
||||
int len;
|
||||
uint16_t idx;
|
||||
|
||||
sc = vsc;
|
||||
|
||||
if (sc->vrsc_fd < 0) {
|
||||
vq_endchains(vq, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
while (vq_has_descs(vq)) {
|
||||
vq_getchain(vq, &idx, &iov, 1, NULL);
|
||||
|
||||
len = read(sc->vrsc_fd, iov.iov_base, iov.iov_len);
|
||||
|
||||
DPRINTF(("vtrnd: vtrnd_notify(): %d\r\n", len));
|
||||
|
||||
/* Catastrophe if unable to read from /dev/random */
|
||||
assert(len > 0);
|
||||
|
||||
/*
|
||||
* Release this chain and handle more
|
||||
*/
|
||||
vq_relchain(vq, idx, len);
|
||||
}
|
||||
vq_endchains(vq, 1); /* Generate interrupt if appropriate. */
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
pci_vtrnd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
{
|
||||
struct pci_vtrnd_softc *sc;
|
||||
int fd;
|
||||
int len;
|
||||
uint8_t v;
|
||||
|
||||
/*
|
||||
* Should always be able to open /dev/random.
|
||||
*/
|
||||
fd = open("/dev/random", O_RDONLY | O_NONBLOCK);
|
||||
|
||||
assert(fd >= 0);
|
||||
|
||||
/*
|
||||
* Check that device is seeded and non-blocking.
|
||||
*/
|
||||
len = read(fd, &v, sizeof(v));
|
||||
if (len <= 0) {
|
||||
WPRINTF(("vtrnd: /dev/random not ready, read(): %d", len));
|
||||
return (1);
|
||||
}
|
||||
|
||||
sc = calloc(1, sizeof(struct pci_vtrnd_softc));
|
||||
|
||||
vi_softc_linkup(&sc->vrsc_vs, &vtrnd_vi_consts, sc, pi, &sc->vrsc_vq);
|
||||
sc->vrsc_vs.vs_mtx = &sc->vrsc_mtx;
|
||||
|
||||
sc->vrsc_vq.vq_qsize = VTRND_RINGSZ;
|
||||
|
||||
/* keep /dev/random opened while emulating */
|
||||
sc->vrsc_fd = fd;
|
||||
|
||||
/* initialize config space */
|
||||
pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_RANDOM);
|
||||
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
|
||||
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_CRYPTO);
|
||||
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_ENTROPY);
|
||||
pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
|
||||
|
||||
if (vi_intr_init(&sc->vrsc_vs, 1, fbsdrun_virtio_msix()))
|
||||
return (1);
|
||||
vi_set_io_bar(&sc->vrsc_vs, 0);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
||||
struct pci_devemu pci_de_vrnd = {
|
||||
.pe_emu = "virtio-rnd",
|
||||
.pe_init = pci_vtrnd_init,
|
||||
.pe_barwrite = vi_pci_write,
|
||||
.pe_barread = vi_pci_read
|
||||
};
|
||||
PCI_EMUL_SET(pci_de_vrnd);
|
312
pm.c
Normal file
312
pm.c
Normal file
@ -0,0 +1,312 @@
|
||||
/*-
|
||||
* Copyright (c) 2013 Hudson River Trading LLC
|
||||
* Written by: John H. Baldwin <jhb@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <machine/vmm.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <pthread.h>
|
||||
#include <signal.h>
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include "acpi.h"
|
||||
#include "inout.h"
|
||||
#include "mevent.h"
|
||||
#include "pci_irq.h"
|
||||
#include "pci_lpc.h"
|
||||
|
||||
static pthread_mutex_t pm_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
static struct mevent *power_button;
|
||||
static sig_t old_power_handler;
|
||||
|
||||
/*
|
||||
* Reset Control register at I/O port 0xcf9. Bit 2 forces a system
|
||||
* reset when it transitions from 0 to 1. Bit 1 selects the type of
|
||||
* reset to attempt: 0 selects a "soft" reset, and 1 selects a "hard"
|
||||
* reset.
|
||||
*/
|
||||
static int
|
||||
reset_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
int error;
|
||||
|
||||
static uint8_t reset_control;
|
||||
|
||||
if (bytes != 1)
|
||||
return (-1);
|
||||
if (in)
|
||||
*eax = reset_control;
|
||||
else {
|
||||
reset_control = *eax;
|
||||
|
||||
/* Treat hard and soft resets the same. */
|
||||
if (reset_control & 0x4) {
|
||||
error = vm_suspend(ctx, VM_SUSPEND_RESET);
|
||||
assert(error == 0 || errno == EALREADY);
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
INOUT_PORT(reset_reg, 0xCF9, IOPORT_F_INOUT, reset_handler);
|
||||
|
||||
/*
|
||||
* ACPI's SCI is a level-triggered interrupt.
|
||||
*/
|
||||
static int sci_active;
|
||||
|
||||
static void
|
||||
sci_assert(struct vmctx *ctx)
|
||||
{
|
||||
|
||||
if (sci_active)
|
||||
return;
|
||||
vm_isa_assert_irq(ctx, SCI_INT, SCI_INT);
|
||||
sci_active = 1;
|
||||
}
|
||||
|
||||
static void
|
||||
sci_deassert(struct vmctx *ctx)
|
||||
{
|
||||
|
||||
if (!sci_active)
|
||||
return;
|
||||
vm_isa_deassert_irq(ctx, SCI_INT, SCI_INT);
|
||||
sci_active = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Power Management 1 Event Registers
|
||||
*
|
||||
* The only power management event supported is a power button upon
|
||||
* receiving SIGTERM.
|
||||
*/
|
||||
static uint16_t pm1_enable, pm1_status;
|
||||
|
||||
#define PM1_TMR_STS 0x0001
|
||||
#define PM1_BM_STS 0x0010
|
||||
#define PM1_GBL_STS 0x0020
|
||||
#define PM1_PWRBTN_STS 0x0100
|
||||
#define PM1_SLPBTN_STS 0x0200
|
||||
#define PM1_RTC_STS 0x0400
|
||||
#define PM1_WAK_STS 0x8000
|
||||
|
||||
#define PM1_TMR_EN 0x0001
|
||||
#define PM1_GBL_EN 0x0020
|
||||
#define PM1_PWRBTN_EN 0x0100
|
||||
#define PM1_SLPBTN_EN 0x0200
|
||||
#define PM1_RTC_EN 0x0400
|
||||
|
||||
static void
|
||||
sci_update(struct vmctx *ctx)
|
||||
{
|
||||
int need_sci;
|
||||
|
||||
/* See if the SCI should be active or not. */
|
||||
need_sci = 0;
|
||||
if ((pm1_enable & PM1_TMR_EN) && (pm1_status & PM1_TMR_STS))
|
||||
need_sci = 1;
|
||||
if ((pm1_enable & PM1_GBL_EN) && (pm1_status & PM1_GBL_STS))
|
||||
need_sci = 1;
|
||||
if ((pm1_enable & PM1_PWRBTN_EN) && (pm1_status & PM1_PWRBTN_STS))
|
||||
need_sci = 1;
|
||||
if ((pm1_enable & PM1_SLPBTN_EN) && (pm1_status & PM1_SLPBTN_STS))
|
||||
need_sci = 1;
|
||||
if ((pm1_enable & PM1_RTC_EN) && (pm1_status & PM1_RTC_STS))
|
||||
need_sci = 1;
|
||||
if (need_sci)
|
||||
sci_assert(ctx);
|
||||
else
|
||||
sci_deassert(ctx);
|
||||
}
|
||||
|
||||
static int
|
||||
pm1_status_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
|
||||
if (bytes != 2)
|
||||
return (-1);
|
||||
|
||||
pthread_mutex_lock(&pm_lock);
|
||||
if (in)
|
||||
*eax = pm1_status;
|
||||
else {
|
||||
/*
|
||||
* Writes are only permitted to clear certain bits by
|
||||
* writing 1 to those flags.
|
||||
*/
|
||||
pm1_status &= ~(*eax & (PM1_WAK_STS | PM1_RTC_STS |
|
||||
PM1_SLPBTN_STS | PM1_PWRBTN_STS | PM1_BM_STS));
|
||||
sci_update(ctx);
|
||||
}
|
||||
pthread_mutex_unlock(&pm_lock);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
pm1_enable_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
|
||||
if (bytes != 2)
|
||||
return (-1);
|
||||
|
||||
pthread_mutex_lock(&pm_lock);
|
||||
if (in)
|
||||
*eax = pm1_enable;
|
||||
else {
|
||||
/*
|
||||
* Only permit certain bits to be set. We never use
|
||||
* the global lock, but ACPI-CA whines profusely if it
|
||||
* can't set GBL_EN.
|
||||
*/
|
||||
pm1_enable = *eax & (PM1_PWRBTN_EN | PM1_GBL_EN);
|
||||
sci_update(ctx);
|
||||
}
|
||||
pthread_mutex_unlock(&pm_lock);
|
||||
return (0);
|
||||
}
|
||||
INOUT_PORT(pm1_status, PM1A_EVT_ADDR, IOPORT_F_INOUT, pm1_status_handler);
|
||||
INOUT_PORT(pm1_enable, PM1A_EVT_ADDR + 2, IOPORT_F_INOUT, pm1_enable_handler);
|
||||
|
||||
static void
|
||||
power_button_handler(int signal, enum ev_type type, void *arg)
|
||||
{
|
||||
struct vmctx *ctx;
|
||||
|
||||
ctx = arg;
|
||||
pthread_mutex_lock(&pm_lock);
|
||||
if (!(pm1_status & PM1_PWRBTN_STS)) {
|
||||
pm1_status |= PM1_PWRBTN_STS;
|
||||
sci_update(ctx);
|
||||
}
|
||||
pthread_mutex_unlock(&pm_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Power Management 1 Control Register
|
||||
*
|
||||
* This is mostly unimplemented except that we wish to handle writes that
|
||||
* set SLP_EN to handle S5 (soft power off).
|
||||
*/
|
||||
static uint16_t pm1_control;
|
||||
|
||||
#define PM1_SCI_EN 0x0001
|
||||
#define PM1_SLP_TYP 0x1c00
|
||||
#define PM1_SLP_EN 0x2000
|
||||
#define PM1_ALWAYS_ZERO 0xc003
|
||||
|
||||
static int
|
||||
pm1_control_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (bytes != 2)
|
||||
return (-1);
|
||||
if (in)
|
||||
*eax = pm1_control;
|
||||
else {
|
||||
/*
|
||||
* Various bits are write-only or reserved, so force them
|
||||
* to zero in pm1_control. Always preserve SCI_EN as OSPM
|
||||
* can never change it.
|
||||
*/
|
||||
pm1_control = (pm1_control & PM1_SCI_EN) |
|
||||
(*eax & ~(PM1_SLP_EN | PM1_ALWAYS_ZERO));
|
||||
|
||||
/*
|
||||
* If SLP_EN is set, check for S5. Bhyve's _S5_ method
|
||||
* says that '5' should be stored in SLP_TYP for S5.
|
||||
*/
|
||||
if (*eax & PM1_SLP_EN) {
|
||||
if ((pm1_control & PM1_SLP_TYP) >> 10 == 5) {
|
||||
error = vm_suspend(ctx, VM_SUSPEND_POWEROFF);
|
||||
assert(error == 0 || errno == EALREADY);
|
||||
}
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
INOUT_PORT(pm1_control, PM1A_CNT_ADDR, IOPORT_F_INOUT, pm1_control_handler);
|
||||
SYSRES_IO(PM1A_EVT_ADDR, 8);
|
||||
|
||||
/*
|
||||
* ACPI SMI Command Register
|
||||
*
|
||||
* This write-only register is used to enable and disable ACPI.
|
||||
*/
|
||||
static int
|
||||
smi_cmd_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
||||
uint32_t *eax, void *arg)
|
||||
{
|
||||
|
||||
assert(!in);
|
||||
if (bytes != 1)
|
||||
return (-1);
|
||||
|
||||
pthread_mutex_lock(&pm_lock);
|
||||
switch (*eax) {
|
||||
case BHYVE_ACPI_ENABLE:
|
||||
pm1_control |= PM1_SCI_EN;
|
||||
if (power_button == NULL) {
|
||||
power_button = mevent_add(SIGTERM, EVF_SIGNAL,
|
||||
power_button_handler, ctx);
|
||||
old_power_handler = signal(SIGTERM, SIG_IGN);
|
||||
}
|
||||
break;
|
||||
case BHYVE_ACPI_DISABLE:
|
||||
pm1_control &= ~PM1_SCI_EN;
|
||||
if (power_button != NULL) {
|
||||
mevent_delete(power_button);
|
||||
power_button = NULL;
|
||||
signal(SIGTERM, old_power_handler);
|
||||
}
|
||||
break;
|
||||
}
|
||||
pthread_mutex_unlock(&pm_lock);
|
||||
return (0);
|
||||
}
|
||||
INOUT_PORT(smi_cmd, SMI_CMD, IOPORT_F_OUT, smi_cmd_handler);
|
||||
SYSRES_IO(SMI_CMD, 1);
|
||||
|
||||
void
|
||||
sci_init(struct vmctx *ctx)
|
||||
{
|
||||
|
||||
/*
|
||||
* Mark ACPI's SCI as level trigger and bump its use count
|
||||
* in the PIRQ router.
|
||||
*/
|
||||
pci_irq_use(SCI_INT);
|
||||
vm_isa_set_irq_trigger(ctx, SCI_INT, LEVEL_TRIGGER);
|
||||
}
|
53
post.c
Normal file
53
post.c
Normal file
@ -0,0 +1,53 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "inout.h"
|
||||
#include "pci_lpc.h"
|
||||
|
||||
/*
 * I/O handler for the POST data port.  Only reads are expected (asserted);
 * a one-byte read yields 0xff, any other access width is rejected with -1
 * and leaves *eax untouched.
 */
static int
post_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{

	assert(in == 1);

	if (bytes == 1) {
		*eax = 0xff;		/* return some garbage */
		return (0);
	}

	return (-1);
}
|
||||
|
||||
INOUT_PORT(post, 0x84, IOPORT_F_IN, post_data_handler);
|
||||
SYSRES_IO(0x84, 1);
|
129
rtc.c
Normal file
129
rtc.c
Normal file
@ -0,0 +1,129 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <time.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include "acpi.h"
|
||||
#include "pci_lpc.h"
|
||||
#include "rtc.h"
|
||||
|
||||
#define IO_RTC 0x70
|
||||
|
||||
#define RTC_LMEM_LSB 0x34
|
||||
#define RTC_LMEM_MSB 0x35
|
||||
#define RTC_HMEM_LSB 0x5b
|
||||
#define RTC_HMEM_SB 0x5c
|
||||
#define RTC_HMEM_MSB 0x5d
|
||||
|
||||
#define m_64KB (64*1024)
|
||||
#define m_16MB (16*1024*1024)
|
||||
#define m_4GB (4ULL*1024*1024*1024)
|
||||
|
||||
/*
|
||||
* Returns the current RTC time as number of seconds since 00:00:00 Jan 1, 1970
|
||||
*/
|
||||
/*
 * Return the time to load into the guest RTC, in seconds since the epoch.
 *
 * With use_localtime == 0 this is the current UTC time.  Otherwise the
 * current time is broken down in the host's local timezone and converted
 * back as if those fields were UTC, which yields a value the guest will
 * display as host-local wall-clock time.  If the local-time conversion
 * fails, the UTC value is returned rather than converting an
 * uninitialised struct tm.  ctx is not used.
 */
static time_t
rtc_time(struct vmctx *ctx, int use_localtime)
{
	struct tm tm;
	time_t t;

	time(&t);
	if (use_localtime && localtime_r(&t, &tm) != NULL)
		t = timegm(&tm);
	return (t);
}
|
||||
|
||||
void
|
||||
rtc_init(struct vmctx *ctx, int use_localtime)
|
||||
{
|
||||
size_t himem;
|
||||
size_t lomem;
|
||||
int err;
|
||||
|
||||
/* XXX init diag/reset code/equipment/checksum ? */
|
||||
|
||||
/*
|
||||
* Report guest memory size in nvram cells as required by UEFI.
|
||||
* Little-endian encoding.
|
||||
* 0x34/0x35 - 64KB chunks above 16MB, below 4GB
|
||||
* 0x5b/0x5c/0x5d - 64KB chunks above 4GB
|
||||
*/
|
||||
lomem = (vm_get_lowmem_size(ctx) - m_16MB) / m_64KB;
|
||||
err = vm_rtc_write(ctx, RTC_LMEM_LSB, lomem);
|
||||
assert(err == 0);
|
||||
err = vm_rtc_write(ctx, RTC_LMEM_MSB, lomem >> 8);
|
||||
assert(err == 0);
|
||||
|
||||
himem = vm_get_highmem_size(ctx) / m_64KB;
|
||||
err = vm_rtc_write(ctx, RTC_HMEM_LSB, himem);
|
||||
assert(err == 0);
|
||||
err = vm_rtc_write(ctx, RTC_HMEM_SB, himem >> 8);
|
||||
assert(err == 0);
|
||||
err = vm_rtc_write(ctx, RTC_HMEM_MSB, himem >> 16);
|
||||
assert(err == 0);
|
||||
|
||||
err = vm_rtc_settime(ctx, rtc_time(ctx, use_localtime));
|
||||
assert(err == 0);
|
||||
}
|
||||
|
||||
static void
|
||||
rtc_dsdt(void)
|
||||
{
|
||||
|
||||
dsdt_line("");
|
||||
dsdt_line("Device (RTC)");
|
||||
dsdt_line("{");
|
||||
dsdt_line(" Name (_HID, EisaId (\"PNP0B00\"))");
|
||||
dsdt_line(" Name (_CRS, ResourceTemplate ()");
|
||||
dsdt_line(" {");
|
||||
dsdt_indent(2);
|
||||
dsdt_fixed_ioport(IO_RTC, 2);
|
||||
dsdt_fixed_irq(8);
|
||||
dsdt_unindent(2);
|
||||
dsdt_line(" })");
|
||||
dsdt_line("}");
|
||||
}
|
||||
LPC_DSDT(rtc_dsdt);
|
||||
|
||||
/*
|
||||
* Reserve the extended RTC I/O ports although they are not emulated at this
|
||||
* time.
|
||||
*/
|
||||
SYSRES_IO(0x72, 6);
|
34
rtc.h
Normal file
34
rtc.h
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Peter Grehan <grehan@freebsd.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _RTC_H_
|
||||
#define _RTC_H_
|
||||
|
||||
void rtc_init(struct vmctx *ctx, int use_localtime);
|
||||
|
||||
#endif /* _RTC_H_ */
|
827
smbiostbl.c
Normal file
827
smbiostbl.c
Normal file
@ -0,0 +1,827 @@
|
||||
/*-
|
||||
* Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <md5.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <uuid.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "smbiostbl.h"
|
||||
|
||||
#define MB (1024*1024)
|
||||
#define GB (1024ULL*1024*1024)
|
||||
|
||||
#define SMBIOS_BASE 0xF1000
|
||||
|
||||
/* (BHYVE_ACPI_BASE - SMBIOS_BASE) */
|
||||
#define SMBIOS_MAX_LENGTH (0xF2400 - 0xF1000)
|
||||
|
||||
#define SMBIOS_TYPE_BIOS 0
|
||||
#define SMBIOS_TYPE_SYSTEM 1
|
||||
#define SMBIOS_TYPE_CHASSIS 3
|
||||
#define SMBIOS_TYPE_PROCESSOR 4
|
||||
#define SMBIOS_TYPE_MEMARRAY 16
|
||||
#define SMBIOS_TYPE_MEMDEVICE 17
|
||||
#define SMBIOS_TYPE_MEMARRAYMAP 19
|
||||
#define SMBIOS_TYPE_BOOT 32
|
||||
#define SMBIOS_TYPE_EOT 127
|
||||
|
||||
/*
 * Header that starts every SMBIOS structure.  All table types below embed
 * it as their first member.
 */
struct smbios_structure {
	uint8_t		type;		/* structure type (SMBIOS_TYPE_*) */
	uint8_t		length;		/* length of the formatted area */
	uint16_t	handle;		/* handle assigned when emitted */
} __packed;
|
||||
|
||||
/*
 * Signature shared by all structure initializers.  An initializer writes
 * one or more structures starting at 'curaddr', stores the address just
 * past what it wrote in '*endaddr', and bumps the structure count '*n'.
 * It returns 0 on success and non-zero on failure.
 */
typedef int (*initializer_func_t)(struct smbios_structure *template_entry,
    const char **template_strings, char *curaddr, char **endaddr,
    uint16_t *n, uint16_t *size);
|
||||
|
||||
struct smbios_template_entry {
|
||||
struct smbios_structure *entry;
|
||||
const char **strings;
|
||||
initializer_func_t initializer;
|
||||
};
|
||||
|
||||
/*
 * SMBIOS Structure Table Entry Point
 *
 * Anchor strings and their lengths (the lengths exclude the NUL).
 */
#define	SMBIOS_ENTRY_EANCHOR	"_SM_"
#define	SMBIOS_ENTRY_EANCHORLEN	4
#define	SMBIOS_ENTRY_IANCHOR	"_DMI_"
#define	SMBIOS_ENTRY_IANCHORLEN	5

struct smbios_entry_point {
	char		eanchor[4];	/* anchor tag */
	uint8_t		echecksum;	/* checksum of the entry point */
	uint8_t		eplen;		/* entry point length in bytes */
	uint8_t		major;		/* SMBIOS spec major version */
	uint8_t		minor;		/* SMBIOS spec minor version */
	uint16_t	maxssize;	/* max size in bytes of a struct */
	uint8_t		revision;	/* entry point revision */
	uint8_t		format[5];	/* revision-specific data */
	char		ianchor[5];	/* intermediate anchor tag */
	uint8_t		ichecksum;	/* intermediate checksum */
	uint16_t	stlen;		/* structure table length, bytes */
	uint32_t	staddr;		/* structure table physical addr */
	uint16_t	stnum;		/* number of table entries */
	uint8_t		bcdrev;		/* DMI version as a BCD value */
} __packed;
|
||||
|
||||
/*
|
||||
* BIOS Information
|
||||
*/
|
||||
#define SMBIOS_FL_ISA 0x00000010 /* ISA is supported */
|
||||
#define SMBIOS_FL_PCI 0x00000080 /* PCI is supported */
|
||||
#define SMBIOS_FL_SHADOW 0x00001000 /* BIOS shadowing is allowed */
|
||||
#define SMBIOS_FL_CDBOOT 0x00008000 /* Boot from CD is supported */
|
||||
#define SMBIOS_FL_SELBOOT 0x00010000 /* Selectable Boot supported */
|
||||
#define SMBIOS_FL_EDD 0x00080000 /* EDD Spec is supported */
|
||||
|
||||
#define SMBIOS_XB1_FL_ACPI 0x00000001 /* ACPI is supported */
|
||||
|
||||
#define SMBIOS_XB2_FL_BBS 0x00000001 /* BIOS Boot Specification */
|
||||
#define SMBIOS_XB2_FL_VM 0x00000010 /* Virtual Machine */
|
||||
|
||||
struct smbios_table_type0 {
|
||||
struct smbios_structure header;
|
||||
uint8_t vendor; /* vendor string */
|
||||
uint8_t version; /* version string */
|
||||
uint16_t segment; /* address segment location */
|
||||
uint8_t rel_date; /* release date */
|
||||
uint8_t size; /* rom size */
|
||||
uint64_t cflags; /* characteristics */
|
||||
uint8_t xc_bytes[2]; /* characteristics ext bytes */
|
||||
uint8_t sb_major_rel; /* system bios version */
|
||||
uint8_t sb_minor_rele;
|
||||
uint8_t ecfw_major_rel; /* embedded ctrl fw version */
|
||||
uint8_t ecfw_minor_rel;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* System Information
|
||||
*/
|
||||
#define SMBIOS_WAKEUP_SWITCH 0x06 /* power switch */
|
||||
|
||||
struct smbios_table_type1 {
|
||||
struct smbios_structure header;
|
||||
uint8_t manufacturer; /* manufacturer string */
|
||||
uint8_t product; /* product name string */
|
||||
uint8_t version; /* version string */
|
||||
uint8_t serial; /* serial number string */
|
||||
uint8_t uuid[16]; /* uuid byte array */
|
||||
uint8_t wakeup; /* wake-up event */
|
||||
uint8_t sku; /* sku number string */
|
||||
uint8_t family; /* family name string */
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* System Enclosure or Chassis
|
||||
*/
|
||||
#define SMBIOS_CHT_UNKNOWN 0x02 /* unknown */
|
||||
|
||||
#define SMBIOS_CHST_SAFE 0x03 /* safe */
|
||||
|
||||
#define SMBIOS_CHSC_NONE 0x03 /* none */
|
||||
|
||||
struct smbios_table_type3 {
|
||||
struct smbios_structure header;
|
||||
uint8_t manufacturer; /* manufacturer string */
|
||||
uint8_t type; /* type */
|
||||
uint8_t version; /* version string */
|
||||
uint8_t serial; /* serial number string */
|
||||
uint8_t asset; /* asset tag string */
|
||||
uint8_t bustate; /* boot-up state */
|
||||
uint8_t psstate; /* power supply state */
|
||||
uint8_t tstate; /* thermal state */
|
||||
uint8_t security; /* security status */
|
||||
uint8_t uheight; /* height in 'u's */
|
||||
uint8_t cords; /* number of power cords */
|
||||
uint8_t elems; /* number of element records */
|
||||
uint8_t elemlen; /* length of records */
|
||||
uint8_t sku; /* sku number string */
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* Processor Information
|
||||
*/
|
||||
#define SMBIOS_PRT_CENTRAL 0x03 /* central processor */
|
||||
|
||||
#define SMBIOS_PRF_OTHER 0x01 /* other */
|
||||
|
||||
#define SMBIOS_PRS_PRESENT 0x40 /* socket is populated */
|
||||
#define SMBIOS_PRS_ENABLED 0x1 /* enabled */
|
||||
|
||||
#define SMBIOS_PRU_NONE 0x06 /* none */
|
||||
|
||||
#define SMBIOS_PFL_64B 0x04 /* 64-bit capable */
|
||||
|
||||
struct smbios_table_type4 {
|
||||
struct smbios_structure header;
|
||||
uint8_t socket; /* socket designation string */
|
||||
uint8_t type; /* processor type */
|
||||
uint8_t family; /* processor family */
|
||||
uint8_t manufacturer; /* manufacturer string */
|
||||
uint64_t cpuid; /* processor cpuid */
|
||||
uint8_t version; /* version string */
|
||||
uint8_t voltage; /* voltage */
|
||||
uint16_t clkspeed; /* ext clock speed in mhz */
|
||||
uint16_t maxspeed; /* maximum speed in mhz */
|
||||
uint16_t curspeed; /* current speed in mhz */
|
||||
uint8_t status; /* status */
|
||||
uint8_t upgrade; /* upgrade */
|
||||
uint16_t l1handle; /* l1 cache handle */
|
||||
uint16_t l2handle; /* l2 cache handle */
|
||||
uint16_t l3handle; /* l3 cache handle */
|
||||
uint8_t serial; /* serial number string */
|
||||
uint8_t asset; /* asset tag string */
|
||||
uint8_t part; /* part number string */
|
||||
uint8_t cores; /* cores per socket */
|
||||
uint8_t ecores; /* enabled cores */
|
||||
uint8_t threads; /* threads per socket */
|
||||
uint16_t cflags; /* processor characteristics */
|
||||
uint16_t family2; /* processor family 2 */
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* Physical Memory Array
|
||||
*/
|
||||
#define SMBIOS_MAL_SYSMB 0x03 /* system board or motherboard */
|
||||
|
||||
#define SMBIOS_MAU_SYSTEM 0x03 /* system memory */
|
||||
|
||||
#define SMBIOS_MAE_NONE 0x03 /* none */
|
||||
|
||||
struct smbios_table_type16 {
|
||||
struct smbios_structure header;
|
||||
uint8_t location; /* physical device location */
|
||||
uint8_t use; /* device functional purpose */
|
||||
uint8_t ecc; /* err detect/correct method */
|
||||
uint32_t size; /* max mem capacity in kb */
|
||||
uint16_t errhand; /* handle of error (if any) */
|
||||
uint16_t ndevs; /* num of slots or sockets */
|
||||
uint64_t xsize; /* max mem capacity in bytes */
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* Memory Device
|
||||
*/
|
||||
#define SMBIOS_MDFF_UNKNOWN 0x02 /* unknown */
|
||||
|
||||
#define SMBIOS_MDT_UNKNOWN 0x02 /* unknown */
|
||||
|
||||
#define SMBIOS_MDF_UNKNOWN 0x0004 /* unknown */
|
||||
|
||||
struct smbios_table_type17 {
|
||||
struct smbios_structure header;
|
||||
uint16_t arrayhand; /* handle of physl mem array */
|
||||
uint16_t errhand; /* handle of mem error data */
|
||||
uint16_t twidth; /* total width in bits */
|
||||
uint16_t dwidth; /* data width in bits */
|
||||
uint16_t size; /* size in bytes */
|
||||
uint8_t form; /* form factor */
|
||||
uint8_t set; /* set */
|
||||
uint8_t dloc; /* device locator string */
|
||||
uint8_t bloc; /* phys bank locator string */
|
||||
uint8_t type; /* memory type */
|
||||
uint16_t flags; /* memory characteristics */
|
||||
uint16_t maxspeed; /* maximum speed in mhz */
|
||||
uint8_t manufacturer; /* manufacturer string */
|
||||
uint8_t serial; /* serial number string */
|
||||
uint8_t asset; /* asset tag string */
|
||||
uint8_t part; /* part number string */
|
||||
uint8_t attributes; /* attributes */
|
||||
uint32_t xsize; /* extended size in mbs */
|
||||
uint16_t curspeed; /* current speed in mhz */
|
||||
uint16_t minvoltage; /* minimum voltage */
|
||||
uint16_t maxvoltage; /* maximum voltage */
|
||||
uint16_t curvoltage; /* configured voltage */
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* Memory Array Mapped Address
|
||||
*/
|
||||
struct smbios_table_type19 {
|
||||
struct smbios_structure header;
|
||||
uint32_t saddr; /* start phys addr in kb */
|
||||
uint32_t eaddr; /* end phys addr in kb */
|
||||
uint16_t arrayhand; /* physical mem array handle */
|
||||
uint8_t width; /* num of dev in row */
|
||||
uint64_t xsaddr; /* start phys addr in bytes */
|
||||
uint64_t xeaddr; /* end phys addr in bytes */
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* System Boot Information
|
||||
*/
|
||||
#define SMBIOS_BOOT_NORMAL 0 /* no errors detected */
|
||||
|
||||
struct smbios_table_type32 {
|
||||
struct smbios_structure header;
|
||||
uint8_t reserved[6];
|
||||
uint8_t status; /* boot status */
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* End-of-Table
|
||||
*/
|
||||
struct smbios_table_type127 {
|
||||
struct smbios_structure header;
|
||||
} __packed;
|
||||
|
||||
struct smbios_table_type0 smbios_type0_template = {
|
||||
{ SMBIOS_TYPE_BIOS, sizeof (struct smbios_table_type0), 0 },
|
||||
1, /* bios vendor string */
|
||||
2, /* bios version string */
|
||||
0xF000, /* bios address segment location */
|
||||
3, /* bios release date */
|
||||
0x0, /* bios size (64k * (n + 1) is the size in bytes) */
|
||||
SMBIOS_FL_ISA | SMBIOS_FL_PCI | SMBIOS_FL_SHADOW |
|
||||
SMBIOS_FL_CDBOOT | SMBIOS_FL_EDD,
|
||||
{ SMBIOS_XB1_FL_ACPI, SMBIOS_XB2_FL_BBS | SMBIOS_XB2_FL_VM },
|
||||
0x0, /* bios major release */
|
||||
0x0, /* bios minor release */
|
||||
0xff, /* embedded controller firmware major release */
|
||||
0xff /* embedded controller firmware minor release */
|
||||
};
|
||||
|
||||
const char *smbios_type0_strings[] = {
|
||||
"BHYVE", /* vendor string */
|
||||
"1.00", /* bios version string */
|
||||
"03/14/2014", /* bios release date string */
|
||||
NULL
|
||||
};
|
||||
|
||||
struct smbios_table_type1 smbios_type1_template = {
|
||||
{ SMBIOS_TYPE_SYSTEM, sizeof (struct smbios_table_type1), 0 },
|
||||
1, /* manufacturer string */
|
||||
2, /* product string */
|
||||
3, /* version string */
|
||||
4, /* serial number string */
|
||||
{ 0 },
|
||||
SMBIOS_WAKEUP_SWITCH,
|
||||
5, /* sku string */
|
||||
6 /* family string */
|
||||
};
|
||||
|
||||
static int smbios_type1_initializer(struct smbios_structure *template_entry,
|
||||
const char **template_strings, char *curaddr, char **endaddr,
|
||||
uint16_t *n, uint16_t *size);
|
||||
|
||||
const char *smbios_type1_strings[] = {
|
||||
" ", /* manufacturer string */
|
||||
"BHYVE", /* product name string */
|
||||
"1.0", /* version string */
|
||||
"None", /* serial number string */
|
||||
"None", /* sku string */
|
||||
" ", /* family name string */
|
||||
NULL
|
||||
};
|
||||
|
||||
struct smbios_table_type3 smbios_type3_template = {
|
||||
{ SMBIOS_TYPE_CHASSIS, sizeof (struct smbios_table_type3), 0 },
|
||||
1, /* manufacturer string */
|
||||
SMBIOS_CHT_UNKNOWN,
|
||||
2, /* version string */
|
||||
3, /* serial number string */
|
||||
4, /* asset tag string */
|
||||
SMBIOS_CHST_SAFE,
|
||||
SMBIOS_CHST_SAFE,
|
||||
SMBIOS_CHST_SAFE,
|
||||
SMBIOS_CHSC_NONE,
|
||||
0, /* height in 'u's (0=enclosure height unspecified) */
|
||||
0, /* number of power cords (0=number unspecified) */
|
||||
0, /* number of contained element records */
|
||||
0, /* length of records */
|
||||
5 /* sku number string */
|
||||
};
|
||||
|
||||
const char *smbios_type3_strings[] = {
|
||||
" ", /* manufacturer string */
|
||||
"1.0", /* version string */
|
||||
"None", /* serial number string */
|
||||
"None", /* asset tag string */
|
||||
"None", /* sku number string */
|
||||
NULL
|
||||
};
|
||||
|
||||
struct smbios_table_type4 smbios_type4_template = {
|
||||
{ SMBIOS_TYPE_PROCESSOR, sizeof (struct smbios_table_type4), 0 },
|
||||
1, /* socket designation string */
|
||||
SMBIOS_PRT_CENTRAL,
|
||||
SMBIOS_PRF_OTHER,
|
||||
2, /* manufacturer string */
|
||||
0, /* cpuid */
|
||||
3, /* version string */
|
||||
0, /* voltage */
|
||||
0, /* external clock frequency in mhz (0=unknown) */
|
||||
0, /* maximum frequency in mhz (0=unknown) */
|
||||
0, /* current frequency in mhz (0=unknown) */
|
||||
SMBIOS_PRS_PRESENT | SMBIOS_PRS_ENABLED,
|
||||
SMBIOS_PRU_NONE,
|
||||
-1, /* l1 cache handle */
|
||||
-1, /* l2 cache handle */
|
||||
-1, /* l3 cache handle */
|
||||
4, /* serial number string */
|
||||
5, /* asset tag string */
|
||||
6, /* part number string */
|
||||
0, /* cores per socket (0=unknown) */
|
||||
0, /* enabled cores per socket (0=unknown) */
|
||||
0, /* threads per socket (0=unknown) */
|
||||
SMBIOS_PFL_64B,
|
||||
SMBIOS_PRF_OTHER
|
||||
};
|
||||
|
||||
const char *smbios_type4_strings[] = {
|
||||
" ", /* socket designation string */
|
||||
" ", /* manufacturer string */
|
||||
" ", /* version string */
|
||||
"None", /* serial number string */
|
||||
"None", /* asset tag string */
|
||||
"None", /* part number string */
|
||||
NULL
|
||||
};
|
||||
|
||||
static int smbios_type4_initializer(struct smbios_structure *template_entry,
|
||||
const char **template_strings, char *curaddr, char **endaddr,
|
||||
uint16_t *n, uint16_t *size);
|
||||
|
||||
struct smbios_table_type16 smbios_type16_template = {
|
||||
{ SMBIOS_TYPE_MEMARRAY, sizeof (struct smbios_table_type16), 0 },
|
||||
SMBIOS_MAL_SYSMB,
|
||||
SMBIOS_MAU_SYSTEM,
|
||||
SMBIOS_MAE_NONE,
|
||||
0x80000000, /* max mem capacity in kb (0x80000000=use extended) */
|
||||
-1, /* handle of error (if any) */
|
||||
0, /* number of slots or sockets (TBD) */
|
||||
0 /* extended maximum memory capacity in bytes (TBD) */
|
||||
};
|
||||
|
||||
static int smbios_type16_initializer(struct smbios_structure *template_entry,
|
||||
const char **template_strings, char *curaddr, char **endaddr,
|
||||
uint16_t *n, uint16_t *size);
|
||||
|
||||
struct smbios_table_type17 smbios_type17_template = {
|
||||
{ SMBIOS_TYPE_MEMDEVICE, sizeof (struct smbios_table_type17), 0 },
|
||||
-1, /* handle of physical memory array */
|
||||
-1, /* handle of memory error data */
|
||||
64, /* total width in bits including ecc */
|
||||
64, /* data width in bits */
|
||||
0x7fff, /* size in bytes (0x7fff=use extended)*/
|
||||
SMBIOS_MDFF_UNKNOWN,
|
||||
0, /* set (0x00=none, 0xff=unknown) */
|
||||
1, /* device locator string */
|
||||
2, /* physical bank locator string */
|
||||
SMBIOS_MDT_UNKNOWN,
|
||||
SMBIOS_MDF_UNKNOWN,
|
||||
0, /* maximum memory speed in mhz (0=unknown) */
|
||||
3, /* manufacturer string */
|
||||
4, /* serial number string */
|
||||
5, /* asset tag string */
|
||||
6, /* part number string */
|
||||
0, /* attributes (0=unknown rank information) */
|
||||
0, /* extended size in mb (TBD) */
|
||||
0, /* current speed in mhz (0=unknown) */
|
||||
0, /* minimum voltage in mv (0=unknown) */
|
||||
0, /* maximum voltage in mv (0=unknown) */
|
||||
0 /* configured voltage in mv (0=unknown) */
|
||||
};
|
||||
|
||||
const char *smbios_type17_strings[] = {
|
||||
" ", /* device locator string */
|
||||
" ", /* physical bank locator string */
|
||||
" ", /* manufacturer string */
|
||||
"None", /* serial number string */
|
||||
"None", /* asset tag string */
|
||||
"None", /* part number string */
|
||||
NULL
|
||||
};
|
||||
|
||||
static int smbios_type17_initializer(struct smbios_structure *template_entry,
|
||||
const char **template_strings, char *curaddr, char **endaddr,
|
||||
uint16_t *n, uint16_t *size);
|
||||
|
||||
struct smbios_table_type19 smbios_type19_template = {
|
||||
{ SMBIOS_TYPE_MEMARRAYMAP, sizeof (struct smbios_table_type19), 0 },
|
||||
0xffffffff, /* starting phys addr in kb (0xffffffff=use ext) */
|
||||
0xffffffff, /* ending phys addr in kb (0xffffffff=use ext) */
|
||||
-1, /* physical memory array handle */
|
||||
1, /* number of devices that form a row */
|
||||
0, /* extended starting phys addr in bytes (TDB) */
|
||||
0 /* extended ending phys addr in bytes (TDB) */
|
||||
};
|
||||
|
||||
static int smbios_type19_initializer(struct smbios_structure *template_entry,
|
||||
const char **template_strings, char *curaddr, char **endaddr,
|
||||
uint16_t *n, uint16_t *size);
|
||||
|
||||
struct smbios_table_type32 smbios_type32_template = {
|
||||
{ SMBIOS_TYPE_BOOT, sizeof (struct smbios_table_type32), 0 },
|
||||
{ 0, 0, 0, 0, 0, 0 },
|
||||
SMBIOS_BOOT_NORMAL
|
||||
};
|
||||
|
||||
struct smbios_table_type127 smbios_type127_template = {
|
||||
{ SMBIOS_TYPE_EOT, sizeof (struct smbios_table_type127), 0 }
|
||||
};
|
||||
|
||||
static int smbios_generic_initializer(struct smbios_structure *template_entry,
|
||||
const char **template_strings, char *curaddr, char **endaddr,
|
||||
uint16_t *n, uint16_t *size);
|
||||
|
||||
static struct smbios_template_entry smbios_template[] = {
|
||||
{ (struct smbios_structure *)&smbios_type0_template,
|
||||
smbios_type0_strings,
|
||||
smbios_generic_initializer },
|
||||
{ (struct smbios_structure *)&smbios_type1_template,
|
||||
smbios_type1_strings,
|
||||
smbios_type1_initializer },
|
||||
{ (struct smbios_structure *)&smbios_type3_template,
|
||||
smbios_type3_strings,
|
||||
smbios_generic_initializer },
|
||||
{ (struct smbios_structure *)&smbios_type4_template,
|
||||
smbios_type4_strings,
|
||||
smbios_type4_initializer },
|
||||
{ (struct smbios_structure *)&smbios_type16_template,
|
||||
NULL,
|
||||
smbios_type16_initializer },
|
||||
{ (struct smbios_structure *)&smbios_type17_template,
|
||||
smbios_type17_strings,
|
||||
smbios_type17_initializer },
|
||||
{ (struct smbios_structure *)&smbios_type19_template,
|
||||
NULL,
|
||||
smbios_type19_initializer },
|
||||
{ (struct smbios_structure *)&smbios_type32_template,
|
||||
NULL,
|
||||
smbios_generic_initializer },
|
||||
{ (struct smbios_structure *)&smbios_type127_template,
|
||||
NULL,
|
||||
smbios_generic_initializer },
|
||||
{ NULL,NULL, NULL }
|
||||
};
|
||||
|
||||
/* Guest low/high memory sizes, loaded by smbios_build(). */
static uint64_t guest_lomem;
static uint64_t guest_himem;

/* Set by the type 16 initializer; copied into type 17 and 19 entries. */
static uint16_t type16_handle;
|
||||
|
||||
static int
|
||||
smbios_generic_initializer(struct smbios_structure *template_entry,
|
||||
const char **template_strings, char *curaddr, char **endaddr,
|
||||
uint16_t *n, uint16_t *size)
|
||||
{
|
||||
struct smbios_structure *entry;
|
||||
|
||||
memcpy(curaddr, template_entry, template_entry->length);
|
||||
entry = (struct smbios_structure *)curaddr;
|
||||
entry->handle = *n + 1;
|
||||
curaddr += entry->length;
|
||||
if (template_strings != NULL) {
|
||||
int i;
|
||||
|
||||
for (i = 0; template_strings[i] != NULL; i++) {
|
||||
const char *string;
|
||||
int len;
|
||||
|
||||
string = template_strings[i];
|
||||
len = strlen(string) + 1;
|
||||
memcpy(curaddr, string, len);
|
||||
curaddr += len;
|
||||
}
|
||||
*curaddr = '\0';
|
||||
curaddr++;
|
||||
} else {
|
||||
/* Minimum string section is double nul */
|
||||
*curaddr = '\0';
|
||||
curaddr++;
|
||||
*curaddr = '\0';
|
||||
curaddr++;
|
||||
}
|
||||
(*n)++;
|
||||
*endaddr = curaddr;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
smbios_type1_initializer(struct smbios_structure *template_entry,
|
||||
const char **template_strings, char *curaddr, char **endaddr,
|
||||
uint16_t *n, uint16_t *size)
|
||||
{
|
||||
struct smbios_table_type1 *type1;
|
||||
|
||||
smbios_generic_initializer(template_entry, template_strings,
|
||||
curaddr, endaddr, n, size);
|
||||
type1 = (struct smbios_table_type1 *)curaddr;
|
||||
|
||||
if (guest_uuid_str != NULL) {
|
||||
uuid_t uuid;
|
||||
uint32_t status;
|
||||
|
||||
uuid_from_string(guest_uuid_str, &uuid, &status);
|
||||
if (status != uuid_s_ok)
|
||||
return (-1);
|
||||
|
||||
uuid_enc_le(&type1->uuid, &uuid);
|
||||
} else {
|
||||
MD5_CTX mdctx;
|
||||
u_char digest[16];
|
||||
char hostname[MAXHOSTNAMELEN];
|
||||
|
||||
/*
|
||||
* Universally unique and yet reproducible are an
|
||||
* oxymoron, however reproducible is desirable in
|
||||
* this case.
|
||||
*/
|
||||
if (gethostname(hostname, sizeof(hostname)))
|
||||
return (-1);
|
||||
|
||||
MD5Init(&mdctx);
|
||||
MD5Update(&mdctx, vmname, strlen(vmname));
|
||||
MD5Update(&mdctx, hostname, sizeof(hostname));
|
||||
MD5Final(digest, &mdctx);
|
||||
|
||||
/*
|
||||
* Set the variant and version number.
|
||||
*/
|
||||
digest[6] &= 0x0F;
|
||||
digest[6] |= 0x30; /* version 3 */
|
||||
digest[8] &= 0x3F;
|
||||
digest[8] |= 0x80;
|
||||
|
||||
memcpy(&type1->uuid, digest, sizeof (digest));
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
smbios_type4_initializer(struct smbios_structure *template_entry,
|
||||
const char **template_strings, char *curaddr, char **endaddr,
|
||||
uint16_t *n, uint16_t *size)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < guest_ncpus; i++) {
|
||||
struct smbios_table_type4 *type4;
|
||||
char *p;
|
||||
int nstrings, len;
|
||||
|
||||
smbios_generic_initializer(template_entry, template_strings,
|
||||
curaddr, endaddr, n, size);
|
||||
type4 = (struct smbios_table_type4 *)curaddr;
|
||||
p = curaddr + sizeof (struct smbios_table_type4);
|
||||
nstrings = 0;
|
||||
while (p < *endaddr - 1) {
|
||||
if (*p++ == '\0')
|
||||
nstrings++;
|
||||
}
|
||||
len = sprintf(*endaddr - 1, "CPU #%d", i) + 1;
|
||||
*endaddr += len - 1;
|
||||
*(*endaddr) = '\0';
|
||||
(*endaddr)++;
|
||||
type4->socket = nstrings + 1;
|
||||
curaddr = *endaddr;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
smbios_type16_initializer(struct smbios_structure *template_entry,
|
||||
const char **template_strings, char *curaddr, char **endaddr,
|
||||
uint16_t *n, uint16_t *size)
|
||||
{
|
||||
struct smbios_table_type16 *type16;
|
||||
|
||||
type16_handle = *n;
|
||||
smbios_generic_initializer(template_entry, template_strings,
|
||||
curaddr, endaddr, n, size);
|
||||
type16 = (struct smbios_table_type16 *)curaddr;
|
||||
type16->xsize = guest_lomem + guest_himem;
|
||||
type16->ndevs = guest_himem > 0 ? 2 : 1;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
smbios_type17_initializer(struct smbios_structure *template_entry,
|
||||
const char **template_strings, char *curaddr, char **endaddr,
|
||||
uint16_t *n, uint16_t *size)
|
||||
{
|
||||
struct smbios_table_type17 *type17;
|
||||
|
||||
smbios_generic_initializer(template_entry, template_strings,
|
||||
curaddr, endaddr, n, size);
|
||||
type17 = (struct smbios_table_type17 *)curaddr;
|
||||
type17->arrayhand = type16_handle;
|
||||
type17->xsize = guest_lomem;
|
||||
|
||||
if (guest_himem > 0) {
|
||||
curaddr = *endaddr;
|
||||
smbios_generic_initializer(template_entry, template_strings,
|
||||
curaddr, endaddr, n, size);
|
||||
type17 = (struct smbios_table_type17 *)curaddr;
|
||||
type17->arrayhand = type16_handle;
|
||||
type17->xsize = guest_himem;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
smbios_type19_initializer(struct smbios_structure *template_entry,
|
||||
const char **template_strings, char *curaddr, char **endaddr,
|
||||
uint16_t *n, uint16_t *size)
|
||||
{
|
||||
struct smbios_table_type19 *type19;
|
||||
|
||||
smbios_generic_initializer(template_entry, template_strings,
|
||||
curaddr, endaddr, n, size);
|
||||
type19 = (struct smbios_table_type19 *)curaddr;
|
||||
type19->arrayhand = type16_handle;
|
||||
type19->xsaddr = 0;
|
||||
type19->xeaddr = guest_lomem;
|
||||
|
||||
if (guest_himem > 0) {
|
||||
curaddr = *endaddr;
|
||||
smbios_generic_initializer(template_entry, template_strings,
|
||||
curaddr, endaddr, n, size);
|
||||
type19 = (struct smbios_table_type19 *)curaddr;
|
||||
type19->arrayhand = type16_handle;
|
||||
type19->xsaddr = 4*GB;
|
||||
type19->xeaddr = guest_himem;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
smbios_ep_initializer(struct smbios_entry_point *smbios_ep, uint32_t staddr)
|
||||
{
|
||||
memset(smbios_ep, 0, sizeof(*smbios_ep));
|
||||
memcpy(smbios_ep->eanchor, SMBIOS_ENTRY_EANCHOR,
|
||||
SMBIOS_ENTRY_EANCHORLEN);
|
||||
smbios_ep->eplen = 0x1F;
|
||||
assert(sizeof (struct smbios_entry_point) == smbios_ep->eplen);
|
||||
smbios_ep->major = 2;
|
||||
smbios_ep->minor = 6;
|
||||
smbios_ep->revision = 0;
|
||||
memcpy(smbios_ep->ianchor, SMBIOS_ENTRY_IANCHOR,
|
||||
SMBIOS_ENTRY_IANCHORLEN);
|
||||
smbios_ep->staddr = staddr;
|
||||
smbios_ep->bcdrev = 0x24;
|
||||
}
|
||||
|
||||
static void
|
||||
smbios_ep_finalizer(struct smbios_entry_point *smbios_ep, uint16_t len,
|
||||
uint16_t num, uint16_t maxssize)
|
||||
{
|
||||
uint8_t checksum;
|
||||
int i;
|
||||
|
||||
smbios_ep->maxssize = maxssize;
|
||||
smbios_ep->stlen = len;
|
||||
smbios_ep->stnum = num;
|
||||
|
||||
checksum = 0;
|
||||
for (i = 0x10; i < 0x1f; i++) {
|
||||
checksum -= ((uint8_t *)smbios_ep)[i];
|
||||
}
|
||||
smbios_ep->ichecksum = checksum;
|
||||
|
||||
checksum = 0;
|
||||
for (i = 0; i < 0x1f; i++) {
|
||||
checksum -= ((uint8_t *)smbios_ep)[i];
|
||||
}
|
||||
smbios_ep->echecksum = checksum;
|
||||
}
|
||||
|
||||
int
|
||||
smbios_build(struct vmctx *ctx)
|
||||
{
|
||||
struct smbios_entry_point *smbios_ep;
|
||||
uint16_t n;
|
||||
uint16_t maxssize;
|
||||
char *curaddr, *startaddr, *ststartaddr;
|
||||
int i;
|
||||
int err;
|
||||
|
||||
guest_lomem = vm_get_lowmem_size(ctx);
|
||||
guest_himem = vm_get_highmem_size(ctx);
|
||||
|
||||
startaddr = paddr_guest2host(ctx, SMBIOS_BASE, SMBIOS_MAX_LENGTH);
|
||||
if (startaddr == NULL) {
|
||||
fprintf(stderr, "smbios table requires mapped mem\n");
|
||||
return (ENOMEM);
|
||||
}
|
||||
|
||||
curaddr = startaddr;
|
||||
|
||||
smbios_ep = (struct smbios_entry_point *)curaddr;
|
||||
smbios_ep_initializer(smbios_ep, SMBIOS_BASE +
|
||||
sizeof(struct smbios_entry_point));
|
||||
curaddr += sizeof(struct smbios_entry_point);
|
||||
ststartaddr = curaddr;
|
||||
|
||||
n = 0;
|
||||
maxssize = 0;
|
||||
for (i = 0; smbios_template[i].entry != NULL; i++) {
|
||||
struct smbios_structure *entry;
|
||||
const char **strings;
|
||||
initializer_func_t initializer;
|
||||
char *endaddr;
|
||||
uint16_t size;
|
||||
|
||||
entry = smbios_template[i].entry;
|
||||
strings = smbios_template[i].strings;
|
||||
initializer = smbios_template[i].initializer;
|
||||
|
||||
err = (*initializer)(entry, strings, curaddr, &endaddr,
|
||||
&n, &size);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
|
||||
if (size > maxssize)
|
||||
maxssize = size;
|
||||
|
||||
curaddr = endaddr;
|
||||
}
|
||||
|
||||
assert(curaddr - startaddr < SMBIOS_MAX_LENGTH);
|
||||
smbios_ep_finalizer(smbios_ep, curaddr - ststartaddr, n, maxssize);
|
||||
|
||||
return (0);
|
||||
}
|
36
smbiostbl.h
Normal file
36
smbiostbl.h
Normal file
@ -0,0 +1,36 @@
|
||||
/*-
|
||||
* Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _SMBIOSTBL_H_
#define _SMBIOSTBL_H_

/* Only a pointer to the VM context is needed here. */
struct vmctx;

/*
 * Build the guest's SMBIOS tables.  Returns 0 on success and a non-zero
 * value on failure.
 */
int	smbios_build(struct vmctx *ctx);

#endif /* _SMBIOSTBL_H_ */
|
104
spinup_ap.c
Normal file
104
spinup_ap.c
Normal file
@ -0,0 +1,104 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <machine/vmm.h>
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "spinup_ap.h"
|
||||
|
||||
static void
|
||||
spinup_ap_realmode(struct vmctx *ctx, int newcpu, uint64_t *rip)
|
||||
{
|
||||
int vector, error;
|
||||
uint16_t cs;
|
||||
uint64_t desc_base;
|
||||
uint32_t desc_limit, desc_access;
|
||||
|
||||
vector = *rip >> PAGE_SHIFT;
|
||||
*rip = 0;
|
||||
|
||||
/*
|
||||
* Update the %cs and %rip of the guest so that it starts
|
||||
* executing real mode code at at 'vector << 12'.
|
||||
*/
|
||||
error = vm_set_register(ctx, newcpu, VM_REG_GUEST_RIP, *rip);
|
||||
assert(error == 0);
|
||||
|
||||
error = vm_get_desc(ctx, newcpu, VM_REG_GUEST_CS, &desc_base,
|
||||
&desc_limit, &desc_access);
|
||||
assert(error == 0);
|
||||
|
||||
desc_base = vector << PAGE_SHIFT;
|
||||
error = vm_set_desc(ctx, newcpu, VM_REG_GUEST_CS,
|
||||
desc_base, desc_limit, desc_access);
|
||||
assert(error == 0);
|
||||
|
||||
cs = (vector << PAGE_SHIFT) >> 4;
|
||||
error = vm_set_register(ctx, newcpu, VM_REG_GUEST_CS, cs);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
int
|
||||
spinup_ap(struct vmctx *ctx, int vcpu, int newcpu, uint64_t rip)
|
||||
{
|
||||
int error;
|
||||
|
||||
assert(newcpu != 0);
|
||||
assert(newcpu < guest_ncpus);
|
||||
|
||||
error = vcpu_reset(ctx, newcpu);
|
||||
assert(error == 0);
|
||||
|
||||
fbsdrun_set_capabilities(ctx, newcpu);
|
||||
|
||||
/*
|
||||
* Enable the 'unrestricted guest' mode for 'newcpu'.
|
||||
*
|
||||
* Set up the processor state in power-on 16-bit mode, with the CS:IP
|
||||
* init'd to the specified low-mem 4K page.
|
||||
*/
|
||||
error = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
|
||||
assert(error == 0);
|
||||
|
||||
spinup_ap_realmode(ctx, newcpu, &rip);
|
||||
|
||||
fbsdrun_addcpu(ctx, vcpu, newcpu, rip);
|
||||
|
||||
return (newcpu);
|
||||
}
|
34
spinup_ap.h
Normal file
34
spinup_ap.h
Normal file
@ -0,0 +1,34 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef	_SPINUP_AP_H_
#define	_SPINUP_AP_H_

/*
 * Only a pointer to the VM context is used here, so a forward declaration
 * is sufficient and keeps the prototype valid regardless of include order.
 */
struct vmctx;

/*
 * Start the application processor 'newcpu' on behalf of 'vcpu', beginning
 * execution in real mode at the 4KB page encoded in 'rip'.
 * Returns 'newcpu'.
 */
int	spinup_ap(struct vmctx *ctx, int vcpu, int newcpu, uint64_t rip);

#endif	/* _SPINUP_AP_H_ */
|
939
task_switch.c
Normal file
939
task_switch.c
Normal file
@ -0,0 +1,939 @@
|
||||
/*-
|
||||
* Copyright (c) 2014 Neel Natu <neel@freebsd.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/_iovec.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <x86/psl.h>
|
||||
#include <x86/segments.h>
|
||||
#include <x86/specialreg.h>
|
||||
#include <machine/vmm.h>
|
||||
#include <machine/vmm_instruction_emul.h>
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
|
||||
/*
 * In-memory image of a 32-bit task state segment.
 *
 * 'struct i386tss' is not used because all of its fields are defined as
 * signed integers, which leads to sign extension problems.  Each 16-bit
 * field is listed together with the 16-bit slot that follows it.
 */
struct tss32 {
	uint16_t	tss_link, rsvd1;
	uint32_t	tss_esp0;
	uint16_t	tss_ss0, rsvd2;
	uint32_t	tss_esp1;
	uint16_t	tss_ss1, rsvd3;
	uint32_t	tss_esp2;
	uint16_t	tss_ss2, rsvd4;
	uint32_t	tss_cr3;
	uint32_t	tss_eip;
	uint32_t	tss_eflags;
	/* General purpose registers */
	uint32_t	tss_eax;
	uint32_t	tss_ecx;
	uint32_t	tss_edx;
	uint32_t	tss_ebx;
	uint32_t	tss_esp;
	uint32_t	tss_ebp;
	uint32_t	tss_esi;
	uint32_t	tss_edi;
	/* Segment selectors */
	uint16_t	tss_es, rsvd5;
	uint16_t	tss_cs, rsvd6;
	uint16_t	tss_ss, rsvd7;
	uint16_t	tss_ds, rsvd8;
	uint16_t	tss_fs, rsvd9;
	uint16_t	tss_gs, rsvd10;
	uint16_t	tss_ldt, rsvd11;
	uint16_t	tss_trap;
	uint16_t	tss_iomap;
};
|
||||
CTASSERT(sizeof(struct tss32) == 104);
|
||||
|
||||
/*
 * Offsets of the first and last byte of the 8-byte-aligned descriptor
 * table slot referenced by the selector 'sel'.
 */
#define	SEL_START(sel)	((sel) & ~0x7)
#define	SEL_LIMIT(sel)	((sel) | 0x7)

/* Non-zero if bit 1 (the busy flag) is set in the TSS descriptor 'type'. */
#define	TSS_BUSY(type)	(0 != ((type) & 0x2))
|
||||
|
||||
/*
 * Return the current value of register 'reg' of 'vcpu'.
 * A failure to read the register is treated as a fatal assertion.
 */
static uint64_t
GETREG(struct vmctx *ctx, int vcpu, int reg)
{
	uint64_t regval;
	int error;

	error = vm_get_register(ctx, vcpu, reg, &regval);
	assert(error == 0);

	return (regval);
}
|
||||
|
||||
/*
 * Store 'val' into register 'reg' of 'vcpu'.
 * A failure to write the register is treated as a fatal assertion.
 */
static void
SETREG(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
{
	int error;

	error = vm_set_register(ctx, vcpu, reg, val);
	assert(error == 0);
}
|
||||
|
||||
static struct seg_desc
|
||||
usd_to_seg_desc(struct user_segment_descriptor *usd)
|
||||
{
|
||||
struct seg_desc seg_desc;
|
||||
|
||||
seg_desc.base = (u_int)USD_GETBASE(usd);
|
||||
if (usd->sd_gran)
|
||||
seg_desc.limit = (u_int)(USD_GETLIMIT(usd) << 12) | 0xfff;
|
||||
else
|
||||
seg_desc.limit = (u_int)USD_GETLIMIT(usd);
|
||||
seg_desc.access = usd->sd_type | usd->sd_dpl << 5 | usd->sd_p << 7;
|
||||
seg_desc.access |= usd->sd_xx << 12;
|
||||
seg_desc.access |= usd->sd_def32 << 14;
|
||||
seg_desc.access |= usd->sd_gran << 15;
|
||||
|
||||
return (seg_desc);
|
||||
}
|
||||
|
||||
/*
 * Inject exception 'vector' with an error code derived from the segment
 * selector 'sel'.  The error code format is described in section 6.13,
 * "Error Code", Intel SDM volume 3:
 *
 * Bit 0 (EXT) tells whether the exception occurred during delivery of an
 * external event like an interrupt; it is taken from 'ext'.
 *
 * Bit 1 (IDT) tells whether the selector points to a gate descriptor in
 * the IDT.  It is always cleared here because none of the selectors seen
 * during task switch emulation refer to a task gate in the IDT.
 *
 * Bit 2 (GDT/LDT) is the Table Indicator (TI) and is passed through from
 * the selector unchanged.
 */
static void
sel_exception(struct vmctx *ctx, int vcpu, int vector, uint16_t sel, int ext)
{
	uint16_t errcode;

	/* Drop the RPL bits of the selector, they become EXT and IDT. */
	errcode = sel & ~0x3;
	if (ext)
		errcode |= 0x1;

	vm_inject_fault(ctx, vcpu, vector, 1, errcode);
}
|
||||
|
||||
/*
|
||||
* Return 0 if the selector 'sel' in within the limits of the GDT/LDT
|
||||
* and non-zero otherwise.
|
||||
*/
|
||||
static int
|
||||
desc_table_limit_check(struct vmctx *ctx, int vcpu, uint16_t sel)
|
||||
{
|
||||
uint64_t base;
|
||||
uint32_t limit, access;
|
||||
int error, reg;
|
||||
|
||||
reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
|
||||
error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access);
|
||||
assert(error == 0);
|
||||
|
||||
if (reg == VM_REG_GUEST_LDTR) {
|
||||
if (SEG_DESC_UNUSABLE(access) || !SEG_DESC_PRESENT(access))
|
||||
return (-1);
|
||||
}
|
||||
|
||||
if (limit < SEL_LIMIT(sel))
|
||||
return (-1);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read/write the segment descriptor 'desc' into the GDT/LDT slot referenced
|
||||
* by the selector 'sel'.
|
||||
*
|
||||
* Returns 0 on success.
|
||||
* Returns 1 if an exception was injected into the guest.
|
||||
* Returns -1 otherwise.
|
||||
*/
|
||||
static int
|
||||
desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
|
||||
uint16_t sel, struct user_segment_descriptor *desc, bool doread,
|
||||
int *faultptr)
|
||||
{
|
||||
struct iovec iov[2];
|
||||
uint64_t base;
|
||||
uint32_t limit, access;
|
||||
int error, reg;
|
||||
|
||||
reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
|
||||
error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access);
|
||||
assert(error == 0);
|
||||
assert(limit >= SEL_LIMIT(sel));
|
||||
|
||||
error = vm_copy_setup(ctx, vcpu, paging, base + SEL_START(sel),
|
||||
sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov),
|
||||
faultptr);
|
||||
if (error || *faultptr)
|
||||
return (error);
|
||||
|
||||
if (doread)
|
||||
vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc));
|
||||
else
|
||||
vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc));
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Read the descriptor referenced by 'sel' from the GDT/LDT into 'desc'.
 * Return value and '*faultptr' are those of desc_table_rw().
 */
static int
desc_table_read(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
	const bool doread = true;

	return (desc_table_rw(ctx, vcpu, paging, sel, desc, doread,
	    faultptr));
}
|
||||
|
||||
/*
 * Write 'desc' into the GDT/LDT slot referenced by 'sel'.
 * Return value and '*faultptr' are those of desc_table_rw().
 */
static int
desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
	const bool doread = false;

	return (desc_table_rw(ctx, vcpu, paging, sel, desc, doread,
	    faultptr));
}
|
||||
|
||||
/*
|
||||
* Read the TSS descriptor referenced by 'sel' into 'desc'.
|
||||
*
|
||||
* Returns 0 on success.
|
||||
* Returns 1 if an exception was injected into the guest.
|
||||
* Returns -1 otherwise.
|
||||
*/
|
||||
static int
|
||||
read_tss_descriptor(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
|
||||
uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
|
||||
{
|
||||
struct vm_guest_paging sup_paging;
|
||||
int error;
|
||||
|
||||
assert(!ISLDT(sel));
|
||||
assert(IDXSEL(sel) != 0);
|
||||
|
||||
/* Fetch the new TSS descriptor */
|
||||
if (desc_table_limit_check(ctx, vcpu, sel)) {
|
||||
if (ts->reason == TSR_IRET)
|
||||
sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
|
||||
else
|
||||
sel_exception(ctx, vcpu, IDT_GP, sel, ts->ext);
|
||||
return (1);
|
||||
}
|
||||
|
||||
sup_paging = ts->paging;
|
||||
sup_paging.cpl = 0; /* implicit supervisor mode */
|
||||
error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc, faultptr);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
 * Return true if the descriptor type 'sd_type' denotes a code segment,
 * i.e. both bits covered by the mask 0x18 are set.
 */
static bool
code_desc(int sd_type)
{
	const int code_bits = 0x18;

	return ((sd_type & code_bits) == code_bits);
}
|
||||
|
||||
/*
 * Return true if the descriptor type 'sd_type' denotes a writable data
 * segment, the only kind of descriptor acceptable for a stack segment.
 */
static bool
stack_desc(int sd_type)
{
	const int mask = 0x1A;
	const int writable_data = 0x12;

	return ((sd_type & mask) == writable_data);
}
|
||||
|
||||
/*
 * Return true if the descriptor type 'sd_type' may be loaded into a data
 * segment register: either a data descriptor or a readable code
 * descriptor.
 */
static bool
data_desc(int sd_type)
{
	const bool is_data = ((sd_type & 0x18) == 0x10);
	const bool is_readable_code = ((sd_type & 0x1A) == 0x1A);

	return (is_data || is_readable_code);
}
|
||||
|
||||
static bool
|
||||
ldt_desc(int sd_type)
|
||||
{
|
||||
|
||||
return (sd_type == SDT_SYSLDT);
|
||||
}
|
||||
|
||||
/*
|
||||
* Validate the descriptor 'seg_desc' associated with 'segment'.
|
||||
*/
|
||||
static int
|
||||
validate_seg_desc(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
|
||||
int segment, struct seg_desc *seg_desc, int *faultptr)
|
||||
{
|
||||
struct vm_guest_paging sup_paging;
|
||||
struct user_segment_descriptor usd;
|
||||
int error, idtvec;
|
||||
int cpl, dpl, rpl;
|
||||
uint16_t sel, cs;
|
||||
bool ldtseg, codeseg, stackseg, dataseg, conforming;
|
||||
|
||||
ldtseg = codeseg = stackseg = dataseg = false;
|
||||
switch (segment) {
|
||||
case VM_REG_GUEST_LDTR:
|
||||
ldtseg = true;
|
||||
break;
|
||||
case VM_REG_GUEST_CS:
|
||||
codeseg = true;
|
||||
break;
|
||||
case VM_REG_GUEST_SS:
|
||||
stackseg = true;
|
||||
break;
|
||||
case VM_REG_GUEST_DS:
|
||||
case VM_REG_GUEST_ES:
|
||||
case VM_REG_GUEST_FS:
|
||||
case VM_REG_GUEST_GS:
|
||||
dataseg = true;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
/* Get the segment selector */
|
||||
sel = GETREG(ctx, vcpu, segment);
|
||||
|
||||
/* LDT selector must point into the GDT */
|
||||
if (ldtseg && ISLDT(sel)) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
|
||||
return (1);
|
||||
}
|
||||
|
||||
/* Descriptor table limit check */
|
||||
if (desc_table_limit_check(ctx, vcpu, sel)) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
|
||||
return (1);
|
||||
}
|
||||
|
||||
/* NULL selector */
|
||||
if (IDXSEL(sel) == 0) {
|
||||
/* Code and stack segment selectors cannot be NULL */
|
||||
if (codeseg || stackseg) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
|
||||
return (1);
|
||||
}
|
||||
seg_desc->base = 0;
|
||||
seg_desc->limit = 0;
|
||||
seg_desc->access = 0x10000; /* unusable */
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* Read the descriptor from the GDT/LDT */
|
||||
sup_paging = ts->paging;
|
||||
sup_paging.cpl = 0; /* implicit supervisor mode */
|
||||
error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd, faultptr);
|
||||
if (error || *faultptr)
|
||||
return (error);
|
||||
|
||||
/* Verify that the descriptor type is compatible with the segment */
|
||||
if ((ldtseg && !ldt_desc(usd.sd_type)) ||
|
||||
(codeseg && !code_desc(usd.sd_type)) ||
|
||||
(dataseg && !data_desc(usd.sd_type)) ||
|
||||
(stackseg && !stack_desc(usd.sd_type))) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
|
||||
return (1);
|
||||
}
|
||||
|
||||
/* Segment must be marked present */
|
||||
if (!usd.sd_p) {
|
||||
if (ldtseg)
|
||||
idtvec = IDT_TS;
|
||||
else if (stackseg)
|
||||
idtvec = IDT_SS;
|
||||
else
|
||||
idtvec = IDT_NP;
|
||||
sel_exception(ctx, vcpu, idtvec, sel, ts->ext);
|
||||
return (1);
|
||||
}
|
||||
|
||||
cs = GETREG(ctx, vcpu, VM_REG_GUEST_CS);
|
||||
cpl = cs & SEL_RPL_MASK;
|
||||
rpl = sel & SEL_RPL_MASK;
|
||||
dpl = usd.sd_dpl;
|
||||
|
||||
if (stackseg && (rpl != cpl || dpl != cpl)) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
|
||||
return (1);
|
||||
}
|
||||
|
||||
if (codeseg) {
|
||||
conforming = (usd.sd_type & 0x4) ? true : false;
|
||||
if ((conforming && (cpl < dpl)) ||
|
||||
(!conforming && (cpl != dpl))) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
|
||||
return (1);
|
||||
}
|
||||
}
|
||||
|
||||
if (dataseg) {
|
||||
/*
|
||||
* A data segment is always non-conforming except when it's
|
||||
* descriptor is a readable, conforming code segment.
|
||||
*/
|
||||
if (code_desc(usd.sd_type) && (usd.sd_type & 0x4) != 0)
|
||||
conforming = true;
|
||||
else
|
||||
conforming = false;
|
||||
|
||||
if (!conforming && (rpl > dpl || cpl > dpl)) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
|
||||
return (1);
|
||||
}
|
||||
}
|
||||
*seg_desc = usd_to_seg_desc(&usd);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
tss32_save(struct vmctx *ctx, int vcpu, struct vm_task_switch *task_switch,
|
||||
uint32_t eip, struct tss32 *tss, struct iovec *iov)
|
||||
{
|
||||
|
||||
/* General purpose registers */
|
||||
tss->tss_eax = GETREG(ctx, vcpu, VM_REG_GUEST_RAX);
|
||||
tss->tss_ecx = GETREG(ctx, vcpu, VM_REG_GUEST_RCX);
|
||||
tss->tss_edx = GETREG(ctx, vcpu, VM_REG_GUEST_RDX);
|
||||
tss->tss_ebx = GETREG(ctx, vcpu, VM_REG_GUEST_RBX);
|
||||
tss->tss_esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP);
|
||||
tss->tss_ebp = GETREG(ctx, vcpu, VM_REG_GUEST_RBP);
|
||||
tss->tss_esi = GETREG(ctx, vcpu, VM_REG_GUEST_RSI);
|
||||
tss->tss_edi = GETREG(ctx, vcpu, VM_REG_GUEST_RDI);
|
||||
|
||||
/* Segment selectors */
|
||||
tss->tss_es = GETREG(ctx, vcpu, VM_REG_GUEST_ES);
|
||||
tss->tss_cs = GETREG(ctx, vcpu, VM_REG_GUEST_CS);
|
||||
tss->tss_ss = GETREG(ctx, vcpu, VM_REG_GUEST_SS);
|
||||
tss->tss_ds = GETREG(ctx, vcpu, VM_REG_GUEST_DS);
|
||||
tss->tss_fs = GETREG(ctx, vcpu, VM_REG_GUEST_FS);
|
||||
tss->tss_gs = GETREG(ctx, vcpu, VM_REG_GUEST_GS);
|
||||
|
||||
/* eflags and eip */
|
||||
tss->tss_eflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS);
|
||||
if (task_switch->reason == TSR_IRET)
|
||||
tss->tss_eflags &= ~PSL_NT;
|
||||
tss->tss_eip = eip;
|
||||
|
||||
/* Copy updated old TSS into guest memory */
|
||||
vm_copyout(ctx, vcpu, tss, iov, sizeof(struct tss32));
|
||||
}
|
||||
|
||||
static void
|
||||
update_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *sd)
|
||||
{
|
||||
int error;
|
||||
|
||||
error = vm_set_desc(ctx, vcpu, reg, sd->base, sd->limit, sd->access);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the vcpu registers to reflect the state of the new task.
|
||||
*/
|
||||
static int
|
||||
tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
|
||||
uint16_t ot_sel, struct tss32 *tss, struct iovec *iov, int *faultptr)
|
||||
{
|
||||
struct seg_desc seg_desc, seg_desc2;
|
||||
uint64_t *pdpte, maxphyaddr, reserved;
|
||||
uint32_t eflags;
|
||||
int error, i;
|
||||
bool nested;
|
||||
|
||||
nested = false;
|
||||
if (ts->reason != TSR_IRET && ts->reason != TSR_JMP) {
|
||||
tss->tss_link = ot_sel;
|
||||
nested = true;
|
||||
}
|
||||
|
||||
eflags = tss->tss_eflags;
|
||||
if (nested)
|
||||
eflags |= PSL_NT;
|
||||
|
||||
/* LDTR */
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_LDTR, tss->tss_ldt);
|
||||
|
||||
/* PBDR */
|
||||
if (ts->paging.paging_mode != PAGING_MODE_FLAT) {
|
||||
if (ts->paging.paging_mode == PAGING_MODE_PAE) {
|
||||
/*
|
||||
* XXX Assuming 36-bit MAXPHYADDR.
|
||||
*/
|
||||
maxphyaddr = (1UL << 36) - 1;
|
||||
pdpte = paddr_guest2host(ctx, tss->tss_cr3 & ~0x1f, 32);
|
||||
for (i = 0; i < 4; i++) {
|
||||
/* Check reserved bits if the PDPTE is valid */
|
||||
if (!(pdpte[i] & 0x1))
|
||||
continue;
|
||||
/*
|
||||
* Bits 2:1, 8:5 and bits above the processor's
|
||||
* maximum physical address are reserved.
|
||||
*/
|
||||
reserved = ~maxphyaddr | 0x1E6;
|
||||
if (pdpte[i] & reserved) {
|
||||
vm_inject_gp(ctx, vcpu);
|
||||
return (1);
|
||||
}
|
||||
}
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE0, pdpte[0]);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE1, pdpte[1]);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE2, pdpte[2]);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE3, pdpte[3]);
|
||||
}
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_CR3, tss->tss_cr3);
|
||||
ts->paging.cr3 = tss->tss_cr3;
|
||||
}
|
||||
|
||||
/* eflags and eip */
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS, eflags);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RIP, tss->tss_eip);
|
||||
|
||||
/* General purpose registers */
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RAX, tss->tss_eax);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RCX, tss->tss_ecx);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RDX, tss->tss_edx);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RBX, tss->tss_ebx);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RSP, tss->tss_esp);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RBP, tss->tss_ebp);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RSI, tss->tss_esi);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RDI, tss->tss_edi);
|
||||
|
||||
/* Segment selectors */
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_ES, tss->tss_es);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_CS, tss->tss_cs);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_SS, tss->tss_ss);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_DS, tss->tss_ds);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_FS, tss->tss_fs);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_GS, tss->tss_gs);
|
||||
|
||||
/*
|
||||
* If this is a nested task then write out the new TSS to update
|
||||
* the previous link field.
|
||||
*/
|
||||
if (nested)
|
||||
vm_copyout(ctx, vcpu, tss, iov, sizeof(*tss));
|
||||
|
||||
/* Validate segment descriptors */
|
||||
error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc,
|
||||
faultptr);
|
||||
if (error || *faultptr)
|
||||
return (error);
|
||||
update_seg_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &seg_desc);
|
||||
|
||||
/*
|
||||
* Section "Checks on Guest Segment Registers", Intel SDM, Vol 3.
|
||||
*
|
||||
* The SS and CS attribute checks on VM-entry are inter-dependent so
|
||||
* we need to make sure that both segments are valid before updating
|
||||
* either of them. This ensures that the VMCS state can pass the
|
||||
* VM-entry checks so the guest can handle any exception injected
|
||||
* during task switch emulation.
|
||||
*/
|
||||
error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc,
|
||||
faultptr);
|
||||
if (error || *faultptr)
|
||||
return (error);
|
||||
|
||||
error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2,
|
||||
faultptr);
|
||||
if (error || *faultptr)
|
||||
return (error);
|
||||
update_seg_desc(ctx, vcpu, VM_REG_GUEST_CS, &seg_desc);
|
||||
update_seg_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc2);
|
||||
ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK;
|
||||
|
||||
error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc,
|
||||
faultptr);
|
||||
if (error || *faultptr)
|
||||
return (error);
|
||||
update_seg_desc(ctx, vcpu, VM_REG_GUEST_DS, &seg_desc);
|
||||
|
||||
error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc,
|
||||
faultptr);
|
||||
if (error || *faultptr)
|
||||
return (error);
|
||||
update_seg_desc(ctx, vcpu, VM_REG_GUEST_ES, &seg_desc);
|
||||
|
||||
error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc,
|
||||
faultptr);
|
||||
if (error || *faultptr)
|
||||
return (error);
|
||||
update_seg_desc(ctx, vcpu, VM_REG_GUEST_FS, &seg_desc);
|
||||
|
||||
error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc,
|
||||
faultptr);
|
||||
if (error || *faultptr)
|
||||
return (error);
|
||||
update_seg_desc(ctx, vcpu, VM_REG_GUEST_GS, &seg_desc);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Push an error code on the stack of the new task. This is needed if the
|
||||
* task switch was triggered by a hardware exception that causes an error
|
||||
* code to be saved (e.g. #PF).
|
||||
*/
|
||||
static int
|
||||
push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
|
||||
int task_type, uint32_t errcode, int *faultptr)
|
||||
{
|
||||
struct iovec iov[2];
|
||||
struct seg_desc seg_desc;
|
||||
int stacksize, bytes, error;
|
||||
uint64_t gla, cr0, rflags;
|
||||
uint32_t esp;
|
||||
uint16_t stacksel;
|
||||
|
||||
*faultptr = 0;
|
||||
|
||||
cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0);
|
||||
rflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS);
|
||||
stacksel = GETREG(ctx, vcpu, VM_REG_GUEST_SS);
|
||||
|
||||
error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc.base,
|
||||
&seg_desc.limit, &seg_desc.access);
|
||||
assert(error == 0);
|
||||
|
||||
/*
|
||||
* Section "Error Code" in the Intel SDM vol 3: the error code is
|
||||
* pushed on the stack as a doubleword or word (depending on the
|
||||
* default interrupt, trap or task gate size).
|
||||
*/
|
||||
if (task_type == SDT_SYS386BSY || task_type == SDT_SYS386TSS)
|
||||
bytes = 4;
|
||||
else
|
||||
bytes = 2;
|
||||
|
||||
/*
|
||||
* PUSH instruction from Intel SDM vol 2: the 'B' flag in the
|
||||
* stack-segment descriptor determines the size of the stack
|
||||
* pointer outside of 64-bit mode.
|
||||
*/
|
||||
if (SEG_DESC_DEF32(seg_desc.access))
|
||||
stacksize = 4;
|
||||
else
|
||||
stacksize = 2;
|
||||
|
||||
esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP);
|
||||
esp -= bytes;
|
||||
|
||||
if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS,
|
||||
&seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) {
|
||||
sel_exception(ctx, vcpu, IDT_SS, stacksel, 1);
|
||||
*faultptr = 1;
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
|
||||
vm_inject_ac(ctx, vcpu, 1);
|
||||
*faultptr = 1;
|
||||
return (0);
|
||||
}
|
||||
|
||||
error = vm_copy_setup(ctx, vcpu, paging, gla, bytes, PROT_WRITE,
|
||||
iov, nitems(iov), faultptr);
|
||||
if (error || *faultptr)
|
||||
return (error);
|
||||
|
||||
vm_copyout(ctx, vcpu, &errcode, iov, bytes);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RSP, esp);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Evaluate the return value 'error' and the fault indication 'fault' of a
 * helper function and potentially return to the VM run loop:
 *
 * - 'error' must be 0 or EFAULT; a non-zero value aborts the VM.
 * - otherwise a non-zero 'fault' resumes the guest so that it can handle
 *   the exception that was injected.
 * - otherwise execution continues after the macro.
 *
 * The arguments are parenthesized so that expressions can be passed in
 * safely.  Note that 'error' is evaluated more than once.
 */
#define	CHKERR(error,fault)						\
	do {								\
		assert(((error) == 0) || ((error) == EFAULT));		\
		if (error)						\
			return (VMEXIT_ABORT);				\
		else if (fault)						\
			return (VMEXIT_CONTINUE);			\
	} while (0)
|
||||
|
||||
int
|
||||
vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
struct seg_desc nt;
|
||||
struct tss32 oldtss, newtss;
|
||||
struct vm_task_switch *task_switch;
|
||||
struct vm_guest_paging *paging, sup_paging;
|
||||
struct user_segment_descriptor nt_desc, ot_desc;
|
||||
struct iovec nt_iov[2], ot_iov[2];
|
||||
uint64_t cr0, ot_base;
|
||||
uint32_t eip, ot_lim, access;
|
||||
int error, ext, fault, minlimit, nt_type, ot_type, vcpu;
|
||||
enum task_switch_reason reason;
|
||||
uint16_t nt_sel, ot_sel;
|
||||
|
||||
task_switch = &vmexit->u.task_switch;
|
||||
nt_sel = task_switch->tsssel;
|
||||
ext = vmexit->u.task_switch.ext;
|
||||
reason = vmexit->u.task_switch.reason;
|
||||
paging = &vmexit->u.task_switch.paging;
|
||||
vcpu = *pvcpu;
|
||||
|
||||
assert(paging->cpu_mode == CPU_MODE_PROTECTED);
|
||||
|
||||
/*
|
||||
* Calculate the instruction pointer to store in the old TSS.
|
||||
*/
|
||||
eip = vmexit->rip + vmexit->inst_length;
|
||||
|
||||
/*
|
||||
* Section 4.6, "Access Rights" in Intel SDM Vol 3.
|
||||
* The following page table accesses are implicitly supervisor mode:
|
||||
* - accesses to GDT or LDT to load segment descriptors
|
||||
* - accesses to the task state segment during task switch
|
||||
*/
|
||||
sup_paging = *paging;
|
||||
sup_paging.cpl = 0; /* implicit supervisor mode */
|
||||
|
||||
/* Fetch the new TSS descriptor */
|
||||
error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc,
|
||||
&fault);
|
||||
CHKERR(error, fault);
|
||||
|
||||
nt = usd_to_seg_desc(&nt_desc);
|
||||
|
||||
/* Verify the type of the new TSS */
|
||||
nt_type = SEG_DESC_TYPE(nt.access);
|
||||
if (nt_type != SDT_SYS386BSY && nt_type != SDT_SYS386TSS &&
|
||||
nt_type != SDT_SYS286BSY && nt_type != SDT_SYS286TSS) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* TSS descriptor must have present bit set */
|
||||
if (!SEG_DESC_PRESENT(nt.access)) {
|
||||
sel_exception(ctx, vcpu, IDT_NP, nt_sel, ext);
|
||||
goto done;
|
||||
}
|
||||
|
||||
/*
|
||||
* TSS must have a minimum length of 104 bytes for a 32-bit TSS and
|
||||
* 44 bytes for a 16-bit TSS.
|
||||
*/
|
||||
if (nt_type == SDT_SYS386BSY || nt_type == SDT_SYS386TSS)
|
||||
minlimit = 104 - 1;
|
||||
else if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS)
|
||||
minlimit = 44 - 1;
|
||||
else
|
||||
minlimit = 0;
|
||||
|
||||
assert(minlimit > 0);
|
||||
if (nt.limit < minlimit) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* TSS must be busy if task switch is due to IRET */
|
||||
if (reason == TSR_IRET && !TSS_BUSY(nt_type)) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);
|
||||
goto done;
|
||||
}
|
||||
|
||||
/*
|
||||
* TSS must be available (not busy) if task switch reason is
|
||||
* CALL, JMP, exception or interrupt.
|
||||
*/
|
||||
if (reason != TSR_IRET && TSS_BUSY(nt_type)) {
|
||||
sel_exception(ctx, vcpu, IDT_GP, nt_sel, ext);
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Fetch the new TSS */
|
||||
error = vm_copy_setup(ctx, vcpu, &sup_paging, nt.base, minlimit + 1,
|
||||
PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov), &fault);
|
||||
CHKERR(error, fault);
|
||||
vm_copyin(ctx, vcpu, nt_iov, &newtss, minlimit + 1);
|
||||
|
||||
/* Get the old TSS selector from the guest's task register */
|
||||
ot_sel = GETREG(ctx, vcpu, VM_REG_GUEST_TR);
|
||||
if (ISLDT(ot_sel) || IDXSEL(ot_sel) == 0) {
|
||||
/*
|
||||
* This might happen if a task switch was attempted without
|
||||
* ever loading the task register with LTR. In this case the
|
||||
* TR would contain the values from power-on:
|
||||
* (sel = 0, base = 0, limit = 0xffff).
|
||||
*/
|
||||
sel_exception(ctx, vcpu, IDT_TS, ot_sel, task_switch->ext);
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Get the old TSS base and limit from the guest's task register */
|
||||
error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR, &ot_base, &ot_lim,
|
||||
&access);
|
||||
assert(error == 0);
|
||||
assert(!SEG_DESC_UNUSABLE(access) && SEG_DESC_PRESENT(access));
|
||||
ot_type = SEG_DESC_TYPE(access);
|
||||
assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY);
|
||||
|
||||
/* Fetch the old TSS descriptor */
|
||||
error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc,
|
||||
&fault);
|
||||
CHKERR(error, fault);
|
||||
|
||||
/* Get the old TSS */
|
||||
error = vm_copy_setup(ctx, vcpu, &sup_paging, ot_base, minlimit + 1,
|
||||
PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov), &fault);
|
||||
CHKERR(error, fault);
|
||||
vm_copyin(ctx, vcpu, ot_iov, &oldtss, minlimit + 1);
|
||||
|
||||
/*
|
||||
* Clear the busy bit in the old TSS descriptor if the task switch
|
||||
* due to an IRET or JMP instruction.
|
||||
*/
|
||||
if (reason == TSR_IRET || reason == TSR_JMP) {
|
||||
ot_desc.sd_type &= ~0x2;
|
||||
error = desc_table_write(ctx, vcpu, &sup_paging, ot_sel,
|
||||
&ot_desc, &fault);
|
||||
CHKERR(error, fault);
|
||||
}
|
||||
|
||||
if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) {
|
||||
fprintf(stderr, "Task switch to 16-bit TSS not supported\n");
|
||||
return (VMEXIT_ABORT);
|
||||
}
|
||||
|
||||
/* Save processor state in old TSS */
|
||||
tss32_save(ctx, vcpu, task_switch, eip, &oldtss, ot_iov);
|
||||
|
||||
/*
|
||||
* If the task switch was triggered for any reason other than IRET
|
||||
* then set the busy bit in the new TSS descriptor.
|
||||
*/
|
||||
if (reason != TSR_IRET) {
|
||||
nt_desc.sd_type |= 0x2;
|
||||
error = desc_table_write(ctx, vcpu, &sup_paging, nt_sel,
|
||||
&nt_desc, &fault);
|
||||
CHKERR(error, fault);
|
||||
}
|
||||
|
||||
/* Update task register to point at the new TSS */
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_TR, nt_sel);
|
||||
|
||||
/* Update the hidden descriptor state of the task register */
|
||||
nt = usd_to_seg_desc(&nt_desc);
|
||||
update_seg_desc(ctx, vcpu, VM_REG_GUEST_TR, &nt);
|
||||
|
||||
/* Set CR0.TS */
|
||||
cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_CR0, cr0 | CR0_TS);
|
||||
|
||||
/*
|
||||
* We are now committed to the task switch. Any exceptions encountered
|
||||
* after this point will be handled in the context of the new task and
|
||||
* the saved instruction pointer will belong to the new task.
|
||||
*/
|
||||
error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, newtss.tss_eip);
|
||||
assert(error == 0);
|
||||
|
||||
/* Load processor state from new TSS */
|
||||
error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov,
|
||||
&fault);
|
||||
CHKERR(error, fault);
|
||||
|
||||
/*
|
||||
* Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception
|
||||
* caused an error code to be generated, this error code is copied
|
||||
* to the stack of the new task.
|
||||
*/
|
||||
if (task_switch->errcode_valid) {
|
||||
assert(task_switch->ext);
|
||||
assert(task_switch->reason == TSR_IDT_GATE);
|
||||
error = push_errcode(ctx, vcpu, &task_switch->paging, nt_type,
|
||||
task_switch->errcode, &fault);
|
||||
CHKERR(error, fault);
|
||||
}
|
||||
|
||||
/*
|
||||
* Treatment of virtual-NMI blocking if NMI is delivered through
|
||||
* a task gate.
|
||||
*
|
||||
* Section "Architectural State Before A VM Exit", Intel SDM, Vol3:
|
||||
* If the virtual NMIs VM-execution control is 1, VM entry injects
|
||||
* an NMI, and delivery of the NMI causes a task switch that causes
|
||||
* a VM exit, virtual-NMI blocking is in effect before the VM exit
|
||||
* commences.
|
||||
*
|
||||
* Thus, virtual-NMI blocking is in effect at the time of the task
|
||||
* switch VM exit.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Treatment of virtual-NMI unblocking on IRET from NMI handler task.
|
||||
*
|
||||
* Section "Changes to Instruction Behavior in VMX Non-Root Operation"
|
||||
* If "virtual NMIs" control is 1 IRET removes any virtual-NMI blocking.
|
||||
* This unblocking of virtual-NMI occurs even if IRET causes a fault.
|
||||
*
|
||||
* Thus, virtual-NMI blocking is cleared at the time of the task switch
|
||||
* VM exit.
|
||||
*/
|
||||
|
||||
/*
|
||||
* If the task switch was triggered by an event delivered through
|
||||
* the IDT then extinguish the pending event from the vcpu's
|
||||
* exitintinfo.
|
||||
*/
|
||||
if (task_switch->reason == TSR_IDT_GATE) {
|
||||
error = vm_set_intinfo(ctx, vcpu, 0);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX should inject debug exception if 'T' bit is 1
|
||||
*/
|
||||
done:
|
||||
return (VMEXIT_CONTINUE);
|
||||
}
|
674
uart_emul.c
Normal file
674
uart_emul.c
Normal file
@ -0,0 +1,674 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* Copyright (c) 2013 Neel Natu <neel@freebsd.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <dev/ic/ns16550.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <fcntl.h>
|
||||
#include <termios.h>
|
||||
#include <unistd.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "mevent.h"
|
||||
#include "uart_emul.h"
|
||||
|
||||
/* I/O port base and IRQ handed out for the two legacy COM ports. */
#define	COM1_BASE	0x3F8
#define	COM1_IRQ	4
#define	COM2_BASE	0x2F8
#define	COM2_IRQ	3

/* Reference clock and baud rate used to derive the reset divisor. */
#define	DEFAULT_RCLK	1843200
#define	DEFAULT_BAUD	9600

/* FCR bits 6-7; retained when the guest programs the FCR. */
#define	FCR_RX_MASK	0xC0

/* MCR output bits; reflected into MSR RI/DCD while in loopback mode. */
#define	MCR_OUT1	0x04
#define	MCR_OUT2	0x08

/* Low nibble of the MSR holds the "delta" (change) bits. */
#define	MSR_DELTA_MASK	0x0f

#ifndef REG_SCR
#define	REG_SCR		com_scr
#endif

/* Depth of the receive FIFO once FIFO mode is enabled. */
#define	FIFOSZ		16
|
||||
|
||||
static bool uart_stdio; /* stdio in use for i/o */
|
||||
static struct termios tio_stdio_orig;
|
||||
|
||||
static struct {
|
||||
int baseaddr;
|
||||
int irq;
|
||||
bool inuse;
|
||||
} uart_lres[] = {
|
||||
{ COM1_BASE, COM1_IRQ, false},
|
||||
{ COM2_BASE, COM2_IRQ, false},
|
||||
};
|
||||
|
||||
#define UART_NLDEVS (sizeof(uart_lres) / sizeof(uart_lres[0]))
|
||||
|
||||
struct fifo {
|
||||
uint8_t buf[FIFOSZ];
|
||||
int rindex; /* index to read from */
|
||||
int windex; /* index to write to */
|
||||
int num; /* number of characters in the fifo */
|
||||
int size; /* size of the fifo */
|
||||
};
|
||||
|
||||
/* State of the host tty that backs an emulated UART. */
struct ttyfd {
	bool		opened;		/* a backend has been configured */
	int		fd;		/* tty device file descriptor */
	struct termios	tio_orig;	/* settings found when opened */
	struct termios	tio_new;	/* raw-mode settings applied */
};
|
||||
|
||||
struct uart_softc {
|
||||
pthread_mutex_t mtx; /* protects all softc elements */
|
||||
uint8_t data; /* Data register (R/W) */
|
||||
uint8_t ier; /* Interrupt enable register (R/W) */
|
||||
uint8_t lcr; /* Line control register (R/W) */
|
||||
uint8_t mcr; /* Modem control register (R/W) */
|
||||
uint8_t lsr; /* Line status register (R/W) */
|
||||
uint8_t msr; /* Modem status register (R/W) */
|
||||
uint8_t fcr; /* FIFO control register (W) */
|
||||
uint8_t scr; /* Scratch register (R/W) */
|
||||
|
||||
uint8_t dll; /* Baudrate divisor latch LSB */
|
||||
uint8_t dlh; /* Baudrate divisor latch MSB */
|
||||
|
||||
struct fifo rxfifo;
|
||||
struct mevent *mev;
|
||||
|
||||
struct ttyfd tty;
|
||||
bool thre_int_pending; /* THRE interrupt pending */
|
||||
|
||||
void *arg;
|
||||
uart_intr_func_t intr_assert;
|
||||
uart_intr_func_t intr_deassert;
|
||||
};
|
||||
|
||||
static void uart_drain(int fd, enum ev_type ev, void *arg);
|
||||
|
||||
static void
|
||||
ttyclose(void)
|
||||
{
|
||||
|
||||
tcsetattr(STDIN_FILENO, TCSANOW, &tio_stdio_orig);
|
||||
}
|
||||
|
||||
static void
|
||||
ttyopen(struct ttyfd *tf)
|
||||
{
|
||||
|
||||
tcgetattr(tf->fd, &tf->tio_orig);
|
||||
|
||||
tf->tio_new = tf->tio_orig;
|
||||
cfmakeraw(&tf->tio_new);
|
||||
tf->tio_new.c_cflag |= CLOCAL;
|
||||
tcsetattr(tf->fd, TCSANOW, &tf->tio_new);
|
||||
|
||||
if (tf->fd == STDIN_FILENO) {
|
||||
tio_stdio_orig = tf->tio_orig;
|
||||
atexit(ttyclose);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
ttyread(struct ttyfd *tf)
|
||||
{
|
||||
unsigned char rb;
|
||||
|
||||
if (read(tf->fd, &rb, 1) == 1)
|
||||
return (rb);
|
||||
else
|
||||
return (-1);
|
||||
}
|
||||
|
||||
static void
|
||||
ttywrite(struct ttyfd *tf, unsigned char wb)
|
||||
{
|
||||
|
||||
(void)write(tf->fd, &wb, 1);
|
||||
}
|
||||
|
||||
static void
|
||||
rxfifo_reset(struct uart_softc *sc, int size)
|
||||
{
|
||||
char flushbuf[32];
|
||||
struct fifo *fifo;
|
||||
ssize_t nread;
|
||||
int error;
|
||||
|
||||
fifo = &sc->rxfifo;
|
||||
bzero(fifo, sizeof(struct fifo));
|
||||
fifo->size = size;
|
||||
|
||||
if (sc->tty.opened) {
|
||||
/*
|
||||
* Flush any unread input from the tty buffer.
|
||||
*/
|
||||
while (1) {
|
||||
nread = read(sc->tty.fd, flushbuf, sizeof(flushbuf));
|
||||
if (nread != sizeof(flushbuf))
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Enable mevent to trigger when new characters are available
|
||||
* on the tty fd.
|
||||
*/
|
||||
error = mevent_enable(sc->mev);
|
||||
assert(error == 0);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
rxfifo_available(struct uart_softc *sc)
|
||||
{
|
||||
struct fifo *fifo;
|
||||
|
||||
fifo = &sc->rxfifo;
|
||||
return (fifo->num < fifo->size);
|
||||
}
|
||||
|
||||
static int
|
||||
rxfifo_putchar(struct uart_softc *sc, uint8_t ch)
|
||||
{
|
||||
struct fifo *fifo;
|
||||
int error;
|
||||
|
||||
fifo = &sc->rxfifo;
|
||||
|
||||
if (fifo->num < fifo->size) {
|
||||
fifo->buf[fifo->windex] = ch;
|
||||
fifo->windex = (fifo->windex + 1) % fifo->size;
|
||||
fifo->num++;
|
||||
if (!rxfifo_available(sc)) {
|
||||
if (sc->tty.opened) {
|
||||
/*
|
||||
* Disable mevent callback if the FIFO is full.
|
||||
*/
|
||||
error = mevent_disable(sc->mev);
|
||||
assert(error == 0);
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
} else
|
||||
return (-1);
|
||||
}
|
||||
|
||||
static int
|
||||
rxfifo_getchar(struct uart_softc *sc)
|
||||
{
|
||||
struct fifo *fifo;
|
||||
int c, error, wasfull;
|
||||
|
||||
wasfull = 0;
|
||||
fifo = &sc->rxfifo;
|
||||
if (fifo->num > 0) {
|
||||
if (!rxfifo_available(sc))
|
||||
wasfull = 1;
|
||||
c = fifo->buf[fifo->rindex];
|
||||
fifo->rindex = (fifo->rindex + 1) % fifo->size;
|
||||
fifo->num--;
|
||||
if (wasfull) {
|
||||
if (sc->tty.opened) {
|
||||
error = mevent_enable(sc->mev);
|
||||
assert(error == 0);
|
||||
}
|
||||
}
|
||||
return (c);
|
||||
} else
|
||||
return (-1);
|
||||
}
|
||||
|
||||
static int
|
||||
rxfifo_numchars(struct uart_softc *sc)
|
||||
{
|
||||
struct fifo *fifo = &sc->rxfifo;
|
||||
|
||||
return (fifo->num);
|
||||
}
|
||||
|
||||
static void
|
||||
uart_opentty(struct uart_softc *sc)
|
||||
{
|
||||
|
||||
ttyopen(&sc->tty);
|
||||
sc->mev = mevent_add(sc->tty.fd, EVF_READ, uart_drain, sc);
|
||||
assert(sc->mev != NULL);
|
||||
}
|
||||
|
||||
static uint8_t
|
||||
modem_status(uint8_t mcr)
|
||||
{
|
||||
uint8_t msr;
|
||||
|
||||
if (mcr & MCR_LOOPBACK) {
|
||||
/*
|
||||
* In the loopback mode certain bits from the MCR are
|
||||
* reflected back into MSR.
|
||||
*/
|
||||
msr = 0;
|
||||
if (mcr & MCR_RTS)
|
||||
msr |= MSR_CTS;
|
||||
if (mcr & MCR_DTR)
|
||||
msr |= MSR_DSR;
|
||||
if (mcr & MCR_OUT1)
|
||||
msr |= MSR_RI;
|
||||
if (mcr & MCR_OUT2)
|
||||
msr |= MSR_DCD;
|
||||
} else {
|
||||
/*
|
||||
* Always assert DCD and DSR so tty open doesn't block
|
||||
* even if CLOCAL is turned off.
|
||||
*/
|
||||
msr = MSR_DCD | MSR_DSR;
|
||||
}
|
||||
assert((msr & MSR_DELTA_MASK) == 0);
|
||||
|
||||
return (msr);
|
||||
}
|
||||
|
||||
/*
|
||||
* The IIR returns a prioritized interrupt reason:
|
||||
* - receive data available
|
||||
* - transmit holding register empty
|
||||
* - modem status change
|
||||
*
|
||||
* Return an interrupt reason if one is available.
|
||||
*/
|
||||
static int
|
||||
uart_intr_reason(struct uart_softc *sc)
|
||||
{
|
||||
|
||||
if ((sc->lsr & LSR_OE) != 0 && (sc->ier & IER_ERLS) != 0)
|
||||
return (IIR_RLS);
|
||||
else if (rxfifo_numchars(sc) > 0 && (sc->ier & IER_ERXRDY) != 0)
|
||||
return (IIR_RXTOUT);
|
||||
else if (sc->thre_int_pending && (sc->ier & IER_ETXRDY) != 0)
|
||||
return (IIR_TXRDY);
|
||||
else if ((sc->msr & MSR_DELTA_MASK) != 0 && (sc->ier & IER_EMSC) != 0)
|
||||
return (IIR_MLSC);
|
||||
else
|
||||
return (IIR_NOPEND);
|
||||
}
|
||||
|
||||
static void
|
||||
uart_reset(struct uart_softc *sc)
|
||||
{
|
||||
uint16_t divisor;
|
||||
|
||||
divisor = DEFAULT_RCLK / DEFAULT_BAUD / 16;
|
||||
sc->dll = divisor;
|
||||
sc->dlh = divisor >> 16;
|
||||
sc->msr = modem_status(sc->mcr);
|
||||
|
||||
rxfifo_reset(sc, 1); /* no fifo until enabled by software */
|
||||
}
|
||||
|
||||
/*
|
||||
* Toggle the COM port's intr pin depending on whether or not we have an
|
||||
* interrupt condition to report to the processor.
|
||||
*/
|
||||
static void
|
||||
uart_toggle_intr(struct uart_softc *sc)
|
||||
{
|
||||
uint8_t intr_reason;
|
||||
|
||||
intr_reason = uart_intr_reason(sc);
|
||||
|
||||
if (intr_reason == IIR_NOPEND)
|
||||
(*sc->intr_deassert)(sc->arg);
|
||||
else
|
||||
(*sc->intr_assert)(sc->arg);
|
||||
}
|
||||
|
||||
static void
|
||||
uart_drain(int fd, enum ev_type ev, void *arg)
|
||||
{
|
||||
struct uart_softc *sc;
|
||||
int ch;
|
||||
|
||||
sc = arg;
|
||||
|
||||
assert(fd == sc->tty.fd);
|
||||
assert(ev == EVF_READ);
|
||||
|
||||
/*
|
||||
* This routine is called in the context of the mevent thread
|
||||
* to take out the softc lock to protect against concurrent
|
||||
* access from a vCPU i/o exit
|
||||
*/
|
||||
pthread_mutex_lock(&sc->mtx);
|
||||
|
||||
if ((sc->mcr & MCR_LOOPBACK) != 0) {
|
||||
(void) ttyread(&sc->tty);
|
||||
} else {
|
||||
while (rxfifo_available(sc) &&
|
||||
((ch = ttyread(&sc->tty)) != -1)) {
|
||||
rxfifo_putchar(sc, ch);
|
||||
}
|
||||
uart_toggle_intr(sc);
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&sc->mtx);
|
||||
}
|
||||
|
||||
void
|
||||
uart_write(struct uart_softc *sc, int offset, uint8_t value)
|
||||
{
|
||||
int fifosz;
|
||||
uint8_t msr;
|
||||
|
||||
pthread_mutex_lock(&sc->mtx);
|
||||
|
||||
/*
|
||||
* Take care of the special case DLAB accesses first
|
||||
*/
|
||||
if ((sc->lcr & LCR_DLAB) != 0) {
|
||||
if (offset == REG_DLL) {
|
||||
sc->dll = value;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (offset == REG_DLH) {
|
||||
sc->dlh = value;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
switch (offset) {
|
||||
case REG_DATA:
|
||||
if (sc->mcr & MCR_LOOPBACK) {
|
||||
if (rxfifo_putchar(sc, value) != 0)
|
||||
sc->lsr |= LSR_OE;
|
||||
} else if (sc->tty.opened) {
|
||||
ttywrite(&sc->tty, value);
|
||||
} /* else drop on floor */
|
||||
sc->thre_int_pending = true;
|
||||
break;
|
||||
case REG_IER:
|
||||
/*
|
||||
* Apply mask so that bits 4-7 are 0
|
||||
* Also enables bits 0-3 only if they're 1
|
||||
*/
|
||||
sc->ier = value & 0x0F;
|
||||
break;
|
||||
case REG_FCR:
|
||||
/*
|
||||
* When moving from FIFO and 16450 mode and vice versa,
|
||||
* the FIFO contents are reset.
|
||||
*/
|
||||
if ((sc->fcr & FCR_ENABLE) ^ (value & FCR_ENABLE)) {
|
||||
fifosz = (value & FCR_ENABLE) ? FIFOSZ : 1;
|
||||
rxfifo_reset(sc, fifosz);
|
||||
}
|
||||
|
||||
/*
|
||||
* The FCR_ENABLE bit must be '1' for the programming
|
||||
* of other FCR bits to be effective.
|
||||
*/
|
||||
if ((value & FCR_ENABLE) == 0) {
|
||||
sc->fcr = 0;
|
||||
} else {
|
||||
if ((value & FCR_RCV_RST) != 0)
|
||||
rxfifo_reset(sc, FIFOSZ);
|
||||
|
||||
sc->fcr = value &
|
||||
(FCR_ENABLE | FCR_DMA | FCR_RX_MASK);
|
||||
}
|
||||
break;
|
||||
case REG_LCR:
|
||||
sc->lcr = value;
|
||||
break;
|
||||
case REG_MCR:
|
||||
/* Apply mask so that bits 5-7 are 0 */
|
||||
sc->mcr = value & 0x1F;
|
||||
msr = modem_status(sc->mcr);
|
||||
|
||||
/*
|
||||
* Detect if there has been any change between the
|
||||
* previous and the new value of MSR. If there is
|
||||
* then assert the appropriate MSR delta bit.
|
||||
*/
|
||||
if ((msr & MSR_CTS) ^ (sc->msr & MSR_CTS))
|
||||
sc->msr |= MSR_DCTS;
|
||||
if ((msr & MSR_DSR) ^ (sc->msr & MSR_DSR))
|
||||
sc->msr |= MSR_DDSR;
|
||||
if ((msr & MSR_DCD) ^ (sc->msr & MSR_DCD))
|
||||
sc->msr |= MSR_DDCD;
|
||||
if ((sc->msr & MSR_RI) != 0 && (msr & MSR_RI) == 0)
|
||||
sc->msr |= MSR_TERI;
|
||||
|
||||
/*
|
||||
* Update the value of MSR while retaining the delta
|
||||
* bits.
|
||||
*/
|
||||
sc->msr &= MSR_DELTA_MASK;
|
||||
sc->msr |= msr;
|
||||
break;
|
||||
case REG_LSR:
|
||||
/*
|
||||
* Line status register is not meant to be written to
|
||||
* during normal operation.
|
||||
*/
|
||||
break;
|
||||
case REG_MSR:
|
||||
/*
|
||||
* As far as I can tell MSR is a read-only register.
|
||||
*/
|
||||
break;
|
||||
case REG_SCR:
|
||||
sc->scr = value;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
done:
|
||||
uart_toggle_intr(sc);
|
||||
pthread_mutex_unlock(&sc->mtx);
|
||||
}
|
||||
|
||||
uint8_t
|
||||
uart_read(struct uart_softc *sc, int offset)
|
||||
{
|
||||
uint8_t iir, intr_reason, reg;
|
||||
|
||||
pthread_mutex_lock(&sc->mtx);
|
||||
|
||||
/*
|
||||
* Take care of the special case DLAB accesses first
|
||||
*/
|
||||
if ((sc->lcr & LCR_DLAB) != 0) {
|
||||
if (offset == REG_DLL) {
|
||||
reg = sc->dll;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (offset == REG_DLH) {
|
||||
reg = sc->dlh;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
switch (offset) {
|
||||
case REG_DATA:
|
||||
reg = rxfifo_getchar(sc);
|
||||
break;
|
||||
case REG_IER:
|
||||
reg = sc->ier;
|
||||
break;
|
||||
case REG_IIR:
|
||||
iir = (sc->fcr & FCR_ENABLE) ? IIR_FIFO_MASK : 0;
|
||||
|
||||
intr_reason = uart_intr_reason(sc);
|
||||
|
||||
/*
|
||||
* Deal with side effects of reading the IIR register
|
||||
*/
|
||||
if (intr_reason == IIR_TXRDY)
|
||||
sc->thre_int_pending = false;
|
||||
|
||||
iir |= intr_reason;
|
||||
|
||||
reg = iir;
|
||||
break;
|
||||
case REG_LCR:
|
||||
reg = sc->lcr;
|
||||
break;
|
||||
case REG_MCR:
|
||||
reg = sc->mcr;
|
||||
break;
|
||||
case REG_LSR:
|
||||
/* Transmitter is always ready for more data */
|
||||
sc->lsr |= LSR_TEMT | LSR_THRE;
|
||||
|
||||
/* Check for new receive data */
|
||||
if (rxfifo_numchars(sc) > 0)
|
||||
sc->lsr |= LSR_RXRDY;
|
||||
else
|
||||
sc->lsr &= ~LSR_RXRDY;
|
||||
|
||||
reg = sc->lsr;
|
||||
|
||||
/* The LSR_OE bit is cleared on LSR read */
|
||||
sc->lsr &= ~LSR_OE;
|
||||
break;
|
||||
case REG_MSR:
|
||||
/*
|
||||
* MSR delta bits are cleared on read
|
||||
*/
|
||||
reg = sc->msr;
|
||||
sc->msr &= ~MSR_DELTA_MASK;
|
||||
break;
|
||||
case REG_SCR:
|
||||
reg = sc->scr;
|
||||
break;
|
||||
default:
|
||||
reg = 0xFF;
|
||||
break;
|
||||
}
|
||||
|
||||
done:
|
||||
uart_toggle_intr(sc);
|
||||
pthread_mutex_unlock(&sc->mtx);
|
||||
|
||||
return (reg);
|
||||
}
|
||||
|
||||
int
|
||||
uart_legacy_alloc(int which, int *baseaddr, int *irq)
|
||||
{
|
||||
|
||||
if (which < 0 || which >= UART_NLDEVS || uart_lres[which].inuse)
|
||||
return (-1);
|
||||
|
||||
uart_lres[which].inuse = true;
|
||||
*baseaddr = uart_lres[which].baseaddr;
|
||||
*irq = uart_lres[which].irq;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
struct uart_softc *
|
||||
uart_init(uart_intr_func_t intr_assert, uart_intr_func_t intr_deassert,
|
||||
void *arg)
|
||||
{
|
||||
struct uart_softc *sc;
|
||||
|
||||
sc = calloc(1, sizeof(struct uart_softc));
|
||||
|
||||
sc->arg = arg;
|
||||
sc->intr_assert = intr_assert;
|
||||
sc->intr_deassert = intr_deassert;
|
||||
|
||||
pthread_mutex_init(&sc->mtx, NULL);
|
||||
|
||||
uart_reset(sc);
|
||||
|
||||
return (sc);
|
||||
}
|
||||
|
||||
static int
|
||||
uart_tty_backend(struct uart_softc *sc, const char *opts)
|
||||
{
|
||||
int fd;
|
||||
int retval;
|
||||
|
||||
retval = -1;
|
||||
|
||||
fd = open(opts, O_RDWR | O_NONBLOCK);
|
||||
if (fd > 0 && isatty(fd)) {
|
||||
sc->tty.fd = fd;
|
||||
sc->tty.opened = true;
|
||||
retval = 0;
|
||||
}
|
||||
|
||||
return (retval);
|
||||
}
|
||||
|
||||
int
|
||||
uart_set_backend(struct uart_softc *sc, const char *opts)
|
||||
{
|
||||
int retval;
|
||||
|
||||
retval = -1;
|
||||
|
||||
if (opts == NULL)
|
||||
return (0);
|
||||
|
||||
if (strcmp("stdio", opts) == 0) {
|
||||
if (!uart_stdio) {
|
||||
sc->tty.fd = STDIN_FILENO;
|
||||
sc->tty.opened = true;
|
||||
uart_stdio = true;
|
||||
retval = 0;
|
||||
}
|
||||
} else if (uart_tty_backend(sc, opts) == 0) {
|
||||
retval = 0;
|
||||
}
|
||||
|
||||
/* Make the backend file descriptor non-blocking */
|
||||
if (retval == 0)
|
||||
retval = fcntl(sc->tty.fd, F_SETFL, O_NONBLOCK);
|
||||
|
||||
if (retval == 0)
|
||||
uart_opentty(sc);
|
||||
|
||||
return (retval);
|
||||
}
|
45
uart_emul.h
Normal file
45
uart_emul.h
Normal file
@ -0,0 +1,45 @@
|
||||
/*-
|
||||
* Copyright (c) 2013 Neel Natu <neel@freebsd.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _UART_EMUL_H_
#define	_UART_EMUL_H_

/* uint8_t is used in the prototypes below. */
#include <stdint.h>

/* Number of I/O ports occupied by the UART register block. */
#define	UART_IO_BAR_SIZE	8

struct uart_softc;

/* Callback used to assert or deassert the UART's interrupt line. */
typedef void (*uart_intr_func_t)(void *arg);

/* Create a UART instance; the callbacks are invoked with 'arg'. */
struct uart_softc *uart_init(uart_intr_func_t intr_assert,
		uart_intr_func_t intr_deassert, void *arg);

/* Claim legacy COM port 'unit'; returns 0 and fills in ioaddr/irq, or -1. */
int	uart_legacy_alloc(int unit, int *ioaddr, int *irq);
/* Guest read of the register at 'offset'. */
uint8_t	uart_read(struct uart_softc *sc, int offset);
/* Guest write of 'value' to the register at 'offset'. */
void	uart_write(struct uart_softc *sc, int offset, uint8_t value);
/* Attach a backend ("stdio" or a tty path); returns 0 on success. */
int	uart_set_backend(struct uart_softc *sc, const char *opt);
#endif
|
777
virtio.c
Normal file
777
virtio.c
Normal file
@ -0,0 +1,777 @@
|
||||
/*-
|
||||
* Copyright (c) 2013 Chris Torek <torek @ torek net>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <pthread.h>
|
||||
#include <pthread_np.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
#include "pci_emul.h"
|
||||
#include "virtio.h"
|
||||
|
||||
/*
|
||||
* Functions for dealing with generalized "virtual devices" as
|
||||
* defined by <https://www.google.com/#output=search&q=virtio+spec>
|
||||
*/
|
||||
|
||||
/*
|
||||
* In case we decide to relax the "virtio softc comes at the
|
||||
* front of virtio-based device softc" constraint, let's use
|
||||
* this to convert.
|
||||
*/
|
||||
#define DEV_SOFTC(vs) ((void *)(vs))
|
||||
|
||||
/*
|
||||
* Link a virtio_softc to its constants, the device softc, and
|
||||
* the PCI emulation.
|
||||
*/
|
||||
void
|
||||
vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc,
|
||||
void *dev_softc, struct pci_devinst *pi,
|
||||
struct vqueue_info *queues)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* vs and dev_softc addresses must match */
|
||||
assert((void *)vs == dev_softc);
|
||||
vs->vs_vc = vc;
|
||||
vs->vs_pi = pi;
|
||||
pi->pi_arg = vs;
|
||||
|
||||
vs->vs_queues = queues;
|
||||
for (i = 0; i < vc->vc_nvq; i++) {
|
||||
queues[i].vq_vs = vs;
|
||||
queues[i].vq_num = i;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset device (device-wide). This erases all queues, i.e.,
|
||||
* all the queues become invalid (though we don't wipe out the
|
||||
* internal pointers, we just clear the VQ_ALLOC flag).
|
||||
*
|
||||
* It resets negotiated features to "none".
|
||||
*
|
||||
* If MSI-X is enabled, this also resets all the vectors to NO_VECTOR.
|
||||
*/
|
||||
void
|
||||
vi_reset_dev(struct virtio_softc *vs)
|
||||
{
|
||||
struct vqueue_info *vq;
|
||||
int i, nvq;
|
||||
|
||||
if (vs->vs_mtx)
|
||||
assert(pthread_mutex_isowned_np(vs->vs_mtx));
|
||||
|
||||
nvq = vs->vs_vc->vc_nvq;
|
||||
for (vq = vs->vs_queues, i = 0; i < nvq; vq++, i++) {
|
||||
vq->vq_flags = 0;
|
||||
vq->vq_last_avail = 0;
|
||||
vq->vq_save_used = 0;
|
||||
vq->vq_pfn = 0;
|
||||
vq->vq_msix_idx = VIRTIO_MSI_NO_VECTOR;
|
||||
}
|
||||
vs->vs_negotiated_caps = 0;
|
||||
vs->vs_curq = 0;
|
||||
/* vs->vs_status = 0; -- redundant */
|
||||
if (vs->vs_isr)
|
||||
pci_lintr_deassert(vs->vs_pi);
|
||||
vs->vs_isr = 0;
|
||||
vs->vs_msix_cfg_idx = VIRTIO_MSI_NO_VECTOR;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set I/O BAR (usually 0) to map PCI config registers.
|
||||
*/
|
||||
void
|
||||
vi_set_io_bar(struct virtio_softc *vs, int barnum)
|
||||
{
|
||||
size_t size;
|
||||
|
||||
/*
|
||||
* ??? should we use CFG0 if MSI-X is disabled?
|
||||
* Existing code did not...
|
||||
*/
|
||||
size = VTCFG_R_CFG1 + vs->vs_vc->vc_cfgsize;
|
||||
pci_emul_alloc_bar(vs->vs_pi, barnum, PCIBAR_IO, size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize MSI-X vector capabilities if we're to use MSI-X,
|
||||
* or MSI capabilities if not.
|
||||
*
|
||||
* We assume we want one MSI-X vector per queue, here, plus one
|
||||
* for the config vec.
|
||||
*/
|
||||
int
|
||||
vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix)
|
||||
{
|
||||
int nvec;
|
||||
|
||||
if (use_msix) {
|
||||
vs->vs_flags |= VIRTIO_USE_MSIX;
|
||||
VS_LOCK(vs);
|
||||
vi_reset_dev(vs); /* set all vectors to NO_VECTOR */
|
||||
VS_UNLOCK(vs);
|
||||
nvec = vs->vs_vc->vc_nvq + 1;
|
||||
if (pci_emul_add_msixcap(vs->vs_pi, nvec, barnum))
|
||||
return (1);
|
||||
} else
|
||||
vs->vs_flags &= ~VIRTIO_USE_MSIX;
|
||||
|
||||
/* Only 1 MSI vector for bhyve */
|
||||
pci_emul_add_msicap(vs->vs_pi, 1);
|
||||
|
||||
/* Legacy interrupts are mandatory for virtio devices */
|
||||
pci_lintr_request(vs->vs_pi);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the currently-selected virtio queue (vs->vs_curq).
|
||||
* The guest just gave us a page frame number, from which we can
|
||||
* calculate the addresses of the queue.
|
||||
*/
|
||||
void
|
||||
vi_vq_init(struct virtio_softc *vs, uint32_t pfn)
|
||||
{
|
||||
struct vqueue_info *vq;
|
||||
uint64_t phys;
|
||||
size_t size;
|
||||
char *base;
|
||||
|
||||
vq = &vs->vs_queues[vs->vs_curq];
|
||||
vq->vq_pfn = pfn;
|
||||
phys = (uint64_t)pfn << VRING_PFN;
|
||||
size = vring_size(vq->vq_qsize);
|
||||
base = paddr_guest2host(vs->vs_pi->pi_vmctx, phys, size);
|
||||
|
||||
/* First page(s) are descriptors... */
|
||||
vq->vq_desc = (struct virtio_desc *)base;
|
||||
base += vq->vq_qsize * sizeof(struct virtio_desc);
|
||||
|
||||
/* ... immediately followed by "avail" ring (entirely uint16_t's) */
|
||||
vq->vq_avail = (struct vring_avail *)base;
|
||||
base += (2 + vq->vq_qsize + 1) * sizeof(uint16_t);
|
||||
|
||||
/* Then it's rounded up to the next page... */
|
||||
base = (char *)roundup2((uintptr_t)base, VRING_ALIGN);
|
||||
|
||||
/* ... and the last page(s) are the used ring. */
|
||||
vq->vq_used = (struct vring_used *)base;
|
||||
|
||||
/* Mark queue as allocated, and start at 0 when we use it. */
|
||||
vq->vq_flags = VQ_ALLOC;
|
||||
vq->vq_last_avail = 0;
|
||||
vq->vq_save_used = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper inline for vq_getchain(): record the i'th "real"
|
||||
* descriptor.
|
||||
*/
|
||||
static inline void
|
||||
_vq_record(int i, volatile struct virtio_desc *vd, struct vmctx *ctx,
|
||||
struct iovec *iov, int n_iov, uint16_t *flags) {
|
||||
|
||||
if (i >= n_iov)
|
||||
return;
|
||||
iov[i].iov_base = paddr_guest2host(ctx, vd->vd_addr, vd->vd_len);
|
||||
iov[i].iov_len = vd->vd_len;
|
||||
if (flags != NULL)
|
||||
flags[i] = vd->vd_flags;
|
||||
}
|
||||
#define VQ_MAX_DESCRIPTORS 512 /* see below */
|
||||
|
||||
/*
|
||||
* Examine the chain of descriptors starting at the "next one" to
|
||||
* make sure that they describe a sensible request. If so, return
|
||||
* the number of "real" descriptors that would be needed/used in
|
||||
* acting on this request. This may be smaller than the number of
|
||||
* available descriptors, e.g., if there are two available but
|
||||
* they are two separate requests, this just returns 1. Or, it
|
||||
* may be larger: if there are indirect descriptors involved,
|
||||
* there may only be one descriptor available but it may be an
|
||||
* indirect pointing to eight more. We return 8 in this case,
|
||||
* i.e., we do not count the indirect descriptors, only the "real"
|
||||
* ones.
|
||||
*
|
||||
* Basically, this vets the vd_flags and vd_next field of each
|
||||
* descriptor and tells you how many are involved. Since some may
|
||||
* be indirect, this also needs the vmctx (in the pci_devinst
|
||||
* at vs->vs_pi) so that it can find indirect descriptors.
|
||||
*
|
||||
* As we process each descriptor, we copy and adjust it (guest to
|
||||
* host address wise, also using the vmctx) into the given iov[]
|
||||
* array (of the given size). If the array overflows, we stop
|
||||
* placing values into the array but keep processing descriptors,
|
||||
* up to VQ_MAX_DESCRIPTORS, before giving up and returning -1.
|
||||
* So you, the caller, must not assume that iov[] is as big as the
|
||||
* return value (you can process the same thing twice to allocate
|
||||
* a larger iov array if needed, or supply a zero length to find
|
||||
* out how much space is needed).
|
||||
*
|
||||
* If you want to verify the WRITE flag on each descriptor, pass a
|
||||
* non-NULL "flags" pointer to an array of "uint16_t" of the same size
|
||||
* as n_iov and we'll copy each vd_flags field after unwinding any
|
||||
* indirects.
|
||||
*
|
||||
* If some descriptor(s) are invalid, this prints a diagnostic message
|
||||
* and returns -1. If no descriptors are ready now it simply returns 0.
|
||||
*
|
||||
* You are assumed to have done a vq_ring_ready() if needed (note
|
||||
* that vq_has_descs() does one).
|
||||
*/
|
||||
int
|
||||
vq_getchain(struct vqueue_info *vq, uint16_t *pidx,
|
||||
struct iovec *iov, int n_iov, uint16_t *flags)
|
||||
{
|
||||
int i;
|
||||
u_int ndesc, n_indir;
|
||||
u_int idx, next;
|
||||
volatile struct virtio_desc *vdir, *vindir, *vp;
|
||||
struct vmctx *ctx;
|
||||
struct virtio_softc *vs;
|
||||
const char *name;
|
||||
|
||||
vs = vq->vq_vs;
|
||||
name = vs->vs_vc->vc_name;
|
||||
|
||||
/*
|
||||
* Note: it's the responsibility of the guest not to
|
||||
* update vq->vq_avail->va_idx until all of the descriptors
|
||||
* the guest has written are valid (including all their
|
||||
* vd_next fields and vd_flags).
|
||||
*
|
||||
* Compute (last_avail - va_idx) in integers mod 2**16. This is
|
||||
* the number of descriptors the device has made available
|
||||
* since the last time we updated vq->vq_last_avail.
|
||||
*
|
||||
* We just need to do the subtraction as an unsigned int,
|
||||
* then trim off excess bits.
|
||||
*/
|
||||
idx = vq->vq_last_avail;
|
||||
ndesc = (uint16_t)((u_int)vq->vq_avail->va_idx - idx);
|
||||
if (ndesc == 0)
|
||||
return (0);
|
||||
if (ndesc > vq->vq_qsize) {
|
||||
/* XXX need better way to diagnose issues */
|
||||
fprintf(stderr,
|
||||
"%s: ndesc (%u) out of range, driver confused?\r\n",
|
||||
name, (u_int)ndesc);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Now count/parse "involved" descriptors starting from
|
||||
* the head of the chain.
|
||||
*
|
||||
* To prevent loops, we could be more complicated and
|
||||
* check whether we're re-visiting a previously visited
|
||||
* index, but we just abort if the count gets excessive.
|
||||
*/
|
||||
ctx = vs->vs_pi->pi_vmctx;
|
||||
*pidx = next = vq->vq_avail->va_ring[idx & (vq->vq_qsize - 1)];
|
||||
vq->vq_last_avail++;
|
||||
for (i = 0; i < VQ_MAX_DESCRIPTORS; next = vdir->vd_next) {
|
||||
if (next >= vq->vq_qsize) {
|
||||
fprintf(stderr,
|
||||
"%s: descriptor index %u out of range, "
|
||||
"driver confused?\r\n",
|
||||
name, next);
|
||||
return (-1);
|
||||
}
|
||||
vdir = &vq->vq_desc[next];
|
||||
if ((vdir->vd_flags & VRING_DESC_F_INDIRECT) == 0) {
|
||||
_vq_record(i, vdir, ctx, iov, n_iov, flags);
|
||||
i++;
|
||||
} else if ((vs->vs_vc->vc_hv_caps &
|
||||
VIRTIO_RING_F_INDIRECT_DESC) == 0) {
|
||||
fprintf(stderr,
|
||||
"%s: descriptor has forbidden INDIRECT flag, "
|
||||
"driver confused?\r\n",
|
||||
name);
|
||||
return (-1);
|
||||
} else {
|
||||
n_indir = vdir->vd_len / 16;
|
||||
if ((vdir->vd_len & 0xf) || n_indir == 0) {
|
||||
fprintf(stderr,
|
||||
"%s: invalid indir len 0x%x, "
|
||||
"driver confused?\r\n",
|
||||
name, (u_int)vdir->vd_len);
|
||||
return (-1);
|
||||
}
|
||||
vindir = paddr_guest2host(ctx,
|
||||
vdir->vd_addr, vdir->vd_len);
|
||||
/*
|
||||
* Indirects start at the 0th, then follow
|
||||
* their own embedded "next"s until those run
|
||||
* out. Each one's indirect flag must be off
|
||||
* (we don't really have to check, could just
|
||||
* ignore errors...).
|
||||
*/
|
||||
next = 0;
|
||||
for (;;) {
|
||||
vp = &vindir[next];
|
||||
if (vp->vd_flags & VRING_DESC_F_INDIRECT) {
|
||||
fprintf(stderr,
|
||||
"%s: indirect desc has INDIR flag,"
|
||||
" driver confused?\r\n",
|
||||
name);
|
||||
return (-1);
|
||||
}
|
||||
_vq_record(i, vp, ctx, iov, n_iov, flags);
|
||||
if (++i > VQ_MAX_DESCRIPTORS)
|
||||
goto loopy;
|
||||
if ((vp->vd_flags & VRING_DESC_F_NEXT) == 0)
|
||||
break;
|
||||
next = vp->vd_next;
|
||||
if (next >= n_indir) {
|
||||
fprintf(stderr,
|
||||
"%s: invalid next %u > %u, "
|
||||
"driver confused?\r\n",
|
||||
name, (u_int)next, n_indir);
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
if ((vdir->vd_flags & VRING_DESC_F_NEXT) == 0)
|
||||
return (i);
|
||||
}
|
||||
loopy:
|
||||
fprintf(stderr,
|
||||
"%s: descriptor loop? count > %d - driver confused?\r\n",
|
||||
name, i);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the currently-first request chain back to the available queue.
|
||||
*
|
||||
* (This chain is the one you handled when you called vq_getchain()
|
||||
* and used its positive return value.)
|
||||
*/
|
||||
void
|
||||
vq_retchain(struct vqueue_info *vq)
|
||||
{
|
||||
|
||||
vq->vq_last_avail--;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return specified request chain to the guest, setting its I/O length
|
||||
* to the provided value.
|
||||
*
|
||||
* (This chain is the one you handled when you called vq_getchain()
|
||||
* and used its positive return value.)
|
||||
*/
|
||||
void
|
||||
vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen)
|
||||
{
|
||||
uint16_t uidx, mask;
|
||||
volatile struct vring_used *vuh;
|
||||
volatile struct virtio_used *vue;
|
||||
|
||||
/*
|
||||
* Notes:
|
||||
* - mask is N-1 where N is a power of 2 so computes x % N
|
||||
* - vuh points to the "used" data shared with guest
|
||||
* - vue points to the "used" ring entry we want to update
|
||||
* - head is the same value we compute in vq_iovecs().
|
||||
*
|
||||
* (I apologize for the two fields named vu_idx; the
|
||||
* virtio spec calls the one that vue points to, "id"...)
|
||||
*/
|
||||
mask = vq->vq_qsize - 1;
|
||||
vuh = vq->vq_used;
|
||||
|
||||
uidx = vuh->vu_idx;
|
||||
vue = &vuh->vu_ring[uidx++ & mask];
|
||||
vue->vu_idx = idx;
|
||||
vue->vu_tlen = iolen;
|
||||
vuh->vu_idx = uidx;
|
||||
}
|
||||
|
||||
/*
|
||||
* Driver has finished processing "available" chains and calling
|
||||
* vq_relchain on each one. If driver used all the available
|
||||
* chains, used_all_avail should be set.
|
||||
*
|
||||
* If the "used" index moved we may need to inform the guest, i.e.,
|
||||
* deliver an interrupt. Even if the used index did NOT move we
|
||||
* may need to deliver an interrupt, if the avail ring is empty and
|
||||
* we are supposed to interrupt on empty.
|
||||
*
|
||||
* Note that used_all_avail is provided by the caller because it's
|
||||
* a snapshot of the ring state when he decided to finish interrupt
|
||||
* processing -- it's possible that descriptors became available after
|
||||
* that point. (It's also typically a constant 1/True as well.)
|
||||
*/
|
||||
void
|
||||
vq_endchains(struct vqueue_info *vq, int used_all_avail)
|
||||
{
|
||||
struct virtio_softc *vs;
|
||||
uint16_t event_idx, new_idx, old_idx;
|
||||
int intr;
|
||||
|
||||
/*
|
||||
* Interrupt generation: if we're using EVENT_IDX,
|
||||
* interrupt if we've crossed the event threshold.
|
||||
* Otherwise interrupt is generated if we added "used" entries,
|
||||
* but suppressed by VRING_AVAIL_F_NO_INTERRUPT.
|
||||
*
|
||||
* In any case, though, if NOTIFY_ON_EMPTY is set and the
|
||||
* entire avail was processed, we need to interrupt always.
|
||||
*/
|
||||
vs = vq->vq_vs;
|
||||
old_idx = vq->vq_save_used;
|
||||
vq->vq_save_used = new_idx = vq->vq_used->vu_idx;
|
||||
if (used_all_avail &&
|
||||
(vs->vs_negotiated_caps & VIRTIO_F_NOTIFY_ON_EMPTY))
|
||||
intr = 1;
|
||||
else if (vs->vs_negotiated_caps & VIRTIO_RING_F_EVENT_IDX) {
|
||||
event_idx = VQ_USED_EVENT_IDX(vq);
|
||||
/*
|
||||
* This calculation is per docs and the kernel
|
||||
* (see src/sys/dev/virtio/virtio_ring.h).
|
||||
*/
|
||||
intr = (uint16_t)(new_idx - event_idx - 1) <
|
||||
(uint16_t)(new_idx - old_idx);
|
||||
} else {
|
||||
intr = new_idx != old_idx &&
|
||||
!(vq->vq_avail->va_flags & VRING_AVAIL_F_NO_INTERRUPT);
|
||||
}
|
||||
if (intr)
|
||||
vq_interrupt(vs, vq);
|
||||
}
|
||||
|
||||
/* Note: these are in sorted order to make for a fast search */
|
||||
static struct config_reg {
|
||||
uint16_t cr_offset; /* register offset */
|
||||
uint8_t cr_size; /* size (bytes) */
|
||||
uint8_t cr_ro; /* true => reg is read only */
|
||||
const char *cr_name; /* name of reg */
|
||||
} config_regs[] = {
|
||||
{ VTCFG_R_HOSTCAP, 4, 1, "HOSTCAP" },
|
||||
{ VTCFG_R_GUESTCAP, 4, 0, "GUESTCAP" },
|
||||
{ VTCFG_R_PFN, 4, 0, "PFN" },
|
||||
{ VTCFG_R_QNUM, 2, 1, "QNUM" },
|
||||
{ VTCFG_R_QSEL, 2, 0, "QSEL" },
|
||||
{ VTCFG_R_QNOTIFY, 2, 0, "QNOTIFY" },
|
||||
{ VTCFG_R_STATUS, 1, 0, "STATUS" },
|
||||
{ VTCFG_R_ISR, 1, 0, "ISR" },
|
||||
{ VTCFG_R_CFGVEC, 2, 0, "CFGVEC" },
|
||||
{ VTCFG_R_QVEC, 2, 0, "QVEC" },
|
||||
};
|
||||
|
||||
static inline struct config_reg *
|
||||
vi_find_cr(int offset) {
|
||||
u_int hi, lo, mid;
|
||||
struct config_reg *cr;
|
||||
|
||||
lo = 0;
|
||||
hi = sizeof(config_regs) / sizeof(*config_regs) - 1;
|
||||
while (hi >= lo) {
|
||||
mid = (hi + lo) >> 1;
|
||||
cr = &config_regs[mid];
|
||||
if (cr->cr_offset == offset)
|
||||
return (cr);
|
||||
if (cr->cr_offset < offset)
|
||||
lo = mid + 1;
|
||||
else
|
||||
hi = mid - 1;
|
||||
}
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle pci config space reads.
|
||||
* If it's to the MSI-X info, do that.
|
||||
* If it's part of the virtio standard stuff, do that.
|
||||
* Otherwise dispatch to the actual driver.
|
||||
*/
|
||||
uint64_t
|
||||
vi_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
|
||||
int baridx, uint64_t offset, int size)
|
||||
{
|
||||
struct virtio_softc *vs = pi->pi_arg;
|
||||
struct virtio_consts *vc;
|
||||
struct config_reg *cr;
|
||||
uint64_t virtio_config_size, max;
|
||||
const char *name;
|
||||
uint32_t newoff;
|
||||
uint32_t value;
|
||||
int error;
|
||||
|
||||
if (vs->vs_flags & VIRTIO_USE_MSIX) {
|
||||
if (baridx == pci_msix_table_bar(pi) ||
|
||||
baridx == pci_msix_pba_bar(pi)) {
|
||||
return (pci_emul_msix_tread(pi, offset, size));
|
||||
}
|
||||
}
|
||||
|
||||
/* XXX probably should do something better than just assert() */
|
||||
assert(baridx == 0);
|
||||
|
||||
if (vs->vs_mtx)
|
||||
pthread_mutex_lock(vs->vs_mtx);
|
||||
|
||||
vc = vs->vs_vc;
|
||||
name = vc->vc_name;
|
||||
value = size == 1 ? 0xff : size == 2 ? 0xffff : 0xffffffff;
|
||||
|
||||
if (size != 1 && size != 2 && size != 4)
|
||||
goto bad;
|
||||
|
||||
if (pci_msix_enabled(pi))
|
||||
virtio_config_size = VTCFG_R_CFG1;
|
||||
else
|
||||
virtio_config_size = VTCFG_R_CFG0;
|
||||
|
||||
if (offset >= virtio_config_size) {
|
||||
/*
|
||||
* Subtract off the standard size (including MSI-X
|
||||
* registers if enabled) and dispatch to underlying driver.
|
||||
* If that fails, fall into general code.
|
||||
*/
|
||||
newoff = offset - virtio_config_size;
|
||||
max = vc->vc_cfgsize ? vc->vc_cfgsize : 0x100000000;
|
||||
if (newoff + size > max)
|
||||
goto bad;
|
||||
error = (*vc->vc_cfgread)(DEV_SOFTC(vs), newoff, size, &value);
|
||||
if (!error)
|
||||
goto done;
|
||||
}
|
||||
|
||||
bad:
|
||||
cr = vi_find_cr(offset);
|
||||
if (cr == NULL || cr->cr_size != size) {
|
||||
if (cr != NULL) {
|
||||
/* offset must be OK, so size must be bad */
|
||||
fprintf(stderr,
|
||||
"%s: read from %s: bad size %d\r\n",
|
||||
name, cr->cr_name, size);
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"%s: read from bad offset/size %jd/%d\r\n",
|
||||
name, (uintmax_t)offset, size);
|
||||
}
|
||||
goto done;
|
||||
}
|
||||
|
||||
switch (offset) {
|
||||
case VTCFG_R_HOSTCAP:
|
||||
value = vc->vc_hv_caps;
|
||||
break;
|
||||
case VTCFG_R_GUESTCAP:
|
||||
value = vs->vs_negotiated_caps;
|
||||
break;
|
||||
case VTCFG_R_PFN:
|
||||
if (vs->vs_curq < vc->vc_nvq)
|
||||
value = vs->vs_queues[vs->vs_curq].vq_pfn;
|
||||
break;
|
||||
case VTCFG_R_QNUM:
|
||||
value = vs->vs_curq < vc->vc_nvq ?
|
||||
vs->vs_queues[vs->vs_curq].vq_qsize : 0;
|
||||
break;
|
||||
case VTCFG_R_QSEL:
|
||||
value = vs->vs_curq;
|
||||
break;
|
||||
case VTCFG_R_QNOTIFY:
|
||||
value = 0; /* XXX */
|
||||
break;
|
||||
case VTCFG_R_STATUS:
|
||||
value = vs->vs_status;
|
||||
break;
|
||||
case VTCFG_R_ISR:
|
||||
value = vs->vs_isr;
|
||||
vs->vs_isr = 0; /* a read clears this flag */
|
||||
if (value)
|
||||
pci_lintr_deassert(pi);
|
||||
break;
|
||||
case VTCFG_R_CFGVEC:
|
||||
value = vs->vs_msix_cfg_idx;
|
||||
break;
|
||||
case VTCFG_R_QVEC:
|
||||
value = vs->vs_curq < vc->vc_nvq ?
|
||||
vs->vs_queues[vs->vs_curq].vq_msix_idx :
|
||||
VIRTIO_MSI_NO_VECTOR;
|
||||
break;
|
||||
}
|
||||
done:
|
||||
if (vs->vs_mtx)
|
||||
pthread_mutex_unlock(vs->vs_mtx);
|
||||
return (value);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle pci config space writes.
|
||||
* If it's to the MSI-X info, do that.
|
||||
* If it's part of the virtio standard stuff, do that.
|
||||
* Otherwise dispatch to the actual driver.
|
||||
*/
|
||||
void
|
||||
vi_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
|
||||
int baridx, uint64_t offset, int size, uint64_t value)
|
||||
{
|
||||
struct virtio_softc *vs = pi->pi_arg;
|
||||
struct vqueue_info *vq;
|
||||
struct virtio_consts *vc;
|
||||
struct config_reg *cr;
|
||||
uint64_t virtio_config_size, max;
|
||||
const char *name;
|
||||
uint32_t newoff;
|
||||
int error;
|
||||
|
||||
if (vs->vs_flags & VIRTIO_USE_MSIX) {
|
||||
if (baridx == pci_msix_table_bar(pi) ||
|
||||
baridx == pci_msix_pba_bar(pi)) {
|
||||
pci_emul_msix_twrite(pi, offset, size, value);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* XXX probably should do something better than just assert() */
|
||||
assert(baridx == 0);
|
||||
|
||||
if (vs->vs_mtx)
|
||||
pthread_mutex_lock(vs->vs_mtx);
|
||||
|
||||
vc = vs->vs_vc;
|
||||
name = vc->vc_name;
|
||||
|
||||
if (size != 1 && size != 2 && size != 4)
|
||||
goto bad;
|
||||
|
||||
if (pci_msix_enabled(pi))
|
||||
virtio_config_size = VTCFG_R_CFG1;
|
||||
else
|
||||
virtio_config_size = VTCFG_R_CFG0;
|
||||
|
||||
if (offset >= virtio_config_size) {
|
||||
/*
|
||||
* Subtract off the standard size (including MSI-X
|
||||
* registers if enabled) and dispatch to underlying driver.
|
||||
*/
|
||||
newoff = offset - virtio_config_size;
|
||||
max = vc->vc_cfgsize ? vc->vc_cfgsize : 0x100000000;
|
||||
if (newoff + size > max)
|
||||
goto bad;
|
||||
error = (*vc->vc_cfgwrite)(DEV_SOFTC(vs), newoff, size, value);
|
||||
if (!error)
|
||||
goto done;
|
||||
}
|
||||
|
||||
bad:
|
||||
cr = vi_find_cr(offset);
|
||||
if (cr == NULL || cr->cr_size != size || cr->cr_ro) {
|
||||
if (cr != NULL) {
|
||||
/* offset must be OK, wrong size and/or reg is R/O */
|
||||
if (cr->cr_size != size)
|
||||
fprintf(stderr,
|
||||
"%s: write to %s: bad size %d\r\n",
|
||||
name, cr->cr_name, size);
|
||||
if (cr->cr_ro)
|
||||
fprintf(stderr,
|
||||
"%s: write to read-only reg %s\r\n",
|
||||
name, cr->cr_name);
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"%s: write to bad offset/size %jd/%d\r\n",
|
||||
name, (uintmax_t)offset, size);
|
||||
}
|
||||
goto done;
|
||||
}
|
||||
|
||||
switch (offset) {
|
||||
case VTCFG_R_GUESTCAP:
|
||||
vs->vs_negotiated_caps = value & vc->vc_hv_caps;
|
||||
if (vc->vc_apply_features)
|
||||
(*vc->vc_apply_features)(DEV_SOFTC(vs),
|
||||
vs->vs_negotiated_caps);
|
||||
break;
|
||||
case VTCFG_R_PFN:
|
||||
if (vs->vs_curq >= vc->vc_nvq)
|
||||
goto bad_qindex;
|
||||
vi_vq_init(vs, value);
|
||||
break;
|
||||
case VTCFG_R_QSEL:
|
||||
/*
|
||||
* Note that the guest is allowed to select an
|
||||
* invalid queue; we just need to return a QNUM
|
||||
* of 0 while the bad queue is selected.
|
||||
*/
|
||||
vs->vs_curq = value;
|
||||
break;
|
||||
case VTCFG_R_QNOTIFY:
|
||||
if (value >= vc->vc_nvq) {
|
||||
fprintf(stderr, "%s: queue %d notify out of range\r\n",
|
||||
name, (int)value);
|
||||
goto done;
|
||||
}
|
||||
vq = &vs->vs_queues[value];
|
||||
if (vq->vq_notify)
|
||||
(*vq->vq_notify)(DEV_SOFTC(vs), vq);
|
||||
else if (vc->vc_qnotify)
|
||||
(*vc->vc_qnotify)(DEV_SOFTC(vs), vq);
|
||||
else
|
||||
fprintf(stderr,
|
||||
"%s: qnotify queue %d: missing vq/vc notify\r\n",
|
||||
name, (int)value);
|
||||
break;
|
||||
case VTCFG_R_STATUS:
|
||||
vs->vs_status = value;
|
||||
if (value == 0)
|
||||
(*vc->vc_reset)(DEV_SOFTC(vs));
|
||||
break;
|
||||
case VTCFG_R_CFGVEC:
|
||||
vs->vs_msix_cfg_idx = value;
|
||||
break;
|
||||
case VTCFG_R_QVEC:
|
||||
if (vs->vs_curq >= vc->vc_nvq)
|
||||
goto bad_qindex;
|
||||
vq = &vs->vs_queues[vs->vs_curq];
|
||||
vq->vq_msix_idx = value;
|
||||
break;
|
||||
}
|
||||
goto done;
|
||||
|
||||
bad_qindex:
|
||||
fprintf(stderr,
|
||||
"%s: write config reg %s: curq %d >= max %d\r\n",
|
||||
name, cr->cr_name, vs->vs_curq, vc->vc_nvq);
|
||||
done:
|
||||
if (vs->vs_mtx)
|
||||
pthread_mutex_unlock(vs->vs_mtx);
|
||||
}
|
464
virtio.h
Normal file
464
virtio.h
Normal file
@ -0,0 +1,464 @@
|
||||
/*-
|
||||
* Copyright (c) 2013 Chris Torek <torek @ torek net>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VIRTIO_H_
|
||||
#define _VIRTIO_H_
|
||||
|
||||
/*
|
||||
* These are derived from several virtio specifications.
|
||||
*
|
||||
* Some useful links:
|
||||
* https://github.com/rustyrussell/virtio-spec
|
||||
* http://people.redhat.com/pbonzini/virtio-spec.pdf
|
||||
*/
|
||||
|
||||
/*
|
||||
* A virtual device has zero or more "virtual queues" (virtqueue).
|
||||
* Each virtqueue uses at least two 4096-byte pages, laid out thus:
|
||||
*
|
||||
* +-----------------------------------------------+
|
||||
* | "desc": <N> descriptors, 16 bytes each |
|
||||
* | ----------------------------------------- |
|
||||
* | "avail": 2 uint16; <N> uint16; 1 uint16 |
|
||||
* | ----------------------------------------- |
|
||||
* | pad to 4k boundary |
|
||||
* +-----------------------------------------------+
|
||||
* | "used": 2 x uint16; <N> elems; 1 uint16 |
|
||||
* | ----------------------------------------- |
|
||||
* | pad to 4k boundary |
|
||||
* +-----------------------------------------------+
|
||||
*
|
||||
* The number <N> that appears here is always a power of two and is
|
||||
* limited to no more than 32768 (as it must fit in a 16-bit field).
|
||||
* If <N> is sufficiently large, the above will occupy more than
|
||||
* two pages. In any case, all pages must be physically contiguous
|
||||
* within the guest's physical address space.
|
||||
*
|
||||
* The <N> 16-byte "desc" descriptors consist of a 64-bit guest
|
||||
* physical address <addr>, a 32-bit length <len>, a 16-bit
|
||||
* <flags>, and a 16-bit <next> field (all in guest byte order).
|
||||
*
|
||||
* There are three flags that may be set :
|
||||
* NEXT descriptor is chained, so use its "next" field
|
||||
* WRITE descriptor is for host to write into guest RAM
|
||||
* (else host is to read from guest RAM)
|
||||
* INDIRECT descriptor address field is (guest physical)
|
||||
* address of a linear array of descriptors
|
||||
*
|
||||
* Unless INDIRECT is set, <len> is the number of bytes that may
|
||||
* be read/written from guest physical address <addr>. If
|
||||
* INDIRECT is set, WRITE is ignored and <len> provides the length
|
||||
* of the indirect descriptors (and <len> must be a multiple of
|
||||
* 16). Note that NEXT may still be set in the main descriptor
|
||||
* pointing to the indirect, and should be set in each indirect
|
||||
* descriptor that uses the next descriptor (these should generally
|
||||
* be numbered sequentially). However, INDIRECT must not be set
|
||||
* in the indirect descriptors. Upon reaching an indirect descriptor
|
||||
* without a NEXT bit, control returns to the direct descriptors.
|
||||
*
|
||||
* Except inside an indirect, each <next> value must be in the
|
||||
* range [0 .. N) (i.e., the half-open interval). (Inside an
|
||||
* indirect, each <next> must be in the range [0 .. <len>/16).)
|
||||
*
|
||||
* The "avail" data structures reside in the same pages as the
|
||||
* "desc" structures since both together are used by the device to
|
||||
* pass information to the hypervisor's virtual driver. These
|
||||
* begin with a 16-bit <flags> field and 16-bit index <idx>, then
|
||||
* have <N> 16-bit <ring> values, followed by one final 16-bit
|
||||
* field <used_event>. The <N> <ring> entries are simply indices
|
||||
* into the descriptor ring (and thus must meet the same
|
||||
* constraints as each <next> value). However, <idx> is counted
|
||||
* up from 0 (initially) and simply wraps around after 65535; it
|
||||
* is taken mod <N> to find the next available entry.
|
||||
*
|
||||
* The "used" ring occupies a separate page or pages, and contains
|
||||
* values written from the virtual driver back to the guest OS.
|
||||
* This begins with a 16-bit <flags> and 16-bit <idx>, then there
|
||||
* are <N> "vring_used" elements, followed by a 16-bit <avail_event>.
|
||||
* The <N> "vring_used" elements consist of a 32-bit <id> and a
|
||||
* 32-bit <len> (vu_tlen below). The <id> is simply the index of
|
||||
* the head of a descriptor chain the guest made available
|
||||
* earlier, and the <len> is the number of bytes actually written,
|
||||
* e.g., in the case of a network driver that provided a large
|
||||
* receive buffer but received only a small amount of data.
|
||||
*
|
||||
* The two event fields, <used_event> and <avail_event>, in the
|
||||
* avail and used rings (respectively -- note the reversal!), are
|
||||
* always provided, but are used only if the virtual device
|
||||
* negotiates the VIRTIO_RING_F_EVENT_IDX feature during feature
|
||||
* negotiation. Similarly, both rings provide a flag --
|
||||
* VRING_AVAIL_F_NO_INTERRUPT and VRING_USED_F_NO_NOTIFY -- in
|
||||
* their <flags> field, indicating that the guest does not need an
|
||||
* interrupt, or that the hypervisor driver does not need a
|
||||
* notify, when descriptors are added to the corresponding ring.
|
||||
* (These are provided only for interrupt optimization and need
|
||||
* not be implemented.)
|
||||
*/
|
||||
#define VRING_ALIGN 4096
|
||||
|
||||
#define VRING_DESC_F_NEXT (1 << 0)
|
||||
#define VRING_DESC_F_WRITE (1 << 1)
|
||||
#define VRING_DESC_F_INDIRECT (1 << 2)
|
||||
|
||||
/* One 16-byte ring descriptor (AKA vring_desc), shared with the guest. */
struct virtio_desc {
	uint64_t	vd_addr;	/* guest physical address */
	uint32_t	vd_len;		/* length of scatter/gather seg */
	uint16_t	vd_flags;	/* VRING_DESC_F_* */
	uint16_t	vd_next;	/* next desc if F_NEXT */
} __packed;
|
||||
|
||||
/* One entry of the "used" ring (AKA vring_used_elem). */
struct virtio_used {
	uint32_t	vu_idx;		/* head of used descriptor chain */
	uint32_t	vu_tlen;	/* length written-to */
} __packed;
|
||||
|
||||
#define VRING_AVAIL_F_NO_INTERRUPT 1
|
||||
|
||||
/* Header of the "avail" ring; the ring entries follow it in memory. */
struct vring_avail {
	uint16_t	va_flags;	/* VRING_AVAIL_F_* */
	uint16_t	va_idx;		/* counts to 65535, then cycles */
	uint16_t	va_ring[];	/* size N, reported in QNUM value */
/*	uint16_t	va_used_event;	-- after N ring entries */
} __packed;
|
||||
|
||||
#define VRING_USED_F_NO_NOTIFY 1
|
||||
struct vring_used {
|
||||
uint16_t vu_flags; /* VRING_USED_F_* */
|
||||
uint16_t vu_idx; /* counts to 65535, then cycles */
|
||||
struct virtio_used vu_ring[]; /* size N */
|
||||
/* uint16_t vu_avail_event; -- after N ring entries */
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* The address of any given virtual queue is determined by a single
|
||||
* Page Frame Number register. The guest writes the PFN into the
|
||||
* PCI config space. However, a device that has two or more
|
||||
* virtqueues can have a different PFN, and size, for each queue.
|
||||
* The number of queues is determinable via the PCI config space
|
||||
* VTCFG_R_QSEL register. Writes to QSEL select the queue: 0 means
|
||||
* queue #0, 1 means queue#1, etc. Once a queue is selected, the
|
||||
* remaining PFN and QNUM registers refer to that queue.
|
||||
*
|
||||
* QNUM is a read-only register containing a nonzero power of two
|
||||
* that indicates the (hypervisor's) queue size. Or, if reading it
|
||||
* produces zero, the hypervisor does not have a corresponding
|
||||
* queue. (The number of possible queues depends on the virtual
|
||||
* device. The block device has just one; the network device
|
||||
* provides either two -- 0 = receive, 1 = transmit -- or three,
|
||||
* with 2 = control.)
|
||||
*
|
||||
* PFN is a read/write register giving the physical page address of
|
||||
* the virtqueue in guest memory (the guest must allocate enough space
|
||||
* based on the hypervisor's provided QNUM).
|
||||
*
|
||||
* QNOTIFY is effectively write-only: when the guest writes a queue
|
||||
* number to the register, the hypervisor should scan the specified
|
||||
* virtqueue. (Reading QNOTIFY currently always gets 0).
|
||||
*/
|
||||
|
||||
/*
|
||||
* PFN register shift amount
|
||||
*/
|
||||
#define VRING_PFN 12
|
||||
|
||||
/*
|
||||
* Virtio device types
|
||||
*
|
||||
* XXX Should really be merged with <dev/virtio/virtio.h> defines
|
||||
*/
|
||||
#define VIRTIO_TYPE_NET 1
|
||||
#define VIRTIO_TYPE_BLOCK 2
|
||||
#define VIRTIO_TYPE_CONSOLE 3
|
||||
#define VIRTIO_TYPE_ENTROPY 4
|
||||
#define VIRTIO_TYPE_BALLOON 5
|
||||
#define VIRTIO_TYPE_IOMEMORY 6
|
||||
#define VIRTIO_TYPE_RPMSG 7
|
||||
#define VIRTIO_TYPE_SCSI 8
|
||||
#define VIRTIO_TYPE_9P 9
|
||||
|
||||
/* experimental IDs start at 65535 and work down */
|
||||
|
||||
/*
|
||||
* PCI vendor/device IDs
|
||||
*/
|
||||
#define VIRTIO_VENDOR 0x1AF4
|
||||
#define VIRTIO_DEV_NET 0x1000
|
||||
#define VIRTIO_DEV_BLOCK 0x1001
|
||||
#define VIRTIO_DEV_RANDOM 0x1002
|
||||
|
||||
/*
|
||||
* PCI config space constants.
|
||||
*
|
||||
* If MSI-X is enabled, the ISR register is generally not used,
|
||||
* and the configuration vector and queue vector appear at offsets
|
||||
* 20 and 22 with the remaining configuration registers at 24.
|
||||
* If MSI-X is not enabled, those two registers disappear and
|
||||
* the remaining configuration registers start at offset 20.
|
||||
*/
|
||||
#define VTCFG_R_HOSTCAP 0
|
||||
#define VTCFG_R_GUESTCAP 4
|
||||
#define VTCFG_R_PFN 8
|
||||
#define VTCFG_R_QNUM 12
|
||||
#define VTCFG_R_QSEL 14
|
||||
#define VTCFG_R_QNOTIFY 16
|
||||
#define VTCFG_R_STATUS 18
|
||||
#define VTCFG_R_ISR 19
|
||||
#define VTCFG_R_CFGVEC 20
|
||||
#define VTCFG_R_QVEC 22
|
||||
#define VTCFG_R_CFG0 20 /* No MSI-X */
|
||||
#define VTCFG_R_CFG1 24 /* With MSI-X */
|
||||
#define VTCFG_R_MSIX 20
|
||||
|
||||
/*
|
||||
* Bits in VTCFG_R_STATUS. Guests need not actually set any of these,
|
||||
* but a guest writing 0 to this register means "please reset".
|
||||
*/
|
||||
#define VTCFG_STATUS_ACK 0x01 /* guest OS has acknowledged dev */
|
||||
#define VTCFG_STATUS_DRIVER 0x02 /* guest OS driver is loaded */
|
||||
#define VTCFG_STATUS_DRIVER_OK 0x04 /* guest OS driver ready */
|
||||
#define VTCFG_STATUS_FAILED 0x80 /* guest has given up on this dev */
|
||||
|
||||
/*
|
||||
* Bits in VTCFG_R_ISR. These apply only if not using MSI-X.
|
||||
*
|
||||
* (We don't [yet?] ever use CONF_CHANGED.)
|
||||
*/
|
||||
#define VTCFG_ISR_QUEUES 0x01 /* re-scan queues */
|
||||
#define VTCFG_ISR_CONF_CHANGED 0x80 /* configuration changed */
|
||||
|
||||
#define VIRTIO_MSI_NO_VECTOR 0xFFFF
|
||||
|
||||
/*
|
||||
* Feature flags.
|
||||
* Note: bits 0 through 23 are reserved to each device type.
|
||||
*/
|
||||
#define VIRTIO_F_NOTIFY_ON_EMPTY (1 << 24)
|
||||
#define VIRTIO_RING_F_INDIRECT_DESC (1 << 28)
|
||||
#define VIRTIO_RING_F_EVENT_IDX (1 << 29)
|
||||
|
||||
/* From section 2.3, "Virtqueue Configuration", of the virtio specification */
|
||||
static inline size_t
|
||||
vring_size(u_int qsz)
|
||||
{
|
||||
size_t size;
|
||||
|
||||
/* constant 3 below = va_flags, va_idx, va_used_event */
|
||||
size = sizeof(struct virtio_desc) * qsz + sizeof(uint16_t) * (3 + qsz);
|
||||
size = roundup2(size, VRING_ALIGN);
|
||||
|
||||
/* constant 3 below = vu_flags, vu_idx, vu_avail_event */
|
||||
size += sizeof(uint16_t) * 3 + sizeof(struct virtio_used) * qsz;
|
||||
size = roundup2(size, VRING_ALIGN);
|
||||
|
||||
return (size);
|
||||
}
|
||||
|
||||
struct vmctx;
|
||||
struct pci_devinst;
|
||||
struct vqueue_info;
|
||||
|
||||
/*
|
||||
* A virtual device, with some number (possibly 0) of virtual
|
||||
* queues and some size (possibly 0) of configuration-space
|
||||
* registers private to the device. The virtio_softc should come
|
||||
* at the front of each "derived class", so that a pointer to the
|
||||
* virtio_softc is also a pointer to the more specific, derived-
|
||||
* from-virtio driver's softc.
|
||||
*
|
||||
* Note: inside each hypervisor virtio driver, changes to these
|
||||
* data structures must be locked against other threads, if any.
|
||||
* Except for PCI config space register read/write, we assume each
|
||||
* driver does the required locking, but we need a pointer to the
|
||||
* lock (if there is one) for PCI config space read/write ops.
|
||||
*
|
||||
* When the guest reads or writes the device's config space, the
|
||||
* generic layer checks for operations on the special registers
|
||||
* described above. If the offset of the register(s) being read
|
||||
* or written is past the CFG area (CFG0 or CFG1), the request is
|
||||
* passed on to the virtual device, after subtracting off the
|
||||
* generic-layer size. (So, drivers can just use the offset as
|
||||
* an offset into "struct config", for instance.)
|
||||
*
|
||||
* (The virtio layer also makes sure that the read or write is to/
|
||||
* from a "good" config offset, hence vc_cfgsize, and on BAR #0.
|
||||
* However, the driver must verify the read or write size and offset
|
||||
* and that no one is writing a readonly register.)
|
||||
*
|
||||
* The BROKED flag ("this thing done gone and broked") is for future
|
||||
* use.
|
||||
*/
|
||||
#define VIRTIO_USE_MSIX 0x01
|
||||
#define VIRTIO_EVENT_IDX 0x02 /* use the event-index values */
|
||||
#define VIRTIO_BROKED 0x08 /* ??? */
|
||||
|
||||
/* Generic (device-independent) per-device virtio state. */
struct virtio_softc {
	struct virtio_consts *vs_vc;	/* constants (see below) */
	int	vs_flags;		/* VIRTIO_* flags from above */
	pthread_mutex_t *vs_mtx;	/* POSIX mutex, if any */
	struct pci_devinst *vs_pi;	/* PCI device instance */
	uint32_t vs_negotiated_caps;	/* negotiated capabilities */
	struct vqueue_info *vs_queues;	/* one per vc_nvq */
	int	vs_curq;		/* current queue */
	uint8_t	vs_status;		/* value from last status write */
	uint8_t	vs_isr;			/* ISR flags, if not MSI-X */
	uint16_t vs_msix_cfg_idx;	/* MSI-X vector for config event */
};
|
||||
|
||||
/*
 * Take the softc's mutex, if it has one.
 * "vs" may be any expression yielding a struct virtio_softc pointer;
 * note that it is evaluated more than once.
 */
#define	VS_LOCK(vs)							\
do {									\
	if ((vs)->vs_mtx)						\
		pthread_mutex_lock((vs)->vs_mtx);			\
} while (0)
|
||||
|
||||
/*
 * Release the softc's mutex, if it has one.
 * "vs" may be any expression yielding a struct virtio_softc pointer;
 * note that it is evaluated more than once.
 */
#define	VS_UNLOCK(vs)							\
do {									\
	if ((vs)->vs_mtx)						\
		pthread_mutex_unlock((vs)->vs_mtx);			\
} while (0)
|
||||
|
||||
/* Per-device-type constants and callbacks supplied by each virtio driver. */
struct virtio_consts {
	const char *vc_name;		/* name of driver (for diagnostics) */
	int	vc_nvq;			/* number of virtual queues */
	size_t	vc_cfgsize;		/* size of dev-specific config regs */

	/* called on virtual device reset */
	void	(*vc_reset)(void *);

	/* called on QNOTIFY if no VQ notify */
	void	(*vc_qnotify)(void *, struct vqueue_info *);

	/* called to read config regs */
	int	(*vc_cfgread)(void *, int, int, uint32_t *);

	/* called to write config regs */
	int	(*vc_cfgwrite)(void *, int, int, uint32_t);

	/* called to apply negotiated features */
	void	(*vc_apply_features)(void *, uint64_t);

	uint64_t vc_hv_caps;		/* hypervisor-provided capabilities */
};
|
||||
|
||||
/*
|
||||
* Data structure allocated (statically) per virtual queue.
|
||||
*
|
||||
* Drivers may change vq_qsize after a reset. When the guest OS
|
||||
* requests a device reset, the hypervisor first calls
|
||||
* vs->vs_vc->vc_reset(); then the data structure below is
|
||||
* reinitialized (for each virtqueue: vs->vs_vc->vc_nvq).
|
||||
*
|
||||
* The remaining fields should only be fussed-with by the generic
|
||||
* code.
|
||||
*
|
||||
* Note: the addresses of vq_desc, vq_avail, and vq_used are all
|
||||
* computable from each other, but it's a lot simpler if we just
|
||||
* keep a pointer to each one. The event indices are similarly
|
||||
* (but more easily) computable, and this time we'll compute them:
|
||||
* they're just XX_ring[N].
|
||||
*/
|
||||
/* Bit values stored in vqueue_info.vq_flags. */
#define	VQ_ALLOC	0x01	/* set once we have a pfn */
#define	VQ_BROKED	0x02	/* ??? */

struct vqueue_info {
	/* Size of this queue (a power of 2). */
	uint16_t vq_qsize;
	/* If not NULL, called instead of the device-wide vc_qnotify. */
	void	(*vq_notify)(void *, struct vqueue_info *);

	/* Backpointer to the owning softc. */
	struct virtio_softc *vq_vs;
	/* We are the num'th queue in the softc. */
	uint16_t vq_num;

	/* VQ_* flag bits from above. */
	uint16_t vq_flags;
	/* A recent value of vq_avail->va_idx. */
	uint16_t vq_last_avail;
	/* Saved vq_used->vu_idx; see vq_endchains. */
	uint16_t vq_save_used;
	/* MSI-X index, or VIRTIO_MSI_NO_VECTOR. */
	uint16_t vq_msix_idx;

	/* PFN of the virt queue (not shifted!). */
	uint32_t vq_pfn;

	/* Descriptor array. */
	volatile struct virtio_desc *vq_desc;
	/* The "avail" ring. */
	volatile struct vring_avail *vq_avail;
	/* The "used" ring. */
	volatile struct vring_used *vq_used;
};
|
||||
/*
 * Event-index accessors.  As noted above, these are sort of backwards,
 * name-wise: the "avail" event index is read out of the used ring and
 * the "used" event index out of the avail ring.  In both cases it is
 * the slot at index vq_qsize of the ring array.
 */
#define	VQ_AVAIL_EVENT_IDX(vq)						\
	(*(volatile uint16_t *)&(vq)->vq_used->vu_ring[(vq)->vq_qsize])
#define	VQ_USED_EVENT_IDX(vq)						\
	((vq)->vq_avail->va_ring[(vq)->vq_qsize])
|
||||
|
||||
/*
|
||||
* Is this ring ready for I/O?
|
||||
*/
|
||||
static inline int
|
||||
vq_ring_ready(struct vqueue_info *vq)
|
||||
{
|
||||
|
||||
return (vq->vq_flags & VQ_ALLOC);
|
||||
}
|
||||
|
||||
/*
|
||||
* Are there "available" descriptors? (This does not count
|
||||
* how many, just returns True if there are some.)
|
||||
*/
|
||||
static inline int
|
||||
vq_has_descs(struct vqueue_info *vq)
|
||||
{
|
||||
|
||||
return (vq_ring_ready(vq) && vq->vq_last_avail !=
|
||||
vq->vq_avail->va_idx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Deliver an interrupt to guest on the given virtual queue
|
||||
* (if possible, or a generic MSI interrupt if not using MSI-X).
|
||||
*/
|
||||
static inline void
|
||||
vq_interrupt(struct virtio_softc *vs, struct vqueue_info *vq)
|
||||
{
|
||||
|
||||
if (pci_msix_enabled(vs->vs_pi))
|
||||
pci_generate_msix(vs->vs_pi, vq->vq_msix_idx);
|
||||
else {
|
||||
VS_LOCK(vs);
|
||||
vs->vs_isr |= VTCFG_ISR_QUEUES;
|
||||
pci_generate_msi(vs->vs_pi, 0);
|
||||
pci_lintr_assert(vs->vs_pi);
|
||||
VS_UNLOCK(vs);
|
||||
}
|
||||
}
|
||||
|
||||
/* Only pointers to struct iovec are used below. */
struct iovec;

/* Device-level (softc) entry points. */
void	vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc,
	    void *dev_softc, struct pci_devinst *pi,
	    struct vqueue_info *queues);
int	vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix);
void	vi_reset_dev(struct virtio_softc *);
void	vi_set_io_bar(struct virtio_softc *, int);

/* Virtqueue descriptor-chain handling. */
int	vq_getchain(struct vqueue_info *vq, uint16_t *pidx,
	    struct iovec *iov, int n_iov, uint16_t *flags);
void	vq_retchain(struct vqueue_info *vq);
void	vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen);
void	vq_endchains(struct vqueue_info *vq, int used_all_avail);

/* PCI BAR read/write handlers. */
uint64_t vi_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
	    int baridx, uint64_t offset, int size);
void	vi_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
	    int baridx, uint64_t offset, int size, uint64_t value);
|
||||
#endif /* _VIRTIO_H_ */
|
230
xmsr.c
Normal file
230
xmsr.c
Normal file
@ -0,0 +1,230 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <machine/cpufunc.h>
|
||||
#include <machine/vmm.h>
|
||||
#include <machine/specialreg.h>
|
||||
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "xmsr.h"
|
||||
|
||||
static int cpu_vendor_intel, cpu_vendor_amd;
|
||||
|
||||
int
|
||||
emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t val)
|
||||
{
|
||||
|
||||
if (cpu_vendor_intel) {
|
||||
switch (num) {
|
||||
case 0xd04: /* Sandy Bridge uncore PMCs */
|
||||
case 0xc24:
|
||||
return (0);
|
||||
case MSR_BIOS_UPDT_TRIG:
|
||||
return (0);
|
||||
case MSR_BIOS_SIGN:
|
||||
return (0);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else if (cpu_vendor_amd) {
|
||||
switch (num) {
|
||||
case MSR_HWCR:
|
||||
/*
|
||||
* Ignore writes to hardware configuration MSR.
|
||||
*/
|
||||
return (0);
|
||||
|
||||
case MSR_NB_CFG1:
|
||||
case MSR_IC_CFG:
|
||||
return (0); /* Ignore writes */
|
||||
|
||||
case MSR_PERFEVSEL0:
|
||||
case MSR_PERFEVSEL1:
|
||||
case MSR_PERFEVSEL2:
|
||||
case MSR_PERFEVSEL3:
|
||||
/* Ignore writes to the PerfEvtSel MSRs */
|
||||
return (0);
|
||||
|
||||
case MSR_K7_PERFCTR0:
|
||||
case MSR_K7_PERFCTR1:
|
||||
case MSR_K7_PERFCTR2:
|
||||
case MSR_K7_PERFCTR3:
|
||||
/* Ignore writes to the PerfCtr MSRs */
|
||||
return (0);
|
||||
|
||||
case MSR_P_STATE_CONTROL:
|
||||
/* Ignore write to change the P-state */
|
||||
return (0);
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (-1);
|
||||
}
|
||||
|
||||
int
|
||||
emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t *val)
|
||||
{
|
||||
int error = 0;
|
||||
|
||||
if (cpu_vendor_intel) {
|
||||
switch (num) {
|
||||
case MSR_BIOS_SIGN:
|
||||
case MSR_IA32_PLATFORM_ID:
|
||||
case MSR_PKG_ENERGY_STATUS:
|
||||
case MSR_PP0_ENERGY_STATUS:
|
||||
case MSR_PP1_ENERGY_STATUS:
|
||||
case MSR_DRAM_ENERGY_STATUS:
|
||||
*val = 0;
|
||||
break;
|
||||
case MSR_RAPL_POWER_UNIT:
|
||||
/*
|
||||
* Use the default value documented in section
|
||||
* "RAPL Interfaces" in Intel SDM vol3.
|
||||
*/
|
||||
*val = 0x000a1003;
|
||||
break;
|
||||
default:
|
||||
error = -1;
|
||||
break;
|
||||
}
|
||||
} else if (cpu_vendor_amd) {
|
||||
switch (num) {
|
||||
case MSR_BIOS_SIGN:
|
||||
*val = 0;
|
||||
break;
|
||||
case MSR_HWCR:
|
||||
/*
|
||||
* Bios and Kernel Developer's Guides for AMD Families
|
||||
* 12H, 14H, 15H and 16H.
|
||||
*/
|
||||
*val = 0x01000010; /* Reset value */
|
||||
*val |= 1 << 9; /* MONITOR/MWAIT disable */
|
||||
break;
|
||||
|
||||
case MSR_NB_CFG1:
|
||||
case MSR_IC_CFG:
|
||||
/*
|
||||
* The reset value is processor family dependent so
|
||||
* just return 0.
|
||||
*/
|
||||
*val = 0;
|
||||
break;
|
||||
|
||||
case MSR_PERFEVSEL0:
|
||||
case MSR_PERFEVSEL1:
|
||||
case MSR_PERFEVSEL2:
|
||||
case MSR_PERFEVSEL3:
|
||||
/*
|
||||
* PerfEvtSel MSRs are not properly virtualized so just
|
||||
* return zero.
|
||||
*/
|
||||
*val = 0;
|
||||
break;
|
||||
|
||||
case MSR_K7_PERFCTR0:
|
||||
case MSR_K7_PERFCTR1:
|
||||
case MSR_K7_PERFCTR2:
|
||||
case MSR_K7_PERFCTR3:
|
||||
/*
|
||||
* PerfCtr MSRs are not properly virtualized so just
|
||||
* return zero.
|
||||
*/
|
||||
*val = 0;
|
||||
break;
|
||||
|
||||
case MSR_SMM_ADDR:
|
||||
case MSR_SMM_MASK:
|
||||
/*
|
||||
* Return the reset value defined in the AMD Bios and
|
||||
* Kernel Developer's Guide.
|
||||
*/
|
||||
*val = 0;
|
||||
break;
|
||||
|
||||
case MSR_P_STATE_LIMIT:
|
||||
case MSR_P_STATE_CONTROL:
|
||||
case MSR_P_STATE_STATUS:
|
||||
case MSR_P_STATE_CONFIG(0): /* P0 configuration */
|
||||
*val = 0;
|
||||
break;
|
||||
|
||||
/*
|
||||
* OpenBSD guests test bit 0 of this MSR to detect if the
|
||||
* workaround for erratum 721 is already applied.
|
||||
* http://support.amd.com/TechDocs/41322_10h_Rev_Gd.pdf
|
||||
*/
|
||||
case 0xC0011029:
|
||||
*val = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
error = -1;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
error = -1;
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
init_msr(void)
|
||||
{
|
||||
int error;
|
||||
u_int regs[4];
|
||||
char cpu_vendor[13];
|
||||
|
||||
do_cpuid(0, regs);
|
||||
((u_int *)&cpu_vendor)[0] = regs[1];
|
||||
((u_int *)&cpu_vendor)[1] = regs[3];
|
||||
((u_int *)&cpu_vendor)[2] = regs[2];
|
||||
cpu_vendor[12] = '\0';
|
||||
|
||||
error = 0;
|
||||
if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
|
||||
cpu_vendor_amd = 1;
|
||||
} else if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
|
||||
cpu_vendor_intel = 1;
|
||||
} else {
|
||||
fprintf(stderr, "Unknown cpu vendor \"%s\"\n", cpu_vendor);
|
||||
error = -1;
|
||||
}
|
||||
return (error);
|
||||
}
|
36
xmsr.h
Normal file
36
xmsr.h
Normal file
@ -0,0 +1,36 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef	_XMSR_H_
#define	_XMSR_H_

/*
 * Forward declaration so the prototypes below refer to the file-scope
 * "struct vmctx" even when this header is included before the header
 * that defines it.  Without it the tag would be declared inside the
 * parameter list and be a distinct, prototype-scoped type.
 *
 * The includer is still expected to provide uint32_t/uint64_t.
 */
struct vmctx;

/*
 * Detect the host CPU vendor.  Returns 0 on success, -1 if the vendor
 * is not recognized.
 */
int	init_msr(void);

/*
 * Emulate a guest WRMSR/RDMSR of MSR 'code'.  Both return 0 if the
 * access was handled and -1 if the MSR is not emulated.
 */
int	emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val);
int	emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t *val);

#endif
|
Loading…
Reference in New Issue
Block a user