AMD-vi: Fix IOMMU device interrupts being overridden

Currently, AMD-vi PCI-e passthrough will lead to the following lines in
dmesg:
"kernel: CPU0: local APIC error 0x40
ivhd0: Error: completion failed tail:0x720, head:0x0."

After some tracing, the problem is due to the interaction with
amdvi_alloc_intr_resources() and pci_driver_added(). In ivrs_drv, the
identification of AMD-vi IVHD is done by walking over the ACPI IVRS
table and ivhdX device_ts are added under the acpi bus, while there are
no driver handling the corresponding IOMMU PCI function. In
amdvi_alloc_intr_resources(), the MSI intr are allocated with the ivhdX
device_t instead of the IOMMU PCI function device_t. bus_setup_intr() is
called on ivhdX. the IOMMU pci function device_t is only used for
pci_enable_msi(). Since bus_setup_intr() is not called on IOMMU pci
function, the IOMMU PCI function device_t's dinfo->cfg.msi is never
updated to reflect the supposed msi_data and msi_addr. So the msi_data
and msi_addr stay in the value 0. When pci_driver_added() tried to loop
over the children of a pci bus, and do pci_cfg_restore() on each of
them, msi_addr and msi_data with value 0 will be written to the MSI
capability of the IOMMU pci function, thus explaining the errors in
dmesg.

This change includes an amdiommu driver which currently does attaching,
detaching and providing DEVMETHODs for setting up and tearing down
interrupt. The purpose of the driver is to prevent pci_driver_added()
from calling pci_cfg_restore() on the IOMMU PCI function device_t.
The introduction of the amdiommu driver handles allocation of an IRQ
resource within the IOMMU PCI function, so that the dinfo->cfg.msi is
populated.

This has been tested on EPYC Rome 7282 with Radeon 5700XT GPU.

Sponsored by:	The FreeBSD Foundation
Reviewed by:	jhb
Approved by:	philip (mentor)
MFC after:	2 weeks
Differential Revision:	https://reviews.freebsd.org/D28984
This commit is contained in:
Ka Ho Ng 2021-03-22 17:33:43 +08:00
parent ede14736fd
commit 74ada297e8
6 changed files with 253 additions and 81 deletions

View File

@ -0,0 +1,185 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2021 The FreeBSD Fondation
*
* Portions of this software were developed by Ka Ho Ng
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include "amdvi_priv.h"
#include "ivhd_if.h"
struct amdiommu_softc {
struct resource *event_res; /* Event interrupt resource. */
void *event_tag; /* Event interrupt tag. */
int event_rid;
};
static int amdiommu_probe(device_t);
static int amdiommu_attach(device_t);
static int amdiommu_detach(device_t);
static int ivhd_setup_intr(device_t, driver_intr_t, void *,
const char *);
static int ivhd_teardown_intr(device_t);
static device_method_t amdiommu_methods[] = {
/* device interface */
DEVMETHOD(device_probe, amdiommu_probe),
DEVMETHOD(device_attach, amdiommu_attach),
DEVMETHOD(device_detach, amdiommu_detach),
DEVMETHOD(ivhd_setup_intr, ivhd_setup_intr),
DEVMETHOD(ivhd_teardown_intr, ivhd_teardown_intr),
DEVMETHOD_END
};
static driver_t amdiommu_driver = {
"amdiommu",
amdiommu_methods,
sizeof(struct amdiommu_softc),
};
static int
amdiommu_probe(device_t dev)
{
int error;
int capoff;
/*
* Check base class and sub-class
*/
if (pci_get_class(dev) != PCIC_BASEPERIPH ||
pci_get_subclass(dev) != PCIS_BASEPERIPH_IOMMU)
return (ENXIO);
/*
* A IOMMU capability block carries a 0Fh capid.
*/
error = pci_find_cap(dev, PCIY_SECDEV, &capoff);
if (error)
return (ENXIO);
/*
* bit [18:16] == 011b indicates the capability block is IOMMU
* capability block. If the field is not set to 011b, bail out.
*/
if ((pci_read_config(dev, capoff + 2, 2) & 0x7) != 0x3)
return (ENXIO);
return (BUS_PROBE_SPECIFIC);
}
static int
amdiommu_attach(device_t dev)
{
device_set_desc(dev, "AMD-Vi/IOMMU PCI function");
return (0);
}
static int
amdiommu_detach(device_t dev)
{
return (0);
}
static int
ivhd_setup_intr(device_t dev, driver_intr_t handler, void *arg,
const char *desc)
{
struct amdiommu_softc *sc;
int error, msicnt;
sc = device_get_softc(dev);
msicnt = 1;
if (sc->event_res != NULL)
panic("%s is called without intr teardown", __func__);
sc->event_rid = 1;
error = pci_alloc_msi(dev, &msicnt);
if (error) {
device_printf(dev, "Couldn't find event MSI IRQ resource.\n");
return (ENOENT);
}
sc->event_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
&sc->event_rid, RF_ACTIVE);
if (sc->event_res == NULL) {
device_printf(dev, "Unable to allocate event INTR resource.\n");
error = ENOMEM;
goto fail;
}
error = bus_setup_intr(dev, sc->event_res, INTR_TYPE_MISC | INTR_MPSAFE,
NULL, handler, arg, &sc->event_tag);
if (error) {
device_printf(dev, "Fail to setup event intr\n");
goto fail;
}
bus_describe_intr(dev, sc->event_res, sc->event_tag, "%s", desc);
return (0);
fail:
ivhd_teardown_intr(dev);
return (error);
}
static int
ivhd_teardown_intr(device_t dev)
{
struct amdiommu_softc *sc;
sc = device_get_softc(dev);
if (sc->event_res != NULL) {
bus_teardown_intr(dev, sc->event_res, sc->event_tag);
sc->event_tag = NULL;
}
if (sc->event_res != NULL) {
bus_release_resource(dev, SYS_RES_IRQ, sc->event_rid,
sc->event_res);
sc->event_res = NULL;
}
pci_release_msi(dev);
return (0);
}
static devclass_t amdiommu_devclass;
/* This driver has to be loaded before ivhd */
DRIVER_MODULE(amdiommu, pci, amdiommu_driver, amdiommu_devclass, 0, 0);
MODULE_DEPEND(amdiommu, pci, 1, 1, 1);

View File

@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <machine/vmparam.h>
#include <machine/pci_cfgreg.h>
#include "ivhd_if.h"
#include "pcib_if.h"
#include "io/iommu.h"
@ -772,99 +773,33 @@ amdvi_free_evt_intr_res(device_t dev)
{
struct amdvi_softc *softc;
device_t mmio_dev;
softc = device_get_softc(dev);
if (softc->event_tag != NULL) {
bus_teardown_intr(dev, softc->event_res, softc->event_tag);
}
if (softc->event_res != NULL) {
bus_release_resource(dev, SYS_RES_IRQ, softc->event_rid,
softc->event_res);
}
bus_delete_resource(dev, SYS_RES_IRQ, softc->event_rid);
PCIB_RELEASE_MSI(device_get_parent(device_get_parent(dev)),
dev, 1, &softc->event_irq);
mmio_dev = softc->pci_dev;
IVHD_TEARDOWN_INTR(mmio_dev);
}
static bool
amdvi_alloc_intr_resources(struct amdvi_softc *softc)
{
struct amdvi_ctrl *ctrl;
device_t dev, pcib;
device_t mmio_dev;
uint64_t msi_addr;
uint32_t msi_data;
device_t dev, mmio_dev;
int err;
dev = softc->dev;
pcib = device_get_parent(device_get_parent(dev));
mmio_dev = pci_find_bsf(PCI_RID2BUS(softc->pci_rid),
PCI_RID2SLOT(softc->pci_rid), PCI_RID2FUNC(softc->pci_rid));
if (device_is_attached(mmio_dev)) {
device_printf(dev,
"warning: IOMMU device is claimed by another driver %s\n",
device_get_driver(mmio_dev)->name);
}
softc->event_irq = -1;
softc->event_rid = 0;
/*
* Section 3.7.1 of IOMMU rev 2.0. With MSI, there is only one
* interrupt. XXX: Enable MSI/X support.
*/
err = PCIB_ALLOC_MSI(pcib, dev, 1, 1, &softc->event_irq);
if (err) {
device_printf(dev,
"Couldn't find event MSI IRQ resource.\n");
return (ENOENT);
}
err = bus_set_resource(dev, SYS_RES_IRQ, softc->event_rid,
softc->event_irq, 1);
if (err) {
device_printf(dev, "Couldn't set event MSI resource.\n");
return (ENXIO);
}
softc->event_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
&softc->event_rid, RF_ACTIVE);
if (!softc->event_res) {
device_printf(dev,
"Unable to allocate event INTR resource.\n");
return (ENOMEM);
}
if (bus_setup_intr(dev, softc->event_res,
INTR_TYPE_MISC | INTR_MPSAFE, NULL, amdvi_event_intr,
softc, &softc->event_tag)) {
device_printf(dev, "Fail to setup event intr\n");
bus_release_resource(softc->dev, SYS_RES_IRQ,
softc->event_rid, softc->event_res);
softc->event_res = NULL;
return (ENXIO);
}
bus_describe_intr(dev, softc->event_res, softc->event_tag,
"fault");
err = PCIB_MAP_MSI(pcib, dev, softc->event_irq, &msi_addr,
&msi_data);
if (err) {
device_printf(dev,
"Event interrupt config failed, err=%d.\n",
err);
amdvi_free_evt_intr_res(softc->dev);
return (err);
}
mmio_dev = softc->pci_dev;
/* Clear interrupt status bits. */
ctrl = softc->ctrl;
ctrl->status &= AMDVI_STATUS_EV_OF | AMDVI_STATUS_EV_INTR;
/* Now enable MSI interrupt. */
pci_enable_msi(mmio_dev, msi_addr, msi_data);
return (0);
err = IVHD_SETUP_INTR(mmio_dev, amdvi_event_intr, softc, "fault");
if (err)
device_printf(dev, "Interrupt setup failed on %s\n",
device_get_nameunit(mmio_dev));
return (err);
}
static void

View File

@ -375,17 +375,14 @@ enum IvrsType
struct amdvi_softc {
struct amdvi_ctrl *ctrl; /* Control area. */
device_t dev; /* IOMMU device. */
device_t pci_dev; /* IOMMU PCI function device. */
enum IvrsType ivhd_type; /* IOMMU IVHD type. */
bool iotlb; /* IOTLB supported by IOMMU */
struct amdvi_cmd *cmd; /* Command descriptor area. */
int cmd_max; /* Max number of commands. */
uint64_t cmp_data; /* Command completion write back. */
struct amdvi_event *event; /* Event descriptor area. */
struct resource *event_res; /* Event interrupt resource. */
void *event_tag; /* Event interrupt tag. */
int event_max; /* Max number of events. */
int event_irq;
int event_rid;
/* ACPI various flags. */
uint32_t ivhd_flag; /* ACPI IVHD flag. */
uint32_t ivhd_feature; /* ACPI v1 Reserved or v2 attribute. */

View File

@ -0,0 +1,46 @@
#-
# Copyright (c) 2021 The FreeBSD Fondation
#
# Portions of this software were developed by Ka Ho Ng
# under sponsorship from the FreeBSD Foundation.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# $FreeBSD$
#
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/bus.h>
INTERFACE ivhd;
METHOD int setup_intr {
device_t dev;
driver_intr_t handler;
void *arg;
const char *desc;
};
METHOD int teardown_intr {
device_t dev;
};

View File

@ -2,6 +2,7 @@
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2016, Anish Gupta (anish@freebsd.org)
* Copyright (c) 2021 The FreeBSD Foundation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -44,6 +45,8 @@ __FBSDID("$FreeBSD$");
#include <contrib/dev/acpica/include/acpi.h>
#include <contrib/dev/acpica/include/accommon.h>
#include <dev/acpica/acpivar.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include "io/iommu.h"
#include "amdvi_priv.h"
@ -628,6 +631,9 @@ ivhd_attach(device_t dev)
softc->dev = dev;
ivhd = ivhd_hdrs[unit];
KASSERT(ivhd, ("ivhd is NULL"));
softc->pci_dev = pci_find_bsf(PCI_RID2BUS(ivhd->Header.DeviceId),
PCI_RID2SLOT(ivhd->Header.DeviceId),
PCI_RID2FUNC(ivhd->Header.DeviceId));
softc->ivhd_type = ivhd->Header.Type;
softc->pci_seg = ivhd->PciSegmentGroup;

View File

@ -51,6 +51,9 @@ SRCS+= ept.c \
# amd-specific files
.PATH: ${SRCTOP}/sys/amd64/vmm/amd
SRCS+= vmcb.c \
amdiommu.c \
ivhd_if.c \
ivhd_if.h \
svm.c \
svm_support.S \
npt.c \