freebsd-nq/sys/dev/acpica/acpi_pci.c
Konstantin Belousov 86be9f0dd5 Import the driver for VT-d DMAR hardware, as specified in the revision
1.3 of Intelб╝ Virtualization Technology for Directed I/O Architecture
Specification.  The Extended Context and PASIDs from the rev. 2.2 are
not supported, but I am not aware of any released hardware which
implements them.  Code does not use queued invalidation, see comments
for the reason, and does not provide interrupt remapping services.

Code implements the management of the guest address space per domain
and allows to establish and tear down arbitrary mappings, but not
partial unmapping.  The superpages are created as needed, but not
promoted.  Faults are recorded, fault records could be obtained
programmatically, and printed on the console.

Implement the busdma(9) using DMARs.  This busdma backend avoids
bouncing and provides security against misbehaving hardware and driver
bad programming, preventing leaks and corruption of the memory by wild
DMA accesses.

By default, the implementation is compiled into amd64 GENERIC kernel
but disabled; to enable, set hw.dmar.enable=1 loader tunable.  Code is
written to work on i386, but testing there was low priority, and
driver is not enabled in GENERIC.  Even with the DMAR turned on,
individual devices could be directed to use the bounce busdma with the
hw.busdma.pci<domain>:<bus>:<device>:<function>.bounce=1 tunable.  If
DMARs are capable of the pass-through translations, it is used,
otherwise, an identity-mapping page table is constructed.

The driver was tested on Xeon 5400/5500 chipset legacy machine,
Haswell desktop and E5 SandyBridge dual-socket boxes, with ahci(4),
ata(4), bce(4), ehci(4), mfi(4), uhci(4), xhci(4) devices.  It also
works with em(4) and igb(4), but there some fixes are needed for
drivers, which are not committed yet.  Intel GPUs do not work with
DMAR (yet).

Many thanks to John Baldwin, who explained me the newbus integration;
Peter Holm, who did all testing and helped me to discover and
understand several incredible bugs; and to Jim Harris for the access
to the EDS and BWG and for listening when I have to explain my
findings to somebody.

Sponsored by:	The FreeBSD Foundation
MFC after:	1 month
2013-10-28 13:33:29 +00:00

340 lines
9.9 KiB
C

/*-
* Copyright (c) 1997, Stefan Esser <se@freebsd.org>
* Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
* Copyright (c) 2000, BSDi
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_acpi.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <contrib/dev/acpica/include/acpi.h>
#include <contrib/dev/acpica/include/accommon.h>
#include <dev/acpica/acpivar.h>
#include <sys/pciio.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>
#include "pcib_if.h"
#include "pci_if.h"
/* Hooks for the ACPI CA debugging infrastructure. */
#define _COMPONENT ACPI_BUS
ACPI_MODULE_NAME("PCI")
struct acpi_pci_devinfo {
struct pci_devinfo ap_dinfo;
ACPI_HANDLE ap_handle;
int ap_flags;
};
ACPI_SERIAL_DECL(pci_powerstate, "ACPI PCI power methods");
/* Be sure that ACPI and PCI power states are equivalent. */
CTASSERT(ACPI_STATE_D0 == PCI_POWERSTATE_D0);
CTASSERT(ACPI_STATE_D1 == PCI_POWERSTATE_D1);
CTASSERT(ACPI_STATE_D2 == PCI_POWERSTATE_D2);
CTASSERT(ACPI_STATE_D3 == PCI_POWERSTATE_D3);
static int acpi_pci_attach(device_t dev);
static int acpi_pci_child_location_str_method(device_t cbdev,
device_t child, char *buf, size_t buflen);
static int acpi_pci_probe(device_t dev);
static int acpi_pci_read_ivar(device_t dev, device_t child, int which,
uintptr_t *result);
static int acpi_pci_write_ivar(device_t dev, device_t child, int which,
uintptr_t value);
static ACPI_STATUS acpi_pci_save_handle(ACPI_HANDLE handle, UINT32 level,
void *context, void **status);
static int acpi_pci_set_powerstate_method(device_t dev, device_t child,
int state);
static void acpi_pci_update_device(ACPI_HANDLE handle, device_t pci_child);
static bus_dma_tag_t acpi_pci_get_dma_tag(device_t bus, device_t child);
static device_method_t acpi_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, acpi_pci_probe),
DEVMETHOD(device_attach, acpi_pci_attach),
/* Bus interface */
DEVMETHOD(bus_read_ivar, acpi_pci_read_ivar),
DEVMETHOD(bus_write_ivar, acpi_pci_write_ivar),
DEVMETHOD(bus_child_location_str, acpi_pci_child_location_str_method),
DEVMETHOD(bus_get_dma_tag, acpi_pci_get_dma_tag),
/* PCI interface */
DEVMETHOD(pci_set_powerstate, acpi_pci_set_powerstate_method),
DEVMETHOD_END
};
static devclass_t pci_devclass;
DEFINE_CLASS_1(pci, acpi_pci_driver, acpi_pci_methods, sizeof(struct pci_softc),
pci_driver);
DRIVER_MODULE(acpi_pci, pcib, acpi_pci_driver, pci_devclass, 0, 0);
MODULE_DEPEND(acpi_pci, acpi, 1, 1, 1);
MODULE_DEPEND(acpi_pci, pci, 1, 1, 1);
MODULE_VERSION(acpi_pci, 1);
static int
acpi_pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
struct acpi_pci_devinfo *dinfo;
dinfo = device_get_ivars(child);
switch (which) {
case ACPI_IVAR_HANDLE:
*result = (uintptr_t)dinfo->ap_handle;
return (0);
case ACPI_IVAR_FLAGS:
*result = (uintptr_t)dinfo->ap_flags;
return (0);
}
return (pci_read_ivar(dev, child, which, result));
}
static int
acpi_pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
struct acpi_pci_devinfo *dinfo;
dinfo = device_get_ivars(child);
switch (which) {
case ACPI_IVAR_HANDLE:
dinfo->ap_handle = (ACPI_HANDLE)value;
return (0);
case ACPI_IVAR_FLAGS:
dinfo->ap_flags = (int)value;
return (0);
}
return (pci_write_ivar(dev, child, which, value));
}
static int
acpi_pci_child_location_str_method(device_t cbdev, device_t child, char *buf,
size_t buflen)
{
struct acpi_pci_devinfo *dinfo = device_get_ivars(child);
pci_child_location_str_method(cbdev, child, buf, buflen);
if (dinfo->ap_handle) {
strlcat(buf, " handle=", buflen);
strlcat(buf, acpi_name(dinfo->ap_handle), buflen);
}
return (0);
}
/*
* PCI power manangement
*/
static int
acpi_pci_set_powerstate_method(device_t dev, device_t child, int state)
{
ACPI_HANDLE h;
ACPI_STATUS status;
int old_state, error;
error = 0;
if (state < ACPI_STATE_D0 || state > ACPI_STATE_D3)
return (EINVAL);
/*
* We set the state using PCI Power Management outside of setting
* the ACPI state. This means that when powering down a device, we
* first shut it down using PCI, and then using ACPI, which lets ACPI
* try to power down any Power Resources that are now no longer used.
* When powering up a device, we let ACPI set the state first so that
* it can enable any needed Power Resources before changing the PCI
* power state.
*/
ACPI_SERIAL_BEGIN(pci_powerstate);
old_state = pci_get_powerstate(child);
if (old_state < state && pci_do_power_suspend) {
error = pci_set_powerstate_method(dev, child, state);
if (error)
goto out;
}
h = acpi_get_handle(child);
status = acpi_pwr_switch_consumer(h, state);
if (ACPI_SUCCESS(status)) {
if (bootverbose)
device_printf(dev, "set ACPI power state D%d on %s\n",
state, acpi_name(h));
} else if (status != AE_NOT_FOUND)
device_printf(dev,
"failed to set ACPI power state D%d on %s: %s\n",
state, acpi_name(h), AcpiFormatException(status));
if (old_state > state && pci_do_power_resume)
error = pci_set_powerstate_method(dev, child, state);
out:
ACPI_SERIAL_END(pci_powerstate);
return (error);
}
static void
acpi_pci_update_device(ACPI_HANDLE handle, device_t pci_child)
{
ACPI_STATUS status;
device_t child;
/*
* Occasionally a PCI device may show up as an ACPI device
* with a _HID. (For example, the TabletPC TC1000 has a
* second PCI-ISA bridge that has a _HID for an
* acpi_sysresource device.) In that case, leave ACPI-CA's
* device data pointing at the ACPI-enumerated device.
*/
child = acpi_get_device(handle);
if (child != NULL) {
KASSERT(device_get_parent(child) ==
devclass_get_device(devclass_find("acpi"), 0),
("%s: child (%s)'s parent is not acpi0", __func__,
acpi_name(handle)));
return;
}
/*
* Update ACPI-CA to use the PCI enumerated device_t for this handle.
*/
status = AcpiAttachData(handle, acpi_fake_objhandler, pci_child);
if (ACPI_FAILURE(status))
printf("WARNING: Unable to attach object data to %s - %s\n",
acpi_name(handle), AcpiFormatException(status));
}
static ACPI_STATUS
acpi_pci_save_handle(ACPI_HANDLE handle, UINT32 level, void *context,
void **status)
{
struct acpi_pci_devinfo *dinfo;
device_t *devlist;
int devcount, i, func, slot;
UINT32 address;
ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
if (ACPI_FAILURE(acpi_GetInteger(handle, "_ADR", &address)))
return_ACPI_STATUS (AE_OK);
slot = ACPI_ADR_PCI_SLOT(address);
func = ACPI_ADR_PCI_FUNC(address);
if (device_get_children((device_t)context, &devlist, &devcount) != 0)
return_ACPI_STATUS (AE_OK);
for (i = 0; i < devcount; i++) {
dinfo = device_get_ivars(devlist[i]);
if (dinfo->ap_dinfo.cfg.func == func &&
dinfo->ap_dinfo.cfg.slot == slot) {
dinfo->ap_handle = handle;
acpi_pci_update_device(handle, devlist[i]);
break;
}
}
free(devlist, M_TEMP);
return_ACPI_STATUS (AE_OK);
}
static int
acpi_pci_probe(device_t dev)
{
if (acpi_get_handle(dev) == NULL)
return (ENXIO);
device_set_desc(dev, "ACPI PCI bus");
return (0);
}
static int
acpi_pci_attach(device_t dev)
{
int busno, domain, error;
error = pci_attach_common(dev);
if (error)
return (error);
/*
* Since there can be multiple independantly numbered PCI
* busses on systems with multiple PCI domains, we can't use
* the unit number to decide which bus we are probing. We ask
* the parent pcib what our domain and bus numbers are.
*/
domain = pcib_get_domain(dev);
busno = pcib_get_bus(dev);
/*
* First, PCI devices are added as in the normal PCI bus driver.
* Afterwards, the ACPI namespace under the bridge driver is
* walked to save ACPI handles to all the devices that appear in
* the ACPI namespace as immediate descendants of the bridge.
*
* XXX: Sometimes PCI devices show up in the ACPI namespace that
* pci_add_children() doesn't find. We currently just ignore
* these devices.
*/
pci_add_children(dev, domain, busno, sizeof(struct acpi_pci_devinfo));
AcpiWalkNamespace(ACPI_TYPE_DEVICE, acpi_get_handle(dev), 1,
acpi_pci_save_handle, NULL, dev, NULL);
return (bus_generic_attach(dev));
}
#ifdef ACPI_DMAR
bus_dma_tag_t dmar_get_dma_tag(device_t dev, device_t child);
static bus_dma_tag_t
acpi_pci_get_dma_tag(device_t bus, device_t child)
{
bus_dma_tag_t tag;
if (device_get_parent(child) == bus) {
/* try dmar and return if it works */
tag = dmar_get_dma_tag(bus, child);
} else
tag = NULL;
if (tag == NULL)
tag = pci_get_dma_tag(bus, child);
return (tag);
}
#else
static bus_dma_tag_t
acpi_pci_get_dma_tag(device_t bus, device_t child)
{
return (pci_get_dma_tag(bus, child));
}
#endif