freebsd-nq/sys/dev/acpica/acpi_pci.c

348 lines
10 KiB
C
Raw Normal View History

/*-
* Copyright (c) 1997, Stefan Esser <se@freebsd.org>
* Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
* Copyright (c) 2000, BSDi
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
Import the driver for VT-d DMAR hardware, as specified in the revision 1.3 of Intel® Virtualization Technology for Directed I/O Architecture Specification. The Extended Context and PASIDs from the rev. 2.2 are not supported, but I am not aware of any released hardware which implements them. Code does not use queued invalidation, see comments for the reason, and does not provide interrupt remapping services. Code implements the management of the guest address space per domain and allows to establish and tear down arbitrary mappings, but not partial unmapping. The superpages are created as needed, but not promoted. Faults are recorded, fault records could be obtained programmatically, and printed on the console. Implement the busdma(9) using DMARs. This busdma backend avoids bouncing and provides security against misbehaving hardware and driver bad programming, preventing leaks and corruption of the memory by wild DMA accesses. By default, the implementation is compiled into amd64 GENERIC kernel but disabled; to enable, set hw.dmar.enable=1 loader tunable. Code is written to work on i386, but testing there was low priority, and driver is not enabled in GENERIC. Even with the DMAR turned on, individual devices could be directed to use the bounce busdma with the hw.busdma.pci<domain>:<bus>:<device>:<function>.bounce=1 tunable. If DMARs are capable of the pass-through translations, it is used, otherwise, an identity-mapping page table is constructed. The driver was tested on Xeon 5400/5500 chipset legacy machine, Haswell desktop and E5 SandyBridge dual-socket boxes, with ahci(4), ata(4), bce(4), ehci(4), mfi(4), uhci(4), xhci(4) devices. It also works with em(4) and igb(4), but there some fixes are needed for drivers, which are not committed yet. Intel GPUs do not work with DMAR (yet). 
Many thanks to John Baldwin, who explained me the newbus integration; Peter Holm, who did all testing and helped me to discover and understand several incredible bugs; and to Jim Harris for the access to the EDS and BWG and for listening when I have to explain my findings to somebody. Sponsored by: The FreeBSD Foundation MFC after: 1 month
2013-10-28 13:33:29 +00:00
#include "opt_acpi.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
2009-06-05 18:44:36 +00:00
#include <contrib/dev/acpica/include/acpi.h>
#include <contrib/dev/acpica/include/accommon.h>
#include <dev/acpica/acpivar.h>
#include <sys/pciio.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>
#include "pcib_if.h"
#include "pci_if.h"
/*
 * Hooks for the ACPI CA debugging infrastructure.
 *
 * NOTE(review): presumably _COMPONENT and the module name are picked up by
 * ACPI-CA tracing macros such as ACPI_FUNCTION_TRACE(), which this file
 * uses in acpi_pci_save_handle() -- confirm against the ACPI-CA headers.
 */
#define _COMPONENT ACPI_BUS
ACPI_MODULE_NAME("PCI")
/*
 * Per-child instance variables for devices on the ACPI-aware PCI bus.
 *
 * acpi_pci_attach() passes sizeof(struct acpi_pci_devinfo) to
 * pci_add_children(), and the methods in this file cast the result of
 * device_get_ivars(child) to this type.
 *
 * NOTE(review): ap_dinfo presumably has to stay the first member so that
 * the generic PCI code can treat the same ivars as a struct pci_devinfo --
 * confirm before reordering.
 */
struct acpi_pci_devinfo {
struct pci_devinfo ap_dinfo;	/* generic PCI device info (cfg.slot/cfg.func are read here) */
ACPI_HANDLE ap_handle;		/* ACPI namespace handle; exposed as ACPI_IVAR_HANDLE */
int ap_flags;			/* exposed as ACPI_IVAR_FLAGS */
};
/*
 * Serialization used by acpi_pci_set_powerstate_method(), which brackets
 * its PCI and ACPI power transitions with ACPI_SERIAL_BEGIN/END on this
 * name.
 */
ACPI_SERIAL_DECL(pci_powerstate, "ACPI PCI power methods");
/*
 * Be sure that ACPI and PCI power states are equivalent.
 * acpi_pci_set_powerstate_method() hands the same integer 'state' to both
 * pci_set_powerstate_method() and acpi_pwr_switch_consumer(), so the two
 * numbering schemes must agree at compile time.
 */
CTASSERT(ACPI_STATE_D0 == PCI_POWERSTATE_D0);
CTASSERT(ACPI_STATE_D1 == PCI_POWERSTATE_D1);
CTASSERT(ACPI_STATE_D2 == PCI_POWERSTATE_D2);
CTASSERT(ACPI_STATE_D3 == PCI_POWERSTATE_D3);
/*
 * Forward declarations for the file-local driver methods and helpers.
 * The device/bus/PCI methods are referenced from acpi_pci_methods[];
 * acpi_pci_save_handle() is the AcpiWalkNamespace() callback used by
 * acpi_pci_attach(), and acpi_pci_update_device() is called from it.
 */
static int acpi_pci_attach(device_t dev);
static int acpi_pci_child_location_str_method(device_t cbdev,
device_t child, char *buf, size_t buflen);
static int acpi_pci_probe(device_t dev);
static int acpi_pci_read_ivar(device_t dev, device_t child, int which,
uintptr_t *result);
static int acpi_pci_write_ivar(device_t dev, device_t child, int which,
uintptr_t value);
static ACPI_STATUS acpi_pci_save_handle(ACPI_HANDLE handle, UINT32 level,
void *context, void **status);
static int acpi_pci_set_powerstate_method(device_t dev, device_t child,
int state);
static void acpi_pci_update_device(ACPI_HANDLE handle, device_t pci_child);
Import the driver for VT-d DMAR hardware, as specified in the revision 1.3 of Intel® Virtualization Technology for Directed I/O Architecture Specification. The Extended Context and PASIDs from the rev. 2.2 are not supported, but I am not aware of any released hardware which implements them. Code does not use queued invalidation, see comments for the reason, and does not provide interrupt remapping services. Code implements the management of the guest address space per domain and allows to establish and tear down arbitrary mappings, but not partial unmapping. The superpages are created as needed, but not promoted. Faults are recorded, fault records could be obtained programmatically, and printed on the console. Implement the busdma(9) using DMARs. This busdma backend avoids bouncing and provides security against misbehaving hardware and driver bad programming, preventing leaks and corruption of the memory by wild DMA accesses. By default, the implementation is compiled into amd64 GENERIC kernel but disabled; to enable, set hw.dmar.enable=1 loader tunable. Code is written to work on i386, but testing there was low priority, and driver is not enabled in GENERIC. Even with the DMAR turned on, individual devices could be directed to use the bounce busdma with the hw.busdma.pci<domain>:<bus>:<device>:<function>.bounce=1 tunable. If DMARs are capable of the pass-through translations, it is used, otherwise, an identity-mapping page table is constructed. The driver was tested on Xeon 5400/5500 chipset legacy machine, Haswell desktop and E5 SandyBridge dual-socket boxes, with ahci(4), ata(4), bce(4), ehci(4), mfi(4), uhci(4), xhci(4) devices. It also works with em(4) and igb(4), but there some fixes are needed for drivers, which are not committed yet. Intel GPUs do not work with DMAR (yet). 
Many thanks to John Baldwin, who explained me the newbus integration; Peter Holm, who did all testing and helped me to discover and understand several incredible bugs; and to Jim Harris for the access to the EDS and BWG and for listening when I have to explain my findings to somebody. Sponsored by: The FreeBSD Foundation MFC after: 1 month
2013-10-28 13:33:29 +00:00
/* bus_get_dma_tag method; referenced from acpi_pci_methods[]. */
static bus_dma_tag_t acpi_pci_get_dma_tag(device_t bus, device_t child);
static device_method_t acpi_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, acpi_pci_probe),
DEVMETHOD(device_attach, acpi_pci_attach),
/* Bus interface */
DEVMETHOD(bus_read_ivar, acpi_pci_read_ivar),
DEVMETHOD(bus_write_ivar, acpi_pci_write_ivar),
DEVMETHOD(bus_child_location_str, acpi_pci_child_location_str_method),
Import the driver for VT-d DMAR hardware, as specified in the revision 1.3 of Intelб╝ Virtualization Technology for Directed I/O Architecture Specification. The Extended Context and PASIDs from the rev. 2.2 are not supported, but I am not aware of any released hardware which implements them. Code does not use queued invalidation, see comments for the reason, and does not provide interrupt remapping services. Code implements the management of the guest address space per domain and allows to establish and tear down arbitrary mappings, but not partial unmapping. The superpages are created as needed, but not promoted. Faults are recorded, fault records could be obtained programmatically, and printed on the console. Implement the busdma(9) using DMARs. This busdma backend avoids bouncing and provides security against misbehaving hardware and driver bad programming, preventing leaks and corruption of the memory by wild DMA accesses. By default, the implementation is compiled into amd64 GENERIC kernel but disabled; to enable, set hw.dmar.enable=1 loader tunable. Code is written to work on i386, but testing there was low priority, and driver is not enabled in GENERIC. Even with the DMAR turned on, individual devices could be directed to use the bounce busdma with the hw.busdma.pci<domain>:<bus>:<device>:<function>.bounce=1 tunable. If DMARs are capable of the pass-through translations, it is used, otherwise, an identity-mapping page table is constructed. The driver was tested on Xeon 5400/5500 chipset legacy machine, Haswell desktop and E5 SandyBridge dual-socket boxes, with ahci(4), ata(4), bce(4), ehci(4), mfi(4), uhci(4), xhci(4) devices. It also works with em(4) and igb(4), but there some fixes are needed for drivers, which are not committed yet. Intel GPUs do not work with DMAR (yet). 
Many thanks to John Baldwin, who explained me the newbus integration; Peter Holm, who did all testing and helped me to discover and understand several incredible bugs; and to Jim Harris for the access to the EDS and BWG and for listening when I have to explain my findings to somebody. Sponsored by: The FreeBSD Foundation MFC after: 1 month
2013-10-28 13:33:29 +00:00
DEVMETHOD(bus_get_dma_tag, acpi_pci_get_dma_tag),
DEVMETHOD(bus_get_domain, acpi_get_domain),
/* PCI interface */
DEVMETHOD(pci_set_powerstate, acpi_pci_set_powerstate_method),
DEVMETHOD_END
};
/* Devclass handed to DRIVER_MODULE() below. */
static devclass_t pci_devclass;
/*
 * Declare acpi_pci_driver: class name "pci", methods from
 * acpi_pci_methods[], softc sized as struct pci_softc, with pci_driver
 * named as the single base class.
 * NOTE(review): per DEFINE_CLASS_1 semantics, methods not overridden here
 * should resolve to pci_driver's -- confirm against kobj(9).
 */
DEFINE_CLASS_1(pci, acpi_pci_driver, acpi_pci_methods, sizeof(struct pci_softc),
pci_driver);
/* Register the driver as module "acpi_pci" with "pcib" as the parent bus. */
DRIVER_MODULE(acpi_pci, pcib, acpi_pci_driver, pci_devclass, 0, 0);
/* Module dependencies on "acpi" and "pci" (version range 1..1, preferred 1). */
MODULE_DEPEND(acpi_pci, acpi, 1, 1, 1);
MODULE_DEPEND(acpi_pci, pci, 1, 1, 1);
MODULE_VERSION(acpi_pci, 1);
/*
 * bus_read_ivar method.
 *
 * Serves the two ACPI-specific instance variables (ACPI_IVAR_HANDLE and
 * ACPI_IVAR_FLAGS) from the child's struct acpi_pci_devinfo and stores the
 * value in *result, returning 0.  Every other ivar is forwarded to
 * pci_read_ivar(), whose return value is passed back unchanged.
 */
static int
acpi_pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct acpi_pci_devinfo *ivars = device_get_ivars(child);

	if (which == ACPI_IVAR_HANDLE) {
		*result = (uintptr_t)ivars->ap_handle;
		return (0);
	}
	if (which == ACPI_IVAR_FLAGS) {
		*result = (uintptr_t)ivars->ap_flags;
		return (0);
	}

	/* Not one of ours: let the generic PCI bus code answer. */
	return (pci_read_ivar(dev, child, which, result));
}
/*
 * bus_write_ivar method.
 *
 * Stores 'value' into the child's struct acpi_pci_devinfo for the two
 * ACPI-specific instance variables (ACPI_IVAR_HANDLE and ACPI_IVAR_FLAGS)
 * and returns 0.  Every other ivar is forwarded to pci_write_ivar(), whose
 * return value is passed back unchanged.
 */
static int
acpi_pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct acpi_pci_devinfo *ivars = device_get_ivars(child);

	if (which == ACPI_IVAR_HANDLE) {
		ivars->ap_handle = (ACPI_HANDLE)value;
		return (0);
	}
	if (which == ACPI_IVAR_FLAGS) {
		ivars->ap_flags = (int)value;
		return (0);
	}

	/* Not one of ours: let the generic PCI bus code handle it. */
	return (pci_write_ivar(dev, child, which, value));
}
/*
 * bus_child_location_str method.
 *
 * Lets pci_child_location_str_method() write the generic PCI location
 * into 'buf', then, if the child has an ACPI handle, appends
 * " handle=<acpi name>" and, when the handle's _PXM object evaluates
 * successfully, " _PXM=<n>".  All appends are bounded by 'buflen' via
 * strlcat().  Always returns 0.
 */
static int
acpi_pci_child_location_str_method(device_t cbdev, device_t child, char *buf,
    size_t buflen)
{
	struct acpi_pci_devinfo *dinfo = device_get_ivars(child);
	int pxm;
	char buf2[32];

	pci_child_location_str_method(cbdev, child, buf, buflen);

	if (dinfo->ap_handle) {
		strlcat(buf, " handle=", buflen);
		strlcat(buf, acpi_name(dinfo->ap_handle), buflen);
		if (ACPI_SUCCESS(acpi_GetInteger(dinfo->ap_handle, "_PXM",
		    &pxm))) {
			/*
			 * Size the format from the buffer itself rather
			 * than repeating the literal 32, so the two cannot
			 * drift apart.
			 */
			snprintf(buf2, sizeof(buf2), " _PXM=%d", pxm);
			strlcat(buf, buf2, buflen);
		}
	}
	return (0);
}
/*
* PCI power management
*/
/*
 * pci_set_powerstate method: move 'child' to power state 'state'.
 *
 * Returns EINVAL if 'state' is outside ACPI_STATE_D0..ACPI_STATE_D3.
 * Otherwise returns the result of pci_set_powerstate_method() if it was
 * invoked, or 0 if it was not.  A failure of the ACPI-side switch is only
 * reported on the console (AE_NOT_FOUND is silently ignored); it does not
 * affect the return value.
 *
 * The whole transition runs under the pci_powerstate serialization.
 * (A stray revision timestamp that had been pasted into the body was
 * removed; the logic is unchanged.)
 */
static int
acpi_pci_set_powerstate_method(device_t dev, device_t child, int state)
{
	ACPI_HANDLE h;
	ACPI_STATUS status;
	int old_state, error;

	error = 0;
	if (state < ACPI_STATE_D0 || state > ACPI_STATE_D3)
		return (EINVAL);

	/*
	 * We set the state using PCI Power Management outside of setting
	 * the ACPI state.  This means that when powering down a device, we
	 * first shut it down using PCI, and then using ACPI, which lets ACPI
	 * try to power down any Power Resources that are now no longer used.
	 * When powering up a device, we let ACPI set the state first so that
	 * it can enable any needed Power Resources before changing the PCI
	 * power state.
	 */
	ACPI_SERIAL_BEGIN(pci_powerstate);
	old_state = pci_get_powerstate(child);

	/* Going to a deeper state: PCI first, and give up if that fails. */
	if (old_state < state && pci_do_power_suspend) {
		error = pci_set_powerstate_method(dev, child, state);
		if (error)
			goto out;
	}

	h = acpi_get_handle(child);
	status = acpi_pwr_switch_consumer(h, state);
	if (ACPI_SUCCESS(status)) {
		if (bootverbose)
			device_printf(dev, "set ACPI power state D%d on %s\n",
			    state, acpi_name(h));
	} else if (status != AE_NOT_FOUND)
		device_printf(dev,
		    "failed to set ACPI power state D%d on %s: %s\n",
		    state, acpi_name(h), AcpiFormatException(status));

	/* Going to a shallower state: PCI last, after ACPI has run. */
	if (old_state > state && pci_do_power_resume)
		error = pci_set_powerstate_method(dev, child, state);

out:
	ACPI_SERIAL_END(pci_powerstate);
	return (error);
}
/*
 * Point ACPI-CA's per-handle device data at the PCI-enumerated device.
 *
 * handle:    ACPI namespace handle that matched a PCI child.
 * pci_child: the PCI-enumerated device_t for that handle.
 *
 * If an ACPI-enumerated device_t already exists for 'handle', it is left
 * in place and nothing is changed.  Otherwise 'pci_child' is attached to
 * the handle with AcpiAttachData(); a failure there is only reported with
 * a console warning.
 *
 * (Revision-history text that had been pasted into the body, part of it
 * outside any comment, was removed; the logic is unchanged.)
 */
static void
acpi_pci_update_device(ACPI_HANDLE handle, device_t pci_child)
{
	ACPI_STATUS status;
	device_t child;

	/*
	 * ACPI Device() objects without a _HID or _CID do not get a
	 * device_t of their own, so normally no ACPI-enumerated device
	 * mirrors a PCI device.
	 *
	 * Occasionally a PCI device may show up as an ACPI device
	 * with a _HID.  (For example, the TabletPC TC1000 has a
	 * second PCI-ISA bridge that has a _HID for an
	 * acpi_sysresource device.)  In that case, leave ACPI-CA's
	 * device data pointing at the ACPI-enumerated device.
	 */
	child = acpi_get_device(handle);
	if (child != NULL) {
		KASSERT(device_get_parent(child) ==
		    devclass_get_device(devclass_find("acpi"), 0),
		    ("%s: child (%s)'s parent is not acpi0", __func__,
		    acpi_name(handle)));
		return;
	}

	/*
	 * Update ACPI-CA to use the PCI enumerated device_t for this handle.
	 */
	status = AcpiAttachData(handle, acpi_fake_objhandler, pci_child);
	if (ACPI_FAILURE(status))
		printf("WARNING: Unable to attach object data to %s - %s\n",
		    acpi_name(handle), AcpiFormatException(status));
}
/*
 * AcpiWalkNamespace() callback used by acpi_pci_attach().
 *
 * handle:  namespace object being visited.
 * context: the PCI bus device_t (passed through by acpi_pci_attach()).
 *
 * Evaluates _ADR on 'handle' to obtain a PCI slot/function pair, then
 * looks for the first child of the bus with the same slot and function.
 * On a match the handle is recorded in the child's ivars and
 * acpi_pci_update_device() is called.  Always returns AE_OK, including
 * when _ADR cannot be evaluated or the child list cannot be obtained.
 */
static ACPI_STATUS
acpi_pci_save_handle(ACPI_HANDLE handle, UINT32 level, void *context,
    void **status)
{
	struct acpi_pci_devinfo *ivars;
	device_t *children;
	device_t bus;
	int idx, nchildren, want_func, want_slot;
	UINT32 adr;

	ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);

	/* Objects without a usable _ADR cannot be matched; skip them. */
	if (ACPI_FAILURE(acpi_GetInteger(handle, "_ADR", &adr)))
		return_ACPI_STATUS (AE_OK);
	want_slot = ACPI_ADR_PCI_SLOT(adr);
	want_func = ACPI_ADR_PCI_FUNC(adr);

	bus = (device_t)context;
	if (device_get_children(bus, &children, &nchildren) != 0)
		return_ACPI_STATUS (AE_OK);

	for (idx = 0; idx < nchildren; idx++) {
		ivars = device_get_ivars(children[idx]);
		if (ivars->ap_dinfo.cfg.func != want_func ||
		    ivars->ap_dinfo.cfg.slot != want_slot)
			continue;

		/* First match wins. */
		ivars->ap_handle = handle;
		acpi_pci_update_device(handle, children[idx]);
		break;
	}

	/* The list from device_get_children() is released with M_TEMP. */
	free(children, M_TEMP);
	return_ACPI_STATUS (AE_OK);
}
/*
 * device_probe method.
 *
 * Claims the bus only if it has an ACPI handle; otherwise returns ENXIO.
 *
 * Returns BUS_PROBE_DEFAULT.  According to the revision note that
 * accompanied this line, the probe priority was deliberately lowered so
 * that a more specific bus driver (the Xen PCI bus) can take over when
 * needed.  That note had been pasted into the function body as plain
 * text, which is not valid C, so it is summarized here instead.
 */
static int
acpi_pci_probe(device_t dev)
{

	if (acpi_get_handle(dev) == NULL)
		return (ENXIO);
	device_set_desc(dev, "ACPI PCI bus");
	return (BUS_PROBE_DEFAULT);
}
/*
 * device_attach method.
 *
 * Runs the common PCI attach code, enumerates PCI children (allocating
 * ivars sized for struct acpi_pci_devinfo), walks the immediate ACPI
 * namespace children of the bridge to record ACPI handles on matching
 * PCI devices, and finally attaches the children.
 *
 * Returns the error from pci_attach_common() if it fails, otherwise the
 * result of bus_generic_attach().  The status of AcpiWalkNamespace() is
 * not examined.
 *
 * (A stray revision timestamp that had been pasted into the middle of the
 * AcpiWalkNamespace() argument list was removed; the call is unchanged.)
 */
static int
acpi_pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing.  We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);

	/*
	 * First, PCI devices are added as in the normal PCI bus driver.
	 * Afterwards, the ACPI namespace under the bridge driver is
	 * walked to save ACPI handles to all the devices that appear in
	 * the ACPI namespace as immediate descendants of the bridge.
	 *
	 * XXX: Sometimes PCI devices show up in the ACPI namespace that
	 * pci_add_children() doesn't find.  We currently just ignore
	 * these devices.
	 */
	pci_add_children(dev, domain, busno, sizeof(struct acpi_pci_devinfo));
	AcpiWalkNamespace(ACPI_TYPE_DEVICE, acpi_get_handle(dev), 1,
	    acpi_pci_save_handle, NULL, dev, NULL);

	return (bus_generic_attach(dev));
}
Import the driver for VT-d DMAR hardware, as specified in the revision 1.3 of Intel® Virtualization Technology for Directed I/O Architecture Specification. The Extended Context and PASIDs from the rev. 2.2 are not supported, but I am not aware of any released hardware which implements them. Code does not use queued invalidation, see comments for the reason, and does not provide interrupt remapping services. Code implements the management of the guest address space per domain and allows to establish and tear down arbitrary mappings, but not partial unmapping. The superpages are created as needed, but not promoted. Faults are recorded, fault records could be obtained programmatically, and printed on the console. Implement the busdma(9) using DMARs. This busdma backend avoids bouncing and provides security against misbehaving hardware and driver bad programming, preventing leaks and corruption of the memory by wild DMA accesses. By default, the implementation is compiled into amd64 GENERIC kernel but disabled; to enable, set hw.dmar.enable=1 loader tunable. Code is written to work on i386, but testing there was low priority, and driver is not enabled in GENERIC. Even with the DMAR turned on, individual devices could be directed to use the bounce busdma with the hw.busdma.pci<domain>:<bus>:<device>:<function>.bounce=1 tunable. If DMARs are capable of the pass-through translations, it is used, otherwise, an identity-mapping page table is constructed. The driver was tested on Xeon 5400/5500 chipset legacy machine, Haswell desktop and E5 SandyBridge dual-socket boxes, with ahci(4), ata(4), bce(4), ehci(4), mfi(4), uhci(4), xhci(4) devices. It also works with em(4) and igb(4), but there some fixes are needed for drivers, which are not committed yet. Intel GPUs do not work with DMAR (yet). 
Many thanks to John Baldwin, who explained me the newbus integration; Peter Holm, who did all testing and helped me to discover and understand several incredible bugs; and to Jim Harris for the access to the EDS and BWG and for listening when I have to explain my findings to somebody. Sponsored by: The FreeBSD Foundation MFC after: 1 month
2013-10-28 13:33:29 +00:00
#ifdef ACPI_DMAR
bus_dma_tag_t dmar_get_dma_tag(device_t dev, device_t child);
#endif

/*
 * bus_get_dma_tag method.
 *
 * When built with ACPI_DMAR and 'child' is a direct child of 'bus', the
 * tag from dmar_get_dma_tag() is returned if it is non-NULL.  In every
 * other case (no ACPI_DMAR, an indirect descendant, or a NULL DMAR tag)
 * the result of pci_get_dma_tag() is returned.
 */
static bus_dma_tag_t
acpi_pci_get_dma_tag(device_t bus, device_t child)
{
#ifdef ACPI_DMAR
	bus_dma_tag_t dmar_tag;

	if (device_get_parent(child) == bus) {
		/* try dmar and return if it works */
		dmar_tag = dmar_get_dma_tag(bus, child);
		if (dmar_tag != NULL)
			return (dmar_tag);
	}
#endif
	return (pci_get_dma_tag(bus, child));
}