bus/pci: consider only usable devices for IOVA mode

When selecting the preferred IOVA mode of the pci bus, the current
heuristic ("are devices bound?", "are devices bound to UIO?", "are pmd
drivers supporting IOVA as VA?" etc..) should honor the device
white/blacklist so that an unwanted device does not impact the decision.

There is no reason to consider a device which has no driver available.

This applies to all OS, so implements this in common code then call a
OS specific callback.

On Linux side:
- the VFIO special considerations should be evaluated only if VFIO
  support is built,
- there is no strong requirement on using VA rather than PA if a driver
  supports VA, so defaulting to DC in such a case.

Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Reviewed-by: Anatoly Burakov <anatoly.burakov@intel.com>
This commit is contained in:
Ben Walker 2019-06-14 11:39:17 +02:00 committed by Thomas Monjalon
parent c2361bab70
commit 703458e19c
4 changed files with 136 additions and 147 deletions

View File

@ -376,13 +376,14 @@ rte_pci_scan(void)
return -1;
}
/*
* Get iommu class of PCI devices on the bus.
*/
enum rte_iova_mode
rte_pci_get_iommu_class(void)
pci_device_iova_mode(const struct rte_pci_driver *pdrv __rte_unused,
const struct rte_pci_device *pdev)
{
/* Supports only RTE_KDRV_NIC_UIO */
if (pdev->kdrv != RTE_KDRV_NIC_UIO)
RTE_LOG(DEBUG, EAL, "Unsupported kernel driver? Defaulting to IOVA as 'PA'\n");
return RTE_IOVA_PA;
}

View File

@ -500,95 +500,14 @@ rte_pci_scan(void)
return -1;
}
/*
* Is pci device bound to any kdrv
*/
static inline int
pci_one_device_is_bound(void)
{
struct rte_pci_device *dev = NULL;
int ret = 0;
FOREACH_DEVICE_ON_PCIBUS(dev) {
if (dev->kdrv == RTE_KDRV_UNKNOWN ||
dev->kdrv == RTE_KDRV_NONE) {
continue;
} else {
ret = 1;
break;
}
}
return ret;
}
/*
* Any one of the device bound to uio
*/
static inline int
pci_one_device_bound_uio(void)
{
struct rte_pci_device *dev = NULL;
struct rte_devargs *devargs;
int need_check;
FOREACH_DEVICE_ON_PCIBUS(dev) {
devargs = dev->device.devargs;
need_check = 0;
switch (rte_pci_bus.bus.conf.scan_mode) {
case RTE_BUS_SCAN_WHITELIST:
if (devargs && devargs->policy == RTE_DEV_WHITELISTED)
need_check = 1;
break;
case RTE_BUS_SCAN_UNDEFINED:
case RTE_BUS_SCAN_BLACKLIST:
if (devargs == NULL ||
devargs->policy != RTE_DEV_BLACKLISTED)
need_check = 1;
break;
}
if (!need_check)
continue;
if (dev->kdrv == RTE_KDRV_IGB_UIO ||
dev->kdrv == RTE_KDRV_UIO_GENERIC) {
return 1;
}
}
return 0;
}
/*
* Any one of the device has iova as va
*/
static inline int
pci_one_device_has_iova_va(void)
{
struct rte_pci_device *dev = NULL;
struct rte_pci_driver *drv = NULL;
FOREACH_DRIVER_ON_PCIBUS(drv) {
if (drv && drv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) {
FOREACH_DEVICE_ON_PCIBUS(dev) {
if ((dev->kdrv == RTE_KDRV_VFIO ||
dev->kdrv == RTE_KDRV_NIC_MLX) &&
rte_pci_match(drv, dev))
return 1;
}
}
}
return 0;
}
#if defined(RTE_ARCH_X86)
static bool
pci_one_device_iommu_support_va(struct rte_pci_device *dev)
pci_one_device_iommu_support_va(const struct rte_pci_device *dev)
{
#define VTD_CAP_MGAW_SHIFT 16
#define VTD_CAP_MGAW_MASK (0x3fULL << VTD_CAP_MGAW_SHIFT)
#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt */
struct rte_pci_addr *addr = &dev->addr;
const struct rte_pci_addr *addr = &dev->addr;
char filename[PATH_MAX];
FILE *fp;
uint64_t mgaw, vtd_cap_reg = 0;
@ -632,80 +551,76 @@ pci_one_device_iommu_support_va(struct rte_pci_device *dev)
}
#elif defined(RTE_ARCH_PPC_64)
static bool
pci_one_device_iommu_support_va(__rte_unused struct rte_pci_device *dev)
pci_one_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev)
{
return false;
}
#else
static bool
pci_one_device_iommu_support_va(__rte_unused struct rte_pci_device *dev)
pci_one_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev)
{
return true;
}
#endif
/*
* All devices IOMMUs support VA as IOVA
*/
static bool
pci_devices_iommu_support_va(void)
enum rte_iova_mode
pci_device_iova_mode(const struct rte_pci_driver *pdrv,
const struct rte_pci_device *pdev)
{
struct rte_pci_device *dev = NULL;
struct rte_pci_driver *drv = NULL;
enum rte_iova_mode iova_mode = RTE_IOVA_DC;
static int iommu_no_va = -1;
FOREACH_DRIVER_ON_PCIBUS(drv) {
FOREACH_DEVICE_ON_PCIBUS(dev) {
if (!rte_pci_match(drv, dev))
continue;
/*
* just one PCI device needs to be checked out because
* the IOMMU hardware is the same for all of them.
*/
return pci_one_device_iommu_support_va(dev);
switch (pdev->kdrv) {
case RTE_KDRV_VFIO: {
#ifdef VFIO_PRESENT
static int is_vfio_noiommu_enabled = -1;
if (is_vfio_noiommu_enabled == -1) {
if (rte_vfio_noiommu_is_enabled() == 1)
is_vfio_noiommu_enabled = 1;
else
is_vfio_noiommu_enabled = 0;
}
if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) == 0) {
iova_mode = RTE_IOVA_PA;
} else if (is_vfio_noiommu_enabled != 0) {
RTE_LOG(DEBUG, EAL, "Forcing to 'PA', vfio-noiommu mode configured\n");
iova_mode = RTE_IOVA_PA;
}
#endif
break;
}
case RTE_KDRV_NIC_MLX:
if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) == 0)
iova_mode = RTE_IOVA_PA;
break;
case RTE_KDRV_IGB_UIO:
case RTE_KDRV_UIO_GENERIC:
iova_mode = RTE_IOVA_PA;
break;
default:
RTE_LOG(DEBUG, EAL, "Unsupported kernel driver? Defaulting to IOVA as 'PA'\n");
iova_mode = RTE_IOVA_PA;
break;
}
if (iova_mode != RTE_IOVA_PA) {
/*
* We can check this only once, because the IOMMU hardware is
* the same for all of them.
*/
if (iommu_no_va == -1)
iommu_no_va = pci_one_device_iommu_support_va(pdev)
? 0 : 1;
if (iommu_no_va != 0) {
RTE_LOG(DEBUG, EAL, "Forcing to 'PA', IOMMU does not support IOVA as 'VA'\n");
iova_mode = RTE_IOVA_PA;
}
}
return true;
}
/*
* Get iommu class of PCI devices on the bus.
*/
enum rte_iova_mode
rte_pci_get_iommu_class(void)
{
bool is_bound;
bool is_vfio_noiommu_enabled = true;
bool has_iova_va;
bool is_bound_uio;
bool iommu_no_va;
is_bound = pci_one_device_is_bound();
if (!is_bound)
return RTE_IOVA_DC;
has_iova_va = pci_one_device_has_iova_va();
is_bound_uio = pci_one_device_bound_uio();
iommu_no_va = !pci_devices_iommu_support_va();
#ifdef VFIO_PRESENT
is_vfio_noiommu_enabled = rte_vfio_noiommu_is_enabled() == true ?
true : false;
#endif
if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled &&
!iommu_no_va)
return RTE_IOVA_VA;
if (has_iova_va) {
RTE_LOG(WARNING, EAL, "Some devices want iova as va but pa will be used because.. ");
if (is_vfio_noiommu_enabled)
RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
if (is_bound_uio)
RTE_LOG(WARNING, EAL, "few device bound to UIO\n");
if (iommu_no_va)
RTE_LOG(WARNING, EAL, "IOMMU does not support IOVA as VA\n");
}
return RTE_IOVA_PA;
return iova_mode;
}
/* Read PCI config space. */

View File

@ -574,6 +574,71 @@ pci_dma_unmap(struct rte_device *dev, void *addr, uint64_t iova, size_t len)
return -1;
}
static bool
pci_ignore_device(const struct rte_pci_device *dev)
{
struct rte_devargs *devargs = dev->device.devargs;
switch (rte_pci_bus.bus.conf.scan_mode) {
case RTE_BUS_SCAN_WHITELIST:
if (devargs && devargs->policy == RTE_DEV_WHITELISTED)
return false;
break;
case RTE_BUS_SCAN_UNDEFINED:
case RTE_BUS_SCAN_BLACKLIST:
if (devargs == NULL ||
devargs->policy != RTE_DEV_BLACKLISTED)
return false;
break;
}
return true;
}
enum rte_iova_mode
rte_pci_get_iommu_class(void)
{
enum rte_iova_mode iova_mode = RTE_IOVA_DC;
const struct rte_pci_device *dev;
const struct rte_pci_driver *drv;
bool devices_want_va = false;
bool devices_want_pa = false;
FOREACH_DEVICE_ON_PCIBUS(dev) {
if (pci_ignore_device(dev))
continue;
if (dev->kdrv == RTE_KDRV_UNKNOWN ||
dev->kdrv == RTE_KDRV_NONE)
continue;
FOREACH_DRIVER_ON_PCIBUS(drv) {
enum rte_iova_mode dev_iova_mode;
if (!rte_pci_match(drv, dev))
continue;
dev_iova_mode = pci_device_iova_mode(drv, dev);
RTE_LOG(DEBUG, EAL, "PCI driver %s for device "
PCI_PRI_FMT " wants IOVA as '%s'\n",
drv->driver.name,
dev->addr.domain, dev->addr.bus,
dev->addr.devid, dev->addr.function,
dev_iova_mode == RTE_IOVA_DC ? "DC" :
(dev_iova_mode == RTE_IOVA_PA ? "PA" : "VA"));
if (dev_iova_mode == RTE_IOVA_PA)
devices_want_pa = true;
else if (dev_iova_mode == RTE_IOVA_VA)
devices_want_va = true;
}
}
if (devices_want_pa) {
iova_mode = RTE_IOVA_PA;
if (devices_want_va)
RTE_LOG(WARNING, EAL, "Some devices want 'VA' but forcing 'PA' because other devices want it\n");
} else if (devices_want_va) {
iova_mode = RTE_IOVA_VA;
}
return iova_mode;
}
struct rte_pci_bus rte_pci_bus = {
.bus = {
.scan = rte_pci_scan,

View File

@ -172,6 +172,14 @@ int
rte_pci_match(const struct rte_pci_driver *pci_drv,
const struct rte_pci_device *pci_dev);
/**
* OS specific callback for rte_pci_get_iommu_class
*
*/
enum rte_iova_mode
pci_device_iova_mode(const struct rte_pci_driver *pci_drv,
const struct rte_pci_device *pci_dev);
/**
* Get iommu class of PCI devices on the bus.
* And return their preferred iova mapping mode.