env/dpdk: Use the DPDK device count for IOMMU mapping

The VFIO container must have at least one device present to
be able to perform DMA mapping operations. Instead of using
the count of SPDK devices, use the count of DPDK devices. This
allows the use of SPDK's memory management APIs even if
only a DPDK device is attached.

Change-Id: Ie7e21f09bdf1cdf1a85424c35212f64f24ae4e26
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Signed-off-by: Maciej Szwed <maciej.szwed@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/3874
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
Ben Walker, 2020-08-20 12:25:27 -07:00 (committed by Tomasz Zawadzki)
parent 22e240eb87
commit 8f7d9ec2f4
4 changed files with 141 additions and 66 deletions
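The scenario the commit message describes can be sketched as follows. This is not part of the patch: it is a minimal, hypothetical example that assumes the application has already run rte_eal_init() and DPDK has probed at least one vfio-bound PCI device, while SPDK itself has attached no devices; REGION_SIZE and the buffer handling are illustrative only, and error handling is abbreviated.

/*
 * Hypothetical usage sketch (not from the patch): with this change,
 * SPDK's memory management APIs work as long as at least one vfio-bound
 * device is known to DPDK, even if SPDK has not attached any device.
 */
#include <stdio.h>
#include <stdlib.h>

#include "spdk/env_dpdk.h"	/* spdk_env_dpdk_post_init(), spdk_env_dpdk_post_fini() */
#include "spdk/env.h"		/* spdk_mem_register(), spdk_mem_unregister() */

#define REGION_SIZE (2 * 1024 * 1024)	/* 2 MB, the granularity of SPDK's memory maps */

int
main(void)
{
	void *buf = NULL;
	int rc;

	/* Assumption: rte_eal_init() already ran and probed a vfio-bound PCI
	 * device (e.g. a DPDK NIC); no SPDK driver has attached anything. */
	rc = spdk_env_dpdk_post_init(false);
	if (rc != 0) {
		fprintf(stderr, "spdk_env_dpdk_post_init() failed: %d\n", rc);
		return EXIT_FAILURE;
	}

	/* Register a 2 MB-aligned region with SPDK's memory maps. With the
	 * IOMMU enabled, this also issues VFIO_IOMMU_MAP_DMA against the
	 * shared vfio container, which is valid because the DPDK device keeps
	 * at least one IOMMU group attached to that container. */
	if (posix_memalign(&buf, REGION_SIZE, REGION_SIZE) != 0) {
		spdk_env_dpdk_post_fini();
		return EXIT_FAILURE;
	}

	rc = spdk_mem_register(buf, REGION_SIZE);
	printf("spdk_mem_register(): %d\n", rc);

	if (rc == 0) {
		spdk_mem_unregister(buf, REGION_SIZE);
	}
	free(buf);
	spdk_env_dpdk_post_fini();

	return 0;
}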


@@ -80,6 +80,7 @@ void pci_env_reinit(void);
 void pci_env_fini(void);
 int mem_map_init(bool legacy_mem);
 int vtophys_init(void);
+void vtophys_fini(void);
 
 /**
  * Report a DMA-capable PCI device to the vtophys translation code.


@@ -511,6 +511,8 @@ spdk_env_dpdk_post_init(bool legacy_mem)
 void
 spdk_env_dpdk_post_fini(void)
 {
+	vtophys_fini();
+
 	pci_env_fini();
 
 	free_args(g_eal_cmdline, g_eal_cmdline_argcount);


@@ -35,6 +35,7 @@
 
 #include "env_internal.h"
 
+#include <rte_dev.h>
 #include <rte_config.h>
 #include <rte_memory.h>
 #include <rte_eal_memconfig.h>
@@ -1221,6 +1222,87 @@ vfio_noiommu_enabled(void)
 	return rte_vfio_noiommu_is_enabled();
 }
 
+static void
+vtophys_iommu_device_event(const char *device_name,
+			   enum rte_dev_event_type event,
+			   void *cb_arg)
+{
+	struct rte_dev_iterator dev_iter;
+	struct rte_device *dev;
+
+	pthread_mutex_lock(&g_vfio.mutex);
+	switch (event) {
+	default:
+	case RTE_DEV_EVENT_ADD:
+		RTE_DEV_FOREACH(dev, "bus=pci", &dev_iter) {
+			if (strcmp(dev->name, device_name) == 0) {
+				struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev);
+
+				if (pci_dev->kdrv == RTE_PCI_KDRV_VFIO) {
+					/* This is a new PCI device using vfio */
+					g_vfio.device_ref++;
+				}
+				break;
+			}
+		}
+
+		if (g_vfio.device_ref == 1) {
+			struct spdk_vfio_dma_map *dma_map;
+			int ret;
+			/* This is the first device registered. This means that the first
+			 * IOMMU group might have just been added to the DPDK vfio container.
+			 * From this point it is certain that the memory can be mapped now.
+			 */
+			TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) {
+				ret = ioctl(g_vfio.fd, VFIO_IOMMU_MAP_DMA, &dma_map->map);
+				if (ret) {
+					DEBUG_PRINT("Cannot update DMA mapping, error %d\n", errno);
+					break;
+				}
+			}
+		}
+		break;
+	case RTE_DEV_EVENT_REMOVE:
+		RTE_DEV_FOREACH(dev, "bus=pci", &dev_iter) {
+			if (strcmp(dev->name, device_name) == 0) {
+				struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev);
+
+				if (pci_dev->kdrv == RTE_PCI_KDRV_VFIO) {
+					/* This is a PCI device using vfio */
+					g_vfio.device_ref--;
+				}
+				break;
+			}
+		}
+
+		if (g_vfio.device_ref == 0) {
+			struct spdk_vfio_dma_map *dma_map;
+			int ret;
+			/* If DPDK doesn't have any additional devices using its vfio container,
+			 * all the mappings will be automatically removed by the Linux vfio driver.
+			 * We unmap the memory manually to be able to easily re-map it later regardless
+			 * of other, external factors.
+			 */
+			TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) {
+				struct vfio_iommu_type1_dma_unmap unmap = {};
+
+				unmap.argsz = sizeof(unmap);
+				unmap.flags = 0;
+				unmap.iova = dma_map->map.iova;
+				unmap.size = dma_map->map.size;
+				ret = ioctl(g_vfio.fd, VFIO_IOMMU_UNMAP_DMA, &unmap);
+				if (ret) {
+					DEBUG_PRINT("Cannot unmap DMA memory, error %d\n", errno);
+					break;
+				}
+			}
+		}
+		break;
+	}
+	pthread_mutex_unlock(&g_vfio.mutex);
+}
+
 static void
 vtophys_iommu_init(void)
 {
@@ -1229,6 +1311,9 @@ vtophys_iommu_init(void)
 	const char vfio_path[] = "/dev/vfio/vfio";
 	DIR *dir;
 	struct dirent *d;
+	struct rte_dev_iterator dev_iter;
+	struct rte_device *dev;
+	int rc;
 
 	if (!vfio_enabled()) {
 		return;
@@ -1269,10 +1354,47 @@ vtophys_iommu_init(void)
 		return;
 	}
 
+	/* If the IOMMU is enabled, we need to track whether there are any devices present because
+	 * it's only valid to perform vfio IOCTLs to the containers when there is at least
+	 * one device. The device may be a DPDK device that SPDK doesn't otherwise know about, but
+	 * that's ok.
+	 */
+	RTE_DEV_FOREACH(dev, "bus=pci", &dev_iter) {
+		struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev);
+
+		if (pci_dev->kdrv == RTE_PCI_KDRV_VFIO) {
+			/* This is a PCI device using vfio */
+			g_vfio.device_ref++;
+		}
+	}
+
+	if (spdk_process_is_primary()) {
+		rc = rte_dev_event_callback_register(NULL, vtophys_iommu_device_event, NULL);
+		if (rc) {
+			DEBUG_PRINT("Failed to register device event callback\n");
+			return;
+		}
+		rc = rte_dev_event_monitor_start();
+		if (rc) {
+			DEBUG_PRINT("Failed to start device event monitoring.\n");
+			return;
+		}
+	}
+
 	g_vfio.enabled = true;
 
 	return;
 }
+
+static void
+vtophys_iommu_fini(void)
+{
+	if (spdk_process_is_primary()) {
+		rte_dev_event_callback_unregister(NULL, vtophys_iommu_device_event, NULL);
+		rte_dev_event_monitor_stop();
+	}
+}
+
 #endif
 
 void
@@ -1290,35 +1412,6 @@ vtophys_pci_device_added(struct rte_pci_device *pci_device)
 		DEBUG_PRINT("Memory allocation error\n");
 	}
 	pthread_mutex_unlock(&g_vtophys_pci_devices_mutex);
-
-#if VFIO_ENABLED
-	struct spdk_vfio_dma_map *dma_map;
-	int ret;
-
-	if (!g_vfio.enabled) {
-		return;
-	}
-
-	pthread_mutex_lock(&g_vfio.mutex);
-	g_vfio.device_ref++;
-	if (g_vfio.device_ref > 1) {
-		pthread_mutex_unlock(&g_vfio.mutex);
-		return;
-	}
-
-	/* This is the first SPDK device using DPDK vfio. This means that the first
-	 * IOMMU group might have just been added to the DPDK vfio container.
-	 * From this point it is certain that the memory can be mapped now.
-	 */
-	TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) {
-		ret = ioctl(g_vfio.fd, VFIO_IOMMU_MAP_DMA, &dma_map->map);
-		if (ret) {
-			DEBUG_PRINT("Cannot update DMA mapping, error %d\n", errno);
-			break;
-		}
-	}
-	pthread_mutex_unlock(&g_vfio.mutex);
-#endif
 }
 
 void
@@ -1335,43 +1428,6 @@ vtophys_pci_device_removed(struct rte_pci_device *pci_device)
 		}
 	}
 	pthread_mutex_unlock(&g_vtophys_pci_devices_mutex);
-
-#if VFIO_ENABLED
-	struct spdk_vfio_dma_map *dma_map;
-	int ret;
-
-	if (!g_vfio.enabled) {
-		return;
-	}
-
-	pthread_mutex_lock(&g_vfio.mutex);
-	assert(g_vfio.device_ref > 0);
-	g_vfio.device_ref--;
-	if (g_vfio.device_ref > 0) {
-		pthread_mutex_unlock(&g_vfio.mutex);
-		return;
-	}
-
-	/* This is the last SPDK device using DPDK vfio. If DPDK doesn't have
-	 * any additional devices using its vfio container, all the mappings
-	 * will be automatically removed by the Linux vfio driver. We unmap
-	 * the memory manually to be able to easily re-map it later regardless
-	 * of other, external factors.
-	 */
-	TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) {
-		struct vfio_iommu_type1_dma_unmap unmap = {};
-
-		unmap.argsz = sizeof(unmap);
-		unmap.flags = 0;
-		unmap.iova = dma_map->map.iova;
-		unmap.size = dma_map->map.size;
-		ret = ioctl(g_vfio.fd, VFIO_IOMMU_UNMAP_DMA, &unmap);
-		if (ret) {
-			DEBUG_PRINT("Cannot unmap DMA memory, error %d\n", errno);
-			break;
-		}
-	}
-	pthread_mutex_unlock(&g_vfio.mutex);
-#endif
 }
 
 int
@@ -1405,6 +1461,14 @@ vtophys_init(void)
 	return 0;
 }
 
+void
+vtophys_fini(void)
+{
+#if VFIO_ENABLED
+	vtophys_iommu_fini();
+#endif
+}
+
 uint64_t
 spdk_vtophys(const void *buf, uint64_t *size)
 {


@@ -57,6 +57,14 @@ DEFINE_STUB(rte_vfio_noiommu_is_enabled, int, (void), 0);
 DEFINE_STUB(rte_memseg_get_fd_thread_unsafe, int, (const struct rte_memseg *ms), 0);
 DEFINE_STUB(rte_memseg_get_fd_offset_thread_unsafe, int,
 	    (const struct rte_memseg *ms, size_t *offset), 0);
+DEFINE_STUB(rte_dev_iterator_init, int, (struct rte_dev_iterator *it, const char *dev_str), 0);
+DEFINE_STUB(rte_dev_iterator_next, struct rte_device *, (struct rte_dev_iterator *it), NULL);
+DEFINE_STUB(rte_dev_event_callback_register, int, (const char *device_name,
+		rte_dev_event_cb_fn cb_fn, void *cb_arg), 0);
+DEFINE_STUB(rte_dev_event_callback_unregister, int, (const char *device_name,
+		rte_dev_event_cb_fn cb_fn, void *cb_arg), 0);
+DEFINE_STUB(rte_dev_event_monitor_start, int, (void), 0);
+DEFINE_STUB(rte_dev_event_monitor_stop, int, (void), 0);
 
 static int
 test_mem_map_notify(void *cb_ctx, struct spdk_mem_map *map,