diff --git a/lib/env_dpdk/vtophys.c b/lib/env_dpdk/vtophys.c index 8248787714..25233c2590 100644 --- a/lib/env_dpdk/vtophys.c +++ b/lib/env_dpdk/vtophys.c @@ -45,6 +45,31 @@ #include "spdk/queue.h" #include "spdk/util.h" +#ifdef __FreeBSD__ +#define SPDK_VFIO_ENABLED 0 +#else +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) +#define SPDK_VFIO_ENABLED 1 +#include + +/* Internal DPDK function forward declaration */ +int pci_vfio_is_enabled(void); + +struct vfio_cfg { + int fd; + bool enabled; +}; + +static struct vfio_cfg g_vfio = { + .fd = -1, + .enabled = false +}; +#else +#define SPDK_VFIO_ENABLED 0 +#endif +#endif + #if DEBUG #define DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__) #else @@ -53,7 +78,49 @@ static struct spdk_mem_map *g_vtophys_map; -/* Try to get the paddr from the DPDK memsegs */ +#if SPDK_VFIO_ENABLED +static int +vtophys_iommu_map_dma(uint64_t vaddr, uint64_t iova, uint64_t size) +{ + struct vfio_iommu_type1_dma_map dma_map; + int ret; + + dma_map.argsz = sizeof(dma_map); + dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; + dma_map.vaddr = vaddr; + dma_map.iova = iova; + dma_map.size = size; + + ret = ioctl(g_vfio.fd, VFIO_IOMMU_MAP_DMA, &dma_map); + + if (ret) { + DEBUG_PRINT("Cannot set up DMA mapping, error %d\n", errno); + } + + return ret; +} + +static int +vtophys_iommu_unmap_dma(uint64_t iova, uint64_t size) +{ + struct vfio_iommu_type1_dma_unmap dma_unmap; + int ret; + + dma_unmap.argsz = sizeof(dma_unmap); + dma_unmap.flags = 0; + dma_unmap.iova = iova; + dma_unmap.size = size; + + ret = ioctl(g_vfio.fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap); + + if (ret) { + DEBUG_PRINT("Cannot clear DMA mapping, error %d\n", errno); + } + + return ret; +} +#endif + static uint64_t vtophys_get_paddr_memseg(uint64_t vaddr) { @@ -97,11 +164,12 @@ vtophys_get_paddr_pagemap(uint64_t vaddr) rte_atomic64_read((rte_atomic64_t *)vaddr); paddr = rte_mem_virt2phy((void *)vaddr); } - if (paddr != RTE_BAD_PHYS_ADDR) { - return paddr; + if (paddr == RTE_BAD_PHYS_ADDR) { + /* Unable to get to the physical address. */ + return SPDK_VTOPHYS_ERROR; } - return SPDK_VTOPHYS_ERROR; + return paddr; } static int @@ -130,10 +198,29 @@ spdk_vtophys_notify(void *cb_ctx, struct spdk_mem_map *map, switch (action) { case SPDK_MEM_MAP_NOTIFY_REGISTER: if (paddr == SPDK_VTOPHYS_ERROR) { - paddr = vtophys_get_paddr_pagemap((uint64_t)vaddr); - if (paddr == SPDK_VTOPHYS_ERROR) { - DEBUG_PRINT("could not get phys addr for %p\n", vaddr); - return -EFAULT; + /* This is not an address that DPDK is managing. */ +#if SPDK_VFIO_ENABLED + if (g_vfio.enabled) { + /* We'll use the virtual address as the iova. DPDK + * currently uses physical addresses as the iovas (or counts + * up from 0 if it can't get physical addresses), so + * the range of user space virtual addresses and physical + * addresses will never overlap. + */ + paddr = (uint64_t)vaddr; + rc = vtophys_iommu_map_dma((uint64_t)vaddr, paddr, VALUE_2MB); + if (rc) { + return -EFAULT; + } + } else +#endif + { + /* Get the physical address from /proc/self/pagemap. */ + paddr = vtophys_get_paddr_pagemap((uint64_t)vaddr); + if (paddr == SPDK_VTOPHYS_ERROR) { + DEBUG_PRINT("could not get phys addr for %p\n", vaddr); + return -EFAULT; + } } } @@ -145,6 +232,21 @@ spdk_vtophys_notify(void *cb_ctx, struct spdk_mem_map *map, rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, VALUE_2MB, paddr); break; case SPDK_MEM_MAP_NOTIFY_UNREGISTER: +#if SPDK_VFIO_ENABLED + if (paddr == SPDK_VTOPHYS_ERROR) { + /* + * This is not an address that DPDK is managing. If vfio is enabled, + * we need to unmap the range from the IOMMU + */ + if (g_vfio.enabled) { + paddr = spdk_mem_map_translate(map, (uint64_t)vaddr); + rc = vtophys_iommu_unmap_dma(paddr, VALUE_2MB); + if (rc) { + return -EFAULT; + } + } + } +#endif rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, VALUE_2MB); break; default: @@ -161,9 +263,61 @@ spdk_vtophys_notify(void *cb_ctx, struct spdk_mem_map *map, return rc; } +#if SPDK_VFIO_ENABLED +static void +spdk_vtophys_iommu_init(void) +{ + char proc_fd_path[PATH_MAX + 1]; + char link_path[PATH_MAX + 1]; + const char vfio_path[] = "/dev/vfio/vfio"; + DIR *dir; + struct dirent *d; + + if (!pci_vfio_is_enabled()) { + return; + } + + dir = opendir("/proc/self/fd"); + if (!dir) { + DEBUG_PRINT("Failed to open /proc/self/fd (%d)\n", errno); + return; + } + + while ((d = readdir(dir)) != NULL) { + if (d->d_type != DT_LNK) + continue; + + snprintf(proc_fd_path, sizeof(proc_fd_path), "/proc/self/fd/%s", d->d_name); + if (readlink(proc_fd_path, link_path, sizeof(link_path)) != (sizeof(vfio_path) - 1)) { + continue; + } + + if (memcmp(link_path, vfio_path, sizeof(vfio_path) - 1) == 0) { + sscanf(d->d_name, "%d", &g_vfio.fd); + break; + } + } + + closedir(dir); + + if (g_vfio.fd < 0) { + DEBUG_PRINT("Failed to discover DPDK VFIO container fd.\n"); + return; + } + + g_vfio.enabled = true; + + return; +} +#endif + void spdk_vtophys_init(void) { +#if SPDK_VFIO_ENABLED + spdk_vtophys_iommu_init(); +#endif + g_vtophys_map = spdk_mem_map_alloc(SPDK_VTOPHYS_ERROR, spdk_vtophys_notify, NULL); if (g_vtophys_map == NULL) { DEBUG_PRINT("vtophys map allocation failed\n"); diff --git a/lib/vhost/Makefile b/lib/vhost/Makefile index 5cc76ff763..09581b4185 100644 --- a/lib/vhost/Makefile +++ b/lib/vhost/Makefile @@ -38,7 +38,7 @@ CFLAGS += -I. CFLAGS += -Irte_vhost CFLAGS += $(ENV_CFLAGS) -C_SRCS = vhost.c vhost_rpc.c vhost_iommu.c vhost_scsi.c vhost_blk.c +C_SRCS = vhost.c vhost_rpc.c vhost_scsi.c vhost_blk.c LIBNAME = vhost diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c index cbe8f4bffa..a4beeecfc2 100644 --- a/lib/vhost/vhost.c +++ b/lib/vhost/vhost.c @@ -41,7 +41,6 @@ #include "spdk/vhost.h" #include "vhost_internal.h" -#include "vhost_iommu.h" static uint32_t g_num_ctrlrs[RTE_MAX_LCORE]; @@ -234,10 +233,6 @@ spdk_vhost_dev_mem_register(struct spdk_vhost_dev *vdev) i); continue; } - - if (spdk_iommu_mem_register(region->host_user_addr, region->size)) { - abort(); - } } } @@ -258,10 +253,6 @@ spdk_vhost_dev_mem_unregister(struct spdk_vhost_dev *vdev) continue; /* region has not been registered */ } - if (spdk_iommu_mem_unregister(region->host_user_addr, region->size)) { - abort(); - } - if (spdk_mem_unregister((void *)start, len) != 0) { assert(false); } diff --git a/lib/vhost/vhost_blk.c b/lib/vhost/vhost_blk.c index 6b47a06c69..7be3a0903f 100644 --- a/lib/vhost/vhost_blk.c +++ b/lib/vhost/vhost_blk.c @@ -43,7 +43,6 @@ #include "spdk/vhost.h" #include "vhost_internal.h" -#include "vhost_iommu.h" struct spdk_vhost_blk_task { struct spdk_bdev_io *bdev_io; diff --git a/lib/vhost/vhost_iommu.c b/lib/vhost/vhost_iommu.c deleted file mode 100644 index 74b83a91bf..0000000000 --- a/lib/vhost/vhost_iommu.c +++ /dev/null @@ -1,344 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "spdk/stdinc.h" -#include "spdk/string.h" - -#include "vhost_iommu.h" - -#include - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) - -#include - -#include "spdk/env.h" -#include "spdk/util.h" - -#include "spdk_internal/log.h" - -struct vfio_map { - uint64_t iova; - uint64_t size; - size_t ref; -}; - -static struct { - int need_init; - int container_fd; - - - pthread_mutex_t map_lock; - struct vfio_map *maps; - size_t maps_count; - size_t maps_max_count; -} vfio_cfg = { 1, -1, PTHREAD_MUTEX_INITIALIZER }; - -/* Internal DPDK function forward declaration */ -int pci_vfio_is_enabled(void); - -/* Discover DPDK vfio container fd. This is to be removed if DPDK API - * provides interface for memory registration in VFIO container. - * - * Return -1 on error, 0 on success (VFIO is used or not) - */ -static int -vfio_cfg_init(void) -{ - char proc_fd_path[PATH_MAX + 1]; - char link_path[PATH_MAX + 1]; - const char vfio_path[] = "/dev/vfio/vfio"; - const int vfio_path_len = sizeof(vfio_path) - 1; - DIR *dir; - struct dirent *d; - - if (!vfio_cfg.need_init) { - return 0; - } - - vfio_cfg.need_init = 0; - if (!pci_vfio_is_enabled()) { - return 0; - } - - dir = opendir("/proc/self/fd"); - if (!dir) { - SPDK_ERRLOG("Failed to open /proc/self/fd (%d)\n", errno); - return -1; - } - - while ((d = readdir(dir)) != NULL) { - if (d->d_type != DT_LNK) - continue; - - snprintf(proc_fd_path, sizeof(proc_fd_path), "/proc/self/fd/%s", d->d_name); - if (readlink(proc_fd_path, link_path, sizeof(link_path)) != vfio_path_len) - continue; - - if (memcmp(link_path, vfio_path, vfio_path_len) == 0) { - sscanf(d->d_name, "%d", &vfio_cfg.container_fd); - break; - } - } - - closedir(dir); - - if (vfio_cfg.container_fd < 0) { - SPDK_ERRLOG("Failed to discover DPDK VFIO container fd.\n"); - return -1; - } - - return 0; -} - -static int -vfio_pci_memory_region_map(int vfio_container_fd, uint64_t vaddr, uint64_t phys_addr, uint64_t size) -{ - struct vfio_iommu_type1_dma_map dma_map; - int ret; - char buf[64]; - - dma_map.argsz = sizeof(dma_map); - dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; - dma_map.vaddr = vaddr; - dma_map.iova = phys_addr; - dma_map.size = size; - - SPDK_DEBUGLOG(SPDK_TRACE_VHOST_VFIO, "MAP vaddr:%p phys:%p len:%#"PRIx64"\n", (void *)vaddr, - (void *)phys_addr, size); - ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); - - if (ret) { - spdk_strerror_r(errno, buf, sizeof(buf)); - SPDK_ERRLOG("Cannot set up DMA mapping, error %d (%s)\n", errno, buf); - } - - return ret; -} - -static int -vfio_pci_memory_region_unmap(int vfio_container_fd, uint64_t phys_addr, uint64_t size) -{ - struct vfio_iommu_type1_dma_unmap dma_unmap; - int ret; - char buf[64]; - - dma_unmap.argsz = sizeof(dma_unmap); - dma_unmap.flags = 0; - dma_unmap.iova = phys_addr; - dma_unmap.size = size; - - SPDK_DEBUGLOG(SPDK_TRACE_VHOST_VFIO, "UNMAP phys:%p len:%#"PRIx64"\n", (void *)phys_addr, size); - ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap); - - if (ret) { - spdk_strerror_r(errno, buf, sizeof(buf)); - SPDK_ERRLOG("Cannot clear DMA mapping, error %d (%s)\n", errno, buf); - } - - return ret; -} - -static int -vfio_pci_memory_region_op(uint64_t vaddr, uint64_t phys_addr, uint64_t size, int op) -{ - int ret = 0; - size_t idx; - struct vfio_map *map = vfio_cfg.maps; - bool found = false; - - if (vfio_cfg.container_fd == -1) { - return 0; - } - - for (idx = 0; idx < vfio_cfg.maps_count; idx++, map++) { - assert(map->ref); - if (map->iova == phys_addr && map->size == size) { - found = true; - break; - } - } - - if (op == VFIO_IOMMU_MAP_DMA) { - if (found) { - map->ref++; - return 0; - } - - ret = vfio_pci_memory_region_map(vfio_cfg.container_fd, vaddr, phys_addr, size); - if (ret) { - return ret; - } - - if (vfio_cfg.maps_count == vfio_cfg.maps_max_count) { - struct vfio_map *new_maps; - size_t new_maps_max_count; - - new_maps_max_count = vfio_cfg.maps_max_count + 128; - new_maps = realloc(vfio_cfg.maps, new_maps_max_count * sizeof(vfio_cfg.maps[0])); - if (new_maps == NULL) { - return -ENOMEM; - } - - vfio_cfg.maps_max_count = new_maps_max_count; - vfio_cfg.maps = new_maps; - map = &vfio_cfg.maps[idx]; - } - - vfio_cfg.maps_count++; - map->iova = phys_addr; - map->size = size; - map->ref = 1; - } else { - if (!found) { - SPDK_ERRLOG("Region vaddr=%p phys_addr=%p len=%#"PRIx64" not VFIO DMA mapped\n", - (void *)vaddr, (void *)phys_addr, size); - return -1; - } - - map->ref--; - if (!map->ref) { - vfio_cfg.maps_count--; - if (vfio_cfg.maps_count != idx) { - memmove(map, map + 1, (vfio_cfg.maps_count - idx) * sizeof(map[0])); - } - - if (vfio_cfg.maps_count == 0) { - free(vfio_cfg.maps); - vfio_cfg.maps = NULL; - vfio_cfg.maps_count = 0; - vfio_cfg.maps_max_count = 0; - } - - ret = vfio_pci_memory_region_unmap(vfio_cfg.container_fd, phys_addr, size); - } - } - - return ret; -} - - -#define SHIFT_2MB 21 /* (1 << 21) == 2MB */ -#define MASK_2MB ((1ULL << SHIFT_2MB) - 1) - -static int -spdk_vfio_mem_op(uint64_t addr, uint64_t len, int dma_op) -{ - const uint64_t len_2mb = 1 << SHIFT_2MB; - uint64_t vaddr, vend, phaddr, phend, vlen; - int ret = 0; - - if (vfio_cfg_init() != 0) { - return -1; - } - - if (vfio_cfg.container_fd == -1) { - return 0; - } - - vaddr = addr; - while (len > 0) { - vlen = spdk_min(len_2mb - (vaddr & MASK_2MB), len); - vend = vaddr + vlen; - - phaddr = spdk_vtophys((void *)vaddr); - phend = spdk_vtophys((void *)(vend - 1)); - - if (phaddr == SPDK_VTOPHYS_ERROR || phend == SPDK_VTOPHYS_ERROR || - phend - phaddr > vlen - 1) { - SPDK_ERRLOG("Invalid memory region addr: %p len:%"PRIu64" " - "spdk_vtophys(%p) = %p spdk_vtophys(%p) = %p\n", - (void *)addr, len, (void *)vaddr, (void *)phaddr, - (void *)vend, (void *)phend); - ret = -1; - break; - } - - ret = vfio_pci_memory_region_op(vaddr, phaddr, vlen, dma_op); - if (ret) { - SPDK_ERRLOG("Failed to %s region region vaddr=%p phys_addr=%p len=%#"PRIx64"\n", - (dma_op == VFIO_IOMMU_MAP_DMA ? "map" : "unmap"), (void *)vaddr, - (void *)phaddr, vlen); - break; - } - - vaddr += vlen; - len -= vlen; - - assert(len == 0 || (vaddr & MASK_2MB) == 0); - } - - if (ret) { - spdk_vfio_mem_op(addr, vaddr - addr, VFIO_IOMMU_UNMAP_DMA); - } - - return ret; -} - -int spdk_iommu_mem_register(uint64_t addr, uint64_t len) -{ - int ret; - - pthread_mutex_lock(&vfio_cfg.map_lock); - ret = spdk_vfio_mem_op(addr, len, VFIO_IOMMU_MAP_DMA); - pthread_mutex_unlock(&vfio_cfg.map_lock); - return ret; -} - -int spdk_iommu_mem_unregister(uint64_t addr, uint64_t len) -{ - int ret; - - pthread_mutex_lock(&vfio_cfg.map_lock); - ret = spdk_vfio_mem_op(addr, len, VFIO_IOMMU_UNMAP_DMA); - pthread_mutex_unlock(&vfio_cfg.map_lock); - return ret; -} - -SPDK_LOG_REGISTER_TRACE_FLAG("vhost_vfio", SPDK_TRACE_VHOST_VFIO) - -#else - -/* linux/vfio.h not available */ - -int spdk_iommu_mem_register(uint64_t addr, uint64_t len) -{ - return 0; -} - -int spdk_iommu_mem_unregister(uint64_t addr, uint64_t len) -{ - return 0; -} - -#endif diff --git a/lib/vhost/vhost_iommu.h b/lib/vhost/vhost_iommu.h deleted file mode 100644 index 713c072b35..0000000000 --- a/lib/vhost/vhost_iommu.h +++ /dev/null @@ -1,61 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright (c) Intel Corporation. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef SPDK_VHOST_IOMMU_H -#define SPDK_VHOST_IOMMU_H - -#include "spdk/stdinc.h" - -/** - * Register given memory block in currently used IOMMU. If no IOMMU is used this - * function do nothing but still should be called. - * - * \param addr Start of memory block - * \param len Length of memory block. - * \return 0 on success, -1 on error. - */ -int spdk_iommu_mem_register(uint64_t addr, uint64_t len); - -/** - * Unregister previously registered memory block in currently used IOMMU. If no - * IOMMU is used this function do nothing but still should be called. - * - * \note This functiom might fail for invalid memory block. - * - * \param addr Start of memory block - * \param len Length of memory block. - * \return 0 on success, -1 on error. - */ -int spdk_iommu_mem_unregister(uint64_t addr, uint64_t len); - -#endif /* SPDK_VHOST_IOMMU_H */ diff --git a/test/unit/lib/vhost/vhost.c/vhost_ut.c b/test/unit/lib/vhost/vhost.c/vhost_ut.c index 15d6c7b2da..f7d212eeba 100644 --- a/test/unit/lib/vhost/vhost.c/vhost_ut.c +++ b/test/unit/lib/vhost/vhost.c/vhost_ut.c @@ -45,7 +45,6 @@ DEFINE_STUB(spdk_event_allocate, struct spdk_event *, DEFINE_STUB(spdk_mem_register, int, (void *vaddr, size_t len), 0); DEFINE_STUB(spdk_mem_unregister, int, (void *vaddr, size_t len), 0); DEFINE_STUB(spdk_vtophys, uint64_t, (void *vaddr), 1); -DEFINE_STUB(spdk_iommu_mem_register, int, (uint64_t addr, uint64_t len), 0); DEFINE_STUB(spdk_app_get_core_mask, uint64_t, (void), 0); DEFINE_STUB_V(spdk_app_stop, (int rc)); DEFINE_STUB_V(spdk_event_call, (struct spdk_event *event));