env: Automatically register new memory with the IOMMU

If the IOMMU is enabled, automatically register memory
added by the user through spdk_mem_register().

Change-Id: Ie02c7bf445314da23e2efee9de9c187ed0773a9f
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-on: https://review.gerrithub.io/375249
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Ben Walker 2017-08-22 14:41:37 -07:00 committed by Jim Harris
parent 01bed940fe
commit 3e084a34e1
7 changed files with 163 additions and 425 deletions

View File

@ -45,6 +45,31 @@
#include "spdk/queue.h"
#include "spdk/util.h"
/*
 * Decide at compile time whether VFIO/IOMMU support is built in.
 * SPDK_VFIO_ENABLED is 0 on FreeBSD and on Linux kernels older than 3.6.0,
 * and 1 otherwise, in which case <linux/vfio.h> is pulled in.
 */
#ifdef __FreeBSD__
#define SPDK_VFIO_ENABLED 0
#else
#include <linux/version.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
#define SPDK_VFIO_ENABLED 1
#include <linux/vfio.h>
/* Internal DPDK function forward declaration */
int pci_vfio_is_enabled(void);
/* State describing the VFIO container used for IOMMU (un)mapping. */
struct vfio_cfg {
/* File descriptor the VFIO map/unmap ioctls are issued on; -1 until discovered. */
int fd;
/* Set to true once a container fd has been found at init time. */
bool enabled;
};
/* Single global instance; starts out as "no fd, VFIO not enabled". */
static struct vfio_cfg g_vfio = {
.fd = -1,
.enabled = false
};
#else
#define SPDK_VFIO_ENABLED 0
#endif
#endif
#if DEBUG
#define DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__)
#else
@ -53,7 +78,49 @@
static struct spdk_mem_map *g_vtophys_map;
/* Try to get the paddr from the DPDK memsegs */
#if SPDK_VFIO_ENABLED
/*
 * Ask the VFIO container (g_vfio.fd) to map `size` bytes starting at the
 * process virtual address `vaddr` for device DMA at I/O virtual address
 * `iova`, with both read and write permission.
 *
 * Returns the ioctl() result: 0 on success, non-zero on failure (a debug
 * message carrying errno is emitted in that case).
 */
static int
vtophys_iommu_map_dma(uint64_t vaddr, uint64_t iova, uint64_t size)
{
	struct vfio_iommu_type1_dma_map request = {
		.argsz = sizeof(request),
		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
		.vaddr = vaddr,
		.iova = iova,
		.size = size,
	};
	int rc = ioctl(g_vfio.fd, VFIO_IOMMU_MAP_DMA, &request);

	if (rc != 0) {
		DEBUG_PRINT("Cannot set up DMA mapping, error %d\n", errno);
	}

	return rc;
}
/*
 * Ask the VFIO container (g_vfio.fd) to remove the DMA mapping that covers
 * `size` bytes starting at I/O virtual address `iova`.
 *
 * Returns the ioctl() result: 0 on success, non-zero on failure (a debug
 * message carrying errno is emitted in that case).
 */
static int
vtophys_iommu_unmap_dma(uint64_t iova, uint64_t size)
{
	struct vfio_iommu_type1_dma_unmap request = {
		.argsz = sizeof(request),
		.flags = 0,
		.iova = iova,
		.size = size,
	};
	int rc = ioctl(g_vfio.fd, VFIO_IOMMU_UNMAP_DMA, &request);

	if (rc != 0) {
		DEBUG_PRINT("Cannot clear DMA mapping, error %d\n", errno);
	}

	return rc;
}
#endif
static uint64_t
vtophys_get_paddr_memseg(uint64_t vaddr)
{
@ -97,11 +164,12 @@ vtophys_get_paddr_pagemap(uint64_t vaddr)
rte_atomic64_read((rte_atomic64_t *)vaddr);
paddr = rte_mem_virt2phy((void *)vaddr);
}
if (paddr != RTE_BAD_PHYS_ADDR) {
return paddr;
if (paddr == RTE_BAD_PHYS_ADDR) {
/* Unable to get to the physical address. */
return SPDK_VTOPHYS_ERROR;
}
return SPDK_VTOPHYS_ERROR;
return paddr;
}
static int
@ -130,10 +198,29 @@ spdk_vtophys_notify(void *cb_ctx, struct spdk_mem_map *map,
switch (action) {
case SPDK_MEM_MAP_NOTIFY_REGISTER:
if (paddr == SPDK_VTOPHYS_ERROR) {
paddr = vtophys_get_paddr_pagemap((uint64_t)vaddr);
if (paddr == SPDK_VTOPHYS_ERROR) {
DEBUG_PRINT("could not get phys addr for %p\n", vaddr);
return -EFAULT;
/* This is not an address that DPDK is managing. */
#if SPDK_VFIO_ENABLED
if (g_vfio.enabled) {
/* We'll use the virtual address as the iova. DPDK
* currently uses physical addresses as the iovas (or counts
* up from 0 if it can't get physical addresses), so
* the range of user space virtual addresses and physical
* addresses will never overlap.
*/
paddr = (uint64_t)vaddr;
rc = vtophys_iommu_map_dma((uint64_t)vaddr, paddr, VALUE_2MB);
if (rc) {
return -EFAULT;
}
} else
#endif
{
/* Get the physical address from /proc/self/pagemap. */
paddr = vtophys_get_paddr_pagemap((uint64_t)vaddr);
if (paddr == SPDK_VTOPHYS_ERROR) {
DEBUG_PRINT("could not get phys addr for %p\n", vaddr);
return -EFAULT;
}
}
}
@ -145,6 +232,21 @@ spdk_vtophys_notify(void *cb_ctx, struct spdk_mem_map *map,
rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, VALUE_2MB, paddr);
break;
case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
#if SPDK_VFIO_ENABLED
if (paddr == SPDK_VTOPHYS_ERROR) {
/*
* This is not an address that DPDK is managing. If vfio is enabled,
* we need to unmap the range from the IOMMU
*/
if (g_vfio.enabled) {
paddr = spdk_mem_map_translate(map, (uint64_t)vaddr);
rc = vtophys_iommu_unmap_dma(paddr, VALUE_2MB);
if (rc) {
return -EFAULT;
}
}
}
#endif
rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, VALUE_2MB);
break;
default:
@ -161,9 +263,61 @@ spdk_vtophys_notify(void *cb_ctx, struct spdk_mem_map *map,
return rc;
}
#if SPDK_VFIO_ENABLED
/*
 * Discover the DPDK VFIO container file descriptor and record it in g_vfio.
 *
 * Does nothing when pci_vfio_is_enabled() reports that VFIO is off.
 * Otherwise the symlinks under /proc/self/fd are scanned for one whose
 * target is exactly "/dev/vfio/vfio"; the entry name (the fd number) is
 * stored in g_vfio.fd. g_vfio.enabled is only set once a non-negative fd
 * has been found.
 */
static void
spdk_vtophys_iommu_init(void)
{
	const char vfio_path[] = "/dev/vfio/vfio";
	/* Length without the terminating NUL; readlink() does not append one. */
	const size_t vfio_path_len = sizeof(vfio_path) - 1;
	char proc_fd_path[PATH_MAX + 1];
	char link_path[PATH_MAX + 1];
	struct dirent *entry;
	DIR *fd_dir;

	if (!pci_vfio_is_enabled()) {
		return;
	}

	fd_dir = opendir("/proc/self/fd");
	if (fd_dir == NULL) {
		DEBUG_PRINT("Failed to open /proc/self/fd (%d)\n", errno);
		return;
	}

	for (entry = readdir(fd_dir); entry != NULL; entry = readdir(fd_dir)) {
		if (entry->d_type != DT_LNK) {
			continue;
		}

		snprintf(proc_fd_path, sizeof(proc_fd_path), "/proc/self/fd/%s", entry->d_name);

		/* Cheap length filter first; only same-length targets are compared. */
		if (readlink(proc_fd_path, link_path, sizeof(link_path)) != vfio_path_len) {
			continue;
		}

		if (memcmp(link_path, vfio_path, vfio_path_len) == 0) {
			sscanf(entry->d_name, "%d", &g_vfio.fd);
			break;
		}
	}

	closedir(fd_dir);

	if (g_vfio.fd < 0) {
		DEBUG_PRINT("Failed to discover DPDK VFIO container fd.\n");
		return;
	}

	g_vfio.enabled = true;
}
#endif
void
spdk_vtophys_init(void)
{
#if SPDK_VFIO_ENABLED
spdk_vtophys_iommu_init();
#endif
g_vtophys_map = spdk_mem_map_alloc(SPDK_VTOPHYS_ERROR, spdk_vtophys_notify, NULL);
if (g_vtophys_map == NULL) {
DEBUG_PRINT("vtophys map allocation failed\n");

View File

@ -38,7 +38,7 @@ CFLAGS += -I.
CFLAGS += -Irte_vhost
CFLAGS += $(ENV_CFLAGS)
C_SRCS = vhost.c vhost_rpc.c vhost_iommu.c vhost_scsi.c vhost_blk.c
C_SRCS = vhost.c vhost_rpc.c vhost_scsi.c vhost_blk.c
LIBNAME = vhost

View File

@ -41,7 +41,6 @@
#include "spdk/vhost.h"
#include "vhost_internal.h"
#include "vhost_iommu.h"
static uint32_t g_num_ctrlrs[RTE_MAX_LCORE];
@ -234,10 +233,6 @@ spdk_vhost_dev_mem_register(struct spdk_vhost_dev *vdev)
i);
continue;
}
if (spdk_iommu_mem_register(region->host_user_addr, region->size)) {
abort();
}
}
}
@ -258,10 +253,6 @@ spdk_vhost_dev_mem_unregister(struct spdk_vhost_dev *vdev)
continue; /* region has not been registered */
}
if (spdk_iommu_mem_unregister(region->host_user_addr, region->size)) {
abort();
}
if (spdk_mem_unregister((void *)start, len) != 0) {
assert(false);
}

View File

@ -43,7 +43,6 @@
#include "spdk/vhost.h"
#include "vhost_internal.h"
#include "vhost_iommu.h"
struct spdk_vhost_blk_task {
struct spdk_bdev_io *bdev_io;

View File

@ -1,344 +0,0 @@
/*-
* BSD LICENSE
*
* Copyright(c) Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "spdk/stdinc.h"
#include "spdk/string.h"
#include "vhost_iommu.h"
#include <linux/version.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
#include <linux/vfio.h>
#include "spdk/env.h"
#include "spdk/util.h"
#include "spdk_internal/log.h"
/* One tracked DMA mapping in the VFIO container, shared via reference count. */
struct vfio_map {
/* I/O virtual address of the mapping (callers pass the physical address). */
uint64_t iova;
/* Length of the mapping in bytes. */
uint64_t size;
/* Number of outstanding map requests for this exact (iova, size) pair. */
size_t ref;
};
/* Module-wide VFIO state: container fd plus the table of tracked mappings. */
static struct {
	/* Non-zero until vfio_cfg_init() has run. */
	int need_init;
	/* VFIO container file descriptor, or -1 when none is known. */
	int container_fd;
	/* Taken by the public register/unregister entry points. */
	pthread_mutex_t map_lock;
	/* Dynamically grown array of tracked mappings. */
	struct vfio_map *maps;
	/* Number of entries in use in `maps`. */
	size_t maps_count;
	/* Number of entries allocated for `maps`. */
	size_t maps_max_count;
} vfio_cfg = {
	.need_init = 1,
	.container_fd = -1,
	.map_lock = PTHREAD_MUTEX_INITIALIZER,
};
/* Internal DPDK function forward declaration */
int pci_vfio_is_enabled(void);
/* Discover DPDK vfio container fd. This is to be removed if DPDK API
 * provides interface for memory registration in VFIO container.
 *
 * The fd is found by scanning the symlinks in /proc/self/fd for one that
 * points at "/dev/vfio/vfio". Initialization is only marked as done once it
 * has reached a definite outcome (VFIO not enabled, or container fd found),
 * so a failed attempt keeps reporting -1 on later calls instead of being
 * mistaken for "VFIO is not in use".
 *
 * Return -1 on error, 0 on success (VFIO is used or not)
 */
static int
vfio_cfg_init(void)
{
	char proc_fd_path[PATH_MAX + 1];
	char link_path[PATH_MAX + 1];
	const char vfio_path[] = "/dev/vfio/vfio";
	const int vfio_path_len = sizeof(vfio_path) - 1;
	DIR *dir;
	struct dirent *d;

	if (!vfio_cfg.need_init) {
		return 0;
	}

	if (!pci_vfio_is_enabled()) {
		/* VFIO is not in use: nothing to discover, no need to ask again. */
		vfio_cfg.need_init = 0;
		return 0;
	}

	dir = opendir("/proc/self/fd");
	if (!dir) {
		SPDK_ERRLOG("Failed to open /proc/self/fd (%d)\n", errno);
		return -1;
	}

	while ((d = readdir(dir)) != NULL) {
		if (d->d_type != DT_LNK) {
			continue;
		}

		snprintf(proc_fd_path, sizeof(proc_fd_path), "/proc/self/fd/%s", d->d_name);

		/* readlink() does not NUL-terminate, so compare by length + memcmp. */
		if (readlink(proc_fd_path, link_path, sizeof(link_path)) != vfio_path_len) {
			continue;
		}

		if (memcmp(link_path, vfio_path, vfio_path_len) == 0) {
			sscanf(d->d_name, "%d", &vfio_cfg.container_fd);
			break;
		}
	}

	closedir(dir);

	if (vfio_cfg.container_fd < 0) {
		SPDK_ERRLOG("Failed to discover DPDK VFIO container fd.\n");
		return -1;
	}

	/* Only a completed discovery counts as initialized. */
	vfio_cfg.need_init = 0;
	return 0;
}
static int
vfio_pci_memory_region_map(int vfio_container_fd, uint64_t vaddr, uint64_t phys_addr, uint64_t size)
{
struct vfio_iommu_type1_dma_map dma_map;
int ret;
char buf[64];
dma_map.argsz = sizeof(dma_map);
dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
dma_map.vaddr = vaddr;
dma_map.iova = phys_addr;
dma_map.size = size;
SPDK_DEBUGLOG(SPDK_TRACE_VHOST_VFIO, "MAP vaddr:%p phys:%p len:%#"PRIx64"\n", (void *)vaddr,
(void *)phys_addr, size);
ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
if (ret) {
spdk_strerror_r(errno, buf, sizeof(buf));
SPDK_ERRLOG("Cannot set up DMA mapping, error %d (%s)\n", errno, buf);
}
return ret;
}
static int
vfio_pci_memory_region_unmap(int vfio_container_fd, uint64_t phys_addr, uint64_t size)
{
struct vfio_iommu_type1_dma_unmap dma_unmap;
int ret;
char buf[64];
dma_unmap.argsz = sizeof(dma_unmap);
dma_unmap.flags = 0;
dma_unmap.iova = phys_addr;
dma_unmap.size = size;
SPDK_DEBUGLOG(SPDK_TRACE_VHOST_VFIO, "UNMAP phys:%p len:%#"PRIx64"\n", (void *)phys_addr, size);
ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);
if (ret) {
spdk_strerror_r(errno, buf, sizeof(buf));
SPDK_ERRLOG("Cannot clear DMA mapping, error %d (%s)\n", errno, buf);
}
return ret;
}
/*
 * Reference-counted map/unmap of a single region in the VFIO container.
 *
 * Regions are tracked in vfio_cfg.maps, keyed by the exact (phys_addr, size)
 * pair:
 *  - op == VFIO_IOMMU_MAP_DMA: an already tracked region only gains a
 *    reference; otherwise the region is mapped and a new entry with ref 1 is
 *    appended (growing the table by 128 entries when full).
 *  - any other op: one reference is dropped; when it reaches zero the entry
 *    is removed from the table and the region is unmapped.
 *
 * Returns 0 on success or when no container fd is known, the map/unmap
 * helper's result if the ioctl fails, -ENOMEM if the table cannot grow, and
 * -1 when asked to unmap a region that is not tracked.
 */
static int
vfio_pci_memory_region_op(uint64_t vaddr, uint64_t phys_addr, uint64_t size, int op)
{
	int ret = 0;
	size_t idx;
	struct vfio_map *map = vfio_cfg.maps;
	bool found = false;

	if (vfio_cfg.container_fd == -1) {
		return 0;
	}

	/* On a miss, idx == maps_count and map points one past the last entry. */
	for (idx = 0; idx < vfio_cfg.maps_count; idx++, map++) {
		assert(map->ref);
		if (map->iova == phys_addr && map->size == size) {
			found = true;
			break;
		}
	}

	if (op == VFIO_IOMMU_MAP_DMA) {
		if (found) {
			map->ref++;
			return 0;
		}

		ret = vfio_pci_memory_region_map(vfio_cfg.container_fd, vaddr, phys_addr, size);
		if (ret) {
			return ret;
		}

		if (vfio_cfg.maps_count == vfio_cfg.maps_max_count) {
			struct vfio_map *new_maps;
			size_t new_maps_max_count;

			new_maps_max_count = vfio_cfg.maps_max_count + 128;
			new_maps = realloc(vfio_cfg.maps, new_maps_max_count * sizeof(vfio_cfg.maps[0]));
			if (new_maps == NULL) {
				/*
				 * The region is already mapped but cannot be tracked.
				 * Undo the mapping so it is not left behind untracked.
				 */
				vfio_pci_memory_region_unmap(vfio_cfg.container_fd, phys_addr, size);
				return -ENOMEM;
			}

			vfio_cfg.maps_max_count = new_maps_max_count;
			vfio_cfg.maps = new_maps;
			/* The array may have moved; re-derive the slot pointer. */
			map = &vfio_cfg.maps[idx];
		}

		vfio_cfg.maps_count++;
		map->iova = phys_addr;
		map->size = size;
		map->ref = 1;
	} else {
		if (!found) {
			SPDK_ERRLOG("Region vaddr=%p phys_addr=%p len=%#"PRIx64" not VFIO DMA mapped\n",
				    (void *)vaddr, (void *)phys_addr, size);
			return -1;
		}

		map->ref--;
		if (!map->ref) {
			vfio_cfg.maps_count--;
			/* Close the gap unless the removed entry was the last one. */
			if (vfio_cfg.maps_count != idx) {
				memmove(map, map + 1, (vfio_cfg.maps_count - idx) * sizeof(map[0]));
			}

			if (vfio_cfg.maps_count == 0) {
				free(vfio_cfg.maps);
				vfio_cfg.maps = NULL;
				vfio_cfg.maps_count = 0;
				vfio_cfg.maps_max_count = 0;
			}

			ret = vfio_pci_memory_region_unmap(vfio_cfg.container_fd, phys_addr, size);
		}
	}

	return ret;
}
#define SHIFT_2MB 21 /* (1 << 21) == 2MB */
#define MASK_2MB ((1ULL << SHIFT_2MB) - 1)
/*
 * Apply `dma_op` (VFIO_IOMMU_MAP_DMA or VFIO_IOMMU_UNMAP_DMA) to the virtual
 * range [addr, addr + len).
 *
 * The range is processed in chunks that never cross a 2MB boundary. For each
 * chunk both ends are translated with spdk_vtophys(); the chunk is rejected
 * if either translation fails or if the two physical addresses are further
 * apart than the chunk length.
 *
 * If a MAP operation fails part-way, the chunks mapped so far are unmapped
 * again. A failed UNMAP is not rolled back: the earlier chunks have already
 * released their reference and must not release it a second time.
 *
 * Returns 0 on success or when VFIO is not in use, non-zero on failure.
 */
static int
spdk_vfio_mem_op(uint64_t addr, uint64_t len, int dma_op)
{
	const uint64_t len_2mb = 1ULL << SHIFT_2MB;
	uint64_t vaddr, vend, phaddr, phend, vlen;
	int ret = 0;

	if (vfio_cfg_init() != 0) {
		return -1;
	}

	if (vfio_cfg.container_fd == -1) {
		return 0;
	}

	vaddr = addr;
	while (len > 0) {
		/* Up to the next 2MB boundary, or whatever is left of the range. */
		vlen = spdk_min(len_2mb - (vaddr & MASK_2MB), len);
		vend = vaddr + vlen;

		phaddr = spdk_vtophys((void *)vaddr);
		phend = spdk_vtophys((void *)(vend - 1));

		if (phaddr == SPDK_VTOPHYS_ERROR || phend == SPDK_VTOPHYS_ERROR ||
		    phend - phaddr > vlen - 1) {
			/* Report the address that was actually translated (vend - 1). */
			SPDK_ERRLOG("Invalid memory region addr: %p len:%"PRIu64" "
				    "spdk_vtophys(%p) = %p spdk_vtophys(%p) = %p\n",
				    (void *)addr, len, (void *)vaddr, (void *)phaddr,
				    (void *)(vend - 1), (void *)phend);
			ret = -1;
			break;
		}

		ret = vfio_pci_memory_region_op(vaddr, phaddr, vlen, dma_op);
		if (ret) {
			SPDK_ERRLOG("Failed to %s region region vaddr=%p phys_addr=%p len=%#"PRIx64"\n",
				    (dma_op == VFIO_IOMMU_MAP_DMA ? "map" : "unmap"), (void *)vaddr,
				    (void *)phaddr, vlen);
			break;
		}

		vaddr += vlen;
		len -= vlen;

		assert(len == 0 || (vaddr & MASK_2MB) == 0);
	}

	/* Undo a partially completed MAP; [addr, vaddr) is what succeeded. */
	if (ret && dma_op == VFIO_IOMMU_MAP_DMA && vaddr != addr) {
		spdk_vfio_mem_op(addr, vaddr - addr, VFIO_IOMMU_UNMAP_DMA);
	}

	return ret;
}
/*
 * Map the memory block [addr, addr + len) in the VFIO container.
 * The map table lock is held for the duration of the operation.
 * Returns the result of spdk_vfio_mem_op().
 */
int
spdk_iommu_mem_register(uint64_t addr, uint64_t len)
{
	int rc;

	pthread_mutex_lock(&vfio_cfg.map_lock);
	rc = spdk_vfio_mem_op(addr, len, VFIO_IOMMU_MAP_DMA);
	pthread_mutex_unlock(&vfio_cfg.map_lock);

	return rc;
}
/*
 * Unmap the memory block [addr, addr + len) from the VFIO container.
 * The map table lock is held for the duration of the operation.
 * Returns the result of spdk_vfio_mem_op().
 */
int
spdk_iommu_mem_unregister(uint64_t addr, uint64_t len)
{
	int rc;

	pthread_mutex_lock(&vfio_cfg.map_lock);
	rc = spdk_vfio_mem_op(addr, len, VFIO_IOMMU_UNMAP_DMA);
	pthread_mutex_unlock(&vfio_cfg.map_lock);

	return rc;
}
SPDK_LOG_REGISTER_TRACE_FLAG("vhost_vfio", SPDK_TRACE_VHOST_VFIO)
#else
/* linux/vfio.h not available */
/* Stub for builds without VFIO support: nothing to map, always succeeds. */
int
spdk_iommu_mem_register(uint64_t addr, uint64_t len)
{
	(void)addr;
	(void)len;

	return 0;
}
/* Stub for builds without VFIO support: nothing to unmap, always succeeds. */
int
spdk_iommu_mem_unregister(uint64_t addr, uint64_t len)
{
	(void)addr;
	(void)len;

	return 0;
}
#endif

View File

@ -1,61 +0,0 @@
/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SPDK_VHOST_IOMMU_H
#define SPDK_VHOST_IOMMU_H
#include "spdk/stdinc.h"
/**
 * Register the given memory block in the currently used IOMMU. If no IOMMU is
 * in use, this function does nothing but should still be called.
 *
 * \param addr Start of the memory block.
 * \param len Length of the memory block.
 * \return 0 on success, -1 on error.
 */
int spdk_iommu_mem_register(uint64_t addr, uint64_t len);
/**
 * Unregister a previously registered memory block from the currently used
 * IOMMU. If no IOMMU is in use, this function does nothing but should still
 * be called.
 *
 * \note This function might fail for an invalid memory block.
 *
 * \param addr Start of the memory block.
 * \param len Length of the memory block.
 * \return 0 on success, -1 on error.
 */
int spdk_iommu_mem_unregister(uint64_t addr, uint64_t len);
#endif /* SPDK_VHOST_IOMMU_H */

View File

@ -45,7 +45,6 @@ DEFINE_STUB(spdk_event_allocate, struct spdk_event *,
DEFINE_STUB(spdk_mem_register, int, (void *vaddr, size_t len), 0);
DEFINE_STUB(spdk_mem_unregister, int, (void *vaddr, size_t len), 0);
DEFINE_STUB(spdk_vtophys, uint64_t, (void *vaddr), 1);
DEFINE_STUB(spdk_iommu_mem_register, int, (uint64_t addr, uint64_t len), 0);
DEFINE_STUB(spdk_app_get_core_mask, uint64_t, (void), 0);
DEFINE_STUB_V(spdk_app_stop, (int rc));
DEFINE_STUB_V(spdk_event_call, (struct spdk_event *event));