numam-spdk/lib/vfio_user/vfio_user_pci.c
Josh Soref cc6920a476 spelling: lib
Part of #2256

* accessible
* activation
* additional
* allocate
* association
* attempt
* barrier
* broadcast
* buffer
* calculate
* cases
* channel
* children
* command
* completion
* connect
* copied
* currently
* descriptor
* destroy
* detachment
* doesn't
* enqueueing
* exceeds
* execution
* extended
* fallback
* finalize
* first
* handling
* hugepages
* ignored
* implementation
* in_capsule
* initialization
* initialized
* initializing
* initiator
* negotiated
* notification
* occurred
* original
* outstanding
* partially
* partition
* processing
* receive
* received
* receiving
* redirected
* regions
* request
* requested
* response
* retrieved
* running
* satisfied
* should
* snapshot
* status
* succeeds
* successfully
* supplied
* those
* transferred
* translate
* triggering
* unregister
* unsupported
* urlsafe
* virtqueue
* volumes
* workaround
* zeroed

Change-Id: I569218754bd9d332ba517d4a61ad23d29eedfd0c
Signed-off-by: Josh Soref <jsoref@gmail.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/10405
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
2021-12-03 08:12:55 +00:00

453 lines
12 KiB
C

/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* vfio-user transport for PCI devices.
*/
#include "spdk/stdinc.h"
#include "spdk/log.h"
#include "spdk/env.h"
#include "spdk/queue.h"
#include "spdk/util.h"
#include "spdk/vfio_user_pci.h"
#include "vfio_user_internal.h"
static uint32_t g_vfio_dev_id;
/*
 * Read or write 'len' bytes at 'offset' within BAR region 'index' of 'dev'.
 *
 * Areas covered by a sparse mmap are accessed directly via memcpy();
 * anything not mmapped (or a region with no mmaps at all) falls back to
 * the message-based vfio-user MMIO access.
 *
 * Returns 0 on success, -EINVAL when the request is out of range, or
 * -EFAULT when the request does not fit entirely inside one mmapped area.
 */
int
spdk_vfio_user_pci_bar_access(struct vfio_device *dev, uint32_t index, uint64_t offset,
			      size_t len, void *buf, bool is_write)
{
	struct vfio_pci_region *region = &dev->regions[index];
	uint32_t i;

	/* Overflow-safe form of "offset + len > region->size": the old
	 * expression could wrap around and accept an out-of-range access.
	 */
	if (len > region->size || offset > region->size - len) {
		return -EINVAL;
	}

	/* No sparse mmap, or the access starts before the first mmapped area. */
	if (!region->nr_mmaps || (offset < region->mmaps[0].offset)) {
		return vfio_user_dev_mmio_access(dev, index, offset, len, buf, is_write);
	}

	/* SPARSE MMAP: find the single area that contains the whole access. */
	for (i = 0; i < region->nr_mmaps; i++) {
		if ((offset >= region->mmaps[i].offset) &&
		    (offset + len <= region->mmaps[i].offset + region->mmaps[i].size)) {
			assert(region->mmaps[i].mem != NULL);
			void *bar_addr = region->mmaps[i].mem + offset;
			if (is_write) {
				memcpy(bar_addr, buf, len);
			} else {
				memcpy(buf, bar_addr, len);
			}
			return 0;
		}
	}

	return -EFAULT;
}
/*
 * Track a newly registered memory region on @dev.
 *
 * Returns 0 on success, or -EINVAL when the per-device table already
 * holds VFIO_MAXIMUM_MEMORY_REGIONS entries.
 */
static int
vfio_add_mr(struct vfio_device *dev, struct vfio_memory_region *mr)
{
	if (dev->nr_mrs == VFIO_MAXIMUM_MEMORY_REGIONS) {
		SPDK_ERRLOG("Maximum supported memory regions %d\n", VFIO_MAXIMUM_MEMORY_REGIONS);
		return -EINVAL;
	}

	SPDK_DEBUGLOG(vfio_pci, "Add memory region: FD %d, VADDR 0x%lx, IOVA 0x%lx, Size 0x%lx\n",
		      mr->fd, mr->vaddr, mr->iova, mr->size);

	TAILQ_INSERT_TAIL(&dev->mrs_head, mr, link);
	dev->nr_mrs++;

	return 0;
}
static struct vfio_memory_region *
vfio_get_mr(struct vfio_device *dev, uint64_t addr, size_t len)
{
struct vfio_memory_region *mr, *tmp_mr;
if (dev->nr_mrs == 0) {
return false;
}
TAILQ_FOREACH_SAFE(mr, &dev->mrs_head, link, tmp_mr) {
if ((mr->vaddr == addr) || (mr->iova == addr)) {
return mr;
}
}
return false;
}
/*
 * Drop the tracked memory region whose start VADDR or IOVA equals @addr
 * and free it.  Silently does nothing when no region matches.
 * NOTE(review): @len is unused — matching is by start address only.
 */
static void
vfio_remove_mr(struct vfio_device *dev, uint64_t addr, size_t len)
{
	struct vfio_memory_region *cur, *next;

	/* Safe iteration: the matching entry is removed inside the loop. */
	TAILQ_FOREACH_SAFE(cur, &dev->mrs_head, link, next) {
		if ((cur->vaddr != addr) && (cur->iova != addr)) {
			continue;
		}

		SPDK_DEBUGLOG(vfio_pci, "Remove memory region: FD %d, VADDR 0x%lx, IOVA 0x%lx, Size 0x%lx\n",
			      cur->fd, cur->vaddr, cur->iova, cur->size);

		TAILQ_REMOVE(&dev->mrs_head, cur, link);
		assert(dev->nr_mrs > 0);
		dev->nr_mrs--;
		free(cur);
		return;
	}
}
/*
 * Notification callback registered with the SPDK memory map (see
 * vfio_device_dma_map).  Invoked whenever memory is registered with or
 * unregistered from the SPDK environment; it mirrors each registration
 * as a vfio-user DMA map/unmap on the device.
 *
 * cb_ctx is the struct vfio_device * passed to spdk_mem_map_alloc().
 * Returns 0 on success or a negative errno.
 */
static int
vfio_mr_map_notify(void *cb_ctx, struct spdk_mem_map *map,
		   enum spdk_mem_map_notify_action action,
		   void *vaddr, size_t size)
{
	int ret;
	struct vfio_device *dev = cb_ctx;
	struct vfio_memory_region *mr;
	uint64_t offset;

	/* Look up an existing tracked region for this vaddr (iova == vaddr here). */
	mr = vfio_get_mr(dev, (uint64_t)vaddr, size);
	if (action == SPDK_MEM_MAP_NOTIFY_UNREGISTER) {
		if (!mr) {
			/* NOTE(review): -EEXIST for a missing region is odd (-ENOENT
			 * would be conventional), but callers only check for nonzero.
			 */
			SPDK_ERRLOG("Memory region VADDR %p doesn't exist\n", vaddr);
			return -EEXIST;
		}

		/* Unmap on the device first, then drop local tracking; the local
		 * region is removed even if the device unmap failed.
		 */
		ret = vfio_user_dev_dma_map_unmap(dev, mr, false);
		/* remove the memory region */
		vfio_remove_mr(dev, (uint64_t)vaddr, size);
		return ret;
	}

	/* SPDK_MEM_MAP_NOTIFY_REGISTER */
	if (mr != NULL) {
		SPDK_ERRLOG("Memory region VADDR 0x%lx already exist\n", mr->vaddr);
		return -EEXIST;
	}

	mr = calloc(1, sizeof(*mr));
	if (mr == NULL) {
		return -ENOMEM;
	}
	/* Identity mapping: the IOVA the device sees equals the process vaddr. */
	mr->vaddr = (uint64_t)(uintptr_t)vaddr;
	mr->iova = mr->vaddr;
	mr->size = size;
	/* The backing fd/offset lets the vfio-user server mmap this memory. */
	mr->fd = spdk_mem_get_fd_and_offset(vaddr, &offset);
	if (mr->fd < 0) {
		SPDK_ERRLOG("Error to get the memory map offset\n");
		free(mr);
		return -EFAULT;
	}
	mr->offset = offset;

	/* Track locally before mapping on the device; on tracking failure the
	 * region is freed and never sent to the server.
	 */
	ret = vfio_add_mr(dev, mr);
	if (ret) {
		free(mr);
		return ret;
	}

	return vfio_user_dev_dma_map_unmap(dev, mr, true);
}
static int
vfio_device_dma_map(struct vfio_device *device)
{
const struct spdk_mem_map_ops vfio_map_ops = {
.notify_cb = vfio_mr_map_notify,
.are_contiguous = NULL,
};
device->map = spdk_mem_map_alloc((uint64_t)NULL, &vfio_map_ops, device);
if (device->map == NULL) {
SPDK_ERRLOG("Failed to allocate memory map structure\n");
return -EFAULT;
}
return 0;
}
/*
 * Walk the capability chain attached to a VFIO region info structure and
 * return the first capability header with id @cap, or NULL when the region
 * has no capabilities or none matches.
 */
static struct vfio_info_cap_header *
vfio_device_get_info_cap(struct vfio_region_info *info, int cap)
{
	struct vfio_info_cap_header *hdr;
	size_t pos;

	if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) {
		return NULL;
	}

	/* Each header's 'next' is an offset from the start of @info; 0 ends the chain. */
	for (pos = info->cap_offset; pos != 0; pos = hdr->next) {
		hdr = (struct vfio_info_cap_header *)((uintptr_t)info + pos);
		if (hdr->id == cap) {
			return hdr;
		}
	}

	return NULL;
}
static int
vfio_device_setup_sparse_mmaps(struct vfio_device *device, int index,
struct vfio_region_info *info, int *fds)
{
struct vfio_info_cap_header *hdr;
struct vfio_region_info_cap_sparse_mmap *sparse;
struct vfio_pci_region *region = &device->regions[index];
uint32_t i, j = 0;
int prot = 0;
hdr = vfio_device_get_info_cap(info, VFIO_REGION_INFO_CAP_SPARSE_MMAP);
if (!hdr) {
SPDK_NOTICELOG("Device doesn't have sparse mmap\n");
return -EEXIST;
}
sparse = SPDK_CONTAINEROF(hdr, struct vfio_region_info_cap_sparse_mmap, header);
for (i = 0; i < sparse->nr_areas; i++) {
if (sparse->areas[i].size) {
region->mmaps[j].offset = sparse->areas[i].offset;
region->mmaps[j].size = sparse->areas[i].size;
prot |= info->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0;
prot |= info->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0;
if (*fds) {
region->mmaps[j].mem = mmap(NULL, region->mmaps[j].size, prot, MAP_SHARED,
fds[i], region->offset + region->mmaps[j].offset);
if (region->mmaps[j].mem == MAP_FAILED) {
SPDK_ERRLOG("Device SPARSE MMAP failed\n");
return -EIO;
}
} else {
SPDK_DEBUGLOG(vfio_pci, "No valid fd, skip mmap for bar %d region %u\n", index, i);
}
SPDK_DEBUGLOG(vfio_pci, "Sparse region %u, Size 0x%llx, Offset 0x%llx, Map addr %p\n",
i, sparse->areas[i].size, sparse->areas[i].offset,
region->mmaps[j].mem);
j++;
}
}
device->regions[index].nr_mmaps = j;
return 0;
}
/*
 * Map an entire BAR region with a single mmap() using @fd.  Used as the
 * fallback when the region offers no usable sparse mmap areas.
 *
 * Returns 0 on success, -EFAULT when mmap() fails.
 */
static int
vfio_device_map_region(struct vfio_device *device, struct vfio_pci_region *region, int fd)
{
	int prot = 0;

	if (region->flags & VFIO_REGION_INFO_FLAG_READ) {
		prot |= PROT_READ;
	}
	if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) {
		prot |= PROT_WRITE;
	}

	/* One mmap entry covering the whole region. */
	region->mmaps[0].offset = 0;
	region->mmaps[0].size = region->size;

	region->mmaps[0].mem = mmap(NULL, region->size, prot, MAP_SHARED, fd, region->offset);
	if (region->mmaps[0].mem == MAP_FAILED) {
		SPDK_ERRLOG("Device Region MMAP failed\n");
		return -EFAULT;
	}
	SPDK_DEBUGLOG(vfio_pci, "Memory mapped to %p\n", region->mmaps[0].mem);

	region->nr_mmaps = 1;
	return 0;
}
/*
 * Query region info for every PCI region of @device (BARs + config space)
 * and mmap each mappable region — sparse areas when available, otherwise
 * one map of the whole region.
 *
 * Returns 0 on success or a negative errno from the first failing region.
 */
static int
vfio_device_map_bars_and_config_region(struct vfio_device *device)
{
	uint32_t i;
	int ret = 0;
	size_t len = 4096;
	int fds[VFIO_MAXIMUM_SPARSE_MMAP_REGIONS];
	struct vfio_region_info *info;
	uint8_t *buf;

	/* Scratch buffer large enough for region info plus capability chain. */
	buf = calloc(1, len);
	if (!buf) {
		return -ENOMEM;
	}

	info = (struct vfio_region_info *)buf;
	for (i = 0; i < device->pci_regions; i++) {
		memset(info, 0, len);
		memset(fds, 0, sizeof(fds));

		info->index = i;
		ret = vfio_user_get_dev_region_info(device, info, len, fds,
						    VFIO_MAXIMUM_SPARSE_MMAP_REGIONS);
		if (ret) {
			/* Fix: the old message printed 'ret' where the bar number
			 * was expected; report both the bar and the error code.
			 */
			SPDK_ERRLOG("Device setup bar %u failed, ret %d\n", i, ret);
			goto out;
		}

		device->regions[i].size = info->size;
		device->regions[i].offset = info->offset;
		device->regions[i].flags = info->flags;

		SPDK_DEBUGLOG(vfio_pci, "Bar %d, Size 0x%llx, Offset 0x%llx, Flags 0x%x, Cap offset %u\n",
			      i, info->size, info->offset, info->flags, info->cap_offset);

		/* Setup MMAP if any */
		if (info->size && (info->flags & VFIO_REGION_INFO_FLAG_MMAP)) {
			/* try to map sparse memory region first */
			ret = vfio_device_setup_sparse_mmaps(device, i, info, fds);
			if (ret < 0) {
				ret = vfio_device_map_region(device, &device->regions[i], fds[0]);
			}
			if (ret != 0) {
				SPDK_ERRLOG("Setup Device %s region %d failed\n", device->name, i);
				goto out;
			}
		}
	}

out:
	/* Single exit point: the scratch buffer is freed on every path. */
	free(buf);
	return ret;
}
/*
 * Unmap every mmapped area of every PCI region of @dev and reset the
 * region bookkeeping to zero.  Safe to call when nothing was mapped.
 */
static void
vfio_device_unmap_bars(struct vfio_device *dev)
{
	uint32_t bar, area;
	struct vfio_pci_region *region;

	for (bar = 0; bar < dev->pci_regions; bar++) {
		region = &dev->regions[bar];
		for (area = 0; area < region->nr_mmaps; area++) {
			/* Areas without a valid fd were never mmapped (mem == NULL). */
			if (region->mmaps[area].mem) {
				munmap(region->mmaps[area].mem, region->mmaps[area].size);
			}
		}
	}

	memset(dev->regions, 0, sizeof(dev->regions));
}
/*
 * Create and fully initialize a vfio-user device connected via the UNIX
 * socket at @path: connect, fetch device info, map BARs/config space, and
 * register the DMA memory map.
 *
 * Returns the new device on success (caller releases it with
 * spdk_vfio_user_release()), or NULL on failure with everything undone.
 */
struct vfio_device *
spdk_vfio_user_setup(const char *path)
{
	int ret;
	struct vfio_device *device = NULL;
	struct vfio_user_device_info dev_info = {};

	device = calloc(1, sizeof(*device));
	if (!device) {
		return NULL;
	}

	TAILQ_INIT(&device->mrs_head);
	snprintf(device->path, PATH_MAX, "%s", path);
	/* Unique per-process name: vfio-user0, vfio-user1, ... */
	snprintf(device->name, sizeof(device->name), "vfio-user%u", g_vfio_dev_id++);

	ret = vfio_user_dev_setup(device);
	if (ret) {
		free(device);
		SPDK_ERRLOG("Error to setup vfio-user via path %s\n", path);
		return NULL;
	}

	ret = vfio_user_get_dev_info(device, &dev_info, sizeof(dev_info));
	if (ret) {
		SPDK_ERRLOG("Device get info failed\n");
		goto cleanup;
	}
	device->pci_regions = dev_info.num_regions;
	device->flags = dev_info.flags;

	ret = vfio_device_map_bars_and_config_region(device);
	if (ret) {
		goto cleanup;
	}

	/* Register DMA Region */
	ret = vfio_device_dma_map(device);
	if (ret) {
		SPDK_ERRLOG("Container DMA map failed\n");
		goto cleanup;
	}

	SPDK_DEBUGLOG(vfio_pci, "Device %s, Path %s Setup Successfully\n", device->name, device->path);

	return device;

cleanup:
	/* Fix: unmap any BARs mapped before the failure — the old cleanup
	 * path leaked them when vfio_device_dma_map() failed.  Safe to call
	 * even when nothing was mapped (regions are zeroed by calloc).
	 */
	vfio_device_unmap_bars(device);
	close(device->fd);
	free(device);
	return NULL;
}
/*
 * Tear down a device created by spdk_vfio_user_setup(): unmap the BARs,
 * free the DMA memory map, close the connection, and free the device.
 *
 * Ordering note: the memory map is freed before the fd is closed because
 * freeing it triggers unregister notifications that message the server.
 */
void
spdk_vfio_user_release(struct vfio_device *dev)
{
	SPDK_DEBUGLOG(vfio_pci, "Release file %s\n", dev->path);

	vfio_device_unmap_bars(dev);

	if (dev->map != NULL) {
		spdk_mem_map_free(&dev->map);
	}

	close(dev->fd);
	free(dev);
}
/*
 * Translate (@index, @offset) into a process virtual address when the
 * requested [offset, offset+len) range is fully contained in one mmapped
 * sparse area of the BAR.  Returns NULL when the region is not mmappable
 * or no single area covers the whole range.
 */
void *
spdk_vfio_user_get_bar_addr(struct vfio_device *dev, uint32_t index, uint64_t offset, uint32_t len)
{
	struct vfio_pci_region *region = &dev->regions[index];
	uint32_t i;

	if (!region->size || !(region->flags & VFIO_REGION_INFO_FLAG_MMAP)) {
		return NULL;
	}

	for (i = 0; i < region->nr_mmaps; i++) {
		uint64_t start = region->mmaps[i].offset;
		uint64_t end = start + region->mmaps[i].size;

		if (region->mmaps[i].mem == NULL) {
			continue;
		}
		if (offset >= start && (offset + len) <= end) {
			return (void *)((uintptr_t)region->mmaps[i].mem + (offset - start));
		}
	}

	return NULL;
}
SPDK_LOG_REGISTER_COMPONENT(vfio_pci)