vfio: allow mapping MSI-X BARs if kernel allows it
Currently, DPDK will skip mapping some areas (or even an entire BAR) if MSI-X table happens to be in them but is smaller than page size. Kernels 4.16+ will allow mapping MSI-X BARs [1], and will report this as a capability flag. Capability flags themselves are also only supported since kernel 4.6 [2]. This commit will introduce support for checking VFIO capabilities, and will use it to check if we are allowed to map BARs with MSI-X tables in them, along with backwards compatibility for older kernels, including a workaround for a variable rename in VFIO region info structure [3]. [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/ linux.git/commit/?id=a32295c612c57990d17fb0f41e7134394b2f35f6 [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/ linux.git/commit/?id=c84982adb23bcf3b99b79ca33527cd2625fbe279 [3] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/ linux.git/commit/?id=ff63eb638d63b95e489f976428f1df01391e15e4 Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
This commit is contained in:
parent
b1621823ea
commit
03ba15ca65
@ -415,6 +415,93 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* region info may contain capability headers, so we need to keep reallocating
|
||||
* the memory until we match allocated memory size with argsz.
|
||||
*/
|
||||
static int
|
||||
pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info,
|
||||
int region)
|
||||
{
|
||||
struct vfio_region_info *ri;
|
||||
size_t argsz = sizeof(*ri);
|
||||
int ret;
|
||||
|
||||
ri = malloc(sizeof(*ri));
|
||||
if (ri == NULL) {
|
||||
RTE_LOG(ERR, EAL, "Cannot allocate memory for region info\n");
|
||||
return -1;
|
||||
}
|
||||
again:
|
||||
memset(ri, 0, argsz);
|
||||
ri->argsz = argsz;
|
||||
ri->index = region;
|
||||
|
||||
ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ri);
|
||||
if (ret < 0) {
|
||||
free(ri);
|
||||
return ret;
|
||||
}
|
||||
if (ri->argsz != argsz) {
|
||||
struct vfio_region_info *tmp;
|
||||
|
||||
argsz = ri->argsz;
|
||||
tmp = realloc(ri, argsz);
|
||||
|
||||
if (tmp == NULL) {
|
||||
/* realloc failed but the ri is still there */
|
||||
free(ri);
|
||||
RTE_LOG(ERR, EAL, "Cannot reallocate memory for region info\n");
|
||||
return -1;
|
||||
}
|
||||
ri = tmp;
|
||||
goto again;
|
||||
}
|
||||
*info = ri;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct vfio_info_cap_header *
|
||||
pci_vfio_info_cap(struct vfio_region_info *info, int cap)
|
||||
{
|
||||
struct vfio_info_cap_header *h;
|
||||
size_t offset;
|
||||
|
||||
if ((info->flags & RTE_VFIO_INFO_FLAG_CAPS) == 0) {
|
||||
/* VFIO info does not advertise capabilities */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
offset = VFIO_CAP_OFFSET(info);
|
||||
while (offset != 0) {
|
||||
h = RTE_PTR_ADD(info, offset);
|
||||
if (h->id == cap)
|
||||
return h;
|
||||
offset = h->next;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int
|
||||
pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
|
||||
{
|
||||
struct vfio_region_info *info;
|
||||
int ret;
|
||||
|
||||
ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region);
|
||||
if (ret < 0)
|
||||
return -1;
|
||||
|
||||
ret = pci_vfio_info_cap(info, RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL;
|
||||
|
||||
/* cleanup */
|
||||
free(info);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
pci_vfio_map_resource_primary(struct rte_pci_device *dev)
|
||||
{
|
||||
@ -464,56 +551,75 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
|
||||
if (ret < 0) {
|
||||
RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n",
|
||||
pci_addr);
|
||||
goto err_vfio_dev_fd;
|
||||
goto err_vfio_res;
|
||||
}
|
||||
/* if we found our MSI-X BAR region, check if we can mmap it */
|
||||
if (vfio_res->msix_table.bar_index != -1) {
|
||||
int ret = pci_vfio_msix_is_mappable(vfio_dev_fd,
|
||||
vfio_res->msix_table.bar_index);
|
||||
if (ret < 0) {
|
||||
RTE_LOG(ERR, EAL, "Couldn't check if MSI-X BAR is mappable\n");
|
||||
goto err_vfio_res;
|
||||
} else if (ret != 0) {
|
||||
/* we can map it, so we don't care where it is */
|
||||
RTE_LOG(DEBUG, EAL, "VFIO reports MSI-X BAR as mappable\n");
|
||||
vfio_res->msix_table.bar_index = -1;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < (int) vfio_res->nb_maps; i++) {
|
||||
struct vfio_region_info reg = { .argsz = sizeof(reg) };
|
||||
struct vfio_region_info *reg = NULL;
|
||||
void *bar_addr;
|
||||
|
||||
reg.index = i;
|
||||
|
||||
ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ®);
|
||||
if (ret) {
|
||||
ret = pci_vfio_get_region_info(vfio_dev_fd, ®, i);
|
||||
if (ret < 0) {
|
||||
RTE_LOG(ERR, EAL, " %s cannot get device region info "
|
||||
"error %i (%s)\n", pci_addr, errno, strerror(errno));
|
||||
"error %i (%s)\n", pci_addr, errno,
|
||||
strerror(errno));
|
||||
goto err_vfio_res;
|
||||
}
|
||||
|
||||
/* chk for io port region */
|
||||
ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);
|
||||
if (ret < 0)
|
||||
if (ret < 0) {
|
||||
free(reg);
|
||||
goto err_vfio_res;
|
||||
else if (ret) {
|
||||
} else if (ret) {
|
||||
RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n",
|
||||
i);
|
||||
free(reg);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* skip non-mmapable BARs */
|
||||
if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
|
||||
if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) {
|
||||
free(reg);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* try mapping somewhere close to the end of hugepages */
|
||||
if (pci_map_addr == NULL)
|
||||
pci_map_addr = pci_find_max_end_va();
|
||||
|
||||
bar_addr = pci_map_addr;
|
||||
pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
|
||||
pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg->size);
|
||||
|
||||
maps[i].addr = bar_addr;
|
||||
maps[i].offset = reg.offset;
|
||||
maps[i].size = reg.size;
|
||||
maps[i].offset = reg->offset;
|
||||
maps[i].size = reg->size;
|
||||
maps[i].path = NULL; /* vfio doesn't have per-resource paths */
|
||||
|
||||
ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
|
||||
if (ret < 0) {
|
||||
RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n",
|
||||
pci_addr, i, strerror(errno));
|
||||
free(reg);
|
||||
goto err_vfio_res;
|
||||
}
|
||||
|
||||
dev->mem_resource[i].addr = maps[i].addr;
|
||||
|
||||
free(reg);
|
||||
}
|
||||
|
||||
if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) {
|
||||
|
@ -14,6 +14,8 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/*
|
||||
* determine if VFIO is present on the system
|
||||
*/
|
||||
@ -44,6 +46,30 @@ extern "C" {
|
||||
#define RTE_VFIO_NOIOMMU 8
|
||||
#endif
|
||||
|
||||
/*
|
||||
* capabilities are only supported on kernel 4.6+. there were also some API
|
||||
* changes as well, so add a macro to get cap offset.
|
||||
*/
|
||||
#ifdef VFIO_REGION_INFO_FLAG_CAPS
|
||||
#define RTE_VFIO_INFO_FLAG_CAPS VFIO_REGION_INFO_FLAG_CAPS
|
||||
#define VFIO_CAP_OFFSET(x) (x->cap_offset)
|
||||
#else
|
||||
#define RTE_VFIO_INFO_FLAG_CAPS (1 << 3)
|
||||
#define VFIO_CAP_OFFSET(x) (x->resv)
|
||||
struct vfio_info_cap_header {
|
||||
uint16_t id;
|
||||
uint16_t version;
|
||||
uint32_t next;
|
||||
};
|
||||
#endif
|
||||
|
||||
/* kernels 4.16+ can map BAR containing MSI-X table */
|
||||
#ifdef VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
|
||||
#define RTE_VFIO_CAP_MSIX_MAPPABLE VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
|
||||
#else
|
||||
#define RTE_VFIO_CAP_MSIX_MAPPABLE 3
|
||||
#endif
|
||||
|
||||
#else /* not VFIO_PRESENT */
|
||||
|
||||
/* we don't need an actual definition, only pointer is used */
|
||||
|
Loading…
Reference in New Issue
Block a user