eal/linux: allow mapping BARs with MSI-X tables
While VFIO doesn't allow us to map complete BARs that contain MSI-X tables, it does allow us to map around the tables at PAGE_SIZE granularity. There might be adapters that provide their registers in the same BAR as the MSI-X table but on a different page. For example, Intel's NVMe adapter, though not a network adapter, provides only one MMIO BAR, and that BAR contains the MSI-X table. Signed-off-by: Dan Aloni <dan@kernelim.com> Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
This commit is contained in:
parent
4769bc5a27
commit
90a1633b23
@ -118,13 +118,14 @@ pci_find_max_end_va(void)
|
|||||||
|
|
||||||
/* map a particular resource from a file */
|
/* map a particular resource from a file */
|
||||||
void *
|
void *
|
||||||
pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
|
pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
|
||||||
|
int additional_flags)
|
||||||
{
|
{
|
||||||
void *mapaddr;
|
void *mapaddr;
|
||||||
|
|
||||||
/* Map the PCI memory resource of device */
|
/* Map the PCI memory resource of device */
|
||||||
mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
|
mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
|
||||||
MAP_SHARED, fd, offset);
|
MAP_SHARED | additional_flags, fd, offset);
|
||||||
if (mapaddr == MAP_FAILED) {
|
if (mapaddr == MAP_FAILED) {
|
||||||
RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n",
|
RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n",
|
||||||
__func__, fd, requested_addr,
|
__func__, fd, requested_addr,
|
||||||
|
@ -66,7 +66,7 @@ extern void *pci_map_addr;
|
|||||||
void *pci_find_max_end_va(void);
|
void *pci_find_max_end_va(void);
|
||||||
|
|
||||||
void *pci_map_resource(void *requested_addr, int fd, off_t offset,
|
void *pci_map_resource(void *requested_addr, int fd, off_t offset,
|
||||||
size_t size);
|
size_t size, int additional_flags);
|
||||||
|
|
||||||
/* map IGB_UIO resource prototype */
|
/* map IGB_UIO resource prototype */
|
||||||
int pci_uio_map_resource(struct rte_pci_device *dev);
|
int pci_uio_map_resource(struct rte_pci_device *dev);
|
||||||
|
@ -146,7 +146,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
|
|||||||
|
|
||||||
if (pci_map_resource(uio_res->maps[i].addr, fd,
|
if (pci_map_resource(uio_res->maps[i].addr, fd,
|
||||||
(off_t)uio_res->maps[i].offset,
|
(off_t)uio_res->maps[i].offset,
|
||||||
(size_t)uio_res->maps[i].size)
|
(size_t)uio_res->maps[i].size, 0)
|
||||||
!= uio_res->maps[i].addr) {
|
!= uio_res->maps[i].addr) {
|
||||||
RTE_LOG(ERR, EAL,
|
RTE_LOG(ERR, EAL,
|
||||||
"Cannot mmap device resource\n");
|
"Cannot mmap device resource\n");
|
||||||
@ -409,7 +409,7 @@ pci_uio_map_resource(struct rte_pci_device *dev)
|
|||||||
pci_map_addr = pci_find_max_end_va();
|
pci_map_addr = pci_find_max_end_va();
|
||||||
|
|
||||||
mapaddr = pci_map_resource(pci_map_addr, fd, (off_t)offset,
|
mapaddr = pci_map_resource(pci_map_addr, fd, (off_t)offset,
|
||||||
(size_t)maps[j].size);
|
(size_t)maps[j].size, 0);
|
||||||
if (mapaddr == MAP_FAILED)
|
if (mapaddr == MAP_FAILED)
|
||||||
fail = 1;
|
fail = 1;
|
||||||
|
|
||||||
|
@ -62,6 +62,9 @@
|
|||||||
|
|
||||||
#ifdef VFIO_PRESENT
|
#ifdef VFIO_PRESENT
|
||||||
|
|
||||||
|
#define PAGE_SIZE (sysconf(_SC_PAGESIZE))
|
||||||
|
#define PAGE_MASK (~(PAGE_SIZE - 1))
|
||||||
|
|
||||||
#define VFIO_DIR "/dev/vfio"
|
#define VFIO_DIR "/dev/vfio"
|
||||||
#define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
|
#define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
|
||||||
#define VFIO_GROUP_FMT "/dev/vfio/%u"
|
#define VFIO_GROUP_FMT "/dev/vfio/%u"
|
||||||
@ -72,10 +75,12 @@ static struct vfio_config vfio_cfg;
|
|||||||
|
|
||||||
/* get PCI BAR number where MSI-X interrupts are */
|
/* get PCI BAR number where MSI-X interrupts are */
|
||||||
static int
|
static int
|
||||||
pci_vfio_get_msix_bar(int fd, int *msix_bar)
|
pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset,
|
||||||
|
uint32_t *msix_table_size)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
uint32_t reg;
|
uint32_t reg;
|
||||||
|
uint16_t flags;
|
||||||
uint8_t cap_id, cap_offset;
|
uint8_t cap_id, cap_offset;
|
||||||
|
|
||||||
/* read PCI capability pointer from config space */
|
/* read PCI capability pointer from config space */
|
||||||
@ -134,7 +139,18 @@ pci_vfio_get_msix_bar(int fd, int *msix_bar)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ret = pread64(fd, &flags, sizeof(flags),
|
||||||
|
VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
|
||||||
|
cap_offset + 2);
|
||||||
|
if (ret != sizeof(flags)) {
|
||||||
|
RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config "
|
||||||
|
"space!\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
*msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR;
|
*msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR;
|
||||||
|
*msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
|
||||||
|
*msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -532,6 +548,8 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
|
|||||||
int i, ret, msix_bar;
|
int i, ret, msix_bar;
|
||||||
struct mapped_pci_resource *vfio_res = NULL;
|
struct mapped_pci_resource *vfio_res = NULL;
|
||||||
struct pci_map *maps;
|
struct pci_map *maps;
|
||||||
|
uint32_t msix_table_offset = 0;
|
||||||
|
uint32_t msix_table_size = 0;
|
||||||
|
|
||||||
dev->intr_handle.fd = -1;
|
dev->intr_handle.fd = -1;
|
||||||
dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
|
dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
|
||||||
@ -657,9 +675,10 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* get MSI-X BAR, if any (we have to know where it is because we can't
|
/* get MSI-X BAR, if any (we have to know where it is because we can't
|
||||||
* mmap it when using VFIO) */
|
* easily mmap it when using VFIO) */
|
||||||
msix_bar = -1;
|
msix_bar = -1;
|
||||||
ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar);
|
ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar,
|
||||||
|
&msix_table_offset, &msix_table_size);
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr);
|
RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr);
|
||||||
close(vfio_dev_fd);
|
close(vfio_dev_fd);
|
||||||
@ -702,6 +721,9 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
|
|||||||
for (i = 0; i < (int) vfio_res->nb_maps; i++) {
|
for (i = 0; i < (int) vfio_res->nb_maps; i++) {
|
||||||
struct vfio_region_info reg = { .argsz = sizeof(reg) };
|
struct vfio_region_info reg = { .argsz = sizeof(reg) };
|
||||||
void *bar_addr;
|
void *bar_addr;
|
||||||
|
struct memreg {
|
||||||
|
uint32_t offset, size;
|
||||||
|
} memreg[2] = {};
|
||||||
|
|
||||||
reg.index = i;
|
reg.index = i;
|
||||||
|
|
||||||
@ -720,21 +742,77 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
|
|||||||
if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
|
if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* skip MSI-X BAR */
|
if (i == msix_bar) {
|
||||||
if (i == msix_bar)
|
/*
|
||||||
continue;
|
* VFIO will not let us map the MSI-X table,
|
||||||
|
* but we can map around it.
|
||||||
|
*/
|
||||||
|
uint32_t table_start = msix_table_offset;
|
||||||
|
uint32_t table_end = table_start + msix_table_size;
|
||||||
|
table_end = (table_end + ~PAGE_MASK) & PAGE_MASK;
|
||||||
|
table_start &= PAGE_MASK;
|
||||||
|
|
||||||
|
if (table_start == 0 && table_end >= reg.size) {
|
||||||
|
/* Cannot map this BAR */
|
||||||
|
RTE_LOG(DEBUG, EAL, "Skipping BAR %d\n", i);
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
memreg[0].offset = reg.offset;
|
||||||
|
memreg[0].size = table_start;
|
||||||
|
memreg[1].offset = table_end;
|
||||||
|
memreg[1].size = reg.size - table_end;
|
||||||
|
|
||||||
|
RTE_LOG(DEBUG, EAL,
|
||||||
|
"Trying to map BAR %d that contains the MSI-X "
|
||||||
|
"table. Trying offsets: "
|
||||||
|
"%04x:%04x, %04x:%04x\n", i,
|
||||||
|
memreg[0].offset, memreg[0].size,
|
||||||
|
memreg[1].offset, memreg[1].size);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
memreg[0].offset = reg.offset;
|
||||||
|
memreg[0].size = reg.size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* try to figure out an address */
|
||||||
if (internal_config.process_type == RTE_PROC_PRIMARY) {
|
if (internal_config.process_type == RTE_PROC_PRIMARY) {
|
||||||
/* try mapping somewhere close to the end of hugepages */
|
/* try mapping somewhere close to the end of hugepages */
|
||||||
if (pci_map_addr == NULL)
|
if (pci_map_addr == NULL)
|
||||||
pci_map_addr = pci_find_max_end_va();
|
pci_map_addr = pci_find_max_end_va();
|
||||||
|
|
||||||
bar_addr = pci_map_resource(pci_map_addr, vfio_dev_fd, reg.offset,
|
bar_addr = pci_map_addr;
|
||||||
reg.size);
|
|
||||||
pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
|
pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
|
||||||
} else {
|
} else {
|
||||||
bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset,
|
bar_addr = maps[i].addr;
|
||||||
reg.size);
|
}
|
||||||
|
|
||||||
|
/* reserve the address using an inaccessible mapping */
|
||||||
|
bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE |
|
||||||
|
MAP_ANONYMOUS, -1, 0);
|
||||||
|
if (bar_addr != MAP_FAILED) {
|
||||||
|
void *map_addr = NULL;
|
||||||
|
if (memreg[0].size) {
|
||||||
|
/* actual map of first part */
|
||||||
|
map_addr = pci_map_resource(bar_addr, vfio_dev_fd,
|
||||||
|
memreg[0].offset,
|
||||||
|
memreg[0].size,
|
||||||
|
MAP_FIXED);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if there's a second part, try to map it */
|
||||||
|
if (map_addr != MAP_FAILED
|
||||||
|
&& memreg[1].offset && memreg[1].size) {
|
||||||
|
void *second_addr = RTE_PTR_ADD(bar_addr, memreg[1].offset);
|
||||||
|
map_addr = pci_map_resource(second_addr,
|
||||||
|
vfio_dev_fd, memreg[1].offset,
|
||||||
|
memreg[1].size,
|
||||||
|
MAP_FIXED);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (map_addr == MAP_FAILED || !map_addr) {
|
||||||
|
munmap(bar_addr, reg.size);
|
||||||
|
bar_addr = MAP_FAILED;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bar_addr == MAP_FAILED ||
|
if (bar_addr == MAP_FAILED ||
|
||||||
|
@ -43,9 +43,13 @@
|
|||||||
#include <linux/vfio.h>
|
#include <linux/vfio.h>
|
||||||
|
|
||||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
|
||||||
#define RTE_PCI_MSIX_TABLE_BIR 0x7
|
#define RTE_PCI_MSIX_TABLE_BIR 0x7
|
||||||
|
#define RTE_PCI_MSIX_TABLE_OFFSET 0xfffffff8
|
||||||
|
#define RTE_PCI_MSIX_FLAGS_QSIZE 0x07ff
|
||||||
#else
|
#else
|
||||||
#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR
|
#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR
|
||||||
|
#define RTE_PCI_MSIX_TABLE_OFFSET PCI_MSIX_TABLE_OFFSET
|
||||||
|
#define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define VFIO_PRESENT
|
#define VFIO_PRESENT
|
||||||
|
Loading…
Reference in New Issue
Block a user