pci: support multiple PCI regions per device

The PCI code must be changed to support multiple I/O regions on a single device.
Some devices like VMXNET3 have multiple PCI memory regions, and some
have none.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Intel
This commit is contained in:
Stephen Hemminger 2013-05-30 19:12:39 +02:00 committed by Thomas Monjalon
parent c7f4888629
commit eee16c964c
9 changed files with 332 additions and 120 deletions

View File

@ -308,7 +308,7 @@ port_reg_off_is_invalid(portid_t port_id, uint32_t reg_off)
(unsigned)reg_off);
return 1;
}
pci_len = ports[port_id].dev_info.pci_dev->mem_resource.len;
pci_len = ports[port_id].dev_info.pci_dev->mem_resource[0].len;
if (reg_off >= pci_len) {
printf("Port %d: register offset %u (0x%X) out of port PCI "
"resource (length=%"PRIu64")\n",

View File

@ -375,8 +375,9 @@ port_pci_reg_read(struct rte_port *port, uint32_t reg_off)
void *reg_addr;
uint32_t reg_v;
reg_addr = (void *)((char *)port->dev_info.pci_dev->mem_resource.addr +
reg_off);
reg_addr = (void *)
((char *)port->dev_info.pci_dev->mem_resource[0].addr +
reg_off);
reg_v = *((volatile uint32_t *)reg_addr);
return rte_le_to_cpu_32(reg_v);
}
@ -389,8 +390,9 @@ port_pci_reg_write(struct rte_port *port, uint32_t reg_off, uint32_t reg_v)
{
void *reg_addr;
reg_addr = (void *)((char *)port->dev_info.pci_dev->mem_resource.addr +
reg_off);
reg_addr = (void *)
((char *)port->dev_info.pci_dev->mem_resource[0].addr +
reg_off);
*((volatile uint32_t *)reg_addr) = rte_cpu_to_le_32(reg_v);
}

View File

@ -152,12 +152,19 @@ rte_eal_pci_probe(void)
static int
pci_dump_one_device(struct rte_pci_device *dev)
{
int i;
printf(PCI_PRI_FMT, dev->addr.domain, dev->addr.bus,
dev->addr.devid, dev->addr.function);
printf(" - vendor:%x device:%x\n", dev->id.vendor_id,
dev->id.device_id);
printf(" %16.16"PRIx64" %16.16"PRIx64"\n",
dev->mem_resource.phys_addr, dev->mem_resource.len);
for (i = 0; i != sizeof(dev->mem_resource) /
sizeof(dev->mem_resource[0]); i++) {
printf(" %16.16"PRIx64" %16.16"PRIx64"\n",
dev->mem_resource[i].phys_addr,
dev->mem_resource[i].len);
}
return 0;
}

View File

@ -139,7 +139,7 @@ struct rte_pci_device {
TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */
struct rte_pci_addr addr; /**< PCI location. */
struct rte_pci_id id; /**< PCI ID. */
struct rte_pci_resource mem_resource; /**< PCI Memory Resource */
struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE]; /**< PCI Memory Resource */
struct rte_intr_handle intr_handle; /**< Interrupt handle */
char previous_dr[PATH_MAX]; /**< path for pre-dpdk driver*/
const struct rte_pci_driver *driver; /**< Associated driver */

View File

@ -81,6 +81,12 @@
* enabling bus master.
*/
/* Description of one UIO memory mapping (one maps/mapN sysfs directory). */
struct uio_map {
void *addr; /**< Virtual address returned when the region was mapped. */
uint64_t offset; /**< Offset used when mapping the region from the UIO device file. */
uint64_t size; /**< Length of the region, parsed from the sysfs "size" file. */
uint64_t phaddr; /**< Physical address, parsed from the sysfs "addr" file. */
};
#define PROC_MODULES "/proc/modules"
@ -99,15 +105,15 @@ struct uio_resource {
TAILQ_ENTRY(uio_resource) next;
struct rte_pci_addr pci_addr;
void *addr;
char path[PATH_MAX];
unsigned long size;
unsigned long offset;
size_t nb_maps;
struct uio_map maps[PCI_MAX_RESOURCE];
};
TAILQ_HEAD(uio_res_list, uio_resource);
static struct uio_res_list *uio_res_list = NULL;
static int pci_parse_sysfs_value(const char *filename, uint64_t *val);
/*
* Check that a kernel module is loaded. Returns 0 on success, or if the
@ -232,55 +238,42 @@ pci_uio_bind_device(struct rte_pci_device *dev, const char *module_name)
}
/* map a particular resource from a file */
static void *
pci_map_resource(struct rte_pci_device *dev, void *requested_addr, const char *devname,
unsigned long offset, unsigned long size)
/*
* open devname: it can take some time to
* appear, so we wait some time before returning an error
*/
static int uio_open(const char *devname)
{
unsigned n;
int fd;
void *mapaddr;
int n, fd;
/*
* open devname, and mmap it: it can take some time to
* appear, so we wait some time before returning an error
*/
for (n=0; n<UIO_DEV_WAIT_TIMEOUT*10; n++) {
for (n=0; n < UIO_DEV_WAIT_TIMEOUT*10; n++) {
fd = open(devname, O_RDWR);
if (fd >= 0)
break;
return fd;
if (errno != ENOENT)
break;
usleep(100000);
}
if (fd < 0) {
RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", devname, strerror(errno));
goto fail;
}
return -1;
}
/* map a particular resource from a file */
static void *
pci_mmap(int fd, void *addr, off_t offset, size_t size)
{
void *mapaddr;
/* Map the PCI memory resource of device */
mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
MAP_SHARED, fd, offset);
if (mapaddr == MAP_FAILED ||
(requested_addr != NULL && mapaddr != requested_addr)) {
RTE_LOG(ERR, EAL, "%s(): cannot mmap %s: %s\n", __func__,
devname, strerror(errno));
close(fd);
goto fail;
}
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
/* save fd if in primary process */
dev->intr_handle.fd = fd;
dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
} else {
/* fd is not needed in slave process, close it */
dev->intr_handle.fd = -1;
dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
close(fd);
mapaddr = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_SHARED,
fd, offset);
if (mapaddr == MAP_FAILED || (addr != NULL && mapaddr != addr)) {
RTE_LOG(ERR, EAL, "%s(): cannot mmap %zd@0x%lx: %s\n",
__func__, size, offset, strerror(errno));
return NULL;
}
RTE_LOG(DEBUG, EAL, "PCI memory mapped at %p\n", mapaddr);
return mapaddr;
fail:
@ -289,44 +282,124 @@ fail:
return NULL;
}
/*
 * Largest value representable in the signed off_t type.
 * (Casting (off_t)-1 to uint64_t would yield UINT64_MAX and turn the
 * range check in pci_uio_get_mappings() into a no-op.)
 */
#define OFF_MAX ((((uint64_t)1) << (sizeof(off_t) * 8 - 1)) - 1)

/*
 * Collect the UIO mappings exported under "<devname>/maps/mapN".
 *
 * For each N in [0, nb_maps) the "offset", "size" and "addr" files of the
 * mapN directory are parsed and stored in maps[N]. Scanning stops at the
 * first mapN directory that does not exist.
 *
 * @param devname  sysfs directory of the UIO device.
 * @param maps     output array, filled in for every mapping found.
 * @param nb_maps  capacity of the maps array.
 * @return number of mappings found, or -1 if a value cannot be parsed or
 *         does not fit in off_t / size_t.
 */
static ssize_t
pci_uio_get_mappings(const char *devname, struct uio_map maps[], size_t nb_maps)
{
	size_t i;
	char dirname[PATH_MAX];
	char filename[PATH_MAX];
	uint64_t offset, size, phaddr;

	for (i = 0; i != nb_maps; i++) {

		/* check if map directory exists (i is a size_t: use %zu) */
		rte_snprintf(dirname, sizeof(dirname),
			"%s/maps/map%zu", devname, i);

		if (access(dirname, F_OK) != 0)
			break;

		/* get mapping offset */
		rte_snprintf(filename, sizeof(filename),
			"%s/offset", dirname);
		if (pci_parse_sysfs_value(filename, &offset) < 0) {
			RTE_LOG(ERR, EAL,
				"%s(): cannot parse offset of %s\n",
				__func__, dirname);
			return (-1);
		}

		/* get mapping size */
		rte_snprintf(filename, sizeof(filename),
			"%s/size", dirname);
		if (pci_parse_sysfs_value(filename, &size) < 0) {
			RTE_LOG(ERR, EAL,
				"%s(): cannot parse size of %s\n",
				__func__, dirname);
			return (-1);
		}

		/* get mapping physical address */
		rte_snprintf(filename, sizeof(filename),
			"%s/addr", dirname);
		if (pci_parse_sysfs_value(filename, &phaddr) < 0) {
			RTE_LOG(ERR, EAL,
				"%s(): cannot parse addr of %s\n",
				__func__, dirname);
			return (-1);
		}

		/* values are later cast to off_t / size_t for mmap() */
		if ((offset > OFF_MAX) || (size > SIZE_MAX)) {
			RTE_LOG(ERR, EAL,
				"%s(): offset/size exceed system max value\n",
				__func__);
			return (-1);
		}

		/* publish the entry only once every field has been validated */
		maps[i].offset = offset;
		maps[i].size = size;
		maps[i].phaddr = phaddr;
	}

	return ((ssize_t)i);
}
static int
pci_uio_map_secondary(struct rte_pci_device *dev)
{
size_t i;
struct uio_resource *uio_res;
TAILQ_FOREACH(uio_res, uio_res_list, next) {
/* skip this element if it doesn't match our PCI address */
if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
continue;
for (i = 0; i != uio_res->nb_maps; i++) {
if (pci_map_resource(dev, uio_res->maps[i].addr,
uio_res->path,
(off_t)uio_res->maps[i].offset,
(size_t)uio_res->maps[i].size) !=
uio_res->maps[i].addr) {
RTE_LOG(ERR, EAL,
"Cannot mmap device resource\n");
return (-1);
}
}
return (0);
}
RTE_LOG(ERR, EAL, "Cannot find resource for device\n");
return -1;
}
/* map the PCI resource of a PCI device in virtual memory */
static int
pci_uio_map_resource(struct rte_pci_device *dev)
{
int i, j;
struct dirent *e;
DIR *dir;
char dirname[PATH_MAX];
char dirname2[PATH_MAX];
char filename[PATH_MAX];
char devname[PATH_MAX]; /* contains the /dev/uioX */
void *mapaddr;
unsigned uio_num;
unsigned long size, offset;
uint64_t phaddr;
uint64_t offset;
uint64_t pagesz;
ssize_t nb_maps;
struct rte_pci_addr *loc = &dev->addr;
struct uio_resource *uio_res;
struct uio_map *maps;
RTE_LOG(DEBUG, EAL, "map PCI resource for device "PCI_PRI_FMT"\n",
loc->domain, loc->bus, loc->devid, loc->function);
/* secondary processes - use already recorded details */
if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
TAILQ_FOREACH(uio_res, uio_res_list, next) {
/* skip this element if it doesn't match our PCI address */
if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
continue;
if (pci_map_resource(dev, uio_res->addr, uio_res->path, \
uio_res->offset, uio_res->size) == uio_res->addr)
return 0;
else {
RTE_LOG(ERR, EAL, "Cannot mmap device resource\n");
return -1;
}
}
RTE_LOG(ERR, EAL, "Cannot find resource for device\n");
return -1;
}
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return pci_uio_map_restore(dev);
/* depending on kernel version, uio can be located in uio/uioX
* or uio:uioX */
@ -379,49 +452,65 @@ pci_uio_map_resource(struct rte_pci_device *dev)
closedir(dir);
/* No uio resource found */
if (e == NULL)
return 0;
/* get mapping offset */
rte_snprintf(filename, sizeof(filename),
"%s/maps/map0/offset", dirname2);
if (eal_parse_sysfs_value(filename, &offset) < 0) {
RTE_LOG(ERR, EAL, "%s(): cannot parse offset\n",
__func__);
if (e == NULL) {
RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, "
"skipping\n", loc->domain, loc->bus, loc->devid, loc->function);
return -1;
}
/* get mapping size */
rte_snprintf(filename, sizeof(filename),
"%s/maps/map0/size", dirname2);
if (eal_parse_sysfs_value(filename, &size) < 0) {
RTE_LOG(ERR, EAL, "%s(): cannot parse size\n",
__func__);
return -1;
/* allocate the mapping details for secondary processes*/
if ((uio_res = rte_zmalloc("UIO_RES", sizeof (*uio_res), 0)) == NULL) {
RTE_LOG(ERR, EAL,
"%s(): cannot store uio mmap details\n", __func__);
return (-1);
}
/* open and mmap /dev/uioX */
rte_snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
mapaddr = pci_map_resource(dev, NULL, devname, offset, size);
if (mapaddr == NULL)
return -1;
dev->mem_resource.addr = mapaddr;
/* save the mapping details for secondary processes*/
uio_res = rte_malloc("UIO_RES", sizeof(*uio_res), 0);
if (uio_res == NULL) {
RTE_LOG(ERR, EAL, "%s(): cannot store uio mmap details\n", __func__);
return -1;
}
uio_res->addr = mapaddr;
uio_res->offset = offset;
uio_res->size = size;
rte_snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname);
memcpy(&uio_res->pci_addr, &dev->addr, sizeof(uio_res->pci_addr));
/* collect info about device mappings */
if ((nb_maps = pci_uio_get_mappings(dirname2, uio_res->maps,
sizeof (uio_res->maps) / sizeof (uio_res->maps[0])))
< 0)
return (nb_maps);
uio_res->nb_maps = nb_maps;
/* Map all BARs */
pagesz = sysconf(_SC_PAGESIZE);
maps = uio_res->maps;
for (i = 0; i != PCI_MAX_RESOURCE; i++) {
/* skip empty BAR */
if ((phaddr = dev->mem_resource[i].phys_addr) == 0)
continue;
for (j = 0; j != nb_maps && (phaddr != maps[j].phaddr ||
dev->mem_resource[i].len != maps[j].size);
j++)
;
/* if matching map is found, then use it */
if (j != nb_maps) {
offset = j * pagesz;
if (maps[j].addr != NULL ||
(mapaddr = pci_map_resource(dev,
NULL, devname, (off_t)offset,
(size_t)maps[j].size)) == NULL) {
return (-1);
}
maps[j].addr = mapaddr;
maps[j].offset = offset;
dev->mem_resource[i].addr = mapaddr;
}
}
TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);
return 0;
return (0);
}
/* parse the "resource" sysfs file */
@ -470,10 +559,10 @@ pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev)
}
if (flags & IORESOURCE_MEM) {
dev->mem_resource.phys_addr = phys_addr;
dev->mem_resource.len = end_addr - phys_addr + 1;
dev->mem_resource.addr = NULL; /* not mapped for now */
break;
dev->mem_resource[i].phys_addr = phys_addr;
dev->mem_resource[i].len = end_addr - phys_addr + 1;
/* not mapped for now */
dev->mem_resource[i].addr = NULL;
}
}
fclose(f);
@ -484,6 +573,40 @@ error:
return -1;
}
/*
 * Parse a sysfs file containing one integer value.
 * Differs from the eal version in that it works with 64-bit values.
 *
 * The first line of the file must consist of a number (any base accepted
 * by strtoull() with base 0) immediately followed by a newline.
 *
 * @param filename  path of the sysfs file to read.
 * @param val       receives the parsed value; written only on success.
 * @return 0 on success, -1 if the file cannot be opened, read or parsed.
 */
static int
pci_parse_sysfs_value(const char *filename, uint64_t *val)
{
	FILE *f;
	char buf[BUFSIZ];
	char *end = NULL;
	unsigned long long parsed;
	int ret = -1;

	f = fopen(filename, "r");
	if (f == NULL) {
		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
			__func__, filename);
		return -1;
	}

	if (fgets(buf, sizeof(buf), f) == NULL) {
		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
			__func__, filename);
		goto out;
	}

	/*
	 * end == buf means no digits were consumed (e.g. an empty line),
	 * which strtoull() reports as 0 without any error; ERANGE flags a
	 * value that does not fit in unsigned long long.
	 */
	errno = 0;
	parsed = strtoull(buf, &end, 0);
	if ((end == buf) || (*end != '\n') || (errno == ERANGE)) {
		RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
			__func__, filename);
		goto out;
	}

	*val = parsed;
	ret = 0;

out:
	fclose(f);
	return ret;
}
/* Compare two PCI device addresses. */
static int
@ -828,7 +951,23 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *d
/* map the NIC resources */
if (pci_uio_map_resource(dev) < 0)
return -1;
#else
/* just map the NIC resources */
if (pci_uio_map_resource(dev) < 0)
return -1;
#endif
/* We always should have BAR0 mapped */
if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
dev->mem_resource[0].addr == NULL) {
RTE_LOG(ERR, EAL,
"%s(): BAR0 is not mapped\n",
__func__);
return (-1);
}
/* reference driver structure */
dev->driver = dr;
/* call the driver devinit() function */
return dr->devinit(dr, dev);

View File

@ -258,6 +258,9 @@ igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
unsigned long addr, len;
void *internal_addr;
if (sizeof(info->mem) / sizeof (info->mem[0]) <= n)
return (EINVAL);
addr = pci_resource_start(dev, pci_bar);
len = pci_resource_len(dev, pci_bar);
if (addr == 0 || len == 0)
@ -273,6 +276,29 @@ igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
return 0;
}
/*
 * Get pci port io resources described by bar #pci_bar in uio resource n.
 *
 * Fills info->port[n] with the start address and length of the BAR and
 * marks it as an x86 I/O port region.
 *
 * Returns 0 on success, -EINVAL if n is not a valid index into
 * info->port[], and -1 if the BAR has no start address or length.
 */
static int
igbuio_pci_setup_ioport(struct pci_dev *dev, struct uio_info *info,
		int n, int pci_bar, const char *name)
{
	unsigned long addr, len;

	/* kernel convention: report failures as negative errno values */
	if (n < 0 || (size_t)n >= sizeof(info->port) / sizeof(info->port[0]))
		return -EINVAL;

	addr = pci_resource_start(dev, pci_bar);
	len = pci_resource_len(dev, pci_bar);
	if (addr == 0 || len == 0)
		return -1;

	info->port[n].name = name;
	info->port[n].start = addr;
	info->port[n].size = len;
	info->port[n].porttype = UIO_PORT_X86;

	return 0;
}
/* Unmap previously ioremap'd resources */
static void
igbuio_pci_release_iomem(struct uio_info *info)
@ -284,6 +310,44 @@ igbuio_pci_release_iomem(struct uio_info *info)
}
}
/*
 * Register every populated standard BAR of the device with UIO.
 *
 * BARs with a non-zero start and length are assigned, in BAR order, to
 * consecutive info->mem[] slots (memory BARs) or info->port[] slots
 * (I/O port BARs).
 *
 * Returns 0 if at least one memory BAR was registered, the non-zero
 * error of the first helper that failed, or -ENOENT if the device has
 * no usable memory BAR.
 */
static int
igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info)
{
	int i, iom, iop, ret;
	unsigned long flags;
	static const char * const bar_names[PCI_STD_RESOURCE_END + 1] = {
		"BAR0",
		"BAR1",
		"BAR2",
		"BAR3",
		"BAR4",
		"BAR5",
	};
	const int nb_bars = (int)(sizeof(bar_names) / sizeof(bar_names[0]));

	iom = 0;
	iop = 0;

	for (i = 0; i < nb_bars; i++) {
		/* skip BARs that are not populated */
		if (pci_resource_len(dev, i) == 0 ||
				pci_resource_start(dev, i) == 0)
			continue;

		flags = pci_resource_flags(dev, i);
		if (flags & IORESOURCE_MEM) {
			ret = igbuio_pci_setup_iomem(dev, info, iom,
					i, bar_names[i]);
			if (ret != 0)
				return ret;
			iom++;
		} else if (flags & IORESOURCE_IO) {
			ret = igbuio_pci_setup_ioport(dev, info, iop,
					i, bar_names[i]);
			if (ret != 0)
				return ret;
			iop++;
		}
	}

	/*
	 * Reaching this point means every helper call succeeded, so the
	 * only remaining failure is "no memory BAR at all". Return an
	 * explicit 0 rather than a variable the compiler cannot prove
	 * initialised, and use a negative errno per kernel convention.
	 */
	return (iom != 0) ? 0 : -ENOENT;
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
static int __devinit
#else
@ -306,13 +370,6 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
goto fail_free;
}
/* XXX should we use 64 bits ? */
/* set 32-bit DMA mask */
if (pci_set_dma_mask(dev,(uint64_t)0xffffffff)) {
printk(KERN_ERR "Cannot set DMA mask\n");
goto fail_disable;
}
/*
* reserve device's PCI memory regions for use by this
* module
@ -326,8 +383,17 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
pci_set_master(dev);
/* remap IO memory */
if (igbuio_pci_setup_iomem(dev, &udev->info, 0, 0, "config"))
goto fail_release_regions;
if (igbuio_setup_bars(dev, &udev->info))
goto fail_release_iomem;
/* set 64-bit DMA mask */
if (pci_set_dma_mask(dev, DMA_BIT_MASK(64))) {
printk(KERN_ERR "Cannot set DMA mask\n");
goto fail_release_iomem;
} else if (pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64))) {
printk(KERN_ERR "Cannot set consistent DMA mask\n");
goto fail_release_iomem;
}
/* fill uio infos */
udev->info.name = "Intel IGB UIO";
@ -384,7 +450,6 @@ fail_release_iomem:
igbuio_pci_release_iomem(&udev->info);
if (udev->mode == IGBUIO_MSIX_INTR_MODE)
pci_disable_msix(udev->pdev);
fail_release_regions:
pci_release_regions(dev);
fail_disable:
pci_disable_device(dev);

View File

@ -233,7 +233,7 @@ eth_em_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
return 0;
}
hw->hw_addr = (void *)pci_dev->mem_resource.addr;
hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
hw->device_id = pci_dev->id.device_id;
/* For ICH8 support we'll need to map the flash memory BAR */

View File

@ -299,10 +299,9 @@ eth_igb_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
return 0;
}
hw->hw_addr= (void *)pci_dev->mem_resource.addr;
hw->hw_addr= (void *)pci_dev->mem_resource[0].addr;
igb_identify_hardware(eth_dev);
if (e1000_setup_init_funcs(hw, TRUE) != E1000_SUCCESS) {
error = -EIO;
goto err_late;
@ -415,7 +414,7 @@ eth_igbvf_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
hw->device_id = pci_dev->id.device_id;
hw->vendor_id = pci_dev->id.vendor_id;
hw->hw_addr = (void *)pci_dev->mem_resource.addr;
hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
/* Initialize the shared code */
diag = e1000_setup_init_funcs(hw, TRUE);

View File

@ -555,7 +555,7 @@ eth_ixgbe_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
/* Vendor and Device ID need to be set before init of shared code */
hw->device_id = pci_dev->id.device_id;
hw->vendor_id = pci_dev->id.vendor_id;
hw->hw_addr = (void *)pci_dev->mem_resource.addr;
hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
/* Initialize the shared code */
diag = ixgbe_init_shared_code(hw);
@ -687,7 +687,7 @@ eth_ixgbevf_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
hw->device_id = pci_dev->id.device_id;
hw->vendor_id = pci_dev->id.vendor_id;
hw->hw_addr = (void *)pci_dev->mem_resource.addr;
hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
/* initialize the vfta */
memset(shadow_vfta, 0, sizeof(*shadow_vfta));