Wangyu (Eric) d25ab4b7f1 bus/pci: align next mapping address on page boundary
Currently, the next address picked by PCI mapping infrastructure
may be page-unaligned due to BAR length being smaller than page size.
This leads to a situation where the requested map address is invalid,
resulting in mmap() call returning an arbitrary address,
which will later interfere with device BAR mapping in secondary processes.

Fix it by always aligning the next requested address on page boundary.

Fixes: c752998b5e2e ("pci: introduce library and driver")
Cc: stable@dpdk.org

Signed-off-by: Xiaofeng Deng <dengxiaofeng@huawei.com>
Signed-off-by: Wangyu (Eric) <seven.wangyu@huawei.com>
Acked-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
Acked-by: Min Hu (Connor) <humin29@huawei.com>
Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Gavin Hu <gavin.hu@arm.com>
2019-11-14 14:17:48 +01:00

570 lines
13 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2014 Intel Corporation
*/
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <dirent.h>
#include <inttypes.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/sysmacros.h>
#include <linux/pci_regs.h>
#if defined(RTE_ARCH_X86)
#include <sys/io.h>
#endif
#include <rte_string_fns.h>
#include <rte_log.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
#include <rte_common.h>
#include <rte_malloc.h>
#include "eal_filesystem.h"
#include "pci_init.h"
/*
 * Address hint passed to pci_map_resource() for the next PCI resource
 * mapping. It starts as NULL; pci_uio_map_resource_by_index() seeds it from
 * pci_find_max_end_va() on first use and afterwards advances it past each
 * successful mapping, rounded up to the page size.
 */
void *pci_map_addr = NULL;
/*
 * -1 cast to off_t and then converted to uint64_t.
 * NOTE(review): not referenced in the visible part of this file - confirm
 * whether it is still needed.
 */
#define OFF_MAX ((uint64_t)(off_t)-1)
/*
 * Read len bytes at the given offset through the UIO config file descriptor
 * held in the interrupt handle.
 *
 * Returns the result of pread() converted to int: the number of bytes read,
 * or a negative value on failure.
 */
int
pci_uio_read_config(const struct rte_intr_handle *intr_handle,
		    void *buf, size_t len, off_t offset)
{
	int nread;

	nread = pread(intr_handle->uio_cfg_fd, buf, len, offset);

	return nread;
}
/*
 * Write len bytes at the given offset through the UIO config file descriptor
 * held in the interrupt handle.
 *
 * Returns the result of pwrite() converted to int: the number of bytes
 * written, or a negative value on failure.
 */
int
pci_uio_write_config(const struct rte_intr_handle *intr_handle,
		     const void *buf, size_t len, off_t offset)
{
	int nwritten;

	nwritten = pwrite(intr_handle->uio_cfg_fd, buf, len, offset);

	return nwritten;
}
/*
 * Make sure the bus-master bit is set in the PCI command register.
 *
 * dev_fd is a descriptor on which pread()/pwrite() at offset PCI_COMMAND
 * access the 16-bit command register.
 *
 * Returns 0 when the bit was already set or has been written successfully,
 * -1 when the register cannot be read or written in full.
 */
static int
pci_uio_set_bus_master(int dev_fd)
{
	uint16_t cmd;
	int nbytes;

	nbytes = pread(dev_fd, &cmd, sizeof(cmd), PCI_COMMAND);
	if (nbytes != sizeof(cmd)) {
		RTE_LOG(ERR, EAL,
			"Cannot read command from PCI config space!\n");
		return -1;
	}

	/* only touch the register when bus mastering is still off */
	if ((cmd & PCI_COMMAND_MASTER) == 0) {
		cmd |= PCI_COMMAND_MASTER;
		nbytes = pwrite(dev_fd, &cmd, sizeof(cmd), PCI_COMMAND);
		if (nbytes != sizeof(cmd)) {
			RTE_LOG(ERR, EAL,
				"Cannot write command to PCI config space!\n");
			return -1;
		}
	}

	return 0;
}
/*
 * Create the /dev/uioX character device node for UIO device number uio_num.
 *
 * The major:minor pair is read from the "dev" file located under
 * sysfs_uio_path. Returns 0 on success and -1 if the sysfs file cannot be
 * opened or parsed, or if mknod() fails.
 */
static int
pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num)
{
	char path[PATH_MAX];
	unsigned int dev_major, dev_minor;
	int nparsed;
	FILE *devfile;

	/* the sysfs "dev" attribute holds "major:minor" of the uio device */
	snprintf(path, sizeof(path), "%s/dev", sysfs_uio_path);
	devfile = fopen(path, "r");
	if (devfile == NULL) {
		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs to get major:minor\n",
			__func__);
		return -1;
	}

	nparsed = fscanf(devfile, "%u:%u", &dev_major, &dev_minor);
	fclose(devfile);
	if (nparsed != 2) {
		RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs to get major:minor\n",
			__func__);
		return -1;
	}

	/* equivalent of "mknod /dev/uioX c major minor" */
	snprintf(path, sizeof(path), "/dev/uio%u", uio_num);
	if (mknod(path, S_IFCHR | S_IRUSR | S_IWUSR,
		  makedev(dev_major, dev_minor)) != 0) {
		RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n",
			__func__, strerror(errno));
		return -1;
	}

	return 0;
}
/*
 * Return the uioX char device used for a pci device. On success, return
 * the UIO number and fill dstbuf string with the path of the device in
 * sysfs. On error, return a negative value. In this case dstbuf is
 * invalid.
 *
 * dev:    PCI device whose address selects the sysfs directory to scan.
 * dstbuf: output buffer receiving "<dir>/uioN" or "<dir>/uio:uioN".
 * buflen: size of dstbuf in bytes.
 * create: when non-zero and rte_eal_create_uio_dev() is true, also try to
 *         create the /dev/uioN node; a failure there is only logged.
 */
static int
pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf,
		unsigned int buflen, int create)
{
	static const char short_prefix[] = "uio";
	static const char long_prefix[] = "uio:uio";
	const size_t short_len = sizeof(short_prefix) - 1;
	const size_t long_len = sizeof(long_prefix) - 1;
	struct rte_pci_addr *loc = &dev->addr;
	int uio_num = -1;
	struct dirent *e;
	DIR *dir;
	char dirname[PATH_MAX];

	/* depending on kernel version, uio can be located in uio/uioX
	 * or uio:uioX */
	snprintf(dirname, sizeof(dirname),
		 "%s/" PCI_PRI_FMT "/uio", rte_pci_get_sysfs_path(),
		 loc->domain, loc->bus, loc->devid, loc->function);
	dir = opendir(dirname);
	if (dir == NULL) {
		/* retry with the parent directory */
		snprintf(dirname, sizeof(dirname),
			 "%s/" PCI_PRI_FMT, rte_pci_get_sysfs_path(),
			 loc->domain, loc->bus, loc->devid, loc->function);
		dir = opendir(dirname);
		if (dir == NULL) {
			RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname);
			return -1;
		}
	}

	/* take the first file starting with "uio" */
	while ((e = readdir(dir)) != NULL) {
		const char *digits;
		char *endptr;

		if (strncmp(e->d_name, short_prefix, short_len) != 0)
			continue;

		/* first try uio%d */
		digits = e->d_name + short_len;
		errno = 0;
		uio_num = strtoull(digits, &endptr, 10);
		if (errno == 0 && endptr != digits) {
			snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num);
			break;
		}

		/*
		 * then try uio:uio%d, but only when the entry really carries
		 * that prefix: for a shorter name (e.g. "uio") the digits
		 * pointer would land beyond the string terminator and parse
		 * whatever bytes happen to follow it.
		 */
		if (strncmp(e->d_name, long_prefix, long_len) != 0)
			continue;

		digits = e->d_name + long_len;
		errno = 0;
		uio_num = strtoull(digits, &endptr, 10);
		if (errno == 0 && endptr != digits) {
			snprintf(dstbuf, buflen, "%s/uio:uio%u", dirname, uio_num);
			break;
		}
	}
	closedir(dir);

	/* No uio resource found */
	if (e == NULL)
		return -1;

	/* create uio device if we've been asked to */
	if (rte_eal_create_uio_dev() && create &&
			pci_mknod_uio_dev(dstbuf, uio_num) < 0)
		RTE_LOG(WARNING, EAL, "Cannot create /dev/uio%u\n", uio_num);

	return uio_num;
}
/*
 * Release what pci_uio_alloc_resource() set up for a device: close the UIO
 * config descriptor and the UIO device descriptor stored in the device's
 * interrupt handle (when they are open) and free the mapping record.
 *
 * Closed descriptors are reset to -1; closing the device descriptor also
 * resets the interrupt handle type to RTE_INTR_HANDLE_UNKNOWN.
 */
void
pci_uio_free_resource(struct rte_pci_device *dev,
		struct mapped_pci_resource *uio_res)
{
	/* config-space descriptor */
	if (dev->intr_handle.uio_cfg_fd >= 0) {
		close(dev->intr_handle.uio_cfg_fd);
		dev->intr_handle.uio_cfg_fd = -1;
	}

	/* /dev/uioX descriptor */
	if (dev->intr_handle.fd >= 0) {
		close(dev->intr_handle.fd);
		dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
		dev->intr_handle.fd = -1;
	}

	rte_free(uio_res);
}
/*
 * Open the UIO device and its config file for a PCI device and allocate the
 * record that stores the mapping details.
 *
 * On success *uio_res points to a zeroed record holding the /dev/uioX path
 * and the PCI address, and the descriptors are stored in dev->intr_handle.
 *
 * Returns 0 on success, 1 when no UIO device is found for the PCI device,
 * and -1 on any other failure (after releasing resources through
 * pci_uio_free_resource()).
 */
int
pci_uio_alloc_resource(struct rte_pci_device *dev,
		struct mapped_pci_resource **uio_res)
{
	struct rte_pci_addr *loc = &dev->addr;
	char sysfs_dir[PATH_MAX];
	char dev_path[PATH_MAX]; /* /dev/uioX */
	char cfg_path[PATH_MAX]; /* sysfs config file of the uio device */
	int uio_idx;

	/* look up the uio device number, creating the node if requested */
	uio_idx = pci_get_uio_dev(dev, sysfs_dir, sizeof(sysfs_dir), 1);
	if (uio_idx < 0) {
		RTE_LOG(WARNING, EAL,
			" " PCI_PRI_FMT " not managed by UIO driver, skipping\n",
			loc->domain, loc->bus, loc->devid, loc->function);
		return 1;
	}

	/* the uio char device descriptor is kept in the interrupt handle */
	snprintf(dev_path, sizeof(dev_path), "/dev/uio%u", uio_idx);
	dev->intr_handle.fd = open(dev_path, O_RDWR);
	if (dev->intr_handle.fd < 0) {
		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
			dev_path, strerror(errno));
		goto error;
	}

	snprintf(cfg_path, sizeof(cfg_path),
		 "/sys/class/uio/uio%u/device/config", uio_idx);
	dev->intr_handle.uio_cfg_fd = open(cfg_path, O_RDWR);
	if (dev->intr_handle.uio_cfg_fd < 0) {
		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
			cfg_path, strerror(errno));
		goto error;
	}

	if (dev->kdrv != RTE_KDRV_IGB_UIO) {
		dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX;

		/* set bus master that is not done by uio_pci_generic */
		if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) {
			RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n");
			goto error;
		}
	} else {
		dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
	}

	/* record kept so that secondary processes can redo the mappings */
	*uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0);
	if (*uio_res == NULL) {
		RTE_LOG(ERR, EAL,
			"%s(): cannot store uio mmap details\n", __func__);
		goto error;
	}

	strlcpy((*uio_res)->path, dev_path, sizeof((*uio_res)->path));
	memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr));

	return 0;

error:
	pci_uio_free_resource(dev, *uio_res);
	return -1;
}
/*
 * Map PCI resource res_idx of a device through its sysfs resource file and
 * record the mapping in uio_res->maps[map_idx].
 *
 * When the bound driver sets RTE_PCI_DRV_WC_ACTIVATE, the "resourceN_wc"
 * file is tried first and "resourceN" is used as fallback. The mapping is
 * requested at pci_map_addr, which is then moved past the new mapping and
 * rounded up to the page size for the next call.
 *
 * Returns 0 on success, -1 on failure (the path buffer allocated for the
 * map entry is freed in that case).
 */
int
pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
		struct mapped_pci_resource *uio_res, int map_idx)
{
	struct rte_pci_addr *loc = &dev->addr;
	struct pci_map *slot = &uio_res->maps[map_idx];
	char res_path[PATH_MAX];
	void *va;
	int fd = -1;
	int use_wc = dev->driver != NULL ?
		dev->driver->drv_flags & RTE_PCI_DRV_WC_ACTIVATE : 0;

	/* buffer that will remember which file was mapped */
	slot->path = rte_malloc(NULL, sizeof(res_path), 0);
	if (slot->path == NULL) {
		RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n",
			strerror(errno));
		return -1;
	}

	/* preferred: the write-combining resource file, if asked for */
	if (use_wc) {
		snprintf(res_path, sizeof(res_path),
			 "%s/" PCI_PRI_FMT "/resource%d_wc",
			 rte_pci_get_sysfs_path(),
			 loc->domain, loc->bus, loc->devid,
			 loc->function, res_idx);

		fd = open(res_path, O_RDWR);
		if (fd < 0 && errno != ENOENT) {
			RTE_LOG(INFO, EAL, "%s cannot be mapped. "
				"Fall-back to non prefetchable mode.\n",
				res_path);
		}
	}

	/* fd is still negative if WC was not requested or could not be opened */
	if (fd < 0) {
		snprintf(res_path, sizeof(res_path),
			 "%s/" PCI_PRI_FMT "/resource%d",
			 rte_pci_get_sysfs_path(),
			 loc->domain, loc->bus, loc->devid,
			 loc->function, res_idx);

		fd = open(res_path, O_RDWR);
		if (fd < 0) {
			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
				res_path, strerror(errno));
			goto error;
		}
	}

	/* first mapping: start somewhere close to the end of hugepages */
	if (pci_map_addr == NULL)
		pci_map_addr = pci_find_max_end_va();

	va = pci_map_resource(pci_map_addr, fd, 0,
			      (size_t)dev->mem_resource[res_idx].len, 0);
	close(fd);
	if (va == MAP_FAILED)
		goto error;

	/* next hint: end of this mapping, rounded up to a page boundary */
	pci_map_addr = RTE_PTR_ADD(va,
				   (size_t)dev->mem_resource[res_idx].len);
	pci_map_addr = RTE_PTR_ALIGN(pci_map_addr, sysconf(_SC_PAGE_SIZE));

	slot->phaddr = dev->mem_resource[res_idx].phys_addr;
	slot->size = dev->mem_resource[res_idx].len;
	slot->addr = va;
	slot->offset = 0;
	strcpy(slot->path, res_path);

	dev->mem_resource[res_idx].addr = va;

	return 0;

error:
	rte_free(slot->path);
	return -1;
}
#if defined(RTE_ARCH_X86)
/*
 * x86 variant: look up the I/O port base of a BAR in the UIO sysfs tree.
 *
 * The port start is read from "<uio dir>/portio/port<bar>/start". If the
 * device's interrupt handle type is still RTE_INTR_HANDLE_UNKNOWN, the
 * /dev/uioX device is opened and stored in the handle as well.
 *
 * On success p->base holds the port start, p->len is 0 and 0 is returned;
 * -1 is returned on any failure.
 */
int
pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
		   struct rte_pci_ioport *p)
{
	char uio_dir[PATH_MAX];
	char path[PATH_MAX];
	unsigned long port_start;
	int uio_idx;

	if (rte_eal_iopl_init() != 0) {
		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
			__func__, dev->name);
		return -1;
	}

	uio_idx = pci_get_uio_dev(dev, uio_dir, sizeof(uio_dir), 0);
	if (uio_idx < 0)
		return -1;

	/* read the first port number of this BAR from sysfs */
	snprintf(path, sizeof(path),
		 "%s/portio/port%d/start", uio_dir, bar);
	if (eal_parse_sysfs_value(path, &port_start) < 0) {
		RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n",
			__func__);
		return -1;
	}

	/* reject anything that does not fit in 16 bits; the comment in the
	 * original code notes that read/write will cast to uint16_t */
	if (port_start > UINT16_MAX)
		return -1;

	/* FIXME only for primary process ? */
	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
		snprintf(path, sizeof(path), "/dev/uio%u", uio_idx);
		dev->intr_handle.fd = open(path, O_RDWR);
		if (dev->intr_handle.fd < 0) {
			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
				path, strerror(errno));
			return -1;
		}
		dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
	}

	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", port_start);

	p->base = port_start;
	p->len = 0;

	return 0;
}
#else
/*
 * Non-x86 variant: map the I/O port BAR of a device through sysfs.
 *
 * The line for the BAR is read from the device's sysfs "resource" file and
 * parsed; the BAR must carry the IORESOURCE_IO flag. The matching
 * "resource<bar>" file is then mmap()ed with a length of end_addr + 1.
 *
 * On success p->base is the mmap address plus the parsed start address,
 * p->len is the mapped length and 0 is returned; -1 is returned on failure.
 */
int
pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
		   struct rte_pci_ioport *p)
{
	char line[BUFSIZ];
	char path[PATH_MAX];
	uint64_t phys_addr, end_addr, flags;
	void *addr;
	FILE *f;
	int fd, i;
	int rc = -1;

	/* open and read addresses of the corresponding resource in sysfs */
	snprintf(path, sizeof(path), "%s/" PCI_PRI_FMT "/resource",
		 rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
		 dev->addr.devid, dev->addr.function);
	f = fopen(path, "r");
	if (f == NULL) {
		RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n",
			strerror(errno));
		return -1;
	}

	/* skip forward so that line holds the entry of the requested BAR */
	for (i = 0; i <= bar; i++) {
		if (fgets(line, sizeof(line), f) == NULL) {
			RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n");
			goto out;
		}
	}

	if (pci_parse_one_sysfs_resource(line, sizeof(line), &phys_addr,
					 &end_addr, &flags) < 0)
		goto out;

	if ((flags & IORESOURCE_IO) == 0) {
		RTE_LOG(ERR, EAL, "BAR %d is not an IO resource\n", bar);
		goto out;
	}

	/* mmap the pci resource */
	snprintf(path, sizeof(path), "%s/" PCI_PRI_FMT "/resource%d",
		 rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
		 dev->addr.devid, dev->addr.function, bar);
	fd = open(path, O_RDWR);
	if (fd < 0) {
		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", path,
			strerror(errno));
		goto out;
	}

	addr = mmap(NULL, end_addr + 1, PROT_READ | PROT_WRITE,
		    MAP_SHARED, fd, 0);
	close(fd);
	if (addr == MAP_FAILED) {
		RTE_LOG(ERR, EAL, "Cannot mmap IO port resource: %s\n",
			strerror(errno));
		goto out;
	}

	/* strangely, the base address is mmap addr + phys_addr */
	p->base = (uintptr_t)addr + phys_addr;
	p->len = end_addr + 1;
	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%"PRIx64"\n", p->base);
	rc = 0;

out:
	fclose(f);
	return rc;
}
#endif
/*
 * Read len bytes from an I/O port region into data, starting at
 * p->base + offset.
 *
 * The transfer is split into 4-byte accesses while at least 4 bytes remain,
 * then a 2-byte access, then single bytes. On x86 the port instructions
 * inl/inw/inb are used; elsewhere the address is dereferenced as volatile
 * memory.
 */
void
pci_uio_ioport_read(struct rte_pci_ioport *p,
		    void *data, size_t len, off_t offset)
{
	uint8_t *dst = data;
	uintptr_t port = p->base + offset;
	int width;

	while (len > 0) {
		if (len >= 4) {
			width = 4;
#if defined(RTE_ARCH_X86)
			*(uint32_t *)dst = inl(port);
#else
			*(uint32_t *)dst = *(volatile uint32_t *)port;
#endif
		} else if (len >= 2) {
			width = 2;
#if defined(RTE_ARCH_X86)
			*(uint16_t *)dst = inw(port);
#else
			*(uint16_t *)dst = *(volatile uint16_t *)port;
#endif
		} else {
			width = 1;
#if defined(RTE_ARCH_X86)
			*dst = inb(port);
#else
			*dst = *(volatile uint8_t *)port;
#endif
		}

		/* advance past the chunk that was just transferred */
		dst += width;
		port += width;
		len -= width;
	}
}
/*
 * Write len bytes from data to an I/O port region, starting at
 * p->base + offset.
 *
 * The transfer is split into 4-byte accesses while at least 4 bytes remain,
 * then a 2-byte access, then single bytes. On x86 the port instructions
 * outl_p/outw_p/outb_p are used; elsewhere the address is written as
 * volatile memory.
 */
void
pci_uio_ioport_write(struct rte_pci_ioport *p,
		     const void *data, size_t len, off_t offset)
{
	const uint8_t *src = data;
	uintptr_t port = p->base + offset;
	int width;

	while (len > 0) {
		if (len >= 4) {
			width = 4;
#if defined(RTE_ARCH_X86)
			outl_p(*(const uint32_t *)src, port);
#else
			*(volatile uint32_t *)port = *(const uint32_t *)src;
#endif
		} else if (len >= 2) {
			width = 2;
#if defined(RTE_ARCH_X86)
			outw_p(*(const uint16_t *)src, port);
#else
			*(volatile uint16_t *)port = *(const uint16_t *)src;
#endif
		} else {
			width = 1;
#if defined(RTE_ARCH_X86)
			outb_p(*src, port);
#else
			*(volatile uint8_t *)port = *src;
#endif
		}

		/* advance past the chunk that was just transferred */
		src += width;
		port += width;
		len -= width;
	}
}
/*
 * Undo pci_uio_ioport_map().
 *
 * On x86 nothing is released and 0 is returned. On other architectures the
 * region [p->base, p->base + p->len) is unmapped and the munmap() result is
 * returned.
 */
int
pci_uio_ioport_unmap(struct rte_pci_ioport *p)
{
	int rc = 0;

#if defined(RTE_ARCH_X86)
	RTE_SET_USED(p);
	/* FIXME close intr fd ? */
#else
	rc = munmap((void *)(uintptr_t)p->base, p->len);
#endif

	return rc;
}