eal/linux: support running as unprivileged user

For Linux kernel 4.0 and newer, the ability to obtain
physical page frame numbers for unprivileged users from
/proc/self/pagemap was removed. Instead, when an IOMMU
is present, simply choose our own DMA addresses.

Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Acked-by: Sergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com>
This commit is contained in:
Ben Walker 2017-01-31 10:44:53 -07:00 committed by Thomas Monjalon
parent 7968191b5a
commit cdc242f260
3 changed files with 71 additions and 22 deletions

View File

@ -34,6 +34,7 @@
#ifndef _EAL_PRIVATE_H_
#define _EAL_PRIVATE_H_
#include <stdbool.h>
#include <stdio.h>
#include <rte_pci.h>
@ -301,4 +302,15 @@ int rte_eal_hugepage_init(void);
*/
int rte_eal_hugepage_attach(void);
/**
* Returns true if the system is able to obtain
* physical addresses. Return false if using DMA
* addresses through an IOMMU.
*
* Drivers based on uio will not load unless physical
* addresses are obtainable. It is only possible to get
* physical addresses when running as a privileged user.
*/
bool rte_eal_using_phys_addrs(void);
#endif /* _EAL_PRIVATE_H_ */

View File

@ -64,6 +64,7 @@
#define _FILE_OFFSET_BITS 64
#include <errno.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
@ -122,26 +123,24 @@ int rte_xen_dom0_supported(void)
static uint64_t baseaddr_offset;
static unsigned proc_pagemap_readable;
static bool phys_addrs_available = true;
#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
static void
test_proc_pagemap_readable(void)
test_phys_addrs_available(void)
{
int fd = open("/proc/self/pagemap", O_RDONLY);
uint64_t tmp;
phys_addr_t physaddr;
if (fd < 0) {
physaddr = rte_mem_virt2phy(&tmp);
if (physaddr == RTE_BAD_PHYS_ADDR) {
RTE_LOG(ERR, EAL,
"Cannot open /proc/self/pagemap: %s. "
"virt2phys address translation will not work\n",
"Cannot obtain physical addresses: %s. "
"Only vfio will function.\n",
strerror(errno));
return;
phys_addrs_available = false;
}
/* Is readable */
close(fd);
proc_pagemap_readable = 1;
}
/* Lock page in physical memory and prevent from swapping. */
@ -190,7 +189,7 @@ rte_mem_virt2phy(const void *virtaddr)
}
/* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
if (!proc_pagemap_readable)
if (!phys_addrs_available)
return RTE_BAD_PHYS_ADDR;
/* standard page size */
@ -229,6 +228,9 @@ rte_mem_virt2phy(const void *virtaddr)
* the pfn (page frame number) are bits 0-54 (see
* pagemap.txt in linux Documentation)
*/
if ((page & 0x7fffffffffffffULL) == 0)
return RTE_BAD_PHYS_ADDR;
physaddr = ((page & 0x7fffffffffffffULL) * page_size)
+ ((unsigned long)virtaddr % page_size);
@ -242,7 +244,7 @@ rte_mem_virt2phy(const void *virtaddr)
static int
find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
unsigned i;
unsigned int i;
phys_addr_t addr;
for (i = 0; i < hpi->num_pages[0]; i++) {
@ -254,6 +256,22 @@ find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
return 0;
}
/*
 * Assign artificial, monotonically increasing physical addresses to every
 * hugepage in hugepg_tbl. Used when real physical addresses cannot be
 * obtained (unprivileged process); an IOMMU makes the actual values moot.
 *
 * NOTE: next_addr is static so that successive calls (one per hugepage
 * size) continue the sequence instead of restarting at zero.
 */
static int
set_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
	static phys_addr_t next_addr;
	unsigned int idx;

	for (idx = 0; idx < hpi->num_pages[0]; idx++) {
		hugepg_tbl[idx].physaddr = next_addr;
		next_addr += hugepg_tbl[idx].size;
	}

	return 0;
}
/*
* Check whether address-space layout randomization is enabled in
* the kernel. This is important for multi-process as it can prevent
@ -951,7 +969,7 @@ rte_eal_hugepage_init(void)
int nr_hugefiles, nr_hugepages = 0;
void *addr;
test_proc_pagemap_readable();
test_phys_addrs_available();
memset(used_hp, 0, sizeof(used_hp));
@ -1043,11 +1061,22 @@ rte_eal_hugepage_init(void)
continue;
}
/* find physical addresses and sockets for each hugepage */
if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){
RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
(unsigned)(hpi->hugepage_sz / 0x100000));
goto fail;
if (phys_addrs_available) {
/* find physical addresses for each hugepage */
if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
RTE_LOG(DEBUG, EAL, "Failed to find phys addr "
"for %u MB pages\n",
(unsigned int)(hpi->hugepage_sz / 0x100000));
goto fail;
}
} else {
/* set physical addresses for each hugepage */
if (set_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
RTE_LOG(DEBUG, EAL, "Failed to set phys addr "
"for %u MB pages\n",
(unsigned int)(hpi->hugepage_sz / 0x100000));
goto fail;
}
}
if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
@ -1289,7 +1318,7 @@ rte_eal_hugepage_attach(void)
"into secondary processes\n");
}
test_proc_pagemap_readable();
test_phys_addrs_available();
if (internal_config.xen_dom0_support) {
#ifdef RTE_LIBRTE_XEN_DOM0
@ -1426,3 +1455,9 @@ rte_eal_hugepage_attach(void)
close(fd_hugepage);
return -1;
}
/*
 * Accessor for the file-scope phys_addrs_available flag (documented in
 * eal_private.h): true when physical addresses are obtainable, false
 * when DMA addresses through an IOMMU must be used instead.
 */
bool
rte_eal_using_phys_addrs(void)
{
	return phys_addrs_available;
}

View File

@ -99,8 +99,10 @@ rte_eal_pci_map_device(struct rte_pci_device *dev)
break;
case RTE_KDRV_IGB_UIO:
case RTE_KDRV_UIO_GENERIC:
/* map resources for devices that use uio */
ret = pci_uio_map_resource(dev);
if (rte_eal_using_phys_addrs()) {
/* map resources for devices that use uio */
ret = pci_uio_map_resource(dev);
}
break;
default:
RTE_LOG(DEBUG, EAL,