mem: share hugepage info primary and secondary

Since we are going to need to map hugepages in both primary and
secondary processes, we need to know where we should look for
hugetlbfs mountpoints. So, share those with secondary processes,
and map them on init.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Tested-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Tested-by: Hemant Agrawal <hemant.agrawal@nxp.com>
Tested-by: Gowrishankar Muthukrishnan <gowrishankar.m@linux.vnet.ibm.com>
Author:    Anatoly Burakov
Date:      2018-04-11 13:30:33 +01:00
Committer: Thomas Monjalon
Commit:    cb97d93e9d (parent 41519b9006)

10 changed files with 217 additions and 67 deletions
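The idea, as a minimal stand-alone sketch (not part of the patch): the primary process creates a small file-backed shared mapping, copies its hugepage information into it, and any secondary process opens the same file on init and copies the data back out. The shared_info layout, the map_shared() helper and the /tmp path below are illustrative placeholders only, not DPDK APIs; the actual patch shares struct hugepage_info through the file returned by eal_hugepage_info_path(), as the diffs below show.

/*
 * Illustrative sketch of the primary/secondary sharing pattern.
 * Everything here (names, path, struct layout) is a placeholder.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

struct shared_info {
	char hugedir[256];          /* hugetlbfs mountpoint */
	unsigned long hugepage_sz;  /* page size in bytes */
};

static void *
map_shared(const char *path, size_t len, int flags)
{
	void *addr;
	int fd = open(path, flags, 0666);

	if (fd < 0)
		return NULL;
	if (ftruncate(fd, len) < 0) {
		close(fd);
		return NULL;
	}
	addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	close(fd);
	return addr == MAP_FAILED ? NULL : addr;
}

int
main(int argc, char **argv)
{
	const char *path = "/tmp/example_hugepage_info"; /* placeholder path */
	struct shared_info info = { "/mnt/huge", 2UL << 20 };
	int secondary = argc > 1 && strcmp(argv[1], "secondary") == 0;
	/* primary creates the backing file (O_CREAT); secondary only opens it */
	struct shared_info *shm = map_shared(path, sizeof(info),
			secondary ? O_RDWR : O_RDWR | O_CREAT);

	if (shm == NULL)
		return 1;
	if (secondary)
		memcpy(&info, shm, sizeof(info));  /* read what primary wrote */
	else
		memcpy(shm, &info, sizeof(info));  /* publish for secondaries */
	printf("hugedir: %s, hugepage size: %lu bytes\n",
			info.hugedir, info.hugepage_sz);
	return munmap(shm, sizeof(info)) < 0;
}

Run the sketch once with no arguments to act as the primary, then again with the argument "secondary" to read back what the primary published.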


@@ -289,7 +289,7 @@ eal_get_hugepage_mem_size(void)
 	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
 		struct hugepage_info *hpi = &internal_config.hugepage_info[i];
-		if (hpi->hugedir != NULL) {
+		if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0) {
 			for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
 				size += hpi->hugepage_sz * hpi->num_pages[j];
 			}
@@ -561,12 +561,17 @@ rte_eal_init(int argc, char **argv)
 	/* autodetect the iova mapping mode (default is iova_pa) */
 	rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class();
 
-	if (internal_config.no_hugetlbfs == 0 &&
-			eal_hugepage_info_init() < 0) {
-		rte_eal_init_alert("Cannot get hugepage information.");
-		rte_errno = EACCES;
-		rte_atomic32_clear(&run_once);
-		return -1;
+	if (internal_config.no_hugetlbfs == 0) {
+		/* rte_config isn't initialized yet */
+		ret = internal_config.process_type == RTE_PROC_PRIMARY ?
+				eal_hugepage_info_init() :
+				eal_hugepage_info_read();
+		if (ret < 0) {
+			rte_eal_init_alert("Cannot get hugepage information.");
+			rte_errno = EACCES;
+			rte_atomic32_clear(&run_once);
+			return -1;
+		}
 	}
 
 	if (internal_config.memory == 0 && internal_config.force_sockets == 0) {


@@ -19,10 +19,10 @@
  * Used in this file to store the hugepage file map on disk
  */
 static void *
-create_shared_memory(const char *filename, const size_t mem_size)
+map_shared_memory(const char *filename, const size_t mem_size, int flags)
 {
 	void *retval;
-	int fd = open(filename, O_CREAT | O_RDWR, 0666);
+	int fd = open(filename, flags, 0666);
 	if (fd < 0)
 		return NULL;
 	if (ftruncate(fd, mem_size) < 0) {
@@ -34,6 +34,18 @@ create_shared_memory(const char *filename, const size_t mem_size)
 	return retval;
 }
 
+static void *
+open_shared_memory(const char *filename, const size_t mem_size)
+{
+	return map_shared_memory(filename, mem_size, O_RDWR);
+}
+
+static void *
+create_shared_memory(const char *filename, const size_t mem_size)
+{
+	return map_shared_memory(filename, mem_size, O_RDWR | O_CREAT);
+}
+
 /*
  * No hugepage support on freebsd, but we dummy it, using contigmem driver
  */
@@ -46,13 +58,10 @@ eal_hugepage_info_init(void)
 	/* re-use the linux "internal config" structure for our memory data */
 	struct hugepage_info *hpi = &internal_config.hugepage_info[0];
 	struct hugepage_info *tmp_hpi;
+	unsigned int i;
 
 	internal_config.num_hugepage_sizes = 1;
 
-	/* nothing more to be done for secondary */
-	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
-		return 0;
-
 	sysctl_size = sizeof(num_buffers);
 	error = sysctlbyname("hw.contigmem.num_buffers", &num_buffers,
 			&sysctl_size, NULL, 0);
@@ -87,7 +96,7 @@ eal_hugepage_info_init(void)
 	RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dKB\n",
 			num_buffers, (int)(buffer_size>>10));
 
-	hpi->hugedir = CONTIGMEM_DEV;
+	snprintf(hpi->hugedir, sizeof(hpi->hugedir), "%s", CONTIGMEM_DEV);
 	hpi->hugepage_sz = buffer_size;
 	hpi->num_pages[0] = num_buffers;
 	hpi->lock_descriptor = fd;
@@ -101,6 +110,14 @@ eal_hugepage_info_init(void)
 	memcpy(tmp_hpi, hpi, sizeof(internal_config.hugepage_info));
 
+	/* we've copied file descriptors along with everything else, but they
+	 * will be invalid in secondary process, so overwrite them
+	 */
+	for (i = 0; i < RTE_DIM(internal_config.hugepage_info); i++) {
+		struct hugepage_info *tmp = &tmp_hpi[i];
+		tmp->lock_descriptor = -1;
+	}
+
 	if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) {
 		RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
 		return -1;
@@ -108,3 +125,28 @@ eal_hugepage_info_init(void)
 
 	return 0;
 }
+
+/* copy stuff from shared info into internal config */
+int
+eal_hugepage_info_read(void)
+{
+	struct hugepage_info *hpi = &internal_config.hugepage_info[0];
+	struct hugepage_info *tmp_hpi;
+
+	internal_config.num_hugepage_sizes = 1;
+
+	tmp_hpi = open_shared_memory(eal_hugepage_info_path(),
+			sizeof(internal_config.hugepage_info));
+	if (tmp_hpi == NULL) {
+		RTE_LOG(ERR, EAL, "Failed to open shared memory!\n");
+		return -1;
+	}
+
+	memcpy(hpi, tmp_hpi, sizeof(internal_config.hugepage_info));
+
+	if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) {
+		RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
+		return -1;
+	}
+	return 0;
+}


@@ -242,23 +242,10 @@ int
 rte_eal_hugepage_attach(void)
 {
 	const struct hugepage_info *hpi;
-	int fd_hugepage_info, fd_hugepage = -1;
+	int fd_hugepage = -1;
 	unsigned int i;
 
-	/* Obtain a file descriptor for hugepage_info */
-	fd_hugepage_info = open(eal_hugepage_info_path(), O_RDONLY);
-	if (fd_hugepage_info < 0) {
-		RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path());
-		return -1;
-	}
-
-	/* Map the shared hugepage_info into the process address spaces */
-	hpi = mmap(NULL, sizeof(internal_config.hugepage_info),
-			PROT_READ, MAP_PRIVATE, fd_hugepage_info, 0);
-	if (hpi == MAP_FAILED) {
-		RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path());
-		goto error;
-	}
+	hpi = &internal_config.hugepage_info[0];
 
 	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
 		const struct hugepage_info *cur_hpi = &hpi[i];
@@ -288,13 +275,9 @@ rte_eal_hugepage_attach(void)
 	}
 
 	/* hugepage_info is no longer required */
-	munmap((void *)(uintptr_t)hpi, sizeof(internal_config.hugepage_info));
-	close(fd_hugepage_info);
 	return 0;
 
 error:
-	if (fd_hugepage_info >= 0)
-		close(fd_hugepage_info);
 	if (fd_hugepage >= 0)
 		close(fd_hugepage);
 	return -1;


@@ -179,8 +179,11 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
 	for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
 		internal_cfg->socket_mem[i] = 0;
 	/* zero out hugedir descriptors */
-	for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
+	for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) {
+		memset(&internal_cfg->hugepage_info[i], 0,
+				sizeof(internal_cfg->hugepage_info[0]));
 		internal_cfg->hugepage_info[i].lock_descriptor = -1;
+	}
 	internal_cfg->base_virtaddr = 0;
 	internal_cfg->syslog_facility = LOG_DAEMON;


@@ -85,6 +85,23 @@ eal_hugepage_info_path(void)
 	return buffer;
 }
 
+/** Path of hugepage info file. */
+#define HUGEPAGE_FILE_FMT "%s/.%s_hugepage_file"
+
+static inline const char *
+eal_hugepage_file_path(void)
+{
+	static char buffer[PATH_MAX]; /* static so auto-zeroed */
+	const char *directory = default_config_dir;
+	const char *home_dir = getenv("HOME");
+
+	if (getuid() != 0 && home_dir != NULL)
+		directory = home_dir;
+	snprintf(buffer, sizeof(buffer) - 1, HUGEPAGE_FILE_FMT, directory,
+			internal_config.hugefile_prefix);
+	return buffer;
+}
+
 /** String format for hugepage map files. */
 #define HUGEFILE_FMT "%s/%smap_%d"
 #define TEMP_HUGEFILE_FMT "%s/%smap_temp_%d"


@@ -26,9 +26,15 @@ struct hugepage_file {
 };
 
 /**
- * Read the information from linux on what hugepages are available
- * for the EAL to use
+ * Read the information on what hugepages are available for the EAL to use,
+ * clearing out any unused ones.
  */
 int eal_hugepage_info_init(void);
 
+/**
+ * Read whatever information primary process has shared about hugepages into
+ * secondary process.
+ */
+int eal_hugepage_info_read(void);
+
 #endif /* EAL_HUGEPAGES_H */


@@ -21,7 +21,7 @@
  */
 struct hugepage_info {
 	uint64_t hugepage_sz;   /**< size of a huge page */
-	const char *hugedir;    /**< dir where hugetlbfs is mounted */
+	char hugedir[PATH_MAX];    /**< dir where hugetlbfs is mounted */
 	uint32_t num_pages[RTE_MAX_NUMA_NODES];
 	/**< number of hugepages of that size on each socket */
 	int lock_descriptor;    /**< file descriptor for hugepage dir */


@@ -807,13 +807,17 @@ rte_eal_init(int argc, char **argv)
 				"KNI module inserted\n");
 	}
 
-	if (internal_config.no_hugetlbfs == 0 &&
-			internal_config.process_type != RTE_PROC_SECONDARY &&
-			eal_hugepage_info_init() < 0) {
-		rte_eal_init_alert("Cannot get hugepage information.");
-		rte_errno = EACCES;
-		rte_atomic32_clear(&run_once);
-		return -1;
+	if (internal_config.no_hugetlbfs == 0) {
+		/* rte_config isn't initialized yet */
+		ret = internal_config.process_type == RTE_PROC_PRIMARY ?
+				eal_hugepage_info_init() :
+				eal_hugepage_info_read();
+		if (ret < 0) {
+			rte_eal_init_alert("Cannot get hugepage information.");
+			rte_errno = EACCES;
+			rte_atomic32_clear(&run_once);
+			return -1;
+		}
 	}
 
 	if (internal_config.memory == 0 && internal_config.force_sockets == 0) {


@@ -14,6 +14,7 @@
 #include <stdarg.h>
 #include <unistd.h>
 #include <errno.h>
+#include <sys/mman.h>
 #include <sys/queue.h>
 #include <sys/stat.h>
@@ -33,6 +34,39 @@
 static const char sys_dir_path[] = "/sys/kernel/mm/hugepages";
 static const char sys_pages_numa_dir_path[] = "/sys/devices/system/node";
 
+/*
+ * Uses mmap to create a shared memory area for storage of data
+ * Used in this file to store the hugepage file map on disk
+ */
+static void *
+map_shared_memory(const char *filename, const size_t mem_size, int flags)
+{
+	void *retval;
+	int fd = open(filename, flags, 0666);
+	if (fd < 0)
+		return NULL;
+	if (ftruncate(fd, mem_size) < 0) {
+		close(fd);
+		return NULL;
+	}
+	retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE,
+			MAP_SHARED, fd, 0);
+	close(fd);
+	return retval;
+}
+
+static void *
+open_shared_memory(const char *filename, const size_t mem_size)
+{
+	return map_shared_memory(filename, mem_size, O_RDWR);
+}
+
+static void *
+create_shared_memory(const char *filename, const size_t mem_size)
+{
+	return map_shared_memory(filename, mem_size, O_RDWR | O_CREAT);
+}
+
 /* this function is only called from eal_hugepage_info_init which itself
  * is only called from a primary process */
 static uint32_t
@@ -299,15 +333,9 @@ compare_hpi(const void *a, const void *b)
 	return hpi_b->hugepage_sz - hpi_a->hugepage_sz;
 }
 
-/*
- * when we initialize the hugepage info, everything goes
- * to socket 0 by default. it will later get sorted by memory
- * initialization procedure.
- */
-int
-eal_hugepage_info_init(void)
-{
-	const char dirent_start_text[] = "hugepages-";
+static int
+hugepage_info_init(void)
+{	const char dirent_start_text[] = "hugepages-";
 	const size_t dirent_start_len = sizeof(dirent_start_text) - 1;
 	unsigned int i, total_pages, num_sizes = 0;
 	DIR *dir;
@@ -323,6 +351,7 @@ eal_hugepage_info_init(void)
 	for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) {
 		struct hugepage_info *hpi;
+		const char *hugedir;
 
 		if (strncmp(dirent->d_name, dirent_start_text,
 				dirent_start_len) != 0)
@@ -334,10 +363,10 @@ eal_hugepage_info_init(void)
 		hpi = &internal_config.hugepage_info[num_sizes];
 		hpi->hugepage_sz =
 			rte_str_to_size(&dirent->d_name[dirent_start_len]);
-		hpi->hugedir = get_hugepage_dir(hpi->hugepage_sz);
+		hugedir = get_hugepage_dir(hpi->hugepage_sz);
 
 		/* first, check if we have a mountpoint */
-		if (hpi->hugedir == NULL) {
+		if (hugedir == NULL) {
 			uint32_t num_pages;
 
 			num_pages = get_num_hugepages(dirent->d_name);
@@ -349,6 +378,7 @@ eal_hugepage_info_init(void)
 					num_pages, hpi->hugepage_sz);
 			continue;
 		}
+		snprintf(hpi->hugedir, sizeof(hpi->hugedir), "%s", hugedir);
 
 		/* try to obtain a writelock */
 		hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY);
@@ -411,13 +441,11 @@ eal_hugepage_info_init(void)
 	for (i = 0; i < num_sizes; i++) {
 		/* pages may no longer all be on socket 0, so check all */
 		unsigned int j, num_pages = 0;
+		struct hugepage_info *hpi = &internal_config.hugepage_info[i];
 
-		for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
-			struct hugepage_info *hpi =
-				&internal_config.hugepage_info[i];
+		for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
 			num_pages += hpi->num_pages[j];
-		}
-		if (internal_config.hugepage_info[i].hugedir != NULL &&
+		if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0 &&
 				num_pages > 0)
 			return 0;
 	}
@@ -425,3 +453,64 @@ eal_hugepage_info_init(void)
 	/* no valid hugepage mounts available, return error */
 	return -1;
 }
+
+/*
+ * when we initialize the hugepage info, everything goes
+ * to socket 0 by default. it will later get sorted by memory
+ * initialization procedure.
+ */
+int
+eal_hugepage_info_init(void)
+{
+	struct hugepage_info *hpi, *tmp_hpi;
+	unsigned int i;
+
+	if (hugepage_info_init() < 0)
+		return -1;
+
+	hpi = &internal_config.hugepage_info[0];
+
+	tmp_hpi = create_shared_memory(eal_hugepage_info_path(),
+			sizeof(internal_config.hugepage_info));
+	if (tmp_hpi == NULL) {
+		RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
+		return -1;
+	}
+
+	memcpy(tmp_hpi, hpi, sizeof(internal_config.hugepage_info));
+
+	/* we've copied file descriptors along with everything else, but they
+	 * will be invalid in secondary process, so overwrite them
+	 */
+	for (i = 0; i < RTE_DIM(internal_config.hugepage_info); i++) {
+		struct hugepage_info *tmp = &tmp_hpi[i];
+		tmp->lock_descriptor = -1;
+	}
+
+	if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) {
+		RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
+		return -1;
+	}
+	return 0;
+}
+
+int eal_hugepage_info_read(void)
+{
+	struct hugepage_info *hpi = &internal_config.hugepage_info[0];
+	struct hugepage_info *tmp_hpi;
+
+	tmp_hpi = open_shared_memory(eal_hugepage_info_path(),
+			sizeof(internal_config.hugepage_info));
+	if (tmp_hpi == NULL) {
+		RTE_LOG(ERR, EAL, "Failed to open shared memory!\n");
+		return -1;
+	}
+
+	memcpy(hpi, tmp_hpi, sizeof(internal_config.hugepage_info));
+
+	if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) {
+		RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
+		return -1;
+	}
+	return 0;
+}


@@ -1060,7 +1060,7 @@ get_socket_mem_size(int socket)
 	for (i = 0; i < internal_config.num_hugepage_sizes; i++){
 		struct hugepage_info *hpi = &internal_config.hugepage_info[i];
-		if (hpi->hugedir != NULL)
+		if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0)
 			size += hpi->hugepage_sz * hpi->num_pages[socket];
 	}
@@ -1160,7 +1160,8 @@ calc_num_pages_per_socket(uint64_t * memory,
 	for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0; socket++) {
 		/* skips if the memory on specific socket wasn't requested */
 		for (i = 0; i < num_hp_info && memory[socket] != 0; i++){
-			hp_used[i].hugedir = hp_info[i].hugedir;
+			snprintf(hp_used[i].hugedir, sizeof(hp_used[i].hugedir),
+					"%s", hp_info[i].hugedir);
 			hp_used[i].num_pages[socket] = RTE_MIN(
 				memory[socket] / hp_info[i].hugepage_sz,
 				hp_info[i].num_pages[socket]);
@@ -1235,7 +1236,7 @@ eal_get_hugepage_mem_size(void)
 	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
 		struct hugepage_info *hpi = &internal_config.hugepage_info[i];
-		if (hpi->hugedir != NULL) {
+		if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0) {
 			for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
 				size += hpi->hugepage_sz * hpi->num_pages[j];
 			}
@@ -1509,7 +1510,7 @@ eal_legacy_hugepage_init(void)
 	}
 
 	/* create shared memory */
-	hugepage = create_shared_memory(eal_hugepage_info_path(),
+	hugepage = create_shared_memory(eal_hugepage_file_path(),
 			nr_hugefiles * sizeof(struct hugepage_file));
 
 	if (hugepage == NULL) {
@@ -1694,16 +1695,16 @@ eal_legacy_hugepage_attach(void)
 	test_phys_addrs_available();
 
-	fd_hugepage = open(eal_hugepage_info_path(), O_RDONLY);
+	fd_hugepage = open(eal_hugepage_file_path(), O_RDONLY);
 	if (fd_hugepage < 0) {
-		RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path());
+		RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_file_path());
 		goto error;
 	}
 
 	size = getFileSize(fd_hugepage);
 	hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0);
 	if (hp == MAP_FAILED) {
-		RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path());
+		RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_file_path());
 		goto error;
 	}