mem: extract common dynamic memory allocation
Code in Linux EAL that supports dynamic memory allocation (as opposed to static allocation used by FreeBSD) is not OS-dependent and can be reused by Windows EAL. Move such code to a file compiled only for the OS that require it. Keep Anatoly Burakov maintainer of extracted code. Signed-off-by: Dmitry Kozlyuk <dmitry.kozliuk@gmail.com>
This commit is contained in:
parent
83713ef276
commit
694161b7e0
@ -209,6 +209,7 @@ F: lib/librte_eal/include/rte_fbarray.h
|
||||
F: lib/librte_eal/include/rte_mem*
|
||||
F: lib/librte_eal/include/rte_malloc.h
|
||||
F: lib/librte_eal/common/*malloc*
|
||||
F: lib/librte_eal/common/eal_common_dynmem.c
|
||||
F: lib/librte_eal/common/eal_common_fbarray.c
|
||||
F: lib/librte_eal/common/eal_common_mem*
|
||||
F: lib/librte_eal/common/eal_hugepages.h
|
||||
|
521
lib/librte_eal/common/eal_common_dynmem.c
Normal file
521
lib/librte_eal/common/eal_common_dynmem.c
Normal file
@ -0,0 +1,521 @@
|
||||
/* SPDX-License-Identifier: BSD-3-Clause
|
||||
* Copyright(c) 2010-2014 Intel Corporation.
|
||||
* Copyright(c) 2013 6WIND S.A.
|
||||
*/
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <rte_log.h>
|
||||
#include <rte_string_fns.h>
|
||||
|
||||
#include "eal_internal_cfg.h"
|
||||
#include "eal_memalloc.h"
|
||||
#include "eal_memcfg.h"
|
||||
#include "eal_private.h"
|
||||
|
||||
/** @file Functions common to EALs that support dynamic memory allocation. */
|
||||
|
||||
int
|
||||
eal_dynmem_memseg_lists_init(void)
|
||||
{
|
||||
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
|
||||
struct memtype {
|
||||
uint64_t page_sz;
|
||||
int socket_id;
|
||||
} *memtypes = NULL;
|
||||
int i, hpi_idx, msl_idx, ret = -1; /* fail unless told to succeed */
|
||||
struct rte_memseg_list *msl;
|
||||
uint64_t max_mem, max_mem_per_type;
|
||||
unsigned int max_seglists_per_type;
|
||||
unsigned int n_memtypes, cur_type;
|
||||
|
||||
/* no-huge does not need this at all */
|
||||
if (internal_config.no_hugetlbfs)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* figuring out amount of memory we're going to have is a long and very
|
||||
* involved process. the basic element we're operating with is a memory
|
||||
* type, defined as a combination of NUMA node ID and page size (so that
|
||||
* e.g. 2 sockets with 2 page sizes yield 4 memory types in total).
|
||||
*
|
||||
* deciding amount of memory going towards each memory type is a
|
||||
* balancing act between maximum segments per type, maximum memory per
|
||||
* type, and number of detected NUMA nodes. the goal is to make sure
|
||||
* each memory type gets at least one memseg list.
|
||||
*
|
||||
* the total amount of memory is limited by RTE_MAX_MEM_MB value.
|
||||
*
|
||||
* the total amount of memory per type is limited by either
|
||||
* RTE_MAX_MEM_MB_PER_TYPE, or by RTE_MAX_MEM_MB divided by the number
|
||||
* of detected NUMA nodes. additionally, maximum number of segments per
|
||||
* type is also limited by RTE_MAX_MEMSEG_PER_TYPE. this is because for
|
||||
* smaller page sizes, it can take hundreds of thousands of segments to
|
||||
* reach the above specified per-type memory limits.
|
||||
*
|
||||
* additionally, each type may have multiple memseg lists associated
|
||||
* with it, each limited by either RTE_MAX_MEM_MB_PER_LIST for bigger
|
||||
* page sizes, or RTE_MAX_MEMSEG_PER_LIST segments for smaller ones.
|
||||
*
|
||||
* the number of memseg lists per type is decided based on the above
|
||||
* limits, and also taking number of detected NUMA nodes, to make sure
|
||||
* that we don't run out of memseg lists before we populate all NUMA
|
||||
* nodes with memory.
|
||||
*
|
||||
* we do this in three stages. first, we collect the number of types.
|
||||
* then, we figure out memory constraints and populate the list of
|
||||
* would-be memseg lists. then, we go ahead and allocate the memseg
|
||||
* lists.
|
||||
*/
|
||||
|
||||
/* create space for mem types */
|
||||
n_memtypes = internal_config.num_hugepage_sizes * rte_socket_count();
|
||||
memtypes = calloc(n_memtypes, sizeof(*memtypes));
|
||||
if (memtypes == NULL) {
|
||||
RTE_LOG(ERR, EAL, "Cannot allocate space for memory types\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* populate mem types */
|
||||
cur_type = 0;
|
||||
for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
|
||||
hpi_idx++) {
|
||||
struct hugepage_info *hpi;
|
||||
uint64_t hugepage_sz;
|
||||
|
||||
hpi = &internal_config.hugepage_info[hpi_idx];
|
||||
hugepage_sz = hpi->hugepage_sz;
|
||||
|
||||
for (i = 0; i < (int) rte_socket_count(); i++, cur_type++) {
|
||||
int socket_id = rte_socket_id_by_idx(i);
|
||||
|
||||
#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
|
||||
/* we can still sort pages by socket in legacy mode */
|
||||
if (!internal_config.legacy_mem && socket_id > 0)
|
||||
break;
|
||||
#endif
|
||||
memtypes[cur_type].page_sz = hugepage_sz;
|
||||
memtypes[cur_type].socket_id = socket_id;
|
||||
|
||||
RTE_LOG(DEBUG, EAL, "Detected memory type: "
|
||||
"socket_id:%u hugepage_sz:%" PRIu64 "\n",
|
||||
socket_id, hugepage_sz);
|
||||
}
|
||||
}
|
||||
/* number of memtypes could have been lower due to no NUMA support */
|
||||
n_memtypes = cur_type;
|
||||
|
||||
/* set up limits for types */
|
||||
max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
|
||||
max_mem_per_type = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20,
|
||||
max_mem / n_memtypes);
|
||||
/*
|
||||
* limit maximum number of segment lists per type to ensure there's
|
||||
* space for memseg lists for all NUMA nodes with all page sizes
|
||||
*/
|
||||
max_seglists_per_type = RTE_MAX_MEMSEG_LISTS / n_memtypes;
|
||||
|
||||
if (max_seglists_per_type == 0) {
|
||||
RTE_LOG(ERR, EAL, "Cannot accommodate all memory types, please increase %s\n",
|
||||
RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* go through all mem types and create segment lists */
|
||||
msl_idx = 0;
|
||||
for (cur_type = 0; cur_type < n_memtypes; cur_type++) {
|
||||
unsigned int cur_seglist, n_seglists, n_segs;
|
||||
unsigned int max_segs_per_type, max_segs_per_list;
|
||||
struct memtype *type = &memtypes[cur_type];
|
||||
uint64_t max_mem_per_list, pagesz;
|
||||
int socket_id;
|
||||
|
||||
pagesz = type->page_sz;
|
||||
socket_id = type->socket_id;
|
||||
|
||||
/*
|
||||
* we need to create segment lists for this type. we must take
|
||||
* into account the following things:
|
||||
*
|
||||
* 1. total amount of memory we can use for this memory type
|
||||
* 2. total amount of memory per memseg list allowed
|
||||
* 3. number of segments needed to fit the amount of memory
|
||||
* 4. number of segments allowed per type
|
||||
* 5. number of segments allowed per memseg list
|
||||
* 6. number of memseg lists we are allowed to take up
|
||||
*/
|
||||
|
||||
/* calculate how much segments we will need in total */
|
||||
max_segs_per_type = max_mem_per_type / pagesz;
|
||||
/* limit number of segments to maximum allowed per type */
|
||||
max_segs_per_type = RTE_MIN(max_segs_per_type,
|
||||
(unsigned int)RTE_MAX_MEMSEG_PER_TYPE);
|
||||
/* limit number of segments to maximum allowed per list */
|
||||
max_segs_per_list = RTE_MIN(max_segs_per_type,
|
||||
(unsigned int)RTE_MAX_MEMSEG_PER_LIST);
|
||||
|
||||
/* calculate how much memory we can have per segment list */
|
||||
max_mem_per_list = RTE_MIN(max_segs_per_list * pagesz,
|
||||
(uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20);
|
||||
|
||||
/* calculate how many segments each segment list will have */
|
||||
n_segs = RTE_MIN(max_segs_per_list, max_mem_per_list / pagesz);
|
||||
|
||||
/* calculate how many segment lists we can have */
|
||||
n_seglists = RTE_MIN(max_segs_per_type / n_segs,
|
||||
max_mem_per_type / max_mem_per_list);
|
||||
|
||||
/* limit number of segment lists according to our maximum */
|
||||
n_seglists = RTE_MIN(n_seglists, max_seglists_per_type);
|
||||
|
||||
RTE_LOG(DEBUG, EAL, "Creating %i segment lists: "
|
||||
"n_segs:%i socket_id:%i hugepage_sz:%" PRIu64 "\n",
|
||||
n_seglists, n_segs, socket_id, pagesz);
|
||||
|
||||
/* create all segment lists */
|
||||
for (cur_seglist = 0; cur_seglist < n_seglists; cur_seglist++) {
|
||||
if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
|
||||
RTE_LOG(ERR, EAL,
|
||||
"No more space in memseg lists, please increase %s\n",
|
||||
RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
|
||||
goto out;
|
||||
}
|
||||
msl = &mcfg->memsegs[msl_idx++];
|
||||
|
||||
if (eal_memseg_list_init(msl, pagesz, n_segs,
|
||||
socket_id, cur_seglist, true))
|
||||
goto out;
|
||||
|
||||
if (eal_memseg_list_alloc(msl, 0)) {
|
||||
RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* we're successful */
|
||||
ret = 0;
|
||||
out:
|
||||
free(memtypes);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __rte_unused
|
||||
hugepage_count_walk(const struct rte_memseg_list *msl, void *arg)
|
||||
{
|
||||
struct hugepage_info *hpi = arg;
|
||||
|
||||
if (msl->page_sz != hpi->hugepage_sz)
|
||||
return 0;
|
||||
|
||||
hpi->num_pages[msl->socket_id] += msl->memseg_arr.len;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
limits_callback(int socket_id, size_t cur_limit, size_t new_len)
|
||||
{
|
||||
RTE_SET_USED(socket_id);
|
||||
RTE_SET_USED(cur_limit);
|
||||
RTE_SET_USED(new_len);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int
|
||||
eal_dynmem_hugepage_init(void)
|
||||
{
|
||||
struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
|
||||
uint64_t memory[RTE_MAX_NUMA_NODES];
|
||||
int hp_sz_idx, socket_id;
|
||||
|
||||
memset(used_hp, 0, sizeof(used_hp));
|
||||
|
||||
for (hp_sz_idx = 0;
|
||||
hp_sz_idx < (int) internal_config.num_hugepage_sizes;
|
||||
hp_sz_idx++) {
|
||||
#ifndef RTE_ARCH_64
|
||||
struct hugepage_info dummy;
|
||||
unsigned int i;
|
||||
#endif
|
||||
/* also initialize used_hp hugepage sizes in used_hp */
|
||||
struct hugepage_info *hpi;
|
||||
hpi = &internal_config.hugepage_info[hp_sz_idx];
|
||||
used_hp[hp_sz_idx].hugepage_sz = hpi->hugepage_sz;
|
||||
|
||||
#ifndef RTE_ARCH_64
|
||||
/* for 32-bit, limit number of pages on socket to whatever we've
|
||||
* preallocated, as we cannot allocate more.
|
||||
*/
|
||||
memset(&dummy, 0, sizeof(dummy));
|
||||
dummy.hugepage_sz = hpi->hugepage_sz;
|
||||
if (rte_memseg_list_walk(hugepage_count_walk, &dummy) < 0)
|
||||
return -1;
|
||||
|
||||
for (i = 0; i < RTE_DIM(dummy.num_pages); i++) {
|
||||
hpi->num_pages[i] = RTE_MIN(hpi->num_pages[i],
|
||||
dummy.num_pages[i]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* make a copy of socket_mem, needed for balanced allocation. */
|
||||
for (hp_sz_idx = 0; hp_sz_idx < RTE_MAX_NUMA_NODES; hp_sz_idx++)
|
||||
memory[hp_sz_idx] = internal_config.socket_mem[hp_sz_idx];
|
||||
|
||||
/* calculate final number of pages */
|
||||
if (eal_dynmem_calc_num_pages_per_socket(memory,
|
||||
internal_config.hugepage_info, used_hp,
|
||||
internal_config.num_hugepage_sizes) < 0)
|
||||
return -1;
|
||||
|
||||
for (hp_sz_idx = 0;
|
||||
hp_sz_idx < (int)internal_config.num_hugepage_sizes;
|
||||
hp_sz_idx++) {
|
||||
for (socket_id = 0; socket_id < RTE_MAX_NUMA_NODES;
|
||||
socket_id++) {
|
||||
struct rte_memseg **pages;
|
||||
struct hugepage_info *hpi = &used_hp[hp_sz_idx];
|
||||
unsigned int num_pages = hpi->num_pages[socket_id];
|
||||
unsigned int num_pages_alloc;
|
||||
|
||||
if (num_pages == 0)
|
||||
continue;
|
||||
|
||||
RTE_LOG(DEBUG, EAL,
|
||||
"Allocating %u pages of size %" PRIu64 "M "
|
||||
"on socket %i\n",
|
||||
num_pages, hpi->hugepage_sz >> 20, socket_id);
|
||||
|
||||
/* we may not be able to allocate all pages in one go,
|
||||
* because we break up our memory map into multiple
|
||||
* memseg lists. therefore, try allocating multiple
|
||||
* times and see if we can get the desired number of
|
||||
* pages from multiple allocations.
|
||||
*/
|
||||
|
||||
num_pages_alloc = 0;
|
||||
do {
|
||||
int i, cur_pages, needed;
|
||||
|
||||
needed = num_pages - num_pages_alloc;
|
||||
|
||||
pages = malloc(sizeof(*pages) * needed);
|
||||
|
||||
/* do not request exact number of pages */
|
||||
cur_pages = eal_memalloc_alloc_seg_bulk(pages,
|
||||
needed, hpi->hugepage_sz,
|
||||
socket_id, false);
|
||||
if (cur_pages <= 0) {
|
||||
free(pages);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* mark preallocated pages as unfreeable */
|
||||
for (i = 0; i < cur_pages; i++) {
|
||||
struct rte_memseg *ms = pages[i];
|
||||
ms->flags |=
|
||||
RTE_MEMSEG_FLAG_DO_NOT_FREE;
|
||||
}
|
||||
free(pages);
|
||||
|
||||
num_pages_alloc += cur_pages;
|
||||
} while (num_pages_alloc != num_pages);
|
||||
}
|
||||
}
|
||||
|
||||
/* if socket limits were specified, set them */
|
||||
if (internal_config.force_socket_limits) {
|
||||
unsigned int i;
|
||||
for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
|
||||
uint64_t limit = internal_config.socket_limit[i];
|
||||
if (limit == 0)
|
||||
continue;
|
||||
if (rte_mem_alloc_validator_register("socket-limit",
|
||||
limits_callback, i, limit))
|
||||
RTE_LOG(ERR, EAL, "Failed to register socket limits validator callback\n");
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
__rte_unused /* function is unused on 32-bit builds */
|
||||
static inline uint64_t
|
||||
get_socket_mem_size(int socket)
|
||||
{
|
||||
uint64_t size = 0;
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
|
||||
struct hugepage_info *hpi = &internal_config.hugepage_info[i];
|
||||
size += hpi->hugepage_sz * hpi->num_pages[socket];
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
int
|
||||
eal_dynmem_calc_num_pages_per_socket(
|
||||
uint64_t *memory, struct hugepage_info *hp_info,
|
||||
struct hugepage_info *hp_used, unsigned int num_hp_info)
|
||||
{
|
||||
unsigned int socket, j, i = 0;
|
||||
unsigned int requested, available;
|
||||
int total_num_pages = 0;
|
||||
uint64_t remaining_mem, cur_mem;
|
||||
uint64_t total_mem = internal_config.memory;
|
||||
|
||||
if (num_hp_info == 0)
|
||||
return -1;
|
||||
|
||||
/* if specific memory amounts per socket weren't requested */
|
||||
if (internal_config.force_sockets == 0) {
|
||||
size_t total_size;
|
||||
#ifdef RTE_ARCH_64
|
||||
int cpu_per_socket[RTE_MAX_NUMA_NODES];
|
||||
size_t default_size;
|
||||
unsigned int lcore_id;
|
||||
|
||||
/* Compute number of cores per socket */
|
||||
memset(cpu_per_socket, 0, sizeof(cpu_per_socket));
|
||||
RTE_LCORE_FOREACH(lcore_id) {
|
||||
cpu_per_socket[rte_lcore_to_socket_id(lcore_id)]++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Automatically spread requested memory amongst detected
|
||||
* sockets according to number of cores from CPU mask present
|
||||
* on each socket.
|
||||
*/
|
||||
total_size = internal_config.memory;
|
||||
for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
|
||||
socket++) {
|
||||
|
||||
/* Set memory amount per socket */
|
||||
default_size = internal_config.memory *
|
||||
cpu_per_socket[socket] / rte_lcore_count();
|
||||
|
||||
/* Limit to maximum available memory on socket */
|
||||
default_size = RTE_MIN(
|
||||
default_size, get_socket_mem_size(socket));
|
||||
|
||||
/* Update sizes */
|
||||
memory[socket] = default_size;
|
||||
total_size -= default_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* If some memory is remaining, try to allocate it by getting
|
||||
* all available memory from sockets, one after the other.
|
||||
*/
|
||||
for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
|
||||
socket++) {
|
||||
/* take whatever is available */
|
||||
default_size = RTE_MIN(
|
||||
get_socket_mem_size(socket) - memory[socket],
|
||||
total_size);
|
||||
|
||||
/* Update sizes */
|
||||
memory[socket] += default_size;
|
||||
total_size -= default_size;
|
||||
}
|
||||
#else
|
||||
/* in 32-bit mode, allocate all of the memory only on master
|
||||
* lcore socket
|
||||
*/
|
||||
total_size = internal_config.memory;
|
||||
for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
|
||||
socket++) {
|
||||
struct rte_config *cfg = rte_eal_get_configuration();
|
||||
unsigned int master_lcore_socket;
|
||||
|
||||
master_lcore_socket =
|
||||
rte_lcore_to_socket_id(cfg->master_lcore);
|
||||
|
||||
if (master_lcore_socket != socket)
|
||||
continue;
|
||||
|
||||
/* Update sizes */
|
||||
memory[socket] = total_size;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0;
|
||||
socket++) {
|
||||
/* skips if the memory on specific socket wasn't requested */
|
||||
for (i = 0; i < num_hp_info && memory[socket] != 0; i++) {
|
||||
rte_strscpy(hp_used[i].hugedir, hp_info[i].hugedir,
|
||||
sizeof(hp_used[i].hugedir));
|
||||
hp_used[i].num_pages[socket] = RTE_MIN(
|
||||
memory[socket] / hp_info[i].hugepage_sz,
|
||||
hp_info[i].num_pages[socket]);
|
||||
|
||||
cur_mem = hp_used[i].num_pages[socket] *
|
||||
hp_used[i].hugepage_sz;
|
||||
|
||||
memory[socket] -= cur_mem;
|
||||
total_mem -= cur_mem;
|
||||
|
||||
total_num_pages += hp_used[i].num_pages[socket];
|
||||
|
||||
/* check if we have met all memory requests */
|
||||
if (memory[socket] == 0)
|
||||
break;
|
||||
|
||||
/* Check if we have any more pages left at this size,
|
||||
* if so, move on to next size.
|
||||
*/
|
||||
if (hp_used[i].num_pages[socket] ==
|
||||
hp_info[i].num_pages[socket])
|
||||
continue;
|
||||
/* At this point we know that there are more pages
|
||||
* available that are bigger than the memory we want,
|
||||
* so lets see if we can get enough from other page
|
||||
* sizes.
|
||||
*/
|
||||
remaining_mem = 0;
|
||||
for (j = i+1; j < num_hp_info; j++)
|
||||
remaining_mem += hp_info[j].hugepage_sz *
|
||||
hp_info[j].num_pages[socket];
|
||||
|
||||
/* Is there enough other memory?
|
||||
* If not, allocate another page and quit.
|
||||
*/
|
||||
if (remaining_mem < memory[socket]) {
|
||||
cur_mem = RTE_MIN(
|
||||
memory[socket], hp_info[i].hugepage_sz);
|
||||
memory[socket] -= cur_mem;
|
||||
total_mem -= cur_mem;
|
||||
hp_used[i].num_pages[socket]++;
|
||||
total_num_pages++;
|
||||
break; /* we are done with this socket*/
|
||||
}
|
||||
}
|
||||
|
||||
/* if we didn't satisfy all memory requirements per socket */
|
||||
if (memory[socket] > 0 &&
|
||||
internal_config.socket_mem[socket] != 0) {
|
||||
/* to prevent icc errors */
|
||||
requested = (unsigned int)(
|
||||
internal_config.socket_mem[socket] / 0x100000);
|
||||
available = requested -
|
||||
((unsigned int)(memory[socket] / 0x100000));
|
||||
RTE_LOG(ERR, EAL, "Not enough memory available on "
|
||||
"socket %u! Requested: %uMB, available: %uMB\n",
|
||||
socket, requested, available);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* if we didn't satisfy total memory requirements */
|
||||
if (total_mem > 0) {
|
||||
requested = (unsigned int)(internal_config.memory / 0x100000);
|
||||
available = requested - (unsigned int)(total_mem / 0x100000);
|
||||
RTE_LOG(ERR, EAL, "Not enough memory available! "
|
||||
"Requested: %uMB, available: %uMB\n",
|
||||
requested, available);
|
||||
return -1;
|
||||
}
|
||||
return total_num_pages;
|
||||
}
|
@ -13,6 +13,8 @@
|
||||
#include <rte_lcore.h>
|
||||
#include <rte_memory.h>
|
||||
|
||||
#include "eal_internal_cfg.h"
|
||||
|
||||
/**
|
||||
* Structure storing internal configuration (per-lcore)
|
||||
*/
|
||||
@ -316,6 +318,45 @@ eal_memseg_list_alloc(struct rte_memseg_list *msl, int reserve_flags);
|
||||
void
|
||||
eal_memseg_list_populate(struct rte_memseg_list *msl, void *addr, int n_segs);
|
||||
|
||||
/**
|
||||
* Distribute available memory between MSLs.
|
||||
*
|
||||
* @return
|
||||
* 0 on success, (-1) on failure.
|
||||
*/
|
||||
int
|
||||
eal_dynmem_memseg_lists_init(void);
|
||||
|
||||
/**
|
||||
* Preallocate hugepages for dynamic allocation.
|
||||
*
|
||||
* @return
|
||||
* 0 on success, (-1) on failure.
|
||||
*/
|
||||
int
|
||||
eal_dynmem_hugepage_init(void);
|
||||
|
||||
/**
|
||||
* Given the list of hugepage sizes and the number of pages thereof,
|
||||
* calculate the best number of pages of each size to fulfill the request
|
||||
* for RAM on each NUMA node.
|
||||
*
|
||||
* @param memory
|
||||
* Amounts of memory requested for each NUMA node of RTE_MAX_NUMA_NODES.
|
||||
* @param hp_info
|
||||
* Information about hugepages of different size.
|
||||
* @param hp_used
|
||||
* Receives information about used hugepages of each size.
|
||||
* @param num_hp_info
|
||||
* Number of elements in hp_info and hp_used.
|
||||
* @return
|
||||
* 0 on success, (-1) on failure.
|
||||
*/
|
||||
int
|
||||
eal_dynmem_calc_num_pages_per_socket(
|
||||
uint64_t *memory, struct hugepage_info *hp_info,
|
||||
struct hugepage_info *hp_used, unsigned int num_hp_info);
|
||||
|
||||
/**
|
||||
* Get cpu core_id.
|
||||
*
|
||||
@ -595,7 +636,7 @@ void *
|
||||
eal_mem_reserve(void *requested_addr, size_t size, int flags);
|
||||
|
||||
/**
|
||||
* Free memory obtained by eal_mem_reserve() or eal_mem_alloc().
|
||||
* Free memory obtained by eal_mem_reserve() and possibly allocated.
|
||||
*
|
||||
* If *virt* and *size* describe a part of the reserved region,
|
||||
* only this part of the region is freed (accurately up to the system
|
||||
|
@ -56,3 +56,7 @@ sources += files(
|
||||
'rte_reciprocal.c',
|
||||
'rte_service.c',
|
||||
)
|
||||
|
||||
if is_linux
|
||||
sources += files('eal_common_dynmem.c')
|
||||
endif
|
||||
|
@ -315,14 +315,6 @@ get_mem_amount(uint64_t page_sz, uint64_t max_mem)
|
||||
return RTE_ALIGN(area_sz, page_sz);
|
||||
}
|
||||
|
||||
static int
|
||||
memseg_list_init(struct rte_memseg_list *msl, uint64_t page_sz,
|
||||
int n_segs, int socket_id, int type_msl_idx)
|
||||
{
|
||||
return eal_memseg_list_init(
|
||||
msl, page_sz, n_segs, socket_id, type_msl_idx, false);
|
||||
}
|
||||
|
||||
static int
|
||||
memseg_list_alloc(struct rte_memseg_list *msl)
|
||||
{
|
||||
@ -419,8 +411,8 @@ memseg_primary_init(void)
|
||||
cur_max_mem);
|
||||
n_segs = cur_mem / hugepage_sz;
|
||||
|
||||
if (memseg_list_init(msl, hugepage_sz, n_segs,
|
||||
0, type_msl_idx))
|
||||
if (eal_memseg_list_init(msl, hugepage_sz, n_segs,
|
||||
0, type_msl_idx, false))
|
||||
return -1;
|
||||
|
||||
total_segs += msl->memseg_arr.len;
|
||||
|
@ -50,6 +50,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_timer.c
|
||||
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_memzone.c
|
||||
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_log.c
|
||||
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_launch.c
|
||||
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_dynmem.c
|
||||
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_mcfg.c
|
||||
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_memalloc.c
|
||||
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_memory.c
|
||||
|
@ -812,20 +812,6 @@ memseg_list_free(struct rte_memseg_list *msl)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
memseg_list_init(struct rte_memseg_list *msl, uint64_t page_sz,
|
||||
int n_segs, int socket_id, int type_msl_idx)
|
||||
{
|
||||
return eal_memseg_list_init(
|
||||
msl, page_sz, n_segs, socket_id, type_msl_idx, true);
|
||||
}
|
||||
|
||||
static int
|
||||
memseg_list_alloc(struct rte_memseg_list *msl)
|
||||
{
|
||||
return eal_memseg_list_alloc(msl, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Our VA space is not preallocated yet, so preallocate it here. We need to know
|
||||
* how many segments there are in order to map all pages into one address space,
|
||||
@ -969,12 +955,12 @@ prealloc_segments(struct hugepage_file *hugepages, int n_pages)
|
||||
}
|
||||
|
||||
/* now, allocate fbarray itself */
|
||||
if (memseg_list_init(msl, page_sz, n_segs, socket,
|
||||
msl_idx) < 0)
|
||||
if (eal_memseg_list_init(msl, page_sz, n_segs,
|
||||
socket, msl_idx, true) < 0)
|
||||
return -1;
|
||||
|
||||
/* finally, allocate VA space */
|
||||
if (memseg_list_alloc(msl) < 0) {
|
||||
if (eal_memseg_list_alloc(msl, 0) < 0) {
|
||||
RTE_LOG(ERR, EAL, "Cannot preallocate 0x%"PRIx64"kB hugepages\n",
|
||||
page_sz >> 10);
|
||||
return -1;
|
||||
@ -1048,182 +1034,6 @@ remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
|
||||
return 0;
|
||||
}
|
||||
|
||||
__rte_unused /* function is unused on 32-bit builds */
|
||||
static inline uint64_t
|
||||
get_socket_mem_size(int socket)
|
||||
{
|
||||
uint64_t size = 0;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < internal_config.num_hugepage_sizes; i++){
|
||||
struct hugepage_info *hpi = &internal_config.hugepage_info[i];
|
||||
size += hpi->hugepage_sz * hpi->num_pages[socket];
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is a NUMA-aware equivalent of calc_num_pages.
|
||||
* It takes in the list of hugepage sizes and the
|
||||
* number of pages thereof, and calculates the best number of
|
||||
* pages of each size to fulfill the request for <memory> ram
|
||||
*/
|
||||
static int
|
||||
calc_num_pages_per_socket(uint64_t * memory,
|
||||
struct hugepage_info *hp_info,
|
||||
struct hugepage_info *hp_used,
|
||||
unsigned num_hp_info)
|
||||
{
|
||||
unsigned socket, j, i = 0;
|
||||
unsigned requested, available;
|
||||
int total_num_pages = 0;
|
||||
uint64_t remaining_mem, cur_mem;
|
||||
uint64_t total_mem = internal_config.memory;
|
||||
|
||||
if (num_hp_info == 0)
|
||||
return -1;
|
||||
|
||||
/* if specific memory amounts per socket weren't requested */
|
||||
if (internal_config.force_sockets == 0) {
|
||||
size_t total_size;
|
||||
#ifdef RTE_ARCH_64
|
||||
int cpu_per_socket[RTE_MAX_NUMA_NODES];
|
||||
size_t default_size;
|
||||
unsigned lcore_id;
|
||||
|
||||
/* Compute number of cores per socket */
|
||||
memset(cpu_per_socket, 0, sizeof(cpu_per_socket));
|
||||
RTE_LCORE_FOREACH(lcore_id) {
|
||||
cpu_per_socket[rte_lcore_to_socket_id(lcore_id)]++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Automatically spread requested memory amongst detected sockets according
|
||||
* to number of cores from cpu mask present on each socket
|
||||
*/
|
||||
total_size = internal_config.memory;
|
||||
for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) {
|
||||
|
||||
/* Set memory amount per socket */
|
||||
default_size = (internal_config.memory * cpu_per_socket[socket])
|
||||
/ rte_lcore_count();
|
||||
|
||||
/* Limit to maximum available memory on socket */
|
||||
default_size = RTE_MIN(default_size, get_socket_mem_size(socket));
|
||||
|
||||
/* Update sizes */
|
||||
memory[socket] = default_size;
|
||||
total_size -= default_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* If some memory is remaining, try to allocate it by getting all
|
||||
* available memory from sockets, one after the other
|
||||
*/
|
||||
for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) {
|
||||
/* take whatever is available */
|
||||
default_size = RTE_MIN(get_socket_mem_size(socket) - memory[socket],
|
||||
total_size);
|
||||
|
||||
/* Update sizes */
|
||||
memory[socket] += default_size;
|
||||
total_size -= default_size;
|
||||
}
|
||||
#else
|
||||
/* in 32-bit mode, allocate all of the memory only on master
|
||||
* lcore socket
|
||||
*/
|
||||
total_size = internal_config.memory;
|
||||
for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
|
||||
socket++) {
|
||||
struct rte_config *cfg = rte_eal_get_configuration();
|
||||
unsigned int master_lcore_socket;
|
||||
|
||||
master_lcore_socket =
|
||||
rte_lcore_to_socket_id(cfg->master_lcore);
|
||||
|
||||
if (master_lcore_socket != socket)
|
||||
continue;
|
||||
|
||||
/* Update sizes */
|
||||
memory[socket] = total_size;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0; socket++) {
|
||||
/* skips if the memory on specific socket wasn't requested */
|
||||
for (i = 0; i < num_hp_info && memory[socket] != 0; i++){
|
||||
strlcpy(hp_used[i].hugedir, hp_info[i].hugedir,
|
||||
sizeof(hp_used[i].hugedir));
|
||||
hp_used[i].num_pages[socket] = RTE_MIN(
|
||||
memory[socket] / hp_info[i].hugepage_sz,
|
||||
hp_info[i].num_pages[socket]);
|
||||
|
||||
cur_mem = hp_used[i].num_pages[socket] *
|
||||
hp_used[i].hugepage_sz;
|
||||
|
||||
memory[socket] -= cur_mem;
|
||||
total_mem -= cur_mem;
|
||||
|
||||
total_num_pages += hp_used[i].num_pages[socket];
|
||||
|
||||
/* check if we have met all memory requests */
|
||||
if (memory[socket] == 0)
|
||||
break;
|
||||
|
||||
/* check if we have any more pages left at this size, if so
|
||||
* move on to next size */
|
||||
if (hp_used[i].num_pages[socket] == hp_info[i].num_pages[socket])
|
||||
continue;
|
||||
/* At this point we know that there are more pages available that are
|
||||
* bigger than the memory we want, so lets see if we can get enough
|
||||
* from other page sizes.
|
||||
*/
|
||||
remaining_mem = 0;
|
||||
for (j = i+1; j < num_hp_info; j++)
|
||||
remaining_mem += hp_info[j].hugepage_sz *
|
||||
hp_info[j].num_pages[socket];
|
||||
|
||||
/* is there enough other memory, if not allocate another page and quit */
|
||||
if (remaining_mem < memory[socket]){
|
||||
cur_mem = RTE_MIN(memory[socket],
|
||||
hp_info[i].hugepage_sz);
|
||||
memory[socket] -= cur_mem;
|
||||
total_mem -= cur_mem;
|
||||
hp_used[i].num_pages[socket]++;
|
||||
total_num_pages++;
|
||||
break; /* we are done with this socket*/
|
||||
}
|
||||
}
|
||||
/* if we didn't satisfy all memory requirements per socket */
|
||||
if (memory[socket] > 0 &&
|
||||
internal_config.socket_mem[socket] != 0) {
|
||||
/* to prevent icc errors */
|
||||
requested = (unsigned) (internal_config.socket_mem[socket] /
|
||||
0x100000);
|
||||
available = requested -
|
||||
((unsigned) (memory[socket] / 0x100000));
|
||||
RTE_LOG(ERR, EAL, "Not enough memory available on socket %u! "
|
||||
"Requested: %uMB, available: %uMB\n", socket,
|
||||
requested, available);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* if we didn't satisfy total memory requirements */
|
||||
if (total_mem > 0) {
|
||||
requested = (unsigned) (internal_config.memory / 0x100000);
|
||||
available = requested - (unsigned) (total_mem / 0x100000);
|
||||
RTE_LOG(ERR, EAL, "Not enough memory available! Requested: %uMB,"
|
||||
" available: %uMB\n", requested, available);
|
||||
return -1;
|
||||
}
|
||||
return total_num_pages;
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
eal_get_hugepage_mem_size(void)
|
||||
{
|
||||
@ -1529,7 +1339,7 @@ eal_legacy_hugepage_init(void)
|
||||
memory[i] = internal_config.socket_mem[i];
|
||||
|
||||
/* calculate final number of pages */
|
||||
nr_hugepages = calc_num_pages_per_socket(memory,
|
||||
nr_hugepages = eal_dynmem_calc_num_pages_per_socket(memory,
|
||||
internal_config.hugepage_info, used_hp,
|
||||
internal_config.num_hugepage_sizes);
|
||||
|
||||
@ -1656,140 +1466,6 @@ eal_legacy_hugepage_init(void)
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int __rte_unused
|
||||
hugepage_count_walk(const struct rte_memseg_list *msl, void *arg)
|
||||
{
|
||||
struct hugepage_info *hpi = arg;
|
||||
|
||||
if (msl->page_sz != hpi->hugepage_sz)
|
||||
return 0;
|
||||
|
||||
hpi->num_pages[msl->socket_id] += msl->memseg_arr.len;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
limits_callback(int socket_id, size_t cur_limit, size_t new_len)
|
||||
{
|
||||
RTE_SET_USED(socket_id);
|
||||
RTE_SET_USED(cur_limit);
|
||||
RTE_SET_USED(new_len);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int
|
||||
eal_hugepage_init(void)
|
||||
{
|
||||
struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
|
||||
uint64_t memory[RTE_MAX_NUMA_NODES];
|
||||
int hp_sz_idx, socket_id;
|
||||
|
||||
memset(used_hp, 0, sizeof(used_hp));
|
||||
|
||||
for (hp_sz_idx = 0;
|
||||
hp_sz_idx < (int) internal_config.num_hugepage_sizes;
|
||||
hp_sz_idx++) {
|
||||
#ifndef RTE_ARCH_64
|
||||
struct hugepage_info dummy;
|
||||
unsigned int i;
|
||||
#endif
|
||||
/* also initialize used_hp hugepage sizes in used_hp */
|
||||
struct hugepage_info *hpi;
|
||||
hpi = &internal_config.hugepage_info[hp_sz_idx];
|
||||
used_hp[hp_sz_idx].hugepage_sz = hpi->hugepage_sz;
|
||||
|
||||
#ifndef RTE_ARCH_64
|
||||
/* for 32-bit, limit number of pages on socket to whatever we've
|
||||
* preallocated, as we cannot allocate more.
|
||||
*/
|
||||
memset(&dummy, 0, sizeof(dummy));
|
||||
dummy.hugepage_sz = hpi->hugepage_sz;
|
||||
if (rte_memseg_list_walk(hugepage_count_walk, &dummy) < 0)
|
||||
return -1;
|
||||
|
||||
for (i = 0; i < RTE_DIM(dummy.num_pages); i++) {
|
||||
hpi->num_pages[i] = RTE_MIN(hpi->num_pages[i],
|
||||
dummy.num_pages[i]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* make a copy of socket_mem, needed for balanced allocation. */
|
||||
for (hp_sz_idx = 0; hp_sz_idx < RTE_MAX_NUMA_NODES; hp_sz_idx++)
|
||||
memory[hp_sz_idx] = internal_config.socket_mem[hp_sz_idx];
|
||||
|
||||
/* calculate final number of pages */
|
||||
if (calc_num_pages_per_socket(memory,
|
||||
internal_config.hugepage_info, used_hp,
|
||||
internal_config.num_hugepage_sizes) < 0)
|
||||
return -1;
|
||||
|
||||
for (hp_sz_idx = 0;
|
||||
hp_sz_idx < (int)internal_config.num_hugepage_sizes;
|
||||
hp_sz_idx++) {
|
||||
for (socket_id = 0; socket_id < RTE_MAX_NUMA_NODES;
|
||||
socket_id++) {
|
||||
struct rte_memseg **pages;
|
||||
struct hugepage_info *hpi = &used_hp[hp_sz_idx];
|
||||
unsigned int num_pages = hpi->num_pages[socket_id];
|
||||
unsigned int num_pages_alloc;
|
||||
|
||||
if (num_pages == 0)
|
||||
continue;
|
||||
|
||||
RTE_LOG(DEBUG, EAL, "Allocating %u pages of size %" PRIu64 "M on socket %i\n",
|
||||
num_pages, hpi->hugepage_sz >> 20, socket_id);
|
||||
|
||||
/* we may not be able to allocate all pages in one go,
|
||||
* because we break up our memory map into multiple
|
||||
* memseg lists. therefore, try allocating multiple
|
||||
* times and see if we can get the desired number of
|
||||
* pages from multiple allocations.
|
||||
*/
|
||||
|
||||
num_pages_alloc = 0;
|
||||
do {
|
||||
int i, cur_pages, needed;
|
||||
|
||||
needed = num_pages - num_pages_alloc;
|
||||
|
||||
pages = malloc(sizeof(*pages) * needed);
|
||||
|
||||
/* do not request exact number of pages */
|
||||
cur_pages = eal_memalloc_alloc_seg_bulk(pages,
|
||||
needed, hpi->hugepage_sz,
|
||||
socket_id, false);
|
||||
if (cur_pages <= 0) {
|
||||
free(pages);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* mark preallocated pages as unfreeable */
|
||||
for (i = 0; i < cur_pages; i++) {
|
||||
struct rte_memseg *ms = pages[i];
|
||||
ms->flags |= RTE_MEMSEG_FLAG_DO_NOT_FREE;
|
||||
}
|
||||
free(pages);
|
||||
|
||||
num_pages_alloc += cur_pages;
|
||||
} while (num_pages_alloc != num_pages);
|
||||
}
|
||||
}
|
||||
/* if socket limits were specified, set them */
|
||||
if (internal_config.force_socket_limits) {
|
||||
unsigned int i;
|
||||
for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
|
||||
uint64_t limit = internal_config.socket_limit[i];
|
||||
if (limit == 0)
|
||||
continue;
|
||||
if (rte_mem_alloc_validator_register("socket-limit",
|
||||
limits_callback, i, limit))
|
||||
RTE_LOG(ERR, EAL, "Failed to register socket limits validator callback\n");
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* uses fstat to report the size of a file on disk
|
||||
*/
|
||||
@ -1948,7 +1624,7 @@ rte_eal_hugepage_init(void)
|
||||
{
|
||||
return internal_config.legacy_mem ?
|
||||
eal_legacy_hugepage_init() :
|
||||
eal_hugepage_init();
|
||||
eal_dynmem_hugepage_init();
|
||||
}
|
||||
|
||||
int
|
||||
@ -2127,8 +1803,9 @@ memseg_primary_init_32(void)
|
||||
max_pagesz_mem);
|
||||
n_segs = cur_mem / hugepage_sz;
|
||||
|
||||
if (memseg_list_init(msl, hugepage_sz, n_segs,
|
||||
socket_id, type_msl_idx)) {
|
||||
if (eal_memseg_list_init(msl, hugepage_sz,
|
||||
n_segs, socket_id, type_msl_idx,
|
||||
true)) {
|
||||
/* failing to allocate a memseg list is
|
||||
* a serious error.
|
||||
*/
|
||||
@ -2136,7 +1813,7 @@ memseg_primary_init_32(void)
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (memseg_list_alloc(msl)) {
|
||||
if (eal_memseg_list_alloc(msl, 0)) {
|
||||
/* if we couldn't allocate VA space, we
|
||||
* can try with smaller page sizes.
|
||||
*/
|
||||
@ -2167,185 +1844,7 @@ memseg_primary_init_32(void)
|
||||
static int __rte_unused
|
||||
memseg_primary_init(void)
|
||||
{
|
||||
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
|
||||
struct memtype {
|
||||
uint64_t page_sz;
|
||||
int socket_id;
|
||||
} *memtypes = NULL;
|
||||
int i, hpi_idx, msl_idx, ret = -1; /* fail unless told to succeed */
|
||||
struct rte_memseg_list *msl;
|
||||
uint64_t max_mem, max_mem_per_type;
|
||||
unsigned int max_seglists_per_type;
|
||||
unsigned int n_memtypes, cur_type;
|
||||
|
||||
/* no-huge does not need this at all */
|
||||
if (internal_config.no_hugetlbfs)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* figuring out amount of memory we're going to have is a long and very
|
||||
* involved process. the basic element we're operating with is a memory
|
||||
* type, defined as a combination of NUMA node ID and page size (so that
|
||||
* e.g. 2 sockets with 2 page sizes yield 4 memory types in total).
|
||||
*
|
||||
* deciding amount of memory going towards each memory type is a
|
||||
* balancing act between maximum segments per type, maximum memory per
|
||||
* type, and number of detected NUMA nodes. the goal is to make sure
|
||||
* each memory type gets at least one memseg list.
|
||||
*
|
||||
* the total amount of memory is limited by RTE_MAX_MEM_MB value.
|
||||
*
|
||||
* the total amount of memory per type is limited by either
|
||||
* RTE_MAX_MEM_MB_PER_TYPE, or by RTE_MAX_MEM_MB divided by the number
|
||||
* of detected NUMA nodes. additionally, maximum number of segments per
|
||||
* type is also limited by RTE_MAX_MEMSEG_PER_TYPE. this is because for
|
||||
* smaller page sizes, it can take hundreds of thousands of segments to
|
||||
* reach the above specified per-type memory limits.
|
||||
*
|
||||
* additionally, each type may have multiple memseg lists associated
|
||||
* with it, each limited by either RTE_MAX_MEM_MB_PER_LIST for bigger
|
||||
* page sizes, or RTE_MAX_MEMSEG_PER_LIST segments for smaller ones.
|
||||
*
|
||||
* the number of memseg lists per type is decided based on the above
|
||||
* limits, and also taking number of detected NUMA nodes, to make sure
|
||||
* that we don't run out of memseg lists before we populate all NUMA
|
||||
* nodes with memory.
|
||||
*
|
||||
* we do this in three stages. first, we collect the number of types.
|
||||
* then, we figure out memory constraints and populate the list of
|
||||
* would-be memseg lists. then, we go ahead and allocate the memseg
|
||||
* lists.
|
||||
*/
|
||||
|
||||
/* create space for mem types */
|
||||
n_memtypes = internal_config.num_hugepage_sizes * rte_socket_count();
|
||||
memtypes = calloc(n_memtypes, sizeof(*memtypes));
|
||||
if (memtypes == NULL) {
|
||||
RTE_LOG(ERR, EAL, "Cannot allocate space for memory types\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* populate mem types */
|
||||
cur_type = 0;
|
||||
for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
|
||||
hpi_idx++) {
|
||||
struct hugepage_info *hpi;
|
||||
uint64_t hugepage_sz;
|
||||
|
||||
hpi = &internal_config.hugepage_info[hpi_idx];
|
||||
hugepage_sz = hpi->hugepage_sz;
|
||||
|
||||
for (i = 0; i < (int) rte_socket_count(); i++, cur_type++) {
|
||||
int socket_id = rte_socket_id_by_idx(i);
|
||||
|
||||
#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
|
||||
/* we can still sort pages by socket in legacy mode */
|
||||
if (!internal_config.legacy_mem && socket_id > 0)
|
||||
break;
|
||||
#endif
|
||||
memtypes[cur_type].page_sz = hugepage_sz;
|
||||
memtypes[cur_type].socket_id = socket_id;
|
||||
|
||||
RTE_LOG(DEBUG, EAL, "Detected memory type: "
|
||||
"socket_id:%u hugepage_sz:%" PRIu64 "\n",
|
||||
socket_id, hugepage_sz);
|
||||
}
|
||||
}
|
||||
/* number of memtypes could have been lower due to no NUMA support */
|
||||
n_memtypes = cur_type;
|
||||
|
||||
/* set up limits for types */
|
||||
max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
|
||||
max_mem_per_type = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20,
|
||||
max_mem / n_memtypes);
|
||||
/*
|
||||
* limit maximum number of segment lists per type to ensure there's
|
||||
* space for memseg lists for all NUMA nodes with all page sizes
|
||||
*/
|
||||
max_seglists_per_type = RTE_MAX_MEMSEG_LISTS / n_memtypes;
|
||||
|
||||
if (max_seglists_per_type == 0) {
|
||||
RTE_LOG(ERR, EAL, "Cannot accommodate all memory types, please increase %s\n",
|
||||
RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* go through all mem types and create segment lists */
|
||||
msl_idx = 0;
|
||||
for (cur_type = 0; cur_type < n_memtypes; cur_type++) {
|
||||
unsigned int cur_seglist, n_seglists, n_segs;
|
||||
unsigned int max_segs_per_type, max_segs_per_list;
|
||||
struct memtype *type = &memtypes[cur_type];
|
||||
uint64_t max_mem_per_list, pagesz;
|
||||
int socket_id;
|
||||
|
||||
pagesz = type->page_sz;
|
||||
socket_id = type->socket_id;
|
||||
|
||||
/*
|
||||
* we need to create segment lists for this type. we must take
|
||||
* into account the following things:
|
||||
*
|
||||
* 1. total amount of memory we can use for this memory type
|
||||
* 2. total amount of memory per memseg list allowed
|
||||
* 3. number of segments needed to fit the amount of memory
|
||||
* 4. number of segments allowed per type
|
||||
* 5. number of segments allowed per memseg list
|
||||
* 6. number of memseg lists we are allowed to take up
|
||||
*/
|
||||
|
||||
/* calculate how much segments we will need in total */
|
||||
max_segs_per_type = max_mem_per_type / pagesz;
|
||||
/* limit number of segments to maximum allowed per type */
|
||||
max_segs_per_type = RTE_MIN(max_segs_per_type,
|
||||
(unsigned int)RTE_MAX_MEMSEG_PER_TYPE);
|
||||
/* limit number of segments to maximum allowed per list */
|
||||
max_segs_per_list = RTE_MIN(max_segs_per_type,
|
||||
(unsigned int)RTE_MAX_MEMSEG_PER_LIST);
|
||||
|
||||
/* calculate how much memory we can have per segment list */
|
||||
max_mem_per_list = RTE_MIN(max_segs_per_list * pagesz,
|
||||
(uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20);
|
||||
|
||||
/* calculate how many segments each segment list will have */
|
||||
n_segs = RTE_MIN(max_segs_per_list, max_mem_per_list / pagesz);
|
||||
|
||||
/* calculate how many segment lists we can have */
|
||||
n_seglists = RTE_MIN(max_segs_per_type / n_segs,
|
||||
max_mem_per_type / max_mem_per_list);
|
||||
|
||||
/* limit number of segment lists according to our maximum */
|
||||
n_seglists = RTE_MIN(n_seglists, max_seglists_per_type);
|
||||
|
||||
RTE_LOG(DEBUG, EAL, "Creating %i segment lists: "
|
||||
"n_segs:%i socket_id:%i hugepage_sz:%" PRIu64 "\n",
|
||||
n_seglists, n_segs, socket_id, pagesz);
|
||||
|
||||
/* create all segment lists */
|
||||
for (cur_seglist = 0; cur_seglist < n_seglists; cur_seglist++) {
|
||||
if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
|
||||
RTE_LOG(ERR, EAL,
|
||||
"No more space in memseg lists, please increase %s\n",
|
||||
RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
|
||||
goto out;
|
||||
}
|
||||
msl = &mcfg->memsegs[msl_idx++];
|
||||
|
||||
if (memseg_list_init(msl, pagesz, n_segs,
|
||||
socket_id, cur_seglist))
|
||||
goto out;
|
||||
|
||||
if (memseg_list_alloc(msl)) {
|
||||
RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* we're successful */
|
||||
ret = 0;
|
||||
out:
|
||||
free(memtypes);
|
||||
return ret;
|
||||
return eal_dynmem_memseg_lists_init();
|
||||
}
|
||||
|
||||
static int
|
||||
@ -2369,7 +1868,7 @@ memseg_secondary_init(void)
|
||||
}
|
||||
|
||||
/* preallocate VA space */
|
||||
if (memseg_list_alloc(msl)) {
|
||||
if (eal_memseg_list_alloc(msl, 0)) {
|
||||
RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
|
||||
return -1;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user