2018-01-29 14:11:25 +01:00
|
|
|
/* SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
* Copyright(c) 2010-2018 Intel Corporation.
|
|
|
|
* Copyright(c) 2014 6WIND S.A.
|
2014-02-10 11:49:10 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdarg.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <pthread.h>
|
|
|
|
#include <syslog.h>
|
|
|
|
#include <getopt.h>
|
|
|
|
#include <sys/file.h>
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <limits.h>
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#include <sys/queue.h>
|
eal: add directory for runtime data
Currently, during runtime, DPDK will store a bunch of files here
and there (in /var/run, /tmp or in $HOME). Fix it by creating a
DPDK-specific runtime directory, under which all runtime data
will be placed. The template for creating this runtime directory
is the following:
<base path>/dpdk/<DPDK prefix>/
Where <base path> is set to either "/var/run" if run as root, or
$XDG_RUNTIME_DIR if run as non-root, with a fallback to /tmp if
$XDG_RUNTIME_DIR is not defined. So, for example, if run as root,
by default all runtime data will be stored at /var/run/dpdk/rte/.
There is no equivalent of "mkdir -p", so we will be creating the
path step by step.
Nothing uses this new path yet, changes for that will come in
next commit.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Reviewed-by: Reshma Pattan <reshma.pattan@intel.com>
2018-05-14 17:27:41 +01:00
|
|
|
#include <sys/stat.h>
|
2014-02-10 11:49:10 +00:00
|
|
|
|
2018-01-21 20:48:06 -05:00
|
|
|
#include <rte_compat.h>
|
2014-02-10 11:49:10 +00:00
|
|
|
#include <rte_common.h>
|
|
|
|
#include <rte_debug.h>
|
|
|
|
#include <rte_memory.h>
|
|
|
|
#include <rte_launch.h>
|
|
|
|
#include <rte_eal.h>
|
2017-03-22 16:19:27 -04:00
|
|
|
#include <rte_errno.h>
|
2014-02-10 11:49:10 +00:00
|
|
|
#include <rte_per_lcore.h>
|
|
|
|
#include <rte_lcore.h>
|
2017-07-11 15:19:28 +01:00
|
|
|
#include <rte_service_component.h>
|
2014-02-10 11:49:10 +00:00
|
|
|
#include <rte_log.h>
|
|
|
|
#include <rte_random.h>
|
|
|
|
#include <rte_cycles.h>
|
|
|
|
#include <rte_string_fns.h>
|
|
|
|
#include <rte_cpuflags.h>
|
|
|
|
#include <rte_interrupts.h>
|
2017-01-19 14:21:35 +00:00
|
|
|
#include <rte_bus.h>
|
2014-07-02 11:14:03 +02:00
|
|
|
#include <rte_dev.h>
|
2014-04-25 13:59:41 +02:00
|
|
|
#include <rte_devargs.h>
|
2014-02-10 11:49:10 +00:00
|
|
|
#include <rte_version.h>
|
2018-04-16 13:13:56 +01:00
|
|
|
#include <rte_vfio.h>
|
2014-02-10 11:49:10 +00:00
|
|
|
#include <malloc_heap.h>
|
2020-04-30 17:01:34 +01:00
|
|
|
#include <rte_telemetry.h>
|
2014-02-10 11:49:10 +00:00
|
|
|
|
|
|
|
#include "eal_private.h"
|
|
|
|
#include "eal_thread.h"
|
|
|
|
#include "eal_internal_cfg.h"
|
|
|
|
#include "eal_filesystem.h"
|
|
|
|
#include "eal_hugepages.h"
|
2014-09-22 10:37:59 +02:00
|
|
|
#include "eal_options.h"
|
2019-07-05 14:10:32 +01:00
|
|
|
#include "eal_memcfg.h"
|
2020-04-23 00:33:32 +05:30
|
|
|
#include "eal_trace.h"
|
2014-02-10 11:49:10 +00:00
|
|
|
|
|
|
|
/* fallback memory size when hugepages are unavailable: 64 MB */
#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)

/* define fd variable here, because file needs to be kept open for the
 * duration of the program, as we hold a write lock on it in the primary proc */
static int mem_cfg_fd = -1;

/* fcntl() write-lock descriptor covering only the memsegs field of the shared
 * rte_mem_config file; holding it marks this process as the primary, while
 * leaving the rest of the file free for other record locks. */
static struct flock wr_lock = {
	.l_type = F_WRLCK,
	.l_whence = SEEK_SET,
	.l_start = offsetof(struct rte_mem_config, memsegs),
	.l_len = RTE_SIZEOF_FIELD(struct rte_mem_config, memsegs),
};

/* internal configuration (per-core) */
struct lcore_config lcore_config[RTE_MAX_LCORE];

/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;

/* base path for the runtime directory when running as root;
 * non-root falls back to $XDG_RUNTIME_DIR or /tmp (see eal_create_runtime_dir) */
static const char *default_runtime_dir = "/var/run";
|
|
|
|
|
eal: add directory for runtime data
Currently, during runtime, DPDK will store a bunch of files here
and there (in /var/run, /tmp or in $HOME). Fix it by creating a
DPDK-specific runtime directory, under which all runtime data
will be placed. The template for creating this runtime directory
is the following:
<base path>/dpdk/<DPDK prefix>/
Where <base path> is set to either "/var/run" if run as root, or
$XDG_RUNTIME_DIR if run as non-root, with a fallback to /tmp if
$XDG_RUNTIME_DIR is not defined. So, for example, if run as root,
by default all runtime data will be stored at /var/run/dpdk/rte/.
There is no equivalent of "mkdir -p", so we will be creating the
path step by step.
Nothing uses this new path yet, changes for that will come in
next commit.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Reviewed-by: Reshma Pattan <reshma.pattan@intel.com>
2018-05-14 17:27:41 +01:00
|
|
|
int
|
|
|
|
eal_create_runtime_dir(void)
|
|
|
|
{
|
2018-05-14 17:27:42 +01:00
|
|
|
const char *directory = default_runtime_dir;
|
eal: add directory for runtime data
Currently, during runtime, DPDK will store a bunch of files here
and there (in /var/run, /tmp or in $HOME). Fix it by creating a
DPDK-specific runtime directory, under which all runtime data
will be placed. The template for creating this runtime directory
is the following:
<base path>/dpdk/<DPDK prefix>/
Where <base path> is set to either "/var/run" if run as root, or
$XDG_RUNTIME_DIR if run as non-root, with a fallback to /tmp if
$XDG_RUNTIME_DIR is not defined. So, for example, if run as root,
by default all runtime data will be stored at /var/run/dpdk/rte/.
There is no equivalent of "mkdir -p", so we will be creating the
path step by step.
Nothing uses this new path yet, changes for that will come in
next commit.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Reviewed-by: Reshma Pattan <reshma.pattan@intel.com>
2018-05-14 17:27:41 +01:00
|
|
|
const char *xdg_runtime_dir = getenv("XDG_RUNTIME_DIR");
|
|
|
|
const char *fallback = "/tmp";
|
2020-06-29 15:37:32 +03:00
|
|
|
char run_dir[PATH_MAX];
|
eal: add directory for runtime data
Currently, during runtime, DPDK will store a bunch of files here
and there (in /var/run, /tmp or in $HOME). Fix it by creating a
DPDK-specific runtime directory, under which all runtime data
will be placed. The template for creating this runtime directory
is the following:
<base path>/dpdk/<DPDK prefix>/
Where <base path> is set to either "/var/run" if run as root, or
$XDG_RUNTIME_DIR if run as non-root, with a fallback to /tmp if
$XDG_RUNTIME_DIR is not defined. So, for example, if run as root,
by default all runtime data will be stored at /var/run/dpdk/rte/.
There is no equivalent of "mkdir -p", so we will be creating the
path step by step.
Nothing uses this new path yet, changes for that will come in
next commit.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Reviewed-by: Reshma Pattan <reshma.pattan@intel.com>
2018-05-14 17:27:41 +01:00
|
|
|
char tmp[PATH_MAX];
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (getuid() != 0) {
|
|
|
|
/* try XDG path first, fall back to /tmp */
|
|
|
|
if (xdg_runtime_dir != NULL)
|
|
|
|
directory = xdg_runtime_dir;
|
|
|
|
else
|
|
|
|
directory = fallback;
|
|
|
|
}
|
|
|
|
/* create DPDK subdirectory under runtime dir */
|
|
|
|
ret = snprintf(tmp, sizeof(tmp), "%s/dpdk", directory);
|
|
|
|
if (ret < 0 || ret == sizeof(tmp)) {
|
|
|
|
RTE_LOG(ERR, EAL, "Error creating DPDK runtime path name\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* create prefix-specific subdirectory under DPDK runtime dir */
|
2020-06-29 15:37:32 +03:00
|
|
|
ret = snprintf(run_dir, sizeof(run_dir), "%s/%s",
|
eal: fix strdup usages in internal config
Currently, we use strdup in a few places to store command-line
parameter values for certain internal config values. There are
several issues with that.
First of all, they're never freed, so memory ends up leaking
either after EAL exit, or when these command-line options are
supplied multiple times.
Second of all, they're defined as `const char *`, so they
*cannot* be freed even if we wanted to.
Finally, strdup may return NULL, which will be stored in the
config. For most fields, NULL is a valid value, but for the
default prefix, the value is always expected to be valid.
To fix all of this, three things are done. First, we change
the definitions of these values to `char *` as opposed to
`const char *`. This does not break the ABI, and previous
code assumes constness (which is more restrictive), so it's
safe to do so.
Then, fix all usages of strdup to check return value, and add
a cleanup function that will free the memory occupied by
these strings, as well as freeing them before assigning a new
value to prevent leaks when parameter is specified multiple
times.
And finally, add an internal API to query hugefile prefix, so
that, absent of a valid value, a default value will be
returned, and also fix up all usages of hugefile prefix to
use this API instead of accessing hugefile prefix directly.
Bugzilla ID: 108
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
2019-01-10 13:38:59 +00:00
|
|
|
tmp, eal_get_hugefile_prefix());
|
2020-06-29 15:37:32 +03:00
|
|
|
if (ret < 0 || ret == sizeof(run_dir)) {
|
eal: add directory for runtime data
Currently, during runtime, DPDK will store a bunch of files here
and there (in /var/run, /tmp or in $HOME). Fix it by creating a
DPDK-specific runtime directory, under which all runtime data
will be placed. The template for creating this runtime directory
is the following:
<base path>/dpdk/<DPDK prefix>/
Where <base path> is set to either "/var/run" if run as root, or
$XDG_RUNTIME_DIR if run as non-root, with a fallback to /tmp if
$XDG_RUNTIME_DIR is not defined. So, for example, if run as root,
by default all runtime data will be stored at /var/run/dpdk/rte/.
There is no equivalent of "mkdir -p", so we will be creating the
path step by step.
Nothing uses this new path yet, changes for that will come in
next commit.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Reviewed-by: Reshma Pattan <reshma.pattan@intel.com>
2018-05-14 17:27:41 +01:00
|
|
|
RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* create the path if it doesn't exist. no "mkdir -p" here, so do it
|
|
|
|
* step by step.
|
|
|
|
*/
|
2018-05-18 15:55:35 +02:00
|
|
|
ret = mkdir(tmp, 0700);
|
eal: add directory for runtime data
Currently, during runtime, DPDK will store a bunch of files here
and there (in /var/run, /tmp or in $HOME). Fix it by creating a
DPDK-specific runtime directory, under which all runtime data
will be placed. The template for creating this runtime directory
is the following:
<base path>/dpdk/<DPDK prefix>/
Where <base path> is set to either "/var/run" if run as root, or
$XDG_RUNTIME_DIR if run as non-root, with a fallback to /tmp if
$XDG_RUNTIME_DIR is not defined. So, for example, if run as root,
by default all runtime data will be stored at /var/run/dpdk/rte/.
There is no equivalent of "mkdir -p", so we will be creating the
path step by step.
Nothing uses this new path yet, changes for that will come in
next commit.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Reviewed-by: Reshma Pattan <reshma.pattan@intel.com>
2018-05-14 17:27:41 +01:00
|
|
|
if (ret < 0 && errno != EEXIST) {
|
|
|
|
RTE_LOG(ERR, EAL, "Error creating '%s': %s\n",
|
|
|
|
tmp, strerror(errno));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2020-06-29 15:37:32 +03:00
|
|
|
ret = mkdir(run_dir, 0700);
|
eal: add directory for runtime data
Currently, during runtime, DPDK will store a bunch of files here
and there (in /var/run, /tmp or in $HOME). Fix it by creating a
DPDK-specific runtime directory, under which all runtime data
will be placed. The template for creating this runtime directory
is the following:
<base path>/dpdk/<DPDK prefix>/
Where <base path> is set to either "/var/run" if run as root, or
$XDG_RUNTIME_DIR if run as non-root, with a fallback to /tmp if
$XDG_RUNTIME_DIR is not defined. So, for example, if run as root,
by default all runtime data will be stored at /var/run/dpdk/rte/.
There is no equivalent of "mkdir -p", so we will be creating the
path step by step.
Nothing uses this new path yet, changes for that will come in
next commit.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Reviewed-by: Reshma Pattan <reshma.pattan@intel.com>
2018-05-14 17:27:41 +01:00
|
|
|
if (ret < 0 && errno != EEXIST) {
|
|
|
|
RTE_LOG(ERR, EAL, "Error creating '%s': %s\n",
|
2020-06-29 15:37:32 +03:00
|
|
|
run_dir, strerror(errno));
|
eal: add directory for runtime data
Currently, during runtime, DPDK will store a bunch of files here
and there (in /var/run, /tmp or in $HOME). Fix it by creating a
DPDK-specific runtime directory, under which all runtime data
will be placed. The template for creating this runtime directory
is the following:
<base path>/dpdk/<DPDK prefix>/
Where <base path> is set to either "/var/run" if run as root, or
$XDG_RUNTIME_DIR if run as non-root, with a fallback to /tmp if
$XDG_RUNTIME_DIR is not defined. So, for example, if run as root,
by default all runtime data will be stored at /var/run/dpdk/rte/.
There is no equivalent of "mkdir -p", so we will be creating the
path step by step.
Nothing uses this new path yet, changes for that will come in
next commit.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Reviewed-by: Reshma Pattan <reshma.pattan@intel.com>
2018-05-14 17:27:41 +01:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2020-06-29 15:37:32 +03:00
|
|
|
if (eal_set_runtime_dir(run_dir, sizeof(run_dir)))
|
|
|
|
return -1;
|
|
|
|
|
eal: add directory for runtime data
Currently, during runtime, DPDK will store a bunch of files here
and there (in /var/run, /tmp or in $HOME). Fix it by creating a
DPDK-specific runtime directory, under which all runtime data
will be placed. The template for creating this runtime directory
is the following:
<base path>/dpdk/<DPDK prefix>/
Where <base path> is set to either "/var/run" if run as root, or
$XDG_RUNTIME_DIR if run as non-root, with a fallback to /tmp if
$XDG_RUNTIME_DIR is not defined. So, for example, if run as root,
by default all runtime data will be stored at /var/run/dpdk/rte/.
There is no equivalent of "mkdir -p", so we will be creating the
path step by step.
Nothing uses this new path yet, changes for that will come in
next commit.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Reviewed-by: Reshma Pattan <reshma.pattan@intel.com>
2018-05-14 17:27:41 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
eal: clean up unused files on initialization
When creating process data structures, EAL will create many files
in EAL runtime directory. Because we allow multiple secondary
processes to run, each secondary process gets their own unique
file. With many secondary processes running and exiting on the
system, runtime directory will, over time, create enormous amounts
of sockets, fbarray files and other stuff that just sits there
unused because the process that allocated it has died a long time
ago. This may lead to exhaustion of disk (or RAM) space in the
runtime directory.
Fix this by removing every unlocked file at initialization that
matches either socket or fbarray naming convention. We cannot be
sure of any other files, so we'll leave them alone. Also, remove
similar code from mp socket code.
We do it at the end of init, rather than at the beginning, because
secondary process will use primary process' data structures even
if the primary itself has died, and we don't want to remove those
before we lock them.
Bugzilla ID: 106
Cc: stable@dpdk.org
Reported-by: Vipin Varghese <vipin.varghese@intel.com>
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
2018-11-13 15:54:44 +00:00
|
|
|
/*
 * Remove stale per-process files from the runtime directory.
 *
 * FreeBSD doesn't need this implemented for now, because, unlike Linux,
 * FreeBSD doesn't create per-process files, so no need to clean up.
 * Always reports success.
 */
int
eal_clean_runtime_dir(void)
{
	return 0;
}
|
|
|
|
|
2014-02-10 11:49:10 +00:00
|
|
|
/* parse a sysfs (or other) file containing one integer value */
|
|
|
|
int
|
|
|
|
eal_parse_sysfs_value(const char *filename, unsigned long *val)
|
|
|
|
{
|
|
|
|
FILE *f;
|
|
|
|
char buf[BUFSIZ];
|
|
|
|
char *end = NULL;
|
|
|
|
|
|
|
|
if ((f = fopen(filename, "r")) == NULL) {
|
|
|
|
RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
|
|
|
|
__func__, filename);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (fgets(buf, sizeof(buf), f) == NULL) {
|
|
|
|
RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
|
|
|
|
__func__, filename);
|
|
|
|
fclose(f);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
*val = strtoul(buf, &end, 0);
|
|
|
|
if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
|
|
|
|
RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
|
|
|
|
__func__, filename);
|
|
|
|
fclose(f);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
fclose(f);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* create memory configuration in shared/mmap memory. Take out
|
|
|
|
* a write lock on the memsegs, so we can auto-detect primary/secondary.
|
|
|
|
* This means we never close the file while running (auto-close on exit).
|
|
|
|
* We also don't lock the whole file, so that in future we can use read-locks
|
|
|
|
* on other parts, e.g. memzones, to detect if there are running secondary
|
|
|
|
* processes. */
|
2019-06-10 10:08:29 +03:00
|
|
|
static int
|
2014-02-10 11:49:10 +00:00
|
|
|
rte_eal_config_create(void)
|
|
|
|
{
|
2020-06-29 15:37:32 +03:00
|
|
|
struct rte_config *config = rte_eal_get_configuration();
|
|
|
|
const struct internal_config *internal_conf =
|
|
|
|
eal_get_internal_configuration();
|
2019-10-24 13:36:50 +01:00
|
|
|
size_t page_sz = sysconf(_SC_PAGE_SIZE);
|
2020-06-29 15:37:32 +03:00
|
|
|
size_t cfg_len = sizeof(struct rte_mem_config);
|
2019-10-24 13:36:50 +01:00
|
|
|
size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz);
|
|
|
|
void *rte_mem_cfg_addr, *mapped_mem_cfg_addr;
|
2014-02-10 11:49:10 +00:00
|
|
|
int retval;
|
|
|
|
|
|
|
|
const char *pathname = eal_runtime_config_path();
|
|
|
|
|
2020-06-29 15:37:32 +03:00
|
|
|
if (internal_conf->no_shconf)
|
2019-06-10 10:08:29 +03:00
|
|
|
return 0;
|
2014-02-10 11:49:10 +00:00
|
|
|
|
2019-10-24 16:17:56 +01:00
|
|
|
/* map the config before base address so that we don't waste a page */
|
2020-06-29 15:37:32 +03:00
|
|
|
if (internal_conf->base_virtaddr != 0)
|
2019-10-24 16:17:56 +01:00
|
|
|
rte_mem_cfg_addr = (void *)
|
2020-06-29 15:37:32 +03:00
|
|
|
RTE_ALIGN_FLOOR(internal_conf->base_virtaddr -
|
2019-10-24 13:36:50 +01:00
|
|
|
sizeof(struct rte_mem_config), page_sz);
|
2019-10-24 16:17:56 +01:00
|
|
|
else
|
|
|
|
rte_mem_cfg_addr = NULL;
|
|
|
|
|
2014-02-10 11:49:10 +00:00
|
|
|
if (mem_cfg_fd < 0){
|
2019-04-03 17:00:34 +01:00
|
|
|
mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0600);
|
2019-06-10 10:08:29 +03:00
|
|
|
if (mem_cfg_fd < 0) {
|
|
|
|
RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n",
|
|
|
|
pathname);
|
|
|
|
return -1;
|
|
|
|
}
|
2014-02-10 11:49:10 +00:00
|
|
|
}
|
|
|
|
|
2019-10-24 13:36:50 +01:00
|
|
|
retval = ftruncate(mem_cfg_fd, cfg_len);
|
2014-02-10 11:49:10 +00:00
|
|
|
if (retval < 0){
|
|
|
|
close(mem_cfg_fd);
|
2019-06-10 10:08:29 +03:00
|
|
|
mem_cfg_fd = -1;
|
|
|
|
RTE_LOG(ERR, EAL, "Cannot resize '%s' for rte_mem_config\n",
|
|
|
|
pathname);
|
|
|
|
return -1;
|
2014-02-10 11:49:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
|
|
|
|
if (retval < 0){
|
|
|
|
close(mem_cfg_fd);
|
2019-06-10 10:08:29 +03:00
|
|
|
mem_cfg_fd = -1;
|
|
|
|
RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another primary "
|
|
|
|
"process running?\n", pathname);
|
|
|
|
return -1;
|
2014-02-10 11:49:10 +00:00
|
|
|
}
|
|
|
|
|
2019-10-24 13:36:50 +01:00
|
|
|
/* reserve space for config */
|
|
|
|
rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr,
|
|
|
|
&cfg_len_aligned, page_sz, 0, 0);
|
|
|
|
if (rte_mem_cfg_addr == NULL) {
|
2019-06-10 10:08:29 +03:00
|
|
|
RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n");
|
|
|
|
close(mem_cfg_fd);
|
|
|
|
mem_cfg_fd = -1;
|
|
|
|
return -1;
|
2014-02-10 11:49:10 +00:00
|
|
|
}
|
2019-10-24 13:36:50 +01:00
|
|
|
|
|
|
|
/* remap the actual file into the space we've just reserved */
|
|
|
|
mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr,
|
|
|
|
cfg_len_aligned, PROT_READ | PROT_WRITE,
|
|
|
|
MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0);
|
|
|
|
if (mapped_mem_cfg_addr == MAP_FAILED) {
|
|
|
|
RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n");
|
|
|
|
munmap(rte_mem_cfg_addr, cfg_len);
|
|
|
|
close(mem_cfg_fd);
|
|
|
|
mem_cfg_fd = -1;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2020-06-29 15:37:32 +03:00
|
|
|
memcpy(rte_mem_cfg_addr, config->mem_config, sizeof(struct rte_mem_config));
|
|
|
|
config->mem_config = rte_mem_cfg_addr;
|
2019-06-10 10:08:29 +03:00
|
|
|
|
2019-07-12 15:48:08 +01:00
|
|
|
/* store address of the config in the config itself so that secondary
|
|
|
|
* processes could later map the config into this exact location
|
|
|
|
*/
|
2020-06-29 15:37:32 +03:00
|
|
|
config->mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
|
2019-06-10 10:08:29 +03:00
|
|
|
return 0;
|
2014-02-10 11:49:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* attach to an existing shared memory config */
|
2019-06-10 10:08:29 +03:00
|
|
|
static int
|
2014-02-10 11:49:10 +00:00
|
|
|
rte_eal_config_attach(void)
|
|
|
|
{
|
|
|
|
void *rte_mem_cfg_addr;
|
|
|
|
const char *pathname = eal_runtime_config_path();
|
2020-06-29 15:37:32 +03:00
|
|
|
struct rte_config *config = rte_eal_get_configuration();
|
|
|
|
const struct internal_config *internal_conf =
|
|
|
|
eal_get_internal_configuration();
|
|
|
|
|
2014-02-10 11:49:10 +00:00
|
|
|
|
2020-06-29 15:37:32 +03:00
|
|
|
if (internal_conf->no_shconf)
|
2019-06-10 10:08:29 +03:00
|
|
|
return 0;
|
2014-02-10 11:49:10 +00:00
|
|
|
|
|
|
|
if (mem_cfg_fd < 0){
|
|
|
|
mem_cfg_fd = open(pathname, O_RDWR);
|
2019-06-10 10:08:29 +03:00
|
|
|
if (mem_cfg_fd < 0) {
|
|
|
|
RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n",
|
|
|
|
pathname);
|
|
|
|
return -1;
|
|
|
|
}
|
2014-02-10 11:49:10 +00:00
|
|
|
}
|
|
|
|
|
2020-06-29 15:37:32 +03:00
|
|
|
rte_mem_cfg_addr = mmap(NULL, sizeof(*config->mem_config),
|
2019-06-27 12:33:46 +01:00
|
|
|
PROT_READ, MAP_SHARED, mem_cfg_fd, 0);
|
|
|
|
/* don't close the fd here, it will be closed on reattach */
|
2019-06-10 10:08:29 +03:00
|
|
|
if (rte_mem_cfg_addr == MAP_FAILED) {
|
2019-06-27 12:33:46 +01:00
|
|
|
close(mem_cfg_fd);
|
|
|
|
mem_cfg_fd = -1;
|
2019-06-10 10:08:29 +03:00
|
|
|
RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! error %i (%s)\n",
|
|
|
|
errno, strerror(errno));
|
|
|
|
return -1;
|
|
|
|
}
|
2014-02-10 11:49:10 +00:00
|
|
|
|
2020-06-29 15:37:32 +03:00
|
|
|
config->mem_config = rte_mem_cfg_addr;
|
2019-06-10 10:08:29 +03:00
|
|
|
|
|
|
|
return 0;
|
2014-02-10 11:49:10 +00:00
|
|
|
}
|
|
|
|
|
2019-06-27 12:33:46 +01:00
|
|
|
/* reattach the shared config at exact memory location primary process has it */
|
|
|
|
/* reattach the shared config at exact memory location primary process has it */
static int
rte_eal_config_reattach(void)
{
	struct rte_mem_config *mem_config;
	void *rte_mem_cfg_addr;
	struct rte_config *config = rte_eal_get_configuration();
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (internal_conf->no_shconf)
		return 0;

	/* save the address primary process has mapped shared config to */
	rte_mem_cfg_addr =
		(void *)(uintptr_t)config->mem_config->mem_cfg_addr;

	/* unmap original config */
	munmap(config->mem_config, sizeof(struct rte_mem_config));

	/* remap the config at proper address */
	mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr,
			sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED,
			mem_cfg_fd, 0);

	/* the fd is no longer needed once the final mapping exists */
	close(mem_cfg_fd);
	mem_cfg_fd = -1;

	/* without MAP_FIXED, mmap may succeed at a *different* address;
	 * that is as fatal as outright failure, since secondary must share
	 * the primary's exact layout */
	if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) {
		if (mem_config != MAP_FAILED) {
			/* errno is stale, don't use */
			RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config at [%p], got [%p]"
				" - please use '--" OPT_BASE_VIRTADDR
				"' option\n",
				rte_mem_cfg_addr, mem_config);
			munmap(mem_config, sizeof(struct rte_mem_config));
			return -1;
		}
		RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! error %i (%s)\n",
			errno, strerror(errno));
		return -1;
	}

	config->mem_config = mem_config;

	return 0;
}
|
|
|
|
|
2014-02-10 11:49:10 +00:00
|
|
|
/* Detect if we are a primary or a secondary process */
|
2014-11-20 22:57:22 +01:00
|
|
|
enum rte_proc_type_t
|
2014-02-10 11:49:10 +00:00
|
|
|
eal_proc_type_detect(void)
|
|
|
|
{
|
|
|
|
enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
|
|
|
|
const char *pathname = eal_runtime_config_path();
|
2020-06-29 15:37:32 +03:00
|
|
|
const struct internal_config *internal_conf =
|
|
|
|
eal_get_internal_configuration();
|
2014-02-10 11:49:10 +00:00
|
|
|
|
2018-07-18 11:53:42 +01:00
|
|
|
/* if there no shared config, there can be no secondary processes */
|
2020-06-29 15:37:32 +03:00
|
|
|
if (!internal_conf->no_shconf) {
|
2018-07-18 11:53:42 +01:00
|
|
|
/* if we can open the file but not get a write-lock we are a
|
|
|
|
* secondary process. NOTE: if we get a file handle back, we
|
|
|
|
* keep that open and don't close it to prevent a race condition
|
|
|
|
* between multiple opens.
|
|
|
|
*/
|
|
|
|
if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
|
|
|
|
(fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
|
|
|
|
ptype = RTE_PROC_SECONDARY;
|
|
|
|
}
|
2014-02-10 11:49:10 +00:00
|
|
|
|
|
|
|
RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
|
|
|
|
ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
|
|
|
|
|
|
|
|
return ptype;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Sets up rte_config structure with the pointer to shared memory config.*/
|
2019-06-10 10:08:29 +03:00
|
|
|
/* Sets up rte_config structure with the pointer to shared memory config.*/
static int
rte_config_init(void)
{
	struct rte_config *config = rte_eal_get_configuration();
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	config->process_type = internal_conf->process_type;

	switch (config->process_type) {
	case RTE_PROC_PRIMARY:
		/* primary creates the shared config, then publishes its own
		 * internal settings into it */
		if (rte_eal_config_create() < 0)
			return -1;
		eal_mcfg_update_from_internal();
		break;
	case RTE_PROC_SECONDARY:
		/* attach read-only, wait for the primary to finish init,
		 * verify version compatibility, then remap at the primary's
		 * address and register with it - order matters here */
		if (rte_eal_config_attach() < 0)
			return -1;
		eal_mcfg_wait_complete();
		if (eal_mcfg_check_version() < 0) {
			RTE_LOG(ERR, EAL, "Primary and secondary process DPDK version mismatch\n");
			return -1;
		}
		if (rte_eal_config_reattach() < 0)
			return -1;
		if (!__rte_mp_enable()) {
			RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
			return -1;
		}
		/* pull shared settings back into our internal config */
		eal_mcfg_update_internal();
		break;
	case RTE_PROC_AUTO:
	case RTE_PROC_INVALID:
		/* AUTO should have been resolved by detection before this
		 * point; reaching here means misconfiguration */
		RTE_LOG(ERR, EAL, "Invalid process type %d\n",
			config->process_type);
		return -1;
	}

	return 0;
}
|
|
|
|
|
|
|
|
/* display usage */
|
|
|
|
static void
|
|
|
|
eal_usage(const char *prgname)
|
|
|
|
{
|
2020-06-29 15:37:33 +03:00
|
|
|
rte_usage_hook_t hook = eal_get_application_usage_hook();
|
|
|
|
|
2014-09-22 10:37:59 +02:00
|
|
|
printf("\nUsage: %s ", prgname);
|
|
|
|
eal_common_usage();
|
2014-02-10 11:49:10 +00:00
|
|
|
/* Allow the application to print its usage message too if hook is set */
|
2020-06-29 15:37:33 +03:00
|
|
|
if (hook) {
|
2014-02-10 11:49:10 +00:00
|
|
|
printf("===== Application Usage =====\n\n");
|
2020-06-29 15:37:33 +03:00
|
|
|
(hook)(prgname);
|
2014-02-10 11:49:10 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline size_t
|
|
|
|
eal_get_hugepage_mem_size(void)
|
|
|
|
{
|
|
|
|
uint64_t size = 0;
|
|
|
|
unsigned i, j;
|
2020-06-29 15:37:32 +03:00
|
|
|
struct internal_config *internal_conf =
|
|
|
|
eal_get_internal_configuration();
|
2014-02-10 11:49:10 +00:00
|
|
|
|
2020-06-29 15:37:32 +03:00
|
|
|
for (i = 0; i < internal_conf->num_hugepage_sizes; i++) {
|
|
|
|
struct hugepage_info *hpi = &internal_conf->hugepage_info[i];
|
2018-04-11 13:30:33 +01:00
|
|
|
if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0) {
|
2014-02-10 11:49:10 +00:00
|
|
|
for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
|
|
|
|
size += hpi->hugepage_sz * hpi->num_pages[j];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
|
|
|
|
}
|
|
|
|
|
2015-06-08 16:55:52 -05:00
|
|
|
/* Parse the arguments for --log-level only */
|
|
|
|
static void
|
|
|
|
eal_log_level_parse(int argc, char **argv)
|
|
|
|
{
|
|
|
|
int opt;
|
|
|
|
char **argvopt;
|
|
|
|
int option_index;
|
2015-10-19 21:13:10 +08:00
|
|
|
const int old_optind = optind;
|
|
|
|
const int old_optopt = optopt;
|
|
|
|
const int old_optreset = optreset;
|
|
|
|
char * const old_optarg = optarg;
|
2020-06-29 15:37:32 +03:00
|
|
|
struct internal_config *internal_conf =
|
|
|
|
eal_get_internal_configuration();
|
2015-06-08 16:55:52 -05:00
|
|
|
|
|
|
|
argvopt = argv;
|
2015-10-19 21:13:10 +08:00
|
|
|
optind = 1;
|
|
|
|
optreset = 1;
|
2015-06-08 16:55:52 -05:00
|
|
|
|
|
|
|
while ((opt = getopt_long(argc, argvopt, eal_short_options,
|
|
|
|
eal_long_options, &option_index)) != EOF) {
|
|
|
|
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* getopt is not happy, stop right now */
|
|
|
|
if (opt == '?')
|
|
|
|
break;
|
|
|
|
|
|
|
|
ret = (opt == OPT_LOG_LEVEL_NUM) ?
|
2020-06-29 15:37:32 +03:00
|
|
|
eal_parse_common_option(opt, optarg, internal_conf) : 0;
|
2015-06-08 16:55:52 -05:00
|
|
|
|
|
|
|
/* common parser is not happy */
|
|
|
|
if (ret < 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2015-10-19 21:13:10 +08:00
|
|
|
/* restore getopt lib */
|
|
|
|
optind = old_optind;
|
|
|
|
optopt = old_optopt;
|
|
|
|
optreset = old_optreset;
|
|
|
|
optarg = old_optarg;
|
2015-06-08 16:55:52 -05:00
|
|
|
}
|
|
|
|
|
2014-02-10 11:49:10 +00:00
|
|
|
/* Parse the argument given in the command line of the application */
|
|
|
|
static int
|
|
|
|
eal_parse_args(int argc, char **argv)
|
|
|
|
{
|
2014-11-20 22:57:22 +01:00
|
|
|
int opt, ret;
|
2014-02-10 11:49:10 +00:00
|
|
|
char **argvopt;
|
|
|
|
int option_index;
|
|
|
|
char *prgname = argv[0];
|
2015-10-19 21:13:10 +08:00
|
|
|
const int old_optind = optind;
|
|
|
|
const int old_optopt = optopt;
|
|
|
|
const int old_optreset = optreset;
|
|
|
|
char * const old_optarg = optarg;
|
2020-06-29 15:37:32 +03:00
|
|
|
struct internal_config *internal_conf =
|
|
|
|
eal_get_internal_configuration();
|
2014-02-10 11:49:10 +00:00
|
|
|
|
|
|
|
argvopt = argv;
|
2015-10-19 21:13:10 +08:00
|
|
|
optind = 1;
|
|
|
|
optreset = 1;
|
2014-02-10 11:49:10 +00:00
|
|
|
|
2014-09-22 10:37:59 +02:00
|
|
|
while ((opt = getopt_long(argc, argvopt, eal_short_options,
|
|
|
|
eal_long_options, &option_index)) != EOF) {
|
2014-02-10 11:49:10 +00:00
|
|
|
|
2020-04-30 17:01:35 +01:00
|
|
|
/* getopt didn't recognise the option */
|
2015-01-29 17:51:17 +01:00
|
|
|
if (opt == '?') {
|
|
|
|
eal_usage(prgname);
|
2015-10-19 21:13:10 +08:00
|
|
|
ret = -1;
|
|
|
|
goto out;
|
2015-01-29 17:51:17 +01:00
|
|
|
}
|
2014-02-10 11:49:10 +00:00
|
|
|
|
2020-06-29 15:37:32 +03:00
|
|
|
ret = eal_parse_common_option(opt, optarg, internal_conf);
|
2014-09-22 10:37:59 +02:00
|
|
|
/* common parser is not happy */
|
|
|
|
if (ret < 0) {
|
|
|
|
eal_usage(prgname);
|
2015-10-19 21:13:10 +08:00
|
|
|
ret = -1;
|
|
|
|
goto out;
|
2014-09-22 10:37:59 +02:00
|
|
|
}
|
|
|
|
/* common parser handled this option */
|
2014-11-17 10:14:10 +01:00
|
|
|
if (ret == 0)
|
2014-09-22 10:37:59 +02:00
|
|
|
continue;
|
|
|
|
|
|
|
|
switch (opt) {
|
2017-10-06 13:15:29 +05:30
|
|
|
case OPT_MBUF_POOL_OPS_NAME_NUM:
|
eal: fix strdup usages in internal config
Currently, we use strdup in a few places to store command-line
parameter values for certain internal config values. There are
several issues with that.
First of all, they're never freed, so memory ends up leaking
either after EAL exit, or when these command-line options are
supplied multiple times.
Second of all, they're defined as `const char *`, so they
*cannot* be freed even if we wanted to.
Finally, strdup may return NULL, which will be stored in the
config. For most fields, NULL is a valid value, but for the
default prefix, the value is always expected to be valid.
To fix all of this, three things are done. First, we change
the definitions of these values to `char *` as opposed to
`const char *`. This does not break the ABI, and previous
code assumes constness (which is more restrictive), so it's
safe to do so.
Then, fix all usages of strdup to check return value, and add
a cleanup function that will free the memory occupied by
these strings, as well as freeing them before assigning a new
value to prevent leaks when parameter is specified multiple
times.
And finally, add an internal API to query hugefile prefix, so
that, absent of a valid value, a default value will be
returned, and also fix up all usages of hugefile prefix to
use this API instead of accessing hugefile prefix directly.
Bugzilla ID: 108
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
2019-01-10 13:38:59 +00:00
|
|
|
{
|
|
|
|
char *ops_name = strdup(optarg);
|
|
|
|
if (ops_name == NULL)
|
|
|
|
RTE_LOG(ERR, EAL, "Could not store mbuf pool ops name\n");
|
|
|
|
else {
|
|
|
|
/* free old ops name */
|
2020-06-29 15:37:32 +03:00
|
|
|
if (internal_conf->user_mbuf_pool_ops_name !=
|
eal: fix strdup usages in internal config
Currently, we use strdup in a few places to store command-line
parameter values for certain internal config values. There are
several issues with that.
First of all, they're never freed, so memory ends up leaking
either after EAL exit, or when these command-line options are
supplied multiple times.
Second of all, they're defined as `const char *`, so they
*cannot* be freed even if we wanted to.
Finally, strdup may return NULL, which will be stored in the
config. For most fields, NULL is a valid value, but for the
default prefix, the value is always expected to be valid.
To fix all of this, three things are done. First, we change
the definitions of these values to `char *` as opposed to
`const char *`. This does not break the ABI, and previous
code assumes constness (which is more restrictive), so it's
safe to do so.
Then, fix all usages of strdup to check return value, and add
a cleanup function that will free the memory occupied by
these strings, as well as freeing them before assigning a new
value to prevent leaks when parameter is specified multiple
times.
And finally, add an internal API to query hugefile prefix, so
that, absent of a valid value, a default value will be
returned, and also fix up all usages of hugefile prefix to
use this API instead of accessing hugefile prefix directly.
Bugzilla ID: 108
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
2019-01-10 13:38:59 +00:00
|
|
|
NULL)
|
2020-06-29 15:37:32 +03:00
|
|
|
free(internal_conf->user_mbuf_pool_ops_name);
|
eal: fix strdup usages in internal config
Currently, we use strdup in a few places to store command-line
parameter values for certain internal config values. There are
several issues with that.
First of all, they're never freed, so memory ends up leaking
either after EAL exit, or when these command-line options are
supplied multiple times.
Second of all, they're defined as `const char *`, so they
*cannot* be freed even if we wanted to.
Finally, strdup may return NULL, which will be stored in the
config. For most fields, NULL is a valid value, but for the
default prefix, the value is always expected to be valid.
To fix all of this, three things are done. First, we change
the definitions of these values to `char *` as opposed to
`const char *`. This does not break the ABI, and previous
code assumes constness (which is more restrictive), so it's
safe to do so.
Then, fix all usages of strdup to check return value, and add
a cleanup function that will free the memory occupied by
these strings, as well as freeing them before assigning a new
value to prevent leaks when parameter is specified multiple
times.
And finally, add an internal API to query hugefile prefix, so
that, absent of a valid value, a default value will be
returned, and also fix up all usages of hugefile prefix to
use this API instead of accessing hugefile prefix directly.
Bugzilla ID: 108
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
2019-01-10 13:38:59 +00:00
|
|
|
|
2020-06-29 15:37:32 +03:00
|
|
|
internal_conf->user_mbuf_pool_ops_name =
|
eal: fix strdup usages in internal config
Currently, we use strdup in a few places to store command-line
parameter values for certain internal config values. There are
several issues with that.
First of all, they're never freed, so memory ends up leaking
either after EAL exit, or when these command-line options are
supplied multiple times.
Second of all, they're defined as `const char *`, so they
*cannot* be freed even if we wanted to.
Finally, strdup may return NULL, which will be stored in the
config. For most fields, NULL is a valid value, but for the
default prefix, the value is always expected to be valid.
To fix all of this, three things are done. First, we change
the definitions of these values to `char *` as opposed to
`const char *`. This does not break the ABI, and previous
code assumes constness (which is more restrictive), so it's
safe to do so.
Then, fix all usages of strdup to check return value, and add
a cleanup function that will free the memory occupied by
these strings, as well as freeing them before assigning a new
value to prevent leaks when parameter is specified multiple
times.
And finally, add an internal API to query hugefile prefix, so
that, absent of a valid value, a default value will be
returned, and also fix up all usages of hugefile prefix to
use this API instead of accessing hugefile prefix directly.
Bugzilla ID: 108
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
2019-01-10 13:38:59 +00:00
|
|
|
ops_name;
|
|
|
|
}
|
2017-10-06 13:15:29 +05:30
|
|
|
break;
|
eal: fix strdup usages in internal config
Currently, we use strdup in a few places to store command-line
parameter values for certain internal config values. There are
several issues with that.
First of all, they're never freed, so memory ends up leaking
either after EAL exit, or when these command-line options are
supplied multiple times.
Second of all, they're defined as `const char *`, so they
*cannot* be freed even if we wanted to.
Finally, strdup may return NULL, which will be stored in the
config. For most fields, NULL is a valid value, but for the
default prefix, the value is always expected to be valid.
To fix all of this, three things are done. First, we change
the definitions of these values to `char *` as opposed to
`const char *`. This does not break the ABI, and previous
code assumes constness (which is more restrictive), so it's
safe to do so.
Then, fix all usages of strdup to check return value, and add
a cleanup function that will free the memory occupied by
these strings, as well as freeing them before assigning a new
value to prevent leaks when parameter is specified multiple
times.
And finally, add an internal API to query hugefile prefix, so
that, absent of a valid value, a default value will be
returned, and also fix up all usages of hugefile prefix to
use this API instead of accessing hugefile prefix directly.
Bugzilla ID: 108
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
2019-01-10 13:38:59 +00:00
|
|
|
}
|
2015-01-29 17:51:17 +01:00
|
|
|
case 'h':
|
|
|
|
eal_usage(prgname);
|
|
|
|
exit(EXIT_SUCCESS);
|
2014-02-10 11:49:10 +00:00
|
|
|
default:
|
2014-09-22 10:38:00 +02:00
|
|
|
if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
|
2014-09-22 10:37:59 +02:00
|
|
|
RTE_LOG(ERR, EAL, "Option %c is not supported "
|
|
|
|
"on FreeBSD\n", opt);
|
2014-09-22 10:38:00 +02:00
|
|
|
} else if (opt >= OPT_LONG_MIN_NUM &&
|
|
|
|
opt < OPT_LONG_MAX_NUM) {
|
|
|
|
RTE_LOG(ERR, EAL, "Option %s is not supported "
|
|
|
|
"on FreeBSD\n",
|
|
|
|
eal_long_options[option_index].name);
|
2014-09-22 10:37:59 +02:00
|
|
|
} else {
|
|
|
|
RTE_LOG(ERR, EAL, "Option %d is not supported "
|
|
|
|
"on FreeBSD\n", opt);
|
|
|
|
}
|
2014-02-10 11:49:10 +00:00
|
|
|
eal_usage(prgname);
|
2015-10-19 21:13:10 +08:00
|
|
|
ret = -1;
|
|
|
|
goto out;
|
2014-02-10 11:49:10 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-07-18 11:53:42 +01:00
|
|
|
/* create runtime data directory */
|
2020-06-29 15:37:32 +03:00
|
|
|
if (internal_conf->no_shconf == 0 &&
|
2018-07-18 11:53:42 +01:00
|
|
|
eal_create_runtime_dir() < 0) {
|
|
|
|
RTE_LOG(ERR, EAL, "Cannot create runtime directory\n");
|
|
|
|
ret = -1;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2020-06-29 15:37:32 +03:00
|
|
|
if (eal_adjust_config(internal_conf) != 0) {
|
2015-10-19 21:13:10 +08:00
|
|
|
ret = -1;
|
|
|
|
goto out;
|
|
|
|
}
|
2014-02-10 11:49:10 +00:00
|
|
|
|
2014-11-17 10:14:10 +01:00
|
|
|
/* sanity checks */
|
2020-06-29 15:37:32 +03:00
|
|
|
if (eal_check_common_options(internal_conf) != 0) {
|
2014-04-25 13:59:41 +02:00
|
|
|
eal_usage(prgname);
|
2015-10-19 21:13:10 +08:00
|
|
|
ret = -1;
|
|
|
|
goto out;
|
2014-02-10 11:49:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (optind >= 0)
|
|
|
|
argv[optind-1] = prgname;
|
|
|
|
ret = optind-1;
|
2015-10-19 21:13:10 +08:00
|
|
|
|
|
|
|
out:
|
|
|
|
/* restore getopt lib */
|
|
|
|
optind = old_optind;
|
|
|
|
optopt = old_optopt;
|
|
|
|
optreset = old_optreset;
|
|
|
|
optarg = old_optarg;
|
|
|
|
|
2014-02-10 11:49:10 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2018-04-11 13:30:04 +01:00
|
|
|
static int
|
mem: replace memseg with memseg lists
Before, we were aggregating multiple pages into one memseg, so the
number of memsegs was small. Now, each page gets its own memseg,
so the list of memsegs is huge. To accommodate the new memseg list
size and to keep the under-the-hood workings sane, the memseg list
is now not just a single list, but multiple lists. To be precise,
each hugepage size available on the system gets one or more memseg
lists, per socket.
In order to support dynamic memory allocation, we reserve all
memory in advance (unless we're in 32-bit legacy mode, in which
case we do not preallocate memory). As in, we do an anonymous
mmap() of the entire maximum size of memory per hugepage size, per
socket (which is limited to either RTE_MAX_MEMSEG_PER_TYPE pages or
RTE_MAX_MEM_MB_PER_TYPE megabytes worth of memory, whichever is the
smaller one), split over multiple lists (which are limited to
either RTE_MAX_MEMSEG_PER_LIST memsegs or RTE_MAX_MEM_MB_PER_LIST
megabytes per list, whichever is the smaller one). There is also
a global limit of CONFIG_RTE_MAX_MEM_MB megabytes, which is mainly
used for 32-bit targets to limit amounts of preallocated memory,
but can be used to place an upper limit on total amount of VA
memory that can be allocated by DPDK application.
So, for each hugepage size, we get (by default) up to 128G worth
of memory, per socket, split into chunks of up to 32G in size.
The address space is claimed at the start, in eal_common_memory.c.
The actual page allocation code is in eal_memalloc.c (Linux-only),
and largely consists of copied EAL memory init code.
Pages in the list are also indexed by address. That is, in order
to figure out where the page belongs, one can simply look at base
address for a memseg list. Similarly, figuring out IOVA address
of a memzone is a matter of finding the right memseg list, getting
offset and dividing by page size to get the appropriate memseg.
This commit also removes rte_eal_dump_physmem_layout() call,
according to deprecation notice [1], and removes that deprecation
notice as well.
On 32-bit targets due to limited VA space, DPDK will no longer
spread memory to different sockets like before. Instead, it will
(by default) allocate all of the memory on socket where master
lcore is. To override this behavior, --socket-mem must be used.
The rest of the changes are really ripple effects from the memseg
change - heap changes, compile fixes, and rewrites to support
fbarray-backed memseg lists. Due to earlier switch to _walk()
functions, most of the changes are simple fixes, however some
of the _walk() calls were switched to memseg list walk, where
it made sense to do so.
Additionally, we are also switching locks from flock() to fcntl().
Down the line, we will be introducing single-file segments option,
and we cannot use flock() locks to lock parts of the file. Therefore,
we will use fcntl() locks for legacy mem as well, in case someone is
unfortunate enough to accidentally start legacy mem primary process
alongside an already working non-legacy mem-based primary process.
[1] http://dpdk.org/dev/patchwork/patch/34002/
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Tested-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Tested-by: Hemant Agrawal <hemant.agrawal@nxp.com>
Tested-by: Gowrishankar Muthukrishnan <gowrishankar.m@linux.vnet.ibm.com>
2018-04-11 13:30:24 +01:00
|
|
|
check_socket(const struct rte_memseg_list *msl, void *arg)
|
2018-04-11 13:30:04 +01:00
|
|
|
{
|
|
|
|
int *socket_id = arg;
|
|
|
|
|
2018-10-02 14:34:40 +01:00
|
|
|
if (msl->external)
|
|
|
|
return 0;
|
|
|
|
|
mem: replace memseg with memseg lists
Before, we were aggregating multiple pages into one memseg, so the
number of memsegs was small. Now, each page gets its own memseg,
so the list of memsegs is huge. To accommodate the new memseg list
size and to keep the under-the-hood workings sane, the memseg list
is now not just a single list, but multiple lists. To be precise,
each hugepage size available on the system gets one or more memseg
lists, per socket.
In order to support dynamic memory allocation, we reserve all
memory in advance (unless we're in 32-bit legacy mode, in which
case we do not preallocate memory). As in, we do an anonymous
mmap() of the entire maximum size of memory per hugepage size, per
socket (which is limited to either RTE_MAX_MEMSEG_PER_TYPE pages or
RTE_MAX_MEM_MB_PER_TYPE megabytes worth of memory, whichever is the
smaller one), split over multiple lists (which are limited to
either RTE_MAX_MEMSEG_PER_LIST memsegs or RTE_MAX_MEM_MB_PER_LIST
megabytes per list, whichever is the smaller one). There is also
a global limit of CONFIG_RTE_MAX_MEM_MB megabytes, which is mainly
used for 32-bit targets to limit amounts of preallocated memory,
but can be used to place an upper limit on total amount of VA
memory that can be allocated by DPDK application.
So, for each hugepage size, we get (by default) up to 128G worth
of memory, per socket, split into chunks of up to 32G in size.
The address space is claimed at the start, in eal_common_memory.c.
The actual page allocation code is in eal_memalloc.c (Linux-only),
and largely consists of copied EAL memory init code.
Pages in the list are also indexed by address. That is, in order
to figure out where the page belongs, one can simply look at base
address for a memseg list. Similarly, figuring out IOVA address
of a memzone is a matter of finding the right memseg list, getting
offset and dividing by page size to get the appropriate memseg.
This commit also removes rte_eal_dump_physmem_layout() call,
according to deprecation notice [1], and removes that deprecation
notice as well.
On 32-bit targets due to limited VA space, DPDK will no longer
spread memory to different sockets like before. Instead, it will
(by default) allocate all of the memory on socket where master
lcore is. To override this behavior, --socket-mem must be used.
The rest of the changes are really ripple effects from the memseg
change - heap changes, compile fixes, and rewrites to support
fbarray-backed memseg lists. Due to earlier switch to _walk()
functions, most of the changes are simple fixes, however some
of the _walk() calls were switched to memseg list walk, where
it made sense to do so.
Additionally, we are also switching locks from flock() to fcntl().
Down the line, we will be introducing single-file segments option,
and we cannot use flock() locks to lock parts of the file. Therefore,
we will use fcntl() locks for legacy mem as well, in case someone is
unfortunate enough to accidentally start legacy mem primary process
alongside an already working non-legacy mem-based primary process.
[1] http://dpdk.org/dev/patchwork/patch/34002/
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Tested-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Tested-by: Hemant Agrawal <hemant.agrawal@nxp.com>
Tested-by: Gowrishankar Muthukrishnan <gowrishankar.m@linux.vnet.ibm.com>
2018-04-11 13:30:24 +01:00
|
|
|
if (msl->socket_id == *socket_id && msl->memseg_arr.count != 0)
|
2018-04-11 13:30:04 +01:00
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-02-10 11:49:10 +00:00
|
|
|
static void
|
|
|
|
eal_check_mem_on_local_socket(void)
|
|
|
|
{
|
2018-04-11 13:30:04 +01:00
|
|
|
int socket_id;
|
2020-06-29 15:37:32 +03:00
|
|
|
const struct rte_config *config = rte_eal_get_configuration();
|
2014-02-10 11:49:10 +00:00
|
|
|
|
2020-10-15 15:57:19 -07:00
|
|
|
socket_id = rte_lcore_to_socket_id(config->main_lcore);
|
2014-02-10 11:49:10 +00:00
|
|
|
|
mem: replace memseg with memseg lists
Before, we were aggregating multiple pages into one memseg, so the
number of memsegs was small. Now, each page gets its own memseg,
so the list of memsegs is huge. To accommodate the new memseg list
size and to keep the under-the-hood workings sane, the memseg list
is now not just a single list, but multiple lists. To be precise,
each hugepage size available on the system gets one or more memseg
lists, per socket.
In order to support dynamic memory allocation, we reserve all
memory in advance (unless we're in 32-bit legacy mode, in which
case we do not preallocate memory). As in, we do an anonymous
mmap() of the entire maximum size of memory per hugepage size, per
socket (which is limited to either RTE_MAX_MEMSEG_PER_TYPE pages or
RTE_MAX_MEM_MB_PER_TYPE megabytes worth of memory, whichever is the
smaller one), split over multiple lists (which are limited to
either RTE_MAX_MEMSEG_PER_LIST memsegs or RTE_MAX_MEM_MB_PER_LIST
megabytes per list, whichever is the smaller one). There is also
a global limit of CONFIG_RTE_MAX_MEM_MB megabytes, which is mainly
used for 32-bit targets to limit amounts of preallocated memory,
but can be used to place an upper limit on total amount of VA
memory that can be allocated by DPDK application.
So, for each hugepage size, we get (by default) up to 128G worth
of memory, per socket, split into chunks of up to 32G in size.
The address space is claimed at the start, in eal_common_memory.c.
The actual page allocation code is in eal_memalloc.c (Linux-only),
and largely consists of copied EAL memory init code.
Pages in the list are also indexed by address. That is, in order
to figure out where the page belongs, one can simply look at base
address for a memseg list. Similarly, figuring out IOVA address
of a memzone is a matter of finding the right memseg list, getting
offset and dividing by page size to get the appropriate memseg.
This commit also removes rte_eal_dump_physmem_layout() call,
according to deprecation notice [1], and removes that deprecation
notice as well.
On 32-bit targets due to limited VA space, DPDK will no longer
spread memory to different sockets like before. Instead, it will
(by default) allocate all of the memory on socket where master
lcore is. To override this behavior, --socket-mem must be used.
The rest of the changes are really ripple effects from the memseg
change - heap changes, compile fixes, and rewrites to support
fbarray-backed memseg lists. Due to earlier switch to _walk()
functions, most of the changes are simple fixes, however some
of the _walk() calls were switched to memseg list walk, where
it made sense to do so.
Additionally, we are also switching locks from flock() to fcntl().
Down the line, we will be introducing single-file segments option,
and we cannot use flock() locks to lock parts of the file. Therefore,
we will use fcntl() locks for legacy mem as well, in case someone is
unfortunate enough to accidentally start legacy mem primary process
alongside an already working non-legacy mem-based primary process.
[1] http://dpdk.org/dev/patchwork/patch/34002/
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Tested-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Tested-by: Hemant Agrawal <hemant.agrawal@nxp.com>
Tested-by: Gowrishankar Muthukrishnan <gowrishankar.m@linux.vnet.ibm.com>
2018-04-11 13:30:24 +01:00
|
|
|
if (rte_memseg_list_walk(check_socket, &socket_id) == 0)
|
2020-10-15 15:57:19 -07:00
|
|
|
RTE_LOG(WARNING, EAL, "WARNING: Main core has no memory on local socket!\n");
|
2014-02-10 11:49:10 +00:00
|
|
|
}
|
|
|
|
|
mem: replace memseg with memseg lists
Before, we were aggregating multiple pages into one memseg, so the
number of memsegs was small. Now, each page gets its own memseg,
so the list of memsegs is huge. To accommodate the new memseg list
size and to keep the under-the-hood workings sane, the memseg list
is now not just a single list, but multiple lists. To be precise,
each hugepage size available on the system gets one or more memseg
lists, per socket.
In order to support dynamic memory allocation, we reserve all
memory in advance (unless we're in 32-bit legacy mode, in which
case we do not preallocate memory). As in, we do an anonymous
mmap() of the entire maximum size of memory per hugepage size, per
socket (which is limited to either RTE_MAX_MEMSEG_PER_TYPE pages or
RTE_MAX_MEM_MB_PER_TYPE megabytes worth of memory, whichever is the
smaller one), split over multiple lists (which are limited to
either RTE_MAX_MEMSEG_PER_LIST memsegs or RTE_MAX_MEM_MB_PER_LIST
megabytes per list, whichever is the smaller one). There is also
a global limit of CONFIG_RTE_MAX_MEM_MB megabytes, which is mainly
used for 32-bit targets to limit amounts of preallocated memory,
but can be used to place an upper limit on total amount of VA
memory that can be allocated by DPDK application.
So, for each hugepage size, we get (by default) up to 128G worth
of memory, per socket, split into chunks of up to 32G in size.
The address space is claimed at the start, in eal_common_memory.c.
The actual page allocation code is in eal_memalloc.c (Linux-only),
and largely consists of copied EAL memory init code.
Pages in the list are also indexed by address. That is, in order
to figure out where the page belongs, one can simply look at base
address for a memseg list. Similarly, figuring out IOVA address
of a memzone is a matter of finding the right memseg list, getting
offset and dividing by page size to get the appropriate memseg.
This commit also removes rte_eal_dump_physmem_layout() call,
according to deprecation notice [1], and removes that deprecation
notice as well.
On 32-bit targets due to limited VA space, DPDK will no longer
spread memory to different sockets like before. Instead, it will
(by default) allocate all of the memory on socket where master
lcore is. To override this behavior, --socket-mem must be used.
The rest of the changes are really ripple effects from the memseg
change - heap changes, compile fixes, and rewrites to support
fbarray-backed memseg lists. Due to earlier switch to _walk()
functions, most of the changes are simple fixes, however some
of the _walk() calls were switched to memseg list walk, where
it made sense to do so.
Additionally, we are also switching locks from flock() to fcntl().
Down the line, we will be introducing single-file segments option,
and we cannot use flock() locks to lock parts of the file. Therefore,
we will use fcntl() locks for legacy mem as well, in case someone is
unfortunate enough to accidentally start legacy mem primary process
alongside an already working non-legacy mem-based primary process.
[1] http://dpdk.org/dev/patchwork/patch/34002/
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Tested-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Tested-by: Hemant Agrawal <hemant.agrawal@nxp.com>
Tested-by: Gowrishankar Muthukrishnan <gowrishankar.m@linux.vnet.ibm.com>
2018-04-11 13:30:24 +01:00
|
|
|
|
2014-02-10 11:49:10 +00:00
|
|
|
static int
|
2020-02-09 16:54:54 +01:00
|
|
|
sync_func(__rte_unused void *arg)
|
2014-02-10 11:49:10 +00:00
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
2014-05-02 16:42:52 -07:00
|
|
|
/* Abstraction for port I/O privilege */
int
rte_eal_iopl_init(void)
{
	/* descriptor is intentionally kept open across calls: holding
	 * /dev/io open is what grants I/O port access on FreeBSD
	 */
	static int io_fd = -1;

	if (io_fd < 0)
		io_fd = open("/dev/io", O_RDWR);

	return (io_fd < 0) ? -1 : 0;
}
|
|
|
|
|
2017-03-22 16:19:27 -04:00
|
|
|
static void rte_eal_init_alert(const char *msg)
|
|
|
|
{
|
|
|
|
fprintf(stderr, "EAL: FATAL: %s\n", msg);
|
|
|
|
RTE_LOG(ERR, EAL, "%s\n", msg);
|
|
|
|
}
|
|
|
|
|
2014-02-10 11:49:10 +00:00
|
|
|
/* Launch threads, called at application init().
 *
 * Parses EAL command-line arguments, brings up every EAL subsystem
 * (memory, timers, buses, services, telemetry, ...) and spawns one
 * worker thread per configured lcore. Returns the value produced by
 * eal_parse_args() (number of consumed arguments) on success, or -1
 * with rte_errno set on failure. Subsystem order below is significant.
 */
int
rte_eal_init(int argc, char **argv)
{
	int i, fctret, ret;
	pthread_t thread_id;
	/* guards against repeated/concurrent initialization */
	static uint32_t run_once;
	uint32_t has_run = 0;
	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
	char thread_name[RTE_MAX_THREAD_NAME_LEN];
	const struct rte_config *config = rte_eal_get_configuration();
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* checks if the machine is adequate */
	if (!rte_cpu_is_supported()) {
		rte_eal_init_alert("unsupported cpu type.");
		rte_errno = ENOTSUP;
		return -1;
	}

	/* atomically flip run_once 0 -> 1; a second caller fails here */
	if (!__atomic_compare_exchange_n(&run_once, &has_run, 1, 0,
			__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
		rte_eal_init_alert("already called initialization.");
		rte_errno = EALREADY;
		return -1;
	}

	thread_id = pthread_self();

	eal_reset_internal_config(internal_conf);

	/* clone argv to report out later in telemetry */
	eal_save_args(argc, argv);

	/* set log level as early as possible */
	eal_log_level_parse(argc, argv);

	if (rte_eal_cpu_init() < 0) {
		rte_eal_init_alert("Cannot detect lcores.");
		rte_errno = ENOTSUP;
		return -1;
	}

	fctret = eal_parse_args(argc, argv);
	if (fctret < 0) {
		rte_eal_init_alert("Invalid 'command line' arguments.");
		rte_errno = EINVAL;
		/*
		 * NOTE(review): some early failure paths reset run_once so
		 * init can be retried, while later ones below (config, intr,
		 * alarm, memory, ...) do not — confirm this is intentional.
		 */
		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
		return -1;
	}

	/* FreeBSD always uses legacy memory model */
	internal_conf->legacy_mem = true;

	if (eal_plugins_init() < 0) {
		rte_eal_init_alert("Cannot init plugins");
		rte_errno = EINVAL;
		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
		return -1;
	}

	if (eal_trace_init() < 0) {
		rte_eal_init_alert("Cannot init trace");
		rte_errno = EFAULT;
		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
		return -1;
	}

	if (eal_option_device_parse()) {
		rte_errno = ENODEV;
		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
		return -1;
	}

	if (rte_config_init() < 0) {
		rte_eal_init_alert("Cannot init config");
		return -1;
	}

	if (rte_eal_intr_init() < 0) {
		rte_eal_init_alert("Cannot init interrupt-handling thread");
		return -1;
	}

	if (rte_eal_alarm_init() < 0) {
		rte_eal_init_alert("Cannot init alarm");
		/* rte_eal_alarm_init sets rte_errno on failure. */
		return -1;
	}

	/* Put mp channel init before bus scan so that we can init the vdev
	 * bus through mp channel in the secondary process before the bus scan.
	 */
	if (rte_mp_channel_init() < 0 && rte_errno != ENOTSUP) {
		rte_eal_init_alert("failed to init mp channel");
		/* mp channel failure is only fatal for a primary process */
		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
			rte_errno = EFAULT;
			return -1;
		}
	}

	if (rte_bus_scan()) {
		rte_eal_init_alert("Cannot scan the buses for devices");
		rte_errno = ENODEV;
		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
		return -1;
	}

	/* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */
	if (internal_conf->iova_mode == RTE_IOVA_DC) {
		/* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */
		enum rte_iova_mode iova_mode = rte_bus_get_iommu_class();

		if (iova_mode == RTE_IOVA_DC)
			iova_mode = RTE_IOVA_PA;
		rte_eal_get_configuration()->iova_mode = iova_mode;
	} else {
		/* user explicitly selected the IOVA mode on the command line */
		rte_eal_get_configuration()->iova_mode =
			internal_conf->iova_mode;
	}

	RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n",
		rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");

	if (internal_conf->no_hugetlbfs == 0) {
		/* rte_config isn't initialized yet */
		ret = internal_conf->process_type == RTE_PROC_PRIMARY ?
			eal_hugepage_info_init() :
			eal_hugepage_info_read();
		if (ret < 0) {
			rte_eal_init_alert("Cannot get hugepage information.");
			rte_errno = EACCES;
			__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
			return -1;
		}
	}

	/* no explicit --memory/--socket-mem: derive the memory amount */
	if (internal_conf->memory == 0 && internal_conf->force_sockets == 0) {
		if (internal_conf->no_hugetlbfs)
			internal_conf->memory = MEMSIZE_IF_NO_HUGE_PAGE;
		else
			internal_conf->memory = eal_get_hugepage_mem_size();
	}

	if (internal_conf->vmware_tsc_map == 1) {
#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
		rte_cycles_vmware_tsc_map = 1;
		RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, "
				"you must have monitor_control.pseudo_perfctr = TRUE\n");
#else
		RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because "
				"RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n");
#endif
	}

	/* in secondary processes, memory init may allocate additional fbarrays
	 * not present in primary processes, so to avoid any potential issues,
	 * initialize memzones first.
	 */
	if (rte_eal_memzone_init() < 0) {
		rte_eal_init_alert("Cannot init memzone");
		rte_errno = ENODEV;
		return -1;
	}

	if (rte_eal_memory_init() < 0) {
		rte_eal_init_alert("Cannot init memory");
		rte_errno = ENOMEM;
		return -1;
	}

	if (rte_eal_malloc_heap_init() < 0) {
		rte_eal_init_alert("Cannot init malloc heap");
		rte_errno = ENODEV;
		return -1;
	}

	if (rte_eal_tailqs_init() < 0) {
		rte_eal_init_alert("Cannot init tail queues for objects");
		rte_errno = EFAULT;
		return -1;
	}

	if (rte_eal_timer_init() < 0) {
		rte_eal_init_alert("Cannot init HPET or TSC timers");
		rte_errno = ENOTSUP;
		return -1;
	}

	eal_check_mem_on_local_socket();

	/* pin the calling (main) thread to the main lcore's cpuset */
	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
			&lcore_config[config->main_lcore].cpuset) != 0) {
		rte_eal_init_alert("Cannot set affinity");
		rte_errno = EINVAL;
		return -1;
	}
	__rte_thread_init(config->main_lcore,
		&lcore_config[config->main_lcore].cpuset);

	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));

	RTE_LOG(DEBUG, EAL, "Main lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
		config->main_lcore, thread_id, cpuset,
		ret == 0 ? "" : "...");

	/* spawn and configure one worker thread per remaining lcore */
	RTE_LCORE_FOREACH_WORKER(i) {

		/*
		 * create communication pipes between main thread
		 * and children
		 */
		if (pipe(lcore_config[i].pipe_main2worker) < 0)
			rte_panic("Cannot create pipe\n");
		if (pipe(lcore_config[i].pipe_worker2main) < 0)
			rte_panic("Cannot create pipe\n");

		lcore_config[i].state = WAIT;

		/* create a thread for each lcore */
		ret = pthread_create(&lcore_config[i].thread_id, NULL,
				     eal_thread_loop, NULL);
		if (ret != 0)
			rte_panic("Cannot create thread\n");

		/* Set thread_name for aid in debugging. */
		snprintf(thread_name, sizeof(thread_name),
				"lcore-worker-%d", i);
		rte_thread_setname(lcore_config[i].thread_id, thread_name);

		ret = pthread_setaffinity_np(lcore_config[i].thread_id,
			sizeof(rte_cpuset_t), &lcore_config[i].cpuset);
		if (ret != 0)
			rte_panic("Cannot set affinity\n");
	}

	/*
	 * Launch a dummy function on all worker lcores, so that main lcore
	 * knows they are all ready when this function returns.
	 */
	rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MAIN);
	rte_eal_mp_wait_lcore();

	/* initialize services so vdevs register service during bus_probe. */
	ret = rte_service_init();
	if (ret) {
		rte_eal_init_alert("rte_service_init() failed");
		rte_errno = ENOEXEC;
		return -1;
	}

	/* Probe all the buses and devices/drivers on them */
	if (rte_bus_probe()) {
		rte_eal_init_alert("Cannot probe devices");
		rte_errno = ENOTSUP;
		return -1;
	}

	/* initialize default service/lcore mappings and start running. Ignore
	 * -ENOTSUP, as it indicates no service coremask passed to EAL.
	 */
	ret = rte_service_start_with_defaults();
	if (ret < 0 && ret != -ENOTSUP) {
		rte_errno = ENOEXEC;
		return -1;
	}

	/*
	 * Clean up unused files in runtime directory. We do this at the end of
	 * init and not at the beginning because we want to clean stuff up
	 * whether we are primary or secondary process, but we cannot remove
	 * primary process' files because secondary should be able to run even
	 * if primary process is dead.
	 *
	 * In no_shconf mode, no runtime directory is created in the first
	 * place, so no cleanup needed.
	 */
	if (!internal_conf->no_shconf && eal_clean_runtime_dir() < 0) {
		rte_eal_init_alert("Cannot clear runtime directory");
		return -1;
	}
	if (!internal_conf->no_telemetry) {
		const char *error_str = NULL;
		if (rte_telemetry_init(rte_eal_get_runtime_dir(),
				&internal_conf->ctrl_cpuset, &error_str)
				!= 0) {
			rte_eal_init_alert(error_str);
			return -1;
		}
		/* non-fatal telemetry condition: log it and continue */
		if (error_str != NULL)
			RTE_LOG(NOTICE, EAL, "%s\n", error_str);
	}

	eal_mcfg_complete();

	return fctret;
}
|
|
|
|
|
2019-06-29 13:58:52 +02:00
|
|
|
/*
 * Release resources acquired during rte_eal_init(): service cores, the
 * multi-process channel, trace data (saved, then torn down), and finally
 * the internal configuration itself. Always returns 0.
 */
int
rte_eal_cleanup(void)
{
	rte_service_finalize();
	rte_mp_channel_cleanup();
	rte_trace_save();
	eal_trace_fini();
	eal_cleanup_config(eal_get_internal_configuration());
	return 0;
}
|
|
|
|
|
2017-10-26 12:05:51 +02:00
|
|
|
int rte_eal_create_uio_dev(void)
|
|
|
|
{
|
2020-06-29 15:37:32 +03:00
|
|
|
const struct internal_config *internal_conf =
|
|
|
|
eal_get_internal_configuration();
|
|
|
|
return internal_conf->create_uio_dev;
|
2017-10-26 12:05:51 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* VFIO is not available on this platform: no interrupt mode to report. */
enum rte_intr_mode
rte_eal_vfio_intr_mode(void)
{
	return RTE_INTR_MODE_NONE;
}

/* Stub: nothing to fill in since VFIO is unsupported here. */
void
rte_eal_vfio_get_vf_token(__rte_unused rte_uuid_t vf_token)
{
}
|
|
|
|
|
2017-11-06 17:08:58 +01:00
|
|
|
/*
 * VFIO API stubs. VFIO is unsupported on this platform, so every
 * operation below fails with -1, and every query reports "disabled" (0),
 * keeping the public rte_vfio_* interface link-compatible.
 */

int rte_vfio_setup_device(__rte_unused const char *sysfs_base,
		      __rte_unused const char *dev_addr,
		      __rte_unused int *vfio_dev_fd,
		      __rte_unused struct vfio_device_info *device_info)
{
	return -1;
}

int rte_vfio_release_device(__rte_unused const char *sysfs_base,
			__rte_unused const char *dev_addr,
			__rte_unused int fd)
{
	return -1;
}

int rte_vfio_enable(__rte_unused const char *modname)
{
	return -1;
}

/* always "not enabled" on this platform */
int rte_vfio_is_enabled(__rte_unused const char *modname)
{
	return 0;
}

int rte_vfio_noiommu_is_enabled(void)
{
	return 0;
}

/* nothing to clear; report success */
int rte_vfio_clear_group(__rte_unused int vfio_group_fd)
{
	return 0;
}

int
rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
		       __rte_unused const char *dev_addr,
		       __rte_unused int *iommu_group_num)
{
	return -1;
}

int
rte_vfio_get_container_fd(void)
{
	return -1;
}

int
rte_vfio_get_group_fd(__rte_unused int iommu_group_num)
{
	return -1;
}

int
rte_vfio_container_create(void)
{
	return -1;
}

int
rte_vfio_container_destroy(__rte_unused int container_fd)
{
	return -1;
}

int
rte_vfio_container_group_bind(__rte_unused int container_fd,
		__rte_unused int iommu_group_num)
{
	return -1;
}

int
rte_vfio_container_group_unbind(__rte_unused int container_fd,
		__rte_unused int iommu_group_num)
{
	return -1;
}

int
rte_vfio_container_dma_map(__rte_unused int container_fd,
			__rte_unused uint64_t vaddr,
			__rte_unused uint64_t iova,
			__rte_unused uint64_t len)
{
	return -1;
}

int
rte_vfio_container_dma_unmap(__rte_unused int container_fd,
			__rte_unused uint64_t vaddr,
			__rte_unused uint64_t iova,
			__rte_unused uint64_t len)
{
	return -1;
}
|