eal: extend --huge-unlink for hugepage file reuse

Expose Linux EAL ability to reuse existing hugepage files
via --huge-unlink=never switch.
Default behavior is unchanged; it can also be requested explicitly
using --huge-unlink=existing for consistency.
Old --huge-unlink switch is kept,
it is an alias for --huge-unlink=always.
Add a test case for the --huge-unlink=never mode.

Signed-off-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com>
Acked-by: Thomas Monjalon <thomas@monjalon.net>
Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
This commit is contained in:
Dmitry Kozlyuk 2022-02-03 20:13:36 +02:00 committed by David Marchand
parent 32b4771cd8
commit 0dff3f26d6
5 changed files with 99 additions and 6 deletions

View File

@ -1210,6 +1210,11 @@ test_file_prefix(void)
DEFAULT_MEM_SIZE, "--single-file-segments",
"--file-prefix=" memtest1 };
/* primary process with memtest1 and --huge-unlink=never mode */
const char * const argv9[] = {prgname, "-m",
DEFAULT_MEM_SIZE, "--huge-unlink=never",
"--file-prefix=" memtest1 };
/* check if files for current prefix are present */
if (process_hugefiles(prefix, HUGEPAGE_CHECK_EXISTS) != 1) {
printf("Error - hugepage files for %s were not created!\n", prefix);
@ -1378,6 +1383,25 @@ test_file_prefix(void)
return -1;
}
/* this process will run with --huge-unlink=never,
* so it should not remove hugepage files when it exits
*/
if (launch_proc(argv9) != 0) {
printf("Error - failed to run with --huge-unlink=never\n");
return -1;
}
/* check if hugefiles for memtest1 are present */
if (process_hugefiles(memtest1, HUGEPAGE_CHECK_EXISTS) == 0) {
printf("Error - hugepage files for %s were deleted!\n",
memtest1);
return -1;
}
if (process_hugefiles(memtest1, HUGEPAGE_DELETE) != 1) {
printf("Error - deleting hugepages failed!\n");
return -1;
}
return 0;
}

View File

@ -84,10 +84,26 @@ Memory-related options
Use specified hugetlbfs directory instead of autodetected ones. This can be
a sub-directory within a hugetlbfs mountpoint.
* ``--huge-unlink``
* ``--huge-unlink[=existing|always|never]``
Unlink hugepage files after creating them (implies no secondary process
support).
No ``--huge-unlink`` option or ``--huge-unlink=existing`` is the default:
existing hugepage files are removed and re-created
to ensure the kernel clears the memory and prevents any data leaks.
With ``--huge-unlink`` (no value) or ``--huge-unlink=always``,
hugepage files are also removed before mapping them,
so that the application leaves no files in hugetlbfs.
This mode implies no multi-process support.
When ``--huge-unlink=never`` is specified, existing hugepage files
are never removed, but are remapped instead, allowing hugepage reuse.
This makes restart faster by skipping the clearing of memory at initialization,
but it may slow down zeroed allocations later.
Reused hugepages can contain data from previous processes that used them,
which may be a security concern.
Hugepage files created in this mode are also not removed
when all the hugepages mapped from them are freed,
which allows these files to be reused after a restart.
* ``--match-allocations``

View File

@ -283,6 +283,18 @@ to prevent data leaks from previous users of the same hugepage.
EAL ensures this behavior by removing existing backing files at startup
and by recreating them before opening for mapping (as a precaution).
One exception is ``--huge-unlink=never`` mode.
It is used to speed up EAL initialization, usually on application restart.
Clearing memory constitutes more than 95% of hugepage mapping time.
EAL can save it by remapping existing backing files
with all the data left in the mapped hugepages ("dirty" memory).
Such segments are marked with ``RTE_MEMSEG_FLAG_DIRTY``.
Memory allocator detects dirty segments and handles them accordingly,
in particular, it clears memory requested with ``rte_zmalloc*()``.
In this mode EAL also does not remove a backing file
when all pages mapped from it are freed,
because such files are intended to be reused at restart.
Anonymous mapping does not allow multi-process architecture.
This mode does not use hugetlbfs
and thus does not require root permissions for memory management
@ -959,6 +971,7 @@ to be virtually contiguous.
* dirty - this flag is only meaningful when ``state`` is ``FREE``.
It indicates that the content of the element is not fully zero-filled.
Memory from such blocks must be cleared when requested via ``rte_zmalloc*()``.
Dirty elements only appear with ``--huge-unlink=never``.
* pad - this holds the length of the padding present at the start of the block.
In the case of a normal block header, it is added to the address of the end

View File

@ -55,6 +55,13 @@ New Features
Also, make sure to start the actual text at the margin.
=======================================================
* **Added ability to reuse hugepages in Linux.**
It is possible to reuse files in hugetlbfs to speed up hugepage mapping,
which may be useful for fast restart and large allocations.
The new mode is activated with ``--huge-unlink=never``
and has security implications, refer to the user and programmer guides.
* **Updated Cisco enic driver.**
* Added rte_flow support for matching GENEVE packets.

View File

@ -74,7 +74,7 @@ eal_long_options[] = {
{OPT_FILE_PREFIX, 1, NULL, OPT_FILE_PREFIX_NUM },
{OPT_HELP, 0, NULL, OPT_HELP_NUM },
{OPT_HUGE_DIR, 1, NULL, OPT_HUGE_DIR_NUM },
{OPT_HUGE_UNLINK, 0, NULL, OPT_HUGE_UNLINK_NUM },
{OPT_HUGE_UNLINK, 2, NULL, OPT_HUGE_UNLINK_NUM },
{OPT_IOVA_MODE, 1, NULL, OPT_IOVA_MODE_NUM },
{OPT_LCORES, 1, NULL, OPT_LCORES_NUM },
{OPT_LOG_LEVEL, 1, NULL, OPT_LOG_LEVEL_NUM },
@ -1598,6 +1598,28 @@ available_cores(void)
return str;
}
#define HUGE_UNLINK_NEVER "never"
/*
 * Parse the optional value of the --huge-unlink EAL option.
 *
 * arg is the option value, or NULL when the option was given without one;
 * out is the hugepage file discipline to update. Only the field selected
 * by the requested mode is written, the other one is left untouched.
 *
 * Recognized modes:
 *   no value, "always" - set unlink_before_mapping;
 *   "existing"         - same as not specifying the option, change nothing;
 *   "never"            - log a warning and clear unlink_existing.
 *
 * Returns 0 when the mode is recognized, -1 otherwise.
 */
static int
eal_parse_huge_unlink(const char *arg, struct hugepage_file_discipline *out)
{
	int ret = 0;

	if (arg == NULL || !strcmp(arg, "always")) {
		out->unlink_before_mapping = true;
	} else if (!strcmp(arg, HUGE_UNLINK_NEVER)) {
		RTE_LOG(WARNING, EAL, "Using --"OPT_HUGE_UNLINK"="
			HUGE_UNLINK_NEVER" may create data leaks.\n");
		out->unlink_existing = false;
	} else if (!strcmp(arg, "existing")) {
		/* explicit spelling of the default: nothing to update */
	} else {
		/* unknown mode */
		ret = -1;
	}

	return ret;
}
int
eal_parse_common_option(int opt, const char *optarg,
struct internal_config *conf)
@ -1739,7 +1761,10 @@ eal_parse_common_option(int opt, const char *optarg,
/* long options */
case OPT_HUGE_UNLINK_NUM:
conf->hugepage_file.unlink_before_mapping = true;
if (eal_parse_huge_unlink(optarg, &conf->hugepage_file) < 0) {
RTE_LOG(ERR, EAL, "invalid --"OPT_HUGE_UNLINK" option\n");
return -1;
}
break;
case OPT_NO_HUGE_NUM:
@ -2070,6 +2095,12 @@ eal_check_common_options(struct internal_config *internal_cfg)
"not compatible with --"OPT_HUGE_UNLINK"\n");
return -1;
}
if (!internal_cfg->hugepage_file.unlink_existing &&
internal_cfg->in_memory) {
RTE_LOG(ERR, EAL, "Option --"OPT_IN_MEMORY" is not compatible "
"with --"OPT_HUGE_UNLINK"="HUGE_UNLINK_NEVER"\n");
return -1;
}
if (internal_cfg->legacy_mem &&
internal_cfg->in_memory) {
RTE_LOG(ERR, EAL, "Option --"OPT_LEGACY_MEM" is not compatible "
@ -2202,7 +2233,9 @@ eal_common_usage(void)
" --"OPT_NO_TELEMETRY" Disable telemetry support\n"
" --"OPT_FORCE_MAX_SIMD_BITWIDTH" Force the max SIMD bitwidth\n"
"\nEAL options for DEBUG use only:\n"
" --"OPT_HUGE_UNLINK" Unlink hugepage files after init\n"
" --"OPT_HUGE_UNLINK"[=existing|always|never]\n"
" When to unlink files in hugetlbfs\n"
" ('existing' by default, no value means 'always')\n"
" --"OPT_NO_HUGE" Use malloc instead of hugetlbfs\n"
" --"OPT_NO_PCI" Disable PCI\n"
" --"OPT_NO_HPET" Disable HPET\n"