From 0dff3f26d6faad4e51f75e5245f0387ee9bb0c6d Mon Sep 17 00:00:00 2001 From: Dmitry Kozlyuk Date: Thu, 3 Feb 2022 20:13:36 +0200 Subject: [PATCH] eal: extend --huge-unlink for hugepage file reuse Expose Linux EAL ability to reuse existing hugepage files via --huge-unlink=never switch. Default behavior is unchanged, it can also be specified using --huge-unlink=existing for consistency. Old --huge-unlink switch is kept, it is an alias for --huge-unlink=always. Add a test case for the --huge-unlink=never mode. Signed-off-by: Dmitry Kozlyuk Acked-by: Thomas Monjalon Acked-by: Anatoly Burakov --- app/test/test_eal_flags.c | 24 ++++++++++++ doc/guides/linux_gsg/linux_eal_parameters.rst | 22 +++++++++-- .../prog_guide/env_abstraction_layer.rst | 13 +++++++ doc/guides/rel_notes/release_22_03.rst | 7 ++++ lib/eal/common/eal_common_options.c | 39 +++++++++++++++++-- 5 files changed, 99 insertions(+), 6 deletions(-) diff --git a/app/test/test_eal_flags.c b/app/test/test_eal_flags.c index bc158d7a4a..68deb86aab 100644 --- a/app/test/test_eal_flags.c +++ b/app/test/test_eal_flags.c @@ -1210,6 +1210,11 @@ test_file_prefix(void) DEFAULT_MEM_SIZE, "--single-file-segments", "--file-prefix=" memtest1 }; + /* primary process with memtest1 and --huge-unlink=never mode */ + const char * const argv9[] = {prgname, "-m", + DEFAULT_MEM_SIZE, "--huge-unlink=never", + "--file-prefix=" memtest1 }; + /* check if files for current prefix are present */ if (process_hugefiles(prefix, HUGEPAGE_CHECK_EXISTS) != 1) { printf("Error - hugepage files for %s were not created!\n", prefix); @@ -1378,6 +1383,25 @@ test_file_prefix(void) return -1; } + /* this process will run with --huge-unlink=never, + * so it should not remove hugepage files when it exits + */ + if (launch_proc(argv9) != 0) { + printf("Error - failed to run with --huge-unlink=never\n"); + return -1; + } + + /* check if hugefiles for memtest1 are present */ + if (process_hugefiles(memtest1, HUGEPAGE_CHECK_EXISTS) == 0) { + printf("Error - 
hugepage files for %s were deleted!\n", + memtest1); + return -1; + } + if (process_hugefiles(memtest1, HUGEPAGE_DELETE) != 1) { + printf("Error - deleting hugepages failed!\n"); + return -1; + } + return 0; } diff --git a/doc/guides/linux_gsg/linux_eal_parameters.rst b/doc/guides/linux_gsg/linux_eal_parameters.rst index 74df2611b5..ea8f381391 100644 --- a/doc/guides/linux_gsg/linux_eal_parameters.rst +++ b/doc/guides/linux_gsg/linux_eal_parameters.rst @@ -84,10 +84,26 @@ Memory-related options Use specified hugetlbfs directory instead of autodetected ones. This can be a sub-directory within a hugetlbfs mountpoint. -* ``--huge-unlink`` +* ``--huge-unlink[=existing|always|never]`` - Unlink hugepage files after creating them (implies no secondary process - support). + No ``--huge-unlink`` option or ``--huge-unlink=existing`` is the default: + existing hugepage files are removed and re-created + to ensure the kernel clears the memory and prevents any data leaks. + + With ``--huge-unlink`` (no value) or ``--huge-unlink=always``, + hugepage files are also removed before mapping them, + so that the application leaves no files in hugetlbfs. + This mode implies no multi-process support. + + When ``--huge-unlink=never`` is specified, existing hugepage files + are never removed, but are remapped instead, allowing hugepage reuse. + This makes restart faster by saving the time needed to clear memory at initialization, + but it may slow down zeroed allocations later. + Reused hugepages can contain data from previous processes that used them, + which may be a security concern. + Hugepage files created in this mode are also not removed + when all the hugepages mapped from them are freed, + which allows these files to be reused after a restart. 
* ``--match-allocations`` diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst index 3391e29329..5f0748fba1 100644 --- a/doc/guides/prog_guide/env_abstraction_layer.rst +++ b/doc/guides/prog_guide/env_abstraction_layer.rst @@ -283,6 +283,18 @@ to prevent data leaks from previous users of the same hugepage. EAL ensures this behavior by removing existing backing files at startup and by recreating them before opening for mapping (as a precaution). +One exception is ``--huge-unlink=never`` mode. +It is used to speed up EAL initialization, usually on application restart. +Clearing memory constitutes more than 95% of hugepage mapping time. +EAL can save it by remapping existing backing files +with all the data left in the mapped hugepages ("dirty" memory). +Such segments are marked with ``RTE_MEMSEG_FLAG_DIRTY``. +Memory allocator detects dirty segments and handles them accordingly, +in particular, it clears memory requested with ``rte_zmalloc*()``. +In this mode EAL also does not remove a backing file +when all pages mapped from it are freed, +because they are intended to be reusable at restart. + Anonymous mapping does not allow multi-process architecture. This mode does not use hugetlbfs and thus does not require root permissions for memory management @@ -959,6 +971,7 @@ to be virtually contiguous. * dirty - this flag is only meaningful when ``state`` is ``FREE``. It indicates that the content of the element is not fully zero-filled. Memory from such blocks must be cleared when requested via ``rte_zmalloc*()``. + Dirty elements only appear with ``--huge-unlink=never``. * pad - this holds the length of the padding present at the start of the block. 
In the case of a normal block header, it is added to the address of the end diff --git a/doc/guides/rel_notes/release_22_03.rst b/doc/guides/rel_notes/release_22_03.rst index 746f50e84f..a1cf3ec3b3 100644 --- a/doc/guides/rel_notes/release_22_03.rst +++ b/doc/guides/rel_notes/release_22_03.rst @@ -55,6 +55,13 @@ New Features Also, make sure to start the actual text at the margin. ======================================================= +* **Added ability to reuse hugepages in Linux.** + + It is possible to reuse files in hugetlbfs to speed up hugepage mapping, + which may be useful for fast restart and large allocations. + The new mode is activated with ``--huge-unlink=never`` + and has security implications, refer to the user and programmer guides. + * **Updated Cisco enic driver.** * Added rte_flow support for matching GENEVE packets. diff --git a/lib/eal/common/eal_common_options.c b/lib/eal/common/eal_common_options.c index cdd2284b0c..45d393b393 100644 --- a/lib/eal/common/eal_common_options.c +++ b/lib/eal/common/eal_common_options.c @@ -74,7 +74,7 @@ eal_long_options[] = { {OPT_FILE_PREFIX, 1, NULL, OPT_FILE_PREFIX_NUM }, {OPT_HELP, 0, NULL, OPT_HELP_NUM }, {OPT_HUGE_DIR, 1, NULL, OPT_HUGE_DIR_NUM }, - {OPT_HUGE_UNLINK, 0, NULL, OPT_HUGE_UNLINK_NUM }, + {OPT_HUGE_UNLINK, 2, NULL, OPT_HUGE_UNLINK_NUM }, {OPT_IOVA_MODE, 1, NULL, OPT_IOVA_MODE_NUM }, {OPT_LCORES, 1, NULL, OPT_LCORES_NUM }, {OPT_LOG_LEVEL, 1, NULL, OPT_LOG_LEVEL_NUM }, @@ -1598,6 +1598,28 @@ available_cores(void) return str; } +#define HUGE_UNLINK_NEVER "never" + +static int +eal_parse_huge_unlink(const char *arg, struct hugepage_file_discipline *out) +{ + if (arg == NULL || strcmp(arg, "always") == 0) { + out->unlink_before_mapping = true; + return 0; + } + if (strcmp(arg, "existing") == 0) { + /* same as not specifying the option */ + return 0; + } + if (strcmp(arg, HUGE_UNLINK_NEVER) == 0) { + RTE_LOG(WARNING, EAL, "Using --"OPT_HUGE_UNLINK"=" + HUGE_UNLINK_NEVER" may create data leaks.\n"); + 
out->unlink_existing = false; + return 0; + } + return -1; +} + int eal_parse_common_option(int opt, const char *optarg, struct internal_config *conf) @@ -1739,7 +1761,10 @@ eal_parse_common_option(int opt, const char *optarg, /* long options */ case OPT_HUGE_UNLINK_NUM: - conf->hugepage_file.unlink_before_mapping = true; + if (eal_parse_huge_unlink(optarg, &conf->hugepage_file) < 0) { + RTE_LOG(ERR, EAL, "invalid --"OPT_HUGE_UNLINK" option\n"); + return -1; + } break; case OPT_NO_HUGE_NUM: @@ -2070,6 +2095,12 @@ eal_check_common_options(struct internal_config *internal_cfg) "not compatible with --"OPT_HUGE_UNLINK"\n"); return -1; } + if (!internal_cfg->hugepage_file.unlink_existing && + internal_cfg->in_memory) { + RTE_LOG(ERR, EAL, "Option --"OPT_IN_MEMORY" is not compatible " + "with --"OPT_HUGE_UNLINK"="HUGE_UNLINK_NEVER"\n"); + return -1; + } if (internal_cfg->legacy_mem && internal_cfg->in_memory) { RTE_LOG(ERR, EAL, "Option --"OPT_LEGACY_MEM" is not compatible " @@ -2202,7 +2233,9 @@ eal_common_usage(void) " --"OPT_NO_TELEMETRY" Disable telemetry support\n" " --"OPT_FORCE_MAX_SIMD_BITWIDTH" Force the max SIMD bitwidth\n" "\nEAL options for DEBUG use only:\n" - " --"OPT_HUGE_UNLINK" Unlink hugepage files after init\n" + " --"OPT_HUGE_UNLINK"[=existing|always|never]\n" + " When to unlink files in hugetlbfs\n" + " ('existing' by default, no value means 'always')\n" " --"OPT_NO_HUGE" Use malloc instead of hugetlbfs\n" " --"OPT_NO_PCI" Disable PCI\n" " --"OPT_NO_HPET" Disable HPET\n"