From 098cc0fea3be3094a5f0691d1ae501c83fe0d687 Mon Sep 17 00:00:00 2001 From: Andrzej Ostruszka Date: Thu, 7 Nov 2019 16:03:09 +0100 Subject: [PATCH] build: add option to enable LTO This patch adds an option to enable link time optimization. In addition to LTO option itself (-flto) fat-lto-objects are being used. This is because during the build pmdinfogen scans the generated ELF objects to find this_pmd_name* symbol in symbol table. Without fat-lto-objects gcc produces ELF only with extra symbols for internal use during linking. Signed-off-by: Andrzej Ostruszka Acked-by: Bruce Richardson --- .travis.yml | 9 +++++ config/common_base | 5 +++ config/meson.build | 13 +++++++ doc/guides/prog_guide/index.rst | 1 + doc/guides/prog_guide/lto.rst | 43 ++++++++++++++++++++++++ doc/guides/rel_notes/release_19_11.rst | 9 +++++ mk/toolchain/gcc/rte.toolchain-compat.mk | 4 +++ mk/toolchain/gcc/rte.vars.mk | 12 +++++++ mk/toolchain/icc/rte.vars.mk | 8 +++++ 9 files changed, 104 insertions(+) create mode 100644 doc/guides/prog_guide/lto.rst diff --git a/.travis.yml b/.travis.yml index 3d6ef2959c..a1c31d407e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -105,6 +105,15 @@ matrix: apt: packages: - *extra_packages + - env: DEF_LIB="static" OPTS="-Db_lto=true" EXTRA_PACKAGES=1 CC=gcc-7 + compiler: gcc + addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - *extra_packages + - gcc-7 script: ./.ci/${TRAVIS_OS_NAME}-build.sh diff --git a/config/common_base b/config/common_base index 1858598ede..914277856d 100644 --- a/config/common_base +++ b/config/common_base @@ -49,6 +49,11 @@ CONFIG_RTE_FORCE_INTRINSICS=n # CONFIG_RTE_ARCH_STRICT_ALIGN=n +# +# Enable link time optimization +# +CONFIG_RTE_ENABLE_LTO=n + # # Compile to share library # diff --git a/config/meson.build b/config/meson.build index e1ebdad261..2b1cb92e7e 100644 --- a/config/meson.build +++ b/config/meson.build @@ -225,3 +225,16 @@ add_project_arguments('-D_GNU_SOURCE', language: 'c') if is_freebsd add_project_arguments('-D__BSD_VISIBLE', language: 'c') endif + +if get_option('b_lto') + if cc.has_argument('-ffat-lto-objects') + add_project_arguments('-ffat-lto-objects', language: 'c') + else + error('compiler does not support fat LTO objects - please turn LTO off') + endif + # workaround for gcc bug 81440 + if cc.get_id() == 'gcc' and cc.version().version_compare('<8.0') + add_project_arguments('-Wno-lto-type-mismatch', language: 'c') + add_project_link_arguments('-Wno-lto-type-mismatch', language: 'c') + endif +endif diff --git a/doc/guides/prog_guide/index.rst b/doc/guides/prog_guide/index.rst index 692409af82..dc4851c57f 100644 --- a/doc/guides/prog_guide/index.rst +++ b/doc/guides/prog_guide/index.rst @@ -65,5 +65,6 @@ Programmer's Guide ext_app_lib_make_help perf_opt_guidelines writing_efficient_code + lto profile_app glossary diff --git a/doc/guides/prog_guide/lto.rst b/doc/guides/prog_guide/lto.rst new file mode 100644 index 0000000000..43f4c63379 --- /dev/null +++ b/doc/guides/prog_guide/lto.rst @@ -0,0 +1,43 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2019 Marvell International Ltd. + +Link Time Optimization +====================== + +The DPDK supports compilation with link time optimization turned on. +This depends obviously on the ability of the compiler to do "whole +program" optimization at link time and is available only for compilers +that support that feature. +To be more specific, compiler (in addition to performing LTO) have to +support creation of ELF objects containing both normal code and internal +representation (called fat-lto-objects in gcc and icc). +This is required since during build some code is generated by parsing +produced ELF objects (pmdinfogen). + +The amount of performance gain that one can get from LTO depends on the +compiler and the code that is being compiled. +However LTO is also useful for additional code analysis done by the +compiler. +In particular due to interprocedural analysis compiler can produce +additional warnings about variables that might be used uninitialized. +Some of these warnings might be "false positives" though and you might +need to explicitly initialize variable in order to silence the compiler. + +Please note that turning LTO on causes considerable extension of +build time. + +When using make based build, link time optimization can be enabled for +the whole DPDK by setting: + +.. code-block:: console + + CONFIG_ENABLE_LTO=y + +in config file. + +For the meson based build it can be enabled by setting meson built-in +'b_lto' option: + +.. code-block:: console + + meson build -Db_lto=true diff --git a/doc/guides/rel_notes/release_19_11.rst b/doc/guides/rel_notes/release_19_11.rst index fe11b4b7a6..12ab110ddb 100644 --- a/doc/guides/rel_notes/release_19_11.rst +++ b/doc/guides/rel_notes/release_19_11.rst @@ -248,6 +248,15 @@ New Features traditional l2fwd example. It demonstrates usage of poll and event mode IO mechanism under a single application. +* **Added build support for Link Time Optimization.** + + LTO is an optimization technique used by the compiler to perform whole + program analysis and optimization at link time. In order to do that + compilers store their internal representation of the source code that + the linker uses at the final stage of compilation process. + + See :doc:`../prog_guide/lto` for more information: + Removed Items ------------- diff --git a/mk/toolchain/gcc/rte.toolchain-compat.mk b/mk/toolchain/gcc/rte.toolchain-compat.mk index ea40a11c0c..69a53e5d0d 100644 --- a/mk/toolchain/gcc/rte.toolchain-compat.mk +++ b/mk/toolchain/gcc/rte.toolchain-compat.mk @@ -88,6 +88,10 @@ else MACHINE_CFLAGS := $(filter-out -march% -mtune% -msse%,$(MACHINE_CFLAGS)) endif + ifeq ($(shell test $(GCC_VERSION) -lt 74 && echo 1), 1) + CONFIG_RTE_ENABLE_LTO=n + endif + # Disable thunderx PMD for gcc < 4.7 ifeq ($(shell test $(GCC_VERSION) -lt 47 && echo 1), 1) CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD=d diff --git a/mk/toolchain/gcc/rte.vars.mk b/mk/toolchain/gcc/rte.vars.mk index b852fcfd7e..9fc704193b 100644 --- a/mk/toolchain/gcc/rte.vars.mk +++ b/mk/toolchain/gcc/rte.vars.mk @@ -62,6 +62,18 @@ endif # process cpu flags include $(RTE_SDK)/mk/toolchain/$(RTE_TOOLCHAIN)/rte.toolchain-compat.mk +ifeq ($(CONFIG_RTE_ENABLE_LTO),y) +# 'fat-lto' is used since pmdinfogen needs to have 'this_pmd_nameX' +# exported in symbol table and without this option only internal +# representation is present. +TOOLCHAIN_CFLAGS += -flto -ffat-lto-objects +TOOLCHAIN_LDFLAGS += -flto +# workaround for GCC bug 81440 +ifeq ($(shell test $(GCC_VERSION) -lt 80 && echo 1), 1) +WERROR_FLAGS += -Wno-lto-type-mismatch +endif +endif + # workaround GCC bug with warning "missing initializer" for "= {0}" ifeq ($(shell test $(GCC_VERSION) -lt 47 && echo 1), 1) WERROR_FLAGS += -Wno-missing-field-initializers diff --git a/mk/toolchain/icc/rte.vars.mk b/mk/toolchain/icc/rte.vars.mk index aa1422bf1f..8aa87aa1ee 100644 --- a/mk/toolchain/icc/rte.vars.mk +++ b/mk/toolchain/icc/rte.vars.mk @@ -54,5 +54,13 @@ endif # process cpu flags include $(RTE_SDK)/mk/toolchain/$(RTE_TOOLCHAIN)/rte.toolchain-compat.mk +ifeq ($(CONFIG_RTE_ENABLE_LTO),y) +# 'fat-lto' is used since pmdinfogen needs to have 'this_pmd_nameX' +# exported in symbol table and without this option only internal +# representation is present. +TOOLCHAIN_CFLAGS += -flto -ffat-lto-objects +TOOLCHAIN_LDFLAGS += -flto +endif + export CC AS AR LD OBJCOPY OBJDUMP STRIP READELF export TOOLCHAIN_CFLAGS TOOLCHAIN_LDFLAGS TOOLCHAIN_ASFLAGS