cd7fc8a84b
This patch provides an option to do rte_memcpy() using the 'restrict' qualifier, which can induce GCC to do optimizations by using more efficient instructions, providing some performance gain over memcpy() on some ARM64 platforms/environments. The memory copy performance differs between different ARM64 platforms. And a more recent glibc (e.g. 2.23 or later) can provide better memcpy() performance compared to old glibc versions. It's always suggested to use a more recent glibc if possible, from which the entire system can benefit. If for some reason an old glibc has to be used, this patch is provided as an alternative. This implementation can improve memory copy on some ARM64 platforms, when an old glibc (e.g. 2.19, 2.17...) is being used. It is disabled by default and needs "RTE_ARCH_ARM64_MEMCPY" defined to activate. It does not always provide better performance than memcpy(), so users need to run the DPDK unit test "memcpy_perf_autotest" and customize parameters in the "customization section" in rte_memcpy_64.h for best performance. Compiler version will also impact the rte_memcpy() performance. It's observed that on some platforms, with the same code, a GCC 7.2.0 compiled binary can provide better performance than one compiled with GCC 4.8.5. It's suggested to use GCC 5.4.0 or later. Signed-off-by: Herbert Guan <herbert.guan@arm.com> Acked-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
37 lines
1.1 KiB
Plaintext
37 lines
1.1 KiB
Plaintext
# SPDX-License-Identifier: BSD-3-Clause
|
|
# Copyright(c) 2017 Cavium, Inc
|
|
#
|
|
|
|
#include "common_linuxapp"
|
|
|
|
CONFIG_RTE_MACHINE="armv8a"
|
|
|
|
CONFIG_RTE_ARCH="arm64"
|
|
CONFIG_RTE_ARCH_ARM64=y
|
|
CONFIG_RTE_ARCH_64=y
|
|
|
|
CONFIG_RTE_FORCE_INTRINSICS=y
|
|
|
|
# Maximum available cache line size in arm64 implementations.
|
|
# Setting to maximum available cache line size in generic config
|
|
# to address minimum DMA alignment across all arm64 implementations.
|
|
CONFIG_RTE_CACHE_LINE_SIZE=128
|
|
|
|
# Accelerate rte_memcpy. Be sure to run unit test (memcpy_perf_autotest)
|
|
# to determine the best threshold in code. Refer to notes in source file
|
|
# (lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h) for more info.
|
|
CONFIG_RTE_ARCH_ARM64_MEMCPY=n
|
|
#CONFIG_RTE_ARM64_MEMCPY_ALIGNED_THRESHOLD=2048
|
|
#CONFIG_RTE_ARM64_MEMCPY_UNALIGNED_THRESHOLD=512
|
|
# Leave the RTE_ARM64_MEMCPY_xxx options below commented out, unless there are
|
|
# strong reasons.
|
|
#CONFIG_RTE_ARM64_MEMCPY_SKIP_GCC_VER_CHECK=n
|
|
#CONFIG_RTE_ARM64_MEMCPY_ALIGN_MASK=0xF
|
|
#CONFIG_RTE_ARM64_MEMCPY_STRICT_ALIGN=n
|
|
|
|
CONFIG_RTE_LIBRTE_FM10K_PMD=n
|
|
CONFIG_RTE_LIBRTE_SFC_EFX_PMD=n
|
|
CONFIG_RTE_LIBRTE_AVP_PMD=n
|
|
|
|
CONFIG_RTE_SCHED_VECTOR=n
|