eal: add wait until equal API
The rte_wait_until_equal_xx APIs abstract the functionality of 'polling for a memory location to become equal to a given value'. Add the RTE_ARM_USE_WFE configuration entry for aarch64, disabled by default. When it is enabled, the above APIs will use the WFE instruction to save CPU cycles and power. From a VM, calling this API on aarch64 may trap in and out to release vCPUs, which can cause high exit latency. Since kernel 4.18.20, an adaptive trapping mechanism has been introduced to balance the latency and workload. Signed-off-by: Gavin Hu <gavin.hu@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Steve Capper <steve.capper@arm.com> Reviewed-by: Ola Liljedahl <ola.liljedahl@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> Reviewed-by: Phil Yang <phil.yang@arm.com> Acked-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Jerin Jacob <jerinj@marvell.com> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com> Signed-off-by: David Marchand <david.marchand@redhat.com>
This commit is contained in:
parent
24ffb8c589
commit
1be7855d77
@ -26,6 +26,7 @@ flags_common_default = [
|
|||||||
['RTE_LIBRTE_AVP_PMD', false],
|
['RTE_LIBRTE_AVP_PMD', false],
|
||||||
|
|
||||||
['RTE_SCHED_VECTOR', false],
|
['RTE_SCHED_VECTOR', false],
|
||||||
|
['RTE_ARM_USE_WFE', false],
|
||||||
]
|
]
|
||||||
|
|
||||||
flags_generic = [
|
flags_generic = [
|
||||||
|
@ -110,6 +110,11 @@ CONFIG_RTE_MAX_VFIO_CONTAINERS=64
|
|||||||
CONFIG_RTE_MALLOC_DEBUG=n
|
CONFIG_RTE_MALLOC_DEBUG=n
|
||||||
CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES=n
|
CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES=n
|
||||||
CONFIG_RTE_USE_LIBBSD=n
|
CONFIG_RTE_USE_LIBBSD=n
|
||||||
|
# Use WFE instructions to implement the rte_wait_until_equal_xx APIs,
|
||||||
|
# calling these APIs puts the cores in a low power state while waiting
|
||||||
|
# for the memory address to become equal to the expected value.
|
||||||
|
# This is supported only by aarch64.
|
||||||
|
CONFIG_RTE_ARM_USE_WFE=n
|
||||||
|
|
||||||
#
|
#
|
||||||
# Recognize/ignore the AVX/AVX512 CPU flags for performance/power testing.
|
# Recognize/ignore the AVX/AVX512 CPU flags for performance/power testing.
|
||||||
|
@ -56,6 +56,11 @@ New Features
|
|||||||
Also, make sure to start the actual text at the margin.
|
Also, make sure to start the actual text at the margin.
|
||||||
=========================================================
|
=========================================================
|
||||||
|
|
||||||
|
* **Added Wait Until Equal API.**
|
||||||
|
|
||||||
|
A new API has been added to wait for a memory location to be updated with a
|
||||||
|
16-bit, 32-bit or 64-bit value.
|
||||||
|
|
||||||
|
|
||||||
Removed Items
|
Removed Items
|
||||||
-------------
|
-------------
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
/* SPDX-License-Identifier: BSD-3-Clause
|
/* SPDX-License-Identifier: BSD-3-Clause
|
||||||
* Copyright(c) 2017 Cavium, Inc
|
* Copyright(c) 2017 Cavium, Inc
|
||||||
|
* Copyright(c) 2019 Arm Limited
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _RTE_PAUSE_ARM64_H_
|
#ifndef _RTE_PAUSE_ARM64_H_
|
||||||
@ -10,6 +11,11 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <rte_common.h>
|
#include <rte_common.h>
|
||||||
|
|
||||||
|
#ifdef RTE_ARM_USE_WFE
|
||||||
|
#define RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "generic/rte_pause.h"
|
#include "generic/rte_pause.h"
|
||||||
|
|
||||||
static inline void rte_pause(void)
|
static inline void rte_pause(void)
|
||||||
@ -17,6 +23,133 @@ static inline void rte_pause(void)
|
|||||||
asm volatile("yield" ::: "memory");
|
asm volatile("yield" ::: "memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED
|
||||||
|
|
||||||
|
/* Send a local event (SEVL) so that the next WFE is quit immediately. */
|
||||||
|
#define __SEVL() { asm volatile("sevl" : : : "memory"); }
|
||||||
|
|
||||||
|
/* Put processor into low power WFE(Wait For Event) state. */
|
||||||
|
#define __WFE() { asm volatile("wfe" : : : "memory"); }
|
||||||
|
|
||||||
|
static __rte_always_inline void
|
||||||
|
rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
|
||||||
|
int memorder)
|
||||||
|
{
|
||||||
|
uint16_t value;
|
||||||
|
|
||||||
|
assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Atomic exclusive load from addr, it returns the 16-bit content of
|
||||||
|
* *addr while making it 'monitored',when it is written by someone
|
||||||
|
* else, the 'monitored' state is cleared and a event is generated
|
||||||
|
* implicitly to exit WFE.
|
||||||
|
*/
|
||||||
|
#define __LOAD_EXC_16(src, dst, memorder) { \
|
||||||
|
if (memorder == __ATOMIC_RELAXED) { \
|
||||||
|
asm volatile("ldxrh %w[tmp], [%x[addr]]" \
|
||||||
|
: [tmp] "=&r" (dst) \
|
||||||
|
: [addr] "r"(src) \
|
||||||
|
: "memory"); \
|
||||||
|
} else { \
|
||||||
|
asm volatile("ldaxrh %w[tmp], [%x[addr]]" \
|
||||||
|
: [tmp] "=&r" (dst) \
|
||||||
|
: [addr] "r"(src) \
|
||||||
|
: "memory"); \
|
||||||
|
} }
|
||||||
|
|
||||||
|
__LOAD_EXC_16(addr, value, memorder)
|
||||||
|
if (value != expected) {
|
||||||
|
__SEVL()
|
||||||
|
do {
|
||||||
|
__WFE()
|
||||||
|
__LOAD_EXC_16(addr, value, memorder)
|
||||||
|
} while (value != expected);
|
||||||
|
}
|
||||||
|
#undef __LOAD_EXC_16
|
||||||
|
}
|
||||||
|
|
||||||
|
static __rte_always_inline void
|
||||||
|
rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
|
||||||
|
int memorder)
|
||||||
|
{
|
||||||
|
uint32_t value;
|
||||||
|
|
||||||
|
assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Atomic exclusive load from addr, it returns the 32-bit content of
|
||||||
|
* *addr while making it 'monitored',when it is written by someone
|
||||||
|
* else, the 'monitored' state is cleared and a event is generated
|
||||||
|
* implicitly to exit WFE.
|
||||||
|
*/
|
||||||
|
#define __LOAD_EXC_32(src, dst, memorder) { \
|
||||||
|
if (memorder == __ATOMIC_RELAXED) { \
|
||||||
|
asm volatile("ldxr %w[tmp], [%x[addr]]" \
|
||||||
|
: [tmp] "=&r" (dst) \
|
||||||
|
: [addr] "r"(src) \
|
||||||
|
: "memory"); \
|
||||||
|
} else { \
|
||||||
|
asm volatile("ldaxr %w[tmp], [%x[addr]]" \
|
||||||
|
: [tmp] "=&r" (dst) \
|
||||||
|
: [addr] "r"(src) \
|
||||||
|
: "memory"); \
|
||||||
|
} }
|
||||||
|
|
||||||
|
__LOAD_EXC_32(addr, value, memorder)
|
||||||
|
if (value != expected) {
|
||||||
|
__SEVL()
|
||||||
|
do {
|
||||||
|
__WFE()
|
||||||
|
__LOAD_EXC_32(addr, value, memorder)
|
||||||
|
} while (value != expected);
|
||||||
|
}
|
||||||
|
#undef __LOAD_EXC_32
|
||||||
|
}
|
||||||
|
|
||||||
|
static __rte_always_inline void
|
||||||
|
rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
|
||||||
|
int memorder)
|
||||||
|
{
|
||||||
|
uint64_t value;
|
||||||
|
|
||||||
|
assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Atomic exclusive load from addr, it returns the 64-bit content of
|
||||||
|
* *addr while making it 'monitored',when it is written by someone
|
||||||
|
* else, the 'monitored' state is cleared and a event is generated
|
||||||
|
* implicitly to exit WFE.
|
||||||
|
*/
|
||||||
|
#define __LOAD_EXC_64(src, dst, memorder) { \
|
||||||
|
if (memorder == __ATOMIC_RELAXED) { \
|
||||||
|
asm volatile("ldxr %x[tmp], [%x[addr]]" \
|
||||||
|
: [tmp] "=&r" (dst) \
|
||||||
|
: [addr] "r"(src) \
|
||||||
|
: "memory"); \
|
||||||
|
} else { \
|
||||||
|
asm volatile("ldaxr %x[tmp], [%x[addr]]" \
|
||||||
|
: [tmp] "=&r" (dst) \
|
||||||
|
: [addr] "r"(src) \
|
||||||
|
: "memory"); \
|
||||||
|
} }
|
||||||
|
|
||||||
|
__LOAD_EXC_64(addr, value, memorder)
|
||||||
|
if (value != expected) {
|
||||||
|
__SEVL()
|
||||||
|
do {
|
||||||
|
__WFE()
|
||||||
|
__LOAD_EXC_64(addr, value, memorder)
|
||||||
|
} while (value != expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#undef __LOAD_EXC_64
|
||||||
|
|
||||||
|
#undef __SEVL
|
||||||
|
#undef __WFE
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
/* SPDX-License-Identifier: BSD-3-Clause
|
/* SPDX-License-Identifier: BSD-3-Clause
|
||||||
* Copyright(c) 2017 Cavium, Inc
|
* Copyright(c) 2017 Cavium, Inc
|
||||||
|
* Copyright(c) 2019 Arm Limited
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _RTE_PAUSE_H_
|
#ifndef _RTE_PAUSE_H_
|
||||||
@ -12,6 +13,12 @@
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <rte_common.h>
|
||||||
|
#include <rte_atomic.h>
|
||||||
|
#include <rte_compat.h>
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pause CPU execution for a short while
|
* Pause CPU execution for a short while
|
||||||
*
|
*
|
||||||
@ -20,4 +27,102 @@
|
|||||||
*/
|
*/
|
||||||
static inline void rte_pause(void);
|
static inline void rte_pause(void);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @warning
|
||||||
|
* @b EXPERIMENTAL: this API may change, or be removed, without prior notice
|
||||||
|
*
|
||||||
|
* Wait until *addr holds the expected 16-bit value. The ordering of the
|
||||||
|
* polling load relative to surrounding accesses is selected by memorder.
|
||||||
|
*
|
||||||
|
* @param addr
|
||||||
|
* A pointer to the memory location.
|
||||||
|
* @param expected
|
||||||
|
* A 16-bit expected value to be in the memory location.
|
||||||
|
* @param memorder
|
||||||
|
* Two different memory orders that can be specified:
|
||||||
|
* __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
|
||||||
|
* C++11 memory orders with the same names, see the C++11 standard or
|
||||||
|
* the GCC wiki on atomic synchronization for detailed definition.
|
||||||
|
*/
|
||||||
|
__rte_experimental
|
||||||
|
static __rte_always_inline void
|
||||||
|
rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
|
||||||
|
int memorder);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @warning
|
||||||
|
* @b EXPERIMENTAL: this API may change, or be removed, without prior notice
|
||||||
|
*
|
||||||
|
* Wait until *addr holds the expected 32-bit value. The ordering of the
|
||||||
|
* polling load relative to surrounding accesses is selected by memorder.
|
||||||
|
*
|
||||||
|
* @param addr
|
||||||
|
* A pointer to the memory location.
|
||||||
|
* @param expected
|
||||||
|
* A 32-bit expected value to be in the memory location.
|
||||||
|
* @param memorder
|
||||||
|
* Two different memory orders that can be specified:
|
||||||
|
* __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
|
||||||
|
* C++11 memory orders with the same names, see the C++11 standard or
|
||||||
|
* the GCC wiki on atomic synchronization for detailed definition.
|
||||||
|
*/
|
||||||
|
__rte_experimental
|
||||||
|
static __rte_always_inline void
|
||||||
|
rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
|
||||||
|
int memorder);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @warning
|
||||||
|
* @b EXPERIMENTAL: this API may change, or be removed, without prior notice
|
||||||
|
*
|
||||||
|
* Wait until *addr holds the expected 64-bit value. The ordering of the
|
||||||
|
* polling load relative to surrounding accesses is selected by memorder.
|
||||||
|
*
|
||||||
|
* @param addr
|
||||||
|
* A pointer to the memory location.
|
||||||
|
* @param expected
|
||||||
|
* A 64-bit expected value to be in the memory location.
|
||||||
|
* @param memorder
|
||||||
|
* Two different memory orders that can be specified:
|
||||||
|
* __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
|
||||||
|
* C++11 memory orders with the same names, see the C++11 standard or
|
||||||
|
* the GCC wiki on atomic synchronization for detailed definition.
|
||||||
|
*/
|
||||||
|
__rte_experimental
|
||||||
|
static __rte_always_inline void
|
||||||
|
rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
|
||||||
|
int memorder);
|
||||||
|
|
||||||
|
#ifndef RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED
|
||||||
|
/*
 * Generic fallback: poll the 16-bit value at addr with the requested memory
 * order, calling rte_pause() between loads, until it equals expected.
 */
static __rte_always_inline void
rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
		int memorder)
{
	uint16_t observed;

	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);

	observed = __atomic_load_n(addr, memorder);
	while (observed != expected) {
		rte_pause();
		observed = __atomic_load_n(addr, memorder);
	}
}
|
||||||
|
|
||||||
|
/*
 * Generic fallback: poll the 32-bit value at addr with the requested memory
 * order, calling rte_pause() between loads, until it equals expected.
 */
static __rte_always_inline void
rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
		int memorder)
{
	uint32_t observed;

	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);

	observed = __atomic_load_n(addr, memorder);
	while (observed != expected) {
		rte_pause();
		observed = __atomic_load_n(addr, memorder);
	}
}
|
||||||
|
|
||||||
|
/*
 * Generic fallback: poll the 64-bit value at addr with the requested memory
 * order, calling rte_pause() between loads, until it equals expected.
 */
static __rte_always_inline void
rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
		int memorder)
{
	uint64_t observed;

	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);

	observed = __atomic_load_n(addr, memorder);
	while (observed != expected) {
		rte_pause();
		observed = __atomic_load_n(addr, memorder);
	}
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _RTE_PAUSE_H_ */
|
#endif /* _RTE_PAUSE_H_ */
|
||||||
|
Loading…
Reference in New Issue
Block a user