eal: add wrapper for C11 atomic thread fence

Provide a wrapper for the __atomic_thread_fence builtin so that an
optimized code path can be used for the __ATOMIC_SEQ_CST memory order
on x86 platforms.

Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Ola Liljedahl <ola.liljedahl@arm.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
commit 672a150563 (parent 703a62a602)
Author: Phil Yang
Date: 2020-07-17 18:14:36 +08:00
Committed by: David Marchand

5 changed files with 39 additions and 0 deletions
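
For context, a minimal usage sketch (not part of this commit; the data/flag
variables and publish() helper are illustrative assumptions only): a relaxed
store followed by an explicit sequentially consistent fence through the new
wrapper, which on x86 resolves to the cheaper rte_smp_mb() path added below.

#include <stdint.h>
#include <rte_atomic.h>

static uint32_t data;
static uint32_t flag;

static inline void
publish(void)
{
	/* Producer side only: store the payload with relaxed ordering. */
	__atomic_store_n(&data, 42, __ATOMIC_RELAXED);
	/* Full fence with C11 semantics; expands to rte_smp_mb() on x86. */
	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
	/* Make the payload visible before the flag. */
	__atomic_store_n(&flag, 1, __ATOMIC_RELAXED);
}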


@@ -37,6 +37,12 @@ extern "C" {
#define rte_cio_rmb() rte_rmb()

static __rte_always_inline void
rte_atomic_thread_fence(int memory_order)
{
	__atomic_thread_fence(memory_order);
}

#ifdef __cplusplus
}
#endif


@@ -41,6 +41,12 @@ extern "C" {
#define rte_cio_rmb() rte_rmb()

static __rte_always_inline void
rte_atomic_thread_fence(int memory_order)
{
	__atomic_thread_fence(memory_order);
}

/*------------------------ 128 bit atomic operations -------------------------*/
#if defined(__ARM_FEATURE_ATOMICS) || defined(RTE_ARM_FEATURE_ATOMICS)


@@ -158,6 +158,11 @@ static inline void rte_cio_rmb(void);
	asm volatile ("" : : : "memory"); \
} while(0)

/**
 * Synchronization fence between threads based on the specified memory order.
 */
static inline void rte_atomic_thread_fence(int memory_order);

/*------------------------- 16 bit atomic operations -------------------------*/
/**


@@ -40,6 +40,12 @@ extern "C" {
#define rte_cio_rmb() rte_rmb()

static __rte_always_inline void
rte_atomic_thread_fence(int memory_order)
{
	__atomic_thread_fence(memory_order);
}

/*------------------------- 16 bit atomic operations -------------------------*/
/* To be compatible with Power7, use GCC built-in functions for 16 bit
* operations */


@@ -83,6 +83,22 @@ rte_smp_mb(void)
#define rte_cio_rmb() rte_compiler_barrier()

/**
 * Synchronization fence between threads based on the specified memory order.
 *
 * On x86 the __atomic_thread_fence(__ATOMIC_SEQ_CST) generates full 'mfence'
 * which is quite expensive. The optimized implementation of rte_smp_mb is
 * used instead.
 */
static __rte_always_inline void
rte_atomic_thread_fence(int memory_order)
{
	if (memory_order == __ATOMIC_SEQ_CST)
		rte_smp_mb();
	else
		__atomic_thread_fence(memory_order);
}

/*------------------------- 16 bit atomic operations -------------------------*/
#ifndef RTE_FORCE_INTRINSICS
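
Because memory_order is a compile-time constant at virtually every call site,
the compiler folds the branch away and only the selected fence remains. A
self-contained sketch of the same dispatch idea outside DPDK (my_full_mb()
and my_thread_fence() are hypothetical stand-ins; the locked-add trick shown
is one common way to get a full barrier cheaper than mfence and is an
assumption here, not a quote of DPDK's rte_smp_mb()):

static inline void
my_full_mb(void)
{
#ifdef __x86_64__
	/* A locked read-modify-write on the stack acts as a full memory
	 * barrier on x86 and is typically cheaper than an mfence. */
	__asm__ volatile("lock addl $0, -128(%%rsp)" ::: "memory", "cc");
#else
	__atomic_thread_fence(__ATOMIC_SEQ_CST);
#endif
}

static inline void
my_thread_fence(int memory_order)
{
	if (memory_order == __ATOMIC_SEQ_CST)
		my_full_mb();                         /* optimized full barrier */
	else
		__atomic_thread_fence(memory_order);  /* compiler builtin */
}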