spinlock: add HTM lock elision for x86
This patch adds methods that use hardware memory transactions (HTM) on the fast path for spinlocks (a.k.a. lock elision). Here the methods are implemented for x86 using Restricted Transactional Memory instructions (Intel(R) Transactional Synchronization Extensions). The implementation falls back to the normal spinlock if HTM is not available or the memory transaction fails.

This is not a replacement for all spinlock usages, since not all critical sections protected by spinlocks are friendly to HTM. For example, an attempt to perform a HW I/O operation inside a hardware memory transaction always aborts the transaction, since the CPU is not able to roll back should the transaction fail. Therefore, hardware transactional locks are not advised to be used around rte_eth_rx_burst() and rte_eth_tx_burst() calls.

Signed-off-by: Roman Dementiev <roman.dementiev@intel.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
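To illustrate the intended use, here is a minimal sketch (not part of the patch; the lock and counter names are hypothetical) of an elision-friendly critical section using the new API:

    /* illustrative sketch; 'cnt_lock' and 'counter' are hypothetical */
    static rte_spinlock_t cnt_lock = RTE_SPINLOCK_INITIALIZER;
    static uint64_t counter;

    static void
    increment_counter(void)
    {
            rte_spinlock_lock_tm(&cnt_lock);   /* elided via HTM when supported */
            counter++;                         /* short, HTM-friendly section */
            rte_spinlock_unlock_tm(&cnt_lock); /* commits or releases the lock */
    }

Short critical sections that only touch memory, like the one above, are the intended target; sections doing I/O would abort and always take the fall-back lock.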
@@ -66,6 +66,47 @@ rte_spinlock_trylock(rte_spinlock_t *sl)
 #endif

/* Fall-back implementations: HTM is not available on this architecture,
 * so the *_tm methods simply take the regular (recursive) spinlock. */

static inline int rte_tm_supported(void)
{
	return 0;
}

static inline void
rte_spinlock_lock_tm(rte_spinlock_t *sl)
{
	rte_spinlock_lock(sl); /* fall-back */
}

static inline int
rte_spinlock_trylock_tm(rte_spinlock_t *sl)
{
	return rte_spinlock_trylock(sl);
}

static inline void
rte_spinlock_unlock_tm(rte_spinlock_t *sl)
{
	rte_spinlock_unlock(sl);
}

static inline void
rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr)
{
	rte_spinlock_recursive_lock(slr); /* fall-back */
}

static inline void
rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr)
{
	rte_spinlock_recursive_unlock(slr);
}

static inline int
rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
{
	return rte_spinlock_recursive_trylock(slr);
}

#ifdef __cplusplus
}
#endif
lib/librte_eal/common/include/arch/x86/rte_rtm.h (new file, 73 lines)
@@ -0,0 +1,73 @@
#ifndef _RTE_RTM_H_
#define _RTE_RTM_H_ 1

/*
 * Copyright (c) 2012,2013 Intel Corporation
 * Author: Andi Kleen
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */

/* Official RTM intrinsics interface matching gcc/icc, but works
   on older gcc compatible compilers and binutils. */

#ifdef __cplusplus
extern "C" {
#endif

#define RTE_XBEGIN_STARTED	(~0u)
#define RTE_XABORT_EXPLICIT	(1 << 0)
#define RTE_XABORT_RETRY	(1 << 1)
#define RTE_XABORT_CONFLICT	(1 << 2)
#define RTE_XABORT_CAPACITY	(1 << 3)
#define RTE_XABORT_DEBUG	(1 << 4)
#define RTE_XABORT_NESTED	(1 << 5)
#define RTE_XABORT_CODE(x)	(((x) >> 24) & 0xff)

static __attribute__((__always_inline__)) inline
unsigned int rte_xbegin(void)
{
	unsigned int ret = RTE_XBEGIN_STARTED;

	/* XBEGIN, encoded as raw bytes so older assemblers accept it */
	asm volatile(".byte 0xc7,0xf8 ; .long 0" : "+a" (ret) :: "memory");
	return ret;
}

static __attribute__((__always_inline__)) inline
void rte_xend(void)
{
	/* XEND: commit the current transaction */
	asm volatile(".byte 0x0f,0x01,0xd5" ::: "memory");
}

static __attribute__((__always_inline__)) inline
void rte_xabort(const unsigned int status)
{
	/* XABORT: abort the transaction with an 8-bit status code */
	asm volatile(".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory");
}

static __attribute__((__always_inline__)) inline
int rte_xtest(void)
{
	unsigned char out;

	/* XTEST: clears ZF if the CPU is executing transactionally */
	asm volatile(".byte 0x0f,0x01,0xd6 ; setnz %0" :
		"=r" (out) :: "memory");
	return out;
}

#ifdef __cplusplus
}
#endif

#endif /* _RTE_RTM_H_ */
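For reference, the canonical pattern for composing these intrinsics is sketched below (illustrative only, not part of the patch; 'shared' is a hypothetical variable). The rte_try_tm() helper in the x86 spinlock header below follows the same shape with a bounded retry loop:

    unsigned int status = rte_xbegin();
    if (status == RTE_XBEGIN_STARTED) {
            shared++;       /* speculative: buffered by HW until commit */
            rte_xend();     /* commit the transaction */
    } else if (status & RTE_XABORT_RETRY) {
            /* the abort may be transient; the caller may retry */
    } else {
            /* persistent abort: fall back to a conventional lock */
    }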
lib/librte_eal/common/include/arch/x86/rte_spinlock.h

@@ -39,6 +39,13 @@ extern "C" {
#endif

#include "generic/rte_spinlock.h"
#include "rte_rtm.h"
#include "rte_cpuflags.h"
#include "rte_branch_prediction.h"
#include "rte_common.h"

#define RTE_RTM_MAX_RETRIES (10)
#define RTE_XABORT_LOCK_BUSY (0xff)

#ifndef RTE_FORCE_INTRINSICS
static inline void
@@ -87,6 +94,106 @@ rte_spinlock_trylock (rte_spinlock_t *sl)
}
#endif

static uint8_t rtm_supported; /* cache the flag to avoid the overhead
				 of the rte_cpu_get_flag_enabled function */

static inline void __attribute__((constructor))
rte_rtm_init(void)
{
	rtm_supported = rte_cpu_get_flag_enabled(RTE_CPUFLAG_RTM);
}

static inline int rte_tm_supported(void)
{
	return rtm_supported;
}

static inline int
rte_try_tm(volatile int *lock)
{
	if (!rtm_supported)
		return 0;

	int retries = RTE_RTM_MAX_RETRIES;

	while (likely(retries--)) {

		unsigned int status = rte_xbegin();

		if (likely(RTE_XBEGIN_STARTED == status)) {
			/* Reading the lock adds it to the transaction
			 * read-set; abort if another thread holds it. */
			if (unlikely(*lock))
				rte_xabort(RTE_XABORT_LOCK_BUSY);
			else
				return 1;
		}
		/* Wait for the lock holder to release before retrying. */
		while (*lock)
			rte_pause();

		if ((status & RTE_XABORT_EXPLICIT) &&
			(RTE_XABORT_CODE(status) == RTE_XABORT_LOCK_BUSY))
			continue;

		if ((status & RTE_XABORT_RETRY) == 0) /* do not retry */
			break;
	}
	return 0;
}

static inline void
rte_spinlock_lock_tm(rte_spinlock_t *sl)
{
	if (likely(rte_try_tm(&sl->locked)))
		return;

	rte_spinlock_lock(sl); /* fall-back */
}

static inline int
rte_spinlock_trylock_tm(rte_spinlock_t *sl)
{
	if (likely(rte_try_tm(&sl->locked)))
		return 1;

	return rte_spinlock_trylock(sl);
}

static inline void
rte_spinlock_unlock_tm(rte_spinlock_t *sl)
{
	/* If the lock is held, the fall-back path was taken;
	 * otherwise we are inside a transaction and must commit. */
	if (unlikely(sl->locked))
		rte_spinlock_unlock(sl);
	else
		rte_xend();
}

static inline void
rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr)
{
	if (likely(rte_try_tm(&slr->sl.locked)))
		return;

	rte_spinlock_recursive_lock(slr); /* fall-back */
}

static inline void
rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr)
{
	if (unlikely(slr->sl.locked))
		rte_spinlock_recursive_unlock(slr);
	else
		rte_xend();
}

static inline int
rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
{
	if (likely(rte_try_tm(&slr->sl.locked)))
		return 1;

	return rte_spinlock_recursive_trylock(slr);
}

#ifdef __cplusplus
}
#endif
lib/librte_eal/common/include/generic/rte_spinlock.h

@@ -144,6 +144,59 @@ static inline int rte_spinlock_is_locked (rte_spinlock_t *sl)
	return sl->locked;
}

/**
 * Test if hardware transactional memory (lock elision) is supported
 *
 * @return
 *   1 if the hardware transactional memory is supported; 0 otherwise.
 */
static inline int rte_tm_supported(void);

/**
 * Try to execute critical section in a hardware memory transaction;
 * if it fails or is not available, take the spinlock.
 *
 * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
 * transaction always aborts the transaction since the CPU is not able to
 * roll back should the transaction fail. Therefore, hardware transactional
 * locks are not advised to be used around rte_eth_rx_burst() and
 * rte_eth_tx_burst() calls.
 *
 * @param sl
 *   A pointer to the spinlock.
 */
static inline void
rte_spinlock_lock_tm(rte_spinlock_t *sl);

/**
 * Commit the hardware memory transaction, or release the spinlock if
 * the spinlock was used as a fall-back.
 *
 * @param sl
 *   A pointer to the spinlock.
 */
static inline void
rte_spinlock_unlock_tm(rte_spinlock_t *sl);

/**
 * Try to execute critical section in a hardware memory transaction;
 * if it fails or is not available, try to take the lock.
 *
 * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
 * transaction always aborts the transaction since the CPU is not able to
 * roll back should the transaction fail. Therefore, hardware transactional
 * locks are not advised to be used around rte_eth_rx_burst() and
 * rte_eth_tx_burst() calls.
 *
 * @param sl
 *   A pointer to the spinlock.
 * @return
 *   1 if the hardware memory transaction is successfully started
 *   or the lock is successfully taken; 0 otherwise.
 */
static inline int
rte_spinlock_trylock_tm(rte_spinlock_t *sl);

/**
 * The rte_spinlock_recursive_t type.
 */
@@ -223,4 +276,50 @@ static inline int rte_spinlock_recursive_trylock(rte_spinlock_recursive_t *slr)
	return 1;
}

/**
 * Try to execute critical section in a hardware memory transaction;
 * if it fails or is not available, take the recursive spinlock.
 *
 * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
 * transaction always aborts the transaction since the CPU is not able to
 * roll back should the transaction fail. Therefore, hardware transactional
 * locks are not advised to be used around rte_eth_rx_burst() and
 * rte_eth_tx_burst() calls.
 *
 * @param slr
 *   A pointer to the recursive spinlock.
 */
static inline void rte_spinlock_recursive_lock_tm(
	rte_spinlock_recursive_t *slr);

/**
 * Commit the hardware memory transaction, or release the recursive spinlock
 * if the recursive spinlock was used as a fall-back.
 *
 * @param slr
 *   A pointer to the recursive spinlock.
 */
static inline void rte_spinlock_recursive_unlock_tm(
	rte_spinlock_recursive_t *slr);

/**
 * Try to execute critical section in a hardware memory transaction;
 * if it fails or is not available, try to take the recursive lock.
 *
 * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
 * transaction always aborts the transaction since the CPU is not able to
 * roll back should the transaction fail. Therefore, hardware transactional
 * locks are not advised to be used around rte_eth_rx_burst() and
 * rte_eth_tx_burst() calls.
 *
 * @param slr
 *   A pointer to the recursive spinlock.
 * @return
 *   1 if the hardware memory transaction is successfully started
 *   or the lock is successfully taken; 0 otherwise.
 */
static inline int rte_spinlock_recursive_trylock_tm(
	rte_spinlock_recursive_t *slr);

#endif /* _RTE_SPINLOCK_H_ */
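As a final illustration (a hypothetical sketch, not part of the patch; 'stats_lock' and update_stats() are invented names), the trylock variant suits callers that must not block:

    if (rte_spinlock_trylock_tm(&stats_lock)) {
            update_stats();                     /* inside HTM or under the lock */
            rte_spinlock_unlock_tm(&stats_lock); /* commit or release */
    } else {
            /* neither transaction nor lock acquired; defer the work */
    }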