On x86, remove the unneeded data dependency that atomic_load_acq(9)
currently imposes on its source.

Right now, atomic_load_acq() on x86 is sequentially consistent with other
atomics; the code ensures this by issuing a store/load barrier, implemented
as a locked no-op on the source.  Provide a separate primitive,
__storeload_barrier(), implemented as a locked no-op on a CPU-private
variable, and place __storeload_barrier() before the load.  This keeps the
seq_cst semantics but avoids introducing a false dependency on the
non-modification of the source for its later uses.

Note that the seq_cst property of x86 atomic_load_acq() is neither
documented nor provided by the atomics implementations on other
architectures, although some kernel code relies on the behaviour.  This
commit does not intend to change that.

Reviewed by:    alc
Discussed with: bde
Tested by:      pho
Sponsored by:   The FreeBSD Foundation
MFC after:      2 weeks
commit 7626d062c3
parent af38028d11
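Before the diff itself, a minimal stand-alone sketch of the two barrier
strategies the message describes.  It is illustrative only: scratch,
old_load_acq() and new_load_acq() are made-up names, it assumes GCC/Clang
inline assembly on x86-64, and it approximates rather than reproduces the
kernel macros changed below.

/* Illustrative sketch -- not part of the commit. */
#include <stdint.h>

static uint32_t scratch;	/* stand-in for the per-CPU monitor buffer */

/*
 * Old scheme: locked no-op (cmpxchg) on the source itself.  The locked
 * read-modify-write takes the source cache line exclusive; %eax may hold
 * any value on entry, and either cmpxchg outcome leaves the current *p
 * in %eax, so res is always the loaded value.
 */
static inline uint32_t
old_load_acq(volatile uint32_t *p)
{
	uint32_t res;

	__asm__ __volatile__("lock; cmpxchgl %0,%1"
	    : "=a" (res), "+m" (*p) : : "memory", "cc");
	return (res);
}

/*
 * New scheme: Store/Load barrier on a private word, then a plain load;
 * the source line is only read, so no false dependency is created on
 * its non-modification.
 */
static inline uint32_t
new_load_acq(volatile uint32_t *p)
{
	uint32_t res;

	__asm__ __volatile__("lock; addl $0,%0"
	    : "+m" (scratch) : : "memory", "cc");
	res = *p;
	__asm__ __volatile__("" : : : "memory");	/* compiler barrier */
	return (res);
}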
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#undef ATOMIC_ASM

/* Make atomic.h generate public functions */
static __inline void __storeload_barrier(void);
#define WANT_FUNCTIONS
#define static
#undef __inline
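A note on the one-line addition above: this file strips the static and
__inline keywords (the #define static / #undef __inline lines) before
including machine/atomic.h, which is what turns the header's inline
definitions into public functions.  Declaring __storeload_barrier() as
static __inline before those defines take effect appears intended to pin
its internal linkage, so the new helper itself is not exported alongside
the generated atomics.  A minimal stand-alone illustration of that C
linkage rule, with hypothetical names:

/* Illustrative sketch -- not part of the commit. */
static inline void helper(void);	/* prior declaration fixes internal linkage */

#define static				/* erase the keywords, as atomic.c does */
#undef	inline
#define	inline

static inline void helper(void) { }	/* expands to "void helper(void) { }";
					   still internal: earlier declaration governs */
static inline void public_op(void) { helper(); }	/* emitted as an external symbol */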
@@ -93,6 +93,8 @@ _Static_assert(OFFSETOF_CURTHREAD == offsetof(struct pcpu, pc_curthread),
    "OFFSETOF_CURTHREAD does not correspond with offset of pc_curthread.");
_Static_assert(OFFSETOF_CURPCB == offsetof(struct pcpu, pc_curpcb),
    "OFFSETOF_CURPCB does not correspond with offset of pc_curpcb.");
_Static_assert(OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf),
    "OFFSETOF_MONINORBUF does not correspond with offset of pc_monitorbuf.");

struct savefpu *
get_pcb_user_save_td(struct thread *td)
@@ -85,7 +85,7 @@ u_long atomic_fetchadd_long(volatile u_long *p, u_long v);
int atomic_testandset_int(volatile u_int *p, u_int v);
int atomic_testandset_long(volatile u_long *p, u_int v);

#define ATOMIC_LOAD(TYPE, LOP) \
#define ATOMIC_LOAD(TYPE) \
u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p)
#define ATOMIC_STORE(TYPE) \
void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
@@ -245,54 +245,80 @@ atomic_testandset_long(volatile u_long *p, u_int v)
 * We assume that a = b will do atomic loads and stores. Due to the
 * IA32 memory model, a simple store guarantees release semantics.
 *
 * However, loads may pass stores, so for atomic_load_acq we have to
 * ensure a Store/Load barrier to do the load in SMP kernels. We use
 * "lock cmpxchg" as recommended by the AMD Software Optimization
 * Guide, and not mfence. For UP kernels, however, the cache of the
 * single processor is always consistent, so we only need to take care
 * of the compiler.
 * However, a load may pass a store if they are performed on distinct
 * addresses, so for atomic_load_acq we introduce a Store/Load barrier
 * before the load in SMP kernels. We use "lock addl $0,mem", as
 * recommended by the AMD Software Optimization Guide, and not mfence.
 * In the kernel, we use a private per-cpu cache line as the target
 * for the locked addition, to avoid introducing false data
 * dependencies. In userspace, a word in the red zone on the stack
 * (-8(%rsp)) is utilized.
 *
 * For UP kernels, however, the memory of the single processor is
 * always consistent, so we only need to stop the compiler from
 * reordering accesses in a way that violates the semantics of acquire
 * and release.
 */
#define ATOMIC_STORE(TYPE) \
static __inline void \
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{ \
        __compiler_membar(); \
        *p = v; \
} \

#if defined(_KERNEL)

/*
 * OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf).
 *
 * The open-coded number is used instead of the symbolic expression to
 * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers.
 * An assertion in amd64/vm_machdep.c ensures that the value is correct.
 */
#define OFFSETOF_MONITORBUF 0x180

#if defined(SMP)
static __inline void
__storeload_barrier(void)
{

        __asm __volatile("lock; addl $0,%%gs:%0"
            : "+m" (*(u_int *)OFFSETOF_MONITORBUF) : : "memory", "cc");
}
#else /* _KERNEL && UP */
static __inline void
__storeload_barrier(void)
{

        __compiler_membar();
}
#endif /* SMP */
#else /* !_KERNEL */
static __inline void
__storeload_barrier(void)
{

        __asm __volatile("lock; addl $0,-8(%%rsp)" : : : "memory", "cc");
}
#endif /* _KERNEL*/

#define ATOMIC_LOAD(TYPE) \
static __inline u_##TYPE \
atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
{ \
        u_##TYPE res; \
\
        __storeload_barrier(); \
        res = *p; \
        __compiler_membar(); \
        return (res); \
} \
struct __hack

#if defined(_KERNEL) && !defined(SMP)

#define ATOMIC_LOAD(TYPE, LOP) \
static __inline u_##TYPE \
atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
{ \
        u_##TYPE tmp; \
\
        tmp = *p; \
        __compiler_membar(); \
        return (tmp); \
} \
#define ATOMIC_STORE(TYPE) \
static __inline void \
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) \
{ \
\
        __compiler_membar(); \
        *p = v; \
} \
struct __hack

#else /* !(_KERNEL && !SMP) */

#define ATOMIC_LOAD(TYPE, LOP) \
static __inline u_##TYPE \
atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
{ \
        u_##TYPE res; \
\
        __asm __volatile(MPLOCKED LOP \
        : "=a" (res), /* 0 */ \
          "+m" (*p) /* 1 */ \
        : : "memory", "cc"); \
        return (res); \
} \
struct __hack

#endif /* _KERNEL && !SMP */

#endif /* KLD_MODULE || !__GNUCLIKE_ASM */

ATOMIC_ASM(set, char, "orb %b1,%0", "iq", v);
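To see what the new amd64 macros generate, here is atomic_load_acq_int()
hand-expanded for the SMP kernel case.  This is an illustrative expansion
of the ATOMIC_LOAD and __storeload_barrier definitions in the hunk above,
not additional code from the commit:

/* Hand expansion of ATOMIC_LOAD(int) with _KERNEL and SMP defined. */
static __inline u_int
atomic_load_acq_int(volatile u_int *p)
{
	u_int res;

	__storeload_barrier();	/* lock addl $0 on the per-CPU monitor buffer */
	res = *p;		/* plain load; x86 loads already order as acquire */
	__compiler_membar();	/* keep the compiler from hoisting later accesses */
	return (res);
}

The barrier no longer touches *p with a locked read-modify-write, so the
source cache line is only read and later uses of the loaded value carry no
false dependency on it.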
@@ -315,20 +341,19 @@ ATOMIC_ASM(clear, long, "andq %1,%0", "ir", ~v);
ATOMIC_ASM(add, long, "addq %1,%0", "ir", v);
ATOMIC_ASM(subtract, long, "subq %1,%0", "ir", v);

ATOMIC_LOAD(char, "cmpxchgb %b0,%1");
ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
ATOMIC_LOAD(int, "cmpxchgl %0,%1");
ATOMIC_LOAD(long, "cmpxchgq %0,%1");
#define ATOMIC_LOADSTORE(TYPE) \
        ATOMIC_LOAD(TYPE); \
        ATOMIC_STORE(TYPE)

ATOMIC_STORE(char);
ATOMIC_STORE(short);
ATOMIC_STORE(int);
ATOMIC_STORE(long);
ATOMIC_LOADSTORE(char);
ATOMIC_LOADSTORE(short);
ATOMIC_LOADSTORE(int);
ATOMIC_LOADSTORE(long);

#undef ATOMIC_ASM
#undef ATOMIC_LOAD
#undef ATOMIC_STORE

#undef ATOMIC_LOADSTORE
#ifndef WANT_FUNCTIONS

/* Read the current value and store a new value in the destination. */
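As a usage reminder (not part of the change), the acquire/release pair
generated above is what makes a classic publish/consume handoff safe.  The
snippet below is a hypothetical kernel-side sketch; data and ready are
made-up variables, and it assumes machine/atomic.h and machine/cpu.h for
cpu_spinwait():

/* Illustrative sketch -- not part of the commit. */
#include <sys/types.h>
#include <machine/atomic.h>
#include <machine/cpu.h>	/* cpu_spinwait() */

static u_int data;		/* made-up payload */
static volatile u_int ready;	/* made-up flag */

static void
producer(u_int v)
{
	data = v;				/* ordinary store */
	atomic_store_rel_int(&ready, 1);	/* release: publishes data */
}

static u_int
consumer(void)
{
	while (atomic_load_acq_int(&ready) == 0)
		cpu_spinwait();			/* spin until published */
	return (data);				/* acquire load orders this read after it */
}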
@@ -111,6 +111,8 @@ _Static_assert(OFFSETOF_CURTHREAD == offsetof(struct pcpu, pc_curthread),
    "OFFSETOF_CURTHREAD does not correspond with offset of pc_curthread.");
_Static_assert(OFFSETOF_CURPCB == offsetof(struct pcpu, pc_curpcb),
    "OFFSETOF_CURPCB does not correspond with offset of pc_curpcb.");
_Static_assert(OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf),
    "OFFSETOF_MONINORBUF does not correspond with offset of pc_monitorbuf.");

static void cpu_reset_real(void);
#ifdef SMP
@@ -87,7 +87,7 @@ int atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
u_int atomic_fetchadd_int(volatile u_int *p, u_int v);
int atomic_testandset_int(volatile u_int *p, u_int v);

#define ATOMIC_LOAD(TYPE, LOP) \
#define ATOMIC_LOAD(TYPE) \
u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p)
#define ATOMIC_STORE(TYPE) \
void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
@@ -228,54 +228,79 @@ atomic_testandset_int(volatile u_int *p, u_int v)
 * We assume that a = b will do atomic loads and stores. Due to the
 * IA32 memory model, a simple store guarantees release semantics.
 *
 * However, loads may pass stores, so for atomic_load_acq we have to
 * ensure a Store/Load barrier to do the load in SMP kernels. We use
 * "lock cmpxchg" as recommended by the AMD Software Optimization
 * Guide, and not mfence. For UP kernels, however, the cache of the
 * single processor is always consistent, so we only need to take care
 * of the compiler.
 * However, a load may pass a store if they are performed on distinct
 * addresses, so for atomic_load_acq we introduce a Store/Load barrier
 * before the load in SMP kernels. We use "lock addl $0,mem", as
 * recommended by the AMD Software Optimization Guide, and not mfence.
 * In the kernel, we use a private per-cpu cache line as the target
 * for the locked addition, to avoid introducing false data
 * dependencies. In userspace, a word at the top of the stack is
 * utilized.
 *
 * For UP kernels, however, the memory of the single processor is
 * always consistent, so we only need to stop the compiler from
 * reordering accesses in a way that violates the semantics of acquire
 * and release.
 */
#define ATOMIC_STORE(TYPE) \
static __inline void \
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{ \
        __compiler_membar(); \
        *p = v; \
} \
#if defined(_KERNEL)

/*
 * OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf).
 *
 * The open-coded number is used instead of the symbolic expression to
 * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers.
 * An assertion in i386/vm_machdep.c ensures that the value is correct.
 */
#define OFFSETOF_MONITORBUF 0x180

#if defined(SMP)
static __inline void
__storeload_barrier(void)
{

        __asm __volatile("lock; addl $0,%%fs:%0"
            : "+m" (*(u_int *)OFFSETOF_MONITORBUF) : : "memory", "cc");
}
#else /* _KERNEL && UP */
static __inline void
__storeload_barrier(void)
{

        __compiler_membar();
}
#endif /* SMP */
#else /* !_KERNEL */
static __inline void
__storeload_barrier(void)
{

        __asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc");
}
#endif /* _KERNEL*/

#define ATOMIC_LOAD(TYPE) \
static __inline u_##TYPE \
atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
{ \
        u_##TYPE res; \
\
        __storeload_barrier(); \
        res = *p; \
        __compiler_membar(); \
        return (res); \
} \
struct __hack

#if defined(_KERNEL) && !defined(SMP)

#define ATOMIC_LOAD(TYPE, LOP) \
static __inline u_##TYPE \
atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
{ \
        u_##TYPE tmp; \
\
        tmp = *p; \
        __compiler_membar(); \
        return (tmp); \
} \
#define ATOMIC_STORE(TYPE) \
static __inline void \
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) \
{ \
\
        __compiler_membar(); \
        *p = v; \
} \
struct __hack

#else /* !(_KERNEL && !SMP) */

#define ATOMIC_LOAD(TYPE, LOP) \
static __inline u_##TYPE \
atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
{ \
        u_##TYPE res; \
\
        __asm __volatile(MPLOCKED LOP \
        : "=a" (res), /* 0 */ \
          "+m" (*p) /* 1 */ \
        : : "memory", "cc"); \
        return (res); \
} \
struct __hack

#endif /* _KERNEL && !SMP */

#ifdef _KERNEL

#ifdef WANT_FUNCTIONS
@@ -511,19 +536,19 @@ ATOMIC_ASM(clear, long, "andl %1,%0", "ir", ~v);
ATOMIC_ASM(add, long, "addl %1,%0", "ir", v);
ATOMIC_ASM(subtract, long, "subl %1,%0", "ir", v);

ATOMIC_LOAD(char, "cmpxchgb %b0,%1");
ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
ATOMIC_LOAD(int, "cmpxchgl %0,%1");
ATOMIC_LOAD(long, "cmpxchgl %0,%1");
#define ATOMIC_LOADSTORE(TYPE) \
        ATOMIC_LOAD(TYPE); \
        ATOMIC_STORE(TYPE)

ATOMIC_STORE(char);
ATOMIC_STORE(short);
ATOMIC_STORE(int);
ATOMIC_STORE(long);
ATOMIC_LOADSTORE(char);
ATOMIC_LOADSTORE(short);
ATOMIC_LOADSTORE(int);
ATOMIC_LOADSTORE(long);

#undef ATOMIC_ASM
#undef ATOMIC_LOAD
#undef ATOMIC_STORE
#undef ATOMIC_LOADSTORE

#ifndef WANT_FUNCTIONS