diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h
index e167b54f979e..99a94b77bb15 100644
--- a/sys/amd64/include/atomic.h
+++ b/sys/amd64/include/atomic.h
@@ -81,8 +81,9 @@ int	atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src);
 u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
 u_long	atomic_fetchadd_long(volatile u_long *p, u_long v);
 
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
-u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p);	\
+#define	ATOMIC_LOAD(TYPE, LOP)				\
+u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p)
+#define	ATOMIC_STORE(TYPE)				\
 void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 
 #else /* !KLD_MODULE && __GNUCLIKE_ASM */
@@ -210,37 +211,43 @@ atomic_fetchadd_long(volatile u_long *p, u_long v)
 	return (v);
 }
 
+/*
+ * We assume that a = b will do atomic loads and stores.  Due to the
+ * IA32 memory model, a simple store guarantees release semantics.
+ *
+ * However, loads may pass stores, so for atomic_load_acq we have to
+ * ensure a Store/Load barrier to do the load in SMP kernels.  We use
+ * "lock cmpxchg" as recommended by the AMD Software Optimization
+ * Guide, and not mfence.  For UP kernels, however, the cache of the
+ * single processor is always consistent, so we only need to take care
+ * of the compiler.
+ */
+#define	ATOMIC_STORE(TYPE)				\
+static __inline void					\
+atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
+{							\
+	__asm __volatile("" : : : "memory");		\
+	*p = v;						\
+}							\
+struct __hack
+
 #if defined(_KERNEL) && !defined(SMP)
 
-/*
- * We assume that a = b will do atomic loads and stores.  However, on a
- * PentiumPro or higher, reads may pass writes, so for that case we have
- * to use a serializing instruction (i.e. with LOCK) to do the load in
- * SMP kernels.  For UP kernels, however, the cache of the single processor
- * is always consistent, so we only need to take care of compiler.
- */
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
+#define	ATOMIC_LOAD(TYPE, LOP)				\
 static __inline u_##TYPE				\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 {							\
 	u_##TYPE tmp;					\
 							\
 	tmp = *p;					\
-	__asm __volatile ("" : : : "memory");		\
+	__asm __volatile("" : : : "memory");		\
 	return (tmp);					\
-}							\
-							\
-static __inline void					\
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{							\
-	__asm __volatile ("" : : : "memory");		\
-	*p = v;						\
 }							\
 struct __hack
 
 #else /* !(_KERNEL && !SMP) */
 
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
+#define	ATOMIC_LOAD(TYPE, LOP)				\
 static __inline u_##TYPE				\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 {							\
@@ -253,19 +260,6 @@ atomic_load_acq_##TYPE(volatile u_##TYPE *p)	\
 	: "memory", "cc");				\
 							\
 	return (res);					\
-}							\
-							\
-/*							\
- * The XCHG instruction asserts LOCK automagically.	\
- */							\
-static __inline void					\
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{							\
-	__asm __volatile(SOP				\
-	: "=m" (*p),			/* 0 */		\
-	  "+r" (v)			/* 1 */		\
-	: "m" (*p)			/* 2 */		\
-	: "memory");					\
 }							\
 struct __hack
 
@@ -293,13 +287,19 @@ ATOMIC_ASM(clear,    long,  "andq %1,%0",  "ir",  ~v);
 ATOMIC_ASM(add,	     long,  "addq %1,%0",  "ir",  v);
 ATOMIC_ASM(subtract, long,  "subq %1,%0",  "ir",  v);
 
-ATOMIC_STORE_LOAD(char,	"cmpxchgb %b0,%1", "xchgb %b1,%0");
-ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0");
-ATOMIC_STORE_LOAD(int,	"cmpxchgl %0,%1",  "xchgl %1,%0");
-ATOMIC_STORE_LOAD(long,	"cmpxchgq %0,%1",  "xchgq %1,%0");
+ATOMIC_LOAD(char,  "cmpxchgb %b0,%1");
+ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
+ATOMIC_LOAD(int,   "cmpxchgl %0,%1");
+ATOMIC_LOAD(long,  "cmpxchgq %0,%1");
+
+ATOMIC_STORE(char);
+ATOMIC_STORE(short);
+ATOMIC_STORE(int);
+ATOMIC_STORE(long);
 
 #undef ATOMIC_ASM
-#undef ATOMIC_STORE_LOAD
+#undef ATOMIC_LOAD
+#undef ATOMIC_STORE
 
 #ifndef WANT_FUNCTIONS
 
diff --git a/sys/i386/include/atomic.h b/sys/i386/include/atomic.h
index 9cb96d29f108..6ef59622649f 100644
--- a/sys/i386/include/atomic.h
+++ b/sys/i386/include/atomic.h
@@ -32,9 +32,9 @@
 #error this file needs sys/cdefs.h as a prerequisite
 #endif
 
-#define	mb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory")
-#define	wmb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory")
-#define	rmb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory")
+#define	mb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
+#define	wmb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
+#define	rmb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
 
 /*
  * Various simple operations on memory, each of which is atomic in the
@@ -79,8 +79,9 @@ void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 int	atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
 u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
 
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
-u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p);	\
+#define	ATOMIC_LOAD(TYPE, LOP)				\
+u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p)
+#define	ATOMIC_STORE(TYPE)				\
 void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 
 #else /* !KLD_MODULE && __GNUCLIKE_ASM */
@@ -280,16 +281,29 @@ atomic_fetchadd_int(volatile u_int *p, u_int v)
 	return (v);
 }
 
+/*
+ * We assume that a = b will do atomic loads and stores.  Due to the
+ * IA32 memory model, a simple store guarantees release semantics.
+ *
+ * However, loads may pass stores, so for atomic_load_acq we have to
+ * ensure a Store/Load barrier to do the load in SMP kernels.  We use
+ * "lock cmpxchg" as recommended by the AMD Software Optimization
+ * Guide, and not mfence.  For UP kernels, however, the cache of the
+ * single processor is always consistent, so we only need to take care
+ * of the compiler.
+ */
+#define	ATOMIC_STORE(TYPE)				\
+static __inline void					\
+atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
+{							\
+	__asm __volatile("" : : : "memory");		\
+	*p = v;						\
+}							\
+struct __hack
+
 #if defined(_KERNEL) && !defined(SMP)
 
-/*
- * We assume that a = b will do atomic loads and stores.  However, on a
- * PentiumPro or higher, reads may pass writes, so for that case we have
- * to use a serializing instruction (i.e. with LOCK) to do the load in
- * SMP kernels.  For UP kernels, however, the cache of the single processor
- * is always consistent, so we only need to take care of compiler.
- */
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
+#define	ATOMIC_LOAD(TYPE, LOP)				\
 static __inline u_##TYPE				\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 {							\
@@ -298,19 +312,12 @@ atomic_load_acq_##TYPE(volatile u_##TYPE *p)	\
 	tmp = *p;					\
 	__asm __volatile("" : : : "memory");		\
 	return (tmp);					\
-}							\
-							\
-static __inline void					\
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{							\
-	__asm __volatile("" : : : "memory");		\
-	*p = v;						\
 }							\
 struct __hack
 
 #else /* !(_KERNEL && !SMP) */
 
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
+#define	ATOMIC_LOAD(TYPE, LOP)				\
 static __inline u_##TYPE				\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 {							\
@@ -323,19 +330,6 @@ atomic_load_acq_##TYPE(volatile u_##TYPE *p)	\
 	: "memory", "cc");				\
 							\
 	return (res);					\
-}							\
-							\
-/*							\
- * The XCHG instruction asserts LOCK automagically.	\
- */							\
-static __inline void					\
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{							\
-	__asm __volatile(SOP				\
-	: "=m" (*p),			/* 0 */		\
-	  "+r" (v)			/* 1 */		\
-	: "m" (*p)			/* 2 */		\
-	: "memory");					\
 }							\
 struct __hack
 
@@ -363,13 +357,19 @@ ATOMIC_ASM(clear,    long,  "andl %1,%0",  "ir",  ~v);
 ATOMIC_ASM(add,	     long,  "addl %1,%0",  "ir",  v);
 ATOMIC_ASM(subtract, long,  "subl %1,%0",  "ir",  v);
 
-ATOMIC_STORE_LOAD(char,	"cmpxchgb %b0,%1", "xchgb %b1,%0");
-ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0");
-ATOMIC_STORE_LOAD(int,	"cmpxchgl %0,%1",  "xchgl %1,%0");
-ATOMIC_STORE_LOAD(long,	"cmpxchgl %0,%1",  "xchgl %1,%0");
+ATOMIC_LOAD(char,  "cmpxchgb %b0,%1");
+ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
+ATOMIC_LOAD(int,   "cmpxchgl %0,%1");
+ATOMIC_LOAD(long,  "cmpxchgl %0,%1");
+
+ATOMIC_STORE(char);
+ATOMIC_STORE(short);
+ATOMIC_STORE(int);
+ATOMIC_STORE(long);
 
 #undef ATOMIC_ASM
-#undef ATOMIC_STORE_LOAD
+#undef ATOMIC_LOAD
+#undef ATOMIC_STORE
 
 #ifndef WANT_FUNCTIONS
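
Usage note (illustrative, not part of the patch): the sketch below shows the acquire/release pairing that the split ATOMIC_LOAD/ATOMIC_STORE macros generate, using the real atomic_load_acq_int() and atomic_store_rel_int() interfaces from the patched <machine/atomic.h>. The flag, data, publish and consume names are hypothetical; this is a minimal kernel-context C sketch, not code from the change itself.

/*
 * Hypothetical producer/consumer handoff built on the interfaces
 * generated by ATOMIC_STORE(int) and ATOMIC_LOAD(int, ...) above.
 */
#include <sys/types.h>
#include <machine/atomic.h>

static volatile u_int	flag;		/* hypothetical "data is ready" flag */
static u_int		data;		/* hypothetical payload */

static void
publish(u_int v)
{

	data = v;			/* plain store of the payload */
	/*
	 * Release store: per the patch, a compiler barrier plus a plain
	 * store; the payload store above cannot move past it.
	 */
	atomic_store_rel_int(&flag, 1);
}

static u_int
consume(void)
{

	/*
	 * Acquire load: "lock cmpxchg" on SMP kernels, so the read of
	 * the payload below cannot be hoisted above the flag check.
	 */
	while (atomic_load_acq_int(&flag) == 0)
		;			/* spin until published */
	return (data);
}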