x86 atomics: use lock prefix unconditionally

Atomics have significant uses besides providing in-system primitives
for safe memory updates.  They are also used to implement communication
with out-of-system software or hardware that follows some protocol.

For instance, even a UP kernel might require a protocol that uses
atomics to communicate with a software-emulated device provided by an
SMP hypervisor.  Or real hardware might need atomic accesses as part of
its management protocol.
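
As an illustration of such a protocol, consider a minimal sketch (the
shared structure, field names, and events below are hypothetical, not
taken from the tree) where a UP guest and a hypervisor device model
share an event word.  Even with a single vCPU, the guest's
read-modify-write atomics must really be locked, because the device
model may be running on another physical CPU of the SMP host:

#include <sys/types.h>
#include <machine/atomic.h>

/* Hypothetical page shared between the guest and the device model. */
struct fake_dev_shared {
	volatile u_int	evt_pending;	/* event bitmask, updated by both sides */
};

#define	FAKE_EVT_TXDONE	0x01
#define	FAKE_EVT_RXRDY	0x02

/* Guest side: acknowledge an event posted by the device model. */
static void
fake_dev_ack(struct fake_dev_shared *sh, u_int evt)
{
	/*
	 * Expands to a lock-prefixed "andl".  Without the lock prefix a
	 * concurrent set from the device model's CPU could be lost, even
	 * though the guest itself runs on a single vCPU.
	 */
	atomic_clear_int(&sh->evt_pending, evt);
}

/* Guest side: check which events are currently posted. */
static u_int
fake_dev_poll(struct fake_dev_shared *sh)
{
	return (atomic_load_acq_int(&sh->evt_pending));
}

The same kind of reasoning applies to real hardware that shares such
control words with the driver.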

Another point is that UP configurations on x86 are practically extinct,
so the slight performance hit of unconditionally using proper atomics is
not important.  It is compensated by less code clutter, which in fact
improves the life expectancy of the UP/i386 support.
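
To make the effect concrete, here is roughly what the ATOMIC_ASM()
instantiation of atomic_add_int() expands to after this commit (a
sketch of the macro expansion, not literal file contents); previously
the "lock; " prefix came from MPLOCKED, which was empty on non-SMP
kernel builds:

static __inline void
atomic_add_int(volatile u_int *p, u_int v)
{
	/* The lock prefix is now unconditional, SMP or not. */
	__asm __volatile("lock; addl %1,%0"
	    : "+m" (*p)
	    : "ir" (v)
	    : "cc");
}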

Requested by:	Elliott Mitchell <ehem+freebsd@m5p.com>
Reviewed by:	Elliott Mitchell, imp, jhb, markj, royger
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
Differential revision:	https://reviews.freebsd.org/D34153
Konstantin Belousov, 2022-02-03 11:51:36 +02:00
commit 9c0b759bf9 (parent cbf999e75d)
2 changed files with 32 additions and 87 deletions

sys/amd64/include/atomic.h

@ -143,16 +143,10 @@ void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
#else /* !__GNUCLIKE_ASM */
/*
- * For userland, always use lock prefixes so that the binaries will run
- * on both SMP and !SMP systems.
- */
-#if defined(SMP) || !defined(_KERNEL) || defined(KLD_MODULE)
-#define MPLOCKED "lock ; "
-#else
-#define MPLOCKED
-#endif
-/*
+ * Always use lock prefixes. The result is slighly less optimal for
+ * UP systems, but it matters less now, and sometimes UP is emulated
+ * over SMP.
+ *
* The assembly is volatilized to avoid code chunk removal by the compiler.
* GCC aggressively reorders operations and memory clobbering is necessary
* in order to avoid that for memory barriers.
@ -161,7 +155,7 @@ void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
static __inline void \
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{ \
-__asm __volatile(MPLOCKED OP \
+__asm __volatile("lock; " OP \
: "+m" (*p) \
: CONS (V) \
: "cc"); \
@ -170,7 +164,7 @@ atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
static __inline void \
atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{ \
-__asm __volatile(MPLOCKED OP \
+__asm __volatile("lock; " OP \
: "+m" (*p) \
: CONS (V) \
: "memory", "cc"); \
@ -199,8 +193,7 @@ atomic_cmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE expect, u_##TYPE src) \
u_char res; \
\
__asm __volatile( \
" " MPLOCKED " " \
" cmpxchg %3,%1 ; " \
" lock; cmpxchg %3,%1 ; " \
"# atomic_cmpset_" #TYPE " " \
: "=@cce" (res), /* 0 */ \
"+m" (*dst), /* 1 */ \
@ -216,8 +209,7 @@ atomic_fcmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE *expect, u_##TYPE src) \
u_char res; \
\
__asm __volatile( \
" " MPLOCKED " " \
" cmpxchg %3,%1 ; " \
" lock; cmpxchg %3,%1 ; " \
"# atomic_fcmpset_" #TYPE " " \
: "=@cce" (res), /* 0 */ \
"+m" (*dst), /* 1 */ \
@ -241,8 +233,7 @@ atomic_fetchadd_int(volatile u_int *p, u_int v)
{
__asm __volatile(
" " MPLOCKED " "
" xaddl %0,%1 ; "
" lock; xaddl %0,%1 ; "
"# atomic_fetchadd_int"
: "+r" (v), /* 0 */
"+m" (*p) /* 1 */
@ -259,8 +250,7 @@ atomic_fetchadd_long(volatile u_long *p, u_long v)
{
__asm __volatile(
" " MPLOCKED " "
" xaddq %0,%1 ; "
" lock; xaddq %0,%1 ; "
"# atomic_fetchadd_long"
: "+r" (v), /* 0 */
"+m" (*p) /* 1 */
@ -274,8 +264,7 @@ atomic_testandset_int(volatile u_int *p, u_int v)
u_char res;
__asm __volatile(
" " MPLOCKED " "
" btsl %2,%1 ; "
" lock; btsl %2,%1 ; "
"# atomic_testandset_int"
: "=@ccc" (res), /* 0 */
"+m" (*p) /* 1 */
@ -290,8 +279,7 @@ atomic_testandset_long(volatile u_long *p, u_int v)
u_char res;
__asm __volatile(
" " MPLOCKED " "
" btsq %2,%1 ; "
" lock; btsq %2,%1 ; "
"# atomic_testandset_long"
: "=@ccc" (res), /* 0 */
"+m" (*p) /* 1 */
@ -306,8 +294,7 @@ atomic_testandclear_int(volatile u_int *p, u_int v)
u_char res;
__asm __volatile(
" " MPLOCKED " "
" btrl %2,%1 ; "
" lock; btrl %2,%1 ; "
"# atomic_testandclear_int"
: "=@ccc" (res), /* 0 */
"+m" (*p) /* 1 */
@ -322,8 +309,7 @@ atomic_testandclear_long(volatile u_long *p, u_int v)
u_char res;
__asm __volatile(
" " MPLOCKED " "
" btrq %2,%1 ; "
" lock; btrq %2,%1 ; "
"# atomic_testandclear_long"
: "=@ccc" (res), /* 0 */
"+m" (*p) /* 1 */
@ -344,39 +330,18 @@ atomic_testandclear_long(volatile u_long *p, u_int v)
* special address for "mem". In the kernel, we use a private per-cpu
* cache line. In user space, we use a word in the stack's red zone
* (-8(%rsp)).
- *
- * For UP kernels, however, the memory of the single processor is
- * always consistent, so we only need to stop the compiler from
- * reordering accesses in a way that violates the semantics of acquire
- * and release.
*/
-#if defined(_KERNEL)
-#if defined(SMP) || defined(KLD_MODULE)
static __inline void
__storeload_barrier(void)
{
+#if defined(_KERNEL)
__asm __volatile("lock; addl $0,%%gs:%0"
: "+m" (*(u_int *)OFFSETOF_MONITORBUF) : : "memory", "cc");
-}
-#else /* _KERNEL && UP */
-static __inline void
-__storeload_barrier(void)
-{
-__compiler_membar();
-}
-#endif /* SMP */
#else /* !_KERNEL */
-static __inline void
-__storeload_barrier(void)
-{
__asm __volatile("lock; addl $0,-8(%%rsp)" : : : "memory", "cc");
-}
#endif /* _KERNEL*/
+}
#define ATOMIC_LOAD(TYPE) \
static __inline u_##TYPE \

sys/i386/include/atomic.h

@ -141,16 +141,10 @@ void atomic_subtract_64(volatile uint64_t *, uint64_t);
#else /* !__GNUCLIKE_ASM */
/*
- * For userland, always use lock prefixes so that the binaries will run
- * on both SMP and !SMP systems.
- */
-#if defined(SMP) || !defined(_KERNEL) || defined(KLD_MODULE)
-#define MPLOCKED "lock ; "
-#else
-#define MPLOCKED
-#endif
-/*
+ * Always use lock prefixes. The result is slighly less optimal for
+ * UP systems, but it matters less now, and sometimes UP is emulated
+ * over SMP.
+ *
* The assembly is volatilized to avoid code chunk removal by the compiler.
* GCC aggressively reorders operations and memory clobbering is necessary
* in order to avoid that for memory barriers.
@ -159,7 +153,7 @@ void atomic_subtract_64(volatile uint64_t *, uint64_t);
static __inline void \
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{ \
-__asm __volatile(MPLOCKED OP \
+__asm __volatile("lock; " OP \
: "+m" (*p) \
: CONS (V) \
: "cc"); \
@ -168,7 +162,7 @@ atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
static __inline void \
atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{ \
-__asm __volatile(MPLOCKED OP \
+__asm __volatile("lock; " OP \
: "+m" (*p) \
: CONS (V) \
: "memory", "cc"); \
@ -197,8 +191,7 @@ atomic_cmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE expect, u_##TYPE src) \
u_char res; \
\
__asm __volatile( \
" " MPLOCKED " " \
" cmpxchg %3,%1 ; " \
" lock; cmpxchg %3,%1 ; " \
" sete %0 ; " \
"# atomic_cmpset_" #TYPE " " \
: "=q" (res), /* 0 */ \
@ -215,8 +208,7 @@ atomic_fcmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE *expect, u_##TYPE src) \
u_char res; \
\
__asm __volatile( \
" " MPLOCKED " " \
" cmpxchg %3,%1 ; " \
" lock; cmpxchg %3,%1 ; " \
" sete %0 ; " \
"# atomic_fcmpset_" #TYPE " " \
: "=q" (res), /* 0 */ \
@ -240,8 +232,7 @@ atomic_fetchadd_int(volatile u_int *p, u_int v)
{
__asm __volatile(
" " MPLOCKED " "
" xaddl %0,%1 ; "
" lock; xaddl %0,%1 ; "
"# atomic_fetchadd_int"
: "+r" (v), /* 0 */
"+m" (*p) /* 1 */
@ -255,8 +246,7 @@ atomic_testandset_int(volatile u_int *p, u_int v)
u_char res;
__asm __volatile(
" " MPLOCKED " "
" btsl %2,%1 ; "
" lock; btsl %2,%1 ; "
" setc %0 ; "
"# atomic_testandset_int"
: "=q" (res), /* 0 */
@ -272,8 +262,7 @@ atomic_testandclear_int(volatile u_int *p, u_int v)
u_char res;
__asm __volatile(
" " MPLOCKED " "
" btrl %2,%1 ; "
" lock; btrl %2,%1 ; "
" setc %0 ; "
"# atomic_testandclear_int"
: "=q" (res), /* 0 */
@ -303,11 +292,7 @@ atomic_testandclear_int(volatile u_int *p, u_int v)
*/
#if defined(_KERNEL)
-#if defined(SMP) || defined(KLD_MODULE)
#define __storeload_barrier() __mbk()
-#else /* _KERNEL && UP */
-#define __storeload_barrier() __compiler_membar()
-#endif /* SMP */
#else /* !_KERNEL */
#define __storeload_barrier() __mbu()
#endif /* _KERNEL*/
@ -484,8 +469,7 @@ atomic_cmpset_64_i586(volatile uint64_t *dst, uint64_t expect, uint64_t src)
u_char res;
__asm __volatile(
" " MPLOCKED " "
" cmpxchg8b %1 ; "
" lock; cmpxchg8b %1 ; "
" sete %0"
: "=q" (res), /* 0 */
"+m" (*dst), /* 1 */
@ -502,8 +486,7 @@ atomic_fcmpset_64_i586(volatile uint64_t *dst, uint64_t *expect, uint64_t src)
u_char res;
__asm __volatile(
" " MPLOCKED " "
" cmpxchg8b %1 ; "
" lock; cmpxchg8b %1 ; "
" sete %0"
: "=q" (res), /* 0 */
"+m" (*dst), /* 1 */
@ -522,8 +505,7 @@ atomic_load_acq_64_i586(volatile uint64_t *p)
__asm __volatile(
" movl %%ebx,%%eax ; "
" movl %%ecx,%%edx ; "
" " MPLOCKED " "
" cmpxchg8b %1"
" lock; cmpxchg8b %1"
: "=&A" (res), /* 0 */
"+m" (*p) /* 1 */
: : "memory", "cc");
@ -538,8 +520,7 @@ atomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v)
" movl %%eax,%%ebx ; "
" movl %%edx,%%ecx ; "
"1: "
" " MPLOCKED " "
" cmpxchg8b %0 ; "
" lock; cmpxchg8b %0 ; "
" jne 1b"
: "+m" (*p), /* 0 */
"+A" (v) /* 1 */
@ -554,8 +535,7 @@ atomic_swap_64_i586(volatile uint64_t *p, uint64_t v)
" movl %%eax,%%ebx ; "
" movl %%edx,%%ecx ; "
"1: "
" " MPLOCKED " "
" cmpxchg8b %0 ; "
" lock; cmpxchg8b %0 ; "
" jne 1b"
: "+m" (*p), /* 0 */
"+A" (v) /* 1 */