Add 64-bit atomic ops for armv6.

The only safe way to access a 64-bit value shared across multiple cores
is with atomic_load_64() and
atomic_store_64(), because the normal 64-bit load/store instructions
are not atomic on 32-bit arm.  Luckily the ldrexd/strexd instructions
that are atomic are fairly cheap on armv6.  Because it's fairly simple
to do, this implements all the ops for 64-bit, not just load/store.

Reviewed by:	andrew, cognet
This commit is contained in:
ian 2014-08-01 22:28:36 +00:00
parent 4a0d502636
commit 5cc59eee8a

View File

@ -145,6 +145,28 @@ atomic_set_32(volatile uint32_t *address, uint32_t setmask)
} }
/*
 * Atomically OR the bits of val into the 64-bit word at *p.
 *
 * Implemented as an ldrexd/strexd retry loop.  No barrier instruction is
 * issued by this function itself.
 */
static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
uint64_t tmp;
uint32_t exflag;
/* %Q[x] and %R[x] name the low and high 32-bit halves of 64-bit operand x. */
__asm __volatile(
/* Retry point: reload whenever the exclusive store below fails. */
"1: \n"
" ldrexd %[tmp], [%[ptr]]\n"
/* OR val into both halves of the value just loaded. */
" orr %Q[tmp], %Q[val]\n"
" orr %R[tmp], %R[val]\n"
/* strexd writes its status to exf; non-zero means nothing was stored. */
" strexd %[exf], %[tmp], [%[ptr]]\n"
" teq %[exf], #0\n"
" it ne \n"
" bne 1b\n"
/* Outputs are early-clobber so they never share a register with an input. */
: [exf] "=&r" (exflag),
[tmp] "=&r" (tmp)
: [ptr] "r" (p),
[val] "r" (val)
/* teq changes the condition flags; *p is modified behind the compiler. */
: "cc", "memory");
}
static __inline void static __inline void
atomic_set_long(volatile u_long *address, u_long setmask) atomic_set_long(volatile u_long *address, u_long setmask)
{ {
@ -176,6 +198,28 @@ atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
,"+r" (address), "+r" (setmask) : : "cc", "memory"); ,"+r" (address), "+r" (setmask) : : "cc", "memory");
} }
/*
 * Atomically clear, in the 64-bit word at *p, every bit that is set in val
 * (*p &= ~val).
 *
 * Implemented as an ldrexd/strexd retry loop.  No barrier instruction is
 * issued by this function itself.
 */
static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
uint64_t tmp;
uint32_t exflag;
/* %Q[x] and %R[x] name the low and high 32-bit halves of 64-bit operand x. */
__asm __volatile(
/* Retry point: reload whenever the exclusive store below fails. */
"1: \n"
" ldrexd %[tmp], [%[ptr]]\n"
/* bic = bit clear: drop val's bits from both halves of the loaded value. */
" bic %Q[tmp], %Q[val]\n"
" bic %R[tmp], %R[val]\n"
/* strexd writes its status to exf; non-zero means nothing was stored. */
" strexd %[exf], %[tmp], [%[ptr]]\n"
" teq %[exf], #0\n"
" it ne \n"
" bne 1b\n"
/* Outputs are early-clobber so they never share a register with an input. */
: [exf] "=&r" (exflag),
[tmp] "=&r" (tmp)
: [ptr] "r" (p),
[val] "r" (val)
/* teq changes the condition flags; *p is modified behind the compiler. */
: "cc", "memory");
}
static __inline void static __inline void
atomic_clear_long(volatile u_long *address, u_long setmask) atomic_clear_long(volatile u_long *address, u_long setmask)
{ {
@ -213,6 +257,35 @@ atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_in
return (ret); return (ret);
} }
/*
 * Atomic 64-bit compare-and-set.
 *
 * If *p equals cmpval, store newval into *p and return 1; otherwise leave
 * *p unchanged and return 0.  A failed exclusive store is retried from the
 * load, so a 0 result always means the comparison itself failed.
 * No barrier instruction is issued by this function itself.
 */
static __inline int
atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
uint64_t tmp;
uint32_t ret;
/* %Q[x] and %R[x] name the low and high 32-bit halves of 64-bit operand x. */
__asm __volatile(
/* Retry point: reload whenever the exclusive store below fails. */
"1: \n"
" ldrexd %[tmp], [%[ptr]]\n"
/* Compare the low words first ... */
" teq %Q[tmp], %Q[cmp]\n"
/* ... then, inside one IT block: high-word compare only if the low words */
/* matched, and on any mismatch set ret = 0 and skip the store. */
" itee eq \n"
" teqeq %R[tmp], %R[cmp]\n"
" movne %[ret], #0\n"
" bne 2f\n"
/* Values matched: try the store.  ret temporarily holds strexd's status. */
" strexd %[ret], %[new], [%[ptr]]\n"
" teq %[ret], #0\n"
" it ne \n"
" bne 1b\n"
/* Store succeeded: report success. */
" mov %[ret], #1\n"
"2: \n"
/* Outputs are early-clobber so they never share a register with an input. */
: [ret] "=&r" (ret),
[tmp] "=&r" (tmp)
: [ptr] "r" (p),
[cmp] "r" (cmpval),
[new] "r" (newval)
/* teq changes the condition flags; *p is modified behind the compiler. */
: "cc", "memory");
return (ret);
}
static __inline u_long static __inline u_long
atomic_cmpset_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval) atomic_cmpset_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{ {
@ -244,6 +317,15 @@ atomic_cmpset_acq_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile
return (ret); return (ret);
} }
/*
 * 64-bit compare-and-set followed by a barrier.
 *
 * Runs atomic_cmpset_64() and then __do_dmb(), whether or not the swap
 * happened.  The int result of atomic_cmpset_64() is widened to uint64_t
 * and returned unchanged in value.
 */
static __inline uint64_t
atomic_cmpset_acq_64(volatile uint64_t *p, volatile uint64_t cmpval, volatile uint64_t newval)
{
	uint64_t swapped;

	swapped = atomic_cmpset_64(p, cmpval, newval);
	__do_dmb();
	return (swapped);
}
static __inline u_long static __inline u_long
atomic_cmpset_acq_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval) atomic_cmpset_acq_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{ {
@ -261,6 +343,14 @@ atomic_cmpset_rel_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile
return (atomic_cmpset_32(p, cmpval, newval)); return (atomic_cmpset_32(p, cmpval, newval));
} }
/*
 * 64-bit compare-and-set preceded by a barrier.
 *
 * Issues __do_dmb() first and then returns the result of
 * atomic_cmpset_64(), widened from int to uint64_t.
 */
static __inline uint64_t
atomic_cmpset_rel_64(volatile uint64_t *p, volatile uint64_t cmpval, volatile uint64_t newval)
{
	uint64_t swapped;

	__do_dmb();
	swapped = atomic_cmpset_64(p, cmpval, newval);
	return (swapped);
}
static __inline u_long static __inline u_long
atomic_cmpset_rel_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval) atomic_cmpset_rel_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{ {
@ -285,6 +375,28 @@ atomic_add_32(volatile u_int32_t *p, u_int32_t val)
,"+r" (p), "+r" (val) : : "cc", "memory"); ,"+r" (p), "+r" (val) : : "cc", "memory");
} }
/*
 * Atomically add val to the 64-bit word at *p.
 *
 * Implemented as an ldrexd/strexd retry loop.  No barrier instruction is
 * issued by this function itself.
 */
static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
uint64_t tmp;
uint32_t exflag;
/* %Q[x] and %R[x] name the low and high 32-bit halves of 64-bit operand x. */
__asm __volatile(
/* Retry point: reload whenever the exclusive store below fails. */
"1: \n"
" ldrexd %[tmp], [%[ptr]]\n"
/* 64-bit add: low words set the carry, high words consume it. */
" adds %Q[tmp], %Q[val]\n"
" adc %R[tmp], %R[val]\n"
/* strexd writes its status to exf; non-zero means nothing was stored. */
" strexd %[exf], %[tmp], [%[ptr]]\n"
" teq %[exf], #0\n"
" it ne \n"
" bne 1b\n"
/* Outputs are early-clobber so they never share a register with an input. */
: [exf] "=&r" (exflag),
[tmp] "=&r" (tmp)
: [ptr] "r" (p),
[val] "r" (val)
/* adds/teq change the condition flags; *p is modified behind the compiler. */
: "cc", "memory");
}
static __inline void static __inline void
atomic_add_long(volatile u_long *p, u_long val) atomic_add_long(volatile u_long *p, u_long val)
{ {
@ -315,6 +427,28 @@ atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
,"+r" (p), "+r" (val) : : "cc", "memory"); ,"+r" (p), "+r" (val) : : "cc", "memory");
} }
/*
 * Atomically subtract val from the 64-bit word at *p.
 *
 * Implemented as an ldrexd/strexd retry loop.  No barrier instruction is
 * issued by this function itself.
 */
static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
uint64_t tmp;
uint32_t exflag;
/* %Q[x] and %R[x] name the low and high 32-bit halves of 64-bit operand x. */
__asm __volatile(
/* Retry point: reload whenever the exclusive store below fails. */
"1: \n"
" ldrexd %[tmp], [%[ptr]]\n"
/* 64-bit subtract: low words set the borrow, high words consume it. */
" subs %Q[tmp], %Q[val]\n"
" sbc %R[tmp], %R[val]\n"
/* strexd writes its status to exf; non-zero means nothing was stored. */
" strexd %[exf], %[tmp], [%[ptr]]\n"
" teq %[exf], #0\n"
" it ne \n"
" bne 1b\n"
/* Outputs are early-clobber so they never share a register with an input. */
: [exf] "=&r" (exflag),
[tmp] "=&r" (tmp)
: [ptr] "r" (p),
[val] "r" (val)
/* subs/teq change the condition flags; *p is modified behind the compiler. */
: "cc", "memory");
}
static __inline void static __inline void
atomic_subtract_long(volatile u_long *p, u_long val) atomic_subtract_long(volatile u_long *p, u_long val)
{ {
@ -334,6 +468,10 @@ ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(add, 32) ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(subtract, 32) ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_ACQ_REL(set, 32) ATOMIC_ACQ_REL(set, 32)
/*
 * 64-bit instantiations of ATOMIC_ACQ_REL for clear, add, subtract and set.
 * NOTE(review): the macro body is not visible in this excerpt; presumably it
 * emits the atomic_<op>_acq_64 / atomic_<op>_rel_64 wrappers around the plain
 * atomic_<op>_64 functions -- confirm against the macro definition.
 */
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL_LONG(clear) ATOMIC_ACQ_REL_LONG(clear)
ATOMIC_ACQ_REL_LONG(add) ATOMIC_ACQ_REL_LONG(add)
ATOMIC_ACQ_REL_LONG(subtract) ATOMIC_ACQ_REL_LONG(subtract)
@ -392,6 +530,116 @@ atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
*p = v; *p = v;
} }
/*
 * Atomically add val to the 64-bit word at *p and return the value *p held
 * immediately before the addition.
 *
 * Implemented as an ldrexd/strexd retry loop.  No barrier instruction is
 * issued by this function itself.
 */
static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
uint64_t ret, tmp;
uint32_t exflag;
/* %Q[x] and %R[x] name the low and high 32-bit halves of 64-bit operand x. */
__asm __volatile(
/* Retry point: reload whenever the exclusive store below fails. */
"1: \n"
" ldrexd %[ret], [%[ptr]]\n"
/* tmp = ret + val; ret is left untouched so the old value can be returned. */
" adds %Q[tmp], %Q[ret], %Q[val]\n"
" adc %R[tmp], %R[ret], %R[val]\n"
/* strexd writes its status to exf; non-zero means nothing was stored. */
" strexd %[exf], %[tmp], [%[ptr]]\n"
" teq %[exf], #0\n"
" it ne \n"
" bne 1b\n"
/* Outputs are early-clobber so they never share a register with an input. */
: [ret] "=&r" (ret),
[exf] "=&r" (exflag),
[tmp] "=&r" (tmp)
: [ptr] "r" (p),
[val] "r" (val)
/* adds/teq change the condition flags; *p is modified behind the compiler. */
: "cc", "memory");
return (ret);
}
/*
 * Atomically replace the 64-bit word at *p with zero and return the value
 * it held before.
 *
 * Implemented as an ldrexd/strexd retry loop.  No barrier instruction is
 * issued by this function itself.
 */
static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
uint64_t ret, tmp;
uint32_t exflag;
/* %Q[x] and %R[x] name the low and high 32-bit halves of 64-bit operand x. */
__asm __volatile(
/* Retry point: reload whenever the exclusive store below fails. */
"1: \n"
" ldrexd %[ret], [%[ptr]]\n"
/* Build the 64-bit zero to be stored, one half at a time. */
" mov %Q[tmp], #0\n"
" mov %R[tmp], #0\n"
/* strexd writes its status to exf; non-zero means nothing was stored. */
" strexd %[exf], %[tmp], [%[ptr]]\n"
" teq %[exf], #0\n"
" it ne \n"
" bne 1b\n"
/* Outputs are early-clobber so they never share a register with an input. */
: [ret] "=&r" (ret),
[exf] "=&r" (exflag),
[tmp] "=&r" (tmp)
: [ptr] "r" (p)
/* teq changes the condition flags; *p is modified behind the compiler. */
: "cc", "memory");
return (ret);
}
/*
 * Atomically read and return the 64-bit word at *p.
 *
 * No barrier instruction is issued by this function itself.
 */
static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
uint64_t ret;
/*
 * The only way to atomically load 64 bits is with LDREXD which puts the
 * exclusive monitor into the open state, so reset it with CLREX because
 * we don't actually need to store anything.
 */
__asm __volatile(
/* NOTE(review): nothing in this asm branches back to label 1. */
"1: \n"
" ldrexd %[ret], [%[ptr]]\n"
" clrex \n"
: [ret] "=&r" (ret)
: [ptr] "r" (p)
: "cc", "memory");
return (ret);
}
/*
 * 64-bit atomic load followed by a barrier.
 *
 * Reads *p with atomic_load_64(), issues __do_dmb(), and then returns the
 * value that was read.
 */
static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
	uint64_t value = atomic_load_64(p);

	__do_dmb();
	return (value);
}
/*
 * Atomically write val to the 64-bit word at *p.
 *
 * No barrier instruction is issued by this function itself.
 */
static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t val)
{
uint64_t tmp;
uint32_t exflag;
/*
 * The only way to atomically store 64 bits is with STREXD, which will
 * succeed only if paired up with a preceding LDREXD using the same
 * address, so we read and discard the existing value before storing.
 */
__asm __volatile(
/* Retry point: redo the load/store pair whenever the store fails. */
"1: \n"
" ldrexd %[tmp], [%[ptr]]\n"
/* strexd writes its status to exf; non-zero means nothing was stored. */
" strexd %[exf], %[val], [%[ptr]]\n"
" teq %[exf], #0\n"
" it ne \n"
" bne 1b\n"
/* Outputs are early-clobber so they never share a register with an input. */
: [tmp] "=&r" (tmp),
[exf] "=&r" (exflag)
: [ptr] "r" (p),
[val] "r" (val)
/* teq changes the condition flags; *p is modified behind the compiler. */
: "cc", "memory");
}
/*
 * 64-bit atomic store preceded by a barrier.
 *
 * Issues __do_dmb() first, then writes val to *p with atomic_store_64().
 */
static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{
	/* Barrier first, store second: the order is the point of this wrapper. */
	__do_dmb();

	atomic_store_64(p, val);
}
static __inline u_long static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long val) atomic_fetchadd_long(volatile u_long *p, u_long val)
{ {