2004-05-14 11:46:45 +00:00
|
|
|
/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */
|
|
|
|
|
2005-01-05 21:58:49 +00:00
|
|
|
/*-
|
2004-05-14 11:46:45 +00:00
|
|
|
* Copyright (C) 2003-2004 Olivier Houchard
|
|
|
|
* Copyright (C) 1994-1997 Mark Brinicombe
|
|
|
|
* Copyright (C) 1994 Brini
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* This code is derived from software written for Brini by Mark Brinicombe
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
|
|
* must display the following acknowledgement:
|
|
|
|
* This product includes software developed by Brini.
|
|
|
|
* 4. The name of Brini may not be used to endorse or promote products
|
|
|
|
* derived from this software without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
|
|
|
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
|
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
|
|
* IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
|
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
|
|
|
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|
|
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
|
|
|
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
|
|
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
* $FreeBSD$
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _MACHINE_ATOMIC_H_
|
|
|
|
#define _MACHINE_ATOMIC_H_
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
|
2008-02-05 10:22:33 +00:00
|
|
|
#ifndef _KERNEL
|
|
|
|
#include <machine/sysarch.h>
|
2012-08-15 03:03:03 +00:00
|
|
|
#else
|
|
|
|
#include <machine/cpuconf.h>
|
2008-02-05 10:22:33 +00:00
|
|
|
#endif
|
|
|
|
|
2013-01-07 20:36:51 +00:00
|
|
|
/*
 * Barrier primitives.
 *
 * ARMv7 targets use the isb/dsb/dmb instructions, ARMv6 targets request
 * the same operations through writes to CP15 register c7 (passing zero as
 * the source value), and every other target gets empty definitions.
 */
#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
#define	isb()	__asm __volatile("isb" : : : "memory")
#define	dsb()	__asm __volatile("dsb" : : : "memory")
#define	dmb()	__asm __volatile("dmb" : : : "memory")
#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__)
#define	isb()	__asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define	dsb()	__asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define	dmb()	__asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
#else
#define	isb()
#define	dsb()
#define	dmb()
#endif

/* The generic full, write and read barriers all map onto dmb(). */
#define	mb()	dmb()
#define	wmb()	dmb()
#define	rmb()	dmb()
|
2008-11-22 05:55:56 +00:00
|
|
|
|
2004-05-14 11:46:45 +00:00
|
|
|
/*
 * Interrupt-disable bits; they are ORed into the CPSR value by
 * __with_interrupts_disabled().  Only defined here when no other header
 * has provided them already.
 */
#if !defined(I32_bit)
#define	I32_bit	(1 << 7)	/* IRQ disable */
#endif
#if !defined(F32_bit)
#define	F32_bit	(1 << 6)	/* FIQ disable */
#endif
|
|
|
|
|
2012-08-15 03:03:03 +00:00
|
|
|
/*
|
|
|
|
* It would be nice to use _HAVE_ARMv6_INSTRUCTIONS from machine/asm.h
|
|
|
|
* here, but that header can't be included here because this is C
|
|
|
|
* code. I would like to move the _HAVE_ARMv6_INSTRUCTIONS definition
|
|
|
|
* out of asm.h so it can be used in both asm and C code. - kientzle@
|
|
|
|
*/
|
|
|
|
#if defined (__ARM_ARCH_7__) || \
|
|
|
|
defined (__ARM_ARCH_7A__) || \
|
|
|
|
defined (__ARM_ARCH_6__) || \
|
|
|
|
defined (__ARM_ARCH_6J__) || \
|
|
|
|
defined (__ARM_ARCH_6K__) || \
|
|
|
|
defined (__ARM_ARCH_6Z__) || \
|
|
|
|
defined (__ARM_ARCH_6ZK__)
|
|
|
|
static __inline void
|
|
|
|
__do_dmb(void)
|
|
|
|
{
|
|
|
|
|
|
|
|
#if defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__)
|
|
|
|
__asm __volatile("dmb" : : : "memory");
|
|
|
|
#else
|
|
|
|
__asm __volatile("mcr p15, 0, r0, c7, c10, 5" : : : "memory");
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Generate atomic_<NAME>_acq_long() and atomic_<NAME>_rel_long() as
 * wrappers around atomic_<NAME>_long().  The acquire form performs the
 * operation and then calls __do_dmb(); the release form calls __do_dmb()
 * before performing the operation.
 */
#define	ATOMIC_ACQ_REL_LONG(NAME)					\
static __inline void							\
atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)		\
{									\
	atomic_##NAME##_long(p, v);					\
	__do_dmb();							\
}									\
									\
static __inline void							\
atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)		\
{									\
	__do_dmb();							\
	atomic_##NAME##_long(p, v);					\
}
|
|
|
|
|
|
|
|
/*
 * Generate atomic_<NAME>_acq_<WIDTH>() and atomic_<NAME>_rel_<WIDTH>() as
 * wrappers around atomic_<NAME>_<WIDTH>(), operating on uint<WIDTH>_t.
 * The acquire form performs the operation and then calls __do_dmb(); the
 * release form calls __do_dmb() before performing the operation.
 */
#define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
static __inline void							\
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	atomic_##NAME##_##WIDTH(p, v);					\
	__do_dmb();							\
}									\
									\
static __inline void							\
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	__do_dmb();							\
	atomic_##NAME##_##WIDTH(p, v);					\
}
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
|
|
|
|
{
|
|
|
|
uint32_t tmp = 0, tmp2 = 0;
|
|
|
|
|
|
|
|
__asm __volatile("1: ldrex %0, [%2]\n"
|
|
|
|
"orr %0, %0, %3\n"
|
|
|
|
"strex %1, %0, [%2]\n"
|
|
|
|
"cmp %1, #0\n"
|
|
|
|
"bne 1b\n"
|
|
|
|
: "=&r" (tmp), "+r" (tmp2)
|
2012-10-01 05:12:17 +00:00
|
|
|
, "+r" (address), "+r" (setmask) : : "cc", "memory");
|
2012-08-15 03:03:03 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
atomic_set_long(volatile u_long *address, u_long setmask)
|
|
|
|
{
|
|
|
|
u_long tmp = 0, tmp2 = 0;
|
|
|
|
|
|
|
|
__asm __volatile("1: ldrex %0, [%2]\n"
|
|
|
|
"orr %0, %0, %3\n"
|
|
|
|
"strex %1, %0, [%2]\n"
|
|
|
|
"cmp %1, #0\n"
|
|
|
|
"bne 1b\n"
|
|
|
|
: "=&r" (tmp), "+r" (tmp2)
|
2012-10-01 05:12:17 +00:00
|
|
|
, "+r" (address), "+r" (setmask) : : "cc", "memory");
|
2012-08-15 03:03:03 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
|
|
|
|
{
|
|
|
|
uint32_t tmp = 0, tmp2 = 0;
|
|
|
|
|
|
|
|
__asm __volatile("1: ldrex %0, [%2]\n"
|
|
|
|
"bic %0, %0, %3\n"
|
|
|
|
"strex %1, %0, [%2]\n"
|
|
|
|
"cmp %1, #0\n"
|
|
|
|
"bne 1b\n"
|
|
|
|
: "=&r" (tmp), "+r" (tmp2)
|
2012-10-01 05:12:17 +00:00
|
|
|
,"+r" (address), "+r" (setmask) : : "cc", "memory");
|
2012-08-15 03:03:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
atomic_clear_long(volatile u_long *address, u_long setmask)
|
|
|
|
{
|
|
|
|
u_long tmp = 0, tmp2 = 0;
|
|
|
|
|
|
|
|
__asm __volatile("1: ldrex %0, [%2]\n"
|
|
|
|
"bic %0, %0, %3\n"
|
|
|
|
"strex %1, %0, [%2]\n"
|
|
|
|
"cmp %1, #0\n"
|
|
|
|
"bne 1b\n"
|
|
|
|
: "=&r" (tmp), "+r" (tmp2)
|
2012-10-01 05:12:17 +00:00
|
|
|
,"+r" (address), "+r" (setmask) : : "cc", "memory");
|
2012-08-15 03:03:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static __inline u_int32_t
|
|
|
|
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
|
|
|
|
{
|
|
|
|
uint32_t ret;
|
|
|
|
|
|
|
|
__asm __volatile("1: ldrex %0, [%1]\n"
|
|
|
|
"cmp %0, %2\n"
|
|
|
|
"movne %0, #0\n"
|
|
|
|
"bne 2f\n"
|
|
|
|
"strex %0, %3, [%1]\n"
|
|
|
|
"cmp %0, #0\n"
|
|
|
|
"bne 1b\n"
|
|
|
|
"moveq %0, #1\n"
|
|
|
|
"2:"
|
|
|
|
: "=&r" (ret)
|
2012-10-01 05:12:17 +00:00
|
|
|
,"+r" (p), "+r" (cmpval), "+r" (newval) : : "cc",
|
|
|
|
"memory");
|
2012-08-15 03:03:03 +00:00
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __inline u_long
|
|
|
|
atomic_cmpset_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
|
|
|
|
{
|
|
|
|
u_long ret;
|
|
|
|
|
|
|
|
__asm __volatile("1: ldrex %0, [%1]\n"
|
|
|
|
"cmp %0, %2\n"
|
|
|
|
"movne %0, #0\n"
|
|
|
|
"bne 2f\n"
|
|
|
|
"strex %0, %3, [%1]\n"
|
|
|
|
"cmp %0, #0\n"
|
|
|
|
"bne 1b\n"
|
|
|
|
"moveq %0, #1\n"
|
|
|
|
"2:"
|
|
|
|
: "=&r" (ret)
|
2012-10-01 05:12:17 +00:00
|
|
|
,"+r" (p), "+r" (cmpval), "+r" (newval) : : "cc",
|
|
|
|
"memory");
|
2012-08-15 03:03:03 +00:00
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Compare-and-set followed by a memory barrier.
 *
 * Runs atomic_cmpset_32() and then __do_dmb(); the barrier is issued
 * whether or not the store took place.  Returns the cmpset result.
 */
static __inline u_int32_t
atomic_cmpset_acq_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	u_int32_t swapped;

	swapped = atomic_cmpset_32(p, cmpval, newval);
	__do_dmb();
	return (swapped);
}
|
|
|
|
|
|
|
|
static __inline u_long
|
|
|
|
atomic_cmpset_acq_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
|
|
|
|
{
|
|
|
|
u_long ret = atomic_cmpset_long(p, cmpval, newval);
|
|
|
|
|
|
|
|
__do_dmb();
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Compare-and-set preceded by a memory barrier.
 *
 * Calls __do_dmb() first and then returns the result of
 * atomic_cmpset_32().
 */
static __inline u_int32_t
atomic_cmpset_rel_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	u_int32_t swapped;

	__do_dmb();
	swapped = atomic_cmpset_32(p, cmpval, newval);
	return (swapped);
}
|
|
|
|
|
|
|
|
static __inline u_long
|
|
|
|
atomic_cmpset_rel_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
|
|
|
|
{
|
|
|
|
|
|
|
|
__do_dmb();
|
|
|
|
return (atomic_cmpset_long(p, cmpval, newval));
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
|
|
|
|
{
|
|
|
|
uint32_t tmp = 0, tmp2 = 0;
|
|
|
|
|
|
|
|
__asm __volatile("1: ldrex %0, [%2]\n"
|
|
|
|
"add %0, %0, %3\n"
|
|
|
|
"strex %1, %0, [%2]\n"
|
|
|
|
"cmp %1, #0\n"
|
|
|
|
"bne 1b\n"
|
|
|
|
: "=&r" (tmp), "+r" (tmp2)
|
2012-10-01 05:12:17 +00:00
|
|
|
,"+r" (p), "+r" (val) : : "cc", "memory");
|
2012-08-15 03:03:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
atomic_add_long(volatile u_long *p, u_long val)
|
|
|
|
{
|
|
|
|
u_long tmp = 0, tmp2 = 0;
|
|
|
|
|
|
|
|
__asm __volatile("1: ldrex %0, [%2]\n"
|
|
|
|
"add %0, %0, %3\n"
|
|
|
|
"strex %1, %0, [%2]\n"
|
|
|
|
"cmp %1, #0\n"
|
|
|
|
"bne 1b\n"
|
|
|
|
: "=&r" (tmp), "+r" (tmp2)
|
2012-10-01 05:12:17 +00:00
|
|
|
,"+r" (p), "+r" (val) : : "cc", "memory");
|
2012-08-15 03:03:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
|
|
|
|
{
|
|
|
|
uint32_t tmp = 0, tmp2 = 0;
|
|
|
|
|
|
|
|
__asm __volatile("1: ldrex %0, [%2]\n"
|
|
|
|
"sub %0, %0, %3\n"
|
|
|
|
"strex %1, %0, [%2]\n"
|
|
|
|
"cmp %1, #0\n"
|
|
|
|
"bne 1b\n"
|
|
|
|
: "=&r" (tmp), "+r" (tmp2)
|
2012-10-01 05:12:17 +00:00
|
|
|
,"+r" (p), "+r" (val) : : "cc", "memory");
|
2012-08-15 03:03:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
atomic_subtract_long(volatile u_long *p, u_long val)
|
|
|
|
{
|
|
|
|
u_long tmp = 0, tmp2 = 0;
|
|
|
|
|
|
|
|
__asm __volatile("1: ldrex %0, [%2]\n"
|
|
|
|
"sub %0, %0, %3\n"
|
|
|
|
"strex %1, %0, [%2]\n"
|
|
|
|
"cmp %1, #0\n"
|
|
|
|
"bne 1b\n"
|
|
|
|
: "=&r" (tmp), "+r" (tmp2)
|
2012-10-01 05:12:17 +00:00
|
|
|
,"+r" (p), "+r" (val) : : "cc", "memory");
|
2012-08-15 03:03:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Instantiate the _acq/_rel wrappers for the set, clear, add and subtract
 * operations, first for 32-bit words and then for u_long.
 */
ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(subtract, 32)

ATOMIC_ACQ_REL_LONG(set)
ATOMIC_ACQ_REL_LONG(clear)
ATOMIC_ACQ_REL_LONG(add)
ATOMIC_ACQ_REL_LONG(subtract)

/* The generator macros are not needed past this point. */
#undef ATOMIC_ACQ_REL_LONG
#undef ATOMIC_ACQ_REL
|
|
|
|
|
|
|
|
static __inline uint32_t
|
|
|
|
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
|
|
|
|
{
|
|
|
|
uint32_t tmp = 0, tmp2 = 0, ret = 0;
|
|
|
|
|
|
|
|
__asm __volatile("1: ldrex %0, [%3]\n"
|
|
|
|
"add %1, %0, %4\n"
|
|
|
|
"strex %2, %1, [%3]\n"
|
|
|
|
"cmp %2, #0\n"
|
|
|
|
"bne 1b\n"
|
|
|
|
: "+r" (ret), "=&r" (tmp), "+r" (tmp2)
|
2012-10-01 05:12:17 +00:00
|
|
|
,"+r" (p), "+r" (val) : : "cc", "memory");
|
2012-08-15 03:03:03 +00:00
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __inline uint32_t
|
|
|
|
atomic_readandclear_32(volatile u_int32_t *p)
|
|
|
|
{
|
|
|
|
uint32_t ret, tmp = 0, tmp2 = 0;
|
|
|
|
|
|
|
|
__asm __volatile("1: ldrex %0, [%3]\n"
|
|
|
|
"mov %1, #0\n"
|
|
|
|
"strex %2, %1, [%3]\n"
|
|
|
|
"cmp %2, #0\n"
|
|
|
|
"bne 1b\n"
|
|
|
|
: "=r" (ret), "=&r" (tmp), "+r" (tmp2)
|
2012-10-01 05:12:17 +00:00
|
|
|
,"+r" (p) : : "cc", "memory");
|
2012-08-15 03:03:03 +00:00
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Load *p and then issue a memory barrier with __do_dmb().
 * Returns the loaded value.
 */
static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	uint32_t loaded = *p;

	__do_dmb();
	return (loaded);
}
|
|
|
|
|
|
|
|
/*
 * Issue a memory barrier with __do_dmb() and then store v into *p.
 */
static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{
	__do_dmb();
	p[0] = v;
}
|
|
|
|
|
|
|
|
static __inline u_long
|
|
|
|
atomic_fetchadd_long(volatile u_long *p, u_long val)
|
|
|
|
{
|
|
|
|
u_long tmp = 0, tmp2 = 0, ret = 0;
|
|
|
|
|
|
|
|
__asm __volatile("1: ldrex %0, [%3]\n"
|
|
|
|
"add %1, %0, %4\n"
|
|
|
|
"strex %2, %1, [%3]\n"
|
|
|
|
"cmp %2, #0\n"
|
|
|
|
"bne 1b\n"
|
|
|
|
: "+r" (ret), "=&r" (tmp), "+r" (tmp2)
|
2012-10-01 05:12:17 +00:00
|
|
|
,"+r" (p), "+r" (val) : : "cc", "memory");
|
2012-08-15 03:03:03 +00:00
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __inline u_long
|
|
|
|
atomic_readandclear_long(volatile u_long *p)
|
|
|
|
{
|
|
|
|
u_long ret, tmp = 0, tmp2 = 0;
|
|
|
|
|
|
|
|
__asm __volatile("1: ldrex %0, [%3]\n"
|
|
|
|
"mov %1, #0\n"
|
|
|
|
"strex %2, %1, [%3]\n"
|
|
|
|
"cmp %2, #0\n"
|
|
|
|
"bne 1b\n"
|
|
|
|
: "=r" (ret), "=&r" (tmp), "+r" (tmp2)
|
2012-10-01 05:12:17 +00:00
|
|
|
,"+r" (p) : : "cc", "memory");
|
2012-08-15 03:03:03 +00:00
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __inline u_long
|
|
|
|
atomic_load_acq_long(volatile u_long *p)
|
|
|
|
{
|
|
|
|
u_long v;
|
|
|
|
|
|
|
|
v = *p;
|
|
|
|
__do_dmb();
|
|
|
|
return (v);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
atomic_store_rel_long(volatile u_long *p, u_long v)
|
|
|
|
{
|
|
|
|
|
|
|
|
__do_dmb();
|
|
|
|
*p = v;
|
|
|
|
}
|
|
|
|
#else /* < armv6 */
|
|
|
|
|
2004-05-14 11:46:45 +00:00
|
|
|
/*
 * Evaluate expr with the I32_bit (IRQ disable) and F32_bit (FIQ disable)
 * bits set in the CPSR.
 *
 * The current CPSR is read and kept in cpsr_save, the two disable bits
 * are ORed in and written back, expr is evaluated, and finally the saved
 * CPSR value is written back.  The locals cpsr_save and tmp are in scope
 * while expr is evaluated.
 */
#define	__with_interrupts_disabled(expr)				\
	do {								\
		u_int cpsr_save, tmp;					\
									\
		__asm __volatile(					\
		    "mrs  %[saved], cpsr;"				\
		    "orr  %[masked], %[saved], %[bits];"		\
		    "msr  cpsr_all, %[masked];"				\
		    : [saved] "=r" (cpsr_save), [masked] "=r" (tmp)	\
		    : [bits] "I" (I32_bit | F32_bit)			\
		    : "cc" );						\
		(expr);							\
		__asm __volatile(					\
		    "msr  cpsr_all, %[saved]"				\
		    : /* no output */					\
		    : [saved] "r" (cpsr_save)				\
		    : "cc" );						\
	} while(0)
|
|
|
|
|
2004-11-04 19:14:50 +00:00
|
|
|
static __inline uint32_t
|
|
|
|
__swp(uint32_t val, volatile uint32_t *ptr)
|
2004-05-14 11:46:45 +00:00
|
|
|
{
|
2005-07-27 20:01:45 +00:00
|
|
|
__asm __volatile("swp %0, %2, [%3]"
|
|
|
|
: "=&r" (val), "=m" (*ptr)
|
2005-10-14 18:36:49 +00:00
|
|
|
: "r" (val), "r" (ptr), "m" (*ptr)
|
2005-07-27 20:01:45 +00:00
|
|
|
: "memory");
|
2004-11-04 19:14:50 +00:00
|
|
|
return (val);
|
2004-05-14 11:46:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2005-04-07 22:03:04 +00:00
|
|
|
#ifdef _KERNEL
|
2004-05-14 11:46:45 +00:00
|
|
|
/*
 * OR setmask into *address; the read-modify-write is performed inside
 * __with_interrupts_disabled().
 */
static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	__with_interrupts_disabled(*address = *address | setmask);
}
|
|
|
|
|
|
|
|
/*
 * Clear in *address every bit that is set in clearmask; the
 * read-modify-write is performed inside __with_interrupts_disabled().
 */
static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t clearmask)
{
	__with_interrupts_disabled(*address = *address & ~clearmask);
}
|
|
|
|
|
2005-04-07 22:03:04 +00:00
|
|
|
static __inline u_int32_t
|
|
|
|
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
|
2004-05-14 11:46:45 +00:00
|
|
|
{
|
2005-04-07 22:03:04 +00:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
__with_interrupts_disabled(
|
|
|
|
{
|
|
|
|
if (*p == cmpval) {
|
|
|
|
*p = newval;
|
|
|
|
ret = 1;
|
|
|
|
} else {
|
|
|
|
ret = 0;
|
|
|
|
}
|
|
|
|
});
|
|
|
|
return (ret);
|
2004-05-14 11:46:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Add val to *p; the read-modify-write is performed inside
 * __with_interrupts_disabled().
 */
static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	__with_interrupts_disabled(*p = *p + val);
}
|
|
|
|
|
2005-04-07 22:03:04 +00:00
|
|
|
/*
 * Subtract val from *p; the read-modify-write is performed inside
 * __with_interrupts_disabled().
 */
static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	__with_interrupts_disabled(*p = *p - val);
}
|
|
|
|
|
2005-09-27 17:39:11 +00:00
|
|
|
static __inline uint32_t
|
|
|
|
atomic_fetchadd_32(volatile uint32_t *p, uint32_t v)
|
|
|
|
{
|
|
|
|
uint32_t value;
|
|
|
|
|
|
|
|
__with_interrupts_disabled(
|
|
|
|
{
|
|
|
|
value = *p;
|
|
|
|
*p += v;
|
|
|
|
});
|
|
|
|
return (value);
|
|
|
|
}
|
|
|
|
|
2005-04-07 22:03:04 +00:00
|
|
|
#else /* !_KERNEL */
|
|
|
|
|
2004-05-14 11:46:45 +00:00
|
|
|
static __inline u_int32_t
|
2005-04-07 22:03:04 +00:00
|
|
|
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
|
2004-05-14 11:46:45 +00:00
|
|
|
{
|
2008-02-05 10:22:33 +00:00
|
|
|
register int done, ras_start = ARM_RAS_START;
|
2005-04-07 22:03:04 +00:00
|
|
|
|
|
|
|
__asm __volatile("1:\n"
|
|
|
|
"adr %1, 1b\n"
|
|
|
|
"str %1, [%0]\n"
|
Close a race.
The RAS implementation would set the end address, then the start
address. These were used by the kernel to restart a RAS sequence if
it was interrupted. When the thread switching code ran, it would
check these values and adjust the PC and clear them if it did.
However, there's a small flaw in this scheme. Thread T1, sets the end
address and gets preempted. Thread T2 runs and also does a RAS
operation. This resets end to zero. Thread T1 now runs again and
sets start and then begins the RAS sequence, but is preempted before
the RAS sequence executes its last instruction. The kernel code that
would ordinarily restart the RAS sequence doesn't because the PC isn't
between start and 0, so the PC isn't set to the start of the sequence.
So when T1 is resumed again, it is at the wrong location for RAS to
produce the correct results. This causes the wrong results for the
atomic sequence.
The window for the first race is 3 instructions. The window for the
second race is 5-10 instructions depending on the atomic operation.
This makes this failure fairly rare and hard to reproduce.
Mutexs are implemented in libthr using atomic operations. When the
above race would occur, a lock could get stuck locked, causing many
downstream problems, as you might expect.
Also, make sure to reset the start and end address when doing a syscall, or
a malicious process could set them before doing a syscall.
Reviewed by: imp, ups (thanks guys)
Pointy hat to: cognet
MFC After: 3 days
2007-12-02 12:49:28 +00:00
|
|
|
"adr %1, 2f\n"
|
2008-02-05 10:22:33 +00:00
|
|
|
"str %1, [%0, #4]\n"
|
2006-02-05 22:06:12 +00:00
|
|
|
"ldr %1, [%2]\n"
|
2005-04-07 22:03:04 +00:00
|
|
|
"cmp %1, %3\n"
|
2006-02-05 22:06:12 +00:00
|
|
|
"streq %4, [%2]\n"
|
2005-04-07 22:03:04 +00:00
|
|
|
"2:\n"
|
2005-05-24 21:42:31 +00:00
|
|
|
"mov %1, #0\n"
|
Close a race.
The RAS implementation would set the end address, then the start
address. These were used by the kernel to restart a RAS sequence if
it was interrupted. When the thread switching code ran, it would
check these values and adjust the PC and clear them if it did.
However, there's a small flaw in this scheme. Thread T1, sets the end
address and gets preempted. Thread T2 runs and also does a RAS
operation. This resets end to zero. Thread T1 now runs again and
sets start and then begins the RAS sequence, but is preempted before
the RAS sequence executes its last instruction. The kernel code that
would ordinarily restart the RAS sequence doesn't because the PC isn't
between start and 0, so the PC isn't set to the start of the sequence.
So when T1 is resumed again, it is at the wrong location for RAS to
produce the correct results. This causes the wrong results for the
atomic sequence.
The window for the first race is 3 instructions. The window for the
second race is 5-10 instructions depending on the atomic operation.
This makes this failure fairly rare and hard to reproduce.
Mutexs are implemented in libthr using atomic operations. When the
above race would occur, a lock could get stuck locked, causing many
downstream problems, as you might expect.
Also, make sure to reset the start and end address when doing a syscall, or
a malicious process could set them before doing a syscall.
Reviewed by: imp, ups (thanks guys)
Pointy hat to: cognet
MFC After: 3 days
2007-12-02 12:49:28 +00:00
|
|
|
"str %1, [%0]\n"
|
|
|
|
"mov %1, #0xffffffff\n"
|
2008-02-05 10:22:33 +00:00
|
|
|
"str %1, [%0, #4]\n"
|
2005-04-07 22:03:04 +00:00
|
|
|
"moveq %1, #1\n"
|
|
|
|
"movne %1, #0\n"
|
2008-02-05 10:22:33 +00:00
|
|
|
: "+r" (ras_start), "=r" (done)
|
2012-10-01 05:12:17 +00:00
|
|
|
,"+r" (p), "+r" (cmpval), "+r" (newval) : : "cc", "memory");
|
2004-11-05 23:48:12 +00:00
|
|
|
return (done);
|
2004-05-14 11:46:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
|
|
|
|
{
|
2008-02-05 10:22:33 +00:00
|
|
|
int start, ras_start = ARM_RAS_START;
|
2005-04-07 22:03:04 +00:00
|
|
|
|
|
|
|
__asm __volatile("1:\n"
|
|
|
|
"adr %1, 1b\n"
|
|
|
|
"str %1, [%0]\n"
|
Close a race.
The RAS implementation would set the end address, then the start
address. These were used by the kernel to restart a RAS sequence if
it was interrupted. When the thread switching code ran, it would
check these values and adjust the PC and clear them if it did.
However, there's a small flaw in this scheme. Thread T1, sets the end
address and gets preempted. Thread T2 runs and also does a RAS
operation. This resets end to zero. Thread T1 now runs again and
sets start and then begins the RAS sequence, but is preempted before
the RAS sequence executes its last instruction. The kernel code that
would ordinarily restart the RAS sequence doesn't because the PC isn't
between start and 0, so the PC isn't set to the start of the sequence.
So when T1 is resumed again, it is at the wrong location for RAS to
produce the correct results. This causes the wrong results for the
atomic sequence.
The window for the first race is 3 instructions. The window for the
second race is 5-10 instructions depending on the atomic operation.
This makes this failure fairly rare and hard to reproduce.
Mutexs are implemented in libthr using atomic operations. When the
above race would occur, a lock could get stuck locked, causing many
downstream problems, as you might expect.
Also, make sure to reset the start and end address when doing a syscall, or
a malicious process could set them before doing a syscall.
Reviewed by: imp, ups (thanks guys)
Pointy hat to: cognet
MFC After: 3 days
2007-12-02 12:49:28 +00:00
|
|
|
"adr %1, 2f\n"
|
2008-02-05 10:22:33 +00:00
|
|
|
"str %1, [%0, #4]\n"
|
2006-02-05 22:06:12 +00:00
|
|
|
"ldr %1, [%2]\n"
|
2005-04-07 22:03:04 +00:00
|
|
|
"add %1, %1, %3\n"
|
2006-02-05 22:06:12 +00:00
|
|
|
"str %1, [%2]\n"
|
2005-04-07 22:03:04 +00:00
|
|
|
"2:\n"
|
2005-05-24 21:42:31 +00:00
|
|
|
"mov %1, #0\n"
|
|
|
|
"str %1, [%0]\n"
|
Close a race.
The RAS implementation would set the end address, then the start
address. These were used by the kernel to restart a RAS sequence if
it was interrupted. When the thread switching code ran, it would
check these values and adjust the PC and clear them if it did.
However, there's a small flaw in this scheme. Thread T1, sets the end
address and gets preempted. Thread T2 runs and also does a RAS
operation. This resets end to zero. Thread T1 now runs again and
sets start and then begins the RAS sequence, but is preempted before
the RAS sequence executes its last instruction. The kernel code that
would ordinarily restart the RAS sequence doesn't because the PC isn't
between start and 0, so the PC isn't set to the start of the sequence.
So when T1 is resumed again, it is at the wrong location for RAS to
produce the correct results. This causes the wrong results for the
atomic sequence.
The window for the first race is 3 instructions. The window for the
second race is 5-10 instructions depending on the atomic operation.
This makes this failure fairly rare and hard to reproduce.
Mutexs are implemented in libthr using atomic operations. When the
above race would occur, a lock could get stuck locked, causing many
downstream problems, as you might expect.
Also, make sure to reset the start and end address when doing a syscall, or
a malicious process could set them before doing a syscall.
Reviewed by: imp, ups (thanks guys)
Pointy hat to: cognet
MFC After: 3 days
2007-12-02 12:49:28 +00:00
|
|
|
"mov %1, #0xffffffff\n"
|
2008-02-05 10:22:33 +00:00
|
|
|
"str %1, [%0, #4]\n"
|
|
|
|
: "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val)
|
2006-02-06 18:29:05 +00:00
|
|
|
: : "memory");
|
2004-05-14 11:46:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
|
|
|
|
{
|
2008-02-05 10:22:33 +00:00
|
|
|
int start, ras_start = ARM_RAS_START;
|
2005-04-07 22:03:04 +00:00
|
|
|
|
|
|
|
__asm __volatile("1:\n"
|
|
|
|
"adr %1, 1b\n"
|
|
|
|
"str %1, [%0]\n"
|
Close a race.
The RAS implementation would set the end address, then the start
address. These were used by the kernel to restart a RAS sequence if
it was interrupted. When the thread switching code ran, it would
check these values and adjust the PC and clear them if it did.
However, there's a small flaw in this scheme. Thread T1, sets the end
address and gets preempted. Thread T2 runs and also does a RAS
operation. This resets end to zero. Thread T1 now runs again and
sets start and then begins the RAS sequence, but is preempted before
the RAS sequence executes its last instruction. The kernel code that
would ordinarily restart the RAS sequence doesn't because the PC isn't
between start and 0, so the PC isn't set to the start of the sequence.
So when T1 is resumed again, it is at the wrong location for RAS to
produce the correct results. This causes the wrong results for the
atomic sequence.
The window for the first race is 3 instructions. The window for the
second race is 5-10 instructions depending on the atomic operation.
This makes this failure fairly rare and hard to reproduce.
Mutexs are implemented in libthr using atomic operations. When the
above race would occur, a lock could get stuck locked, causing many
downstream problems, as you might expect.
Also, make sure to reset the start and end address when doing a syscall, or
a malicious process could set them before doing a syscall.
Reviewed by: imp, ups (thanks guys)
Pointy hat to: cognet
MFC After: 3 days
2007-12-02 12:49:28 +00:00
|
|
|
"adr %1, 2f\n"
|
2008-02-05 10:22:33 +00:00
|
|
|
"str %1, [%0, #4]\n"
|
2006-02-05 22:06:12 +00:00
|
|
|
"ldr %1, [%2]\n"
|
2005-04-07 22:03:04 +00:00
|
|
|
"sub %1, %1, %3\n"
|
2006-02-05 22:06:12 +00:00
|
|
|
"str %1, [%2]\n"
|
2005-04-07 22:03:04 +00:00
|
|
|
"2:\n"
|
2005-05-24 21:42:31 +00:00
|
|
|
"mov %1, #0\n"
|
|
|
|
"str %1, [%0]\n"
|
Close a race.
The RAS implementation would set the end address, then the start
address. These were used by the kernel to restart a RAS sequence if
it was interrupted. When the thread switching code ran, it would
check these values and adjust the PC and clear them if it did.
However, there's a small flaw in this scheme. Thread T1, sets the end
address and gets preempted. Thread T2 runs and also does a RAS
operation. This resets end to zero. Thread T1 now runs again and
sets start and then begins the RAS sequence, but is preempted before
the RAS sequence executes its last instruction. The kernel code that
would ordinarily restart the RAS sequence doesn't because the PC isn't
between start and 0, so the PC isn't set to the start of the sequence.
So when T1 is resumed again, it is at the wrong location for RAS to
produce the correct results. This causes the wrong results for the
atomic sequence.
The window for the first race is 3 instructions. The window for the
second race is 5-10 instructions depending on the atomic operation.
This makes this failure fairly rare and hard to reproduce.
Mutexs are implemented in libthr using atomic operations. When the
above race would occur, a lock could get stuck locked, causing many
downstream problems, as you might expect.
Also, make sure to reset the start and end address when doing a syscall, or
a malicious process could set them before doing a syscall.
Reviewed by: imp, ups (thanks guys)
Pointy hat to: cognet
MFC After: 3 days
2007-12-02 12:49:28 +00:00
|
|
|
"mov %1, #0xffffffff\n"
|
2008-02-05 10:22:33 +00:00
|
|
|
"str %1, [%0, #4]\n"
|
2005-05-24 21:42:31 +00:00
|
|
|
|
2008-02-05 10:22:33 +00:00
|
|
|
: "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val)
|
2006-02-06 18:29:05 +00:00
|
|
|
: : "memory");
|
2005-04-07 22:03:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
|
|
|
|
{
|
2008-02-05 10:22:33 +00:00
|
|
|
int start, ras_start = ARM_RAS_START;
|
2005-04-07 22:03:04 +00:00
|
|
|
|
|
|
|
__asm __volatile("1:\n"
|
|
|
|
"adr %1, 1b\n"
|
|
|
|
"str %1, [%0]\n"
|
Close a race.
The RAS implementation would set the end address, then the start
address. These were used by the kernel to restart a RAS sequence if
it was interrupted. When the thread switching code ran, it would
check these values and adjust the PC and clear them if it did.
However, there's a small flaw in this scheme. Thread T1, sets the end
address and gets preempted. Thread T2 runs and also does a RAS
operation. This resets end to zero. Thread T1 now runs again and
sets start and then begins the RAS sequence, but is preempted before
the RAS sequence executes its last instruction. The kernel code that
would ordinarily restart the RAS sequence doesn't because the PC isn't
between start and 0, so the PC isn't set to the start of the sequence.
So when T1 is resumed again, it is at the wrong location for RAS to
produce the correct results. This causes the wrong results for the
atomic sequence.
The window for the first race is 3 instructions. The window for the
second race is 5-10 instructions depending on the atomic operation.
This makes this failure fairly rare and hard to reproduce.
Mutexs are implemented in libthr using atomic operations. When the
above race would occur, a lock could get stuck locked, causing many
downstream problems, as you might expect.
Also, make sure to reset the start and end address when doing a syscall, or
a malicious process could set them before doing a syscall.
Reviewed by: imp, ups (thanks guys)
Pointy hat to: cognet
MFC After: 3 days
2007-12-02 12:49:28 +00:00
|
|
|
"adr %1, 2f\n"
|
2008-02-05 10:22:33 +00:00
|
|
|
"str %1, [%0, #4]\n"
|
2006-02-05 22:06:12 +00:00
|
|
|
"ldr %1, [%2]\n"
|
2005-04-07 22:03:04 +00:00
|
|
|
"orr %1, %1, %3\n"
|
2006-02-05 22:06:12 +00:00
|
|
|
"str %1, [%2]\n"
|
2005-04-07 22:03:04 +00:00
|
|
|
"2:\n"
|
2005-05-24 21:42:31 +00:00
|
|
|
"mov %1, #0\n"
|
|
|
|
"str %1, [%0]\n"
|
Close a race.
The RAS implementation would set the end address, then the start
address. These were used by the kernel to restart a RAS sequence if
it was interrupted. When the thread switching code ran, it would
check these values and adjust the PC and clear them if it did.
However, there's a small flaw in this scheme. Thread T1, sets the end
address and gets preempted. Thread T2 runs and also does a RAS
operation. This resets end to zero. Thread T1 now runs again and
sets start and then begins the RAS sequence, but is preempted before
the RAS sequence executes its last instruction. The kernel code that
would ordinarily restart the RAS sequence doesn't because the PC isn't
between start and 0, so the PC isn't set to the start of the sequence.
So when T1 is resumed again, it is at the wrong location for RAS to
produce the correct results. This causes the wrong results for the
atomic sequence.
The window for the first race is 3 instructions. The window for the
second race is 5-10 instructions depending on the atomic operation.
This makes this failure fairly rare and hard to reproduce.
Mutexs are implemented in libthr using atomic operations. When the
above race would occur, a lock could get stuck locked, causing many
downstream problems, as you might expect.
Also, make sure to reset the start and end address when doing a syscall, or
a malicious process could set them before doing a syscall.
Reviewed by: imp, ups (thanks guys)
Pointy hat to: cognet
MFC After: 3 days
2007-12-02 12:49:28 +00:00
|
|
|
"mov %1, #0xffffffff\n"
|
2008-02-05 10:22:33 +00:00
|
|
|
"str %1, [%0, #4]\n"
|
2005-05-24 21:42:31 +00:00
|
|
|
|
2008-02-05 10:22:33 +00:00
|
|
|
: "+r" (ras_start), "=r" (start), "+r" (address), "+r" (setmask)
|
2006-02-06 18:29:05 +00:00
|
|
|
: : "memory");
|
2005-04-07 22:03:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
atomic_clear_32(volatile uint32_t *address, uint32_t clearmask)
|
|
|
|
{
|
2008-02-05 10:22:33 +00:00
|
|
|
int start, ras_start = ARM_RAS_START;
|
2005-04-07 22:03:04 +00:00
|
|
|
|
|
|
|
__asm __volatile("1:\n"
|
|
|
|
"adr %1, 1b\n"
|
|
|
|
"str %1, [%0]\n"
|
Close a race.
The RAS implementation would set the end address, then the start
address. These were used by the kernel to restart a RAS sequence if
it was interrupted. When the thread switching code ran, it would
check these values and adjust the PC and clear them if it did.
However, there's a small flaw in this scheme. Thread T1, sets the end
address and gets preempted. Thread T2 runs and also does a RAS
operation. This resets end to zero. Thread T1 now runs again and
sets start and then begins the RAS sequence, but is preempted before
the RAS sequence executes its last instruction. The kernel code that
would ordinarily restart the RAS sequence doesn't because the PC isn't
between start and 0, so the PC isn't set to the start of the sequence.
So when T1 is resumed again, it is at the wrong location for RAS to
produce the correct results. This causes the wrong results for the
atomic sequence.
The window for the first race is 3 instructions. The window for the
second race is 5-10 instructions depending on the atomic operation.
This makes this failure fairly rare and hard to reproduce.
Mutexs are implemented in libthr using atomic operations. When the
above race would occur, a lock could get stuck locked, causing many
downstream problems, as you might expect.
Also, make sure to reset the start and end address when doing a syscall, or
a malicious process could set them before doing a syscall.
Reviewed by: imp, ups (thanks guys)
Pointy hat to: cognet
MFC After: 3 days
2007-12-02 12:49:28 +00:00
|
|
|
"adr %1, 2f\n"
|
2008-02-05 10:22:33 +00:00
|
|
|
"str %1, [%0, #4]\n"
|
2006-02-05 22:06:12 +00:00
|
|
|
"ldr %1, [%2]\n"
|
2005-04-07 22:03:04 +00:00
|
|
|
"bic %1, %1, %3\n"
|
2006-02-05 22:06:12 +00:00
|
|
|
"str %1, [%2]\n"
|
2005-04-07 22:03:04 +00:00
|
|
|
"2:\n"
|
2005-05-24 21:42:31 +00:00
|
|
|
"mov %1, #0\n"
|
|
|
|
"str %1, [%0]\n"
|
Close a race.
The RAS implementation would set the end address, then the start
address. These were used by the kernel to restart a RAS sequence if
it was interrupted. When the thread switching code ran, it would
check these values and adjust the PC and clear them if it did.
However, there's a small flaw in this scheme. Thread T1, sets the end
address and gets preempted. Thread T2 runs and also does a RAS
operation. This resets end to zero. Thread T1 now runs again and
sets start and then begins the RAS sequence, but is preempted before
the RAS sequence executes its last instruction. The kernel code that
would ordinarily restart the RAS sequence doesn't because the PC isn't
between start and 0, so the PC isn't set to the start of the sequence.
So when T1 is resumed again, it is at the wrong location for RAS to
produce the correct results. This causes the wrong results for the
atomic sequence.
The window for the first race is 3 instructions. The window for the
second race is 5-10 instructions depending on the atomic operation.
This makes this failure fairly rare and hard to reproduce.
Mutexs are implemented in libthr using atomic operations. When the
above race would occur, a lock could get stuck locked, causing many
downstream problems, as you might expect.
Also, make sure to reset the start and end address when doing a syscall, or
a malicious process could set them before doing a syscall.
Reviewed by: imp, ups (thanks guys)
Pointy hat to: cognet
MFC After: 3 days
2007-12-02 12:49:28 +00:00
|
|
|
"mov %1, #0xffffffff\n"
|
2008-02-05 10:22:33 +00:00
|
|
|
"str %1, [%0, #4]\n"
|
|
|
|
: "+r" (ras_start), "=r" (start), "+r" (address), "+r" (clearmask)
|
2006-02-06 18:29:05 +00:00
|
|
|
: : "memory");
|
2005-04-07 22:03:04 +00:00
|
|
|
|
|
|
|
}
|
2005-09-27 17:39:11 +00:00
|
|
|
|
|
|
|
static __inline uint32_t
|
|
|
|
atomic_fetchadd_32(volatile uint32_t *p, uint32_t v)
|
|
|
|
{
|
2009-03-31 23:47:18 +00:00
|
|
|
uint32_t start, tmp, ras_start = ARM_RAS_START;
|
2005-09-27 17:39:11 +00:00
|
|
|
|
|
|
|
__asm __volatile("1:\n"
|
|
|
|
"adr %1, 1b\n"
|
|
|
|
"str %1, [%0]\n"
|
Close a race.
The RAS implementation would set the end address, then the start
address. These were used by the kernel to restart a RAS sequence if
it was interrupted. When the thread switching code ran, it would
check these values and adjust the PC and clear them if it did.
However, there's a small flaw in this scheme. Thread T1, sets the end
address and gets preempted. Thread T2 runs and also does a RAS
operation. This resets end to zero. Thread T1 now runs again and
sets start and then begins the RAS sequence, but is preempted before
the RAS sequence executes its last instruction. The kernel code that
would ordinarily restart the RAS sequence doesn't because the PC isn't
between start and 0, so the PC isn't set to the start of the sequence.
So when T1 is resumed again, it is at the wrong location for RAS to
produce the correct results. This causes the wrong results for the
atomic sequence.
The window for the first race is 3 instructions. The window for the
second race is 5-10 instructions depending on the atomic operation.
This makes this failure fairly rare and hard to reproduce.
Mutexs are implemented in libthr using atomic operations. When the
above race would occur, a lock could get stuck locked, causing many
downstream problems, as you might expect.
Also, make sure to reset the start and end address when doing a syscall, or
a malicious process could set them before doing a syscall.
Reviewed by: imp, ups (thanks guys)
Pointy hat to: cognet
MFC After: 3 days
2007-12-02 12:49:28 +00:00
|
|
|
"adr %1, 2f\n"
|
2008-02-05 10:22:33 +00:00
|
|
|
"str %1, [%0, #4]\n"
|
2009-03-31 23:47:18 +00:00
|
|
|
"ldr %1, [%3]\n"
|
|
|
|
"mov %2, %1\n"
|
|
|
|
"add %2, %2, %4\n"
|
|
|
|
"str %2, [%3]\n"
|
2005-09-27 17:39:11 +00:00
|
|
|
"2:\n"
|
2009-03-31 23:47:18 +00:00
|
|
|
"mov %2, #0\n"
|
|
|
|
"str %2, [%0]\n"
|
|
|
|
"mov %2, #0xffffffff\n"
|
|
|
|
"str %2, [%0, #4]\n"
|
|
|
|
: "+r" (ras_start), "=r" (start), "=r" (tmp), "+r" (p), "+r" (v)
|
2006-02-06 18:29:05 +00:00
|
|
|
: : "memory");
|
2005-09-27 17:39:11 +00:00
|
|
|
return (start);
|
|
|
|
}
|
|
|
|
|
2005-04-07 22:03:04 +00:00
|
|
|
#endif /* _KERNEL */
|
|
|
|
|
2012-08-15 03:03:03 +00:00
|
|
|
|
|
|
|
/*
 * Swap zero into *p via __swp() and return whatever __swp() hands back
 * (presumably the previous contents of *p -- confirm against __swp()).
 */
static __inline uint32_t
atomic_readandclear_32(volatile u_int32_t *p)
{
	uint32_t swapped_out;

	swapped_out = __swp(0, p);
	return (swapped_out);
}
|
|
|
|
|
|
|
|
/*
 * Acquire/release variants.  In this branch they are plain aliases of
 * the corresponding unordered operations; no separate implementations
 * are provided.  Each name is defined exactly once.
 */

/* 32-bit operations. */
#define atomic_cmpset_rel_32		atomic_cmpset_32
#define atomic_cmpset_acq_32		atomic_cmpset_32
#define atomic_set_rel_32		atomic_set_32
#define atomic_set_acq_32		atomic_set_32
#define atomic_clear_rel_32		atomic_clear_32
#define atomic_clear_acq_32		atomic_clear_32
#define atomic_add_rel_32		atomic_add_32
#define atomic_add_acq_32		atomic_add_32
#define atomic_subtract_rel_32		atomic_subtract_32
#define atomic_subtract_acq_32		atomic_subtract_32
#define atomic_store_rel_32		atomic_store_32
#define atomic_load_acq_32		atomic_load_32

/* u_long operations. */
#define atomic_store_rel_long		atomic_store_long
#define atomic_load_acq_long		atomic_load_long
#define atomic_add_acq_long		atomic_add_long
#define atomic_add_rel_long		atomic_add_long
#define atomic_subtract_acq_long	atomic_subtract_long
#define atomic_subtract_rel_long	atomic_subtract_long
#define atomic_clear_acq_long		atomic_clear_long
#define atomic_clear_rel_long		atomic_clear_long
#define atomic_set_acq_long		atomic_set_long
#define atomic_set_rel_long		atomic_set_long
#define atomic_cmpset_acq_long		atomic_cmpset_long
#define atomic_cmpset_rel_long		atomic_cmpset_long
|
2012-08-15 03:03:03 +00:00
|
|
|
#undef __with_interrupts_disabled
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
atomic_add_long(volatile u_long *p, u_long v)
|
|
|
|
{
|
|
|
|
|
|
|
|
atomic_add_32((volatile uint32_t *)p, v);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
atomic_clear_long(volatile u_long *p, u_long v)
|
|
|
|
{
|
|
|
|
|
|
|
|
atomic_clear_32((volatile uint32_t *)p, v);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __inline int
|
|
|
|
atomic_cmpset_long(volatile u_long *dst, u_long old, u_long newe)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (atomic_cmpset_32((volatile uint32_t *)dst, old, newe));
|
|
|
|
}
|
|
|
|
|
|
|
|
static __inline u_long
|
|
|
|
atomic_fetchadd_long(volatile u_long *p, u_long v)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (atomic_fetchadd_32((volatile uint32_t *)p, v));
|
|
|
|
}
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
atomic_readandclear_long(volatile u_long *p)
|
|
|
|
{
|
|
|
|
|
|
|
|
atomic_readandclear_32((volatile uint32_t *)p);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
atomic_set_long(volatile u_long *p, u_long v)
|
|
|
|
{
|
|
|
|
|
|
|
|
atomic_set_32((volatile uint32_t *)p, v);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __inline void
|
|
|
|
atomic_subtract_long(volatile u_long *p, u_long v)
|
|
|
|
{
|
|
|
|
|
|
|
|
atomic_subtract_32((volatile uint32_t *)p, v);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#endif /* Arch >= v6 */
|
|
|
|
|
2005-04-07 22:03:04 +00:00
|
|
|
/*
 * Read the 32-bit word at v with a single volatile access and return it.
 *
 * The return type is uint32_t, matching the pointee.  Returning int
 * turned values with the top bit set into negative numbers, which then
 * sign-extended when the result was widened to a 64-bit type.
 */
static __inline uint32_t
atomic_load_32(volatile uint32_t *v)
{

	return (*v);
}
|
|
|
|
|
|
|
|
/*
 * Write src to the 32-bit word at dst with a single volatile store.
 */
static __inline void
atomic_store_32(volatile uint32_t *dst, uint32_t src)
{

	dst[0] = src;
}
|
|
|
|
|
2012-08-15 03:03:03 +00:00
|
|
|
static __inline int
|
|
|
|
atomic_load_long(volatile u_long *v)
|
2005-04-07 22:03:04 +00:00
|
|
|
{
|
|
|
|
|
2012-08-15 03:03:03 +00:00
|
|
|
return (*v);
|
2004-05-14 11:46:45 +00:00
|
|
|
}
|
|
|
|
|
2012-08-15 03:03:03 +00:00
|
|
|
static __inline void
|
|
|
|
atomic_store_long(volatile u_long *dst, u_long src)
|
|
|
|
{
|
|
|
|
*dst = src;
|
|
|
|
}
|
2004-05-14 11:46:45 +00:00
|
|
|
|
2007-01-05 02:50:27 +00:00
|
|
|
/* Pointer-sized operations are aliases of the 32-bit ones. */
#define atomic_set_ptr			atomic_set_32
#define atomic_clear_ptr		atomic_clear_32
#define atomic_cmpset_ptr		atomic_cmpset_32
#define atomic_cmpset_acq_ptr		atomic_cmpset_acq_32
#define atomic_cmpset_rel_ptr		atomic_cmpset_rel_32
#define atomic_store_ptr		atomic_store_32
#define atomic_store_rel_ptr		atomic_store_rel_32
|
2004-05-14 11:46:45 +00:00
|
|
|
|
2007-01-05 02:50:27 +00:00
|
|
|
/* int-sized operations are aliases of the 32-bit ones. */

/* Arithmetic. */
#define atomic_add_int			atomic_add_32
#define atomic_add_acq_int		atomic_add_acq_32
#define atomic_add_rel_int		atomic_add_rel_32
#define atomic_subtract_int		atomic_subtract_32
#define atomic_subtract_acq_int		atomic_subtract_acq_32
#define atomic_subtract_rel_int		atomic_subtract_rel_32
#define atomic_fetchadd_int		atomic_fetchadd_32

/* Bit manipulation. */
#define atomic_set_int			atomic_set_32
#define atomic_set_acq_int		atomic_set_acq_32
#define atomic_set_rel_int		atomic_set_rel_32
#define atomic_clear_int		atomic_clear_32
#define atomic_clear_acq_int		atomic_clear_acq_32
#define atomic_clear_rel_int		atomic_clear_rel_32

/* Compare-and-set, read-and-clear, load and store. */
#define atomic_cmpset_int		atomic_cmpset_32
#define atomic_cmpset_acq_int		atomic_cmpset_acq_32
#define atomic_cmpset_rel_int		atomic_cmpset_rel_32
#define atomic_readandclear_int		atomic_readandclear_32
#define atomic_load_acq_int		atomic_load_acq_32
#define atomic_store_rel_int		atomic_store_rel_32
|
2004-05-14 11:46:45 +00:00
|
|
|
|
|
|
|
#endif /* _MACHINE_ATOMIC_H_ */
|