From 7a966f2ded110856471f7090b54500a0f324949c Mon Sep 17 00:00:00 2001 From: Andrew Gallatin <gallatin@FreeBSD.org> Date: Wed, 30 Oct 2002 01:41:44 +0000 Subject: [PATCH] Remove extraneous memory barriers, and correct the placement of a few others. This provides a 30% reduction in system time and a 6% reduction in wallclock time for a make buildworld on my xp1000 (one 21264). FWIW, I've been running this for nearly 2 months without problems. Portions submitted by: ticso, jhb Tested by: jhb (ds20 dual 21264) --- sys/alpha/alpha/atomic.s | 8 -------- sys/alpha/include/atomic.h | 18 ++++-------------- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/sys/alpha/alpha/atomic.s b/sys/alpha/alpha/atomic.s index cab7f23b7121..03b866f28381 100644 --- a/sys/alpha/alpha/atomic.s +++ b/sys/alpha/alpha/atomic.s @@ -40,7 +40,6 @@ LEAF(atomic_set_8, 2) or t2, t0, t0 stq_c t0, 0(t1) beq t0, 1f - mb RET 1: br 0b END(atomic_set_8) @@ -55,7 +54,6 @@ LEAF(atomic_clear_8, 2) or t2, t0, t0 stq_c t0, 0(t1) beq t0, 1f - mb RET 1: br 0b END(atomic_clear_8) @@ -70,7 +68,6 @@ LEAF(atomic_add_8, 2) or t2, t0, t0 stq_c t0, 0(t1) beq t0, 1f - mb RET 1: br 0b END(atomic_add_8) @@ -85,7 +82,6 @@ LEAF(atomic_subtract_8, 2) or t2, t0, t0 stq_c t0, 0(t1) beq t0, 1f - mb RET 1: br 0b END(atomic_subtract_8) @@ -100,7 +96,6 @@ LEAF(atomic_set_16, 2) or t2, t0, t0 stq_c t0, 0(t1) beq t0, 1f - mb RET 1: br 0b END(atomic_set_16) @@ -115,7 +110,6 @@ LEAF(atomic_clear_16, 2) or t2, t0, t0 stq_c t0, 0(t1) beq t0, 1f - mb RET 1: br 0b END(atomic_clear_16) @@ -130,7 +124,6 @@ LEAF(atomic_add_16, 2) or t2, t0, t0 stq_c t0, 0(t1) beq t0, 1f - mb RET 1: br 0b END(atomic_add_16) @@ -145,7 +138,6 @@ LEAF(atomic_subtract_16, 2) or t2, t0, t0 stq_c t0, 0(t1) beq t0, 1f - mb RET 1: br 0b END(atomic_subtract_16) diff --git a/sys/alpha/include/atomic.h b/sys/alpha/include/atomic.h index 3fe8c92967fe..ba92cd6ac735 100644 --- a/sys/alpha/include/atomic.h +++ b/sys/alpha/include/atomic.h @@ -56,7 +56,6 @@ 
static __inline void atomic_set_32(volatile u_int32_t *p, u_int32_t v) "bis %0, %3, %0\n\t" /* calculate new value */ "stl_c %0, %1\n\t" /* attempt to store */ "beq %0, 2f\n\t" /* spin if failed */ - "mb\n\t" /* drain to memory */ ".section .text3,\"ax\"\n" /* improve branch prediction */ "2:\tbr 1b\n" /* try again */ ".previous\n" @@ -76,7 +75,6 @@ static __inline void atomic_clear_32(volatile u_int32_t *p, u_int32_t v) "bic %0, %2, %0\n\t" /* calculate new value */ "stl_c %0, %1\n\t" /* attempt to store */ "beq %0, 2f\n\t" /* spin if failed */ - "mb\n\t" /* drain to memory */ ".section .text3,\"ax\"\n" /* improve branch prediction */ "2:\tbr 1b\n" /* try again */ ".previous\n" @@ -96,7 +94,6 @@ static __inline void atomic_add_32(volatile u_int32_t *p, u_int32_t v) "addl %0, %2, %0\n\t" /* calculate new value */ "stl_c %0, %1\n\t" /* attempt to store */ "beq %0, 2f\n\t" /* spin if failed */ - "mb\n\t" /* drain to memory */ ".section .text3,\"ax\"\n" /* improve branch prediction */ "2:\tbr 1b\n" /* try again */ ".previous\n" @@ -116,7 +113,6 @@ static __inline void atomic_subtract_32(volatile u_int32_t *p, u_int32_t v) "subl %0, %2, %0\n\t" /* calculate new value */ "stl_c %0, %1\n\t" /* attempt to store */ "beq %0, 2f\n\t" /* spin if failed */ - "mb\n\t" /* drain to memory */ ".section .text3,\"ax\"\n" /* improve branch prediction */ "2:\tbr 1b\n" /* try again */ ".previous\n" @@ -139,7 +135,7 @@ static __inline u_int32_t atomic_readandclear_32(volatile u_int32_t *addr) "beq %1,2f\n\t" /* if the store failed, spin */ "br 3f\n" /* it worked, exit */ "2:\tbr 1b\n" /* *addr not updated, loop */ - "3:\tmb\n" /* it worked */ + "3:\n" /* it worked */ : "=&r"(result), "=&r"(temp), "+m" (*addr) : : "memory"); @@ -158,7 +154,6 @@ static __inline void atomic_set_64(volatile u_int64_t *p, u_int64_t v) "bis %0, %2, %0\n\t" /* calculate new value */ "stq_c %0, %1\n\t" /* attempt to store */ "beq %0, 2f\n\t" /* spin if failed */ - "mb\n\t" /* drain to memory */ ".section 
.text3,\"ax\"\n" /* improve branch prediction */ "2:\tbr 1b\n" /* try again */ ".previous\n" @@ -178,7 +173,6 @@ static __inline void atomic_clear_64(volatile u_int64_t *p, u_int64_t v) "bic %0, %2, %0\n\t" /* calculate new value */ "stq_c %0, %1\n\t" /* attempt to store */ "beq %0, 2f\n\t" /* spin if failed */ - "mb\n\t" /* drain to memory */ ".section .text3,\"ax\"\n" /* improve branch prediction */ "2:\tbr 1b\n" /* try again */ ".previous\n" @@ -198,7 +192,6 @@ static __inline void atomic_add_64(volatile u_int64_t *p, u_int64_t v) "addq %0, %2, %0\n\t" /* calculate new value */ "stq_c %0, %1\n\t" /* attempt to store */ "beq %0, 2f\n\t" /* spin if failed */ - "mb\n\t" /* drain to memory */ ".section .text3,\"ax\"\n" /* improve branch prediction */ "2:\tbr 1b\n" /* try again */ ".previous\n" @@ -218,7 +211,6 @@ static __inline void atomic_subtract_64(volatile u_int64_t *p, u_int64_t v) "subq %0, %2, %0\n\t" /* calculate new value */ "stq_c %0, %1\n\t" /* attempt to store */ "beq %0, 2f\n\t" /* spin if failed */ - "mb\n\t" /* drain to memory */ ".section .text3,\"ax\"\n" /* improve branch prediction */ "2:\tbr 1b\n" /* try again */ ".previous\n" @@ -241,7 +233,7 @@ static __inline u_int64_t atomic_readandclear_64(volatile u_int64_t *addr) "beq %1,2f\n\t" /* if the store failed, spin */ "br 3f\n" /* it worked, exit */ "2:\tbr 1b\n" /* *addr not updated, loop */ - "3:\tmb\n" /* it worked */ + "3:\n" /* it worked */ : "=&r"(result), "=&r"(temp), "+m" (*addr) : : "memory"); @@ -277,7 +269,7 @@ static __inline void \ atomic_##NAME##_acq_##WIDTH(volatile u_int##WIDTH##_t *p, u_int##WIDTH##_t v)\ { \ atomic_##NAME##_##WIDTH(p, v); \ - /* alpha_mb(); */ \ + alpha_mb(); \ } \ \ static __inline void \ @@ -291,7 +283,7 @@ static __inline void \ atomic_##NAME##_acq_##TYPE(volatile u_int##WIDTH##_t *p, u_int##WIDTH##_t v)\ { \ atomic_##NAME##_##WIDTH(p, v); \ - /* alpha_mb(); */ \ + alpha_mb(); \ } \ \ static __inline void \ @@ -382,7 +374,6 @@ atomic_cmpset_32(volatile 
u_int32_t* p, u_int32_t cmpval, u_int32_t newval) "mov %3, %0\n\t" /* value to store */ "stl_c %0, %1\n\t" /* attempt to store */ "beq %0, 3f\n\t" /* if it failed, spin */ - "mb\n\t" /* drain to memory */ "2:\n" /* done */ ".section .text3,\"ax\"\n" /* improve branch prediction */ "3:\tbr 1b\n" /* try again */ @@ -413,7 +404,6 @@ atomic_cmpset_64(volatile u_int64_t* p, u_int64_t cmpval, u_int64_t newval) "mov %3, %0\n\t" /* value to store */ "stq_c %0, %1\n\t" /* attempt to store */ "beq %0, 3f\n\t" /* if it failed, spin */ - "mb\n\t" /* drain to memory */ "2:\n" /* done */ ".section .text3,\"ax\"\n" /* improve branch prediction */ "3:\tbr 1b\n" /* try again */