Remove extraneous memory barriers, and correct the placement of a few others.

This provides a 30% reduction in system time and a 6% reduction in wallclock time
for a make buildworld on my xp1000 (one 21264).

FWIW, I've been running this for nearly 2 months without problems.

Portions submitted by: ticso, jhb
Tested by: jhb (ds20 dual 21264)
This commit is contained in:
Andrew Gallatin 2002-10-30 01:41:44 +00:00
parent 206f3c68cc
commit 7a966f2ded
2 changed files with 4 additions and 22 deletions

View File

@ -40,7 +40,6 @@ LEAF(atomic_set_8, 2)
or t2, t0, t0
stq_c t0, 0(t1)
beq t0, 1f
mb
RET
1: br 0b
END(atomic_set_8)
@ -55,7 +54,6 @@ LEAF(atomic_clear_8, 2)
or t2, t0, t0
stq_c t0, 0(t1)
beq t0, 1f
mb
RET
1: br 0b
END(atomic_clear_8)
@ -70,7 +68,6 @@ LEAF(atomic_add_8, 2)
or t2, t0, t0
stq_c t0, 0(t1)
beq t0, 1f
mb
RET
1: br 0b
END(atomic_add_8)
@ -85,7 +82,6 @@ LEAF(atomic_subtract_8, 2)
or t2, t0, t0
stq_c t0, 0(t1)
beq t0, 1f
mb
RET
1: br 0b
END(atomic_subtract_8)
@ -100,7 +96,6 @@ LEAF(atomic_set_16, 2)
or t2, t0, t0
stq_c t0, 0(t1)
beq t0, 1f
mb
RET
1: br 0b
END(atomic_set_16)
@ -115,7 +110,6 @@ LEAF(atomic_clear_16, 2)
or t2, t0, t0
stq_c t0, 0(t1)
beq t0, 1f
mb
RET
1: br 0b
END(atomic_clear_16)
@ -130,7 +124,6 @@ LEAF(atomic_add_16, 2)
or t2, t0, t0
stq_c t0, 0(t1)
beq t0, 1f
mb
RET
1: br 0b
END(atomic_add_16)
@ -145,7 +138,6 @@ LEAF(atomic_subtract_16, 2)
or t2, t0, t0
stq_c t0, 0(t1)
beq t0, 1f
mb
RET
1: br 0b
END(atomic_subtract_16)

View File

@ -56,7 +56,6 @@ static __inline void atomic_set_32(volatile u_int32_t *p, u_int32_t v)
"bis %0, %3, %0\n\t" /* calculate new value */
"stl_c %0, %1\n\t" /* attempt to store */
"beq %0, 2f\n\t" /* spin if failed */
"mb\n\t" /* drain to memory */
".section .text3,\"ax\"\n" /* improve branch prediction */
"2:\tbr 1b\n" /* try again */
".previous\n"
@ -76,7 +75,6 @@ static __inline void atomic_clear_32(volatile u_int32_t *p, u_int32_t v)
"bic %0, %2, %0\n\t" /* calculate new value */
"stl_c %0, %1\n\t" /* attempt to store */
"beq %0, 2f\n\t" /* spin if failed */
"mb\n\t" /* drain to memory */
".section .text3,\"ax\"\n" /* improve branch prediction */
"2:\tbr 1b\n" /* try again */
".previous\n"
@ -96,7 +94,6 @@ static __inline void atomic_add_32(volatile u_int32_t *p, u_int32_t v)
"addl %0, %2, %0\n\t" /* calculate new value */
"stl_c %0, %1\n\t" /* attempt to store */
"beq %0, 2f\n\t" /* spin if failed */
"mb\n\t" /* drain to memory */
".section .text3,\"ax\"\n" /* improve branch prediction */
"2:\tbr 1b\n" /* try again */
".previous\n"
@ -116,7 +113,6 @@ static __inline void atomic_subtract_32(volatile u_int32_t *p, u_int32_t v)
"subl %0, %2, %0\n\t" /* calculate new value */
"stl_c %0, %1\n\t" /* attempt to store */
"beq %0, 2f\n\t" /* spin if failed */
"mb\n\t" /* drain to memory */
".section .text3,\"ax\"\n" /* improve branch prediction */
"2:\tbr 1b\n" /* try again */
".previous\n"
@ -139,7 +135,7 @@ static __inline u_int32_t atomic_readandclear_32(volatile u_int32_t *addr)
"beq %1,2f\n\t" /* if the store failed, spin */
"br 3f\n" /* it worked, exit */
"2:\tbr 1b\n" /* *addr not updated, loop */
"3:\tmb\n" /* it worked */
"3:\n" /* it worked */
: "=&r"(result), "=&r"(temp), "+m" (*addr)
:
: "memory");
@ -158,7 +154,6 @@ static __inline void atomic_set_64(volatile u_int64_t *p, u_int64_t v)
"bis %0, %2, %0\n\t" /* calculate new value */
"stq_c %0, %1\n\t" /* attempt to store */
"beq %0, 2f\n\t" /* spin if failed */
"mb\n\t" /* drain to memory */
".section .text3,\"ax\"\n" /* improve branch prediction */
"2:\tbr 1b\n" /* try again */
".previous\n"
@ -178,7 +173,6 @@ static __inline void atomic_clear_64(volatile u_int64_t *p, u_int64_t v)
"bic %0, %2, %0\n\t" /* calculate new value */
"stq_c %0, %1\n\t" /* attempt to store */
"beq %0, 2f\n\t" /* spin if failed */
"mb\n\t" /* drain to memory */
".section .text3,\"ax\"\n" /* improve branch prediction */
"2:\tbr 1b\n" /* try again */
".previous\n"
@ -198,7 +192,6 @@ static __inline void atomic_add_64(volatile u_int64_t *p, u_int64_t v)
"addq %0, %2, %0\n\t" /* calculate new value */
"stq_c %0, %1\n\t" /* attempt to store */
"beq %0, 2f\n\t" /* spin if failed */
"mb\n\t" /* drain to memory */
".section .text3,\"ax\"\n" /* improve branch prediction */
"2:\tbr 1b\n" /* try again */
".previous\n"
@ -218,7 +211,6 @@ static __inline void atomic_subtract_64(volatile u_int64_t *p, u_int64_t v)
"subq %0, %2, %0\n\t" /* calculate new value */
"stq_c %0, %1\n\t" /* attempt to store */
"beq %0, 2f\n\t" /* spin if failed */
"mb\n\t" /* drain to memory */
".section .text3,\"ax\"\n" /* improve branch prediction */
"2:\tbr 1b\n" /* try again */
".previous\n"
@ -241,7 +233,7 @@ static __inline u_int64_t atomic_readandclear_64(volatile u_int64_t *addr)
"beq %1,2f\n\t" /* if the store failed, spin */
"br 3f\n" /* it worked, exit */
"2:\tbr 1b\n" /* *addr not updated, loop */
"3:\tmb\n" /* it worked */
"3:\n" /* it worked */
: "=&r"(result), "=&r"(temp), "+m" (*addr)
:
: "memory");
@ -277,7 +269,7 @@ static __inline void \
atomic_##NAME##_acq_##WIDTH(volatile u_int##WIDTH##_t *p, u_int##WIDTH##_t v)\
{ \
atomic_##NAME##_##WIDTH(p, v); \
/* alpha_mb(); */ \
alpha_mb(); \
} \
\
static __inline void \
@ -291,7 +283,7 @@ static __inline void \
atomic_##NAME##_acq_##TYPE(volatile u_int##WIDTH##_t *p, u_int##WIDTH##_t v)\
{ \
atomic_##NAME##_##WIDTH(p, v); \
/* alpha_mb(); */ \
alpha_mb(); \
} \
\
static __inline void \
@ -382,7 +374,6 @@ atomic_cmpset_32(volatile u_int32_t* p, u_int32_t cmpval, u_int32_t newval)
"mov %3, %0\n\t" /* value to store */
"stl_c %0, %1\n\t" /* attempt to store */
"beq %0, 3f\n\t" /* if it failed, spin */
"mb\n\t" /* drain to memory */
"2:\n" /* done */
".section .text3,\"ax\"\n" /* improve branch prediction */
"3:\tbr 1b\n" /* try again */
@ -413,7 +404,6 @@ atomic_cmpset_64(volatile u_int64_t* p, u_int64_t cmpval, u_int64_t newval)
"mov %3, %0\n\t" /* value to store */
"stq_c %0, %1\n\t" /* attempt to store */
"beq %0, 3f\n\t" /* if it failed, spin */
"mb\n\t" /* drain to memory */
"2:\n" /* done */
".section .text3,\"ax\"\n" /* improve branch prediction */
"3:\tbr 1b\n" /* try again */