From f65e4fa8e0c6022ad58dc88d1b11b12589ed7f9f Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 28 Sep 2006 01:45:21 +0100 Subject: [PATCH] [MIPS] Improve branch prediction in ll/sc atomic operations. Now that finally all supported versions of binutils have functioning support for .subsection use .subsection to tweak the branch prediction. I did not modify the R10000 errata variants because it seems unclear if this will invalidate the workaround which actually relies on the cheesy prediction of branch likely to cause a mispredict if the sc was successful. Signed-off-by: Ralf Baechle --- include/asm-mips/atomic.h | 50 ++++++++++++++++++++++++++------- include/asm-mips/bitops.h | 33 ++++++++++++++++++---- include/asm-mips/spinlock.h | 56 +++++++++++++++++++++++++++++-------- include/asm-mips/system.h | 20 ++++++++++--- 4 files changed, 127 insertions(+), 32 deletions(-) diff --git a/include/asm-mips/atomic.h b/include/asm-mips/atomic.h index c1a2409bb5..8578869a8b 100644 --- a/include/asm-mips/atomic.h +++ b/include/asm-mips/atomic.h @@ -69,7 +69,10 @@ static __inline__ void atomic_add(int i, atomic_t * v) "1: ll %0, %1 # atomic_add \n" " addu %0, %2 \n" " sc %0, %1 \n" - " beqz %0, 1b \n" + " beqz %0, 2f \n" + " .subsection 2 \n" + "2: b 1b \n" + " .previous \n" " .set mips0 \n" : "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter)); @@ -111,7 +114,10 @@ static __inline__ void atomic_sub(int i, atomic_t * v) "1: ll %0, %1 # atomic_sub \n" " subu %0, %2 \n" " sc %0, %1 \n" - " beqz %0, 1b \n" + " beqz %0, 2f \n" + " .subsection 2 \n" + "2: b 1b \n" + " .previous \n" " .set mips0 \n" : "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter)); @@ -155,8 +161,11 @@ static __inline__ int atomic_add_return(int i, atomic_t * v) "1: ll %1, %2 # atomic_add_return \n" " addu %0, %1, %3 \n" " sc %0, %2 \n" - " beqz %0, 1b \n" + " beqz %0, 2f \n" " addu %0, %1, %3 \n" + " .subsection 2 \n" + "2: b 1b \n" + " .previous \n" " .set mips0 \n" : "=&r" (result), 
"=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter) @@ -204,8 +213,11 @@ static __inline__ int atomic_sub_return(int i, atomic_t * v) "1: ll %1, %2 # atomic_sub_return \n" " subu %0, %1, %3 \n" " sc %0, %2 \n" - " beqz %0, 1b \n" + " beqz %0, 2f \n" " subu %0, %1, %3 \n" + " .subsection 2 \n" + "2: b 1b \n" + " .previous \n" " .set mips0 \n" : "=&r" (result), "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter) @@ -267,10 +279,13 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) " bltz %0, 1f \n" " sc %0, %2 \n" " .set noreorder \n" - " beqz %0, 1b \n" + " beqz %0, 2f \n" " subu %0, %1, %3 \n" " .set reorder \n" "1: \n" + " .subsection 2 \n" + "2: b 1b \n" + " .previous \n" " .set mips0 \n" : "=&r" (result), "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter) @@ -429,7 +444,10 @@ static __inline__ void atomic64_add(long i, atomic64_t * v) "1: lld %0, %1 # atomic64_add \n" " addu %0, %2 \n" " scd %0, %1 \n" - " beqz %0, 1b \n" + " beqz %0, 2f \n" + " .subsection 2 \n" + "2: b 1b \n" + " .previous \n" " .set mips0 \n" : "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter)); @@ -471,7 +489,10 @@ static __inline__ void atomic64_sub(long i, atomic64_t * v) "1: lld %0, %1 # atomic64_sub \n" " subu %0, %2 \n" " scd %0, %1 \n" - " beqz %0, 1b \n" + " beqz %0, 2f \n" + " .subsection 2 \n" + "2: b 1b \n" + " .previous \n" " .set mips0 \n" : "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter)); @@ -515,8 +536,11 @@ static __inline__ long atomic64_add_return(long i, atomic64_t * v) "1: lld %1, %2 # atomic64_add_return \n" " addu %0, %1, %3 \n" " scd %0, %2 \n" - " beqz %0, 1b \n" + " beqz %0, 2f \n" " addu %0, %1, %3 \n" + " .subsection 2 \n" + "2: b 1b \n" + " .previous \n" " .set mips0 \n" : "=&r" (result), "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter) @@ -564,8 +588,11 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t * v) "1: lld %1, %2 # atomic64_sub_return \n" " subu %0, %1, %3 
\n" " scd %0, %2 \n" - " beqz %0, 1b \n" + " beqz %0, 2f \n" " subu %0, %1, %3 \n" + " .subsection 2 \n" + "2: b 1b \n" + " .previous \n" " .set mips0 \n" : "=&r" (result), "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter) @@ -627,10 +654,13 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v) " bltz %0, 1f \n" " scd %0, %2 \n" " .set noreorder \n" - " beqz %0, 1b \n" + " beqz %0, 2f \n" " dsubu %0, %1, %3 \n" " .set reorder \n" "1: \n" + " .subsection 2 \n" + "2: b 1b \n" + " .previous \n" " .set mips0 \n" : "=&r" (result), "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter) diff --git a/include/asm-mips/bitops.h b/include/asm-mips/bitops.h index 06445de132..06c08228a5 100644 --- a/include/asm-mips/bitops.h +++ b/include/asm-mips/bitops.h @@ -68,7 +68,10 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) "1: " __LL "%0, %1 # set_bit \n" " or %0, %2 \n" " " __SC "%0, %1 \n" - " beqz %0, 1b \n" + " beqz %0, 2f \n" + " .subsection 2 \n" + "2: b 1b \n" + " .previous \n" " .set mips0 \n" : "=&r" (temp), "=m" (*m) : "ir" (1UL << (nr & SZLONG_MASK)), "m" (*m)); @@ -116,7 +119,10 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) "1: " __LL "%0, %1 # clear_bit \n" " and %0, %2 \n" " " __SC "%0, %1 \n" - " beqz %0, 1b \n" + " beqz %0, 2f \n" + " .subsection 2 \n" + "2: b 1b \n" + " .previous \n" " .set mips0 \n" : "=&r" (temp), "=m" (*m) : "ir" (~(1UL << (nr & SZLONG_MASK))), "m" (*m)); @@ -166,7 +172,10 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) "1: " __LL "%0, %1 # change_bit \n" " xor %0, %2 \n" " " __SC "%0, %1 \n" - " beqz %0, 1b \n" + " beqz %0, 2f \n" + " .subsection 2 \n" + "2: b 1b \n" + " .previous \n" " .set mips0 \n" : "=&r" (temp), "=m" (*m) : "ir" (1UL << (nr & SZLONG_MASK)), "m" (*m)); @@ -222,8 +231,12 @@ static inline int test_and_set_bit(unsigned long nr, "1: " __LL "%0, %1 # test_and_set_bit \n" " or %2, %0, %3 \n" " " 
__SC "%2, %1 \n" - " beqz %2, 1b \n" + " beqz %2, 2f \n" " and %2, %0, %3 \n" + " .subsection 2 \n" + "2: b 1b \n" + " nop \n" + " .previous \n" " .set pop \n" : "=&r" (temp), "=m" (*m), "=&r" (res) : "r" (1UL << (nr & SZLONG_MASK)), "m" (*m) @@ -290,8 +303,12 @@ static inline int test_and_clear_bit(unsigned long nr, " or %2, %0, %3 \n" " xor %2, %3 \n" " " __SC "%2, %1 \n" - " beqz %2, 1b \n" + " beqz %2, 2f \n" " and %2, %0, %3 \n" + " .subsection 2 \n" + "2: b 1b \n" + " nop \n" + " .previous \n" " .set pop \n" : "=&r" (temp), "=m" (*m), "=&r" (res) : "r" (1UL << (nr & SZLONG_MASK)), "m" (*m) @@ -356,8 +373,12 @@ static inline int test_and_change_bit(unsigned long nr, "1: " __LL "%0, %1 # test_and_change_bit \n" " xor %2, %0, %3 \n" " " __SC "\t%2, %1 \n" - " beqz %2, 1b \n" + " beqz %2, 2f \n" " and %2, %0, %3 \n" + " .subsection 2 \n" + "2: b 1b \n" + " nop \n" + " .previous \n" " .set pop \n" : "=&r" (temp), "=m" (*m), "=&r" (res) : "r" (1UL << (nr & SZLONG_MASK)), "m" (*m) diff --git a/include/asm-mips/spinlock.h b/include/asm-mips/spinlock.h index fc3217fc11..f1755d28a3 100644 --- a/include/asm-mips/spinlock.h +++ b/include/asm-mips/spinlock.h @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1999, 2000, 06 by Ralf Baechle + * Copyright (C) 1999, 2000, 06 Ralf Baechle (ralf@linux-mips.org) * Copyright (C) 1999, 2000 Silicon Graphics, Inc. 
*/ #ifndef _ASM_SPINLOCK_H @@ -49,11 +49,18 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) __asm__ __volatile__( " .set noreorder # __raw_spin_lock \n" "1: ll %1, %2 \n" - " bnez %1, 1b \n" + " bnez %1, 2f \n" " li %1, 1 \n" " sc %1, %0 \n" - " beqz %1, 1b \n" + " beqz %1, 2f \n" " nop \n" + " .subsection 2 \n" + "2: ll %1, %2 \n" + " bnez %1, 2b \n" + " li %1, 1 \n" + " b 1b \n" + " nop \n" + " .previous \n" " .set reorder \n" : "=m" (lock->lock), "=&r" (tmp) : "m" (lock->lock) @@ -99,8 +106,12 @@ static inline unsigned int __raw_spin_trylock(raw_spinlock_t *lock) "1: ll %0, %3 \n" " ori %2, %0, 1 \n" " sc %2, %1 \n" - " beqz %2, 1b \n" + " beqz %2, 2f \n" " andi %2, %0, 1 \n" + " .subsection 2 \n" + "2: b 1b \n" + " nop \n" + " .previous \n" " .set reorder" : "=&r" (temp), "=m" (lock->lock), "=&r" (res) : "m" (lock->lock) @@ -154,11 +165,18 @@ static inline void __raw_read_lock(raw_rwlock_t *rw) __asm__ __volatile__( " .set noreorder # __raw_read_lock \n" "1: ll %1, %2 \n" - " bltz %1, 1b \n" + " bltz %1, 2f \n" " addu %1, 1 \n" " sc %1, %0 \n" " beqz %1, 1b \n" " nop \n" + " .subsection 2 \n" + "2: ll %1, %2 \n" + " bltz %1, 2b \n" + " addu %1, 1 \n" + " b 1b \n" + " nop \n" + " .previous \n" " .set reorder \n" : "=m" (rw->lock), "=&r" (tmp) : "m" (rw->lock) @@ -192,8 +210,12 @@ static inline void __raw_read_unlock(raw_rwlock_t *rw) "1: ll %1, %2 \n" " sub %1, 1 \n" " sc %1, %0 \n" - " beqz %1, 1b \n" + " beqz %1, 2f \n" + " nop \n" + " .subsection 2 \n" + "2: b 1b \n" " nop \n" + " .previous \n" " .set reorder \n" : "=m" (rw->lock), "=&r" (tmp) : "m" (rw->lock) @@ -222,11 +244,18 @@ static inline void __raw_write_lock(raw_rwlock_t *rw) __asm__ __volatile__( " .set noreorder # __raw_write_lock \n" "1: ll %1, %2 \n" - " bnez %1, 1b \n" + " bnez %1, 2f \n" " lui %1, 0x8000 \n" " sc %1, %0 \n" - " beqz %1, 1b \n" + " beqz %1, 2f \n" + " nop \n" + " .subsection 2 \n" + "2: ll %1, %2 \n" + " bnez %1, 2b \n" + " lui %1, 0x8000 \n" + " b 1b \n" " nop \n" + 
" .previous \n" " .set reorder \n" : "=m" (rw->lock), "=&r" (tmp) : "m" (rw->lock) @@ -322,12 +351,15 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw) " bnez %1, 2f \n" " lui %1, 0x8000 \n" " sc %1, %0 \n" - " beqz %1, 1b \n" - " nop \n" + " beqz %1, 3f \n" + " li %2, 1 \n" + "2: \n" __WEAK_ORDERING_MB - " li %2, 1 \n" + " .subsection 2 \n" + "3: b 1b \n" + " li %2, 0 \n" + " .previous \n" " .set reorder \n" - "2: \n" : "=m" (rw->lock), "=&r" (tmp), "=&r" (ret) : "m" (rw->lock) : "memory"); diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h index 5e1289c85e..597a3743f6 100644 --- a/include/asm-mips/system.h +++ b/include/asm-mips/system.h @@ -110,7 +110,10 @@ static inline unsigned long __xchg_u32(volatile int * m, unsigned int val) " move %2, %z4 \n" " .set mips3 \n" " sc %2, %1 \n" - " beqz %2, 1b \n" + " beqz %2, 2f \n" + " .subsection 2 \n" + "2: b 1b \n" + " .previous \n" " .set mips0 \n" : "=&r" (retval), "=m" (*m), "=&r" (dummy) : "R" (*m), "Jr" (val) @@ -155,7 +158,10 @@ static inline __u64 __xchg_u64(volatile __u64 * m, __u64 val) "1: lld %0, %3 # xchg_u64 \n" " move %2, %z4 \n" " scd %2, %1 \n" - " beqz %2, 1b \n" + " beqz %2, 2f \n" + " .subsection 2 \n" + "2: b 1b \n" + " .previous \n" " .set mips0 \n" : "=&r" (retval), "=m" (*m), "=&r" (dummy) : "R" (*m), "Jr" (val) @@ -232,8 +238,11 @@ static inline unsigned long __cmpxchg_u32(volatile int * m, unsigned long old, " move $1, %z4 \n" " .set mips3 \n" " sc $1, %1 \n" - " beqz $1, 1b \n" + " beqz $1, 3f \n" "2: \n" + " .subsection 2 \n" + "3: b 1b \n" + " .previous \n" " .set pop \n" : "=&r" (retval), "=R" (*m) : "R" (*m), "Jr" (old), "Jr" (new) @@ -283,8 +292,11 @@ static inline unsigned long __cmpxchg_u64(volatile int * m, unsigned long old, " bne %0, %z3, 2f \n" " move $1, %z4 \n" " scd $1, %1 \n" - " beqz $1, 1b \n" + " beqz $1, 3f \n" "2: \n" + " .subsection 2 \n" + "3: b 1b \n" + " .previous \n" " .set pop \n" : "=&r" (retval), "=R" (*m) : "R" (*m), "Jr" (old), "Jr" (new) 
-- 2.39.5