From ffe483d55229fadbaf4cc7316d47024a24ecd1a2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 2 Feb 2006 21:55:10 -0800 Subject: [PATCH] [SPARC64]: Add explicit register args to trap state loading macros. This, as well as making the code cleaner, allows a simplification in the TSB miss handling path. Signed-off-by: David S. Miller --- arch/sparc64/kernel/entry.S | 8 ++-- arch/sparc64/kernel/etrap.S | 10 ++-- arch/sparc64/kernel/rtrap.S | 2 +- arch/sparc64/kernel/tsb.S | 9 +--- arch/sparc64/kernel/winfixup.S | 18 +++---- include/asm-sparc64/cpudata.h | 88 +++++++++++++++++----------------- 6 files changed, 64 insertions(+), 71 deletions(-) diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index b3511ff5d0..4ca3ea0bea 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -50,7 +50,7 @@ do_fpdis: add %g0, %g0, %g0 ba,a,pt %xcc, rtrap_clr_l6 -1: TRAP_LOAD_THREAD_REG +1: TRAP_LOAD_THREAD_REG(%g6, %g1) ldub [%g6 + TI_FPSAVED], %g5 wr %g0, FPRS_FEF, %fprs andcc %g5, FPRS_FEF, %g0 @@ -190,7 +190,7 @@ fp_other_bounce: .globl do_fpother_check_fitos .align 32 do_fpother_check_fitos: - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) sethi %hi(fp_other_bounce - 4), %g7 or %g7, %lo(fp_other_bounce - 4), %g7 @@ -378,7 +378,7 @@ do_ivec: sllx %g2, %g4, %g2 sllx %g4, 2, %g4 - TRAP_LOAD_IRQ_WORK + TRAP_LOAD_IRQ_WORK(%g6, %g1) lduw [%g6 + %g4], %g5 /* g5 = irq_work(cpu, pil) */ stw %g5, [%g3 + 0x00] /* bucket->irq_chain = g5 */ @@ -422,7 +422,7 @@ setcc: .globl utrap_trap utrap_trap: /* %g3=handler,%g4=level */ - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) ldx [%g6 + TI_UTRAPS], %g1 brnz,pt %g1, invoke_utrap nop diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S index d974d18b15..b5f6bc52d9 100644 --- a/arch/sparc64/kernel/etrap.S +++ b/arch/sparc64/kernel/etrap.S @@ -31,7 +31,7 @@ .globl etrap, etrap_irq, etraptl1 etrap: rdpr %pil, %g2 etrap_irq: - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) rdpr %tstate, %g1 sllx %g2, 20, %g3 andcc %g1, TSTATE_PRIV, %g0 @@ -100,7 +100,7 @@ etrap_irq: stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] wrpr %g0, ETRAP_PSTATE2, %pstate mov %l6, %g6 - LOAD_PER_CPU_BASE(%g4, %g3, %l1) + LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %l1) jmpl %l2 + 0x4, %g0 ldx [%g6 + TI_TASK], %g4 @@ -124,7 +124,7 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself. * 0x58 TL4's TT * 0x60 TL */ - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) sub %sp, ((4 * 8) * 4) + 8, %g2 rdpr %tl, %g1 @@ -179,7 +179,7 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself. .align 64 .globl scetrap scetrap: - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) rdpr %pil, %g2 rdpr %tstate, %g1 sllx %g2, 20, %g3 @@ -250,7 +250,7 @@ scetrap: stx %i6, [%sp + PTREGS_OFF + PT_V9_I6] mov %l6, %g6 stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] - LOAD_PER_CPU_BASE(%g4, %g3, %l1) + LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %l1) ldx [%g6 + TI_TASK], %g4 done diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S index 64bc03610b..61bd45e769 100644 --- a/arch/sparc64/kernel/rtrap.S +++ b/arch/sparc64/kernel/rtrap.S @@ -226,7 +226,7 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 brz,pt %l3, 1f nop /* Must do this before thread reg is clobbered below. */ - LOAD_PER_CPU_BASE(%i0, %i1, %i2) + LOAD_PER_CPU_BASE(%g5, %g6, %i0, %i1, %i2) 1: ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6 ldx [%sp + PTREGS_OFF + PT_V9_G7], %g7 diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S index ff6a79beb9..28e38b168d 100644 --- a/arch/sparc64/kernel/tsb.S +++ b/arch/sparc64/kernel/tsb.S @@ -36,14 +36,7 @@ tsb_miss_itlb: nop tsb_miss_page_table_walk: - /* This clobbers %g1 and %g6, preserve them... */ - mov %g1, %g5 - mov %g6, %g2 - - TRAP_LOAD_PGD_PHYS - - mov %g2, %g6 - mov %g5, %g1 + TRAP_LOAD_PGD_PHYS(%g7, %g5) USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault) diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S index 320a762d05..211021ae6e 100644 --- a/arch/sparc64/kernel/winfixup.S +++ b/arch/sparc64/kernel/winfixup.S @@ -40,7 +40,7 @@ set_pcontext: */ .globl fill_fixup, spill_fixup fill_fixup: - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) rdpr %tstate, %g1 andcc %g1, TSTATE_PRIV, %g0 or %g4, FAULT_CODE_WINFIXUP, %g4 @@ -86,7 +86,7 @@ fill_fixup: wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate mov %o7, %g6 ldx [%g6 + TI_TASK], %g4 - LOAD_PER_CPU_BASE(%g1, %g2, %g3) + LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3) /* This is the same as below, except we handle this a bit special * since we must preserve %l5 and %l6, see comment above. @@ -105,7 +105,7 @@ fill_fixup: * do not touch %g7 or %g2 so we handle the two cases fine. */ spill_fixup: - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) ldx [%g6 + TI_FLAGS], %g1 andcc %g1, _TIF_32BIT, %g0 ldub [%g6 + TI_WSAVED], %g1 @@ -181,7 +181,7 @@ winfix_mna: wrpr %g3, %tnpc done fill_fixup_mna: - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) rdpr %tstate, %g1 andcc %g1, TSTATE_PRIV, %g0 be,pt %xcc, window_mna_from_user_common @@ -209,14 +209,14 @@ fill_fixup_mna: wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate mov %o7, %g6 ! Get current back. ldx [%g6 + TI_TASK], %g4 ! Finish it. - LOAD_PER_CPU_BASE(%g1, %g2, %g3) + LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3) call mem_address_unaligned add %sp, PTREGS_OFF, %o0 b,pt %xcc, rtrap nop ! yes, the nop is correct spill_fixup_mna: - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) ldx [%g6 + TI_FLAGS], %g1 andcc %g1, _TIF_32BIT, %g0 ldub [%g6 + TI_WSAVED], %g1 @@ -284,7 +284,7 @@ winfix_dax: wrpr %g3, %tnpc done fill_fixup_dax: - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) rdpr %tstate, %g1 andcc %g1, TSTATE_PRIV, %g0 be,pt %xcc, window_dax_from_user_common @@ -312,14 +312,14 @@ fill_fixup_dax: wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate mov %o7, %g6 ! Get current back. ldx [%g6 + TI_TASK], %g4 ! Finish it. - LOAD_PER_CPU_BASE(%g1, %g2, %g3) + LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3) call spitfire_data_access_exception add %sp, PTREGS_OFF, %o0 b,pt %xcc, rtrap nop ! yes, the nop is correct spill_fixup_dax: - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) ldx [%g6 + TI_FLAGS], %g1 andcc %g1, _TIF_32BIT, %g0 ldub [%g6 + TI_WSAVED], %g1 diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index da54b4f354..c15514f82c 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -107,67 +107,67 @@ extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end; lduwa [REG] ASI_PHYS_BYPASS_EC_E, REG;\ .previous; -/* Clobbers %g1, current address space PGD phys address into %g7. */ -#define TRAP_LOAD_PGD_PHYS \ - __GET_CPUID(%g1) \ - sethi %hi(trap_block), %g7; \ - sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g1; \ - or %g7, %lo(trap_block), %g7; \ - add %g7, %g1, %g7; \ - ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7; - -/* Clobbers %g1, loads local processor's IRQ work area into %g6. */ -#define TRAP_LOAD_IRQ_WORK \ - __GET_CPUID(%g1) \ - sethi %hi(__irq_work), %g6; \ - sllx %g1, 6, %g1; \ - or %g6, %lo(__irq_work), %g6; \ - add %g6, %g1, %g6; - -/* Clobbers %g1, loads %g6 with current thread info pointer. */ -#define TRAP_LOAD_THREAD_REG \ - __GET_CPUID(%g1) \ - sethi %hi(trap_block), %g6; \ - sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g1; \ - or %g6, %lo(trap_block), %g6; \ - ldx [%g6 + %g1], %g6; - -/* Given the current thread info pointer in %g6, load the per-cpu - * area base of the current processor into %g5. REG1, REG2, and REG3 are +/* Clobbers TMP, current address space PGD phys address into DEST. */ +#define TRAP_LOAD_PGD_PHYS(DEST, TMP) \ + __GET_CPUID(TMP) \ + sethi %hi(trap_block), DEST; \ + sllx TMP, TRAP_BLOCK_SZ_SHIFT, TMP; \ + or DEST, %lo(trap_block), DEST; \ + add DEST, TMP, DEST; \ + ldx [DEST + TRAP_PER_CPU_PGD_PADDR], DEST; + +/* Clobbers TMP, loads local processor's IRQ work area into DEST. */ +#define TRAP_LOAD_IRQ_WORK(DEST, TMP) \ + __GET_CPUID(TMP) \ + sethi %hi(__irq_work), DEST; \ + sllx TMP, 6, TMP; \ + or DEST, %lo(__irq_work), DEST; \ + add DEST, TMP, DEST; + +/* Clobbers TMP, loads DEST with current thread info pointer. */ +#define TRAP_LOAD_THREAD_REG(DEST, TMP) \ + __GET_CPUID(TMP) \ + sethi %hi(trap_block), DEST; \ + sllx TMP, TRAP_BLOCK_SZ_SHIFT, TMP; \ + or DEST, %lo(trap_block), DEST; \ + ldx [DEST + TMP], DEST; + +/* Given the current thread info pointer in THR, load the per-cpu + * area base of the current processor into DEST. REG1, REG2, and REG3 are * clobbered. * - * You absolutely cannot use %g5 as a temporary in this code. The + * You absolutely cannot use DEST as a temporary in this code. The * reason is that traps can happen during execution, and return from - * trap will load the fully resolved %g5 per-cpu base. This can corrupt + * trap will load the fully resolved DEST per-cpu base. This can corrupt * the calculations done by the macro mid-stream. */ -#define LOAD_PER_CPU_BASE(REG1, REG2, REG3) \ - ldub [%g6 + TI_CPU], REG1; \ +#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \ + ldub [THR + TI_CPU], REG1; \ sethi %hi(__per_cpu_shift), REG3; \ sethi %hi(__per_cpu_base), REG2; \ ldx [REG3 + %lo(__per_cpu_shift)], REG3; \ ldx [REG2 + %lo(__per_cpu_base)], REG2; \ sllx REG1, REG3, REG3; \ - add REG3, REG2, %g5; + add REG3, REG2, DEST; #else /* Uniprocessor versions, we know the cpuid is zero. */ -#define TRAP_LOAD_PGD_PHYS \ - sethi %hi(trap_block), %g7; \ - or %g7, %lo(trap_block), %g7; \ - ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7; +#define TRAP_LOAD_PGD_PHYS(DEST, TMP) \ + sethi %hi(trap_block), DEST; \ + or DEST, %lo(trap_block), DEST; \ + ldx [DEST + TRAP_PER_CPU_PGD_PADDR], DEST; -#define TRAP_LOAD_IRQ_WORK \ - sethi %hi(__irq_work), %g6; \ - or %g6, %lo(__irq_work), %g6; +#define TRAP_LOAD_IRQ_WORK(DEST, TMP) \ + sethi %hi(__irq_work), DEST; \ + or DEST, %lo(__irq_work), DEST; -#define TRAP_LOAD_THREAD_REG \ - sethi %hi(trap_block), %g6; \ - ldx [%g6 + %lo(trap_block)], %g6; +#define TRAP_LOAD_THREAD_REG(DEST, TMP) \ + sethi %hi(trap_block), DEST; \ + ldx [DEST + %lo(trap_block)], DEST; -/* No per-cpu areas on uniprocessor, so no need to load %g5. */ -#define LOAD_PER_CPU_BASE(REG1, REG2, REG3) +/* No per-cpu areas on uniprocessor, so no need to load DEST. */ +#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) #endif /* !(CONFIG_SMP) */ -- 2.39.5