From: Nicolas Pitre Date: Mon, 31 Mar 2008 16:38:31 +0000 (-0400) Subject: [ARM] cache align destination pointer when copying memory for some processors X-Git-Tag: v2.6.27-rc1~850^2~2^8~51 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2239aff6ab2b95af1f628eee7a809f21c41605b3;p=linux-2.6 [ARM] cache align destination pointer when copying memory for some processors The implementation for memory copy functions on ARM had a (disabled) provision for aligning the source pointer before loading registers with data. Turns out that aligning the _destination_ pointer is much more useful, as the read side is already sufficiently helped with the use of preload. So this changes the definition of the CALGN() macro to target the destination pointer instead, and turns it on for Feroceon processors where the gain is very noticeable. Signed-off-by: Nicolas Pitre Signed-off-by: Lennert Buytenhek --- diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S index cab355c0c1..139cce6460 100644 --- a/arch/arm/lib/copy_template.S +++ b/arch/arm/lib/copy_template.S @@ -12,14 +12,6 @@ * published by the Free Software Foundation. */ -/* - * This can be used to enable code to cacheline align the source pointer. - * Experiments on tested architectures (StrongARM and XScale) didn't show - * this a worthwhile thing to do. That might be different in the future. - */ -//#define CALGN(code...) code -#define CALGN(code...) - /* * Theory of operation * ------------------- @@ -82,7 +74,7 @@ stmfd sp!, {r5 - r8} blt 5f - CALGN( ands ip, r1, #31 ) + CALGN( ands ip, r0, #31 ) CALGN( rsb r3, ip, #32 ) CALGN( sbcnes r4, r3, r2 ) @ C is always set here CALGN( bcs 2f ) @@ -168,7 +160,7 @@ subs r2, r2, #28 blt 14f - CALGN( ands ip, r1, #31 ) + CALGN( ands ip, r0, #31 ) CALGN( rsb ip, ip, #32 ) CALGN( sbcnes r4, ip, r2 ) @ C is always set here CALGN( subcc r2, r2, ip ) diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S index 018522c3ff..2e301b7bd8 100644 --- a/arch/arm/lib/memmove.S +++ b/arch/arm/lib/memmove.S @@ -13,14 +13,6 @@ #include #include -/* - * This can be used to enable code to cacheline align the source pointer. - * Experiments on tested architectures (StrongARM and XScale) didn't show - * this a worthwhile thing to do. That might be different in the future. - */ -//#define CALGN(code...) code -#define CALGN(code...) - .text /* @@ -55,7 +47,7 @@ ENTRY(memmove) stmfd sp!, {r5 - r8} blt 5f - CALGN( ands ip, r1, #31 ) + CALGN( ands ip, r0, #31 ) CALGN( sbcnes r4, ip, r2 ) @ C is always set here CALGN( bcs 2f ) CALGN( adr r4, 6f ) @@ -139,7 +131,7 @@ ENTRY(memmove) subs r2, r2, #28 blt 14f - CALGN( ands ip, r1, #31 ) + CALGN( ands ip, r0, #31 ) CALGN( sbcnes r4, ip, r2 ) @ C is always set here CALGN( subcc r2, r2, ip ) CALGN( bcc 15f ) diff --git a/include/asm-arm/assembler.h b/include/asm-arm/assembler.h index fce8328208..911393b2c6 100644 --- a/include/asm-arm/assembler.h +++ b/include/asm-arm/assembler.h @@ -55,6 +55,21 @@ #define PLD(code...) #endif +/* + * This can be used to enable code to cacheline align the destination + * pointer when bulk writing to memory. Experiments on StrongARM and + * XScale didn't show this a worthwhile thing to do when the cache is not + * set to write-allocate (this would need further testing on XScale when WA + * is used). + * + * On Feroceon there is much to gain however, regardless of cache mode. + */ +#ifdef CONFIG_CPU_FEROCEON +#define CALGN(code...) code +#else +#define CALGN(code...) +#endif + /* * Enable and disable interrupts */