From f088fc84f94c1a36943e28ad704a9a740a35f877 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 5 Apr 2006 09:45:47 +0100 Subject: [PATCH] [MIPS] FPU affinity for MT ASE. Signed-off-by: Ralf Baechle --- arch/mips/Kconfig | 5 +++++ arch/mips/kernel/process.c | 11 +++++++++++ arch/mips/kernel/scall32-o32.S | 11 +++++++++++ arch/mips/kernel/setup.c | 5 ++++- arch/mips/kernel/smp-mt.c | 11 +++++++++++ arch/mips/kernel/traps.c | 30 ++++++++++++++++++++++++++++++ include/asm-mips/cpu-features.h | 2 +- include/asm-mips/fpu.h | 4 ++++ include/asm-mips/processor.h | 16 ++++++++++++++++ include/asm-mips/system.h | 32 ++++++++++++++++++++++++++++++++ 10 files changed, 125 insertions(+), 2 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index f9be549645..87f0b79c6b 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1464,6 +1464,11 @@ config MIPS_VPE_LOADER endchoice +config MIPS_MT_FPAFF + bool "Dynamic FPU affinity for FP-intensive threads" + depends on MIPS_MT + default y + config MIPS_VPE_LOADER_TOM bool "Load VPE program into memory hidden from linux" depends on MIPS_VPE_LOADER diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 8b393df460..199a06e873 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -185,6 +185,17 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, childregs->cp0_status &= ~(ST0_CU2|ST0_CU1); clear_tsk_thread_flag(p, TIF_USEDFPU); +#ifdef CONFIG_MIPS_MT_FPAFF + /* + * FPU affinity support is cleaner if we track the + * user-visible CPU affinity from the very beginning. + * The generic cpus_allowed mask will already have + * been copied from the parent before copy_thread + * is invoked. + */ + p->thread.user_cpus_allowed = p->cpus_allowed; +#endif /* CONFIG_MIPS_MT_FPAFF */ + if (clone_flags & CLONE_SETTLS) ti->tp_value = regs->regs[7]; diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 4e36b87be1..a0ac0e5f61 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -569,8 +569,19 @@ einval: li v0, -EINVAL sys sys_tkill 2 sys sys_sendfile64 5 sys sys_futex 6 +#ifdef CONFIG_MIPS_MT_FPAFF + /* + * For FPU affinity scheduling on MIPS MT processors, we need to + * intercept sys_sched_xxxaffinity() calls until we get a proper hook + * in kernel/sched.c. Considered only temporary we only support these + * hooks for the 32-bit kernel - there is no MIPS64 MT processor atm. + */ + sys mipsmt_sys_sched_setaffinity 3 + sys mipsmt_sys_sched_getaffinity 3 +#else sys sys_sched_setaffinity 3 sys sys_sched_getaffinity 3 /* 4240 */ +#endif /* CONFIG_MIPS_MT_FPAFF */ sys sys_io_setup 2 sys sys_io_destroy 1 sys sys_io_getevents 5 diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index dcbfd27071..bcf1b10e51 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -529,7 +529,10 @@ void __init setup_arch(char **cmdline_p) int __init fpu_disable(char *s) { - cpu_data[0].options &= ~MIPS_CPU_FPU; + int i; + + for (i = 0; i < NR_CPUS; i++) + cpu_data[i].options &= ~MIPS_CPU_FPU; return 1; } diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index 19b8e4b31b..57770902b9 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c @@ -150,6 +150,11 @@ void plat_smp_setup(void) unsigned long val; int i, num; +#ifdef CONFIG_MIPS_MT_FPAFF + /* If we have an FPU, enroll ourselves in the FPU-full mask */ + if (cpu_has_fpu) + cpu_set(0, mt_fpu_cpumask); +#endif /* CONFIG_MIPS_MT_FPAFF */ if (!cpu_has_mipsmt) return; @@ -312,6 +317,12 @@ void prom_smp_finish(void) { write_c0_compare(read_c0_count() + (8* mips_hpt_frequency/HZ)); +#ifdef CONFIG_MIPS_MT_FPAFF + /* If we have an FPU, enroll ourselves in the FPU-full mask */ + if (cpu_has_fpu) + cpu_set(smp_processor_id(), mt_fpu_cpumask); +#endif /* CONFIG_MIPS_MT_FPAFF */ + local_irq_enable(); } diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 6336fe8008..e9902d89dc 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -758,6 +758,36 @@ asmlinkage void do_cpu(struct pt_regs *regs) ¤t->thread.fpu.soft); if (sig) force_sig(sig, current); +#ifdef CONFIG_MIPS_MT_FPAFF + else { + /* + * MIPS MT processors may have fewer FPU contexts + * than CPU threads. If we've emulated more than + * some threshold number of instructions, force + * migration to a "CPU" that has FP support. + */ + if(mt_fpemul_threshold > 0 + && ((current->thread.emulated_fp++ + > mt_fpemul_threshold))) { + /* + * If there's no FPU present, or if the + * application has already restricted + * the allowed set to exclude any CPUs + * with FPUs, we'll skip the procedure. + */ + if (cpus_intersects(current->cpus_allowed, + mt_fpu_cpumask)) { + cpumask_t tmask; + + cpus_and(tmask, + current->thread.user_cpus_allowed, + mt_fpu_cpumask); + set_cpus_allowed(current, tmask); + current->thread.mflags |= MF_FPUBOUND; + } + } + } +#endif /* CONFIG_MIPS_MT_FPAFF */ } return; diff --git a/include/asm-mips/cpu-features.h b/include/asm-mips/cpu-features.h index 3f2b6d9ac4..254e11ed24 100644 --- a/include/asm-mips/cpu-features.h +++ b/include/asm-mips/cpu-features.h @@ -40,7 +40,7 @@ #define cpu_has_sb1_cache (cpu_data[0].options & MIPS_CPU_SB1_CACHE) #endif #ifndef cpu_has_fpu -#define cpu_has_fpu (cpu_data[0].options & MIPS_CPU_FPU) +#define cpu_has_fpu (current_cpu_data.options & MIPS_CPU_FPU) #endif #ifndef cpu_has_32fpr #define cpu_has_32fpr (cpu_data[0].options & MIPS_CPU_32FPR) diff --git a/include/asm-mips/fpu.h b/include/asm-mips/fpu.h index 9c828b1f82..b0f50015e2 100644 --- a/include/asm-mips/fpu.h +++ b/include/asm-mips/fpu.h @@ -21,6 +21,10 @@ #include #include +#ifdef CONFIG_MIPS_MT_FPAFF +#include +#endif + struct sigcontext; struct sigcontext32; diff --git a/include/asm-mips/processor.h b/include/asm-mips/processor.h index 786651340d..0fb75f0762 100644 --- a/include/asm-mips/processor.h +++ b/include/asm-mips/processor.h @@ -134,6 +134,12 @@ struct thread_struct { /* Saved fpu/fpu emulator stuff. */ union mips_fpu_union fpu; +#ifdef CONFIG_MIPS_MT_FPAFF + /* Emulated instruction count */ + unsigned long emulated_fp; + /* Saved per-thread scheduler affinity mask */ + cpumask_t user_cpus_allowed; +#endif /* CONFIG_MIPS_MT_FPAFF */ /* Saved state of the DSP ASE, if available. */ struct mips_dsp_state dsp; @@ -159,6 +165,12 @@ struct thread_struct { #define MF_N32 MF_32BIT_ADDR #define MF_N64 0 +#ifdef CONFIG_MIPS_MT_FPAFF +#define FPAFF_INIT 0, INIT_CPUMASK, +#else +#define FPAFF_INIT +#endif /* CONFIG_MIPS_MT_FPAFF */ + #define INIT_THREAD { \ /* \ * saved main processor registers \ @@ -173,6 +185,10 @@ struct thread_struct { * saved fpu/fpu emulator stuff \ */ \ INIT_FPU, \ + /* \ + * fpu affinity state (null if not FPAFF) \ + */ \ + FPAFF_INIT \ /* \ * saved dsp/dsp emulator stuff \ */ \ diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h index 39026690d9..261f71d16a 100644 --- a/include/asm-mips/system.h +++ b/include/asm-mips/system.h @@ -155,6 +155,37 @@ extern asmlinkage void *resume(void *last, void *next, void *next_ti); struct task_struct; +#ifdef CONFIG_MIPS_MT_FPAFF + +/* + * Handle the scheduler resume end of FPU affinity management. We do this + * inline to try to keep the overhead down. If we have been forced to run on + * a "CPU" with an FPU because of a previous high level of FP computation, + * but did not actually use the FPU during the most recent time-slice (CU1 + * isn't set), we undo the restriction on cpus_allowed. + * + * We're not calling set_cpus_allowed() here, because we have no need to + * force prompt migration - we're already switching the current CPU to a + * different thread. + */ + +#define switch_to(prev,next,last) \ +do { \ + if (cpu_has_fpu && \ + (prev->thread.mflags & MF_FPUBOUND) && \ + (!(KSTK_STATUS(prev) & ST0_CU1))) { \ + prev->thread.mflags &= ~MF_FPUBOUND; \ + prev->cpus_allowed = prev->thread.user_cpus_allowed; \ + } \ + if (cpu_has_dsp) \ + __save_dsp(prev); \ + next->thread.emulated_fp = 0; \ + (last) = resume(prev, next, next->thread_info); \ + if (cpu_has_dsp) \ + __restore_dsp(current); \ +} while(0) + +#else #define switch_to(prev,next,last) \ do { \ if (cpu_has_dsp) \ @@ -163,6 +194,7 @@ do { \ if (cpu_has_dsp) \ __restore_dsp(current); \ } while(0) +#endif /* * On SMP systems, when the scheduler does migration-cost autodetection, -- 2.39.5