From: Paul Mackerras Date: Mon, 27 Mar 2006 04:06:20 +0000 (+1100) Subject: powerpc: Simplify pSeries idle loop X-Git-Tag: v2.6.17-rc1~165^2~34 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fbd7740fdfdf9475f92287a84085a1913541cd5d;p=linux-2.6 powerpc: Simplify pSeries idle loop Since pSeries only wants to do something different in the idle loop when there is no work to do, we can simplify the code by implementing ppc_md.power_save functions instead of complete idle loops. There are two versions: one for shared-processor partitions and one for dedicated- processor partitions. With this we also do a cede_processor() call on dedicated processor partitions if the poll_pending() call indicates that the hypervisor has work it wants to do. Signed-off-by: Paul Mackerras --- diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 44d5c7fdcd..213bf98324 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -81,8 +81,8 @@ extern void find_udbg_vterm(void); int fwnmi_active; /* TRUE if an FWNMI handler is present */ -static void pseries_shared_idle(void); -static void pseries_dedicated_idle(void); +static void pseries_shared_idle_sleep(void); +static void pseries_dedicated_idle_sleep(void); struct mpic *pSeries_mpic; @@ -236,14 +236,13 @@ static void __init pSeries_setup_arch(void) vpa_init(boot_cpuid); if (get_lppaca()->shared_proc) { printk(KERN_INFO "Using shared processor idle loop\n"); - ppc_md.idle_loop = pseries_shared_idle; + ppc_md.power_save = pseries_shared_idle_sleep; } else { printk(KERN_INFO "Using dedicated idle loop\n"); - ppc_md.idle_loop = pseries_dedicated_idle; + ppc_md.power_save = pseries_dedicated_idle_sleep; } } else { printk(KERN_INFO "Using default idle loop\n"); - ppc_md.idle_loop = default_idle; } if (firmware_has_feature(FW_FEATURE_LPAR)) @@ -393,136 +392,87 @@ static int __init pSeries_probe(int platform) DECLARE_PER_CPU(unsigned long, smt_snooze_delay); -static inline void dedicated_idle_sleep(unsigned int cpu) -{ - struct lppaca *plppaca = &lppaca[cpu ^ 1]; - - /* Only sleep if the other thread is not idle */ - if (!(plppaca->idle)) { - local_irq_disable(); - - /* - * We are about to sleep the thread and so wont be polling any - * more. - */ - clear_thread_flag(TIF_POLLING_NRFLAG); - smp_mb__after_clear_bit(); - - /* - * SMT dynamic mode. Cede will result in this thread going - * dormant, if the partner thread is still doing work. Thread - * wakes up if partner goes idle, an interrupt is presented, or - * a prod occurs. Returning from the cede enables external - * interrupts. - */ - if (!need_resched()) - cede_processor(); - else - local_irq_enable(); - set_thread_flag(TIF_POLLING_NRFLAG); - } else { - /* - * Give the HV an opportunity at the processor, since we are - * not doing any work. - */ - poll_pending(); - } -} - -static void pseries_dedicated_idle(void) +static void pseries_dedicated_idle_sleep(void) { unsigned int cpu = smp_processor_id(); unsigned long start_snooze; unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); - set_thread_flag(TIF_POLLING_NRFLAG); - - while (1) { - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - get_lppaca()->idle = 1; - - if (!need_resched()) { - start_snooze = get_tb() + - *smt_snooze_delay * tb_ticks_per_usec; - - while (!need_resched() && !cpu_is_offline(cpu)) { - ppc64_runlatch_off(); - - /* - * Go into low thread priority and possibly - * low power mode. - */ - HMT_low(); - HMT_very_low(); - - if (*smt_snooze_delay != 0 && - get_tb() > start_snooze) { - HMT_medium(); - dedicated_idle_sleep(cpu); - } - - } - - HMT_medium(); - } - get_lppaca()->idle = 0; - ppc64_runlatch_on(); + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + get_lppaca()->idle = 1; - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + /* + * We come in with interrupts disabled, and need_resched() + * has been checked recently. If we should poll for a little + * while, do so. + */ + if (*smt_snooze_delay) { + start_snooze = get_tb() + + *smt_snooze_delay * tb_ticks_per_usec; + local_irq_enable(); + set_thread_flag(TIF_POLLING_NRFLAG); - if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) - cpu_die(); + while (get_tb() < start_snooze) { + if (need_resched() || cpu_is_offline(cpu)) + goto out; + ppc64_runlatch_off(); + HMT_low(); + HMT_very_low(); + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb(); + local_irq_disable(); + if (need_resched() || cpu_is_offline(cpu)) + goto out; } + + /* + * Cede if the other thread is not idle, so that it can + * go single-threaded. If the other thread is idle, + * we ask the hypervisor if it has pending work it + * wants to do and cede if it does. Otherwise we keep + * polling in order to reduce interrupt latency. + * + * Doing the cede when the other thread is active will + * result in this thread going dormant, meaning the other + * thread gets to run in single-threaded (ST) mode, which + * is slightly faster than SMT mode with this thread at + * very low priority. The cede enables interrupts, which + * doesn't matter here. + */ + if (!lppaca[cpu ^ 1].idle || poll_pending() == H_Pending) + cede_processor(); + +out: + HMT_medium(); + get_lppaca()->idle = 0; } -static void pseries_shared_idle(void) +static void pseries_shared_idle_sleep(void) { unsigned int cpu = smp_processor_id(); - while (1) { - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - get_lppaca()->idle = 1; - - while (!need_resched() && !cpu_is_offline(cpu)) { - local_irq_disable(); - ppc64_runlatch_off(); - - /* - * Yield the processor to the hypervisor. We return if - * an external interrupt occurs (which are driven prior - * to returning here) or if a prod occurs from another - * processor. When returning here, external interrupts - * are enabled. - * - * Check need_resched() again with interrupts disabled - * to avoid a race. - */ - if (!need_resched()) - cede_processor(); - else - local_irq_enable(); - - HMT_medium(); - } - - get_lppaca()->idle = 0; - ppc64_runlatch_on(); + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + get_lppaca()->idle = 1; - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. When returning here, external interrupts + * are enabled. + */ + cede_processor(); - if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) - cpu_die(); - } + get_lppaca()->idle = 0; } static int pSeries_pci_probe_mode(struct pci_bus *bus) diff --git a/include/asm-powerpc/hvcall.h b/include/asm-powerpc/hvcall.h index 38ca9ad611..b72c04f3f5 100644 --- a/include/asm-powerpc/hvcall.h +++ b/include/asm-powerpc/hvcall.h @@ -9,6 +9,7 @@ #define H_Closed 2 /* Resource closed */ #define H_Constrained 4 /* Resource request constrained to max allowed */ #define H_InProgress 14 /* Kind of like busy */ +#define H_Pending 17 /* returned from H_POLL_PENDING */ #define H_Continue 18 /* Returned from H_Join on success */ #define H_LongBusyStartRange 9900 /* Start of long busy range */ #define H_LongBusyOrder1msec 9900 /* Long busy, hint that 1msec is a good time to retry */