From: Peter Zijlstra Date: Fri, 25 Jan 2008 20:08:31 +0000 (+0100) Subject: sched: rt throttling vs no_hz X-Git-Tag: v2.6.25-rc1~1237^2~22 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=48d5e258216f1c7713633439beb98a38c7290649;p=linux-2.6 sched: rt throttling vs no_hz We need to teach no_hz about the rt throttling because it's tick-driven. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- diff --git a/include/linux/sched.h b/include/linux/sched.h index 04eecbf024..acadcab89e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -230,6 +230,8 @@ static inline int select_nohz_load_balancer(int cpu) } #endif +extern unsigned long rt_needs_cpu(int cpu); + /* * Only dump TASK_* tasks. (0 for all tasks) */ diff --git a/kernel/sched.c b/kernel/sched.c index 5ea2c533b4..22712b2e05 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -442,6 +442,7 @@ struct rq { struct cfs_rq cfs; struct rt_rq rt; u64 rt_period_expire; + int rt_throttled; #ifdef CONFIG_FAIR_GROUP_SCHED /* list of leaf cfs_rq on this cpu: */ @@ -594,6 +595,23 @@ static void update_rq_clock(struct rq *rq) #define task_rq(p) cpu_rq(task_cpu(p)) #define cpu_curr(cpu) (cpu_rq(cpu)->curr) +unsigned long rt_needs_cpu(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + u64 delta; + + if (!rq->rt_throttled) + return 0; + + if (rq->clock > rq->rt_period_expire) + return 1; + + delta = rq->rt_period_expire - rq->clock; + do_div(delta, NSEC_PER_SEC / HZ); + + return (unsigned long)delta; +} + /* * Tunables that become constants when CONFIG_SCHED_DEBUG is off: */ @@ -7102,9 +7120,11 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) /* delimiter for bitsearch: */ __set_bit(MAX_RT_PRIO, array->bitmap); +#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED + rt_rq->highest_prio = MAX_RT_PRIO; +#endif #ifdef CONFIG_SMP rt_rq->rt_nr_migratory = 0; - rt_rq->highest_prio = MAX_RT_PRIO; rt_rq->overloaded = 0; #endif @@ -7191,6 +7211,7 @@ void __init sched_init(void) 
&per_cpu(init_sched_rt_entity, i), i, 1); #endif rq->rt_period_expire = 0; + rq->rt_throttled = 0; for (j = 0; j < CPU_LOAD_IDX_MAX; j++) rq->cpu_load[j] = 0; diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 1144bf5566..8bfdb3f8a5 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -175,7 +175,11 @@ static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq) ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT; if (rt_rq->rt_time > ratio) { + struct rq *rq = rq_of_rt_rq(rt_rq); + + rq->rt_throttled = 1; rt_rq->rt_throttled = 1; + sched_rt_ratio_dequeue(rt_rq); return 1; } @@ -183,18 +187,6 @@ static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq) return 0; } -static void __update_sched_rt_period(struct rt_rq *rt_rq, u64 period) -{ - unsigned long rt_ratio = sched_rt_ratio(rt_rq); - u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT; - - rt_rq->rt_time -= min(rt_rq->rt_time, ratio); - if (rt_rq->rt_throttled) { - rt_rq->rt_throttled = 0; - sched_rt_ratio_enqueue(rt_rq); - } -} - static void update_sched_rt_period(struct rq *rq) { struct rt_rq *rt_rq; @@ -204,8 +196,18 @@ static void update_sched_rt_period(struct rq *rq) period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC; rq->rt_period_expire += period; - for_each_leaf_rt_rq(rt_rq, rq) - __update_sched_rt_period(rt_rq, period); + for_each_leaf_rt_rq(rt_rq, rq) { + unsigned long rt_ratio = sched_rt_ratio(rt_rq); + u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT; + + rt_rq->rt_time -= min(rt_rq->rt_time, ratio); + if (rt_rq->rt_throttled) { + rt_rq->rt_throttled = 0; + sched_rt_ratio_enqueue(rt_rq); + } + } + + rq->rt_throttled = 0; } } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index cb89fa8db1..5f9fb645b7 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -153,6 +153,7 @@ void tick_nohz_update_jiffies(void) void tick_nohz_stop_sched_tick(void) { unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; + unsigned long rt_jiffies; struct 
tick_sched *ts; ktime_t last_update, expires, now, delta; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; @@ -216,6 +217,10 @@ void tick_nohz_stop_sched_tick(void) next_jiffies = get_next_timer_interrupt(last_jiffies); delta_jiffies = next_jiffies - last_jiffies; + rt_jiffies = rt_needs_cpu(cpu); + if (rt_jiffies && rt_jiffies < delta_jiffies) + delta_jiffies = rt_jiffies; + if (rcu_needs_cpu(cpu)) delta_jiffies = 1; /*