From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 15 Oct 2007 15:00:06 +0000 (+0200)
Subject: sched: remove wait_runtime limit
X-Git-Tag: v2.6.24-rc1~1289^2~108
X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e22f5bbf86d8cce710d5c8ba5bf57832e73aab8c;p=linux-2.6

sched: remove wait_runtime limit

remove the wait_runtime-limit fields and the code depending on them, now
that the math has been changed over to rely on the vruntime metric.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5e5c457fba..353630d6ae 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -905,7 +905,6 @@ struct sched_entity {
 	u64			vruntime;
 	u64			prev_sum_exec_runtime;
 	u64			wait_start_fair;
-	u64			sleep_start_fair;
 
 #ifdef CONFIG_SCHEDSTATS
 	u64			wait_start;
diff --git a/kernel/sched.c b/kernel/sched.c
index 3a4ac0b75f..21cc3b2be0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -180,7 +180,6 @@ struct cfs_rq {
 	u64 exec_clock;
 	u64 min_vruntime;
 	s64 wait_runtime;
-	u64 sleeper_bonus;
 	unsigned long wait_runtime_overruns, wait_runtime_underruns;
 
 	struct rb_root tasks_timeline;
@@ -673,19 +672,6 @@ static inline void resched_task(struct task_struct *p)
 }
 #endif
 
-static u64 div64_likely32(u64 divident, unsigned long divisor)
-{
-#if BITS_PER_LONG == 32
-	if (likely(divident <= 0xffffffffULL))
-		return (u32)divident / divisor;
-	do_div(divident, divisor);
-
-	return divident;
-#else
-	return divident / divisor;
-#endif
-}
-
 #if BITS_PER_LONG == 32
 # define WMULT_CONST	(~0UL)
 #else
@@ -1016,8 +1002,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 
 	if (p->se.wait_start_fair)
 		p->se.wait_start_fair -= fair_clock_offset;
-	if (p->se.sleep_start_fair)
-		p->se.sleep_start_fair -= fair_clock_offset;
 
 #ifdef CONFIG_SCHEDSTATS
 	if (p->se.wait_start)
@@ -1592,7 +1576,6 @@ static void __sched_fork(struct task_struct *p)
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
 	p->se.wait_runtime		= 0;
-	p->se.sleep_start_fair		= 0;
 
 #ifdef CONFIG_SCHEDSTATS
 	p->se.wait_start		= 0;
@@ -6582,7 +6565,6 @@ void normalize_rt_tasks(void)
 		p->se.wait_runtime		= 0;
 		p->se.exec_start		= 0;
 		p->se.wait_start_fair		= 0;
-		p->se.sleep_start_fair		= 0;
 #ifdef CONFIG_SCHEDSTATS
 		p->se.wait_start		= 0;
 		p->se.sleep_start		= 0;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 62965f0ae3..3350169a7d 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -148,7 +148,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	P(wait_runtime);
 	P(wait_runtime_overruns);
 	P(wait_runtime_underruns);
-	P(sleeper_bonus);
 #undef P
 
 	print_cfs_rq_runtime_sum(m, cpu, cfs_rq);
@@ -272,7 +271,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	P(se.wait_runtime);
 	P(se.wait_start_fair);
 	P(se.exec_start);
-	P(se.sleep_start_fair);
 	P(se.vruntime);
 	P(se.sum_exec_runtime);
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 72f202a8be..a94189c42d 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -249,41 +249,11 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	return period;
 }
 
-static inline void
-limit_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-	long limit = sysctl_sched_runtime_limit;
-
-	/*
-	 * Niced tasks have the same history dynamic range as
-	 * non-niced tasks:
-	 */
-	if (unlikely(se->wait_runtime > limit)) {
-		se->wait_runtime = limit;
-		schedstat_inc(se, wait_runtime_overruns);
-		schedstat_inc(cfs_rq, wait_runtime_overruns);
-	}
-	if (unlikely(se->wait_runtime < -limit)) {
-		se->wait_runtime = -limit;
-		schedstat_inc(se, wait_runtime_underruns);
-		schedstat_inc(cfs_rq, wait_runtime_underruns);
-	}
-}
-
-static inline void
-__add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
-{
-	se->wait_runtime += delta;
-	schedstat_add(se, sum_wait_runtime, delta);
-	limit_wait_runtime(cfs_rq, se);
-}
-
 static void
 add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
 {
-	schedstat_add(cfs_rq, wait_runtime, -se->wait_runtime);
-	__add_wait_runtime(cfs_rq, se, delta);
-	schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
+	se->wait_runtime += delta;
+	schedstat_add(cfs_rq, wait_runtime, delta);
 }
 
 /*
@@ -294,7 +264,7 @@ static inline void
 __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	      unsigned long delta_exec)
 {
-	unsigned long delta, delta_fair, delta_mine, delta_exec_weighted;
+	unsigned long delta_fair, delta_mine, delta_exec_weighted;
 	struct load_weight *lw = &cfs_rq->load;
 	unsigned long load = lw->weight;
 
@@ -318,14 +288,6 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	delta_fair = calc_delta_fair(delta_exec, lw);
 	delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw);
 
-	if (cfs_rq->sleeper_bonus > sysctl_sched_min_granularity) {
-		delta = min((u64)delta_mine, cfs_rq->sleeper_bonus);
-		delta = min(delta, (unsigned long)(
-			(long)sysctl_sched_runtime_limit - curr->wait_runtime));
-		cfs_rq->sleeper_bonus -= delta;
-		delta_mine -= delta;
-	}
-
 	cfs_rq->fair_clock += delta_fair;
 	/*
 	 * We executed delta_exec amount of time on the CPU,
@@ -461,58 +423,8 @@ update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */
 
-static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se,
-			      unsigned long delta_fair)
-{
-	unsigned long load = cfs_rq->load.weight;
-	long prev_runtime;
-
-	/*
-	 * Do not boost sleepers if there's too much bonus 'in flight'
-	 * already:
-	 */
-	if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit))
-		return;
-
-	if (sched_feat(SLEEPER_LOAD_AVG))
-		load = rq_of(cfs_rq)->cpu_load[2];
-
-	/*
-	 * Fix up delta_fair with the effect of us running
-	 * during the whole sleep period:
-	 */
-	if (sched_feat(SLEEPER_AVG))
-		delta_fair = div64_likely32((u64)delta_fair * load,
-						load + se->load.weight);
-
-	delta_fair = calc_weighted(delta_fair, se);
-
-	prev_runtime = se->wait_runtime;
-	__add_wait_runtime(cfs_rq, se, delta_fair);
-	delta_fair = se->wait_runtime - prev_runtime;
-
-	/*
-	 * Track the amount of bonus we've given to sleepers:
-	 */
-	cfs_rq->sleeper_bonus += delta_fair;
-}
-
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	struct task_struct *tsk = task_of(se);
-	unsigned long delta_fair;
-
-	if ((entity_is_task(se) && tsk->policy == SCHED_BATCH) ||
-			 !sched_feat(FAIR_SLEEPERS))
-		return;
-
-	delta_fair = (unsigned long)min((u64)(2*sysctl_sched_runtime_limit),
-		(u64)(cfs_rq->fair_clock - se->sleep_start_fair));
-
-	__enqueue_sleeper(cfs_rq, se, delta_fair);
-
-	se->sleep_start_fair = 0;
-
 #ifdef CONFIG_SCHEDSTATS
 	if (se->sleep_start) {
 		u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
@@ -544,6 +456,8 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		 * time that the task spent sleeping:
 		 */
 		if (unlikely(prof_on == SLEEP_PROFILING)) {
+			struct task_struct *tsk = task_of(se);
+
 			profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk),
 				     delta >> 20);
 		}
@@ -604,7 +518,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 {
 	update_stats_dequeue(cfs_rq, se);
 	if (sleep) {
-		se->sleep_start_fair = cfs_rq->fair_clock;
 #ifdef CONFIG_SCHEDSTATS
 		if (entity_is_task(se)) {
 			struct task_struct *tsk = task_of(se);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9b1b0d4ff9..97b15c2740 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -264,17 +264,6 @@ static ctl_table kern_table[] = {
 		.extra1		= &min_wakeup_granularity_ns,
 		.extra2		= &max_wakeup_granularity_ns,
 	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_runtime_limit_ns",
-		.data		= &sysctl_sched_runtime_limit,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec,
-		.extra1		= &min_sched_granularity_ns,
-		.extra2		= &max_sched_granularity_ns,
-	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_child_runs_first",