From fa85ae2418e6843953107cd6a06f645752829bc0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 25 Jan 2008 21:08:29 +0100 Subject: [PATCH] sched: rt time limit Very simple time limit on the realtime scheduling classes. Allow the rq's realtime class to consume sched_rt_ratio of every sched_rt_period slice. If the class exceeds this quota the fair class will preempt the realtime class. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ kernel/sched.c | 70 ++++++++++++++++++++++++++++++------------- kernel/sched_rt.c | 53 ++++++++++++++++++++++++++++++++ kernel/sysctl.c | 18 ++++++++++- 4 files changed, 122 insertions(+), 21 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 43e0339d65..d5ea144df8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1490,6 +1490,8 @@ extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; +extern unsigned int sysctl_sched_rt_period; +extern unsigned int sysctl_sched_rt_ratio; #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) extern unsigned int sysctl_sched_min_bal_int_shares; extern unsigned int sysctl_sched_max_bal_int_shares; diff --git a/kernel/sched.c b/kernel/sched.c index 17f93d3eda..e9a7beee9b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -342,13 +342,14 @@ struct cfs_rq { /* Real-Time classes' related field in a runqueue: */ struct rt_rq { struct rt_prio_array active; - int rt_load_balance_idx; - struct list_head *rt_load_balance_head, *rt_load_balance_curr; unsigned long rt_nr_running; +#ifdef CONFIG_SMP unsigned long rt_nr_migratory; - /* highest queued rt task prio */ - int highest_prio; + int highest_prio; /* highest queued rt task prio */ int overloaded; +#endif + u64 rt_time; /* execution time charged to rt tasks on this rq: update_curr_rt adds delta_exec, update_sched_rt_period subtracts up to one quota per elapsed period */ + u64 rt_throttled; /* 0 while not throttled, otherwise the rq->clock value after which update_sched_rt_period lifts the throttle */ }; #ifdef CONFIG_SMP @@ -415,6 +416,7 @@ struct rq { struct list_head leaf_cfs_rq_list; #endif 
struct rt_rq rt; + u64 rt_period_expire; /* rq->clock value at which the current rt accounting period ends; advanced one period at a time by update_sched_rt_period */ /* * This is part of a global counter where only the total sum @@ -600,6 +602,21 @@ const_debug unsigned int sysctl_sched_features = */ const_debug unsigned int sysctl_sched_nr_migrate = 32; +/* + * period over which we measure -rt task cpu usage in ms. + * default: 1s + */ +const_debug unsigned int sysctl_sched_rt_period = 1000; + +#define SCHED_RT_FRAC_SHIFT 16 +#define SCHED_RT_FRAC (1UL << SCHED_RT_FRAC_SHIFT) + +/* + * ratio of time -rt tasks may consume. + * default: 100% + */ +const_debug unsigned int sysctl_sched_rt_ratio = SCHED_RT_FRAC; + /* * For kernel-internal use: high-speed (but slightly incorrect) per-cpu * clock constructed from sched_clock(): @@ -3674,8 +3691,8 @@ void scheduler_tick(void) rq->clock = next_tick; rq->tick_timestamp = rq->clock; update_cpu_load(rq); - if (curr != rq->idle) /* FIXME: needed? */ - curr->sched_class->task_tick(rq, curr, 0); + curr->sched_class->task_tick(rq, curr, 0); + update_sched_rt_period(rq); spin_unlock(&rq->lock); #ifdef CONFIG_SMP @@ -7041,6 +7058,29 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq) cfs_rq->min_vruntime = (u64)(-(1LL << 20)); } +static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) /* Put rt_rq into its empty initial state: every per-priority list empty, every priority bit cleared, the delimiter bit set, and the throttling counters zeroed. The rq argument is not used in this body. */ +{ + struct rt_prio_array *array; + int i; + + array = &rt_rq->active; + for (i = 0; i < MAX_RT_PRIO; i++) { + INIT_LIST_HEAD(array->queue + i); + __clear_bit(i, array->bitmap); + } + /* delimiter for bitsearch: */ + __set_bit(MAX_RT_PRIO, array->bitmap); + +#ifdef CONFIG_SMP + rt_rq->rt_nr_migratory = 0; + rt_rq->highest_prio = MAX_RT_PRIO; + rt_rq->overloaded = 0; +#endif + + rt_rq->rt_time = 0; /* nothing charged against the rt quota yet */ + rt_rq->rt_throttled = 0; /* 0 means not throttled */ +} + void __init sched_init(void) { int highest_cpu = 0; @@ -7051,7 +7091,6 @@ void __init sched_init(void) #endif for_each_possible_cpu(i) { - struct rt_prio_array *array; struct rq *rq; rq = cpu_rq(i); @@ -7083,6 +7122,8 @@ void __init sched_init(void) } init_task_group.shares = init_task_group_load; #endif + init_rt_rq(&rq->rt, rq); + 
rq->rt_period_expire = 0; for (j = 0; j < CPU_LOAD_IDX_MAX; j++) rq->cpu_load[j] = 0; @@ -7095,22 +7136,11 @@ void __init sched_init(void) rq->cpu = i; rq->migration_thread = NULL; INIT_LIST_HEAD(&rq->migration_queue); - rq->rt.highest_prio = MAX_RT_PRIO; - rq->rt.overloaded = 0; rq_attach_root(rq, &def_root_domain); #endif init_rq_hrtick(rq); - atomic_set(&rq->nr_iowait, 0); - - array = &rq->rt.active; - for (j = 0; j < MAX_RT_PRIO; j++) { - INIT_LIST_HEAD(array->queue + j); - __clear_bit(j, array->bitmap); - } highest_cpu = i; - /* delimiter for bitsearch: */ - __set_bit(MAX_RT_PRIO, array->bitmap); } set_load_weight(&init_task); @@ -7282,7 +7312,7 @@ void set_curr_task(int cpu, struct task_struct *p) #ifdef CONFIG_SMP /* * distribute shares of all task groups among their schedulable entities, - * to reflect load distrbution across cpus. + * to reflect load distribution across cpus. */ static int rebalance_shares(struct sched_domain *sd, int this_cpu) { @@ -7349,7 +7379,7 @@ static int rebalance_shares(struct sched_domain *sd, int this_cpu) * sysctl_sched_max_bal_int_shares represents the maximum interval between * consecutive calls to rebalance_shares() in the same sched domain. * - * These settings allows for the appropriate tradeoff between accuracy of + * These settings allows for the appropriate trade-off between accuracy of * fairness and the associated overhead. 
* */ diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 83fbbcb801..fd10d965aa 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -45,6 +45,50 @@ static void update_rt_migration(struct rq *rq) } #endif /* CONFIG_SMP */ +static int sched_rt_ratio_exceeded(struct rq *rq, struct rt_rq *rt_rq) /* Returns 1 when rt tasks on this rq are over their per-period quota (or already throttled), 0 otherwise. Side effect: the first time the quota is found exceeded, rt_rq->rt_throttled is armed with the clock value at which the throttle ends. */ +{ + u64 period, ratio; + + if (sysctl_sched_rt_ratio == SCHED_RT_FRAC) /* ratio of 100%: limiting is switched off */ + return 0; + + if (rt_rq->rt_throttled) /* throttle already armed and not yet lifted */ + return 1; + + period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC; /* sysctl value is in ms */ + ratio = (period * sysctl_sched_rt_ratio) >> SCHED_RT_FRAC_SHIFT; /* quota per period: period scaled by the fixed-point ratio */ + + if (rt_rq->rt_time > ratio) { + rt_rq->rt_throttled = rq->clock + period - rt_rq->rt_time; /* clock value compared against rq->clock in update_sched_rt_period to end the throttle */ + return 1; + } + + return 0; +} + +static void update_sched_rt_period(struct rq *rq) /* Period bookkeeping for the rt time limit: for each period boundary rq->clock has passed, take one quota off rq->rt.rt_time (never below zero) and move rt_period_expire forward by one period; then lift an expired throttle. */ +{ + while (rq->clock > rq->rt_period_expire) { + u64 period, ratio; + + period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC; + ratio = (period * sysctl_sched_rt_ratio) >> SCHED_RT_FRAC_SHIFT; + + rq->rt.rt_time -= min(rq->rt.rt_time, ratio); /* min() keeps the unsigned counter from wrapping below zero */ + rq->rt_period_expire += period; /* NOTE(review): if sysctl_sched_rt_period is 0 then period is 0, rt_period_expire never advances and this loop cannot terminate; the sysctl entries added by this patch use plain proc_dointvec with no lower bound -- confirm and add one */ + } + + /* + * When the rt throttle is expired, let them rip. + * (XXX: use hrtick when available) + */ + if (rq->rt.rt_throttled && rq->clock > rq->rt.rt_throttled) { + rq->rt.rt_throttled = 0; + if (!sched_rt_ratio_exceeded(rq, &rq->rt)) /* may re-arm the throttle at once if rt_time is still over quota */ + resched_task(rq->curr); /* ask for a reschedule; pick_next_task_rt only returns rt tasks while the quota is not exceeded */ + } +} + /* * Update the current task's runtime statistics. Skip current tasks that * are not in our scheduling class. 
@@ -66,6 +110,11 @@ static void update_curr_rt(struct rq *rq) curr->se.sum_exec_runtime += delta_exec; curr->se.exec_start = rq->clock; cpuacct_charge(curr, delta_exec); + + rq->rt.rt_time += delta_exec; + update_sched_rt_period(rq); + if (sched_rt_ratio_exceeded(rq, &rq->rt)) + resched_task(curr); } static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq) @@ -208,8 +257,12 @@ static struct task_struct *pick_next_task_rt(struct rq *rq) struct rt_prio_array *array = &rq->rt.active; struct task_struct *next; struct list_head *queue; + struct rt_rq *rt_rq = &rq->rt; int idx; + if (sched_rt_ratio_exceeded(rq, rt_rq)) + return NULL; + idx = sched_find_first_bit(array->bitmap); if (idx >= MAX_RT_PRIO) return NULL; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 96f31c1bc4..3afbd25f43 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -306,7 +306,23 @@ static struct ctl_table kern_table[] = { .procname = "sched_nr_migrate", .data = &sysctl_sched_nr_migrate, .maxlen = sizeof(unsigned int), - .mode = 644, + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_rt_period_ms", + .data = &sysctl_sched_rt_period, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_rt_ratio", + .data = &sysctl_sched_rt_ratio, + .maxlen = sizeof(unsigned int), + .mode = 0644, .proc_handler = &proc_dointvec, }, #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) -- 2.39.5