X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=kernel%2Fsched_fair.c;h=01859f662ab7c834826f9197e93ebe9d44ad548a;hb=8295b6d9e623879344ed0ca7565336e4fd698e42;hp=410b77aea216f959fb93f123df72b53a0ff1c288;hpb=8ca0e14ffb12c257de591571a9e96102acdb1c64;p=linux-2.6

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 410b77aea2..01859f662a 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -25,14 +25,12 @@
  * (default: 20ms, units: nanoseconds)
  *
  * NOTE: this latency value is not the same as the concept of
- * 'timeslice length' - timeslices in CFS are of variable length.
- * (to see the precise effective timeslice length of your workload,
- *  run vmstat and monitor the context-switches field)
+ * 'timeslice length' - timeslices in CFS are of variable length
+ * and have no persistent notion like in traditional, time-slice
+ * based scheduling concepts.
  *
- * On SMP systems the value of this is multiplied by the log2 of the
- * number of CPUs. (i.e. factor 2x on 2-way systems, 3x on 4-way
- * systems, 4x on 8-way systems, 5x on 16-way systems, etc.)
- * Targeted preemption latency for CPU-bound tasks:
+ * (to see the precise effective timeslice length of your workload,
+ *  run vmstat and monitor the context-switches (cs) field)
  */
 const_debug unsigned int sysctl_sched_latency = 20000000ULL;
 
@@ -76,6 +74,8 @@ const_debug unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL;
  */
 const_debug unsigned int sysctl_sched_wakeup_granularity = 10000000UL;
 
+const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+
 /**************************************************************
  * CFS operations on generic schedulable entities:
  */
@@ -381,15 +381,6 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	se->exec_start = rq_of(cfs_rq)->clock;
 }
 
-/*
- * We are descheduling a task - update its stats:
- */
-static inline void
-update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-	se->exec_start = 0;
-}
-
 /**************************************************
  * Scheduling class queueing methods:
  */
@@ -485,9 +476,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 		vruntime += sched_vslice_add(cfs_rq, se);
 
 	if (!initial) {
-		struct task_struct *p = container_of(se, struct task_struct, se);
-
-		if (sched_feat(NEW_FAIR_SLEEPERS) && p->policy != SCHED_BATCH)
+		if (sched_feat(NEW_FAIR_SLEEPERS) && entity_is_task(se) &&
+				task_of(se)->policy != SCHED_BATCH)
 			vruntime -= sysctl_sched_latency;
 
 		vruntime = max_t(s64, vruntime, se->vruntime);
@@ -527,6 +517,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 
 	update_stats_dequeue(cfs_rq, se);
 	if (sleep) {
+		se->peer_preempt = 0;
 #ifdef CONFIG_SCHEDSTATS
 		if (entity_is_task(se)) {
 			struct task_struct *tsk = task_of(se);
@@ -554,8 +545,10 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 
 	ideal_runtime = sched_slice(cfs_rq, curr);
 	delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
-	if (delta_exec > ideal_runtime)
+	if (delta_exec > ideal_runtime ||
+			(sched_feat(PREEMPT_RESTRICT) && curr->peer_preempt))
 		resched_task(rq_of(cfs_rq)->curr);
+	curr->peer_preempt = 0;
 }
 
 static void
@@ -609,8 +602,6 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
 	if (prev->on_rq)
 		update_curr(cfs_rq);
 
-	update_stats_curr_end(cfs_rq, prev);
-
 	check_spread(cfs_rq, prev);
 	if (prev->on_rq) {
 		update_stats_wait_start(cfs_rq, prev);
@@ -627,7 +618,7 @@ static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 	 */
 	update_curr(cfs_rq);
 
-	if (cfs_rq->nr_running > 1)
+	if (cfs_rq->nr_running > 1 || !sched_feat(WAKEUP_PREEMPT))
 		check_preempt_tick(cfs_rq, curr);
 }
 
@@ -828,19 +819,31 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 		resched_task(curr);
 		return;
 	}
+	/*
+	 * Batch tasks do not preempt (their preemption is driven by
+	 * the tick):
+	 */
+	if (unlikely(p->policy == SCHED_BATCH))
+		return;
 
-	while (!is_same_group(se, pse)) {
-		se = parent_entity(se);
-		pse = parent_entity(pse);
-	}
+	if (sched_feat(WAKEUP_PREEMPT)) {
+		while (!is_same_group(se, pse)) {
+			se = parent_entity(se);
+			pse = parent_entity(pse);
+		}
 
-	delta = se->vruntime - pse->vruntime;
-	gran = sysctl_sched_wakeup_granularity;
-	if (unlikely(se->load.weight != NICE_0_LOAD))
-		gran = calc_delta_fair(gran, &se->load);
+		delta = se->vruntime - pse->vruntime;
+		gran = sysctl_sched_wakeup_granularity;
+		if (unlikely(se->load.weight != NICE_0_LOAD))
+			gran = calc_delta_fair(gran, &se->load);
 
-	if (delta > gran)
-		resched_task(curr);
+		if (delta > gran) {
+			int now = !sched_feat(PREEMPT_RESTRICT);
+
+			if (now || p->prio < curr->prio || !se->peer_preempt++)
+				resched_task(curr);
+		}
+	}
 }
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)
@@ -873,6 +876,7 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
 	}
 }
 
+#ifdef CONFIG_SMP
 /**************************************************
  * Fair scheduling class load-balancing methods:
  */
@@ -933,12 +937,11 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
 
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		  unsigned long max_nr_move, unsigned long max_load_move,
+		  unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
 		  int *all_pinned, int *this_best_prio)
 {
 	struct cfs_rq *busy_cfs_rq;
-	unsigned long load_moved, total_nr_moved = 0, nr_moved;
 	long rem_load_move = max_load_move;
 	struct rq_iterator cfs_rq_iterator;
 
@@ -966,25 +969,48 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 #else
 # define maxload rem_load_move
 #endif
-		/* pass busy_cfs_rq argument into
+		/*
+		 * pass busy_cfs_rq argument into
 		 * load_balance_[start|next]_fair iterators
 		 */
 		cfs_rq_iterator.arg = busy_cfs_rq;
-		nr_moved = balance_tasks(this_rq, this_cpu, busiest,
-				max_nr_move, maxload, sd, idle, all_pinned,
-				&load_moved, this_best_prio, &cfs_rq_iterator);
-
-		total_nr_moved += nr_moved;
-		max_nr_move -= nr_moved;
-		rem_load_move -= load_moved;
+		rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
+					       maxload, sd, idle, all_pinned,
+					       this_best_prio,
+					       &cfs_rq_iterator);
 
-		if (max_nr_move <= 0 || rem_load_move <= 0)
+		if (rem_load_move <= 0)
 			break;
 	}
 
 	return max_load_move - rem_load_move;
 }
 
+static int
+move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		   struct sched_domain *sd, enum cpu_idle_type idle)
+{
+	struct cfs_rq *busy_cfs_rq;
+	struct rq_iterator cfs_rq_iterator;
+
+	cfs_rq_iterator.start = load_balance_start_fair;
+	cfs_rq_iterator.next = load_balance_next_fair;
+
+	for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
+		/*
+		 * pass busy_cfs_rq argument into
+		 * load_balance_[start|next]_fair iterators
+		 */
+		cfs_rq_iterator.arg = busy_cfs_rq;
+		if (iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
+				       &cfs_rq_iterator))
+			return 1;
+	}
+
+	return 0;
+}
+#endif
+
 /*
  * scheduler tick hitting a task of our scheduling class:
  */
@@ -999,7 +1025,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr)
 	}
 }
 
-#define swap(a,b) do { typeof(a) tmp = (a); (a) = (b); (b) = tmp; } while (0)
+#define swap(a, b) do { typeof(a) tmp = (a); (a) = (b); (b) = tmp; } while (0)
 
 /*
  * Share the fairness runtime between parent and child, thus the
@@ -1028,11 +1054,8 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 		swap(curr->vruntime, se->vruntime);
 	}
 
-	update_stats_enqueue(cfs_rq, se);
-	check_spread(cfs_rq, se);
-	check_spread(cfs_rq, curr);
-	__enqueue_entity(cfs_rq, se);
-	account_entity_enqueue(cfs_rq, se);
+	se->peer_preempt = 0;
+	enqueue_task_fair(rq, p, 0);
 	resched_task(rq->curr);
 }
 
@@ -1063,7 +1086,10 @@ static const struct sched_class fair_sched_class = {
 	.pick_next_task		= pick_next_task_fair,
 	.put_prev_task		= put_prev_task_fair,
 
+#ifdef CONFIG_SMP
 	.load_balance		= load_balance_fair,
+	.move_one_task		= move_one_task_fair,
+#endif
 
 	.set_curr_task          = set_curr_task_fair,
 	.task_tick		= task_tick_fair,
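
Note on the wakeup-preemption hunks: with PREEMPT_RESTRICT enabled, a wakeup
that is far enough ahead in vruntime no longer preempts unconditionally. The
first peer wakeup within a slice still reschedules; later ones only set
curr's peer_preempt flag, and check_preempt_tick() then forces the
reschedule at the next tick. A minimal user-space sketch of that decision,
assuming a fixed 10ms granularity and ignoring the load-weight scaling that
calc_delta_fair() applies in the real check_preempt_wakeup(); struct entity
and should_resched() are illustrative stand-ins, not kernel interfaces:

	#include <stdio.h>
	#include <stdint.h>

	#define WAKEUP_GRAN_NS 10000000LL	/* 10ms, the sysctl default above */

	struct entity {
		uint64_t vruntime;	/* virtual runtime in ns */
		int prio;		/* lower value = higher priority */
		int peer_preempt;	/* peer preemptions seen this slice */
	};

	/* decide whether a waking entity should preempt the running one */
	static int should_resched(struct entity *curr, struct entity *waker,
				  int preempt_restrict)
	{
		int64_t delta = (int64_t)(curr->vruntime - waker->vruntime);

		if (delta <= WAKEUP_GRAN_NS)
			return 0;	/* waker is not far enough behind */

		if (!preempt_restrict || waker->prio < curr->prio)
			return 1;	/* unrestricted, or waker has better prio */

		/*
		 * Restricted: the first peer wakeup in a slice still preempts;
		 * later ones only mark curr, and the tick path reschedules.
		 */
		return !curr->peer_preempt++;
	}

	int main(void)
	{
		struct entity curr  = { .vruntime = 50000000ULL, .prio = 120 };
		struct entity waker = { .vruntime = 30000000ULL, .prio = 120 };

		printf("1st wakeup preempts: %d\n", should_resched(&curr, &waker, 1));
		printf("2nd wakeup preempts: %d\n", should_resched(&curr, &waker, 1));
		return 0;
	}

Compiled and run, this prints 1 and then 0: the second wakeup within the
same slice is deferred to the tick, which matches the curr->peer_preempt
test added to check_preempt_tick() above.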
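Similarly, the load_balance_fair() rework drops the max_nr_move task-count
bookkeeping and budgets purely in load units: balance_tasks() now returns
the amount of load it moved, and the loop stops once rem_load_move is
exhausted. A toy model of that accounting, where pull_load() and queue_load
are made-up stand-ins for balance_tasks() and the per-cfs_rq loads:

	#include <stdio.h>

	/* pretend per-cfs_rq loads on the busiest runqueue */
	static long queue_load[] = { 300, 150, 700 };

	/* stand-in for balance_tasks(): move up to max_load off one queue */
	static long pull_load(long *queue, long max_load)
	{
		long moved = *queue < max_load ? *queue : max_load;

		*queue -= moved;
		return moved;
	}

	int main(void)
	{
		long max_load_move = 500;
		long rem_load_move = max_load_move;

		for (int i = 0; i < 3; i++) {
			/* mirrors: rem_load_move -= balance_tasks(...) */
			rem_load_move -= pull_load(&queue_load[i], rem_load_move);
			if (rem_load_move <= 0)
				break;
		}

		printf("moved %ld of %ld\n",
		       max_load_move - rem_load_move, max_load_move);
		return 0;
	}

With a 500-unit budget this moves 300, then 150, then the remaining 50, and
breaks out exactly when the budget hits zero, matching the
rem_load_move <= 0 test in the patched loop.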