-#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
-/*
- * distribute shares of all task groups among their schedulable entities,
- * to reflect load distribution across cpus.
- *
- * @sd: sched domain whose span (sd->span) is the set of cpus over which
- *      each group's load is summed and its shares are redistributed.
- * @this_cpu: cpu whose runqueue's leaf cfs_rq list is walked to enumerate
- *      the task groups.
- *
- * Returns 1 if every per-cpu group entity already had the computed weight
- * (nothing was changed), 0 if set_se_shares() was called at least once.
- *
- * Each update is done under the target cpu's rq->lock, taken with
- * spin_lock_irq(). The only caller visible here, load_balance_monitor(),
- * invokes this function inside rcu_read_lock().
- */
-static int rebalance_shares(struct sched_domain *sd, int this_cpu)
-{
- struct cfs_rq *cfs_rq;
- struct rq *rq = cpu_rq(this_cpu);
- cpumask_t sdspan = sd->span;
- int balanced = 1;
-
- /* Walk through all the task groups that we have */
- for_each_leaf_cfs_rq(rq, cfs_rq) {
- int i;
- unsigned long total_load = 0, total_shares;
- struct task_group *tg = cfs_rq->tg;
-
- /* Gather total task load of this group across the cpus in sdspan */
- for_each_cpu_mask(i, sdspan)
- total_load += tg->cfs_rq[i]->load.weight;
-
- /* Nothing to do if this group has no load */
- if (!total_load)
- continue;
-
- /*
- * tg->shares represents the number of cpu shares the task group
- * is eligible to hold on a single cpu. On N cpus, it is
- * eligible to hold (N * tg->shares) number of cpu shares.
- */
- total_shares = tg->shares * cpus_weight(sdspan);
-
- /*
- * redistribute total_shares across cpus as per the task load
- * distribution: each cpu gets total_shares scaled by its fraction
- * of total_load (integer division, so the result rounds down).
- * A result of 0 is replaced by MIN_GROUP_SHARES, and cpus whose
- * entity already carries the computed weight are skipped without
- * taking the rq lock.
- *
- * NOTE(review): local_load * total_shares is evaluated in unsigned
- * long; it may wrap where unsigned long is 32 bits and the load is
- * large -- confirm against the expected weight ranges.
- */
- for_each_cpu_mask(i, sdspan) {
- unsigned long local_load, local_shares;
-
- local_load = tg->cfs_rq[i]->load.weight;
- local_shares = (local_load * total_shares) / total_load;
- if (!local_shares)
- local_shares = MIN_GROUP_SHARES;
- if (local_shares == tg->se[i]->load.weight)
- continue;
-
- spin_lock_irq(&cpu_rq(i)->lock);
- set_se_shares(tg->se[i], local_shares);
- spin_unlock_irq(&cpu_rq(i)->lock);
- balanced = 0;
- }
- }
-
- return balanced;
-}
-
-/*
- * How frequently should we rebalance_shares() across cpus?
- *
- * The more frequently we rebalance shares, the more accurate is the fairness
- * of cpu bandwidth distribution between task groups. However higher frequency
- * also implies increased scheduling overhead.
- *
- * sysctl_sched_min_bal_int_shares represents the minimum interval between
- * consecutive calls to rebalance_shares() in the same sched domain.
- *
- * sysctl_sched_max_bal_int_shares represents the maximum interval between
- * consecutive calls to rebalance_shares() in the same sched domain.
- *
- * These settings allow for the appropriate trade-off between accuracy of
- * fairness and the associated overhead.
- *
- */
-
-/*
- * default: 8ms, units: milliseconds
- *
- * load_balance_monitor() starts with this sleep interval and resets to it
- * after any pass in which rebalance_shares() had to change shares.
- */
-const_debug unsigned int sysctl_sched_min_bal_int_shares = 8;
-
-/*
- * default: 128ms, units: milliseconds
- *
- * load_balance_monitor() keeps doubling its sleep interval after balanced
- * passes only while the interval is still below this value.
- */
-const_debug unsigned int sysctl_sched_max_bal_int_shares = 128;
-
-/*
- * kernel thread that runs rebalance_shares() periodically
- *
- * @unused: thread argument; not referenced in the body.
- *
- * Each pass walks doms_cur[0 .. ndoms_cur-1]. For every entry it takes the
- * first cpu of the mask, keeps the last sched domain visited by
- * for_each_domain() that has SD_LOAD_BALANCE set, and calls
- * rebalance_shares() on that domain. Entries with no such domain are
- * skipped.
- *
- * Sleep interval between passes (passed to msleep_interruptible()):
- * starts at sysctl_sched_min_bal_int_shares, is reset to that value when
- * any rebalance_shares() call in the pass returned 0, and is otherwise
- * doubled while it is still below sysctl_sched_max_bal_int_shares. The
- * doubled value is not clamped, so it can end up above the maximum when
- * the doubling sequence does not land on it exactly.
- *
- * Returns 0 once kthread_should_stop() reports true.
- */
-static int load_balance_monitor(void *unused)
-{
- unsigned int timeout = sysctl_sched_min_bal_int_shares;
- struct sched_param schedparm;
- int ret;
-
- /*
- * We don't want this thread's execution to be limited by the shares
- * assigned to default group (init_task_group). Hence make it run
- * as a SCHED_RR RT task at the lowest priority.
- *
- * A failure to switch policy is only logged; the thread keeps
- * running with whatever policy it already had.
- */
- schedparm.sched_priority = 1;
- ret = sched_setscheduler(current, SCHED_RR, &schedparm);
- if (ret)
- printk(KERN_ERR "Couldn't set SCHED_RR policy for load balance"
- " monitor thread (error = %d) \n", ret);
-
- while (!kthread_should_stop()) {
- int i, cpu, balanced = 1;
-
- /* Prevent cpus going down or coming up */
- get_online_cpus();
- /* lockout changes to doms_cur[] array */
- lock_doms_cur();
- /*
- * Enter a rcu read-side critical section to safely walk rq->sd
- * chain on various cpus and to walk task group list
- * (rq->leaf_cfs_rq_list) in rebalance_shares().
- *
- * The three protections are released below in the reverse of the
- * order in which they are taken here.
- */
- rcu_read_lock();
-
- for (i = 0; i < ndoms_cur; i++) {
- cpumask_t cpumap = doms_cur[i];
- struct sched_domain *sd = NULL, *sd_prev = NULL;
-
- cpu = first_cpu(cpumap);
-
- /*
- * Find the highest domain at which to balance shares: sd_prev
- * ends up as the last domain in the walk with SD_LOAD_BALANCE
- * set, or stays NULL if there is none.
- */
- for_each_domain(cpu, sd) {
- if (!(sd->flags & SD_LOAD_BALANCE))
- continue;
- sd_prev = sd;
- }
-
- sd = sd_prev;
- /* sd == NULL? No load balance reqd in this domain */
- if (!sd)
- continue;
-
- balanced &= rebalance_shares(sd, cpu);
- }
-
- rcu_read_unlock();
-
- unlock_doms_cur();
- put_online_cpus();
-
- if (!balanced)
- timeout = sysctl_sched_min_bal_int_shares;
- else if (timeout < sysctl_sched_max_bal_int_shares)
- timeout *= 2;
-
- msleep_interruptible(timeout);
- }
-
- return 0;
-}
-#endif /* CONFIG_FAIR_GROUP_SCHED && CONFIG_SMP */
-