task_t *migration_thread;
struct list_head migration_queue;
- int cpu;
#endif
#ifdef CONFIG_SCHEDSTATS
return prio;
}
-/*
- * We place interactive tasks back into the active array, if possible.
- *
- * To guarantee that this does not starve expired tasks we ignore the
- * interactivity of a task if the first expired task had to wait more
- * than a 'reasonable' amount of time. This deadline timeout is
- * load-dependent, as the frequency of array switched decreases with
- * increasing number of running tasks. We also ignore the interactivity
- * if a better static_prio task has expired, and switch periodically
- * regardless, to ensure that highly interactive tasks do not starve
- * the less fortunate for unreasonably long periods.
- */
-static inline int expired_starving(runqueue_t *rq)
-{
- int limit;
-
- /*
- * Arrays were recently switched, all is well
- */
- if (!rq->expired_timestamp)
- return 0;
-
- limit = STARVATION_LIMIT * rq->nr_running;
-
- /*
- * It's time to switch arrays
- */
- if (jiffies - rq->expired_timestamp >= limit)
- return 1;
-
- /*
- * There's a better selection in the expired array
- */
- if (rq->curr->static_prio > rq->best_expired_prio)
- return 1;
-
- /*
- * All is well
- */
- return 0;
-}
-
/*
* __activate_task - move a task to the runqueue.
*/
{
prio_array_t *target = rq->active;
- if (unlikely(batch_task(p) || (expired_starving(rq) && !rt_task(p))))
+ if (batch_task(p))
target = rq->expired;
enqueue_task(p, target);
rq->nr_running++;
* the target CPU.
*/
#ifdef CONFIG_SMP
+
+#ifndef tsk_is_polling
+#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
+#endif
+
static void resched_task(task_t *p)
{
int cpu;
if (cpu == smp_processor_id())
return;
- /* NEED_RESCHED must be visible before we test POLLING_NRFLAG */
+ /* NEED_RESCHED must be visible before we test polling */
smp_mb();
- if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG))
+ if (!tsk_is_polling(p))
smp_send_reschedule(cpu);
}
#else
struct task_struct *t = current;
struct sched_domain *tmp, *sd = NULL;
- for_each_domain(cpu, tmp)
+ for_each_domain(cpu, tmp) {
if (tmp->flags & flag)
sd = tmp;
+ }
while (sd) {
cpumask_t span;
/*
* double_rq_lock - safely lock two runqueues
*
- * We must take them in cpu order to match code in
- * dependent_sleeper and wake_dependent_sleeper.
- *
* Note this does not disable interrupts like task_rq_lock,
* you need to do so manually before calling.
*/
spin_lock(&rq1->lock);
__acquire(rq2->lock); /* Fake it out ;) */
} else {
- if (rq1->cpu < rq2->cpu) {
+ if (rq1 < rq2) {
spin_lock(&rq1->lock);
spin_lock(&rq2->lock);
} else {
__acquires(this_rq->lock)
{
if (unlikely(!spin_trylock(&busiest->lock))) {
- if (busiest->cpu < this_rq->cpu) {
+ if (busiest < this_rq) {
spin_unlock(&this_rq->lock);
spin_lock(&busiest->lock);
spin_lock(&this_rq->lock);
double_lock_balance(busiest_rq, target_rq);
/* Search for an sd spanning us and the target CPU. */
- for_each_domain(target_cpu, sd)
+ for_each_domain(target_cpu, sd) {
if ((sd->flags & SD_LOAD_BALANCE) &&
cpu_isset(busiest_cpu, sd->span))
break;
+ }
if (unlikely(sd == NULL))
goto out;
return ns;
}
+/*
+ * We place interactive tasks back into the active array, if possible.
+ *
+ * To guarantee that this does not starve expired tasks we ignore the
+ * interactivity of a task if the first expired task had to wait more
+ * than a 'reasonable' amount of time. This deadline timeout is
+ * load-dependent, as the frequency of array switched decreases with
+ * increasing number of running tasks. We also ignore the interactivity
+ * if a better static_prio task has expired:
+ */
+#define EXPIRED_STARVING(rq) \
+ ((STARVATION_LIMIT && ((rq)->expired_timestamp && \
+ (jiffies - (rq)->expired_timestamp >= \
+ STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
+ ((rq)->curr->static_prio > (rq)->best_expired_prio))
+
/*
* Account user cpu time to a process.
* @p: the process that the cpu time gets accounted to
if (!rq->expired_timestamp)
rq->expired_timestamp = jiffies;
- if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
+ if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
enqueue_task(p, rq->expired);
if (p->static_prio < rq->best_expired_prio)
rq->best_expired_prio = p->static_prio;
resched_task(rq->idle);
}
-static void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
+/*
+ * Called with interrupt disabled and this_rq's runqueue locked.
+ */
+static void wake_sleeping_dependent(int this_cpu)
{
struct sched_domain *tmp, *sd = NULL;
- cpumask_t sibling_map;
int i;
- for_each_domain(this_cpu, tmp)
- if (tmp->flags & SD_SHARE_CPUPOWER)
+ for_each_domain(this_cpu, tmp) {
+ if (tmp->flags & SD_SHARE_CPUPOWER) {
sd = tmp;
+ break;
+ }
+ }
if (!sd)
return;
- /*
- * Unlock the current runqueue because we have to lock in
- * CPU order to avoid deadlocks. Caller knows that we might
- * unlock. We keep IRQs disabled.
- */
- spin_unlock(&this_rq->lock);
-
- sibling_map = sd->span;
-
- for_each_cpu_mask(i, sibling_map)
- spin_lock(&cpu_rq(i)->lock);
- /*
- * We clear this CPU from the mask. This both simplifies the
- * inner loop and keps this_rq locked when we exit:
- */
- cpu_clear(this_cpu, sibling_map);
-
- for_each_cpu_mask(i, sibling_map) {
+ for_each_cpu_mask(i, sd->span) {
runqueue_t *smt_rq = cpu_rq(i);
+ if (i == this_cpu)
+ continue;
+ if (unlikely(!spin_trylock(&smt_rq->lock)))
+ continue;
+
wakeup_busy_runqueue(smt_rq);
+ spin_unlock(&smt_rq->lock);
}
-
- for_each_cpu_mask(i, sibling_map)
- spin_unlock(&cpu_rq(i)->lock);
- /*
- * We exit with this_cpu's rq still held and IRQs
- * still disabled:
- */
}
/*
return p->time_slice * (100 - sd->per_cpu_gain) / 100;
}
-static int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
+/*
+ * To minimise lock contention and not have to drop this_rq's runlock we only
+ * trylock the sibling runqueues and bypass those runqueues if we fail to
+ * acquire their lock. As we only trylock the normal locking order does not
+ * need to be obeyed.
+ */
+static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p)
{
struct sched_domain *tmp, *sd = NULL;
- cpumask_t sibling_map;
- prio_array_t *array;
int ret = 0, i;
- task_t *p;
- for_each_domain(this_cpu, tmp)
- if (tmp->flags & SD_SHARE_CPUPOWER)
+ /* kernel/rt threads do not participate in dependent sleeping */
+ if (!p->mm || rt_task(p))
+ return 0;
+
+ for_each_domain(this_cpu, tmp) {
+ if (tmp->flags & SD_SHARE_CPUPOWER) {
sd = tmp;
+ break;
+ }
+ }
if (!sd)
return 0;
- /*
- * The same locking rules and details apply as for
- * wake_sleeping_dependent():
- */
- spin_unlock(&this_rq->lock);
- sibling_map = sd->span;
- for_each_cpu_mask(i, sibling_map)
- spin_lock(&cpu_rq(i)->lock);
- cpu_clear(this_cpu, sibling_map);
+ for_each_cpu_mask(i, sd->span) {
+ runqueue_t *smt_rq;
+ task_t *smt_curr;
- /*
- * Establish next task to be run - it might have gone away because
- * we released the runqueue lock above:
- */
- if (!this_rq->nr_running)
- goto out_unlock;
- array = this_rq->active;
- if (!array->nr_active)
- array = this_rq->expired;
- BUG_ON(!array->nr_active);
+ if (i == this_cpu)
+ continue;
- p = list_entry(array->queue[sched_find_first_bit(array->bitmap)].next,
- task_t, run_list);
+ smt_rq = cpu_rq(i);
+ if (unlikely(!spin_trylock(&smt_rq->lock)))
+ continue;
- for_each_cpu_mask(i, sibling_map) {
- runqueue_t *smt_rq = cpu_rq(i);
- task_t *smt_curr = smt_rq->curr;
+ smt_curr = smt_rq->curr;
- /* Kernel threads do not participate in dependent sleeping */
- if (!p->mm || !smt_curr->mm || rt_task(p))
- goto check_smt_task;
+ if (!smt_curr->mm)
+ goto unlock;
/*
* If a user task with lower static priority than the
if ((jiffies % DEF_TIMESLICE) >
(sd->per_cpu_gain * DEF_TIMESLICE / 100))
ret = 1;
- } else
+ } else {
if (smt_curr->static_prio < p->static_prio &&
!TASK_PREEMPTS_CURR(p, smt_rq) &&
smt_slice(smt_curr, sd) > task_timeslice(p))
ret = 1;
-
-check_smt_task:
- if ((!smt_curr->mm && smt_curr != smt_rq->idle) ||
- rt_task(smt_curr))
- continue;
- if (!p->mm) {
- wakeup_busy_runqueue(smt_rq);
- continue;
- }
-
- /*
- * Reschedule a lower priority task on the SMT sibling for
- * it to be put to sleep, or wake it up if it has been put to
- * sleep for priority reasons to see if it should run now.
- */
- if (rt_task(p)) {
- if ((jiffies % DEF_TIMESLICE) >
- (sd->per_cpu_gain * DEF_TIMESLICE / 100))
- resched_task(smt_curr);
- } else {
- if (TASK_PREEMPTS_CURR(p, smt_rq) &&
- smt_slice(p, sd) > task_timeslice(smt_curr))
- resched_task(smt_curr);
- else
- wakeup_busy_runqueue(smt_rq);
}
+unlock:
+ spin_unlock(&smt_rq->lock);
}
-out_unlock:
- for_each_cpu_mask(i, sibling_map)
- spin_unlock(&cpu_rq(i)->lock);
return ret;
}
#else
-static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
+static inline void wake_sleeping_dependent(int this_cpu)
{
}
-static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
+static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq,
+ task_t *p)
{
return 0;
}
cpu = smp_processor_id();
if (unlikely(!rq->nr_running)) {
-go_idle:
idle_balance(cpu, rq);
if (!rq->nr_running) {
next = rq->idle;
rq->expired_timestamp = 0;
- wake_sleeping_dependent(cpu, rq);
- /*
- * wake_sleeping_dependent() might have released
- * the runqueue, so break out if we got new
- * tasks meanwhile:
- */
- if (!rq->nr_running)
- goto switch_tasks;
- }
- } else {
- if (dependent_sleeper(cpu, rq)) {
- next = rq->idle;
+ wake_sleeping_dependent(cpu);
goto switch_tasks;
}
- /*
- * dependent_sleeper() releases and reacquires the runqueue
- * lock, hence go into the idle loop if the rq went
- * empty meanwhile:
- */
- if (unlikely(!rq->nr_running))
- goto go_idle;
}
array = rq->active;
}
}
next->sleep_type = SLEEP_NORMAL;
+ if (dependent_sleeper(cpu, rq, next))
+ next = rq->idle;
switch_tasks:
if (next == rq->idle)
schedstat_inc(rq, sched_goidle);
!capable(CAP_SYS_NICE))
goto out_unlock;
+ retval = security_task_setscheduler(p, 0, NULL);
+ if (retval)
+ goto out_unlock;
+
cpus_allowed = cpuset_cpus_allowed(p);
cpus_and(new_mask, new_mask, cpus_allowed);
retval = set_cpus_allowed(p, new_mask);
if (!p)
goto out_unlock;
- retval = 0;
+ retval = security_task_getscheduler(p);
+ if (retval)
+ goto out_unlock;
+
cpus_and(*mask, p->cpus_allowed, cpu_online_map);
out_unlock:
static inline void __cond_resched(void)
{
+#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
+ __might_sleep(__FILE__, __LINE__);
+#endif
/*
* The BKS might be reacquired before we have dropped
* PREEMPT_ACTIVE, which could trigger a second
*/
void __sched io_schedule(void)
{
- struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
+ struct runqueue *rq = &__raw_get_cpu_var(runqueues);
atomic_inc(&rq->nr_iowait);
schedule();
long __sched io_schedule_timeout(long timeout)
{
- struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
+ struct runqueue *rq = &__raw_get_cpu_var(runqueues);
long ret;
atomic_inc(&rq->nr_iowait);
if (retval)
goto out_unlock;
- jiffies_to_timespec(p->policy & SCHED_FIFO ?
+ jiffies_to_timespec(p->policy == SCHED_FIFO ?
0 : task_timeslice(p), &t);
read_unlock(&tasklist_lock);
retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
* migration_call - callback that gets triggered when a CPU is added.
* Here we can start up the necessary migration thread for the new CPU.
*/
-static int migration_call(struct notifier_block *nfb, unsigned long action,
- void *hcpu)
+static int __cpuinit migration_call(struct notifier_block *nfb,
+ unsigned long action,
+ void *hcpu)
{
int cpu = (long)hcpu;
struct task_struct *p;
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
+ if (!cpu_rq(cpu)->migration_thread)
+ break;
/* Unbind it from offline cpu so it can run. Fall thru. */
kthread_bind(cpu_rq(cpu)->migration_thread,
any_online_cpu(cpu_online_map));
/* Register at highest priority so that task migration (migrate_all_tasks)
* happens before everything else.
*/
-static struct notifier_block migration_notifier = {
+static struct notifier_block __cpuinitdata migration_notifier = {
.notifier_call = migration_call,
.priority = 10
};
rq->push_cpu = 0;
rq->migration_thread = NULL;
INIT_LIST_HEAD(&rq->migration_queue);
- rq->cpu = i;
#endif
atomic_set(&rq->nr_iowait, 0);
runqueue_t *rq;
read_lock_irq(&tasklist_lock);
- for_each_process (p) {
+ for_each_process(p) {
if (!rt_task(p))
continue;