[POWERPC] Update PCI nodes in the 83xx/85xx boards device tree

[linux-2.6] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index ef6b6bb3e0b21084eb1aa43877a01f6bfe993784..93cf241cfbe9b62e4580446552873858dc8967a9 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -16,6 +16,12 @@
   *             by Davide Libenzi, preemptible kernel bits by Robert Love.
   *  2003-09-03 Interactivity tuning by Con Kolivas.
   *  2004-04-02 Scheduler domains code by Nick Piggin
+ *  2007-04-15  Work begun on replacing all interactivity tuning with a
+ *              fair scheduling design by Con Kolivas.
+ *  2007-05-05  Load balancing (smp-nice) and other improvements
+ *              by Peter Williams
+ *  2007-05-06  Interactivity improvements to CFS by Mike Galbraith
+ *  2007-07-01  Group scheduling enhancements by Srivatsa Vaddagiri
   */
  
  #include <linux/mm.h>
@@ -295,7 +301,7 @@ struct rq {
         struct lock_class_key rq_lock_key;
  };
  
-static DEFINE_PER_CPU(struct rq, runqueues) ____cacheline_aligned_in_smp;
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
  static DEFINE_MUTEX(sched_hotcpu_mutex);
  
  static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
@@ -373,6 +379,23 @@ static inline unsigned long long rq_clock(struct rq *rq)
  #define task_rq(p)             cpu_rq(task_cpu(p))
  #define cpu_curr(cpu)          (cpu_rq(cpu)->curr)
  
+/*
+ * For kernel-internal use: high-speed (but slightly incorrect) per-cpu clock
+ * constructed from sched_clock(). Returns the rq_clock() of @cpu's runqueue.
+ */
+unsigned long long cpu_clock(int cpu)
+{
+       struct rq *rq = cpu_rq(cpu);
+       unsigned long long now;
+       unsigned long flags;
+
+       spin_lock_irqsave(&rq->lock, flags);
+       now = rq_clock(rq);     /* sampled while rq->lock is held */
+       spin_unlock_irqrestore(&rq->lock, flags);
+
+       return now;
+}
+
  #ifdef CONFIG_FAIR_GROUP_SCHED
  /* Change a task's ->cfs_rq if it moves across CPUs */
  static inline void set_task_cfs_rq(struct task_struct *p)
@@ -730,7 +753,9 @@ static void update_curr_load(struct rq *rq, u64 now)
   *
   * The "10% effect" is relative and cumulative: from _any_ nice level,
   * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
- * it's +10% CPU usage.
+ * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
+ * If a task goes up by ~10% and another task goes down by ~10% then
+ * the relative distance between them is ~25%.)
   */
  static const int prio_to_weight[40] = {
  /* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921,
@@ -740,15 +765,22 @@ static const int prio_to_weight[40] = {
  /*  10 */   110,    87,    70,    56,    45,    36,    29,    23,    18,    15,
  };
  
+/*
+ * Precalculated inverses (2^32/x) of the prio_to_weight[] entries, listed
+ * in the same order (one entry per nice level, starting at -20).
+ *
+ * In cases where the weight does not change often, we can use the
+ * precalculated inverse to speed up arithmetic by turning divisions
+ * into multiplications:
+ */
  static const u32 prio_to_wmult[40] = {
-       48356,   60446,   75558,   94446,  118058,  147573,
-       184467,  230589,  288233,  360285,  450347,
-       562979,  703746,  879575, 1099582, 1374389,
-       717986, 2147483, 2684354, 3355443, 4194304,
-       244160, 6557201, 8196502, 10250518, 12782640,
-       16025997, 19976592, 24970740, 31350126, 39045157,
-       49367440, 61356675, 76695844, 95443717, 119304647,
-       148102320, 186737708, 238609294, 286331153,
+/* -20 */     48356,     60446,     75558,     94446,    118058,
+/* -15 */    147573,    184467,    230589,    288233,    360285,
+/* -10 */    450347,    562979,    703746,    879575,   1099582,
+/*  -5 */   1374389,   1717986,   2147483,   2684354,   3355443,
+/*   0 */   4194304,   5244160,   6557201,   8196502,  10250518,
+/*   5 */  12782640,  16025997,  19976592,  24970740,  31350126,
+/*  10 */  39045157,  49367440,  61356675,  76695844,  95443717,
+/*  15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
  };
  
  static inline void
@@ -2220,7 +2252,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
  
                         rq = cpu_rq(i);
  
-                       if (*sd_idle && !idle_cpu(i))
+                       if (*sd_idle && rq->nr_running)
                                 *sd_idle = 0;
  
                         /* Bias balancing toward cpus of our domain */
@@ -2242,9 +2274,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
                 /*
                  * First idle cpu or the first cpu(busiest) in this sched group
                  * is eligible for doing load balancing at this and above
-                * domains.
+                * domains. In the newly idle case, we will allow all the cpu's
+                * to do the newly idle load balance.
                  */
-               if (local_group && balance_cpu != this_cpu && balance) {
+               if (idle != CPU_NEWLY_IDLE && local_group &&
+                   balance_cpu != this_cpu && balance) {
                         *balance = 0;
                         goto ret;
                 }
@@ -2662,6 +2696,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
         unsigned long imbalance;
         int nr_moved = 0;
         int sd_idle = 0;
+       int all_pinned = 0;
         cpumask_t cpus = CPU_MASK_ALL;
  
         /*
@@ -2700,10 +2735,11 @@ redo:
                 double_lock_balance(this_rq, busiest);
                 nr_moved = move_tasks(this_rq, this_cpu, busiest,
                                         minus_1_or_zero(busiest->nr_running),
-                                       imbalance, sd, CPU_NEWLY_IDLE, NULL);
+                                       imbalance, sd, CPU_NEWLY_IDLE,
+                                       &all_pinned);
                 spin_unlock(&busiest->lock);
  
-               if (!nr_moved) {
+               if (unlikely(all_pinned)) {
                         cpu_clear(cpu_of(busiest), cpus);
                         if (!cpus_empty(cpus))
                                 goto redo;
@@ -3699,74 +3735,85 @@ out:
  }
  EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
  
-
-#define        SLEEP_ON_VAR                                    \
-       unsigned long flags;                            \
-       wait_queue_t wait;                              \
-       init_waitqueue_entry(&wait, current);
-
-#define SLEEP_ON_HEAD                                  \
-       spin_lock_irqsave(&q->lock,flags);              \
-       __add_wait_queue(q, &wait);                     \
+static inline void
+sleep_on_head(wait_queue_head_t *q, wait_queue_t *wait, unsigned long *flags)
+{
+       spin_lock_irqsave(&q->lock, *flags);    /* *flags used by sleep_on_tail() */
+       __add_wait_queue(q, wait);              /* enqueue @wait on @q */
         spin_unlock(&q->lock);
+}
  
-#define        SLEEP_ON_TAIL                                   \
-       spin_lock_irq(&q->lock);                        \
-       __remove_wait_queue(q, &wait);                  \
-       spin_unlock_irqrestore(&q->lock, flags);
+static inline void
+sleep_on_tail(wait_queue_head_t *q, wait_queue_t *wait, unsigned long *flags)
+{
+       spin_lock_irq(&q->lock);
+       __remove_wait_queue(q, wait);           /* dequeue @wait from @q */
+       spin_unlock_irqrestore(&q->lock, *flags); /* from sleep_on_head() */
+}
  
-void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q)
+void __sched interruptible_sleep_on(wait_queue_head_t *q)
  {
-       SLEEP_ON_VAR
+       unsigned long flags;    /* IRQ state: saved in head, restored in tail */
+       wait_queue_t wait;
+
+       init_waitqueue_entry(&wait, current);   /* entry for the current task */
  
         current->state = TASK_INTERRUPTIBLE;
  
-       SLEEP_ON_HEAD
+       sleep_on_head(q, &wait, &flags);        /* add wait to q */
         schedule();
-       SLEEP_ON_TAIL
+       sleep_on_tail(q, &wait, &flags);        /* remove wait from q */
  }
  EXPORT_SYMBOL(interruptible_sleep_on);
  
-long fastcall __sched
+long __sched
  interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
  {
-       SLEEP_ON_VAR
+       unsigned long flags;    /* IRQ state: saved in head, restored in tail */
+       wait_queue_t wait;
+
+       init_waitqueue_entry(&wait, current);   /* entry for the current task */
  
         current->state = TASK_INTERRUPTIBLE;
  
-       SLEEP_ON_HEAD
+       sleep_on_head(q, &wait, &flags);        /* add wait to q */
         timeout = schedule_timeout(timeout);
-       SLEEP_ON_TAIL
+       sleep_on_tail(q, &wait, &flags);        /* remove wait from q */
  
         return timeout;
  }
  EXPORT_SYMBOL(interruptible_sleep_on_timeout);
  
-void fastcall __sched sleep_on(wait_queue_head_t *q)
+void __sched sleep_on(wait_queue_head_t *q)
  {
-       SLEEP_ON_VAR
+       unsigned long flags;    /* IRQ state: saved in head, restored in tail */
+       wait_queue_t wait;
+
+       init_waitqueue_entry(&wait, current);   /* entry for the current task */
  
         current->state = TASK_UNINTERRUPTIBLE;
  
-       SLEEP_ON_HEAD
+       sleep_on_head(q, &wait, &flags);        /* add wait to q */
         schedule();
-       SLEEP_ON_TAIL
+       sleep_on_tail(q, &wait, &flags);        /* remove wait from q */
  }
  EXPORT_SYMBOL(sleep_on);
  
-long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout)
+long __sched sleep_on_timeout(wait_queue_head_t *q, long timeout)
  {
-       SLEEP_ON_VAR
+       unsigned long flags;    /* IRQ state: saved in head, restored in tail */
+       wait_queue_t wait;
+
+       init_waitqueue_entry(&wait, current);   /* entry for the current task */
  
         current->state = TASK_UNINTERRUPTIBLE;
  
-       SLEEP_ON_HEAD
+       sleep_on_head(q, &wait, &flags);        /* add wait to q */
         timeout = schedule_timeout(timeout);
-       SLEEP_ON_TAIL
+       sleep_on_tail(q, &wait, &flags);        /* remove wait from q */
  
         return timeout;
  }
-
  EXPORT_SYMBOL(sleep_on_timeout);
  
  #ifdef CONFIG_RT_MUTEXES
@@ -4630,14 +4677,14 @@ static void show_task(struct task_struct *p)
         state = p->state ? __ffs(p->state) + 1 : 0;
         printk("%-13.13s %c", p->comm,
                 state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
-#if (BITS_PER_LONG == 32)
+#if BITS_PER_LONG == 32
         if (state == TASK_RUNNING)
-               printk(" running ");
+               printk(" running  ");
         else
-               printk(" %08lX ", thread_saved_pc(p));
+               printk(" %08lx ", thread_saved_pc(p));
  #else
         if (state == TASK_RUNNING)
-               printk("  running task   ");
+               printk("  running task    ");
         else
                 printk(" %016lx ", thread_saved_pc(p));
  #endif
@@ -4649,11 +4696,7 @@ static void show_task(struct task_struct *p)
                 free = (unsigned long)n - (unsigned long)end_of_stack(p);
         }
  #endif
-       printk("%5lu %5d %6d", free, p->pid, p->parent->pid);
-       if (!p->mm)
-               printk(" (L-TLB)\n");
-       else
-               printk(" (NOTLB)\n");
+       printk("%5lu %5d %6d\n", free, p->pid, p->parent->pid);
  
         if (state != TASK_RUNNING)
                 show_stack(p, NULL);
@@ -4663,14 +4706,12 @@ void show_state_filter(unsigned long state_filter)
  {
         struct task_struct *g, *p;
  
-#if (BITS_PER_LONG == 32)
-       printk("\n"
-              "                         free                        sibling\n");
-       printk("  task             PC    stack   pid father child younger older\n");
+#if BITS_PER_LONG == 32
+       printk(KERN_INFO
+               "  task                PC stack   pid father\n");
  #else
-       printk("\n"
-              "                                 free                        sibling\n");
-       printk("  task                 PC        stack   pid father child younger older\n");
+       printk(KERN_INFO
+               "  task                        PC stack   pid father\n");
  #endif
         read_lock(&tasklist_lock);
         do_each_thread(g, p) {
@@ -4761,7 +4802,7 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
  static inline void sched_init_granularity(void)
  {
         unsigned int factor = 1 + ilog2(num_online_cpus());
-       const unsigned long gran_limit = 10000000;
+       const unsigned long gran_limit = 100000000;
  
         sysctl_sched_granularity *= factor;
         if (sysctl_sched_granularity > gran_limit)
@@ -4892,8 +4933,6 @@ static int migration_thread(void *data)
                 struct migration_req *req;
                 struct list_head *head;
  
-               try_to_freeze();
-
                 spin_lock_irq(&rq->lock);
  
                 if (cpu_is_offline(cpu)) {
@@ -5127,7 +5166,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
                 p = kthread_create(migration_thread, hcpu, "migration/%d", cpu);
                 if (IS_ERR(p))
                         return NOTIFY_BAD;
-               p->flags |= PF_NOFREEZE;
                 kthread_bind(p, cpu);
                 /* Must be high prio: stop_machine expects to yield to it. */
                 rq = task_rq_lock(p, &flags);