[PATCH] Fix function/macro name collision on i386 oprofile

[linux-2.6] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index 6e452eb95ac3f068bd12d9d5f5125000d1be4a2b..5f889d0cbfcc2e614aef87963ec0bea5ea6a5f21 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -262,7 +262,7 @@ static DEFINE_PER_CPU(struct runqueue, runqueues);
  
  /*
   * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
- * See update_sched_domains: synchronize_kernel for details.
+ * See detach_destroy_domains: synchronize_sched for details.
   *
   * The domain tree of any CPU may only be accessed from within
   * preempt-disabled sections.
@@ -673,7 +673,7 @@ static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
         rq->nr_running++;
  }
  
-static void recalc_task_prio(task_t *p, unsigned long long now)
+static int recalc_task_prio(task_t *p, unsigned long long now)
  {
         /* Caller must always ensure 'now >= p->timestamp' */
         unsigned long long __sleep_time = now - p->timestamp;
@@ -732,7 +732,7 @@ static void recalc_task_prio(task_t *p, unsigned long long now)
                 }
         }
  
-       p->prio = effective_prio(p);
+       return effective_prio(p);
  }
  
  /*
@@ -755,7 +755,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
         }
  #endif
  
-       recalc_task_prio(p, now);
+       p->prio = recalc_task_prio(p, now);
  
         /*
          * This checks to make sure it's not an uninterruptible task
@@ -2751,7 +2751,7 @@ asmlinkage void __sched schedule(void)
         struct list_head *queue;
         unsigned long long now;
         unsigned long run_time;
-       int cpu, idx;
+       int cpu, idx, new_prio;
  
         /*
          * Test if we are atomic.  Since do_exit() needs to call into
@@ -2873,9 +2873,14 @@ go_idle:
                         delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;
  
                 array = next->array;
-               dequeue_task(next, array);
-               recalc_task_prio(next, next->timestamp + delta);
-               enqueue_task(next, array);
+               new_prio = recalc_task_prio(next, next->timestamp + delta);
+
+               if (unlikely(next->prio != new_prio)) {
+                       dequeue_task(next, array);
+                       next->prio = new_prio;
+                       enqueue_task(next, array);
+               } else
+                       requeue_task(next, array);
         }
         next->activated = 0;
  switch_tasks:
@@ -3373,8 +3378,8 @@ EXPORT_SYMBOL(set_user_nice);
   */
  int can_nice(const task_t *p, const int nice)
  {
-       /* convert nice value [19,-20] to rlimit style value [0,39] */
-       int nice_rlim = 19 - nice;
+       /* convert nice value [19,-20] to rlimit style value [1,40] */
+       int nice_rlim = 20 - nice;
         return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur ||
                 capable(CAP_SYS_NICE));
  }
@@ -3443,15 +3448,7 @@ int task_nice(const task_t *p)
  {
         return TASK_NICE(p);
  }
-
-/*
- * The only users of task_nice are binfmt_elf and binfmt_elf32.
- * binfmt_elf is no longer modular, but binfmt_elf32 still is.
- * Therefore, task_nice is needed if there is a compat_mode.
- */
-#ifdef CONFIG_COMPAT
  EXPORT_SYMBOL_GPL(task_nice);
-#endif
  
  /**
   * idle_cpu - is a given cpu idle currently?
@@ -3489,7 +3486,7 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
         p->policy = policy;
         p->rt_priority = prio;
         if (policy != SCHED_NORMAL)
-               p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority;
+               p->prio = MAX_RT_PRIO-1 - p->rt_priority;
         else
                 p->prio = p->static_prio;
  }
@@ -3521,18 +3518,31 @@ recheck:
          * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0.
          */
         if (param->sched_priority < 0 ||
-           param->sched_priority > MAX_USER_RT_PRIO-1)
+           (p->mm &&  param->sched_priority > MAX_USER_RT_PRIO-1) ||
+           (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
                 return -EINVAL;
         if ((policy == SCHED_NORMAL) != (param->sched_priority == 0))
                 return -EINVAL;
  
-       if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
-           param->sched_priority > p->signal->rlim[RLIMIT_RTPRIO].rlim_cur &&
-           !capable(CAP_SYS_NICE))
-               return -EPERM;
-       if ((current->euid != p->euid) && (current->euid != p->uid) &&
-           !capable(CAP_SYS_NICE))
-               return -EPERM;
+       /*
+        * Allow unprivileged RT tasks to decrease priority:
+        */
+       if (!capable(CAP_SYS_NICE)) {
+               /* can't change policy */
+               if (policy != p->policy &&
+                       !p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
+                       return -EPERM;
+               /* can't increase priority */
+               if (policy != SCHED_NORMAL &&
+                   param->sched_priority > p->rt_priority &&
+                   param->sched_priority >
+                               p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
+                       return -EPERM;
+               /* can't change other user's priorities */
+               if ((current->euid != p->euid) &&
+                   (current->euid != p->uid))
+                       return -EPERM;
+       }
  
         retval = security_task_setscheduler(p, policy, param);
         if (retval)
@@ -3869,6 +3879,13 @@ asmlinkage long sys_sched_yield(void)
  
  static inline void __cond_resched(void)
  {
+       /*
+        * The BKS might be reacquired before we have dropped
+        * PREEMPT_ACTIVE, which could trigger a second
+        * cond_resched() call.
+        */
+       if (unlikely(preempt_count()))
+               return;
         do {
                 add_preempt_count(PREEMPT_ACTIVE);
                 schedule();
@@ -4158,6 +4175,14 @@ void show_state(void)
         read_unlock(&tasklist_lock);
  }
  
+/**
+ * init_idle - set up an idle thread for a given CPU
+ * @idle: task in question
+ * @cpu: cpu the idle task belongs to
+ *
+ * NOTE: this function does not set the idle thread's NEED_RESCHED
+ * flag, to make booting more robust.
+ */
  void __devinit init_idle(task_t *idle, int cpu)
  {
         runqueue_t *rq = cpu_rq(cpu);
@@ -4175,7 +4200,6 @@ void __devinit init_idle(task_t *idle, int cpu)
  #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
         idle->oncpu = 1;
  #endif
-       set_tsk_need_resched(idle);
         spin_unlock_irqrestore(&rq->lock, flags);
  
         /* Set the preempt count _outside_ the spinlocks! */
@@ -4319,8 +4343,7 @@ static int migration_thread(void * data)
                 struct list_head *head;
                 migration_req_t *req;
  
-               if (current->flags & PF_FREEZE)
-                       refrigerator(PF_FREEZE);
+               try_to_freeze();
  
                 spin_lock_irq(&rq->lock);
  
@@ -4608,7 +4631,7 @@ int __init migration_init(void)
  #endif
  
  #ifdef CONFIG_SMP
-#define SCHED_DOMAIN_DEBUG
+#undef SCHED_DOMAIN_DEBUG
  #ifdef SCHED_DOMAIN_DEBUG
  static void sched_domain_debug(struct sched_domain *sd, int cpu)
  {
@@ -4701,7 +4724,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
  #define sched_domain_debug(sd, cpu) {}
  #endif
  
-static int __devinit sd_degenerate(struct sched_domain *sd)
+static int sd_degenerate(struct sched_domain *sd)
  {
         if (cpus_weight(sd->span) == 1)
                 return 1;
@@ -4724,7 +4747,7 @@ static int __devinit sd_degenerate(struct sched_domain *sd)
         return 1;
  }
  
-static int __devinit sd_parent_degenerate(struct sched_domain *sd,
+static int sd_parent_degenerate(struct sched_domain *sd,
                                                 struct sched_domain *parent)
  {
         unsigned long cflags = sd->flags, pflags = parent->flags;
@@ -4756,7 +4779,7 @@ static int __devinit sd_parent_degenerate(struct sched_domain *sd,
   * Attach the domain 'sd' to 'cpu' as its base domain.  Callers must
   * hold the hotplug lock.
   */
-void __devinit cpu_attach_domain(struct sched_domain *sd, int cpu)
+void cpu_attach_domain(struct sched_domain *sd, int cpu)
  {
         runqueue_t *rq = cpu_rq(cpu);
         struct sched_domain *tmp;
@@ -4807,7 +4830,7 @@ __setup ("isolcpus=", isolated_cpu_setup);
   * covered by the given span, and will set each group's ->cpumask correctly,
   * and ->cpu_power to 0.
   */
-void __devinit init_sched_build_groups(struct sched_group groups[],
+void init_sched_build_groups(struct sched_group groups[],
                         cpumask_t span, int (*group_fn)(int cpu))
  {
         struct sched_group *first = NULL, *last = NULL;
@@ -4843,13 +4866,14 @@ void __devinit init_sched_build_groups(struct sched_group groups[],
  
  
  #ifdef ARCH_HAS_SCHED_DOMAIN
-extern void __devinit arch_init_sched_domains(void);
-extern void __devinit arch_destroy_sched_domains(void);
+extern void build_sched_domains(const cpumask_t *cpu_map);
+extern void arch_init_sched_domains(const cpumask_t *cpu_map);
+extern void arch_destroy_sched_domains(const cpumask_t *cpu_map);
  #else
  #ifdef CONFIG_SCHED_SMT
  static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
  static struct sched_group sched_group_cpus[NR_CPUS];
-static int __devinit cpu_to_cpu_group(int cpu)
+static int cpu_to_cpu_group(int cpu)
  {
         return cpu;
  }
@@ -4857,7 +4881,7 @@ static int __devinit cpu_to_cpu_group(int cpu)
  
  static DEFINE_PER_CPU(struct sched_domain, phys_domains);
  static struct sched_group sched_group_phys[NR_CPUS];
-static int __devinit cpu_to_phys_group(int cpu)
+static int cpu_to_phys_group(int cpu)
  {
  #ifdef CONFIG_SCHED_SMT
         return first_cpu(cpu_sibling_map[cpu]);
@@ -4870,7 +4894,7 @@ static int __devinit cpu_to_phys_group(int cpu)
  
  static DEFINE_PER_CPU(struct sched_domain, node_domains);
  static struct sched_group sched_group_nodes[MAX_NUMNODES];
-static int __devinit cpu_to_node_group(int cpu)
+static int cpu_to_node_group(int cpu)
  {
         return cpu_to_node(cpu);
  }
@@ -4901,39 +4925,28 @@ static void check_sibling_maps(void)
  #endif
  
  /*
- * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
+ * Build sched domains for a given set of cpus and attach the sched domains
+ * to the individual cpus.
   */
-static void __devinit arch_init_sched_domains(void)
+static void build_sched_domains(const cpumask_t *cpu_map)
  {
         int i;
-       cpumask_t cpu_default_map;
-
-#if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA)
-       check_sibling_maps();
-#endif
-       /*
-        * Setup mask for cpus without special case scheduling requirements.
-        * For now this just excludes isolated cpus, but could be used to
-        * exclude other special cases in the future.
-        */
-       cpus_complement(cpu_default_map, cpu_isolated_map);
-       cpus_and(cpu_default_map, cpu_default_map, cpu_online_map);
  
         /*
-        * Set up domains. Isolated domains just stay on the NULL domain.
+        * Set up domains for cpus specified by the cpu_map.
          */
-       for_each_cpu_mask(i, cpu_default_map) {
+       for_each_cpu_mask(i, *cpu_map) {
                 int group;
                 struct sched_domain *sd = NULL, *p;
                 cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));
  
-               cpus_and(nodemask, nodemask, cpu_default_map);
+               cpus_and(nodemask, nodemask, *cpu_map);
  
  #ifdef CONFIG_NUMA
                 sd = &per_cpu(node_domains, i);
                 group = cpu_to_node_group(i);
                 *sd = SD_NODE_INIT;
-               sd->span = cpu_default_map;
+               sd->span = *cpu_map;
                 sd->groups = &sched_group_nodes[group];
  #endif
  
@@ -4951,7 +4964,7 @@ static void __devinit arch_init_sched_domains(void)
                 group = cpu_to_cpu_group(i);
                 *sd = SD_SIBLING_INIT;
                 sd->span = cpu_sibling_map[i];
-               cpus_and(sd->span, sd->span, cpu_default_map);
+               cpus_and(sd->span, sd->span, *cpu_map);
                 sd->parent = p;
                 sd->groups = &sched_group_cpus[group];
  #endif
@@ -4961,7 +4974,7 @@ static void __devinit arch_init_sched_domains(void)
         /* Set up CPU (sibling) groups */
         for_each_online_cpu(i) {
                 cpumask_t this_sibling_map = cpu_sibling_map[i];
-               cpus_and(this_sibling_map, this_sibling_map, cpu_default_map);
+               cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
                 if (i != first_cpu(this_sibling_map))
                         continue;
  
@@ -4974,7 +4987,7 @@ static void __devinit arch_init_sched_domains(void)
         for (i = 0; i < MAX_NUMNODES; i++) {
                 cpumask_t nodemask = node_to_cpumask(i);
  
-               cpus_and(nodemask, nodemask, cpu_default_map);
+               cpus_and(nodemask, nodemask, *cpu_map);
                 if (cpus_empty(nodemask))
                         continue;
  
@@ -4984,12 +4997,12 @@ static void __devinit arch_init_sched_domains(void)
  
  #ifdef CONFIG_NUMA
         /* Set up node groups */
-       init_sched_build_groups(sched_group_nodes, cpu_default_map,
+       init_sched_build_groups(sched_group_nodes, *cpu_map,
                                         &cpu_to_node_group);
  #endif
  
         /* Calculate CPU power for physical packages and nodes */
-       for_each_cpu_mask(i, cpu_default_map) {
+       for_each_cpu_mask(i, *cpu_map) {
                 int power;
                 struct sched_domain *sd;
  #ifdef CONFIG_SCHED_SMT
@@ -5013,7 +5026,7 @@ static void __devinit arch_init_sched_domains(void)
         }
  
         /* Attach the domains */
-       for_each_online_cpu(i) {
+       for_each_cpu_mask(i, *cpu_map) {
                 struct sched_domain *sd;
  #ifdef CONFIG_SCHED_SMT
                 sd = &per_cpu(cpu_domains, i);
@@ -5023,16 +5036,71 @@ static void __devinit arch_init_sched_domains(void)
                 cpu_attach_domain(sd, i);
         }
  }
+/*
+ * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
+ */
+static void arch_init_sched_domains(cpumask_t *cpu_map)
+{
+       cpumask_t cpu_default_map;
  
-#ifdef CONFIG_HOTPLUG_CPU
-static void __devinit arch_destroy_sched_domains(void)
+#if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA)
+       check_sibling_maps();
+#endif
+       /*
+        * Setup mask for cpus without special case scheduling requirements.
+        * For now this just excludes isolated cpus, but could be used to
+        * exclude other special cases in the future.
+        */
+       cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);
+
+       build_sched_domains(&cpu_default_map);
+}
+
+static void arch_destroy_sched_domains(const cpumask_t *cpu_map)
  {
         /* Do nothing: everything is statically allocated. */
  }
-#endif
  
  #endif /* ARCH_HAS_SCHED_DOMAIN */
  
+/*
+ * Detach sched domains from a group of cpus specified in cpu_map.
+ * These cpus will now be attached to the NULL domain.
+ */
+static inline void detach_destroy_domains(const cpumask_t *cpu_map)
+{
+       int i;
+
+       for_each_cpu_mask(i, *cpu_map)
+               cpu_attach_domain(NULL, i);
+       synchronize_sched();
+       arch_destroy_sched_domains(cpu_map);
+}
+
+/*
+ * Partition sched domains as specified by the two cpumasks passed in.
+ * This attaches all cpus from the cpumasks to the NULL domain,
+ * waits for an RCU quiescent period, recalculates sched
+ * domain information and then attaches them back to the
+ * correct sched domains.  Both masks are first restricted, in place,
+ * to the online cpus.  Call with the hotplug lock held.
+ */
+void partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2)
+{
+       cpumask_t change_map;
+
+       cpus_and(*partition1, *partition1, cpu_online_map);
+       cpus_and(*partition2, *partition2, cpu_online_map);
+       cpus_or(change_map, *partition1, *partition2);
+
+       /* Detach sched domains from all of the affected cpus */
+       detach_destroy_domains(&change_map);
+       if (!cpus_empty(*partition1))
+               build_sched_domains(partition1);
+       if (!cpus_empty(*partition2))
+               build_sched_domains(partition2);
+}
+
  #ifdef CONFIG_HOTPLUG_CPU
  /*
   * Force a reinitialization of the sched domains hierarchy.  The domains
@@ -5043,15 +5111,10 @@ static void __devinit arch_destroy_sched_domains(void)
  static int update_sched_domains(struct notifier_block *nfb,
                                 unsigned long action, void *hcpu)
  {
-       int i;
-
         switch (action) {
         case CPU_UP_PREPARE:
         case CPU_DOWN_PREPARE:
-               for_each_online_cpu(i)
-                       cpu_attach_domain(NULL, i);
-               synchronize_kernel();
-               arch_destroy_sched_domains();
+               detach_destroy_domains(&cpu_online_map);
                 return NOTIFY_OK;
  
         case CPU_UP_CANCELED:
@@ -5067,7 +5130,7 @@ static int update_sched_domains(struct notifier_block *nfb,
         }
  
         /* The hotplug lock is already held by cpu_up/cpu_down */
-       arch_init_sched_domains();
+       arch_init_sched_domains(&cpu_online_map);
  
         return NOTIFY_OK;
  }
@@ -5076,7 +5139,7 @@ static int update_sched_domains(struct notifier_block *nfb,
  void __init sched_init_smp(void)
  {
         lock_cpu_hotplug();
-       arch_init_sched_domains();
+       arch_init_sched_domains(&cpu_online_map);
         unlock_cpu_hotplug();
         /* XXX: Theoretical race here - CPU may be hotplugged now */
         hotcpu_notifier(update_sched_domains, 0);