X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=kernel%2Fsched.c;h=0dc757246d89b02c96593df44ff8d75acb93e711;hb=5fc77247f7db01b6377a5ea6ab18c8ac60021045;hp=48e35c9163263cc1c7813a715ddfcf32f8dff31f;hpb=62ab616d54371a65f595c199aad1e1755b837d25;p=linux-2.6 diff --git a/kernel/sched.c b/kernel/sched.c index 48e35c9163..0dc757246d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -56,6 +56,16 @@ #include +/* + * Scheduler clock - returns current time in nanosec units. + * This is default implementation. + * Architectures and sub-architectures can override this. + */ +unsigned long long __attribute__((weak)) sched_clock(void) +{ + return (unsigned long long)jiffies * (1000000000 / HZ); +} + /* * Convert user-nice values [ -20 ... 0 ... 19 ] * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], @@ -466,7 +476,8 @@ static int show_schedstat(struct seq_file *seq, void *v) seq_printf(seq, "domain%d %s", dcnt++, mask_str); for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES; itype++) { - seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu", + seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu " + "%lu", sd->lb_cnt[itype], sd->lb_balanced[itype], sd->lb_failed[itype], @@ -476,11 +487,13 @@ static int show_schedstat(struct seq_file *seq, void *v) sd->lb_nobusyq[itype], sd->lb_nobusyg[itype]); } - seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", + seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu" + " %lu %lu %lu\n", sd->alb_cnt, sd->alb_failed, sd->alb_pushed, sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed, sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed, - sd->ttwu_wake_remote, sd->ttwu_move_affine, sd->ttwu_move_balance); + sd->ttwu_wake_remote, sd->ttwu_move_affine, + sd->ttwu_move_balance); } preempt_enable(); #endif @@ -1454,7 +1467,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) if (this_sd->flags & SD_WAKE_AFFINE) { unsigned long tl = this_load; - unsigned long tl_per_task = cpu_avg_load_per_task(this_cpu); + unsigned long tl_per_task; + + tl_per_task = cpu_avg_load_per_task(this_cpu); /* * If sync wakeup then subtract the (maximum possible) @@ -1562,6 +1577,7 @@ int fastcall wake_up_state(struct task_struct *p, unsigned int state) return try_to_wake_up(p, state, 0); } +static void task_running_tick(struct rq *rq, struct task_struct *p); /* * Perform scheduler related setup for a newly forked process p. * p is forked by current. @@ -1622,7 +1638,7 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags) * runqueue lock is not a problem. */ current->time_slice = 1; - scheduler_tick(); + task_running_tick(cpu_rq(cpu), current); } local_irq_enable(); put_cpu(); @@ -1837,6 +1853,13 @@ context_switch(struct rq *rq, struct task_struct *prev, struct mm_struct *mm = next->mm; struct mm_struct *oldmm = prev->active_mm; + /* + * For paravirt, this is coupled with an exit in switch_to to + * combine the page table reload and the switch backend into + * one hypercall. + */ + arch_enter_lazy_cpu_mode(); + if (!mm) { next->active_mm = oldmm; atomic_inc(&oldmm->mm_count); @@ -2487,18 +2510,21 @@ small_imbalance: pwr_now /= SCHED_LOAD_SCALE; /* Amount of load we'd subtract */ - tmp = busiest_load_per_task*SCHED_LOAD_SCALE/busiest->cpu_power; + tmp = busiest_load_per_task * SCHED_LOAD_SCALE / + busiest->cpu_power; if (max_load > tmp) pwr_move += busiest->cpu_power * min(busiest_load_per_task, max_load - tmp); /* Amount of load we'd add */ - if (max_load*busiest->cpu_power < - busiest_load_per_task*SCHED_LOAD_SCALE) - tmp = max_load*busiest->cpu_power/this->cpu_power; + if (max_load * busiest->cpu_power < + busiest_load_per_task * SCHED_LOAD_SCALE) + tmp = max_load * busiest->cpu_power / this->cpu_power; else - tmp = busiest_load_per_task*SCHED_LOAD_SCALE/this->cpu_power; - pwr_move += this->cpu_power*min(this_load_per_task, this_load + tmp); + tmp = busiest_load_per_task * SCHED_LOAD_SCALE / + this->cpu_power; + pwr_move += this->cpu_power * + min(this_load_per_task, this_load + tmp); pwr_move /= SCHED_LOAD_SCALE; /* Move if we gain throughput */ @@ -2878,14 +2904,16 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) static void update_load(struct rq *this_rq) { unsigned long this_load; - int i, scale; + unsigned int i, scale; this_load = this_rq->raw_weighted_load; /* Update our load: */ - for (i = 0, scale = 1; i < 3; i++, scale <<= 1) { + for (i = 0, scale = 1; i < 3; i++, scale += scale) { unsigned long old_load, new_load; + /* scale is effectively 1 << i now, and >> i divides by scale */ + old_load = this_rq->cpu_load[i]; new_load = this_load; /* @@ -2895,7 +2923,7 @@ static void update_load(struct rq *this_rq) */ if (new_load > old_load) new_load += scale-1; - this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale; + this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i; } } @@ -3366,7 +3394,8 @@ void fastcall add_preempt_count(int val) /* * Spinlock count overflowing soon? */ - DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10); + DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= + PREEMPT_MASK - 10); } EXPORT_SYMBOL(add_preempt_count); @@ -3420,6 +3449,8 @@ asmlinkage void __sched schedule(void) "%s/0x%08x/%d\n", current->comm, preempt_count(), current->pid); debug_show_held_locks(current); + if (irqs_disabled()) + print_irqtrace_events(current); dump_stack(); } profile_hit(SCHED_PROFILING, __builtin_return_address(0)); @@ -4181,13 +4212,12 @@ static void __setscheduler(struct task_struct *p, int policy, int prio) } /** - * sched_setscheduler - change the scheduling policy and/or RT priority of - * a thread. + * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. * @p: the task in question. * @policy: new policy. * @param: structure containing the new RT priority. * - * NOTE: the task may be already dead + * NOTE that the task may be already dead. */ int sched_setscheduler(struct task_struct *p, int policy, struct sched_param *param) @@ -4555,7 +4585,7 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, /** * sys_sched_yield - yield the current processor to other threads. * - * this function yields the current CPU by moving the calling thread + * This function yields the current CPU by moving the calling thread * to the expired array. If there are no other threads running on this * CPU then this function will return. */ @@ -4605,15 +4635,6 @@ asmlinkage long sys_sched_yield(void) return 0; } -static inline int __resched_legal(int expected_preempt_count) -{ - if (unlikely(preempt_count() != expected_preempt_count)) - return 0; - if (unlikely(system_state != SYSTEM_RUNNING)) - return 0; - return 1; -} - static void __cond_resched(void) { #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP @@ -4633,7 +4654,8 @@ static void __cond_resched(void) int __sched cond_resched(void) { - if (need_resched() && __resched_legal(0)) { + if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) && + system_state == SYSTEM_RUNNING) { __cond_resched(); return 1; } @@ -4659,7 +4681,7 @@ int cond_resched_lock(spinlock_t *lock) ret = 1; spin_lock(lock); } - if (need_resched() && __resched_legal(1)) { + if (need_resched() && system_state == SYSTEM_RUNNING) { spin_release(&lock->dep_map, 1, _THIS_IP_); _raw_spin_unlock(lock); preempt_enable_no_resched(); @@ -4675,7 +4697,7 @@ int __sched cond_resched_softirq(void) { BUG_ON(!in_softirq()); - if (need_resched() && __resched_legal(0)) { + if (need_resched() && system_state == SYSTEM_RUNNING) { raw_local_irq_disable(); _local_bh_enable(); raw_local_irq_enable(); @@ -4690,7 +4712,7 @@ EXPORT_SYMBOL(cond_resched_softirq); /** * yield - yield the current processor to other threads. * - * this is a shortcut for kernel-space yielding - it marks the + * This is a shortcut for kernel-space yielding - it marks the * thread runnable and calls sys_sched_yield(). */ void __sched yield(void) @@ -5439,16 +5461,19 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) if (!(sd->flags & SD_LOAD_BALANCE)) { printk("does not load-balance\n"); if (sd->parent) - printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent"); + printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain" + " has parent"); break; } printk("span %s\n", str); if (!cpu_isset(cpu, sd->span)) - printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu); + printk(KERN_ERR "ERROR: domain->span does not contain " + "CPU%d\n", cpu); if (!cpu_isset(cpu, group->cpumask)) - printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu); + printk(KERN_ERR "ERROR: domain->groups does not contain" + " CPU%d\n", cpu); printk(KERN_DEBUG); for (i = 0; i < level + 2; i++) @@ -5463,7 +5488,8 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) if (!group->cpu_power) { printk("\n"); - printk(KERN_ERR "ERROR: domain->cpu_power not set\n"); + printk(KERN_ERR "ERROR: domain->cpu_power not " + "set\n"); } if (!cpus_weight(group->cpumask)) { @@ -5486,15 +5512,17 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) printk("\n"); if (!cpus_equal(sd->span, groupmask)) - printk(KERN_ERR "ERROR: groups don't span domain->span\n"); + printk(KERN_ERR "ERROR: groups don't span " + "domain->span\n"); level++; sd = sd->parent; + if (!sd) + continue; - if (sd) { - if (!cpus_subset(groupmask, sd->span)) - printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n"); - } + if (!cpus_subset(groupmask, sd->span)) + printk(KERN_ERR "ERROR: parent span is not a superset " + "of domain->span\n"); } while (sd); } @@ -5590,7 +5618,7 @@ static void cpu_attach_domain(struct sched_domain *sd, int cpu) } /* cpus with isolated domains */ -static cpumask_t __cpuinitdata cpu_isolated_map = CPU_MASK_NONE; +static cpumask_t cpu_isolated_map = CPU_MASK_NONE; /* Setup the mask of cpus configured for isolated domains */ static int __init isolated_cpu_setup(char *str) @@ -5812,8 +5840,9 @@ __setup("max_cache_size=", setup_max_cache_size); */ static void touch_cache(void *__cache, unsigned long __size) { - unsigned long size = __size/sizeof(long), chunk1 = size/3, - chunk2 = 2*size/3; + unsigned long size = __size / sizeof(long); + unsigned long chunk1 = size / 3; + unsigned long chunk2 = 2 * size / 3; unsigned long *cache = __cache; int i; @@ -5922,11 +5951,11 @@ measure_cost(int cpu1, int cpu2, void *cache, unsigned int size) */ measure_one(cache, size, cpu1, cpu2); for (i = 0; i < ITERATIONS; i++) - cost1 += measure_one(cache, size - i*1024, cpu1, cpu2); + cost1 += measure_one(cache, size - i * 1024, cpu1, cpu2); measure_one(cache, size, cpu2, cpu1); for (i = 0; i < ITERATIONS; i++) - cost1 += measure_one(cache, size - i*1024, cpu2, cpu1); + cost1 += measure_one(cache, size - i * 1024, cpu2, cpu1); /* * (We measure the non-migrating [cached] cost on both @@ -5936,17 +5965,17 @@ measure_cost(int cpu1, int cpu2, void *cache, unsigned int size) measure_one(cache, size, cpu1, cpu1); for (i = 0; i < ITERATIONS; i++) - cost2 += measure_one(cache, size - i*1024, cpu1, cpu1); + cost2 += measure_one(cache, size - i * 1024, cpu1, cpu1); measure_one(cache, size, cpu2, cpu2); for (i = 0; i < ITERATIONS; i++) - cost2 += measure_one(cache, size - i*1024, cpu2, cpu2); + cost2 += measure_one(cache, size - i * 1024, cpu2, cpu2); /* * Get the per-iteration migration cost: */ - do_div(cost1, 2*ITERATIONS); - do_div(cost2, 2*ITERATIONS); + do_div(cost1, 2 * ITERATIONS); + do_div(cost2, 2 * ITERATIONS); return cost1 - cost2; } @@ -5984,7 +6013,7 @@ static unsigned long long measure_migration_cost(int cpu1, int cpu2) */ cache = vmalloc(max_size); if (!cache) { - printk("could not vmalloc %d bytes for cache!\n", 2*max_size); + printk("could not vmalloc %d bytes for cache!\n", 2 * max_size); return 1000000; /* return 1 msec on very small boxen */ } @@ -6009,7 +6038,8 @@ static unsigned long long measure_migration_cost(int cpu1, int cpu2) avg_fluct = (avg_fluct + fluct)/2; if (migration_debug) - printk("-> [%d][%d][%7d] %3ld.%ld [%3ld.%ld] (%ld): (%8Ld %8Ld)\n", + printk("-> [%d][%d][%7d] %3ld.%ld [%3ld.%ld] (%ld): " + "(%8Ld %8Ld)\n", cpu1, cpu2, size, (long)cost / 1000000, ((long)cost / 100000) % 10, @@ -6104,20 +6134,18 @@ static void calibrate_migration_costs(const cpumask_t *cpu_map) -1 #endif ); - if (system_state == SYSTEM_BOOTING) { - if (num_online_cpus() > 1) { - printk("migration_cost="); - for (distance = 0; distance <= max_distance; distance++) { - if (distance) - printk(","); - printk("%ld", (long)migration_cost[distance] / 1000); - } - printk("\n"); + if (system_state == SYSTEM_BOOTING && num_online_cpus() > 1) { + printk("migration_cost="); + for (distance = 0; distance <= max_distance; distance++) { + if (distance) + printk(","); + printk("%ld", (long)migration_cost[distance] / 1000); } + printk("\n"); } j1 = jiffies; if (migration_debug) - printk("migration: %ld seconds\n", (j1-j0)/HZ); + printk("migration: %ld seconds\n", (j1-j0) / HZ); /* * Move back to the original CPU. NUMA-Q gets confused @@ -6855,7 +6883,7 @@ void __init sched_init_smp(void) lock_cpu_hotplug(); arch_init_sched_domains(&cpu_online_map); - cpus_andnot(non_isolated_cpus, cpu_online_map, cpu_isolated_map); + cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map); if (cpus_empty(non_isolated_cpus)) cpu_set(smp_processor_id(), non_isolated_cpus); unlock_cpu_hotplug(); @@ -6962,6 +6990,8 @@ void __might_sleep(char *file, int line) printk("in_atomic():%d, irqs_disabled():%d\n", in_atomic(), irqs_disabled()); debug_show_held_locks(current); + if (irqs_disabled()) + print_irqtrace_events(current); dump_stack(); } #endif