/* arch/x86/kernel/smpboot.c */

#include <linux/init.h>
#include <linux/smp.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/bootmem.h>

#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/smp.h>
#include <asm/cpu.h>
#include <asm/numa.h>

#include <mach_apic.h>

/* Number of siblings per CPU package */
int smp_num_siblings = 1;
EXPORT_SYMBOL(smp_num_siblings);

/* Last level cache ID of each logical CPU */
DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;

/* bitmap of online cpus */
cpumask_t cpu_online_map __read_mostly;
EXPORT_SYMBOL(cpu_online_map);

cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);

/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);

/* representing HT and core siblings of each logical CPU */
DEFINE_PER_CPU(cpumask_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);

/* Per CPU bogomips and other parameters */
DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);

/* ready for x86_64, no harm for x86, since it will overwrite after alloc */
unsigned char *trampoline_base = __va(SMP_TRAMPOLINE_BASE);

/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;

/* Set if we find a B stepping CPU */
int __cpuinitdata smp_b_stepping;

#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)

/* which logical CPUs are on which nodes */
cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly =
                                { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
EXPORT_SYMBOL(node_to_cpumask_map);
/* which node each logical CPU is on */
int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
EXPORT_SYMBOL(cpu_to_node_map);

/* set up a mapping between cpu and node. */
static void map_cpu_to_node(int cpu, int node)
{
        printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
        cpu_set(cpu, node_to_cpumask_map[node]);
        cpu_to_node_map[cpu] = node;
}

/* undo a mapping between cpu and node. */
static void unmap_cpu_to_node(int cpu)
{
        int node;

        printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
        for (node = 0; node < MAX_NUMNODES; node++)
                cpu_clear(cpu, node_to_cpumask_map[node]);
        cpu_to_node_map[cpu] = 0;
}
#else /* !(CONFIG_NUMA && CONFIG_X86_32) */
#define map_cpu_to_node(cpu, node)      ({})
#define unmap_cpu_to_node(cpu)  ({})
#endif

#ifdef CONFIG_X86_32
u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
                                        { [0 ... NR_CPUS-1] = BAD_APICID };

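/*
 * Record the logical APIC ID of the calling CPU and bind the CPU to the
 * NUMA node that APIC ID maps to, falling back to the first online node.
 */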
void map_cpu_to_logical_apicid(void)
{
        int cpu = smp_processor_id();
        int apicid = logical_smp_processor_id();
        int node = apicid_to_node(apicid);

        if (!node_online(node))
                node = first_online_node;

        cpu_2_logical_apicid[cpu] = apicid;
        map_cpu_to_node(cpu, node);
}

void unmap_cpu_to_logical_apicid(int cpu)
{
        cpu_2_logical_apicid[cpu] = BAD_APICID;
        unmap_cpu_to_node(cpu);
}
#else
#define unmap_cpu_to_logical_apicid(cpu) do {} while (0)
#define map_cpu_to_logical_apicid()  do {} while (0)
#endif

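/*
 * Apply per-CPU SMP quirks on 32-bit: note B-stepping Pentium parts and
 * taint the kernel when an AMD K7 that is not certified MP-capable is
 * brought up alongside other CPUs.
 */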
static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
        /*
         * Mask B, Pentium, but not Pentium MMX
         */
        if (c->x86_vendor == X86_VENDOR_INTEL &&
            c->x86 == 5 &&
            c->x86_mask >= 1 && c->x86_mask <= 4 &&
            c->x86_model <= 3)
                /*
                 * Remember we have B step Pentia with bugs
                 */
                smp_b_stepping = 1;

        /*
         * Certain Athlons might work (for various values of 'work') in SMP
         * but they are not certified as MP capable.
         */
        if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {

                if (num_possible_cpus() == 1)
                        goto valid_k7;

                /* Athlon 660/661 is valid. */
                if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
                    (c->x86_mask == 1)))
                        goto valid_k7;

                /* Duron 670 is valid */
                if ((c->x86_model == 7) && (c->x86_mask == 0))
                        goto valid_k7;

                /*
                 * Athlon 662, Duron 671, and Athlon >model 7 have the MP
                 * capability bit. It's worth noting that the A5 stepping
                 * (662) of some Athlon XPs has the MP bit set.
                 * See http://www.heise.de/newsticker/data/jow-18.10.01-000
                 * for more.
                 */
                if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
                    ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
                     (c->x86_model > 7))
                        if (cpu_has_mp)
                                goto valid_k7;

                /* If we get here, not a certified SMP capable AMD system. */
                add_taint(TAINT_UNSAFE_SMP);
        }

valid_k7:
        ;
#endif
}

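/*
 * Report the quirks detected above once the secondary CPUs have been
 * brought up: warn about B-stepping Pentiums, and warn (or drop the taint)
 * when an uncertified AMD MP configuration was found.
 */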
void smp_checks(void)
{
        if (smp_b_stepping)
                printk(KERN_WARNING "WARNING: SMP operation may be unreliable "
                                    "with B stepping processors.\n");

        /*
         * Don't taint if we are running SMP kernel on a single non-MP
         * approved Athlon
         */
        if (tainted & TAINT_UNSAFE_SMP) {
                if (num_online_cpus())
                        printk(KERN_INFO "WARNING: This combination of AMD "
                                "processors is not suitable for SMP.\n");
                else
                        tainted &= ~TAINT_UNSAFE_SMP;
        }
}


/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU
 */

void __cpuinit smp_store_cpu_info(int id)
{
        struct cpuinfo_x86 *c = &cpu_data(id);

        *c = boot_cpu_data;
        c->cpu_index = id;
        if (id != 0)
                identify_secondary_cpu(c);
        smp_apply_quirks(c);
}

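/*
 * Build the HT-sibling, core and last-level-cache sharing maps for @cpu
 * against every CPU already in cpu_sibling_setup_map, and keep the
 * booted_cores counts of all CPUs in the package up to date.
 */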
void __cpuinit set_cpu_sibling_map(int cpu)
{
        int i;
        struct cpuinfo_x86 *c = &cpu_data(cpu);

        cpu_set(cpu, cpu_sibling_setup_map);

        if (smp_num_siblings > 1) {
                for_each_cpu_mask(i, cpu_sibling_setup_map) {
                        if (c->phys_proc_id == cpu_data(i).phys_proc_id &&
                            c->cpu_core_id == cpu_data(i).cpu_core_id) {
                                cpu_set(i, per_cpu(cpu_sibling_map, cpu));
                                cpu_set(cpu, per_cpu(cpu_sibling_map, i));
                                cpu_set(i, per_cpu(cpu_core_map, cpu));
                                cpu_set(cpu, per_cpu(cpu_core_map, i));
                                cpu_set(i, c->llc_shared_map);
                                cpu_set(cpu, cpu_data(i).llc_shared_map);
                        }
                }
        } else {
                cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
        }

        cpu_set(cpu, c->llc_shared_map);

        if (current_cpu_data.x86_max_cores == 1) {
                per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
                c->booted_cores = 1;
                return;
        }

        for_each_cpu_mask(i, cpu_sibling_setup_map) {
                if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
                    per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
                        cpu_set(i, c->llc_shared_map);
                        cpu_set(cpu, cpu_data(i).llc_shared_map);
                }
                if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
                        cpu_set(i, per_cpu(cpu_core_map, cpu));
                        cpu_set(cpu, per_cpu(cpu_core_map, i));
                        /*
                         *  Does this new cpu bring up a new core?
                         */
                        if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) {
                                /*
                                 * for each core in package, increment
                                 * the booted_cores for this new cpu
                                 */
                                if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
                                        c->booted_cores++;
                                /*
                                 * increment the core count for all
                                 * the other cpus in this package
                                 */
                                if (i != cpu)
                                        cpu_data(i).booted_cores++;
                        } else if (i != cpu && !c->booted_cores)
                                c->booted_cores = cpu_data(i).booted_cores;
                }
        }
}

/* maps the cpu to the sched domain representing multi-core */
cpumask_t cpu_coregroup_map(int cpu)
{
        struct cpuinfo_x86 *c = &cpu_data(cpu);
        /*
         * For perf, we return last level cache shared map.
         * And for power savings, we return cpu_core_map
         */
        if (sched_mc_power_savings || sched_smt_power_savings)
                return per_cpu(cpu_core_map, cpu);
        else
                return c->llc_shared_map;
}

/*
 * Currently trivial. Write the real->protected mode
 * bootstrap into the page concerned. The caller
 * has made sure it's suitably aligned.
 */

unsigned long __cpuinit setup_trampoline(void)
{
        memcpy(trampoline_base, trampoline_data,
               trampoline_end - trampoline_data);
        return virt_to_phys(trampoline_base);
}

#ifdef CONFIG_X86_32
/*
 * We are called very early to get the low memory for the
 * SMP bootup trampoline page.
 */
void __init smp_alloc_memory(void)
{
        trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
        /*
         * Has to be in very low memory so we can execute
         * real-mode AP code.
         */
        if (__pa(trampoline_base) >= 0x9F000)
                BUG();
}
#endif

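/*
 * Sum loops_per_jiffy over all CPUs that have been called out and print
 * the combined BogoMIPS figure.
 */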
void impress_friends(void)
{
        int cpu;
        unsigned long bogosum = 0;
        /*
         * Allow the user to impress friends.
         */
        Dprintk("Before bogomips.\n");
        for_each_possible_cpu(cpu)
                if (cpu_isset(cpu, cpu_callout_map))
                        bogosum += cpu_data(cpu).loops_per_jiffy;
        printk(KERN_INFO
                "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
                num_online_cpus(),
                bogosum/(500000/HZ),
                (bogosum/(5000/HZ))%100);

        Dprintk("Before bogocount - setting activated=1.\n");
}

#ifdef CONFIG_HOTPLUG_CPU
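/*
 * Undo set_cpu_sibling_map() for a CPU that is going offline: drop it from
 * the sibling and core maps of all other CPUs, fix up booted_cores, and
 * clear its own topology state.
 */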
void remove_siblinginfo(int cpu)
{
        int sibling;
        struct cpuinfo_x86 *c = &cpu_data(cpu);

        for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
                cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
                /*
                 * last thread sibling in this cpu core going down
                 */
                if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
                        cpu_data(sibling).booted_cores--;
        }

        for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
                cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
        cpus_clear(per_cpu(cpu_sibling_map, cpu));
        cpus_clear(per_cpu(cpu_core_map, cpu));
        c->phys_proc_id = 0;
        c->cpu_core_id = 0;
        cpu_clear(cpu, cpu_sibling_setup_map);
}

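/*
 * Extra hotplug CPUs to reserve in cpu_possible_map; -1 means "not set on
 * the command line", in which case prefill_possible_map() falls back to
 * the count of BIOS-disabled CPUs.
 */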
int additional_cpus __initdata = -1;

static __init int setup_additional_cpus(char *s)
{
        return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL;
}
early_param("additional_cpus", setup_additional_cpus);

/*
 * cpu_possible_map should be static: it cannot change as CPUs are
 * onlined or offlined. The reason is that per-cpu data structures
 * are allocated by some modules at init time, and they don't expect
 * to do this dynamically on CPU arrival/departure.
 * cpu_present_map, on the other hand, can change dynamically.
 * When cpu_hotplug is not compiled in, we resort to the current
 * behaviour, which is cpu_possible == cpu_present.
 * - Ashok Raj
 *
 * Three ways to find out the number of additional hotplug CPUs:
 * - If the BIOS specified disabled CPUs in ACPI/mptables, use that.
 * - The user can override it with additional_cpus=NUM.
 * - Otherwise don't reserve additional CPUs.
 * We do this because additional CPUs waste a lot of memory.
 * -AK
 */
__init void prefill_possible_map(void)
{
        int i;
        int possible;

        if (additional_cpus == -1) {
                if (disabled_cpus > 0)
                        additional_cpus = disabled_cpus;
                else
                        additional_cpus = 0;
        }
        possible = num_processors + additional_cpus;
        if (possible > NR_CPUS)
                possible = NR_CPUS;

        printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
                possible, max_t(int, possible - num_processors, 0));

        for (i = 0; i < possible; i++)
                cpu_set(i, cpu_possible_map);
}

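/*
 * Take a CPU out of the various bookkeeping masks once it has been offlined.
 * On 64-bit this also clears the callout/callin bits, the cpu_initialized
 * bit that cpu_init() set, and the node cpumask entry.
 */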
static void __ref remove_cpu_from_maps(int cpu)
{
        cpu_clear(cpu, cpu_online_map);
#ifdef CONFIG_X86_64
        cpu_clear(cpu, cpu_callout_map);
        cpu_clear(cpu, cpu_callin_map);
        /* was set by cpu_init() */
        clear_bit(cpu, (unsigned long *)&cpu_initialized);
        clear_node_cpumask(cpu);
#endif
}

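/*
 * Arch hook for CPU hotplug removal: refuse to take down the boot CPU,
 * quiesce the local APIC and NMI watchdog, let queued timer interrupts
 * drain, then drop the CPU from the topology and online maps.
 */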
int __cpu_disable(void)
{
        int cpu = smp_processor_id();

        /*
         * Perhaps use cpufreq to drop frequency, but that could go
         * into generic code.
         *
         * We won't take down the boot processor on i386 due to some
         * interrupts only being able to be serviced by the BSP.
         * Especially so if we're not using an IOAPIC   -zwane
         */
        if (cpu == 0)
                return -EBUSY;

        if (nmi_watchdog == NMI_LOCAL_APIC)
                stop_apic_nmi_watchdog(NULL);
        clear_local_APIC();

        /*
         * HACK:
         * Allow any queued timer interrupts to get serviced.
         * This is only a temporary solution until we clean up
         * fixup_irqs as we do for IA64.
         */
        local_irq_enable();
        mdelay(1);

        local_irq_disable();
        remove_siblinginfo(cpu);

        /* It's now safe to remove this processor from the online map */
        remove_cpu_from_maps(cpu);
        fixup_irqs(cpu_online_map);
        return 0;
}

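/*
 * Wait up to one second for the dying CPU to reach CPU_DEAD (set by
 * play_dead()); if it never does, complain.
 */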
void __cpu_die(unsigned int cpu)
{
        /* We don't do anything here: idle task is faking death itself. */
        unsigned int i;

        for (i = 0; i < 10; i++) {
                /* They ack this in play_dead by setting CPU_DEAD */
                if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
                        printk(KERN_INFO "CPU %d is now offline\n", cpu);
                        if (1 == num_online_cpus())
                                alternatives_smp_switch(0);
                        return;
                }
                msleep(100);
        }
        printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
#else /* ... !CONFIG_HOTPLUG_CPU */
int __cpu_disable(void)
{
        return -ENOSYS;
}

void __cpu_die(unsigned int cpu)
{
        /* We said "no" in __cpu_disable */
        BUG();
}
#endif

/*
 * If the BIOS enumerates physical processors before logical,
 * maxcpus=N at enumeration-time can be used to disable HT.
 */
static int __init parse_maxcpus(char *arg)
{
        extern unsigned int maxcpus;

        maxcpus = simple_strtoul(arg, NULL, 0);
        return 0;
}
early_param("maxcpus", parse_maxcpus);