err.no Git - linux-2.6/blob - arch/i386/kernel/setup.c
[PATCH] kdump: Retrieve saved max pfn
[linux-2.6] / arch / i386 / kernel / setup.c
1 /*
2  *  linux/arch/i386/kernel/setup.c
3  *
4  *  Copyright (C) 1995  Linus Torvalds
5  *
6  *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7  *
8  *  Memory region support
9  *      David Parsons <orc@pell.chi.il.us>, July-August 1999
10  *
11  *  Added E820 sanitization routine (removes overlapping memory regions);
12  *  Brian Moyle <bmoyle@mvista.com>, February 2001
13  *
14  * Moved CPU detection code to cpu/${cpu}.c
15  *    Patrick Mochel <mochel@osdl.org>, March 2002
16  *
17  *  Provisions for empty E820 memory regions (reported by certain BIOSes).
18  *  Alex Achenbach <xela@slit.de>, December 2002.
19  *
20  */
21
22 /*
23  * This file handles the architecture-dependent parts of initialization
24  */
25
26 #include <linux/config.h>
27 #include <linux/sched.h>
28 #include <linux/mm.h>
29 #include <linux/mmzone.h>
30 #include <linux/tty.h>
31 #include <linux/ioport.h>
32 #include <linux/acpi.h>
33 #include <linux/apm_bios.h>
34 #include <linux/initrd.h>
35 #include <linux/bootmem.h>
36 #include <linux/seq_file.h>
37 #include <linux/console.h>
38 #include <linux/mca.h>
39 #include <linux/root_dev.h>
40 #include <linux/highmem.h>
41 #include <linux/module.h>
42 #include <linux/efi.h>
43 #include <linux/init.h>
44 #include <linux/edd.h>
45 #include <linux/nodemask.h>
46 #include <linux/kexec.h>
47
48 #include <video/edid.h>
49
50 #include <asm/apic.h>
51 #include <asm/e820.h>
52 #include <asm/mpspec.h>
53 #include <asm/setup.h>
54 #include <asm/arch_hooks.h>
55 #include <asm/sections.h>
56 #include <asm/io_apic.h>
57 #include <asm/ist.h>
58 #include <asm/io.h>
59 #include "setup_arch_pre.h"
60 #include <bios_ebda.h>
61
62 /* Forward Declaration. */
63 void __init find_max_pfn(void);
64
65 /* This value is set up by the early boot code to point to the value
66    immediately after the boot time page tables.  It contains a *physical*
67    address, and must not be in the .bss segment! */
68 unsigned long init_pg_tables_end __initdata = ~0UL;
69
70 int disable_pse __devinitdata = 0;
71
72 /*
73  * Machine setup..
74  */
75
76 #ifdef CONFIG_EFI
77 int efi_enabled = 0;
78 EXPORT_SYMBOL(efi_enabled);
79 #endif
80
81 /* cpu data as detected by the assembly code in head.S */
82 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
83 /* common cpu data for all cpus */
84 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
85 EXPORT_SYMBOL(boot_cpu_data);
86
87 unsigned long mmu_cr4_features;
88
/* ACPI starts out enabled only when the interpreter is built in. */
#if defined(CONFIG_ACPI_INTERPRETER)
int acpi_disabled = 0;
#else
int acpi_disabled = 1;
#endif
EXPORT_SYMBOL(acpi_disabled);

#if defined(CONFIG_ACPI_BOOT)
/* Set by "acpi=force" in parse_cmdline_early(). */
int __initdata acpi_force = 0;
extern acpi_interrupt_flags acpi_sci_flags;
#endif

/* Intended for MCA, but available to any other user as well. */
unsigned int machine_id;
#if defined(CONFIG_MCA)
EXPORT_SYMBOL(machine_id);
#endif
unsigned int machine_submodel_id;
unsigned int BIOS_revision;
unsigned int mca_pentium_flag;

/* Start address for PCI or other memory-mapped resources. */
unsigned long pci_mem_start = 0x10000000;
#if defined(CONFIG_PCI)
EXPORT_SYMBOL(pci_mem_start);
#endif

/* Boot loader ID kept as an integer, for the benefit of proc_dointvec. */
int bootloader_type;

/* User-defined highmem size; -1 is the initial value. */
static unsigned int highmem_pages = -1;
121
122 /*
123  * Setup options
124  */
125 struct drive_info_struct { char dummy[32]; } drive_info;
126 #if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
127     defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
128 EXPORT_SYMBOL(drive_info);
129 #endif
130 struct screen_info screen_info;
131 #ifdef CONFIG_VT
132 EXPORT_SYMBOL(screen_info);
133 #endif
134 struct apm_info apm_info;
135 EXPORT_SYMBOL(apm_info);
136 struct sys_desc_table_struct {
137         unsigned short length;
138         unsigned char table[0];
139 };
140 struct edid_info edid_info;
141 struct ist_info ist_info;
142 #if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
143         defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
144 EXPORT_SYMBOL(ist_info);
145 #endif
146 struct e820map e820;
147
148 extern void early_cpu_init(void);
149 extern void dmi_scan_machine(void);
150 extern void generic_apic_probe(char *);
151 extern int root_mountflags;
152
153 unsigned long saved_videomode;
154
155 #define RAMDISK_IMAGE_START_MASK        0x07FF
156 #define RAMDISK_PROMPT_FLAG             0x8000
157 #define RAMDISK_LOAD_FLAG               0x4000  
158
159 static char command_line[COMMAND_LINE_SIZE];
160
161 unsigned char __initdata boot_params[PARAM_SIZE];
162
163 static struct resource data_resource = {
164         .name   = "Kernel data",
165         .start  = 0,
166         .end    = 0,
167         .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
168 };
169
170 static struct resource code_resource = {
171         .name   = "Kernel code",
172         .start  = 0,
173         .end    = 0,
174         .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
175 };
176
177 static struct resource system_rom_resource = {
178         .name   = "System ROM",
179         .start  = 0xf0000,
180         .end    = 0xfffff,
181         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
182 };
183
184 static struct resource extension_rom_resource = {
185         .name   = "Extension ROM",
186         .start  = 0xe0000,
187         .end    = 0xeffff,
188         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
189 };
190
191 static struct resource adapter_rom_resources[] = { {
192         .name   = "Adapter ROM",
193         .start  = 0xc8000,
194         .end    = 0,
195         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
196 }, {
197         .name   = "Adapter ROM",
198         .start  = 0,
199         .end    = 0,
200         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
201 }, {
202         .name   = "Adapter ROM",
203         .start  = 0,
204         .end    = 0,
205         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
206 }, {
207         .name   = "Adapter ROM",
208         .start  = 0,
209         .end    = 0,
210         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
211 }, {
212         .name   = "Adapter ROM",
213         .start  = 0,
214         .end    = 0,
215         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
216 }, {
217         .name   = "Adapter ROM",
218         .start  = 0,
219         .end    = 0,
220         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
221 } };
222
223 #define ADAPTER_ROM_RESOURCES \
224         (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
225
226 static struct resource video_rom_resource = {
227         .name   = "Video ROM",
228         .start  = 0xc0000,
229         .end    = 0xc7fff,
230         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
231 };
232
233 static struct resource video_ram_resource = {
234         .name   = "Video RAM area",
235         .start  = 0xa0000,
236         .end    = 0xbffff,
237         .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
238 };
239
240 static struct resource standard_io_resources[] = { {
241         .name   = "dma1",
242         .start  = 0x0000,
243         .end    = 0x001f,
244         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
245 }, {
246         .name   = "pic1",
247         .start  = 0x0020,
248         .end    = 0x0021,
249         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
250 }, {
251         .name   = "timer0",
252         .start  = 0x0040,
253         .end    = 0x0043,
254         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
255 }, {
256         .name   = "timer1",
257         .start  = 0x0050,
258         .end    = 0x0053,
259         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
260 }, {
261         .name   = "keyboard",
262         .start  = 0x0060,
263         .end    = 0x006f,
264         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
265 }, {
266         .name   = "dma page reg",
267         .start  = 0x0080,
268         .end    = 0x008f,
269         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
270 }, {
271         .name   = "pic2",
272         .start  = 0x00a0,
273         .end    = 0x00a1,
274         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
275 }, {
276         .name   = "dma2",
277         .start  = 0x00c0,
278         .end    = 0x00df,
279         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
280 }, {
281         .name   = "fpu",
282         .start  = 0x00f0,
283         .end    = 0x00ff,
284         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
285 } };
286
287 #define STANDARD_IO_RESOURCES \
288         (sizeof standard_io_resources / sizeof standard_io_resources[0])
289
/* True when the 16-bit word at @x holds the ROM signature 0xaa55. */
#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)

/*
 * Add up @length bytes starting at @rom, modulo 256.
 *
 * Returns 1 when the byte sum is zero (checksum okay), 0 otherwise.
 * A @length of zero sums nothing and therefore returns 1.
 */
static int __init romchecksum(unsigned char *rom, unsigned long length)
{
	unsigned char total = 0;
	unsigned long off;

	for (off = 0; off < length; off++)
		total += rom[off];

	return total == 0;
}
300
301 static void __init probe_roms(void)
302 {
303         unsigned long start, length, upper;
304         unsigned char *rom;
305         int           i;
306
307         /* video rom */
308         upper = adapter_rom_resources[0].start;
309         for (start = video_rom_resource.start; start < upper; start += 2048) {
310                 rom = isa_bus_to_virt(start);
311                 if (!romsignature(rom))
312                         continue;
313
314                 video_rom_resource.start = start;
315
316                 /* 0 < length <= 0x7f * 512, historically */
317                 length = rom[2] * 512;
318
319                 /* if checksum okay, trust length byte */
320                 if (length && romchecksum(rom, length))
321                         video_rom_resource.end = start + length - 1;
322
323                 request_resource(&iomem_resource, &video_rom_resource);
324                 break;
325         }
326
327         start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
328         if (start < upper)
329                 start = upper;
330
331         /* system rom */
332         request_resource(&iomem_resource, &system_rom_resource);
333         upper = system_rom_resource.start;
334
335         /* check for extension rom (ignore length byte!) */
336         rom = isa_bus_to_virt(extension_rom_resource.start);
337         if (romsignature(rom)) {
338                 length = extension_rom_resource.end - extension_rom_resource.start + 1;
339                 if (romchecksum(rom, length)) {
340                         request_resource(&iomem_resource, &extension_rom_resource);
341                         upper = extension_rom_resource.start;
342                 }
343         }
344
345         /* check for adapter roms on 2k boundaries */
346         for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
347                 rom = isa_bus_to_virt(start);
348                 if (!romsignature(rom))
349                         continue;
350
351                 /* 0 < length <= 0x7f * 512, historically */
352                 length = rom[2] * 512;
353
354                 /* but accept any length that fits if checksum okay */
355                 if (!length || start + length > upper || !romchecksum(rom, length))
356                         continue;
357
358                 adapter_rom_resources[i].start = start;
359                 adapter_rom_resources[i].end = start + length - 1;
360                 request_resource(&iomem_resource, &adapter_rom_resources[i]);
361
362                 start = adapter_rom_resources[i++].end & ~2047UL;
363         }
364 }
365
366 static void __init limit_regions(unsigned long long size)
367 {
368         unsigned long long current_addr = 0;
369         int i;
370
371         if (efi_enabled) {
372                 for (i = 0; i < memmap.nr_map; i++) {
373                         current_addr = memmap.map[i].phys_addr +
374                                        (memmap.map[i].num_pages << 12);
375                         if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
376                                 if (current_addr >= size) {
377                                         memmap.map[i].num_pages -=
378                                                 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
379                                         memmap.nr_map = i + 1;
380                                         return;
381                                 }
382                         }
383                 }
384         }
385         for (i = 0; i < e820.nr_map; i++) {
386                 if (e820.map[i].type == E820_RAM) {
387                         current_addr = e820.map[i].addr + e820.map[i].size;
388                         if (current_addr >= size) {
389                                 e820.map[i].size -= current_addr-size;
390                                 e820.nr_map = i + 1;
391                                 return;
392                         }
393                 }
394         }
395 }
396
397 static void __init add_memory_region(unsigned long long start,
398                                   unsigned long long size, int type)
399 {
400         int x;
401
402         if (!efi_enabled) {
403                 x = e820.nr_map;
404
405                 if (x == E820MAX) {
406                     printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
407                     return;
408                 }
409
410                 e820.map[x].addr = start;
411                 e820.map[x].size = size;
412                 e820.map[x].type = type;
413                 e820.nr_map++;
414         }
415 } /* add_memory_region */
416
417 #define E820_DEBUG      1
418
419 static void __init print_memory_map(char *who)
420 {
421         int i;
422
423         for (i = 0; i < e820.nr_map; i++) {
424                 printk(" %s: %016Lx - %016Lx ", who,
425                         e820.map[i].addr,
426                         e820.map[i].addr + e820.map[i].size);
427                 switch (e820.map[i].type) {
428                 case E820_RAM:  printk("(usable)\n");
429                                 break;
430                 case E820_RESERVED:
431                                 printk("(reserved)\n");
432                                 break;
433                 case E820_ACPI:
434                                 printk("(ACPI data)\n");
435                                 break;
436                 case E820_NVS:
437                                 printk("(ACPI NVS)\n");
438                                 break;
439                 default:        printk("type %lu\n", e820.map[i].type);
440                                 break;
441                 }
442         }
443 }
444
445 /*
446  * Sanitize the BIOS e820 map.
447  *
448  * Some e820 responses include overlapping entries.  The following 
449  * replaces the original e820 map with a new one, removing overlaps.
450  *
451  */
452 struct change_member {
453         struct e820entry *pbios; /* pointer to original bios entry */
454         unsigned long long addr; /* address for this change point */
455 };
456 static struct change_member change_point_list[2*E820MAX] __initdata;
457 static struct change_member *change_point[2*E820MAX] __initdata;
458 static struct e820entry *overlap_list[E820MAX] __initdata;
459 static struct e820entry new_bios[E820MAX] __initdata;
460
461 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
462 {
463         struct change_member *change_tmp;
464         unsigned long current_type, last_type;
465         unsigned long long last_addr;
466         int chgidx, still_changing;
467         int overlap_entries;
468         int new_bios_entry;
469         int old_nr, new_nr, chg_nr;
470         int i;
471
472         /*
473                 Visually we're performing the following (1,2,3,4 = memory types)...
474
475                 Sample memory map (w/overlaps):
476                    ____22__________________
477                    ______________________4_
478                    ____1111________________
479                    _44_____________________
480                    11111111________________
481                    ____________________33__
482                    ___________44___________
483                    __________33333_________
484                    ______________22________
485                    ___________________2222_
486                    _________111111111______
487                    _____________________11_
488                    _________________4______
489
490                 Sanitized equivalent (no overlap):
491                    1_______________________
492                    _44_____________________
493                    ___1____________________
494                    ____22__________________
495                    ______11________________
496                    _________1______________
497                    __________3_____________
498                    ___________44___________
499                    _____________33_________
500                    _______________2________
501                    ________________1_______
502                    _________________4______
503                    ___________________2____
504                    ____________________33__
505                    ______________________4_
506         */
507
508         /* if there's only one memory region, don't bother */
509         if (*pnr_map < 2)
510                 return -1;
511
512         old_nr = *pnr_map;
513
514         /* bail out if we find any unreasonable addresses in bios map */
515         for (i=0; i<old_nr; i++)
516                 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
517                         return -1;
518
519         /* create pointers for initial change-point information (for sorting) */
520         for (i=0; i < 2*old_nr; i++)
521                 change_point[i] = &change_point_list[i];
522
523         /* record all known change-points (starting and ending addresses),
524            omitting those that are for empty memory regions */
525         chgidx = 0;
526         for (i=0; i < old_nr; i++)      {
527                 if (biosmap[i].size != 0) {
528                         change_point[chgidx]->addr = biosmap[i].addr;
529                         change_point[chgidx++]->pbios = &biosmap[i];
530                         change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
531                         change_point[chgidx++]->pbios = &biosmap[i];
532                 }
533         }
534         chg_nr = chgidx;        /* true number of change-points */
535
536         /* sort change-point list by memory addresses (low -> high) */
537         still_changing = 1;
538         while (still_changing)  {
539                 still_changing = 0;
540                 for (i=1; i < chg_nr; i++)  {
541                         /* if <current_addr> > <last_addr>, swap */
542                         /* or, if current=<start_addr> & last=<end_addr>, swap */
543                         if ((change_point[i]->addr < change_point[i-1]->addr) ||
544                                 ((change_point[i]->addr == change_point[i-1]->addr) &&
545                                  (change_point[i]->addr == change_point[i]->pbios->addr) &&
546                                  (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
547                            )
548                         {
549                                 change_tmp = change_point[i];
550                                 change_point[i] = change_point[i-1];
551                                 change_point[i-1] = change_tmp;
552                                 still_changing=1;
553                         }
554                 }
555         }
556
557         /* create a new bios memory map, removing overlaps */
558         overlap_entries=0;       /* number of entries in the overlap table */
559         new_bios_entry=0;        /* index for creating new bios map entries */
560         last_type = 0;           /* start with undefined memory type */
561         last_addr = 0;           /* start with 0 as last starting address */
562         /* loop through change-points, determining affect on the new bios map */
563         for (chgidx=0; chgidx < chg_nr; chgidx++)
564         {
565                 /* keep track of all overlapping bios entries */
566                 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
567                 {
568                         /* add map entry to overlap list (> 1 entry implies an overlap) */
569                         overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
570                 }
571                 else
572                 {
573                         /* remove entry from list (order independent, so swap with last) */
574                         for (i=0; i<overlap_entries; i++)
575                         {
576                                 if (overlap_list[i] == change_point[chgidx]->pbios)
577                                         overlap_list[i] = overlap_list[overlap_entries-1];
578                         }
579                         overlap_entries--;
580                 }
581                 /* if there are overlapping entries, decide which "type" to use */
582                 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
583                 current_type = 0;
584                 for (i=0; i<overlap_entries; i++)
585                         if (overlap_list[i]->type > current_type)
586                                 current_type = overlap_list[i]->type;
587                 /* continue building up new bios map based on this information */
588                 if (current_type != last_type)  {
589                         if (last_type != 0)      {
590                                 new_bios[new_bios_entry].size =
591                                         change_point[chgidx]->addr - last_addr;
592                                 /* move forward only if the new size was non-zero */
593                                 if (new_bios[new_bios_entry].size != 0)
594                                         if (++new_bios_entry >= E820MAX)
595                                                 break;  /* no more space left for new bios entries */
596                         }
597                         if (current_type != 0)  {
598                                 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
599                                 new_bios[new_bios_entry].type = current_type;
600                                 last_addr=change_point[chgidx]->addr;
601                         }
602                         last_type = current_type;
603                 }
604         }
605         new_nr = new_bios_entry;   /* retain count for new bios entries */
606
607         /* copy new bios mapping into original location */
608         memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
609         *pnr_map = new_nr;
610
611         return 0;
612 }
613
614 /*
615  * Copy the BIOS e820 map into a safe place.
616  *
617  * Sanity-check it while we're at it..
618  *
619  * If we're lucky and live on a modern system, the setup code
620  * will have given us a memory map that we can use to properly
621  * set up memory.  If we aren't, we'll fake a memory map.
622  *
623  * We check to see that the memory map contains at least 2 elements
624  * before we'll use it, because the detection code in setup.S may
625  * not be perfect and most every PC known to man has two memory
626  * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
627  * thinkpad 560x, for example, does not cooperate with the memory
628  * detection code.)
629  */
630 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
631 {
632         /* Only one memory region (or negative)? Ignore it */
633         if (nr_map < 2)
634                 return -1;
635
636         do {
637                 unsigned long long start = biosmap->addr;
638                 unsigned long long size = biosmap->size;
639                 unsigned long long end = start + size;
640                 unsigned long type = biosmap->type;
641
642                 /* Overflow in 64 bits? Ignore the memory map. */
643                 if (start > end)
644                         return -1;
645
646                 /*
647                  * Some BIOSes claim RAM in the 640k - 1M region.
648                  * Not right. Fix it up.
649                  */
650                 if (type == E820_RAM) {
651                         if (start < 0x100000ULL && end > 0xA0000ULL) {
652                                 if (start < 0xA0000ULL)
653                                         add_memory_region(start, 0xA0000ULL-start, type);
654                                 if (end <= 0x100000ULL)
655                                         continue;
656                                 start = 0x100000ULL;
657                                 size = end - start;
658                         }
659                 }
660                 add_memory_region(start, size, type);
661         } while (biosmap++,--nr_map);
662         return 0;
663 }
664
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(edd);
#endif
#endif

/**
 * copy_edd() - Copy the BIOS EDD information
 *              from boot_params into a safe place.
 *
 * Does nothing unless EDD support is configured (built in or as a module).
 */
static inline void copy_edd(void)
{
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
	memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
	memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
	edd.mbr_signature_nr = EDD_MBR_SIG_NR;
	edd.edd_info_nr = EDD_NR;
#endif
}
687
/*
 * Never read the BIOS memory size location: it does not work on many
 * machines.  A fixed value is used instead.
 */
#define LOWMEMSIZE()	(0x9f000)
693
694 static void __init parse_cmdline_early (char ** cmdline_p)
695 {
696         char c = ' ', *to = command_line, *from = saved_command_line;
697         int len = 0;
698         int userdef = 0;
699
700         /* Save unparsed command line copy for /proc/cmdline */
701         saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
702
703         for (;;) {
704                 if (c != ' ')
705                         goto next_char;
706                 /*
707                  * "mem=nopentium" disables the 4MB page tables.
708                  * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
709                  * to <mem>, overriding the bios size.
710                  * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
711                  * <start> to <start>+<mem>, overriding the bios size.
712                  *
713                  * HPA tells me bootloaders need to parse mem=, so no new
714                  * option should be mem=  [also see Documentation/i386/boot.txt]
715                  */
716                 if (!memcmp(from, "mem=", 4)) {
717                         if (to != command_line)
718                                 to--;
719                         if (!memcmp(from+4, "nopentium", 9)) {
720                                 from += 9+4;
721                                 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
722                                 disable_pse = 1;
723                         } else {
724                                 /* If the user specifies memory size, we
725                                  * limit the BIOS-provided memory map to
726                                  * that size. exactmap can be used to specify
727                                  * the exact map. mem=number can be used to
728                                  * trim the existing memory map.
729                                  */
730                                 unsigned long long mem_size;
731  
732                                 mem_size = memparse(from+4, &from);
733                                 limit_regions(mem_size);
734                                 userdef=1;
735                         }
736                 }
737
738                 else if (!memcmp(from, "memmap=", 7)) {
739                         if (to != command_line)
740                                 to--;
741                         if (!memcmp(from+7, "exactmap", 8)) {
742 #ifdef CONFIG_CRASH_DUMP
743                                 /* If we are doing a crash dump, we
744                                  * still need to know the real mem
745                                  * size before original memory map is
746                                  * reset.
747                                  */
748                                 find_max_pfn();
749                                 saved_max_pfn = max_pfn;
750 #endif
751                                 from += 8+7;
752                                 e820.nr_map = 0;
753                                 userdef = 1;
754                         } else {
755                                 /* If the user specifies memory size, we
756                                  * limit the BIOS-provided memory map to
757                                  * that size. exactmap can be used to specify
758                                  * the exact map. mem=number can be used to
759                                  * trim the existing memory map.
760                                  */
761                                 unsigned long long start_at, mem_size;
762  
763                                 mem_size = memparse(from+7, &from);
764                                 if (*from == '@') {
765                                         start_at = memparse(from+1, &from);
766                                         add_memory_region(start_at, mem_size, E820_RAM);
767                                 } else if (*from == '#') {
768                                         start_at = memparse(from+1, &from);
769                                         add_memory_region(start_at, mem_size, E820_ACPI);
770                                 } else if (*from == '$') {
771                                         start_at = memparse(from+1, &from);
772                                         add_memory_region(start_at, mem_size, E820_RESERVED);
773                                 } else {
774                                         limit_regions(mem_size);
775                                         userdef=1;
776                                 }
777                         }
778                 }
779
780                 else if (!memcmp(from, "noexec=", 7))
781                         noexec_setup(from + 7);
782
783
784 #ifdef  CONFIG_X86_SMP
785                 /*
786                  * If the BIOS enumerates physical processors before logical,
787                  * maxcpus=N at enumeration-time can be used to disable HT.
788                  */
789                 else if (!memcmp(from, "maxcpus=", 8)) {
790                         extern unsigned int maxcpus;
791
792                         maxcpus = simple_strtoul(from + 8, NULL, 0);
793                 }
794 #endif
795
796 #ifdef CONFIG_ACPI_BOOT
797                 /* "acpi=off" disables both ACPI table parsing and interpreter */
798                 else if (!memcmp(from, "acpi=off", 8)) {
799                         disable_acpi();
800                 }
801
802                 /* acpi=force to over-ride black-list */
803                 else if (!memcmp(from, "acpi=force", 10)) {
804                         acpi_force = 1;
805                         acpi_ht = 1;
806                         acpi_disabled = 0;
807                 }
808
809                 /* acpi=strict disables out-of-spec workarounds */
810                 else if (!memcmp(from, "acpi=strict", 11)) {
811                         acpi_strict = 1;
812                 }
813
814                 /* Limit ACPI just to boot-time to enable HT */
815                 else if (!memcmp(from, "acpi=ht", 7)) {
816                         if (!acpi_force)
817                                 disable_acpi();
818                         acpi_ht = 1;
819                 }
820                 
821                 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
822                 else if (!memcmp(from, "pci=noacpi", 10)) {
823                         acpi_disable_pci();
824                 }
825                 /* "acpi=noirq" disables ACPI interrupt routing */
826                 else if (!memcmp(from, "acpi=noirq", 10)) {
827                         acpi_noirq_set();
828                 }
829
830                 else if (!memcmp(from, "acpi_sci=edge", 13))
831                         acpi_sci_flags.trigger =  1;
832
833                 else if (!memcmp(from, "acpi_sci=level", 14))
834                         acpi_sci_flags.trigger = 3;
835
836                 else if (!memcmp(from, "acpi_sci=high", 13))
837                         acpi_sci_flags.polarity = 1;
838
839                 else if (!memcmp(from, "acpi_sci=low", 12))
840                         acpi_sci_flags.polarity = 3;
841
842 #ifdef CONFIG_X86_IO_APIC
843                 else if (!memcmp(from, "acpi_skip_timer_override", 24))
844                         acpi_skip_timer_override = 1;
845 #endif
846
847 #ifdef CONFIG_X86_LOCAL_APIC
848                 /* disable IO-APIC */
849                 else if (!memcmp(from, "noapic", 6))
850                         disable_ioapic_setup();
851 #endif /* CONFIG_X86_LOCAL_APIC */
852 #endif /* CONFIG_ACPI_BOOT */
853
854 #ifdef CONFIG_X86_LOCAL_APIC
855                 /* enable local APIC */
856                 else if (!memcmp(from, "lapic", 5))
857                         lapic_enable();
858
859                 /* disable local APIC */
860                 else if (!memcmp(from, "nolapic", 6))
861                         lapic_disable();
862 #endif /* CONFIG_X86_LOCAL_APIC */
863
864 #ifdef CONFIG_KEXEC
865                 /* crashkernel=size@addr specifies the location to reserve for
866                  * a crash kernel.  By reserving this memory we guarantee
867                  * that linux never sets it up as a DMA target.
868                  * Useful for holding code to do something appropriate
869                  * after a kernel panic.
870                  */
871                 else if (!memcmp(from, "crashkernel=", 12)) {
872                         unsigned long size, base;
873                         size = memparse(from+12, &from);
874                         if (*from == '@') {
875                                 base = memparse(from+1, &from);
876                                 /* FIXME: Do I want a sanity check
877                                  * to validate the memory range?
878                                  */
879                                 crashk_res.start = base;
880                                 crashk_res.end   = base + size - 1;
881                         }
882                 }
883 #endif
884
885                 /*
886                  * highmem=size forces highmem to be exactly 'size' bytes.
887                  * This works even on boxes that have no highmem otherwise.
888                  * This also works to reduce highmem size on bigger boxes.
889                  */
890                 else if (!memcmp(from, "highmem=", 8))
891                         highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
892         
893                 /*
894                  * vmalloc=size forces the vmalloc area to be exactly 'size'
895                  * bytes. This can be used to increase (or decrease) the
896                  * vmalloc area - the default is 128m.
897                  */
898                 else if (!memcmp(from, "vmalloc=", 8))
899                         __VMALLOC_RESERVE = memparse(from+8, &from);
900
901         next_char:
902                 c = *(from++);
903                 if (!c)
904                         break;
905                 if (COMMAND_LINE_SIZE <= ++len)
906                         break;
907                 *(to++) = c;
908         }
909         *to = '\0';
910         *cmdline_p = command_line;
911         if (userdef) {
912                 printk(KERN_INFO "user-defined physical RAM map:\n");
913                 print_memory_map("user");
914         }
915 }
916
917 /*
918  * Callback for efi_memory_walk.
919  */
920 static int __init
921 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
922 {
923         unsigned long *max_pfn = arg, pfn;
924
925         if (start < end) {
926                 pfn = PFN_UP(end -1);
927                 if (pfn > *max_pfn)
928                         *max_pfn = pfn;
929         }
930         return 0;
931 }
932
933
934 /*
935  * Find the highest page frame number we have available
936  */
937 void __init find_max_pfn(void)
938 {
939         int i;
940
941         max_pfn = 0;
942         if (efi_enabled) {
943                 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
944                 return;
945         }
946
947         for (i = 0; i < e820.nr_map; i++) {
948                 unsigned long start, end;
949                 /* RAM? */
950                 if (e820.map[i].type != E820_RAM)
951                         continue;
952                 start = PFN_UP(e820.map[i].addr);
953                 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
954                 if (start >= end)
955                         continue;
956                 if (end > max_pfn)
957                         max_pfn = end;
958         }
959 }
960
961 /*
962  * Determine low and high memory ranges:
963  */
964 unsigned long __init find_max_low_pfn(void)
965 {
966         unsigned long max_low_pfn;
967
968         max_low_pfn = max_pfn;
969         if (max_low_pfn > MAXMEM_PFN) {
970                 if (highmem_pages == -1)
971                         highmem_pages = max_pfn - MAXMEM_PFN;
972                 if (highmem_pages + MAXMEM_PFN < max_pfn)
973                         max_pfn = MAXMEM_PFN + highmem_pages;
974                 if (highmem_pages + MAXMEM_PFN > max_pfn) {
975                         printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
976                         highmem_pages = 0;
977                 }
978                 max_low_pfn = MAXMEM_PFN;
979 #ifndef CONFIG_HIGHMEM
980                 /* Maximum memory usable is what is directly addressable */
981                 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
982                                         MAXMEM>>20);
983                 if (max_pfn > MAX_NONPAE_PFN)
984                         printk(KERN_WARNING "Use a PAE enabled kernel.\n");
985                 else
986                         printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
987                 max_pfn = MAXMEM_PFN;
988 #else /* !CONFIG_HIGHMEM */
989 #ifndef CONFIG_X86_PAE
990                 if (max_pfn > MAX_NONPAE_PFN) {
991                         max_pfn = MAX_NONPAE_PFN;
992                         printk(KERN_WARNING "Warning only 4GB will be used.\n");
993                         printk(KERN_WARNING "Use a PAE enabled kernel.\n");
994                 }
995 #endif /* !CONFIG_X86_PAE */
996 #endif /* !CONFIG_HIGHMEM */
997         } else {
998                 if (highmem_pages == -1)
999                         highmem_pages = 0;
1000 #ifdef CONFIG_HIGHMEM
1001                 if (highmem_pages >= max_pfn) {
1002                         printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
1003                         highmem_pages = 0;
1004                 }
1005                 if (highmem_pages) {
1006                         if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
1007                                 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
1008                                 highmem_pages = 0;
1009                         }
1010                         max_low_pfn -= highmem_pages;
1011                 }
1012 #else
1013                 if (highmem_pages)
1014                         printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
1015 #endif
1016         }
1017         return max_low_pfn;
1018 }
1019
1020 /*
1021  * Free all available memory for boot time allocation.  Used
1022  * as a callback function by efi_memory_walk()
1023  */
1024
1025 static int __init
1026 free_available_memory(unsigned long start, unsigned long end, void *arg)
1027 {
1028         /* check max_low_pfn */
1029         if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
1030                 return 0;
1031         if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
1032                 end = (max_low_pfn + 1) << PAGE_SHIFT;
1033         if (start < end)
1034                 free_bootmem(start, end - start);
1035
1036         return 0;
1037 }
1038 /*
1039  * Register fully available low RAM pages with the bootmem allocator.
1040  */
1041 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1042 {
1043         int i;
1044
1045         if (efi_enabled) {
1046                 efi_memmap_walk(free_available_memory, NULL);
1047                 return;
1048         }
1049         for (i = 0; i < e820.nr_map; i++) {
1050                 unsigned long curr_pfn, last_pfn, size;
1051                 /*
1052                  * Reserve usable low memory
1053                  */
1054                 if (e820.map[i].type != E820_RAM)
1055                         continue;
1056                 /*
1057                  * We are rounding up the start address of usable memory:
1058                  */
1059                 curr_pfn = PFN_UP(e820.map[i].addr);
1060                 if (curr_pfn >= max_low_pfn)
1061                         continue;
1062                 /*
1063                  * ... and at the end of the usable range downwards:
1064                  */
1065                 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1066
1067                 if (last_pfn > max_low_pfn)
1068                         last_pfn = max_low_pfn;
1069
1070                 /*
1071                  * .. finally, did all the rounding and playing
1072                  * around just make the area go away?
1073                  */
1074                 if (last_pfn <= curr_pfn)
1075                         continue;
1076
1077                 size = last_pfn - curr_pfn;
1078                 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1079         }
1080 }
1081
1082 /*
1083  * workaround for Dell systems that neglect to reserve EBDA
1084  */
1085 static void __init reserve_ebda_region(void)
1086 {
1087         unsigned int addr;
1088         addr = get_bios_ebda();
1089         if (addr)
1090                 reserve_bootmem(addr, PAGE_SIZE);       
1091 }
1092
1093 #ifndef CONFIG_NEED_MULTIPLE_NODES
1094 void __init setup_bootmem_allocator(void);
1095 static unsigned long __init setup_memory(void)
1096 {
1097         /*
1098          * partially used pages are not usable - thus
1099          * we are rounding upwards:
1100          */
1101         min_low_pfn = PFN_UP(init_pg_tables_end);
1102
1103         find_max_pfn();
1104
1105         max_low_pfn = find_max_low_pfn();
1106
1107 #ifdef CONFIG_HIGHMEM
1108         highstart_pfn = highend_pfn = max_pfn;
1109         if (max_pfn > max_low_pfn) {
1110                 highstart_pfn = max_low_pfn;
1111         }
1112         printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1113                 pages_to_mb(highend_pfn - highstart_pfn));
1114 #endif
1115         printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1116                         pages_to_mb(max_low_pfn));
1117
1118         setup_bootmem_allocator();
1119
1120         return max_low_pfn;
1121 }
1122
1123 void __init zone_sizes_init(void)
1124 {
1125         unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1126         unsigned int max_dma, low;
1127
1128         max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
1129         low = max_low_pfn;
1130
1131         if (low < max_dma)
1132                 zones_size[ZONE_DMA] = low;
1133         else {
1134                 zones_size[ZONE_DMA] = max_dma;
1135                 zones_size[ZONE_NORMAL] = low - max_dma;
1136 #ifdef CONFIG_HIGHMEM
1137                 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1138 #endif
1139         }
1140         free_area_init(zones_size);
1141 }
1142 #else
1143 extern unsigned long __init setup_memory(void);
1144 extern void zone_sizes_init(void);
1145 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
1146
/*
 * Set up the boot-time allocator for low memory: initialise it for the
 * page frames min_low_pfn..max_low_pfn, free the usable low RAM into it,
 * then reserve the areas that must not be handed out again (the range
 * from __PHYSICAL_START up to the end of the bootmem bitmap, page 0, the
 * EBDA and, depending on CPU and configuration, the page below VGA, the
 * SMP trampoline page, ACPI sleep memory, the SMP configuration, the
 * initrd and the crash kernel range).
 */
1147 void __init setup_bootmem_allocator(void)
1148 {
1149         unsigned long bootmap_size;
1150         /*
1151          * Initialize the boot-time allocator (with low memory only):
1152          */
1153         bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
1154
1155         register_bootmem_low_pages(max_low_pfn);
1156
1157         /*
1158          * Reserve the bootmem bitmap itself as well. We do this in two
1159          * steps (first step was init_bootmem()) because this catches
1160          * the (very unlikely) case of us accidentally initializing the
1161          * bootmem allocator with an invalid RAM area.
1162          */
1163         reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
1164                          bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
1165
1166         /*
1167          * reserve physical page 0 - it's a special BIOS page on many boxes,
1168          * enabling clean reboots, SMP operation, laptop functions.
1169          */
1170         reserve_bootmem(0, PAGE_SIZE);
1171
1172         /* reserve EBDA region, it's a 4K region */
1173         reserve_ebda_region();
1174
1175     /* could be an AMD 768MPX chipset. Reserve a page  before VGA to prevent
1176        PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1177        unless you have no PS/2 mouse plugged in. */
1178         if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1179             boot_cpu_data.x86 == 6)
1180              reserve_bootmem(0xa0000 - 4096, 4096);
1181
1182 #ifdef CONFIG_SMP
1183         /*
1184          * But first pinch a few for the stack/trampoline stuff
1185          * FIXME: Don't need the extra page at 4K, but need to fix
1186          * trampoline before removing it. (see the GDT stuff)
1187          */
1188         reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1189 #endif
1190 #ifdef CONFIG_ACPI_SLEEP
1191         /*
1192          * Reserve low memory region for sleep support.
1193          */
1194         acpi_reserve_bootmem();
1195 #endif
1196 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1197         /*
1198          * Find and reserve possible boot-time SMP configuration:
1199          */
1200         find_smp_config();
1201 #endif
1202
1203 #ifdef CONFIG_BLK_DEV_INITRD
        /*
         * Keep the initrd only when it ends at or below the low-memory
         * limit; otherwise complain and disable it.
         */
1204         if (LOADER_TYPE && INITRD_START) {
1205                 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1206                         reserve_bootmem(INITRD_START, INITRD_SIZE);
                        /*
                         * NOTE(review): INITRD_START was already tested
                         * non-zero by the enclosing if, so the ": 0" arm
                         * looks unreachable here - confirm INITRD_START
                         * cannot change between the two reads.
                         */
1207                         initrd_start =
1208                                 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
1209                         initrd_end = initrd_start+INITRD_SIZE;
1210                 }
1211                 else {
1212                         printk(KERN_ERR "initrd extends beyond end of memory "
1213                             "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1214                             INITRD_START + INITRD_SIZE,
1215                             max_low_pfn << PAGE_SHIFT);
1216                         initrd_start = 0;
1217                 }
1218         }
1219 #endif
1220 #ifdef CONFIG_KEXEC
        /*
         * crashk_res is filled in by the "crashkernel=size@addr" option
         * parsing; presumably start == end means no range was given
         * (initial value of crashk_res is not visible here - confirm).
         */
1221         if (crashk_res.start != crashk_res.end)
1222                 reserve_bootmem(crashk_res.start,
1223                         crashk_res.end - crashk_res.start + 1);
1224 #endif
1225 }
1226
1227 /*
1228  * The node 0 pgdat is initialized before all of these because
1229  * it's needed for bootmem.  node>0 pgdats have their virtual
1230  * space allocated before the pagetables are in place to access
1231  * them, so they can't be cleared then.
1232  *
1233  * This should all compile down to nothing when NUMA is off.
1234  */
1235 void __init remapped_pgdat_init(void)
1236 {
1237         int nid;
1238
1239         for_each_online_node(nid) {
1240                 if (nid != 0)
1241                         memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
1242         }
1243 }
1244
1245 /*
1246  * Request address space for all standard RAM and ROM resources
1247  * and also for regions reported as reserved by the e820.
1248  */
1249 static void __init
1250 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1251 {
1252         int i;
1253
1254         probe_roms();
1255         for (i = 0; i < e820.nr_map; i++) {
1256                 struct resource *res;
1257                 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1258                         continue;
1259                 res = alloc_bootmem_low(sizeof(struct resource));
1260                 switch (e820.map[i].type) {
1261                 case E820_RAM:  res->name = "System RAM"; break;
1262                 case E820_ACPI: res->name = "ACPI Tables"; break;
1263                 case E820_NVS:  res->name = "ACPI Non-volatile Storage"; break;
1264                 default:        res->name = "reserved";
1265                 }
1266                 res->start = e820.map[i].addr;
1267                 res->end = res->start + e820.map[i].size - 1;
1268                 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1269                 request_resource(&iomem_resource, res);
1270                 if (e820.map[i].type == E820_RAM) {
1271                         /*
1272                          *  We don't know which RAM region contains kernel data,
1273                          *  so we try it repeatedly and let the resource manager
1274                          *  test it.
1275                          */
1276                         request_resource(res, code_resource);
1277                         request_resource(res, data_resource);
1278 #ifdef CONFIG_KEXEC
1279                         request_resource(res, &crashk_res);
1280 #endif
1281                 }
1282         }
1283 }
1284
1285 /*
1286  * Request address space for all standard resources
1287  */
1288 static void __init register_memory(void)
1289 {
1290         unsigned long gapstart, gapsize;
1291         unsigned long long last;
1292         int           i;
1293
1294         if (efi_enabled)
1295                 efi_initialize_iomem_resources(&code_resource, &data_resource);
1296         else
1297                 legacy_init_iomem_resources(&code_resource, &data_resource);
1298
1299         /* EFI systems may still have VGA */
1300         request_resource(&iomem_resource, &video_ram_resource);
1301
1302         /* request I/O space for devices used on all i[345]86 PCs */
1303         for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1304                 request_resource(&ioport_resource, &standard_io_resources[i]);
1305
1306         /*
1307          * Search for the bigest gap in the low 32 bits of the e820
1308          * memory space.
1309          */
1310         last = 0x100000000ull;
1311         gapstart = 0x10000000;
1312         gapsize = 0x400000;
1313         i = e820.nr_map;
1314         while (--i >= 0) {
1315                 unsigned long long start = e820.map[i].addr;
1316                 unsigned long long end = start + e820.map[i].size;
1317
1318                 /*
1319                  * Since "last" is at most 4GB, we know we'll
1320                  * fit in 32 bits if this condition is true
1321                  */
1322                 if (last > end) {
1323                         unsigned long gap = last - end;
1324
1325                         if (gap > gapsize) {
1326                                 gapsize = gap;
1327                                 gapstart = end;
1328                         }
1329                 }
1330                 if (start < last)
1331                         last = start;
1332         }
1333
1334         /*
1335          * Start allocating dynamic PCI memory a bit into the gap,
1336          * aligned up to the nearest megabyte.
1337          *
1338          * Question: should we try to pad it up a bit (do something
1339          * like " + (gapsize >> 3)" in there too?). We now have the
1340          * technology.
1341          */
1342         pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
1343
1344         printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1345                 pci_mem_start, gapstart, gapsize);
1346 }
1347
1348 /* Use inline assembly to define this because the nops are defined 
1349    as inline assembly strings in the include files and we cannot 
1350    get them easily into strings. */
/*
 * Three byte blobs placed in .data, one per nop flavour, each formed by
 * concatenating the NOP1..NOP8 strings of that flavour.
 */
1351 asm("\t.data\nintelnops: " 
1352     GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1353     GENERIC_NOP7 GENERIC_NOP8); 
1354 asm("\t.data\nk8nops: " 
1355     K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1356     K8_NOP7 K8_NOP8); 
1357 asm("\t.data\nk7nops: " 
1358     K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
1359     K7_NOP7 K7_NOP8); 
1360     
/*
 * Lookup tables into the blobs above.  Entry k points at offset
 * 1 + 2 + ... + (k - 1) of its blob, and apply_alternatives() copies k
 * bytes from entry k, so entry k is presumably the k-byte nop (assumes
 * each *_NOPn string emits exactly n bytes - TODO confirm against the
 * header that defines them).  Entry 0 is NULL; apply_alternatives() only
 * indexes with k >= 1.
 */
1361 extern unsigned char intelnops[], k8nops[], k7nops[];
1362 static unsigned char *intel_nops[ASM_NOP_MAX+1] = { 
1363      NULL,
1364      intelnops,
1365      intelnops + 1,
1366      intelnops + 1 + 2,
1367      intelnops + 1 + 2 + 3,
1368      intelnops + 1 + 2 + 3 + 4,
1369      intelnops + 1 + 2 + 3 + 4 + 5,
1370      intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1371      intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1372 }; 
1373 static unsigned char *k8_nops[ASM_NOP_MAX+1] = { 
1374      NULL,
1375      k8nops,
1376      k8nops + 1,
1377      k8nops + 1 + 2,
1378      k8nops + 1 + 2 + 3,
1379      k8nops + 1 + 2 + 3 + 4,
1380      k8nops + 1 + 2 + 3 + 4 + 5,
1381      k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1382      k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1383 }; 
1384 static unsigned char *k7_nops[ASM_NOP_MAX+1] = { 
1385      NULL,
1386      k7nops,
1387      k7nops + 1,
1388      k7nops + 1 + 2,
1389      k7nops + 1 + 2 + 3,
1390      k7nops + 1 + 2 + 3 + 4,
1391      k7nops + 1 + 2 + 3 + 4 + 5,
1392      k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1393      k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1394 }; 
/*
 * CPU feature -> nop table mapping, scanned in order by
 * apply_alternatives(): the first entry whose feature the boot CPU has
 * is used, and intel_nops is the fallback when none matches.  The list
 * is terminated by an entry with a negative cpuid.
 */
1395 static struct nop { 
1396      int cpuid; 
1397      unsigned char **noptable; 
1398 } noptypes[] = { 
1399      { X86_FEATURE_K8, k8_nops }, 
1400      { X86_FEATURE_K7, k7_nops }, 
1401      { -1, NULL }
1402 }; 
1403
1404 /* Replace instructions with better alternatives for this CPU type.
1405
1406    This runs before SMP is initialized to avoid SMP problems with
1407    self modifying code. This implies that assymetric systems where
1408    APs have less capabilities than the boot processor are not handled. 
1409    In this case boot with "noreplacement". */ 
1410 void apply_alternatives(void *start, void *end) 
1411
1412         struct alt_instr *a; 
1413         int diff, i, k;
1414         unsigned char **noptable = intel_nops; 
1415         for (i = 0; noptypes[i].cpuid >= 0; i++) { 
1416                 if (boot_cpu_has(noptypes[i].cpuid)) { 
1417                         noptable = noptypes[i].noptable;
1418                         break;
1419                 }
1420         } 
1421         for (a = start; (void *)a < end; a++) { 
1422                 if (!boot_cpu_has(a->cpuid))
1423                         continue;
1424                 BUG_ON(a->replacementlen > a->instrlen); 
1425                 memcpy(a->instr, a->replacement, a->replacementlen); 
1426                 diff = a->instrlen - a->replacementlen; 
1427                 /* Pad the rest with nops */
1428                 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1429                         k = diff;
1430                         if (k > ASM_NOP_MAX)
1431                                 k = ASM_NOP_MAX;
1432                         memcpy(a->instr + i, noptable[k], k); 
1433                 } 
1434         }
1435
1436
1437 static int no_replacement __initdata = 0; 
1438  
1439 void __init alternative_instructions(void)
1440 {
1441         extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1442         if (no_replacement) 
1443                 return;
1444         apply_alternatives(__alt_instructions, __alt_instructions_end);
1445 }
1446
1447 static int __init noreplacement_setup(char *s)
1448
1449      no_replacement = 1; 
1450      return 0; 
1451
1452
1453 __setup("noreplacement", noreplacement_setup); 
1454
1455 static char * __init machine_specific_memory_setup(void);
1456
/*
 * Record the MCA bus flag in MCA_bus on CONFIG_MCA kernels; a no-op
 * otherwise.
 */
static void set_mca_bus(int x)
{
#ifdef CONFIG_MCA
        MCA_bus = x;
#endif
}
1465
1466 /*
1467  * Determine if we were loaded by an EFI loader.  If so, then we have also been
1468  * passed the efi memmap, systab, etc., so we should use these data structures
1469  * for initialization.  Note, the efi init code path is determined by the
1470  * global efi_enabled. This allows the same kernel image to be used on existing
1471  * systems (with a traditional BIOS) as well as on EFI systems.
1472  */
/*
 * Architecture-specific boot-time setup.  Copies the boot parameters into
 * kernel variables, obtains the memory map (EFI or BIOS), parses the early
 * command line options (storing the resulting command line in
 * *cmdline_p), sets up bootmem and paging, and then runs the table
 * scanning and resource registration steps below.
 */
1473 void __init setup_arch(char **cmdline_p)
1474 {
        /*
         * NOTE(review): this local has the same name as the max_low_pfn
         * variable that setup_memory() and other functions in this file
         * assign and read; here it only receives setup_memory()'s return
         * value and is not read again in this function.
         */
1475         unsigned long max_low_pfn;
1476
1477         memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1478         pre_setup_arch_hook();
1479         early_cpu_init();
1480
1481         /*
1482          * FIXME: This isn't an official loader_type right
1483          * now but does currently work with elilo.
1484          * If we were configured as an EFI kernel, check to make
1485          * sure that we were loaded correctly from elilo and that
1486          * the system table is valid.  If not, then initialize normally.
1487          */
1488 #ifdef CONFIG_EFI
1489         if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1490                 efi_enabled = 1;
1491 #endif
1492
        /* Copy the boot-time parameter values into their kernel variables. */
1493         ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1494         drive_info = DRIVE_INFO;
1495         screen_info = SCREEN_INFO;
1496         edid_info = EDID_INFO;
1497         apm_info.bios = APM_BIOS_INFO;
1498         ist_info = IST_INFO;
1499         saved_videomode = VIDEO_MODE;
1500         if( SYS_DESC_TABLE.length != 0 ) {
1501                 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1502                 machine_id = SYS_DESC_TABLE.table[0];
1503                 machine_submodel_id = SYS_DESC_TABLE.table[1];
1504                 BIOS_revision = SYS_DESC_TABLE.table[2];
1505         }
1506         bootloader_type = LOADER_TYPE;
1507
1508 #ifdef CONFIG_BLK_DEV_RAM
1509         rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1510         rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1511         rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1512 #endif
1513         ARCH_SETUP
        /* Memory map: from EFI when enabled, otherwise the BIOS-provided map. */
1514         if (efi_enabled)
1515                 efi_init();
1516         else {
1517                 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1518                 print_memory_map(machine_specific_memory_setup());
1519         }
1520
1521         copy_edd();
1522
1523         if (!MOUNT_ROOT_RDONLY)
1524                 root_mountflags &= ~MS_RDONLY;
1525         init_mm.start_code = (unsigned long) _text;
1526         init_mm.end_code = (unsigned long) _etext;
1527         init_mm.end_data = (unsigned long) _edata;
1528         init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
1529
        /* Physical extents of kernel text and data, registered later by register_memory(). */
1530         code_resource.start = virt_to_phys(_text);
1531         code_resource.end = virt_to_phys(_etext)-1;
1532         data_resource.start = virt_to_phys(_etext);
1533         data_resource.end = virt_to_phys(_edata)-1;
1534
1535         parse_cmdline_early(cmdline_p);
1536
1537         max_low_pfn = setup_memory();
1538
1539         /*
1540          * NOTE: before this point _nobody_ is allowed to allocate
1541          * any memory using the bootmem allocator.  Although the
1542          * allocator is now initialised only the first 8Mb of the kernel
1543          * virtual address space has been mapped.  All allocations before
1544          * paging_init() has completed must use the alloc_bootmem_low_pages()
1545          * variant (which allocates DMA'able memory) and care must be taken
1546          * not to exceed the 8Mb limit.
1547          */
1548
1549 #ifdef CONFIG_SMP
1550         smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1551 #endif
1552         paging_init();
1553         remapped_pgdat_init();
1554         sparse_init();
1555         zone_sizes_init();
1556
1557         /*
1558          * NOTE: at this point the bootmem allocator is fully available.
1559          */
1560
1561 #ifdef CONFIG_EARLY_PRINTK
1562         {
1563                 char *s = strstr(*cmdline_p, "earlyprintk=");
1564                 if (s) {
1565                         extern void setup_early_printk(char *);
1566
1567                         setup_early_printk(s);
1568                         printk("early console enabled\n");
1569                 }
1570         }
1571 #endif
1572
1573
1574         dmi_scan_machine();
1575
1576 #ifdef CONFIG_X86_GENERICARCH
1577         generic_apic_probe(*cmdline_p);
1578 #endif  
1579         if (efi_enabled)
1580                 efi_map_memmap();
1581
1582 #ifdef CONFIG_ACPI_BOOT
1583         /*
1584          * Parse the ACPI tables for possible boot-time SMP configuration.
1585          */
1586         acpi_boot_table_init();
1587         acpi_boot_init();
1588 #endif
1589
1590 #ifdef CONFIG_X86_LOCAL_APIC
1591         if (smp_found_config)
1592                 get_smp_config();
1593 #endif
1594
1595         register_memory();
1596
        /*
         * Choose the initial console: VGA unless EFI reports 0xa0000 as
         * conventional memory, or the dummy console when VGA console
         * support is not configured.
         */
1597 #ifdef CONFIG_VT
1598 #if defined(CONFIG_VGA_CONSOLE)
1599         if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1600                 conswitchp = &vga_con;
1601 #elif defined(CONFIG_DUMMY_CONSOLE)
1602         conswitchp = &dummy_con;
1603 #endif
1604 #endif
1605 }
1606
1607 #include "setup_arch_post.h"
1608 /*
1609  * Local Variables:
1610  * mode:c
1611  * c-file-style:"k&r"
1612  * c-basic-offset:8
1613  * End:
1614  */