2 * linux/arch/i386/kernel/setup.c
4 * Copyright (C) 1995 Linus Torvalds
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
23 * This file handles the architecture-dependent parts of initialization
26 #include <linux/config.h>
27 #include <linux/sched.h>
29 #include <linux/mmzone.h>
30 #include <linux/tty.h>
31 #include <linux/ioport.h>
32 #include <linux/acpi.h>
33 #include <linux/apm_bios.h>
34 #include <linux/initrd.h>
35 #include <linux/bootmem.h>
36 #include <linux/seq_file.h>
37 #include <linux/console.h>
38 #include <linux/mca.h>
39 #include <linux/root_dev.h>
40 #include <linux/highmem.h>
41 #include <linux/module.h>
42 #include <linux/efi.h>
43 #include <linux/init.h>
44 #include <linux/edd.h>
45 #include <linux/nodemask.h>
46 #include <linux/kexec.h>
48 #include <video/edid.h>
52 #include <asm/mpspec.h>
53 #include <asm/setup.h>
54 #include <asm/arch_hooks.h>
55 #include <asm/sections.h>
56 #include <asm/io_apic.h>
59 #include "setup_arch_pre.h"
60 #include <bios_ebda.h>
62 /* Forward Declaration. */
63 void __init find_max_pfn(void);
65 /* This value is set up by the early boot code to point to the value
66 immediately after the boot time page tables. It contains a *physical*
67 address, and must not be in the .bss segment! */
68 unsigned long init_pg_tables_end __initdata = ~0UL;
70 int disable_pse __devinitdata = 0;
78 EXPORT_SYMBOL(efi_enabled);
81 /* cpu data as detected by the assembly code in head.S */
82 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
83 /* common cpu data for all cpus */
84 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
85 EXPORT_SYMBOL(boot_cpu_data);
87 unsigned long mmu_cr4_features;
89 #ifdef CONFIG_ACPI_INTERPRETER
90 int acpi_disabled = 0;
92 int acpi_disabled = 1;
94 EXPORT_SYMBOL(acpi_disabled);
96 #ifdef CONFIG_ACPI_BOOT
97 int __initdata acpi_force = 0;
98 extern acpi_interrupt_flags acpi_sci_flags;
101 /* for MCA, but anyone else can use it if they want */
102 unsigned int machine_id;
104 EXPORT_SYMBOL(machine_id);
106 unsigned int machine_submodel_id;
107 unsigned int BIOS_revision;
108 unsigned int mca_pentium_flag;
110 /* For PCI or other memory-mapped resources */
111 unsigned long pci_mem_start = 0x10000000;
113 EXPORT_SYMBOL(pci_mem_start);
116 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
119 /* user-defined highmem size */
120 static unsigned int highmem_pages = -1;
125 struct drive_info_struct { char dummy[32]; } drive_info;
126 #if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
127 defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
128 EXPORT_SYMBOL(drive_info);
130 struct screen_info screen_info;
132 EXPORT_SYMBOL(screen_info);
134 struct apm_info apm_info;
135 EXPORT_SYMBOL(apm_info);
136 struct sys_desc_table_struct {
137 unsigned short length;
138 unsigned char table[0];
140 struct edid_info edid_info;
141 struct ist_info ist_info;
142 #if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
143 defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
144 EXPORT_SYMBOL(ist_info);
148 extern void early_cpu_init(void);
149 extern void dmi_scan_machine(void);
150 extern void generic_apic_probe(char *);
151 extern int root_mountflags;
153 unsigned long saved_videomode;
155 #define RAMDISK_IMAGE_START_MASK 0x07FF
156 #define RAMDISK_PROMPT_FLAG 0x8000
157 #define RAMDISK_LOAD_FLAG 0x4000
159 static char command_line[COMMAND_LINE_SIZE];
161 unsigned char __initdata boot_params[PARAM_SIZE];
163 static struct resource data_resource = {
164 .name = "Kernel data",
167 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
170 static struct resource code_resource = {
171 .name = "Kernel code",
174 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
177 static struct resource system_rom_resource = {
178 .name = "System ROM",
181 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
184 static struct resource extension_rom_resource = {
185 .name = "Extension ROM",
188 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
191 static struct resource adapter_rom_resources[] = { {
192 .name = "Adapter ROM",
195 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
197 .name = "Adapter ROM",
200 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
202 .name = "Adapter ROM",
205 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
207 .name = "Adapter ROM",
210 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
212 .name = "Adapter ROM",
215 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
217 .name = "Adapter ROM",
220 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
223 #define ADAPTER_ROM_RESOURCES \
224 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
226 static struct resource video_rom_resource = {
230 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
233 static struct resource video_ram_resource = {
234 .name = "Video RAM area",
237 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
240 static struct resource standard_io_resources[] = { {
244 .flags = IORESOURCE_BUSY | IORESOURCE_IO
249 .flags = IORESOURCE_BUSY | IORESOURCE_IO
254 .flags = IORESOURCE_BUSY | IORESOURCE_IO
259 .flags = IORESOURCE_BUSY | IORESOURCE_IO
264 .flags = IORESOURCE_BUSY | IORESOURCE_IO
266 .name = "dma page reg",
269 .flags = IORESOURCE_BUSY | IORESOURCE_IO
274 .flags = IORESOURCE_BUSY | IORESOURCE_IO
279 .flags = IORESOURCE_BUSY | IORESOURCE_IO
284 .flags = IORESOURCE_BUSY | IORESOURCE_IO
287 #define STANDARD_IO_RESOURCES \
288 (sizeof standard_io_resources / sizeof standard_io_resources[0])
290 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
/*
 * romchecksum() - scan the `length` bytes of a ROM image starting at `rom`.
 *
 * Only the declarations and the loop header are visible in this excerpt:
 * `p` steps one byte at a time from `rom` up to (not including)
 * `rom + length`, and `sum` is an unsigned char accumulator that starts at 0.
 * NOTE(review): presumably the bytes are added into `sum` and the result is
 * non-zero when they total 0 modulo 256 -- callers treat a non-zero return as
 * "checksum okay"; confirm against the complete function body.
 */
292 static int __init romchecksum(unsigned char *rom, unsigned long length)
294 unsigned char *p, sum = 0;
296 for (p = rom; p < rom + length; p++)
301 static void __init probe_roms(void)
303 unsigned long start, length, upper;
308 upper = adapter_rom_resources[0].start;
309 for (start = video_rom_resource.start; start < upper; start += 2048) {
310 rom = isa_bus_to_virt(start);
311 if (!romsignature(rom))
314 video_rom_resource.start = start;
316 /* 0 < length <= 0x7f * 512, historically */
317 length = rom[2] * 512;
319 /* if checksum okay, trust length byte */
320 if (length && romchecksum(rom, length))
321 video_rom_resource.end = start + length - 1;
323 request_resource(&iomem_resource, &video_rom_resource);
327 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
332 request_resource(&iomem_resource, &system_rom_resource);
333 upper = system_rom_resource.start;
335 /* check for extension rom (ignore length byte!) */
336 rom = isa_bus_to_virt(extension_rom_resource.start);
337 if (romsignature(rom)) {
338 length = extension_rom_resource.end - extension_rom_resource.start + 1;
339 if (romchecksum(rom, length)) {
340 request_resource(&iomem_resource, &extension_rom_resource);
341 upper = extension_rom_resource.start;
345 /* check for adapter roms on 2k boundaries */
346 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
347 rom = isa_bus_to_virt(start);
348 if (!romsignature(rom))
351 /* 0 < length <= 0x7f * 512, historically */
352 length = rom[2] * 512;
354 /* but accept any length that fits if checksum okay */
355 if (!length || start + length > upper || !romchecksum(rom, length))
358 adapter_rom_resources[i].start = start;
359 adapter_rom_resources[i].end = start + length - 1;
360 request_resource(&iomem_resource, &adapter_rom_resources[i]);
362 start = adapter_rom_resources[i++].end & ~2047UL;
/*
 * limit_regions() - trim the firmware memory map so that usable memory
 * stops at `size` (a byte address).
 *
 * Two loops are visible in this excerpt: one over the EFI map (`memmap`)
 * and one over the BIOS map (`e820`).
 * NOTE(review): the condition that selects between the two loops is not
 * visible here -- presumably efi_enabled; confirm.
 */
366 static void __init limit_regions(unsigned long long size)
368 unsigned long long current_addr = 0;
372 for (i = 0; i < memmap.nr_map; i++) {
/* end address of this descriptor: num_pages << 12 is its length in bytes */
373 current_addr = memmap.map[i].phys_addr +
374 (memmap.map[i].num_pages << 12);
375 if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
376 if (current_addr >= size) {
/* drop the pages lying past `size` (rounded up to whole pages) and make
   this descriptor the last one in the map */
377 memmap.map[i].num_pages -=
378 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
379 memmap.nr_map = i + 1;
385 for (i = 0; i < e820.nr_map; i++) {
386 if (e820.map[i].type == E820_RAM) {
387 current_addr = e820.map[i].addr + e820.map[i].size;
/* RAM entry reaches or passes the limit: shrink it to end exactly at `size` */
388 if (current_addr >= size) {
389 e820.map[i].size -= current_addr-size;
/*
 * add_memory_region() - store one region in the e820 map.
 * @start: first byte address of the region
 * @size:  length of the region in bytes
 * @type:  value written to the entry's type field (callers in this file pass
 *         E820_RAM, E820_ACPI and E820_RESERVED)
 *
 * Writes the three arguments into slot `x` of e820.map and logs a KERN_ERR
 * message when the map has too many entries.
 * NOTE(review): how `x` is chosen, the exact overflow test and the update of
 * e820.nr_map are not visible in this excerpt -- confirm against the full body.
 */
397 static void __init add_memory_region(unsigned long long start,
398 unsigned long long size, int type)
406 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
410 e820.map[x].addr = start;
411 e820.map[x].size = size;
412 e820.map[x].type = type;
415 } /* add_memory_region */
/*
 * print_memory_map() - log every entry of the e820 map.
 * @who: label printed in front of each line (e.g. "user" when called from the
 *       command-line parser in this file)
 *
 * For each entry the line shows two 64-bit hex values -- the second one is
 * addr + size, i.e. the address just past the region -- followed by a label
 * chosen from the entry type; unknown types are printed numerically.
 * NOTE(review): the first address argument and the case labels other than
 * E820_RAM are not visible in this excerpt.
 */
419 static void __init print_memory_map(char *who)
423 for (i = 0; i < e820.nr_map; i++) {
424 printk(" %s: %016Lx - %016Lx ", who,
426 e820.map[i].addr + e820.map[i].size);
427 switch (e820.map[i].type) {
428 case E820_RAM: printk("(usable)\n");
431 printk("(reserved)\n");
434 printk("(ACPI data)\n");
437 printk("(ACPI NVS)\n");
439 default: printk("type %lu\n", e820.map[i].type);
446 * Sanitize the BIOS e820 map.
448 * Some e820 responses include overlapping entries. The following
449 * replaces the original e820 map with a new one, removing overlaps.
452 struct change_member {
453 struct e820entry *pbios; /* pointer to original bios entry */
454 unsigned long long addr; /* address for this change point */
456 static struct change_member change_point_list[2*E820MAX] __initdata;
457 static struct change_member *change_point[2*E820MAX] __initdata;
458 static struct e820entry *overlap_list[E820MAX] __initdata;
459 static struct e820entry new_bios[E820MAX] __initdata;
461 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
463 struct change_member *change_tmp;
464 unsigned long current_type, last_type;
465 unsigned long long last_addr;
466 int chgidx, still_changing;
469 int old_nr, new_nr, chg_nr;
473 Visually we're performing the following (1,2,3,4 = memory types)...
475 Sample memory map (w/overlaps):
476 ____22__________________
477 ______________________4_
478 ____1111________________
479 _44_____________________
480 11111111________________
481 ____________________33__
482 ___________44___________
483 __________33333_________
484 ______________22________
485 ___________________2222_
486 _________111111111______
487 _____________________11_
488 _________________4______
490 Sanitized equivalent (no overlap):
491 1_______________________
492 _44_____________________
493 ___1____________________
494 ____22__________________
495 ______11________________
496 _________1______________
497 __________3_____________
498 ___________44___________
499 _____________33_________
500 _______________2________
501 ________________1_______
502 _________________4______
503 ___________________2____
504 ____________________33__
505 ______________________4_
508 /* if there's only one memory region, don't bother */
514 /* bail out if we find any unreasonable addresses in bios map */
515 for (i=0; i<old_nr; i++)
516 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
519 /* create pointers for initial change-point information (for sorting) */
520 for (i=0; i < 2*old_nr; i++)
521 change_point[i] = &change_point_list[i];
523 /* record all known change-points (starting and ending addresses),
524 omitting those that are for empty memory regions */
526 for (i=0; i < old_nr; i++) {
527 if (biosmap[i].size != 0) {
528 change_point[chgidx]->addr = biosmap[i].addr;
529 change_point[chgidx++]->pbios = &biosmap[i];
530 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
531 change_point[chgidx++]->pbios = &biosmap[i];
534 chg_nr = chgidx; /* true number of change-points */
536 /* sort change-point list by memory addresses (low -> high) */
538 while (still_changing) {
540 for (i=1; i < chg_nr; i++) {
541 /* if <current_addr> < <last_addr>, swap */
542 /* or, if current=<start_addr> & last=<end_addr>, swap */
543 if ((change_point[i]->addr < change_point[i-1]->addr) ||
544 ((change_point[i]->addr == change_point[i-1]->addr) &&
545 (change_point[i]->addr == change_point[i]->pbios->addr) &&
546 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
549 change_tmp = change_point[i];
550 change_point[i] = change_point[i-1];
551 change_point[i-1] = change_tmp;
557 /* create a new bios memory map, removing overlaps */
558 overlap_entries=0; /* number of entries in the overlap table */
559 new_bios_entry=0; /* index for creating new bios map entries */
560 last_type = 0; /* start with undefined memory type */
561 last_addr = 0; /* start with 0 as last starting address */
562 /* loop through change-points, determining effect on the new bios map */
563 for (chgidx=0; chgidx < chg_nr; chgidx++)
565 /* keep track of all overlapping bios entries */
566 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
568 /* add map entry to overlap list (> 1 entry implies an overlap) */
569 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
573 /* remove entry from list (order independent, so swap with last) */
574 for (i=0; i<overlap_entries; i++)
576 if (overlap_list[i] == change_point[chgidx]->pbios)
577 overlap_list[i] = overlap_list[overlap_entries-1];
581 /* if there are overlapping entries, decide which "type" to use */
582 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
584 for (i=0; i<overlap_entries; i++)
585 if (overlap_list[i]->type > current_type)
586 current_type = overlap_list[i]->type;
587 /* continue building up new bios map based on this information */
588 if (current_type != last_type) {
589 if (last_type != 0) {
590 new_bios[new_bios_entry].size =
591 change_point[chgidx]->addr - last_addr;
592 /* move forward only if the new size was non-zero */
593 if (new_bios[new_bios_entry].size != 0)
594 if (++new_bios_entry >= E820MAX)
595 break; /* no more space left for new bios entries */
597 if (current_type != 0) {
598 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
599 new_bios[new_bios_entry].type = current_type;
600 last_addr=change_point[chgidx]->addr;
602 last_type = current_type;
605 new_nr = new_bios_entry; /* retain count for new bios entries */
607 /* copy new bios mapping into original location */
608 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
615 * Copy the BIOS e820 map into a safe place.
617 * Sanity-check it while we're at it..
619 * If we're lucky and live on a modern system, the setup code
620 * will have given us a memory map that we can use to properly
621 * set up memory. If we aren't, we'll fake a memory map.
623 * We check to see that the memory map contains at least 2 elements
624 * before we'll use it, because the detection code in setup.S may
625 * not be perfect and most every PC known to man has two memory
626 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
627 * thinkpad 560x, for example, does not cooperate with the memory
/*
 * copy_e820_map() - feed the BIOS-supplied e820 entries into the kernel's map.
 * @biosmap: first entry of the BIOS array
 * @nr_map:  number of entries in @biosmap
 *
 * Walks the array one entry at a time (the do/while at the bottom) and hands
 * each region to add_memory_region(), with special handling for RAM entries
 * that overlap the 0xA0000-0x100000 window.
 * NOTE(review): the early-exit tests and the return values are not visible in
 * this excerpt -- confirm against the full body.
 */
630 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
632 /* Only one memory region (or negative)? Ignore it */
637 unsigned long long start = biosmap->addr;
638 unsigned long long size = biosmap->size;
639 unsigned long long end = start + size;
640 unsigned long type = biosmap->type;
642 /* Overflow in 64 bits? Ignore the memory map. */
647 * Some BIOSes claim RAM in the 640k - 1M region.
648 * Not right. Fix it up.
650 if (type == E820_RAM) {
/* RAM entry overlaps the 0xA0000-0x100000 (640K-1M) window */
651 if (start < 0x100000ULL && end > 0xA0000ULL) {
/* register the part of the entry that lies below 0xA0000 */
652 if (start < 0xA0000ULL)
653 add_memory_region(start, 0xA0000ULL-start, type);
654 if (end <= 0x100000ULL)
660 add_memory_region(start, size, type);
/* step to the next BIOS entry until nr_map entries have been consumed */
661 } while (biosmap++,--nr_map);
665 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
667 #ifdef CONFIG_EDD_MODULE
671 * copy_edd() - Copy the BIOS EDD information
672 * from boot_params into a safe place.
675 static inline void copy_edd(void)
677 memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
678 memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
679 edd.mbr_signature_nr = EDD_MBR_SIG_NR;
680 edd.edd_info_nr = EDD_NR;
683 static inline void copy_edd(void)
689 * Do NOT EVER look at the BIOS memory size location.
690 * It does not work on many machines.
692 #define LOWMEMSIZE() (0x9f000)
/*
 * parse_cmdline_early() - scan the boot command line for options that must
 * take effect before the normal parameter parser runs.
 * @cmdline_p: receives a pointer to the static `command_line` buffer
 *
 * Reads from `saved_command_line` (forced to be NUL-terminated first) and
 * uses `command_line` as the output buffer, bounded by COMMAND_LINE_SIZE.
 * Options recognised in the visible code: mem=, memmap=, noexec=, maxcpus=,
 * the acpi= / pci=noacpi / acpi_sci= family, acpi_skip_timer_override,
 * noapic, lapic, nolapic, crashkernel=, highmem= and vmalloc=.
 * Each option is matched with memcmp() against the full length of its
 * literal. Finally the "user-defined physical RAM map" is printed via
 * print_memory_map("user").
 * NOTE(review): the copy loop and the condition guarding the final print are
 * not visible in this excerpt.
 */
694 static void __init parse_cmdline_early (char ** cmdline_p)
696 char c = ' ', *to = command_line, *from = saved_command_line;
700 /* Save unparsed command line copy for /proc/cmdline */
701 saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
707 * "mem=nopentium" disables the 4MB page tables.
708 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
709 * to <mem>, overriding the bios size.
710 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
711 * <start> to <start>+<mem>, overriding the bios size.
713 * HPA tells me bootloaders need to parse mem=, so no new
714 * option should be mem= [also see Documentation/i386/boot.txt]
716 if (!memcmp(from, "mem=", 4)) {
717 if (to != command_line)
719 if (!memcmp(from+4, "nopentium", 9)) {
721 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
724 /* If the user specifies memory size, we
725 * limit the BIOS-provided memory map to
726 * that size. exactmap can be used to specify
727 * the exact map. mem=number can be used to
728 * trim the existing memory map.
730 unsigned long long mem_size;
732 mem_size = memparse(from+4, &from);
733 limit_regions(mem_size);
738 else if (!memcmp(from, "memmap=", 7)) {
739 if (to != command_line)
741 if (!memcmp(from+7, "exactmap", 8)) {
742 #ifdef CONFIG_CRASH_DUMP
743 /* If we are doing a crash dump, we
744 * still need to know the real mem
745 * size before original memory map is
749 saved_max_pfn = max_pfn;
755 /* If the user specifies memory size, we
756 * limit the BIOS-provided memory map to
757 * that size. exactmap can be used to specify
758 * the exact map. mem=number can be used to
759 * trim the existing memory map.
761 unsigned long long start_at, mem_size;
763 mem_size = memparse(from+7, &from);
765 start_at = memparse(from+1, &from);
766 add_memory_region(start_at, mem_size, E820_RAM);
767 } else if (*from == '#') {
768 start_at = memparse(from+1, &from);
769 add_memory_region(start_at, mem_size, E820_ACPI);
770 } else if (*from == '$') {
771 start_at = memparse(from+1, &from);
772 add_memory_region(start_at, mem_size, E820_RESERVED);
774 limit_regions(mem_size);
780 else if (!memcmp(from, "noexec=", 7))
781 noexec_setup(from + 7);
784 #ifdef CONFIG_X86_SMP
786 * If the BIOS enumerates physical processors before logical,
787 * maxcpus=N at enumeration-time can be used to disable HT.
789 else if (!memcmp(from, "maxcpus=", 8)) {
790 extern unsigned int maxcpus;
792 maxcpus = simple_strtoul(from + 8, NULL, 0);
796 #ifdef CONFIG_ACPI_BOOT
797 /* "acpi=off" disables both ACPI table parsing and interpreter */
798 else if (!memcmp(from, "acpi=off", 8)) {
802 /* acpi=force to over-ride black-list */
803 else if (!memcmp(from, "acpi=force", 10)) {
809 /* acpi=strict disables out-of-spec workarounds */
810 else if (!memcmp(from, "acpi=strict", 11)) {
814 /* Limit ACPI just to boot-time to enable HT */
815 else if (!memcmp(from, "acpi=ht", 7)) {
821 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
822 else if (!memcmp(from, "pci=noacpi", 10)) {
825 /* "acpi=noirq" disables ACPI interrupt routing */
826 else if (!memcmp(from, "acpi=noirq", 10)) {
830 else if (!memcmp(from, "acpi_sci=edge", 13))
831 acpi_sci_flags.trigger = 1;
833 else if (!memcmp(from, "acpi_sci=level", 14))
834 acpi_sci_flags.trigger = 3;
836 else if (!memcmp(from, "acpi_sci=high", 13))
837 acpi_sci_flags.polarity = 1;
839 else if (!memcmp(from, "acpi_sci=low", 12))
840 acpi_sci_flags.polarity = 3;
842 #ifdef CONFIG_X86_IO_APIC
843 else if (!memcmp(from, "acpi_skip_timer_override", 24))
844 acpi_skip_timer_override = 1;
847 #ifdef CONFIG_X86_LOCAL_APIC
848 /* disable IO-APIC */
849 else if (!memcmp(from, "noapic", 6))
850 disable_ioapic_setup();
851 #endif /* CONFIG_X86_LOCAL_APIC */
852 #endif /* CONFIG_ACPI_BOOT */
854 #ifdef CONFIG_X86_LOCAL_APIC
855 /* enable local APIC */
856 else if (!memcmp(from, "lapic", 5))
859 /* disable local APIC */
/* compare all 7 characters of "nolapic"; a length of 6 would also accept
   any other word that merely starts with "nolapi" */
860 else if (!memcmp(from, "nolapic", 7))
862 #endif /* CONFIG_X86_LOCAL_APIC */
865 /* crashkernel=size@addr specifies the location to reserve for
866 * a crash kernel. By reserving this memory we guarantee
867 * that linux never sets it up as a DMA target.
868 * Useful for holding code to do something appropriate
869 * after a kernel panic.
871 else if (!memcmp(from, "crashkernel=", 12)) {
872 unsigned long size, base;
873 size = memparse(from+12, &from);
875 base = memparse(from+1, &from);
876 /* FIXME: Do I want a sanity check
877 * to validate the memory range?
879 crashk_res.start = base;
880 crashk_res.end = base + size - 1;
886 * highmem=size forces highmem to be exactly 'size' bytes.
887 * This works even on boxes that have no highmem otherwise.
888 * This also works to reduce highmem size on bigger boxes.
890 else if (!memcmp(from, "highmem=", 8))
891 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
894 * vmalloc=size forces the vmalloc area to be exactly 'size'
895 * bytes. This can be used to increase (or decrease) the
896 * vmalloc area - the default is 128m.
898 else if (!memcmp(from, "vmalloc=", 8))
899 __VMALLOC_RESERVE = memparse(from+8, &from);
905 if (COMMAND_LINE_SIZE <= ++len)
910 *cmdline_p = command_line;
912 printk(KERN_INFO "user-defined physical RAM map:\n");
913 print_memory_map("user");
918 * Callback for efi_memmap_walk.
921 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
923 unsigned long *max_pfn = arg, pfn;
926 pfn = PFN_UP(end -1);
935 * Find the highest page frame number we have available
937 void __init find_max_pfn(void)
943 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
947 for (i = 0; i < e820.nr_map; i++) {
948 unsigned long start, end;
950 if (e820.map[i].type != E820_RAM)
952 start = PFN_UP(e820.map[i].addr);
953 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
962 * Determine low and high memory ranges:
/*
 * find_max_low_pfn() - decide where low memory ends.
 *
 * Starts from the global max_pfn. When that exceeds MAXMEM_PFN, low memory
 * is capped at MAXMEM_PFN and the remainder is treated as highmem (or
 * discarded with a warning on kernels built without CONFIG_HIGHMEM / PAE).
 * `highmem_pages` holds the size requested with highmem= on the command
 * line, or its initial value of -1.  May lower the global max_pfn.
 * NOTE(review): the return statement and several branch bodies are not
 * visible in this excerpt; presumably the local max_low_pfn is returned.
 */
964 unsigned long __init find_max_low_pfn(void)
966 unsigned long max_low_pfn;
968 max_low_pfn = max_pfn;
969 if (max_low_pfn > MAXMEM_PFN) {
/* highmem_pages still at its initial -1: use everything above MAXMEM_PFN */
970 if (highmem_pages == -1)
971 highmem_pages = max_pfn - MAXMEM_PFN;
/* requested highmem is smaller than what exists: lower max_pfn to match */
972 if (highmem_pages + MAXMEM_PFN < max_pfn)
973 max_pfn = MAXMEM_PFN + highmem_pages;
/* requested highmem is larger than what exists: report it */
974 if (highmem_pages + MAXMEM_PFN > max_pfn) {
975 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
978 max_low_pfn = MAXMEM_PFN;
979 #ifndef CONFIG_HIGHMEM
980 /* Maximum memory usable is what is directly addressable */
981 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
983 if (max_pfn > MAX_NONPAE_PFN)
984 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
986 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
987 max_pfn = MAXMEM_PFN;
988 #else /* !CONFIG_HIGHMEM */
989 #ifndef CONFIG_X86_PAE
990 if (max_pfn > MAX_NONPAE_PFN) {
991 max_pfn = MAX_NONPAE_PFN;
992 printk(KERN_WARNING "Warning only 4GB will be used.\n");
993 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
995 #endif /* !CONFIG_X86_PAE */
996 #endif /* !CONFIG_HIGHMEM */
998 if (highmem_pages == -1)
1000 #ifdef CONFIG_HIGHMEM
1001 if (highmem_pages >= max_pfn) {
1002 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
1005 if (highmem_pages) {
/* refuse a highmem size that would leave less than 64MB of lowmem */
1006 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
1007 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
1010 max_low_pfn -= highmem_pages;
1014 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
1021 * Free all available memory for boot time allocation. Used
1022 * as a callback function by efi_memmap_walk()
/*
 * free_available_memory() - hand one [start, end) byte range to the bootmem
 * allocator, clipped to low memory.  Passed to efi_memmap_walk() by
 * register_bootmem_low_pages(); `arg` is not used in the visible lines.
 *
 * NOTE(review): the clip limit here is (max_low_pfn + 1) << PAGE_SHIFT,
 * whereas the e820 loop in register_bootmem_low_pages() clamps at
 * max_low_pfn itself -- confirm the extra page is intended.
 * NOTE(review): the return type and return values are not visible in this
 * excerpt.
 */
1026 free_available_memory(unsigned long start, unsigned long end, void *arg)
1028 /* check max_low_pfn */
1029 if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
/* range straddles the limit: cut its end back to the limit */
1031 if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
1032 end = (max_low_pfn + 1) << PAGE_SHIFT;
1034 free_bootmem(start, end - start);
1039 * Register fully available low RAM pages with the bootmem allocator.
/*
 * register_bootmem_low_pages() - free every fully usable low-memory page to
 * the bootmem allocator.
 * @max_low_pfn: first page frame number that is no longer low memory
 *
 * Either walks the EFI map with free_available_memory() as callback, or
 * iterates over the e820 map and frees the page-aligned interior of each
 * E820_RAM entry that lies below @max_low_pfn.
 * NOTE(review): the condition choosing between the EFI walk and the e820
 * loop is not visible in this excerpt -- presumably efi_enabled; confirm.
 */
1041 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1046 efi_memmap_walk(free_available_memory, NULL);
1049 for (i = 0; i < e820.nr_map; i++) {
1050 unsigned long curr_pfn, last_pfn, size;
1052 * Reserve usable low memory
1054 if (e820.map[i].type != E820_RAM)
1057 * We are rounding up the start address of usable memory:
1059 curr_pfn = PFN_UP(e820.map[i].addr);
1060 if (curr_pfn >= max_low_pfn)
1063 * ... and at the end of the usable range downwards:
1065 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
/* never free pages at or above the low-memory limit */
1067 if (last_pfn > max_low_pfn)
1068 last_pfn = max_low_pfn;
1071 * .. finally, did all the rounding and playing
1072 * around just make the area go away?
1074 if (last_pfn <= curr_pfn)
/* size is a page count; PFN_PHYS() converts both start and length to bytes */
1077 size = last_pfn - curr_pfn;
1078 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1083 * workaround for Dell systems that neglect to reserve EBDA
1085 static void __init reserve_ebda_region(void)
1088 addr = get_bios_ebda();
1090 reserve_bootmem(addr, PAGE_SIZE);
1093 #ifndef CONFIG_NEED_MULTIPLE_NODES
1094 void __init setup_bootmem_allocator(void);
1095 static unsigned long __init setup_memory(void)
1098 * partially used pages are not usable - thus
1099 * we are rounding upwards:
1101 min_low_pfn = PFN_UP(init_pg_tables_end);
1105 max_low_pfn = find_max_low_pfn();
1107 #ifdef CONFIG_HIGHMEM
1108 highstart_pfn = highend_pfn = max_pfn;
1109 if (max_pfn > max_low_pfn) {
1110 highstart_pfn = max_low_pfn;
1112 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1113 pages_to_mb(highend_pfn - highstart_pfn));
1115 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1116 pages_to_mb(max_low_pfn));
1118 setup_bootmem_allocator();
1123 void __init zone_sizes_init(void)
1125 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1126 unsigned int max_dma, low;
1128 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
1132 zones_size[ZONE_DMA] = low;
1134 zones_size[ZONE_DMA] = max_dma;
1135 zones_size[ZONE_NORMAL] = low - max_dma;
1136 #ifdef CONFIG_HIGHMEM
1137 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1140 free_area_init(zones_size);
1143 extern unsigned long __init setup_memory(void);
1144 extern void zone_sizes_init(void);
1145 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
1147 void __init setup_bootmem_allocator(void)
1149 unsigned long bootmap_size;
1151 * Initialize the boot-time allocator (with low memory only):
1153 bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
1155 register_bootmem_low_pages(max_low_pfn);
1158 * Reserve the bootmem bitmap itself as well. We do this in two
1159 * steps (first step was init_bootmem()) because this catches
1160 * the (very unlikely) case of us accidentally initializing the
1161 * bootmem allocator with an invalid RAM area.
1163 reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
1164 bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
1167 * reserve physical page 0 - it's a special BIOS page on many boxes,
1168 * enabling clean reboots, SMP operation, laptop functions.
1170 reserve_bootmem(0, PAGE_SIZE);
1172 /* reserve EBDA region, it's a 4K region */
1173 reserve_ebda_region();
1175 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1176 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1177 unless you have no PS/2 mouse plugged in. */
1178 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1179 boot_cpu_data.x86 == 6)
1180 reserve_bootmem(0xa0000 - 4096, 4096);
1184 * But first pinch a few for the stack/trampoline stuff
1185 * FIXME: Don't need the extra page at 4K, but need to fix
1186 * trampoline before removing it. (see the GDT stuff)
1188 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1190 #ifdef CONFIG_ACPI_SLEEP
1192 * Reserve low memory region for sleep support.
1194 acpi_reserve_bootmem();
1196 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1198 * Find and reserve possible boot-time SMP configuration:
1203 #ifdef CONFIG_BLK_DEV_INITRD
1204 if (LOADER_TYPE && INITRD_START) {
1205 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1206 reserve_bootmem(INITRD_START, INITRD_SIZE);
1208 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
1209 initrd_end = initrd_start+INITRD_SIZE;
1212 printk(KERN_ERR "initrd extends beyond end of memory "
1213 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1214 INITRD_START + INITRD_SIZE,
1215 max_low_pfn << PAGE_SHIFT);
1221 if (crashk_res.start != crashk_res.end)
1222 reserve_bootmem(crashk_res.start,
1223 crashk_res.end - crashk_res.start + 1);
1228 * The node 0 pgdat is initialized before all of these because
1229 * it's needed for bootmem. node>0 pgdats have their virtual
1230 * space allocated before the pagetables are in place to access
1231 * them, so they can't be cleared then.
1233 * This should all compile down to nothing when NUMA is off.
1235 void __init remapped_pgdat_init(void)
1239 for_each_online_node(nid) {
1241 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
1246 * Request address space for all standard RAM and ROM resources
1247 * and also for regions reported as reserved by the e820.
/*
 * legacy_init_iomem_resources() - publish the e820 map in the iomem
 * resource tree.
 * @code_resource: resource describing the kernel code
 * @data_resource: resource describing the kernel data
 *
 * For each e820 entry a struct resource is allocated from low bootmem,
 * named after the entry type, and requested under iomem_resource.  Under
 * every "System RAM" resource the kernel code, kernel data and crashk_res
 * resources are requested as children.
 * NOTE(review): the return type, the action taken for entries ending above
 * 0x100000000 and any config guard around crashk_res are not visible in
 * this excerpt.
 */
1250 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1255 for (i = 0; i < e820.nr_map; i++) {
1256 struct resource *res;
/* test for entries that end above the 4GB mark */
1257 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1259 res = alloc_bootmem_low(sizeof(struct resource));
1260 switch (e820.map[i].type) {
1261 case E820_RAM: res->name = "System RAM"; break;
1262 case E820_ACPI: res->name = "ACPI Tables"; break;
1263 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1264 default: res->name = "reserved";
1266 res->start = e820.map[i].addr;
/* resource end addresses are inclusive, hence the - 1 */
1267 res->end = res->start + e820.map[i].size - 1;
1268 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1269 request_resource(&iomem_resource, res);
1270 if (e820.map[i].type == E820_RAM) {
1272 * We don't know which RAM region contains kernel data,
1273 * so we try it repeatedly and let the resource manager
1276 request_resource(res, code_resource);
1277 request_resource(res, data_resource);
1279 request_resource(res, &crashk_res);
1286 * Request address space for all standard resources
1288 static void __init register_memory(void)
1290 unsigned long gapstart, gapsize;
1291 unsigned long long last;
1295 efi_initialize_iomem_resources(&code_resource, &data_resource);
1297 legacy_init_iomem_resources(&code_resource, &data_resource);
1299 /* EFI systems may still have VGA */
1300 request_resource(&iomem_resource, &video_ram_resource);
1302 /* request I/O space for devices used on all i[345]86 PCs */
1303 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1304 request_resource(&ioport_resource, &standard_io_resources[i]);
1307 * Search for the biggest gap in the low 32 bits of the e820
1310 last = 0x100000000ull;
1311 gapstart = 0x10000000;
1315 unsigned long long start = e820.map[i].addr;
1316 unsigned long long end = start + e820.map[i].size;
1319 * Since "last" is at most 4GB, we know we'll
1320 * fit in 32 bits if this condition is true
1323 unsigned long gap = last - end;
1325 if (gap > gapsize) {
1335 * Start allocating dynamic PCI memory a bit into the gap,
1336 * aligned up to the nearest megabyte.
1338 * Question: should we try to pad it up a bit (do something
1339 * like " + (gapsize >> 3)" in there too?). We now have the
1342 pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
1344 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1345 pci_mem_start, gapstart, gapsize);
1348 /* Use inline assembly to define this because the nops are defined
1349 as inline assembly strings in the include files and we cannot
1350 get them easily into strings. */
/*
 * Three byte tables emitted into .data under the labels intelnops, k8nops
 * and k7nops.  Each one is the concatenation of the NOP1, NOP2, ... strings
 * of one family in ascending order, so every nop of a family starts at a
 * fixed offset from its label.
 *
 * NOTE(review): the closing lines of the k8 and k7 statements are missing
 * from this listing; presumably they end with NOP7 and NOP8 like the
 * generic table - TODO confirm.
 */
1351 asm("\t.data\nintelnops: "
1352 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1353 GENERIC_NOP7 GENERIC_NOP8);
1354 asm("\t.data\nk8nops: "
1355 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1357 asm("\t.data\nk7nops: "
1358 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
/*
 * Per-family lookup tables with ASM_NOP_MAX+1 slots each, pointing into the
 * byte tables intelnops, k8nops and k7nops declared extern below.
 *
 * The visible initialisers have the form base + 1 + 2 + ... + (k-1), i.e.
 * they skip the first k-1 nops of the family.  This assumes the n-th nop of
 * a family is n bytes long - TODO confirm against the NOP macro definitions.
 *
 * NOTE(review): the first entries and the closing brace of each table are
 * missing from this listing.
 */
1361 extern unsigned char intelnops[], k8nops[], k7nops[];
1362 static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
1367 intelnops + 1 + 2 + 3,
1368 intelnops + 1 + 2 + 3 + 4,
1369 intelnops + 1 + 2 + 3 + 4 + 5,
1370 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1371 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1373 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
1379 k8nops + 1 + 2 + 3 + 4,
1380 k8nops + 1 + 2 + 3 + 4 + 5,
1381 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1382 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1384 static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
1390 k7nops + 1 + 2 + 3 + 4,
1391 k7nops + 1 + 2 + 3 + 4 + 5,
1392 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1393 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
/*
 * Table pairing a CPU feature bit with the nop table to use on CPUs that
 * have that feature.  apply_alternatives() reads it as noptypes[] and stops
 * at the first entry whose cpuid field is negative.
 *
 * NOTE(review): the declaration line, the cpuid member and the terminating
 * entry are missing from this listing.
 */
1397 unsigned char **noptable;
1399 { X86_FEATURE_K8, k8_nops },
1400 { X86_FEATURE_K7, k7_nops },
1404 /* Replace instructions with better alternatives for this CPU type.
1406 This runs before SMP is initialized to avoid SMP problems with
1407 self-modifying code. This implies that asymmetric systems where
1408 APs have fewer capabilities than the boot processor are not handled.
1409 In this case boot with "noreplacement". */
/*
 * @start: first struct alt_instr record to process
 * @end:   address just past the last record; the loop ends once the cursor
 *         is no longer below it
 *
 * For each record the boot CPU feature a->cpuid is tested; records whose
 * feature is absent are presumably skipped (the statement under that test
 * is not visible in this listing).  Otherwise replacementlen bytes of
 * a->replacement are copied over a->instr and the remaining
 * instrlen - replacementlen bytes are filled from the selected nop table.
 * A replacement longer than the original instruction trips BUG_ON().
 */
1410 void apply_alternatives(void *start, void *end)
1412 struct alt_instr *a;
/* intel_nops is the default; a noptypes[] entry may override it below. */
1414 unsigned char **noptable = intel_nops;
/* Walk noptypes[] until the entry with a negative cpuid. */
1415 for (i = 0; noptypes[i].cpuid >= 0; i++) {
1416 if (boot_cpu_has(noptypes[i].cpuid)) {
1417 noptable = noptypes[i].noptable;
1421 for (a = start; (void *)a < end; a++) {
1422 if (!boot_cpu_has(a->cpuid))
/* The replacement must fit inside the original instruction. */
1424 BUG_ON(a->replacementlen > a->instrlen);
1425 memcpy(a->instr, a->replacement, a->replacementlen);
1426 diff = a->instrlen - a->replacementlen;
1427 /* Pad the rest with nops */
/* i is the write offset inside a->instr; each pass consumes k bytes of padding. */
1428 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
/* NOTE(review): how k is derived and what happens above ASM_NOP_MAX is not visible here; presumably k is clamped. */
1430 if (k > ASM_NOP_MAX)
/* noptable[k] supplies the k bytes copied for this chunk. */
1432 memcpy(a->instr + i, noptable[k], k);
/*
 * Boot-time flag, zero by default.  NOTE(review): presumably set by the
 * noreplacement option handler and checked before instructions are patched;
 * neither use is visible in this listing - TODO confirm.
 */
1437 static int no_replacement __initdata = 0;
/*
 * alternative_instructions() - run apply_alternatives() over the records
 * lying between the __alt_instructions and __alt_instructions_end symbols
 * (presumably section bounds supplied by the linker - TODO confirm).
 *
 * Takes no arguments and returns nothing.  NOTE(review): the lines between
 * the extern declaration and the call are missing from this listing, so any
 * early-exit condition cannot be seen here.
 */
1439 void __init alternative_instructions(void)
1441 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1444 apply_alternatives(__alt_instructions, __alt_instructions_end);
/*
 * noreplacement_setup() - handler registered with __setup() for the
 * noreplacement boot parameter.
 *
 * @s: string argument supplied with the option.
 *
 * NOTE(review): the function body is missing from this listing, so its
 * effect and return value cannot be stated here.
 */
1447 static int __init noreplacement_setup(char *s)
1453 __setup("noreplacement", noreplacement_setup);
/*
 * Forward declaration.  setup_arch() passes the returned string to
 * print_memory_map().
 */
1455 static char * __init machine_specific_memory_setup(void);
/*
 * set_mca_bus() - two definitions with the same signature are visible; the
 * second is an empty stub.  They are presumably alternatives selected by a
 * preprocessor conditional that is missing from this listing - TODO confirm.
 *
 * @x: setup_arch() passes bit 1 (mask 0x2) of SYS_DESC_TABLE.table[3].
 *
 * NOTE(review): the body of the first definition is not visible here.
 */
1458 static void set_mca_bus(int x)
1463 static void set_mca_bus(int x) { }
1467 * Determine if we were loaded by an EFI loader. If so, then we have also been
1468 * passed the efi memmap, systab, etc., so we should use these data structures
1469 * for initialization. Note, the efi init code path is determined by the
1470 * global efi_enabled. This allows the same kernel image to be used on existing
1471 * systems (with a traditional BIOS) as well as on EFI systems.
/*
 * setup_arch() - architecture-dependent boot-time initialisation.
 *
 * @cmdline_p: handed to parse_cmdline_early(); *cmdline_p is later searched
 *             for the earlyprintk= option and passed to generic_apic_probe().
 *
 * Steps visible in this listing, in order:
 *   1. copy new_cpu_data into boot_cpu_data and run pre_setup_arch_hook();
 *   2. test LOADER_TYPE == 0x50 together with EFI_SYSTAB (the statement
 *      guarded by the test is not visible - presumably it sets efi_enabled);
 *   3. copy boot parameters into kernel variables: ROOT_DEV, drive_info,
 *      screen_info, edid_info, apm_info.bios, ist_info, saved_videomode,
 *      bootloader_type and, when SYS_DESC_TABLE.length is non-zero, the MCA
 *      flag, machine_id, machine_submodel_id and BIOS_revision;
 *   4. under CONFIG_BLK_DEV_RAM, decode RAMDISK_FLAGS into rd_image_start,
 *      rd_prompt and rd_doload;
 *   5. print the memory map string returned by
 *      machine_specific_memory_setup();
 *   6. clear MS_RDONLY in root_mountflags when MOUNT_ROOT_RDONLY is zero;
 *   7. record _text, _etext and _edata in init_mm, set init_mm.brk to
 *      init_pg_tables_end + PAGE_OFFSET, and fill code_resource and
 *      data_resource with the physical addresses of the same symbols;
 *   8. call parse_cmdline_early(), then setup_memory(), keeping its result
 *      in max_low_pfn;
 *   9. call smp_alloc_memory() and remapped_pgdat_init(), set up early
 *      printk when requested, probe the APIC, initialise the ACPI boot
 *      tables and select the console.
 *
 * NOTE(review): braces, #endif lines, comment delimiters and some calls are
 * missing from this listing, so the exact nesting of the conditional
 * sections must be checked against the full file.
 */
1473 void __init setup_arch(char **cmdline_p)
1475 unsigned long max_low_pfn;
1477 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1478 pre_setup_arch_hook();
1482 * FIXME: This isn't an official loader_type right
1483 * now but does currently work with elilo.
1484 * If we were configured as an EFI kernel, check to make
1485 * sure that we were loaded correctly from elilo and that
1486 * the system table is valid. If not, then initialize normally.
1489 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1493 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1494 drive_info = DRIVE_INFO;
1495 screen_info = SCREEN_INFO;
1496 edid_info = EDID_INFO;
1497 apm_info.bios = APM_BIOS_INFO;
1498 ist_info = IST_INFO;
1499 saved_videomode = VIDEO_MODE;
1500 if( SYS_DESC_TABLE.length != 0 ) {
1501 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1502 machine_id = SYS_DESC_TABLE.table[0];
1503 machine_submodel_id = SYS_DESC_TABLE.table[1];
1504 BIOS_revision = SYS_DESC_TABLE.table[2];
1506 bootloader_type = LOADER_TYPE;
1508 #ifdef CONFIG_BLK_DEV_RAM
1509 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1510 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1511 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1517 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1518 print_memory_map(machine_specific_memory_setup());
1523 if (!MOUNT_ROOT_RDONLY)
1524 root_mountflags &= ~MS_RDONLY;
1525 init_mm.start_code = (unsigned long) _text;
1526 init_mm.end_code = (unsigned long) _etext;
1527 init_mm.end_data = (unsigned long) _edata;
1528 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
1530 code_resource.start = virt_to_phys(_text);
1531 code_resource.end = virt_to_phys(_etext)-1;
1532 data_resource.start = virt_to_phys(_etext);
1533 data_resource.end = virt_to_phys(_edata)-1;
1535 parse_cmdline_early(cmdline_p);
1537 max_low_pfn = setup_memory();
1540 * NOTE: before this point _nobody_ is allowed to allocate
1541 * any memory using the bootmem allocator. Although the
1542 * alloctor is now initialised only the first 8Mb of the kernel
1543 * virtual address space has been mapped. All allocations before
1544 * paging_init() has completed must use the alloc_bootmem_low_pages()
1545 * variant (which allocates DMA'able memory) and care must be taken
1546 * not to exceed the 8Mb limit.
1550 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1553 remapped_pgdat_init();
1558 * NOTE: at this point the bootmem allocator is fully available.
1561 #ifdef CONFIG_EARLY_PRINTK
/* Look for the earlyprintk= option in the command line. */
1563 char *s = strstr(*cmdline_p, "earlyprintk=");
1565 extern void setup_early_printk(char *);
1567 setup_early_printk(s);
1568 printk("early console enabled\n");
1576 #ifdef CONFIG_X86_GENERICARCH
1577 generic_apic_probe(*cmdline_p);
1582 #ifdef CONFIG_ACPI_BOOT
1584 * Parse the ACPI tables for possible boot-time SMP configuration.
1586 acpi_boot_table_init();
1590 #ifdef CONFIG_X86_LOCAL_APIC
1591 if (smp_found_config)
/* Console choice: vga_con unless EFI is enabled and reports 0xa0000 as conventional memory; dummy_con when only the dummy console is configured. */
1598 #if defined(CONFIG_VGA_CONSOLE)
1599 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1600 conswitchp = &vga_con;
1601 #elif defined(CONFIG_DUMMY_CONSOLE)
1602 conswitchp = &dummy_con;
1607 #include "setup_arch_post.h"
1611 * c-file-style:"k&r"