From: Ingo Molnar Date: Tue, 8 Jul 2008 09:14:58 +0000 (+0200) Subject: Merge branch 'x86/mpparse' into x86/devel X-Git-Tag: v2.6.27-rc1~1106^2~247 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3de352bbd86f890dd0c5e1c09a6a1b0b29e0f8ce;p=linux-2.6 Merge branch 'x86/mpparse' into x86/devel Conflicts: arch/x86/Kconfig arch/x86/kernel/io_apic_32.c arch/x86/kernel/setup_64.c arch/x86/mm/init_32.c Signed-off-by: Ingo Molnar --- 3de352bbd86f890dd0c5e1c09a6a1b0b29e0f8ce diff --cc arch/x86/Kconfig index 7dc46ba26f,23c352e408..640dc62a7f --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@@ -261,39 -282,9 +282,9 @@@ config X86_VOYAGE If you do not specifically know you have a Voyager based machine, say N here, otherwise the kernel you build will not be bootable. - config X86_NUMAQ - bool "NUMAQ (IBM/Sequent)" - depends on SMP && X86_32 && PCI - select NUMA - help - This option is used for getting Linux to run on a (IBM/Sequent) NUMA - multiquad box. This changes the way that processors are bootstrapped, - and uses Clustered Logical APIC addressing mode instead of Flat Logical. - You will need a new lynxer.elf file to flash your firmware with - send - email to . - - config X86_SUMMIT - bool "Summit/EXA (IBM x440)" - depends on X86_32 && SMP - help - This option is needed for IBM systems that use the Summit/EXA chipset. - In particular, it is needed for the x440. - - If you don't have one of these computers, you should say N here. - If you want to build a NUMA kernel, you must select ACPI. - - config X86_BIGSMP - bool "Support for other sub-arch SMP systems with more than 8 CPUs" - depends on X86_32 && SMP - help - This option is needed for the systems that have more than 8 CPUs - and if the system is not of any sub-arch type above. - - If you don't have such a system, you should say N here. - config X86_VISWS bool "SGI 320/540 (Visual Workstation)" - depends on X86_32 + depends on X86_32 && !PCI help The SGI Visual Workstation series is an IA32-based workstation based on SGI systems chips with some legacy PC hardware attached. @@@ -304,12 -295,33 +295,33 @@@ and vice versa. See for details. config X86_GENERICARCH - bool "Generic architecture (Summit, bigsmp, ES7000, default)" + bool "Generic architecture" depends on X86_32 help - This option compiles in the Summit, bigsmp, ES7000, default subarchitectures. - It is intended for a generic binary kernel. - If you want a NUMA kernel, select ACPI. We need SRAT for NUMA. + This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default + subarchitectures. It is intended for a generic binary kernel. + if you select them all, kernel will probe it one by one. and will + fallback to default. + + if X86_GENERICARCH + + config X86_NUMAQ + bool "NUMAQ (IBM/Sequent)" - depends on SMP && X86_32 && X86_MPPARSE ++ depends on SMP && X86_32 && PCI && X86_MPPARSE + select NUMA + help + This option is used for getting Linux to run on a NUMAQ (IBM/Sequent) + NUMA multiquad box. This changes the way that processors are + bootstrapped, and uses Clustered Logical APIC addressing mode instead + of Flat Logical. You will need a new lynxer.elf file to flash your + firmware with - send email to . + + config X86_SUMMIT + bool "Summit/EXA (IBM x440)" + depends on X86_32 && SMP + help + This option is needed for IBM systems that use the Summit/EXA chipset. + In particular, it is needed for the x440. config X86_ES7000 bool "Support for Unisys ES7000 IA32 series" @@@ -423,16 -442,32 +442,16 @@@ config MEMTES default y help This option adds a kernel parameter 'memtest', which allows memtest - to be disabled at boot. If this option is selected, memtest - functionality can be disabled with memtest=0 on the kernel - command line. The purpose of this option is to allow a single - kernel image to be distributed with memtest built in, but not - necessarily enabled. - + to be set. + memtest=0, mean disabled; -- default + memtest=1, mean do 1 test pattern; + ... + memtest=4, mean do 4 test patterns. If you are unsure how to answer this question, answer Y. -config MEMTEST_BOOTPARAM_VALUE - int "Memtest boot parameter default value (0-4)" - depends on MEMTEST_BOOTPARAM - range 0 4 - default 0 - help - This option sets the default value for the kernel parameter - 'memtest', which allows memtest to be disabled at boot. If this - option is set to 0 (zero), the memtest kernel parameter will - default to 0, disabling memtest at bootup. If this option is - set to 4, the memtest kernel parameter will default to 4, - enabling memtest at bootup, and use that as pattern number. - - If you are unsure how to answer this question, answer 0. - config ACPI_SRAT def_bool y - depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH) + depends on X86_32 && ACPI && NUMA && X86_GENERICARCH select ACPI_NUMA config HAVE_ARCH_PARSE_SRAT diff --cc arch/x86/kernel/Makefile index 53557cbe4b,bcc2b123da..d1d4ee8952 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@@ -22,9 -22,10 +22,9 @@@ obj-y += setup_$(BITS).o i8259.o irqi obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o - obj-y += bootflag.o e820_$(BITS).o + obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o obj-y += alternative.o i8253.o pci-nommu.o -obj-$(CONFIG_X86_64) += bugs_64.o obj-y += tsc_$(BITS).o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o diff --cc arch/x86/kernel/aperture_64.c index e819362c70,66b140932b..600470d464 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@@ -326,33 -290,22 +326,33 @@@ void __init early_gart_iommu_check(void if (gart_fix_e820 && !fix && aper_enabled) { if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { - /* reserved it, so we can resuse it in second kernel */ + /* reserve it, so we can reuse it in second kernel */ printk(KERN_INFO "update e820 for GART\n"); - add_memory_region(aper_base, aper_size, E820_RESERVED); + e820_add_region(aper_base, aper_size, E820_RESERVED); update_e820(); } - return; } + if (!fix) + return; + /* different nodes have different setting, disable them all at first*/ - for (num = 24; num < 32; num++) { - if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) - continue; + for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { + int bus; + int dev_base, dev_limit; + + bus = bus_dev_ranges[i].bus; + dev_base = bus_dev_ranges[i].dev_base; + dev_limit = bus_dev_ranges[i].dev_limit; + + for (slot = dev_base; slot < dev_limit; slot++) { + if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) + continue; - ctl = read_pci_config(0, num, 3, 0x90); - ctl &= ~1; - write_pci_config(0, num, 3, 0x90, ctl); + ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); + ctl &= ~AMD64_GARTEN; + write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); + } } } diff --cc arch/x86/kernel/io_apic_32.c index dac47d61d2,0662817d61..fedb3b113a --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@@ -844,8 -849,8 +850,8 @@@ static int __init find_isa_irq_apic(in } if (i < mp_irq_entries) { int apic; - for(apic = 0; apic < nr_ioapics; apic++) { + for (apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) return apic; } } @@@ -878,10 -883,10 +884,10 @@@ int IO_APIC_get_PCI_irq_vector(int bus break; if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mpc_irqtype && + !mp_irqs[i].mp_irqtype && (bus == lbus) && - (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i, apic, mp_irqs[i].mpc_dstirq); + (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); ++ int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq); if (!(apic || IO_APIC_IRQ(irq))) continue; @@@ -976,36 -981,37 +982,36 @@@ static int MPBIOS_polarity(int idx /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].mpc_irqflag & 3) { - switch (mp_irqs[idx].mp_irqflag & 3) ++ switch (mp_irqs[idx].mp_irqflag & 3) { + case 0: /* conforms, ie. bus-type dependent polarity */ { - case 0: /* conforms, ie. bus-type dependent polarity */ - { - polarity = test_bit(bus, mp_bus_not_pci)? - default_ISA_polarity(idx): - default_PCI_polarity(idx); - break; - } - case 1: /* high active */ - { - polarity = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - case 3: /* low active */ - { - polarity = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } + polarity = test_bit(bus, mp_bus_not_pci)? + default_ISA_polarity(idx): + default_PCI_polarity(idx); + break; + } + case 1: /* high active */ + { + polarity = 0; + break; + } + case 2: /* reserved */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } + case 3: /* low active */ + { + polarity = 1; + break; + } + default: /* invalid */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } } return polarity; } @@@ -1018,38 -1024,66 +1024,38 @@@ static int MPBIOS_trigger(int idx /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) { - switch ((mp_irqs[idx].mp_irqflag>>2) & 3) ++ switch ((mp_irqs[idx].mp_irqflag>>2) & 3) { + case 0: /* conforms, ie. bus-type dependent */ { - case 0: /* conforms, ie. bus-type dependent */ - { - trigger = test_bit(bus, mp_bus_not_pci)? - default_ISA_trigger(idx): - default_PCI_trigger(idx); + trigger = test_bit(bus, mp_bus_not_pci)? + default_ISA_trigger(idx): + default_PCI_trigger(idx); #if defined(CONFIG_EISA) || defined(CONFIG_MCA) - switch (mp_bus_id_to_type[bus]) - { - case MP_BUS_ISA: /* ISA pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_EISA: /* EISA pin */ - { - trigger = default_EISA_trigger(idx); - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_MCA: /* MCA pin */ - { - trigger = default_MCA_trigger(idx); - break; - } - default: - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - } -#endif + switch (mp_bus_id_to_type[bus]) { + case MP_BUS_ISA: /* ISA pin */ + { + /* set before the switch */ break; } - case 1: /* edge */ + case MP_BUS_EISA: /* EISA pin */ { - trigger = 0; + trigger = default_EISA_trigger(idx); break; } - case 2: /* reserved */ + case MP_BUS_PCI: /* PCI pin */ { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; + /* set before the switch */ break; } - case 3: /* level */ + case MP_BUS_MCA: /* MCA pin */ { - trigger = 1; + trigger = default_MCA_trigger(idx); break; } - default: /* invalid */ + default: { printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 0; + trigger = 1; break; } } @@@ -1345,10 -1360,10 +1351,10 @@@ void __init print_IO_APIC(void if (apic_verbosity == APIC_QUIET) return; - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); + printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); + mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); /* * We are a bit conservative about what we expect. We have to @@@ -1740,15 -1758,15 +1750,15 @@@ static void __init setup_ioapic_ids_fro spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - + - old_id = mp_ioapics[apic].mpc_apicid; + old_id = mp_ioapics[apic].mp_apicid; - if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { + if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].mpc_apicid); + apic, mp_ioapics[apic].mp_apicid); printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID); - mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; + mp_ioapics[apic].mp_apicid = reg_00.bits.ID; } /* @@@ -1792,12 -1810,12 +1802,12 @@@ /* * Read the right value from the MPC table and * write it into the ID register. - */ + */ apic_printk(APIC_VERBOSE, KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].mpc_apicid); + mp_ioapics[apic].mp_apicid); - reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; + reg_00.bits.ID = mp_ioapics[apic].mp_apicid; spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0, reg_00.raw); spin_unlock_irqrestore(&ioapic_lock, flags); diff --cc arch/x86/kernel/setup_64.c index 545440e471,3220c7b56e..9a87113ba9 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@@ -267,34 -266,18 +268,6 @@@ static inline void __init reserve_crash {} #endif - /* Overridden in paravirt.c if CONFIG_PARAVIRT */ - void __attribute__((weak)) __init memory_setup(void) -#ifdef CONFIG_PCI_MMCONFIG -extern void __cpuinit fam10h_check_enable_mmcfg(void); -extern void __init check_enable_amd_mmconf_dmi(void); -#else -void __cpuinit fam10h_check_enable_mmcfg(void) --{ - machine_specific_memory_setup(); --} - - static void __init parse_setup_data(void) -void __init check_enable_amd_mmconf_dmi(void) --{ - struct setup_data *data; - unsigned long pa_data; - - if (boot_params.hdr.version < 0x0209) - return; - pa_data = boot_params.hdr.setup_data; - while (pa_data) { - data = early_ioremap(pa_data, PAGE_SIZE); - switch (data->type) { - default: - break; - } - #ifndef CONFIG_DEBUG_BOOT_PARAMS - free_early(pa_data, pa_data+sizeof(*data)+data->len); - #endif - pa_data = data->next; - early_iounmap(data, PAGE_SIZE); - } --} -#endif -- /* * setup_arch - architecture-specific boot-time initializations * diff --cc arch/x86/mm/discontig_32.c index 8b4eac0ca0,6216e43b6e..a2f73ba42b --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@@ -156,21 -156,32 +156,29 @@@ static void __init propagate_e820_map_n */ static void __init allocate_pgdat(int nid) { - if (nid && node_has_online_mem(nid)) + if (nid && node_has_online_mem(nid) && node_remap_start_vaddr[nid]) NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; else { - NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn)); - min_low_pfn += PFN_UP(sizeof(pg_data_t)); + unsigned long pgdat_phys; + pgdat_phys = find_e820_area(min_low_pfn<>PAGE_SHIFT)); + reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t), + "NODE_DATA"); } + printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n", + nid, (unsigned long)NODE_DATA(nid)); } -#ifdef CONFIG_DISCONTIGMEM /* - * In the discontig memory model, a portion of the kernel virtual area (KVA) - * is reserved and portions of nodes are mapped using it. This is to allow - * node-local memory to be allocated for structures that would normally require - * ZONE_NORMAL. The memory is allocated with alloc_remap() and callers - * should be prepared to allocate from the bootmem allocator instead. This KVA - * mechanism is incompatible with SPARSEMEM as it makes assumptions about the - * layout of memory that are broken if alloc_remap() succeeds for some of the - * map and fails for others + * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel + * virtual address space (KVA) is reserved and portions of nodes are mapped + * using it. This is to allow node-local memory to be allocated for + * structures that would normally require ZONE_NORMAL. The memory is + * allocated with alloc_remap() and callers should be prepared to allocate + * from the bootmem allocator instead. */ static unsigned long node_remap_start_pfn[MAX_NUMNODES]; static void *node_remap_end_vaddr[MAX_NUMNODES]; @@@ -284,9 -309,27 +306,8 @@@ static void init_remap_allocator(int ni printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, (ulong) node_remap_start_vaddr[nid], - (ulong) pfn_to_kaddr(highstart_pfn - + node_remap_offset[nid] + node_remap_size[nid])); + (ulong) node_remap_end_vaddr[nid]); } -#else -void *alloc_remap(int nid, unsigned long size) -{ - return NULL; -} - -static unsigned long calculate_numa_remap_pages(void) -{ - return 0; -} - -static void init_remap_allocator(int nid) -{ -} - -void __init remap_numa_kva(void) -{ -} -#endif /* CONFIG_DISCONTIGMEM */ extern void setup_bootmem_allocator(void); unsigned long __init setup_memory(void) diff --cc arch/x86/mm/init_32.c index d71be0eb01,fb5694d788..65d55056b6 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@@ -221,17 -218,8 +221,10 @@@ static void __init kernel_physical_mapp max_pfn_mapped = pfn; } } + update_page_count(PG_LEVEL_2M, pages_2m); + update_page_count(PG_LEVEL_4K, pages_4k); } - static inline int page_kills_ppro(unsigned long pagenr) - { - if (pagenr >= 0x70000 && pagenr <= 0x7003F) - return 1; - return 0; - } - /* * devmem_is_allowed() checks to see if /dev/mem access to a certain address * is valid. The argument is a physical page number. @@@ -573,9 -592,18 +597,7 @@@ void __init mem_init(void #ifdef CONFIG_FLATMEM BUG_ON(!mem_map); -#endif -#ifdef CONFIG_HIGHMEM - /* check that fixmap and pkmap do not overlap */ - if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { - printk(KERN_ERR - "fixmap and kmap areas overlap - this will crash\n"); - printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", - PKMAP_BASE, PKMAP_BASE + LAST_PKMAP*PAGE_SIZE, - FIXADDR_START); - BUG(); - } #endif - bad_ppro = ppro_with_ram_bug(); - /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); diff --cc arch/x86/xen/enlighten.c index bd74229081,275163f814..fe60aa9fed --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@@ -1309,12 -1233,9 +1310,12 @@@ asmlinkage void __init xen_start_kernel ? __pa(xen_start_info->mod_start) : 0; boot_params.hdr.ramdisk_size = xen_start_info->mod_len; - if (!is_initial_xendomain()) + if (!is_initial_xendomain()) { + add_preferred_console("xenboot", 0, NULL); + add_preferred_console("tty", 0, NULL); add_preferred_console("hvc", 0, NULL); + } /* Start the world */ - start_kernel(); + i386_start_kernel(); } diff --cc arch/x86/xen/setup.c index 488447878a,9001c9df04..a295758032 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@@ -37,11 -38,9 +37,11 @@@ char * __init xen_memory_setup(void { unsigned long max_pfn = xen_start_info->nr_pages; + max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); + e820.nr_map = 0; - add_memory_region(0, LOWMEMSIZE(), E820_RAM); - add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); + e820_add_region(0, LOWMEMSIZE(), E820_RAM); + e820_add_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); return "Xen"; }