From 98075d245a5bc4aeebc2e9f16fa8b089a5c200ac Mon Sep 17 00:00:00 2001 From: Zoltan Menyhart Date: Fri, 11 Apr 2008 15:21:35 -0700 Subject: [PATCH] [IA64] Fix NUMA configuration issue There is a NUMA memory configuration issue in 2.6.24: A 2-node machine of ours has got the following memory layout: Node 0: 0 - 2 Gbytes Node 0: 4 - 8 Gbytes Node 1: 8 - 16 Gbytes Node 0: 16 - 18 Gbytes "efi_memmap_init()" merges the three last ranges into one. "register_active_ranges()" is called as follows: efi_memmap_walk(register_active_ranges, NULL); i.e. once for the 4 - 18 Gbytes range. It picks up the node number from the start address, and registers all the memory for the node #0. "register_active_ranges()" should be called as follows to make sure there is no merged address range at its entry: efi_memmap_walk(filter_memory, register_active_ranges); "filter_memory()" is similar to "filter_rsvd_memory()", but the reserved memory ranges are not filtered out. Signed-off-by: Zoltan Menyhart Signed-off-by: Tony Luck --- arch/ia64/kernel/setup.c | 23 +++++++++++++++++++++++ arch/ia64/mm/contig.c | 2 +- arch/ia64/mm/discontig.c | 2 +- arch/ia64/mm/init.c | 6 ++---- include/asm-ia64/meminit.h | 3 ++- 5 files changed, 29 insertions(+), 7 deletions(-) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 4aa9eaea76..c85b7dd6ef 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -176,6 +176,29 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg) return 0; } +/* + * Similar to "filter_rsvd_memory()", but the reserved memory ranges + * are not filtered out. + */ +int __init +filter_memory(unsigned long start, unsigned long end, void *arg) +{ + void (*func)(unsigned long, unsigned long, int); + +#if IGNORE_PFN0 + if (start == PAGE_OFFSET) { + printk(KERN_WARNING "warning: skipping physical page 0\n"); + start += PAGE_SIZE; + if (start >= end) + return 0; + } +#endif + func = arg; + if (start < end) + call_pernode_memory(__pa(start), end - start, func); + return 0; +} + static void __init sort_regions (struct rsvd_region *rsvd_region, int max) { diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 0479661fa4..798bf9835a 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -253,7 +253,7 @@ paging_init (void) max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_VIRTUAL_MEM_MAP - efi_memmap_walk(register_active_ranges, NULL); + efi_memmap_walk(filter_memory, register_active_ranges); efi_memmap_walk(find_largest_hole, (u64 *)&max_gap); if (max_gap < LARGE_GAP) { vmem_map = (struct page *) 0; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index ffee1ea00b..96d5fbfa44 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -444,7 +444,7 @@ void __init find_memory(void) mem_data[node].min_pfn = ~0UL; } - efi_memmap_walk(register_active_ranges, NULL); + efi_memmap_walk(filter_memory, register_active_ranges); /* * Initialize the boot memory maps in reverse order since that's diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index da05893294..5c1de53c8c 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -547,12 +547,10 @@ find_largest_hole (u64 start, u64 end, void *arg) #endif /* CONFIG_VIRTUAL_MEM_MAP */ int __init -register_active_ranges(u64 start, u64 end, void *arg) +register_active_ranges(u64 start, u64 len, int nid) { - int nid = paddr_to_nid(__pa(start)); + u64 end = start + len; - if (nid < 0) - nid = 0; #ifdef CONFIG_KEXEC if (start > crashk_res.start && start < crashk_res.end) start = crashk_res.end; diff --git a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h index f93308f54b..7245a57815 100644 --- a/include/asm-ia64/meminit.h +++ b/include/asm-ia64/meminit.h @@ -35,6 +35,7 @@ extern void find_memory (void); extern void reserve_memory (void); extern void find_initrd (void); extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg); +extern int filter_memory (unsigned long start, unsigned long end, void *arg); extern unsigned long efi_memmap_init(unsigned long *s, unsigned long *e); extern int find_max_min_low_pfn (unsigned long , unsigned long, void *); @@ -56,7 +57,7 @@ extern int reserve_elfcorehdr(unsigned long *start, unsigned long *end); #define IGNORE_PFN0 1 /* XXX fix me: ignore pfn 0 until TLB miss handler is updated... */ -extern int register_active_ranges(u64 start, u64 end, void *arg); +extern int register_active_ranges(u64 start, u64 len, int nid); #ifdef CONFIG_VIRTUAL_MEM_MAP # define LARGE_GAP 0x40000000 /* Use virtual mem map if hole is > than this */ -- 2.39.5