X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fpage_alloc.c;h=5af33186a25feb86afd4db40462f74340e09488e;hb=fadd8fbd153c12963f8fe3c9ef7f8967f286f98b;hp=4bc66f6b7718fe72253a1766b92b7a00c46b513d;hpb=718127cc3170454f4aa274fdd2f1e01574fecd66;p=linux-2.6 diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4bc66f6b77..5af33186a2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -1704,14 +1705,29 @@ static void __meminit build_zonelists(pg_data_t *pgdat) #endif /* CONFIG_NUMA */ -void __init build_all_zonelists(void) +/* returns int only because stop_machine_run() requires it */ +static int __meminit __build_all_zonelists(void *dummy) { - int i; + int nid; + for_each_online_node(nid) + build_zonelists(NODE_DATA(nid)); + return 0; +} + +void __meminit build_all_zonelists(void) +{ + if (system_state == SYSTEM_BOOTING) { + __build_all_zonelists(0); + cpuset_init_current_mems_allowed(); + } else { + /* we have to stop all cpus to guarantee there is no user + of zonelist */ + stop_machine_run(__build_all_zonelists, NULL, NR_CPUS); + /* cpuset refresh routine should be here */ + } - for_each_online_node(i) - build_zonelists(NODE_DATA(i)); printk("Built %i zonelists\n", num_online_nodes()); - cpuset_init_current_mems_allowed(); + } /* @@ -1727,6 +1743,7 @@ void __init build_all_zonelists(void) */ #define PAGES_PER_WAITQUEUE 256 +#ifndef CONFIG_MEMORY_HOTPLUG static inline unsigned long wait_table_hash_nr_entries(unsigned long pages) { unsigned long size = 1; @@ -1745,6 +1762,29 @@ static inline unsigned long wait_table_hash_nr_entries(unsigned long pages) return max(size, 4UL); } +#else +/* + * A zone's size might be changed by hot-add, so it is not possible to determine + * a suitable size for its wait_table. So we use the maximum size now. + * + * The max wait table size = 4096 x sizeof(wait_queue_head_t). ie: + * + * i386 (preemption config) : 4096 x 16 = 64Kbyte. 
+ * ia64, x86-64 (no preemption): 4096 x 20 = 80Kbyte. + * ia64, x86-64 (preemption) : 4096 x 24 = 96Kbyte. + * + * The maximum entries are prepared when a zone's memory is (512K + 256) pages + * or more by the traditional way. (See above). It equals: + * + * i386, x86-64, powerpc(4K page size) : = ( 2G + 1M)byte. + * ia64(16K page size) : = ( 8G + 4M)byte. + * powerpc (64K page size) : = (32G +16M)byte. + */ +static inline unsigned long wait_table_hash_nr_entries(unsigned long pages) +{ + return 4096UL; +} +#endif /* * This is an integer logarithm so that shifts can be used later @@ -2010,10 +2050,11 @@ void __init setup_per_cpu_pageset(void) #endif static __meminit -void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) +int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) { int i; struct pglist_data *pgdat = zone->zone_pgdat; + size_t alloc_size; /* * The per-page waitqueue mechanism uses hashed waitqueues @@ -2023,12 +2064,32 @@ void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) wait_table_hash_nr_entries(zone_size_pages); zone->wait_table_bits = wait_table_bits(zone->wait_table_hash_nr_entries); - zone->wait_table = (wait_queue_head_t *) - alloc_bootmem_node(pgdat, zone->wait_table_hash_nr_entries - * sizeof(wait_queue_head_t)); + alloc_size = zone->wait_table_hash_nr_entries + * sizeof(wait_queue_head_t); + + if (system_state == SYSTEM_BOOTING) { + zone->wait_table = (wait_queue_head_t *) + alloc_bootmem_node(pgdat, alloc_size); + } else { + /* + * This case means that a zone whose size was 0 gets new memory + * via memory hot-add. + * But it may be the case that a new node was hot-added. In + * this case vmalloc() will not be able to use this new node's + * memory - this wait_table must be initialized to use this new + * node itself as well. + * To use this new node's memory, further consideration will be + * necessary. 
+ */ + zone->wait_table = (wait_queue_head_t *)vmalloc(alloc_size); + } + if (!zone->wait_table) + return -ENOMEM; for(i = 0; i < zone->wait_table_hash_nr_entries; ++i) init_waitqueue_head(zone->wait_table + i); + + return 0; } static __meminit void zone_pcp_init(struct zone *zone) @@ -2055,8 +2116,10 @@ __meminit int init_currently_empty_zone(struct zone *zone, unsigned long size) { struct pglist_data *pgdat = zone->zone_pgdat; - - zone_wait_table_init(zone, size); + int ret; + ret = zone_wait_table_init(zone, size); + if (ret) + return ret; pgdat->nr_zones = zone_idx(zone) + 1; zone->zone_start_pfn = zone_start_pfn; @@ -2816,42 +2879,14 @@ void *__init alloc_large_system_hash(const char *tablename, } #ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE -/* - * pfn <-> page translation. out-of-line version. - * (see asm-generic/memory_model.h) - */ -#if defined(CONFIG_FLATMEM) -struct page *pfn_to_page(unsigned long pfn) -{ - return mem_map + (pfn - ARCH_PFN_OFFSET); -} -unsigned long page_to_pfn(struct page *page) -{ - return (page - mem_map) + ARCH_PFN_OFFSET; -} -#elif defined(CONFIG_DISCONTIGMEM) struct page *pfn_to_page(unsigned long pfn) { - int nid = arch_pfn_to_nid(pfn); - return NODE_DATA(nid)->node_mem_map + arch_local_page_offset(pfn,nid); + return __pfn_to_page(pfn); } unsigned long page_to_pfn(struct page *page) { - struct pglist_data *pgdat = NODE_DATA(page_to_nid(page)); - return (page - pgdat->node_mem_map) + pgdat->node_start_pfn; -} -#elif defined(CONFIG_SPARSEMEM) -struct page *pfn_to_page(unsigned long pfn) -{ - return __section_mem_map_addr(__pfn_to_section(pfn)) + pfn; -} - -unsigned long page_to_pfn(struct page *page) -{ - long section_id = page_to_section(page); - return page - __section_mem_map_addr(__nr_to_section(section_id)); + return __page_to_pfn(page); } -#endif /* CONFIG_FLATMEM/DISCONTIGMME/SPARSEMEM */ EXPORT_SYMBOL(pfn_to_page); EXPORT_SYMBOL(page_to_pfn); #endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */