X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fpage_alloc.c;h=cace22b3ac25dc0f4af6fe10a7cc0a74c9d32778;hb=15ad7cdcfd76450d4beebc789ec646664238184d;hp=4f59d90b81e65a314e0433dc61371b785b3df61f;hpb=82965addad66fce61a92c5f03104ea90b0b87124;p=linux-2.6 diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4f59d90b81..cace22b3ac 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -82,14 +83,7 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { EXPORT_SYMBOL(totalram_pages); -/* - * Used by page_zone() to look up the address of the struct zone whose - * id is encoded in the upper bits of page->flags - */ -struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly; -EXPORT_SYMBOL(zone_table); - -static char *zone_names[MAX_NR_ZONES] = { +static char * const zone_names[MAX_NR_ZONES] = { "DMA", #ifdef CONFIG_ZONE_DMA32 "DMA32", @@ -236,7 +230,7 @@ static void prep_compound_page(struct page *page, unsigned long order) int i; int nr_pages = 1 << order; - page[1].lru.next = (void *)free_compound_page; /* set dtor */ + set_compound_page_dtor(page, free_compound_page); page[1].lru.prev = (void *)order; for (i = 0; i < nr_pages; i++) { struct page *p = page + i; @@ -485,7 +479,7 @@ static void free_one_page(struct zone *zone, struct page *page, int order) spin_lock(&zone->lock); zone->all_unreclaimable = 0; zone->pages_scanned = 0; - __free_one_page(page, zone ,order); + __free_one_page(page, zone, order); spin_unlock(&zone->lock); } @@ -495,17 +489,16 @@ static void __free_pages_ok(struct page *page, unsigned int order) int i; int reserved = 0; - arch_free_page(page, order); - if (!PageHighMem(page)) - debug_check_no_locks_freed(page_address(page), - PAGE_SIZE<pcp[i]; if (pcp->count) { + int to_drain; + local_irq_save(flags); - free_pages_bulk(zone, pcp->count, &pcp->list, 0); - pcp->count = 0; + if (pcp->count >= pcp->batch) + to_drain = pcp->batch; + else + to_drain = pcp->count; + free_pages_bulk(zone, to_drain, &pcp->list, 0); + pcp->count -= to_drain; local_irq_restore(flags); } } @@ -700,7 +701,6 @@ void drain_node_pages(int nodeid) } #endif -#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU) static void __drain_pages(unsigned int cpu) { unsigned long flags; @@ -722,7 +722,6 @@ static void __drain_pages(unsigned int cpu) } } } -#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_PM @@ -781,13 +780,14 @@ static void fastcall free_hot_cold_page(struct page *page, int cold) struct per_cpu_pages *pcp; unsigned long flags; - arch_free_page(page, 0); - if (PageAnon(page)) page->mapping = NULL; if (free_pages_check(page)) return; + if (!PageHighMem(page)) + debug_check_no_locks_freed(page_address(page), PAGE_SIZE); + arch_free_page(page, 0); kernel_map_pages(page, 1, 0); pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; @@ -852,7 +852,7 @@ again: pcp = &zone_pcp(zone, cpu)->pcp[cold]; local_irq_save(flags); if (!pcp->count) { - pcp->count += rmqueue_bulk(zone, 0, + pcp->count = rmqueue_bulk(zone, 0, pcp->batch, &pcp->list); if (unlikely(!pcp->count)) goto failed; @@ -900,7 +900,8 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, int classzone_idx, int alloc_flags) { /* free_pages my go negative - that's OK */ - long min = mark, free_pages = z->free_pages - (1 << order) + 1; + unsigned long min = mark; + long free_pages = z->free_pages - (1 << order) + 1; int o; if (alloc_flags & ALLOC_HIGH) @@ -923,31 +924,160 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, return 1; } +#ifdef CONFIG_NUMA +/* + * zlc_setup - Setup for "zonelist cache". Uses cached zone data to + * skip over zones that are not allowed by the cpuset, or that have + * been recently (in last second) found to be nearly full. See further + * comments in mmzone.h. Reduces cache footprint of zonelist scans + * that have to skip over alot of full or unallowed zones. + * + * If the zonelist cache is present in the passed in zonelist, then + * returns a pointer to the allowed node mask (either the current + * tasks mems_allowed, or node_online_map.) + * + * If the zonelist cache is not available for this zonelist, does + * nothing and returns NULL. + * + * If the fullzones BITMAP in the zonelist cache is stale (more than + * a second since last zap'd) then we zap it out (clear its bits.) + * + * We hold off even calling zlc_setup, until after we've checked the + * first zone in the zonelist, on the theory that most allocations will + * be satisfied from that first zone, so best to examine that zone as + * quickly as we can. + */ +static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags) +{ + struct zonelist_cache *zlc; /* cached zonelist speedup info */ + nodemask_t *allowednodes; /* zonelist_cache approximation */ + + zlc = zonelist->zlcache_ptr; + if (!zlc) + return NULL; + + if (jiffies - zlc->last_full_zap > 1 * HZ) { + bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); + zlc->last_full_zap = jiffies; + } + + allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ? + &cpuset_current_mems_allowed : + &node_online_map; + return allowednodes; +} + /* - * get_page_from_freeliest goes through the zonelist trying to allocate + * Given 'z' scanning a zonelist, run a couple of quick checks to see + * if it is worth looking at further for free memory: + * 1) Check that the zone isn't thought to be full (doesn't have its + * bit set in the zonelist_cache fullzones BITMAP). + * 2) Check that the zones node (obtained from the zonelist_cache + * z_to_n[] mapping) is allowed in the passed in allowednodes mask. + * Return true (non-zero) if zone is worth looking at further, or + * else return false (zero) if it is not. + * + * This check -ignores- the distinction between various watermarks, + * such as GFP_HIGH, GFP_ATOMIC, PF_MEMALLOC, ... If a zone is + * found to be full for any variation of these watermarks, it will + * be considered full for up to one second by all requests, unless + * we are so low on memory on all allowed nodes that we are forced + * into the second scan of the zonelist. + * + * In the second scan we ignore this zonelist cache and exactly + * apply the watermarks to all zones, even it is slower to do so. + * We are low on memory in the second scan, and should leave no stone + * unturned looking for a free page. + */ +static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z, + nodemask_t *allowednodes) +{ + struct zonelist_cache *zlc; /* cached zonelist speedup info */ + int i; /* index of *z in zonelist zones */ + int n; /* node that zone *z is on */ + + zlc = zonelist->zlcache_ptr; + if (!zlc) + return 1; + + i = z - zonelist->zones; + n = zlc->z_to_n[i]; + + /* This zone is worth trying if it is allowed but not full */ + return node_isset(n, *allowednodes) && !test_bit(i, zlc->fullzones); +} + +/* + * Given 'z' scanning a zonelist, set the corresponding bit in + * zlc->fullzones, so that subsequent attempts to allocate a page + * from that zone don't waste time re-examining it. + */ +static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z) +{ + struct zonelist_cache *zlc; /* cached zonelist speedup info */ + int i; /* index of *z in zonelist zones */ + + zlc = zonelist->zlcache_ptr; + if (!zlc) + return; + + i = z - zonelist->zones; + + set_bit(i, zlc->fullzones); +} + +#else /* CONFIG_NUMA */ + +static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags) +{ + return NULL; +} + +static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z, + nodemask_t *allowednodes) +{ + return 1; +} + +static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z) +{ +} +#endif /* CONFIG_NUMA */ + +/* + * get_page_from_freelist goes through the zonelist trying to allocate * a page. */ static struct page * get_page_from_freelist(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, int alloc_flags) { - struct zone **z = zonelist->zones; + struct zone **z; struct page *page = NULL; - int classzone_idx = zone_idx(*z); + int classzone_idx = zone_idx(zonelist->zones[0]); struct zone *zone; + nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */ + int zlc_active = 0; /* set if using zonelist_cache */ + int did_zlc_setup = 0; /* just call zlc_setup() one time */ +zonelist_scan: /* - * Go through the zonelist once, looking for a zone with enough free. + * Scan zonelist, looking for a zone with enough free. * See also cpuset_zone_allowed() comment in kernel/cpuset.c. */ + z = zonelist->zones; + do { + if (NUMA_BUILD && zlc_active && + !zlc_zone_worth_trying(zonelist, z, allowednodes)) + continue; zone = *z; if (unlikely(NUMA_BUILD && (gfp_mask & __GFP_THISNODE) && zone->zone_pgdat != zonelist->zones[0]->zone_pgdat)) break; if ((alloc_flags & ALLOC_CPUSET) && - !cpuset_zone_allowed(zone, gfp_mask)) - continue; + !cpuset_zone_allowed(zone, gfp_mask)) + goto try_next_zone; if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { unsigned long mark; @@ -957,18 +1087,34 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, mark = zone->pages_low; else mark = zone->pages_high; - if (!zone_watermark_ok(zone , order, mark, - classzone_idx, alloc_flags)) + if (!zone_watermark_ok(zone, order, mark, + classzone_idx, alloc_flags)) { if (!zone_reclaim_mode || !zone_reclaim(zone, gfp_mask, order)) - continue; + goto this_zone_full; + } } page = buffered_rmqueue(zonelist, zone, order, gfp_mask); - if (page) { + if (page) break; +this_zone_full: + if (NUMA_BUILD) + zlc_mark_zone_full(zonelist, z); +try_next_zone: + if (NUMA_BUILD && !did_zlc_setup) { + /* we do zlc_setup after the first zone is tried */ + allowednodes = zlc_setup(zonelist, alloc_flags); + zlc_active = 1; + did_zlc_setup = 1; } } while (*(++z) != NULL); + + if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) { + /* Disable zlc cache for second zonelist scan */ + zlc_active = 0; + goto zonelist_scan; + } return page; } @@ -1003,9 +1149,19 @@ restart: if (page) goto got_pg; - do { + /* + * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and + * __GFP_NOWARN set) should not cause reclaim since the subsystem + * (f.e. slab) using GFP_THISNODE may choose to trigger reclaim + * using a larger set of nodes after it has established that the + * allowed per node queues are empty and that nodes are + * over allocated. + */ + if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE) + goto nopage; + + for (z = zonelist->zones; *z; z++) wakeup_kswapd(*z, order); - } while (*(++z)); /* * OK, we're below the kswapd watermark and have kicked background @@ -1039,6 +1195,7 @@ restart: /* This allocation should allow future memory freeing. */ +rebalance: if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE))) && !in_interrupt()) { if (!(gfp_mask & __GFP_NOMEMALLOC)) { @@ -1049,7 +1206,7 @@ nofail_alloc: if (page) goto got_pg; if (gfp_mask & __GFP_NOFAIL) { - blk_congestion_wait(WRITE, HZ/50); + congestion_wait(WRITE, HZ/50); goto nofail_alloc; } } @@ -1060,7 +1217,6 @@ nofail_alloc: if (!wait) goto nopage; -rebalance: cond_resched(); /* We now go into synchronous reclaim */ @@ -1112,7 +1268,7 @@ rebalance: do_retry = 1; } if (do_retry) { - blk_congestion_wait(WRITE, HZ/50); + congestion_wait(WRITE, HZ/50); goto rebalance; } @@ -1260,7 +1416,7 @@ unsigned int nr_free_pagecache_pages(void) static inline void show_node(struct zone *zone) { if (NUMA_BUILD) - printk("Node %ld ", zone_to_nid(zone)); + printk("Node %d ", zone_to_nid(zone)); } void si_meminfo(struct sysinfo *val) @@ -1540,6 +1696,24 @@ static void __meminit build_zonelists(pg_data_t *pgdat) } } +/* Construct the zonelist performance cache - see further mmzone.h */ +static void __meminit build_zonelist_cache(pg_data_t *pgdat) +{ + int i; + + for (i = 0; i < MAX_NR_ZONES; i++) { + struct zonelist *zonelist; + struct zonelist_cache *zlc; + struct zone **z; + + zonelist = pgdat->node_zonelists + i; + zonelist->zlcache_ptr = zlc = &zonelist->zlcache; + bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); + for (z = zonelist->zones; *z; z++) + zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z); + } +} + #else /* CONFIG_NUMA */ static void __meminit build_zonelists(pg_data_t *pgdat) @@ -1577,14 +1751,26 @@ static void __meminit build_zonelists(pg_data_t *pgdat) } } +/* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */ +static void __meminit build_zonelist_cache(pg_data_t *pgdat) +{ + int i; + + for (i = 0; i < MAX_NR_ZONES; i++) + pgdat->node_zonelists[i].zlcache_ptr = NULL; +} + #endif /* CONFIG_NUMA */ /* return values int ....just for stop_machine_run() */ static int __meminit __build_all_zonelists(void *dummy) { int nid; - for_each_online_node(nid) + + for_each_online_node(nid) { build_zonelists(NODE_DATA(nid)); + build_zonelist_cache(NODE_DATA(nid)); + } return 0; } @@ -1687,6 +1873,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, for (pfn = start_pfn; pfn < end_pfn; pfn++) { if (!early_pfn_valid(pfn)) continue; + if (!early_pfn_in_nid(pfn, nid)) + continue; page = pfn_to_page(pfn); set_page_links(page, zone, nid, pfn); init_page_count(page); @@ -1711,20 +1899,6 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, } } -#define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr) -void zonetable_add(struct zone *zone, int nid, enum zone_type zid, - unsigned long pfn, unsigned long size) -{ - unsigned long snum = pfn_to_section_nr(pfn); - unsigned long end = pfn_to_section_nr(pfn + size); - - if (FLAGS_HAS_NODE) - zone_table[ZONETABLE_INDEX(nid, zid)] = zone; - else - for (; snum <= end; snum++) - zone_table[ZONETABLE_INDEX(snum, zid)] = zone; -} - #ifndef __HAVE_ARCH_MEMMAP_INIT #define memmap_init(size, nid, zone, start_pfn) \ memmap_init_zone((size), (nid), (zone), (start_pfn)) @@ -1877,16 +2051,16 @@ static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, int ret = NOTIFY_OK; switch (action) { - case CPU_UP_PREPARE: - if (process_zones(cpu)) - ret = NOTIFY_BAD; - break; - case CPU_UP_CANCELED: - case CPU_DEAD: - free_zone_pagesets(cpu); - break; - default: - break; + case CPU_UP_PREPARE: + if (process_zones(cpu)) + ret = NOTIFY_BAD; + break; + case CPU_UP_CANCELED: + case CPU_DEAD: + free_zone_pagesets(cpu); + break; + default: + break; } return ret; } @@ -2050,8 +2224,8 @@ int __init early_pfn_to_nid(unsigned long pfn) /** * free_bootmem_with_active_regions - Call free_bootmem_node for each active range - * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed - * @max_low_pfn: The highest PFN that till be passed to free_bootmem_node + * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed. + * @max_low_pfn: The highest PFN that will be passed to free_bootmem_node * * If an architecture guarantees that all ranges registered with * add_active_ranges() contain no holes and may be freed, this @@ -2081,11 +2255,11 @@ void __init free_bootmem_with_active_regions(int nid, /** * sparse_memory_present_with_active_regions - Call memory_present for each active range - * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used + * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used. * * If an architecture guarantees that all ranges registered with * add_active_ranges() contain no holes and may be freed, this - * this function may be used instead of calling memory_present() manually. + * function may be used instead of calling memory_present() manually. */ void __init sparse_memory_present_with_active_regions(int nid) { @@ -2155,14 +2329,14 @@ static void __init account_node_boundary(unsigned int nid, /** * get_pfn_range_for_nid - Return the start and end page frames for a node - * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned - * @start_pfn: Passed by reference. On return, it will have the node start_pfn - * @end_pfn: Passed by reference. On return, it will have the node end_pfn + * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned. + * @start_pfn: Passed by reference. On return, it will have the node start_pfn. + * @end_pfn: Passed by reference. On return, it will have the node end_pfn. * * It returns the start and end page frame of a node based on information * provided by an arch calling add_active_range(). If called for a node * with no available memory, a warning is printed and the start and end - * PFNs will be 0 + * PFNs will be 0. */ void __init get_pfn_range_for_nid(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn) @@ -2215,7 +2389,7 @@ unsigned long __init zone_spanned_pages_in_node(int nid, /* * Return the number of holes in a range on a node. If nid is MAX_NUMNODES, - * then all holes in the requested range will be accounted for + * then all holes in the requested range will be accounted for. */ unsigned long __init __absent_pages_in_range(int nid, unsigned long range_start_pfn, @@ -2257,7 +2431,7 @@ unsigned long __init __absent_pages_in_range(int nid, /* Account for ranges past physical memory on this node */ if (range_end_pfn > prev_end_pfn) - hole_pages = range_end_pfn - + hole_pages += range_end_pfn - max(range_start_pfn, prev_end_pfn); return hole_pages; @@ -2268,7 +2442,7 @@ unsigned long __init __absent_pages_in_range(int nid, * @start_pfn: The start PFN to start searching for holes * @end_pfn: The end PFN to stop searching for holes * - * It returns the number of pages frames in memory holes within a range + * It returns the number of pages frames in memory holes within a range. */ unsigned long __init absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn) @@ -2293,19 +2467,6 @@ unsigned long __init zone_absent_pages_in_node(int nid, return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); } -/* Return the zone index a PFN is in */ -int memmap_zone_idx(struct page *lmem_map) -{ - int i; - unsigned long phys_addr = virt_to_phys(lmem_map); - unsigned long pfn = phys_addr >> PAGE_SHIFT; - - for (i = 0; i < MAX_NR_ZONES; i++) - if (pfn < arch_zone_highest_possible_pfn[i]) - break; - - return i; -} #else static inline unsigned long zone_spanned_pages_in_node(int nid, unsigned long zone_type, @@ -2324,10 +2485,6 @@ static inline unsigned long zone_absent_pages_in_node(int nid, return zholes_size[zone_type]; } -static inline int memmap_zone_idx(struct page *lmem_map) -{ - return MAX_NR_ZONES; -} #endif static void __init calculate_node_totalpages(struct pglist_data *pgdat, @@ -2420,7 +2577,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, zone->zone_pgdat = pgdat; zone->free_pages = 0; - zone->temp_priority = zone->prev_priority = DEF_PRIORITY; + zone->prev_priority = DEF_PRIORITY; zone_pcp_init(zone); INIT_LIST_HEAD(&zone->active_list); @@ -2434,7 +2591,6 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, if (!size) continue; - zonetable_add(zone, nid, j, zone_start_pfn, size); ret = init_currently_empty_zone(zone, zone_start_pfn, size); BUG_ON(ret); zone_start_pfn += size; @@ -2582,11 +2738,12 @@ void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn, /** * remove_all_active_ranges - Remove all currently registered regions + * * During discovery, it may be found that a table like SRAT is invalid * and an alternative discovery method must be used. This function removes * all currently registered regions. */ -void __init remove_all_active_ranges() +void __init remove_all_active_ranges(void) { memset(early_node_map, 0, sizeof(early_node_map)); nr_nodemap_entries = 0; @@ -2624,6 +2781,9 @@ unsigned long __init find_min_pfn_for_node(unsigned long nid) { int i; + /* Regions in the early_node_map can be in any order */ + sort_node_map(); + /* Assuming a sorted map, the first range found has the starting pfn */ for_each_active_range_index_in_nid(i, nid) return early_node_map[i].start_pfn; @@ -2636,7 +2796,7 @@ unsigned long __init find_min_pfn_for_node(unsigned long nid) * find_min_pfn_with_active_regions - Find the minimum PFN registered * * It returns the minimum PFN based on information provided via - * add_active_range() + * add_active_range(). */ unsigned long __init find_min_pfn_with_active_regions(void) { @@ -2647,7 +2807,7 @@ unsigned long __init find_min_pfn_with_active_regions(void) * find_max_pfn_with_active_regions - Find the maximum PFN registered * * It returns the maximum PFN based on information provided via - * add_active_range() + * add_active_range(). */ unsigned long __init find_max_pfn_with_active_regions(void) { @@ -2662,10 +2822,7 @@ unsigned long __init find_max_pfn_with_active_regions(void) /** * free_area_init_nodes - Initialise all pg_data_t and zone data - * @arch_max_dma_pfn: The maximum PFN usable for ZONE_DMA - * @arch_max_dma32_pfn: The maximum PFN usable for ZONE_DMA32 - * @arch_max_low_pfn: The maximum PFN usable for ZONE_NORMAL - * @arch_max_high_pfn: The maximum PFN usable for ZONE_HIGHMEM + * @max_zone_pfn: an array of max PFNs for each zone * * This will call free_area_init_node() for each active node in the system. * Using the page ranges provided by add_active_range(), the size of each @@ -2695,9 +2852,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]); } - /* Regions in the early_node_map can be in any order */ - sort_node_map(); - /* Print out the zone ranges */ printk("Zone PFN ranges:\n"); for (i = 0; i < MAX_NR_ZONES; i++) @@ -2723,14 +2877,15 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ /** - * set_dma_reserve - Account the specified number of pages reserved in ZONE_DMA - * @new_dma_reserve - The number of pages to mark reserved + * set_dma_reserve - set the specified number of pages reserved in the first zone + * @new_dma_reserve: The number of pages to mark reserved * * The per-cpu batchsize and zone watermarks are determined by present_pages. * In the DMA zone, a significant percentage may be consumed by kernel image * and other unfreeable allocations which can skew the watermarks badly. This - * function may optionally be used to account for unfreeable pages in - * ZONE_DMA. The effect will be lower watermarks and smaller per-cpu batchsize + * function may optionally be used to account for unfreeable pages in the + * first zone (e.g., ZONE_DMA). The effect will be lower watermarks and + * smaller per-cpu batchsize. */ void __init set_dma_reserve(unsigned long new_dma_reserve) { @@ -2750,7 +2905,6 @@ void __init free_area_init(unsigned long *zones_size) __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL); } -#ifdef CONFIG_HOTPLUG_CPU static int page_alloc_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { @@ -2765,7 +2919,6 @@ static int page_alloc_cpu_notify(struct notifier_block *self, } return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ void __init page_alloc_init(void) { @@ -2843,10 +2996,11 @@ static void setup_per_zone_lowmem_reserve(void) calculate_totalreserve_pages(); } -/* - * setup_per_zone_pages_min - called when min_free_kbytes changes. Ensures - * that the pages_{min,low,high} values for each zone are set correctly - * with respect to min_free_kbytes. +/** + * setup_per_zone_pages_min - called when min_free_kbytes changes. + * + * Ensures that the pages_{min,low,high} values for each zone are set correctly + * with respect to min_free_kbytes. */ void setup_per_zone_pages_min(void) { @@ -3068,7 +3222,7 @@ void *__init alloc_large_system_hash(const char *tablename, /* allow the kernel cmdline to have a say */ if (!numentries) { /* round applicable memory size up to nearest megabyte */ - numentries = (flags & HASH_HIGHMEM) ? nr_all_pages : nr_kernel_pages; + numentries = nr_kernel_pages; numentries += (1UL << (20 - PAGE_SHIFT)) - 1; numentries >>= 20 - PAGE_SHIFT; numentries <<= 20 - PAGE_SHIFT; @@ -3135,3 +3289,19 @@ unsigned long page_to_pfn(struct page *page) EXPORT_SYMBOL(pfn_to_page); EXPORT_SYMBOL(page_to_pfn); #endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */ + +#if MAX_NUMNODES > 1 +/* + * Find the highest possible node id. + */ +int highest_possible_node_id(void) +{ + unsigned int node; + unsigned int highest = 0; + + for_each_node_mask(node, node_possible_map) + highest = node; + return highest; +} +EXPORT_SYMBOL(highest_possible_node_id); +#endif