#include "internal.h"
/*
- * MCD - HACK: Find somewhere to initialize this EARLY, or make this
- * initializer cleaner
+ * Array of node states.
*/
-nodemask_t node_online_map __read_mostly = { { [0] = 1UL } };
-EXPORT_SYMBOL(node_online_map);
-nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
-EXPORT_SYMBOL(node_possible_map);
+nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
+ [N_POSSIBLE] = NODE_MASK_ALL, /* every node is initially marked possible */
+ [N_ONLINE] = { { [0] = 1UL } }, /* only bit 0 (node 0) is set at start */
+#ifndef CONFIG_NUMA
+ [N_NORMAL_MEMORY] = { { [0] = 1UL } }, /* non-NUMA: node 0 is preset in the remaining states */
+#ifdef CONFIG_HIGHMEM
+ [N_HIGH_MEMORY] = { { [0] = 1UL } }, /* separate entry initialised only with CONFIG_HIGHMEM */
+#endif
+ [N_CPU] = { { [0] = 1UL } },
+#endif /* NUMA */
+};
+EXPORT_SYMBOL(node_states);
+
unsigned long totalram_pages __read_mostly;
unsigned long totalreserve_pages __read_mostly;
long nr_swap_pages;
EXPORT_SYMBOL(nr_node_ids);
#endif
+static inline int get_pageblock_migratetype(struct page *page)
+{ /* Migrate type of the block containing @page, read from its pageblock flags. */
+ return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end); /* bits PB_migrate..PB_migrate_end */
+}
+
+static void set_pageblock_migratetype(struct page *page, int migratetype)
+{ /* Store @migratetype in the PB_migrate..PB_migrate_end flag bits of @page's block. */
+ set_pageblock_flags_group(page, (unsigned long)migratetype,
+ PB_migrate, PB_migrate_end);
+}
+
+static inline int gfpflags_to_migratetype(gfp_t gfp_flags)
+{ /* Map an allocation's gfp flags to the migrate type index used for the free lists. */
+ return ((gfp_flags & __GFP_MOVABLE) != 0); /* 1 if __GFP_MOVABLE is set, else 0; assumes MIGRATE_UNMOVABLE == 0 and MIGRATE_MOVABLE == 1 -- TODO confirm */
+}
+
#ifdef CONFIG_DEBUG_VM
static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
{
{
unsigned long page_idx;
int order_size = 1 << order;
+ int migratetype = get_pageblock_migratetype(page);
if (unlikely(PageCompound(page)))
destroy_compound_page(page, order);
__mod_zone_page_state(zone, NR_FREE_PAGES, order_size);
while (order < MAX_ORDER-1) {
unsigned long combined_idx;
- struct free_area *area;
struct page *buddy;
buddy = __page_find_buddy(page, page_idx, order);
break; /* Move the buddy up one level. */
list_del(&buddy->lru);
- area = zone->free_area + order;
- area->nr_free--;
+ zone->free_area[order].nr_free--;
rmv_page_order(buddy);
combined_idx = __find_combined_index(page_idx, order);
page = page + (combined_idx - page_idx);
order++;
}
set_page_order(page, order);
- list_add(&page->lru, &zone->free_area[order].free_list);
+ list_add(&page->lru,
+ &zone->free_area[order].free_list[migratetype]);
zone->free_area[order].nr_free++;
}
* -- wli
*/
static inline void expand(struct zone *zone, struct page *page,
- int low, int high, struct free_area *area)
+ int low, int high, struct free_area *area,
+ int migratetype)
{
unsigned long size = 1 << high;
high--;
size >>= 1;
VM_BUG_ON(bad_range(zone, &page[size]));
- list_add(&page[size].lru, &area->free_list);
+ list_add(&page[size].lru, &area->free_list[migratetype]);
area->nr_free++;
set_page_order(&page[size], high);
}
return 0;
}
+/*
+ * This array describes the order in which the other free lists are
+ * tried when the free lists for the desired migrate type are depleted.
+ * Row [T] holds, in order of preference, the (at most MIGRATE_TYPES-1)
+ * migrate types that __rmqueue_fallback() tries for a request of type T.
+ */
+static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = {
+ [MIGRATE_UNMOVABLE] = { MIGRATE_MOVABLE },
+ [MIGRATE_MOVABLE] = { MIGRATE_UNMOVABLE },
+};
+
+/*
+ * Remove an element from the buddy allocator from the fallback list.
+ *
+ * Called by __rmqueue() when no free list of @start_migratetype could
+ * satisfy the request (so zone->lock is held, as __rmqueue() requires).
+ * Orders are searched from MAX_ORDER-1 down to @order; at each order the
+ * migrate types in fallbacks[start_migratetype] are tried in turn. The
+ * first free block found is unlinked, split down to @order by expand(),
+ * and returned.
+ *
+ * Returns the page, or NULL when no fallback list holds a free block of
+ * order >= @order.
+ */
+static struct page *__rmqueue_fallback(struct zone *zone, int order,
+ int start_migratetype)
+{
+ struct free_area * area;
+ int current_order;
+ struct page *page;
+ int migratetype, i;
+
+ /* Find the largest possible block of pages in the other list */
+ for (current_order = MAX_ORDER-1; current_order >= order;
+ --current_order) {
+ for (i = 0; i < MIGRATE_TYPES - 1; i++) {
+ migratetype = fallbacks[start_migratetype][i];
+
+ area = &(zone->free_area[current_order]);
+ if (list_empty(&area->free_list[migratetype]))
+ continue;
+
+ page = list_entry(area->free_list[migratetype].next,
+ struct page, lru);
+ area->nr_free--;
+
+ /*
+ * If breaking a large block of pages, place the buddies
+ * on the preferred allocation list. Otherwise migratetype
+ * keeps the fallback type, so the split-off buddies go
+ * back on the list the block was taken from.
+ */
+ if (unlikely(current_order >= MAX_ORDER / 2))
+ migratetype = start_migratetype;
+
+ /*
+ * Remove the page from the freelists. Only the 2^order
+ * pages being handed out are subtracted from NR_FREE_PAGES;
+ * expand() re-queues the rest of the block.
+ */
+ list_del(&page->lru);
+ rmv_page_order(page);
+ __mod_zone_page_state(zone, NR_FREE_PAGES,
+ -(1UL << order));
+
+ if (current_order == MAX_ORDER - 1) /* largest-order block taken: record the requester's type for it */
+ set_pageblock_migratetype(page,
+ start_migratetype);
+
+ expand(zone, page, order, current_order, area, migratetype);
+ return page;
+ }
+ }
+
+ return NULL;
+}
+
/*
* Do the hard work of removing an element from the buddy allocator.
* Call me with the zone->lock already held.
*/
-static struct page *__rmqueue(struct zone *zone, unsigned int order)
+static struct page *__rmqueue(struct zone *zone, unsigned int order,
+ int migratetype)
{
struct free_area * area;
unsigned int current_order;
struct page *page;
+ /*
+ * Find a page of the appropriate size in the preferred list: walk the
+ * free lists of @migratetype from @order upwards and take the first
+ * block found.
+ */
for (current_order = order; current_order < MAX_ORDER; ++current_order) {
- area = zone->free_area + current_order;
- if (list_empty(&area->free_list))
+ area = &(zone->free_area[current_order]);
+ if (list_empty(&area->free_list[migratetype]))
continue;
- page = list_entry(area->free_list.next, struct page, lru);
+ page = list_entry(area->free_list[migratetype].next,
+ struct page, lru);
list_del(&page->lru);
rmv_page_order(page);
area->nr_free--;
__mod_zone_page_state(zone, NR_FREE_PAGES, - (1UL << order));
- expand(zone, page, order, current_order, area);
- return page;
+ expand(zone, page, order, current_order, area, migratetype);
+ goto got_page;
}
- return NULL;
+ page = __rmqueue_fallback(zone, order, migratetype); /* nothing of this type at any order: try the fallback types; may yield NULL */
+
+got_page:
+
+ return page;
}
/*
* Returns the number of new pages which were placed at *list.
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
- unsigned long count, struct list_head *list)
+ unsigned long count, struct list_head *list,
+ int migratetype)
{
int i;
spin_lock(&zone->lock);
for (i = 0; i < count; ++i) {
- struct page *page = __rmqueue(zone, order);
+ struct page *page = __rmqueue(zone, order, migratetype);
if (unlikely(page == NULL))
break;
- list_add_tail(&page->lru, list);
+ list_add(&page->lru, list); /* new pages now go at the head of @list */
+ set_page_private(page, migratetype); /* tag with the requested type; the pcp list scan compares page_private() against it */
}
spin_unlock(&zone->lock);
return i;
}
}
-#ifdef CONFIG_PM
+#ifdef CONFIG_HIBERNATION
void mark_free_pages(struct zone *zone)
{
unsigned long pfn, max_zone_pfn;
unsigned long flags;
- int order;
+ int order, t;
struct list_head *curr;
if (!zone->spanned_pages)
swsusp_unset_page_free(page);
}
- for (order = MAX_ORDER - 1; order >= 0; --order)
- list_for_each(curr, &zone->free_area[order].free_list) {
+ for_each_migratetype_order(order, t) {
+ list_for_each(curr, &zone->free_area[order].free_list[t]) {
unsigned long i;
pfn = page_to_pfn(list_entry(curr, struct page, lru));
for (i = 0; i < (1UL << order); i++)
swsusp_set_page_free(pfn_to_page(pfn + i));
}
-
+ }
spin_unlock_irqrestore(&zone->lock, flags);
}
__drain_pages(smp_processor_id());
local_irq_restore(flags);
}
-#endif /* CONFIG_PM */
+#endif /* CONFIG_HIBERNATION */
/*
* Free a 0-order page
local_irq_save(flags);
__count_vm_event(PGFREE);
list_add(&page->lru, &pcp->list);
+ set_page_private(page, get_pageblock_migratetype(page));
pcp->count++;
if (pcp->count >= pcp->high) {
free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
struct page *page;
int cold = !!(gfp_flags & __GFP_COLD);
int cpu;
+ int migratetype = gfpflags_to_migratetype(gfp_flags);
again:
cpu = get_cpu();
local_irq_save(flags);
if (!pcp->count) {
pcp->count = rmqueue_bulk(zone, 0,
- pcp->batch, &pcp->list);
+ pcp->batch, &pcp->list, migratetype);
if (unlikely(!pcp->count))
goto failed;
}
- page = list_entry(pcp->list.next, struct page, lru);
- list_del(&page->lru);
- pcp->count--;
+ /* Find a page of the appropriate migrate type */
+ list_for_each_entry(page, &pcp->list, lru) {
+ if (page_private(page) == migratetype) {
+ list_del(&page->lru);
+ pcp->count--;
+ break;
+ }
+ }
+
+ /*
+ * Check if a page of the appropriate migrate type
+ * was found. If not, allocate more to the pcp list
+ */
+ if (&page->lru == &pcp->list) {
+ pcp->count += rmqueue_bulk(zone, 0,
+ pcp->batch, &pcp->list, migratetype);
+ page = list_entry(pcp->list.next, struct page, lru);
+ VM_BUG_ON(page_private(page) != migratetype);
+ list_del(&page->lru);
+ pcp->count--;
+ }
} else {
spin_lock_irqsave(&zone->lock, flags);
- page = __rmqueue(zone, order);
+ page = __rmqueue(zone, order, migratetype);
spin_unlock(&zone->lock);
if (!page)
goto failed;
*
* If the zonelist cache is present in the passed in zonelist, then
* returns a pointer to the allowed node mask (either the current
- * tasks mems_allowed, or node_online_map.)
+ * task's mems_allowed, or node_states[N_HIGH_MEMORY].)
*
* If the zonelist cache is not available for this zonelist, does
* nothing and returns NULL.
allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ?
&cpuset_current_mems_allowed :
- &node_online_map;
+ &node_states[N_HIGH_MEMORY];
return allowednodes;
}
nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
int zlc_active = 0; /* set if using zonelist_cache */
int did_zlc_setup = 0; /* just call zlc_setup() one time */
+ enum zone_type highest_zoneidx = -1; /* Gets set for policy zonelists */
zonelist_scan:
/*
z = zonelist->zones;
do {
+ /*
+ * In NUMA, this could be a policy zonelist which contains
+ * zones that may not be allowed by the current gfp_mask.
+ * Check the zone is allowed by the current flags
+ */
+ if (unlikely(alloc_should_filter_zonelist(zonelist))) {
+ if (highest_zoneidx == -1)
+ highest_zoneidx = gfp_zone(gfp_mask);
+ if (zone_idx(*z) > highest_zoneidx)
+ continue;
+ }
+
if (NUMA_BUILD && zlc_active &&
!zlc_zone_worth_trying(zonelist, z, allowednodes))
continue;
zone = *z;
- if (unlikely(NUMA_BUILD && (gfp_mask & __GFP_THISNODE) &&
- zone->zone_pgdat != zonelist->zones[0]->zone_pgdat))
- break;
if ((alloc_flags & ALLOC_CPUSET) &&
!cpuset_zone_allowed_softwall(zone, gfp_mask))
goto try_next_zone;
z = zonelist->zones; /* the list of zones suitable for gfp_mask */
if (unlikely(*z == NULL)) {
- /* Should this ever happen?? */
+ /*
+ * Happens if we have an empty zonelist as a result of
+ * GFP_THISNODE being used on a memoryless node
+ */
return NULL;
}
if (page)
goto got_pg;
+ /* The OOM killer will not help higher order allocs so fail */
+ if (order > PAGE_ALLOC_COSTLY_ORDER)
+ goto nopage;
+
out_of_memory(zonelist, gfp_mask, order);
goto restart;
}
return node;
}
- for_each_online_node(n) {
+ for_each_node_state(n, N_HIGH_MEMORY) {
cpumask_t tmp;
/* Don't want a node to appear more than once */
}
}
+/*
+ * Build gfp_thisnode zonelists
+ *
+ * For every zone index i, fill the zonelist stored at
+ * pgdat->node_zonelists[MAX_NR_ZONES + i] by calling
+ * build_zonelists_node() for @pgdat only, then NULL-terminate the list
+ * at the position that call returns.
+ */
+static void build_thisnode_zonelists(pg_data_t *pgdat)
+{
+ enum zone_type i;
+ int j;
+ struct zonelist *zonelist;
+
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ zonelist = pgdat->node_zonelists + MAX_NR_ZONES + i;
+ j = build_zonelists_node(pgdat, zonelist, 0, i);
+ zonelist->zones[j] = NULL;
+ }
+}
+
/*
* Build zonelists ordered by zone and nodes within zones.
* This results in conserving DMA zone[s] until all Normal memory is
* If there is a node whose DMA/DMA32 memory is very big area on
* local memory, NODE_ORDER may be suitable.
*/
- average_size = total_size / (num_online_nodes() + 1);
+ average_size = total_size /
+ (nodes_weight(node_states[N_HIGH_MEMORY]) + 1);
for_each_online_node(nid) {
low_kmem_size = 0;
total_size = 0;
int order = current_zonelist_order;
/* initialize zonelists */
- for (i = 0; i < MAX_NR_ZONES; i++) {
+ for (i = 0; i < MAX_ZONELISTS; i++) {
zonelist = pgdat->node_zonelists + i;
zonelist->zones[0] = NULL;
}
/* calculate node order -- i.e., DMA last! */
build_zonelists_in_zone_order(pgdat, j);
}
+
+ build_thisnode_zonelists(pgdat);
}
/* Construct the zonelist performance cache - see further mmzone.h */
int nid;
for_each_online_node(nid) {
- build_zonelists(NODE_DATA(nid));
- build_zonelist_cache(NODE_DATA(nid));
+ pg_data_t *pgdat = NODE_DATA(nid);
+
+ build_zonelists(pgdat);
+ build_zonelist_cache(pgdat);
}
return 0;
}
init_page_count(page);
reset_page_mapcount(page);
SetPageReserved(page);
+
+ /*
+ * Mark the block movable so that blocks are reserved for
+ * movable at startup. This will force kernel allocations
+ * to reserve their blocks rather than leaking throughout
+ * the address space during boot when many long-lived
+ * kernel allocations are made
+ */
+ set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+
INIT_LIST_HEAD(&page->lru);
#ifdef WANT_PAGE_VIRTUAL
/* The shift won't overflow because ZONE_NORMAL is below 4G. */
static void __meminit zone_init_free_lists(struct pglist_data *pgdat,
struct zone *zone, unsigned long size)
{
- int order;
- for (order = 0; order < MAX_ORDER ; order++) {
- INIT_LIST_HEAD(&zone->free_area[order].free_list);
+ int order, t;
+ for_each_migratetype_order(order, t) {
+ INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
zone->free_area[order].nr_free = 0;
}
}
static int __cpuinit process_zones(int cpu)
{
struct zone *zone, *dzone;
+ int node = cpu_to_node(cpu);
+
+ node_set_state(node, N_CPU); /* this node has a cpu */
for_each_zone(zone) {
continue;
zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
- GFP_KERNEL, cpu_to_node(cpu));
+ GFP_KERNEL, node);
if (!zone_pcp(zone, cpu))
goto bad;
return 0;
bad:
for_each_zone(dzone) {
+ if (!populated_zone(dzone))
+ continue;
if (dzone == zone)
break;
kfree(zone_pcp(dzone, cpu));
* To use this new node's memory, further consideration will be
* necessary.
*/
- zone->wait_table = (wait_queue_head_t *)vmalloc(alloc_size);
+ zone->wait_table = vmalloc(alloc_size);
}
if (!zone->wait_table)
return -ENOMEM;
*end_pfn = max(*end_pfn, early_node_map[i].end_pfn);
}
- if (*start_pfn == -1UL) {
- printk(KERN_WARNING "Node %u active with no memory\n", nid);
+ if (*start_pfn == -1UL)
*start_pfn = 0;
- }
/* Push the node boundaries out if requested */
account_node_boundary(nid, start_pfn, end_pfn);
if (i == -1)
return 0;
+ prev_end_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
+
/* Account for ranges before physical memory on this node */
if (early_node_map[i].start_pfn > range_start_pfn)
- hole_pages = early_node_map[i].start_pfn - range_start_pfn;
-
- prev_end_pfn = early_node_map[i].start_pfn;
+ hole_pages = prev_end_pfn - range_start_pfn;
/* Find all holes for the zone within the node */
for (; i != -1; i = next_active_region_index_in_nid(i, nid)) {
realtotalpages);
}
+#ifndef CONFIG_SPARSEMEM
+/*
+ * Calculate the size of the zone->pageblock_flags bitmap, rounded up to a
+ * whole number of unsigned longs.
+ * Start by rounding zonesize up to a multiple of MAX_ORDER_NR_PAGES.
+ * Then allow NR_PAGEBLOCK_BITS bits per block of 2^(MAX_ORDER-1) pages,
+ * round that bit count up to a multiple of the bits in an unsigned long,
+ * and finally return the result in bytes.
+ */
+static unsigned long __init usemap_size(unsigned long zonesize)
+{
+ unsigned long usemapsize;
+
+ usemapsize = roundup(zonesize, MAX_ORDER_NR_PAGES);
+ usemapsize = usemapsize >> (MAX_ORDER-1);
+ usemapsize *= NR_PAGEBLOCK_BITS;
+ usemapsize = roundup(usemapsize, 8 * sizeof(unsigned long));
+
+ return usemapsize / 8;
+}
+
+static void __init setup_usemap(struct pglist_data *pgdat,
+ struct zone *zone, unsigned long zonesize)
+{ /* Allocate and zero zone->pageblock_flags, sized by usemap_size(zonesize). */
+ unsigned long usemapsize = usemap_size(zonesize);
+ zone->pageblock_flags = NULL; /* stays NULL when the computed size is 0 */
+ if (usemapsize) {
+ zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize);
+ memset(zone->pageblock_flags, 0, usemapsize); /* every block starts with all flag bits clear */
+ }
+}
+#else
+/*
+ * SPARSEMEM stub: no per-zone bitmap is set up here, because
+ * get_pageblock_bitmap() takes the bitmap from the section in this
+ * configuration. All arguments are ignored.
+ */
+static inline void setup_usemap(struct pglist_data *pgdat,
+ struct zone *zone, unsigned long zonesize) {}
+#endif /* CONFIG_SPARSEMEM */
+
/*
* Set up the zone data structures:
* - mark all pages reserved
if (!size)
continue;
+ setup_usemap(pgdat, zone, size);
ret = init_currently_empty_zone(zone, zone_start_pfn,
size, MEMMAP_EARLY);
BUG_ON(ret);
return max_pfn;
}
+/*
+ * early_calculate_totalpages()
+ * Sum the pages of every early_node_map[] region (end_pfn - start_pfn)
+ * and return the total, which is used when sizing the movable zone.
+ * As a side effect, set N_HIGH_MEMORY for each node that owns a
+ * non-empty region, so that usable_nodes can be calculated from it.
+ */
unsigned long __init early_calculate_totalpages(void)
{
int i;
unsigned long totalpages = 0;
- for (i = 0; i < nr_nodemap_entries; i++)
- totalpages += early_node_map[i].end_pfn -
+ for (i = 0; i < nr_nodemap_entries; i++) {
+ unsigned long pages = early_node_map[i].end_pfn -
early_node_map[i].start_pfn;
-
- return totalpages;
+ totalpages += pages;
+ if (pages)
+ node_set_state(early_node_map[i].nid, N_HIGH_MEMORY);
+ }
+ return totalpages;
}
/*
int i, nid;
unsigned long usable_startpfn;
unsigned long kernelcore_node, kernelcore_remaining;
- int usable_nodes = num_online_nodes();
+ unsigned long totalpages = early_calculate_totalpages();
+ int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
/*
* If movablecore was specified, calculate what size of
* what movablecore would have allowed.
*/
if (required_movablecore) {
- unsigned long totalpages = early_calculate_totalpages();
unsigned long corepages;
/*
restart:
/* Spread kernelcore memory as evenly as possible throughout nodes */
kernelcore_node = required_kernelcore / usable_nodes;
- for_each_online_node(nid) {
+ for_each_node_state(nid, N_HIGH_MEMORY) {
/*
* Recalculate kernelcore_node if the division per node
* now exceeds what is necessary to satisfy the requested
roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
}
+/*
+ * Any regular memory on that node ?
+ *
+ * With CONFIG_HIGHMEM, set N_NORMAL_MEMORY for the node when any zone from
+ * index 0 up to and including ZONE_NORMAL has present pages. Without
+ * CONFIG_HIGHMEM the body is compiled out and this function does nothing.
+ */
+static void check_for_regular_memory(pg_data_t *pgdat)
+{
+#ifdef CONFIG_HIGHMEM
+ enum zone_type zone_type;
+
+ for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) {
+ struct zone *zone = &pgdat->node_zones[zone_type];
+ if (zone->present_pages)
+ node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY);
+ }
+#endif
+}
+
/**
* free_area_init_nodes - Initialise all pg_data_t and zone data
* @max_zone_pfn: an array of max PFNs for each zone
pg_data_t *pgdat = NODE_DATA(nid);
free_area_init_node(nid, pgdat, NULL,
find_min_pfn_for_node(nid), NULL);
+
+ /* Any memory on that node */
+ if (pgdat->node_present_pages)
+ node_set_state(nid, N_HIGH_MEMORY);
+ check_for_regular_memory(pgdat);
}
}
EXPORT_SYMBOL(page_to_pfn);
#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
+/*
+ * Return a pointer to the bitmap storing bits affecting a block of pages.
+ * With CONFIG_SPARSEMEM the bitmap is the pageblock_flags of the section
+ * that __pfn_to_section(@pfn) returns and @zone is not used; otherwise it
+ * is the zone-wide zone->pageblock_flags and @pfn is not used.
+ */
+static inline unsigned long *get_pageblock_bitmap(struct zone *zone,
+ unsigned long pfn)
+{
+#ifdef CONFIG_SPARSEMEM
+ return __pfn_to_section(pfn)->pageblock_flags;
+#else
+ return zone->pageblock_flags;
+#endif /* CONFIG_SPARSEMEM */
+}
+static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
+{ /* Index of the first flag bit for @pfn's block within the bitmap that get_pageblock_bitmap() returns. */
+#ifdef CONFIG_SPARSEMEM
+ pfn &= (PAGES_PER_SECTION-1); /* offset within the section (assumes PAGES_PER_SECTION is a power of two) */
+ return (pfn >> (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS;
+#else
+ pfn = pfn - zone->zone_start_pfn; /* offset of pfn from the start of the zone */
+ return (pfn >> (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS; /* block number times bits stored per block */
+#endif /* CONFIG_SPARSEMEM */
+}
+
+/**
+ * get_pageblock_flags_group - Return the requested group of flags for the MAX_ORDER_NR_PAGES block of pages
+ * @page: The page within the block of interest
+ * @start_bitidx: The first bit of interest to retrieve
+ * @end_bitidx: The last bit of interest
+ * returns pageblock_bits flags
+ *
+ * Bits @start_bitidx..@end_bitidx (inclusive) of the block's flags are
+ * packed into the return value, with bit @start_bitidx placed in bit 0,
+ * the next one in bit 1, and so on. Returns 0 if @start_bitidx is greater
+ * than @end_bitidx.
+ */
+unsigned long get_pageblock_flags_group(struct page *page,
+ int start_bitidx, int end_bitidx)
+{
+ struct zone *zone;
+ unsigned long *bitmap;
+ unsigned long pfn, bitidx;
+ unsigned long flags = 0;
+ unsigned long value = 1;
+
+ zone = page_zone(page);
+ pfn = page_to_pfn(page);
+ bitmap = get_pageblock_bitmap(zone, pfn);
+ bitidx = pfn_to_bitidx(zone, pfn);
+
+ for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
+ if (test_bit(bitidx + start_bitidx, bitmap))
+ flags |= value;
+
+ return flags;
+}
+
+/**
+ * set_pageblock_flags_group - Set the requested group of flags for a MAX_ORDER_NR_PAGES block of pages
+ * @page: The page within the block of interest
+ * @flags: The flags to set
+ * @start_bitidx: The first bit of interest
+ * @end_bitidx: The last bit of interest
+ *
+ * Bit 0 of @flags is written to bit @start_bitidx of the block's flags,
+ * bit 1 to the next bit, and so on up to @end_bitidx (inclusive); each
+ * bit in the range is either set or cleared, so the whole group is
+ * overwritten. The updates use __set_bit()/__clear_bit(), presumably the
+ * non-atomic variants -- confirm that callers serialise access.
+ */
+void set_pageblock_flags_group(struct page *page, unsigned long flags,
+ int start_bitidx, int end_bitidx)
+{
+ struct zone *zone;
+ unsigned long *bitmap;
+ unsigned long pfn, bitidx;
+ unsigned long value = 1;
+
+ zone = page_zone(page);
+ pfn = page_to_pfn(page);
+ bitmap = get_pageblock_bitmap(zone, pfn);
+ bitidx = pfn_to_bitidx(zone, pfn);
+
+ for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
+ if (flags & value)
+ __set_bit(bitidx + start_bitidx, bitmap);
+ else
+ __clear_bit(bitidx + start_bitidx, bitmap);
+}