#include <linux/fault-inject.h>
#include <linux/page-isolation.h>
#include <linux/memcontrol.h>
+#include <linux/debugobjects.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
if (reserved)
return;
- if (!PageHighMem(page))
+ if (!PageHighMem(page)) {
debug_check_no_locks_freed(page_address(page),PAGE_SIZE<<order);
+ debug_check_no_obj_freed(page_address(page),
+ PAGE_SIZE << order);
+ }
arch_free_page(page, order);
kernel_map_pages(page, 1 << order, 0);
/*
* permit the bootmem allocator to evade page validation on high-order frees
*/
-void __init __free_pages_bootmem(struct page *page, unsigned int order)
+void __free_pages_bootmem(struct page *page, unsigned int order)
{
if (order == 0) {
__ClearPageReserved(page);
if (PageReserved(page))
return 1;
- page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_readahead |
+ page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim |
1 << PG_referenced | 1 << PG_arch_1 |
1 << PG_owner_priv_1 | 1 << PG_mappedtodisk);
set_page_private(page, 0);
if (free_pages_check(page))
return;
- if (!PageHighMem(page))
+ if (!PageHighMem(page)) {
debug_check_no_locks_freed(page_address(page), PAGE_SIZE);
+ debug_check_no_obj_freed(page_address(page), PAGE_SIZE);
+ }
arch_free_page(page, 0);
kernel_map_pages(page, 1, 0);
if (!zlc)
return NULL;
- if (time_after(jiffies, zlc->last_full_zap + HZ)) {
+ if (time_after(jiffies, zlc->last_full_zap + HZ)) {
bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
zlc->last_full_zap = jiffies;
}
* a page.
*/
static struct page *
-get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
+get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
{
struct zoneref *z;
int zlc_active = 0; /* set if using zonelist_cache */
int did_zlc_setup = 0; /* just call zlc_setup() one time */
- z = first_zones_zonelist(zonelist, high_zoneidx);
- classzone_idx = zonelist_zone_idx(z);
- preferred_zone = zonelist_zone(z);
+ (void)first_zones_zonelist(zonelist, high_zoneidx, nodemask,
+ &preferred_zone);
+ classzone_idx = zone_idx(preferred_zone);
zonelist_scan:
/*
* Scan zonelist, looking for a zone with enough free.
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
*/
- for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+ for_each_zone_zonelist_nodemask(zone, z, zonelist,
+ high_zoneidx, nodemask) {
if (NUMA_BUILD && zlc_active &&
!zlc_zone_worth_trying(zonelist, z, allowednodes))
continue;
/*
* This is the 'heart' of the zoned buddy allocator.
*/
-struct page *
-__alloc_pages(gfp_t gfp_mask, unsigned int order,
- struct zonelist *zonelist)
+static struct page *
+__alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
+ struct zonelist *zonelist, nodemask_t *nodemask)
{
const gfp_t wait = gfp_mask & __GFP_WAIT;
enum zone_type high_zoneidx = gfp_zone(gfp_mask);
struct task_struct *p = current;
int do_retry;
int alloc_flags;
- int did_some_progress;
+ unsigned long did_some_progress;
+ unsigned long pages_reclaimed = 0;
might_sleep_if(wait);
return NULL;
}
- page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET);
if (page)
goto got_pg;
* Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
*/
- page = get_page_from_freelist(gfp_mask, order, zonelist,
+ page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
high_zoneidx, alloc_flags);
if (page)
goto got_pg;
if (!(gfp_mask & __GFP_NOMEMALLOC)) {
nofail_alloc:
/* go through the zonelist yet again, ignoring mins */
- page = get_page_from_freelist(gfp_mask, order,
+ page = get_page_from_freelist(gfp_mask, nodemask, order,
zonelist, high_zoneidx, ALLOC_NO_WATERMARKS);
if (page)
goto got_pg;
drain_all_pages();
if (likely(did_some_progress)) {
- page = get_page_from_freelist(gfp_mask, order,
+ page = get_page_from_freelist(gfp_mask, nodemask, order,
zonelist, high_zoneidx, alloc_flags);
if (page)
goto got_pg;
* a parallel oom killing, we must fail if we're still
* under heavy pressure.
*/
- page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
- zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
+ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask,
+ order, zonelist, high_zoneidx,
+ ALLOC_WMARK_HIGH|ALLOC_CPUSET);
if (page) {
clear_zonelist_oom(zonelist, gfp_mask);
goto got_pg;
* Don't let big-order allocations loop unless the caller explicitly
* requests that. Wait for some write requests to complete then retry.
*
- * In this implementation, __GFP_REPEAT means __GFP_NOFAIL for order
- * <= 3, but that may not be true in other implementations.
+ * In this implementation, order <= PAGE_ALLOC_COSTLY_ORDER
+ * means __GFP_NOFAIL, but that may not be true in other
+ * implementations.
+ *
+ * For order > PAGE_ALLOC_COSTLY_ORDER, if __GFP_REPEAT is
+ * specified, then we retry until we no longer reclaim any pages
+ * (above), or we've reclaimed an order of pages at least as
+ * large as the allocation's order. In both cases, if the
+ * allocation still fails, we stop retrying.
*/
+ pages_reclaimed += did_some_progress;
do_retry = 0;
if (!(gfp_mask & __GFP_NORETRY)) {
- if ((order <= PAGE_ALLOC_COSTLY_ORDER) ||
- (gfp_mask & __GFP_REPEAT))
+ if (order <= PAGE_ALLOC_COSTLY_ORDER) {
do_retry = 1;
+ } else {
+ if (gfp_mask & __GFP_REPEAT &&
+ pages_reclaimed < (1 << order))
+ do_retry = 1;
+ }
if (gfp_mask & __GFP_NOFAIL)
do_retry = 1;
}
return page;
}
+struct page *
+__alloc_pages(gfp_t gfp_mask, unsigned int order,
+ struct zonelist *zonelist)
+{
+ return __alloc_pages_internal(gfp_mask, order, zonelist, NULL);
+}
+
+struct page *
+__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
+ struct zonelist *zonelist, nodemask_t *nodemask)
+{
+ return __alloc_pages_internal(gfp_mask, order, zonelist, nodemask);
+}
+
EXPORT_SYMBOL(__alloc_pages);
/*
show_swap_cache_info();
}
+static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
+{
+ zoneref->zone = zone;
+ zoneref->zone_idx = zone_idx(zone);
+}
+
/*
* Builds allocation fallback zone lists.
*
struct page *page;
unsigned long end_pfn = start_pfn + size;
unsigned long pfn;
+ struct zone *z;
+ z = &NODE_DATA(nid)->node_zones[zone];
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
/*
* There can be holes in boot-time mem_map[]s
init_page_count(page);
reset_page_mapcount(page);
SetPageReserved(page);
-
/*
* Mark the block movable so that blocks are reserved for
* movable at startup. This will force kernel allocations
* kernel allocations are made. Later some blocks near
* the start are marked MIGRATE_RESERVE by
* setup_zone_migrate_reserve()
+ *
+ * bitmap is created for zone's valid pfn range. but memmap
+ * can be created for invalid pages (for alignment)
+ * check here not to call set_pageblock_migratetype() against
+ * pfn out of zone.
*/
- if ((pfn & (pageblock_nr_pages-1)))
+ if ((z->zone_start_pfn <= pfn)
+ && (pfn < z->zone_start_pfn + z->spanned_pages)
+ && !(pfn & (pageblock_nr_pages - 1)))
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
INIT_LIST_HEAD(&page->lru);
zone->zone_start_pfn = zone_start_pfn;
- memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
-
zone_init_free_lists(zone);
return 0;
ret = init_currently_empty_zone(zone, zone_start_pfn,
size, MEMMAP_EARLY);
BUG_ON(ret);
+ memmap_init(size, nid, j, zone_start_pfn);
zone_start_pfn += size;
}
}
else if (hashdist)
table = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL);
else {
- unsigned long order;
- for (order = 0; ((1UL << order) << PAGE_SHIFT) < size; order++)
- ;
+ unsigned long order = get_order(size);
table = (void*) __get_free_pages(GFP_ATOMIC, order);
/*
* If bucketsize is not a power-of-two, we may free
pfn = page_to_pfn(page);
bitmap = get_pageblock_bitmap(zone, pfn);
bitidx = pfn_to_bitidx(zone, pfn);
+ VM_BUG_ON(pfn < zone->zone_start_pfn);
+ VM_BUG_ON(pfn >= zone->zone_start_pfn + zone->spanned_pages);
for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
if (flags & value)