X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=mm%2Fpage_alloc.c;h=d315e1127dc91891980d180618d78ca51277aae7;hb=0c0e6195896535481173df98935ad8db174f4d45;hp=f7873a47fa8e3e73dd696c2c7c4b4d4cccd8e787;hpb=56fd56b868f19385c50af8941a4c78df433b2d32;p=linux-2.6 diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f7873a47fa..d315e1127d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -67,6 +68,10 @@ unsigned long totalreserve_pages __read_mostly; long nr_swap_pages; int percpu_pagelist_fraction; +#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE +int pageblock_order __read_mostly; +#endif + static void __free_pages_ok(struct page *page, unsigned int order); /* @@ -145,7 +150,7 @@ static unsigned long __meminitdata dma_reserve; static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES]; #endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */ unsigned long __initdata required_kernelcore; - unsigned long __initdata required_movablecore; + static unsigned long __initdata required_movablecore; unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ @@ -158,56 +163,14 @@ int nr_node_ids __read_mostly = MAX_NUMNODES; EXPORT_SYMBOL(nr_node_ids); #endif -#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY int page_group_by_mobility_disabled __read_mostly; -static inline int get_pageblock_migratetype(struct page *page) -{ - if (unlikely(page_group_by_mobility_disabled)) - return MIGRATE_UNMOVABLE; - - return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end); -} - static void set_pageblock_migratetype(struct page *page, int migratetype) { set_pageblock_flags_group(page, (unsigned long)migratetype, PB_migrate, PB_migrate_end); } -static inline int allocflags_to_migratetype(gfp_t gfp_flags, int order) -{ - WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); - - if (unlikely(page_group_by_mobility_disabled)) - return MIGRATE_UNMOVABLE; - - /* Cluster high-order atomic allocations together */ - if (unlikely(order > 0) && - (!(gfp_flags & __GFP_WAIT) || in_interrupt())) - return MIGRATE_HIGHATOMIC; - - /* Cluster based on mobility */ - return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) | - ((gfp_flags & __GFP_RECLAIMABLE) != 0); -} - -#else -static inline int get_pageblock_migratetype(struct page *page) -{ - return MIGRATE_UNMOVABLE; -} - -static void set_pageblock_migratetype(struct page *page, int migratetype) -{ -} - -static inline int allocflags_to_migratetype(gfp_t gfp_flags, int order) -{ - return MIGRATE_UNMOVABLE; -} -#endif /* CONFIG_PAGE_GROUP_BY_MOBILITY */ - #ifdef CONFIG_DEBUG_VM static int page_outside_zone_boundaries(struct zone *zone, struct page *page) { @@ -351,16 +314,6 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) clear_highpage(page + i); } -/* - * function for dealing with page's order in buddy system. - * zone->lock is already acquired when we use these. - * So, we don't need atomic page->flags operations here. 
- */ -static inline unsigned long page_order(struct page *page) -{ - return page_private(page); -} - static inline void set_page_order(struct page *page, int order) { set_page_private(page, order); @@ -718,22 +671,20 @@ static struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, } -#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY /* * This array describes the order lists are fallen back to when * the free lists for the desirable migrate type are depleted */ static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = { - [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_HIGHATOMIC, MIGRATE_RESERVE }, - [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_HIGHATOMIC, MIGRATE_RESERVE }, - [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_HIGHATOMIC, MIGRATE_RESERVE }, - [MIGRATE_HIGHATOMIC] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, - [MIGRATE_RESERVE] = { MIGRATE_RESERVE, MIGRATE_RESERVE, MIGRATE_RESERVE, MIGRATE_RESERVE }, /* Never used */ + [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, + [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, + [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, + [MIGRATE_RESERVE] = { MIGRATE_RESERVE, MIGRATE_RESERVE, MIGRATE_RESERVE }, /* Never used */ }; /* * Move the free pages in a range to the free lists of the requested type. - * Note that start_page and end_pages are not aligned in a MAX_ORDER_NR_PAGES + * Note that start_page and end_pages are not aligned on a pageblock * boundary. If alignment is required, use move_freepages_block() */ int move_freepages(struct zone *zone, @@ -742,7 +693,7 @@ int move_freepages(struct zone *zone, { struct page *page; unsigned long order; - int blocks_moved = 0; + int pages_moved = 0; #ifndef CONFIG_HOLES_IN_ZONE /* @@ -750,7 +701,7 @@ int move_freepages(struct zone *zone, * CONFIG_HOLES_IN_ZONE is set. This bug check is probably redundant * anyway as we check zone boundaries in move_freepages_block(). 
* Remove at a later date when no bug reports exist related to - * CONFIG_PAGE_GROUP_BY_MOBILITY + * grouping pages by mobility */ BUG_ON(page_zone(start_page) != page_zone(end_page)); #endif @@ -771,10 +722,10 @@ int move_freepages(struct zone *zone, list_add(&page->lru, &zone->free_area[order].free_list[migratetype]); page += 1 << order; - blocks_moved++; + pages_moved += 1 << order; } - return blocks_moved; + return pages_moved; } int move_freepages_block(struct zone *zone, struct page *page, int migratetype) @@ -783,10 +734,10 @@ int move_freepages_block(struct zone *zone, struct page *page, int migratetype) struct page *start_page, *end_page; start_pfn = page_to_pfn(page); - start_pfn = start_pfn & ~(MAX_ORDER_NR_PAGES-1); + start_pfn = start_pfn & ~(pageblock_nr_pages-1); start_page = pfn_to_page(start_pfn); - end_page = start_page + MAX_ORDER_NR_PAGES - 1; - end_pfn = start_pfn + MAX_ORDER_NR_PAGES - 1; + end_page = start_page + pageblock_nr_pages - 1; + end_pfn = start_pfn + pageblock_nr_pages - 1; /* Do not cross zone boundaries */ if (start_pfn < zone->zone_start_pfn) @@ -822,9 +773,7 @@ static struct page *__rmqueue_fallback(struct zone *zone, int order, int current_order; struct page *page; int migratetype, i; - int nonatomic_fallback_atomic = 0; -retry: /* Find the largest possible block of pages in the other list */ for (current_order = MAX_ORDER-1; current_order >= order; --current_order) { @@ -834,14 +783,6 @@ retry: /* MIGRATE_RESERVE handled later if necessary */ if (migratetype == MIGRATE_RESERVE) continue; - /* - * Make it hard to fallback to blocks used for - * high-order atomic allocations - */ - if (migratetype == MIGRATE_HIGHATOMIC && - start_migratetype != MIGRATE_UNMOVABLE && - !nonatomic_fallback_atomic) - continue; area = &(zone->free_area[current_order]); if (list_empty(&area->free_list[migratetype])) @@ -860,15 +801,14 @@ retry: * back for a reclaimable kernel allocation, be more * agressive about taking ownership of free pages */ - if (unlikely(current_order >= MAX_ORDER / 2) || + if (unlikely(current_order >= (pageblock_order >> 1)) || start_migratetype == MIGRATE_RECLAIMABLE) { unsigned long pages; pages = move_freepages_block(zone, page, start_migratetype); /* Claim the whole block if over half of it is free */ - if ((pages << current_order) >= (1 << (MAX_ORDER-2)) && - migratetype != MIGRATE_HIGHATOMIC) + if (pages >= (1 << (pageblock_order-1))) set_pageblock_migratetype(page, start_migratetype); @@ -881,7 +821,7 @@ retry: __mod_zone_page_state(zone, NR_FREE_PAGES, -(1UL << order)); - if (current_order == MAX_ORDER - 1) + if (current_order == pageblock_order) set_pageblock_migratetype(page, start_migratetype); @@ -890,22 +830,9 @@ retry: } } - /* Allow fallback to high-order atomic blocks if memory is that low */ - if (!nonatomic_fallback_atomic) { - nonatomic_fallback_atomic = 1; - goto retry; - } - /* Use MIGRATE_RESERVE rather than fail an allocation */ return __rmqueue_smallest(zone, order, MIGRATE_RESERVE); } -#else -static struct page *__rmqueue_fallback(struct zone *zone, int order, - int start_migratetype) -{ - return NULL; -} -#endif /* CONFIG_PAGE_GROUP_BY_MOBILITY */ /* * Do the hard work of removing an element from the buddy allocator. @@ -1033,7 +960,6 @@ void mark_free_pages(struct zone *zone) } #endif /* CONFIG_PM */ -#if defined(CONFIG_HIBERNATION) || defined(CONFIG_PAGE_GROUP_BY_MOBILITY) /* * Spill all of this CPU's per-cpu pages back into the buddy allocator. 
*/ @@ -1064,9 +990,6 @@ void drain_all_local_pages(void) smp_call_function(smp_drain_local_pages, NULL, 0, 1); } -#else -void drain_all_local_pages(void) {} -#endif /* CONFIG_HIBERNATION || CONFIG_PAGE_GROUP_BY_MOBILITY */ /* * Free a 0-order page @@ -1141,7 +1064,7 @@ static struct page *buffered_rmqueue(struct zonelist *zonelist, struct page *page; int cold = !!(gfp_flags & __GFP_COLD); int cpu; - int migratetype = allocflags_to_migratetype(gfp_flags, order); + int migratetype = allocflags_to_migratetype(gfp_flags); again: cpu = get_cpu(); @@ -1157,7 +1080,6 @@ again: goto failed; } -#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY /* Find a page of the appropriate migrate type */ list_for_each_entry(page, &pcp->list, lru) if (page_private(page) == migratetype) @@ -1169,9 +1091,6 @@ again: pcp->batch, &pcp->list, migratetype); page = list_entry(pcp->list.next, struct page, lru); } -#else - page = list_entry(pcp->list.next, struct page, lru); -#endif /* CONFIG_PAGE_GROUP_BY_MOBILITY */ list_del(&page->lru); pcp->count--; @@ -2441,7 +2360,7 @@ void build_all_zonelists(void) * made on memory-hotadd so a system can start with mobility * disabled and enable it later */ - if (vm_total_pages < (MAX_ORDER_NR_PAGES * MIGRATE_TYPES)) + if (vm_total_pages < (pageblock_nr_pages * MIGRATE_TYPES)) page_group_by_mobility_disabled = 1; else page_group_by_mobility_disabled = 0; @@ -2525,9 +2444,8 @@ static inline unsigned long wait_table_bits(unsigned long size) #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1)) -#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY /* - * Mark a number of MAX_ORDER_NR_PAGES blocks as MIGRATE_RESERVE. The number + * Mark a number of pageblocks as MIGRATE_RESERVE. The number * of blocks reserved is based on zone->pages_min. The memory within the * reserve will tend to store contiguous free pages. 
Setting min_free_kbytes * higher will lead to a bigger reserve which will get freed as contiguous @@ -2542,9 +2460,10 @@ static void setup_zone_migrate_reserve(struct zone *zone) /* Get the start pfn, end pfn and the number of blocks to reserve */ start_pfn = zone->zone_start_pfn; end_pfn = start_pfn + zone->spanned_pages; - reserve = roundup(zone->pages_min, MAX_ORDER_NR_PAGES) >> (MAX_ORDER-1); + reserve = roundup(zone->pages_min, pageblock_nr_pages) >> + pageblock_order; - for (pfn = start_pfn; pfn < end_pfn; pfn += MAX_ORDER_NR_PAGES) { + for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { if (!pfn_valid(pfn)) continue; page = pfn_to_page(pfn); @@ -2579,11 +2498,7 @@ static void setup_zone_migrate_reserve(struct zone *zone) } } } -#else -static inline void setup_zone_migrate_reserve(struct zone *zone) -{ -} -#endif /* CONFIG_PAGE_GROUP_BY_MOBILITY */ + /* * Initially all pages are reserved - free ones are freed * up by free_all_bootmem() once the early boot process is @@ -2623,7 +2538,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, * the start are marked MIGRATE_RESERVE by * setup_zone_migrate_reserve() */ - if ((pfn & (MAX_ORDER_NR_PAGES-1))) + if ((pfn & (pageblock_nr_pages-1))) set_pageblock_migratetype(page, MIGRATE_MOVABLE); INIT_LIST_HEAD(&page->lru); @@ -3327,8 +3242,8 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, #ifndef CONFIG_SPARSEMEM /* * Calculate the size of the zone->blockflags rounded to an unsigned long - * Start by making sure zonesize is a multiple of MAX_ORDER-1 by rounding up - * Then figure 1 NR_PAGEBLOCK_BITS worth of bits per MAX_ORDER-1, finally + * Start by making sure zonesize is a multiple of pageblock_order by rounding + * up. Then use 1 NR_PAGEBLOCK_BITS worth of bits per pageblock, finally * round what is now in bits to nearest long in bits, then return it in * bytes. */ @@ -3336,8 +3251,8 @@ static unsigned long __init usemap_size(unsigned long zonesize) { unsigned long usemapsize; - usemapsize = roundup(zonesize, MAX_ORDER_NR_PAGES); - usemapsize = usemapsize >> (MAX_ORDER-1); + usemapsize = roundup(zonesize, pageblock_nr_pages); + usemapsize = usemapsize >> pageblock_order; usemapsize *= NR_PAGEBLOCK_BITS; usemapsize = roundup(usemapsize, 8 * sizeof(unsigned long)); @@ -3359,6 +3274,27 @@ static void inline setup_usemap(struct pglist_data *pgdat, struct zone *zone, unsigned long zonesize) {} #endif /* CONFIG_SPARSEMEM */ +#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE +/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */ +static inline void __init set_pageblock_order(unsigned int order) +{ + /* Check that pageblock_nr_pages has not already been setup */ + if (pageblock_order) + return; + + /* + * Assume the largest contiguous order of interest is a huge page. 
+	 * This value may be variable depending on boot parameters on IA64
+	 */
+	pageblock_order = order;
+}
+#else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
+
+/* Defined this way to avoid accidentally referencing HUGETLB_PAGE_ORDER */
+#define set_pageblock_order(x)	do {} while (0)
+
+#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
+
 /*
  * Set up the zone data structures:
  *  - mark all pages reserved
@@ -3439,6 +3375,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 		if (!size)
 			continue;
 
+		set_pageblock_order(HUGETLB_PAGE_ORDER);
 		setup_usemap(pgdat, zone, size);
 		ret = init_currently_empty_zone(zone, zone_start_pfn,
 						size, MEMMAP_EARLY);
@@ -3698,7 +3635,7 @@ unsigned long __init find_max_pfn_with_active_regions(void)
  * Sum pages in active regions for movable zone.
  * Populate N_HIGH_MEMORY for calculating usable_nodes.
  */
-unsigned long __init early_calculate_totalpages(void)
+static unsigned long __init early_calculate_totalpages(void)
 {
 	int i;
 	unsigned long totalpages = 0;
@@ -4436,15 +4373,15 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
 {
 #ifdef CONFIG_SPARSEMEM
 	pfn &= (PAGES_PER_SECTION-1);
-	return (pfn >> (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS;
+	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
 #else
 	pfn = pfn - zone->zone_start_pfn;
-	return (pfn >> (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS;
+	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
 #endif /* CONFIG_SPARSEMEM */
 }
 
 /**
- * get_pageblock_flags_group - Return the requested group of flags for the MAX_ORDER_NR_PAGES block of pages
+ * get_pageblock_flags_group - Return the requested group of flags for the pageblock_nr_pages block of pages
  * @page: The page within the block of interest
  * @start_bitidx: The first bit of interest to retrieve
  * @end_bitidx: The last bit of interest
@@ -4472,7 +4409,7 @@ unsigned long get_pageblock_flags_group(struct page *page,
 }
 
 /**
- * set_pageblock_flags_group - Set the requested group of flags for a MAX_ORDER_NR_PAGES block of pages
+ * set_pageblock_flags_group - Set the requested group of flags for a pageblock_nr_pages block of pages
  * @page: The page within the block of interest
  * @start_bitidx: The first bit of interest
  * @end_bitidx: The last bit of interest
@@ -4497,3 +4434,93 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
 	else
 		__clear_bit(bitidx + start_bitidx, bitmap);
 }
+
+/*
+ * These are helpers for page isolation; see page_isolation.c as well.
+ * They set/clear a pageblock's migrate type to/from MIGRATE_ISOLATE.
+ * The page allocator never allocates memory from an ISOLATE pageblock.
+ */
+
+int set_migratetype_isolate(struct page *page)
+{
+	struct zone *zone;
+	unsigned long flags;
+	int ret = -EBUSY;
+
+	zone = page_zone(page);
+	spin_lock_irqsave(&zone->lock, flags);
+	/*
+	 * In the future, more migrate types will be able to be isolation targets.
+ */ + if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE) + goto out; + set_pageblock_migratetype(page, MIGRATE_ISOLATE); + move_freepages_block(zone, page, MIGRATE_ISOLATE); + ret = 0; +out: + spin_unlock_irqrestore(&zone->lock, flags); + if (!ret) + drain_all_local_pages(); + return ret; +} + +void unset_migratetype_isolate(struct page *page) +{ + struct zone *zone; + unsigned long flags; + zone = page_zone(page); + spin_lock_irqsave(&zone->lock, flags); + if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) + goto out; + set_pageblock_migratetype(page, MIGRATE_MOVABLE); + move_freepages_block(zone, page, MIGRATE_MOVABLE); +out: + spin_unlock_irqrestore(&zone->lock, flags); +} + +#ifdef CONFIG_MEMORY_HOTREMOVE +/* + * All pages in the range must be isolated before calling this. + */ +void +__offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) +{ + struct page *page; + struct zone *zone; + int order, i; + unsigned long pfn; + unsigned long flags; + /* find the first valid pfn */ + for (pfn = start_pfn; pfn < end_pfn; pfn++) + if (pfn_valid(pfn)) + break; + if (pfn == end_pfn) + return; + zone = page_zone(pfn_to_page(pfn)); + spin_lock_irqsave(&zone->lock, flags); + pfn = start_pfn; + while (pfn < end_pfn) { + if (!pfn_valid(pfn)) { + pfn++; + continue; + } + page = pfn_to_page(pfn); + BUG_ON(page_count(page)); + BUG_ON(!PageBuddy(page)); + order = page_order(page); +#ifdef CONFIG_DEBUG_VM + printk(KERN_INFO "remove from free list %lx %d %lx\n", + pfn, 1 << order, end_pfn); +#endif + list_del(&page->lru); + rmv_page_order(page); + zone->free_area[order].nr_free--; + __mod_zone_page_state(zone, NR_FREE_PAGES, + - (1UL << order)); + for (i = 0; i < (1 << order); i++) + SetPageReserved((page+i)); + pfn += (1 << order); + } + spin_unlock_irqrestore(&zone->lock, flags); +} +#endif
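The hunks above replace MAX_ORDER-based arithmetic with pageblock_order/pageblock_nr_pages: usemap_size() sizes the blockflags bitmap at NR_PAGEBLOCK_BITS per pageblock, pfn_to_bitidx() maps a pfn to its block's bit offset, and get/set_pageblock_flags_group() read and write that field. The standalone sketch below mirrors this arithmetic in userspace so the bitmap layout is easier to follow. It is illustrative only: the values chosen for PAGEBLOCK_ORDER, NR_PAGEBLOCK_BITS and the zone parameters in main() are assumptions for the example, not values taken from the patch or any particular kernel configuration, and the helpers are simplified stand-ins for the kernel functions of similar names.

/*
 * Illustrative userspace sketch of the pageblock bitmap arithmetic in the
 * patch above.  PAGEBLOCK_ORDER and NR_PAGEBLOCK_BITS are assumed values;
 * in the kernel they come from the configuration and, with
 * CONFIG_HUGETLB_PAGE_SIZE_VARIABLE, from set_pageblock_order().
 */
#include <stdio.h>

#define PAGEBLOCK_ORDER		10			/* assumption for the example */
#define PAGEBLOCK_NR_PAGES	(1UL << PAGEBLOCK_ORDER)
#define NR_PAGEBLOCK_BITS	3			/* assumption: bits per pageblock */
#define BITS_PER_LONG		(8 * sizeof(unsigned long))

static unsigned long roundup_ul(unsigned long x, unsigned long to)
{
	return ((x + to - 1) / to) * to;
}

/* Mirrors usemap_size(): bytes of blockflags needed for a zone span. */
static unsigned long usemap_size(unsigned long zonesize)
{
	unsigned long usemapsize;

	usemapsize = roundup_ul(zonesize, PAGEBLOCK_NR_PAGES);
	usemapsize >>= PAGEBLOCK_ORDER;			/* one entry per pageblock */
	usemapsize *= NR_PAGEBLOCK_BITS;		/* bits per pageblock */
	usemapsize = roundup_ul(usemapsize, BITS_PER_LONG);

	return usemapsize / 8;				/* bits -> bytes */
}

/* Mirrors the !CONFIG_SPARSEMEM pfn_to_bitidx(): bit offset of a pfn's block. */
static unsigned long pfn_to_bitidx(unsigned long zone_start_pfn, unsigned long pfn)
{
	return ((pfn - zone_start_pfn) >> PAGEBLOCK_ORDER) * NR_PAGEBLOCK_BITS;
}

/* Same idea as set_pageblock_flags_group(), with open-coded bit operations. */
static void set_block_migratetype(unsigned long *bitmap, unsigned long bitidx,
				  unsigned long type)
{
	unsigned long bit;

	for (bit = 0; bit < NR_PAGEBLOCK_BITS; bit++, type >>= 1) {
		unsigned long idx = bitidx + bit;

		if (type & 1)
			bitmap[idx / BITS_PER_LONG] |= 1UL << (idx % BITS_PER_LONG);
		else
			bitmap[idx / BITS_PER_LONG] &= ~(1UL << (idx % BITS_PER_LONG));
	}
}

/* Same idea as get_pageblock_flags_group() for the migratetype bit range. */
static unsigned long get_block_migratetype(const unsigned long *bitmap,
					   unsigned long bitidx)
{
	unsigned long bit, flags = 0;

	for (bit = 0; bit < NR_PAGEBLOCK_BITS; bit++) {
		unsigned long idx = bitidx + bit;

		if (bitmap[idx / BITS_PER_LONG] & (1UL << (idx % BITS_PER_LONG)))
			flags |= 1UL << bit;
	}
	return flags;
}

int main(void)
{
	unsigned long zone_start_pfn = 0x1000;		/* assumed zone layout */
	unsigned long zonesize = 1UL << 20;		/* assumed: 1M pages spanned */
	unsigned long bitmap[1024] = { 0 };		/* generously sized for the example */
	unsigned long bytes = usemap_size(zonesize);
	unsigned long pfn = zone_start_pfn + 3 * PAGEBLOCK_NR_PAGES + 5;
	unsigned long bitidx = pfn_to_bitidx(zone_start_pfn, pfn);

	if (bytes > sizeof(bitmap))
		return 1;

	printf("usemap: %lu bytes for %lu pageblocks\n",
	       bytes, zonesize >> PAGEBLOCK_ORDER);

	set_block_migratetype(bitmap, bitidx, 2UL);	/* an arbitrary migrate type */
	printf("pfn %#lx -> bitidx %lu, migratetype %lu\n",
	       pfn, bitidx, get_block_migratetype(bitmap, bitidx));
	return 0;
}

Every pfn inside the same pageblock maps to the same NR_PAGEBLOCK_BITS-wide field, which is why set_pageblock_migratetype() in the patch can retag a whole block by rewriting a single field, and why move_freepages_block() only needs to align the pfn down to a pageblock_nr_pages boundary before moving the block's free pages.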