X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fvmscan.c;h=518540a4a2a66a1ade20312d1d655ba6923e05b5;hb=92703eee4ccde3c55ee067a89c373e8a51a8adf9;hp=eca70310adb26239e94c5435eaf2abf0271c89fa;hpb=cdb8355add9b1d87ecfcb58b12879897dc1e3e36;p=linux-2.6 diff --git a/mm/vmscan.c b/mm/vmscan.c index eca70310ad..518540a4a2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -378,6 +378,12 @@ static pageout_t pageout(struct page *page, struct address_space *mapping) return PAGE_CLEAN; } +/* + * Attempt to detach a locked page from its ->mapping. If it is dirty or if + * someone else has a ref on the page, abort and return 0. If it was + * successfully detached, return 1. Assumes the caller has a single ref on + * this page. + */ int remove_mapping(struct address_space *mapping, struct page *page) { BUG_ON(!PageLocked(page)); @@ -717,6 +723,20 @@ done: return nr_reclaimed; } +/* + * We are about to scan this zone at a certain priority level. If that priority + * level is smaller (ie: more urgent) than the previous priority, then note + * that priority level within the zone. This is done so that when the next + * process comes in to scan this zone, it will immediately start out at this + * priority level rather than having to build up its own scanning priority. + * Here, this priority affects only the reclaim-mapped threshold. + */ +static inline void note_zone_scanning_priority(struct zone *zone, int priority) +{ + if (priority < zone->prev_priority) + zone->prev_priority = priority; +} + static inline int zone_is_near_oom(struct zone *zone) { return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3; @@ -740,7 +760,7 @@ static inline int zone_is_near_oom(struct zone *zone) * But we had to alter page->flags anyway. */ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, - struct scan_control *sc) + struct scan_control *sc, int priority) { unsigned long pgmoved; int pgdeactivate = 0; @@ -764,7 +784,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, * `distress' is a measure of how much trouble we're having * reclaiming pages. 0 -> no problems. 100 -> great trouble. */ - distress = 100 >> zone->prev_priority; + distress = 100 >> min(zone->prev_priority, priority); /* * The point of this algorithm is to decide when to start @@ -916,7 +936,7 @@ static unsigned long shrink_zone(int priority, struct zone *zone, nr_to_scan = min(nr_active, (unsigned long)sc->swap_cluster_max); nr_active -= nr_to_scan; - shrink_active_list(nr_to_scan, zone, sc); + shrink_active_list(nr_to_scan, zone, sc, priority); } if (nr_inactive) { @@ -966,9 +986,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones, if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) continue; - zone->temp_priority = priority; - if (zone->prev_priority > priority) - zone->prev_priority = priority; + note_zone_scanning_priority(zone, priority); if (zone->all_unreclaimable && priority != DEF_PRIORITY) continue; /* Let kswapd poll it */ @@ -1018,7 +1036,6 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) continue; - zone->temp_priority = DEF_PRIORITY; lru_pages += zone->nr_active + zone->nr_inactive; } @@ -1053,19 +1070,28 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) /* Take a nap, wait for some writeback to complete */ if (sc.nr_scanned && priority < DEF_PRIORITY - 2) - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); } /* top priority shrink_caches still had more to do? don't OOM, then */ if (!sc.all_unreclaimable) ret = 1; out: + /* + * Now that we've scanned all the zones at this priority level, note + * that level within the zone so that the next thread which performs + * scanning of this zone will immediately start out at this priority + * level. This affects only the decision whether or not to bring + * mapped pages onto the inactive list. + */ + if (priority < 0) + priority = 0; for (i = 0; zones[i] != 0; i++) { struct zone *zone = zones[i]; if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) continue; - zone->prev_priority = zone->temp_priority; + zone->prev_priority = priority; } return ret; } @@ -1105,6 +1131,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order) .swap_cluster_max = SWAP_CLUSTER_MAX, .swappiness = vm_swappiness, }; + /* + * temp_priority is used to remember the scanning priority at which + * this zone was successfully refilled to free_pages == pages_high. + */ + int temp_priority[MAX_NR_ZONES]; loop_again: total_scanned = 0; @@ -1112,11 +1143,8 @@ loop_again: sc.may_writepage = !laptop_mode; count_vm_event(PAGEOUTRUN); - for (i = 0; i < pgdat->nr_zones; i++) { - struct zone *zone = pgdat->node_zones + i; - - zone->temp_priority = DEF_PRIORITY; - } + for (i = 0; i < pgdat->nr_zones; i++) + temp_priority[i] = DEF_PRIORITY; for (priority = DEF_PRIORITY; priority >= 0; priority--) { int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ @@ -1177,10 +1205,9 @@ scan: if (!zone_watermark_ok(zone, order, zone->pages_high, end_zone, 0)) all_zones_ok = 0; - zone->temp_priority = priority; - if (zone->prev_priority > priority) - zone->prev_priority = priority; + temp_priority[i] = priority; sc.nr_scanned = 0; + note_zone_scanning_priority(zone, priority); nr_reclaimed += shrink_zone(priority, zone, &sc); reclaim_state->reclaimed_slab = 0; nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, @@ -1208,7 +1235,7 @@ scan: * another pass across the zones. */ if (total_scanned && priority < DEF_PRIORITY - 2) - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); /* * We do this so kswapd doesn't build up large priorities for @@ -1220,10 +1247,15 @@ scan: break; } out: + /* + * Note within each zone the priority level at which this zone was + * brought into a happy state. So that the next thread which scans this + * zone will start out at that priority level. + */ for (i = 0; i < pgdat->nr_zones; i++) { struct zone *zone = pgdat->node_zones + i; - zone->prev_priority = zone->temp_priority; + zone->prev_priority = temp_priority[i]; } if (!all_zones_ok) { cond_resched(); @@ -1352,7 +1384,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int pass, if (zone->nr_scan_active >= nr_pages || pass > 3) { zone->nr_scan_active = 0; nr_to_scan = min(nr_pages, zone->nr_active); - shrink_active_list(nr_to_scan, zone, sc); + shrink_active_list(nr_to_scan, zone, sc, prio); } } @@ -1452,7 +1484,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) goto out; if (sc.nr_scanned && prio < DEF_PRIORITY - 2) - blk_congestion_wait(WRITE, HZ / 10); + congestion_wait(WRITE, HZ / 10); } lru_pages = 0; @@ -1608,6 +1640,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) */ priority = ZONE_RECLAIM_PRIORITY; do { + note_zone_scanning_priority(zone, priority); nr_reclaimed += shrink_zone(priority, zone, &sc); priority--; } while (priority >= 0 && nr_reclaimed < nr_pages);