X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fvmscan.c;h=0095533cdde963143f7988c048a110519cc50a31;hb=7bf07f3d4b4358aa6d99a26d7a0165f1e91c3fcc;hp=269eded9b459804a8f090ea8dad0b908a4afef0d;hpb=323aca6c0bda611d0f31b3234d9fe291d31a9207;p=linux-2.6 diff --git a/mm/vmscan.c b/mm/vmscan.c index 269eded9b4..0095533cdd 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -74,6 +74,9 @@ struct scan_control { int may_writepage; + /* Can pages be swapped as part of reclaim? */ + int may_swap; + /* This context's SWAP_CLUSTER_MAX. If freeing memory for * suspend, we effectively ignore SWAP_CLUSTER_MAX. * In this context, it doesn't matter that we scan the @@ -180,17 +183,20 @@ EXPORT_SYMBOL(remove_shrinker); * `lru_pages' represents the number of on-LRU pages in all the zones which * are eligible for the caller's allocation attempt. It is used for balancing * slab reclaim versus page reclaim. + * + * Returns the number of slab objects which we shrunk. */ static int shrink_slab(unsigned long scanned, unsigned int gfp_mask, unsigned long lru_pages) { struct shrinker *shrinker; + int ret = 0; if (scanned == 0) scanned = SWAP_CLUSTER_MAX; if (!down_read_trylock(&shrinker_rwsem)) - return 0; + return 1; /* Assume we'll be able to shrink next time */ list_for_each_entry(shrinker, &shrinker_list, list) { unsigned long long delta; @@ -209,10 +215,14 @@ static int shrink_slab(unsigned long scanned, unsigned int gfp_mask, while (total_scan >= SHRINK_BATCH) { long this_scan = SHRINK_BATCH; int shrink_ret; + int nr_before; + nr_before = (*shrinker->shrinker)(0, gfp_mask); shrink_ret = (*shrinker->shrinker)(this_scan, gfp_mask); if (shrink_ret == -1) break; + if (shrink_ret < nr_before) + ret += nr_before - shrink_ret; mod_page_state(slabs_scanned, this_scan); total_scan -= this_scan; @@ -222,7 +232,7 @@ static int shrink_slab(unsigned long scanned, unsigned int gfp_mask, shrinker->nr += total_scan; } up_read(&shrinker_rwsem); - return 0; + return ret; } /* Called without lock on whether page is mapped, so answer is unstable */ @@ -407,7 +417,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) * Anonymous process memory has backing store? * Try to allocate it some swap space here. */ - if (PageAnon(page) && !PageSwapCache(page)) { + if (PageAnon(page) && !PageSwapCache(page) && sc->may_swap) { if (!add_to_swap(page)) goto activate_locked; } @@ -812,6 +822,8 @@ shrink_zone(struct zone *zone, struct scan_control *sc) unsigned long nr_active; unsigned long nr_inactive; + atomic_inc(&zone->reclaim_in_progress); + /* * Add one to `nr_to_scan' just to make sure that the kernel will * slowly sift through the active list. @@ -851,6 +863,8 @@ shrink_zone(struct zone *zone, struct scan_control *sc) } throttle_vm_writeout(); + + atomic_dec(&zone->reclaim_in_progress); } /* @@ -907,8 +921,7 @@ shrink_caches(struct zone **zones, struct scan_control *sc) * holds filesystem locks which prevent writeout this might not work, and the * allocation attempt will fail. */ -int try_to_free_pages(struct zone **zones, - unsigned int gfp_mask, unsigned int order) +int try_to_free_pages(struct zone **zones, unsigned int gfp_mask) { int priority; int ret = 0; @@ -920,6 +933,7 @@ int try_to_free_pages(struct zone **zones, sc.gfp_mask = gfp_mask; sc.may_writepage = 0; + sc.may_swap = 1; inc_page_state(allocstall); @@ -960,7 +974,7 @@ int try_to_free_pages(struct zone **zones, * writeout. So in laptop mode, write out the whole world. */ if (total_scanned > sc.swap_cluster_max + sc.swap_cluster_max/2) { - wakeup_bdflush(laptop_mode ? 0 : total_scanned); + wakeup_pdflush(laptop_mode ? 0 : total_scanned); sc.may_writepage = 1; } @@ -1020,6 +1034,7 @@ loop_again: total_reclaimed = 0; sc.gfp_mask = GFP_KERNEL; sc.may_writepage = 0; + sc.may_swap = 1; sc.nr_mapped = read_page_state(nr_mapped); inc_page_state(pageoutrun); @@ -1079,6 +1094,7 @@ scan: */ for (i = 0; i <= end_zone; i++) { struct zone *zone = pgdat->node_zones + i; + int nr_slab; if (zone->present_pages == 0) continue; @@ -1098,16 +1114,19 @@ scan: sc.nr_reclaimed = 0; sc.priority = priority; sc.swap_cluster_max = nr_pages? nr_pages : SWAP_CLUSTER_MAX; + atomic_inc(&zone->reclaim_in_progress); shrink_zone(zone, &sc); + atomic_dec(&zone->reclaim_in_progress); reclaim_state->reclaimed_slab = 0; - shrink_slab(sc.nr_scanned, GFP_KERNEL, lru_pages); + nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, + lru_pages); sc.nr_reclaimed += reclaim_state->reclaimed_slab; total_reclaimed += sc.nr_reclaimed; total_scanned += sc.nr_scanned; if (zone->all_unreclaimable) continue; - if (zone->pages_scanned >= (zone->nr_active + - zone->nr_inactive) * 4) + if (nr_slab == 0 && zone->pages_scanned >= + (zone->nr_active + zone->nr_inactive) * 4) zone->all_unreclaimable = 1; /* * If we've done a decent amount of scanning and @@ -1199,8 +1218,8 @@ static int kswapd(void *p) order = 0; for ( ; ; ) { unsigned long new_order; - if (current->flags & PF_FREEZE) - refrigerator(PF_FREEZE); + + try_to_freeze(); prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); new_order = pgdat->kswapd_max_order; @@ -1309,3 +1328,75 @@ static int __init kswapd_init(void) } module_init(kswapd_init) + + +/* + * Try to free up some pages from this zone through reclaim. + */ +int zone_reclaim(struct zone *zone, unsigned int gfp_mask, unsigned int order) +{ + struct scan_control sc; + int nr_pages = 1 << order; + int total_reclaimed = 0; + + /* The reclaim may sleep, so don't do it if sleep isn't allowed */ + if (!(gfp_mask & __GFP_WAIT)) + return 0; + if (zone->all_unreclaimable) + return 0; + + sc.gfp_mask = gfp_mask; + sc.may_writepage = 0; + sc.may_swap = 0; + sc.nr_mapped = read_page_state(nr_mapped); + sc.nr_scanned = 0; + sc.nr_reclaimed = 0; + /* scan at the highest priority */ + sc.priority = 0; + + if (nr_pages > SWAP_CLUSTER_MAX) + sc.swap_cluster_max = nr_pages; + else + sc.swap_cluster_max = SWAP_CLUSTER_MAX; + + /* Don't reclaim the zone if there are other reclaimers active */ + if (atomic_read(&zone->reclaim_in_progress) > 0) + goto out; + + shrink_zone(zone, &sc); + total_reclaimed = sc.nr_reclaimed; + + out: + return total_reclaimed; +} + +asmlinkage long sys_set_zone_reclaim(unsigned int node, unsigned int zone, + unsigned int state) +{ + struct zone *z; + int i; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (node >= MAX_NUMNODES || !node_online(node)) + return -EINVAL; + + /* This will break if we ever add more zones */ + if (!(zone & (1<node_zones[i]; + + if (state) + z->reclaim_pages = 1; + else + z->reclaim_pages = 0; + } + + return 0; +}