X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fvmscan.c;h=bbd194630c5b0c16dd550d21b81fb2b0865c3538;hb=467c996c1e1910633fa8e7adc9b052aa3ed5f97c;hp=a6e65d024995ad49a1e9675dea9d36b343ceda60;hpb=532df780a2012ad75b3f078647f229c4dabd99d1;p=linux-2.6

diff --git a/mm/vmscan.c b/mm/vmscan.c
index a6e65d0249..bbd194630c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -932,6 +932,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		long mapped_ratio;
 		long distress;
 		long swap_tendency;
+		long imbalance;
 
 		if (zone_is_near_oom(zone))
			goto force_reclaim_mapped;
@@ -966,6 +967,46 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		 */
 		swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;
 
+		/*
+		 * If there's a huge imbalance between active and inactive
+		 * (think active 100 times larger than inactive) we should
+		 * become more permissive, or the system will take too much
+		 * cpu before it starts swapping during memory pressure.
+		 * Distress is about avoiding an early oom; this is about
+		 * keeping swappiness graceful even when it is set to low
+		 * values.
+		 *
+		 * Avoid div by zero with nr_inactive+1; the max resulting
+		 * value is vm_total_pages.
+		 */
+		imbalance  = zone_page_state(zone, NR_ACTIVE);
+		imbalance /= zone_page_state(zone, NR_INACTIVE) + 1;
+
+		/*
+		 * Reduce the effect of imbalance if swappiness is low;
+		 * this means that for a very low swappiness the imbalance
+		 * must be much higher than 100 for this logic to make
+		 * a difference.
+		 *
+		 * Max temporary value is vm_total_pages*100.
+		 */
+		imbalance *= (vm_swappiness + 1);
+		imbalance /= 100;
+
+		/*
+		 * If not much of the ram is mapped, make the imbalance
+		 * less relevant: refilling the inactive list with mapped
+		 * pages is only a high priority when the ratio of mapped
+		 * pages is high.
+		 *
+		 * Max temporary value is vm_total_pages*100.
+		 */
+		imbalance *= mapped_ratio;
+		imbalance /= 100;
+
+		/* apply imbalance feedback to swap_tendency */
+		swap_tendency += imbalance;
+
 		/*
 		 * Now use this metric to decide whether to start moving mapped
 		 * memory onto the inactive list.
@@ -1371,7 +1412,13 @@ loop_again:
 			temp_priority[i] = priority;
 			sc.nr_scanned = 0;
 			note_zone_scanning_priority(zone, priority);
-			nr_reclaimed += shrink_zone(priority, zone, &sc);
+			/*
+			 * We put equal pressure on every zone, unless one
+			 * zone has way too many pages free already.
+			 */
+			if (!zone_watermark_ok(zone, order, 8*zone->pages_high,
+						end_zone, 0))
+				nr_reclaimed += shrink_zone(priority, zone, &sc);
 			reclaim_state->reclaimed_slab = 0;
 			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
						lru_pages);
@@ -1688,9 +1735,11 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
 {
 	pg_data_t *pgdat;
 	cpumask_t mask;
+	int nid;
 
 	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
-		for_each_online_pgdat(pgdat) {
+		for_each_node_state(nid, N_HIGH_MEMORY) {
+			pgdat = NODE_DATA(nid);
 			mask = node_to_cpumask(pgdat->node_id);
 			if (any_online_cpu(mask) != NR_CPUS)
 				/* One of our CPUs online: restore mask */
@@ -1727,7 +1776,7 @@ static int __init kswapd_init(void)
 	int nid;
 
 	swap_setup();
-	for_each_online_node(nid)
+	for_each_node_state(nid, N_HIGH_MEMORY)
 		kswapd_run(nid);
 	hotcpu_notifier(cpu_callback, 0);
 	return 0;
@@ -1847,7 +1896,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 
 int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 {
-	cpumask_t mask;
 	int node_id;
 
 	/*
@@ -1884,8 +1932,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 * as wide as possible.
 	 */
 	node_id = zone_to_nid(zone);
-	mask = node_to_cpumask(node_id);
-	if (!cpus_empty(mask) && node_id != numa_node_id())
+	if (node_state(node_id, N_CPU) && node_id != numa_node_id())
 		return 0;
 	return __zone_reclaim(zone, gfp_mask, order);
 }
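
The swap_tendency arithmetic added by the first two hunks can be tried out in isolation. Below is a minimal user-space sketch of that heuristic, under a few stated assumptions: plain integer parameters stand in for the zone counters (NR_ACTIVE, NR_INACTIVE), mapped_ratio and distress, a single "swappiness" argument stands in for both sc->swappiness and the global vm_swappiness (which normally carry the same sysctl value), and the helper name swap_tendency_sketch() plus the sample numbers are made up for illustration. In this kernel version shrink_active_list() compares the result against 100 to decide whether mapped pages become eligible for reclaim (reclaim_mapped), which is what the printf mimics.

/*
 * User-space sketch of the swap_tendency heuristic in shrink_active_list(),
 * including the imbalance feedback added by the patch above.  This only
 * reproduces the integer math; it is not kernel code.
 */
#include <stdio.h>

static long swap_tendency_sketch(long nr_active,	/* NR_ACTIVE pages in the zone */
				 long nr_inactive,	/* NR_INACTIVE pages in the zone */
				 long mapped_ratio,	/* % of ram that is mapped */
				 long distress,		/* derived from the reclaim priority */
				 long swappiness)	/* vm.swappiness, 0..100 */
{
	long swap_tendency = mapped_ratio / 2 + distress + swappiness;
	long imbalance;

	/* active:inactive ratio; the +1 avoids division by zero */
	imbalance = nr_active / (nr_inactive + 1);

	/* scale the imbalance down by swappiness ... */
	imbalance *= (swappiness + 1);
	imbalance /= 100;

	/* ... and by how much of the ram is mapped */
	imbalance *= mapped_ratio;
	imbalance /= 100;

	return swap_tendency + imbalance;
}

int main(void)
{
	/*
	 * Hypothetical numbers: the active list is ~100 times larger than
	 * the inactive list, 80% of ram is mapped, no distress, and
	 * swappiness is 40.  Without the imbalance term the tendency
	 * would be 80/2 + 0 + 40 = 80 and stay below the threshold.
	 */
	long tendency = swap_tendency_sketch(1000000, 10000, 80, 0, 40);

	/* shrink_active_list() sets reclaim_mapped once this reaches 100 */
	printf("swap_tendency = %ld -> reclaim_mapped = %d\n",
	       tendency, tendency >= 100 ? 1 : 0);
	return 0;
}

With these sample numbers the imbalance term contributes 32 and pushes the tendency to 112, past the threshold; with swappiness at, say, 10 the same 100:1 imbalance would add only about 8, which is the "graceful at low swappiness" behaviour the patch comments describe.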