[PATCH] re-export clear_page_dirty_for_io()

[linux-2.6] / mm / vmscan.c
diff --git a/mm/vmscan.c b/mm/vmscan.c

index c62cadce04261e55ce1e7ce7ae15ae183006024a..28130541270f5ad35b3e18ac878b349c61c6e01f 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -70,10 +70,13 @@ struct scan_control {
         unsigned int priority;
  
         /* This context's GFP mask */
-       unsigned int gfp_mask;
+       gfp_t gfp_mask;
  
         int may_writepage;
  
+       /* Can pages be swapped as part of reclaim? */
+       int may_swap;
+
         /* This context's SWAP_CLUSTER_MAX. If freeing memory for
          * suspend, we effectively ignore SWAP_CLUSTER_MAX.
          * In this context, it doesn't matter that we scan the
@@ -183,7 +186,7 @@ EXPORT_SYMBOL(remove_shrinker);
   *
   * Returns the number of slab objects which we shrunk.
   */
-static int shrink_slab(unsigned long scanned, unsigned int gfp_mask,
+static int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
                         unsigned long lru_pages)
  {
         struct shrinker *shrinker;
@@ -415,6 +418,8 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
                  * Try to allocate it some swap space here.
                  */
                 if (PageAnon(page) && !PageSwapCache(page)) {
+                       if (!sc->may_swap)
+                               goto keep_locked;
                         if (!add_to_swap(page))
                                 goto activate_locked;
                 }
@@ -508,14 +513,15 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
                  * PageDirty _after_ making sure that the page is freeable and
                  * not in use by anybody.       (pagecache + us == 2)
                  */
-               if (page_count(page) != 2 || PageDirty(page)) {
-                       write_unlock_irq(&mapping->tree_lock);
-                       goto keep_locked;
-               }
+               if (unlikely(page_count(page) != 2))
+                       goto cannot_free;
+               smp_rmb();
+               if (unlikely(PageDirty(page)))
+                       goto cannot_free;
  
  #ifdef CONFIG_SWAP
                 if (PageSwapCache(page)) {
-                       swp_entry_t swap = { .val = page->private };
+                       swp_entry_t swap = { .val = page_private(page) };
                         __delete_from_swap_cache(page);
                         write_unlock_irq(&mapping->tree_lock);
                         swap_free(swap);
@@ -535,6 +541,10 @@ free_it:
                         __pagevec_release_nonlru(&freed_pvec);
                 continue;
  
+cannot_free:
+               write_unlock_irq(&mapping->tree_lock);
+               goto keep_locked;
+
  activate_locked:
                 SetPageActive(page);
                 pgactivate++;
@@ -819,6 +829,8 @@ shrink_zone(struct zone *zone, struct scan_control *sc)
         unsigned long nr_active;
         unsigned long nr_inactive;
  
+       atomic_inc(&zone->reclaim_in_progress);
+
         /*
          * Add one to `nr_to_scan' just to make sure that the kernel will
          * slowly sift through the active list.
@@ -858,6 +870,8 @@ shrink_zone(struct zone *zone, struct scan_control *sc)
         }
  
         throttle_vm_writeout();
+
+       atomic_dec(&zone->reclaim_in_progress);
  }
  
  /*
@@ -887,7 +901,7 @@ shrink_caches(struct zone **zones, struct scan_control *sc)
                 if (zone->present_pages == 0)
                         continue;
  
-               if (!cpuset_zone_allowed(zone))
+               if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
                         continue;
  
                 zone->temp_priority = sc->priority;
@@ -914,8 +928,7 @@ shrink_caches(struct zone **zones, struct scan_control *sc)
   * holds filesystem locks which prevent writeout this might not work, and the
   * allocation attempt will fail.
   */
-int try_to_free_pages(struct zone **zones,
-               unsigned int gfp_mask, unsigned int order)
+int try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
  {
         int priority;
         int ret = 0;
@@ -927,13 +940,14 @@ int try_to_free_pages(struct zone **zones,
  
         sc.gfp_mask = gfp_mask;
         sc.may_writepage = 0;
+       sc.may_swap = 1;
  
         inc_page_state(allocstall);
  
         for (i = 0; zones[i] != NULL; i++) {
                 struct zone *zone = zones[i];
  
-               if (!cpuset_zone_allowed(zone))
+               if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
                         continue;
  
                 zone->temp_priority = DEF_PRIORITY;
@@ -967,7 +981,7 @@ int try_to_free_pages(struct zone **zones,
                  * writeout.  So in laptop mode, write out the whole world.
                  */
                 if (total_scanned > sc.swap_cluster_max + sc.swap_cluster_max/2) {
-                       wakeup_bdflush(laptop_mode ? 0 : total_scanned);
+                       wakeup_pdflush(laptop_mode ? 0 : total_scanned);
                         sc.may_writepage = 1;
                 }
  
@@ -979,7 +993,7 @@ out:
         for (i = 0; zones[i] != 0; i++) {
                 struct zone *zone = zones[i];
  
-               if (!cpuset_zone_allowed(zone))
+               if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
                         continue;
  
                 zone->prev_priority = zone->temp_priority;
@@ -1027,6 +1041,7 @@ loop_again:
         total_reclaimed = 0;
         sc.gfp_mask = GFP_KERNEL;
         sc.may_writepage = 0;
+       sc.may_swap = 1;
         sc.nr_mapped = read_page_state(nr_mapped);
  
         inc_page_state(pageoutrun);
@@ -1059,7 +1074,7 @@ loop_again:
                                         continue;
  
                                 if (!zone_watermark_ok(zone, order,
-                                               zone->pages_high, 0, 0, 0)) {
+                                               zone->pages_high, 0, 0)) {
                                         end_zone = i;
                                         goto scan;
                                 }
@@ -1096,7 +1111,7 @@ scan:
  
                         if (nr_pages == 0) {    /* Not software suspend */
                                 if (!zone_watermark_ok(zone, order,
-                                               zone->pages_high, end_zone, 0, 0))
+                                               zone->pages_high, end_zone, 0))
                                         all_zones_ok = 0;
                         }
                         zone->temp_priority = priority;
@@ -1106,7 +1121,9 @@ scan:
                         sc.nr_reclaimed = 0;
                         sc.priority = priority;
                         sc.swap_cluster_max = nr_pages? nr_pages : SWAP_CLUSTER_MAX;
+                       atomic_inc(&zone->reclaim_in_progress);
                         shrink_zone(zone, &sc);
+                       atomic_dec(&zone->reclaim_in_progress);
                         reclaim_state->reclaimed_slab = 0;
                         nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
                                                 lru_pages);
@@ -1208,8 +1225,8 @@ static int kswapd(void *p)
         order = 0;
         for ( ; ; ) {
                 unsigned long new_order;
-               if (current->flags & PF_FREEZE)
-                       refrigerator(PF_FREEZE);
+
+               try_to_freeze();
  
                 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
                 new_order = pgdat->kswapd_max_order;
@@ -1242,15 +1259,15 @@ void wakeup_kswapd(struct zone *zone, int order)
                 return;
  
         pgdat = zone->zone_pgdat;
-       if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0))
+       if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
                 return;
         if (pgdat->kswapd_max_order < order)
                 pgdat->kswapd_max_order = order;
-       if (!cpuset_zone_allowed(zone))
+       if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
                 return;
-       if (!waitqueue_active(&zone->zone_pgdat->kswapd_wait))
+       if (!waitqueue_active(&pgdat->kswapd_wait))
                 return;
-       wake_up_interruptible(&zone->zone_pgdat->kswapd_wait);
+       wake_up_interruptible(&pgdat->kswapd_wait);
  }
  
  #ifdef CONFIG_PM
@@ -1318,3 +1335,75 @@ static int __init kswapd_init(void)
  }
  
  module_init(kswapd_init)
+
+
+/*
+ * Try to free up some pages from this zone through reclaim.
+ */
+int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
+{
+       struct scan_control sc;
+       int wanted = 1 << order;
+
+       /*
+        * The reclaim may sleep, so give up at once when the caller
+        * cannot wait, or when the zone is marked all_unreclaimable.
+        */
+       if (!(gfp_mask & __GFP_WAIT) || zone->all_unreclaimable)
+               return 0;
+
+       /* Neither may_writepage nor may_swap is granted to this pass. */
+       sc.gfp_mask = gfp_mask;
+       sc.may_writepage = 0;
+       sc.may_swap = 0;
+       sc.nr_mapped = read_page_state(nr_mapped);
+       sc.nr_scanned = 0;
+       sc.nr_reclaimed = 0;
+       /* scan at the highest priority */
+       sc.priority = 0;
+
+       /* swap_cluster_max: larger of (1 << order) and SWAP_CLUSTER_MAX. */
+       sc.swap_cluster_max = SWAP_CLUSTER_MAX;
+       if (wanted > SWAP_CLUSTER_MAX)
+               sc.swap_cluster_max = wanted;
+
+       /* Don't reclaim the zone if there are other reclaimers active */
+       if (atomic_read(&zone->reclaim_in_progress) > 0)
+               return 0;
+
+       shrink_zone(zone, &sc);
+
+       /* Hand back the count left in sc.nr_reclaimed. */
+       return sc.nr_reclaimed;
+}
+
+asmlinkage long sys_set_zone_reclaim(unsigned int node, unsigned int zone,
+                                    unsigned int state)
+{
+       const unsigned int known = 1 << ZONE_DMA | 1 << ZONE_NORMAL |
+                                  1 << ZONE_HIGHMEM;
+       struct zone *z;
+       int idx;
+
+       /* Callers without CAP_SYS_ADMIN are refused with -EACCES. */
+       if (!capable(CAP_SYS_ADMIN))
+               return -EACCES;
+
+       /* The node number must be in range and the node online. */
+       if (node >= MAX_NUMNODES || !node_online(node))
+               return -EINVAL;
+
+       /* This will break if we ever add more zones */
+       if ((zone & known) == 0)
+               return -EINVAL;
+
+       /* 'zone' is a bitmask; bit idx selects node_zones[idx]. */
+       for (idx = 0; idx < MAX_NR_ZONES; idx++) {
+               if (zone & (1 << idx)) {
+                       z = &NODE_DATA(node)->node_zones[idx];
+                       z->reclaim_pages = state ? 1 : 0;
+               }
+       }
+
+       return 0;
+}