#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/highmem.h>
+#include <linux/vmstat.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
/* synchronous write or broken a_ops? */
ClearPageReclaim(page);
}
-
+ inc_zone_page_state(page, NR_VMSCAN_WRITE);
return PAGE_SUCCESS;
}
BUG_ON(mapping != page_mapping(page));
write_lock_irq(&mapping->tree_lock);
-
/*
- * The non-racy check for busy page. It is critical to check
- * PageDirty _after_ making sure that the page is freeable and
- * not in use by anybody. (pagecache + us == 2)
+ * The non racy check for a busy page.
+ *
+ * Must be careful with the order of the tests. When someone has
+ * a ref to the page, it may be possible that they dirty it then
+ * drop the reference. So if PageDirty is tested before page_count
+ * here, then the following race may occur:
+ *
+ * get_user_pages(&page);
+ * [user mapping goes away]
+ * write_to(page);
+ * !PageDirty(page) [good]
+ * SetPageDirty(page);
+ * put_page(page);
+ * !page_count(page) [good, discard it]
+ *
+ * [oops, our write_to data is lost]
+ *
+ * Reversing the order of the tests ensures such a situation cannot
+ * escape unnoticed. The smp_rmb is needed to ensure the page->flags
+ * load is not satisfied before that of page->_count.
+ *
+ * Note that if SetPageDirty is always performed via set_page_dirty,
+ * and thus under tree_lock, then this ordering is not required.
*/
if (unlikely(page_count(page) != 2))
goto cannot_free;
for_each_zone(zone)
lru_pages += zone->nr_active + zone->nr_inactive;
- nr_slab = global_page_state(NR_SLAB);
+ nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
/* If slab caches are huge, it's better to hit them first */
while (nr_slab >= lru_pages) {
reclaim_state.reclaimed_slab = 0;
#define RECLAIM_ZONE (1<<0) /* Run shrink_cache on the zone */
#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */
#define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */
-#define RECLAIM_SLAB (1<<3) /* Do a global slab shrink if the zone is out of memory */
/*
* Priority for ZONE_RECLAIM. This determines the fraction of pages
*/
int sysctl_min_unmapped_ratio = 1;
+/*
+ * If the number of slab pages in a zone grows beyond this percentage then
+ * slab reclaim needs to occur.
+ */
+int sysctl_min_slab_ratio = 5;
+
/*
* Try to free up some pages from this zone through reclaim.
*/
.gfp_mask = gfp_mask,
.swappiness = vm_swappiness,
};
+ unsigned long slab_reclaimable;
disable_swap_token();
cond_resched();
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
- /*
- * Free memory by calling shrink zone with increasing priorities
- * until we have enough memory freed.
- */
- priority = ZONE_RECLAIM_PRIORITY;
- do {
- nr_reclaimed += shrink_zone(priority, zone, &sc);
- priority--;
- } while (priority >= 0 && nr_reclaimed < nr_pages);
+ if (zone_page_state(zone, NR_FILE_PAGES) -
+ zone_page_state(zone, NR_FILE_MAPPED) >
+ zone->min_unmapped_pages) {
+ /*
+ * Free memory by calling shrink zone with increasing
+ * priorities until we have enough memory freed.
+ */
+ priority = ZONE_RECLAIM_PRIORITY;
+ do {
+ nr_reclaimed += shrink_zone(priority, zone, &sc);
+ priority--;
+ } while (priority >= 0 && nr_reclaimed < nr_pages);
+ }
- if (nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) {
+ slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
+ if (slab_reclaimable > zone->min_slab_pages) {
/*
* shrink_slab() does not currently allow us to determine how
- * many pages were freed in this zone. So we just shake the slab
- * a bit and then go off node for this particular allocation
- * despite possibly having freed enough memory to allocate in
- * this zone. If we freed local memory then the next
- * allocations will be local again.
+ * many pages were freed in this zone. So we take the current
+ * number of slab pages and shake the slab until it is reduced
+ * by the same nr_pages that we used for reclaiming unmapped
+ * pages.
*
- * shrink_slab will free memory on all zones and may take
- * a long time.
+ * Note that shrink_slab will free memory on all zones and may
+ * take a long time.
+ */
+ while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&
+ zone_page_state(zone, NR_SLAB_RECLAIMABLE) >
+ slab_reclaimable - nr_pages)
+ ;
+
+ /*
+ * Update nr_reclaimed by the number of slab pages we
+ * reclaimed from this zone.
*/
- shrink_slab(sc.nr_scanned, gfp_mask, order);
+ nr_reclaimed += slab_reclaimable -
+ zone_page_state(zone, NR_SLAB_RECLAIMABLE);
}
p->reclaim_state = NULL;
int node_id;
/*
- * Zone reclaim reclaims unmapped file backed pages.
+ * Zone reclaim reclaims unmapped file backed pages and
+ * slab pages if we are over the defined limits.
*
* A small portion of unmapped file backed pages is needed for
* file I/O otherwise pages read by file I/O will be immediately
* unmapped file backed pages.
*/
if (zone_page_state(zone, NR_FILE_PAGES) -
- zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio)
+ zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages
+ && zone_page_state(zone, NR_SLAB_RECLAIMABLE)
+ <= zone->min_slab_pages)
return 0;
/*
* over remote processors and spread off node memory allocations
* as wide as possible.
*/
- node_id = zone->zone_pgdat->node_id;
+ node_id = zone_to_nid(zone);
mask = node_to_cpumask(node_id);
if (!cpus_empty(mask) && node_id != numa_node_id())
return 0;