/*
* The maximum number of pages to writeout in a single bdflush/kupdate
- * operation. We do this so we don't hold I_LOCK against an inode for
+ * operation. We do this so we don't hold I_SYNC against an inode for
* enormous amounts of time, which would block a userspace task which has
* been forced to throttle against that inode. Also, the code reevaluates
* the dirty each time it has written this many pages.
*/
int dirty_background_ratio = 5;
+/*
+ * free highmem will not be subtracted from the total free memory
+ * for calculating free ratios if vm_highmem_is_dirtyable is true
+ */
+int vm_highmem_is_dirtyable;
+
/*
* The generator of dirty data starts writeback at this percentage
*/
*/
static inline void __bdi_writeout_inc(struct backing_dev_info *bdi)
{
- __prop_inc_percpu(&vm_completions, &bdi->completions);
+ __prop_inc_percpu_max(&vm_completions, &bdi->completions,
+ bdi->max_prop_frac);
+}
+
+void bdi_writeout_inc(struct backing_dev_info *bdi)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __bdi_writeout_inc(bdi);
+ local_irq_restore(flags);
}
+EXPORT_SYMBOL_GPL(bdi_writeout_inc);
static inline void task_dirty_inc(struct task_struct *tsk)
{
avail_dirty = dirty -
(global_page_state(NR_FILE_DIRTY) +
global_page_state(NR_WRITEBACK) +
- global_page_state(NR_UNSTABLE_NFS));
+ global_page_state(NR_UNSTABLE_NFS) +
+ global_page_state(NR_WRITEBACK_TEMP));
if (avail_dirty < 0)
avail_dirty = 0;
*
* dirty -= (dirty/8) * p_{t}
*/
-void task_dirty_limit(struct task_struct *tsk, long *pdirty)
+static void task_dirty_limit(struct task_struct *tsk, long *pdirty)
{
long numerator, denominator;
long dirty = *pdirty;
*pdirty = dirty;
}
+/*
+ *
+ */
+static DEFINE_SPINLOCK(bdi_lock);
+static unsigned int bdi_min_ratio;
+
+int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
+{
+ int ret = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&bdi_lock, flags);
+ if (min_ratio > bdi->max_ratio) {
+ ret = -EINVAL;
+ } else {
+ min_ratio -= bdi->min_ratio;
+ if (bdi_min_ratio + min_ratio < 100) {
+ bdi_min_ratio += min_ratio;
+ bdi->min_ratio += min_ratio;
+ } else {
+ ret = -EINVAL;
+ }
+ }
+ spin_unlock_irqrestore(&bdi_lock, flags);
+
+ return ret;
+}
+
+int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ if (max_ratio > 100)
+ return -EINVAL;
+
+ spin_lock_irqsave(&bdi_lock, flags);
+ if (bdi->min_ratio > max_ratio) {
+ ret = -EINVAL;
+ } else {
+ bdi->max_ratio = max_ratio;
+ bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
+ }
+ spin_unlock_irqrestore(&bdi_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(bdi_set_max_ratio);
+
/*
* Work out the current dirty-memory clamping and background writeout
* thresholds.
x = global_page_state(NR_FREE_PAGES)
+ global_page_state(NR_INACTIVE)
+ global_page_state(NR_ACTIVE);
- x -= highmem_dirtyable_memory(x);
+
+ if (!vm_highmem_is_dirtyable)
+ x -= highmem_dirtyable_memory(x);
+
return x + 1; /* Ensure that we never return 0 */
}
-static void
+void
get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
struct backing_dev_info *bdi)
{
int background_ratio; /* Percentages */
int dirty_ratio;
- int unmapped_ratio;
long background;
long dirty;
unsigned long available_memory = determine_dirtyable_memory();
struct task_struct *tsk;
- unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) +
- global_page_state(NR_ANON_PAGES)) * 100) /
- available_memory;
-
dirty_ratio = vm_dirty_ratio;
- if (dirty_ratio > unmapped_ratio / 2)
- dirty_ratio = unmapped_ratio / 2;
-
if (dirty_ratio < 5)
dirty_ratio = 5;
*pdirty = dirty;
if (bdi) {
- u64 bdi_dirty = dirty;
+ u64 bdi_dirty;
long numerator, denominator;
/*
*/
bdi_writeout_fraction(bdi, &numerator, &denominator);
+ bdi_dirty = (dirty * (100 - bdi_min_ratio)) / 100;
bdi_dirty *= numerator;
do_div(bdi_dirty, denominator);
+ bdi_dirty += (dirty * bdi->min_ratio) / 100;
+ if (bdi_dirty > (dirty * bdi->max_ratio) / 100)
+ bdi_dirty = dirty * bdi->max_ratio / 100;
*pbdi_dirty = bdi_dirty;
clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty);
*/
static void balance_dirty_pages(struct address_space *mapping)
{
- long bdi_nr_reclaimable;
- long bdi_nr_writeback;
+ long nr_reclaimable, bdi_nr_reclaimable;
+ long nr_writeback, bdi_nr_writeback;
long background_thresh;
long dirty_thresh;
long bdi_thresh;
get_dirty_limits(&background_thresh, &dirty_thresh,
&bdi_thresh, bdi);
+
+ nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS);
+ nr_writeback = global_page_state(NR_WRITEBACK);
+
bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
+
if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
break;
+ /*
+ * Throttle it only when the background writeback cannot
+ * catch-up. This avoids (excessively) small writeouts
+ * when the bdi limits are ramping up.
+ */
+ if (nr_reclaimable + nr_writeback <
+ (background_thresh + dirty_thresh) / 2)
+ break;
+
if (!bdi->dirty_exceeded)
bdi->dirty_exceeded = 1;
long background_thresh;
long dirty_thresh;
- if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO)) {
- /*
- * The caller might hold locks which can prevent IO completion
- * or progress in the filesystem. So we cannot just sit here
- * waiting for IO to complete.
- */
- congestion_wait(WRITE, HZ/10);
- return;
- }
-
for ( ; ; ) {
get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
global_page_state(NR_WRITEBACK) <= dirty_thresh)
break;
congestion_wait(WRITE, HZ/10);
+
+ /*
+ * The caller might hold locks which can prevent IO completion
+ * or progress in the filesystem. So we cannot just sit here
+ * waiting for IO to complete.
+ */
+ if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO))
+ break;
}
}
global_page_state(NR_UNSTABLE_NFS) < background_thresh
&& min_pages <= 0)
break;
+ wbc.more_io = 0;
wbc.encountered_congestion = 0;
wbc.nr_to_write = MAX_WRITEBACK_PAGES;
wbc.pages_skipped = 0;
min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
/* Wrote less than expected */
- congestion_wait(WRITE, HZ/10);
- if (!wbc.encountered_congestion)
+ if (wbc.encountered_congestion || wbc.more_io)
+ congestion_wait(WRITE, HZ/10);
+ else
break;
}
}
global_page_state(NR_UNSTABLE_NFS) +
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
while (nr_to_write > 0) {
+ wbc.more_io = 0;
wbc.encountered_congestion = 0;
wbc.nr_to_write = MAX_WRITEBACK_PAGES;
writeback_inodes(&wbc);
if (wbc.nr_to_write > 0) {
- if (wbc.encountered_congestion)
+ if (wbc.encountered_congestion || wbc.more_io)
congestion_wait(WRITE, HZ/10);
else
break; /* All the old data is written */
* mapping is pinned by the vma's ->vm_file reference.
*
* We take care to handle the case where the page was truncated from the
- * mapping by re-checking page_mapping() insode tree_lock.
+ * mapping by re-checking page_mapping() inside tree_lock.
*/
int __set_page_dirty_nobuffers(struct page *page)
{
return 0;
}
-int fastcall set_page_dirty(struct page *page)
+int set_page_dirty(struct page *page)
{
int ret = __set_page_dirty(page);
if (ret)
radix_tree_tag_clear(&mapping->page_tree,
page_index(page),
PAGECACHE_TAG_WRITEBACK);
- if (bdi_cap_writeback_dirty(bdi)) {
+ if (bdi_cap_account_writeback(bdi)) {
__dec_bdi_stat(bdi, BDI_WRITEBACK);
__bdi_writeout_inc(bdi);
}
radix_tree_tag_set(&mapping->page_tree,
page_index(page),
PAGECACHE_TAG_WRITEBACK);
- if (bdi_cap_writeback_dirty(bdi))
+ if (bdi_cap_account_writeback(bdi))
__inc_bdi_stat(bdi, BDI_WRITEBACK);
}
if (!PageDirty(page))