/*
* The maximum number of pages to writeout in a single bdflush/kupdate
- * operation. We do this so we don't hold I_LOCK against an inode for
+ * operation. We do this so we don't hold I_SYNC against an inode for
* enormous amounts of time, which would block a userspace task which has
* been forced to throttle against that inode. Also, the code reevaluates
* the dirty each time it has written this many pages.
*
*/
static struct prop_descriptor vm_completions;
+static struct prop_descriptor vm_dirties;
static unsigned long determine_dirtyable_memory(void);
if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
int shift = calc_period_shift();
prop_change_shift(&vm_completions, shift);
+ prop_change_shift(&vm_dirties, shift);
}
return ret;
}
__prop_inc_percpu(&vm_completions, &bdi->completions);
}
+static inline void task_dirty_inc(struct task_struct *tsk)
+{
+ prop_inc_single(&vm_dirties, &tsk->dirties);
+}
+
/*
* Obtain an accurate fraction of the BDI's portion.
*/
*pbdi_dirty = min(*pbdi_dirty, avail_dirty);
}
+static inline void task_dirties_fraction(struct task_struct *tsk,
+ long *numerator, long *denominator)
+{
+ prop_fraction_single(&vm_dirties, &tsk->dirties,
+ numerator, denominator);
+}
+
+/*
+ * scale the dirty limit
+ *
+ * task specific dirty limit:
+ *
+ * dirty -= (dirty/8) * p_{t}
+ */
+void task_dirty_limit(struct task_struct *tsk, long *pdirty)
+{
+ long numerator, denominator;
+ long dirty = *pdirty;
+ u64 inv = dirty >> 3;
+
+ task_dirties_fraction(tsk, &numerator, &denominator);
+ inv *= numerator;
+ do_div(inv, denominator);
+
+ dirty -= inv;
+ if (dirty < *pdirty/2)
+ dirty = *pdirty/2;
+
+ *pdirty = dirty;
+}
+
/*
* Work out the current dirty-memory clamping and background writeout
* thresholds.
{
int background_ratio; /* Percentages */
int dirty_ratio;
- int unmapped_ratio;
long background;
long dirty;
unsigned long available_memory = determine_dirtyable_memory();
struct task_struct *tsk;
- unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) +
- global_page_state(NR_ANON_PAGES)) * 100) /
- available_memory;
-
dirty_ratio = vm_dirty_ratio;
- if (dirty_ratio > unmapped_ratio / 2)
- dirty_ratio = unmapped_ratio / 2;
-
if (dirty_ratio < 5)
dirty_ratio = 5;
*pbdi_dirty = bdi_dirty;
clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty);
+ task_dirty_limit(current, pbdi_dirty);
}
}
*/
static void balance_dirty_pages(struct address_space *mapping)
{
- long bdi_nr_reclaimable;
- long bdi_nr_writeback;
+ long nr_reclaimable, bdi_nr_reclaimable;
+ long nr_writeback, bdi_nr_writeback;
long background_thresh;
long dirty_thresh;
long bdi_thresh;
get_dirty_limits(&background_thresh, &dirty_thresh,
&bdi_thresh, bdi);
+
+ nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS);
+ nr_writeback = global_page_state(NR_WRITEBACK);
+
bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
+
if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
break;
+ /*
+ * Throttle it only when the background writeback cannot
+ * catch-up. This avoids (excessively) small writeouts
+ * when the bdi limits are ramping up.
+ */
+ if (nr_reclaimable + nr_writeback <
+ (background_thresh + dirty_thresh) / 2)
+ break;
+
if (!bdi->dirty_exceeded)
bdi->dirty_exceeded = 1;
long background_thresh;
long dirty_thresh;
- if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO)) {
- /*
- * The caller might hold locks which can prevent IO completion
- * or progress in the filesystem. So we cannot just sit here
- * waiting for IO to complete.
- */
- congestion_wait(WRITE, HZ/10);
- return;
- }
-
for ( ; ; ) {
get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
global_page_state(NR_WRITEBACK) <= dirty_thresh)
break;
congestion_wait(WRITE, HZ/10);
+
+ /*
+ * The caller might hold locks which can prevent IO completion
+ * or progress in the filesystem. So we cannot just sit here
+ * waiting for IO to complete.
+ */
+ if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO))
+ break;
}
}
shift = calc_period_shift();
prop_descriptor_init(&vm_completions, shift);
+ prop_descriptor_init(&vm_dirties, shift);
}
/**
ret = (*writepage)(page, wbc, data);
- if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE))
+ if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
unlock_page(page);
+ ret = 0;
+ }
if (ret || (--(wbc->nr_to_write) <= 0))
done = 1;
if (wbc->nonblocking && bdi_write_congested(bdi)) {
* mapping is pinned by the vma's ->vm_file reference.
*
* We take care to handle the case where the page was truncated from the
- * mapping by re-checking page_mapping() insode tree_lock.
+ * mapping by re-checking page_mapping() inside tree_lock.
*/
int __set_page_dirty_nobuffers(struct page *page)
{
* If the mapping doesn't provide a set_page_dirty a_op, then
* just fall through and assume that it wants buffer_heads.
*/
-int fastcall set_page_dirty(struct page *page)
+static int __set_page_dirty(struct page *page)
{
struct address_space *mapping = page_mapping(page);
}
return 0;
}
+
+int fastcall set_page_dirty(struct page *page)
+{
+ int ret = __set_page_dirty(page);
+ if (ret)
+ task_dirty_inc(current);
+ return ret;
+}
EXPORT_SYMBOL(set_page_dirty);
/*