From: Chris Mason <mason@suse.com>
Date: Wed, 1 Feb 2006 11:06:49 +0000 (-0800)
Subject: [PATCH] reiserfs: reiserfs hang and performance fix for data=journal mode
X-Git-Tag: v2.6.16-rc2~21
X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e0e851cf30f1a9bd2e2a7624e9810378d6a2b072;p=linux-2.6

[PATCH] reiserfs: reiserfs hang and performance fix for data=journal mode

In data=journal mode, reiserfs writepage needs to make sure not to trigger
transactions while being run under PF_MEMALLOC.  This patch makes sure to
redirty the page instead of forcing a transaction start in this case.

Also, calling filemap_fdata* in order to trigger io on the block device can
cause lock inversions on the page lock.  Instead, do simple batching from
flush_commit_list.

Signed-off-by: Chris Mason <mason@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 60e2f23447..b33d67bba2 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2363,6 +2363,13 @@ static int reiserfs_write_full_page(struct page *page,
 	int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
 	th.t_trans_id = 0;
 
+	/* no logging allowed when nonblocking or from PF_MEMALLOC */
+	if (checked && (current->flags & PF_MEMALLOC)) {
+		redirty_page_for_writepage(wbc, page);
+		unlock_page(page);
+		return 0;
+	}
+
 	/* The page dirty bit is cleared before writepage is called, which
 	 * means we have to tell create_empty_buffers to make dirty buffers
 	 * The page really should be up to date at this point, so tossing
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index bc8fe963b3..1b2402a9a8 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -988,6 +988,7 @@ static int flush_commit_list(struct super_block *s,
 	struct reiserfs_journal *journal = SB_JOURNAL(s);
 	int barrier = 0;
 	int retval = 0;
+	int write_len;
 
 	reiserfs_check_lock_depth(s, "flush_commit_list");
 
@@ -1037,16 +1038,24 @@ static int flush_commit_list(struct super_block *s,
 	BUG_ON(!list_empty(&jl->j_bh_list));
 	/*
 	 * for the description block and all the log blocks, submit any buffers
-	 * that haven't already reached the disk
+	 * that haven't already reached the disk.  Try to write at least 256
+	 * log blocks. later on, we will only wait on blocks that correspond
+	 * to this transaction, but while we're unplugging we might as well
+	 * get a chunk of data on there.
 	 */
 	atomic_inc(&journal->j_async_throttle);
-	for (i = 0; i < (jl->j_len + 1); i++) {
+	write_len = jl->j_len + 1;
+	if (write_len < 256)
+		write_len = 256;
+	for (i = 0 ; i < write_len ; i++) {
 		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
 		    SB_ONDISK_JOURNAL_SIZE(s);
 		tbh = journal_find_get_block(s, bn);
-		if (buffer_dirty(tbh))	/* redundant, ll_rw_block() checks */
-			ll_rw_block(SWRITE, 1, &tbh);
-		put_bh(tbh);
+		if (tbh) {
+			if (buffer_dirty(tbh))
+			    ll_rw_block(WRITE, 1, &tbh) ;
+			put_bh(tbh) ;
+		}
 	}
 	atomic_dec(&journal->j_async_throttle);