ext4: Use new framework for data=ordered mode in JBD2

author Jan Kara <jack@suse.cz>

Fri, 11 Jul 2008 23:27:31 +0000 (19:27 -0400)

committer Theodore Ts'o <tytso@mit.edu>

Fri, 11 Jul 2008 23:27:31 +0000 (19:27 -0400)
author Jan Kara <jack@suse.cz>
Fri, 11 Jul 2008 23:27:31 +0000 (19:27 -0400)
committer Theodore Ts'o <tytso@mit.edu>
Fri, 11 Jul 2008 23:27:31 +0000 (19:27 -0400)
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h

index abf2744164e07202d64acc462910c8f65cdc776d..c2903ef72159f0d435127e3dfc3703dff701eabd 100644 (file)
--- a/fs/ext4/ext4_i.h
+++ b/fs/ext4/ext4_i.h
@@ -150,6 +150,7 @@ struct ext4_inode_info {
          */
         struct rw_semaphore i_data_sem;
         struct inode vfs_inode;
+       struct jbd2_inode jinode;
  
         unsigned long i_ext_generation;
         struct ext4_ext_cache i_cached_extent;
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h

index d0aa9ee20f88fc999f5e009864b96b5a9005ecb3..eb8bc3afe6e9f590f491bf9098980f39f27bf245 100644 (file)
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -154,8 +154,6 @@ int __ext4_journal_dirty_metadata(const char *where,
  #define ext4_journal_forget(handle, bh) \
         __ext4_journal_forget(__func__, (handle), (bh))
  
-int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
-
  handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
  int __ext4_journal_stop(const char *where, handle_t *handle);
  
@@ -192,6 +190,11 @@ static inline int ext4_journal_force_commit(journal_t *journal)
         return jbd2_journal_force_commit(journal);
  }
  
+static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
+{
+       return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode);
+}
+
  /* super.c */
  int ext4_force_commit(struct super_block *sb);
  
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index 320acb6c35bfc356243b30a3ff8f72e66d4b913d..7b9569179fdf51041e08e4982189dea26d1ad6cf 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -39,6 +39,13 @@
  #include "xattr.h"
  #include "acl.h"
  
+static inline int ext4_begin_ordered_truncate(struct inode *inode,
+                                             loff_t new_size)
+{
+       return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode,
+                                                  new_size);
+}
+
  /*
   * Test whether an inode is a fast symlink.
   */
@@ -181,6 +188,8 @@ void ext4_delete_inode (struct inode * inode)
  {
         handle_t *handle;
  
+       if (ext4_should_order_data(inode))
+               ext4_begin_ordered_truncate(inode, 0);
         truncate_inode_pages(&inode->i_data, 0);
  
         if (is_bad_inode(inode))
@@ -1273,15 +1282,6 @@ out:
         return ret;
  }
  
-int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
-{
-       int err = jbd2_journal_dirty_data(handle, bh);
-       if (err)
-               ext4_journal_abort_handle(__func__, __func__,
-                                               bh, handle, err);
-       return err;
-}
-
  /* For write_end() in data=journal mode */
  static int write_end_fn(handle_t *handle, struct buffer_head *bh)
  {
@@ -1311,8 +1311,7 @@ static int ext4_ordered_write_end(struct file *file,
         from = pos & (PAGE_CACHE_SIZE - 1);
         to = from + len;
  
-       ret = walk_page_buffers(handle, page_buffers(page),
-               from, to, NULL, ext4_journal_dirty_data);
+       ret = ext4_jbd2_file_inode(handle, inode);
  
         if (ret == 0) {
                 /*
@@ -1472,25 +1471,22 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
         return 0;
  }
  
-static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
-{
-       if (buffer_mapped(bh))
-               return ext4_journal_dirty_data(handle, bh);
-       return 0;
-}
-
  static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
  {
         return !buffer_mapped(bh) || buffer_delay(bh);
  }
  
  /*
- * Note that we don't need to start a transaction unless we're journaling
- * data because we should have holes filled from ext4_page_mkwrite(). If
- * we are journaling data, we cannot start transaction directly because
- * transaction start ranks above page lock so we have to do some magic...
+ * Note that we don't need to start a transaction unless we're journaling data
+ * because we should have holes filled from ext4_page_mkwrite(). We don't even
+ * need to file the inode to the transaction's list in ordered mode because if
+ * we are writing back data added by write(), the inode is already there and if
+ * we are writing back data modified via mmap(), no one guarantees in which
+ * transaction the data will hit the disk. In case we are journaling data, we
+ * cannot start transaction directly because transaction start ranks above page
+ * lock so we have to do some magic.
   *
- * In all journalling modes block_write_full_page() will start the I/O.
+ * In all journaling modes block_write_full_page() will start the I/O.
   *
   * Problem:
   *
@@ -1533,86 +1529,7 @@ static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
   * us.
   *
   */
-static int __ext4_ordered_writepage(struct page *page,
-                               struct writeback_control *wbc)
-{
-       struct inode *inode = page->mapping->host;
-       struct buffer_head *page_bufs;
-       handle_t *handle = NULL;
-       int ret = 0;
-       int err;
-
-       if (!page_has_buffers(page)) {
-               create_empty_buffers(page, inode->i_sb->s_blocksize,
-                               (1 << BH_Dirty)|(1 << BH_Uptodate));
-       }
-       page_bufs = page_buffers(page);
-       walk_page_buffers(handle, page_bufs, 0,
-                       PAGE_CACHE_SIZE, NULL, bget_one);
-
-       ret = block_write_full_page(page, ext4_get_block, wbc);
-
-       /*
-        * The page can become unlocked at any point now, and
-        * truncate can then come in and change things.  So we
-        * can't touch *page from now on.  But *page_bufs is
-        * safe due to elevated refcount.
-        */
-
-       /*
-        * And attach them to the current transaction.  But only if
-        * block_write_full_page() succeeded.  Otherwise they are unmapped,
-        * and generally junk.
-        */
-       if (ret == 0) {
-               handle = ext4_journal_start(inode,
-                                       ext4_writepage_trans_blocks(inode));
-               if (IS_ERR(handle)) {
-                       ret = PTR_ERR(handle);
-                       goto out_put;
-               }
-
-               ret = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
-                                       NULL, jbd2_journal_dirty_data_fn);
-               err = ext4_journal_stop(handle);
-               if (!ret)
-                       ret = err;
-       }
-out_put:
-       walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL,
-                         bput_one);
-       return ret;
-}
-
-static int ext4_ordered_writepage(struct page *page,
-                               struct writeback_control *wbc)
-{
-       struct inode *inode = page->mapping->host;
-       loff_t size = i_size_read(inode);
-       loff_t len;
-
-       J_ASSERT(PageLocked(page));
-       J_ASSERT(page_has_buffers(page));
-       if (page->index == size >> PAGE_CACHE_SHIFT)
-               len = size & ~PAGE_CACHE_MASK;
-       else
-               len = PAGE_CACHE_SIZE;
-       BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
-                                ext4_bh_unmapped_or_delay));
-
-       /*
-        * We give up here if we're reentered, because it might be for a
-        * different filesystem.
-        */
-       if (!ext4_journal_current_handle())
-               return __ext4_ordered_writepage(page, wbc);
-
-       redirty_page_for_writepage(wbc, page);
-       unlock_page(page);
-       return 0;
-}
-
-static int __ext4_writeback_writepage(struct page *page,
+static int __ext4_normal_writepage(struct page *page,
                                 struct writeback_control *wbc)
  {
         struct inode *inode = page->mapping->host;
@@ -1624,7 +1541,7 @@ static int __ext4_writeback_writepage(struct page *page,
  }
  
  
-static int ext4_writeback_writepage(struct page *page,
+static int ext4_normal_writepage(struct page *page,
                                 struct writeback_control *wbc)
  {
         struct inode *inode = page->mapping->host;
@@ -1641,7 +1558,7 @@ static int ext4_writeback_writepage(struct page *page,
                                  ext4_bh_unmapped_or_delay));
  
         if (!ext4_journal_current_handle())
-               return __ext4_writeback_writepage(page, wbc);
+               return __ext4_normal_writepage(page, wbc);
  
         redirty_page_for_writepage(wbc, page);
         unlock_page(page);
@@ -1877,7 +1794,7 @@ static int ext4_journalled_set_page_dirty(struct page *page)
  static const struct address_space_operations ext4_ordered_aops = {
         .readpage       = ext4_readpage,
         .readpages      = ext4_readpages,
-       .writepage      = ext4_ordered_writepage,
+       .writepage      = ext4_normal_writepage,
         .sync_page      = block_sync_page,
         .write_begin    = ext4_write_begin,
         .write_end      = ext4_ordered_write_end,
@@ -1891,7 +1808,7 @@ static const struct address_space_operations ext4_ordered_aops = {
  static const struct address_space_operations ext4_writeback_aops = {
         .readpage       = ext4_readpage,
         .readpages      = ext4_readpages,
-       .writepage      = ext4_writeback_writepage,
+       .writepage      = ext4_normal_writepage,
         .sync_page      = block_sync_page,
         .write_begin    = ext4_write_begin,
         .write_end      = ext4_writeback_write_end,
@@ -2019,7 +1936,7 @@ int ext4_block_truncate_page(handle_t *handle,
                 err = ext4_journal_dirty_metadata(handle, bh);
         } else {
                 if (ext4_should_order_data(inode))
-                       err = ext4_journal_dirty_data(handle, bh);
+                       err = ext4_jbd2_file_inode(handle, inode);
                 mark_buffer_dirty(bh);
         }
  
@@ -3171,7 +3088,14 @@ int ext4_write_inode(struct inode *inode, int wait)
   * be freed, so we have a strong guarantee that no future commit will
   * leave these blocks visible to the user.)
   *
- * Called with inode->sem down.
+ * Another thing we have to ensure is that if we are in ordered mode
+ * and inode is still attached to the committing transaction, we must
+ * start writeout of all the dirty pages which are being truncated.
+ * This way we are sure that all the data written in the previous
+ * transaction are already on disk (truncate waits for pages under
+ * writeback).
+ *
+ * Called with inode->i_mutex down.
   */
  int ext4_setattr(struct dentry *dentry, struct iattr *attr)
  {
@@ -3237,6 +3161,22 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                 if (!error)
                         error = rc;
                 ext4_journal_stop(handle);
+
+               if (ext4_should_order_data(inode)) {
+                       error = ext4_begin_ordered_truncate(inode,
+                                                           attr->ia_size);
+                       if (error) {
+                               /* Do as much error cleanup as possible */
+                               handle = ext4_journal_start(inode, 3);
+                               if (IS_ERR(handle)) {
+                                       ext4_orphan_del(NULL, inode);
+                                       goto err_out;
+                               }
+                               ext4_orphan_del(handle, inode);
+                               ext4_journal_stop(handle);
+                               goto err_out;
+                       }
+               }
         }
  
         rc = inode_setattr(inode, attr);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c

index 1b330cd71ca842b44b53adc350cd65bcbdef188d..629d0fa27e3a1a0c20f56f564c3734c35d2ee9b4 100644 (file)
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -573,6 +573,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
         memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
         INIT_LIST_HEAD(&ei->i_prealloc_list);
         spin_lock_init(&ei->i_prealloc_lock);
+       jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
         return &ei->vfs_inode;
  }
  
@@ -637,6 +638,8 @@ static void ext4_clear_inode(struct inode *inode)
         EXT4_I(inode)->i_block_alloc_info = NULL;
         if (unlikely(rsv))
                 kfree(rsv);
+       jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
+                                      &EXT4_I(inode)->jinode);
  }
  
  static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb)
@@ -3378,7 +3381,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
                         err = ext4_journal_dirty_metadata(handle, bh);
                 else {
                         /* Always do at least ordered writes for quotas */
-                       err = ext4_journal_dirty_data(handle, bh);
+                       err = ext4_jbd2_file_inode(handle, inode);
                         mark_buffer_dirty(bh);
                 }
                 brelse(bh);
author	Jan Kara <jack@suse.cz>
	Fri, 11 Jul 2008 23:27:31 +0000 (19:27 -0400)
committer	Theodore Ts'o <tytso@mit.edu>
	Fri, 11 Jul 2008 23:27:31 +0000 (19:27 -0400)
fs/ext4/ext4_i.h		patch \| blob \| history
fs/ext4/ext4_jbd2.h		patch \| blob \| history
fs/ext4/inode.c		patch \| blob \| history
fs/ext4/super.c		patch \| blob \| history