vfs: pagecache usage optimization for pagesize!=blocksize

[linux-2.6] / fs / ext3 / inode.c
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c

index eb95670a27ebafac6a9a4c593ef37e98a0f29ebf..507d8689b111662b403c874e2c269417635f5555 100644 (file)
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -95,7 +95,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
         BUFFER_TRACE(bh, "call ext3_journal_revoke");
         err = ext3_journal_revoke(handle, blocknr, bh);
         if (err)
-               ext3_abort(inode->i_sb, __FUNCTION__,
+               ext3_abort(inode->i_sb, __func__,
                            "error %d when attempting revoke", err);
         BUFFER_TRACE(bh, "exit");
         return err;
@@ -392,7 +392,7 @@ no_block:
   *     @inode: owner
   *     @ind: descriptor of indirect block.
   *
- *     This function returns the prefered place for block allocation.
+ *     This function returns the preferred place for block allocation.
   *     It is used when heuristic for sequential allocation fails.
   *     Rules are:
   *       + if there is a block to the left of our position - allocate near it.
@@ -436,12 +436,12 @@ static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind)
  }
  
  /**
- *     ext3_find_goal - find a prefered place for allocation.
+ *     ext3_find_goal - find a preferred place for allocation.
   *     @inode: owner
   *     @block:  block we want
   *     @partial: pointer to the last triple within a chain
   *
- *     Normally this function find the prefered place for block allocation,
+ *     Normally this function finds the preferred place for block allocation and
   *     returns it.
   */
  
@@ -1190,7 +1190,7 @@ int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
  {
         int err = journal_dirty_data(handle, bh);
         if (err)
-               ext3_journal_abort_handle(__FUNCTION__, __FUNCTION__,
+               ext3_journal_abort_handle(__func__, __func__,
                                                 bh, handle, err);
         return err;
  }
@@ -1261,10 +1261,11 @@ static int ext3_ordered_write_end(struct file *file,
                 new_i_size = pos + copied;
                 if (new_i_size > EXT3_I(inode)->i_disksize)
                         EXT3_I(inode)->i_disksize = new_i_size;
-               copied = ext3_generic_write_end(file, mapping, pos, len, copied,
+               ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
                                                         page, fsdata);
-               if (copied < 0)
-                       ret = copied;
+               copied = ret2;
+               if (ret2 < 0)
+                       ret = ret2;
         }
         ret2 = ext3_journal_stop(handle);
         if (!ret)
@@ -1289,10 +1290,11 @@ static int ext3_writeback_write_end(struct file *file,
         if (new_i_size > EXT3_I(inode)->i_disksize)
                 EXT3_I(inode)->i_disksize = new_i_size;
  
-       copied = ext3_generic_write_end(file, mapping, pos, len, copied,
+       ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
                                                         page, fsdata);
-       if (copied < 0)
-               ret = copied;
+       copied = ret2;
+       if (ret2 < 0)
+               ret = ret2;
  
         ret2 = ext3_journal_stop(handle);
         if (!ret)
@@ -1765,44 +1767,47 @@ static int ext3_journalled_set_page_dirty(struct page *page)
  }
  
  static const struct address_space_operations ext3_ordered_aops = {
-       .readpage       = ext3_readpage,
-       .readpages      = ext3_readpages,
-       .writepage      = ext3_ordered_writepage,
-       .sync_page      = block_sync_page,
-       .write_begin    = ext3_write_begin,
-       .write_end      = ext3_ordered_write_end,
-       .bmap           = ext3_bmap,
-       .invalidatepage = ext3_invalidatepage,
-       .releasepage    = ext3_releasepage,
-       .direct_IO      = ext3_direct_IO,
-       .migratepage    = buffer_migrate_page,
+       .readpage               = ext3_readpage,
+       .readpages              = ext3_readpages,
+       .writepage              = ext3_ordered_writepage,
+       .sync_page              = block_sync_page,
+       .write_begin            = ext3_write_begin,
+       .write_end              = ext3_ordered_write_end,
+       .bmap                   = ext3_bmap,
+       .invalidatepage         = ext3_invalidatepage,
+       .releasepage            = ext3_releasepage,
+       .direct_IO              = ext3_direct_IO,
+       .migratepage            = buffer_migrate_page,
+       .is_partially_uptodate  = block_is_partially_uptodate,
  };
  
  static const struct address_space_operations ext3_writeback_aops = {
-       .readpage       = ext3_readpage,
-       .readpages      = ext3_readpages,
-       .writepage      = ext3_writeback_writepage,
-       .sync_page      = block_sync_page,
-       .write_begin    = ext3_write_begin,
-       .write_end      = ext3_writeback_write_end,
-       .bmap           = ext3_bmap,
-       .invalidatepage = ext3_invalidatepage,
-       .releasepage    = ext3_releasepage,
-       .direct_IO      = ext3_direct_IO,
-       .migratepage    = buffer_migrate_page,
+       .readpage               = ext3_readpage,
+       .readpages              = ext3_readpages,
+       .writepage              = ext3_writeback_writepage,
+       .sync_page              = block_sync_page,
+       .write_begin            = ext3_write_begin,
+       .write_end              = ext3_writeback_write_end,
+       .bmap                   = ext3_bmap,
+       .invalidatepage         = ext3_invalidatepage,
+       .releasepage            = ext3_releasepage,
+       .direct_IO              = ext3_direct_IO,
+       .migratepage            = buffer_migrate_page,
+       .is_partially_uptodate  = block_is_partially_uptodate,
  };
  
  static const struct address_space_operations ext3_journalled_aops = {
-       .readpage       = ext3_readpage,
-       .readpages      = ext3_readpages,
-       .writepage      = ext3_journalled_writepage,
-       .sync_page      = block_sync_page,
-       .write_begin    = ext3_write_begin,
-       .write_end      = ext3_journalled_write_end,
-       .set_page_dirty = ext3_journalled_set_page_dirty,
-       .bmap           = ext3_bmap,
-       .invalidatepage = ext3_invalidatepage,
-       .releasepage    = ext3_releasepage,
+       .readpage               = ext3_readpage,
+       .readpages              = ext3_readpages,
+       .writepage              = ext3_journalled_writepage,
+       .sync_page              = block_sync_page,
+       .write_begin            = ext3_write_begin,
+       .write_end              = ext3_journalled_write_end,
+       .set_page_dirty         = ext3_journalled_set_page_dirty,
+       .bmap                   = ext3_bmap,
+       .invalidatepage         = ext3_invalidatepage,
+       .releasepage            = ext3_releasepage,
+       .is_partially_uptodate  = block_is_partially_uptodate,
  };
  
  void ext3_set_aops(struct inode *inode)
@@ -2125,7 +2130,21 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
  
         if (this_bh) {
                 BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
-               ext3_journal_dirty_metadata(handle, this_bh);
+
+               /*
+                * The buffer head should have an attached journal head at this
+                * point. However, if corrupted data makes an indirect block
+                * point to itself, the journal head would have been detached
+                * when the block was cleared. Check for it instead of OOPSing.
+                */
+               if (bh2jh(this_bh))
+                       ext3_journal_dirty_metadata(handle, this_bh);
+               else
+                       ext3_error(inode->i_sb, "ext3_free_data",
+                                  "circular indirect block detected, "
+                                  "inode=%lu, block=%llu",
+                                  inode->i_ino,
+                                  (unsigned long long)this_bh->b_blocknr);
         }
  }
  
@@ -2251,6 +2270,19 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
         }
  }
  
+/* Return 1 if @inode may be truncated, 0 if it must be left alone. */
+int ext3_can_truncate(struct inode *inode)
+{
+       /* Append-only and immutable inodes are refused outright. */
+       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+               return 0;
+       if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))
+               return 1;
+       /* Of the remaining types, only non-fast symlinks qualify. */
+       return S_ISLNK(inode->i_mode) &&
+               !ext3_inode_is_fast_symlink(inode);
+}
+
  /*
   * ext3_truncate()
   *
@@ -2295,12 +2327,7 @@ void ext3_truncate(struct inode *inode)
         unsigned blocksize = inode->i_sb->s_blocksize;
         struct page *page;
  
-       if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-           S_ISLNK(inode->i_mode)))
-               return;
-       if (ext3_inode_is_fast_symlink(inode))
-               return;
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+       if (!ext3_can_truncate(inode))
                 return;
  
         /*
@@ -2454,11 +2481,10 @@ out_stop:
  static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
                 unsigned long ino, struct ext3_iloc *iloc)
  {
-       unsigned long desc, group_desc, block_group;
+       unsigned long block_group;
         unsigned long offset;
         ext3_fsblk_t block;
-       struct buffer_head *bh;
-       struct ext3_group_desc * gdp;
+       struct ext3_group_desc *gdp;
  
         if (!ext3_valid_inum(sb, ino)) {
                 /*
@@ -2470,27 +2496,15 @@ static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
         }
  
         block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
-       if (block_group >= EXT3_SB(sb)->s_groups_count) {
-               ext3_error(sb,"ext3_get_inode_block","group >= groups count");
-               return 0;
-       }
-       smp_rmb();
-       group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb);
-       desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1);
-       bh = EXT3_SB(sb)->s_group_desc[group_desc];
-       if (!bh) {
-               ext3_error (sb, "ext3_get_inode_block",
-                           "Descriptor not loaded");
+       gdp = ext3_get_group_desc(sb, block_group, NULL);
+       if (!gdp)
                 return 0;
-       }
-
-       gdp = (struct ext3_group_desc *)bh->b_data;
         /*
          * Figure out the offset within the block group inode table
          */
         offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) *
                 EXT3_INODE_SIZE(sb);
-       block = le32_to_cpu(gdp[desc].bg_inode_table) +
+       block = le32_to_cpu(gdp->bg_inode_table) +
                 (offset >> EXT3_BLOCK_SIZE_BITS(sb));
  
         iloc->block_group = block_group;
@@ -2524,6 +2538,16 @@ static int __ext3_get_inode_loc(struct inode *inode,
         }
         if (!buffer_uptodate(bh)) {
                 lock_buffer(bh);
+
+               /*
+                * If the buffer has the write error flag, we have failed
+                * to write out another inode in the same block.  In this
+                * case, skip reading the block: the read may succeed and
+                * replace the buffer contents with the old inode data.
+                */
+               if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
+                       set_buffer_uptodate(bh);
+
                 if (buffer_uptodate(bh)) {
                         /* someone brought it uptodate while we waited */
                         unlock_buffer(bh);
@@ -3214,7 +3238,7 @@ void ext3_dirty_inode(struct inode *inode)
                 current_handle->h_transaction != handle->h_transaction) {
                 /* This task has a transaction open against a different fs */
                 printk(KERN_EMERG "%s: transactions do not match!\n",
-                      __FUNCTION__);
+                      __func__);
         } else {
                 jbd_debug(5, "marking dirty.  outer handle=%p\n",
                                 current_handle);