err.no Git - linux-2.6/blobdiff - fs/ext4/inode.c
Merge branch 'x86/crashdump' into x86/urgent
[linux-2.6] / fs / ext4 / inode.c
index 24518b57733efcd5f7b57cd3ffc02dd64dfb9dc2..8ca2763df091051fea3e02ae7bba35a1e82e21d9 100644 (file)
@@ -2003,11 +2003,15 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
        handle_t *handle = NULL;
 
        handle = ext4_journal_current_handle();
-       BUG_ON(handle == NULL);
-       BUG_ON(create == 0);
-
-       ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
+       if (!handle) {
+               ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
+                                  bh_result, 0, 0, 0);
+               BUG_ON(!ret);
+       } else {
+               ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
                                   bh_result, create, 0, EXT4_DELALLOC_RSVED);
+       }
+
        if (ret > 0) {
                bh_result->b_size = (ret << inode->i_blkbits);
 
@@ -2040,15 +2044,37 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
 
 static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
 {
-       return !buffer_mapped(bh) || buffer_delay(bh);
+       /*
+        * An unmapped buffer is possible for holes.
+        * A delay buffer is possible with delayed allocation.
+        */
+       return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh));
+}
+
+static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
+                                  struct buffer_head *bh_result, int create)
+{
+       int ret = 0;
+       unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
+
+       /*
+        * We don't want to do block allocation in writepage,
+        * so call ext4_get_blocks_wrap() with create = 0.
+        */
+       ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks,
+                                  bh_result, 0, 0, 0);
+       if (ret > 0) {
+               bh_result->b_size = (ret << inode->i_blkbits);
+               ret = 0;
+       }
+       return ret;
 }
 
 /*
- * get called vi ext4_da_writepages after taking page lock
- * We may end up doing block allocation here in case
- * mpage_da_map_blocks failed to allocate blocks.
- *
- * We also get called via journal_submit_inode_data_buffers
+ * get called via ext4_da_writepages after taking page lock (have journal handle)
+ * get called via journal_submit_inode_data_buffers (no journal handle)
+ * get called via shrink_page_list via pdflush (no journal handle)
+ * or grab_page_cache when doing write_begin (have journal handle)
  */
 static int ext4_da_writepage(struct page *page,
                                struct writeback_control *wbc)
@@ -2056,37 +2082,61 @@ static int ext4_da_writepage(struct page *page,
        int ret = 0;
        loff_t size;
        unsigned long len;
-       handle_t *handle = NULL;
        struct buffer_head *page_bufs;
        struct inode *inode = page->mapping->host;
 
-       handle = ext4_journal_current_handle();
-       if (!handle) {
-               /*
-                * This can happen when we aren't called via
-                * ext4_da_writepages() but directly (shrink_page_list).
-                * We cannot easily start a transaction here so we just skip
-                * writing the page in case we would have to do so.
-                * We reach here also via journal_submit_inode_data_buffers
-                */
-               size = i_size_read(inode);
+       size = i_size_read(inode);
+       if (page->index == size >> PAGE_CACHE_SHIFT)
+               len = size & ~PAGE_CACHE_MASK;
+       else
+               len = PAGE_CACHE_SIZE;
 
+       if (page_has_buffers(page)) {
                page_bufs = page_buffers(page);
-               if (page->index == size >> PAGE_CACHE_SHIFT)
-                       len = size & ~PAGE_CACHE_MASK;
-               else
-                       len = PAGE_CACHE_SIZE;
-
-               if (walk_page_buffers(NULL, page_bufs, 0,
-                               len, NULL, ext4_bh_unmapped_or_delay)) {
+               if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+                                       ext4_bh_unmapped_or_delay)) {
                        /*
-                        * We can't do block allocation under
-                        * page lock without a handle . So redirty
-                        * the page and return
+                        * We don't want to do block allocation.
+                        * So redirty the page and return
                         * We may reach here when we do a journal commit
                         * via journal_submit_inode_data_buffers.
                         * If we don't have mapping block we just ignore
-                        * them
+                        * them. We can also reach here via shrink_page_list
+                        */
+                       redirty_page_for_writepage(wbc, page);
+                       unlock_page(page);
+                       return 0;
+               }
+       } else {
+               /*
+                * The test for page_has_buffers() is subtle:
+                * We know the page is dirty but it lost buffers. That means
+                * that at some moment in time after write_begin()/write_end()
+                * has been called all buffers have been clean and thus they
+                * must have been written at least once. So they are all
+                * mapped and we can happily proceed with mapping them
+                * and writing the page.
+                *
+                * Try to initialize the buffer_heads and check whether
+                * all are mapped and non delay. We don't want to
+                * do block allocation here.
+                */
+               ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
+                                               ext4_normal_get_block_write);
+               if (!ret) {
+                       page_bufs = page_buffers(page);
+                       /* check whether all are mapped and non delay */
+                       if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+                                               ext4_bh_unmapped_or_delay)) {
+                               redirty_page_for_writepage(wbc, page);
+                               unlock_page(page);
+                               return 0;
+                       }
+               } else {
+                       /*
+                        * We can't do block allocation here
+                        * so just redirty the page and unlock
+                        * and return
                         */
                        redirty_page_for_writepage(wbc, page);
                        unlock_page(page);
@@ -2095,9 +2145,11 @@ static int ext4_da_writepage(struct page *page,
        }
 
        if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
-               ret = nobh_writepage(page, ext4_da_get_block_write, wbc);
+               ret = nobh_writepage(page, ext4_normal_get_block_write, wbc);
        else
-               ret = block_write_full_page(page, ext4_da_get_block_write, wbc);
+               ret = block_write_full_page(page,
+                                               ext4_normal_get_block_write,
+                                               wbc);
 
        return ret;
 }
@@ -2246,6 +2298,29 @@ out:
        return ret;
 }
 
+/*
+ * Check whether we should update i_disksize when a write reaches
+ * the end of the file but does not require block allocation.
+ */
+static int ext4_da_should_update_i_disksize(struct page *page,
+                                        unsigned long offset)
+{
+       struct buffer_head *bh;
+       struct inode *inode = page->mapping->host;
+       unsigned int idx;
+       int i;
+
+       bh = page_buffers(page);
+       idx = offset >> inode->i_blkbits;
+
+       for (i=0; i < idx; i++)
+               bh = bh->b_this_page;
+
+       if (!buffer_mapped(bh) || (buffer_delay(bh)))
+               return 0;
+       return 1;
+}
+
 static int ext4_da_write_end(struct file *file,
                                struct address_space *mapping,
                                loff_t pos, unsigned len, unsigned copied,
@@ -2255,6 +2330,10 @@ static int ext4_da_write_end(struct file *file,
        int ret = 0, ret2;
        handle_t *handle = ext4_journal_current_handle();
        loff_t new_i_size;
+       unsigned long start, end;
+
+       start = pos & (PAGE_CACHE_SIZE - 1);
+       end = start + copied -1;
 
        /*
         * generic_write_end() will run mark_inode_dirty() if i_size
@@ -2263,18 +2342,23 @@ static int ext4_da_write_end(struct file *file,
         */
 
        new_i_size = pos + copied;
-       if (new_i_size > EXT4_I(inode)->i_disksize)
-               if (!walk_page_buffers(NULL, page_buffers(page),
-                                      0, len, NULL, ext4_bh_unmapped_or_delay)){
-                       /*
-                        * Updating i_disksize when extending file without
-                        * needing block allocation
-                        */
-                       if (ext4_should_order_data(inode))
-                               ret = ext4_jbd2_file_inode(handle, inode);
+       if (new_i_size > EXT4_I(inode)->i_disksize) {
+               if (ext4_da_should_update_i_disksize(page, end)) {
+                       down_write(&EXT4_I(inode)->i_data_sem);
+                       if (new_i_size > EXT4_I(inode)->i_disksize) {
+                               /*
+                                * Updating i_disksize when extending file
+                                * without needing block allocation
+                                */
+                               if (ext4_should_order_data(inode))
+                                       ret = ext4_jbd2_file_inode(handle,
+                                                                  inode);
 
-                       EXT4_I(inode)->i_disksize = new_i_size;
+                               EXT4_I(inode)->i_disksize = new_i_size;
+                       }
+                       up_write(&EXT4_I(inode)->i_data_sem);
                }
+       }
        ret2 = generic_write_end(file, mapping, pos, len, copied,
                                                        page, fsdata);
        copied = ret2;
@@ -2438,12 +2522,14 @@ static int __ext4_normal_writepage(struct page *page,
        struct inode *inode = page->mapping->host;
 
        if (test_opt(inode->i_sb, NOBH))
-               return nobh_writepage(page, ext4_get_block, wbc);
+               return nobh_writepage(page,
+                                       ext4_normal_get_block_write, wbc);
        else
-               return block_write_full_page(page, ext4_get_block, wbc);
+               return block_write_full_page(page,
+                                               ext4_normal_get_block_write,
+                                               wbc);
 }
 
-
 static int ext4_normal_writepage(struct page *page,
                                struct writeback_control *wbc)
 {
@@ -2452,13 +2538,24 @@ static int ext4_normal_writepage(struct page *page,
        loff_t len;
 
        J_ASSERT(PageLocked(page));
-       J_ASSERT(page_has_buffers(page));
        if (page->index == size >> PAGE_CACHE_SHIFT)
                len = size & ~PAGE_CACHE_MASK;
        else
                len = PAGE_CACHE_SIZE;
-       BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
-                                ext4_bh_unmapped_or_delay));
+
+       if (page_has_buffers(page)) {
+               /* if the page has buffers they should all be mapped
+                * and allocated. If there are no buffers attached
+                * to the page we know the page is dirty but it lost
+                * buffers. That means that at some moment in time
+                * after write_begin() / write_end() has been called
+                * all buffers have been clean and thus they must have been
+                * written at least once. So they are all mapped and we can
+                * happily proceed with mapping them and writing the page.
+                */
+               BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
+                                       ext4_bh_unmapped_or_delay));
+       }
 
        if (!ext4_journal_current_handle())
                return __ext4_normal_writepage(page, wbc);
@@ -2478,7 +2575,8 @@ static int __ext4_journalled_writepage(struct page *page,
        int ret = 0;
        int err;
 
-       ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, ext4_get_block);
+       ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
+                                       ext4_normal_get_block_write);
        if (ret != 0)
                goto out_unlock;
 
@@ -2525,13 +2623,24 @@ static int ext4_journalled_writepage(struct page *page,
        loff_t len;
 
        J_ASSERT(PageLocked(page));
-       J_ASSERT(page_has_buffers(page));
        if (page->index == size >> PAGE_CACHE_SHIFT)
                len = size & ~PAGE_CACHE_MASK;
        else
                len = PAGE_CACHE_SIZE;
-       BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
-                                ext4_bh_unmapped_or_delay));
+
+       if (page_has_buffers(page)) {
+               /* if the page has buffers they should all be mapped
+                * and allocated. If there are no buffers attached
+                * to the page we know the page is dirty but it lost
+                * buffers. That means that at some moment in time
+                * after write_begin() / write_end() has been called
+                * all buffers have been clean and thus they must have been
+                * written at least once. So they are all mapped and we can
+                * happily proceed with mapping them and writing the page.
+                */
+               BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
+                                       ext4_bh_unmapped_or_delay));
+       }
 
        if (ext4_journal_current_handle())
                goto no_write;
@@ -2549,7 +2658,9 @@ static int ext4_journalled_writepage(struct page *page,
                 * really know unless we go poke around in the buffer_heads.
                 * But block_write_full_page will do the right thing.
                 */
-               return block_write_full_page(page, ext4_get_block, wbc);
+               return block_write_full_page(page,
+                                               ext4_normal_get_block_write,
+                                               wbc);
        }
 no_write:
        redirty_page_for_writepage(wbc, page);
@@ -3314,6 +3425,11 @@ void ext4_truncate(struct inode *inode)
        if (ext4_orphan_add(handle, inode))
                goto out_stop;
 
+       /*
+        * From here we block out all ext4_get_block() callers who want to
+        * modify the block allocation tree.
+        */
+       down_write(&ei->i_data_sem);
        /*
         * The orphan list entry will now protect us from any crash which
         * occurs before the truncate completes, so it is now safe to propagate
@@ -3323,12 +3439,6 @@ void ext4_truncate(struct inode *inode)
         */
        ei->i_disksize = inode->i_size;
 
-       /*
-        * From here we block out all ext4_get_block() callers who want to
-        * modify the block allocation tree.
-        */
-       down_write(&ei->i_data_sem);
-
        if (n == 1) {           /* direct blocks */
                ext4_free_data(handle, inode, NULL, i_data+offsets[0],
                               i_data + EXT4_NDIR_BLOCKS);
@@ -4121,6 +4231,32 @@ err_out:
        return error;
 }
 
+int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
+                struct kstat *stat)
+{
+       struct inode *inode;
+       unsigned long delalloc_blocks;
+
+       inode = dentry->d_inode;
+       generic_fillattr(inode, stat);
+
+       /*
+        * We can't update i_blocks if the block allocation is delayed
+        * otherwise in the case of system crash before the real block
+        * allocation is done, we will have i_blocks inconsistent with
+        * on-disk file blocks.
+        * We always keep i_blocks updated together with real
+        * allocation. But so as not to confuse the user, stat
+        * will return a block count that includes the delayed allocation
+        * blocks for this file.
+        */
+       spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+       delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks;
+       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+
+       stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
+       return 0;
+}
 
 /*
  * How many blocks doth make a writepage()?