]> err.no Git - linux-2.6/blobdiff - fs/ext4/extents.c
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
[linux-2.6] / fs / ext4 / extents.c
index 47929c4e3dae66d104f5bdd6ac39f5436183dff5..b24d3c53f20cd20407d5d5373c79b725d308cc45 100644 (file)
@@ -92,17 +92,16 @@ static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
        ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
 }
 
-static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed)
+static int ext4_ext_journal_restart(handle_t *handle, int needed)
 {
        int err;
 
        if (handle->h_buffer_credits > needed)
-               return handle;
-       if (!ext4_journal_extend(handle, needed))
-               return handle;
-       err = ext4_journal_restart(handle, needed);
-
-       return handle;
+               return 0;
+       err = ext4_journal_extend(handle, needed);
+       if (err <= 0)
+               return err;
+       return ext4_journal_restart(handle, needed);
 }
 
 /*
@@ -180,15 +179,18 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
        return bg_start + colour + block;
 }
 
+/*
+ * Allocation for a meta data block
+ */
 static ext4_fsblk_t
-ext4_ext_new_block(handle_t *handle, struct inode *inode,
+ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
                        struct ext4_ext_path *path,
                        struct ext4_extent *ex, int *err)
 {
        ext4_fsblk_t goal, newblock;
 
        goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
-       newblock = ext4_new_block(handle, inode, goal, err);
+       newblock = ext4_new_meta_block(handle, inode, goal, err);
        return newblock;
 }
 
@@ -246,6 +248,36 @@ static int ext4_ext_space_root_idx(struct inode *inode)
        return size;
 }
 
+/*
+ * Calculate the number of metadata blocks needed
+ * to allocate @blocks
+ * Worse case is one block per extent
+ */
+int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
+{
+       int lcap, icap, rcap, leafs, idxs, num;
+       int newextents = blocks;
+
+       rcap = ext4_ext_space_root_idx(inode);
+       lcap = ext4_ext_space_block(inode);
+       icap = ext4_ext_space_block_idx(inode);
+
+       /* number of new leaf blocks needed */
+       num = leafs = (newextents + lcap - 1) / lcap;
+
+       /*
+        * Worse case, we need separate index block(s)
+        * to link all new leaf blocks
+        */
+       idxs = (leafs + icap - 1) / icap;
+       do {
+               num += idxs;
+               idxs = (idxs + icap - 1) / icap;
+       } while (idxs > rcap);
+
+       return num;
+}
+
 static int
 ext4_ext_max_entries(struct inode *inode, int depth)
 {
@@ -524,6 +556,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
                alloc = 1;
        }
        path[0].p_hdr = eh;
+       path[0].p_bh = NULL;
 
        i = depth;
        /* walk through the tree */
@@ -552,12 +585,14 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
        }
 
        path[ppos].p_depth = i;
-       path[ppos].p_hdr = eh;
        path[ppos].p_ext = NULL;
        path[ppos].p_idx = NULL;
 
        /* find extent */
        ext4_ext_binsearch(inode, path + ppos, block);
+       /* if not an empty leaf */
+       if (path[ppos].p_ext)
+               path[ppos].p_block = ext_pblock(path[ppos].p_ext);
 
        ext4_ext_show_path(inode, path);
 
@@ -688,7 +723,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
        /* allocate all needed blocks */
        ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
        for (a = 0; a < depth - at; a++) {
-               newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+               newblock = ext4_ext_new_meta_block(handle, inode, path,
+                                                  newext, &err);
                if (newblock == 0)
                        goto cleanup;
                ablocks[a] = newblock;
@@ -884,7 +920,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
        ext4_fsblk_t newblock;
        int err = 0;
 
-       newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+       newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err);
        if (newblock == 0)
                return err;
 
@@ -981,6 +1017,8 @@ repeat:
                /* if we found index with free entry, then use that
                 * entry: create all needed subtree and add new leaf */
                err = ext4_ext_split(handle, inode, path, newext, i);
+               if (err)
+                       goto out;
 
                /* refill path */
                ext4_ext_drop_refs(path);
@@ -1403,7 +1441,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode,
 
        /*
         * get the next allocated block if the extent in the path
-        * is before the requested block(s) 
+        * is before the requested block(s)
         */
        if (b2 < b1) {
                b2 = ext4_ext_next_allocated_block(path);
@@ -1709,54 +1747,61 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 }
 
 /*
- * ext4_ext_calc_credits_for_insert:
- * This routine returns max. credits that the extent tree can consume.
- * It should be OK for low-performance paths like ->writepage()
- * To allow many writing processes to fit into a single transaction,
- * the caller should calculate credits under i_data_sem and
- * pass the actual path.
+ * ext4_ext_calc_credits_for_single_extent:
+ * This routine returns max. credits that needed to insert an extent
+ * to the extent tree.
+ * When pass the actual path, the caller should calculate credits
+ * under i_data_sem.
  */
-int ext4_ext_calc_credits_for_insert(struct inode *inode,
+int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
                                                struct ext4_ext_path *path)
 {
-       int depth, needed;
-
        if (path) {
+               int depth = ext_depth(inode);
+               int ret = 0;
+
                /* probably there is space in leaf? */
-               depth = ext_depth(inode);
                if (le16_to_cpu(path[depth].p_hdr->eh_entries)
-                               < le16_to_cpu(path[depth].p_hdr->eh_max))
-                       return 1;
-       }
+                               < le16_to_cpu(path[depth].p_hdr->eh_max)) {
 
-       /*
-        * given 32-bit logical block (4294967296 blocks), max. tree
-        * can be 4 levels in depth -- 4 * 340^4 == 53453440000.
-        * Let's also add one more level for imbalance.
-        */
-       depth = 5;
-
-       /* allocation of new data block(s) */
-       needed = 2;
+                       /*
+                        *  There are some space in the leaf tree, no
+                        *  need to account for leaf block credit
+                        *
+                        *  bitmaps and block group descriptor blocks
+                        *  and other metadat blocks still need to be
+                        *  accounted.
+                        */
+                       /* 1 bitmap, 1 block group descriptor */
+                       ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
+               }
+       }
 
-       /*
-        * tree can be full, so it would need to grow in depth:
-        * we need one credit to modify old root, credits for
-        * new root will be added in split accounting
-        */
-       needed += 1;
+       return ext4_chunk_trans_blocks(inode, nrblocks);
+}
 
-       /*
-        * Index split can happen, we would need:
-        *    allocate intermediate indexes (bitmap + group)
-        *  + change two blocks at each level, but root (already included)
-        */
-       needed += (depth * 2) + (depth * 2);
+/*
+ * How many index/leaf blocks need to change/allocate to modify nrblocks?
+ *
+ * if nrblocks are fit in a single extent (chunk flag is 1), then
+ * in the worse case, each tree level index/leaf need to be changed
+ * if the tree split due to insert a new extent, then the old tree
+ * index/leaf need to be updated too
+ *
+ * If the nrblocks are discontiguous, they could cause
+ * the whole tree split more than once, but this is really rare.
+ */
+int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+{
+       int index;
+       int depth = ext_depth(inode);
 
-       /* any allocation modifies superblock */
-       needed += 1;
+       if (chunk)
+               index = depth * 2;
+       else
+               index = depth * 3;
 
-       return needed;
+       return index;
 }
 
 static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
@@ -1872,22 +1917,22 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                        BUG_ON(b != ex_ee_block + ex_ee_len - 1);
                }
 
-               /* at present, extent can't cross block group: */
-               /* leaf + bitmap + group desc + sb + inode */
-               credits = 5;
+               /*
+                * 3 for leaf, sb, and inode plus 2 (bmap and group
+                * descriptor) for each block group; assume two block
+                * groups plus ex_ee_len/blocks_per_block_group for
+                * the worst case
+                */
+               credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
                if (ex == EXT_FIRST_EXTENT(eh)) {
                        correct_index = 1;
                        credits += (ext_depth(inode)) + 1;
                }
-#ifdef CONFIG_QUOTA
                credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
-#endif
 
-               handle = ext4_ext_journal_restart(handle, credits);
-               if (IS_ERR(handle)) {
-                       err = PTR_ERR(handle);
+               err = ext4_ext_journal_restart(handle, credits);
+               if (err)
                        goto out;
-               }
 
                err = ext4_ext_get_access(handle, inode, path + depth);
                if (err)
@@ -2287,7 +2332,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                unsigned int newdepth;
                /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
                if (allocated <= EXT4_EXT_ZERO_LEN) {
-                       /* Mark first half uninitialized.
+                       /*
+                        * iblock == ee_block is handled by the zerouout
+                        * at the beginning.
+                        * Mark first half uninitialized.
                         * Mark second half initialized and zero out the
                         * initialized extent
                         */
@@ -2310,7 +2358,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                                ex->ee_len   = orig_ex.ee_len;
                                ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                                ext4_ext_dirty(handle, inode, path + depth);
-                               /* zeroed the full extent */
+                               /* blocks available from iblock */
                                return allocated;
 
                        } else if (err)
@@ -2338,6 +2386,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                                        err = PTR_ERR(path);
                                        return err;
                                }
+                               /* get the second half extent details */
                                ex = path[depth].p_ext;
                                err = ext4_ext_get_access(handle, inode,
                                                                path + depth);
@@ -2367,6 +2416,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                        ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                        ext4_ext_dirty(handle, inode, path + depth);
                        /* zeroed the full extent */
+                       /* blocks available from iblock */
                        return allocated;
 
                } else if (err)
@@ -2382,23 +2432,22 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                 */
                orig_ex.ee_len = cpu_to_le16(ee_len -
                                                ext4_ext_get_actual_len(ex3));
-               if (newdepth != depth) {
-                       depth = newdepth;
-                       ext4_ext_drop_refs(path);
-                       path = ext4_ext_find_extent(inode, iblock, path);
-                       if (IS_ERR(path)) {
-                               err = PTR_ERR(path);
-                               goto out;
-                       }
-                       eh = path[depth].p_hdr;
-                       ex = path[depth].p_ext;
-                       if (ex2 != &newex)
-                               ex2 = ex;
-
-                       err = ext4_ext_get_access(handle, inode, path + depth);
-                       if (err)
-                               goto out;
+               depth = newdepth;
+               ext4_ext_drop_refs(path);
+               path = ext4_ext_find_extent(inode, iblock, path);
+               if (IS_ERR(path)) {
+                       err = PTR_ERR(path);
+                       goto out;
                }
+               eh = path[depth].p_hdr;
+               ex = path[depth].p_ext;
+               if (ex2 != &newex)
+                       ex2 = ex;
+
+               err = ext4_ext_get_access(handle, inode, path + depth);
+               if (err)
+                       goto out;
+
                allocated = max_blocks;
 
                /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
@@ -2416,6 +2465,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                        ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                        ext4_ext_dirty(handle, inode, path + depth);
                        /* zero out the first half */
+                       /* blocks available from iblock */
                        return allocated;
                }
        }
@@ -2529,6 +2579,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        int err = 0, depth, ret;
        unsigned long allocated = 0;
        struct ext4_allocation_request ar;
+       loff_t disksize;
 
        __clear_bit(BH_New, &bh_result->b_state);
        ext_debug("blocks %u/%lu requested for inode %u\n",
@@ -2616,8 +2667,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                                 */
                                if (allocated > max_blocks)
                                        allocated = max_blocks;
-                               /* mark the buffer unwritten */
-                               __set_bit(BH_Unwritten, &bh_result->b_state);
+                               set_buffer_unwritten(bh_result);
                                goto out2;
                        }
 
@@ -2716,14 +2766,19 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                goto out2;
        }
 
-       if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize)
-               EXT4_I(inode)->i_disksize = inode->i_size;
-
        /* previous routine could use block we allocated */
        newblock = ext_pblock(&newex);
        allocated = ext4_ext_get_actual_len(&newex);
 outnew:
-       __set_bit(BH_New, &bh_result->b_state);
+       if (extend_disksize) {
+               disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits;
+               if (disksize > i_size_read(inode))
+                       disksize = i_size_read(inode);
+               if (disksize > EXT4_I(inode)->i_disksize)
+                       EXT4_I(inode)->i_disksize = disksize;
+       }
+
+       set_buffer_new(bh_result);
 
        /* Cache only when it is _not_ an uninitialized extent */
        if (create != EXT4_CREATE_UNINITIALIZED_EXT)
@@ -2733,7 +2788,7 @@ out:
        if (allocated > max_blocks)
                allocated = max_blocks;
        ext4_ext_show_leaf(inode, path);
-       __set_bit(BH_Mapped, &bh_result->b_state);
+       set_buffer_mapped(bh_result);
        bh_result->b_bdev = inode->i_sb->s_bdev;
        bh_result->b_blocknr = newblock;
 out2:
@@ -2744,7 +2799,7 @@ out2:
        return err ? err : allocated;
 }
 
-void ext4_ext_truncate(struct inode * inode, struct page *page)
+void ext4_ext_truncate(struct inode *inode)
 {
        struct address_space *mapping = inode->i_mapping;
        struct super_block *sb = inode->i_sb;
@@ -2755,33 +2810,27 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
        /*
         * probably first extent we're gonna free will be last in block
         */
-       err = ext4_writepage_trans_blocks(inode) + 3;
+       err = ext4_writepage_trans_blocks(inode);
        handle = ext4_journal_start(inode, err);
-       if (IS_ERR(handle)) {
-               if (page) {
-                       clear_highpage(page);
-                       flush_dcache_page(page);
-                       unlock_page(page);
-                       page_cache_release(page);
-               }
+       if (IS_ERR(handle))
                return;
-       }
 
-       if (page)
-               ext4_block_truncate_page(handle, page, mapping, inode->i_size);
+       if (inode->i_size & (sb->s_blocksize - 1))
+               ext4_block_truncate_page(handle, mapping, inode->i_size);
+
+       if (ext4_orphan_add(handle, inode))
+               goto out_stop;
 
        down_write(&EXT4_I(inode)->i_data_sem);
        ext4_ext_invalidate_cache(inode);
 
-       ext4_mb_discard_inode_preallocations(inode);
+       ext4_discard_reservation(inode);
 
        /*
         * TODO: optimization is possible here.
         * Probably we need not scan at all,
         * because page truncation is enough.
         */
-       if (ext4_orphan_add(handle, inode))
-               goto out_stop;
 
        /* we have to know where to truncate from in crash case */
        EXT4_I(inode)->i_disksize = inode->i_size;
@@ -2798,6 +2847,7 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
                handle->h_sync = 1;
 
 out_stop:
+       up_write(&EXT4_I(inode)->i_data_sem);
        /*
         * If this was a simple ftruncate() and the file will remain alive,
         * then we need to clear up the orphan record which we created above.
@@ -2808,33 +2858,11 @@ out_stop:
        if (inode->i_nlink)
                ext4_orphan_del(handle, inode);
 
-       up_write(&EXT4_I(inode)->i_data_sem);
        inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
        ext4_journal_stop(handle);
 }
 
-/*
- * ext4_ext_writepage_trans_blocks:
- * calculate max number of blocks we could modify
- * in order to allocate new block for an inode
- */
-int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
-{
-       int needed;
-
-       needed = ext4_ext_calc_credits_for_insert(inode, NULL);
-
-       /* caller wants to allocate num blocks, but note it includes sb */
-       needed = needed * num - (num - 1);
-
-#ifdef CONFIG_QUOTA
-       needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
-#endif
-
-       return needed;
-}
-
 static void ext4_falloc_update_inode(struct inode *inode,
                                int mode, loff_t new_size, int update_ctime)
 {
@@ -2895,10 +2923,9 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
        max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
                                                        - block;
        /*
-        * credits to insert 1 extent into extent tree + buffers to be able to
-        * modify 1 super block, 1 block bitmap and 1 group descriptor.
+        * credits to insert 1 extent into extent tree
         */
-       credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3;
+       credits = ext4_chunk_trans_blocks(inode, max_blocks);
        mutex_lock(&inode->i_mutex);
 retry:
        while (ret >= 0 && ret < max_blocks) {
@@ -2911,7 +2938,7 @@ retry:
                }
                ret = ext4_get_blocks_wrap(handle, inode, block,
                                          max_blocks, &map_bh,
-                                         EXT4_CREATE_UNINITIALIZED_EXT, 0);
+                                         EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
                if (ret <= 0) {
 #ifdef EXT4FS_DEBUG
                        WARN_ON(ret <= 0);