Merge branch 'libertas-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/linvil...

[linux-2.6] / fs / ocfs2 / aops.c
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c

index acf8f0006725e26c7a58f922e69e1c3ffde2e4d6..a480b09c79b916de88252129919bedbbfb10850a 100644 (file)
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -25,6 +25,7 @@
  #include <linux/pagemap.h>
  #include <asm/byteorder.h>
  #include <linux/swap.h>
+#include <linux/pipe_fs_i.h>
  
  #define MLOG_MASK_PREFIX ML_FILE_IO
  #include <cluster/masklog.h>
@@ -77,7 +78,8 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
  
         if (!OCFS2_IS_VALID_DINODE(fe)) {
                 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
-                    (unsigned long long)fe->i_blkno, 7, fe->i_signature);
+                    (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
+                    fe->i_signature);
                 goto bail;
         }
  
@@ -136,6 +138,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
                            struct buffer_head *bh_result, int create)
  {
         int err = 0;
+       unsigned int ext_flags;
         u64 p_blkno, past_eof;
         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
  
@@ -152,7 +155,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
                 goto bail;
         }
  
-       err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, NULL);
+       err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, NULL,
+                                         &ext_flags);
         if (err) {
                 mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, "
                      "%llu, NULL)\n", err, inode, (unsigned long long)iblock,
@@ -170,7 +174,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
                         "ino %lu, iblock %llu\n", inode->i_ino,
                         (unsigned long long)iblock);
  
-       if (p_blkno)
+       /* Treat the unwritten extent as a hole for zeroing purposes. */
+       if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
                 map_bh(bh_result, inode->i_sb, p_blkno);
  
         if (!ocfs2_sparse_alloc(osb)) {
@@ -217,7 +222,10 @@ static int ocfs2_readpage(struct file *file, struct page *page)
                 goto out;
         }
  
-       down_read(&OCFS2_I(inode)->ip_alloc_sem);
+       if (down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem) == 0) {
+               ret = AOP_TRUNCATED_PAGE;
+               goto out_meta_unlock;
+       }
  
         /*
          * i_size might have just been updated as we grabed the meta lock.  We
@@ -230,10 +238,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
          * XXX sys_readahead() seems to get that wrong?
          */
         if (start >= i_size_read(inode)) {
-               char *addr = kmap(page);
-               memset(addr, 0, PAGE_SIZE);
-               flush_dcache_page(page);
-               kunmap(page);
+               zero_user_page(page, 0, PAGE_SIZE, KM_USER0);
                 SetPageUptodate(page);
                 ret = 0;
                 goto out_alloc;
@@ -253,6 +258,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
         ocfs2_data_unlock(inode, 0);
  out_alloc:
         up_read(&OCFS2_I(inode)->ip_alloc_sem);
+out_meta_unlock:
         ocfs2_meta_unlock(inode, 0);
  out:
         if (unlock)
@@ -308,13 +314,13 @@ int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
   * functionality yet, but IMHO it's better to cut and paste the whole
   * thing so we can avoid introducing our own bugs (and easily pick up
   * their fixes when they happen) --Mark */
-static int walk_page_buffers(  handle_t *handle,
-                               struct buffer_head *head,
-                               unsigned from,
-                               unsigned to,
-                               int *partial,
-                               int (*fn)(      handle_t *handle,
-                                               struct buffer_head *bh))
+int walk_page_buffers( handle_t *handle,
+                       struct buffer_head *head,
+                       unsigned from,
+                       unsigned to,
+                       int *partial,
+                       int (*fn)(      handle_t *handle,
+                                       struct buffer_head *bh))
  {
         struct buffer_head *bh;
         unsigned block_start, block_end;
@@ -395,7 +401,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
                 down_read(&OCFS2_I(inode)->ip_alloc_sem);
         }
  
-       err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL);
+       err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, NULL);
  
         if (!INODE_JOURNAL(inode)) {
                 up_read(&OCFS2_I(inode)->ip_alloc_sem);
@@ -435,8 +441,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
                                      struct buffer_head *bh_result, int create)
  {
         int ret;
-       u64 p_blkno, inode_blocks;
-       int contig_blocks;
+       u64 p_blkno, inode_blocks, contig_blocks;
+       unsigned int ext_flags;
         unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
         unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
  
@@ -457,7 +463,7 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
         /* This figures out the size of the next contiguous block, and
          * our logical offset */
         ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
-                                         &contig_blocks);
+                                         &contig_blocks, &ext_flags);
         if (ret) {
                 mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
                      (unsigned long long)iblock);
@@ -477,8 +483,10 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
         /*
          * get_more_blocks() expects us to describe a hole by clearing
          * the mapped bit on bh_result().
+        *
+        * Consider an unwritten extent as a hole.
          */
-       if (p_blkno)
+       if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
                 map_bh(bh_result, inode->i_sb, p_blkno);
         else {
                 /*
@@ -516,12 +524,17 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
                              void *private)
  {
         struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+       int level;
  
         /* this io's submitter should not have unlocked this before we could */
         BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
+
         ocfs2_iocb_clear_rw_locked(iocb);
-       up_read(&inode->i_alloc_sem);
-       ocfs2_rw_unlock(inode, 0);
+
+       level = ocfs2_iocb_rw_locked_level(iocb);
+       if (!level)
+               up_read(&inode->i_alloc_sem);
+       ocfs2_rw_unlock(inode, level);
  }
  
  /*
@@ -654,9 +667,9 @@ static void ocfs2_clear_page_regions(struct page *page,
   *
   * This will also skip zeroing, which is handled externally.
   */
-static int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
-                                struct inode *inode, unsigned int from,
-                                unsigned int to, int new)
+int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
+                         struct inode *inode, unsigned int from,
+                         unsigned int to, int new)
  {
         int ret = 0;
         struct buffer_head *head, *bh, *wait[2], **wait_bh = wait;
@@ -675,8 +688,7 @@ static int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
                  * Ignore blocks outside of our i/o range -
                  * they may belong to unallocated clusters.
                  */
-               if (block_start >= to ||
-                   (block_start + bsize) <= from) {
+               if (block_start >= to || block_end <= from) {
                         if (PageUptodate(page))
                                 set_buffer_uptodate(bh);
                         continue;
@@ -749,6 +761,74 @@ next_bh:
         return ret;
  }
  
+/*
+ * Map the blocks backing wc->w_this_page, then copy up to wc->w_count
+ * bytes of splice data from the pipe buffer (wc->w_private) into that
+ * page. SPLICE_F_MOVE is ignored for now, as honoring it would require
+ * extra communication out all the way to ocfs2_write().
+ *
+ * On success, *ret_from and *ret_to are set to the page range written
+ * and the number of bytes copied is returned. If mapping the page's
+ * blocks fails, that error is returned and nothing is copied.
+ */
+int ocfs2_map_and_write_splice_data(struct inode *inode,
+                                 struct ocfs2_write_ctxt *wc, u64 *p_blkno,
+                                 unsigned int *ret_from, unsigned int *ret_to)
+{
+       int ret;
+       unsigned int to, from, cluster_start, cluster_end;
+       char *src, *dst;
+       struct ocfs2_splice_write_priv *sp = wc->w_private;
+       struct pipe_buffer *buf = sp->s_buf;
+       unsigned long bytes, src_from;
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+       ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start,
+                                       &cluster_end);
+
+       from = sp->s_offset;
+       src_from = sp->s_buf_offset;
+       bytes = wc->w_count;
+
+       if (wc->w_large_pages) {
+               /* Cluster size < page size: never copy past the cluster's end. */
+               bytes = min(bytes, (unsigned long)(osb->s_clustersize
+                                  - (wc->w_pos & (osb->s_clustersize - 1))));
+       }
+       to = from + bytes;
+
+       BUG_ON(from > PAGE_CACHE_SIZE);
+       BUG_ON(to > PAGE_CACHE_SIZE);
+       BUG_ON(from < cluster_start);
+       BUG_ON(to > cluster_end);
+
+       /* New page: map the full cluster range. Otherwise only [from, to). */
+       if (wc->w_this_page_new)
+               ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
+                                           cluster_start, cluster_end, 1);
+       else
+               ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
+                                           from, to, 0);
+       if (ret) {
+               mlog_errno(ret);
+               goto out;
+       }
+
+       src = buf->ops->map(sp->s_pipe, buf, 1);
+       dst = kmap_atomic(wc->w_this_page, KM_USER1);
+       memcpy(dst + from, src + src_from, bytes);
+       /* kunmap_atomic() wants the mapped address, not the struct page. */
+       kunmap_atomic(dst, KM_USER1);
+       buf->ops->unmap(sp->s_pipe, buf, src);
+
+       wc->w_finished_copy = 1;
+       *ret_from = from;
+       *ret_to = to;
+out:
+       /* Report the failure rather than a byte count we never copied. */
+       return ret ? ret : (int)bytes;
+}
+
  /*
   * This will copy user data from the iovec in the buffered write
   * context.
@@ -810,6 +890,11 @@ int ocfs2_map_and_write_user_data(struct inode *inode,
  
         to = from + bytes;
  
+       BUG_ON(from > PAGE_CACHE_SIZE);
+       BUG_ON(to > PAGE_CACHE_SIZE);
+       BUG_ON(from < cluster_start);
+       BUG_ON(to > cluster_end);
+
         if (wc->w_this_page_new)
                 ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
                                             cluster_start, cluster_end, 1);
@@ -821,11 +906,6 @@ int ocfs2_map_and_write_user_data(struct inode *inode,
                 goto out;
         }
  
-       BUG_ON(from > PAGE_CACHE_SIZE);
-       BUG_ON(to > PAGE_CACHE_SIZE);
-       BUG_ON(from > osb->s_clustersize);
-       BUG_ON(to > osb->s_clustersize);
-
         dst = kmap(wc->w_this_page);
         memcpy(dst + from, bp->b_src_buf + src_from, bytes);
         kunmap(wc->w_this_page);
@@ -861,9 +941,9 @@ out:
   * Returns a negative error code or the number of bytes copied into
   * the page.
   */
-int ocfs2_write_data_page(struct inode *inode, handle_t *handle,
-                         u64 *p_blkno, struct page *page,
-                         struct ocfs2_write_ctxt *wc, int new)
+static int ocfs2_write_data_page(struct inode *inode, handle_t *handle,
+                                u64 *p_blkno, struct page *page,
+                                struct ocfs2_write_ctxt *wc, int new)
  {
         int ret, copied = 0;
         unsigned int from = 0, to = 0;
@@ -971,7 +1051,6 @@ static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle,
         u64 v_blkno, p_blkno;
         struct address_space *mapping = file->f_mapping;
         struct inode *inode = mapping->host;
-       unsigned int cbits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
         unsigned long index, start;
         struct page **cpages;
  
@@ -979,13 +1058,11 @@ static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle,
  
         /*
          * Figure out how many pages we'll be manipulating here. For
-        * non-allocating write, or any writes where cluster size is
-        * less than page size, we only need one page. Otherwise,
-        * allocating writes of cluster size larger than page size
-        * need cluster size pages.
+        * non-allocating write, we just change the one
+        * page. Otherwise, we'll need a whole cluster's worth.
          */
-       if (new && !wc->w_large_pages)
-               numpages = (1 << cbits) / PAGE_SIZE;
+       if (new)
+               numpages = ocfs2_pages_per_cluster(inode->i_sb);
  
         cpages = kzalloc(sizeof(*cpages) * numpages, GFP_NOFS);
         if (!cpages) {
@@ -1011,7 +1088,7 @@ static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle,
         for(i = 0; i < numpages; i++) {
                 index = start + i;
  
-               cpages[i] = grab_cache_page(mapping, index);
+               cpages[i] = find_or_create_page(mapping, index, GFP_NOFS);
                 if (!cpages[i]) {
                         ret = -ENOMEM;
                         mlog_errno(ret);
@@ -1046,7 +1123,8 @@ static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle,
                 }
         }
  
-       ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL);
+       ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL,
+                                         NULL);
         if (ret < 0) {
  
                 /*
@@ -1150,7 +1228,7 @@ ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos,
          */
         down_write(&OCFS2_I(inode)->ip_alloc_sem);
  
-       ret = ocfs2_get_clusters(inode, wc.w_cpos, &phys, NULL);
+       ret = ocfs2_get_clusters(inode, wc.w_cpos, &phys, NULL, NULL);
         if (ret) {
                 mlog_errno(ret);
                 goto out_meta;
@@ -1200,7 +1278,7 @@ ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos,
                 i_size_write(inode, pos);
                 mark_inode_dirty(inode);
         }
-       inode->i_blocks = ocfs2_align_bytes_to_sectors((u64)(i_size_read(inode)));
+       inode->i_blocks = ocfs2_inode_sector_count(inode);
         di->i_size = cpu_to_le64((u64)i_size_read(inode));
         inode->i_mtime = inode->i_ctime = CURRENT_TIME;
         di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);