#include <linux/pagemap.h>
#include <asm/byteorder.h>
#include <linux/swap.h>
+#include <linux/pipe_fs_i.h>
#define MLOG_MASK_PREFIX ML_FILE_IO
#include <cluster/masklog.h>
if (!OCFS2_IS_VALID_DINODE(fe)) {
mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
- (unsigned long long)fe->i_blkno, 7, fe->i_signature);
+ (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
+ fe->i_signature);
goto bail;
}
struct buffer_head *bh_result, int create)
{
int err = 0;
+ unsigned int ext_flags;
u64 p_blkno, past_eof;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
goto bail;
}
- err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, NULL);
+ err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, NULL,
+ &ext_flags);
if (err) {
mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, "
"%llu, NULL)\n", err, inode, (unsigned long long)iblock,
"ino %lu, iblock %llu\n", inode->i_ino,
(unsigned long long)iblock);
- if (p_blkno)
+ /* Treat the unwritten extent as a hole for zeroing purposes. */
+ if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
map_bh(bh_result, inode->i_sb, p_blkno);
if (!ocfs2_sparse_alloc(osb)) {
goto out;
}
- down_read(&OCFS2_I(inode)->ip_alloc_sem);
+ if (down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem) == 0) {
+ ret = AOP_TRUNCATED_PAGE;
+ goto out_meta_unlock;
+ }
/*
* i_size might have just been updated as we grabed the meta lock. We
* XXX sys_readahead() seems to get that wrong?
*/
if (start >= i_size_read(inode)) {
- char *addr = kmap(page);
- memset(addr, 0, PAGE_SIZE);
- flush_dcache_page(page);
- kunmap(page);
+ zero_user_page(page, 0, PAGE_SIZE, KM_USER0);
SetPageUptodate(page);
ret = 0;
goto out_alloc;
ocfs2_data_unlock(inode, 0);
out_alloc:
up_read(&OCFS2_I(inode)->ip_alloc_sem);
+out_meta_unlock:
ocfs2_meta_unlock(inode, 0);
out:
if (unlock)
* functionality yet, but IMHO it's better to cut and paste the whole
* thing so we can avoid introducing our own bugs (and easily pick up
* their fixes when they happen) --Mark */
-static int walk_page_buffers( handle_t *handle,
- struct buffer_head *head,
- unsigned from,
- unsigned to,
- int *partial,
- int (*fn)( handle_t *handle,
- struct buffer_head *bh))
+int walk_page_buffers( handle_t *handle,
+ struct buffer_head *head,
+ unsigned from,
+ unsigned to,
+ int *partial,
+ int (*fn)( handle_t *handle,
+ struct buffer_head *bh))
{
struct buffer_head *bh;
unsigned block_start, block_end;
down_read(&OCFS2_I(inode)->ip_alloc_sem);
}
- err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL);
+ err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, NULL);
if (!INODE_JOURNAL(inode)) {
up_read(&OCFS2_I(inode)->ip_alloc_sem);
struct buffer_head *bh_result, int create)
{
int ret;
- u64 p_blkno, inode_blocks;
- int contig_blocks;
+ u64 p_blkno, inode_blocks, contig_blocks;
+ unsigned int ext_flags;
unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
/* This figures out the size of the next contiguous block, and
* our logical offset */
ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
- &contig_blocks);
+ &contig_blocks, &ext_flags);
if (ret) {
mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
(unsigned long long)iblock);
/*
* get_more_blocks() expects us to describe a hole by clearing
* the mapped bit on bh_result().
+ *
+ * Consider an unwritten extent as a hole.
*/
- if (p_blkno)
+ if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
map_bh(bh_result, inode->i_sb, p_blkno);
else {
/*
void *private)
{
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+ int level;
/* this io's submitter should not have unlocked this before we could */
BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
+
ocfs2_iocb_clear_rw_locked(iocb);
- up_read(&inode->i_alloc_sem);
- ocfs2_rw_unlock(inode, 0);
+
+ level = ocfs2_iocb_rw_locked_level(iocb);
+ if (!level)
+ up_read(&inode->i_alloc_sem);
+ ocfs2_rw_unlock(inode, level);
}
/*
*
* This will also skip zeroing, which is handled externally.
*/
-static int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
- struct inode *inode, unsigned int from,
- unsigned int to, int new)
+int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
+ struct inode *inode, unsigned int from,
+ unsigned int to, int new)
{
int ret = 0;
struct buffer_head *head, *bh, *wait[2], **wait_bh = wait;
* Ignore blocks outside of our i/o range -
* they may belong to unallocated clusters.
*/
- if (block_start >= to ||
- (block_start + bsize) <= from) {
+ if (block_start >= to || block_end <= from) {
if (PageUptodate(page))
set_buffer_uptodate(bh);
continue;
return ret;
}
+/*
+ * This will copy user data from the buffer page in the splice
+ * context.
+ *
+ * The destination range within wc->w_this_page starts at the splice
+ * private offset (sp->s_offset) and covers wc->w_count bytes, clamped
+ * to the end of the current cluster when wc->w_large_pages is set.
+ * The page's blocks are mapped via ocfs2_map_page_blocks() before any
+ * data is copied.
+ *
+ * On success, *ret_from and *ret_to are set to the byte range written
+ * within the page and the number of bytes copied is returned. If
+ * mapping the page blocks fails, nothing is copied, *ret_from and
+ * *ret_to are left untouched and the error from
+ * ocfs2_map_page_blocks() is returned.
+ *
+ * For now, we ignore SPLICE_F_MOVE as that would require some extra
+ * communication out all the way to ocfs2_write().
+ */
+int ocfs2_map_and_write_splice_data(struct inode *inode,
+				  struct ocfs2_write_ctxt *wc, u64 *p_blkno,
+				  unsigned int *ret_from, unsigned int *ret_to)
+{
+	int ret;
+	unsigned int to, from, cluster_start, cluster_end;
+	char *src, *dst;
+	struct ocfs2_splice_write_priv *sp = wc->w_private;
+	struct pipe_buffer *buf = sp->s_buf;
+	unsigned long bytes, src_from;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+	ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start,
+					&cluster_end);
+
+	from = sp->s_offset;
+	src_from = sp->s_buf_offset;
+	bytes = wc->w_count;
+
+	if (wc->w_large_pages) {
+		/*
+		 * For cluster size < page size, we have to
+		 * calculate pos within the cluster and obey
+		 * the rightmost boundary.
+		 */
+		bytes = min(bytes, (unsigned long)(osb->s_clustersize
+				   - (wc->w_pos & (osb->s_clustersize - 1))));
+	}
+	to = from + bytes;
+
+	/* The copy must stay inside both the page and the cluster. */
+	BUG_ON(from > PAGE_CACHE_SIZE);
+	BUG_ON(to > PAGE_CACHE_SIZE);
+	BUG_ON(from < cluster_start);
+	BUG_ON(to > cluster_end);
+
+	/* A new page maps the whole cluster range, not just the i/o range. */
+	if (wc->w_this_page_new)
+		ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
+					    cluster_start, cluster_end, 1);
+	else
+		ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
+					    from, to, 0);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	src = buf->ops->map(sp->s_pipe, buf, 1);
+	dst = kmap_atomic(wc->w_this_page, KM_USER1);
+	memcpy(dst + from, src + src_from, bytes);
+	/* kunmap_atomic() wants the mapped address, not the struct page. */
+	kunmap_atomic(dst, KM_USER1);
+	buf->ops->unmap(sp->s_pipe, buf, src);
+
+	wc->w_finished_copy = 1;
+
+	*ret_from = from;
+	*ret_to = to;
+out:
+	/*
+	 * Propagate a mapping failure rather than masking it with the
+	 * requested byte count; only report bytes when the copy happened.
+	 */
+	return ret ? ret : (int)bytes;
+}
+
/*
* This will copy user data from the iovec in the buffered write
* context.
to = from + bytes;
+ BUG_ON(from > PAGE_CACHE_SIZE);
+ BUG_ON(to > PAGE_CACHE_SIZE);
+ BUG_ON(from < cluster_start);
+ BUG_ON(to > cluster_end);
+
if (wc->w_this_page_new)
ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
cluster_start, cluster_end, 1);
goto out;
}
- BUG_ON(from > PAGE_CACHE_SIZE);
- BUG_ON(to > PAGE_CACHE_SIZE);
- BUG_ON(from > osb->s_clustersize);
- BUG_ON(to > osb->s_clustersize);
-
dst = kmap(wc->w_this_page);
memcpy(dst + from, bp->b_src_buf + src_from, bytes);
kunmap(wc->w_this_page);
* Returns a negative error code or the number of bytes copied into
* the page.
*/
-int ocfs2_write_data_page(struct inode *inode, handle_t *handle,
- u64 *p_blkno, struct page *page,
- struct ocfs2_write_ctxt *wc, int new)
+static int ocfs2_write_data_page(struct inode *inode, handle_t *handle,
+ u64 *p_blkno, struct page *page,
+ struct ocfs2_write_ctxt *wc, int new)
{
int ret, copied = 0;
unsigned int from = 0, to = 0;
u64 v_blkno, p_blkno;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
- unsigned int cbits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
unsigned long index, start;
struct page **cpages;
/*
* Figure out how many pages we'll be manipulating here. For
- * non-allocating write, or any writes where cluster size is
- * less than page size, we only need one page. Otherwise,
- * allocating writes of cluster size larger than page size
- * need cluster size pages.
+ * non-allocating write, we just change the one
+ * page. Otherwise, we'll need a whole cluster's worth.
*/
- if (new && !wc->w_large_pages)
- numpages = (1 << cbits) / PAGE_SIZE;
+ if (new)
+ numpages = ocfs2_pages_per_cluster(inode->i_sb);
cpages = kzalloc(sizeof(*cpages) * numpages, GFP_NOFS);
if (!cpages) {
for(i = 0; i < numpages; i++) {
index = start + i;
- cpages[i] = grab_cache_page(mapping, index);
+ cpages[i] = find_or_create_page(mapping, index, GFP_NOFS);
if (!cpages[i]) {
ret = -ENOMEM;
mlog_errno(ret);
}
}
- ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL);
+ ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL,
+ NULL);
if (ret < 0) {
/*
*/
down_write(&OCFS2_I(inode)->ip_alloc_sem);
- ret = ocfs2_get_clusters(inode, wc.w_cpos, &phys, NULL);
+ ret = ocfs2_get_clusters(inode, wc.w_cpos, &phys, NULL, NULL);
if (ret) {
mlog_errno(ret);
goto out_meta;
i_size_write(inode, pos);
mark_inode_dirty(inode);
}
- inode->i_blocks = ocfs2_align_bytes_to_sectors((u64)(i_size_read(inode)));
+ inode->i_blocks = ocfs2_inode_sector_count(inode);
di->i_size = cpu_to_le64((u64)i_size_read(inode));
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);