X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=fs%2Focfs2%2Ffile.c;h=10953a508f2f3dfdd158102b25d367250834d5ff;hb=00977a59b951207d38380c75f03a36829950265c;hp=fe6b795b1a45e89c13326c268490e0902e12bd10;hpb=1fabe1481fac9e01bf8bffa60a2307ef379aa5de;p=linux-2.6 diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index fe6b795b1a..10953a508f 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -31,6 +31,8 @@ #include #include #include +#include +#include #define MLOG_MASK_PREFIX ML_INODE #include @@ -66,7 +68,7 @@ static int ocfs2_file_open(struct inode *inode, struct file *file) struct ocfs2_inode_info *oi = OCFS2_I(inode); mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, - file->f_dentry->d_name.len, file->f_dentry->d_name.name); + file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name); spin_lock(&oi->ip_lock); @@ -96,8 +98,8 @@ static int ocfs2_file_release(struct inode *inode, struct file *file) struct ocfs2_inode_info *oi = OCFS2_I(inode); mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, - file->f_dentry->d_name.len, - file->f_dentry->d_name.name); + file->f_path.dentry->d_name.len, + file->f_path.dentry->d_name.name); spin_lock(&oi->ip_lock); if (!--oi->ip_open_count) @@ -134,6 +136,76 @@ bail: return (err < 0) ? -EIO : 0; } +int ocfs2_should_update_atime(struct inode *inode, + struct vfsmount *vfsmnt) +{ + struct timespec now; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) + return 0; + + if ((inode->i_flags & S_NOATIME) || + ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))) + return 0; + + /* + * We can be called with no vfsmnt structure - NFSD will + * sometimes do this. + * + * Note that our action here is different than touch_atime() - + * if we can't tell whether this is a noatime mount, then we + * don't know whether to trust the value of s_atime_quantum. + */ + if (vfsmnt == NULL) + return 0; + + if ((vfsmnt->mnt_flags & MNT_NOATIME) || + ((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) + return 0; + + if (vfsmnt->mnt_flags & MNT_RELATIME) { + if ((timespec_compare(&inode->i_atime, &inode->i_mtime) <= 0) || + (timespec_compare(&inode->i_atime, &inode->i_ctime) <= 0)) + return 1; + + return 0; + } + + now = CURRENT_TIME; + if ((now.tv_sec - inode->i_atime.tv_sec <= osb->s_atime_quantum)) + return 0; + else + return 1; +} + +int ocfs2_update_inode_atime(struct inode *inode, + struct buffer_head *bh) +{ + int ret; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + handle_t *handle; + + mlog_entry_void(); + + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); + if (handle == NULL) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + inode->i_atime = CURRENT_TIME; + ret = ocfs2_mark_inode_dirty(handle, inode, bh); + if (ret < 0) + mlog_errno(ret); + + ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); +out: + mlog_exit(ret); + return ret; +} + int ocfs2_set_inode_size(handle_t *handle, struct inode *inode, struct buffer_head *fe_bh, @@ -892,6 +964,26 @@ bail: return err; } +int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + int ret; + + mlog_entry_void(); + + ret = ocfs2_meta_lock(inode, NULL, 0); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = generic_permission(inode, mask, NULL); + + ocfs2_meta_unlock(inode, 0); +out: + mlog_exit(ret); + return ret; +} + static int ocfs2_write_remove_suid(struct inode *inode) { int ret; @@ -943,67 +1035,21 @@ out: return ret; } -static inline int ocfs2_write_should_remove_suid(struct inode *inode) -{ - mode_t mode = inode->i_mode; - - if (!capable(CAP_FSETID)) { - if (unlikely(mode & S_ISUID)) - return 1; - - if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) - return 1; - } - return 0; -} - -static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) +static int ocfs2_prepare_inode_for_write(struct dentry *dentry, + loff_t *ppos, + size_t count, + int appending) { - int ret, rw_level = -1, meta_level = -1, have_alloc_sem = 0; + int ret = 0, meta_level = appending; + struct inode *inode = dentry->d_inode; u32 clusters; - struct file *filp = iocb->ki_filp; - struct inode *inode = filp->f_dentry->d_inode; loff_t newsize, saved_pos; - mlog_entry("(0x%p, %u, '%.*s')\n", filp, - (unsigned int)nr_segs, - filp->f_dentry->d_name.len, - filp->f_dentry->d_name.name); - - /* happy write of zero bytes */ - if (iocb->ki_left == 0) - return 0; - - if (!inode) { - mlog(0, "bad inode\n"); - return -EIO; - } - - mutex_lock(&inode->i_mutex); - /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ - if (filp->f_flags & O_DIRECT) { - have_alloc_sem = 1; - down_read(&inode->i_alloc_sem); - } - - /* concurrent O_DIRECT writes are allowed */ - rw_level = (filp->f_flags & O_DIRECT) ? 0 : 1; - ret = ocfs2_rw_lock(inode, rw_level); - if (ret < 0) { - rw_level = -1; - mlog_errno(ret); - goto out; - } - /* * We sample i_size under a read level meta lock to see if our write * is extending the file, if it is we back off and get a write level * meta lock. */ - meta_level = (filp->f_flags & O_APPEND) ? 1 : 0; for(;;) { ret = ocfs2_meta_lock(inode, NULL, meta_level); if (ret < 0) { @@ -1021,7 +1067,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, * inode. There's also the dinode i_size state which * can be lost via setattr during extending writes (we * set inode->i_size at the end of a write. */ - if (ocfs2_write_should_remove_suid(inode)) { + if (should_remove_suid(dentry)) { if (meta_level == 0) { ocfs2_meta_unlock(inode, meta_level); meta_level = 1; @@ -1031,19 +1077,19 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, ret = ocfs2_write_remove_suid(inode); if (ret < 0) { mlog_errno(ret); - goto out; + goto out_unlock; } } /* work on a copy of ppos until we're sure that we won't have * to recalculate it due to relocking. */ - if (filp->f_flags & O_APPEND) { + if (appending) { saved_pos = i_size_read(inode); mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos); } else { - saved_pos = iocb->ki_pos; + saved_pos = *ppos; } - newsize = iocb->ki_left + saved_pos; + newsize = count + saved_pos; mlog(0, "pos=%lld newsize=%lld cursize=%lld\n", (long long) saved_pos, (long long) newsize, @@ -1076,19 +1122,66 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, if (!clusters) break; - ret = ocfs2_extend_file(inode, NULL, newsize, iocb->ki_left); + ret = ocfs2_extend_file(inode, NULL, newsize, count); if (ret < 0) { if (ret != -ENOSPC) mlog_errno(ret); - goto out; + goto out_unlock; } break; } - /* ok, we're done with i_size and alloc work */ - iocb->ki_pos = saved_pos; + if (appending) + *ppos = saved_pos; + +out_unlock: ocfs2_meta_unlock(inode, meta_level); - meta_level = -1; + +out: + return ret; +} + +static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, + const struct iovec *iov, + unsigned long nr_segs, + loff_t pos) +{ + int ret, rw_level, have_alloc_sem = 0; + struct file *filp = iocb->ki_filp; + struct inode *inode = filp->f_path.dentry->d_inode; + int appending = filp->f_flags & O_APPEND ? 1 : 0; + + mlog_entry("(0x%p, %u, '%.*s')\n", filp, + (unsigned int)nr_segs, + filp->f_path.dentry->d_name.len, + filp->f_path.dentry->d_name.name); + + /* happy write of zero bytes */ + if (iocb->ki_left == 0) + return 0; + + mutex_lock(&inode->i_mutex); + /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ + if (filp->f_flags & O_DIRECT) { + have_alloc_sem = 1; + down_read(&inode->i_alloc_sem); + } + + /* concurrent O_DIRECT writes are allowed */ + rw_level = (filp->f_flags & O_DIRECT) ? 0 : 1; + ret = ocfs2_rw_lock(inode, rw_level); + if (ret < 0) { + rw_level = -1; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_prepare_inode_for_write(filp->f_path.dentry, &iocb->ki_pos, + iocb->ki_left, appending); + if (ret < 0) { + mlog_errno(ret); + goto out; + } /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb); @@ -1114,8 +1207,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, } out: - if (meta_level != -1) - ocfs2_meta_unlock(inode, meta_level); if (have_alloc_sem) up_read(&inode->i_alloc_sem); if (rw_level != -1) @@ -1126,19 +1217,90 @@ out: return ret; } +static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, + struct file *out, + loff_t *ppos, + size_t len, + unsigned int flags) +{ + int ret; + struct inode *inode = out->f_path.dentry->d_inode; + + mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe, + (unsigned int)len, + out->f_path.dentry->d_name.len, + out->f_path.dentry->d_name.name); + + inode_double_lock(inode, pipe->inode); + + ret = ocfs2_rw_lock(inode, 1); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0); + if (ret < 0) { + mlog_errno(ret); + goto out_unlock; + } + + /* ok, we're done with i_size and alloc work */ + ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); + +out_unlock: + ocfs2_rw_unlock(inode, 1); +out: + inode_double_unlock(inode, pipe->inode); + + mlog_exit(ret); + return ret; +} + +static ssize_t ocfs2_file_splice_read(struct file *in, + loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, + unsigned int flags) +{ + int ret = 0; + struct inode *inode = in->f_path.dentry->d_inode; + + mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe, + (unsigned int)len, + in->f_path.dentry->d_name.len, + in->f_path.dentry->d_name.name); + + /* + * See the comment in ocfs2_file_aio_read() + */ + ret = ocfs2_meta_lock(inode, NULL, 0); + if (ret < 0) { + mlog_errno(ret); + goto bail; + } + ocfs2_meta_unlock(inode, 0); + + ret = generic_file_splice_read(in, ppos, pipe, len, flags); + +bail: + mlog_exit(ret); + return ret; +} + static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - int ret = 0, rw_level = -1, have_alloc_sem = 0; + int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0; struct file *filp = iocb->ki_filp; - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = filp->f_path.dentry->d_inode; mlog_entry("(0x%p, %u, '%.*s')\n", filp, (unsigned int)nr_segs, - filp->f_dentry->d_name.len, - filp->f_dentry->d_name.name); + filp->f_path.dentry->d_name.len, + filp->f_path.dentry->d_name.name); if (!inode) { ret = -EINVAL; @@ -1173,12 +1335,12 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, * like i_size. This allows the checks down below * generic_file_aio_read() a chance of actually working. */ - ret = ocfs2_meta_lock(inode, NULL, 0); + ret = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level); if (ret < 0) { mlog_errno(ret); goto bail; } - ocfs2_meta_unlock(inode, 0); + ocfs2_meta_unlock(inode, lock_level); ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); if (ret == -EINVAL) @@ -1206,11 +1368,13 @@ bail: struct inode_operations ocfs2_file_iops = { .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, + .permission = ocfs2_permission, }; struct inode_operations ocfs2_special_file_iops = { .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, + .permission = ocfs2_permission, }; const struct file_operations ocfs2_fops = { @@ -1224,6 +1388,8 @@ const struct file_operations ocfs2_fops = { .aio_read = ocfs2_file_aio_read, .aio_write = ocfs2_file_aio_write, .ioctl = ocfs2_ioctl, + .splice_read = ocfs2_file_splice_read, + .splice_write = ocfs2_file_splice_write, }; const struct file_operations ocfs2_dops = {