X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=fs%2Focfs2%2Ffile.c;h=10953a508f2f3dfdd158102b25d367250834d5ff;hb=00977a59b951207d38380c75f03a36829950265c;hp=fe6b795b1a45e89c13326c268490e0902e12bd10;hpb=1fabe1481fac9e01bf8bffa60a2307ef379aa5de;p=linux-2.6

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index fe6b795b1a..10953a508f 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -31,6 +31,8 @@
 #include <linux/pagemap.h>
 #include <linux/uio.h>
 #include <linux/sched.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mount.h>
 
 #define MLOG_MASK_PREFIX ML_INODE
 #include <cluster/masklog.h>
@@ -66,7 +68,7 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 
 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
-		   file->f_dentry->d_name.len, file->f_dentry->d_name.name);
+		   file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name);
 
 	spin_lock(&oi->ip_lock);
 
@@ -96,8 +98,8 @@ static int ocfs2_file_release(struct inode *inode, struct file *file)
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 
 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
-		       file->f_dentry->d_name.len,
-		       file->f_dentry->d_name.name);
+		       file->f_path.dentry->d_name.len,
+		       file->f_path.dentry->d_name.name);
 
 	spin_lock(&oi->ip_lock);
 	if (!--oi->ip_open_count)
@@ -134,6 +136,76 @@ bail:
 	return (err < 0) ? -EIO : 0;
 }
 
+int ocfs2_should_update_atime(struct inode *inode,
+			      struct vfsmount *vfsmnt)
+{
+	struct timespec now;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
+		return 0;
+
+	if ((inode->i_flags & S_NOATIME) ||
+	    ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)))
+		return 0;
+
+	/*
+	 * We can be called with no vfsmnt structure - NFSD will
+	 * sometimes do this.
+	 *
+	 * Note that our action here is different than touch_atime() -
+	 * if we can't tell whether this is a noatime mount, then we
+	 * don't know whether to trust the value of s_atime_quantum.
+	 */
+	if (vfsmnt == NULL)
+		return 0;
+
+	if ((vfsmnt->mnt_flags & MNT_NOATIME) ||
+	    ((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
+		return 0;
+
+	if (vfsmnt->mnt_flags & MNT_RELATIME) {
+		if ((timespec_compare(&inode->i_atime, &inode->i_mtime) <= 0) ||
+		    (timespec_compare(&inode->i_atime, &inode->i_ctime) <= 0))
+			return 1;
+
+		return 0;
+	}
+
+	now = CURRENT_TIME;
+	if ((now.tv_sec - inode->i_atime.tv_sec <= osb->s_atime_quantum))
+		return 0;
+	else
+		return 1;
+}
+
+int ocfs2_update_inode_atime(struct inode *inode,
+			     struct buffer_head *bh)
+{
+	int ret;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	handle_t *handle;
+
+	mlog_entry_void();
+
+	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+	if (handle == NULL) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	inode->i_atime = CURRENT_TIME;
+	ret = ocfs2_mark_inode_dirty(handle, inode, bh);
+	if (ret < 0)
+		mlog_errno(ret);
+
+	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+out:
+	mlog_exit(ret);
+	return ret;
+}
+
 int ocfs2_set_inode_size(handle_t *handle,
 			 struct inode *inode,
 			 struct buffer_head *fe_bh,
@@ -892,6 +964,26 @@ bail:
 	return err;
 }
 
+int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	int ret;
+
+	mlog_entry_void();
+
+	ret = ocfs2_meta_lock(inode, NULL, 0);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = generic_permission(inode, mask, NULL);
+
+	ocfs2_meta_unlock(inode, 0);
+out:
+	mlog_exit(ret);
+	return ret;
+}
+
 static int ocfs2_write_remove_suid(struct inode *inode)
 {
 	int ret;
@@ -943,67 +1035,21 @@ out:
 	return ret;
 }
 
-static inline int ocfs2_write_should_remove_suid(struct inode *inode)
-{
-	mode_t mode = inode->i_mode;
-
-	if (!capable(CAP_FSETID)) {
-		if (unlikely(mode & S_ISUID))
-			return 1;
-
-		if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
-			return 1;
-	}
-	return 0;
-}
-
-static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
-				    const struct iovec *iov,
-				    unsigned long nr_segs,
-				    loff_t pos)
+static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
+					 loff_t *ppos,
+					 size_t count,
+					 int appending)
 {
-	int ret, rw_level = -1, meta_level = -1, have_alloc_sem = 0;
+	int ret = 0, meta_level = appending;
+	struct inode *inode = dentry->d_inode;
 	u32 clusters;
-	struct file *filp = iocb->ki_filp;
-	struct inode *inode = filp->f_dentry->d_inode;
 	loff_t newsize, saved_pos;
 
-	mlog_entry("(0x%p, %u, '%.*s')\n", filp,
-		   (unsigned int)nr_segs,
-		   filp->f_dentry->d_name.len,
-		   filp->f_dentry->d_name.name);
-
-	/* happy write of zero bytes */
-	if (iocb->ki_left == 0)
-		return 0;
-
-	if (!inode) {
-		mlog(0, "bad inode\n");
-		return -EIO;
-	}
-
-	mutex_lock(&inode->i_mutex);
-	/* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
-	if (filp->f_flags & O_DIRECT) {
-		have_alloc_sem = 1;
-		down_read(&inode->i_alloc_sem);
-	}
-
-	/* concurrent O_DIRECT writes are allowed */
-	rw_level = (filp->f_flags & O_DIRECT) ? 0 : 1;
-	ret = ocfs2_rw_lock(inode, rw_level);
-	if (ret < 0) {
-		rw_level = -1;
-		mlog_errno(ret);
-		goto out;
-	}
-
 	/* 
 	 * We sample i_size under a read level meta lock to see if our write
 	 * is extending the file, if it is we back off and get a write level
 	 * meta lock.
 	 */
-	meta_level = (filp->f_flags & O_APPEND) ? 1 : 0;
 	for(;;) {
 		ret = ocfs2_meta_lock(inode, NULL, meta_level);
 		if (ret < 0) {
@@ -1021,7 +1067,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 		 * inode. There's also the dinode i_size state which
 		 * can be lost via setattr during extending writes (we
 		 * set inode->i_size at the end of a write. */
-		if (ocfs2_write_should_remove_suid(inode)) {
+		if (should_remove_suid(dentry)) {
 			if (meta_level == 0) {
 				ocfs2_meta_unlock(inode, meta_level);
 				meta_level = 1;
@@ -1031,19 +1077,19 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 			ret = ocfs2_write_remove_suid(inode);
 			if (ret < 0) {
 				mlog_errno(ret);
-				goto out;
+				goto out_unlock;
 			}
 		}
 
 		/* work on a copy of ppos until we're sure that we won't have
 		 * to recalculate it due to relocking. */
-		if (filp->f_flags & O_APPEND) {
+		if (appending) {
 			saved_pos = i_size_read(inode);
 			mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos);
 		} else {
-			saved_pos = iocb->ki_pos;
+			saved_pos = *ppos;
 		}
-		newsize = iocb->ki_left + saved_pos;
+		newsize = count + saved_pos;
 
 		mlog(0, "pos=%lld newsize=%lld cursize=%lld\n",
 		     (long long) saved_pos, (long long) newsize,
@@ -1076,19 +1122,66 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 		if (!clusters)
 			break;
 
-		ret = ocfs2_extend_file(inode, NULL, newsize, iocb->ki_left);
+		ret = ocfs2_extend_file(inode, NULL, newsize, count);
 		if (ret < 0) {
 			if (ret != -ENOSPC)
 				mlog_errno(ret);
-			goto out;
+			goto out_unlock;
 		}
 		break;
 	}
 
-	/* ok, we're done with i_size and alloc work */
-	iocb->ki_pos = saved_pos;
+	if (appending)
+		*ppos = saved_pos;
+
+out_unlock:
 	ocfs2_meta_unlock(inode, meta_level);
-	meta_level = -1;
+
+out:
+	return ret;
+}
+
+static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
+				    const struct iovec *iov,
+				    unsigned long nr_segs,
+				    loff_t pos)
+{
+	int ret, rw_level, have_alloc_sem = 0;
+	struct file *filp = iocb->ki_filp;
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	int appending = filp->f_flags & O_APPEND ? 1 : 0;
+
+	mlog_entry("(0x%p, %u, '%.*s')\n", filp,
+		   (unsigned int)nr_segs,
+		   filp->f_path.dentry->d_name.len,
+		   filp->f_path.dentry->d_name.name);
+
+	/* happy write of zero bytes */
+	if (iocb->ki_left == 0)
+		return 0;
+
+	mutex_lock(&inode->i_mutex);
+	/* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
+	if (filp->f_flags & O_DIRECT) {
+		have_alloc_sem = 1;
+		down_read(&inode->i_alloc_sem);
+	}
+
+	/* concurrent O_DIRECT writes are allowed */
+	rw_level = (filp->f_flags & O_DIRECT) ? 0 : 1;
+	ret = ocfs2_rw_lock(inode, rw_level);
+	if (ret < 0) {
+		rw_level = -1;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_prepare_inode_for_write(filp->f_path.dentry, &iocb->ki_pos,
+					    iocb->ki_left, appending);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
 
 	/* communicate with ocfs2_dio_end_io */
 	ocfs2_iocb_set_rw_locked(iocb);
@@ -1114,8 +1207,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 	}
 
 out:
-	if (meta_level != -1)
-		ocfs2_meta_unlock(inode, meta_level);
 	if (have_alloc_sem)
 		up_read(&inode->i_alloc_sem);
 	if (rw_level != -1) 
@@ -1126,19 +1217,90 @@ out:
 	return ret;
 }
 
+static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
+				       struct file *out,
+				       loff_t *ppos,
+				       size_t len,
+				       unsigned int flags)
+{
+	int ret;
+	struct inode *inode = out->f_path.dentry->d_inode;
+
+	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
+		   (unsigned int)len,
+		   out->f_path.dentry->d_name.len,
+		   out->f_path.dentry->d_name.name);
+
+	inode_double_lock(inode, pipe->inode);
+
+	ret = ocfs2_rw_lock(inode, 1);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out_unlock;
+	}
+
+	/* ok, we're done with i_size and alloc work */
+	ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);
+
+out_unlock:
+	ocfs2_rw_unlock(inode, 1);
+out:
+	inode_double_unlock(inode, pipe->inode);
+
+	mlog_exit(ret);
+	return ret;
+}
+
+static ssize_t ocfs2_file_splice_read(struct file *in,
+				      loff_t *ppos,
+				      struct pipe_inode_info *pipe,
+				      size_t len,
+				      unsigned int flags)
+{
+	int ret = 0;
+	struct inode *inode = in->f_path.dentry->d_inode;
+
+	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
+		   (unsigned int)len,
+		   in->f_path.dentry->d_name.len,
+		   in->f_path.dentry->d_name.name);
+
+	/*
+	 * See the comment in ocfs2_file_aio_read()
+	 */
+	ret = ocfs2_meta_lock(inode, NULL, 0);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto bail;
+	}
+	ocfs2_meta_unlock(inode, 0);
+
+	ret = generic_file_splice_read(in, ppos, pipe, len, flags);
+
+bail:
+	mlog_exit(ret);
+	return ret;
+}
+
 static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 				   const struct iovec *iov,
 				   unsigned long nr_segs,
 				   loff_t pos)
 {
-	int ret = 0, rw_level = -1, have_alloc_sem = 0;
+	int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0;
 	struct file *filp = iocb->ki_filp;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 
 	mlog_entry("(0x%p, %u, '%.*s')\n", filp,
 		   (unsigned int)nr_segs,
-		   filp->f_dentry->d_name.len,
-		   filp->f_dentry->d_name.name);
+		   filp->f_path.dentry->d_name.len,
+		   filp->f_path.dentry->d_name.name);
 
 	if (!inode) {
 		ret = -EINVAL;
@@ -1173,12 +1335,12 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 	 * like i_size. This allows the checks down below
 	 * generic_file_aio_read() a chance of actually working. 
 	 */
-	ret = ocfs2_meta_lock(inode, NULL, 0);
+	ret = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto bail;
 	}
-	ocfs2_meta_unlock(inode, 0);
+	ocfs2_meta_unlock(inode, lock_level);
 
 	ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
 	if (ret == -EINVAL)
@@ -1206,11 +1368,13 @@ bail:
 struct inode_operations ocfs2_file_iops = {
 	.setattr	= ocfs2_setattr,
 	.getattr	= ocfs2_getattr,
+	.permission	= ocfs2_permission,
 };
 
 struct inode_operations ocfs2_special_file_iops = {
 	.setattr	= ocfs2_setattr,
 	.getattr	= ocfs2_getattr,
+	.permission	= ocfs2_permission,
 };
 
 const struct file_operations ocfs2_fops = {
@@ -1224,6 +1388,8 @@ const struct file_operations ocfs2_fops = {
 	.aio_read	= ocfs2_file_aio_read,
 	.aio_write	= ocfs2_file_aio_write,
 	.ioctl		= ocfs2_ioctl,
+	.splice_read	= ocfs2_file_splice_read,
+	.splice_write	= ocfs2_file_splice_write,
 };
 
 const struct file_operations ocfs2_dops = {