From: David Chinner Date: Tue, 1 Nov 2005 23:33:05 +0000 (+1100) Subject: [XFS] Introduce two new mount options (nolargeio/largeio) to allow X-Git-Tag: v2.6.15-rc1~450^2~2^2~47 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e8c8b3a79d85c22d3665b97dde843dc4d8d7ae37;p=linux-2.6 [XFS] Introduce two new mount options (nolargeio/largeio) to allow filesystems to expose the filesystem stripe width in stat(2) rather than the page cache size. This allows applications requiring high bandwidth to easily determine the optimum I/O size for the underlying filesystem. The default is to report the page cache size (i.e. "nolargeio"). SGI-PV: 942818 SGI-Modid: xfs-linux:xfs-kern:23830a Signed-off-by: David Chinner Signed-off-by: Nathan Scott --- diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index fa87279405..f6f6b6750d 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -189,7 +189,7 @@ xfs_revalidate_inode( break; } - inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blksize = xfs_preferred_iosize(mp); inode->i_generation = ip->i_d.di_gen; i_size_write(inode, ip->i_d.di_size); inode->i_blocks = diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 268f45bf6a..61999649ec 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -124,6 +124,7 @@ vn_revalidate_core( inode->i_mtime = vap->va_mtime; inode->i_ctime = vap->va_ctime; inode->i_atime = vap->va_atime; + inode->i_blksize = vap->va_blocksize; if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) inode->i_flags |= S_IMMUTABLE; else diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h index c93cb282f3..90d9d56c4d 100644 --- a/fs/xfs/xfs_clnt.h +++ b/fs/xfs/xfs_clnt.h @@ -106,5 +106,7 @@ struct xfs_mount_args { #define XFSMNT_IHASHSIZE 0x20000000 /* inode hash table size */ #define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename * symlink,mkdir,rmdir,mknod */ +#define XFSMNT_COMPAT_IOSIZE 0x80000000 /* don't report large preferred + * I/O size in stat() */ #endif /* __XFS_CLNT_H__ */ diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 0653beecf9..b71af184ae 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -421,6 +421,9 @@ typedef struct xfs_mount { * allocation */ #define XFS_MOUNT_IHASHSIZE 0x00100000 /* inode hash table size */ #define XFS_MOUNT_DIRSYNC 0x00200000 /* synchronous directory ops */ +#define XFS_MOUNT_COMPAT_IOSIZE 0x00400000 /* don't report large preferred + * I/O size in stat() */ + /* * Default minimum read and write sizes. @@ -442,6 +445,30 @@ typedef struct xfs_mount { #define XFS_WSYNC_READIO_LOG 15 /* 32K */ #define XFS_WSYNC_WRITEIO_LOG 14 /* 16K */ +/* + * Allow large block sizes to be reported to userspace programs if the + * "largeio" mount option is used. + * + * If compatibility mode is specified, simply return the basic unit of caching + * so that we don't get inefficient read/modify/write I/O from user apps. + * Otherwise.... + * + * If the underlying volume is a stripe, then return the stripe width in bytes + * as the recommended I/O size. It is not a stripe and we've set a default + * buffered I/O size, return that, otherwise return the compat default. + */ +static inline unsigned long +xfs_preferred_iosize(xfs_mount_t *mp) +{ + if (mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE) + return PAGE_CACHE_SIZE; + return (mp->m_swidth ? + (mp->m_swidth << mp->m_sb.sb_blocklog) : + ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ? + (1 << (int)MAX(mp->m_readio_log, mp->m_writeio_log)) : + PAGE_CACHE_SIZE)); +} + #define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset) #define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN) diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 9142351df5..7227baee89 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -307,6 +307,9 @@ xfs_start_flags( if (ap->flags & XFSMNT_DIRSYNC) mp->m_flags |= XFS_MOUNT_DIRSYNC; + if (ap->flags & XFSMNT_COMPAT_IOSIZE) + mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; + /* * no recovery flag requires a read-only mount */ @@ -1645,6 +1648,9 @@ xfs_vget( #define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ #define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ #define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ +#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */ +#define MNTOPT_NOLARGEIO "nolargeio" /* do not report large I/O sizes + * in stat(). */ STATIC unsigned long suffix_strtoul(const char *cp, char **endp, unsigned int base) @@ -1681,6 +1687,7 @@ xfs_parseargs( int dsunit, dswidth, vol_dsunit, vol_dswidth; int iosize; + args->flags |= XFSMNT_COMPAT_IOSIZE; #if 0 /* XXX: off by default, until some remaining issues ironed out */ args->flags |= XFSMNT_IDELETE; /* default to on */ #endif @@ -1809,6 +1816,10 @@ xfs_parseargs( args->flags &= ~XFSMNT_IDELETE; } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { args->flags |= XFSMNT_IDELETE; + } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { + args->flags &= ~XFSMNT_COMPAT_IOSIZE; + } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { + args->flags |= XFSMNT_COMPAT_IOSIZE; } else if (!strcmp(this_char, "osyncisdsync")) { /* no-op, this is now the default */ printk("XFS: osyncisdsync is now the default, option is deprecated.\n"); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index e2bf2ef58b..8221b11a48 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -181,40 +181,7 @@ xfs_getattr( vap->va_rdev = 0; if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { - -#if 0 - /* Large block sizes confuse various - * user space programs, so letting the - * stripe size through is not a good - * idea for now. - */ - vap->va_blocksize = mp->m_swidth ? - /* - * If the underlying volume is a stripe, then - * return the stripe width in bytes as the - * recommended I/O size. - */ - (mp->m_swidth << mp->m_sb.sb_blocklog) : - /* - * Return the largest of the preferred buffer - * sizes since doing small I/Os into larger - * buffers causes buffers to be decommissioned. - * The value returned is in bytes. - */ - (1 << (int)MAX(mp->m_readio_log, - mp->m_writeio_log)); - -#else - vap->va_blocksize = - /* - * Return the largest of the preferred buffer - * sizes since doing small I/Os into larger - * buffers causes buffers to be decommissioned. - * The value returned is in bytes. - */ - 1 << (int)MAX(mp->m_readio_log, - mp->m_writeio_log); -#endif + vap->va_blocksize = xfs_preferred_iosize(mp); } else { /*