Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6

[linux-2.6] / fs / xfs / linux-2.6 / xfs_super.c
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c

index 0cb149ad65cca78789ca41083a6edbc72ea97bb2..8831d95187904f0ecf4a1aa1ac5b77cc767aec77 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -41,13 +41,17 @@
  #include "xfs_rtalloc.h"
  #include "xfs_error.h"
  #include "xfs_itable.h"
+#include "xfs_fsops.h"
  #include "xfs_rw.h"
  #include "xfs_acl.h"
  #include "xfs_attr.h"
  #include "xfs_buf_item.h"
  #include "xfs_utils.h"
  #include "xfs_vnodeops.h"
+#include "xfs_vfsops.h"
  #include "xfs_version.h"
+#include "xfs_log_priv.h"
+#include "xfs_trans_priv.h"
  
  #include <linux/namei.h>
  #include <linux/init.h>
@@ -86,6 +90,435 @@ xfs_args_allocate(
         return args;
  }
  
+/*
+ * Mount option names.  xfs_parseargs() compares each comma-separated
+ * option against these strings and xfs_showargs() prints them back.
+ */
+#define MNTOPT_LOGBUFS "logbufs"       /* number of XFS log buffers */
+#define MNTOPT_LOGBSIZE        "logbsize"      /* size of XFS log buffers */
+#define MNTOPT_LOGDEV  "logdev"        /* log device */
+#define MNTOPT_RTDEV   "rtdev"         /* realtime I/O device */
+#define MNTOPT_BIOSIZE "biosize"       /* log2 of preferred buffered io size */
+#define MNTOPT_WSYNC   "wsync"         /* safe-mode nfs compatible mount */
+#define MNTOPT_INO64   "ino64"         /* force inodes into 64-bit range */
+#define MNTOPT_NOALIGN "noalign"       /* turn off stripe alignment */
+#define MNTOPT_SWALLOC "swalloc"       /* turn on stripe width allocation */
+#define MNTOPT_SUNIT   "sunit"         /* data volume stripe unit */
+#define MNTOPT_SWIDTH  "swidth"        /* data volume stripe width */
+#define MNTOPT_NOUUID  "nouuid"        /* ignore filesystem UUID */
+#define MNTOPT_MTPT    "mtpt"          /* filesystem mount point */
+#define MNTOPT_GRPID   "grpid"         /* group-ID from parent directory */
+#define MNTOPT_NOGRPID "nogrpid"       /* group-ID from current process */
+#define MNTOPT_BSDGROUPS    "bsdgroups"    /* group-ID from parent directory */
+#define MNTOPT_SYSVGROUPS   "sysvgroups"   /* group-ID from current process */
+#define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
+#define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
+#define MNTOPT_BARRIER "barrier"       /* use write barriers for log write and
+                                        * unwritten extent conversion */
+#define MNTOPT_NOBARRIER "nobarrier"   /* .. disable */
+#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
+#define MNTOPT_64BITINODE   "inode64"  /* inodes can be allocated anywhere */
+#define MNTOPT_IKEEP   "ikeep"         /* do not free empty inode clusters */
+#define MNTOPT_NOIKEEP "noikeep"       /* free empty inode clusters */
+#define MNTOPT_LARGEIO    "largeio"    /* report large I/O sizes in stat() */
+#define MNTOPT_NOLARGEIO   "nolargeio" /* do not report large I/O sizes
+                                        * in stat(). */
+#define MNTOPT_ATTR2   "attr2"         /* do use attr2 attribute format */
+#define MNTOPT_NOATTR2 "noattr2"       /* do not use attr2 attribute format */
+#define MNTOPT_FILESTREAM  "filestreams" /* use filestreams allocator */
+#define MNTOPT_QUOTA   "quota"         /* disk quotas (user) */
+#define MNTOPT_NOQUOTA "noquota"       /* no quotas */
+#define MNTOPT_USRQUOTA        "usrquota"      /* user quota enabled */
+#define MNTOPT_GRPQUOTA        "grpquota"      /* group quota enabled */
+#define MNTOPT_PRJQUOTA        "prjquota"      /* project quota enabled */
+#define MNTOPT_UQUOTA  "uquota"        /* user quota (IRIX variant) */
+#define MNTOPT_GQUOTA  "gquota"        /* group quota (IRIX variant) */
+#define MNTOPT_PQUOTA  "pquota"        /* project quota (IRIX variant) */
+#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota on, limits not enforced */
+#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota on, limits not enforced */
+#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota on, limits not enforced */
+#define MNTOPT_QUOTANOENF  "qnoenforce"        /* same as uqnoenforce */
+#define MNTOPT_DMAPI   "dmapi"         /* DMI enabled (DMAPI / XDSM) */
+#define MNTOPT_XDSM    "xdsm"          /* DMI enabled (DMAPI / XDSM) */
+#define MNTOPT_DMI     "dmi"           /* DMI enabled (DMAPI / XDSM) */
+
+/*
+ * Parse an unsigned number that may carry a binary size suffix.
+ *
+ * A trailing 'K'/'k', 'M'/'m' or 'G'/'g' scales the parsed value by 2^10,
+ * 2^20 or 2^30 respectively.  The suffix character is overwritten with a
+ * NUL in the caller's buffer before the digits are handed to
+ * simple_strtoul(), so @s must be writable.  @endp and @base are passed
+ * through to simple_strtoul() unchanged.
+ *
+ * An empty string is handed straight to simple_strtoul() without looking
+ * for a suffix, so the byte before @s is never read.
+ */
+STATIC unsigned long
+suffix_strtoul(char *s, char **endp, unsigned int base)
+{
+       int     shift_left_factor = 0;
+       size_t  len = strlen(s);
+
+       if (len > 0) {
+               char    *suffix = &s[len - 1];
+
+               switch (*suffix) {
+               case 'K':
+               case 'k':
+                       shift_left_factor = 10;
+                       break;
+               case 'M':
+               case 'm':
+                       shift_left_factor = 20;
+                       break;
+               case 'G':
+               case 'g':
+                       shift_left_factor = 30;
+                       break;
+               default:
+                       break;
+               }
+
+               /* Strip the suffix so only the digits get parsed. */
+               if (shift_left_factor)
+                       *suffix = '\0';
+       }
+
+       return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
+}
+
+/*
+ * Report whether a mount option that needs "=value" was given without one.
+ * Logs a warning naming the option and returns non-zero in that case.
+ */
+STATIC int
+xfs_mntopt_lacks_value(
+       const char              *name,
+       const char              *value)
+{
+       if (value && *value)
+               return 0;
+
+       cmn_err(CE_WARN, "XFS: %s option requires an argument", name);
+       return 1;
+}
+
+/*
+ * Parse the comma-separated mount option string into @args.
+ *
+ * @mp:      mount being set up; m_flags is updated for the group-ID options
+ *           and XFS_MOUNT_SMALL_INUMS, and read for XFS_MOUNT_RDONLY.
+ * @options: option string, may be NULL.  It is modified in place: strsep()
+ *           splits it at commas and each '=' is replaced by a NUL.
+ * @args:    mount arguments filled in from the options.  Barriers and the
+ *           compat I/O size are switched on before any option is looked at.
+ * @update:  not used by this function.
+ *
+ * Returns 0 on success or (positive) EINVAL after logging a warning for an
+ * unknown option, a missing option value, or an invalid combination.
+ */
+STATIC int
+xfs_parseargs(
+       struct xfs_mount        *mp,
+       char                    *options,
+       struct xfs_mount_args   *args,
+       int                     update)
+{
+       char                    *this_char, *value, *eov;
+       int                     dsunit, dswidth, vol_dsunit, vol_dswidth;
+       int                     iosize;
+       int                     dmapi_implies_ikeep = 1;
+
+       args->flags |= XFSMNT_BARRIER;
+       args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
+
+       if (!options)
+               goto done;
+
+       iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0;
+
+       while ((this_char = strsep(&options, ",")) != NULL) {
+               if (!*this_char)
+                       continue;
+               /* Split "name=value" in place; value stays NULL without '='. */
+               if ((value = strchr(this_char, '=')) != NULL)
+                       *value++ = 0;
+
+               if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
+                       if (xfs_mntopt_lacks_value(this_char, value))
+                               return EINVAL;
+                       args->logbufs = simple_strtoul(value, &eov, 10);
+               } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
+                       if (xfs_mntopt_lacks_value(this_char, value))
+                               return EINVAL;
+                       args->logbufsize = suffix_strtoul(value, &eov, 10);
+               } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
+                       if (xfs_mntopt_lacks_value(this_char, value))
+                               return EINVAL;
+                       /*
+                        * NOTE(review): strncpy() does not terminate on
+                        * truncation; assumes logname/mtpt/rtname are larger
+                        * than MAXNAMELEN and pre-zeroed - confirm.
+                        */
+                       strncpy(args->logname, value, MAXNAMELEN);
+               } else if (!strcmp(this_char, MNTOPT_MTPT)) {
+                       if (xfs_mntopt_lacks_value(this_char, value))
+                               return EINVAL;
+                       strncpy(args->mtpt, value, MAXNAMELEN);
+               } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
+                       if (xfs_mntopt_lacks_value(this_char, value))
+                               return EINVAL;
+                       strncpy(args->rtname, value, MAXNAMELEN);
+               } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
+                       if (xfs_mntopt_lacks_value(this_char, value))
+                               return EINVAL;
+                       /* biosize is given directly as a log2 value */
+                       iosize = simple_strtoul(value, &eov, 10);
+                       args->flags |= XFSMNT_IOSIZE;
+                       args->iosizelog = (uint8_t) iosize;
+               } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
+                       if (xfs_mntopt_lacks_value(this_char, value))
+                               return EINVAL;
+                       /* allocsize is a byte count; store its lowest set bit */
+                       iosize = suffix_strtoul(value, &eov, 10);
+                       args->flags |= XFSMNT_IOSIZE;
+                       args->iosizelog = ffs(iosize) - 1;
+               } else if (!strcmp(this_char, MNTOPT_GRPID) ||
+                          !strcmp(this_char, MNTOPT_BSDGROUPS)) {
+                       mp->m_flags |= XFS_MOUNT_GRPID;
+               } else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
+                          !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
+                       mp->m_flags &= ~XFS_MOUNT_GRPID;
+               } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
+                       args->flags |= XFSMNT_WSYNC;
+               } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
+                       args->flags |= XFSMNT_OSYNCISOSYNC;
+               } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
+                       args->flags |= XFSMNT_NORECOVERY;
+               } else if (!strcmp(this_char, MNTOPT_INO64)) {
+                       args->flags |= XFSMNT_INO64;
+#if !XFS_BIG_INUMS
+                       cmn_err(CE_WARN,
+                               "XFS: %s option not allowed on this system",
+                               this_char);
+                       return EINVAL;
+#endif
+               } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
+                       args->flags |= XFSMNT_NOALIGN;
+               } else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
+                       args->flags |= XFSMNT_SWALLOC;
+               } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
+                       if (xfs_mntopt_lacks_value(this_char, value))
+                               return EINVAL;
+                       dsunit = simple_strtoul(value, &eov, 10);
+               } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
+                       if (xfs_mntopt_lacks_value(this_char, value))
+                               return EINVAL;
+                       dswidth = simple_strtoul(value, &eov, 10);
+               } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
+                       args->flags &= ~XFSMNT_32BITINODES;
+#if !XFS_BIG_INUMS
+                       cmn_err(CE_WARN,
+                               "XFS: %s option not allowed on this system",
+                               this_char);
+                       return EINVAL;
+#endif
+               } else if (!strcmp(this_char, MNTOPT_NOUUID)) {
+                       args->flags |= XFSMNT_NOUUID;
+               } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
+                       args->flags |= XFSMNT_BARRIER;
+               } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
+                       args->flags &= ~XFSMNT_BARRIER;
+               } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
+                       args->flags |= XFSMNT_IKEEP;
+               } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
+                       dmapi_implies_ikeep = 0;
+                       args->flags &= ~XFSMNT_IKEEP;
+               } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
+                       args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE;
+               } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
+                       args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
+               } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
+                       args->flags |= XFSMNT_ATTR2;
+               } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
+                       args->flags &= ~XFSMNT_ATTR2;
+               } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
+                       args->flags2 |= XFSMNT2_FILESTREAMS;
+               } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
+                       /* "noquota" turns off every quota type, project too */
+                       args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA);
+                       args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA);
+                       args->flags &= ~(XFSMNT_PQUOTAENF|XFSMNT_PQUOTA);
+               } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
+                          !strcmp(this_char, MNTOPT_UQUOTA) ||
+                          !strcmp(this_char, MNTOPT_USRQUOTA)) {
+                       args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF;
+               } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
+                          !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
+                       args->flags |= XFSMNT_UQUOTA;
+                       args->flags &= ~XFSMNT_UQUOTAENF;
+               } else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
+                          !strcmp(this_char, MNTOPT_PRJQUOTA)) {
+                       args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF;
+               } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
+                       args->flags |= XFSMNT_PQUOTA;
+                       args->flags &= ~XFSMNT_PQUOTAENF;
+               } else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
+                          !strcmp(this_char, MNTOPT_GRPQUOTA)) {
+                       args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF;
+               } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
+                       args->flags |= XFSMNT_GQUOTA;
+                       args->flags &= ~XFSMNT_GQUOTAENF;
+               } else if (!strcmp(this_char, MNTOPT_DMAPI)) {
+                       args->flags |= XFSMNT_DMAPI;
+               } else if (!strcmp(this_char, MNTOPT_XDSM)) {
+                       args->flags |= XFSMNT_DMAPI;
+               } else if (!strcmp(this_char, MNTOPT_DMI)) {
+                       args->flags |= XFSMNT_DMAPI;
+               } else if (!strcmp(this_char, "ihashsize")) {
+                       cmn_err(CE_WARN,
+       "XFS: ihashsize no longer used, option is deprecated.");
+               } else if (!strcmp(this_char, "osyncisdsync")) {
+                       /* no-op, this is now the default */
+                       cmn_err(CE_WARN,
+       "XFS: osyncisdsync is now the default, option is deprecated.");
+               } else if (!strcmp(this_char, "irixsgid")) {
+                       cmn_err(CE_WARN,
+       "XFS: irixsgid is now a sysctl(2) variable, option is deprecated.");
+               } else {
+                       cmn_err(CE_WARN,
+                               "XFS: unknown mount option [%s].", this_char);
+                       return EINVAL;
+               }
+       }
+
+       /* Cross-option consistency checks. */
+       if (args->flags & XFSMNT_NORECOVERY) {
+               if ((mp->m_flags & XFS_MOUNT_RDONLY) == 0) {
+                       cmn_err(CE_WARN,
+                               "XFS: no-recovery mounts must be read-only.");
+                       return EINVAL;
+               }
+       }
+
+       if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) {
+               cmn_err(CE_WARN,
+       "XFS: sunit and swidth options incompatible with the noalign option");
+               return EINVAL;
+       }
+
+       if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) {
+               cmn_err(CE_WARN,
+                       "XFS: cannot mount with both project and group quota");
+               return EINVAL;
+       }
+
+       if ((args->flags & XFSMNT_DMAPI) && *args->mtpt == '\0') {
+               printk("XFS: %s option needs the mount point option as well\n",
+                       MNTOPT_DMAPI);
+               return EINVAL;
+       }
+
+       if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
+               cmn_err(CE_WARN,
+                       "XFS: sunit and swidth must be specified together");
+               return EINVAL;
+       }
+
+       if (dsunit && (dswidth % dsunit != 0)) {
+               cmn_err(CE_WARN,
+       "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)",
+                       dswidth, dsunit);
+               return EINVAL;
+       }
+
+       /*
+        * Applications using DMI filesystems often expect the
+        * inode generation number to be monotonically increasing.
+        * If we delete inode chunks we break this assumption, so
+        * keep unused inode chunks on disk for DMI filesystems
+        * until we come up with a better solution.
+        * Note that if "ikeep" or "noikeep" mount options are
+        * supplied, then they are honored.
+        */
+       if ((args->flags & XFSMNT_DMAPI) && dmapi_implies_ikeep)
+               args->flags |= XFSMNT_IKEEP;
+
+       if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
+               if (dsunit) {
+                       args->sunit = dsunit;
+                       args->flags |= XFSMNT_RETERR;
+               } else {
+                       args->sunit = vol_dsunit;
+               }
+               if (dswidth)
+                       args->swidth = dswidth;
+               else
+                       args->swidth = vol_dswidth;
+       } else {
+               args->sunit = args->swidth = 0;
+       }
+
+done:
+       if (args->flags & XFSMNT_32BITINODES)
+               mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+       if (args->flags2)
+               args->flags |= XFSMNT_FLAGS2;
+       return 0;
+}
+
+/*
+ * One row of the flag tables in xfs_showargs(): a mount flag bit and the
+ * option text (leading comma included) that is printed for it.
+ */
+struct proc_xfs_info {
+       int             flag;
+       const char      *str;   /* only ever set from string literals */
+};
+
+/*
+ * Write the mount options in effect for @mp to the seq_file @m.
+ *
+ * Every option is emitted with a leading comma.  Simple on/off options come
+ * from two tables: one printed when the m_flags bit is set, one printed when
+ * it is clear.  Valued options (allocsize, logbufs, logbsize, logdev, rtdev,
+ * sunit, swidth) and the quota state are printed from the individual mount
+ * fields.  Always returns 0.
+ */
+STATIC int
+xfs_showargs(
+       struct xfs_mount        *mp,
+       struct seq_file         *m)
+{
+       static struct proc_xfs_info xfs_info_set[] = {
+               /* the few simple ones we can get from the mount struct */
+               { XFS_MOUNT_IKEEP,              "," MNTOPT_IKEEP },
+               { XFS_MOUNT_WSYNC,              "," MNTOPT_WSYNC },
+               { XFS_MOUNT_INO64,              "," MNTOPT_INO64 },
+               { XFS_MOUNT_NOALIGN,            "," MNTOPT_NOALIGN },
+               { XFS_MOUNT_SWALLOC,            "," MNTOPT_SWALLOC },
+               { XFS_MOUNT_NOUUID,             "," MNTOPT_NOUUID },
+               { XFS_MOUNT_NORECOVERY,         "," MNTOPT_NORECOVERY },
+               { XFS_MOUNT_OSYNCISOSYNC,       "," MNTOPT_OSYNCISOSYNC },
+               { XFS_MOUNT_ATTR2,              "," MNTOPT_ATTR2 },
+               { XFS_MOUNT_FILESTREAMS,        "," MNTOPT_FILESTREAM },
+               { XFS_MOUNT_DMAPI,              "," MNTOPT_DMAPI },
+               { XFS_MOUNT_GRPID,              "," MNTOPT_GRPID },
+               { 0, NULL }
+       };
+       static struct proc_xfs_info xfs_info_unset[] = {
+               /* the few simple ones we can get from the mount struct */
+               { XFS_MOUNT_COMPAT_IOSIZE,      "," MNTOPT_LARGEIO },
+               { XFS_MOUNT_BARRIER,            "," MNTOPT_NOBARRIER },
+               { XFS_MOUNT_SMALL_INUMS,        "," MNTOPT_64BITINODE },
+               { 0, NULL }
+       };
+       struct proc_xfs_info    *xfs_infop;
+
+       /* Options shown when their flag is set ... */
+       for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
+               if (mp->m_flags & xfs_infop->flag)
+                       seq_puts(m, xfs_infop->str);
+       }
+       /* ... and options shown when their flag is clear. */
+       for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
+               if (!(mp->m_flags & xfs_infop->flag))
+                       seq_puts(m, xfs_infop->str);
+       }
+
+       if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
+               seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
+                               (int)(1 << mp->m_writeio_log) >> 10);
+
+       if (mp->m_logbufs > 0)
+               seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
+       if (mp->m_logbsize > 0)
+               seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
+
+       if (mp->m_logname)
+               seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
+       if (mp->m_rtname)
+               seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
+
+       if (mp->m_dalign > 0)
+               seq_printf(m, "," MNTOPT_SUNIT "=%d",
+                               (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
+       if (mp->m_swidth > 0)
+               seq_printf(m, "," MNTOPT_SWIDTH "=%d",
+                               (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
+
+       /*
+        * Quota state: a quota type is reported only when its accounting
+        * bit is set; the enforcement bit then selects between the plain
+        * and the "noenforce" spelling.  Testing (ACCT|ENFD) in one mask
+        * would make the noenforce case unreachable.
+        */
+       if (mp->m_qflags & XFS_UQUOTA_ACCT) {
+               if (mp->m_qflags & XFS_UQUOTA_ENFD)
+                       seq_puts(m, "," MNTOPT_USRQUOTA);
+               else
+                       seq_puts(m, "," MNTOPT_UQUOTANOENF);
+       }
+
+       /*
+        * XFS_OQUOTA_ENFD is the enforcement bit for both project and
+        * group quota, so it must not by itself cause either to be shown.
+        */
+       if (mp->m_qflags & XFS_PQUOTA_ACCT) {
+               if (mp->m_qflags & XFS_OQUOTA_ENFD)
+                       seq_puts(m, "," MNTOPT_PRJQUOTA);
+               else
+                       seq_puts(m, "," MNTOPT_PQUOTANOENF);
+       }
+
+       if (mp->m_qflags & XFS_GQUOTA_ACCT) {
+               if (mp->m_qflags & XFS_OQUOTA_ENFD)
+                       seq_puts(m, "," MNTOPT_GRPQUOTA);
+               else
+                       seq_puts(m, "," MNTOPT_GQUOTANOENF);
+       }
+
+       if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
+               seq_puts(m, "," MNTOPT_NOQUOTA);
+
+       return 0;
+}
  __uint64_t
  xfs_max_file_offset(
         unsigned int            blockshift)
@@ -136,7 +569,7 @@ xfs_set_inodeops(
                 break;
         case S_IFLNK:
                 inode->i_op = &xfs_symlink_inode_operations;
-               if (inode->i_blocks)
+               if (!(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE))
                         inode->i_mapping->a_ops = &xfs_address_space_operations;
                 break;
         default:
@@ -173,8 +606,6 @@ xfs_revalidate_inode(
  
         inode->i_generation = ip->i_d.di_gen;
         i_size_write(inode, ip->i_d.di_size);
-       inode->i_blocks =
-               XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
         inode->i_atime.tv_sec   = ip->i_d.di_atime.t_sec;
         inode->i_atime.tv_nsec  = ip->i_d.di_atime.t_nsec;
         inode->i_mtime.tv_sec   = ip->i_d.di_mtime.t_sec;
@@ -204,8 +635,7 @@ void
  xfs_initialize_vnode(
         struct xfs_mount        *mp,
         bhv_vnode_t             *vp,
-       struct xfs_inode        *ip,
-       int                     unlock)
+       struct xfs_inode        *ip)
  {
         struct inode            *inode = vn_to_inode(vp);
  
@@ -221,7 +651,7 @@ xfs_initialize_vnode(
          * second time once the inode is properly set up, and then we can
          * finish our work.
          */
-       if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) {
+       if (ip->i_d.di_mode != 0 && (inode->i_state & I_NEW)) {
                 xfs_revalidate_inode(mp, vp, ip);
                 xfs_set_inodeops(inode);
  
@@ -303,6 +733,14 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
                 return;
         }
  
+       if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered ==
+                                       QUEUE_ORDERED_NONE) {
+               xfs_fs_cmn_err(CE_NOTE, mp,
+                 "Disabling barriers, not supported by the underlying device");
+               mp->m_flags &= ~XFS_MOUNT_BARRIER;
+               return;
+       }
+
         if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
                 xfs_fs_cmn_err(CE_NOTE, mp,
                   "Disabling barriers, underlying device is readonly");
@@ -326,6 +764,64 @@ xfs_blkdev_issue_flush(
         blkdev_issue_flush(buftarg->bt_bdev, NULL);
  }
  
+/*
+ * XFS AIL push thread support
+ */
+/*
+ * Store @threshold_lsn as the push target in mp->m_ail.xa_target and wake
+ * the task recorded in mp->m_ail.xa_task.  The target is written before
+ * the wake-up is issued.
+ */
+void
+xfsaild_wakeup(
+       xfs_mount_t             *mp,
+       xfs_lsn_t               threshold_lsn)
+{
+       mp->m_ail.xa_target = threshold_lsn;
+       wake_up_process(mp->m_ail.xa_task);
+}
+
+/*
+ * Thread body for the AIL push thread; @data is the xfs_mount_t to push.
+ *
+ * Until kthread_should_stop() reports true, each pass sleeps for the
+ * timeout (in milliseconds) chosen by the previous pass, lets the freezer
+ * run, and then calls xfsaild_push(), whose return value becomes the next
+ * timeout.  While the mount is in forced shutdown nothing is pushed and
+ * the timeout is 1000ms.  The very first pass does not sleep.
+ *
+ * Returns 0 when the thread is asked to stop.
+ */
+int
+xfsaild(
+       void    *data)
+{
+       xfs_mount_t     *mp = data;
+       xfs_lsn_t       last_pushed_lsn = 0;
+       long            tout = 0;
+
+       for (;;) {
+               if (kthread_should_stop())
+                       break;
+
+               if (tout != 0)
+                       schedule_timeout_interruptible(msecs_to_jiffies(tout));
+
+               /* swsusp */
+               try_to_freeze();
+
+               ASSERT(mp->m_log);
+               if (XFS_FORCED_SHUTDOWN(mp))
+                       tout = 1000;
+               else
+                       tout = xfsaild_push(mp, &last_pushed_lsn);
+       }
+
+       return 0;
+}
+
+/*
+ * Reset the AIL push target of @mp to 0 and start the "xfsaild" kernel
+ * thread running xfsaild() on it.  The result of kthread_run() is stored
+ * in mp->m_ail.xa_task whether or not it is an error pointer.
+ *
+ * Returns 0 on success, otherwise the negated PTR_ERR() of the failed
+ * kthread_run() result.
+ */
+int
+xfsaild_start(
+       xfs_mount_t     *mp)
+{
+       mp->m_ail.xa_target = 0;
+       mp->m_ail.xa_task = kthread_run(xfsaild, mp, "xfsaild");
+
+       return IS_ERR(mp->m_ail.xa_task) ? -PTR_ERR(mp->m_ail.xa_task) : 0;
+}
+
+/*
+ * Stop the task recorded in mp->m_ail.xa_task with kthread_stop().
+ * NOTE(review): xfsaild_start() leaves an error pointer in xa_task when
+ * kthread_run() fails, so presumably this must only be called after a
+ * successful start - confirm against callers.
+ */
+void
+xfsaild_stop(
+       xfs_mount_t     *mp)
+{
+       kthread_stop(mp->m_ail.xa_task);
+}
+
+
+
  STATIC struct inode *
  xfs_fs_alloc_inode(
         struct super_block      *sb)
@@ -347,14 +843,13 @@ xfs_fs_destroy_inode(
  
  STATIC void
  xfs_fs_inode_init_once(
-       void                    *vnode,
         kmem_zone_t             *zonep,
-       unsigned long           flags)
+       void                    *vnode)
  {
         inode_init_once(vn_to_inode((bhv_vnode_t *)vnode));
  }
  
-STATIC int
+STATIC int __init
  xfs_init_zones(void)
  {
         xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode",
@@ -403,20 +898,18 @@ xfs_fs_write_inode(
  {
         int                     error = 0, flags = FLUSH_INODE;
  
-       vn_trace_entry(XFS_I(inode), __FUNCTION__,
-                       (inst_t *)__return_address);
+       xfs_itrace_entry(XFS_I(inode));
         if (sync) {
                 filemap_fdatawait(inode->i_mapping);
                 flags |= FLUSH_SYNC;
         }
         error = xfs_inode_flush(XFS_I(inode), flags);
-       if (error == EAGAIN) {
-               if (sync)
-                       error = xfs_inode_flush(XFS_I(inode),
-                                                      flags | FLUSH_LOG);
-               else
-                       error = 0;
-       }
+       /*
+        * if we failed to write out the inode then mark
+        * it dirty again so we'll try again later.
+        */
+       if (error)
+               mark_inode_dirty_sync(inode);
  
         return -error;
  }
@@ -432,8 +925,7 @@ xfs_fs_clear_inode(
          * find an inode with di_mode == 0 but without IGET_CREATE set.
          */
         if (ip) {
-               vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);
-
+               xfs_itrace_entry(ip);
                 XFS_STATS_INC(vn_rele);
                 XFS_STATS_INC(vn_remove);
                 XFS_STATS_INC(vn_reclaim);
@@ -457,9 +949,9 @@ xfs_fs_clear_inode(
   */
  STATIC void
  xfs_syncd_queue_work(
-       struct bhv_vfs  *vfs,
+       struct xfs_mount *mp,
         void            *data,
-       void            (*syncer)(bhv_vfs_t *, void *))
+       void            (*syncer)(struct xfs_mount *, void *))
  {
         struct bhv_vfs_sync_work *work;
  
@@ -467,11 +959,11 @@ xfs_syncd_queue_work(
         INIT_LIST_HEAD(&work->w_list);
         work->w_syncer = syncer;
         work->w_data = data;
-       work->w_vfs = vfs;
-       spin_lock(&vfs->vfs_sync_lock);
-       list_add_tail(&work->w_list, &vfs->vfs_sync_list);
-       spin_unlock(&vfs->vfs_sync_lock);
-       wake_up_process(vfs->vfs_sync_task);
+       work->w_mount = mp;
+       spin_lock(&mp->m_sync_lock);
+       list_add_tail(&work->w_list, &mp->m_sync_list);
+       spin_unlock(&mp->m_sync_lock);
+       wake_up_process(mp->m_sync_task);
  }
  
  /*
@@ -482,22 +974,22 @@ xfs_syncd_queue_work(
   */
  STATIC void
  xfs_flush_inode_work(
-       bhv_vfs_t       *vfs,
-       void            *inode)
+       struct xfs_mount *mp,
+       void            *arg)
  {
-       filemap_flush(((struct inode *)inode)->i_mapping);
-       iput((struct inode *)inode);
+       struct inode    *inode = arg;
+       filemap_flush(inode->i_mapping);
+       iput(inode);
  }
  
  void
  xfs_flush_inode(
         xfs_inode_t     *ip)
  {
-       struct inode    *inode = vn_to_inode(XFS_ITOV(ip));
-       struct bhv_vfs  *vfs = XFS_MTOVFS(ip->i_mount);
+       struct inode    *inode = ip->i_vnode;
  
         igrab(inode);
-       xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work);
+       xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
         delay(msecs_to_jiffies(500));
  }
  
@@ -507,11 +999,12 @@ xfs_flush_inode(
   */
  STATIC void
  xfs_flush_device_work(
-       bhv_vfs_t       *vfs,
-       void            *inode)
+       struct xfs_mount *mp,
+       void            *arg)
  {
-       sync_blockdev(vfs->vfs_super->s_bdev);
-       iput((struct inode *)inode);
+       struct inode    *inode = arg;
+       sync_blockdev(mp->m_super->s_bdev);
+       iput(inode);
  }
  
  void
@@ -519,35 +1012,33 @@ xfs_flush_device(
         xfs_inode_t     *ip)
  {
         struct inode    *inode = vn_to_inode(XFS_ITOV(ip));
-       struct bhv_vfs  *vfs = XFS_MTOVFS(ip->i_mount);
  
         igrab(inode);
-       xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work);
+       xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
         delay(msecs_to_jiffies(500));
         xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
  }
  
  STATIC void
-vfs_sync_worker(
-       bhv_vfs_t       *vfsp,
+xfs_sync_worker(
+       struct xfs_mount *mp,
         void            *unused)
  {
         int             error;
  
-       if (!(vfsp->vfs_flag & VFS_RDONLY))
-               error = bhv_vfs_sync(vfsp, SYNC_FSDATA | SYNC_BDFLUSH | \
-                                       SYNC_ATTR | SYNC_REFCACHE | SYNC_SUPER,
-                                       NULL);
-       vfsp->vfs_sync_seq++;
-       wake_up(&vfsp->vfs_wait_single_sync_task);
+       if (!(mp->m_flags & XFS_MOUNT_RDONLY))
+               error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR |
+                                    SYNC_REFCACHE | SYNC_SUPER);
+       mp->m_sync_seq++;
+       wake_up(&mp->m_wait_single_sync_task);
  }
  
  STATIC int
  xfssyncd(
         void                    *arg)
  {
+       struct xfs_mount        *mp = arg;
         long                    timeleft;
-       bhv_vfs_t               *vfsp = (bhv_vfs_t *) arg;
         bhv_vfs_sync_work_t     *work, *n;
         LIST_HEAD               (tmp);
  
@@ -557,31 +1048,31 @@ xfssyncd(
                 timeleft = schedule_timeout_interruptible(timeleft);
                 /* swsusp */
                 try_to_freeze();
-               if (kthread_should_stop() && list_empty(&vfsp->vfs_sync_list))
+               if (kthread_should_stop() && list_empty(&mp->m_sync_list))
                         break;
  
-               spin_lock(&vfsp->vfs_sync_lock);
+               spin_lock(&mp->m_sync_lock);
                 /*
                  * We can get woken by laptop mode, to do a sync -
                  * that's the (only!) case where the list would be
                  * empty with time remaining.
                  */
-               if (!timeleft || list_empty(&vfsp->vfs_sync_list)) {
+               if (!timeleft || list_empty(&mp->m_sync_list)) {
                         if (!timeleft)
                                 timeleft = xfs_syncd_centisecs *
                                                         msecs_to_jiffies(10);
-                       INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list);
-                       list_add_tail(&vfsp->vfs_sync_work.w_list,
-                                       &vfsp->vfs_sync_list);
+                       INIT_LIST_HEAD(&mp->m_sync_work.w_list);
+                       list_add_tail(&mp->m_sync_work.w_list,
+                                       &mp->m_sync_list);
                 }
-               list_for_each_entry_safe(work, n, &vfsp->vfs_sync_list, w_list)
+               list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
                         list_move(&work->w_list, &tmp);
-               spin_unlock(&vfsp->vfs_sync_lock);
+               spin_unlock(&mp->m_sync_lock);
  
                 list_for_each_entry_safe(work, n, &tmp, w_list) {
-                       (*work->w_syncer)(vfsp, work->w_data);
+                       (*work->w_syncer)(mp, work->w_data);
                         list_del(&work->w_list);
-                       if (work == &vfsp->vfs_sync_work)
+                       if (work == &mp->m_sync_work)
                                 continue;
                         kmem_free(work, sizeof(struct bhv_vfs_sync_work));
                 }
@@ -590,41 +1081,19 @@ xfssyncd(
         return 0;
  }
  
-STATIC int
-xfs_fs_start_syncd(
-       bhv_vfs_t               *vfsp)
-{
-       vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
-       vfsp->vfs_sync_work.w_vfs = vfsp;
-       vfsp->vfs_sync_task = kthread_run(xfssyncd, vfsp, "xfssyncd");
-       if (IS_ERR(vfsp->vfs_sync_task))
-               return -PTR_ERR(vfsp->vfs_sync_task);
-       return 0;
-}
-
-STATIC void
-xfs_fs_stop_syncd(
-       bhv_vfs_t               *vfsp)
-{
-       kthread_stop(vfsp->vfs_sync_task);
-}
-
  STATIC void
  xfs_fs_put_super(
         struct super_block      *sb)
  {
-       bhv_vfs_t               *vfsp = vfs_from_sb(sb);
+       struct xfs_mount        *mp = XFS_M(sb);
         int                     error;
  
-       xfs_fs_stop_syncd(vfsp);
-       bhv_vfs_sync(vfsp, SYNC_ATTR | SYNC_DELWRI, NULL);
-       error = bhv_vfs_unmount(vfsp, 0, NULL);
-       if (error) {
+       kthread_stop(mp->m_sync_task);
+
+       xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI);
+       error = xfs_unmount(mp, 0, NULL);
+       if (error)
                 printk("XFS: unmount got error=%d\n", error);
-               printk("%s: vfs=0x%p left dangling!\n", __FUNCTION__, vfsp);
-       } else {
-               vfs_deallocate(vfsp);
-       }
  }
  
  STATIC void
@@ -632,7 +1101,7 @@ xfs_fs_write_super(
         struct super_block      *sb)
  {
         if (!(sb->s_flags & MS_RDONLY))
-               bhv_vfs_sync(vfs_from_sb(sb), SYNC_FSDATA, NULL);
+               xfs_sync(XFS_M(sb), SYNC_FSDATA);
         sb->s_dirt = 0;
  }
  
@@ -641,11 +1110,23 @@ xfs_fs_sync_super(
         struct super_block      *sb,
         int                     wait)
  {
-       bhv_vfs_t               *vfsp = vfs_from_sb(sb);
+       struct xfs_mount        *mp = XFS_M(sb);
         int                     error;
         int                     flags;
  
-       if (unlikely(sb->s_frozen == SB_FREEZE_WRITE)) {
+       /*
+        * Treat a sync operation like a freeze.  This is to work
+        * around a race in sync_inodes() which works in two phases
+        * - an asynchronous flush, which can write out an inode
+        * without waiting for file size updates to complete, and a
+        * synchronous flush, which won't do anything because the
+        * async flush removed the inode's dirty flag.  Also
+        * sync_inodes() will not see any files that just have
+        * outstanding transactions to be flushed because we don't
+        * dirty the Linux inode until after the transaction I/O
+        * completes.
+        */
+       if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE)) {
                 /*
                  * First stage of freeze - no more writers will make progress
                  * now we are here, so we flush delwri and delalloc buffers
@@ -656,28 +1137,28 @@ xfs_fs_sync_super(
                  */
                 flags = SYNC_DATA_QUIESCE;
         } else
-               flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0);
+               flags = SYNC_FSDATA;
  
-       error = bhv_vfs_sync(vfsp, flags, NULL);
+       error = xfs_sync(mp, flags);
         sb->s_dirt = 0;
  
         if (unlikely(laptop_mode)) {
-               int     prev_sync_seq = vfsp->vfs_sync_seq;
+               int     prev_sync_seq = mp->m_sync_seq;
  
                 /*
                  * The disk must be active because we're syncing.
                  * We schedule xfssyncd now (now that the disk is
                  * active) instead of later (when it might not be).
                  */
-               wake_up_process(vfsp->vfs_sync_task);
+               wake_up_process(mp->m_sync_task);
                 /*
                  * We have to wait for the sync iteration to complete.
                  * If we don't, the disk activity caused by the sync
                  * will come after the sync is completed, and that
                  * triggers another sync from laptop mode.
                  */
-               wait_event(vfsp->vfs_wait_single_sync_task,
-                               vfsp->vfs_sync_seq != prev_sync_seq);
+               wait_event(mp->m_wait_single_sync_task,
+                               mp->m_sync_seq != prev_sync_seq);
         }
  
         return -error;
@@ -688,8 +1169,44 @@ xfs_fs_statfs(
         struct dentry           *dentry,
         struct kstatfs          *statp)
  {
-       return -bhv_vfs_statvfs(vfs_from_sb(dentry->d_sb), statp,
-                               vn_from_inode(dentry->d_inode));
+       struct xfs_mount        *mp = XFS_M(dentry->d_sb);
+       xfs_sb_t                *sbp = &mp->m_sb;
+       __uint64_t              fakeinos, id;
+       xfs_extlen_t            lsize;
+
+       statp->f_type = XFS_SB_MAGIC;
+       statp->f_namelen = MAXNAMELEN - 1;
+
+       id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
+       statp->f_fsid.val[0] = (u32)id;
+       statp->f_fsid.val[1] = (u32)(id >> 32);
+
+       xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT);
+
+       spin_lock(&mp->m_sb_lock);
+       statp->f_bsize = sbp->sb_blocksize;
+       lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
+       statp->f_blocks = sbp->sb_dblocks - lsize;
+       statp->f_bfree = statp->f_bavail =
+                               sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+       fakeinos = statp->f_bfree << sbp->sb_inopblog;
+#if XFS_BIG_INUMS
+       fakeinos += mp->m_inoadd;
+#endif
+       statp->f_files =
+           MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
+       if (mp->m_maxicount)
+#if XFS_BIG_INUMS
+               if (!mp->m_inoadd)
+#endif
+                       statp->f_files = min_t(typeof(statp->f_files),
+                                               statp->f_files,
+                                               mp->m_maxicount);
+       statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+       spin_unlock(&mp->m_sb_lock);
+
+       XFS_QM_DQSTATVFS(XFS_I(dentry->d_inode), statp);
+       return 0;
  }
  
  STATIC int
@@ -698,22 +1215,30 @@ xfs_fs_remount(
         int                     *flags,
         char                    *options)
  {
-       bhv_vfs_t               *vfsp = vfs_from_sb(sb);
+       struct xfs_mount        *mp = XFS_M(sb);
         struct xfs_mount_args   *args = xfs_args_allocate(sb, 0);
         int                     error;
  
-       error = bhv_vfs_parseargs(vfsp, options, args, 1);
+       error = xfs_parseargs(mp, options, args, 1);
         if (!error)
-               error = bhv_vfs_mntupdate(vfsp, flags, args);
+               error = xfs_mntupdate(mp, flags, args);
         kmem_free(args, sizeof(*args));
         return -error;
  }
  
+/*
+ * Second stage of a freeze. The data is already frozen so we only
+ * need to take care of the metadata. Once that's done write a dummy
+ * record to dirty the log in case of a crash while frozen.
+ */
  STATIC void
  xfs_fs_lockfs(
         struct super_block      *sb)
  {
-       bhv_vfs_freeze(vfs_from_sb(sb));
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       xfs_attr_quiesce(mp);
+       xfs_fs_log_dummy(mp);
  }
  
  STATIC int
@@ -721,7 +1246,7 @@ xfs_fs_show_options(
         struct seq_file         *m,
         struct vfsmount         *mnt)
  {
-       return -bhv_vfs_showargs(vfs_from_sb(mnt->mnt_sb), m);
+       return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
  }
  
  STATIC int
@@ -782,45 +1307,48 @@ xfs_fs_fill_super(
         int                     silent)
  {
         struct inode            *rootvp;
-       struct bhv_vfs          *vfsp = vfs_allocate(sb);
+       struct xfs_mount        *mp = NULL;
         struct xfs_mount_args   *args = xfs_args_allocate(sb, silent);
-       struct kstatfs          statvfs;
         int                     error;
  
-       bhv_insert_all_vfsops(vfsp);
+       mp = xfs_mount_init();
  
-       error = bhv_vfs_parseargs(vfsp, (char *)data, args, 0);
-       if (error) {
-               bhv_remove_all_vfsops(vfsp, 1);
+       INIT_LIST_HEAD(&mp->m_sync_list);
+       spin_lock_init(&mp->m_sync_lock);
+       init_waitqueue_head(&mp->m_wait_single_sync_task);
+
+       mp->m_super = sb;
+       sb->s_fs_info = mp;
+
+       if (sb->s_flags & MS_RDONLY)
+               mp->m_flags |= XFS_MOUNT_RDONLY;
+
+       error = xfs_parseargs(mp, (char *)data, args, 0);
+       if (error)
                 goto fail_vfsop;
-       }
  
         sb_min_blocksize(sb, BBSIZE);
         sb->s_export_op = &xfs_export_operations;
         sb->s_qcop = &xfs_quotactl_operations;
         sb->s_op = &xfs_super_operations;
  
-       error = bhv_vfs_mount(vfsp, args, NULL);
-       if (error) {
-               bhv_remove_all_vfsops(vfsp, 1);
-               goto fail_vfsop;
-       }
-
-       error = bhv_vfs_statvfs(vfsp, &statvfs, NULL);
+       error = xfs_mount(mp, args, NULL);
         if (error)
-               goto fail_unmount;
+               goto fail_vfsop;
  
         sb->s_dirt = 1;
-       sb->s_magic = statvfs.f_type;
-       sb->s_blocksize = statvfs.f_bsize;
-       sb->s_blocksize_bits = ffs(statvfs.f_bsize) - 1;
+       sb->s_magic = XFS_SB_MAGIC;
+       sb->s_blocksize = mp->m_sb.sb_blocksize;
+       sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
         sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
         sb->s_time_gran = 1;
         set_posix_acl_flag(sb);
  
-       error = bhv_vfs_root(vfsp, &rootvp);
-       if (error)
+       rootvp = igrab(mp->m_rootip->i_vnode);
+       if (!rootvp) {
+               error = ENOENT;
                 goto fail_unmount;
+       }
  
         sb->s_root = d_alloc_root(vn_to_inode(rootvp));
         if (!sb->s_root) {
@@ -831,10 +1359,16 @@ xfs_fs_fill_super(
                 error = EINVAL;
                 goto fail_vnrele;
         }
-       if ((error = xfs_fs_start_syncd(vfsp)))
+
+       mp->m_sync_work.w_syncer = xfs_sync_worker;
+       mp->m_sync_work.w_mount = mp;
+       mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
+       if (IS_ERR(mp->m_sync_task)) {
+               error = -PTR_ERR(mp->m_sync_task);
                 goto fail_vnrele;
-       vn_trace_exit(XFS_I(sb->s_root->d_inode), __FUNCTION__,
-                       (inst_t *)__return_address);
+       }
+
+       xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
  
         kmem_free(args, sizeof(*args));
         return 0;
@@ -848,10 +1382,9 @@ fail_vnrele:
         }
  
  fail_unmount:
-       bhv_vfs_unmount(vfsp, 0, NULL);
+       xfs_unmount(mp, 0, NULL);
  
  fail_vfsop:
-       vfs_deallocate(vfsp);
         kmem_free(args, sizeof(*args));
         return -error;
  }