Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jgarzi...

[linux-2.6] / fs / namespace.c
diff --git a/fs/namespace.c b/fs/namespace.c

index fde8553faa7661677e2210d65620b753eceddaf8..e9c10cd01e1325cff95869cbb0f23aa8d14c10ce 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -25,9 +25,14 @@
  #include <linux/security.h>
  #include <linux/mount.h>
  #include <linux/ramfs.h>
+#include <linux/log2.h>
  #include <asm/uaccess.h>
  #include <asm/unistd.h>
  #include "pnode.h"
+#include "internal.h"
+
+#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
+#define HASH_SIZE (1UL << HASH_SHIFT)
  
  /* spinlock for vfsmount related operations, inplace of dcache_lock */
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
@@ -35,27 +40,25 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
  static int event;
  
  static struct list_head *mount_hashtable __read_mostly;
-static int hash_mask __read_mostly, hash_bits __read_mostly;
  static struct kmem_cache *mnt_cache __read_mostly;
  static struct rw_semaphore namespace_sem;
  
  /* /sys/fs */
-decl_subsys(fs, NULL, NULL);
-EXPORT_SYMBOL_GPL(fs_subsys);
+struct kobject *fs_kobj;
+EXPORT_SYMBOL_GPL(fs_kobj);
  
  static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
  {
         unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
         tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
-       tmp = tmp + (tmp >> hash_bits);
-       return tmp & hash_mask;
+       tmp = tmp + (tmp >> HASH_SHIFT);
+       return tmp & (HASH_SIZE - 1);
  }
  
  struct vfsmount *alloc_vfsmnt(const char *name)
  {
-       struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL);
+       struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
         if (mnt) {
-               memset(mnt, 0, sizeof(struct vfsmount));
                 atomic_set(&mnt->mnt_count, 1);
                 INIT_LIST_HEAD(&mnt->mnt_hash);
                 INIT_LIST_HEAD(&mnt->mnt_child);
@@ -246,7 +249,7 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
                         list_add(&mnt->mnt_slave, &old->mnt_slave_list);
                         mnt->mnt_master = old;
                         CLEAR_MNT_SHARED(mnt);
-               } else {
+               } else if (!(flag & CL_PRIVATE)) {
                         if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
                                 list_add(&mnt->mnt_share, &old->mnt_share);
                         if (IS_MNT_SLAVE(old))
@@ -321,22 +324,16 @@ EXPORT_SYMBOL(mnt_unpin);
  static void *m_start(struct seq_file *m, loff_t *pos)
  {
         struct mnt_namespace *n = m->private;
-       struct list_head *p;
-       loff_t l = *pos;
  
         down_read(&namespace_sem);
-       list_for_each(p, &n->list)
-               if (!l--)
-                       return list_entry(p, struct vfsmount, mnt_list);
-       return NULL;
+       return seq_list_start(&n->list, *pos);
  }
  
  static void *m_next(struct seq_file *m, void *v, loff_t *pos)
  {
         struct mnt_namespace *n = m->private;
-       struct list_head *p = ((struct vfsmount *)v)->mnt_list.next;
-       (*pos)++;
-       return p == &n->list ? NULL : list_entry(p, struct vfsmount, mnt_list);
+
+       return seq_list_next(v, &n->list, pos);
  }
  
  static void m_stop(struct seq_file *m, void *v)
@@ -351,7 +348,7 @@ static inline void mangle(struct seq_file *m, const char *s)
  
  static int show_vfsmnt(struct seq_file *m, void *v)
  {
-       struct vfsmount *mnt = v;
+       struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
         int err = 0;
         static struct proc_fs_info {
                 int flag;
@@ -368,6 +365,7 @@ static int show_vfsmnt(struct seq_file *m, void *v)
                 { MNT_NOEXEC, ",noexec" },
                 { MNT_NOATIME, ",noatime" },
                 { MNT_NODIRATIME, ",nodiratime" },
+               { MNT_RELATIME, ",relatime" },
                 { 0, NULL }
         };
         struct proc_fs_info *fs_infop;
@@ -377,6 +375,10 @@ static int show_vfsmnt(struct seq_file *m, void *v)
         seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
         seq_putc(m, ' ');
         mangle(m, mnt->mnt_sb->s_type->name);
+       if (mnt->mnt_sb->s_subtype && mnt->mnt_sb->s_subtype[0]) {
+               seq_putc(m, '.');
+               mangle(m, mnt->mnt_sb->s_subtype);
+       }
         seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
         for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
                 if (mnt->mnt_sb->s_flags & fs_infop->flag)
@@ -401,7 +403,7 @@ struct seq_operations mounts_op = {
  
  static int show_vfsstat(struct seq_file *m, void *v)
  {
-       struct vfsmount *mnt = v;
+       struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
         int err = 0;
  
         /* device */
@@ -495,7 +497,7 @@ void release_mounts(struct list_head *head)
  {
         struct vfsmount *mnt;
         while (!list_empty(head)) {
-               mnt = list_entry(head->next, struct vfsmount, mnt_hash);
+               mnt = list_first_entry(head, struct vfsmount, mnt_hash);
                 list_del_init(&mnt->mnt_hash);
                 if (mnt->mnt_parent != mnt) {
                         struct dentry *dentry;
@@ -747,6 +749,26 @@ Enomem:
         return NULL;
  }
  
+struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry)
+{
+       struct vfsmount *tree;
+       down_read(&namespace_sem);
+       tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE);
+       up_read(&namespace_sem);
+       return tree;
+}
+
+void drop_collected_mounts(struct vfsmount *mnt)
+{
+       LIST_HEAD(umount_list);
+       down_read(&namespace_sem);
+       spin_lock(&vfsmount_lock);
+       umount_tree(mnt, 0, &umount_list);
+       spin_unlock(&vfsmount_lock);
+       up_read(&namespace_sem);
+       release_mounts(&umount_list);
+}
+
  /*
   *  @source_mnt : mount tree to be attached
   *  @nd         : place the mount tree @source_mnt is attached
@@ -882,6 +904,9 @@ static int do_change_type(struct nameidata *nd, int flag)
         int recurse = flag & MS_REC;
         int type = flag & ~MS_REC;
  
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
         if (nd->dentry != nd->mnt->mnt_root)
                 return -EINVAL;
  
@@ -1173,7 +1198,7 @@ static void expire_mount_list(struct list_head *graveyard, struct list_head *mou
  
         while (!list_empty(graveyard)) {
                 LIST_HEAD(umounts);
-               mnt = list_entry(graveyard->next, struct vfsmount, mnt_expire);
+               mnt = list_first_entry(graveyard, struct vfsmount, mnt_expire);
                 list_del_init(&mnt->mnt_expire);
  
                 /* don't do anything if the namespace is dead - all the
@@ -1405,9 +1430,11 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
                 mnt_flags |= MNT_NOATIME;
         if (flags & MS_NODIRATIME)
                 mnt_flags |= MNT_NODIRATIME;
+       if (flags & MS_RELATIME)
+               mnt_flags |= MNT_RELATIME;
  
         flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
-                  MS_NOATIME | MS_NODIRATIME);
+                  MS_NOATIME | MS_NODIRATIME | MS_RELATIME | MS_KERNMOUNT);
  
         /* ... and get the mountpoint */
         retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
@@ -1439,17 +1466,16 @@ dput_out:
   * Allocate a new namespace structure and populate it with contents
   * copied from the namespace of the passed in task structure.
   */
-struct mnt_namespace *dup_mnt_ns(struct task_struct *tsk,
+static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
                 struct fs_struct *fs)
  {
-       struct mnt_namespace *mnt_ns = tsk->nsproxy->mnt_ns;
         struct mnt_namespace *new_ns;
         struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
         struct vfsmount *p, *q;
  
         new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
         if (!new_ns)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
  
         atomic_set(&new_ns->count, 1);
         INIT_LIST_HEAD(&new_ns->list);
@@ -1463,7 +1489,7 @@ struct mnt_namespace *dup_mnt_ns(struct task_struct *tsk,
         if (!new_ns->root) {
                 up_write(&namespace_sem);
                 kfree(new_ns);
-               return NULL;
+               return ERR_PTR(-ENOMEM);
         }
         spin_lock(&vfsmount_lock);
         list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
@@ -1507,36 +1533,21 @@ struct mnt_namespace *dup_mnt_ns(struct task_struct *tsk,
         return new_ns;
  }
  
-int copy_mnt_ns(int flags, struct task_struct *tsk)
+struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
+               struct fs_struct *new_fs)
  {
-       struct mnt_namespace *ns = tsk->nsproxy->mnt_ns;
         struct mnt_namespace *new_ns;
-       int err = 0;
-
-       if (!ns)
-               return 0;
  
+       BUG_ON(!ns);
         get_mnt_ns(ns);
  
         if (!(flags & CLONE_NEWNS))
-               return 0;
-
-       if (!capable(CAP_SYS_ADMIN)) {
-               err = -EPERM;
-               goto out;
-       }
-
-       new_ns = dup_mnt_ns(tsk, tsk->fs);
-       if (!new_ns) {
-               err = -ENOMEM;
-               goto out;
-       }
+               return ns;
  
-       tsk->nsproxy->mnt_ns = new_ns;
+       new_ns = dup_mnt_ns(ns, new_fs);
  
-out:
         put_mnt_ns(ns);
-       return err;
+       return new_ns;
  }
  
  asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name,
@@ -1803,60 +1814,33 @@ static void __init init_mount_tree(void)
         set_fs_root(current->fs, ns->root, ns->root->mnt_root);
  }
  
-void __init mnt_init(unsigned long mempages)
+void __init mnt_init(void)
  {
-       struct list_head *d;
-       unsigned int nr_hash;
-       int i;
+       unsigned u;
         int err;
  
         init_rwsem(&namespace_sem);
  
         mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
-                       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
+                       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
  
         mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
  
         if (!mount_hashtable)
                 panic("Failed to allocate mount hash table\n");
  
-       /*
-        * Find the power-of-two list-heads that can fit into the allocation..
-        * We don't guarantee that "sizeof(struct list_head)" is necessarily
-        * a power-of-two.
-        */
-       nr_hash = PAGE_SIZE / sizeof(struct list_head);
-       hash_bits = 0;
-       do {
-               hash_bits++;
-       } while ((nr_hash >> hash_bits) != 0);
-       hash_bits--;
+       printk("Mount-cache hash table entries: %lu\n", HASH_SIZE);
+
+       for (u = 0; u < HASH_SIZE; u++)
+               INIT_LIST_HEAD(&mount_hashtable[u]);
  
-       /*
-        * Re-calculate the actual number of entries and the mask
-        * from the number of bits we can fit.
-        */
-       nr_hash = 1UL << hash_bits;
-       hash_mask = nr_hash - 1;
-
-       printk("Mount-cache hash table entries: %d\n", nr_hash);
-
-       /* And initialize the newly allocated array */
-       d = mount_hashtable;
-       i = nr_hash;
-       do {
-               INIT_LIST_HEAD(d);
-               d++;
-               i--;
-       } while (i);
         err = sysfs_init();
         if (err)
                 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
                         __FUNCTION__, err);
-       err = subsystem_register(&fs_subsys);
-       if (err)
-               printk(KERN_WARNING "%s: subsystem_register error: %d\n",
-                       __FUNCTION__, err);
+       fs_kobj = kobject_create_and_add("fs", NULL);
+       if (!fs_kobj)
+               printk(KERN_WARNING "%s: kobj create error\n", __FUNCTION__);
         init_rootfs();
         init_mount_tree();
  }