extern int __init init_rootfs(void);
-#define CL_EXPIRE 0x01
-
#ifdef CONFIG_SYSFS
extern int __init sysfs_init(void);
#else
INIT_LIST_HEAD(&mnt->mnt_mounts);
INIT_LIST_HEAD(&mnt->mnt_list);
INIT_LIST_HEAD(&mnt->mnt_expire);
+ INIT_LIST_HEAD(&mnt->mnt_share);
+ INIT_LIST_HEAD(&mnt->mnt_slave_list);
+ INIT_LIST_HEAD(&mnt->mnt_slave);
if (name) {
int size = strlen(name) + 1;
char *newname = kmalloc(size, GFP_KERNEL);
}
/*
- * Now, lookup_mnt increments the ref count before returning
- * the vfsmount struct.
+ * find the first or last mount at @dentry on vfsmount @mnt depending on
+ * @dir. If @dir is set return the first mount else return the last mount.
*/
-struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
+struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
+ int dir)
{
struct list_head *head = mount_hashtable + hash(mnt, dentry);
struct list_head *tmp = head;
struct vfsmount *p, *found = NULL;
- spin_lock(&vfsmount_lock);
for (;;) {
- tmp = tmp->next;
+ tmp = dir ? tmp->next : tmp->prev;
p = NULL;
if (tmp == head)
break;
p = list_entry(tmp, struct vfsmount, mnt_hash);
if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
- found = mntget(p);
+ found = p;
break;
}
}
- spin_unlock(&vfsmount_lock);
return found;
}
+/*
+ * lookup_mnt increments the ref count before returning
+ * the vfsmount struct.
+ */
+struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
+{
+ struct vfsmount *child_mnt;
+ spin_lock(&vfsmount_lock);
+ if ((child_mnt = __lookup_mnt(mnt, dentry, 1)))
+ mntget(child_mnt);
+ spin_unlock(&vfsmount_lock);
+ return child_mnt;
+}
+
static inline int check_mnt(struct vfsmount *mnt)
{
return mnt->mnt_namespace == current->namespace;
old_nd->dentry->d_mounted--;
}
+void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
+ struct vfsmount *child_mnt)
+{
+ child_mnt->mnt_parent = mntget(mnt);
+ child_mnt->mnt_mountpoint = dget(dentry);
+ dentry->d_mounted++;
+}
+
static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
{
- mnt->mnt_parent = mntget(nd->mnt);
- mnt->mnt_mountpoint = dget(nd->dentry);
- list_add(&mnt->mnt_hash, mount_hashtable + hash(nd->mnt, nd->dentry));
+ mnt_set_mountpoint(nd->mnt, nd->dentry, mnt);
+ list_add_tail(&mnt->mnt_hash, mount_hashtable +
+ hash(nd->mnt, nd->dentry));
list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
- nd->dentry->d_mounted++;
+}
+
+/*
+ * the caller must hold vfsmount_lock
+ */
+static void commit_tree(struct vfsmount *mnt)
+{
+ struct vfsmount *parent = mnt->mnt_parent;
+ struct vfsmount *m;
+ LIST_HEAD(head);
+ struct namespace *n = parent->mnt_namespace;
+
+ BUG_ON(parent == mnt);
+
+ list_add_tail(&head, &mnt->mnt_list);
+ list_for_each_entry(m, &head, mnt_list)
+ m->mnt_namespace = n;
+ list_splice(&head, n->list.prev);
+
+ list_add_tail(&mnt->mnt_hash, mount_hashtable +
+ hash(parent, mnt->mnt_mountpoint));
+ list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+ touch_namespace(n);
}
static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
return list_entry(next, struct vfsmount, mnt_child);
}
+static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
+{
+ struct list_head *prev = p->mnt_mounts.prev;
+ while (prev != &p->mnt_mounts) {
+ p = list_entry(prev, struct vfsmount, mnt_child);
+ prev = p->mnt_mounts.prev;
+ }
+ return p;
+}
+
static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
int flag)
{
mnt->mnt_root = dget(root);
mnt->mnt_mountpoint = mnt->mnt_root;
mnt->mnt_parent = mnt;
- mnt->mnt_namespace = current->namespace;
+
+ if (flag & CL_SLAVE) {
+ list_add(&mnt->mnt_slave, &old->mnt_slave_list);
+ mnt->mnt_master = old;
+ CLEAR_MNT_SHARED(mnt);
+ } else {
+ if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
+ list_add(&mnt->mnt_share, &old->mnt_share);
+ if (IS_MNT_SLAVE(old))
+ list_add(&mnt->mnt_slave, &old->mnt_slave);
+ mnt->mnt_master = old->mnt_master;
+ }
+ if (flag & CL_MAKE_SHARED)
+ set_mnt_shared(mnt);
/* stick the duplicate mount on the same expiry list
* as the original if that was on one */
*/
int may_umount(struct vfsmount *mnt)
{
- if (atomic_read(&mnt->mnt_count) > 2)
- return -EBUSY;
- return 0;
+ int ret = 0;
+ spin_lock(&vfsmount_lock);
+ if (propagate_mount_busy(mnt, 2))
+ ret = -EBUSY;
+ spin_unlock(&vfsmount_lock);
+ return ret;
}
EXPORT_SYMBOL(may_umount);
-static void release_mounts(struct list_head *head)
+void release_mounts(struct list_head *head)
{
struct vfsmount *mnt;
while(!list_empty(head)) {
}
}
-static void umount_tree(struct vfsmount *mnt, struct list_head *kill)
+void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
{
struct vfsmount *p;
list_add(&p->mnt_hash, kill);
}
+ if (propagate)
+ propagate_umount(kill);
+
list_for_each_entry(p, kill, mnt_hash) {
list_del_init(&p->mnt_expire);
list_del_init(&p->mnt_list);
list_del_init(&p->mnt_child);
if (p->mnt_parent != p)
mnt->mnt_mountpoint->d_mounted--;
+ change_mnt_propagation(p, MS_PRIVATE);
}
}
event++;
retval = -EBUSY;
- if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) {
+ if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
if (!list_empty(&mnt->mnt_list))
- umount_tree(mnt, &umount_list);
+ umount_tree(mnt, 1, &umount_list);
retval = 0;
}
spin_unlock(&vfsmount_lock);
if (current->uid != nd->dentry->d_inode->i_uid)
return -EPERM;
}
- if (permission(nd->dentry->d_inode, MAY_WRITE, nd))
+ if (vfs_permission(nd, MAY_WRITE))
return -EPERM;
return 0;
#endif
}
}
-static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
+struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
int flag)
{
struct vfsmount *res, *p, *q, *r, *s;
struct nameidata nd;
+ if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
+ return NULL;
+
res = q = clone_mnt(mnt, dentry, flag);
if (!q)
goto Enomem;
continue;
for (s = r; s; s = next_mnt(s, r)) {
+ if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) {
+ s = skip_mnt_tree(s);
+ continue;
+ }
while (p != s->mnt_parent) {
p = p->mnt_parent;
q = q->mnt_parent;
if (res) {
LIST_HEAD(umount_list);
spin_lock(&vfsmount_lock);
- umount_tree(res, &umount_list);
+ umount_tree(res, 0, &umount_list);
spin_unlock(&vfsmount_lock);
release_mounts(&umount_list);
}
return NULL;
}
+/*
+ * @source_mnt : mount tree to be attached
+ * @nd : place the mount tree @source_mnt is attached
+ * @parent_nd : if non-null, detach the source_mnt from its parent and
+ * store the parent mount and mountpoint dentry.
+ * (done when source_mnt is moved)
+ *
+ * NOTE: in the table below explains the semantics when a source mount
+ * of a given type is attached to a destination mount of a given type.
+ * ---------------------------------------------------------------------------
+ * | BIND MOUNT OPERATION |
+ * |**************************************************************************
+ * | source-->| shared | private | slave | unbindable |
+ * | dest | | | | |
+ * | | | | | | |
+ * | v | | | | |
+ * |**************************************************************************
+ * | shared | shared (++) | shared (+) | shared(+++)| invalid |
+ * | | | | | |
+ * |non-shared| shared (+) | private | slave (*) | invalid |
+ * ***************************************************************************
+ * A bind operation clones the source mount and mounts the clone on the
+ * destination mount.
+ *
+ * (++) the cloned mount is propagated to all the mounts in the propagation
+ * tree of the destination mount and the cloned mount is added to
+ * the peer group of the source mount.
+ * (+) the cloned mount is created under the destination mount and is marked
+ * as shared. The cloned mount is added to the peer group of the source
+ * mount.
+ * (+++) the mount is propagated to all the mounts in the propagation tree
+ * of the destination mount and the cloned mount is made slave
+ * of the same master as that of the source mount. The cloned mount
+ * is marked as 'shared and slave'.
+ * (*) the cloned mount is made a slave of the same master as that of the
+ * source mount.
+ *
+ * ---------------------------------------------------------------------------
+ * | MOVE MOUNT OPERATION |
+ * |**************************************************************************
+ * | source-->| shared | private | slave | unbindable |
+ * | dest | | | | |
+ * | | | | | | |
+ * | v | | | | |
+ * |**************************************************************************
+ * | shared | shared (+) | shared (+) | shared(+++) | invalid |
+ * | | | | | |
+ * |non-shared| shared (+*) | private | slave (*) | unbindable |
+ * ***************************************************************************
+ *
+ * (+) the mount is moved to the destination. And is then propagated to
+ * all the mounts in the propagation tree of the destination mount.
+ * (+*) the mount is moved to the destination.
+ * (+++) the mount is moved to the destination and is then propagated to
+ * all the mounts belonging to the destination mount's propagation tree.
+ * the mount is marked as 'shared and slave'.
+ * (*) the mount continues to be a slave at the new location.
+ *
+ * if the source mount is a tree, the operations explained above is
+ * applied to each mount in the tree.
+ * Must be called without spinlocks held, since this function can sleep
+ * in allocations.
+ */
+static int attach_recursive_mnt(struct vfsmount *source_mnt,
+ struct nameidata *nd, struct nameidata *parent_nd)
+{
+ LIST_HEAD(tree_list);
+ struct vfsmount *dest_mnt = nd->mnt;
+ struct dentry *dest_dentry = nd->dentry;
+ struct vfsmount *child, *p;
+
+ if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list))
+ return -EINVAL;
+
+ if (IS_MNT_SHARED(dest_mnt)) {
+ for (p = source_mnt; p; p = next_mnt(p, source_mnt))
+ set_mnt_shared(p);
+ }
+
+ spin_lock(&vfsmount_lock);
+ if (parent_nd) {
+ detach_mnt(source_mnt, parent_nd);
+ attach_mnt(source_mnt, nd);
+ touch_namespace(current->namespace);
+ } else {
+ mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
+ commit_tree(source_mnt);
+ }
+
+ list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
+ list_del_init(&child->mnt_hash);
+ commit_tree(child);
+ }
+ spin_unlock(&vfsmount_lock);
+ return 0;
+}
+
static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
{
int err;
goto out_unlock;
err = -ENOENT;
- spin_lock(&vfsmount_lock);
- if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) {
- struct list_head head;
-
- attach_mnt(mnt, nd);
- list_add_tail(&head, &mnt->mnt_list);
- list_splice(&head, current->namespace->list.prev);
- err = 0;
- touch_namespace(current->namespace);
- }
- spin_unlock(&vfsmount_lock);
+ if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry))
+ err = attach_recursive_mnt(mnt, nd, NULL);
out_unlock:
up(&nd->dentry->d_inode->i_sem);
if (!err)
down_write(&namespace_sem);
err = -EINVAL;
+ if (IS_MNT_UNBINDABLE(old_nd.mnt))
+ goto out;
+
if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
goto out;
if (err) {
LIST_HEAD(umount_list);
spin_lock(&vfsmount_lock);
- umount_tree(mnt, &umount_list);
+ umount_tree(mnt, 0, &umount_list);
spin_unlock(&vfsmount_lock);
release_mounts(&umount_list);
}
return err;
}
+static inline int tree_contains_unbindable(struct vfsmount *mnt)
+{
+ struct vfsmount *p;
+ for (p = mnt; p; p = next_mnt(p, mnt)) {
+ if (IS_MNT_UNBINDABLE(p))
+ return 1;
+ }
+ return 0;
+}
+
static int do_move_mount(struct nameidata *nd, char *old_name)
{
struct nameidata old_nd, parent_nd;
if (IS_DEADDIR(nd->dentry->d_inode))
goto out1;
- spin_lock(&vfsmount_lock);
if (!IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
- goto out2;
+ goto out1;
err = -EINVAL;
if (old_nd.dentry != old_nd.mnt->mnt_root)
- goto out2;
+ goto out1;
if (old_nd.mnt == old_nd.mnt->mnt_parent)
- goto out2;
+ goto out1;
if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
S_ISDIR(old_nd.dentry->d_inode->i_mode))
- goto out2;
-
+ goto out1;
+ /*
+ * Don't move a mount residing in a shared parent.
+ */
+ if (old_nd.mnt->mnt_parent && IS_MNT_SHARED(old_nd.mnt->mnt_parent))
+ goto out1;
+ /*
+ * Don't move a mount tree containing unbindable mounts to a destination
+ * mount which is shared.
+ */
+ if (IS_MNT_SHARED(nd->mnt) && tree_contains_unbindable(old_nd.mnt))
+ goto out1;
err = -ELOOP;
for (p = nd->mnt; p->mnt_parent != p; p = p->mnt_parent)
if (p == old_nd.mnt)
- goto out2;
- err = 0;
+ goto out1;
- detach_mnt(old_nd.mnt, &parent_nd);
- attach_mnt(old_nd.mnt, nd);
- touch_namespace(current->namespace);
+ if ((err = attach_recursive_mnt(old_nd.mnt, nd, &parent_nd)))
+ goto out1;
+ spin_lock(&vfsmount_lock);
/* if the mount is moved, it should no longer be expire
* automatically */
list_del_init(&old_nd.mnt->mnt_expire);
-out2:
spin_unlock(&vfsmount_lock);
out1:
up(&nd->dentry->d_inode->i_sem);
* Check that it is still dead: the count should now be 2 - as
* contributed by the vfsmount parent and the mntget above
*/
- if (atomic_read(&mnt->mnt_count) == 2) {
+ if (!propagate_mount_busy(mnt, 2)) {
/* delete from the namespace */
touch_namespace(mnt->mnt_namespace);
list_del_init(&mnt->mnt_list);
mnt->mnt_namespace = NULL;
- umount_tree(mnt, umounts);
+ umount_tree(mnt, 1, umounts);
spin_unlock(&vfsmount_lock);
} else {
/*
data_page);
else if (flags & MS_BIND)
retval = do_loopback(&nd, dev_name, flags & MS_REC);
- else if (flags & MS_PRIVATE)
+ else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
retval = do_change_type(&nd, flags);
else if (flags & MS_MOVE)
retval = do_move_mount(&nd, dev_name);
down_write(&namespace_sem);
/* First pass: copy the tree topology */
new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root,
- CL_EXPIRE);
+ CL_COPY_ALL | CL_EXPIRE);
if (!new_ns->root) {
up_write(&namespace_sem);
kfree(new_ns);
down_write(&namespace_sem);
down(&old_nd.dentry->d_inode->i_sem);
error = -EINVAL;
+ if (IS_MNT_SHARED(old_nd.mnt) ||
+ IS_MNT_SHARED(new_nd.mnt->mnt_parent) ||
+ IS_MNT_SHARED(user_nd.mnt->mnt_parent))
+ goto out2;
if (!check_mnt(user_nd.mnt))
goto out2;
error = -ENOENT;
spin_unlock(&vfsmount_lock);
down_write(&namespace_sem);
spin_lock(&vfsmount_lock);
- umount_tree(root, &umount_list);
+ umount_tree(root, 0, &umount_list);
spin_unlock(&vfsmount_lock);
up_write(&namespace_sem);
release_mounts(&umount_list);