]> err.no Git - linux-2.6/blobdiff - fs/exec.c
exec: simplify ->sighand switching
[linux-2.6] / fs / exec.c
index 498f2b3dca20b03a4640465aabe3ef5f174e78be..7f325df5e0140a05444e74114aec91325ee12355 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -29,6 +29,7 @@
 #include <linux/stat.h>
 #include <linux/fcntl.h>
 #include <linux/smp_lock.h>
+#include <linux/string.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
@@ -50,7 +51,6 @@
 #include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
 #include <linux/audit.h>
-#include <linux/signalfd.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -67,49 +67,26 @@ int suid_dumpable = 0;
 EXPORT_SYMBOL(suid_dumpable);
 /* The maximal length of core_pattern is also specified in sysctl.c */
 
-static struct linux_binfmt *formats;
+static LIST_HEAD(formats);
 static DEFINE_RWLOCK(binfmt_lock);
 
 int register_binfmt(struct linux_binfmt * fmt)
 {
-       struct linux_binfmt ** tmp = &formats;
-
        if (!fmt)
                return -EINVAL;
-       if (fmt->next)
-               return -EBUSY;
        write_lock(&binfmt_lock);
-       while (*tmp) {
-               if (fmt == *tmp) {
-                       write_unlock(&binfmt_lock);
-                       return -EBUSY;
-               }
-               tmp = &(*tmp)->next;
-       }
-       fmt->next = formats;
-       formats = fmt;
+       list_add(&fmt->lh, &formats);   /* no check for an already-registered fmt */
        write_unlock(&binfmt_lock);
        return 0;       
 }
 
 EXPORT_SYMBOL(register_binfmt);
 
-int unregister_binfmt(struct linux_binfmt * fmt)
+void unregister_binfmt(struct linux_binfmt * fmt)
 {
-       struct linux_binfmt ** tmp = &formats;
-
        write_lock(&binfmt_lock);
-       while (*tmp) {
-               if (fmt == *tmp) {
-                       *tmp = fmt->next;
-                       fmt->next = NULL;
-                       write_unlock(&binfmt_lock);
-                       return 0;
-               }
-               tmp = &(*tmp)->next;
-       }
+       list_del(&fmt->lh);     /* no check that fmt was registered */
        write_unlock(&binfmt_lock);
-       return -EINVAL;
 }
 
 EXPORT_SYMBOL(unregister_binfmt);
@@ -135,9 +112,6 @@ asmlinkage long sys_uselib(const char __user * library)
        if (error)
                goto out;
 
-       error = -EACCES;
-       if (nd.mnt->mnt_flags & MNT_NOEXEC)
-               goto exit;
        error = -EINVAL;
        if (!S_ISREG(nd.dentry->d_inode->i_mode))
                goto exit;
@@ -156,7 +130,7 @@ asmlinkage long sys_uselib(const char __user * library)
                struct linux_binfmt * fmt;
 
                read_lock(&binfmt_lock);
-               for (fmt = formats ; fmt ; fmt = fmt->next) {
+               list_for_each_entry(fmt, &formats, lh) {
                        if (!fmt->load_shlib)
                                continue;
                        if (!try_module_get(fmt->module))
@@ -681,8 +655,7 @@ struct file *open_exec(const char *name)
        if (!err) {
                struct inode *inode = nd.dentry->d_inode;
                file = ERR_PTR(-EACCES);
-               if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
-                   S_ISREG(inode->i_mode)) {
+               if (S_ISREG(inode->i_mode)) {
                        int err = vfs_permission(&nd, MAY_EXEC);
                        file = ERR_PTR(err);
                        if (!err) {
@@ -779,19 +752,11 @@ static int de_thread(struct task_struct *tsk)
        struct task_struct *leader = NULL;
        int count;
 
-       /*
-        * Tell all the sighand listeners that this sighand has
-        * been detached. The signalfd_detach() function grabs the
-        * sighand lock, if signal listeners are present on the sighand.
-        */
-       signalfd_detach(tsk);
-
        /*
         * If we don't share sighandlers, then we aren't sharing anything
         * and we can just re-use it all.
         */
        if (atomic_read(&oldsighand->count) <= 1) {
-               BUG_ON(atomic_read(&sig->count) != 1);
                exit_itimers(sig);
                return 0;
        }
@@ -934,8 +899,6 @@ no_thread_group:
        if (leader)
                release_task(leader);
 
-       BUG_ON(atomic_read(&sig->count) != 1);
-
        if (atomic_read(&oldsighand->count) == 1) {
                /*
                 * Now that we nuked the rest of the thread group,
@@ -953,12 +916,7 @@ no_thread_group:
 
                write_lock_irq(&tasklist_lock);
                spin_lock(&oldsighand->siglock);
-               spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING);
-
                rcu_assign_pointer(tsk->sighand, newsighand);
-               recalc_sigpending();
-
-               spin_unlock(&newsighand->siglock);
                spin_unlock(&oldsighand->siglock);
                write_unlock_irq(&tasklist_lock);
 
@@ -968,12 +926,11 @@ no_thread_group:
        BUG_ON(!thread_group_leader(tsk));
        return 0;
 }
-       
+
 /*
  * These functions flushes out all traces of the currently running executable
  * so that a new one can be started
  */
-
 static void flush_old_files(struct files_struct * files)
 {
        long j = -1;
@@ -1058,9 +1015,9 @@ int flush_old_exec(struct linux_binprm * bprm)
        current->sas_ss_sp = current->sas_ss_size = 0;
 
        if (current->euid == current->uid && current->egid == current->gid)
-               current->mm->dumpable = 1;
+               set_dumpable(current->mm, 1);
        else
-               current->mm->dumpable = suid_dumpable;
+               set_dumpable(current->mm, suid_dumpable);
 
        name = bprm->filename;
 
@@ -1084,11 +1041,14 @@ int flush_old_exec(struct linux_binprm * bprm)
         */
        current->mm->task_size = TASK_SIZE;
 
-       if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || 
-           file_permission(bprm->file, MAY_READ) ||
-           (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
+       if (bprm->e_uid != current->euid || bprm->e_gid != current->egid) {
                suid_keys(current);
-               current->mm->dumpable = suid_dumpable;
+               set_dumpable(current->mm, suid_dumpable);
+               current->pdeath_signal = 0;
+       } else if (file_permission(bprm->file, MAY_READ) ||
+                       (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
+               suid_keys(current);
+               set_dumpable(current->mm, suid_dumpable);
        }
 
        /* An exec changes our domain. We are no longer part of the thread
@@ -1177,8 +1137,10 @@ void compute_creds(struct linux_binprm *bprm)
 {
        int unsafe;
 
-       if (bprm->e_uid != current->uid)
+       if (bprm->e_uid != current->uid) {
                suid_keys(current);
+               current->pdeath_signal = 0;
+       }
        exec_keys(current);
 
        task_lock(current);
@@ -1290,7 +1252,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
        retval = -ENOENT;
        for (try=0; try<2; try++) {
                read_lock(&binfmt_lock);
-               for (fmt = formats ; fmt ; fmt = fmt->next) {
+               list_for_each_entry(fmt, &formats, lh) {
                        int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
                        if (!fn)
                                continue;
@@ -1543,6 +1505,14 @@ static int format_corename(char *corename, const char *pattern, long signr)
                                        goto out;
                                out_ptr += rc;
                                break;
+                       /* core limit size */
+                       case 'c':
+                               rc = snprintf(out_ptr, out_end - out_ptr,
+                                             "%lu", current->signal->rlim[RLIMIT_CORE].rlim_cur);
+                               if (rc > out_end - out_ptr)
+                                       goto out;
+                               out_ptr += rc;
+                               break;
                        default:
                                break;
                        }
@@ -1665,6 +1635,56 @@ fail:
        return core_waiters;
 }
 
+/*
+ * set_dumpable converts traditional three-value dumpable to two flags and
+ * stores them into mm->flags.  It modifies lower two bits of mm->flags, but
+ * these bits are not changed atomically.  So get_dumpable can observe the
+ * intermediate state.  To avoid unexpected behavior, make get_dumpable
+ * return either the old dumpable value or the new one by paying attention
+ * to the order in which the bits are modified.
+ *
+ * dumpable |   mm->flags (binary)
+ * old  new | initial interim  final
+ * ---------+-----------------------
+ *  0    1  |   00      01      01
+ *  0    2  |   00      10(*)   11
+ *  1    0  |   01      00      00
+ *  1    2  |   01      11      11
+ *  2    0  |   11      10(*)   00
+ *  2    1  |   11      11      01
+ *
+ * (*) get_dumpable regards interim value of 10 as 11.
+ */
+void set_dumpable(struct mm_struct *mm, int value)
+{
+       switch (value) {
+       case 0:
+               clear_bit(MMF_DUMPABLE, &mm->flags);
+               smp_wmb();      /* interim: 00, or 10 (read as 2) */
+               clear_bit(MMF_DUMP_SECURELY, &mm->flags);
+               break;
+       case 1:
+               set_bit(MMF_DUMPABLE, &mm->flags);
+               smp_wmb();      /* interim: 01 or 11 */
+               clear_bit(MMF_DUMP_SECURELY, &mm->flags);
+               break;
+       case 2:
+               set_bit(MMF_DUMP_SECURELY, &mm->flags);
+               smp_wmb();      /* interim: 10 (read as 2) or 11 */
+               set_bit(MMF_DUMPABLE, &mm->flags);
+               break;
+       }       /* any other value leaves mm->flags unchanged */
+}
+EXPORT_SYMBOL_GPL(set_dumpable);
+
+int get_dumpable(struct mm_struct *mm)
+{
+       int ret;
+
+       ret = mm->flags & 0x3;  /* keep only the lower two (dumpable) bits */
+       return (ret >= 2) ? 2 : ret;    /* interim 10 and final 11 both mean 2 */
+}
+
 int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 {
        char corename[CORENAME_MAX_SIZE + 1];
@@ -1676,6 +1696,10 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
        int fsuid = current->fsuid;
        int flag = 0;
        int ispipe = 0;
+       unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
+       char **helper_argv = NULL;
+       int helper_argc = 0;
+       char *delimit;
 
        audit_core_dumps(signr);
 
@@ -1683,7 +1707,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
        if (!binfmt || !binfmt->core_dump)
                goto fail;
        down_write(&mm->mmap_sem);
-       if (!mm->dumpable) {
+       if (!get_dumpable(mm)) {
                up_write(&mm->mmap_sem);
                goto fail;
        }
@@ -1693,11 +1717,11 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
         *      process nor do we know its entire history. We only know it
         *      was tainted so we dump it as root in mode 2.
         */
-       if (mm->dumpable == 2) {        /* Setuid core dump mode */
+       if (get_dumpable(mm) == 2) {    /* Setuid core dump mode */
                flag = O_EXCL;          /* Stop rewrite attacks */
                current->fsuid = 0;     /* Dump root private */
        }
-       mm->dumpable = 0;
+       set_dumpable(mm, 0);
 
        retval = coredump_wait(exit_code);
        if (retval < 0)
@@ -1709,9 +1733,6 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
         */
        clear_thread_flag(TIF_SIGPENDING);
 
-       if (current->signal->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
-               goto fail_unlock;
-
        /*
         * lock_kernel() because format_corename() is controlled by sysctl, which
         * uses lock_kernel()
@@ -1719,9 +1740,39 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
        lock_kernel();
        ispipe = format_corename(corename, core_pattern, signr);
        unlock_kernel();
+       /*
+        * Don't bother to check the RLIMIT_CORE value if core_pattern points
+        * to a pipe.  Since we're not writing directly to the filesystem,
+        * RLIMIT_CORE doesn't really apply, as no actual core file will be
+        * created unless the pipe reader chooses to write out the core file,
+        * at which point file size limits and permissions will be imposed
+        * as they are with any other process.
+        */
+       if ((!ispipe) && (core_limit < binfmt->min_coredump))
+               goto fail_unlock;
+
        if (ispipe) {
+               helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
+               /* Terminate the string before the first option */
+               delimit = strchr(corename, ' ');
+               if (delimit)
+                       *delimit = '\0';
+               delimit = strrchr(helper_argv[0], '/');
+               if (delimit)
+                       delimit++;
+               else
+                       delimit = helper_argv[0];
+               if (!strcmp(delimit, current->comm)) {
+                       printk(KERN_NOTICE "Recursive core dump detected, "
+                                       "aborting\n");
+                       goto fail_unlock;
+               }
+
+               core_limit = RLIM_INFINITY;
+
                /* SIGPIPE can happen, but it's just never processed */
-               if(call_usermodehelper_pipe(corename+1, NULL, NULL, &file)) {
+               if (call_usermodehelper_pipe(corename+1, helper_argv, NULL,
+                               &file)) {
                        printk(KERN_INFO "Core dump to %s pipe failed\n",
                               corename);
                        goto fail_unlock;
@@ -1749,13 +1800,16 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
        if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
                goto close_fail;
 
-       retval = binfmt->core_dump(signr, regs, file);
+       retval = binfmt->core_dump(signr, regs, file, core_limit);
 
        if (retval)
                current->signal->group_exit_code |= 0x80;
 close_fail:
        filp_close(file, NULL);
 fail_unlock:
+       if (helper_argv)
+               argv_free(helper_argv);
+
        current->fsuid = fsuid;
        complete_all(&mm->core_done);
 fail: