[MIPS] Early check for SMTC kernel on non-MT processor

[linux-2.6] / fs / exec.c
diff --git a/fs/exec.c b/fs/exec.c

index 8c5196087f319c3d3e809c08291856b1f38cfe1c..f20561ff4528f21ad7a36d35d56513346c5f740b 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -22,7 +22,6 @@
   * formats. 
   */
  
-#include <linux/config.h>
  #include <linux/slab.h>
  #include <linux/file.h>
  #include <linux/mman.h>
@@ -39,6 +38,7 @@
  #include <linux/binfmts.h>
  #include <linux/swap.h>
  #include <linux/utsname.h>
+#include <linux/pid_namespace.h>
  #include <linux/module.h>
  #include <linux/namei.h>
  #include <linux/proc_fs.h>
@@ -47,9 +47,10 @@
  #include <linux/security.h>
  #include <linux/syscalls.h>
  #include <linux/rmap.h>
-#include <linux/acct.h>
+#include <linux/tsacct_kern.h>
  #include <linux/cn_proc.h>
  #include <linux/audit.h>
+#include <linux/signalfd.h>
  
  #include <asm/uaccess.h>
  #include <asm/mmu_context.h>
@@ -59,7 +60,7 @@
  #endif
  
  int core_uses_pid;
-char core_pattern[65] = "core";
+char core_pattern[CORENAME_MAX_SIZE] = "core";
  int suid_dumpable = 0;
  
  EXPORT_SYMBOL(suid_dumpable);
@@ -100,6 +101,7 @@ int unregister_binfmt(struct linux_binfmt * fmt)
         while (*tmp) {
                 if (fmt == *tmp) {
                         *tmp = fmt->next;
+                       fmt->next = NULL;
                         write_unlock(&binfmt_lock);
                         return 0;
                 }
@@ -132,6 +134,9 @@ asmlinkage long sys_uselib(const char __user * library)
         if (error)
                 goto out;
  
+       error = -EACCES;
+       if (nd.mnt->mnt_flags & MNT_NOEXEC)
+               goto exit;
         error = -EINVAL;
         if (!S_ISREG(nd.dentry->d_inode->i_mode))
                 goto exit;
@@ -405,12 +410,10 @@ int setup_arg_pages(struct linux_binprm *bprm,
                 bprm->loader += stack_base;
         bprm->exec += stack_base;
  
-       mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+       mpnt = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
         if (!mpnt)
                 return -ENOMEM;
  
-       memset(mpnt, 0, sizeof(*mpnt));
-
         down_write(&mm->mmap_sem);
         {
                 mpnt->vm_mm = mm;
@@ -487,8 +490,6 @@ struct file *open_exec(const char *name)
                 if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
                     S_ISREG(inode->i_mode)) {
                         int err = vfs_permission(&nd, MAY_EXEC);
-                       if (!err && !(inode->i_mode & 0111))
-                               err = -EACCES;
                         file = ERR_PTR(err);
                         if (!err) {
                                 file = nameidata_to_filp(&nd, O_RDONLY);
@@ -584,6 +585,13 @@ static int de_thread(struct task_struct *tsk)
         struct task_struct *leader = NULL;
         int count;
  
+       /*
+        * Tell all the sighand listeners that this sighand has
+        * been detached. The signalfd_detach() function grabs the
+        * sighand lock, if signal listeners are present on the sighand.
+        */
+       signalfd_detach(tsk);
+
         /*
          * If we don't share sighandlers, then we aren't sharing anything
          * and we can just re-use it all.
@@ -598,7 +606,7 @@ static int de_thread(struct task_struct *tsk)
         if (!newsighand)
                 return -ENOMEM;
  
-       if (thread_group_empty(current))
+       if (thread_group_empty(tsk))
                 goto no_thread_group;
  
         /*
@@ -623,17 +631,17 @@ static int de_thread(struct task_struct *tsk)
          * Reparenting needs write_lock on tasklist_lock,
          * so it is safe to do it under read_lock.
          */
-       if (unlikely(current->group_leader == child_reaper))
-               child_reaper = current;
+       if (unlikely(tsk->group_leader == child_reaper(tsk)))
+               tsk->nsproxy->pid_ns->child_reaper = tsk;
  
-       zap_other_threads(current);
+       zap_other_threads(tsk);
         read_unlock(&tasklist_lock);
  
         /*
          * Account for the thread group leader hanging around:
          */
         count = 1;
-       if (!thread_group_leader(current)) {
+       if (!thread_group_leader(tsk)) {
                 count = 2;
                 /*
                  * The SIGALRM timer survives the exec, but needs to point
@@ -642,14 +650,14 @@ static int de_thread(struct task_struct *tsk)
                  * synchronize with any firing (by calling del_timer_sync)
                  * before we can safely let the old group leader die.
                  */
-               sig->tsk = current;
+               sig->tsk = tsk;
                 spin_unlock_irq(lock);
                 if (hrtimer_cancel(&sig->real_timer))
                         hrtimer_restart(&sig->real_timer);
                 spin_lock_irq(lock);
         }
         while (atomic_read(&sig->count) > count) {
-               sig->group_exit_task = current;
+               sig->group_exit_task = tsk;
                 sig->notify_count = count;
                 __set_current_state(TASK_UNINTERRUPTIBLE);
                 spin_unlock_irq(lock);
@@ -665,13 +673,13 @@ static int de_thread(struct task_struct *tsk)
          * do is to wait for the thread group leader to become inactive,
          * and to assume its PID:
          */
-       if (!thread_group_leader(current)) {
+       if (!thread_group_leader(tsk)) {
                 /*
                  * Wait for the thread group leader to be a zombie.
                  * It should already be zombie at this point, most
                  * of the time.
                  */
-               leader = current->group_leader;
+               leader = tsk->group_leader;
                 while (leader->exit_state != EXIT_ZOMBIE)
                         yield();
  
@@ -685,12 +693,12 @@ static int de_thread(struct task_struct *tsk)
                  * When we take on its identity by switching to its PID, we
                  * also take its birthdate (always earlier than our own).
                  */
-               current->start_time = leader->start_time;
+               tsk->start_time = leader->start_time;
  
                 write_lock_irq(&tasklist_lock);
  
-               BUG_ON(leader->tgid != current->tgid);
-               BUG_ON(current->pid == current->tgid);
+               BUG_ON(leader->tgid != tsk->tgid);
+               BUG_ON(tsk->pid == tsk->tgid);
                 /*
                  * An exec() starts a new thread group with the
                  * TGID of the previous thread group. Rehash the
@@ -699,25 +707,21 @@ static int de_thread(struct task_struct *tsk)
                  */
  
                 /* Become a process group leader with the old leader's pid.
-                * Note: The old leader also uses thispid until release_task
+                * The old leader becomes a thread of this thread group.
+                * Note: The old leader also uses this pid until release_task
                  *       is called.  Odd but simple and correct.
                  */
-               detach_pid(current, PIDTYPE_PID);
-               current->pid = leader->pid;
-               attach_pid(current, PIDTYPE_PID,  current->pid);
-               attach_pid(current, PIDTYPE_PGID, current->signal->pgrp);
-               attach_pid(current, PIDTYPE_SID,  current->signal->session);
-               list_add_tail_rcu(&current->tasks, &init_task.tasks);
+               detach_pid(tsk, PIDTYPE_PID);
+               tsk->pid = leader->pid;
+               attach_pid(tsk, PIDTYPE_PID,  find_pid(tsk->pid));
+               transfer_pid(leader, tsk, PIDTYPE_PGID);
+               transfer_pid(leader, tsk, PIDTYPE_SID);
+               list_replace_rcu(&leader->tasks, &tsk->tasks);
  
-               current->group_leader = current;
-               leader->group_leader = current;
+               tsk->group_leader = tsk;
+               leader->group_leader = tsk;
  
-               /* Reduce leader to a thread */
-               detach_pid(leader, PIDTYPE_PGID);
-               detach_pid(leader, PIDTYPE_SID);
-               list_del_init(&leader->tasks);
-
-               current->exit_signal = SIGCHLD;
+               tsk->exit_signal = SIGCHLD;
  
                 BUG_ON(leader->exit_state != EXIT_ZOMBIE);
                 leader->exit_state = EXIT_DEAD;
@@ -755,20 +759,19 @@ no_thread_group:
  
                 write_lock_irq(&tasklist_lock);
                 spin_lock(&oldsighand->siglock);
-               spin_lock(&newsighand->siglock);
+               spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING);
  
-               rcu_assign_pointer(current->sighand, newsighand);
+               rcu_assign_pointer(tsk->sighand, newsighand);
                 recalc_sigpending();
  
                 spin_unlock(&newsighand->siglock);
                 spin_unlock(&oldsighand->siglock);
                 write_unlock_irq(&tasklist_lock);
  
-               if (atomic_dec_and_test(&oldsighand->count))
-                       kmem_cache_free(sighand_cachep, oldsighand);
+               __cleanup_sighand(oldsighand);
         }
  
-       BUG_ON(!thread_group_leader(current));
+       BUG_ON(!thread_group_leader(tsk));
         return 0;
  }
         
@@ -789,7 +792,7 @@ static void flush_old_files(struct files_struct * files)
                 j++;
                 i = j * __NFDBITS;
                 fdt = files_fdtable(files);
-               if (i >= fdt->max_fds || i >= fdt->max_fdset)
+               if (i >= fdt->max_fds)
                         break;
                 set = fdt->close_on_exec->fds_bits[j];
                 if (!set)
@@ -905,8 +908,7 @@ int flush_old_exec(struct linux_binprm * bprm)
         return 0;
  
  mmap_failed:
-       put_files_struct(current->files);
-       current->files = files;
+       reset_files_struct(current, files);
  out:
         return retval;
  }
@@ -920,23 +922,17 @@ EXPORT_SYMBOL(flush_old_exec);
  int prepare_binprm(struct linux_binprm *bprm)
  {
         int mode;
-       struct inode * inode = bprm->file->f_dentry->d_inode;
+       struct inode * inode = bprm->file->f_path.dentry->d_inode;
         int retval;
  
         mode = inode->i_mode;
-       /*
-        * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
-        * generic_permission lets a non-executable through
-        */
-       if (!(mode & 0111))     /* with at least _one_ execute bit set */
-               return -EACCES;
         if (bprm->file->f_op == NULL)
                 return -EACCES;
  
         bprm->e_uid = current->euid;
         bprm->e_gid = current->egid;
  
-       if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) {
+       if(!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
                 /* Set-uid? */
                 if (mode & S_ISUID) {
                         current->personality &= ~PER_CLEAR_ON_SETID;
@@ -997,33 +993,51 @@ void compute_creds(struct linux_binprm *bprm)
         task_unlock(current);
         security_bprm_post_apply_creds(bprm);
  }
-
  EXPORT_SYMBOL(compute_creds);
  
+/*
+ * Arguments are '\0' separated strings found at the location bprm->p
+ * points to; chop off the first by relocating bprm->p to right after
+ * the first '\0' encountered.
+ */
  void remove_arg_zero(struct linux_binprm *bprm)
  {
         if (bprm->argc) {
-               unsigned long offset;
-               char * kaddr;
-               struct page *page;
+               char ch;
  
-               offset = bprm->p % PAGE_SIZE;
-               goto inside;
+               do {
+                       unsigned long offset;
+                       unsigned long index;
+                       char *kaddr;
+                       struct page *page;
  
-               while (bprm->p++, *(kaddr+offset++)) {
-                       if (offset != PAGE_SIZE)
-                               continue;
-                       offset = 0;
-                       kunmap_atomic(kaddr, KM_USER0);
-inside:
-                       page = bprm->page[bprm->p/PAGE_SIZE];
+                       offset = bprm->p & ~PAGE_MASK;
+                       index = bprm->p >> PAGE_SHIFT;
+
+                       page = bprm->page[index];
                         kaddr = kmap_atomic(page, KM_USER0);
-               }
-               kunmap_atomic(kaddr, KM_USER0);
+
+                       /* run through page until we reach end or find NUL */
+                       do {
+                               ch = *(kaddr + offset);
+
+                               /* discard that character... */
+                               bprm->p++;
+                               offset++;
+                       } while (offset < PAGE_SIZE && ch != '\0');
+
+                       kunmap_atomic(kaddr, KM_USER0);
+
+                       /* free the old page */
+                       if (offset == PAGE_SIZE) {
+                               __free_page(page);
+                               bprm->page[index] = NULL;
+                       }
+               } while (ch != '\0');
+
                 bprm->argc--;
         }
  }
-
  EXPORT_SYMBOL(remove_arg_zero);
  
  /*
@@ -1253,19 +1267,21 @@ int set_binfmt(struct linux_binfmt *new)
  
  EXPORT_SYMBOL(set_binfmt);
  
-#define CORENAME_MAX_SIZE 64
-
  /* format_corename will inspect the pattern parameter, and output a
   * name into corename, which must have space for at least
   * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
   */
-static void format_corename(char *corename, const char *pattern, long signr)
+static int format_corename(char *corename, const char *pattern, long signr)
  {
         const char *pat_ptr = pattern;
         char *out_ptr = corename;
         char *const out_end = corename + CORENAME_MAX_SIZE;
         int rc;
         int pid_in_pattern = 0;
+       int ispipe = 0;
+
+       if (*pattern == '|')
+               ispipe = 1;
  
         /* Repeat as long as we have more pattern to process and more output
            space */
@@ -1332,7 +1348,7 @@ static void format_corename(char *corename, const char *pattern, long signr)
                         case 'h':
                                 down_read(&uts_sem);
                                 rc = snprintf(out_ptr, out_end - out_ptr,
-                                             "%s", system_utsname.nodename);
+                                             "%s", utsname()->nodename);
                                 up_read(&uts_sem);
                                 if (rc > out_end - out_ptr)
                                         goto out;
@@ -1356,8 +1372,8 @@ static void format_corename(char *corename, const char *pattern, long signr)
          *
          * If core_pattern does not include a %p (as is the default)
          * and core_uses_pid is set, then .%pid will be appended to
-        * the filename */
-       if (!pid_in_pattern
+        * the filename. Do not do this for piped commands. */
+       if (!ispipe && !pid_in_pattern
              && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
                 rc = snprintf(out_ptr, out_end - out_ptr,
                               ".%d", current->tgid);
@@ -1365,71 +1381,107 @@ static void format_corename(char *corename, const char *pattern, long signr)
                         goto out;
                 out_ptr += rc;
         }
-      out:
+out:
         *out_ptr = 0;
+       return ispipe;
  }
  
-static void zap_threads (struct mm_struct *mm)
+static void zap_process(struct task_struct *start)
  {
-       struct task_struct *g, *p;
-       struct task_struct *tsk = current;
-       struct completion *vfork_done = tsk->vfork_done;
-       int traced = 0;
+       struct task_struct *t;
  
-       /*
-        * Make sure nobody is waiting for us to release the VM,
-        * otherwise we can deadlock when we wait on each other
-        */
-       if (vfork_done) {
-               tsk->vfork_done = NULL;
-               complete(vfork_done);
-       }
+       start->signal->flags = SIGNAL_GROUP_EXIT;
+       start->signal->group_stop_count = 0;
  
-       read_lock(&tasklist_lock);
-       do_each_thread(g,p)
-               if (mm == p->mm && p != tsk) {
-                       force_sig_specific(SIGKILL, p);
-                       mm->core_waiters++;
-                       if (unlikely(p->ptrace) &&
-                           unlikely(p->parent->mm == mm))
-                               traced = 1;
+       t = start;
+       do {
+               if (t != current && t->mm) {
+                       t->mm->core_waiters++;
+                       sigaddset(&t->pending.signal, SIGKILL);
+                       signal_wake_up(t, 1);
                 }
-       while_each_thread(g,p);
+       } while ((t = next_thread(t)) != start);
+}
  
-       read_unlock(&tasklist_lock);
+static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
+                               int exit_code)
+{
+       struct task_struct *g, *p;
+       unsigned long flags;
+       int err = -EAGAIN;
+
+       spin_lock_irq(&tsk->sighand->siglock);
+       if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) {
+               tsk->signal->group_exit_code = exit_code;
+               zap_process(tsk);
+               err = 0;
+       }
+       spin_unlock_irq(&tsk->sighand->siglock);
+       if (err)
+               return err;
  
-       if (unlikely(traced)) {
-               /*
-                * We are zapping a thread and the thread it ptraces.
-                * If the tracee went into a ptrace stop for exit tracing,
-                * we could deadlock since the tracer is waiting for this
-                * coredump to finish.  Detach them so they can both die.
-                */
-               write_lock_irq(&tasklist_lock);
-               do_each_thread(g,p) {
-                       if (mm == p->mm && p != tsk &&
-                           p->ptrace && p->parent->mm == mm) {
-                               __ptrace_detach(p, 0);
+       if (atomic_read(&mm->mm_users) == mm->core_waiters + 1)
+               goto done;
+
+       rcu_read_lock();
+       for_each_process(g) {
+               if (g == tsk->group_leader)
+                       continue;
+
+               p = g;
+               do {
+                       if (p->mm) {
+                               if (p->mm == mm) {
+                                       /*
+                                        * p->sighand can't disappear, but
+                                        * may be changed by de_thread()
+                                        */
+                                       lock_task_sighand(p, &flags);
+                                       zap_process(p);
+                                       unlock_task_sighand(p, &flags);
+                               }
+                               break;
                         }
-               } while_each_thread(g,p);
-               write_unlock_irq(&tasklist_lock);
+               } while ((p = next_thread(p)) != g);
         }
+       rcu_read_unlock();
+done:
+       return mm->core_waiters;
  }
  
-static void coredump_wait(struct mm_struct *mm)
+static int coredump_wait(int exit_code)
  {
-       DECLARE_COMPLETION(startup_done);
+       struct task_struct *tsk = current;
+       struct mm_struct *mm = tsk->mm;
+       struct completion startup_done;
+       struct completion *vfork_done;
         int core_waiters;
  
+       init_completion(&mm->core_done);
+       init_completion(&startup_done);
         mm->core_startup_done = &startup_done;
  
-       zap_threads(mm);
-       core_waiters = mm->core_waiters;
+       core_waiters = zap_threads(tsk, mm, exit_code);
         up_write(&mm->mmap_sem);
  
+       if (unlikely(core_waiters < 0))
+               goto fail;
+
+       /*
+        * Make sure nobody is waiting for us to release the VM,
+        * otherwise we can deadlock when we wait on each other
+        */
+       vfork_done = tsk->vfork_done;
+       if (vfork_done) {
+               tsk->vfork_done = NULL;
+               complete(vfork_done);
+       }
+
         if (core_waiters)
                 wait_for_completion(&startup_done);
+fail:
         BUG_ON(mm->core_waiters);
+       return core_waiters;
  }
  
  int do_coredump(long signr, int exit_code, struct pt_regs * regs)
@@ -1442,6 +1494,9 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
         int retval = 0;
         int fsuid = current->fsuid;
         int flag = 0;
+       int ispipe = 0;
+
+       audit_core_dumps(signr);
  
         binfmt = current->binfmt;
         if (!binfmt || !binfmt->core_dump)
@@ -1463,22 +1518,9 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
         }
         mm->dumpable = 0;
  
-       retval = -EAGAIN;
-       spin_lock_irq(&current->sighand->siglock);
-       if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) {
-               current->signal->flags = SIGNAL_GROUP_EXIT;
-               current->signal->group_exit_code = exit_code;
-               current->signal->group_stop_count = 0;
-               retval = 0;
-       }
-       spin_unlock_irq(&current->sighand->siglock);
-       if (retval) {
-               up_write(&mm->mmap_sem);
+       retval = coredump_wait(exit_code);
+       if (retval < 0)
                 goto fail;
-       }
-
-       init_completion(&mm->core_done);
-       coredump_wait(mm);
  
         /*
          * Clear any false indication of pending signals that might
@@ -1494,24 +1536,36 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
          * uses lock_kernel()
          */
         lock_kernel();
-       format_corename(corename, core_pattern, signr);
+       ispipe = format_corename(corename, core_pattern, signr);
         unlock_kernel();
-       file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600);
+       if (ispipe) {
+               /* SIGPIPE can happen, but it's just never processed */
+               if(call_usermodehelper_pipe(corename+1, NULL, NULL, &file)) {
+                       printk(KERN_INFO "Core dump to %s pipe failed\n",
+                              corename);
+                       goto fail_unlock;
+               }
+       } else
+               file = filp_open(corename,
+                                O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
+                                0600);
         if (IS_ERR(file))
                 goto fail_unlock;
-       inode = file->f_dentry->d_inode;
+       inode = file->f_path.dentry->d_inode;
         if (inode->i_nlink > 1)
                 goto close_fail;        /* multiple links - don't dump */
-       if (d_unhashed(file->f_dentry))
+       if (!ispipe && d_unhashed(file->f_path.dentry))
                 goto close_fail;
  
-       if (!S_ISREG(inode->i_mode))
+       /* AK: actually i see no reason to not allow this for named pipes etc.,
+          but keep the previous behaviour for now. */
+       if (!ispipe && !S_ISREG(inode->i_mode))
                 goto close_fail;
         if (!file->f_op)
                 goto close_fail;
         if (!file->f_op->write)
                 goto close_fail;
-       if (do_truncate(file->f_dentry, 0, 0, file) != 0)
+       if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
                 goto close_fail;
  
         retval = binfmt->core_dump(signr, regs, file);