* formats.
*/
-#include <linux/config.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/mman.h>
#include <linux/binfmts.h>
#include <linux/swap.h>
#include <linux/utsname.h>
+#include <linux/pid_namespace.h>
#include <linux/module.h>
#include <linux/namei.h>
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/rmap.h>
-#include <linux/acct.h>
+#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/audit.h>
+#include <linux/signalfd.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#endif
int core_uses_pid;
-char core_pattern[65] = "core";
+char core_pattern[CORENAME_MAX_SIZE] = "core";
int suid_dumpable = 0;
EXPORT_SYMBOL(suid_dumpable);
while (*tmp) {
if (fmt == *tmp) {
*tmp = fmt->next;
+ fmt->next = NULL;
write_unlock(&binfmt_lock);
return 0;
}
if (error)
goto out;
+ error = -EACCES;
+ if (nd.mnt->mnt_flags & MNT_NOEXEC)
+ goto exit;
error = -EINVAL;
if (!S_ISREG(nd.dentry->d_inode->i_mode))
goto exit;
bprm->loader += stack_base;
bprm->exec += stack_base;
- mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ mpnt = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (!mpnt)
return -ENOMEM;
- memset(mpnt, 0, sizeof(*mpnt));
-
down_write(&mm->mmap_sem);
{
mpnt->vm_mm = mm;
if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
S_ISREG(inode->i_mode)) {
int err = vfs_permission(&nd, MAY_EXEC);
- if (!err && !(inode->i_mode & 0111))
- err = -EACCES;
file = ERR_PTR(err);
if (!err) {
file = nameidata_to_filp(&nd, O_RDONLY);
struct task_struct *leader = NULL;
int count;
+ /*
+ * Tell all the sighand listeners that this sighand has
+ * been detached. The signalfd_detach() function grabs the
+ * sighand lock, if signal listeners are present on the sighand.
+ */
+ signalfd_detach(tsk);
+
/*
* If we don't share sighandlers, then we aren't sharing anything
* and we can just re-use it all.
if (!newsighand)
return -ENOMEM;
- if (thread_group_empty(current))
+ if (thread_group_empty(tsk))
goto no_thread_group;
/*
* Reparenting needs write_lock on tasklist_lock,
* so it is safe to do it under read_lock.
*/
- if (unlikely(current->group_leader == child_reaper))
- child_reaper = current;
+ if (unlikely(tsk->group_leader == child_reaper(tsk)))
+ tsk->nsproxy->pid_ns->child_reaper = tsk;
- zap_other_threads(current);
+ zap_other_threads(tsk);
read_unlock(&tasklist_lock);
/*
* Account for the thread group leader hanging around:
*/
count = 1;
- if (!thread_group_leader(current)) {
+ if (!thread_group_leader(tsk)) {
count = 2;
/*
* The SIGALRM timer survives the exec, but needs to point
* synchronize with any firing (by calling del_timer_sync)
* before we can safely let the old group leader die.
*/
- sig->tsk = current;
+ sig->tsk = tsk;
spin_unlock_irq(lock);
if (hrtimer_cancel(&sig->real_timer))
hrtimer_restart(&sig->real_timer);
spin_lock_irq(lock);
}
while (atomic_read(&sig->count) > count) {
- sig->group_exit_task = current;
+ sig->group_exit_task = tsk;
sig->notify_count = count;
__set_current_state(TASK_UNINTERRUPTIBLE);
spin_unlock_irq(lock);
* do is to wait for the thread group leader to become inactive,
* and to assume its PID:
*/
- if (!thread_group_leader(current)) {
+ if (!thread_group_leader(tsk)) {
/*
* Wait for the thread group leader to be a zombie.
* It should already be zombie at this point, most
* of the time.
*/
- leader = current->group_leader;
+ leader = tsk->group_leader;
while (leader->exit_state != EXIT_ZOMBIE)
yield();
* When we take on its identity by switching to its PID, we
* also take its birthdate (always earlier than our own).
*/
- current->start_time = leader->start_time;
+ tsk->start_time = leader->start_time;
write_lock_irq(&tasklist_lock);
- BUG_ON(leader->tgid != current->tgid);
- BUG_ON(current->pid == current->tgid);
+ BUG_ON(leader->tgid != tsk->tgid);
+ BUG_ON(tsk->pid == tsk->tgid);
/*
* An exec() starts a new thread group with the
* TGID of the previous thread group. Rehash the
*/
/* Become a process group leader with the old leader's pid.
- * Note: The old leader also uses thispid until release_task
+ * The old leader becomes a thread of the this thread group.
+ * Note: The old leader also uses this pid until release_task
* is called. Odd but simple and correct.
*/
- detach_pid(current, PIDTYPE_PID);
- current->pid = leader->pid;
- attach_pid(current, PIDTYPE_PID, current->pid);
- attach_pid(current, PIDTYPE_PGID, current->signal->pgrp);
- attach_pid(current, PIDTYPE_SID, current->signal->session);
- list_add_tail_rcu(¤t->tasks, &init_task.tasks);
+ detach_pid(tsk, PIDTYPE_PID);
+ tsk->pid = leader->pid;
+ attach_pid(tsk, PIDTYPE_PID, find_pid(tsk->pid));
+ transfer_pid(leader, tsk, PIDTYPE_PGID);
+ transfer_pid(leader, tsk, PIDTYPE_SID);
+ list_replace_rcu(&leader->tasks, &tsk->tasks);
- current->group_leader = current;
- leader->group_leader = current;
+ tsk->group_leader = tsk;
+ leader->group_leader = tsk;
- /* Reduce leader to a thread */
- detach_pid(leader, PIDTYPE_PGID);
- detach_pid(leader, PIDTYPE_SID);
- list_del_init(&leader->tasks);
-
- current->exit_signal = SIGCHLD;
+ tsk->exit_signal = SIGCHLD;
BUG_ON(leader->exit_state != EXIT_ZOMBIE);
leader->exit_state = EXIT_DEAD;
write_lock_irq(&tasklist_lock);
spin_lock(&oldsighand->siglock);
- spin_lock(&newsighand->siglock);
+ spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING);
- rcu_assign_pointer(current->sighand, newsighand);
+ rcu_assign_pointer(tsk->sighand, newsighand);
recalc_sigpending();
spin_unlock(&newsighand->siglock);
spin_unlock(&oldsighand->siglock);
write_unlock_irq(&tasklist_lock);
- if (atomic_dec_and_test(&oldsighand->count))
- kmem_cache_free(sighand_cachep, oldsighand);
+ __cleanup_sighand(oldsighand);
}
- BUG_ON(!thread_group_leader(current));
+ BUG_ON(!thread_group_leader(tsk));
return 0;
}
j++;
i = j * __NFDBITS;
fdt = files_fdtable(files);
- if (i >= fdt->max_fds || i >= fdt->max_fdset)
+ if (i >= fdt->max_fds)
break;
set = fdt->close_on_exec->fds_bits[j];
if (!set)
return 0;
mmap_failed:
- put_files_struct(current->files);
- current->files = files;
+ reset_files_struct(current, files);
out:
return retval;
}
int prepare_binprm(struct linux_binprm *bprm)
{
int mode;
- struct inode * inode = bprm->file->f_dentry->d_inode;
+ struct inode * inode = bprm->file->f_path.dentry->d_inode;
int retval;
mode = inode->i_mode;
- /*
- * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
- * generic_permission lets a non-executable through
- */
- if (!(mode & 0111)) /* with at least _one_ execute bit set */
- return -EACCES;
if (bprm->file->f_op == NULL)
return -EACCES;
bprm->e_uid = current->euid;
bprm->e_gid = current->egid;
- if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) {
+ if(!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
/* Set-uid? */
if (mode & S_ISUID) {
current->personality &= ~PER_CLEAR_ON_SETID;
task_unlock(current);
security_bprm_post_apply_creds(bprm);
}
-
EXPORT_SYMBOL(compute_creds);
+/*
+ * Arguments are '\0' separated strings found at the location bprm->p
+ * points to; chop off the first by relocating brpm->p to right after
+ * the first '\0' encountered.
+ */
void remove_arg_zero(struct linux_binprm *bprm)
{
if (bprm->argc) {
- unsigned long offset;
- char * kaddr;
- struct page *page;
+ char ch;
- offset = bprm->p % PAGE_SIZE;
- goto inside;
+ do {
+ unsigned long offset;
+ unsigned long index;
+ char *kaddr;
+ struct page *page;
- while (bprm->p++, *(kaddr+offset++)) {
- if (offset != PAGE_SIZE)
- continue;
- offset = 0;
- kunmap_atomic(kaddr, KM_USER0);
-inside:
- page = bprm->page[bprm->p/PAGE_SIZE];
+ offset = bprm->p & ~PAGE_MASK;
+ index = bprm->p >> PAGE_SHIFT;
+
+ page = bprm->page[index];
kaddr = kmap_atomic(page, KM_USER0);
- }
- kunmap_atomic(kaddr, KM_USER0);
+
+ /* run through page until we reach end or find NUL */
+ do {
+ ch = *(kaddr + offset);
+
+ /* discard that character... */
+ bprm->p++;
+ offset++;
+ } while (offset < PAGE_SIZE && ch != '\0');
+
+ kunmap_atomic(kaddr, KM_USER0);
+
+ /* free the old page */
+ if (offset == PAGE_SIZE) {
+ __free_page(page);
+ bprm->page[index] = NULL;
+ }
+ } while (ch != '\0');
+
bprm->argc--;
}
}
-
EXPORT_SYMBOL(remove_arg_zero);
/*
EXPORT_SYMBOL(set_binfmt);
-#define CORENAME_MAX_SIZE 64
-
/* format_corename will inspect the pattern parameter, and output a
* name into corename, which must have space for at least
* CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
*/
-static void format_corename(char *corename, const char *pattern, long signr)
+static int format_corename(char *corename, const char *pattern, long signr)
{
const char *pat_ptr = pattern;
char *out_ptr = corename;
char *const out_end = corename + CORENAME_MAX_SIZE;
int rc;
int pid_in_pattern = 0;
+ int ispipe = 0;
+
+ if (*pattern == '|')
+ ispipe = 1;
/* Repeat as long as we have more pattern to process and more output
space */
case 'h':
down_read(&uts_sem);
rc = snprintf(out_ptr, out_end - out_ptr,
- "%s", system_utsname.nodename);
+ "%s", utsname()->nodename);
up_read(&uts_sem);
if (rc > out_end - out_ptr)
goto out;
*
* If core_pattern does not include a %p (as is the default)
* and core_uses_pid is set, then .%pid will be appended to
- * the filename */
- if (!pid_in_pattern
+ * the filename. Do not do this for piped commands. */
+ if (!ispipe && !pid_in_pattern
&& (core_uses_pid || atomic_read(¤t->mm->mm_users) != 1)) {
rc = snprintf(out_ptr, out_end - out_ptr,
".%d", current->tgid);
goto out;
out_ptr += rc;
}
- out:
+out:
*out_ptr = 0;
+ return ispipe;
}
-static void zap_threads (struct mm_struct *mm)
+static void zap_process(struct task_struct *start)
{
- struct task_struct *g, *p;
- struct task_struct *tsk = current;
- struct completion *vfork_done = tsk->vfork_done;
- int traced = 0;
+ struct task_struct *t;
- /*
- * Make sure nobody is waiting for us to release the VM,
- * otherwise we can deadlock when we wait on each other
- */
- if (vfork_done) {
- tsk->vfork_done = NULL;
- complete(vfork_done);
- }
+ start->signal->flags = SIGNAL_GROUP_EXIT;
+ start->signal->group_stop_count = 0;
- read_lock(&tasklist_lock);
- do_each_thread(g,p)
- if (mm == p->mm && p != tsk) {
- force_sig_specific(SIGKILL, p);
- mm->core_waiters++;
- if (unlikely(p->ptrace) &&
- unlikely(p->parent->mm == mm))
- traced = 1;
+ t = start;
+ do {
+ if (t != current && t->mm) {
+ t->mm->core_waiters++;
+ sigaddset(&t->pending.signal, SIGKILL);
+ signal_wake_up(t, 1);
}
- while_each_thread(g,p);
+ } while ((t = next_thread(t)) != start);
+}
- read_unlock(&tasklist_lock);
+static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
+ int exit_code)
+{
+ struct task_struct *g, *p;
+ unsigned long flags;
+ int err = -EAGAIN;
+
+ spin_lock_irq(&tsk->sighand->siglock);
+ if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) {
+ tsk->signal->group_exit_code = exit_code;
+ zap_process(tsk);
+ err = 0;
+ }
+ spin_unlock_irq(&tsk->sighand->siglock);
+ if (err)
+ return err;
- if (unlikely(traced)) {
- /*
- * We are zapping a thread and the thread it ptraces.
- * If the tracee went into a ptrace stop for exit tracing,
- * we could deadlock since the tracer is waiting for this
- * coredump to finish. Detach them so they can both die.
- */
- write_lock_irq(&tasklist_lock);
- do_each_thread(g,p) {
- if (mm == p->mm && p != tsk &&
- p->ptrace && p->parent->mm == mm) {
- __ptrace_detach(p, 0);
+ if (atomic_read(&mm->mm_users) == mm->core_waiters + 1)
+ goto done;
+
+ rcu_read_lock();
+ for_each_process(g) {
+ if (g == tsk->group_leader)
+ continue;
+
+ p = g;
+ do {
+ if (p->mm) {
+ if (p->mm == mm) {
+ /*
+ * p->sighand can't disappear, but
+ * may be changed by de_thread()
+ */
+ lock_task_sighand(p, &flags);
+ zap_process(p);
+ unlock_task_sighand(p, &flags);
+ }
+ break;
}
- } while_each_thread(g,p);
- write_unlock_irq(&tasklist_lock);
+ } while ((p = next_thread(p)) != g);
}
+ rcu_read_unlock();
+done:
+ return mm->core_waiters;
}
-static void coredump_wait(struct mm_struct *mm)
+static int coredump_wait(int exit_code)
{
- DECLARE_COMPLETION(startup_done);
+ struct task_struct *tsk = current;
+ struct mm_struct *mm = tsk->mm;
+ struct completion startup_done;
+ struct completion *vfork_done;
int core_waiters;
+ init_completion(&mm->core_done);
+ init_completion(&startup_done);
mm->core_startup_done = &startup_done;
- zap_threads(mm);
- core_waiters = mm->core_waiters;
+ core_waiters = zap_threads(tsk, mm, exit_code);
up_write(&mm->mmap_sem);
+ if (unlikely(core_waiters < 0))
+ goto fail;
+
+ /*
+ * Make sure nobody is waiting for us to release the VM,
+ * otherwise we can deadlock when we wait on each other
+ */
+ vfork_done = tsk->vfork_done;
+ if (vfork_done) {
+ tsk->vfork_done = NULL;
+ complete(vfork_done);
+ }
+
if (core_waiters)
wait_for_completion(&startup_done);
+fail:
BUG_ON(mm->core_waiters);
+ return core_waiters;
}
int do_coredump(long signr, int exit_code, struct pt_regs * regs)
int retval = 0;
int fsuid = current->fsuid;
int flag = 0;
+ int ispipe = 0;
+
+ audit_core_dumps(signr);
binfmt = current->binfmt;
if (!binfmt || !binfmt->core_dump)
}
mm->dumpable = 0;
- retval = -EAGAIN;
- spin_lock_irq(¤t->sighand->siglock);
- if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) {
- current->signal->flags = SIGNAL_GROUP_EXIT;
- current->signal->group_exit_code = exit_code;
- current->signal->group_stop_count = 0;
- retval = 0;
- }
- spin_unlock_irq(¤t->sighand->siglock);
- if (retval) {
- up_write(&mm->mmap_sem);
+ retval = coredump_wait(exit_code);
+ if (retval < 0)
goto fail;
- }
-
- init_completion(&mm->core_done);
- coredump_wait(mm);
/*
* Clear any false indication of pending signals that might
* uses lock_kernel()
*/
lock_kernel();
- format_corename(corename, core_pattern, signr);
+ ispipe = format_corename(corename, core_pattern, signr);
unlock_kernel();
- file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600);
+ if (ispipe) {
+ /* SIGPIPE can happen, but it's just never processed */
+ if(call_usermodehelper_pipe(corename+1, NULL, NULL, &file)) {
+ printk(KERN_INFO "Core dump to %s pipe failed\n",
+ corename);
+ goto fail_unlock;
+ }
+ } else
+ file = filp_open(corename,
+ O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
+ 0600);
if (IS_ERR(file))
goto fail_unlock;
- inode = file->f_dentry->d_inode;
+ inode = file->f_path.dentry->d_inode;
if (inode->i_nlink > 1)
goto close_fail; /* multiple links - don't dump */
- if (d_unhashed(file->f_dentry))
+ if (!ispipe && d_unhashed(file->f_path.dentry))
goto close_fail;
- if (!S_ISREG(inode->i_mode))
+ /* AK: actually i see no reason to not allow this for named pipes etc.,
+ but keep the previous behaviour for now. */
+ if (!ispipe && !S_ISREG(inode->i_mode))
goto close_fail;
if (!file->f_op)
goto close_fail;
if (!file->f_op->write)
goto close_fail;
- if (do_truncate(file->f_dentry, 0, 0, file) != 0)
+ if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
goto close_fail;
retval = binfmt->core_dump(signr, regs, file);