From f400e198b2ed26ce55b22a1412ded0896e7516ac Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Fri, 29 Sep 2006 02:00:07 -0700 Subject: [PATCH] [PATCH] pidspace: is_init() This is an updated version of Eric Biederman's is_init() patch. (http://lkml.org/lkml/2006/2/6/280). It applies cleanly to 2.6.18-rc3 and replaces a few more instances of ->pid == 1 with is_init(). Further, is_init() checks pid and thus removes dependency on Eric's other patches for now. Eric's original description: There are a lot of places in the kernel where we test for init because we give it special properties. Most significantly init must not die. This results in code all over the kernel test ->pid == 1. Introduce is_init to capture this case. With multiple pid spaces for all of the cases affected we are looking for only the first process on the system, not some other process that has pid == 1. Signed-off-by: Eric W. Biederman Signed-off-by: Sukadev Bhattiprolu Cc: Dave Hansen Cc: Serge Hallyn Cc: Cedric Le Goater Cc: Acked-by: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/mm/fault.c | 2 +- arch/arm/mm/fault.c | 2 +- arch/arm26/mm/fault.c | 2 +- arch/i386/lib/usercopy.c | 2 +- arch/i386/mm/fault.c | 2 +- arch/ia64/mm/fault.c | 2 +- arch/m32r/mm/fault.c | 2 +- arch/m68k/mm/fault.c | 2 +- arch/mips/mm/fault.c | 2 +- arch/powerpc/mm/fault.c | 2 +- arch/powerpc/platforms/pseries/ras.c | 2 +- arch/ppc/kernel/traps.c | 2 +- arch/ppc/mm/fault.c | 2 +- arch/s390/mm/fault.c | 2 +- arch/sh/mm/fault.c | 2 +- arch/sh64/mm/fault.c | 6 +++--- arch/um/kernel/trap.c | 2 +- arch/x86_64/mm/fault.c | 4 ++-- arch/xtensa/mm/fault.c | 2 +- drivers/char/sysrq.c | 2 +- include/linux/sched.h | 10 ++++++++++ kernel/capability.c | 2 +- kernel/cpuset.c | 2 +- kernel/exit.c | 2 +- kernel/kexec.c | 2 +- kernel/ptrace.c | 1 + kernel/sysctl.c | 2 +- mm/oom_kill.c | 2 +- security/commoncap.c | 2 +- 29 files changed, 41 insertions(+), 30 deletions(-) diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 622dabd846..8871529a34 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -193,7 +193,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, /* We ran out of memory, or some other thing happened to us that made us unable to handle the page fault gracefully. */ out_of_memory: - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index a5b33ff392..5e658a8744 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -198,7 +198,7 @@ survive: return fault; } - if (tsk->pid != 1) + if (!is_init(tsk)) goto out; /* diff --git a/arch/arm26/mm/fault.c b/arch/arm26/mm/fault.c index a7c4cc9220..a1f6d8a9cc 100644 --- a/arch/arm26/mm/fault.c +++ b/arch/arm26/mm/fault.c @@ -185,7 +185,7 @@ survive: } fault = -3; /* out of memory */ - if (tsk->pid != 1) + if (!is_init(tsk)) goto out; /* diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c index efc7e7d5f4..08502fc6d0 100644 --- a/arch/i386/lib/usercopy.c +++ b/arch/i386/lib/usercopy.c @@ -739,7 +739,7 @@ survive: retval = get_user_pages(current, current->mm, (unsigned long )to, 1, 1, 0, &pg, NULL); - if (retval == -ENOMEM && current->pid == 1) { + if (retval == -ENOMEM && is_init(current)) { up_read(¤t->mm->mmap_sem); blk_congestion_wait(WRITE, HZ/50); goto survive; diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 50d8617391..2581575786 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -589,7 +589,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (tsk->pid == 1) { + if (is_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index d8b1b4ac7f..59f3ab9376 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -280,7 +280,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c index dc18a33eef..8d5f551b57 100644 --- a/arch/m32r/mm/fault.c +++ b/arch/m32r/mm/fault.c @@ -299,7 +299,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (tsk->pid == 1) { + if (is_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 5e2d87c10c..911f2ce3f5 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -181,7 +181,7 @@ good_area: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index a4f8c45c4e..8423d85907 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -171,7 +171,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (tsk->pid == 1) { + if (is_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 77953f41d7..e8fa50624b 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -386,7 +386,7 @@ bad_area_nosemaphore: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 903115d67f..311ed1993f 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -337,7 +337,7 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err) err->disposition == RTAS_DISP_NOT_RECOVERED && err->target == RTAS_TARGET_MEMORY && err->type == RTAS_TYPE_ECC_UNCORR && - !(current->pid == 0 || current->pid == 1)) { + !(current->pid == 0 || is_init(current))) { /* Kill off a user process with an ECC error */ printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n", current->pid); diff --git a/arch/ppc/kernel/traps.c b/arch/ppc/kernel/traps.c index d7a433049b..aafc8e8893 100644 --- a/arch/ppc/kernel/traps.c +++ b/arch/ppc/kernel/traps.c @@ -119,7 +119,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) * generate the same exception over and over again and we get * nowhere. Better to kill it and let the kernel panic. */ - if (current->pid == 1) { + if (is_init(current)) { __sighandler_t handler; spin_lock_irq(¤t->sighand->siglock); diff --git a/arch/ppc/mm/fault.c b/arch/ppc/mm/fault.c index bc776beb31..465f451f3b 100644 --- a/arch/ppc/mm/fault.c +++ b/arch/ppc/mm/fault.c @@ -291,7 +291,7 @@ bad_area: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index f2b9a84dc2..9c3c19fe62 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -353,7 +353,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (tsk->pid == 1) { + if (is_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 507f289147..68663b8f99 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -149,7 +149,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/sh64/mm/fault.c b/arch/sh64/mm/fault.c index f08d0eaf64..8e2f6c28b7 100644 --- a/arch/sh64/mm/fault.c +++ b/arch/sh64/mm/fault.c @@ -277,7 +277,7 @@ bad_area: show_regs(regs); #endif } - if (tsk->pid == 1) { + if (is_init(tsk)) { panic("INIT had user mode bad_area\n"); } tsk->thread.address = address; @@ -319,14 +319,14 @@ no_context: * us unable to handle the page fault gracefully. */ out_of_memory: - if (current->pid == 1) { + if (is_init(current)) { panic("INIT out of memory\n"); yield(); goto survive; } printk("fault:Out of memory\n"); up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 61a23fff43..c7b195c7e5 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -120,7 +120,7 @@ out_nosemaphore: * us unable to handle the page fault gracefully. */ out_of_memory: - if (current->pid == 1) { + if (is_init(current)) { up_read(&mm->mmap_sem); yield(); down_read(&mm->mmap_sem); diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 9ba54cc2b5..3751b4788e 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -244,7 +244,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) int unhandled_signal(struct task_struct *tsk, int sig) { - if (tsk->pid == 1) + if (is_init(tsk)) return 1; if (tsk->ptrace & PT_PTRACED) return 0; @@ -580,7 +580,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); goto again; } diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index a945a33e85..dd0dbec2e5 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -144,7 +144,7 @@ bad_area: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index ee3ca8f176..0ad6cb081d 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -208,7 +208,7 @@ static void send_sig_all(int sig) struct task_struct *p; for_each_process(p) { - if (p->mm && p->pid != 1) + if (p->mm && !is_init(p)) /* Not swapper, init nor kernel thread */ force_sig(sig, p); } diff --git a/include/linux/sched.h b/include/linux/sched.h index 3696f2f712..ed2af86715 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1033,6 +1033,16 @@ static inline int pid_alive(struct task_struct *p) return p->pids[PIDTYPE_PID].pid != NULL; } +/** + * is_init - check if a task structure is the first user space + * task the kernel created. + * @p: Task structure to be checked. + */ +static inline int is_init(struct task_struct *tsk) +{ + return tsk->pid == 1; +} + extern void free_task(struct task_struct *tsk); #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) diff --git a/kernel/capability.c b/kernel/capability.c index c7685ad00a..edb845a6e8 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -133,7 +133,7 @@ static inline int cap_set_all(kernel_cap_t *effective, int found = 0; do_each_thread(g, target) { - if (target == current || target->pid == 1) + if (target == current || is_init(target)) continue; found = 1; if (security_capset_check(target, effective, inheritable, diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 1b32c2c04c..584bb4e6c0 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -240,7 +240,7 @@ static struct super_block *cpuset_sb; * A cpuset can only be deleted if both its 'count' of using tasks * is zero, and its list of 'children' cpusets is empty. Since all * tasks in the system use _some_ cpuset, and since there is always at - * least one task in the system (init, pid == 1), therefore, top_cpuset + * least one task in the system (init), therefore, top_cpuset * always has either children cpusets and/or using tasks. So we don't * need a special hack to ensure that top_cpuset cannot be deleted. * diff --git a/kernel/exit.c b/kernel/exit.c index 4b6fb054b2..9961192d60 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -219,7 +219,7 @@ static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task) do_each_task_pid(pgrp, PIDTYPE_PGID, p) { if (p == ignored_task || p->exit_state - || p->real_parent->pid == 1) + || is_init(p->real_parent)) continue; if (process_group(p->real_parent) != pgrp && p->real_parent->signal->session == p->signal->session) { diff --git a/kernel/kexec.c b/kernel/kexec.c index 50087ecf33..37cad75cf4 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -40,7 +40,7 @@ struct resource crashk_res = { int kexec_should_crash(struct task_struct *p) { - if (in_interrupt() || !p->pid || p->pid == 1 || panic_on_oops) + if (in_interrupt() || !p->pid || is_init(p) || panic_on_oops) return 1; return 0; } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 8aad0331d8..4d50e06fd7 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -440,6 +440,7 @@ struct task_struct *ptrace_get_task_struct(pid_t pid) child = find_task_by_pid(pid); if (child) get_task_struct(child); + read_unlock(&tasklist_lock); if (!child) return ERR_PTR(-ESRCH); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8bfa7d117c..9535a38399 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1915,7 +1915,7 @@ int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, return -EPERM; } - op = (current->pid == 1) ? OP_SET : OP_AND; + op = is_init(current) ? OP_SET : OP_AND; return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, do_proc_dointvec_bset_conv,&op); } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index bada3d0311..f3dd79c1c3 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -255,7 +255,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) */ static void __oom_kill_task(struct task_struct *p, const char *message) { - if (p->pid == 1) { + if (is_init(p)) { WARN_ON(1); printk(KERN_WARNING "tried to kill init!\n"); return; diff --git a/security/commoncap.c b/security/commoncap.c index f50fc298cf..5a5ef5ca7e 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -169,7 +169,7 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) /* For init, we want to retain the capabilities set * in the init_task struct. Thus we skip the usual * capability rules */ - if (current->pid != 1) { + if (!is_init(current)) { current->cap_permitted = new_permitted; current->cap_effective = cap_intersect (new_permitted, bprm->cap_effective); -- 2.39.5