X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=kernel%2Fsys.c;h=2d5179c67cec727f40476e2f9e75fbb9eeeb8d8c;hb=913b83944b9e13c60ca4ef95cf262547ff6bf93b;hp=c93d37f71aef98b23a9f43f6ac7d1f37b9ff20fd;hpb=4fa639123d9e6e8dfaa3d116368f4b2478da31af;p=linux-2.6 diff --git a/kernel/sys.c b/kernel/sys.c index c93d37f71a..2d5179c67c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -57,6 +56,12 @@ #ifndef GET_FPEXC_CTL # define GET_FPEXC_CTL(a,b) (-EINVAL) #endif +#ifndef GET_ENDIAN +# define GET_ENDIAN(a,b) (-EINVAL) +#endif +#ifndef SET_ENDIAN +# define SET_ENDIAN(a,b) (-EINVAL) +#endif /* * this is where the system-wide overflow UID and GID are defined, for @@ -132,14 +137,15 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl, unsigned long val, void *v) { int ret = NOTIFY_DONE; - struct notifier_block *nb; + struct notifier_block *nb, *next_nb; nb = rcu_dereference(*nl); while (nb) { + next_nb = rcu_dereference(nb->next); ret = nb->notifier_call(nb, val, v); if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK) break; - nb = rcu_dereference(nb->next); + nb = next_nb; } return ret; } @@ -583,7 +589,7 @@ void emergency_restart(void) } EXPORT_SYMBOL_GPL(emergency_restart); -void kernel_restart_prepare(char *cmd) +static void kernel_restart_prepare(char *cmd) { blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; @@ -617,7 +623,7 @@ EXPORT_SYMBOL_GPL(kernel_restart); * Move into place and start executing a preloaded standalone * executable. If nothing was preloaded return an error. */ -void kernel_kexec(void) +static void kernel_kexec(void) { #ifdef CONFIG_KEXEC struct kimage *image; @@ -631,7 +637,6 @@ void kernel_kexec(void) machine_kexec(image); #endif } -EXPORT_SYMBOL_GPL(kernel_kexec); void kernel_shutdown_prepare(enum system_states state) { @@ -1202,69 +1207,24 @@ asmlinkage long sys_times(struct tms __user * tbuf) */ if (tbuf) { struct tms tmp; + struct task_struct *tsk = current; + struct task_struct *t; cputime_t utime, stime, cutime, cstime; -#ifdef CONFIG_SMP - if (thread_group_empty(current)) { - /* - * Single thread case without the use of any locks. - * - * We may race with release_task if two threads are - * executing. However, release task first adds up the - * counters (__exit_signal) before removing the task - * from the process tasklist (__unhash_process). - * __exit_signal also acquires and releases the - * siglock which results in the proper memory ordering - * so that the list modifications are always visible - * after the counters have been updated. - * - * If the counters have been updated by the second thread - * but the thread has not yet been removed from the list - * then the other branch will be executing which will - * block on tasklist_lock until the exit handling of the - * other task is finished. - * - * This also implies that the sighand->siglock cannot - * be held by another processor. So we can also - * skip acquiring that lock. 
- */ - utime = cputime_add(current->signal->utime, current->utime); - stime = cputime_add(current->signal->utime, current->stime); - cutime = current->signal->cutime; - cstime = current->signal->cstime; - } else -#endif - { + spin_lock_irq(&tsk->sighand->siglock); + utime = tsk->signal->utime; + stime = tsk->signal->stime; + t = tsk; + do { + utime = cputime_add(utime, t->utime); + stime = cputime_add(stime, t->stime); + t = next_thread(t); + } while (t != tsk); - /* Process with multiple threads */ - struct task_struct *tsk = current; - struct task_struct *t; + cutime = tsk->signal->cutime; + cstime = tsk->signal->cstime; + spin_unlock_irq(&tsk->sighand->siglock); - read_lock(&tasklist_lock); - utime = tsk->signal->utime; - stime = tsk->signal->stime; - t = tsk; - do { - utime = cputime_add(utime, t->utime); - stime = cputime_add(stime, t->stime); - t = next_thread(t); - } while (t != tsk); - - /* - * While we have tasklist_lock read-locked, no dying thread - * can be updating current->signal->[us]time. Instead, - * we got their counts included in the live thread loop. - * However, another thread can come in right now and - * do a wait call that updates current->signal->c[us]time. - * To make sure we always see that pair updated atomically, - * we take the siglock around fetching them. - */ - spin_lock_irq(&tsk->sighand->siglock); - cutime = tsk->signal->cutime; - cstime = tsk->signal->cstime; - spin_unlock_irq(&tsk->sighand->siglock); - read_unlock(&tasklist_lock); - } tmp.tms_utime = cputime_to_clock_t(utime); tmp.tms_stime = cputime_to_clock_t(stime); tmp.tms_cutime = cputime_to_clock_t(cutime); @@ -1417,18 +1377,29 @@ asmlinkage long sys_getsid(pid_t pid) asmlinkage long sys_setsid(void) { struct task_struct *group_leader = current->group_leader; - struct pid *pid; + pid_t session; int err = -EPERM; mutex_lock(&tty_mutex); write_lock_irq(&tasklist_lock); - pid = find_pid(PIDTYPE_PGID, group_leader->pid); - if (pid) + /* Fail if I am already a session leader */ + if (group_leader->signal->leader) + goto out; + + session = group_leader->pid; + /* Fail if a process group id already exists that equals the + * proposed session id. + * + * Don't check if session id == 1 because kernel threads use this + * session id and so the check will always fail and make it so + * init cannot successfully call setsid. + */ + if (session > 1 && find_task_by_pid_type(PIDTYPE_PGID, session)) goto out; group_leader->signal->leader = 1; - __set_special_pids(group_leader->pid, group_leader->pid); + __set_special_pids(session, session); group_leader->signal->tty = NULL; group_leader->signal->tty_old_pgrp = 0; err = process_group(group_leader); @@ -1894,23 +1865,20 @@ out: * fields when reaping, so a sample either gets all the additions of a * given child after it's reaped, or none so this sample is before reaping. * - * tasklist_lock locking optimisation: - * If we are current and single threaded, we do not need to take the tasklist - * lock or the siglock. No one else can take our signal_struct away, - * no one else can reap the children to update signal->c* counters, and - * no one else can race with the signal-> fields. - * If we do not take the tasklist_lock, the signal-> fields could be read - * out of order while another thread was just exiting. So we place a - * read memory barrier when we avoid the lock. On the writer side, - * write memory barrier is implied in __exit_signal as __exit_signal releases - * the siglock spinlock after updating the signal-> fields. 
- * - * We don't really need the siglock when we access the non c* fields - * of the signal_struct (for RUSAGE_SELF) even in multithreaded - * case, since we take the tasklist lock for read and the non c* signal-> - * fields are updated only in __exit_signal, which is called with - * tasklist_lock taken for write, hence these two threads cannot execute - * concurrently. + * Locking: + * We need to take the siglock for CHILDEREN, SELF and BOTH + * for the cases current multithreaded, non-current single threaded + * non-current multithreaded. Thread traversal is now safe with + * the siglock held. + * Strictly speaking, we donot need to take the siglock if we are current and + * single threaded, as no one else can take our signal_struct away, no one + * else can reap the children to update signal->c* counters, and no one else + * can race with the signal-> fields. If we do not take any lock, the + * signal-> fields could be read out of order while another thread was just + * exiting. So we should place a read memory barrier when we avoid the lock. + * On the writer side, write memory barrier is implied in __exit_signal + * as __exit_signal releases the siglock spinlock after updating the signal-> + * fields. But we don't do this yet to keep things simple. * */ @@ -1919,35 +1887,25 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) struct task_struct *t; unsigned long flags; cputime_t utime, stime; - int need_lock = 0; memset((char *) r, 0, sizeof *r); utime = stime = cputime_zero; - if (p != current || !thread_group_empty(p)) - need_lock = 1; - - if (need_lock) { - read_lock(&tasklist_lock); - if (unlikely(!p->signal)) { - read_unlock(&tasklist_lock); - return; - } - } else - /* See locking comments above */ - smp_rmb(); + rcu_read_lock(); + if (!lock_task_sighand(p, &flags)) { + rcu_read_unlock(); + return; + } switch (who) { case RUSAGE_BOTH: case RUSAGE_CHILDREN: - spin_lock_irqsave(&p->sighand->siglock, flags); utime = p->signal->cutime; stime = p->signal->cstime; r->ru_nvcsw = p->signal->cnvcsw; r->ru_nivcsw = p->signal->cnivcsw; r->ru_minflt = p->signal->cmin_flt; r->ru_majflt = p->signal->cmaj_flt; - spin_unlock_irqrestore(&p->sighand->siglock, flags); if (who == RUSAGE_CHILDREN) break; @@ -1975,8 +1933,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) BUG(); } - if (need_lock) - read_unlock(&tasklist_lock); + unlock_task_sighand(p, &flags); + rcu_read_unlock(); + cputime_to_timeval(utime, &r->ru_utime); cputime_to_timeval(stime, &r->ru_stime); } @@ -2091,6 +2050,13 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, return -EFAULT; return 0; } + case PR_GET_ENDIAN: + error = GET_ENDIAN(current, arg2); + break; + case PR_SET_ENDIAN: + error = SET_ENDIAN(current, arg2); + break; + default: error = -EINVAL; break;
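
The notifier_call_chain() hunk above fetches nb->next before invoking the
callback, so a notifier that unregisters itself from inside its own handler
cannot leave the walker holding a pointer into an already-unlinked element.
A minimal single-threaded userspace sketch of the same idea (illustrative
only, not part of the patch; struct node, unregister() and call_chain() are
made-up stand-ins, and the kernel version additionally relies on
rcu_dereference() plus an RCU grace period before an unregistered block may
be freed):

#include <stdio.h>

/* Simplified stand-in for struct notifier_block. */
struct node {
	int (*call)(struct node *self);
	struct node *next;
};

static struct node *chain;

/* Unlink a node from the chain, as notifier_chain_unregister() would. */
static void unregister(struct node *n)
{
	struct node **p = &chain;

	while (*p) {
		if (*p == n) {
			*p = n->next;
			return;
		}
		p = &(*p)->next;
	}
}

/* A callback that removes itself while it runs. */
static int self_removing(struct node *self)
{
	printf("callback %p fired, unregistering itself\n", (void *)self);
	unregister(self);
	return 0;
}

/*
 * Walk the chain the way the patched notifier_call_chain() does: read
 * ->next *before* the call, so a callback that unlinks itself cannot
 * leave us with a dangling cursor.
 */
static void call_chain(void)
{
	struct node *nb = chain, *next_nb;

	while (nb) {
		next_nb = nb->next;	/* fetch before invoking the callback */
		nb->call(nb);
		nb = next_nb;
	}
}

int main(void)
{
	struct node a = { self_removing, NULL };
	struct node b = { self_removing, NULL };

	a.next = &b;
	chain = &a;

	call_chain();	/* both callbacks fire exactly once */
	return 0;
}

The PR_GET_ENDIAN/PR_SET_ENDIAN hunk routes two new prctl(2) options to the
per-architecture GET_ENDIAN/SET_ENDIAN helpers, which fall back to -EINVAL on
architectures that do not provide them. A rough usage sketch, assuming kernel
headers new enough to define the PR_*_ENDIAN constants and assuming the
powerpc convention that PR_GET_ENDIAN writes the current mode through the
pointer passed as arg2:

#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

int main(void)
{
	unsigned int mode;

	/* Ask the kernel for this task's current endian mode. */
	if (prctl(PR_GET_ENDIAN, (unsigned long) &mode, 0, 0, 0) != 0) {
		perror("PR_GET_ENDIAN");	/* EINVAL where unsupported */
		return 1;
	}
	printf("current endian mode: %u\n", mode);

	/* Round trip: writing back the value we just read is a no-op. */
	if (prctl(PR_SET_ENDIAN, mode, 0, 0, 0) != 0)
		perror("PR_SET_ENDIAN");

	return 0;
}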