Pull extend-notify-die into release branch

[linux-2.6] / include / linux / sched.h
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 49e617fa0f662a10a04980ed0f2b3997a1adc0d0..2bbf968b23d9b62cdc58094f5799db09ecc51ff2 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -107,13 +107,25 @@ extern unsigned long nr_iowait(void);
  
  #include <asm/processor.h>
  
+/*
+ * Task state bitmask. NOTE! These bits are also
+ * encoded in fs/proc/array.c: get_task_state().
+ *
+ * We have two separate sets of flags: task->state
+ * is about runnability, while task->exit_state are
+ * about the task exiting. Confusing, but this way
+ * modifying one set can't modify the other one by
+ * mistake.
+ */
  #define TASK_RUNNING           0
  #define TASK_INTERRUPTIBLE     1
  #define TASK_UNINTERRUPTIBLE   2
  #define TASK_STOPPED           4
  #define TASK_TRACED            8
+/* in tsk->exit_state */
  #define EXIT_ZOMBIE            16
  #define EXIT_DEAD              32
+/* in tsk->state again */
  #define TASK_NONINTERACTIVE    64
  
  #define __set_task_state(tsk, state_value)             \
@@ -237,6 +249,36 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
  extern void arch_unmap_area(struct mm_struct *, unsigned long);
  extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
  
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+/*
+ * The mm counters are not protected by its page_table_lock,
+ * so must be incremented atomically.
+ */
+#ifdef ATOMIC64_INIT
+#define set_mm_counter(mm, member, value) atomic64_set(&(mm)->_##member, value)
+#define get_mm_counter(mm, member) ((unsigned long)atomic64_read(&(mm)->_##member))
+#define add_mm_counter(mm, member, value) atomic64_add(value, &(mm)->_##member)
+#define inc_mm_counter(mm, member) atomic64_inc(&(mm)->_##member)
+#define dec_mm_counter(mm, member) atomic64_dec(&(mm)->_##member)
+typedef atomic64_t mm_counter_t;
+#else /* !ATOMIC64_INIT */
+/*
+ * The counters wrap back to 0 at 2^32 * PAGE_SIZE,
+ * that is, at 16TB if using 4kB page size.
+ */
+#define set_mm_counter(mm, member, value) atomic_set(&(mm)->_##member, value)
+#define get_mm_counter(mm, member) ((unsigned long)atomic_read(&(mm)->_##member))
+#define add_mm_counter(mm, member, value) atomic_add(value, &(mm)->_##member)
+#define inc_mm_counter(mm, member) atomic_inc(&(mm)->_##member)
+#define dec_mm_counter(mm, member) atomic_dec(&(mm)->_##member)
+typedef atomic_t mm_counter_t;
+#endif /* !ATOMIC64_INIT */
+
+#else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+/*
+ * The mm counters are protected by its page_table_lock,
+ * so can be incremented directly.
+ */
  #define set_mm_counter(mm, member, value) (mm)->_##member = (value)
  #define get_mm_counter(mm, member) ((mm)->_##member)
  #define add_mm_counter(mm, member, value) (mm)->_##member += (value)
@@ -244,6 +286,20 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
  #define dec_mm_counter(mm, member) (mm)->_##member--
  typedef unsigned long mm_counter_t;
  
+#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+
+#define get_mm_rss(mm)                                 \
+       (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
+#define update_hiwater_rss(mm) do {                    \
+       unsigned long _rss = get_mm_rss(mm);            \
+       if ((mm)->hiwater_rss < _rss)                   \
+               (mm)->hiwater_rss = _rss;               \
+} while (0)
+#define update_hiwater_vm(mm)  do {                    \
+       if ((mm)->hiwater_vm < (mm)->total_vm)          \
+               (mm)->hiwater_vm = (mm)->total_vm;      \
+} while (0)
+
  struct mm_struct {
         struct vm_area_struct * mmap;           /* list of VMAs */
         struct rb_root mm_rb;
@@ -267,15 +323,20 @@ struct mm_struct {
                                                  * by mmlist_lock
                                                  */
  
+       /* Special counters, in some configurations protected by the
+        * page_table_lock, in other configurations by being atomic.
+        */
+       mm_counter_t _file_rss;
+       mm_counter_t _anon_rss;
+
+       unsigned long hiwater_rss;      /* High-watermark of RSS usage */
+       unsigned long hiwater_vm;       /* High-water virtual memory usage */
+
+       unsigned long total_vm, locked_vm, shared_vm, exec_vm;
+       unsigned long stack_vm, reserved_vm, def_flags, nr_ptes;
         unsigned long start_code, end_code, start_data, end_data;
         unsigned long start_brk, brk, start_stack;
         unsigned long arg_start, arg_end, env_start, env_end;
-       unsigned long total_vm, locked_vm, shared_vm;
-       unsigned long exec_vm, stack_vm, reserved_vm, def_flags, nr_ptes;
-
-       /* Special counters protected by the page_table_lock */
-       mm_counter_t _rss;
-       mm_counter_t _anon_rss;
  
         unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
  
@@ -296,11 +357,7 @@ struct mm_struct {
         /* aio bits */
         rwlock_t                ioctx_list_lock;
         struct kioctx           *ioctx_list;
-
         struct kioctx           default_kioctx;
-
-       unsigned long hiwater_rss;      /* High-water RSS usage */
-       unsigned long hiwater_vm;       /* High-water virtual memory usage */
  };
  
  struct sighand_struct {
@@ -852,6 +909,7 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0)
  #define PF_SYNCWRITE   0x00200000      /* I am doing a sync write */
  #define PF_BORROWED_MM 0x00400000      /* I am a kthread doing use_mm */
  #define PF_RANDOMIZE   0x00800000      /* randomize virtual address space */
+#define PF_HOTPLUG_CPU 0x01000000      /* Currently performing CPU hotplug */
  
  /*
   * Only the _current_ task can read/write to tsk->flags, but other
@@ -883,7 +941,7 @@ extern int set_cpus_allowed(task_t *p, cpumask_t new_mask);
  #else
  static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask)
  {
-       if (!cpus_intersects(new_mask, cpu_online_map))
+       if (!cpu_isset(0, new_mask))
                 return -EINVAL;
         return 0;
  }
@@ -1006,6 +1064,7 @@ extern int force_sig_info(int, struct siginfo *, struct task_struct *);
  extern int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp);
  extern int kill_pg_info(int, struct siginfo *, pid_t);
  extern int kill_proc_info(int, struct siginfo *, pid_t);
+extern int kill_proc_info_as_uid(int, struct siginfo *, pid_t, uid_t, uid_t);
  extern void do_notify_parent(struct task_struct *, int);
  extern void force_sig(int, struct task_struct *);
  extern void force_sig_specific(int, struct task_struct *);
@@ -1026,6 +1085,11 @@ extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned lon
  #define SEND_SIG_PRIV  ((struct siginfo *) 1)
  #define SEND_SIG_FORCED        ((struct siginfo *) 2)
  
+static inline int is_si_special(const struct siginfo *info)
+{
+       return info <= SEND_SIG_FORCED;
+}
+
  /* True if we are on the alternate signal stack.  */
  
  static inline int on_sig_stack(unsigned long sp)
@@ -1153,7 +1217,7 @@ extern void unhash_process(struct task_struct *p);
  /*
   * Protects ->fs, ->files, ->mm, ->ptrace, ->group_info, ->comm, keyring
   * subscriptions and synchronises with wait4().  Also used in procfs.  Also
- * pins the final release of task.io_context.
+ * pins the final release of task.io_context.  Also protects ->cpuset.
   *
   * Nests both inside and outside of read_lock(&tasklist_lock).
   * It must not be nested with write_lock_irq(&tasklist_lock),