* task has been modifying its cpuset.
*/
-void cpuset_update_task_memory_state()
+void cpuset_update_task_memory_state(void)
{
int my_cpusets_mem_gen;
struct task_struct *tsk = current;
* when reading out p->cpuset, as we don't really care if it changes
* on the next cycle, and we are not going to try to dereference it.
*/
-static inline int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs)
+static int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs)
{
int n = 0;
struct task_struct *g, *p;
* We don't need to task_lock() this reference to tsk->cpuset,
* because tsk is already marked PF_EXITING, so attach_task() won't
* mess with it, or task is a failed fork, never visible to attach_task.
+ *
+ * Hack:
+ *
+ * Set the exiting tasks cpuset to the root cpuset (top_cpuset).
+ *
+ * Don't leave a task unable to allocate memory, as that is an
+ * accident waiting to happen should someone add a callout in
+ * do_exit() after the cpuset_exit() call that might allocate.
+ * If a task tries to allocate memory with an invalid cpuset,
+ * it will oops in cpuset_update_task_memory_state().
+ *
+ * We call cpuset_exit() while the task is still competent to
+ * handle notify_on_release(), then leave the task attached to
+ * the root cpuset (top_cpuset) for the remainder of its exit.
+ *
+ * To do this properly, we would increment the reference count on
+ * top_cpuset, and near the very end of the kernel/exit.c do_exit()
+ * code we would add a second cpuset function call, to drop that
+ * reference. This would just create an unnecessary hot spot on
+ * the top_cpuset reference count, to no avail.
+ *
+ * Normally, holding a reference to a cpuset without bumping its
+ * count is unsafe. The cpuset could go away, or someone could
+ * attach us to a different cpuset, decrementing the count on
+ * the first cpuset that we never incremented. But in this case,
+ * top_cpuset isn't going away, and either task has PF_EXITING set,
+ * which wards off any attach_task() attempts, or task is a failed
+ * fork, never visible to attach_task.
+ *
+ * Another way to do this would be to set the cpuset pointer
+ * to NULL here, and check in cpuset_update_task_memory_state()
+ * for a NULL pointer. This hack avoids that NULL check, for no
+ * cost (other than this way too long comment ;).
**/
void cpuset_exit(struct task_struct *tsk)
struct cpuset *cs;
cs = tsk->cpuset;
- tsk->cpuset = NULL;
+ tsk->cpuset = &top_cpuset; /* Hack - see comment above */
if (notify_on_release(cs)) {
char *pathbuf = NULL;
return allowed;
}
+/**
+ * cpuset_lock - lock out any changes to cpuset structures
+ *
+ * The out of memory (oom) code needs to lock down cpusets
+ * from being changed while it scans the tasklist looking for a
+ * task in an overlapping cpuset. Expose callback_sem via this
+ * cpuset_lock() routine, so the oom code can lock it, before
+ * locking the task list. The tasklist_lock is a spinlock, so
+ * must be taken inside callback_sem.
+ */
+
+void cpuset_lock(void)
+{
+ down(&callback_sem);
+}
+
+/**
+ * cpuset_unlock - release lock on cpuset changes
+ *
+ * Undo the lock taken in a previous cpuset_lock() call.
+ */
+
+void cpuset_unlock(void)
+{
+ up(&callback_sem);
+}
+
/**
* cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors?
* @p: pointer to task_struct of some other task.
* determine if task @p's memory usage might impact the memory
* available to the current task.
*
- * Acquires callback_sem - not suitable for calling from a fast path.
+ * Call while holding callback_sem.
**/
int cpuset_excl_nodes_overlap(const struct task_struct *p)
const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */
int overlap = 0; /* do cpusets overlap? */
- down(&callback_sem);
-
task_lock(current);
if (current->flags & PF_EXITING) {
task_unlock(current);
overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
done:
- up(&callback_sem);
-
return overlap;
}