#include <linux/seq_file.h>
#include <linux/security.h>
#include <linux/slab.h>
-#include <linux/smp_lock.h>
#include <linux/spinlock.h>
#include <linux/stat.h>
#include <linux/string.h>
envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
envp[i] = NULL;
- call_usermodehelper(argv[0], argv, envp, 0);
+ call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
kfree(pathbuf);
}
return -EACCES;
trialcs = *cs;
- retval = cpulist_parse(buf, trialcs.cpus_allowed);
- if (retval < 0)
- return retval;
+
+ /*
+ * We allow a cpuset's cpus_allowed to be empty; if it has attached
+ * tasks, we'll catch it later when we validate the change and return
+ * -ENOSPC.
+ */
+ if (!buf[0] || (buf[0] == '\n' && !buf[1])) {
+ cpus_clear(trialcs.cpus_allowed);
+ } else {
+ retval = cpulist_parse(buf, trialcs.cpus_allowed);
+ if (retval < 0)
+ return retval;
+ }
cpus_and(trialcs.cpus_allowed, trialcs.cpus_allowed, cpu_online_map);
- if (cpus_empty(trialcs.cpus_allowed))
+ /* cpus_allowed cannot be empty for a cpuset with attached tasks. */
+ if (atomic_read(&cs->count) && cpus_empty(trialcs.cpus_allowed))
return -ENOSPC;
retval = validate_change(cs, &trialcs);
if (retval < 0)
return -EACCES;
trialcs = *cs;
- retval = nodelist_parse(buf, trialcs.mems_allowed);
- if (retval < 0)
- goto done;
+
+ /*
+ * We allow a cpuset's mems_allowed to be empty; if it has attached
+ * tasks, we'll catch it later when we validate the change and return
+ * -ENOSPC.
+ */
+ if (!buf[0] || (buf[0] == '\n' && !buf[1])) {
+ nodes_clear(trialcs.mems_allowed);
+ } else {
+ retval = nodelist_parse(buf, trialcs.mems_allowed);
+ if (retval < 0)
+ goto done;
+ }
nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map);
oldmem = cs->mems_allowed;
if (nodes_equal(oldmem, trialcs.mems_allowed)) {
retval = 0; /* Too easy - nothing to do */
goto done;
}
- if (nodes_empty(trialcs.mems_allowed)) {
+ /* mems_allowed cannot be empty for a cpuset with attached tasks. */
+ if (atomic_read(&cs->count) && nodes_empty(trialcs.mems_allowed)) {
retval = -ENOSPC;
goto done;
}
mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
if (!mmarray)
goto done;
- write_lock_irq(&tasklist_lock); /* block fork */
+ read_lock(&tasklist_lock); /* block fork */
if (atomic_read(&cs->count) <= ntasks)
break; /* got enough */
- write_unlock_irq(&tasklist_lock); /* try again */
+ read_unlock(&tasklist_lock); /* try again */
kfree(mmarray);
}
continue;
mmarray[n++] = mm;
} while_each_thread(g, p);
- write_unlock_irq(&tasklist_lock);
+ read_unlock(&tasklist_lock);
/*
* Now that we've dropped the tasklist spinlock, we can
do_each_thread(g, p) {
if (p->cpuset == cs) {
- pidarray[n++] = p->pid;
if (unlikely(n == npids))
goto array_full;
+ pidarray[n++] = p->pid;
}
} while_each_thread(g, p);
{
struct ctr_struct *ctr = file->private_data;
- if (*ppos + nbytes > ctr->bufsz)
- nbytes = ctr->bufsz - *ppos;
- if (copy_to_user(buf, ctr->buf + *ppos, nbytes))
- return -EFAULT;
- *ppos += nbytes;
- return nbytes;
+ return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
}
static int cpuset_tasks_release(struct inode *unused_inode, struct file *file)
static int cpuset_handle_cpuhp(struct notifier_block *nb,
unsigned long phase, void *cpu)
{
+ if (phase == CPU_DYING || phase == CPU_DYING_FROZEN)
+ return NOTIFY_DONE;
+
common_cpu_mem_hotplug_unplug();
return 0;
}
* it is holding that mutex while calling check_for_release(),
* which calls kmalloc(), so can't be called holding callback_mutex().
*
- * We don't need to task_lock() this reference to tsk->cpuset,
- * because tsk is already marked PF_EXITING, so attach_task() won't
- * mess with it, or task is a failed fork, never visible to attach_task.
- *
* the_top_cpuset_hack:
*
* Set the exiting tasks cpuset to the root cpuset (top_cpuset).
{
struct cpuset *cs;
+ task_lock(current);
cs = tsk->cpuset;
tsk->cpuset = &top_cpuset; /* the_top_cpuset_hack - see above */
+ task_unlock(current);
if (notify_on_release(cs)) {
char *pathbuf = NULL;
* z's node is in our tasks mems_allowed, yes. If it's not a
* __GFP_HARDWALL request and this zone's nodes is in the nearest
* mem_exclusive cpuset ancestor to this tasks cpuset, yes.
+ * If the task has been OOM killed and has access to memory reserves
+ * as specified by the TIF_MEMDIE flag, yes.
* Otherwise, no.
*
* If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall()
* calls get to this routine, we should just shut up and say 'yes'.
*
* GFP_USER allocations are marked with the __GFP_HARDWALL bit,
- * and do not allow allocations outside the current tasks cpuset.
+ * and do not allow allocations outside the current tasks cpuset
+ * unless the task has been OOM killed as is marked TIF_MEMDIE.
* GFP_KERNEL allocations are not so marked, so can escape to the
* nearest enclosing mem_exclusive ancestor cpuset.
*
* affect that:
* in_interrupt - any node ok (current task context irrelevant)
* GFP_ATOMIC - any node ok
+ * TIF_MEMDIE - any node ok
* GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok
* GFP_USER - only nodes in current tasks mems allowed ok.
*
might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
if (node_isset(node, current->mems_allowed))
return 1;
+ /*
+ * Allow tasks that have access to memory reserves because they have
+ * been OOM killed to get memory anywhere.
+ */
+ if (unlikely(test_thread_flag(TIF_MEMDIE)))
+ return 1;
if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */
return 0;
*
* If we're in interrupt, yes, we can always allocate.
* If __GFP_THISNODE is set, yes, we can always allocate. If zone
- * z's node is in our tasks mems_allowed, yes. Otherwise, no.
+ * z's node is in our tasks mems_allowed, yes. If the task has been
+ * OOM killed and has access to memory reserves as specified by the
+ * TIF_MEMDIE flag, yes. Otherwise, no.
*
* The __GFP_THISNODE placement logic is really handled elsewhere,
* by forcibly using a zonelist starting at a specified node, and by
node = zone_to_nid(z);
if (node_isset(node, current->mems_allowed))
return 1;
+ /*
+ * Allow tasks that have access to memory reserves because they have
+ * been OOM killed to get memory anywhere.
+ */
+ if (unlikely(test_thread_flag(TIF_MEMDIE)))
+ return 1;
return 0;
}