typedef enum {
CS_CPU_EXCLUSIVE,
CS_MEM_EXCLUSIVE,
+ CS_MEM_HARDWALL,
CS_MEMORY_MIGRATE,
CS_SCHED_LOAD_BALANCE,
CS_SPREAD_PAGE,
return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
}
+static inline int is_mem_hardwall(const struct cpuset *cs)
+{
+ return test_bit(CS_MEM_HARDWALL, &cs->flags);
+}
+
static inline int is_sched_load_balance(const struct cpuset *cs)
{
return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
- * Return nonzero if this tasks's cpus_allowed mask should be changed (in other
+ * Return nonzero if this task's cpus_allowed mask should be changed (in other
* words, if its mask is not equal to its cpuset's mask).
*/
-int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
+static int cpuset_test_cpumask(struct task_struct *tsk,
+ struct cgroup_scanner *scan)
{
return !cpus_equal(tsk->cpus_allowed,
(cgroup_cs(scan->cg))->cpus_allowed);
* We don't need to re-check for the cgroup/cpuset membership, since we're
* holding cgroup_lock() at this point.
*/
-void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
+static void cpuset_change_cpumask(struct task_struct *tsk,
+ struct cgroup_scanner *scan)
{
set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed));
}
return task_cs(current) == cpuset_being_rebound;
}
-static int update_relax_domain_level(struct cpuset *cs, char *buf)
+static int update_relax_domain_level(struct cpuset *cs, s64 val)
{
- int val = simple_strtol(buf, NULL, 10);
-
- if (val < 0)
+ if ((int)val < 0)
val = -1;
if (val != cs->relax_domain_level) {
/*
- * update_flag - read a 0 or a 1 in a file and update associated flag
+ * update_flag - update a flag bit in a cpuset
- * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
- * CS_SCHED_LOAD_BALANCE,
- * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE,
- * CS_SPREAD_PAGE, CS_SPREAD_SLAB)
- * cs: the cpuset to update
- * buf: the buffer where we read the 0 or 1
+ * bit: the bit to update (see cpuset_flagbits_t)
+ * cs: the cpuset to update
+ * turning_on: whether the flag is being set or cleared
*
* Call with cgroup_mutex held.
*/
FILE_MEMLIST,
FILE_CPU_EXCLUSIVE,
FILE_MEM_EXCLUSIVE,
+ FILE_MEM_HARDWALL,
FILE_SCHED_LOAD_BALANCE,
FILE_SCHED_RELAX_DOMAIN_LEVEL,
FILE_MEMORY_PRESSURE_ENABLED,
case FILE_MEMLIST:
retval = update_nodemask(cs, buffer);
break;
- case FILE_SCHED_RELAX_DOMAIN_LEVEL:
- retval = update_relax_domain_level(cs, buffer);
- break;
default:
retval = -EINVAL;
goto out2;
case FILE_MEM_EXCLUSIVE:
retval = update_flag(CS_MEM_EXCLUSIVE, cs, val);
break;
+ case FILE_MEM_HARDWALL:
+ retval = update_flag(CS_MEM_HARDWALL, cs, val);
+ break;
case FILE_SCHED_LOAD_BALANCE:
retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val);
break;
return retval;
}
+static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
+{
+ int retval = 0;
+ struct cpuset *cs = cgroup_cs(cgrp);
+ cpuset_filetype_t type = cft->private;
+
+ cgroup_lock();
+
+ if (cgroup_is_removed(cgrp)) {
+ cgroup_unlock();
+ return -ENODEV;
+ }
+ switch (type) {
+ case FILE_SCHED_RELAX_DOMAIN_LEVEL:
+ retval = update_relax_domain_level(cs, val);
+ break;
+ default:
+ retval = -EINVAL;
+ break;
+ }
+ cgroup_unlock();
+ return retval;
+}
+
/*
* These ascii lists should be read in a single call, by using a user
* buffer large enough to hold the entire map. If read in smaller
case FILE_MEMLIST:
s += cpuset_sprintf_memlist(s, cs);
break;
- case FILE_SCHED_RELAX_DOMAIN_LEVEL:
- s += sprintf(s, "%d", cs->relax_domain_level);
- break;
default:
retval = -EINVAL;
goto out;
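As the comment above these handlers warns, the cpus and mems lists must be
fetched with a single, sufficiently large read(); smaller chunked reads can
interleave with concurrent updates and return a torn list. A minimal
userspace sketch of the intended usage (the /dev/cpuset mount point and the
buffer size are illustrative assumptions, not something this patch defines):

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[8192];	/* assumed large enough for the whole list */
		ssize_t n;
		int fd = open("/dev/cpuset/cpus", O_RDONLY);

		if (fd < 0)
			return 1;
		/* one read(2) call: cpuset_common_file_read() renders the
		 * complete map per call, so the ranges cannot be torn */
		n = read(fd, buf, sizeof(buf) - 1);
		close(fd);
		if (n < 0)
			return 1;
		buf[n] = '\0';
		printf("cpus: %s\n", buf);
		return 0;
	}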
return is_cpu_exclusive(cs);
case FILE_MEM_EXCLUSIVE:
return is_mem_exclusive(cs);
+ case FILE_MEM_HARDWALL:
+ return is_mem_hardwall(cs);
case FILE_SCHED_LOAD_BALANCE:
return is_sched_load_balance(cs);
case FILE_MEMORY_MIGRATE:
}
}
+static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft)
+{
+ struct cpuset *cs = cgroup_cs(cont);
+ cpuset_filetype_t type = cft->private;
+ switch (type) {
+ case FILE_SCHED_RELAX_DOMAIN_LEVEL:
+ return cs->relax_domain_level;
+ default:
+ BUG();
+ }
+}
+
/*
* for the common functions, 'private' gives the type of file
*/
-static struct cftype cft_cpus = {
- .name = "cpus",
- .read = cpuset_common_file_read,
- .write = cpuset_common_file_write,
- .private = FILE_CPULIST,
-};
-
-static struct cftype cft_mems = {
- .name = "mems",
- .read = cpuset_common_file_read,
- .write = cpuset_common_file_write,
- .private = FILE_MEMLIST,
-};
-
-static struct cftype cft_cpu_exclusive = {
- .name = "cpu_exclusive",
- .read_u64 = cpuset_read_u64,
- .write_u64 = cpuset_write_u64,
- .private = FILE_CPU_EXCLUSIVE,
-};
-
-static struct cftype cft_mem_exclusive = {
- .name = "mem_exclusive",
- .read_u64 = cpuset_read_u64,
- .write_u64 = cpuset_write_u64,
- .private = FILE_MEM_EXCLUSIVE,
-};
-
-static struct cftype cft_sched_load_balance = {
- .name = "sched_load_balance",
- .read_u64 = cpuset_read_u64,
- .write_u64 = cpuset_write_u64,
- .private = FILE_SCHED_LOAD_BALANCE,
-};
-
-static struct cftype cft_sched_relax_domain_level = {
- .name = "sched_relax_domain_level",
- .read = cpuset_common_file_read,
- .write = cpuset_common_file_write,
- .private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
-};
-
-static struct cftype cft_memory_migrate = {
- .name = "memory_migrate",
- .read_u64 = cpuset_read_u64,
- .write_u64 = cpuset_write_u64,
- .private = FILE_MEMORY_MIGRATE,
+static struct cftype files[] = {
+ {
+ .name = "cpus",
+ .read = cpuset_common_file_read,
+ .write = cpuset_common_file_write,
+ .private = FILE_CPULIST,
+ },
+
+ {
+ .name = "mems",
+ .read = cpuset_common_file_read,
+ .write = cpuset_common_file_write,
+ .private = FILE_MEMLIST,
+ },
+
+ {
+ .name = "cpu_exclusive",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_CPU_EXCLUSIVE,
+ },
+
+ {
+ .name = "mem_exclusive",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_MEM_EXCLUSIVE,
+ },
+
+ {
+ .name = "mem_hardwall",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_MEM_HARDWALL,
+ },
+
+ {
+ .name = "sched_load_balance",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_SCHED_LOAD_BALANCE,
+ },
+
+ {
+ .name = "sched_relax_domain_level",
+ .read_s64 = cpuset_read_s64,
+ .write_s64 = cpuset_write_s64,
+ .private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
+ },
+
+ {
+ .name = "memory_migrate",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_MEMORY_MIGRATE,
+ },
+
+ {
+ .name = "memory_pressure",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_MEMORY_PRESSURE,
+ },
+
+ {
+ .name = "memory_spread_page",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_SPREAD_PAGE,
+ },
+
+ {
+ .name = "memory_spread_slab",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_SPREAD_SLAB,
+ },
};
static struct cftype cft_memory_pressure_enabled = {
	.name = "memory_pressure_enabled",
	.read_u64 = cpuset_read_u64,
	.write_u64 = cpuset_write_u64,
	.private = FILE_MEMORY_PRESSURE_ENABLED,
};
-static struct cftype cft_memory_pressure = {
- .name = "memory_pressure",
- .read_u64 = cpuset_read_u64,
- .write_u64 = cpuset_write_u64,
- .private = FILE_MEMORY_PRESSURE,
-};
-
-static struct cftype cft_spread_page = {
- .name = "memory_spread_page",
- .read_u64 = cpuset_read_u64,
- .write_u64 = cpuset_write_u64,
- .private = FILE_SPREAD_PAGE,
-};
-
-static struct cftype cft_spread_slab = {
- .name = "memory_spread_slab",
- .read_u64 = cpuset_read_u64,
- .write_u64 = cpuset_write_u64,
- .private = FILE_SPREAD_SLAB,
-};
-
static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
{
int err;
- if ((err = cgroup_add_file(cont, ss, &cft_cpus)) < 0)
- return err;
- if ((err = cgroup_add_file(cont, ss, &cft_mems)) < 0)
- return err;
- if ((err = cgroup_add_file(cont, ss, &cft_cpu_exclusive)) < 0)
- return err;
- if ((err = cgroup_add_file(cont, ss, &cft_mem_exclusive)) < 0)
- return err;
- if ((err = cgroup_add_file(cont, ss, &cft_memory_migrate)) < 0)
- return err;
- if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0)
- return err;
- if ((err = cgroup_add_file(cont, ss,
- &cft_sched_relax_domain_level)) < 0)
- return err;
- if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0)
- return err;
- if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0)
- return err;
- if ((err = cgroup_add_file(cont, ss, &cft_spread_slab)) < 0)
+ err = cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
+ if (err)
return err;
/* memory_pressure_enabled is in root cpuset only */
- if (err == 0 && !cont->parent)
+ if (!cont->parent)
err = cgroup_add_file(cont, ss,
- &cft_memory_pressure_enabled);
- return 0;
+ &cft_memory_pressure_enabled);
+ return err;
}
/*
* Called by cgroup_scan_tasks() for each task in a cgroup.
- * Return nonzero to stop the walk through the tasks.
*/
-void cpuset_do_move_task(struct task_struct *tsk, struct cgroup_scanner *scan)
+static void cpuset_do_move_task(struct task_struct *tsk,
+ struct cgroup_scanner *scan)
{
struct cpuset_hotplug_scanner *chsp;
}
/*
- * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive
- * ancestor to the specified cpuset. Call holding callback_mutex.
- * If no ancestor is mem_exclusive (an unusual configuration), then
- * returns the root cpuset.
+ * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
+ * mem_hardwall ancestor to the specified cpuset. Call holding
+ * callback_mutex. If no ancestor is mem_exclusive or mem_hardwall
+ * (an unusual configuration), then returns the root cpuset.
*/
-static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
+static const struct cpuset *nearest_hardwall_ancestor(const struct cpuset *cs)
{
- while (!is_mem_exclusive(cs) && cs->parent)
+ while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && cs->parent)
cs = cs->parent;
return cs;
}
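A worked example of the walk, on a hypothetical hierarchy (only the flags
shown matter):

	/*
	 *	top_cpuset			(neither flag set)
	 *	`-- A	mem_hardwall == 1
	 *	    `-- B			(neither flag set)
	 *
	 * nearest_hardwall_ancestor(B) steps from B up to A and stops
	 * there, because A satisfies is_mem_hardwall(); if no ancestor
	 * had either flag, the loop would run until cs->parent == NULL
	 * and return the root cpuset.
	 */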
* __GFP_THISNODE is set, yes, we can always allocate. If zone
- * z's node is in our tasks mems_allowed, yes.  If it's not a
- * __GFP_HARDWALL request and this zone's nodes is in the nearest
- * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
+ * z's node is in our task's mems_allowed, yes.  If it's not a
+ * __GFP_HARDWALL request and this zone's node is in the nearest
+ * hardwalled cpuset ancestor to this task's cpuset, yes.
* If the task has been OOM killed and has access to memory reserves
* as specified by the TIF_MEMDIE flag, yes.
* Otherwise, no.
- * and do not allow allocations outside the current tasks cpuset
- * unless the task has been OOM killed as is marked TIF_MEMDIE.
+ * and do not allow allocations outside the current task's cpuset
+ * unless the task has been OOM killed and is marked TIF_MEMDIE.
* GFP_KERNEL allocations are not so marked, so can escape to the
- * nearest enclosing mem_exclusive ancestor cpuset.
+ * nearest enclosing hardwalled ancestor cpuset.
*
* Scanning up parent cpusets requires callback_mutex. The
* __alloc_pages() routine only calls here with __GFP_HARDWALL bit
* in_interrupt - any node ok (current task context irrelevant)
* GFP_ATOMIC - any node ok
* TIF_MEMDIE - any node ok
- * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok
+ * GFP_KERNEL - any node in enclosing hardwalled cpuset ok
- * GFP_USER - only nodes in current tasks mems allowed ok.
+ * GFP_USER - only nodes in current task's mems_allowed ok.
*
* Rule:
mutex_lock(&callback_mutex);
task_lock(current);
- cs = nearest_exclusive_ancestor(task_cs(current));
+ cs = nearest_hardwall_ancestor(task_cs(current));
task_unlock(current);
allowed = node_isset(node, cs->mems_allowed);
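Taken together, the rules above collapse into a short decision sequence; a
condensed paraphrase of the surrounding allocation check (a sketch of the
existing function, not code added by this patch):

	/*
	 * if (in_interrupt() || (gfp_mask & __GFP_THISNODE))	-> allow
	 * if (node_isset(node, current->mems_allowed))		-> allow
	 * if (task is marked TIF_MEMDIE)			-> allow
	 * if (gfp_mask & __GFP_HARDWALL)			-> deny
	 * otherwise, allow iff the node is in the mems_allowed of
	 * the nearest hardwalled ancestor, i.e. the node_isset()
	 * test shown above.
	 */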