X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=kernel%2Fcpuset.c;h=9fceb97e989c25c82e438010437804774b892025;hb=3a57a788757738b8f80a82d4f5101fefb8fd7a58;hp=fe5407ca2f1ebd90cde99a2620c2d9d5b68fde1e;hpb=addf2c739d9015d3e9c0500b58a3af051cd58ea7;p=linux-2.6 diff --git a/kernel/cpuset.c b/kernel/cpuset.c index fe5407ca2f..9fceb97e98 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -127,6 +127,7 @@ struct cpuset_hotplug_scanner { typedef enum { CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, + CS_MEM_HARDWALL, CS_MEMORY_MIGRATE, CS_SCHED_LOAD_BALANCE, CS_SPREAD_PAGE, @@ -144,6 +145,11 @@ static inline int is_mem_exclusive(const struct cpuset *cs) return test_bit(CS_MEM_EXCLUSIVE, &cs->flags); } +static inline int is_mem_hardwall(const struct cpuset *cs) +{ + return test_bit(CS_MEM_HARDWALL, &cs->flags); +} + static inline int is_sched_load_balance(const struct cpuset *cs) { return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); @@ -791,8 +797,10 @@ static int update_cpumask(struct cpuset *cs, char *buf) retval = cpulist_parse(buf, trialcs.cpus_allowed); if (retval < 0) return retval; + + if (!cpus_subset(trialcs.cpus_allowed, cpu_online_map)) + return -EINVAL; } - cpus_and(trialcs.cpus_allowed, trialcs.cpus_allowed, cpu_online_map); retval = validate_change(cs, &trialcs); if (retval < 0) return retval; @@ -926,9 +934,11 @@ static int update_nodemask(struct cpuset *cs, char *buf) retval = nodelist_parse(buf, trialcs.mems_allowed); if (retval < 0) goto done; + + if (!nodes_subset(trialcs.mems_allowed, + node_states[N_HIGH_MEMORY])) + return -EINVAL; } - nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, - node_states[N_HIGH_MEMORY]); oldmem = cs->mems_allowed; if (nodes_equal(oldmem, trialcs.mems_allowed)) { retval = 0; /* Too easy - nothing to do */ @@ -1025,12 +1035,10 @@ int current_cpuset_is_being_rebound(void) return task_cs(current) == cpuset_being_rebound; } -static int update_relax_domain_level(struct cpuset *cs, char *buf) +static int update_relax_domain_level(struct cpuset *cs, s64 val) { - int val = simple_strtol(buf, NULL, 10); - - if (val < 0) - val = -1; + if (val < -1 || val >= SD_LV_MAX) + return -EINVAL; if (val != cs->relax_domain_level) { cs->relax_domain_level = val; @@ -1042,12 +1050,9 @@ static int update_relax_domain_level(struct cpuset *cs, char *buf) /* * update_flag - read a 0 or a 1 in a file and update associated flag - * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, - * CS_SCHED_LOAD_BALANCE, - * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE, - * CS_SPREAD_PAGE, CS_SPREAD_SLAB) - * cs: the cpuset to update - * buf: the buffer where we read the 0 or 1 + * bit: the bit to update (see cpuset_flagbits_t) + * cs: the cpuset to update + * turning_on: whether the flag is being set or cleared * * Call with cgroup_mutex held. */ @@ -1228,6 +1233,7 @@ typedef enum { FILE_MEMLIST, FILE_CPU_EXCLUSIVE, FILE_MEM_EXCLUSIVE, + FILE_MEM_HARDWALL, FILE_SCHED_LOAD_BALANCE, FILE_SCHED_RELAX_DOMAIN_LEVEL, FILE_MEMORY_PRESSURE_ENABLED, @@ -1276,9 +1282,6 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont, case FILE_MEMLIST: retval = update_nodemask(cs, buffer); break; - case FILE_SCHED_RELAX_DOMAIN_LEVEL: - retval = update_relax_domain_level(cs, buffer); - break; default: retval = -EINVAL; goto out2; @@ -1313,6 +1316,9 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) case FILE_MEM_EXCLUSIVE: retval = update_flag(CS_MEM_EXCLUSIVE, cs, val); break; + case FILE_MEM_HARDWALL: + retval = update_flag(CS_MEM_HARDWALL, cs, val); + break; case FILE_SCHED_LOAD_BALANCE: retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val); break; @@ -1341,6 +1347,30 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) return retval; } +static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val) +{ + int retval = 0; + struct cpuset *cs = cgroup_cs(cgrp); + cpuset_filetype_t type = cft->private; + + cgroup_lock(); + + if (cgroup_is_removed(cgrp)) { + cgroup_unlock(); + return -ENODEV; + } + switch (type) { + case FILE_SCHED_RELAX_DOMAIN_LEVEL: + retval = update_relax_domain_level(cs, val); + break; + default: + retval = -EINVAL; + break; + } + cgroup_unlock(); + return retval; +} + /* * These ascii lists should be read in a single call, by using a user * buffer large enough to hold the entire map. If read in smaller @@ -1399,9 +1429,6 @@ static ssize_t cpuset_common_file_read(struct cgroup *cont, case FILE_MEMLIST: s += cpuset_sprintf_memlist(s, cs); break; - case FILE_SCHED_RELAX_DOMAIN_LEVEL: - s += sprintf(s, "%d", cs->relax_domain_level); - break; default: retval = -EINVAL; goto out; @@ -1423,6 +1450,8 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft) return is_cpu_exclusive(cs); case FILE_MEM_EXCLUSIVE: return is_mem_exclusive(cs); + case FILE_MEM_HARDWALL: + return is_mem_hardwall(cs); case FILE_SCHED_LOAD_BALANCE: return is_sched_load_balance(cs); case FILE_MEMORY_MIGRATE: @@ -1440,6 +1469,18 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft) } } +static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft) +{ + struct cpuset *cs = cgroup_cs(cont); + cpuset_filetype_t type = cft->private; + switch (type) { + case FILE_SCHED_RELAX_DOMAIN_LEVEL: + return cs->relax_domain_level; + default: + BUG(); + } +} + /* * for the common functions, 'private' gives the type of file @@ -1474,6 +1515,13 @@ static struct cftype files[] = { .private = FILE_MEM_EXCLUSIVE, }, + { + .name = "mem_hardwall", + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, + .private = FILE_MEM_HARDWALL, + }, + { .name = "sched_load_balance", .read_u64 = cpuset_read_u64, @@ -1483,8 +1531,8 @@ static struct cftype files[] = { { .name = "sched_relax_domain_level", - .read_u64 = cpuset_read_u64, - .write_u64 = cpuset_write_u64, + .read_s64 = cpuset_read_s64, + .write_s64 = cpuset_write_s64, .private = FILE_SCHED_RELAX_DOMAIN_LEVEL, }, @@ -1842,6 +1890,12 @@ static void common_cpu_mem_hotplug_unplug(void) top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; scan_for_empty_cpusets(&top_cpuset); + /* + * Scheduler destroys domains on hotplug events. + * Rebuild them based on the current settings. + */ + rebuild_sched_domains(); + cgroup_unlock(); } @@ -1963,14 +2017,14 @@ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) } /* - * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive - * ancestor to the specified cpuset. Call holding callback_mutex. - * If no ancestor is mem_exclusive (an unusual configuration), then - * returns the root cpuset. + * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or + * mem_hardwall ancestor to the specified cpuset. Call holding + * callback_mutex. If no ancestor is mem_exclusive or mem_hardwall + * (an unusual configuration), then returns the root cpuset. */ -static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) +static const struct cpuset *nearest_hardwall_ancestor(const struct cpuset *cs) { - while (!is_mem_exclusive(cs) && cs->parent) + while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && cs->parent) cs = cs->parent; return cs; } @@ -1984,7 +2038,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * __GFP_THISNODE is set, yes, we can always allocate. If zone * z's node is in our tasks mems_allowed, yes. If it's not a * __GFP_HARDWALL request and this zone's nodes is in the nearest - * mem_exclusive cpuset ancestor to this tasks cpuset, yes. + * hardwalled cpuset ancestor to this tasks cpuset, yes. * If the task has been OOM killed and has access to memory reserves * as specified by the TIF_MEMDIE flag, yes. * Otherwise, no. @@ -2007,7 +2061,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * and do not allow allocations outside the current tasks cpuset * unless the task has been OOM killed as is marked TIF_MEMDIE. * GFP_KERNEL allocations are not so marked, so can escape to the - * nearest enclosing mem_exclusive ancestor cpuset. + * nearest enclosing hardwalled ancestor cpuset. * * Scanning up parent cpusets requires callback_mutex. The * __alloc_pages() routine only calls here with __GFP_HARDWALL bit @@ -2030,7 +2084,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * in_interrupt - any node ok (current task context irrelevant) * GFP_ATOMIC - any node ok * TIF_MEMDIE - any node ok - * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok + * GFP_KERNEL - any node in enclosing hardwalled cpuset ok * GFP_USER - only nodes in current tasks mems allowed ok. * * Rule: @@ -2067,7 +2121,7 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask) mutex_lock(&callback_mutex); task_lock(current); - cs = nearest_exclusive_ancestor(task_cs(current)); + cs = nearest_hardwall_ancestor(task_cs(current)); task_unlock(current); allowed = node_isset(node, cs->mems_allowed);