X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=kernel%2Fcpuset.c;h=64950fa5d3211c5f2843e9637b38452a70eef9f7;hb=ac0191517c3b5f2cf68ab36756d64ef035c4a770;hp=57e6448b171e9ff277a85bdf02d061ea8cac7623;hpb=973ed7c49a5c2271a10ce54ac06bba5617fca3a8;p=linux-2.6

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 57e6448b17..64950fa5d3 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -581,26 +581,28 @@ static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
 
 /*
  * Return in *pmask the portion of a cpusets's mems_allowed that
- * are online.  If none are online, walk up the cpuset hierarchy
- * until we find one that does have some online mems.  If we get
- * all the way to the top and still haven't found any online mems,
- * return node_online_map.
+ * are online, with memory.  If none are online with memory, walk
+ * up the cpuset hierarchy until we find one that does have some
+ * online mems.  If we get all the way to the top and still haven't
+ * found any online mems, return node_states[N_HIGH_MEMORY].
  *
  * One way or another, we guarantee to return some non-empty subset
- * of node_online_map.
+ * of node_states[N_HIGH_MEMORY].
  *
  * Call with callback_mutex held.
  */
 
 static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
 {
-	while (cs && !nodes_intersects(cs->mems_allowed, node_online_map))
+	while (cs && !nodes_intersects(cs->mems_allowed,
+					node_states[N_HIGH_MEMORY]))
 		cs = cs->parent;
 	if (cs)
-		nodes_and(*pmask, cs->mems_allowed, node_online_map);
+		nodes_and(*pmask, cs->mems_allowed,
+				node_states[N_HIGH_MEMORY]);
 	else
-		*pmask = node_online_map;
-	BUG_ON(!nodes_intersects(*pmask, node_online_map));
+		*pmask = node_states[N_HIGH_MEMORY];
+	BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
 }
 
 /**
@@ -752,61 +754,6 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
 	return 0;
 }
 
-/*
- * For a given cpuset cur, partition the system as follows
- * a. All cpus in the parent cpuset's cpus_allowed that are not part of any
- *    exclusive child cpusets
- * b. All cpus in the current cpuset's cpus_allowed that are not part of any
- *    exclusive child cpusets
- * Build these two partitions by calling partition_sched_domains
- *
- * Call with manage_mutex held.  May nest a call to the
- * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
- * Must not be called holding callback_mutex, because we must
- * not call lock_cpu_hotplug() while holding callback_mutex.
- */
-
-static void update_cpu_domains(struct cpuset *cur)
-{
-	struct cpuset *c, *par = cur->parent;
-	cpumask_t pspan, cspan;
-
-	if (par == NULL || cpus_empty(cur->cpus_allowed))
-		return;
-
-	/*
-	 * Get all cpus from parent's cpus_allowed not part of exclusive
-	 * children
-	 */
-	pspan = par->cpus_allowed;
-	list_for_each_entry(c, &par->children, sibling) {
-		if (is_cpu_exclusive(c))
-			cpus_andnot(pspan, pspan, c->cpus_allowed);
-	}
-	if (!is_cpu_exclusive(cur)) {
-		cpus_or(pspan, pspan, cur->cpus_allowed);
-		if (cpus_equal(pspan, cur->cpus_allowed))
-			return;
-		cspan = CPU_MASK_NONE;
-	} else {
-		if (cpus_empty(pspan))
-			return;
-		cspan = cur->cpus_allowed;
-		/*
-		 * Get all cpus from current cpuset's cpus_allowed not part
-		 * of exclusive children
-		 */
-		list_for_each_entry(c, &cur->children, sibling) {
-			if (is_cpu_exclusive(c))
-				cpus_andnot(cspan, cspan, c->cpus_allowed);
-		}
-	}
-
-	lock_cpu_hotplug();
-	partition_sched_domains(&pspan, &cspan);
-	unlock_cpu_hotplug();
-}
-
 /*
  * Call with manage_mutex held.  May take callback_mutex during call.
  */
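The first hunk changes guarantee_online_mems() to intersect against node_states[N_HIGH_MEMORY] rather than node_online_map, so a cpuset bound only to memoryless nodes falls back to the nearest ancestor that has real memory. A minimal userspace model of that fallback walk, with nodemask_t reduced to a plain bitmask and a made-up three-level hierarchy (the struct layout, mask values, and node numbers are illustrative, not the kernel's API):

#include <assert.h>
#include <stdio.h>

/* Stand-in for node_states[N_HIGH_MEMORY]: nodes 0 and 1 have memory;
 * node 2 is online but memoryless. */
#define NODES_WITH_MEMORY 0x3ul

struct cpuset {
	struct cpuset *parent;
	unsigned long mems_allowed;	/* stand-in for nodemask_t */
};

/* Mirrors the new logic: walk up until some ancestor intersects the
 * nodes-with-memory mask, else fall back to that mask itself. */
static unsigned long guarantee_online_mems(const struct cpuset *cs)
{
	while (cs && !(cs->mems_allowed & NODES_WITH_MEMORY))
		cs = cs->parent;
	if (cs)
		return cs->mems_allowed & NODES_WITH_MEMORY;
	return NODES_WITH_MEMORY;
}

int main(void)
{
	struct cpuset top  = { NULL, 0x7ul };	/* nodes 0-2 */
	struct cpuset mid  = { &top, 0x6ul };	/* nodes 1-2 */
	struct cpuset leaf = { &mid, 0x4ul };	/* node 2 only: memoryless */

	/* leaf has no memory, so the walk falls back to mid's node 1 */
	assert(guarantee_online_mems(&leaf) == 0x2ul);
	/* the result is always a non-empty subset of NODES_WITH_MEMORY */
	assert(guarantee_online_mems(&top) & NODES_WITH_MEMORY);
	printf("fallback mask: 0x%lx\n", guarantee_online_mems(&leaf));
	return 0;
}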
@@ -814,7 +761,7 @@ static void update_cpu_domains(struct cpuset *cur)
 static int update_cpumask(struct cpuset *cs, char *buf)
 {
 	struct cpuset trialcs;
-	int retval, cpus_unchanged;
+	int retval;
 
 	/* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
 	if (cs == &top_cpuset)
@@ -841,12 +788,9 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 	retval = validate_change(cs, &trialcs);
 	if (retval < 0)
 		return retval;
-	cpus_unchanged = cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed);
 	mutex_lock(&callback_mutex);
 	cs->cpus_allowed = trialcs.cpus_allowed;
 	mutex_unlock(&callback_mutex);
-	if (is_cpu_exclusive(cs) && !cpus_unchanged)
-		update_cpu_domains(cs);
 	return 0;
 }
 
@@ -924,7 +868,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 	int fudge;
 	int retval;
 
-	/* top_cpuset.mems_allowed tracks node_online_map; it's read-only */
+	/*
+	 * top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY];
+	 * it's read-only
+	 */
 	if (cs == &top_cpuset)
 		return -EACCES;
 
@@ -941,8 +888,21 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 		retval = nodelist_parse(buf, trialcs.mems_allowed);
 		if (retval < 0)
 			goto done;
+		if (!nodes_intersects(trialcs.mems_allowed,
+					node_states[N_HIGH_MEMORY])) {
+			/*
+			 * error if only memoryless nodes specified.
+			 */
+			retval = -ENOSPC;
+			goto done;
+		}
 	}
-	nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map);
+	/*
+	 * Exclude memoryless nodes.  We know that trialcs.mems_allowed
+	 * contains at least one node with memory.
+	 */
+	nodes_and(trialcs.mems_allowed, trialcs.mems_allowed,
+					node_states[N_HIGH_MEMORY]);
 	oldmem = cs->mems_allowed;
 	if (nodes_equal(oldmem, trialcs.mems_allowed)) {
 		retval = 0;		/* Too easy - nothing to do */
@@ -1067,7 +1027,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
 {
 	int turning_on;
 	struct cpuset trialcs;
-	int err, cpu_exclusive_changed;
+	int err;
 
 	turning_on = (simple_strtoul(buf, NULL, 10) != 0);
 
@@ -1080,14 +1040,10 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
 	err = validate_change(cs, &trialcs);
 	if (err < 0)
 		return err;
-	cpu_exclusive_changed =
-		(is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs));
 	mutex_lock(&callback_mutex);
 	cs->flags = trialcs.flags;
 	mutex_unlock(&callback_mutex);
 
-	if (cpu_exclusive_changed)
-		update_cpu_domains(cs);
 	return 0;
 }
 
@@ -1445,7 +1401,7 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
 	ssize_t retval = 0;
 	char *s;
 
-	if (!(page = (char *)__get_free_page(GFP_KERNEL)))
+	if (!(page = (char *)__get_free_page(GFP_TEMPORARY)))
 		return -ENOMEM;
 
 	s = page;
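The update_nodemask() hunks add two ordered steps: reject a request naming only memoryless nodes with -ENOSPC before anything is modified, then AND the surviving mask down to nodes that actually have memory. A sketch of that validation order in the same userspace bitmask style (the helper name, error convention, and mask values are illustrative):

#include <errno.h>
#include <stdio.h>

#define NODES_WITH_MEMORY 0x3ul		/* nodes 0 and 1 have memory */

/* Mirrors the new checks: fail before mutating anything, then clamp
 * the accepted request to nodes with memory. */
static int update_nodemask(unsigned long *mems_allowed, unsigned long req)
{
	if (!(req & NODES_WITH_MEMORY))
		return -ENOSPC;		/* only memoryless nodes requested */
	*mems_allowed = req & NODES_WITH_MEMORY;
	return 0;
}

int main(void)
{
	unsigned long mems = 0;

	/* node 2 is memoryless: the request is rejected outright */
	printf("req=0x4 -> %d\n", update_nodemask(&mems, 0x4ul));
	/* nodes 1-2 requested: memoryless node 2 is silently dropped */
	if (update_nodemask(&mems, 0x6ul) == 0)
		printf("req=0x6 -> mems=0x%lx\n", mems);
	return 0;
}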
@@ -1947,17 +1903,6 @@ static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR);
 }
 
-/*
- * Locking note on the strange update_flag() call below:
- *
- * If the cpuset being removed is marked cpu_exclusive, then simulate
- * turning cpu_exclusive off, which will call update_cpu_domains().
- * The lock_cpu_hotplug() call in update_cpu_domains() must not be
- * made while holding callback_mutex.  Elsewhere the kernel nests
- * callback_mutex inside lock_cpu_hotplug() calls.  So the reverse
- * nesting would risk an ABBA deadlock.
- */
-
 static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 {
 	struct cpuset *cs = dentry->d_fsdata;
@@ -1977,13 +1922,6 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 		mutex_unlock(&manage_mutex);
 		return -EBUSY;
 	}
-	if (is_cpu_exclusive(cs)) {
-		int retval = update_flag(CS_CPU_EXCLUSIVE, cs, "0");
-		if (retval < 0) {
-			mutex_unlock(&manage_mutex);
-			return retval;
-		}
-	}
 	parent = cs->parent;
 	mutex_lock(&callback_mutex);
 	set_bit(CS_REMOVED, &cs->flags);
@@ -2098,8 +2036,9 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
 
 /*
  * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
- * cpu_online_map and node_online_map.  Force the top cpuset to track
- * whats online after any CPU or memory node hotplug or unplug event.
+ * cpu_online_map and node_states[N_HIGH_MEMORY].  Force the top cpuset to
+ * track what's online after any CPU or memory node hotplug or unplug
+ * event.
 *
 * To ensure that we don't remove a CPU or node from the top cpuset
 * that is currently in use by a child cpuset (which would violate
@@ -2119,7 +2058,7 @@ static void common_cpu_mem_hotplug_unplug(void)
 
 	guarantee_online_cpus_mems_in_subtree(&top_cpuset);
 	top_cpuset.cpus_allowed = cpu_online_map;
-	top_cpuset.mems_allowed = node_online_map;
+	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 
 	mutex_unlock(&callback_mutex);
 	mutex_unlock(&manage_mutex);
@@ -2147,8 +2086,9 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb,
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
- * Keep top_cpuset.mems_allowed tracking node_online_map.
- * Call this routine anytime after you change node_online_map.
+ * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
+ * Call this routine anytime after you change
+ * node_states[N_HIGH_MEMORY].
 * See also the previous routine cpuset_handle_cpuhp().
 */
 
@@ -2167,7 +2107,7 @@ void cpuset_track_online_nodes(void)
 void __init cpuset_init_smp(void)
 {
 	top_cpuset.cpus_allowed = cpu_online_map;
-	top_cpuset.mems_allowed = node_online_map;
+	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 
 	hotcpu_notifier(cpuset_handle_cpuhp, 0);
 }
@@ -2309,7 +2249,7 @@ void cpuset_init_current_mems_allowed(void)
 *
 * Description: Returns the nodemask_t mems_allowed of the cpuset
 * attached to the specified @tsk.  Guaranteed to return some non-empty
- * subset of node_online_map, even if this means going outside the
+ * subset of node_states[N_HIGH_MEMORY], even if this means going outside the
 * tasks cpuset.
 **/
 
@@ -2491,12 +2431,12 @@ int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
 	node = zone_to_nid(z);
 	if (node_isset(node, current->mems_allowed))
 		return 1;
-	/*
-	 * Allow tasks that have access to memory reserves because they have
-	 * been OOM killed to get memory anywhere.
-	 */
-	if (unlikely(test_thread_flag(TIF_MEMDIE)))
-		return 1;
+	/*
+	 * Allow tasks that have access to memory reserves because they have
+	 * been OOM killed to get memory anywhere.
+	 */
+	if (unlikely(test_thread_flag(TIF_MEMDIE)))
+		return 1;
 	return 0;
 }
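common_cpu_mem_hotplug_unplug() repairs the subtree before it rewrites top_cpuset, so no child is left holding an empty mask that points only at unplugged nodes. A sketch of that ordering (repair descendants first, then reset the root), again as a userspace model with hypothetical masks and a single-child tree to keep it short:

#include <stdio.h>

#define NODES_WITH_MEMORY 0x3ul	/* after unplug: nodes 0-1 remain */

struct cpuset {
	struct cpuset *parent;
	struct cpuset *child;
	unsigned long mems_allowed;
};

/* Step 1: walk the subtree and refill any mask that no longer
 * intersects online memory, borrowing from the nearest ancestor
 * that still does. */
static void guarantee_online_mems_in_subtree(struct cpuset *cs)
{
	struct cpuset *p = cs;

	while (p && !(p->mems_allowed & NODES_WITH_MEMORY))
		p = p->parent;
	cs->mems_allowed = p ? p->mems_allowed & NODES_WITH_MEMORY
			     : NODES_WITH_MEMORY;
	if (cs->child)
		guarantee_online_mems_in_subtree(cs->child);
}

int main(void)
{
	struct cpuset top = { NULL, NULL, 0x7ul };
	struct cpuset kid = { &top, NULL, 0x4ul };  /* node 2: just unplugged */
	top.child = &kid;

	guarantee_online_mems_in_subtree(&top);
	top.mems_allowed = NODES_WITH_MEMORY;	/* step 2: reset the root */

	printf("top=0x%lx kid=0x%lx\n", top.mems_allowed, kid.mems_allowed);
	return 0;
}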
@@ -2566,41 +2506,20 @@ int cpuset_mem_spread_node(void)
 EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
 
 /**
- * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors?
- * @p: pointer to task_struct of some other task.
- *
- * Description: Return true if the nearest mem_exclusive ancestor
- * cpusets of tasks @p and current overlap.  Used by oom killer to
- * determine if task @p's memory usage might impact the memory
- * available to the current task.
- *
- * Call while holding callback_mutex.
+ * cpuset_mems_allowed_intersects - Does @tsk1's mems_allowed intersect @tsk2's?
+ * @tsk1: pointer to task_struct of some task.
+ * @tsk2: pointer to task_struct of some other task.
+ *
+ * Description: Return true if @tsk1's mems_allowed intersects the
+ * mems_allowed of @tsk2.  Used by the OOM killer to determine if
+ * one task's memory usage might impact the memory available
+ * to the other.
 **/
-int cpuset_excl_nodes_overlap(const struct task_struct *p)
+int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
+				   const struct task_struct *tsk2)
 {
-	const struct cpuset *cs1, *cs2;	/* my and p's cpuset ancestors */
-	int overlap = 1;		/* do cpusets overlap? */
-
-	task_lock(current);
-	if (current->flags & PF_EXITING) {
-		task_unlock(current);
-		goto done;
-	}
-	cs1 = nearest_exclusive_ancestor(current->cpuset);
-	task_unlock(current);
-
-	task_lock((struct task_struct *)p);
-	if (p->flags & PF_EXITING) {
-		task_unlock((struct task_struct *)p);
-		goto done;
-	}
-	cs2 = nearest_exclusive_ancestor(p->cpuset);
-	task_unlock((struct task_struct *)p);
-
-	overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
-done:
-	return overlap;
+	return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
 }
 
 /*
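The replacement helper drops the exclusive-ancestor walk, the task locking, and the PF_EXITING checks entirely: once each task carries its own mems_allowed copy, a plain nodes_intersects() on the two masks is enough. A sketch of how an OOM-victim scan might consult it (the task list, names, and scoring loop are illustrative, not the kernel's oom_kill code):

#include <stdio.h>

struct task {
	const char *comm;
	unsigned long mems_allowed;	/* per-task nodemask, as a bitmask */
};

/* The new helper, minus kernel types: a constrained-allocation OOM
 * only cares whether two tasks can touch the same memory nodes. */
static int mems_allowed_intersects(const struct task *t1,
				   const struct task *t2)
{
	return (t1->mems_allowed & t2->mems_allowed) != 0;
}

int main(void)
{
	struct task current_task = { "alloc-heavy", 0x3ul };	/* nodes 0-1 */
	struct task candidates[] = {
		{ "same-nodes",  0x2ul },	/* overlaps on node 1 */
		{ "other-nodes", 0xcul },	/* nodes 2-3: disjoint */
	};

	/* only tasks sharing nodes with us can relieve our OOM */
	for (int i = 0; i < 2; i++)
		printf("%s: %s\n", candidates[i].comm,
		       mems_allowed_intersects(&current_task, &candidates[i])
			       ? "eligible victim" : "skip");
	return 0;
}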