X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fmempolicy.c;h=6c7ba1a63d23b05931003d97b8932c39ca983060;hb=3a2d5b700132f35401f1d9e22fe3c2cab02c2549;hp=3d6ac9505d0716e83c1eeee3a58ede5c5baa0a1b;hpb=b160292cc216a50fd0cd386b0bda2cd48352c73b;p=linux-2.6

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 3d6ac9505d..6c7ba1a63d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -72,23 +72,23 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -110,25 +110,57 @@ struct mempolicy default_policy = {
 	.policy = MPOL_DEFAULT,
 };
 
+static void mpol_rebind_policy(struct mempolicy *pol,
+					const nodemask_t *newmask);
+
 /* Do sanity checking on a policy */
 static int mpol_check_policy(int mode, nodemask_t *nodes)
 {
-	int empty = nodes_empty(*nodes);
+	int was_empty, is_empty;
+
+	if (!nodes)
+		return 0;
+
+	/*
+	 * "Contextualize" the in-coming nodemask for cpusets:
+	 * Remember whether in-coming nodemask was empty.  If not,
+	 * restrict the nodes to the allowed nodes in the cpuset.
+	 * This is guaranteed to be a subset of nodes with memory.
+	 */
+	cpuset_update_task_memory_state();
+	is_empty = was_empty = nodes_empty(*nodes);
+	if (!was_empty) {
+		nodes_and(*nodes, *nodes, cpuset_current_mems_allowed);
+		is_empty = nodes_empty(*nodes);	/* after "contextualization" */
+	}
 
 	switch (mode) {
 	case MPOL_DEFAULT:
-		if (!empty)
+		/*
+		 * require caller to specify an empty nodemask
+		 * before "contextualization"
+		 */
+		if (!was_empty)
 			return -EINVAL;
 		break;
 	case MPOL_BIND:
 	case MPOL_INTERLEAVE:
-		/* Preferred will only use the first bit, but allow
-		   more for now. */
-		if (empty)
+		/*
+		 * require at least 1 valid node after "contextualization"
+		 */
+		if (is_empty)
+			return -EINVAL;
+		break;
+	case MPOL_PREFERRED:
+		/*
+		 * Did caller specify invalid nodes?
+		 * Don't silently accept this as "local allocation".
+		 */
+		if (!was_empty && is_empty)
 			return -EINVAL;
 		break;
 	}
-	return nodes_subset(*nodes, node_online_map) ? 0 : -EINVAL;
+	return 0;
 }
 
 /* Generate a custom zonelist for the BIND policy. */
@@ -185,7 +217,7 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
 	switch (mode) {
 	case MPOL_INTERLEAVE:
 		policy->v.nodes = *nodes;
-		if (nodes_weight(*nodes) == 0) {
+		if (nodes_weight(policy->v.nodes) == 0) {
 			kmem_cache_free(policy_cache, policy);
 			return ERR_PTR(-EINVAL);
 		}
@@ -416,18 +448,6 @@ static int mbind_range(struct vm_area_struct *vma, unsigned long start,
 	return err;
 }
 
-static int contextualize_policy(int mode, nodemask_t *nodes)
-{
-	if (!nodes)
-		return 0;
-
-	cpuset_update_task_memory_state();
-	if (!cpuset_nodes_subset_current_mems_allowed(*nodes))
-		return -EINVAL;
-	return mpol_check_policy(mode, nodes);
-}
-
-
 /*
  * Update task->flags PF_MEMPOLICY bit: set iff non-default
  * mempolicy. Allows more rapid checking of this (combined perhaps
@@ -459,11 +479,11 @@ static void mpol_set_task_struct_flag(void)
 }
 
 /* Set the process memory policy */
-long do_set_mempolicy(int mode, nodemask_t *nodes)
+static long do_set_mempolicy(int mode, nodemask_t *nodes)
 {
 	struct mempolicy *new;
 
-	if (contextualize_policy(mode, nodes))
+	if (mpol_check_policy(mode, nodes))
 		return -EINVAL;
 	new = mpol_new(mode, nodes);
 	if (IS_ERR(new))
@@ -494,9 +514,9 @@ static void get_zonemask(struct mempolicy *p, nodemask_t *nodes)
 		*nodes = p->v.nodes;
 		break;
 	case MPOL_PREFERRED:
-		/* or use current node instead of online map? */
+		/* or use current node instead of memory_map? */
 		if (p->v.preferred_node < 0)
-			*nodes = node_online_map;
+			*nodes = node_states[N_HIGH_MEMORY];
 		else
 			node_set(p->v.preferred_node, *nodes);
 		break;
@@ -519,8 +539,8 @@ static int lookup_node(struct mm_struct *mm, unsigned long addr)
 }
 
 /* Retrieve NUMA policy */
-long do_get_mempolicy(int *policy, nodemask_t *nmask,
-			unsigned long addr, unsigned long flags)
+static long do_get_mempolicy(int *policy, nodemask_t *nmask,
+			unsigned long addr, unsigned long flags)
 {
 	int err;
 	struct mm_struct *mm = current->mm;
@@ -528,8 +548,18 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask,
 	struct mempolicy *pol = current->mempolicy;
 
 	cpuset_update_task_memory_state();
-	if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
+	if (flags &
+		~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_F_MEMS_ALLOWED))
 		return -EINVAL;
+
+	if (flags & MPOL_F_MEMS_ALLOWED) {
+		if (flags & (MPOL_F_NODE|MPOL_F_ADDR))
+			return -EINVAL;
+		*policy = 0;	/* just so it's initialized */
+		*nmask = cpuset_current_mems_allowed;
+		return 0;
+	}
+
 	if (flags & MPOL_F_ADDR) {
 		down_read(&mm->mmap_sem);
 		vma = find_vma_intersection(mm, addr, addr+1);
@@ -601,7 +631,8 @@ static struct page *new_node_page(struct page *page, unsigned long node, int **x
  * Migrate pages from one node to a target node.
  * Returns error or the number of pages not migrated.
  */
-int migrate_to_node(struct mm_struct *mm, int source, int dest, int flags)
+static int migrate_to_node(struct mm_struct *mm, int source, int dest,
+			   int flags)
 {
 	nodemask_t nmask;
 	LIST_HEAD(pagelist);
@@ -706,12 +737,29 @@ out:
 }
 
+/*
+ * Allocate a new page for page migration based on vma policy.
+ * Start assuming that page is mapped by vma pointed to by @private.
+ * Search forward from there, if not.  N.B., this assumes that the
+ * list of pages handed to migrate_pages()--which is how we get here--
+ * is in virtual address order.
+ */
 static struct page *new_vma_page(struct page *page, unsigned long private,
 		int **x)
 {
 	struct vm_area_struct *vma = (struct vm_area_struct *)private;
+	unsigned long uninitialized_var(address);
+
+	while (vma) {
+		address = page_address_in_vma(page, vma);
+		if (address != -EFAULT)
+			break;
+		vma = vma->vm_next;
+	}
 
-	return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
-			page_address_in_vma(page, vma));
+	/*
+	 * if !vma, alloc_page_vma() will use task or system default policy
+	 */
+	return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 }
 
 #else
@@ -732,8 +780,9 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int *
 }
 #endif
 
-long do_mbind(unsigned long start, unsigned long len,
-		unsigned long mode, nodemask_t *nmask, unsigned long flags)
+static long do_mbind(unsigned long start, unsigned long len,
+		     unsigned long mode, nodemask_t *nmask,
+		     unsigned long flags)
 {
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
@@ -881,10 +930,6 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len,
 	err = get_nodes(&nodes, nmask, maxnode);
 	if (err)
 		return err;
-#ifdef CONFIG_CPUSETS
-	/* Restrict the nodes to the allowed nodes in the cpuset */
-	nodes_and(nodes, nodes, current->mems_allowed);
-#endif
 	return do_mbind(start, len, mode, &nodes, flags);
 }
 
@@ -924,7 +969,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
 
 	/* Find the mm_struct */
 	read_lock(&tasklist_lock);
-	task = pid ? find_task_by_pid(pid) : current;
+	task = pid ? find_task_by_vpid(pid) : current;
 	if (!task) {
 		read_unlock(&tasklist_lock);
 		return -ESRCH;
@@ -955,7 +1000,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
 		goto out;
 	}
 
-	if (!nodes_subset(new, node_online_map)) {
+	if (!nodes_subset(new, node_states[N_HIGH_MEMORY])) {
 		err = -EINVAL;
 		goto out;
 	}
@@ -978,7 +1023,8 @@ asmlinkage long sys_get_mempolicy(int __user *policy,
 				unsigned long maxnode,
 				unsigned long addr, unsigned long flags)
 {
-	int err, pval;
+	int err;
+	int uninitialized_var(pval);
 	nodemask_t nodes;
 
 	if (nmask != NULL && maxnode < MAX_NUMNODES)
@@ -1371,7 +1417,6 @@ EXPORT_SYMBOL(alloc_pages_current);
  * keeps mempolicies cpuset relative after its cpuset moves.  See
  * further kernel/cpuset.c update_nodemask().
  */
-void *cpuset_being_rebound;
 
 /* Slow path of a mempolicy copy */
 struct mempolicy *__mpol_copy(struct mempolicy *old)
@@ -1527,8 +1572,8 @@ static void sp_delete(struct shared_policy *sp, struct sp_node *n)
 	kmem_cache_free(sn_cache, n);
 }
 
-struct sp_node *
-sp_alloc(unsigned long start, unsigned long end, struct mempolicy *pol)
+static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
+				struct mempolicy *pol)
 {
 	struct sp_node *n = kmem_cache_alloc(sn_cache, GFP_KERNEL);
 
@@ -1677,7 +1722,7 @@ void __init numa_policy_init(void)
 	 * fall back to the largest node if they're all smaller.
 	 */
 	nodes_clear(interleave_nodes);
-	for_each_online_node(nid) {
+	for_each_node_state(nid, N_HIGH_MEMORY) {
 		unsigned long total_pages = node_present_pages(nid);
 
 		/* Preserve the largest node */
@@ -1706,7 +1751,8 @@ void numa_default_policy(void)
 }
 
 /* Migrate a policy to a different set of nodes */
-void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
+static void mpol_rebind_policy(struct mempolicy *pol,
+			       const nodemask_t *newmask)
 {
 	nodemask_t *mpolmask;
 	nodemask_t tmp;
@@ -1950,7 +1996,7 @@ int show_numa_map(struct seq_file *m, void *v)
 
 	if (file) {
 		seq_printf(m, " file=");
-		seq_path(m, file->f_path.mnt, file->f_path.dentry, "\n\t= ");
+		seq_path(m, &file->f_path, "\n\t= ");
 	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
 		seq_printf(m, " heap");
 	} else if (vma->vm_start <= mm->start_stack &&
@@ -1963,7 +2009,7 @@ int show_numa_map(struct seq_file *m, void *v)
 		seq_printf(m, " huge");
 	} else {
 		check_pgd_range(vma, vma->vm_start, vma->vm_end,
-			&node_online_map, MPOL_MF_STATS, md);
	}
 
 	if (!md->pages)
@@ -1990,7 +2036,7 @@ int show_numa_map(struct seq_file *m, void *v)
 	if (md->writeback)
 		seq_printf(m," writeback=%lu", md->writeback);
 
-	for_each_online_node(n)
+	for_each_node_state(n, N_HIGH_MEMORY)
 		if (md->node[n])
 			seq_printf(m, " N%d=%lu", n, md->node[n]);
 out:
@@ -2001,4 +2047,3 @@ out:
 	m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
 	return 0;
 }
-