switch (mode) {
case MPOL_INTERLEAVE:
policy->v.nodes = *nodes;
+ if (nodes_weight(*nodes) == 0) {
+ kmem_cache_free(policy_cache, policy);
+ return ERR_PTR(-EINVAL);
+ }
break;
case MPOL_PREFERRED:
policy->v.preferred_node = first_node(*nodes);
orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
do {
- unsigned long pfn;
+ struct page *page;
unsigned int nid;
if (!pte_present(*pte))
continue;
- pfn = pte_pfn(*pte);
- if (!pfn_valid(pfn)) {
- print_bad_pte(vma, *pte, addr);
+ page = vm_normal_page(vma, addr, *pte);
+ if (!page)
continue;
- }
- nid = pfn_to_nid(pfn);
+ nid = page_to_nid(page);
if (!node_isset(nid, *nodes))
break;
} while (pte++, addr += PAGE_SIZE, addr != end);
first = find_vma(mm, start);
if (!first)
return ERR_PTR(-EFAULT);
- if (first->vm_flags & VM_RESERVED)
- return ERR_PTR(-EACCES);
prev = NULL;
for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
if (!vma->vm_next && vma->vm_end < end)
struct vm_area_struct *vma = NULL;
struct mempolicy *pol = current->mempolicy;
+ cpuset_update_current_mems_allowed();
if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
return -EINVAL;
if (flags & MPOL_F_ADDR) {
return nid;
}
+/*
+ * Determine a node number for an interleaved allocation.
+ *
+ * With a vma, the interleave index is vma->vm_pgoff plus the distance of
+ * @addr from vma->vm_start shifted right by @shift, and the node is chosen
+ * by offset_il_node().  Without a vma, the choice is left to
+ * interleave_nodes().
+ */
+static inline unsigned interleave_nid(struct mempolicy *pol,
+		 struct vm_area_struct *vma, unsigned long addr, int shift)
+{
+	unsigned long index;
+
+	if (!vma)
+		return interleave_nodes(pol);
+
+	index = vma->vm_pgoff + ((addr - vma->vm_start) >> shift);
+	return offset_il_node(pol, vma, index);
+}
+
+/*
+ * Return a zonelist suitable for a huge page allocation at @addr in @vma.
+ *
+ * The policy is looked up with get_vma_policy().  For MPOL_INTERLEAVE the
+ * node is picked with a shift of HPAGE_SHIFT and that node's GFP_HIGHUSER
+ * zonelist is returned; every other policy goes through zonelist_policy().
+ */
+struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct mempolicy *pol = get_vma_policy(current, vma, addr);
+	unsigned nid;
+
+	if (pol->policy != MPOL_INTERLEAVE)
+		return zonelist_policy(GFP_HIGHUSER, pol);
+
+	nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
+	return &NODE_DATA(nid)->node_zonelists[gfp_zone(GFP_HIGHUSER)];
+}
+
/* Allocate a page in interleaved policy.
Own path because it needs to do special accounting. */
static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
unsigned nid;
- if (vma) {
- unsigned long off;
- off = vma->vm_pgoff;
- off += (addr - vma->vm_start) >> PAGE_SHIFT;
- nid = offset_il_node(pol, vma, off);
- } else {
- /* fall back to process interleaving */
- nid = interleave_nodes(pol);
- }
+
+ nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
return alloc_page_interleave(gfp, 0, nid);
}
return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));
kmem_cache_free(policy_cache, p);
}
-/*
- * Hugetlb policy. Same as above, just works with node numbers instead of
- * zonelists.
- */
-
-/* Find first node suitable for an allocation */
-int mpol_first_node(struct vm_area_struct *vma, unsigned long addr)
-{
- struct mempolicy *pol = get_vma_policy(current, vma, addr);
-
- switch (pol->policy) {
- case MPOL_DEFAULT:
- return numa_node_id();
- case MPOL_BIND:
- return pol->v.zonelist->zones[0]->zone_pgdat->node_id;
- case MPOL_INTERLEAVE:
- return interleave_nodes(pol);
- case MPOL_PREFERRED:
- return pol->v.preferred_node >= 0 ?
- pol->v.preferred_node : numa_node_id();
- }
- BUG();
- return 0;
-}
-
-/* Find secondary valid nodes for an allocation */
-int mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long addr)
-{
- struct mempolicy *pol = get_vma_policy(current, vma, addr);
-
- switch (pol->policy) {
- case MPOL_PREFERRED:
- case MPOL_DEFAULT:
- case MPOL_INTERLEAVE:
- return 1;
- case MPOL_BIND: {
- struct zone **z;
- for (z = pol->v.zonelist->zones; *z; z++)
- if ((*z)->zone_pgdat->node_id == nid)
- return 1;
- return 0;
- }
- default:
- BUG();
- return 0;
- }
-}
-
/*
* Shared memory backing store policy support.
*
{
do_set_mempolicy(MPOL_DEFAULT, NULL);
}
+
+/*
+ * Migrate a policy to a different set of nodes.
+ *
+ * @pol: policy to rewrite in place; a NULL policy is silently ignored.
+ * @old: node set the policy is currently expressed against.
+ * @new: node set the policy should be expressed against afterwards.
+ *
+ * The nodes named by the policy are translated with nodes_remap() /
+ * node_remap().  An unknown policy type is a BUG().
+ */
+static void rebind_policy(struct mempolicy *pol, const nodemask_t *old,
+							const nodemask_t *new)
+{
+	nodemask_t tmp;
+
+	if (!pol)
+		return;
+
+	switch (pol->policy) {
+	case MPOL_DEFAULT:
+		break;
+	case MPOL_INTERLEAVE:
+		nodes_remap(tmp, pol->v.nodes, *old, *new);
+		pol->v.nodes = tmp;
+		/* Keep the task's interleave cursor inside the remapped set */
+		current->il_next = node_remap(current->il_next, *old, *new);
+		break;
+	case MPOL_PREFERRED:
+		/*
+		 * A negative preferred_node is a sentinel, not a node number.
+		 * It is not a valid bit index into *old, so leave it alone
+		 * instead of handing it to node_remap().
+		 */
+		if (pol->v.preferred_node >= 0)
+			pol->v.preferred_node =
+				node_remap(pol->v.preferred_node, *old, *new);
+		break;
+	case MPOL_BIND: {
+		nodemask_t nodes;
+		struct zone **z;
+		struct zonelist *zonelist;
+
+		/* Recover the set of nodes the current zonelist covers */
+		nodes_clear(nodes);
+		for (z = pol->v.zonelist->zones; *z; z++)
+			node_set((*z)->zone_pgdat->node_id, nodes);
+		nodes_remap(tmp, nodes, *old, *new);
+		nodes = tmp;
+
+		zonelist = bind_zonelist(&nodes);
+
+		/* If no mem, then zonelist is NULL and we keep old zonelist.
+		 * If that old zonelist has no remaining mems_allowed nodes,
+		 * then zonelist_policy() will "FALL THROUGH" to MPOL_DEFAULT.
+		 */
+
+		if (zonelist) {
+			/* Good - got mem - substitute new zonelist */
+			kfree(pol->v.zonelist);
+			pol->v.zonelist = zonelist;
+		}
+		break;
+	}
+	default:
+		BUG();
+		break;
+	}
+}
+
+/*
+ * The current task has been moved from node set @old to node set @new.
+ * Fix up its task mempolicy accordingly.
+ *
+ * TODO - fixup current->mm->vma and shmfs/tmpfs/hugetlbfs policies as well,
+ * once we have a cpuset mechanism to mark which cpuset subtree is migrating.
+ */
+void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new)
+{
+	struct mempolicy *task_pol = current->mempolicy;
+
+	rebind_policy(task_pol, old, new);
+}