err.no Git - linux-2.6/blobdiff - mm/mempolicy.c
[PATCH] Add NUMA policy support for huge pages.
[linux-2.6] / mm / mempolicy.c
index 5abc57c2b8bdd3804708267832fd20d29a65be71..45c51ac63443adc3cf5cb9feefbd17f9be4a2b9c 100644 (file)
@@ -161,6 +161,10 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
        switch (mode) {
        case MPOL_INTERLEAVE:
                policy->v.nodes = *nodes;
+               if (nodes_weight(*nodes) == 0) {
+                       kmem_cache_free(policy_cache, policy);
+                       return ERR_PTR(-EINVAL);
+               }
                break;
        case MPOL_PREFERRED:
                policy->v.preferred_node = first_node(*nodes);
@@ -189,17 +193,15 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
        orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        do {
-               unsigned long pfn;
+               struct page *page;
                unsigned int nid;
 
                if (!pte_present(*pte))
                        continue;
-               pfn = pte_pfn(*pte);
-               if (!pfn_valid(pfn)) {
-                       print_bad_pte(vma, *pte, addr);
+               page = vm_normal_page(vma, addr, *pte);
+               if (!page)
                        continue;
-               }
-               nid = pfn_to_nid(pfn);
+               nid = page_to_nid(page);
                if (!node_isset(nid, *nodes))
                        break;
        } while (pte++, addr += PAGE_SIZE, addr != end);
@@ -269,8 +271,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
        first = find_vma(mm, start);
        if (!first)
                return ERR_PTR(-EFAULT);
-       if (first->vm_flags & VM_RESERVED)
-               return ERR_PTR(-EACCES);
        prev = NULL;
        for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
                if (!vma->vm_next && vma->vm_end < end)
@@ -785,6 +785,34 @@ static unsigned offset_il_node(struct mempolicy *pol,
        return nid;
 }
 
+/* Determine a node number for interleave; shift must be >= PAGE_SHIFT */
+static inline unsigned interleave_nid(struct mempolicy *pol,
+                struct vm_area_struct *vma, unsigned long addr, int shift)
+{
+       if (vma) {
+               /* vm_pgoff is in PAGE_SIZE units; rescale to (1 << shift) units */
+               unsigned long off = vma->vm_pgoff >> (shift - PAGE_SHIFT);
+
+               off += (addr - vma->vm_start) >> shift;
+               return offset_il_node(pol, vma, off);
+       } else
+               return interleave_nodes(pol);   /* process interleaving */
+}
+
+/* Select the zonelist a huge page at addr in vma should be allocated from. */
+struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
+{
+       struct mempolicy *policy = get_vma_policy(current, vma, addr);
+       unsigned node;
+
+       if (policy->policy != MPOL_INTERLEAVE)
+               return zonelist_policy(GFP_HIGHUSER, policy);
+
+       /* Interleaving is done per huge page, hence HPAGE_SHIFT. */
+       node = interleave_nid(policy, vma, addr, HPAGE_SHIFT);
+       return NODE_DATA(node)->node_zonelists + gfp_zone(GFP_HIGHUSER);
+}
+
 /* Allocate a page in interleaved policy.
    Own path because it needs to do special accounting. */
 static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
@@ -833,15 +861,8 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 
        if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
                unsigned nid;
-               if (vma) {
-                       unsigned long off;
-                       off = vma->vm_pgoff;
-                       off += (addr - vma->vm_start) >> PAGE_SHIFT;
-                       nid = offset_il_node(pol, vma, off);
-               } else {
-                       /* fall back to process interleaving */
-                       nid = interleave_nodes(pol);
-               }
+
+               nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
                return alloc_page_interleave(gfp, 0, nid);
        }
        return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));