X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fmempolicy.c;h=71b84b45154afc59efa6e0ffd3f088603693417a;hb=75ecb1a4d148b274aa9acd1d6ccaca0a4654784e;hp=259a706bd83ec5d3392b37dcf093b2c3ec953145;hpb=9cdd79c9b99873d600d397fda012fc3f57cc2776;p=linux-2.6

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 259a706bd8..71b84b4515 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -101,8 +101,6 @@ static struct kmem_cache *policy_cache;
 static struct kmem_cache *sn_cache;
 
-#define PDprintk(fmt...)
-
 /* Highest zone. An specific allocation for a zone below that is not
    policied. */
 enum zone_type policy_zone = 0;
 
@@ -144,7 +142,7 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
 	max++;	/* space for zlcache_ptr (see mmzone.h) */
 	zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL);
 	if (!zl)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 	zl->zlcache_ptr = NULL;
 	num = 0;
 	/* First put in the highest zones from all nodes, then all the next
@@ -162,6 +160,10 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
 			break;
 		k--;
 	}
+	if (num == 0) {
+		kfree(zl);
+		return ERR_PTR(-EINVAL);
+	}
 	zl->zones[num] = NULL;
 	return zl;
 }
@@ -171,7 +173,9 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
 {
 	struct mempolicy *policy;
 
-	PDprintk("setting mode %d nodes[0] %lx\n", mode, nodes_addr(*nodes)[0]);
+	pr_debug("setting mode %d nodes[0] %lx\n",
+		 mode, nodes ? nodes_addr(*nodes)[0] : -1);
+
 	if (mode == MPOL_DEFAULT)
 		return NULL;
 	policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
@@ -193,9 +197,10 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
 		break;
 	case MPOL_BIND:
 		policy->v.zonelist = bind_zonelist(nodes);
-		if (policy->v.zonelist == NULL) {
+		if (IS_ERR(policy->v.zonelist)) {
+			void *error_code = policy->v.zonelist;
 			kmem_cache_free(policy_cache, policy);
-			return ERR_PTR(-ENOMEM);
+			return error_code;
 		}
 		break;
 	}
@@ -316,15 +321,6 @@ static inline int check_pgd_range(struct vm_area_struct *vma,
 	return 0;
 }
 
-/* Check if a vma is migratable */
-static inline int vma_migratable(struct vm_area_struct *vma)
-{
-	if (vma->vm_flags & (
-		VM_LOCKED|VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED))
-		return 0;
-	return 1;
-}
-
 /*
  * Check if all pages in a range are on a set of nodes.
  * If pagelist != NULL then isolate pages from the LRU and
@@ -383,7 +379,7 @@ static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new)
 	int err = 0;
 	struct mempolicy *old = vma->vm_policy;
 
-	PDprintk("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
+	pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
 		 vma->vm_start, vma->vm_end, vma->vm_pgoff,
 		 vma->vm_ops, vma->vm_file,
 		 vma->vm_ops ? vma->vm_ops->set_policy : NULL);
@@ -598,7 +594,7 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
 
 static struct page *new_node_page(struct page *page, unsigned long node, int **x)
 {
-	return alloc_pages_node(node, GFP_HIGHUSER, 0);
+	return alloc_pages_node(node, GFP_HIGHUSER_MOVABLE, 0);
 }
 
 /*
@@ -714,7 +710,8 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int *
 {
 	struct vm_area_struct *vma = (struct vm_area_struct *)private;
 
-	return alloc_page_vma(GFP_HIGHUSER, vma, page_address_in_vma(page, vma));
+	return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
+					page_address_in_vma(page, vma));
 }
 
 #else
@@ -780,8 +777,8 @@ long do_mbind(unsigned long start, unsigned long len,
 	if (!new)
 		flags |= MPOL_MF_DISCONTIG_OK;
 
-	PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len,
-		mode,nodes_addr(nodes)[0]);
+	pr_debug("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len,
+		 mode, nmask ? nodes_addr(*nmask)[0] : -1);
 
 	down_write(&mm->mmap_sem);
 	vma = check_range(mm, start, end, nmask,
@@ -1206,7 +1203,8 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
 
 #ifdef CONFIG_HUGETLBFS
 /* Return a zonelist suitable for a huge page allocation. */
-struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
+struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
+							gfp_t gfp_flags)
 {
 	struct mempolicy *pol = get_vma_policy(current, vma, addr);
 
@@ -1214,7 +1212,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
 		unsigned nid;
 
 		nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
-		return NODE_DATA(nid)->node_zonelists + gfp_zone(GFP_HIGHUSER);
+		return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags);
 	}
 	return zonelist_policy(GFP_HIGHUSER, pol);
 }
@@ -1438,7 +1436,7 @@ static void sp_insert(struct shared_policy *sp, struct sp_node *new)
 	}
 	rb_link_node(&new->nd, parent, p);
 	rb_insert_color(&new->nd, &sp->root);
-	PDprintk("inserting %lx-%lx: %d\n", new->start, new->end,
+	pr_debug("inserting %lx-%lx: %d\n", new->start, new->end,
 		 new->policy ? new->policy->policy : 0);
 }
 
@@ -1463,7 +1461,7 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
 
 static void sp_delete(struct shared_policy *sp, struct sp_node *n)
 {
-	PDprintk("deleting %lx-l%x\n", n->start, n->end);
+	pr_debug("deleting %lx-l%lx\n", n->start, n->end);
 	rb_erase(&n->nd, &sp->root);
 	mpol_free(n->policy);
 	kmem_cache_free(sn_cache, n);
@@ -1562,10 +1560,10 @@ int mpol_set_shared_policy(struct shared_policy *info,
 	struct sp_node *new = NULL;
 	unsigned long sz = vma_pages(vma);
 
-	PDprintk("set_shared_policy %lx sz %lu %d %lx\n",
+	pr_debug("set_shared_policy %lx sz %lu %d %lx\n",
 		 vma->vm_pgoff,
 		 sz, npol? npol->policy : -1,
-		npol ? nodes_addr(npol->v.nodes)[0] : -1);
+		 npol ? nodes_addr(npol->v.nodes)[0] : -1);
 
 	if (npol) {
 		new = sp_alloc(vma->vm_pgoff, vma->vm_pgoff + sz, npol);
@@ -1601,18 +1599,43 @@ void mpol_free_shared_policy(struct shared_policy *p)
 
 /* assumes fs == KERNEL_DS */
 void __init numa_policy_init(void)
 {
+	nodemask_t interleave_nodes;
+	unsigned long largest = 0;
+	int nid, prefer = 0;
+
 	policy_cache = kmem_cache_create("numa_policy",
 					 sizeof(struct mempolicy),
-					 0, SLAB_PANIC, NULL, NULL);
+					 0, SLAB_PANIC, NULL);
 
 	sn_cache = kmem_cache_create("shared_policy_node",
 				     sizeof(struct sp_node),
-				     0, SLAB_PANIC, NULL, NULL);
+				     0, SLAB_PANIC, NULL);
+
+	/*
+	 * Set interleaving policy for system init. Interleaving is only
+	 * enabled across suitably sized nodes (default is >= 16MB), or
+	 * fall back to the largest node if they're all smaller.
+	 */
+	nodes_clear(interleave_nodes);
+	for_each_online_node(nid) {
+		unsigned long total_pages = node_present_pages(nid);
+
+		/* Preserve the largest node */
+		if (largest < total_pages) {
+			largest = total_pages;
+			prefer = nid;
+		}
+
+		/* Interleave this node? */
+		if ((total_pages << PAGE_SHIFT) >= (16 << 20))
+			node_set(nid, interleave_nodes);
+	}
 
-	/* Set interleaving policy for system init. This way not all
-	   the data structures allocated at system boot end up in node zero. */
+	/* All too small, use the largest */
+	if (unlikely(nodes_empty(interleave_nodes)))
+		node_set(prefer, interleave_nodes);
 
-	if (do_set_mempolicy(MPOL_INTERLEAVE, &node_online_map))
+	if (do_set_mempolicy(MPOL_INTERLEAVE, &interleave_nodes))
 		printk("numa_policy_init: interleaving failed\n");
 }
@@ -1667,7 +1690,7 @@ void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
 		 * then zonelist_policy() will "FALL THROUGH" to MPOL_DEFAULT.
 		 */
 
-		if (zonelist) {
+		if (!IS_ERR(zonelist)) {
 			/* Good - got mem - substitute new zonelist */
 			kfree(pol->v.zonelist);
 			pol->v.zonelist = zonelist;
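
The central behavioural change above is that bind_zonelist() now reports why it failed: -ENOMEM for an allocation failure and -EINVAL for a nodemask with no usable zones, encoded in the returned pointer via ERR_PTR(), with callers such as mpol_new() and mpol_rebind_policy() testing the result with IS_ERR() instead of comparing against NULL. The snippet below is a minimal userspace sketch of that error-pointer convention, not code from this patch; the ERR_PTR/IS_ERR/PTR_ERR helpers mirror the kernel's <linux/err.h>, and allocate_list() is a made-up stand-in for bind_zonelist().

/* Minimal userspace sketch of the ERR_PTR convention adopted above. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO	4095

static inline void *ERR_PTR(long error)     { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Stand-in for bind_zonelist(): distinguishes "no memory" from "bad mask". */
static void *allocate_list(int nr_nodes)
{
	void *zl;

	if (nr_nodes == 0)
		return ERR_PTR(-EINVAL);	/* nothing usable in the mask */
	zl = malloc(16);
	if (!zl)
		return ERR_PTR(-ENOMEM);	/* allocation failure */
	return zl;
}

int main(void)
{
	void *zl = allocate_list(0);

	/* Callers check IS_ERR() instead of NULL and propagate the code. */
	if (IS_ERR(zl)) {
		fprintf(stderr, "allocate_list: error %ld\n", PTR_ERR(zl));
		return 1;
	}
	free(zl);
	return 0;
}

Because a single pointer return carries both the success value and the reason for failure, mpol_new() can simply forward whatever bind_zonelist() handed back, which is what the "return error_code;" hunk in the patch does.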