static struct kmem_cache *policy_cache;
static struct kmem_cache *sn_cache;
-#define PDprintk(fmt...)
-
/* Highest zone. An specific allocation for a zone below that is not
policied. */
enum zone_type policy_zone = 0;
max++; /* space for zlcache_ptr (see mmzone.h) */
zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL);
if (!zl)
- return NULL;
+ return ERR_PTR(-ENOMEM);
zl->zlcache_ptr = NULL;
num = 0;
/* First put in the highest zones from all nodes, then all the next
lower zones etc. Avoid empty zones because the memory allocator
doesn't like them. If you implement node hot removal you
have to fix that. */
- k = policy_zone;
+ k = MAX_NR_ZONES - 1;
while (1) {
for_each_node_mask(nd, *nodes) {
struct zone *z = &NODE_DATA(nd)->node_zones[k];
break;
k--;
}
+ if (num == 0) {
+ kfree(zl);
+ return ERR_PTR(-EINVAL);
+ }
zl->zones[num] = NULL;
return zl;
}
{
struct mempolicy *policy;
- PDprintk("setting mode %d nodes[0] %lx\n", mode, nodes_addr(*nodes)[0]);
+ pr_debug("setting mode %d nodes[0] %lx\n",
+ mode, nodes ? nodes_addr(*nodes)[0] : -1);
+
if (mode == MPOL_DEFAULT)
return NULL;
policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
break;
case MPOL_BIND:
policy->v.zonelist = bind_zonelist(nodes);
- if (policy->v.zonelist == NULL) {
+ if (IS_ERR(policy->v.zonelist)) {
+ void *error_code = policy->v.zonelist;
kmem_cache_free(policy_cache, policy);
- return ERR_PTR(-ENOMEM);
+ return error_code;
}
break;
}
return 0;
}
-/* Check if a vma is migratable */
-static inline int vma_migratable(struct vm_area_struct *vma)
-{
- if (vma->vm_flags & (
- VM_LOCKED|VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED))
- return 0;
- return 1;
-}
-
/*
* Check if all pages in a range are on a set of nodes.
* If pagelist != NULL then isolate pages from the LRU and
int err = 0;
struct mempolicy *old = vma->vm_policy;
- PDprintk("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
+ pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
vma->vm_start, vma->vm_end, vma->vm_pgoff,
vma->vm_ops, vma->vm_file,
vma->vm_ops ? vma->vm_ops->set_policy : NULL);
+ /*
+ * Allocate one page on @node by calling alloc_pages_node() with
+ * GFP_HIGHUSER_MOVABLE as the gfp mask and 0 as the final argument.
+ * Neither @page nor @x is used by the body.
+ * NOTE(review): presumably a new-page callback for page migration; the
+ * caller is not visible here -- confirm.
+ */
static struct page *new_node_page(struct page *page, unsigned long node, int **x)
{
- return alloc_pages_node(node, GFP_HIGHUSER, 0);
+ return alloc_pages_node(node, GFP_HIGHUSER_MOVABLE, 0);
}
/*
{
struct vm_area_struct *vma = (struct vm_area_struct *)private;
- return alloc_page_vma(GFP_HIGHUSER, vma, page_address_in_vma(page, vma));
+ return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
+ page_address_in_vma(page, vma));
}
#else
if (!new)
flags |= MPOL_MF_DISCONTIG_OK;
- PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len,
- mode,nodes_addr(nodes)[0]);
+ pr_debug("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len,
+ mode, nmask ? nodes_addr(*nmask)[0] : -1);
down_write(&mm->mmap_sem);
vma = check_range(mm, start, end, nmask,
#ifdef CONFIG_HUGETLBFS
/* Return a zonelist suitable for a huge page allocation. */
-struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
+struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
+ gfp_t gfp_flags)
{
struct mempolicy *pol = get_vma_policy(current, vma, addr);
unsigned nid;
nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
- return NODE_DATA(nid)->node_zonelists + gfp_zone(GFP_HIGHUSER);
+ return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags);
}
return zonelist_policy(GFP_HIGHUSER, pol);
}
}
rb_link_node(&new->nd, parent, p);
rb_insert_color(&new->nd, &sp->root);
- PDprintk("inserting %lx-%lx: %d\n", new->start, new->end,
+ pr_debug("inserting %lx-%lx: %d\n", new->start, new->end,
new->policy ? new->policy->policy : 0);
}
static void sp_delete(struct shared_policy *sp, struct sp_node *n)
{
- PDprintk("deleting %lx-l%x\n", n->start, n->end);
+ pr_debug("deleting %lx-l%lx\n", n->start, n->end);
rb_erase(&n->nd, &sp->root);
mpol_free(n->policy);
kmem_cache_free(sn_cache, n);
struct sp_node *new = NULL;
unsigned long sz = vma_pages(vma);
- PDprintk("set_shared_policy %lx sz %lu %d %lx\n",
+ pr_debug("set_shared_policy %lx sz %lu %d %lx\n",
vma->vm_pgoff,
sz, npol? npol->policy : -1,
- npol ? nodes_addr(npol->v.nodes)[0] : -1);
+ npol ? nodes_addr(npol->v.nodes)[0] : -1);
if (npol) {
new = sp_alloc(vma->vm_pgoff, vma->vm_pgoff + sz, npol);
/* assumes fs == KERNEL_DS */
+ /*
+ * Boot-time setup for NUMA memory policy: create the slab caches for
+ * struct mempolicy and struct sp_node, then install an MPOL_INTERLEAVE
+ * policy over the online nodes that hold at least 16MB of present
+ * memory. If no node is that large, interleave over the single largest
+ * node instead. A failure of do_set_mempolicy() is only logged.
+ */
void __init numa_policy_init(void)
{
+ nodemask_t interleave_nodes;
+ unsigned long largest = 0;
+ int nid, prefer = 0;
+
policy_cache = kmem_cache_create("numa_policy",
sizeof(struct mempolicy),
- 0, SLAB_PANIC, NULL, NULL);
+ 0, SLAB_PANIC, NULL);
sn_cache = kmem_cache_create("shared_policy_node",
sizeof(struct sp_node),
- 0, SLAB_PANIC, NULL, NULL);
+ 0, SLAB_PANIC, NULL);
+
+ /*
+ * Set interleaving policy for system init. Interleaving is only
+ * enabled across suitably sized nodes (default is >= 16MB), or
+ * fall back to the largest node if they're all smaller.
+ */
+ nodes_clear(interleave_nodes);
+ for_each_online_node(nid) {
+ unsigned long total_pages = node_present_pages(nid);
+
+ /* Preserve the largest node */
+ if (largest < total_pages) {
+ largest = total_pages;
+ prefer = nid;
+ }
+
+ /*
+ * Interleave this node? Compare in units of pages: shifting
+ * total_pages left by PAGE_SHIFT can wrap where unsigned long
+ * is 32 bits wide, which would wrongly exclude a very large
+ * node from the interleave set.
+ */
+ if (total_pages >= ((16 << 20) >> PAGE_SHIFT))
+ node_set(nid, interleave_nodes);
+ }
- /* Set interleaving policy for system init. This way not all
- the data structures allocated at system boot end up in node zero. */
+ /* All too small, use the largest */
+ if (unlikely(nodes_empty(interleave_nodes)))
+ node_set(prefer, interleave_nodes);
- if (do_set_mempolicy(MPOL_INTERLEAVE, &node_online_map))
+ if (do_set_mempolicy(MPOL_INTERLEAVE, &interleave_nodes))
printk("numa_policy_init: interleaving failed\n");
}
* then zonelist_policy() will "FALL THROUGH" to MPOL_DEFAULT.
*/
- if (zonelist) {
+ if (!IS_ERR(zonelist)) {
/* Good - got mem - substitute new zonelist */
kfree(pol->v.zonelist);
pol->v.zonelist = zonelist;