X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fmempolicy.c;h=cf18f0942553554ae4977958dc06109335eb8abd;hb=cdb8355add9b1d87ecfcb58b12879897dc1e3e36;hp=8778f58880c41c55cab9a2de035e960379b0cfe4;hpb=d90125bfe958ed0451c6b98f831c86aba08b43d5;p=linux-2.6 diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 8778f58880..cf18f09425 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -87,6 +87,8 @@ #include #include #include +#include +#include #include #include @@ -103,7 +105,7 @@ static struct kmem_cache *sn_cache; /* Highest zone. An specific allocation for a zone below that is not policied. */ -int policy_zone = ZONE_DMA; +enum zone_type policy_zone = ZONE_DMA; struct mempolicy default_policy = { .refcnt = ATOMIC_INIT(1), /* never free it */ @@ -135,7 +137,8 @@ static int mpol_check_policy(int mode, nodemask_t *nodes) static struct zonelist *bind_zonelist(nodemask_t *nodes) { struct zonelist *zl; - int num, max, nd, k; + int num, max, nd; + enum zone_type k; max = 1 + MAX_NR_ZONES * nodes_weight(*nodes); zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL); @@ -146,12 +149,16 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes) lower zones etc. Avoid empty zones because the memory allocator doesn't like them. If you implement node hot removal you have to fix that. */ - for (k = policy_zone; k >= 0; k--) { + k = policy_zone; + while (1) { for_each_node_mask(nd, *nodes) { struct zone *z = &NODE_DATA(nd)->node_zones[k]; if (z->present_pages > 0) zl->zones[num++] = z; } + if (k == 0) + break; + k--; } zl->zones[num] = NULL; return zl; @@ -480,7 +487,7 @@ static void get_zonemask(struct mempolicy *p, nodemask_t *nodes) switch (p->policy) { case MPOL_BIND: for (i = 0; p->v.zonelist->zones[i]; i++) - node_set(p->v.zonelist->zones[i]->zone_pgdat->node_id, + node_set(zone_to_nid(p->v.zonelist->zones[i]), *nodes); break; case MPOL_DEFAULT: @@ -587,6 +594,11 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist, isolate_lru_page(page, pagelist); } +static struct page *new_node_page(struct page *page, unsigned long node, int **x) +{ + return alloc_pages_node(node, GFP_HIGHUSER, 0); +} + /* * Migrate pages from one node to a target node. * Returns error or the number of pages not migrated. @@ -603,11 +615,9 @@ int migrate_to_node(struct mm_struct *mm, int source, int dest, int flags) check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nmask, flags | MPOL_MF_DISCONTIG_OK, &pagelist); - if (!list_empty(&pagelist)) { - err = migrate_pages_to(&pagelist, NULL, dest); - if (!list_empty(&pagelist)) - putback_lru_pages(&pagelist); - } + if (!list_empty(&pagelist)) + err = migrate_pages(&pagelist, new_node_page, dest); + return err; } @@ -627,6 +637,10 @@ int do_migrate_pages(struct mm_struct *mm, down_read(&mm->mmap_sem); + err = migrate_vmas(mm, from_nodes, to_nodes, flags); + if (err) + goto out; + /* * Find a 'source' bit set in 'tmp' whose corresponding 'dest' * bit in 'to' is not also set in 'tmp'. Clear the found 'source' @@ -686,7 +700,7 @@ int do_migrate_pages(struct mm_struct *mm, if (err < 0) break; } - +out: up_read(&mm->mmap_sem); if (err < 0) return err; @@ -694,6 +708,12 @@ int do_migrate_pages(struct mm_struct *mm, } +static struct page *new_vma_page(struct page *page, unsigned long private, int **x) +{ + struct vm_area_struct *vma = (struct vm_area_struct *)private; + + return alloc_page_vma(GFP_HIGHUSER, vma, page_address_in_vma(page, vma)); +} #else static void migrate_page_add(struct page *page, struct list_head *pagelist, @@ -706,6 +726,11 @@ int do_migrate_pages(struct mm_struct *mm, { return -ENOSYS; } + +static struct page *new_vma_page(struct page *page, unsigned long private) +{ + return NULL; +} #endif long do_mbind(unsigned long start, unsigned long len, @@ -767,15 +792,13 @@ long do_mbind(unsigned long start, unsigned long len, err = mbind_range(vma, start, end, new); if (!list_empty(&pagelist)) - nr_failed = migrate_pages_to(&pagelist, vma, -1); + nr_failed = migrate_pages(&pagelist, new_vma_page, + (unsigned long)vma); if (!err && nr_failed && (flags & MPOL_MF_STRICT)) err = -EIO; } - if (!list_empty(&pagelist)) - putback_lru_pages(&pagelist); - up_write(&mm->mmap_sem); mpol_free(new); return err; @@ -929,6 +952,10 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, goto out; } + err = security_task_movememory(task); + if (err) + goto out; + err = do_migrate_pages(mm, &old, &new, capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE); out: @@ -1109,7 +1136,9 @@ static unsigned interleave_nodes(struct mempolicy *policy) */ unsigned slab_node(struct mempolicy *policy) { - switch (policy->policy) { + int pol = policy ? policy->policy : MPOL_DEFAULT; + + switch (pol) { case MPOL_INTERLEAVE: return interleave_nodes(policy); @@ -1118,7 +1147,7 @@ unsigned slab_node(struct mempolicy *policy) * Follow bind policy behavior and start allocation at the * first node. */ - return policy->v.zonelist->zones[0]->zone_pgdat->node_id; + return zone_to_nid(policy->v.zonelist->zones[0]); case MPOL_PREFERRED: if (policy->v.preferred_node >= 0) @@ -1154,7 +1183,15 @@ static inline unsigned interleave_nid(struct mempolicy *pol, if (vma) { unsigned long off; - off = vma->vm_pgoff; + /* + * for small pages, there is no difference between + * shift and PAGE_SHIFT, so the bit-shift is safe. + * for huge pages, since vm_pgoff is in units of small + * pages, we need to shift off the always 0 bits to get + * a useful offset. + */ + BUG_ON(shift < PAGE_SHIFT); + off = vma->vm_pgoff >> (shift - PAGE_SHIFT); off += (addr - vma->vm_start) >> shift; return offset_il_node(pol, vma, off); } else @@ -1187,10 +1224,8 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp); page = __alloc_pages(gfp, order, zl); - if (page && page_zone(page) == zl->zones[0]) { - zone_pcp(zl->zones[0],get_cpu())->interleave_hit++; - put_cpu(); - } + if (page && page_zone(page) == zl->zones[0]) + inc_zone_page_state(page, NUMA_INTERLEAVE_HIT); return page; } @@ -1257,7 +1292,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) if ((gfp & __GFP_WAIT) && !in_interrupt()) cpuset_update_task_memory_state(); - if (!pol || in_interrupt()) + if (!pol || in_interrupt() || (gfp & __GFP_THISNODE)) pol = &default_policy; if (pol->policy == MPOL_INTERLEAVE) return alloc_page_interleave(gfp, order, interleave_nodes(pol)); @@ -1616,7 +1651,7 @@ void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask) nodes_clear(nodes); for (z = pol->v.zonelist->zones; *z; z++) - node_set((*z)->zone_pgdat->node_id, nodes); + node_set(zone_to_nid(*z), nodes); nodes_remap(tmp, nodes, *mpolmask, *newmask); nodes = tmp; @@ -1799,7 +1834,7 @@ static inline void check_huge_range(struct vm_area_struct *vma, int show_numa_map(struct seq_file *m, void *v) { - struct task_struct *task = m->private; + struct proc_maps_private *priv = m->private; struct vm_area_struct *vma = v; struct numa_maps *md; struct file *file = vma->vm_file; @@ -1815,7 +1850,7 @@ int show_numa_map(struct seq_file *m, void *v) return 0; mpol_to_str(buffer, sizeof(buffer), - get_vma_policy(task, vma, vma->vm_start)); + get_vma_policy(priv->task, vma, vma->vm_start)); seq_printf(m, "%08lx %s", vma->vm_start, buffer); @@ -1869,7 +1904,7 @@ out: kfree(md); if (m->count < m->size) - m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; + m->version = (vma != priv->tail_vma) ? vma->vm_start : 0; return 0; }