return policy;
}
-static void gather_stats(struct page *, void *);
+static void gather_stats(struct page *, void *, int pte_dirty);
static void migrate_page_add(struct page *page, struct list_head *pagelist,
unsigned long flags);
continue;
if (flags & MPOL_MF_STATS)
- gather_stats(page, private);
+ gather_stats(page, private, pte_dirty(*pte));
else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
migrate_page_add(page, private, flags);
else
int err;
struct vm_area_struct *first, *vma, *prev;
- /* Clear the LRU lists so pages can be isolated */
- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+ /* Must have swap device for migration */
+ if (nr_swap_pages <= 0)
+ return ERR_PTR(-ENODEV);
+
+ /*
+ * Clear the LRU lists so pages can be isolated.
+ * Note that pages may be moved off the LRU after we have
+ * drained them. Those pages will fail to migrate like other
+ * pages that may be busy.
+ */
lru_add_drain_all();
+ }
first = find_vma(mm, start);
if (!first)
*/
if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) {
if (isolate_lru_page(page))
- list_add(&page->lru, pagelist);
+ list_add_tail(&page->lru, pagelist);
}
}
LIST_HEAD(moved);
LIST_HEAD(failed);
int err = 0;
+ unsigned long offset = 0;
int nr_pages;
struct page *page;
struct list_head *p;
redo:
nr_pages = 0;
list_for_each(p, pagelist) {
- if (vma)
- page = alloc_page_vma(GFP_HIGHUSER, vma, vma->vm_start);
+ if (vma) {
+ /*
+ * The address passed to alloc_page_vma is used to
+ * generate the proper interleave behavior. We fake
+ * the address here by an increasing offset in order
+ * to get the proper distribution of pages.
+ *
+ * No decision has been made as to which page
+ * a certain old page is moved to so we cannot
+ * specify the correct address.
+ */
+ page = alloc_page_vma(GFP_HIGHUSER, vma,
+ offset + vma->vm_start);
+ offset += PAGE_SIZE;
+ }
else
page = alloc_pages_node(dest, GFP_HIGHUSER, 0);
err = -ENOMEM;
goto out;
}
- list_add(&page->lru, &newlist);
+ list_add_tail(&page->lru, &newlist);
nr_pages++;
- if (nr_pages > MIGRATE_CHUNK_SIZE);
+ if (nr_pages > MIGRATE_CHUNK_SIZE)
break;
}
err = migrate_pages(pagelist, &newlist, &moved, &failed);
MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
|| mode > MPOL_MAX)
return -EINVAL;
- if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_RESOURCE))
+ if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
return -EPERM;
if (start & ~PAGE_MASK)
nodes_clear(*nodes);
if (maxnode == 0 || !nmask)
return 0;
+ if (maxnode > PAGE_SIZE*BITS_PER_BYTE)
+ return -EINVAL;
nlongs = BITS_TO_LONGS(maxnode);
if ((maxnode % BITS_PER_LONG) == 0)
*/
if ((current->euid != task->suid) && (current->euid != task->uid) &&
(current->uid != task->suid) && (current->uid != task->uid) &&
- !capable(CAP_SYS_ADMIN)) {
+ !capable(CAP_SYS_NICE)) {
err = -EPERM;
goto out;
}
task_nodes = cpuset_mems_allowed(task);
/* Is the user allowed to access the target nodes? */
- if (!nodes_subset(new, task_nodes) && !capable(CAP_SYS_ADMIN)) {
+ if (!nodes_subset(new, task_nodes) && !capable(CAP_SYS_NICE)) {
err = -EPERM;
goto out;
}
- err = do_migrate_pages(mm, &old, &new, MPOL_MF_MOVE);
+ err = do_migrate_pages(mm, &old, &new,
+ capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);
out:
mmput(mm);
return err;
struct numa_maps {
unsigned long pages;
unsigned long anon;
- unsigned long mapped;
+ unsigned long active; /* pages seen with PageActive() set */
+ unsigned long writeback; /* pages seen with PageWriteback() set */
unsigned long mapcount_max;
+ unsigned long dirty; /* pages whose pte was dirty or that have PageDirty() set */
+ unsigned long swapcache; /* pages seen with PageSwapCache() set */
unsigned long node[MAX_NUMNODES];
};
-static void gather_stats(struct page *page, void *private)
+static void gather_stats(struct page *page, void *private, int pte_dirty) /* pte_dirty: caller-supplied dirty state of the pte mapping this page */
{
struct numa_maps *md = private;
int count = page_mapcount(page);
- if (count)
- md->mapped++;
+ md->pages++; /* every page handed to this function is counted */
+ if (pte_dirty || PageDirty(page)) /* dirty through either the pte flag passed in or the page flag */
+ md->dirty++;
- if (count > md->mapcount_max)
- md->mapcount_max = count;
+ if (PageSwapCache(page))
+ md->swapcache++;
- md->pages++;
+ if (PageActive(page))
+ md->active++;
+
+ if (PageWriteback(page))
+ md->writeback++;
if (PageAnon(page))
md->anon++;
+ if (count > md->mapcount_max) /* remember the largest page_mapcount() seen so far */
+ md->mapcount_max = count;
+
md->node[page_to_nid(page)]++;
cond_resched();
}
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * Collect numa_maps statistics for a hugetlb vma.
+ *
+ * Steps through [start, end) in HPAGE_SIZE strides, looks up the huge pte
+ * for each address and hands the page of every populated entry to
+ * gather_stats(), which accumulates the counters in @md.
+ */
+static void check_huge_range(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end,
+		struct numa_maps *md)
+{
+	unsigned long addr;
+	struct page *page;
+
+	for (addr = start; addr < end; addr += HPAGE_SIZE) {
+		pte_t *ptep = huge_pte_offset(vma->vm_mm, addr & HPAGE_MASK);
+		pte_t pte;
+
+		if (!ptep)
+			continue;
+
+		/* Read the entry once; every test below uses this copy. */
+		pte = *ptep;
+		if (pte_none(pte))
+			continue;
+
+		page = pte_page(pte);
+		if (!page)
+			continue;
+
+		/*
+		 * Take the dirty bit from the same snapshot that was
+		 * validated above rather than dereferencing ptep a second
+		 * time, so the page and its dirty state come from one read.
+		 */
+		gather_stats(page, md, pte_dirty(pte));
+	}
+}
+#else
+/* Stub used when CONFIG_HUGETLB_PAGE is not set: nothing to count. */
+static inline void check_huge_range(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end,
+		struct numa_maps *md)
+{
+}
+#endif
+
int show_numa_map(struct seq_file *m, void *v)
{
struct task_struct *task = m->private;
struct vm_area_struct *vma = v;
struct numa_maps *md;
+ struct file *file = vma->vm_file;
+ struct mm_struct *mm = vma->vm_mm;
int n;
char buffer[50];
- if (!vma->vm_mm)
+ if (!mm)
return 0;
md = kzalloc(sizeof(struct numa_maps), GFP_KERNEL);
if (!md)
return 0;
- check_pgd_range(vma, vma->vm_start, vma->vm_end,
- &node_online_map, MPOL_MF_STATS, md);
+ mpol_to_str(buffer, sizeof(buffer),
+ get_vma_policy(task, vma, vma->vm_start));
+
+ seq_printf(m, "%08lx %s", vma->vm_start, buffer);
- if (md->pages) {
- mpol_to_str(buffer, sizeof(buffer),
- get_vma_policy(task, vma, vma->vm_start));
+ if (file) {
+ seq_printf(m, " file=");
+ seq_path(m, file->f_vfsmnt, file->f_dentry, "\n\t= ");
+ } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
+ seq_printf(m, " heap");
+ } else if (vma->vm_start <= mm->start_stack &&
+ vma->vm_end >= mm->start_stack) {
+ seq_printf(m, " stack");
+ }
+
+ if (is_vm_hugetlb_page(vma)) {
+ check_huge_range(vma, vma->vm_start, vma->vm_end, md);
+ seq_printf(m, " huge");
+ } else {
+ check_pgd_range(vma, vma->vm_start, vma->vm_end,
+ &node_online_map, MPOL_MF_STATS, md);
+ }
- seq_printf(m, "%08lx %s pages=%lu mapped=%lu maxref=%lu",
- vma->vm_start, buffer, md->pages,
- md->mapped, md->mapcount_max);
+ if (!md->pages)
+ goto out;
- if (md->anon)
- seq_printf(m," anon=%lu",md->anon);
+ if (md->anon)
+ seq_printf(m," anon=%lu",md->anon);
- for_each_online_node(n)
- if (md->node[n])
- seq_printf(m, " N%d=%lu", n, md->node[n]);
+ if (md->dirty)
+ seq_printf(m," dirty=%lu",md->dirty);
- seq_putc(m, '\n');
- }
+ if (md->pages != md->anon && md->pages != md->dirty)
+ seq_printf(m, " mapped=%lu", md->pages);
+
+ if (md->mapcount_max > 1)
+ seq_printf(m, " mapmax=%lu", md->mapcount_max);
+
+ if (md->swapcache)
+ seq_printf(m," swapcache=%lu", md->swapcache);
+
+ if (md->active < md->pages && !is_vm_hugetlb_page(vma))
+ seq_printf(m," active=%lu", md->active);
+
+ if (md->writeback)
+ seq_printf(m," writeback=%lu", md->writeback);
+
+ for_each_online_node(n)
+ if (md->node[n])
+ seq_printf(m, " N%d=%lu", n, md->node[n]);
+out:
+ seq_putc(m, '\n');
kfree(md);
if (m->count < m->size)