- cpus: list of CPUs in that cpuset
- mems: list of Memory Nodes in that cpuset
+ - memory_migrate flag: if set, move pages to cpusets nodes
- cpu_exclusive flag: is cpu placement exclusive?
- mem_exclusive flag: is memory placement exclusive?
- tasks: list of tasks (by pid) attached to that cpuset
impacting the scheduler code in the kernel with a check for changes
in a tasks processor placement.
+Normally, once a page is allocated (given a physical page
+of main memory) then that page stays on whatever node it
+was allocated, so long as it remains allocated, even if the
+cpusets memory placement policy 'mems' subsequently changes.
+If the cpuset flag file 'memory_migrate' is set true, then when
+tasks are attached to that cpuset, any pages that task had
+allocated to it on nodes in its previous cpuset are migrated
+to the tasks new cpuset. Depending on the implementation,
+this migration may either be done by swapping the page out,
+so that the next time the page is referenced, it will be paged
+into the tasks new cpuset, usually on the node where it was
+referenced, or this migration may be done by directly copying
+the pages from the tasks previous cpuset to the new cpuset,
+where possible to the same node, relative to the new cpuset,
+as the node that held the page, relative to the old cpuset.
+Also if 'memory_migrate' is set true, then if that cpusets
+'mems' file is modified, pages allocated to tasks in that
+cpuset, that were on nodes in the previous setting of 'mems',
+will be moved to nodes in the new setting of 'mems.' Again,
+depending on the implementation, this might be done by swapping,
+or by direct copying. In either case, pages that were not in
+the tasks prior cpuset, or in the cpusets prior 'mems' setting,
+will not be moved.
+
There is an exception to the above. If hotplug functionality is used
to remove all the CPUs that are currently assigned to a cpuset,
then the kernel will automatically update the cpus_allowed of all
typedef enum {
CS_CPU_EXCLUSIVE,
CS_MEM_EXCLUSIVE,
+ CS_MEMORY_MIGRATE,
CS_REMOVED,
CS_NOTIFY_ON_RELEASE
} cpuset_flagbits_t;
return !!test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
}
+static inline int is_memory_migrate(const struct cpuset *cs)
+{
+ return !!test_bit(CS_MEMORY_MIGRATE, &cs->flags);
+}
+
/*
* Increment this atomic integer everytime any cpuset changes its
* mems_allowed value. Users of cpusets can track this generation
if (current->cpuset_mems_generation != my_cpusets_mem_gen) {
struct cpuset *cs;
nodemask_t oldmem = current->mems_allowed;
+ int migrate;
down(&callback_sem);
task_lock(current);
cs = current->cpuset;
+ migrate = is_memory_migrate(cs);
guarantee_online_mems(cs, ¤t->mems_allowed);
current->cpuset_mems_generation = cs->mems_generation;
task_unlock(current);
up(&callback_sem);
- if (!nodes_equal(oldmem, current->mems_allowed))
+ if (!nodes_equal(oldmem, current->mems_allowed)) {
numa_policy_rebind(&oldmem, ¤t->mems_allowed);
+ if (migrate) {
+ do_migrate_pages(current->mm, &oldmem,
+ ¤t->mems_allowed,
+ MPOL_MF_MOVE_ALL);
+ }
+ }
}
}
/*
* update_flag - read a 0 or a 1 in a file and update associated flag
* bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
- * CS_NOTIFY_ON_RELEASE)
+ * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE)
* cs: the cpuset to update
* buf: the buffer where we read the 0 or 1
*
struct task_struct *tsk;
struct cpuset *oldcs;
cpumask_t cpus;
+ nodemask_t from, to;
if (sscanf(pidbuf, "%d", &pid) != 1)
return -EIO;
guarantee_online_cpus(cs, &cpus);
set_cpus_allowed(tsk, cpus);
+ from = oldcs->mems_allowed;
+ to = cs->mems_allowed;
+
up(&callback_sem);
+ if (is_memory_migrate(cs))
+ do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL);
put_task_struct(tsk);
if (atomic_dec_and_test(&oldcs->count))
check_for_release(oldcs, ppathbuf);
typedef enum {
FILE_ROOT,
FILE_DIR,
+ FILE_MEMORY_MIGRATE,
FILE_CPULIST,
FILE_MEMLIST,
FILE_CPU_EXCLUSIVE,
case FILE_NOTIFY_ON_RELEASE:
retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
break;
+ case FILE_MEMORY_MIGRATE:
+ retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
+ break;
case FILE_TASKLIST:
retval = attach_task(cs, buffer, &pathbuf);
break;
case FILE_NOTIFY_ON_RELEASE:
*s++ = notify_on_release(cs) ? '1' : '0';
break;
+ case FILE_MEMORY_MIGRATE:
+ *s++ = is_memory_migrate(cs) ? '1' : '0';
+ break;
default:
retval = -EINVAL;
goto out;
.private = FILE_NOTIFY_ON_RELEASE,
};
+static struct cftype cft_memory_migrate = {
+ .name = "memory_migrate",
+ .private = FILE_MEMORY_MIGRATE,
+};
+
static int cpuset_populate_dir(struct dentry *cs_dentry)
{
int err;
return err;
if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0)
return err;
+ if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0)
+ return err;
if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
return err;
return 0;