[PATCH] cpusets: automatic numa mempolicy rebinding

author Paul Jackson <pj@sgi.com>

Sun, 30 Oct 2005 23:02:36 +0000 (15:02 -0800)

committer Linus Torvalds <torvalds@g5.osdl.org>

Mon, 31 Oct 2005 01:37:22 +0000 (17:37 -0800)
author Paul Jackson <pj@sgi.com>
Sun, 30 Oct 2005 23:02:36 +0000 (15:02 -0800)
committer Linus Torvalds <torvalds@g5.osdl.org>
Mon, 31 Oct 2005 01:37:22 +0000 (17:37 -0800)
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h

index 7af8cb836e78df84afa4b1eb36dd3bb3cbe166b9..8b67cf837ca97fd3e819c0fba62e7ab9608eed6b 100644 (file)
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -154,6 +154,7 @@ struct mempolicy *get_vma_policy(struct task_struct *task,
  
  extern void numa_default_policy(void);
  extern void numa_policy_init(void);
+extern void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new);
  extern struct mempolicy default_policy;
  
  #else
@@ -226,6 +227,11 @@ static inline void numa_default_policy(void)
  {
  }
  
+static inline void numa_policy_rebind(const nodemask_t *old,
+                                       const nodemask_t *new)
+{      /* intentionally empty; presumably the !CONFIG_NUMA no-op stub -- confirm */
+}
+
  #endif /* CONFIG_NUMA */
  #endif /* __KERNEL__ */
  
diff --git a/kernel/cpuset.c b/kernel/cpuset.c

index 6633f3fb641751fb3d37de5c2e66dbcc3ae7d7e6..5a737ed9dac79d8cf2fe184e6a79574680afa01b 100644 (file)
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -32,6 +32,7 @@
  #include <linux/kernel.h>
  #include <linux/kmod.h>
  #include <linux/list.h>
+#include <linux/mempolicy.h>
  #include <linux/mm.h>
  #include <linux/module.h>
  #include <linux/mount.h>
@@ -600,6 +601,7 @@ static void refresh_mems(void)
  
         if (current->cpuset_mems_generation != my_cpusets_mem_gen) {
                 struct cpuset *cs;
+               nodemask_t oldmem = current->mems_allowed;
  
                 down(&callback_sem);
                 task_lock(current);
@@ -608,6 +610,8 @@ static void refresh_mems(void)
                 current->cpuset_mems_generation = cs->mems_generation;
                 task_unlock(current);
                 up(&callback_sem);
+               if (!nodes_equal(oldmem, current->mems_allowed))
+                       numa_policy_rebind(&oldmem, &current->mems_allowed);
         }
  }
  
diff --git a/mm/mempolicy.c b/mm/mempolicy.c

index 2076b1542b8ac9ce32beba96e051bd70a62ebf96..5abc57c2b8bdd3804708267832fd20d29a65be71 100644 (file)
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -457,6 +457,7 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask,
         struct vm_area_struct *vma = NULL;
         struct mempolicy *pol = current->mempolicy;
  
+       cpuset_update_current_mems_allowed();
         if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
                 return -EINVAL;
         if (flags & MPOL_F_ADDR) {
@@ -1206,3 +1207,66 @@ void numa_default_policy(void)
  {
         do_set_mempolicy(MPOL_DEFAULT, NULL);
  }
+
+/* Rebind @pol's nodes from the @old set to @new (NULL @pol is a no-op) */
+static void rebind_policy(struct mempolicy *pol, const nodemask_t *old,
+                                                       const nodemask_t *new)
+{
+       nodemask_t tmp;         /* scratch result for nodes_remap() */
+
+       if (!pol)
+               return;
+
+       switch (pol->policy) {
+       case MPOL_DEFAULT:      /* holds no nodes: nothing to remap */
+               break;
+       case MPOL_INTERLEAVE:   /* il_next is current's: @pol assumed to be too */
+               nodes_remap(tmp, pol->v.nodes, *old, *new);
+               pol->v.nodes = tmp;
+               current->il_next = node_remap(current->il_next, *old, *new);
+               break;
+       case MPOL_PREFERRED:    /* remap the single preferred node */
+               pol->v.preferred_node = node_remap(pol->v.preferred_node,
+                                                               *old, *new);
+               break;
+       case MPOL_BIND: {       /* rebuild the zonelist from remapped nodes */
+               nodemask_t nodes;
+               struct zone **z;
+               struct zonelist *zonelist;
+
+               nodes_clear(nodes);     /* gather nodes of the current zonelist */
+               for (z = pol->v.zonelist->zones; *z; z++)
+                       node_set((*z)->zone_pgdat->node_id, nodes);
+               nodes_remap(tmp, nodes, *old, *new);
+               nodes = tmp;
+
+               zonelist = bind_zonelist(&nodes);
+
+               /* If no mem (bind_zonelist() returned NULL), keep the old zonelist.
+                * If that old zonelist has no remaining mems_allowed nodes,
+                * then zonelist_policy() will "FALL THROUGH" to MPOL_DEFAULT.
+                */
+
+               if (zonelist) {
+                       /* Got a new zonelist: free the old one and install it */
+                       kfree(pol->v.zonelist);
+                       pol->v.zonelist = zonelist;
+               }
+               break;
+       }
+       default:                /* unknown policy type */
+               BUG();
+               break;
+       }
+}
+
+/*
+ * The current task's allowed nodes changed from @old to @new: rebind its
+ * task mempolicy (current->mempolicy) to match.  Only that policy is fixed.
+ * TODO - fixup current->mm->vma and shmfs/tmpfs/hugetlbfs policies as well,
+ * once we have a cpuset mechanism to mark which cpuset subtree is migrating.
+ */
+void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new)
+{
+       rebind_policy(current->mempolicy, old, new);
+}
author	Paul Jackson <pj@sgi.com>
	Sun, 30 Oct 2005 23:02:36 +0000 (15:02 -0800)
committer	Linus Torvalds <torvalds@g5.osdl.org>
	Mon, 31 Oct 2005 01:37:22 +0000 (17:37 -0800)
include/linux/mempolicy.h		patch \| blob \| history
kernel/cpuset.c		patch \| blob \| history
mm/mempolicy.c		patch \| blob \| history