Make vm statistics update interval configurable

[linux-2.6] / mm / slab.c
diff --git a/mm/slab.c b/mm/slab.c
index 00f98b9f6df1e46eef8a73605bc39205b69a0f47..e50908b2bfac8d33948fdb70422f98030e74f5d9 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -116,8 +116,7 @@
  #include       <asm/page.h>
  
  /*
- * DEBUG       - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
- *               SLAB_RED_ZONE & SLAB_POISON.
+ * DEBUG       - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
   *               0 for faster, smaller code (especially in the critical paths).
   *
   * STATS       - 1 to collect stats for /proc/slabinfo.
@@ -149,10 +148,11 @@
   * Usually, the kmalloc caches are cache_line_size() aligned, except when
   * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
   * Some archs want to perform DMA into kmalloc caches and need a guaranteed
- * alignment larger than BYTES_PER_WORD. ARCH_KMALLOC_MINALIGN allows that.
- * Note that this flag disables some debug features.
+ * alignment larger than the alignment of a 64-bit integer.
+ * ARCH_KMALLOC_MINALIGN allows that.
+ * Note that increasing this value may disable some debug features.
   */
-#define ARCH_KMALLOC_MINALIGN 0
+#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
  #endif
  
  #ifndef ARCH_SLAB_MINALIGN
@@ -172,15 +172,15 @@
  
  /* Legal flag mask for kmem_cache_create(). */
  #if DEBUG
-# define CREATE_MASK   (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
+# define CREATE_MASK   (SLAB_RED_ZONE | \
                          SLAB_POISON | SLAB_HWCACHE_ALIGN | \
                          SLAB_CACHE_DMA | \
-                        SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \
+                        SLAB_STORE_USER | \
                          SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
                          SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
  #else
  # define CREATE_MASK   (SLAB_HWCACHE_ALIGN | \
-                        SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
+                        SLAB_CACHE_DMA | \
                          SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
                          SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
  #endif
@@ -389,7 +389,6 @@ struct kmem_cache {
         unsigned int buffer_size;
         u32 reciprocal_buffer_size;
  /* 3) touched by every alloc & free from the backend */
-       struct kmem_list3 *nodelists[MAX_NUMNODES];
  
         unsigned int flags;             /* constant flags */
         unsigned int num;               /* # of objs per slab */
@@ -444,6 +443,17 @@ struct kmem_cache {
         int obj_offset;
         int obj_size;
  #endif
+       /*
+        * We put nodelists[] at the end of kmem_cache, because we want to size
+        * this array to nr_node_ids slots instead of MAX_NUMNODES
+        * (see kmem_cache_init()).
+        * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
+        * is statically defined, so we reserve the max number of nodes.
+        */
+       struct kmem_list3 *nodelists[MAX_NUMNODES];
+       /*
+        * Do not add fields after nodelists[]
+        */
  };
  
  #define CFLGS_OFF_SLAB         (0x80000000UL)
@@ -527,19 +537,22 @@ static int obj_size(struct kmem_cache *cachep)
         return cachep->obj_size;
  }
  
-static unsigned long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
+static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
  {
         BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
-       return (unsigned long*) (objp+obj_offset(cachep)-BYTES_PER_WORD);
+       return (unsigned long long*) (objp + obj_offset(cachep) -
+                                     sizeof(unsigned long long));
  }
  
-static unsigned long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
+static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
  {
         BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
         if (cachep->flags & SLAB_STORE_USER)
-               return (unsigned long *)(objp + cachep->buffer_size -
-                                        2 * BYTES_PER_WORD);
-       return (unsigned long *)(objp + cachep->buffer_size - BYTES_PER_WORD);
+               return (unsigned long long *)(objp + cachep->buffer_size -
+                                             sizeof(unsigned long long) -
+                                             BYTES_PER_WORD);
+       return (unsigned long long *) (objp + cachep->buffer_size -
+                                      sizeof(unsigned long long));
  }
  
  static void **dbg_userword(struct kmem_cache *cachep, void *objp)
@@ -552,8 +565,8 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
  
  #define obj_offset(x)                  0
  #define obj_size(cachep)               (cachep->buffer_size)
-#define dbg_redzone1(cachep, objp)     ({BUG(); (unsigned long *)NULL;})
-#define dbg_redzone2(cachep, objp)     ({BUG(); (unsigned long *)NULL;})
+#define dbg_redzone1(cachep, objp)     ({BUG(); (unsigned long long *)NULL;})
+#define dbg_redzone2(cachep, objp)     ({BUG(); (unsigned long long *)NULL;})
  #define dbg_userword(cachep, objp)     ({BUG(); (void **)NULL;})
  
  #endif
@@ -592,8 +605,7 @@ static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
  
  static inline struct kmem_cache *page_get_cache(struct page *page)
  {
-       if (unlikely(PageCompound(page)))
-               page = (struct page *)page_private(page);
+       page = compound_head(page);
         BUG_ON(!PageSlab(page));
         return (struct kmem_cache *)page->lru.next;
  }
@@ -605,21 +617,19 @@ static inline void page_set_slab(struct page *page, struct slab *slab)
  
  static inline struct slab *page_get_slab(struct page *page)
  {
-       if (unlikely(PageCompound(page)))
-               page = (struct page *)page_private(page);
         BUG_ON(!PageSlab(page));
         return (struct slab *)page->lru.prev;
  }
  
  static inline struct kmem_cache *virt_to_cache(const void *obj)
  {
-       struct page *page = virt_to_page(obj);
+       struct page *page = virt_to_head_page(obj);
         return page_get_cache(page);
  }
  
  static inline struct slab *virt_to_slab(const void *obj)
  {
-       struct page *page = virt_to_page(obj);
+       struct page *page = virt_to_head_page(obj);
         return page_get_slab(page);
  }
  
@@ -678,9 +688,6 @@ static struct kmem_cache cache_cache = {
         .shared = 1,
         .buffer_size = sizeof(struct kmem_cache),
         .name = "kmem_cache",
-#if DEBUG
-       .obj_size = sizeof(struct kmem_cache),
-#endif
  };
  
  #define BAD_ALIEN_MAGIC 0x01020304ul
@@ -1179,8 +1186,11 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
         int memsize = sizeof(struct kmem_list3);
  
         switch (action) {
-       case CPU_UP_PREPARE:
+       case CPU_LOCK_ACQUIRE:
                 mutex_lock(&cache_chain_mutex);
+               break;
+       case CPU_UP_PREPARE:
+       case CPU_UP_PREPARE_FROZEN:
                 /*
                  * We need to do this right in the beginning since
                  * alloc_arraycache's are going to use this list.
@@ -1267,17 +1277,28 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
                 }
                 break;
         case CPU_ONLINE:
-               mutex_unlock(&cache_chain_mutex);
+       case CPU_ONLINE_FROZEN:
                 start_cpu_timer(cpu);
                 break;
  #ifdef CONFIG_HOTPLUG_CPU
-       case CPU_DOWN_PREPARE:
-               mutex_lock(&cache_chain_mutex);
-               break;
-       case CPU_DOWN_FAILED:
-               mutex_unlock(&cache_chain_mutex);
-               break;
+       case CPU_DOWN_PREPARE:
+       case CPU_DOWN_PREPARE_FROZEN:
+               /*
+                * Shut down the cache reaper. Note that cache_chain_mutex is
+                * held so that if cache_reap() is invoked it cannot do
+                * anything expensive but will only modify reap_work
+                * and reschedule the timer.
+                */
+               cancel_rearming_delayed_work(&per_cpu(reap_work, cpu));
+               /* Now the cache_reaper is guaranteed not to be running. */
+               per_cpu(reap_work, cpu).work.func = NULL;
+               break;
+       case CPU_DOWN_FAILED:
+       case CPU_DOWN_FAILED_FROZEN:
+               start_cpu_timer(cpu);
+               break;
         case CPU_DEAD:
+       case CPU_DEAD_FROZEN:
                 /*
                  * Even if all the cpus of a node are down, we don't free the
                  * kmem_list3 of any cache. This to avoid a race between
@@ -1289,6 +1310,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
                 /* fall thru */
  #endif
         case CPU_UP_CANCELED:
+       case CPU_UP_CANCELED_FROZEN:
                 list_for_each_entry(cachep, &cache_chain, next) {
                         struct array_cache *nc;
                         struct array_cache *shared;
@@ -1347,6 +1369,8 @@ free_array_cache:
                                 continue;
                         drain_freelist(cachep, l3, l3->free_objects);
                 }
+               break;
+       case CPU_LOCK_RELEASE:
                 mutex_unlock(&cache_chain_mutex);
                 break;
         }
@@ -1440,6 +1464,15 @@ void __init kmem_cache_init(void)
         cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
         cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE];
  
+       /*
+        * struct kmem_cache size depends on nr_node_ids, which
+        * can be less than MAX_NUMNODES.
+        */
+       cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
+                                nr_node_ids * sizeof(struct kmem_list3 *);
+#if DEBUG
+       cache_cache.obj_size = cache_cache.buffer_size;
+#endif
         cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
                                         cache_line_size());
         cache_cache.reciprocal_buffer_size =
@@ -1764,7 +1797,7 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
         char *realobj;
  
         if (cachep->flags & SLAB_RED_ZONE) {
-               printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n",
+               printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",
                         *dbg_redzone1(cachep, objp),
                         *dbg_redzone2(cachep, objp));
         }
@@ -1933,7 +1966,7 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
   * For setting up all the kmem_list3s for cache whose buffer_size is same as
   * size of kmem_list3.
   */
-static void set_up_list3s(struct kmem_cache *cachep, int index)
+static void __init set_up_list3s(struct kmem_cache *cachep, int index)
  {
         int node;
  
@@ -2155,13 +2188,15 @@ kmem_cache_create (const char *name, size_t size, size_t align,
                  */
                 res = probe_kernel_address(pc->name, tmp);
                 if (res) {
-                       printk("SLAB: cache with size %d has lost its name\n",
+                       printk(KERN_ERR
+                              "SLAB: cache with size %d has lost its name\n",
                                pc->buffer_size);
                         continue;
                 }
  
                 if (!strcmp(pc->name, name)) {
-                       printk("kmem_cache_create: duplicate cache %s\n", name);
+                       printk(KERN_ERR
+                              "kmem_cache_create: duplicate cache %s\n", name);
                         dump_stack();
                         goto oops;
                 }
@@ -2169,12 +2204,6 @@ kmem_cache_create (const char *name, size_t size, size_t align,
  
  #if DEBUG
         WARN_ON(strchr(name, ' '));     /* It confuses parsers */
-       if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
-               /* No constructor, but inital state check requested */
-               printk(KERN_ERR "%s: No con, but init state check "
-                      "requested - %s\n", __FUNCTION__, name);
-               flags &= ~SLAB_DEBUG_INITIAL;
-       }
  #if FORCED_DEBUG
         /*
          * Enable redzoning and last user accounting, except for caches with
@@ -2231,7 +2260,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
          * is greater than BYTES_PER_WORD.
          */
         if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER)
-               ralign = BYTES_PER_WORD;
+               ralign = __alignof__(unsigned long long);
  
         /* 2) arch mandated alignment */
         if (ralign < ARCH_SLAB_MINALIGN) {
@@ -2242,7 +2271,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
                 ralign = align;
         }
         /* disable debug if necessary */
-       if (ralign > BYTES_PER_WORD)
+       if (ralign > __alignof__(unsigned long long))
                 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
         /*
          * 4) Store it.
@@ -2263,8 +2292,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
          */
         if (flags & SLAB_RED_ZONE) {
                 /* add space for red zone words */
-               cachep->obj_offset += BYTES_PER_WORD;
-               size += 2 * BYTES_PER_WORD;
+               cachep->obj_offset += sizeof(unsigned long long);
+               size += 2 * sizeof(unsigned long long);
         }
         if (flags & SLAB_STORE_USER) {
                 /* user store requires one word storage behind the end of
@@ -2298,7 +2327,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
         left_over = calculate_slab_order(cachep, size, align, flags);
  
         if (!cachep->num) {
-               printk("kmem_cache_create: couldn't create cache %s.\n", name);
+               printk(KERN_ERR
+                      "kmem_cache_create: couldn't create cache %s.\n", name);
                 kmem_cache_free(&cache_cache, cachep);
                 cachep = NULL;
                 goto oops;
@@ -2737,19 +2767,10 @@ static int cache_grow(struct kmem_cache *cachep,
          * Be lazy and only check for valid flags here,  keeping it out of the
          * critical path in kmem_cache_alloc().
          */
-       BUG_ON(flags & ~(GFP_DMA | GFP_LEVEL_MASK | __GFP_NO_GROW));
-       if (flags & __GFP_NO_GROW)
-               return 0;
+       BUG_ON(flags & ~(GFP_DMA | GFP_LEVEL_MASK));
  
         ctor_flags = SLAB_CTOR_CONSTRUCTOR;
         local_flags = (flags & GFP_LEVEL_MASK);
-       if (!(local_flags & __GFP_WAIT))
-               /*
-                * Not allowed to sleep.  Need to tell a constructor about
-                * this - it might need to know...
-                */
-               ctor_flags |= SLAB_CTOR_ATOMIC;
-
         /* Take the l3 list lock to change the colour_next on this node */
         check_irq_off();
         l3 = cachep->nodelists[nodeid];
@@ -2833,7 +2854,7 @@ static void kfree_debugcheck(const void *objp)
  
  static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
  {
-       unsigned long redzone1, redzone2;
+       unsigned long long redzone1, redzone2;
  
         redzone1 = *dbg_redzone1(cache, obj);
         redzone2 = *dbg_redzone2(cache, obj);
@@ -2849,7 +2870,7 @@ static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
         else
                 slab_error(cache, "memory outside object was overwritten");
  
-       printk(KERN_ERR "%p: redzone 1:0x%lx, redzone 2:0x%lx.\n",
+       printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",
                         obj, redzone1, redzone2);
  }
  
@@ -2862,7 +2883,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
  
         objp -= obj_offset(cachep);
         kfree_debugcheck(objp);
-       page = virt_to_page(objp);
+       page = virt_to_head_page(objp);
  
         slabp = page_get_slab(page);
  
@@ -2879,15 +2900,6 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
         BUG_ON(objnr >= cachep->num);
         BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
  
-       if (cachep->flags & SLAB_DEBUG_INITIAL) {
-               /*
-                * Need to call the slab's constructor so the caller can
-                * perform a verify of its state (debugging).  Called without
-                * the cache-lock held.
-                */
-               cachep->ctor(objp + obj_offset(cachep),
-                            cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY);
-       }
         if (cachep->flags & SLAB_POISON && cachep->dtor) {
                 /* we want to cache poison the object,
                  * call the destruction callback
@@ -3074,7 +3086,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
                         slab_error(cachep, "double free, or memory outside"
                                                 " object was overwritten");
                         printk(KERN_ERR
-                               "%p: redzone 1:0x%lx, redzone 2:0x%lx\n",
+                               "%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
                                 objp, *dbg_redzone1(cachep, objp),
                                 *dbg_redzone2(cachep, objp));
                 }
@@ -3086,20 +3098,14 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
                 struct slab *slabp;
                 unsigned objnr;
  
-               slabp = page_get_slab(virt_to_page(objp));
+               slabp = page_get_slab(virt_to_head_page(objp));
                 objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
                 slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
         }
  #endif
         objp += obj_offset(cachep);
-       if (cachep->ctor && cachep->flags & SLAB_POISON) {
-               unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR;
-
-               if (!(flags & __GFP_WAIT))
-                       ctor_flags |= SLAB_CTOR_ATOMIC;
-
-               cachep->ctor(objp, cachep, ctor_flags);
-       }
+       if (cachep->ctor && cachep->flags & SLAB_POISON)
+               cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR);
  #if ARCH_SLAB_MINALIGN
         if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
                 printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
@@ -3154,7 +3160,7 @@ static int __init failslab_debugfs(void)
         struct dentry *dir;
         int err;
  
-               err = init_fault_attr_dentries(&failslab.attr, "failslab");
+       err = init_fault_attr_dentries(&failslab.attr, "failslab");
         if (err)
                 return err;
         dir = failslab.attr.dentries.dir;
@@ -3192,9 +3198,6 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
  
         check_irq_off();
  
-       if (should_failslab(cachep, flags))
-               return NULL;
-
         ac = cpu_cache_get(cachep);
         if (likely(ac->avail)) {
                 STATS_INC_ALLOCHIT(cachep);
@@ -3268,7 +3271,7 @@ retry:
                                         flags | GFP_THISNODE, nid);
         }
  
-       if (!obj && !(flags & __GFP_NO_GROW)) {
+       if (!obj) {
                 /*
                  * This allocation will be performed within the constraints
                  * of the current cpuset / memory policy requirements.
@@ -3386,6 +3389,9 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
         unsigned long save_flags;
         void *ptr;
  
+       if (should_failslab(cachep, flags))
+               return NULL;
+
         cache_alloc_debugcheck_before(cachep, flags);
         local_irq_save(save_flags);
  
@@ -3456,6 +3462,9 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
         unsigned long save_flags;
         void *objp;
  
+       if (should_failslab(cachep, flags))
+               return NULL;
+
         cache_alloc_debugcheck_before(cachep, flags);
         local_irq_save(save_flags);
         objp = __do_cache_alloc(cachep, flags);
@@ -3750,7 +3759,6 @@ EXPORT_SYMBOL(__kmalloc);
  
  /**
   * krealloc - reallocate memory. The contents will remain unchanged.
- *
   * @p: object to reallocate memory for.
   * @new_size: how many bytes of memory are required.
   * @flags: the type of memory to allocate.
@@ -4148,7 +4156,6 @@ next:
         check_irq_on();
         mutex_unlock(&cache_chain_mutex);
         next_reap_node();
-       refresh_cpu_vm_stats(smp_processor_id());
  out:
         /* Set up the next iteration */
         schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
@@ -4440,16 +4447,12 @@ static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
  static void show_symbol(struct seq_file *m, unsigned long address)
  {
  #ifdef CONFIG_KALLSYMS
-       char *modname;
-       const char *name;
         unsigned long offset, size;
-       char namebuf[KSYM_NAME_LEN+1];
-
-       name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
+       char modname[MODULE_NAME_LEN + 1], name[KSYM_NAME_LEN + 1];
  
-       if (name) {
+       if (lookup_symbol_attrs(address, &size, &offset, modname, name) == 0) {
                 seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
-               if (modname)
+               if (modname[0])
                         seq_printf(m, " [%s]", modname);
                 return;
         }