X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fslab.c;h=d31a06bfbea5c78998e3c68a5a484b98b3e4579f;hb=db31419404e5ccc7e8e07000a5f1ac440a0eafa0;hp=6f5aeebd430683b7ecbcd1ca28f6efce2b991740;hpb=a8c0f9a41f88da703ade33f9c1626a55c786e8bb;p=linux-2.6
diff --git a/mm/slab.c b/mm/slab.c
index 6f5aeebd43..d31a06bfbe 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -420,6 +420,7 @@ struct kmem_cache {
 unsigned long max_freeable;
 unsigned long node_allocs;
 unsigned long node_frees;
+ unsigned long node_overflow;
 atomic_t allochit;
 atomic_t allocmiss;
 atomic_t freehit;
@@ -465,6 +466,7 @@ struct kmem_cache {
 #define STATS_INC_ERR(x) ((x)->errors++)
 #define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++)
 #define STATS_INC_NODEFREES(x) ((x)->node_frees++)
+#define STATS_INC_ACOVERFLOW(x) ((x)->node_overflow++)
 #define STATS_SET_FREEABLE(x, i) \
 do { \
 if ((x)->max_freeable < i) \
@@ -484,6 +486,7 @@ struct kmem_cache {
 #define STATS_INC_ERR(x) do { } while (0)
 #define STATS_INC_NODEALLOCS(x) do { } while (0)
 #define STATS_INC_NODEFREES(x) do { } while (0)
+#define STATS_INC_ACOVERFLOW(x) do { } while (0)
 #define STATS_SET_FREEABLE(x, i) do { } while (0)
 #define STATS_INC_ALLOCHIT(x) do { } while (0)
 #define STATS_INC_ALLOCMISS(x) do { } while (0)
@@ -697,6 +700,14 @@ static enum {
 FULL
 } g_cpucache_up;
+/*
+ * used by boot code to determine if it can use slab based allocator
+ */
+int slab_is_available(void)
+{
+ return g_cpucache_up == FULL;
+}
+
 static DEFINE_PER_CPU(struct work_struct, reap_work);
 static void free_block(struct kmem_cache *cachep, void **objpp, int len,
@@ -898,6 +909,30 @@ static struct array_cache *alloc_arraycache(int node, int entries,
 return nc;
 }
+/*
+ * Transfer objects in one arraycache to another.
+ * Locking must be handled by the caller.
+ *
+ * Return the number of entries transferred.
+ */
+static int transfer_objects(struct array_cache *to,
+ struct array_cache *from, unsigned int max)
+{
+ /* Figure out how many entries to transfer */
+ int nr = min(min(from->avail, max), to->limit - to->avail);
+
+ if (!nr)
+ return 0;
+
+ memcpy(to->entry + to->avail, from->entry + from->avail -nr,
+ sizeof(void *) *nr);
+
+ from->avail -= nr;
+ to->avail += nr;
+ to->touched = 1;
+ return nr;
+}
+
 #ifdef CONFIG_NUMA
 static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int);
 static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
@@ -947,6 +982,14 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
 if (ac->avail) {
 spin_lock(&rl3->list_lock);
+ /*
+ * Stuff objects into the remote nodes shared array first.
+ * That way we could avoid the overhead of putting the objects
+ * into the free lists and getting them back later.
+ */
+ if (rl3->shared)
+ transfer_objects(rl3->shared, ac, ac->limit);
+
 free_block(cachep, ac->entry, ac->avail, node);
 ac->avail = 0;
 spin_unlock(&rl3->list_lock);
@@ -962,8 +1005,8 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
 if (l3->alien) {
 struct array_cache *ac = l3->alien[node];
- if (ac && ac->avail) {
- spin_lock_irq(&ac->lock);
+
+ if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
 __drain_alien_cache(cachep, ac, node);
 spin_unlock_irq(&ac->lock);
 }
@@ -1002,7 +1045,7 @@ static inline void free_alien_cache(struct array_cache **ac_ptr)
 #endif
-static int __devinit cpuup_callback(struct notifier_block *nfb,
+static int cpuup_callback(struct notifier_block *nfb,
 unsigned long action, void *hcpu)
 {
 long cpu = (long)hcpu;
@@ -1266,8 +1309,7 @@ void __init kmem_cache_init(void)
 if (cache_cache.num)
 break;
 }
- if (!cache_cache.num)
- BUG();
+ BUG_ON(!cache_cache.num);
 cache_cache.gfporder = order;
 cache_cache.colour = left_over / cache_cache.colour_off;
 cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
@@ -1423,7 +1465,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 int i;
 flags |= cachep->gfpflags;
+#ifndef CONFIG_MMU
+ /* nommu uses slab's for process anonymous memory allocations, so
+ * requires __GFP_COMP to properly refcount higher order allocations"
+ */
+ page = alloc_pages_node(nodeid, (flags | __GFP_COMP), cachep->gfporder);
+#else
 page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+#endif
 if (!page)
 return NULL;
 addr = page_address(page);
@@ -1943,8 +1992,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 * Always checks flags, a caller might be expecting debug support which
 * isn't available.
 */
- if (flags & ~CREATE_MASK)
- BUG();
+ BUG_ON(flags & ~CREATE_MASK);
 /*
 * Check that size is in terms of words. This is needed to avoid
@@ -1990,10 +2038,9 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 align = ralign;
 /* Get cache's description obj. */
- cachep = kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
+ cachep = kmem_cache_zalloc(&cache_cache, SLAB_KERNEL);
 if (!cachep)
 goto oops;
- memset(cachep, 0, sizeof(struct kmem_cache));
 #if DEBUG
 cachep->obj_size = size;
@@ -2153,11 +2200,14 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
 check_irq_on();
 for_each_online_node(node) {
 l3 = cachep->nodelists[node];
- if (l3) {
+ if (l3 && l3->alien)
+ drain_alien_cache(cachep, l3->alien);
+ }
+
+ for_each_online_node(node) {
+ l3 = cachep->nodelists[node];
+ if (l3)
 drain_array(cachep, l3, l3->shared, 1, node);
- if (l3->alien)
- drain_alien_cache(cachep, l3->alien);
- }
 }
 }
@@ -2176,8 +2226,7 @@ static int __node_shrink(struct kmem_cache *cachep, int node)
 slabp = list_entry(l3->slabs_free.prev, struct slab, list);
 #if DEBUG
- if (slabp->inuse)
- BUG();
+ BUG_ON(slabp->inuse);
 #endif
 list_del(&slabp->list);
@@ -2218,8 +2267,7 @@ static int __cache_shrink(struct kmem_cache *cachep)
 */
 int kmem_cache_shrink(struct kmem_cache *cachep)
 {
- if (!cachep || in_interrupt())
- BUG();
+ BUG_ON(!cachep || in_interrupt());
 return __cache_shrink(cachep);
 }
@@ -2247,8 +2295,7 @@ int kmem_cache_destroy(struct kmem_cache *cachep)
 int i;
 struct kmem_list3 *l3;
- if (!cachep || in_interrupt())
- BUG();
+ BUG_ON(!cachep || in_interrupt());
 /* Don't let CPUs to come and go */
 lock_cpu_hotplug();
@@ -2293,13 +2340,15 @@ EXPORT_SYMBOL(kmem_cache_destroy);
 /* Get the memory for a slab management obj.
 */
 static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
- int colour_off, gfp_t local_flags)
+ int colour_off, gfp_t local_flags,
+ int nodeid)
 {
 struct slab *slabp;
 if (OFF_SLAB(cachep)) {
 /* Slab management obj is off-slab. */
- slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
+ slabp = kmem_cache_alloc_node(cachep->slabp_cache,
+ local_flags, nodeid);
 if (!slabp)
 return NULL;
 } else {
@@ -2309,6 +2358,7 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
 slabp->inuse = 0;
 slabp->colouroff = colour_off;
 slabp->s_mem = objp + colour_off;
+ slabp->nodeid = nodeid;
 return slabp;
 }
@@ -2447,8 +2497,7 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 * Be lazy and only check for valid flags here, keeping it out of the
 * critical path in kmem_cache_alloc().
 */
- if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW))
- BUG();
+ BUG_ON(flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW));
 if (flags & SLAB_NO_GROW)
 return 0;
@@ -2495,7 +2544,7 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 goto failed;
 /* Get slab management. */
- slabp = alloc_slabmgmt(cachep, objp, offset, local_flags);
+ slabp = alloc_slabmgmt(cachep, objp, offset, local_flags, nodeid);
 if (!slabp)
 goto opps1;
@@ -2681,20 +2730,10 @@ retry:
 BUG_ON(ac->avail > 0 || !l3);
 spin_lock(&l3->list_lock);
- if (l3->shared) {
- struct array_cache *shared_array = l3->shared;
- if (shared_array->avail) {
- if (batchcount > shared_array->avail)
- batchcount = shared_array->avail;
- shared_array->avail -= batchcount;
- ac->avail = batchcount;
- memcpy(ac->entry,
- &(shared_array->entry[shared_array->avail]),
- sizeof(void *) * batchcount);
- shared_array->touched = 1;
- goto alloc_done;
- }
- }
+ /* See if we can refill from the shared array */
+ if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
+ goto alloc_done;
+
 while (batchcount > 0) {
 struct list_head *entry;
 struct slab *slabp;
@@ -3066,9 +3105,11 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
 if (l3->alien && l3->alien[nodeid]) {
 alien = l3->alien[nodeid];
 spin_lock(&alien->lock);
- if (unlikely(alien->avail == alien->limit))
+ if (unlikely(alien->avail == alien->limit)) {
+ STATS_INC_ACOVERFLOW(cachep);
 __drain_alien_cache(cachep, alien, nodeid);
+ }
 alien->entry[alien->avail++] = objp;
 spin_unlock(&alien->lock);
 } else {
@@ -3291,7 +3332,7 @@ void *__alloc_percpu(size_t size)
 * and we have no way of figuring out how to fix the array
 * that we have allocated then....
 */
- for_each_cpu(i) {
+ for_each_possible_cpu(i) {
 int node = cpu_to_node(i);
 if (node_online(node))
@@ -3378,7 +3419,7 @@ void free_percpu(const void *objp)
 /*
 * We allocate for all cpus so we cannot use for online cpu here.
 */
- for_each_cpu(i)
+ for_each_possible_cpu(i)
 kfree(p->ptrs[i]);
 kfree(p);
 }
@@ -3398,63 +3439,86 @@ const char *kmem_cache_name(struct kmem_cache *cachep)
 EXPORT_SYMBOL_GPL(kmem_cache_name);
 /*
- * This initializes kmem_list3 for all nodes.
+ * This initializes kmem_list3 or resizes varioius caches for all nodes.
 */
 static int alloc_kmemlist(struct kmem_cache *cachep)
 {
 int node;
 struct kmem_list3 *l3;
- int err = 0;
+ struct array_cache *new_shared;
+ struct array_cache **new_alien;
 for_each_online_node(node) {
- struct array_cache *nc = NULL, *new;
- struct array_cache **new_alien = NULL;
-#ifdef CONFIG_NUMA
+ new_alien = alloc_alien_cache(node, cachep->limit);
 if (!new_alien)
 goto fail;
-#endif
- new = alloc_arraycache(node, cachep->shared*cachep->batchcount,
+
+ new_shared = alloc_arraycache(node,
+ cachep->shared*cachep->batchcount,
 0xbaadf00d);
- if (!new)
+ if (!new_shared) {
+ free_alien_cache(new_alien);
 goto fail;
+ }
+
 l3 = cachep->nodelists[node];
 if (l3) {
+ struct array_cache *shared = l3->shared;
+
 spin_lock_irq(&l3->list_lock);
- nc = cachep->nodelists[node]->shared;
- if (nc)
- free_block(cachep, nc->entry, nc->avail, node);
+ if (shared)
+ free_block(cachep, shared->entry,
+ shared->avail, node);
- l3->shared = new;
- if (!cachep->nodelists[node]->alien) {
+ l3->shared = new_shared;
+ if (!l3->alien) {
 l3->alien = new_alien;
 new_alien = NULL;
 }
 l3->free_limit = (1 + nr_cpus_node(node)) *
 cachep->batchcount + cachep->num;
 spin_unlock_irq(&l3->list_lock);
- kfree(nc);
+ kfree(shared);
 free_alien_cache(new_alien);
 continue;
 }
 l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node);
- if (!l3)
+ if (!l3) {
+ free_alien_cache(new_alien);
+ kfree(new_shared);
 goto fail;
+ }
 kmem_list3_init(l3);
 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
- l3->shared = new;
+ l3->shared = new_shared;
 l3->alien = new_alien;
 l3->free_limit = (1 + nr_cpus_node(node)) *
 cachep->batchcount + cachep->num;
 cachep->nodelists[node] = l3;
 }
- return err;
+ return 0;
+
 fail:
- err = -ENOMEM;
- return err;
+ if (!cachep->next.next) {
+ /* Cache is not active yet. Roll back what we did */
+ node--;
+ while (node >= 0) {
+ if (cachep->nodelists[node]) {
+ l3 = cachep->nodelists[node];
+
+ kfree(l3->shared);
+ free_alien_cache(l3->alien);
+ kfree(l3);
+ cachep->nodelists[node] = NULL;
+ }
+ node--;
+ }
+ }
+ return -ENOMEM;
 }
 struct ccupdate_struct {
@@ -3723,7 +3787,7 @@ static void print_slabinfo_header(struct seq_file *m)
 seq_puts(m, " : slabdata ");
 #if STATS
 seq_puts(m, " : globalstat "
- " ");
+ " ");
 seq_puts(m, " : cpustat ");
 #endif
 seq_putc(m, '\n');
@@ -3837,11 +3901,12 @@ static int s_show(struct seq_file *m, void *p)
 unsigned long max_freeable = cachep->max_freeable;
 unsigned long node_allocs = cachep->node_allocs;
 unsigned long node_frees = cachep->node_frees;
+ unsigned long overflows = cachep->node_overflow;
 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
- %4lu %4lu %4lu %4lu", allocs, high, grown,
+ %4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
 reaped, errors, max_freeable, node_allocs,
- node_frees);
+ node_frees, overflows);
 }
 /* cpu stats */
 {
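
A note on the central helper in this patch: transfer_objects() replaces the open-coded copy loop in cache_alloc_refill() and is also used by __drain_alien_cache() to push alien objects straight into the remote node's shared array, avoiding a round trip through the slab free lists. The standalone sketch below shows the same bookkeeping in plain, compilable userspace C. It is an illustration only, not kernel code: the struct is a simplified stand-in for the kernel's struct array_cache (just the fields the helper touches), and the fixed-size entry[] array, the min_uint() helper, and the values in main() are assumptions made for the demo.

#include <stdio.h>
#include <string.h>

/* Simplified stand-in for the kernel's struct array_cache. */
struct array_cache {
	unsigned int avail;   /* number of object pointers currently cached */
	unsigned int limit;   /* capacity of entry[] */
	unsigned int touched; /* set when the cache was recently used */
	void *entry[16];      /* cached object pointers (fixed size for this sketch) */
};

static unsigned int min_uint(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

/* Move up to max objects from one array cache to another; the caller is
 * responsible for any locking, exactly as in the patch. */
static int transfer_objects(struct array_cache *to,
			    struct array_cache *from, unsigned int max)
{
	/* Bounded by what the source has and the room left in the destination. */
	unsigned int nr = min_uint(min_uint(from->avail, max),
				   to->limit - to->avail);

	if (!nr)
		return 0;

	/* Take the most recently added pointers from the tail of the source. */
	memcpy(to->entry + to->avail, from->entry + from->avail - nr,
	       sizeof(void *) * nr);

	from->avail -= nr;
	to->avail += nr;
	to->touched = 1;
	return nr;
}

int main(void)
{
	static int objs[8];
	struct array_cache shared = { .avail = 8, .limit = 16 };
	struct array_cache percpu = { .avail = 0, .limit = 4 };
	unsigned int i;

	for (i = 0; i < 8; i++)
		shared.entry[i] = &objs[i];

	/* Mimic the refill path: ask for a batch of 6, but only 4 slots are
	 * free in the destination, so 4 objects move. */
	printf("moved %d objects\n", transfer_objects(&percpu, &shared, 6));
	printf("shared.avail=%u percpu.avail=%u\n", shared.avail, percpu.avail);
	return 0;
}

Because the copy starts at from->avail - nr, the most recently stored (and most likely cache-hot) pointers are handed over first, preserving the LIFO behaviour of the open-coded path this helper replaces in cache_alloc_refill().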