X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fslab.c;h=bfd654c0ef4149173bb2960e11b4674b1b470dbf;hb=3395ee0588795b0b3bd889c260e55959cf2b61f5;hp=792bfe320a8b2c0896f8568f8d07de40203f8cf2;hpb=3b9f6cb8a1ec791be79c6c7595fea922f12d1e64;p=linux-2.6 diff --git a/mm/slab.c b/mm/slab.c index 792bfe320a..bfd654c0ef 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -86,7 +86,6 @@ * All object allocations for a node occur from node specific slab lists. */ -#include #include #include #include @@ -314,7 +313,7 @@ static int drain_freelist(struct kmem_cache *cache, static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node); static int enable_cpucache(struct kmem_cache *cachep); -static void cache_reap(void *unused); +static void cache_reap(struct work_struct *unused); /* * This function must be completely optimized away if a constant is passed to @@ -731,7 +730,10 @@ static inline void init_lock_keys(void) } #endif -/* Guard access to the cache-chain. */ +/* + * 1. Guard access to the cache-chain. + * 2. Protect sanity of cpu_online_map against cpu hotplug events + */ static DEFINE_MUTEX(cache_chain_mutex); static struct list_head cache_chain; @@ -754,7 +756,7 @@ int slab_is_available(void) return g_cpucache_up == FULL; } -static DEFINE_PER_CPU(struct work_struct, reap_work); +static DEFINE_PER_CPU(struct delayed_work, reap_work); static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) { @@ -867,6 +869,22 @@ static void __slab_error(const char *function, struct kmem_cache *cachep, dump_stack(); } +/* + * By default on NUMA we use alien caches to stage the freeing of + * objects allocated from other nodes. This causes massive memory + * inefficiencies when using fake NUMA setup to split memory into a + * large number of small nodes, so it can be disabled on the command + * line + */ + +static int use_alien_caches __read_mostly = 1; +static int __init noaliencache_setup(char *s) +{ + use_alien_caches = 0; + return 1; +} +__setup("noaliencache", noaliencache_setup); + #ifdef CONFIG_NUMA /* * Special reaping functions for NUMA systems called from cache_reap(). @@ -884,7 +902,7 @@ static void init_reap_node(int cpu) if (node == MAX_NUMNODES) node = first_node(node_online_map); - __get_cpu_var(reap_node) = node; + per_cpu(reap_node, cpu) = node; } static void next_reap_node(void) @@ -917,16 +935,16 @@ static void next_reap_node(void) */ static void __devinit start_cpu_timer(int cpu) { - struct work_struct *reap_work = &per_cpu(reap_work, cpu); + struct delayed_work *reap_work = &per_cpu(reap_work, cpu); /* * When this gets called from do_initcalls via cpucache_init(), * init_workqueues() has already run, so keventd will be setup * at that time. */ - if (keventd_up() && reap_work->func == NULL) { + if (keventd_up() && reap_work->work.func == NULL) { init_reap_node(cpu); - INIT_WORK(reap_work, cache_reap, NULL); + INIT_DELAYED_WORK(reap_work, cache_reap); schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu); } } @@ -1107,15 +1125,18 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) int nodeid = slabp->nodeid; struct kmem_list3 *l3; struct array_cache *alien = NULL; + int node; + + node = numa_node_id(); /* * Make sure we are not freeing a object from another node to the array * cache on this cpu. */ - if (likely(slabp->nodeid == numa_node_id())) + if (likely(slabp->nodeid == node) || unlikely(!use_alien_caches)) return 0; - l3 = cachep->nodelists[numa_node_id()]; + l3 = cachep->nodelists[node]; STATS_INC_NODEFREES(cachep); if (l3->alien && l3->alien[nodeid]) { alien = l3->alien[nodeid]; @@ -1190,7 +1211,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, list_for_each_entry(cachep, &cache_chain, next) { struct array_cache *nc; struct array_cache *shared; - struct array_cache **alien; + struct array_cache **alien = NULL; nc = alloc_arraycache(node, cachep->limit, cachep->batchcount); @@ -1202,9 +1223,11 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, if (!shared) goto bad; - alien = alloc_alien_cache(node, cachep->limit); - if (!alien) - goto bad; + if (use_alien_caches) { + alien = alloc_alien_cache(node, cachep->limit); + if (!alien) + goto bad; + } cachep->array[cpu] = nc; l3 = cachep->nodelists[node]; BUG_ON(!l3); @@ -1228,12 +1251,18 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, kfree(shared); free_alien_cache(alien); } - mutex_unlock(&cache_chain_mutex); break; case CPU_ONLINE: + mutex_unlock(&cache_chain_mutex); start_cpu_timer(cpu); break; #ifdef CONFIG_HOTPLUG_CPU + case CPU_DOWN_PREPARE: + mutex_lock(&cache_chain_mutex); + break; + case CPU_DOWN_FAILED: + mutex_unlock(&cache_chain_mutex); + break; case CPU_DEAD: /* * Even if all the cpus of a node are down, we don't free the @@ -1244,8 +1273,8 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, * gets destroyed at kmem_cache_destroy(). */ /* fall thru */ +#endif case CPU_UP_CANCELED: - mutex_lock(&cache_chain_mutex); list_for_each_entry(cachep, &cache_chain, next) { struct array_cache *nc; struct array_cache *shared; @@ -1306,11 +1335,9 @@ free_array_cache: } mutex_unlock(&cache_chain_mutex); break; -#endif } return NOTIFY_OK; bad: - mutex_unlock(&cache_chain_mutex); return NOTIFY_BAD; } @@ -1326,7 +1353,6 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, { struct kmem_list3 *ptr; - BUG_ON(cachep->nodelists[nodeid] != list); ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid); BUG_ON(!ptr); @@ -1353,6 +1379,7 @@ void __init kmem_cache_init(void) struct cache_names *names; int i; int order; + int node; for (i = 0; i < NUM_INIT_LISTS; i++) { kmem_list3_init(&initkmem_list3[i]); @@ -1387,12 +1414,14 @@ void __init kmem_cache_init(void) * 6) Resize the head arrays of the kmalloc caches to their final sizes. */ + node = numa_node_id(); + /* 1) create the cache_cache */ INIT_LIST_HEAD(&cache_chain); list_add(&cache_cache.next, &cache_chain); cache_cache.colour_off = cache_line_size(); cache_cache.array[smp_processor_id()] = &initarray_cache.cache; - cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE]; + cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE]; cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size()); @@ -1497,19 +1526,18 @@ void __init kmem_cache_init(void) } /* 5) Replace the bootstrap kmem_list3's */ { - int node; + int nid; + /* Replace the static kmem_list3 structures for the boot cpu */ - init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], - numa_node_id()); + init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], node); - for_each_online_node(node) { + for_each_online_node(nid) { init_list(malloc_sizes[INDEX_AC].cs_cachep, - &initkmem_list3[SIZE_AC + node], node); + &initkmem_list3[SIZE_AC + nid], nid); if (INDEX_AC != INDEX_L3) { init_list(malloc_sizes[INDEX_L3].cs_cachep, - &initkmem_list3[SIZE_L3 + node], - node); + &initkmem_list3[SIZE_L3 + nid], nid); } } } @@ -1683,10 +1711,32 @@ static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) static void dump_line(char *data, int offset, int limit) { int i; + unsigned char error = 0; + int bad_count = 0; + printk(KERN_ERR "%03x:", offset); - for (i = 0; i < limit; i++) + for (i = 0; i < limit; i++) { + if (data[offset + i] != POISON_FREE) { + error = data[offset + i]; + bad_count++; + } printk(" %02x", (unsigned char)data[offset + i]); + } printk("\n"); + + if (bad_count == 1) { + error ^= POISON_FREE; + if (!(error & (error - 1))) { + printk(KERN_ERR "Single bit error detected. Probably " + "bad RAM.\n"); +#ifdef CONFIG_X86 + printk(KERN_ERR "Run memtest86+ or a similar memory " + "test tool.\n"); +#else + printk(KERN_ERR "Run a memory test tool.\n"); +#endif + } + } } #endif @@ -2073,11 +2123,9 @@ kmem_cache_create (const char *name, size_t size, size_t align, } /* - * Prevent CPUs from coming and going. - * lock_cpu_hotplug() nests outside cache_chain_mutex + * We use cache_chain_mutex to ensure a consistent view of + * cpu_online_map as well. Please see cpuup_callback */ - lock_cpu_hotplug(); - mutex_lock(&cache_chain_mutex); list_for_each_entry(pc, &cache_chain, next) { @@ -2172,18 +2220,17 @@ kmem_cache_create (const char *name, size_t size, size_t align, if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER) ralign = BYTES_PER_WORD; - /* 2) arch mandated alignment: disables debug if necessary */ + /* 2) arch mandated alignment */ if (ralign < ARCH_SLAB_MINALIGN) { ralign = ARCH_SLAB_MINALIGN; - if (ralign > BYTES_PER_WORD) - flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); } - /* 3) caller mandated alignment: disables debug if necessary */ + /* 3) caller mandated alignment */ if (ralign < align) { ralign = align; - if (ralign > BYTES_PER_WORD) - flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); } + /* disable debug if necessary */ + if (ralign > BYTES_PER_WORD) + flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); /* * 4) Store it. */ @@ -2301,7 +2348,6 @@ oops: panic("kmem_cache_create(): failed to create slab `%s'\n", name); mutex_unlock(&cache_chain_mutex); - unlock_cpu_hotplug(); return cachep; } EXPORT_SYMBOL(kmem_cache_create); @@ -2419,6 +2465,7 @@ out: return nr_freed; } +/* Called with cache_chain_mutex held to protect against cpu hotplug */ static int __cache_shrink(struct kmem_cache *cachep) { int ret = 0, i = 0; @@ -2449,9 +2496,13 @@ static int __cache_shrink(struct kmem_cache *cachep) */ int kmem_cache_shrink(struct kmem_cache *cachep) { + int ret; BUG_ON(!cachep || in_interrupt()); - return __cache_shrink(cachep); + mutex_lock(&cache_chain_mutex); + ret = __cache_shrink(cachep); + mutex_unlock(&cache_chain_mutex); + return ret; } EXPORT_SYMBOL(kmem_cache_shrink); @@ -2475,23 +2526,16 @@ void kmem_cache_destroy(struct kmem_cache *cachep) { BUG_ON(!cachep || in_interrupt()); - /* Don't let CPUs to come and go */ - lock_cpu_hotplug(); - /* Find the cache in the chain of caches. */ mutex_lock(&cache_chain_mutex); /* * the chain is never empty, cache_cache is never destroyed */ list_del(&cachep->next); - mutex_unlock(&cache_chain_mutex); - if (__cache_shrink(cachep)) { slab_error(cachep, "Can't free all objects"); - mutex_lock(&cache_chain_mutex); list_add(&cachep->next, &cache_chain); mutex_unlock(&cache_chain_mutex); - unlock_cpu_hotplug(); return; } @@ -2499,7 +2543,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep) synchronize_rcu(); __kmem_cache_destroy(cachep); - unlock_cpu_hotplug(); + mutex_unlock(&cache_chain_mutex); } EXPORT_SYMBOL(kmem_cache_destroy); @@ -2897,6 +2941,9 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) int batchcount; struct kmem_list3 *l3; struct array_cache *ac; + int node; + + node = numa_node_id(); check_irq_off(); ac = cpu_cache_get(cachep); @@ -2910,7 +2957,7 @@ retry: */ batchcount = BATCHREFILL_LIMIT; } - l3 = cachep->nodelists[numa_node_id()]; + l3 = cachep->nodelists[node]; BUG_ON(ac->avail > 0 || !l3); spin_lock(&l3->list_lock); @@ -2940,7 +2987,7 @@ retry: STATS_SET_HIGH(cachep); ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, - numa_node_id()); + node); } check_slabp(cachep, slabp); @@ -2959,7 +3006,7 @@ alloc_done: if (unlikely(!ac->avail)) { int x; - x = cache_grow(cachep, flags, numa_node_id()); + x = cache_grow(cachep, flags, node); /* cache_grow can reenable interrupts, then ac could change. */ ac = cpu_cache_get(cachep); @@ -3035,6 +3082,12 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, cachep->ctor(objp, cachep, ctor_flags); } +#if ARCH_SLAB_MINALIGN + if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) { + printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n", + objp, ARCH_SLAB_MINALIGN); + } +#endif return objp; } #else @@ -3124,12 +3177,15 @@ void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) struct zone **z; void *obj = NULL; - for (z = zonelist->zones; *z && !obj; z++) + for (z = zonelist->zones; *z && !obj; z++) { + int nid = zone_to_nid(*z); + if (zone_idx(*z) <= ZONE_NORMAL && - cpuset_zone_allowed(*z, flags)) + cpuset_zone_allowed(*z, flags) && + cache->nodelists[nid]) obj = __cache_alloc_node(cache, - flags | __GFP_THISNODE, - zone_to_nid(*z)); + flags | __GFP_THISNODE, nid); + } return obj; } @@ -3466,22 +3522,25 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, } +#ifdef CONFIG_DEBUG_SLAB void *__kmalloc(size_t size, gfp_t flags) { -#ifndef CONFIG_DEBUG_SLAB - return __do_kmalloc(size, flags, NULL); -#else return __do_kmalloc(size, flags, __builtin_return_address(0)); -#endif } EXPORT_SYMBOL(__kmalloc); -#ifdef CONFIG_DEBUG_SLAB void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller) { return __do_kmalloc(size, flags, caller); } EXPORT_SYMBOL(__kmalloc_track_caller); + +#else +void *__kmalloc(size_t size, gfp_t flags) +{ + return __do_kmalloc(size, flags, NULL); +} +EXPORT_SYMBOL(__kmalloc); #endif /** @@ -3549,13 +3608,15 @@ static int alloc_kmemlist(struct kmem_cache *cachep) int node; struct kmem_list3 *l3; struct array_cache *new_shared; - struct array_cache **new_alien; + struct array_cache **new_alien = NULL; for_each_online_node(node) { - new_alien = alloc_alien_cache(node, cachep->limit); - if (!new_alien) - goto fail; + if (use_alien_caches) { + new_alien = alloc_alien_cache(node, cachep->limit); + if (!new_alien) + goto fail; + } new_shared = alloc_arraycache(node, cachep->shared*cachep->batchcount, @@ -3781,7 +3842,7 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, * If we cannot acquire the cache chain mutex then just give up - we'll try * again on the next iteration. */ -static void cache_reap(void *unused) +static void cache_reap(struct work_struct *unused) { struct kmem_cache *searchp; struct kmem_list3 *l3;