X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fslub.c;h=fa28b16236442d80408c792f0140c26015846eca;hb=06b32f3ab6df4c7489729f94bdc7093c72681d4b;hp=f7c120b93c41fa14022cb64dc28a36fb2d61f109;hpb=7ae439ce0c01d7db0c70d1542985969e95ef750d;p=linux-2.6 diff --git a/mm/slub.c b/mm/slub.c index f7c120b93c..fa28b16236 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -78,36 +78,63 @@ * * Overloading of page flags that are otherwise used for LRU management. * - * PageActive The slab is used as a cpu cache. Allocations - * may be performed from the slab. The slab is not - * on any slab list and cannot be moved onto one. + * PageActive The slab is frozen and exempt from list processing. + * This means that the slab is dedicated to a purpose + * such as satisfying allocations for a specific + * processor. Objects may be freed in the slab while + * it is frozen but slab_free will then skip the usual + * list operations. It is up to the processor holding + * the slab to integrate the slab into the slab lists + * when the slab is no longer needed. + * + * One use of this flag is to mark slabs that are + * used for allocations. Then such a slab becomes a cpu + * slab. The cpu slab may be equipped with an additional + * lockless_freelist that allows lockless access to + * free objects in addition to the regular freelist + * that requires the slab lock. * * PageError Slab requires special handling due to debug * options set. This moves slab handling out of - * the fast path. + * the fast path and disables lockless freelists. */ -static inline int SlabDebug(struct page *page) -{ +#define FROZEN (1 << PG_active) + #ifdef CONFIG_SLUB_DEBUG - return PageError(page); +#define SLABDEBUG (1 << PG_error) #else - return 0; +#define SLABDEBUG 0 #endif + +static inline int SlabFrozen(struct page *page) +{ + return page->flags & FROZEN; +} + +static inline void SetSlabFrozen(struct page *page) +{ + page->flags |= FROZEN; +} + +static inline void ClearSlabFrozen(struct page *page) +{ + page->flags &= ~FROZEN; +} + +static inline int SlabDebug(struct page *page) +{ + return page->flags & SLABDEBUG; } static inline void SetSlabDebug(struct page *page) { -#ifdef CONFIG_SLUB_DEBUG - SetPageError(page); -#endif + page->flags |= SLABDEBUG; } static inline void ClearSlabDebug(struct page *page) { -#ifdef CONFIG_SLUB_DEBUG - ClearPageError(page); -#endif + page->flags &= ~SLABDEBUG; } /* @@ -715,6 +742,22 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) return search == NULL; } +static void trace(struct kmem_cache *s, struct page *page, void *object, int alloc) +{ + if (s->flags & SLAB_TRACE) { + printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n", + s->name, + alloc ? "alloc" : "free", + object, page->inuse, + page->freelist); + + if (!alloc) + print_section("Object", (void *)object, s->objsize); + + dump_stack(); + } +} + /* * Tracking of fully allocated slabs for debugging purposes. */ @@ -739,8 +782,18 @@ static void remove_full(struct kmem_cache *s, struct page *page) spin_unlock(&n->list_lock); } -static int alloc_object_checks(struct kmem_cache *s, struct page *page, - void *object) +static void setup_object_debug(struct kmem_cache *s, struct page *page, + void *object) +{ + if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))) + return; + + init_object(s, object, 0); + init_tracking(s, object); +} + +static int alloc_debug_processing(struct kmem_cache *s, struct page *page, + void *object, void *addr) { if (!check_slab(s, page)) goto bad; @@ -755,13 +808,16 @@ static int alloc_object_checks(struct kmem_cache *s, struct page *page, goto bad; } - if (!object) - return 1; - - if (!check_object(s, page, object, 0)) + if (object && !check_object(s, page, object, 0)) goto bad; + /* Success perform special debug activities for allocs */ + if (s->flags & SLAB_STORE_USER) + set_track(s, object, TRACK_ALLOC, addr); + trace(s, page, object, 1); + init_object(s, object, 1); return 1; + bad: if (PageSlab(page)) { /* @@ -779,8 +835,8 @@ bad: return 0; } -static int free_object_checks(struct kmem_cache *s, struct page *page, - void *object) +static int free_debug_processing(struct kmem_cache *s, struct page *page, + void *object, void *addr) { if (!check_slab(s, page)) goto fail; @@ -814,29 +870,22 @@ static int free_object_checks(struct kmem_cache *s, struct page *page, "to slab %s", object, page->slab->name); goto fail; } + + /* Special debug activities for freeing objects */ + if (!SlabFrozen(page) && !page->freelist) + remove_full(s, page); + if (s->flags & SLAB_STORE_USER) + set_track(s, object, TRACK_FREE, addr); + trace(s, page, object, 0); + init_object(s, object, 0); return 1; + fail: printk(KERN_ERR "@@@ SLUB: %s slab 0x%p object at 0x%p not freed.\n", s->name, page, object); return 0; } -static void trace(struct kmem_cache *s, struct page *page, void *object, int alloc) -{ - if (s->flags & SLAB_TRACE) { - printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n", - s->name, - alloc ? "alloc" : "free", - object, page->inuse, - page->freelist); - - if (!alloc) - print_section("Object", (void *)object, s->objsize); - - dump_stack(); - } -} - static int __init setup_slub_debug(char *str) { if (!str || *str != '=') @@ -887,13 +936,13 @@ static void kmem_cache_open_debug_check(struct kmem_cache *s) * On 32 bit platforms the limit is 256k. On 64bit platforms * the limit is 512k. * - * Debugging or ctor/dtors may create a need to move the free + * Debugging or ctor may create a need to move the free * pointer. Fail if this happens. */ - if (s->size >= 65535 * sizeof(void *)) { + if (s->objsize >= 65535 * sizeof(void *)) { BUG_ON(s->flags & (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | SLAB_DESTROY_BY_RCU)); - BUG_ON(s->ctor || s->dtor); + BUG_ON(s->ctor); } else /* @@ -905,26 +954,20 @@ static void kmem_cache_open_debug_check(struct kmem_cache *s) s->flags |= slub_debug; } #else +static inline void setup_object_debug(struct kmem_cache *s, + struct page *page, void *object) {} -static inline int alloc_object_checks(struct kmem_cache *s, - struct page *page, void *object) { return 0; } +static inline int alloc_debug_processing(struct kmem_cache *s, + struct page *page, void *object, void *addr) { return 0; } -static inline int free_object_checks(struct kmem_cache *s, - struct page *page, void *object) { return 0; } +static inline int free_debug_processing(struct kmem_cache *s, + struct page *page, void *object, void *addr) { return 0; } -static inline void add_full(struct kmem_cache_node *n, struct page *page) {} -static inline void remove_full(struct kmem_cache *s, struct page *page) {} -static inline void trace(struct kmem_cache *s, struct page *page, - void *object, int alloc) {} -static inline void init_object(struct kmem_cache *s, - void *object, int active) {} -static inline void init_tracking(struct kmem_cache *s, void *object) {} static inline int slab_pad_check(struct kmem_cache *s, struct page *page) { return 1; } static inline int check_object(struct kmem_cache *s, struct page *page, void *object, int active) { return 1; } -static inline void set_track(struct kmem_cache *s, void *object, - enum track_item alloc, void *addr) {} +static inline void add_full(struct kmem_cache_node *n, struct page *page) {} static inline void kmem_cache_open_debug_check(struct kmem_cache *s) {} #define slub_debug 0 #endif @@ -961,13 +1004,9 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) static void setup_object(struct kmem_cache *s, struct page *page, void *object) { - if (SlabDebug(page)) { - init_object(s, object, 0); - init_tracking(s, object); - } - + setup_object_debug(s, page, object); if (unlikely(s->ctor)) - s->ctor(object, s, SLAB_CTOR_CONSTRUCTOR); + s->ctor(object, s, 0); } static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) @@ -1014,6 +1053,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) set_freepointer(s, last, NULL); page->freelist = start; + page->lockless_freelist = NULL; page->inuse = 0; out: if (flags & __GFP_WAIT) @@ -1025,15 +1065,12 @@ static void __free_slab(struct kmem_cache *s, struct page *page) { int pages = 1 << s->order; - if (unlikely(SlabDebug(page) || s->dtor)) { + if (unlikely(SlabDebug(page))) { void *p; slab_pad_check(s, page); - for_each_object(p, s, page_address(page)) { - if (s->dtor) - s->dtor(p, s, 0); + for_each_object(p, s, page_address(page)) check_object(s, page, p, 0); - } } mod_zone_page_state(page_zone(page), @@ -1133,11 +1170,12 @@ static void remove_partial(struct kmem_cache *s, * * Must hold list_lock. */ -static int lock_and_del_slab(struct kmem_cache_node *n, struct page *page) +static inline int lock_and_freeze_slab(struct kmem_cache_node *n, struct page *page) { if (slab_trylock(page)) { list_del(&page->lru); n->nr_partial--; + SetSlabFrozen(page); return 1; } return 0; @@ -1161,7 +1199,7 @@ static struct page *get_partial_node(struct kmem_cache_node *n) spin_lock(&n->list_lock); list_for_each_entry(page, &n->partial, lru) - if (lock_and_del_slab(n, page)) + if (lock_and_freeze_slab(n, page)) goto out; page = NULL; out: @@ -1240,10 +1278,11 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) * * On exit the slab lock will have been dropped. */ -static void putback_slab(struct kmem_cache *s, struct page *page) +static void unfreeze_slab(struct kmem_cache *s, struct page *page) { struct kmem_cache_node *n = get_node(s, page_to_nid(page)); + ClearSlabFrozen(page); if (page->inuse) { if (page->freelist) @@ -1276,10 +1315,25 @@ static void putback_slab(struct kmem_cache *s, struct page *page) */ static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu) { - s->cpu_slab[cpu] = NULL; - ClearPageActive(page); + /* + * Merge cpu freelist into freelist. Typically we get here + * because both freelists are empty. So this is unlikely + * to occur. + */ + while (unlikely(page->lockless_freelist)) { + void **object; + + /* Retrieve object from cpu_freelist */ + object = page->lockless_freelist; + page->lockless_freelist = page->lockless_freelist[page->offset]; - putback_slab(s, page); + /* And put onto the regular freelist */ + object[page->offset] = page->freelist; + page->freelist = object; + page->inuse--; + } + s->cpu_slab[cpu] = NULL; + unfreeze_slab(s, page); } static void flush_slab(struct kmem_cache *s, struct page *page, int cpu) @@ -1322,47 +1376,46 @@ static void flush_all(struct kmem_cache *s) } /* - * slab_alloc is optimized to only modify two cachelines on the fast path - * (aside from the stack): + * Slow path. The lockless freelist is empty or we need to perform + * debugging duties. * - * 1. The page struct - * 2. The first cacheline of the object to be allocated. + * Interrupts are disabled. * - * The only other cache lines that are read (apart from code) is the - * per cpu array in the kmem_cache struct. + * Processing is still very fast if new objects have been freed to the + * regular freelist. In that case we simply take over the regular freelist + * as the lockless freelist and zap the regular freelist. * - * Fastpath is not possible if we need to get a new slab or have - * debugging enabled (which means all slabs are marked with SlabDebug) + * If that is not working then we fall back to the partial lists. We take the + * first element of the freelist as the object to allocate now and move the + * rest of the freelist to the lockless freelist. + * + * And if we were unable to get a new slab from the partial slab lists then + * we need to allocate a new slab. This is slowest path since we may sleep. */ -static void *slab_alloc(struct kmem_cache *s, - gfp_t gfpflags, int node, void *addr) +static void *__slab_alloc(struct kmem_cache *s, + gfp_t gfpflags, int node, void *addr, struct page *page) { - struct page *page; void **object; - unsigned long flags; - int cpu; + int cpu = smp_processor_id(); - local_irq_save(flags); - cpu = smp_processor_id(); - page = s->cpu_slab[cpu]; if (!page) goto new_slab; slab_lock(page); if (unlikely(node != -1 && page_to_nid(page) != node)) goto another_slab; -redo: +load_freelist: object = page->freelist; if (unlikely(!object)) goto another_slab; if (unlikely(SlabDebug(page))) goto debug; -have_object: - page->inuse++; - page->freelist = object[page->offset]; + object = page->freelist; + page->lockless_freelist = object[page->offset]; + page->inuse = s->objects; + page->freelist = NULL; slab_unlock(page); - local_irq_restore(flags); return object; another_slab: @@ -1370,11 +1423,9 @@ another_slab: new_slab: page = get_partial(s, gfpflags, node); - if (likely(page)) { -have_slab: + if (page) { s->cpu_slab[cpu] = page; - SetPageActive(page); - goto redo; + goto load_freelist; } page = new_slab(s, gfpflags, node); @@ -1397,24 +1448,58 @@ have_slab: discard_slab(s, page); page = s->cpu_slab[cpu]; slab_lock(page); - goto redo; + goto load_freelist; } /* New slab does not fit our expectations */ flush_slab(s, s->cpu_slab[cpu], cpu); } slab_lock(page); - goto have_slab; + SetSlabFrozen(page); + s->cpu_slab[cpu] = page; + goto load_freelist; } - local_irq_restore(flags); return NULL; debug: - if (!alloc_object_checks(s, page, object)) + object = page->freelist; + if (!alloc_debug_processing(s, page, object, addr)) goto another_slab; - if (s->flags & SLAB_STORE_USER) - set_track(s, object, TRACK_ALLOC, addr); - trace(s, page, object, 1); - init_object(s, object, 1); - goto have_object; + + page->inuse++; + page->freelist = object[page->offset]; + slab_unlock(page); + return object; +} + +/* + * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) + * have the fastpath folded into their functions. So no function call + * overhead for requests that can be satisfied on the fastpath. + * + * The fastpath works by first checking if the lockless freelist can be used. + * If not then __slab_alloc is called for slow processing. + * + * Otherwise we can simply pick the next object from the lockless free list. + */ +static void __always_inline *slab_alloc(struct kmem_cache *s, + gfp_t gfpflags, int node, void *addr) +{ + struct page *page; + void **object; + unsigned long flags; + + local_irq_save(flags); + page = s->cpu_slab[smp_processor_id()]; + if (unlikely(!page || !page->lockless_freelist || + (node != -1 && page_to_nid(page) != node))) + + object = __slab_alloc(s, gfpflags, node, addr, page); + + else { + object = page->lockless_freelist; + page->lockless_freelist = object[page->offset]; + } + local_irq_restore(flags); + return object; } void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) @@ -1432,20 +1517,19 @@ EXPORT_SYMBOL(kmem_cache_alloc_node); #endif /* - * The fastpath only writes the cacheline of the page struct and the first - * cacheline of the object. + * Slow patch handling. This may still be called frequently since objects + * have a longer lifetime than the cpu slabs in most processing loads. * - * We read the cpu_slab cacheline to check if the slab is the per cpu - * slab for this processor. + * So we still attempt to reduce cache line usage. Just take the slab + * lock and free the item. If there is no additional partial page + * handling required then we can return immediately. */ -static void slab_free(struct kmem_cache *s, struct page *page, +static void __slab_free(struct kmem_cache *s, struct page *page, void *x, void *addr) { void *prior; void **object = (void *)x; - unsigned long flags; - local_irq_save(flags); slab_lock(page); if (unlikely(SlabDebug(page))) @@ -1455,11 +1539,7 @@ checks_ok: page->freelist = object; page->inuse--; - if (unlikely(PageActive(page))) - /* - * Cpu slabs are never on partial lists and are - * never freed. - */ + if (unlikely(SlabFrozen(page))) goto out_unlock; if (unlikely(!page->inuse)) @@ -1475,7 +1555,6 @@ checks_ok: out_unlock: slab_unlock(page); - local_irq_restore(flags); return; slab_empty: @@ -1487,21 +1566,42 @@ slab_empty: slab_unlock(page); discard_slab(s, page); - local_irq_restore(flags); return; debug: - if (!free_object_checks(s, page, x)) + if (!free_debug_processing(s, page, x, addr)) goto out_unlock; - if (!PageActive(page) && !page->freelist) - remove_full(s, page); - if (s->flags & SLAB_STORE_USER) - set_track(s, x, TRACK_FREE, addr); - trace(s, page, object, 0); - init_object(s, object, 0); goto checks_ok; } +/* + * Fastpath with forced inlining to produce a kfree and kmem_cache_free that + * can perform fastpath freeing without additional function calls. + * + * The fastpath is only possible if we are freeing to the current cpu slab + * of this processor. This typically the case if we have just allocated + * the item before. + * + * If fastpath is not possible then fall back to __slab_free where we deal + * with all sorts of special processing. + */ +static void __always_inline slab_free(struct kmem_cache *s, + struct page *page, void *x, void *addr) +{ + void **object = (void *)x; + unsigned long flags; + + local_irq_save(flags); + if (likely(page == s->cpu_slab[smp_processor_id()] && + !SlabDebug(page))) { + object[page->offset] = page->lockless_freelist; + page->lockless_freelist = object; + } else + __slab_free(s, page, x, addr); + + local_irq_restore(flags); +} + void kmem_cache_free(struct kmem_cache *s, void *x) { struct page *page; @@ -1707,7 +1807,7 @@ static struct kmem_cache_node * __init early_kmem_cache_node_alloc(gfp_t gfpflag page->freelist = get_freepointer(kmalloc_caches, n); page->inuse++; kmalloc_caches->node[node] = n; - init_object(kmalloc_caches, n, 1); + setup_object_debug(kmalloc_caches, page, n); init_kmem_cache_node(n); atomic_long_inc(&n->nr_slabs); add_partial(n, page); @@ -1789,7 +1889,7 @@ static int calculate_sizes(struct kmem_cache *s) * then we should never poison the object itself. */ if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) && - !s->ctor && !s->dtor) + !s->ctor) s->flags |= __OBJECT_POISON; else s->flags &= ~__OBJECT_POISON; @@ -1817,9 +1917,8 @@ static int calculate_sizes(struct kmem_cache *s) */ s->inuse = size; -#ifdef CONFIG_SLUB_DEBUG if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) || - s->ctor || s->dtor)) { + s->ctor)) { /* * Relocate free pointer after the object if it is not * permitted to overwrite the first word of the object on @@ -1832,6 +1931,7 @@ static int calculate_sizes(struct kmem_cache *s) size += sizeof(void *); } +#ifdef CONFIG_SLUB_DEBUG if (flags & SLAB_STORE_USER) /* * Need to store information about allocs and frees after @@ -1888,13 +1988,11 @@ static int calculate_sizes(struct kmem_cache *s) static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, const char *name, size_t size, size_t align, unsigned long flags, - void (*ctor)(void *, struct kmem_cache *, unsigned long), - void (*dtor)(void *, struct kmem_cache *, unsigned long)) + void (*ctor)(void *, struct kmem_cache *, unsigned long)) { memset(s, 0, kmem_size); s->name = name; s->ctor = ctor; - s->dtor = dtor; s->objsize = size; s->flags = flags; s->align = align; @@ -2079,7 +2177,7 @@ static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s, down_write(&slub_lock); if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN, - flags, NULL, NULL)) + flags, NULL)) goto panic; list_add(&s->list, &slab_caches); @@ -2143,7 +2241,7 @@ void *__kmalloc(size_t size, gfp_t flags) if (s) return slab_alloc(s, flags, -1, __builtin_return_address(0)); - return NULL; + return ZERO_SIZE_PTR; } EXPORT_SYMBOL(__kmalloc); @@ -2154,16 +2252,20 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) if (s) return slab_alloc(s, flags, node, __builtin_return_address(0)); - return NULL; + return ZERO_SIZE_PTR; } EXPORT_SYMBOL(__kmalloc_node); #endif size_t ksize(const void *object) { - struct page *page = get_object_page(object); + struct page *page; struct kmem_cache *s; + if (object == ZERO_SIZE_PTR) + return 0; + + page = get_object_page(object); BUG_ON(!page); s = page->slab; BUG_ON(!s); @@ -2195,7 +2297,13 @@ void kfree(const void *x) struct kmem_cache *s; struct page *page; - if (!x) + /* + * This has to be an unsigned comparison. According to Linus + * some gcc version treat a pointer as a signed entity. Then + * this comparison would be true for all "negative" pointers + * (which would cover the whole upper half of the address space). + */ + if ((unsigned long)x <= (unsigned long)ZERO_SIZE_PTR) return; page = virt_to_head_page(x); @@ -2300,12 +2408,12 @@ void *krealloc(const void *p, size_t new_size, gfp_t flags) void *ret; size_t ks; - if (unlikely(!p)) + if (unlikely(!p || p == ZERO_SIZE_PTR)) return kmalloc(new_size, flags); if (unlikely(!new_size)) { kfree(p); - return NULL; + return ZERO_SIZE_PTR; } ks = ksize(p); @@ -2328,6 +2436,7 @@ EXPORT_SYMBOL(krealloc); void __init kmem_cache_init(void) { int i; + int caches = 0; #ifdef CONFIG_NUMA /* @@ -2337,20 +2446,30 @@ void __init kmem_cache_init(void) */ create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", sizeof(struct kmem_cache_node), GFP_KERNEL); + kmalloc_caches[0].refcount = -1; + caches++; #endif /* Able to allocate the per node structures */ slab_state = PARTIAL; /* Caches that are not of the two-to-the-power-of size */ - create_kmalloc_cache(&kmalloc_caches[1], + if (KMALLOC_MIN_SIZE <= 64) { + create_kmalloc_cache(&kmalloc_caches[1], "kmalloc-96", 96, GFP_KERNEL); - create_kmalloc_cache(&kmalloc_caches[2], + caches++; + } + if (KMALLOC_MIN_SIZE <= 128) { + create_kmalloc_cache(&kmalloc_caches[2], "kmalloc-192", 192, GFP_KERNEL); + caches++; + } - for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) + for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { create_kmalloc_cache(&kmalloc_caches[i], "kmalloc", 1 << i, GFP_KERNEL); + caches++; + } slab_state = UP; @@ -2363,13 +2482,12 @@ void __init kmem_cache_init(void) register_cpu_notifier(&slab_notifier); #endif - if (nr_cpu_ids) /* Remove when nr_cpu_ids is fixed upstream ! */ - kmem_size = offsetof(struct kmem_cache, cpu_slab) - + nr_cpu_ids * sizeof(struct page *); + kmem_size = offsetof(struct kmem_cache, cpu_slab) + + nr_cpu_ids * sizeof(struct page *); printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," - " Processors=%d, Nodes=%d\n", - KMALLOC_SHIFT_HIGH, cache_line_size(), + " CPUs=%d, Nodes=%d\n", + caches, cache_line_size(), slub_min_order, slub_max_order, slub_min_objects, nr_cpu_ids, nr_node_ids); } @@ -2382,7 +2500,13 @@ static int slab_unmergeable(struct kmem_cache *s) if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE)) return 1; - if (s->ctor || s->dtor) + if (s->ctor) + return 1; + + /* + * We may have set a slab to be unmergeable during bootstrap. + */ + if (s->refcount < 0) return 1; return 0; @@ -2390,15 +2514,14 @@ static int slab_unmergeable(struct kmem_cache *s) static struct kmem_cache *find_mergeable(size_t size, size_t align, unsigned long flags, - void (*ctor)(void *, struct kmem_cache *, unsigned long), - void (*dtor)(void *, struct kmem_cache *, unsigned long)) + void (*ctor)(void *, struct kmem_cache *, unsigned long)) { struct list_head *h; if (slub_nomerge || (flags & SLUB_NEVER_MERGE)) return NULL; - if (ctor || dtor) + if (ctor) return NULL; size = ALIGN(size, sizeof(void *)); @@ -2440,8 +2563,9 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, { struct kmem_cache *s; + BUG_ON(dtor); down_write(&slub_lock); - s = find_mergeable(size, align, flags, dtor, ctor); + s = find_mergeable(size, align, flags, ctor); if (s) { s->refcount++; /* @@ -2455,7 +2579,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, } else { s = kmalloc(kmem_size, GFP_KERNEL); if (s && kmem_cache_open(s, GFP_KERNEL, name, - size, align, flags, ctor, dtor)) { + size, align, flags, ctor)) { if (sysfs_slab_add(s)) { kfree(s); goto err; @@ -2503,6 +2627,19 @@ static void for_all_slabs(void (*func)(struct kmem_cache *, int), int cpu) up_read(&slub_lock); } +/* + * Version of __flush_cpu_slab for the case that interrupts + * are enabled. + */ +static void cpu_slab_flush(struct kmem_cache *s, int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + __flush_cpu_slab(s, cpu); + local_irq_restore(flags); +} + /* * Use the cpu notifier to insure that the cpu slabs are flushed when * necessary. @@ -2514,8 +2651,10 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: - for_all_slabs(__flush_cpu_slab, cpu); + case CPU_DEAD_FROZEN: + for_all_slabs(cpu_slab_flush, cpu); break; default: break; @@ -2528,97 +2667,12 @@ static struct notifier_block __cpuinitdata slab_notifier = #endif -#ifdef CONFIG_NUMA - -/***************************************************************** - * Generic reaper used to support the page allocator - * (the cpu slabs are reaped by a per slab workqueue). - * - * Maybe move this to the page allocator? - ****************************************************************/ - -static DEFINE_PER_CPU(unsigned long, reap_node); - -static void init_reap_node(int cpu) -{ - int node; - - node = next_node(cpu_to_node(cpu), node_online_map); - if (node == MAX_NUMNODES) - node = first_node(node_online_map); - - __get_cpu_var(reap_node) = node; -} - -static void next_reap_node(void) -{ - int node = __get_cpu_var(reap_node); - - /* - * Also drain per cpu pages on remote zones - */ - if (node != numa_node_id()) - drain_node_pages(node); - - node = next_node(node, node_online_map); - if (unlikely(node >= MAX_NUMNODES)) - node = first_node(node_online_map); - __get_cpu_var(reap_node) = node; -} -#else -#define init_reap_node(cpu) do { } while (0) -#define next_reap_node(void) do { } while (0) -#endif - -#define REAPTIMEOUT_CPUC (2*HZ) - -#ifdef CONFIG_SMP -static DEFINE_PER_CPU(struct delayed_work, reap_work); - -static void cache_reap(struct work_struct *unused) -{ - next_reap_node(); - refresh_cpu_vm_stats(smp_processor_id()); - schedule_delayed_work(&__get_cpu_var(reap_work), - REAPTIMEOUT_CPUC); -} - -static void __devinit start_cpu_timer(int cpu) -{ - struct delayed_work *reap_work = &per_cpu(reap_work, cpu); - - /* - * When this gets called from do_initcalls via cpucache_init(), - * init_workqueues() has already run, so keventd will be setup - * at that time. - */ - if (keventd_up() && reap_work->work.func == NULL) { - init_reap_node(cpu); - INIT_DELAYED_WORK(reap_work, cache_reap); - schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu); - } -} - -static int __init cpucache_init(void) -{ - int cpu; - - /* - * Register the timers that drain pcp pages and update vm statistics - */ - for_each_online_cpu(cpu) - start_cpu_timer(cpu); - return 0; -} -__initcall(cpucache_init); -#endif - void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller) { struct kmem_cache *s = get_slab(size, gfpflags); if (!s) - return NULL; + return ZERO_SIZE_PTR; return slab_alloc(s, gfpflags, -1, caller); } @@ -2629,7 +2683,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, struct kmem_cache *s = get_slab(size, gfpflags); if (!s) - return NULL; + return ZERO_SIZE_PTR; return slab_alloc(s, gfpflags, node, caller); } @@ -2823,7 +2877,7 @@ static int alloc_loc_track(struct loc_track *t, unsigned long max) order = get_order(sizeof(struct location) * max); - l = (void *)__get_free_pages(GFP_KERNEL, order); + l = (void *)__get_free_pages(GFP_ATOMIC, order); if (!l) return 0; @@ -3179,17 +3233,6 @@ static ssize_t ctor_show(struct kmem_cache *s, char *buf) } SLAB_ATTR_RO(ctor); -static ssize_t dtor_show(struct kmem_cache *s, char *buf) -{ - if (s->dtor) { - int n = sprint_symbol(buf, (unsigned long)s->dtor); - - return n + sprintf(buf + n, "\n"); - } - return 0; -} -SLAB_ATTR_RO(dtor); - static ssize_t aliases_show(struct kmem_cache *s, char *buf) { return sprintf(buf, "%d\n", s->refcount - 1); @@ -3421,7 +3464,6 @@ static struct attribute * slab_attrs[] = { &partial_attr.attr, &cpu_slabs_attr.attr, &ctor_attr.attr, - &dtor_attr.attr, &aliases_attr.attr, &align_attr.attr, &sanity_checks_attr.attr,