X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fslub.c;h=f426f9bc644b992fd12ec535ac8510b93682fe0c;hb=54f9f80d6543fb7b157d3b11e2e7911dc1379790;hp=addb20a6d67d53e73e2dba2da6492ac0ee93aa38;hpb=99364df764bbef327be2f8b8ffcfbb41a4a1af4d;p=linux-2.6 diff --git a/mm/slub.c b/mm/slub.c index addb20a6d6..f426f9bc64 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -90,7 +90,7 @@ * One use of this flag is to mark slabs that are * used for allocations. Then such a slab becomes a cpu * slab. The cpu slab may be equipped with an additional - * lockless_freelist that allows lockless access to + * freelist that allows lockless access to * free objects in addition to the regular freelist * that requires the slab lock. * @@ -140,11 +140,6 @@ static inline void ClearSlabDebug(struct page *page) /* * Issues still to be resolved: * - * - The per cpu array is updated for each new slab and and is a remote - * cacheline for most nodes. This could become a bouncing cacheline given - * enough frequent updates. There are 16 pointers in a cacheline, so at - * max 16 cpus could compete for the cacheline which may be okay. - * * - Support PAGE_ALLOC_DEBUG. Should be easy to do. * * - Variable sizing of the per node arrays @@ -205,11 +200,6 @@ static inline void ClearSlabDebug(struct page *page) #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) #endif -/* - * The page->inuse field is 16 bit thus we have this limitation - */ -#define MAX_OBJECTS_PER_SLAB 65535 - /* Internal SLUB flags */ #define __OBJECT_POISON 0x80000000 /* Poison object */ #define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */ @@ -277,6 +267,15 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) #endif } +static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) +{ +#ifdef CONFIG_SMP + return s->cpu_slab[cpu]; +#else + return &s->cpu_slab; +#endif +} + static inline int check_valid_pointer(struct kmem_cache *s, struct page *page, const void *object) { @@ -729,11 +728,6 @@ static int check_slab(struct kmem_cache *s, struct page *page) slab_err(s, page, "Not a valid slab page"); return 0; } - if (page->offset * sizeof(void *) != s->offset) { - slab_err(s, page, "Corrupted offset %lu", - (unsigned long)(page->offset * sizeof(void *))); - return 0; - } if (page->inuse > s->objects) { slab_err(s, page, "inuse %u > max %u", s->name, page->inuse, s->objects); @@ -872,8 +866,6 @@ bad: slab_fix(s, "Marking all objects used"); page->inuse = s->objects; page->freelist = NULL; - /* Fix up fields that may be corrupted */ - page->offset = s->offset / sizeof(void *); } return 0; } @@ -1055,6 +1047,9 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) if (s->flags & SLAB_CACHE_DMA) flags |= SLUB_DMA; + if (s->flags & SLAB_RECLAIM_ACCOUNT) + flags |= __GFP_RECLAIMABLE; + if (node == -1) page = alloc_pages(flags, s->order); else @@ -1088,19 +1083,19 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) void *last; void *p; - BUG_ON(flags & ~(GFP_DMA | __GFP_ZERO | GFP_LEVEL_MASK)); + BUG_ON(flags & GFP_SLAB_BUG_MASK); if (flags & __GFP_WAIT) local_irq_enable(); - page = allocate_slab(s, flags & GFP_LEVEL_MASK, node); + page = allocate_slab(s, + flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node); if (!page) goto out; n = get_node(s, page_to_nid(page)); if (n) atomic_long_inc(&n->nr_slabs); - page->offset = s->offset / sizeof(void *); page->slab = s; page->flags |= 1 << PG_slab; if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON | @@ -1123,7 +1118,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) set_freepointer(s, last, NULL); page->freelist = start; - page->lockless_freelist = NULL; page->inuse = 0; out: if (flags & __GFP_WAIT) @@ -1149,7 +1143,6 @@ static void __free_slab(struct kmem_cache *s, struct page *page) NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, - pages); - page->mapping = NULL; __free_pages(page, s->order); } @@ -1383,33 +1376,34 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page) /* * Remove the cpu slab */ -static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu) +static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) { + struct page *page = c->page; /* * Merge cpu freelist into freelist. Typically we get here * because both freelists are empty. So this is unlikely * to occur. */ - while (unlikely(page->lockless_freelist)) { + while (unlikely(c->freelist)) { void **object; /* Retrieve object from cpu_freelist */ - object = page->lockless_freelist; - page->lockless_freelist = page->lockless_freelist[page->offset]; + object = c->freelist; + c->freelist = c->freelist[c->offset]; /* And put onto the regular freelist */ - object[page->offset] = page->freelist; + object[c->offset] = page->freelist; page->freelist = object; page->inuse--; } - s->cpu_slab[cpu] = NULL; + c->page = NULL; unfreeze_slab(s, page); } -static inline void flush_slab(struct kmem_cache *s, struct page *page, int cpu) +static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) { - slab_lock(page); - deactivate_slab(s, page, cpu); + slab_lock(c->page); + deactivate_slab(s, c); } /* @@ -1418,18 +1412,17 @@ static inline void flush_slab(struct kmem_cache *s, struct page *page, int cpu) */ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { - struct page *page = s->cpu_slab[cpu]; + struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); - if (likely(page)) - flush_slab(s, page, cpu); + if (likely(c && c->page)) + flush_slab(s, c); } static void flush_cpu_slab(void *d) { struct kmem_cache *s = d; - int cpu = smp_processor_id(); - __flush_cpu_slab(s, cpu); + __flush_cpu_slab(s, smp_processor_id()); } static void flush_all(struct kmem_cache *s) @@ -1445,6 +1438,19 @@ static void flush_all(struct kmem_cache *s) #endif } +/* + * Check if the objects in a per cpu structure fit numa + * locality expectations. + */ +static inline int node_match(struct kmem_cache_cpu *c, int node) +{ +#ifdef CONFIG_NUMA + if (node != -1 && c->node != node) + return 0; +#endif + return 1; +} + /* * Slow path. The lockless freelist is empty or we need to perform * debugging duties. @@ -1463,45 +1469,46 @@ static void flush_all(struct kmem_cache *s) * we need to allocate a new slab. This is slowest path since we may sleep. */ static void *__slab_alloc(struct kmem_cache *s, - gfp_t gfpflags, int node, void *addr, struct page *page) + gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) { void **object; - int cpu = smp_processor_id(); + struct page *new; - if (!page) + if (!c->page) goto new_slab; - slab_lock(page); - if (unlikely(node != -1 && page_to_nid(page) != node)) + slab_lock(c->page); + if (unlikely(!node_match(c, node))) goto another_slab; load_freelist: - object = page->freelist; + object = c->page->freelist; if (unlikely(!object)) goto another_slab; - if (unlikely(SlabDebug(page))) + if (unlikely(SlabDebug(c->page))) goto debug; - object = page->freelist; - page->lockless_freelist = object[page->offset]; - page->inuse = s->objects; - page->freelist = NULL; - slab_unlock(page); + object = c->page->freelist; + c->freelist = object[c->offset]; + c->page->inuse = s->objects; + c->page->freelist = NULL; + c->node = page_to_nid(c->page); + slab_unlock(c->page); return object; another_slab: - deactivate_slab(s, page, cpu); + deactivate_slab(s, c); new_slab: - page = get_partial(s, gfpflags, node); - if (page) { - s->cpu_slab[cpu] = page; + new = get_partial(s, gfpflags, node); + if (new) { + c->page = new; goto load_freelist; } - page = new_slab(s, gfpflags, node); - if (page) { - cpu = smp_processor_id(); - if (s->cpu_slab[cpu]) { + new = new_slab(s, gfpflags, node); + if (new) { + c = get_cpu_slab(s, smp_processor_id()); + if (c->page) { /* * Someone else populated the cpu_slab while we * enabled interrupts, or we have gotten scheduled @@ -1509,34 +1516,33 @@ new_slab: * requested node even if __GFP_THISNODE was * specified. So we need to recheck. */ - if (node == -1 || - page_to_nid(s->cpu_slab[cpu]) == node) { + if (node_match(c, node)) { /* * Current cpuslab is acceptable and we * want the current one since its cache hot */ - discard_slab(s, page); - page = s->cpu_slab[cpu]; - slab_lock(page); + discard_slab(s, new); + slab_lock(c->page); goto load_freelist; } /* New slab does not fit our expectations */ - flush_slab(s, s->cpu_slab[cpu], cpu); + flush_slab(s, c); } - slab_lock(page); - SetSlabFrozen(page); - s->cpu_slab[cpu] = page; + slab_lock(new); + SetSlabFrozen(new); + c->page = new; goto load_freelist; } return NULL; debug: - object = page->freelist; - if (!alloc_debug_processing(s, page, object, addr)) + object = c->page->freelist; + if (!alloc_debug_processing(s, c->page, object, addr)) goto another_slab; - page->inuse++; - page->freelist = object[page->offset]; - slab_unlock(page); + c->page->inuse++; + c->page->freelist = object[c->offset]; + c->node = -1; + slab_unlock(c->page); return object; } @@ -1553,25 +1559,24 @@ debug: static void __always_inline *slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, void *addr) { - struct page *page; void **object; unsigned long flags; + struct kmem_cache_cpu *c; local_irq_save(flags); - page = s->cpu_slab[smp_processor_id()]; - if (unlikely(!page || !page->lockless_freelist || - (node != -1 && page_to_nid(page) != node))) + c = get_cpu_slab(s, smp_processor_id()); + if (unlikely(!c->freelist || !node_match(c, node))) - object = __slab_alloc(s, gfpflags, node, addr, page); + object = __slab_alloc(s, gfpflags, node, addr, c); else { - object = page->lockless_freelist; - page->lockless_freelist = object[page->offset]; + object = c->freelist; + c->freelist = object[c->offset]; } local_irq_restore(flags); if (unlikely((gfpflags & __GFP_ZERO) && object)) - memset(object, 0, s->objsize); + memset(object, 0, c->objsize); return object; } @@ -1599,7 +1604,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node); * handling required then we can return immediately. */ static void __slab_free(struct kmem_cache *s, struct page *page, - void *x, void *addr) + void *x, void *addr, unsigned int offset) { void *prior; void **object = (void *)x; @@ -1609,7 +1614,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, if (unlikely(SlabDebug(page))) goto debug; checks_ok: - prior = object[page->offset] = page->freelist; + prior = object[offset] = page->freelist; page->freelist = object; page->inuse--; @@ -1664,15 +1669,16 @@ static void __always_inline slab_free(struct kmem_cache *s, { void **object = (void *)x; unsigned long flags; + struct kmem_cache_cpu *c; local_irq_save(flags); debug_check_no_locks_freed(object, s->objsize); - if (likely(page == s->cpu_slab[smp_processor_id()] && - !SlabDebug(page))) { - object[page->offset] = page->lockless_freelist; - page->lockless_freelist = object; + c = get_cpu_slab(s, smp_processor_id()); + if (likely(page == c->page && c->node >= 0)) { + object[c->offset] = c->freelist; + c->freelist = object; } else - __slab_free(s, page, x, addr); + __slab_free(s, page, x, addr, c->offset); local_irq_restore(flags); } @@ -1759,14 +1765,6 @@ static inline int slab_order(int size, int min_objects, int rem; int min_order = slub_min_order; - /* - * If we would create too many object per slab then reduce - * the slab order even if it goes below slub_min_order. - */ - while (min_order > 0 && - (PAGE_SIZE << min_order) >= MAX_OBJECTS_PER_SLAB * size) - min_order--; - for (order = max(min_order, fls(min_objects * size - 1) - PAGE_SHIFT); order <= max_order; order++) { @@ -1781,9 +1779,6 @@ static inline int slab_order(int size, int min_objects, if (rem <= slab_size / fract_leftover) break; - /* If the next size is too high then exit now */ - if (slab_size * 2 >= MAX_OBJECTS_PER_SLAB * size) - break; } return order; @@ -1858,6 +1853,16 @@ static unsigned long calculate_alignment(unsigned long flags, return ALIGN(align, sizeof(void *)); } +static void init_kmem_cache_cpu(struct kmem_cache *s, + struct kmem_cache_cpu *c) +{ + c->page = NULL; + c->freelist = NULL; + c->node = 0; + c->offset = s->offset / sizeof(void *); + c->objsize = s->objsize; +} + static void init_kmem_cache_node(struct kmem_cache_node *n) { n->nr_partial = 0; @@ -1869,6 +1874,131 @@ static void init_kmem_cache_node(struct kmem_cache_node *n) #endif } +#ifdef CONFIG_SMP +/* + * Per cpu array for per cpu structures. + * + * The per cpu array places all kmem_cache_cpu structures from one processor + * close together meaning that it becomes possible that multiple per cpu + * structures are contained in one cacheline. This may be particularly + * beneficial for the kmalloc caches. + * + * A desktop system typically has around 60-80 slabs. With 100 here we are + * likely able to get per cpu structures for all caches from the array defined + * here. We must be able to cover all kmalloc caches during bootstrap. + * + * If the per cpu array is exhausted then fall back to kmalloc + * of individual cachelines. No sharing is possible then. + */ +#define NR_KMEM_CACHE_CPU 100 + +static DEFINE_PER_CPU(struct kmem_cache_cpu, + kmem_cache_cpu)[NR_KMEM_CACHE_CPU]; + +static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free); +static cpumask_t kmem_cach_cpu_free_init_once = CPU_MASK_NONE; + +static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s, + int cpu, gfp_t flags) +{ + struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu); + + if (c) + per_cpu(kmem_cache_cpu_free, cpu) = + (void *)c->freelist; + else { + /* Table overflow: So allocate ourselves */ + c = kmalloc_node( + ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()), + flags, cpu_to_node(cpu)); + if (!c) + return NULL; + } + + init_kmem_cache_cpu(s, c); + return c; +} + +static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu) +{ + if (c < per_cpu(kmem_cache_cpu, cpu) || + c > per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) { + kfree(c); + return; + } + c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu); + per_cpu(kmem_cache_cpu_free, cpu) = c; +} + +static void free_kmem_cache_cpus(struct kmem_cache *s) +{ + int cpu; + + for_each_online_cpu(cpu) { + struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); + + if (c) { + s->cpu_slab[cpu] = NULL; + free_kmem_cache_cpu(c, cpu); + } + } +} + +static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) +{ + int cpu; + + for_each_online_cpu(cpu) { + struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); + + if (c) + continue; + + c = alloc_kmem_cache_cpu(s, cpu, flags); + if (!c) { + free_kmem_cache_cpus(s); + return 0; + } + s->cpu_slab[cpu] = c; + } + return 1; +} + +/* + * Initialize the per cpu array. + */ +static void init_alloc_cpu_cpu(int cpu) +{ + int i; + + if (cpu_isset(cpu, kmem_cach_cpu_free_init_once)) + return; + + for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--) + free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu); + + cpu_set(cpu, kmem_cach_cpu_free_init_once); +} + +static void __init init_alloc_cpu(void) +{ + int cpu; + + for_each_online_cpu(cpu) + init_alloc_cpu_cpu(cpu); + } + +#else +static inline void free_kmem_cache_cpus(struct kmem_cache *s) {} +static inline void init_alloc_cpu(void) {} + +static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) +{ + init_kmem_cache_cpu(s, &s->cpu_slab); + return 1; +} +#endif + #ifdef CONFIG_NUMA /* * No kmalloc_node yet so do it by hand. We know that this is the first @@ -1876,10 +2006,11 @@ static void init_kmem_cache_node(struct kmem_cache_node *n) * possible. * * Note that this function only works on the kmalloc_node_cache - * when allocating for the kmalloc_node_cache. + * when allocating for the kmalloc_node_cache. This is used for bootstrapping + * memory on a fresh node that has no slab structures yet. */ -static struct kmem_cache_node * __init early_kmem_cache_node_alloc(gfp_t gfpflags, - int node) +static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, + int node) { struct page *page; struct kmem_cache_node *n; @@ -1921,7 +2052,7 @@ static void free_kmem_cache_nodes(struct kmem_cache *s) { int node; - for_each_online_node(node) { + for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n = s->node[node]; if (n && n != &s->local_node) kmem_cache_free(kmalloc_caches, n); @@ -1939,7 +2070,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) else local_node = 0; - for_each_online_node(node) { + for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n; if (local_node == node) @@ -2077,14 +2208,7 @@ static int calculate_sizes(struct kmem_cache *s) */ s->objects = (PAGE_SIZE << s->order) / size; - /* - * Verify that the number of objects is within permitted limits. - * The page->inuse field is only 16 bit wide! So we cannot have - * more than 64k objects per slab. - */ - if (!s->objects || s->objects > MAX_OBJECTS_PER_SLAB) - return 0; - return 1; + return !!s->objects; } @@ -2107,9 +2231,12 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, #ifdef CONFIG_NUMA s->defrag_ratio = 100; #endif + if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA)) + goto error; - if (init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA)) + if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA)) return 1; + free_kmem_cache_nodes(s); error: if (flags & SLAB_PANIC) panic("Cannot create slab %s size=%lu realsize=%u " @@ -2192,7 +2319,8 @@ static inline int kmem_cache_close(struct kmem_cache *s) flush_all(s); /* Attempt to free all objects */ - for_each_online_node(node) { + free_kmem_cache_cpus(s); + for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n = get_node(s, node); n->nr_partial -= free_list(s, n, &n->partial); @@ -2227,11 +2355,11 @@ EXPORT_SYMBOL(kmem_cache_destroy); * Kmalloc subsystem *******************************************************************/ -struct kmem_cache kmalloc_caches[KMALLOC_SHIFT_HIGH + 1] __cacheline_aligned; +struct kmem_cache kmalloc_caches[PAGE_SHIFT] __cacheline_aligned; EXPORT_SYMBOL(kmalloc_caches); #ifdef CONFIG_ZONE_DMA -static struct kmem_cache *kmalloc_caches_dma[KMALLOC_SHIFT_HIGH + 1]; +static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT]; #endif static int __init setup_slub_min_order(char *str) @@ -2397,12 +2525,8 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags) return ZERO_SIZE_PTR; index = size_index[(size - 1) / 8]; - } else { - if (size > KMALLOC_MAX_SIZE) - return NULL; - + } else index = fls(size - 1); - } #ifdef CONFIG_ZONE_DMA if (unlikely((flags & SLUB_DMA))) @@ -2414,9 +2538,15 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags) void *__kmalloc(size_t size, gfp_t flags) { - struct kmem_cache *s = get_slab(size, flags); + struct kmem_cache *s; + + if (unlikely(size > PAGE_SIZE / 2)) + return (void *)__get_free_pages(flags | __GFP_COMP, + get_order(size)); - if (ZERO_OR_NULL_PTR(s)) + s = get_slab(size, flags); + + if (unlikely(ZERO_OR_NULL_PTR(s))) return s; return slab_alloc(s, flags, -1, __builtin_return_address(0)); @@ -2426,9 +2556,15 @@ EXPORT_SYMBOL(__kmalloc); #ifdef CONFIG_NUMA void *__kmalloc_node(size_t size, gfp_t flags, int node) { - struct kmem_cache *s = get_slab(size, flags); + struct kmem_cache *s; - if (ZERO_OR_NULL_PTR(s)) + if (unlikely(size > PAGE_SIZE / 2)) + return (void *)__get_free_pages(flags | __GFP_COMP, + get_order(size)); + + s = get_slab(size, flags); + + if (unlikely(ZERO_OR_NULL_PTR(s))) return s; return slab_alloc(s, flags, node, __builtin_return_address(0)); @@ -2441,7 +2577,8 @@ size_t ksize(const void *object) struct page *page; struct kmem_cache *s; - if (ZERO_OR_NULL_PTR(object)) + BUG_ON(!object); + if (unlikely(object == ZERO_SIZE_PTR)) return 0; page = get_object_page(object); @@ -2473,22 +2610,17 @@ EXPORT_SYMBOL(ksize); void kfree(const void *x) { - struct kmem_cache *s; struct page *page; - /* - * This has to be an unsigned comparison. According to Linus - * some gcc version treat a pointer as a signed entity. Then - * this comparison would be true for all "negative" pointers - * (which would cover the whole upper half of the address space). - */ - if (ZERO_OR_NULL_PTR(x)) + if (unlikely(ZERO_OR_NULL_PTR(x))) return; page = virt_to_head_page(x); - s = page->slab; - - slab_free(s, page, (void *)x, __builtin_return_address(0)); + if (unlikely(!PageSlab(page))) { + put_page(page); + return; + } + slab_free(page->slab, page, (void *)x, __builtin_return_address(0)); } EXPORT_SYMBOL(kfree); @@ -2517,7 +2649,7 @@ int kmem_cache_shrink(struct kmem_cache *s) return -ENOMEM; flush_all(s); - for_each_online_node(node) { + for_each_node_state(node, N_NORMAL_MEMORY) { n = get_node(s, node); if (!n->nr_partial) @@ -2575,6 +2707,8 @@ void __init kmem_cache_init(void) int i; int caches = 0; + init_alloc_cpu(); + #ifdef CONFIG_NUMA /* * Must first have the slab cache available for the allocations of the @@ -2602,7 +2736,7 @@ void __init kmem_cache_init(void) caches++; } - for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { + for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) { create_kmalloc_cache(&kmalloc_caches[i], "kmalloc", 1 << i, GFP_KERNEL); caches++; @@ -2629,16 +2763,18 @@ void __init kmem_cache_init(void) slab_state = UP; /* Provide the correct kmalloc names now that the caches are up */ - for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) + for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) kmalloc_caches[i]. name = kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); #ifdef CONFIG_SMP register_cpu_notifier(&slab_notifier); + kmem_size = offsetof(struct kmem_cache, cpu_slab) + + nr_cpu_ids * sizeof(struct kmem_cache_cpu *); +#else + kmem_size = sizeof(struct kmem_cache); #endif - kmem_size = offsetof(struct kmem_cache, cpu_slab) + - nr_cpu_ids * sizeof(struct page *); printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," " CPUs=%d, Nodes=%d\n", @@ -2717,12 +2853,21 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, down_write(&slub_lock); s = find_mergeable(size, align, flags, name, ctor); if (s) { + int cpu; + s->refcount++; /* * Adjust the object sizes so that we clear * the complete object on kzalloc. */ s->objsize = max(s->objsize, (int)size); + + /* + * And then we need to update the object size in the + * per cpu structures + */ + for_each_online_cpu(cpu) + get_cpu_slab(s, cpu)->objsize = s->objsize; s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); up_write(&slub_lock); if (sysfs_slab_alias(s, name)) @@ -2765,15 +2910,29 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, unsigned long flags; switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + init_alloc_cpu_cpu(cpu); + down_read(&slub_lock); + list_for_each_entry(s, &slab_caches, list) + s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu, + GFP_KERNEL); + up_read(&slub_lock); + break; + case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: down_read(&slub_lock); list_for_each_entry(s, &slab_caches, list) { + struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); + local_irq_save(flags); __flush_cpu_slab(s, cpu); local_irq_restore(flags); + free_kmem_cache_cpu(c, cpu); + s->cpu_slab[cpu] = NULL; } up_read(&slub_lock); break; @@ -2790,9 +2949,14 @@ static struct notifier_block __cpuinitdata slab_notifier = void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller) { - struct kmem_cache *s = get_slab(size, gfpflags); + struct kmem_cache *s; + + if (unlikely(size > PAGE_SIZE / 2)) + return (void *)__get_free_pages(gfpflags | __GFP_COMP, + get_order(size)); + s = get_slab(size, gfpflags); - if (ZERO_OR_NULL_PTR(s)) + if (unlikely(ZERO_OR_NULL_PTR(s))) return s; return slab_alloc(s, gfpflags, -1, caller); @@ -2801,9 +2965,14 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller) void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, int node, void *caller) { - struct kmem_cache *s = get_slab(size, gfpflags); + struct kmem_cache *s; + + if (unlikely(size > PAGE_SIZE / 2)) + return (void *)__get_free_pages(gfpflags | __GFP_COMP, + get_order(size)); + s = get_slab(size, gfpflags); - if (ZERO_OR_NULL_PTR(s)) + if (unlikely(ZERO_OR_NULL_PTR(s))) return s; return slab_alloc(s, gfpflags, node, caller); @@ -2902,7 +3071,7 @@ static long validate_slab_cache(struct kmem_cache *s) return -ENOMEM; flush_all(s); - for_each_online_node(node) { + for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n = get_node(s, node); count += validate_slab_node(s, n, map); @@ -3116,13 +3285,13 @@ static int list_locations(struct kmem_cache *s, char *buf, int node; if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), - GFP_KERNEL)) + GFP_TEMPORARY)) return sprintf(buf, "Out of memory\n"); /* Push back cpu slabs */ flush_all(s); - for_each_online_node(node) { + for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n = get_node(s, node); unsigned long flags; struct page *page; @@ -3230,11 +3399,18 @@ static unsigned long slab_objects(struct kmem_cache *s, per_cpu = nodes + nr_node_ids; for_each_possible_cpu(cpu) { - struct page *page = s->cpu_slab[cpu]; + struct page *page; int node; + struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); + if (!c) + continue; + + page = c->page; + node = c->node; + if (node < 0) + continue; if (page) { - node = page_to_nid(page); if (flags & SO_CPU) { int x = 0; @@ -3249,7 +3425,7 @@ static unsigned long slab_objects(struct kmem_cache *s, } } - for_each_online_node(node) { + for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n = get_node(s, node); if (flags & SO_PARTIAL) { @@ -3277,7 +3453,7 @@ static unsigned long slab_objects(struct kmem_cache *s, x = sprintf(buf, "%lu", total); #ifdef CONFIG_NUMA - for_each_online_node(node) + for_each_node_state(node, N_NORMAL_MEMORY) if (nodes[node]) x += sprintf(buf + x, " N%d=%lu", node, nodes[node]); @@ -3291,13 +3467,19 @@ static int any_slab_objects(struct kmem_cache *s) int node; int cpu; - for_each_possible_cpu(cpu) - if (s->cpu_slab[cpu]) + for_each_possible_cpu(cpu) { + struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); + + if (c && c->page) return 1; + } - for_each_node(node) { + for_each_online_node(node) { struct kmem_cache_node *n = get_node(s, node); + if (!n) + continue; + if (n->nr_partial || atomic_long_read(&n->nr_slabs)) return 1; }