X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fslub.c;h=5db3da5a60bf15507227eead81474a2575dfc653;hb=884f2810b15b6bb489c9dca5013aafbea2f19fba;hp=4a8585befd84887a2e31d26bf95d0019cca5c110;hpb=47bfdc0d5a18a4b760ffb6a332932aaa5c0859e0;p=linux-2.6

diff --git a/mm/slub.c b/mm/slub.c
index 4a8585befd..5db3da5a60 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -97,12 +97,6 @@
  *
  * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
  *
- * - Support DEBUG_SLAB_LEAK. Trouble is we do not know where the full
- *   slabs are in SLUB.
- *
- * - SLAB_DEBUG_INITIAL is not supported but I have never seen a use of
- *   it.
- *
  * - Variable sizing of the per node arrays
  */
 
@@ -129,9 +123,17 @@
 #endif
 
 /*
- * Flags from the regular SLAB that SLUB does not support:
+ * Minimum number of partial slabs. These will be left on the partial
+ * lists even if they are empty. kmem_cache_shrink may reclaim them.
+ */
+#define MIN_PARTIAL 2
+
+/*
+ * Maximum number of desirable partial slabs.
+ * The existence of more partial slabs makes kmem_cache_shrink
+ * sort the partial list by the number of objects in use.
  */
-#define SLUB_UNIMPLEMENTED	(SLAB_DEBUG_INITIAL)
+#define MAX_PARTIAL 10
 
 #define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
 				SLAB_POISON | SLAB_STORE_USER)
@@ -297,9 +299,6 @@ static void set_track(struct kmem_cache *s, void *object,
 		memset(p, 0, sizeof(struct track));
 }
 
-#define set_tracking(__s, __o, __a) set_track(__s, __o, __a, \
-				__builtin_return_address(0))
-
 static void init_tracking(struct kmem_cache *s, void *object)
 {
 	if (s->flags & SLAB_STORE_USER) {
@@ -452,7 +451,7 @@ static int check_valid_pointer(struct kmem_cache *s, struct page *page,
 static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
 						void *from, void *to)
 {
-	printk(KERN_ERR "@@@ SLUB: %s Restoring %s (0x%x) from 0x%p-0x%p\n",
+	printk(KERN_ERR "@@@ SLUB %s: Restoring %s (0x%x) from 0x%p-0x%p\n",
 		s->name, message, data, from, to - 1);
 	memset(from, data, to - from);
 }
@@ -499,9 +498,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
 		return 1;
 
 	if (!check_bytes(p + length, POISON_INUSE, remainder)) {
-		printk(KERN_ERR "SLUB: %s slab 0x%p: Padding fails check\n",
-			s->name, p);
-		dump_stack();
+		slab_err(s, page, "Padding check failed");
 		restore_bytes(s, "slab padding", POISON_INUSE, p + length,
 			p + length + remainder);
 		return 0;
@@ -587,30 +584,25 @@ static int check_slab(struct kmem_cache *s, struct page *page)
 	VM_BUG_ON(!irqs_disabled());
 
 	if (!PageSlab(page)) {
-		printk(KERN_ERR "SLUB: %s Not a valid slab page @0x%p "
-			"flags=%lx mapping=0x%p count=%d \n",
-			s->name, page, page->flags, page->mapping,
+		slab_err(s, page, "Not a valid slab page flags=%lx "
+			"mapping=0x%p count=%d", page->flags, page->mapping,
 			page_count(page));
 		return 0;
 	}
 	if (page->offset * sizeof(void *) != s->offset) {
-		printk(KERN_ERR "SLUB: %s Corrupted offset %lu in slab @0x%p"
-			" flags=0x%lx mapping=0x%p count=%d\n",
-			s->name,
+		slab_err(s, page, "Corrupted offset %lu flags=0x%lx "
+			"mapping=0x%p count=%d",
 			(unsigned long)(page->offset * sizeof(void *)),
-			page, page->flags, page->mapping, page_count(page));
-		dump_stack();
+			page->flags, page->mapping, page_count(page));
 		return 0;
 	}
 	if (page->inuse > s->objects) {
-		printk(KERN_ERR "SLUB: %s Inuse %u > max %u in slab "
-			"page @0x%p flags=%lx mapping=0x%p count=%d\n",
-			s->name, page->inuse, s->objects, page, page->flags,
+		slab_err(s, page, "inuse %u > max %u @0x%p flags=%lx "
+			"mapping=0x%p count=%d",
+			page->inuse, s->objects, page, page->flags,
 			page->mapping, page_count(page));
-		dump_stack();
 		return 0;
 	}
 	/* Slab_pad_check fixes things up after itself */
@@ -639,12 +631,13 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
 			set_freepointer(s, object, NULL);
 			break;
 		} else {
-			printk(KERN_ERR "SLUB: %s slab 0x%p "
-				"freepointer 0x%p corrupted.\n",
-				s->name, page, fp);
-			dump_stack();
+			slab_err(s, page, "Freepointer 0x%p corrupt",
+								fp);
 			page->freelist = NULL;
 			page->inuse = s->objects;
+			printk(KERN_ERR "@@@ SLUB %s: Freelist "
+				"cleared. Slab 0x%p\n",
+				s->name, page);
 			return 0;
 		}
 		break;
@@ -655,15 +648,40 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
 	}
 
 	if (page->inuse != s->objects - nr) {
-		printk(KERN_ERR "slab %s: page 0x%p wrong object count."
-			" counter is %d but counted were %d\n",
-			s->name, page, page->inuse,
-			s->objects - nr);
+		slab_err(s, page, "Wrong object count. Counter is %d but "
+			"counted were %d", page->inuse,
+			s->objects - nr);
 		page->inuse = s->objects - nr;
+		printk(KERN_ERR "@@@ SLUB %s: Object count adjusted. "
+			"Slab @0x%p\n", s->name, page);
 	}
 	return search == NULL;
 }
 
+/*
+ * Tracking of fully allocated slabs for debugging
+ */
+static void add_full(struct kmem_cache_node *n, struct page *page)
+{
+	spin_lock(&n->list_lock);
+	list_add(&page->lru, &n->full);
+	spin_unlock(&n->list_lock);
+}
+
+static void remove_full(struct kmem_cache *s, struct page *page)
+{
+	struct kmem_cache_node *n;
+
+	if (!(s->flags & SLAB_STORE_USER))
+		return;
+
+	n = get_node(s, page_to_nid(page));
+
+	spin_lock(&n->list_lock);
+	list_del(&page->lru);
+	spin_unlock(&n->list_lock);
+}
+
 static int alloc_object_checks(struct kmem_cache *s, struct page *page,
 							void *object)
 {
@@ -671,15 +689,13 @@
 		goto bad;
 
 	if (object && !on_freelist(s, page, object)) {
-		printk(KERN_ERR "SLUB: %s Object 0x%p@0x%p "
-			"already allocated.\n",
-			s->name, object, page);
-		goto dump;
+		slab_err(s, page, "Object 0x%p already allocated", object);
+		goto bad;
 	}
 
 	if (!check_valid_pointer(s, page, object)) {
 		object_err(s, page, object, "Freelist Pointer check fails");
-		goto dump;
+		goto bad;
 	}
 
 	if (!object)
@@ -687,17 +703,8 @@
 	if (!check_object(s, page, object, 0))
 		goto bad;
 
-	init_object(s, object, 1);
-	if (s->flags & SLAB_TRACE) {
-		printk(KERN_INFO "TRACE %s alloc 0x%p inuse=%d fp=0x%p\n",
-			s->name, object, page->inuse,
-			page->freelist);
-		dump_stack();
-	}
 	return 1;
-dump:
-	dump_stack();
 bad:
 	if (PageSlab(page)) {
 		/*
@@ -722,15 +729,12 @@ static int free_object_checks(struct kmem_cache *s, struct page *page,
 		goto fail;
 
 	if (!check_valid_pointer(s, page, object)) {
-		printk(KERN_ERR "SLUB: %s slab 0x%p invalid "
-			"object pointer 0x%p\n",
-			s->name, page, object);
+		slab_err(s, page, "Invalid object pointer 0x%p", object);
 		goto fail;
 	}
 
 	if (on_freelist(s, page, object)) {
-		printk(KERN_ERR "SLUB: %s slab 0x%p object "
-			"0x%p already free.\n", s->name, page, object);
+		slab_err(s, page, "Object 0x%p already free", object);
 		goto fail;
 	}
 
@@ -739,32 +743,22 @@
 	if (unlikely(s != page->slab)) {
 		if (!PageSlab(page))
-			printk(KERN_ERR "slab_free %s size %d: attempt to"
-				"free object(0x%p) outside of slab.\n",
-				s->name, s->size, object);
+			slab_err(s, page, "Attempt to free object(0x%p) "
+				"outside of slab", object);
 		else
-		if (!page->slab)
+		if (!page->slab) {
 			printk(KERN_ERR
-				"slab_free : no slab(NULL) for object 0x%p.\n",
+				"SLUB <none>: no slab for object 0x%p.\n",
 						object);
+			dump_stack();
+		}
 		else
-			printk(KERN_ERR "slab_free %s(%d): object at 0x%p"
-				" belongs to slab %s(%d)\n",
-				s->name, s->size, object,
-				page->slab->name, page->slab->size);
+			slab_err(s, page, "object at 0x%p belongs "
+				"to slab %s", object, page->slab->name);
 		goto fail;
 	}
-	if (s->flags & SLAB_TRACE) {
-		printk(KERN_INFO "TRACE %s free 0x%p inuse=%d fp=0x%p\n",
-			s->name, object, page->inuse,
-			page->freelist);
-		print_section("Object", object, s->objsize);
-		dump_stack();
-	}
-	init_object(s, object, 0);
 	return 1;
 fail:
-	dump_stack();
 	printk(KERN_ERR "@@@ SLUB: %s slab 0x%p object at 0x%p not freed.\n",
 		s->name, page, object);
 	return 0;
@@ -808,14 +802,8 @@ static void setup_object(struct kmem_cache *s, struct page *page,
 		init_tracking(s, object);
 	}
 
-	if (unlikely(s->ctor)) {
-		int mode = SLAB_CTOR_CONSTRUCTOR;
-
-		if (!(s->flags & __GFP_WAIT))
-			mode |= SLAB_CTOR_ATOMIC;
-
-		s->ctor(object, s, mode);
-	}
+	if (unlikely(s->ctor))
+		s->ctor(object, s, SLAB_CTOR_CONSTRUCTOR);
 }
 
 static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
@@ -827,9 +815,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	void *last;
 	void *p;
 
-	if (flags & __GFP_NO_GROW)
-		return NULL;
-
 	BUG_ON(flags & ~(GFP_DMA | GFP_LEVEL_MASK));
 
 	if (flags & __GFP_WAIT)
@@ -953,10 +938,16 @@ static __always_inline int slab_trylock(struct page *page)
 /*
  * Management of partially allocated slabs
  */
-static void add_partial(struct kmem_cache *s, struct page *page)
+static void add_partial_tail(struct kmem_cache_node *n, struct page *page)
 {
-	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+	spin_lock(&n->list_lock);
+	n->nr_partial++;
+	list_add_tail(&page->lru, &n->partial);
+	spin_unlock(&n->list_lock);
+}
 
+static void add_partial(struct kmem_cache_node *n, struct page *page)
+{
 	spin_lock(&n->list_lock);
 	n->nr_partial++;
 	list_add(&page->lru, &n->partial);
@@ -1056,7 +1047,7 @@
 		n = get_node(s, zone_to_nid(*z));
 
 		if (n && cpuset_zone_allowed_hardwall(*z, flags) &&
-				n->nr_partial > 2) {
+				n->nr_partial > MIN_PARTIAL) {
 			page = get_partial_node(n);
 			if (page)
 				return page;
@@ -1090,13 +1081,31 @@
  */
 static void putback_slab(struct kmem_cache *s, struct page *page)
 {
+	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+
 	if (page->inuse) {
+
 		if (page->freelist)
-			add_partial(s, page);
+			add_partial(n, page);
+		else if (PageError(page) && (s->flags & SLAB_STORE_USER))
+			add_full(n, page);
 		slab_unlock(page);
+
 	} else {
-		slab_unlock(page);
-		discard_slab(s, page);
+		if (n->nr_partial < MIN_PARTIAL) {
+			/*
+			 * Adding an empty page to the partial slabs in order
+			 * to avoid page allocator overhead. This page needs to
+			 * come after all the others that are not fully empty
+			 * in order to make sure that we do maximum
+			 * defragmentation.
+			 */
+			add_partial_tail(n, page);
+			slab_unlock(page);
+		} else {
+			slab_unlock(page);
+			discard_slab(s, page);
+		}
 	}
 }
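The hunks above convert SLUB's open-coded printk()/dump_stack() error reporting to slab_err(). The helper's own definition is not visible in this diff; the following is only a sketch of the shape implied by the call sites (cache, page, then a printf-style reason), assuming it also dumps a backtrace like the code it replaces:

	/*
	 * Sketch of the slab_err() helper assumed by the hunks above;
	 * the shape matches the call sites, not necessarily the exact
	 * implementation in slub.c.
	 */
	static void slab_err(struct kmem_cache *s, struct page *page,
							char *reason, ...)
	{
		va_list args;
		char buf[100];

		va_start(args, reason);
		vsnprintf(buf, sizeof(buf), reason, args);
		va_end(args);
		printk(KERN_ERR "*** SLUB %s: %s in slab @0x%p\n",
			s->name, buf, page);
		dump_stack();
	}

With the cache name, page pointer and backtrace centralized, each caller only supplies what is specific to the failure, which is why the converted sites can drop their explicit s->name arguments and dump_stack() calls.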
@@ -1163,8 +1172,8 @@ static void flush_all(struct kmem_cache *s)
  * Fastpath is not possible if we need to get a new slab or have
  * debugging enabled (which means all slabs are marked with PageError)
  */
-static __always_inline void *slab_alloc(struct kmem_cache *s,
-		gfp_t gfpflags, int node)
+static void *slab_alloc(struct kmem_cache *s,
+		gfp_t gfpflags, int node, void *addr)
 {
 	struct page *page;
 	void **object;
@@ -1238,20 +1247,27 @@ debug:
 	if (!alloc_object_checks(s, page, object))
 		goto another_slab;
 	if (s->flags & SLAB_STORE_USER)
-		set_tracking(s, object, TRACK_ALLOC);
+		set_track(s, object, TRACK_ALLOC, addr);
+	if (s->flags & SLAB_TRACE) {
+		printk(KERN_INFO "TRACE %s alloc 0x%p inuse=%d fp=0x%p\n",
+			s->name, object, page->inuse,
+			page->freelist);
+		dump_stack();
+	}
+	init_object(s, object, 1);
 	goto have_object;
 }
 
 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
 {
-	return slab_alloc(s, gfpflags, -1);
+	return slab_alloc(s, gfpflags, -1, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(kmem_cache_alloc);
 
 #ifdef CONFIG_NUMA
 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
-	return slab_alloc(s, gfpflags, node);
+	return slab_alloc(s, gfpflags, node, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 #endif
@@ -1262,7 +1278,8 @@ EXPORT_SYMBOL(kmem_cache_alloc_node);
  *
  * No special cachelines need to be read
  */
-static void slab_free(struct kmem_cache *s, struct page *page, void *x)
+static void slab_free(struct kmem_cache *s, struct page *page,
+					void *x, void *addr)
 {
 	void *prior;
 	void **object = (void *)x;
@@ -1294,7 +1311,7 @@ checks_ok:
 	 * then add it.
 	 */
 	if (unlikely(!prior))
-		add_partial(s, page);
+		add_partial(get_node(s, page_to_nid(page)), page);
 
 out_unlock:
 	slab_unlock(page);
@@ -1304,7 +1321,7 @@ out_unlock:
 slab_empty:
 	if (prior)
 		/*
-		 * Partially used slab that is on the partial list.
+		 * Slab on the partial list.
 		 */
 		remove_partial(s, page);
 
@@ -1314,34 +1331,37 @@ slab_empty:
 	return;
 
 debug:
-	if (free_object_checks(s, page, x))
-		goto checks_ok;
-	goto out_unlock;
+	if (!free_object_checks(s, page, x))
+		goto out_unlock;
+	if (!PageActive(page) && !page->freelist)
+		remove_full(s, page);
+	if (s->flags & SLAB_STORE_USER)
+		set_track(s, x, TRACK_FREE, addr);
+	if (s->flags & SLAB_TRACE) {
+		printk(KERN_INFO "TRACE %s free 0x%p inuse=%d fp=0x%p\n",
+			s->name, object, page->inuse,
+			page->freelist);
+		print_section("Object", (void *)object, s->objsize);
+		dump_stack();
+	}
+	init_object(s, object, 0);
+	goto checks_ok;
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
 {
-	struct page * page;
-
-	page = virt_to_page(x);
-
-	if (unlikely(PageCompound(page)))
-		page = page->first_page;
+	struct page *page;
 
-	if (unlikely(PageError(page) && (s->flags & SLAB_STORE_USER)))
-		set_tracking(s, x, TRACK_FREE);
-	slab_free(s, page, x);
+	page = virt_to_head_page(x);
+
+	slab_free(s, page, x, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
 /* Figure out on which slab page the object resides */
 static struct page *get_object_page(const void *x)
 {
-	struct page *page = virt_to_page(x);
-
-	if (unlikely(PageCompound(page)))
-		page = page->first_page;
+	struct page *page = virt_to_head_page(x);
 
 	if (!PageSlab(page))
 		return NULL;
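Several hunks here replace the open-coded compound-page lookup with virt_to_head_page(). That helper comes from outside this file; judging from the removed lines, it is equivalent to the following sketch (the PageCompound()/first_page handling simply mirrors the code being deleted):

	/*
	 * Sketch: equivalent of the open-coded sequence removed above.
	 * The real helper lives in <linux/mm.h>; this mirrors the
	 * removed lines rather than quoting that definition.
	 */
	static inline struct page *virt_to_head_page(const void *x)
	{
		struct page *page = virt_to_page(x);

		if (unlikely(PageCompound(page)))
			page = page->first_page;
		return page;
	}

Folding this into one helper also removes the window where a caller could forget the PageCompound() check before dereferencing page->slab.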
@@ -1459,7 +1479,7 @@ static unsigned long calculate_alignment(unsigned long flags,
 	 * specified alignment though. If that is greater
 	 * then use it.
 	 */
-	if ((flags & (SLAB_MUST_HWCACHE_ALIGN | SLAB_HWCACHE_ALIGN)) &&
+	if ((flags & SLAB_HWCACHE_ALIGN) &&
 			size > L1_CACHE_BYTES / 2)
 		return max_t(unsigned long, align, L1_CACHE_BYTES);
 
@@ -1475,6 +1495,7 @@ static void init_kmem_cache_node(struct kmem_cache_node *n)
 	atomic_long_set(&n->nr_slabs, 0);
 	spin_lock_init(&n->list_lock);
 	INIT_LIST_HEAD(&n->partial);
+	INIT_LIST_HEAD(&n->full);
 }
 
 #ifdef CONFIG_NUMA
@@ -1507,7 +1528,7 @@ static struct kmem_cache_node * __init early_kmem_cache_node_alloc(gfp_t gfpflag
 	init_object(kmalloc_caches, n, 1);
 	init_kmem_cache_node(n);
 	atomic_long_inc(&n->nr_slabs);
-	add_partial(kmalloc_caches, page);
+	add_partial(n, page);
 	return n;
 }
 
@@ -1710,8 +1731,6 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
 	s->flags = flags;
 	s->align = align;
 
-	BUG_ON(flags & SLUB_UNIMPLEMENTED);
-
 	/*
 	 * The page->offset field is only 16 bit wide. This is an offset
 	 * in units of words from the beginning of an object. If the slab
@@ -1840,7 +1859,7 @@ static int kmem_cache_close(struct kmem_cache *s)
 	for_each_online_node(node) {
 		struct kmem_cache_node *n = get_node(s, node);
 
-		free_list(s, n, &n->partial);
+		n->nr_partial -= free_list(s, n, &n->partial);
 		if (atomic_long_read(&n->nr_slabs))
 			return 1;
 	}
@@ -1979,7 +1998,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
 {
 	int index = kmalloc_index(size);
 
-	if (!size)
+	if (!index)
 		return NULL;
 
 	/* Allocation too large? */
@@ -2025,7 +2044,7 @@ void *__kmalloc(size_t size, gfp_t flags)
 	struct kmem_cache *s = get_slab(size, flags);
 
 	if (s)
-		return kmem_cache_alloc(s, flags);
+		return slab_alloc(s, flags, -1, __builtin_return_address(0));
 	return NULL;
 }
 EXPORT_SYMBOL(__kmalloc);
@@ -2036,7 +2055,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 	struct kmem_cache *s = get_slab(size, flags);
 
 	if (s)
-		return kmem_cache_alloc_node(s, flags, node);
+		return slab_alloc(s, flags, node, __builtin_return_address(0));
 	return NULL;
 }
 EXPORT_SYMBOL(__kmalloc_node);
@@ -2081,19 +2100,93 @@ void kfree(const void *x)
 	if (!x)
 		return;
 
-	page = virt_to_page(x);
-
-	if (unlikely(PageCompound(page)))
-		page = page->first_page;
-
+	page = virt_to_head_page(x);
 	s = page->slab;
 
-	if (unlikely(PageError(page) && (s->flags & SLAB_STORE_USER)))
-		set_tracking(s, (void *)x, TRACK_FREE);
-	slab_free(s, page, (void *)x);
+	slab_free(s, page, (void *)x, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(kfree);
 
+/*
+ * kmem_cache_shrink removes empty slabs from the partial lists
+ * and then sorts the partially allocated slabs by the number
+ * of items in use. The slabs with the most items in use
+ * come first. New allocations fill these up and they are
+ * then removed from the partial list. The slabs with the
+ * least items in use are placed last. If the remaining
+ * objects are freed, such a page can be returned to the
+ * page allocator.
+ */
+int kmem_cache_shrink(struct kmem_cache *s)
+{
+	int node;
+	int i;
+	struct kmem_cache_node *n;
+	struct page *page;
+	struct page *t;
+	struct list_head *slabs_by_inuse =
+		kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL);
+	unsigned long flags;
+
+	if (!slabs_by_inuse)
+		return -ENOMEM;
+
+	flush_all(s);
+	for_each_online_node(node) {
+		n = get_node(s, node);
+
+		if (!n->nr_partial)
+			continue;
+
+		for (i = 0; i < s->objects; i++)
+			INIT_LIST_HEAD(slabs_by_inuse + i);
+
+		spin_lock_irqsave(&n->list_lock, flags);
+
+		/*
+		 * Build lists indexed by the items in use in
+		 * each slab or free slabs if empty.
+		 *
+		 * Note that concurrent frees may occur while
+		 * we hold the list_lock. page->inuse here is
+		 * the upper limit.
+		 */
+		list_for_each_entry_safe(page, t, &n->partial, lru) {
+			if (!page->inuse && slab_trylock(page)) {
+				/*
+				 * Must hold slab lock here because slab_free
+				 * may have freed the last object and be
+				 * waiting to release the slab.
+				 */
+				list_del(&page->lru);
+				n->nr_partial--;
+				slab_unlock(page);
+				discard_slab(s, page);
+			} else {
+				if (n->nr_partial > MAX_PARTIAL)
+					list_move(&page->lru,
+					slabs_by_inuse + page->inuse);
+			}
+		}
+
+		if (n->nr_partial <= MAX_PARTIAL)
+			goto out;
+
+		/*
+		 * Rebuild the partial list with the slabs filled up
+		 * most first and the least used slabs at the end.
+		 */
+		for (i = s->objects - 1; i >= 0; i--)
+			list_splice(slabs_by_inuse + i, n->partial.prev);
+
+	out:
+		spin_unlock_irqrestore(&n->list_lock, flags);
+	}
+
+	kfree(slabs_by_inuse);
+	return 0;
+}
+EXPORT_SYMBOL(kmem_cache_shrink);
+
 /**
  * krealloc - reallocate memory. The contents will remain unchanged.
  *
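The new kmem_cache_shrink() above can be exercised from a small test module. The sketch below is illustrative only: the cache name, object size and alloc/free pattern are assumptions, not part of this patch. The pattern leaves many slabs partially full, so the sort-by-inuse path runs; any slab that ends up completely empty is discarded.

	#include <linux/init.h>
	#include <linux/module.h>
	#include <linux/slab.h>

	#define NR_OBJS 512

	static void *objs[NR_OBJS];

	static int __init shrink_test_init(void)
	{
		struct kmem_cache *s;
		int i;

		/* Hypothetical test cache; name and size are arbitrary. */
		s = kmem_cache_create("slub_shrink_test", 256, 0, 0,
					NULL, NULL);
		if (!s)
			return -ENOMEM;

		for (i = 0; i < NR_OBJS; i++)
			objs[i] = kmem_cache_alloc(s, GFP_KERNEL);

		/* Free every other object: many slabs end up partial. */
		for (i = 0; i < NR_OBJS; i += 2)
			if (objs[i])
				kmem_cache_free(s, objs[i]);

		/* Empty slabs are discarded, the rest sorted by inuse. */
		kmem_cache_shrink(s);

		for (i = 1; i < NR_OBJS; i += 2)
			if (objs[i])
				kmem_cache_free(s, objs[i]);

		kmem_cache_destroy(s);
		return 0;
	}
	module_init(shrink_test_init);

	MODULE_LICENSE("GPL");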
@@ -2120,10 +2213,7 @@ void *krealloc(const void *p, size_t new_size, gfp_t flags)
 		return NULL;
 	}
 
-	page = virt_to_page(p);
-
-	if (unlikely(PageCompound(page)))
-		page = page->first_page;
+	page = virt_to_head_page(p);
 
 	new_cache = get_slab(new_size, flags);
 
@@ -2302,7 +2392,7 @@ void *kmem_cache_zalloc(struct kmem_cache *s, gfp_t flags)
 {
 	void *x;
 
-	x = kmem_cache_alloc(s, flags);
+	x = slab_alloc(s, flags, -1, __builtin_return_address(0));
 	if (x)
 		memset(x, 0, s->objsize);
 	return x;
@@ -2349,17 +2439,6 @@ static struct notifier_block __cpuinitdata slab_notifier =
 
 #endif
 
-/***************************************************************
- * Compatiblility definitions
- **************************************************************/
-
-int kmem_cache_shrink(struct kmem_cache *s)
-{
-	flush_all(s);
-	return 0;
-}
-EXPORT_SYMBOL(kmem_cache_shrink);
-
 #ifdef CONFIG_NUMA
 
 /*****************************************************************
@@ -2510,37 +2589,281 @@ static void resiliency_test(void) {};
 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
 {
 	struct kmem_cache *s = get_slab(size, gfpflags);
-	void *object;
 
 	if (!s)
 		return NULL;
 
-	object = kmem_cache_alloc(s, gfpflags);
-
-	if (object && (s->flags & SLAB_STORE_USER))
-		set_track(s, object, TRACK_ALLOC, caller);
-
-	return object;
+	return slab_alloc(s, gfpflags, -1, caller);
 }
 
 void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 					int node, void *caller)
 {
 	struct kmem_cache *s = get_slab(size, gfpflags);
-	void *object;
 
 	if (!s)
 		return NULL;
 
-	object = kmem_cache_alloc_node(s, gfpflags, node);
+	return slab_alloc(s, gfpflags, node, caller);
+}
 
-	if (object && (s->flags & SLAB_STORE_USER))
-		set_track(s, object, TRACK_ALLOC, caller);
+#ifdef CONFIG_SYSFS
 
-	return object;
+static int validate_slab(struct kmem_cache *s, struct page *page)
+{
+	void *p;
+	void *addr = page_address(page);
+	unsigned long map[BITS_TO_LONGS(s->objects)];
+
+	if (!check_slab(s, page) ||
+			!on_freelist(s, page, NULL))
+		return 0;
+
+	/* Now we know that a valid freelist exists */
+	bitmap_zero(map, s->objects);
+
+	for (p = page->freelist; p; p = get_freepointer(s, p)) {
+		set_bit((p - addr) / s->size, map);
+		if (!check_object(s, page, p, 0))
+			return 0;
+	}
+
+	for (p = addr; p < addr + s->objects * s->size; p += s->size)
+		if (!test_bit((p - addr) / s->size, map))
+			if (!check_object(s, page, p, 1))
+				return 0;
+	return 1;
 }
 
-#ifdef CONFIG_SYSFS
+static void validate_slab_slab(struct kmem_cache *s, struct page *page)
+{
+	if (slab_trylock(page)) {
+		validate_slab(s, page);
+		slab_unlock(page);
+	} else
+		printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
+			s->name, page);
+
+	if (s->flags & DEBUG_DEFAULT_FLAGS) {
+		if (!PageError(page))
+			printk(KERN_ERR "SLUB %s: PageError not set "
+				"on slab 0x%p\n", s->name, page);
+	} else {
+		if (PageError(page))
+			printk(KERN_ERR "SLUB %s: PageError set on "
+				"slab 0x%p\n", s->name, page);
+	}
+}
+
+static int validate_slab_node(struct kmem_cache *s, struct kmem_cache_node *n)
+{
+	unsigned long count = 0;
+	struct page *page;
+	unsigned long flags;
+
+	spin_lock_irqsave(&n->list_lock, flags);
+
+	list_for_each_entry(page, &n->partial, lru) {
+		validate_slab_slab(s, page);
+		count++;
+	}
+	if (count != n->nr_partial)
+		printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
+			"counter=%ld\n", s->name, count, n->nr_partial);
+
+	if (!(s->flags & SLAB_STORE_USER))
+		goto out;
+
+	list_for_each_entry(page, &n->full, lru) {
+		validate_slab_slab(s, page);
+		count++;
+	}
+	if (count != atomic_long_read(&n->nr_slabs))
+		printk(KERN_ERR "SLUB: %s %ld slabs counted but "
+			"counter=%ld\n", s->name, count,
+			atomic_long_read(&n->nr_slabs));
+
+out:
+	spin_unlock_irqrestore(&n->list_lock, flags);
+	return count;
+}
+
+static unsigned long validate_slab_cache(struct kmem_cache *s)
+{
+	int node;
+	unsigned long count = 0;
+
+	flush_all(s);
+	for_each_online_node(node) {
+		struct kmem_cache_node *n = get_node(s, node);
+
+		count += validate_slab_node(s, n);
+	}
+	return count;
+}
+
+/*
+ * Generate lists of locations where slabcache objects are allocated
+ * and freed.
+ */
+
+struct location {
+	unsigned long count;
+	void *addr;
+};
+
+struct loc_track {
+	unsigned long max;
+	unsigned long count;
+	struct location *loc;
+};
+
+static void free_loc_track(struct loc_track *t)
+{
+	if (t->max)
+		free_pages((unsigned long)t->loc,
+			get_order(sizeof(struct location) * t->max));
+}
+
+static int alloc_loc_track(struct loc_track *t, unsigned long max)
+{
+	struct location *l;
+	int order;
+
+	if (!max)
+		max = PAGE_SIZE / sizeof(struct location);
+
+	order = get_order(sizeof(struct location) * max);
+
+	l = (void *)__get_free_pages(GFP_KERNEL, order);
+
+	if (!l)
+		return 0;
+
+	if (t->count) {
+		memcpy(l, t->loc, sizeof(struct location) * t->count);
+		free_loc_track(t);
+	}
+	t->max = max;
+	t->loc = l;
+	return 1;
+}
+
+static int add_location(struct loc_track *t, struct kmem_cache *s,
+								void *addr)
+{
+	long start, end, pos;
+	struct location *l;
+	void *caddr;
+
+	start = -1;
+	end = t->count;
+
+	for ( ; ; ) {
+		pos = start + (end - start + 1) / 2;
+
+		/*
+		 * There is nothing at "end". If we end up there
+		 * we need to add something to before end.
+		 */
+		if (pos == end)
+			break;
+
+		caddr = t->loc[pos].addr;
+		if (addr == caddr) {
+			t->loc[pos].count++;
+			return 1;
+		}
+
+		if (addr < caddr)
+			end = pos;
+		else
+			start = pos;
+	}
+
+	/*
+	 * Not found. Insert new tracking element.
+	 */
+	if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max))
+		return 0;
+
+	l = t->loc + pos;
+	if (pos < t->count)
+		memmove(l + 1, l,
+			(t->count - pos) * sizeof(struct location));
+	t->count++;
+	l->count = 1;
+	l->addr = addr;
+	return 1;
+}
+
+static void process_slab(struct loc_track *t, struct kmem_cache *s,
+		struct page *page, enum track_item alloc)
+{
+	void *addr = page_address(page);
+	unsigned long map[BITS_TO_LONGS(s->objects)];
+	void *p;
+
+	bitmap_zero(map, s->objects);
+	for (p = page->freelist; p; p = get_freepointer(s, p))
+		set_bit((p - addr) / s->size, map);
+
+	for (p = addr; p < addr + s->objects * s->size; p += s->size)
+		if (!test_bit((p - addr) / s->size, map)) {
+			void *caller = get_track(s, p, alloc)->addr;
+
+			add_location(t, s, caller);
+		}
+}
+
+static int list_locations(struct kmem_cache *s, char *buf,
+					enum track_item alloc)
+{
+	int n = 0;
+	unsigned long i;
+	struct loc_track t;
+	int node;
+
+	t.count = 0;
+	t.max = 0;
+
+	/* Push back cpu slabs */
+	flush_all(s);
+
+	for_each_online_node(node) {
+		struct kmem_cache_node *n = get_node(s, node);
+		unsigned long flags;
+		struct page *page;
+
+		if (!atomic_long_read(&n->nr_slabs))
+			continue;
+
+		spin_lock_irqsave(&n->list_lock, flags);
+		list_for_each_entry(page, &n->partial, lru)
+			process_slab(&t, s, page, alloc);
+		list_for_each_entry(page, &n->full, lru)
+			process_slab(&t, s, page, alloc);
+		spin_unlock_irqrestore(&n->list_lock, flags);
+	}
+
+	for (i = 0; i < t.count; i++) {
+		void *addr = t.loc[i].addr;
+
+		if (n > PAGE_SIZE - 100)
+			break;
+		n += sprintf(buf + n, "%7ld ", t.loc[i].count);
+		if (addr)
+			n += sprint_symbol(buf + n, (unsigned long)t.loc[i].addr);
+		else
+			n += sprintf(buf + n, "<not-available>");
+		n += sprintf(buf + n, "\n");
+	}
+
+	free_loc_track(&t);
+	if (!t.count)
+		n += sprintf(buf, "No data\n");
+	return n;
+}
 
 static unsigned long count_partial(struct kmem_cache_node *n)
 {
@@ -2671,7 +2994,6 @@ struct slab_attribute {
 	static struct slab_attribute _name##_attr =  \
 	__ATTR(_name, 0644, _name##_show, _name##_store)
 
-
 static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
 {
 	return sprintf(buf, "%d\n", s->size);
@@ -2801,8 +3123,7 @@ SLAB_ATTR(reclaim_account);
 
 static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
 {
-	return sprintf(buf, "%d\n", !!(s->flags &
-		(SLAB_HWCACHE_ALIGN|SLAB_MUST_HWCACHE_ALIGN)));
+	return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
 }
 SLAB_ATTR_RO(hwcache_align);
 
@@ -2877,6 +3198,57 @@ static ssize_t store_user_store(struct kmem_cache *s,
 }
 SLAB_ATTR(store_user);
 
+static ssize_t validate_show(struct kmem_cache *s, char *buf)
+{
+	return 0;
+}
+
+static ssize_t validate_store(struct kmem_cache *s,
+				const char *buf, size_t length)
+{
+	if (buf[0] == '1')
+		validate_slab_cache(s);
+	else
+		return -EINVAL;
+	return length;
+}
+SLAB_ATTR(validate);
+
+static ssize_t shrink_show(struct kmem_cache *s, char *buf)
+{
+	return 0;
+}
+
+static ssize_t shrink_store(struct kmem_cache *s,
+				const char *buf, size_t length)
+{
+	if (buf[0] == '1') {
+		int rc = kmem_cache_shrink(s);
+
+		if (rc)
+			return rc;
+	} else
+		return -EINVAL;
+	return length;
+}
+SLAB_ATTR(shrink);
+
+static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
+{
+	if (!(s->flags & SLAB_STORE_USER))
+		return -ENOSYS;
+	return list_locations(s, buf, TRACK_ALLOC);
+}
+SLAB_ATTR_RO(alloc_calls);
+
+static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
+{
+	if (!(s->flags & SLAB_STORE_USER))
+		return -ENOSYS;
+	return list_locations(s, buf, TRACK_FREE);
+}
+SLAB_ATTR_RO(free_calls);
+
 #ifdef CONFIG_NUMA
 static ssize_t defrag_ratio_show(struct kmem_cache *s, char *buf)
 {
@@ -2916,6 +3288,10 @@ static struct attribute * slab_attrs[] = {
 	&red_zone_attr.attr,
 	&poison_attr.attr,
 	&store_user_attr.attr,
+	&validate_attr.attr,
+	&shrink_attr.attr,
+	&alloc_calls_attr.attr,
+	&free_calls_attr.attr,
 #ifdef CONFIG_ZONE_DMA
 	&cache_dma_attr.attr,
 #endif
@@ -3042,7 +3418,7 @@ static int sysfs_slab_add(struct kmem_cache *s)
 		 * This is typically the case for debug situations. In that
 		 * case we can catch duplicate names easily.
 		 */
-		sysfs_remove_link(&slab_subsys.kset.kobj, s->name);
+		sysfs_remove_link(&slab_subsys.kobj, s->name);
 		name = s->name;
 	} else {
 		/*
@@ -3097,8 +3473,8 @@ static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
 		/*
 		 * If we have a leftover link then remove it.
 		 */
-		sysfs_remove_link(&slab_subsys.kset.kobj, name);
-		return sysfs_create_link(&slab_subsys.kset.kobj,
+		sysfs_remove_link(&slab_subsys.kobj, name);
+		return sysfs_create_link(&slab_subsys.kobj,
 					&s->kobj, name);
 	}
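The validate, shrink, alloc_calls and free_calls attributes added above appear under /sys/slab/<cache>/ (the slab_subsys declared in this file). The userspace sketch below shows how they might be consumed; the cache name kmalloc-64 is an illustrative assumption, and alloc_calls/free_calls only return data when the cache runs with SLAB_STORE_USER enabled (for example via slub_debug on the boot command line):

	#include <stdio.h>

	int main(void)
	{
		FILE *f;
		char line[256];

		/* Writing '1' triggers validate_slab_cache(), see validate_store(). */
		f = fopen("/sys/slab/kmalloc-64/validate", "w");
		if (f) {
			fputs("1", f);
			fclose(f);
		}

		/* list_locations() emits one "<count> <caller>" line per call site. */
		f = fopen("/sys/slab/kmalloc-64/alloc_calls", "r");
		if (!f) {
			perror("alloc_calls");
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}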