X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fslub.c;h=c81f52a7215345c3e3f08ca3fccfbfb02afbaeb1;hb=5e6d444ea1f72b8148354a9baf0ea8fa3dd0425b;hp=c58a974d15acec2a1c7f1899b2e93abe11b231ad;hpb=35e5d7ee27680aef6dc3fab45a5ecd9952d9791a;p=linux-2.6 diff --git a/mm/slub.c b/mm/slub.c index c58a974d15..c81f52a721 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -89,17 +89,25 @@ static inline int SlabDebug(struct page *page) { +#ifdef CONFIG_SLUB_DEBUG return PageError(page); +#else + return 0; +#endif } static inline void SetSlabDebug(struct page *page) { +#ifdef CONFIG_SLUB_DEBUG SetPageError(page); +#endif } static inline void ClearSlabDebug(struct page *page) { +#ifdef CONFIG_SLUB_DEBUG ClearPageError(page); +#endif } /* @@ -195,7 +203,19 @@ static enum { static DECLARE_RWSEM(slub_lock); LIST_HEAD(slab_caches); -#ifdef CONFIG_SYSFS +/* + * Tracking user of a slab. + */ +struct track { + void *addr; /* Called from address */ + int cpu; /* Was running on cpu */ + int pid; /* Pid context */ + unsigned long when; /* When did the operation occur */ +}; + +enum track_item { TRACK_ALLOC, TRACK_FREE }; + +#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG) static int sysfs_slab_add(struct kmem_cache *); static int sysfs_slab_alias(struct kmem_cache *, const char *); static void sysfs_slab_remove(struct kmem_cache *); @@ -223,6 +243,23 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) #endif } +static inline int check_valid_pointer(struct kmem_cache *s, + struct page *page, const void *object) +{ + void *base; + + if (!object) + return 1; + + base = page_address(page); + if (object < base || object >= base + s->objects * s->size || + (object - base) % s->size) { + return 0; + } + + return 1; +} + /* * Slow version of get and set free pointer. * @@ -255,6 +292,14 @@ static inline int slab_index(void *p, struct kmem_cache *s, void *addr) return (p - addr) / s->size; } +#ifdef CONFIG_SLUB_DEBUG +/* + * Debug settings: + */ +static int slub_debug; + +static char *slub_debug_slabs; + /* * Object debugging */ @@ -290,18 +335,6 @@ static void print_section(char *text, u8 *addr, unsigned int length) } } -/* - * Tracking user of a slab. - */ -struct track { - void *addr; /* Called from address */ - int cpu; /* Was running on cpu */ - int pid; /* Pid context */ - unsigned long when; /* When did the operation occur */ -}; - -enum track_item { TRACK_ALLOC, TRACK_FREE }; - static struct track *get_track(struct kmem_cache *s, void *object, enum track_item alloc) { @@ -436,23 +469,6 @@ static int check_bytes(u8 *start, unsigned int value, unsigned int bytes) return 1; } -static inline int check_valid_pointer(struct kmem_cache *s, - struct page *page, const void *object) -{ - void *base; - - if (!object) - return 1; - - base = page_address(page); - if (object < base || object >= base + s->objects * s->size || - (object - base) % s->size) { - return 0; - } - - return 1; -} - /* * Object layout: * @@ -805,6 +821,113 @@ fail: return 0; } +static void trace(struct kmem_cache *s, struct page *page, void *object, int alloc) +{ + if (s->flags & SLAB_TRACE) { + printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n", + s->name, + alloc ? "alloc" : "free", + object, page->inuse, + page->freelist); + + if (!alloc) + print_section("Object", (void *)object, s->objsize); + + dump_stack(); + } +} + +static int __init setup_slub_debug(char *str) +{ + if (!str || *str != '=') + slub_debug = DEBUG_DEFAULT_FLAGS; + else { + str++; + if (*str == 0 || *str == ',') + slub_debug = DEBUG_DEFAULT_FLAGS; + else + for( ;*str && *str != ','; str++) + switch (*str) { + case 'f' : case 'F' : + slub_debug |= SLAB_DEBUG_FREE; + break; + case 'z' : case 'Z' : + slub_debug |= SLAB_RED_ZONE; + break; + case 'p' : case 'P' : + slub_debug |= SLAB_POISON; + break; + case 'u' : case 'U' : + slub_debug |= SLAB_STORE_USER; + break; + case 't' : case 'T' : + slub_debug |= SLAB_TRACE; + break; + default: + printk(KERN_ERR "slub_debug option '%c' " + "unknown. skipped\n",*str); + } + } + + if (*str == ',') + slub_debug_slabs = str + 1; + return 1; +} + +__setup("slub_debug", setup_slub_debug); + +static void kmem_cache_open_debug_check(struct kmem_cache *s) +{ + /* + * The page->offset field is only 16 bit wide. This is an offset + * in units of words from the beginning of an object. If the slab + * size is bigger then we cannot move the free pointer behind the + * object anymore. + * + * On 32 bit platforms the limit is 256k. On 64bit platforms + * the limit is 512k. + * + * Debugging or ctor/dtors may create a need to move the free + * pointer. Fail if this happens. + */ + if (s->size >= 65535 * sizeof(void *)) { + BUG_ON(s->flags & (SLAB_RED_ZONE | SLAB_POISON | + SLAB_STORE_USER | SLAB_DESTROY_BY_RCU)); + BUG_ON(s->ctor || s->dtor); + } + else + /* + * Enable debugging if selected on the kernel commandline. + */ + if (slub_debug && (!slub_debug_slabs || + strncmp(slub_debug_slabs, s->name, + strlen(slub_debug_slabs)) == 0)) + s->flags |= slub_debug; +} +#else + +static inline int alloc_object_checks(struct kmem_cache *s, + struct page *page, void *object) { return 0; } + +static inline int free_object_checks(struct kmem_cache *s, + struct page *page, void *object) { return 0; } + +static inline void add_full(struct kmem_cache_node *n, struct page *page) {} +static inline void remove_full(struct kmem_cache *s, struct page *page) {} +static inline void trace(struct kmem_cache *s, struct page *page, + void *object, int alloc) {} +static inline void init_object(struct kmem_cache *s, + void *object, int active) {} +static inline void init_tracking(struct kmem_cache *s, void *object) {} +static inline int slab_pad_check(struct kmem_cache *s, struct page *page) + { return 1; } +static inline int check_object(struct kmem_cache *s, struct page *page, + void *object, int active) { return 1; } +static inline void set_track(struct kmem_cache *s, void *object, + enum track_item alloc, void *addr) {} +static inline void kmem_cache_open_debug_check(struct kmem_cache *s) {} +#define slub_debug 0 +#endif /* * Slab allocation and freeing */ @@ -1289,12 +1412,7 @@ debug: goto another_slab; if (s->flags & SLAB_STORE_USER) set_track(s, object, TRACK_ALLOC, addr); - if (s->flags & SLAB_TRACE) { - printk(KERN_INFO "TRACE %s alloc 0x%p inuse=%d fp=0x%p\n", - s->name, object, page->inuse, - page->freelist); - dump_stack(); - } + trace(s, page, object, 1); init_object(s, object, 1); goto have_object; } @@ -1379,13 +1497,7 @@ debug: remove_full(s, page); if (s->flags & SLAB_STORE_USER) set_track(s, x, TRACK_FREE, addr); - if (s->flags & SLAB_TRACE) { - printk(KERN_INFO "TRACE %s free 0x%p inuse=%d fp=0x%p\n", - s->name, object, page->inuse, - page->freelist); - print_section("Object", (void *)object, s->objsize); - dump_stack(); - } + trace(s, page, object, 0); init_object(s, object, 0); goto checks_ok; } @@ -1440,13 +1552,6 @@ static int slub_min_objects = DEFAULT_MIN_OBJECTS; */ static int slub_nomerge; -/* - * Debug settings: - */ -static int slub_debug; - -static char *slub_debug_slabs; - /* * Calculate the order of allocation given an slab object size. * @@ -1472,34 +1577,75 @@ static char *slub_debug_slabs; * requested a higher mininum order then we start with that one instead of * the smallest order which will fit the object. */ -static int calculate_order(int size) +static inline int slab_order(int size, int min_objects, + int max_order, int fract_leftover) { int order; int rem; - for (order = max(slub_min_order, fls(size - 1) - PAGE_SHIFT); - order < MAX_ORDER; order++) { - unsigned long slab_size = PAGE_SIZE << order; + for (order = max(slub_min_order, + fls(min_objects * size - 1) - PAGE_SHIFT); + order <= max_order; order++) { - if (slub_max_order > order && - slab_size < slub_min_objects * size) - continue; + unsigned long slab_size = PAGE_SIZE << order; - if (slab_size < size) + if (slab_size < min_objects * size) continue; rem = slab_size % size; - if (rem <= slab_size / 8) + if (rem <= slab_size / fract_leftover) break; } - if (order >= MAX_ORDER) - return -E2BIG; return order; } +static inline int calculate_order(int size) +{ + int order; + int min_objects; + int fraction; + + /* + * Attempt to find best configuration for a slab. This + * works by first attempting to generate a layout with + * the best configuration and backing off gradually. + * + * First we reduce the acceptable waste in a slab. Then + * we reduce the minimum objects required in a slab. + */ + min_objects = slub_min_objects; + while (min_objects > 1) { + fraction = 8; + while (fraction >= 4) { + order = slab_order(size, min_objects, + slub_max_order, fraction); + if (order <= slub_max_order) + return order; + fraction /= 2; + } + min_objects /= 2; + } + + /* + * We were unable to place multiple objects in a slab. Now + * lets see if we can place a single object there. + */ + order = slab_order(size, 1, slub_max_order, 1); + if (order <= slub_max_order) + return order; + + /* + * Doh this slab cannot be placed using slub_max_order. + */ + order = slab_order(size, 1, MAX_ORDER, 1); + if (order <= MAX_ORDER) + return order; + return -ENOSYS; +} + /* * Figure out what the alignment of the objects will be. */ @@ -1655,6 +1801,7 @@ static int calculate_sizes(struct kmem_cache *s) */ size = ALIGN(size, sizeof(void *)); +#ifdef CONFIG_SLUB_DEBUG /* * If we are Redzoning then check if there is some space between the * end of the object and the free pointer. If not then add an @@ -1662,6 +1809,7 @@ static int calculate_sizes(struct kmem_cache *s) */ if ((flags & SLAB_RED_ZONE) && size == s->objsize) size += sizeof(void *); +#endif /* * With that we have determined the number of bytes in actual use @@ -1669,6 +1817,7 @@ static int calculate_sizes(struct kmem_cache *s) */ s->inuse = size; +#ifdef CONFIG_SLUB_DEBUG if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) || s->ctor || s->dtor)) { /* @@ -1699,6 +1848,7 @@ static int calculate_sizes(struct kmem_cache *s) * of the object. */ size += sizeof(void *); +#endif /* * Determine the alignment based on various parameters that the @@ -1748,32 +1898,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, s->objsize = size; s->flags = flags; s->align = align; - - /* - * The page->offset field is only 16 bit wide. This is an offset - * in units of words from the beginning of an object. If the slab - * size is bigger then we cannot move the free pointer behind the - * object anymore. - * - * On 32 bit platforms the limit is 256k. On 64bit platforms - * the limit is 512k. - * - * Debugging or ctor/dtors may create a need to move the free - * pointer. Fail if this happens. - */ - if (s->size >= 65535 * sizeof(void *)) { - BUG_ON(flags & (SLAB_RED_ZONE | SLAB_POISON | - SLAB_STORE_USER | SLAB_DESTROY_BY_RCU)); - BUG_ON(ctor || dtor); - } - else - /* - * Enable debugging if selected on the kernel commandline. - */ - if (slub_debug && (!slub_debug_slabs || - strncmp(slub_debug_slabs, name, - strlen(slub_debug_slabs)) == 0)) - s->flags |= slub_debug; + kmem_cache_open_debug_check(s); if (!calculate_sizes(s)) goto error; @@ -1944,45 +2069,6 @@ static int __init setup_slub_nomerge(char *str) __setup("slub_nomerge", setup_slub_nomerge); -static int __init setup_slub_debug(char *str) -{ - if (!str || *str != '=') - slub_debug = DEBUG_DEFAULT_FLAGS; - else { - str++; - if (*str == 0 || *str == ',') - slub_debug = DEBUG_DEFAULT_FLAGS; - else - for( ;*str && *str != ','; str++) - switch (*str) { - case 'f' : case 'F' : - slub_debug |= SLAB_DEBUG_FREE; - break; - case 'z' : case 'Z' : - slub_debug |= SLAB_RED_ZONE; - break; - case 'p' : case 'P' : - slub_debug |= SLAB_POISON; - break; - case 'u' : case 'U' : - slub_debug |= SLAB_STORE_USER; - break; - case 't' : case 'T' : - slub_debug |= SLAB_TRACE; - break; - default: - printk(KERN_ERR "slub_debug option '%c' " - "unknown. skipped\n",*str); - } - } - - if (*str == ',') - slub_debug_slabs = str + 1; - return 1; -} - -__setup("slub_debug", setup_slub_debug); - static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s, const char *name, int size, gfp_t gfp_flags) { @@ -2549,8 +2635,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, return slab_alloc(s, gfpflags, node, caller); } -#ifdef CONFIG_SYSFS - +#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG) static int validate_slab(struct kmem_cache *s, struct page *page) { void *p; @@ -2707,6 +2792,13 @@ static void resiliency_test(void) {}; struct location { unsigned long count; void *addr; + long long sum_time; + long min_time; + long max_time; + long min_pid; + long max_pid; + cpumask_t cpus; + nodemask_t nodes; }; struct loc_track { @@ -2747,11 +2839,12 @@ static int alloc_loc_track(struct loc_track *t, unsigned long max) } static int add_location(struct loc_track *t, struct kmem_cache *s, - void *addr) + const struct track *track) { long start, end, pos; struct location *l; void *caddr; + unsigned long age = jiffies - track->when; start = -1; end = t->count; @@ -2767,12 +2860,29 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, break; caddr = t->loc[pos].addr; - if (addr == caddr) { - t->loc[pos].count++; + if (track->addr == caddr) { + + l = &t->loc[pos]; + l->count++; + if (track->when) { + l->sum_time += age; + if (age < l->min_time) + l->min_time = age; + if (age > l->max_time) + l->max_time = age; + + if (track->pid < l->min_pid) + l->min_pid = track->pid; + if (track->pid > l->max_pid) + l->max_pid = track->pid; + + cpu_set(track->cpu, l->cpus); + } + node_set(page_to_nid(virt_to_page(track)), l->nodes); return 1; } - if (addr < caddr) + if (track->addr < caddr) end = pos; else start = pos; @@ -2790,7 +2900,16 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, (t->count - pos) * sizeof(struct location)); t->count++; l->count = 1; - l->addr = addr; + l->addr = track->addr; + l->sum_time = age; + l->min_time = age; + l->max_time = age; + l->min_pid = track->pid; + l->max_pid = track->pid; + cpus_clear(l->cpus); + cpu_set(track->cpu, l->cpus); + nodes_clear(l->nodes); + node_set(page_to_nid(virt_to_page(track)), l->nodes); return 1; } @@ -2806,11 +2925,8 @@ static void process_slab(struct loc_track *t, struct kmem_cache *s, set_bit(slab_index(p, s, addr), map); for_each_object(p, s, addr) - if (!test_bit(slab_index(p, s, addr), map)) { - void *addr = get_track(s, p, alloc)->addr; - - add_location(t, s, addr); - } + if (!test_bit(slab_index(p, s, addr), map)) + add_location(t, s, get_track(s, p, alloc)); } static int list_locations(struct kmem_cache *s, char *buf, @@ -2844,15 +2960,47 @@ static int list_locations(struct kmem_cache *s, char *buf, } for (i = 0; i < t.count; i++) { - void *addr = t.loc[i].addr; + struct location *l = &t.loc[i]; if (n > PAGE_SIZE - 100) break; - n += sprintf(buf + n, "%7ld ", t.loc[i].count); - if (addr) - n += sprint_symbol(buf + n, (unsigned long)t.loc[i].addr); + n += sprintf(buf + n, "%7ld ", l->count); + + if (l->addr) + n += sprint_symbol(buf + n, (unsigned long)l->addr); else n += sprintf(buf + n, ""); + + if (l->sum_time != l->min_time) { + unsigned long remainder; + + n += sprintf(buf + n, " age=%ld/%ld/%ld", + l->min_time, + div_long_long_rem(l->sum_time, l->count, &remainder), + l->max_time); + } else + n += sprintf(buf + n, " age=%ld", + l->min_time); + + if (l->min_pid != l->max_pid) + n += sprintf(buf + n, " pid=%ld-%ld", + l->min_pid, l->max_pid); + else + n += sprintf(buf + n, " pid=%ld", + l->min_pid); + + if (num_online_cpus() > 1 && !cpus_empty(l->cpus)) { + n += sprintf(buf + n, " cpus="); + n += cpulist_scnprintf(buf + n, PAGE_SIZE - n - 50, + l->cpus); + } + + if (num_online_nodes() > 1 && !nodes_empty(l->nodes)) { + n += sprintf(buf + n, " nodes="); + n += nodelist_scnprintf(buf + n, PAGE_SIZE - n - 50, + l->nodes); + } + n += sprintf(buf + n, "\n"); }