X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fslab.c;h=98ac20bc0de9a3ad443ef9f8bd54cb1c58ed5de4;hb=d616e09ab33aa4d013a93c9b393efd5cebf78521;hp=c32af7e7581ef0ff11ef112f94b6a84da50cafc4;hpb=5006ecc2d5073d4e52f54381fd0fee1575d4ce22;p=linux-2.6 diff --git a/mm/slab.c b/mm/slab.c index c32af7e758..98ac20bc0d 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -207,11 +207,6 @@ typedef unsigned int kmem_bufctl_t; #define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2) #define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3) -/* Max number of objs-per-slab for caches which use off-slab slabs. - * Needed to avoid a possible looping condition in cache_grow(). - */ -static unsigned long offslab_limit; - /* * struct slab * @@ -336,6 +331,8 @@ static __always_inline int index_of(const size_t size) return 0; } +static int slab_early_init = 1; + #define INDEX_AC index_of(sizeof(struct arraycache_init)) #define INDEX_L3 index_of(sizeof(struct kmem_list3)) @@ -597,6 +594,7 @@ static inline struct kmem_cache *page_get_cache(struct page *page) { if (unlikely(PageCompound(page))) page = (struct page *)page_private(page); + BUG_ON(!PageSlab(page)); return (struct kmem_cache *)page->lru.next; } @@ -609,6 +607,7 @@ static inline struct slab *page_get_slab(struct page *page) { if (unlikely(PageCompound(page))) page = (struct page *)page_private(page); + BUG_ON(!PageSlab(page)); return (struct slab *)page->lru.prev; } @@ -700,6 +699,14 @@ static enum { FULL } g_cpucache_up; +/* + * used by boot code to determine if it can use slab based allocator + */ +int slab_is_available(void) +{ + return g_cpucache_up == FULL; +} + static DEFINE_PER_CPU(struct work_struct, reap_work); static void free_block(struct kmem_cache *cachep, void **objpp, int len, @@ -1021,6 +1028,40 @@ static void drain_alien_cache(struct kmem_cache *cachep, } } } + +static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) +{ + struct slab *slabp = virt_to_slab(objp); + int nodeid = slabp->nodeid; + struct kmem_list3 *l3; + struct array_cache *alien = NULL; + + /* + * Make sure we are not freeing a object from another node to the array + * cache on this cpu. + */ + if (likely(slabp->nodeid == numa_node_id())) + return 0; + + l3 = cachep->nodelists[numa_node_id()]; + STATS_INC_NODEFREES(cachep); + if (l3->alien && l3->alien[nodeid]) { + alien = l3->alien[nodeid]; + spin_lock(&alien->lock); + if (unlikely(alien->avail == alien->limit)) { + STATS_INC_ACOVERFLOW(cachep); + __drain_alien_cache(cachep, alien, nodeid); + } + alien->entry[alien->avail++] = objp; + spin_unlock(&alien->lock); + } else { + spin_lock(&(cachep->nodelists[nodeid])->list_lock); + free_block(cachep, &objp, 1, nodeid); + spin_unlock(&(cachep->nodelists[nodeid])->list_lock); + } + return 1; +} + #else #define drain_alien_cache(cachep, alien) do { } while (0) @@ -1035,6 +1076,11 @@ static inline void free_alien_cache(struct array_cache **ac_ptr) { } +static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) +{ + return 0; +} + #endif static int cpuup_callback(struct notifier_block *nfb, @@ -1332,6 +1378,8 @@ void __init kmem_cache_init(void) NULL, NULL); } + slab_early_init = 0; + while (sizes->cs_size != ULONG_MAX) { /* * For performance, all the general caches are L1 aligned. @@ -1348,12 +1396,6 @@ void __init kmem_cache_init(void) NULL, NULL); } - /* Inc off-slab bufctl limit until the ceiling is hit. */ - if (!(OFF_SLAB(sizes->cs_cachep))) { - offslab_limit = sizes->cs_size - sizeof(struct slab); - offslab_limit /= sizeof(kmem_bufctl_t); - } - sizes->cs_dmacachep = kmem_cache_create(names->name_dma, sizes->cs_size, ARCH_KMALLOC_MINALIGN, @@ -1453,31 +1495,29 @@ __initcall(cpucache_init); static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) { struct page *page; - void *addr; + int nr_pages; int i; - flags |= cachep->gfpflags; #ifndef CONFIG_MMU - /* nommu uses slab's for process anonymous memory allocations, so - * requires __GFP_COMP to properly refcount higher order allocations" + /* + * Nommu uses slab's for process anonymous memory allocations, and thus + * requires __GFP_COMP to properly refcount higher order allocations */ - page = alloc_pages_node(nodeid, (flags | __GFP_COMP), cachep->gfporder); -#else - page = alloc_pages_node(nodeid, flags, cachep->gfporder); + flags |= __GFP_COMP; #endif + flags |= cachep->gfpflags; + + page = alloc_pages_node(nodeid, flags, cachep->gfporder); if (!page) return NULL; - addr = page_address(page); - i = (1 << cachep->gfporder); + nr_pages = (1 << cachep->gfporder); if (cachep->flags & SLAB_RECLAIM_ACCOUNT) - atomic_add(i, &slab_reclaim_pages); - add_page_state(nr_slab, i); - while (i--) { - __SetPageSlab(page); - page++; - } - return addr; + atomic_add(nr_pages, &slab_reclaim_pages); + add_page_state(nr_slab, nr_pages); + for (i = 0; i < nr_pages; i++) + __SetPageSlab(page + i); + return page_address(page); } /* @@ -1772,6 +1812,7 @@ static void set_up_list3s(struct kmem_cache *cachep, int index) static size_t calculate_slab_order(struct kmem_cache *cachep, size_t size, size_t align, unsigned long flags) { + unsigned long offslab_limit; size_t left_over = 0; int gfporder; @@ -1783,9 +1824,18 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, if (!num) continue; - /* More than offslab_limit objects will cause problems */ - if ((flags & CFLGS_OFF_SLAB) && num > offslab_limit) - break; + if (flags & CFLGS_OFF_SLAB) { + /* + * Max number of objs-per-slab for caches which + * use off-slab slabs. Needed to avoid a possible + * looping condition in cache_grow(). + */ + offslab_limit = size - sizeof(struct slab); + offslab_limit /= sizeof(kmem_bufctl_t); + + if (num > offslab_limit) + break; + } /* Found something acceptable - save it away */ cachep->num = num; @@ -1906,8 +1956,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, void (*dtor)(void*, struct kmem_cache *, unsigned long)) { size_t left_over, slab_size, ralign; - struct kmem_cache *cachep = NULL; - struct list_head *p; + struct kmem_cache *cachep = NULL, *pc; /* * Sanity checks... these are all serious usage bugs. @@ -1927,8 +1976,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, mutex_lock(&cache_chain_mutex); - list_for_each(p, &cache_chain) { - struct kmem_cache *pc = list_entry(p, struct kmem_cache, next); + list_for_each_entry(pc, &cache_chain, next) { mm_segment_t old_fs = get_fs(); char tmp; int res; @@ -2062,8 +2110,12 @@ kmem_cache_create (const char *name, size_t size, size_t align, #endif #endif - /* Determine if the slab management is 'on' or 'off' slab. */ - if (size >= (PAGE_SIZE >> 3)) + /* + * Determine if the slab management is 'on' or 'off' slab. + * (bootstrapping cannot cope with offslab caches so don't do + * it too early on.) + */ + if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init) /* * Size is large, assume best to place the slab management obj * off-slab (should allow better packing of objs). @@ -2192,11 +2244,14 @@ static void drain_cpu_caches(struct kmem_cache *cachep) check_irq_on(); for_each_online_node(node) { l3 = cachep->nodelists[node]; - if (l3) { + if (l3 && l3->alien) + drain_alien_cache(cachep, l3->alien); + } + + for_each_online_node(node) { + l3 = cachep->nodelists[node]; + if (l3) drain_array(cachep, l3, l3->shared, 1, node); - if (l3->alien) - drain_alien_cache(cachep, l3->alien); - } } } @@ -2450,23 +2505,28 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, slabp->inuse--; } -static void set_slab_attr(struct kmem_cache *cachep, struct slab *slabp, - void *objp) +/* + * Map pages beginning at addr to the given cache and slab. This is required + * for the slab allocator to be able to lookup the cache and slab of a + * virtual address for kfree, ksize, kmem_ptr_validate, and slab debugging. + */ +static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, + void *addr) { - int i; + int nr_pages; struct page *page; - /* Nasty!!!!!! I hope this is OK. */ - page = virt_to_page(objp); + page = virt_to_page(addr); - i = 1; + nr_pages = 1; if (likely(!PageCompound(page))) - i <<= cachep->gfporder; + nr_pages <<= cache->gfporder; + do { - page_set_cache(page, cachep); - page_set_slab(page, slabp); + page_set_cache(page, cache); + page_set_slab(page, slab); page++; - } while (--i); + } while (--nr_pages); } /* @@ -2538,7 +2598,7 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) goto opps1; slabp->nodeid = nodeid; - set_slab_attr(cachep, slabp, objp); + slab_map_pages(cachep, slabp, objp); cache_init_objs(cachep, slabp, ctor_flags); @@ -2586,6 +2646,28 @@ static void kfree_debugcheck(const void *objp) } } +static inline void verify_redzone_free(struct kmem_cache *cache, void *obj) +{ + unsigned long redzone1, redzone2; + + redzone1 = *dbg_redzone1(cache, obj); + redzone2 = *dbg_redzone2(cache, obj); + + /* + * Redzone is ok. + */ + if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE) + return; + + if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE) + slab_error(cache, "double free detected"); + else + slab_error(cache, "memory outside object was overwritten"); + + printk(KERN_ERR "%p: redzone 1:0x%lx, redzone 2:0x%lx.\n", + obj, redzone1, redzone2); +} + static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, void *caller) { @@ -2597,27 +2679,10 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, kfree_debugcheck(objp); page = virt_to_page(objp); - if (page_get_cache(page) != cachep) { - printk(KERN_ERR "mismatch in kmem_cache_free: expected " - "cache %p, got %p\n", - page_get_cache(page), cachep); - printk(KERN_ERR "%p is %s.\n", cachep, cachep->name); - printk(KERN_ERR "%p is %s.\n", page_get_cache(page), - page_get_cache(page)->name); - WARN_ON(1); - } slabp = page_get_slab(page); if (cachep->flags & SLAB_RED_ZONE) { - if (*dbg_redzone1(cachep, objp) != RED_ACTIVE || - *dbg_redzone2(cachep, objp) != RED_ACTIVE) { - slab_error(cachep, "double free, or memory outside" - " object was overwritten"); - printk(KERN_ERR "%p: redzone 1:0x%lx, " - "redzone 2:0x%lx.\n", - objp, *dbg_redzone1(cachep, objp), - *dbg_redzone2(cachep, objp)); - } + verify_redzone_free(cachep, objp); *dbg_redzone1(cachep, objp) = RED_INACTIVE; *dbg_redzone2(cachep, objp) = RED_INACTIVE; } @@ -3077,41 +3142,9 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) check_irq_off(); objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); - /* Make sure we are not freeing a object from another - * node to the array cache on this cpu. - */ -#ifdef CONFIG_NUMA - { - struct slab *slabp; - slabp = virt_to_slab(objp); - if (unlikely(slabp->nodeid != numa_node_id())) { - struct array_cache *alien = NULL; - int nodeid = slabp->nodeid; - struct kmem_list3 *l3; - - l3 = cachep->nodelists[numa_node_id()]; - STATS_INC_NODEFREES(cachep); - if (l3->alien && l3->alien[nodeid]) { - alien = l3->alien[nodeid]; - spin_lock(&alien->lock); - if (unlikely(alien->avail == alien->limit)) { - STATS_INC_ACOVERFLOW(cachep); - __drain_alien_cache(cachep, - alien, nodeid); - } - alien->entry[alien->avail++] = objp; - spin_unlock(&alien->lock); - } else { - spin_lock(&(cachep->nodelists[nodeid])-> - list_lock); - free_block(cachep, &objp, 1, nodeid); - spin_unlock(&(cachep->nodelists[nodeid])-> - list_lock); - } - return; - } - } -#endif + if (cache_free_alien(cachep, objp)) + return; + if (likely(ac->avail < ac->limit)) { STATS_INC_FREEHIT(cachep); ac->entry[ac->avail++] = objp; @@ -3244,26 +3277,10 @@ EXPORT_SYMBOL(kmalloc_node); #endif /** - * kmalloc - allocate memory + * __do_kmalloc - allocate memory * @size: how many bytes of memory are required. - * @flags: the type of memory to allocate. + * @flags: the type of memory to allocate (see kmalloc). * @caller: function caller for debug tracking of the caller - * - * kmalloc is the normal method of allocating memory - * in the kernel. - * - * The @flags argument may be one of: - * - * %GFP_USER - Allocate memory on behalf of user. May sleep. - * - * %GFP_KERNEL - Allocate normal kernel ram. May sleep. - * - * %GFP_ATOMIC - Allocation will not sleep. Use inside interrupt handlers. - * - * Additionally, the %GFP_DMA flag may be set to indicate the memory - * must be suitable for DMA. This can mean different things on different - * platforms. For example, on i386, it means that the memory must come - * from the first 16MB. */ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, void *caller) @@ -3361,6 +3378,8 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) { unsigned long flags; + BUG_ON(virt_to_cache(objp) != cachep); + local_irq_save(flags); __cache_free(cachep, objp); local_irq_restore(flags); @@ -3670,7 +3689,7 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, */ static void cache_reap(void *unused) { - struct list_head *walk; + struct kmem_cache *searchp; struct kmem_list3 *l3; int node = numa_node_id(); @@ -3681,13 +3700,11 @@ static void cache_reap(void *unused) return; } - list_for_each(walk, &cache_chain) { - struct kmem_cache *searchp; + list_for_each_entry(searchp, &cache_chain, next) { struct list_head *p; int tofree; struct slab *slabp; - searchp = list_entry(walk, struct kmem_cache, next); check_irq_on(); /* @@ -3815,7 +3832,6 @@ static void s_stop(struct seq_file *m, void *p) static int s_show(struct seq_file *m, void *p) { struct kmem_cache *cachep = p; - struct list_head *q; struct slab *slabp; unsigned long active_objs; unsigned long num_objs; @@ -3836,15 +3852,13 @@ static int s_show(struct seq_file *m, void *p) check_irq_on(); spin_lock_irq(&l3->list_lock); - list_for_each(q, &l3->slabs_full) { - slabp = list_entry(q, struct slab, list); + list_for_each_entry(slabp, &l3->slabs_full, list) { if (slabp->inuse != cachep->num && !error) error = "slabs_full accounting error"; active_objs += cachep->num; active_slabs++; } - list_for_each(q, &l3->slabs_partial) { - slabp = list_entry(q, struct slab, list); + list_for_each_entry(slabp, &l3->slabs_partial, list) { if (slabp->inuse == cachep->num && !error) error = "slabs_partial inuse accounting error"; if (!slabp->inuse && !error) @@ -3852,8 +3866,7 @@ static int s_show(struct seq_file *m, void *p) active_objs += slabp->inuse; active_slabs++; } - list_for_each(q, &l3->slabs_free) { - slabp = list_entry(q, struct slab, list); + list_for_each_entry(slabp, &l3->slabs_free, list) { if (slabp->inuse && !error) error = "slabs_free/inuse accounting error"; num_slabs++; @@ -3946,7 +3959,7 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer, { char kbuf[MAX_SLABINFO_WRITE + 1], *tmp; int limit, batchcount, shared, res; - struct list_head *p; + struct kmem_cache *cachep; if (count > MAX_SLABINFO_WRITE) return -EINVAL; @@ -3965,10 +3978,7 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer, /* Find the cache in the chain of caches. */ mutex_lock(&cache_chain_mutex); res = -EINVAL; - list_for_each(p, &cache_chain) { - struct kmem_cache *cachep; - - cachep = list_entry(p, struct kmem_cache, next); + list_for_each_entry(cachep, &cache_chain, next) { if (!strcmp(cachep->name, kbuf)) { if (limit < 1 || batchcount < 1 || batchcount > limit || shared < 0) { @@ -4070,7 +4080,6 @@ static void show_symbol(struct seq_file *m, unsigned long address) static int leaks_show(struct seq_file *m, void *p) { struct kmem_cache *cachep = p; - struct list_head *q; struct slab *slabp; struct kmem_list3 *l3; const char *name; @@ -4095,14 +4104,10 @@ static int leaks_show(struct seq_file *m, void *p) check_irq_on(); spin_lock_irq(&l3->list_lock); - list_for_each(q, &l3->slabs_full) { - slabp = list_entry(q, struct slab, list); + list_for_each_entry(slabp, &l3->slabs_full, list) handle_slab(n, cachep, slabp); - } - list_for_each(q, &l3->slabs_partial) { - slabp = list_entry(q, struct slab, list); + list_for_each_entry(slabp, &l3->slabs_partial, list) handle_slab(n, cachep, slabp); - } spin_unlock_irq(&l3->list_lock); } name = cachep->name;