X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fslob.c;h=ec33fcdc852e2c72ecc95194ada98bf702d3a14a;hb=0ce49a3945474fc942ec37c0c0efece60f592f80;hp=8ee64fed2bb56db97792b7fa4a7d8dc665123979;hpb=95b35127f13661abb0dc3459042cdb417d21e692;p=linux-2.6 diff --git a/mm/slob.c b/mm/slob.c index 8ee64fed2b..ec33fcdc85 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -3,33 +3,51 @@ * * Matt Mackall 12/30/03 * + * NUMA support by Paul Mundt, 2007. + * * How SLOB works: * * The core of SLOB is a traditional K&R style heap allocator, with * support for returning aligned objects. The granularity of this - * allocator is 4 bytes on 32-bit and 8 bytes on 64-bit, though it - * could be as low as 2 if the compiler alignment requirements allow. + * allocator is as little as 2 bytes, however typically most architectures + * will require 4 bytes on 32-bit and 8 bytes on 64-bit. * - * The slob heap is a linked list of pages from __get_free_page, and + * The slob heap is a linked list of pages from alloc_pages(), and * within each page, there is a singly-linked list of free blocks (slob_t). * The heap is grown on demand and allocation from the heap is currently * first-fit. * * Above this is an implementation of kmalloc/kfree. Blocks returned - * from kmalloc are 4-byte aligned and prepended with a 4-byte header. + * from kmalloc are prepended with a 4-byte header with the kmalloc size. * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls - * __get_free_pages directly so that it can return page-aligned blocks - * and keeps a linked list of such pages and their orders. These - * objects are detected in kfree() by their page alignment. + * alloc_pages() directly, allocating compound pages so the page order + * does not have to be separately tracked, and also stores the exact + * allocation size in page->private so that it can be used to accurately + * provide ksize(). These objects are detected in kfree() because slob_page() + * is false for them. * * SLAB is emulated on top of SLOB by simply calling constructors and * destructors for every SLAB allocation. Objects are returned with the * 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which * case the low-level allocator will fragment blocks to create the proper * alignment. Again, objects of page-size or greater are allocated by - * calling __get_free_pages. As SLAB objects know their size, no separate + * calling alloc_pages(). As SLAB objects know their size, no separate * size bookkeeping is necessary and there is essentially no allocation - * space overhead. + * space overhead, and compound pages aren't needed for multi-page + * allocations. + * + * NUMA support in SLOB is fairly simplistic, pushing most of the real + * logic down to the page allocator, and simply doing the node accounting + * on the upper levels. In the event that a node id is explicitly + * provided, alloc_pages_node() with the specified node id is used + * instead. The common case (or when the node id isn't explicitly provided) + * will default to the current node, as per numa_node_id(). + * + * Node aware pages are still inserted in to the global freelist, and + * these are scanned for by matching against the node id encoded in the + * page flags. As a result, block allocations that can be satisfied from + * the freelist will only be done so on pages residing on the same node, + * in order to prevent random node placement. */ #include @@ -42,13 +60,6 @@ #include #include -/* SLOB_MIN_ALIGN == sizeof(long) */ -#if BITS_PER_BYTE == 32 -#define SLOB_MIN_ALIGN 4 -#else -#define SLOB_MIN_ALIGN 8 -#endif - /* * slob_block has a field 'units', which indicates size of block if +ve, * or offset of next block if -ve (in SLOB_UNITs). @@ -57,19 +68,15 @@ * Those with larger size contain their size in the first SLOB_UNIT of * memory, and the offset of the next free block in the second SLOB_UNIT. */ -#if PAGE_SIZE <= (32767 * SLOB_MIN_ALIGN) +#if PAGE_SIZE <= (32767 * 2) typedef s16 slobidx_t; #else typedef s32 slobidx_t; #endif -/* - * Align struct slob_block to long for now, but can some embedded - * architectures get away with less? - */ struct slob_block { slobidx_t units; -} __attribute__((aligned(SLOB_MIN_ALIGN))); +}; typedef struct slob_block slob_t; /* @@ -212,6 +219,23 @@ static int slob_last(slob_t *s) return !((unsigned long)slob_next(s) & ~PAGE_MASK); } +static void *slob_new_page(gfp_t gfp, int order, int node) +{ + void *page; + +#ifdef CONFIG_NUMA + if (node != -1) + page = alloc_pages_node(node, gfp, order); + else +#endif + page = alloc_pages(gfp, order); + + if (!page) + return NULL; + + return page_address(page); +} + /* * Allocate a slob block within a given slob_page sp. */ @@ -266,26 +290,46 @@ static void *slob_page_alloc(struct slob_page *sp, size_t size, int align) /* * slob_alloc: entry point into the slob allocator. */ -static void *slob_alloc(size_t size, gfp_t gfp, int align) +static void *slob_alloc(size_t size, gfp_t gfp, int align, int node) { struct slob_page *sp; + struct list_head *prev; slob_t *b = NULL; unsigned long flags; spin_lock_irqsave(&slob_lock, flags); /* Iterate through each partially free page, try to find room */ list_for_each_entry(sp, &free_slob_pages, list) { - if (sp->units >= SLOB_UNITS(size)) { - b = slob_page_alloc(sp, size, align); - if (b) - break; - } +#ifdef CONFIG_NUMA + /* + * If there's a node specification, search for a partial + * page with a matching node id in the freelist. + */ + if (node != -1 && page_to_nid(&sp->page) != node) + continue; +#endif + /* Enough room on this page? */ + if (sp->units < SLOB_UNITS(size)) + continue; + + /* Attempt to alloc */ + prev = sp->list.prev; + b = slob_page_alloc(sp, size, align); + if (!b) + continue; + + /* Improve fragment distribution and reduce our average + * search time by starting our next search here. (see + * Knuth vol 1, sec 2.5, pg 449) */ + if (free_slob_pages.next != prev->next) + list_move_tail(&free_slob_pages, prev->next); + break; } spin_unlock_irqrestore(&slob_lock, flags); /* Not enough space: must allocate a new page */ if (!b) { - b = (slob_t *)__get_free_page(gfp); + b = slob_new_page(gfp, 0, node); if (!b) return 0; sp = (struct slob_page *)virt_to_page(b); @@ -301,6 +345,8 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align) BUG_ON(!b); spin_unlock_irqrestore(&slob_lock, flags); } + if (unlikely((gfp & __GFP_ZERO) && b)) + memset(b, 0, size); return b; } @@ -314,7 +360,7 @@ static void slob_free(void *block, int size) slobidx_t units; unsigned long flags; - if (!block) + if (ZERO_OR_NULL_PTR(block)) return; BUG_ON(!size); @@ -381,140 +427,71 @@ out: * End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend. */ -struct bigblock { - int order; - void *pages; - struct bigblock *next; -}; -typedef struct bigblock bigblock_t; - -static bigblock_t *bigblocks; - -static DEFINE_SPINLOCK(block_lock); - - -void *__kmalloc(size_t size, gfp_t gfp) -{ - slob_t *m; - bigblock_t *bb; - unsigned long flags; - - if (size < PAGE_SIZE - SLOB_UNIT) { - m = slob_alloc(size + SLOB_UNIT, gfp, 0); - if (m) - m->units = size; - return m+1; - } - - bb = slob_alloc(sizeof(bigblock_t), gfp, 0); - if (!bb) - return 0; - - bb->order = get_order(size); - bb->pages = (void *)__get_free_pages(gfp, bb->order); - - if (bb->pages) { - spin_lock_irqsave(&block_lock, flags); - bb->next = bigblocks; - bigblocks = bb; - spin_unlock_irqrestore(&block_lock, flags); - return bb->pages; - } +#ifndef ARCH_KMALLOC_MINALIGN +#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long) +#endif - slob_free(bb, sizeof(bigblock_t)); - return 0; -} -EXPORT_SYMBOL(__kmalloc); +#ifndef ARCH_SLAB_MINALIGN +#define ARCH_SLAB_MINALIGN __alignof__(unsigned long) +#endif -/** - * krealloc - reallocate memory. The contents will remain unchanged. - * - * @p: object to reallocate memory for. - * @new_size: how many bytes of memory are required. - * @flags: the type of memory to allocate. - * - * The contents of the object pointed to are preserved up to the - * lesser of the new and old sizes. If @p is %NULL, krealloc() - * behaves exactly like kmalloc(). If @size is 0 and @p is not a - * %NULL pointer, the object pointed to is freed. - */ -void *krealloc(const void *p, size_t new_size, gfp_t flags) +void *__kmalloc_node(size_t size, gfp_t gfp, int node) { - void *ret; + unsigned int *m; + int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); - if (unlikely(!p)) - return kmalloc_track_caller(new_size, flags); + if (size < PAGE_SIZE - align) { + if (!size) + return ZERO_SIZE_PTR; - if (unlikely(!new_size)) { - kfree(p); - return NULL; - } + m = slob_alloc(size + align, gfp, align, node); + if (m) + *m = size; + return (void *)m + align; + } else { + void *ret; - ret = kmalloc_track_caller(new_size, flags); - if (ret) { - memcpy(ret, p, min(new_size, ksize(p))); - kfree(p); + ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node); + if (ret) { + struct page *page; + page = virt_to_page(ret); + page->private = size; + } + return ret; } - return ret; } -EXPORT_SYMBOL(krealloc); +EXPORT_SYMBOL(__kmalloc_node); void kfree(const void *block) { struct slob_page *sp; - slob_t *m; - bigblock_t *bb, **last = &bigblocks; - unsigned long flags; - if (!block) + if (ZERO_OR_NULL_PTR(block)) return; sp = (struct slob_page *)virt_to_page(block); - if (!slob_page(sp)) { - /* on the big block list */ - spin_lock_irqsave(&block_lock, flags); - for (bb = bigblocks; bb; last = &bb->next, bb = bb->next) { - if (bb->pages == block) { - *last = bb->next; - spin_unlock_irqrestore(&block_lock, flags); - free_pages((unsigned long)block, bb->order); - slob_free(bb, sizeof(bigblock_t)); - return; - } - } - spin_unlock_irqrestore(&block_lock, flags); - WARN_ON(1); - return; - } - - m = (slob_t *)block - 1; - slob_free(m, m->units + SLOB_UNIT); - return; + if (slob_page(sp)) { + int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); + unsigned int *m = (unsigned int *)(block - align); + slob_free(m, *m + align); + } else + put_page(&sp->page); } - EXPORT_SYMBOL(kfree); +/* can't use ksize for kmem_cache_alloc memory, only kmalloc */ size_t ksize(const void *block) { struct slob_page *sp; - bigblock_t *bb; - unsigned long flags; - if (!block) + if (ZERO_OR_NULL_PTR(block)) return 0; sp = (struct slob_page *)virt_to_page(block); - if (!slob_page(sp)) { - spin_lock_irqsave(&block_lock, flags); - for (bb = bigblocks; bb; bb = bb->next) - if (bb->pages == block) { - spin_unlock_irqrestore(&slob_lock, flags); - return PAGE_SIZE << bb->order; - } - spin_unlock_irqrestore(&block_lock, flags); - } - - return ((slob_t *)block - 1)->units + SLOB_UNIT; + if (slob_page(sp)) + return ((slob_t *)block - 1)->units + SLOB_UNIT; + else + return sp->page.private; } struct kmem_cache { @@ -526,12 +503,11 @@ struct kmem_cache { struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align, unsigned long flags, - void (*ctor)(void*, struct kmem_cache *, unsigned long), - void (*dtor)(void*, struct kmem_cache *, unsigned long)) + void (*ctor)(void*, struct kmem_cache *, unsigned long)) { struct kmem_cache *c; - c = slob_alloc(sizeof(struct kmem_cache), flags, 0); + c = slob_alloc(sizeof(struct kmem_cache), flags, 0, -1); if (c) { c->name = name; @@ -544,6 +520,8 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, c->ctor = ctor; /* ignore alignment unless it's forced */ c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0; + if (c->align < ARCH_SLAB_MINALIGN) + c->align = ARCH_SLAB_MINALIGN; if (c->align < align) c->align = align; } else if (flags & SLAB_PANIC) @@ -559,31 +537,21 @@ void kmem_cache_destroy(struct kmem_cache *c) } EXPORT_SYMBOL(kmem_cache_destroy); -void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags) +void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node) { void *b; if (c->size < PAGE_SIZE) - b = slob_alloc(c->size, flags, c->align); + b = slob_alloc(c->size, flags, c->align, node); else - b = (void *)__get_free_pages(flags, get_order(c->size)); + b = slob_new_page(flags, get_order(c->size), node); if (c->ctor) c->ctor(b, c, 0); return b; } -EXPORT_SYMBOL(kmem_cache_alloc); - -void *kmem_cache_zalloc(struct kmem_cache *c, gfp_t flags) -{ - void *ret = kmem_cache_alloc(c, flags); - if (ret) - memset(ret, 0, c->size); - - return ret; -} -EXPORT_SYMBOL(kmem_cache_zalloc); +EXPORT_SYMBOL(kmem_cache_alloc_node); static void __kmem_cache_free(void *b, int size) { @@ -638,6 +606,14 @@ int kmem_ptr_validate(struct kmem_cache *a, const void *b) return 0; } +static unsigned int slob_ready __read_mostly; + +int slab_is_available(void) +{ + return slob_ready; +} + void __init kmem_cache_init(void) { + slob_ready = 1; }