From 7cc718d56c8297bd3a3c106ca09e8abf9814bb27 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sun, 10 Feb 2008 11:21:54 +0100 Subject: slab: avoid double initialization & do initialization in 1 place - alloc_slabmgmt: initialize all slab fields in 1 place - slab->nodeid was initialized twice: in alloc_slabmgmt and immediately after it in cache_grow Signed-off-by: Marcin Slusarz CC: Christoph Lameter Reviewed-by: Pekka Enberg Signed-off-by: Christoph Lameter --- mm/slab.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 40c00dacbe4..473e6c2eaef 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2630,6 +2630,7 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, slabp->colouroff = colour_off; slabp->s_mem = objp + colour_off; slabp->nodeid = nodeid; + slabp->free = 0; return slabp; } @@ -2683,7 +2684,6 @@ static void cache_init_objs(struct kmem_cache *cachep, slab_bufctl(slabp)[i] = i + 1; } slab_bufctl(slabp)[i - 1] = BUFCTL_END; - slabp->free = 0; } static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) @@ -2816,7 +2816,6 @@ static int cache_grow(struct kmem_cache *cachep, if (!slabp) goto opps1; - slabp->nodeid = nodeid; slab_map_pages(cachep, slabp, objp); cache_init_objs(cachep, slabp); -- cgit v1.2.3 From a973e9dd1e140a65bed694a2c5c8d53e9cba1a23 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sat, 1 Mar 2008 13:40:44 -0800 Subject: Revert "unique end pointer" patch This only made sense for the alternate fastpath which was reverted last week. Mathieu is working on a new version that addresses the fastpath issues but that new code first needs to go through mm and it is not clear if we need the unique end pointers with his new scheme. Reviewed-by: Pekka Enberg Signed-off-by: Christoph Lameter --- include/linux/mm_types.h | 5 +--- mm/slub.c | 70 ++++++++++++++++-------------------------------- 2 files changed, 24 insertions(+), 51 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index bfee0bd1d43..34023c65d46 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -64,10 +64,7 @@ struct page { #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS spinlock_t ptl; #endif - struct { - struct kmem_cache *slab; /* SLUB: Pointer to slab */ - void *end; /* SLUB: end marker */ - }; + struct kmem_cache *slab; /* SLUB: Pointer to slab */ struct page *first_page; /* Compound tail pages */ }; union { diff --git a/mm/slub.c b/mm/slub.c index 74c65af0a54..a873953e5a1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -291,32 +291,15 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) #endif } -/* - * The end pointer in a slab is special. It points to the first object in the - * slab but has bit 0 set to mark it. - * - * Note that SLUB relies on page_mapping returning NULL for pages with bit 0 - * in the mapping set. 
- */ -static inline int is_end(void *addr) -{ - return (unsigned long)addr & PAGE_MAPPING_ANON; -} - -static void *slab_address(struct page *page) -{ - return page->end - PAGE_MAPPING_ANON; -} - static inline int check_valid_pointer(struct kmem_cache *s, struct page *page, const void *object) { void *base; - if (object == page->end) + if (!object) return 1; - base = slab_address(page); + base = page_address(page); if (object < base || object >= base + s->objects * s->size || (object - base) % s->size) { return 0; @@ -349,8 +332,7 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) /* Scan freelist */ #define for_each_free_object(__p, __s, __free) \ - for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\ - __p)) + for (__p = (__free); __p; __p = get_freepointer((__s), __p)) /* Determine object index from a given position */ static inline int slab_index(void *p, struct kmem_cache *s, void *addr) @@ -502,7 +484,7 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...) static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) { unsigned int off; /* Offset of last byte */ - u8 *addr = slab_address(page); + u8 *addr = page_address(page); print_tracking(s, p); @@ -680,7 +662,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) if (!(s->flags & SLAB_POISON)) return 1; - start = slab_address(page); + start = page_address(page); end = start + (PAGE_SIZE << s->order); length = s->objects * s->size; remainder = end - (start + length); @@ -748,7 +730,7 @@ static int check_object(struct kmem_cache *s, struct page *page, * of the free objects in this slab. May cause * another error because the object count is now wrong. */ - set_freepointer(s, p, page->end); + set_freepointer(s, p, NULL); return 0; } return 1; @@ -782,18 +764,18 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) void *fp = page->freelist; void *object = NULL; - while (fp != page->end && nr <= s->objects) { + while (fp && nr <= s->objects) { if (fp == search) return 1; if (!check_valid_pointer(s, page, fp)) { if (object) { object_err(s, page, object, "Freechain corrupt"); - set_freepointer(s, object, page->end); + set_freepointer(s, object, NULL); break; } else { slab_err(s, page, "Freepointer corrupt"); - page->freelist = page->end; + page->freelist = NULL; page->inuse = s->objects; slab_fix(s, "Freelist cleared"); return 0; @@ -899,7 +881,7 @@ bad: */ slab_fix(s, "Marking all objects used"); page->inuse = s->objects; - page->freelist = page->end; + page->freelist = NULL; } return 0; } @@ -939,7 +921,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page, } /* Special debug activities for freeing objects */ - if (!SlabFrozen(page) && page->freelist == page->end) + if (!SlabFrozen(page) && !page->freelist) remove_full(s, page); if (s->flags & SLAB_STORE_USER) set_track(s, object, TRACK_FREE, addr); @@ -1124,7 +1106,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) SetSlabDebug(page); start = page_address(page); - page->end = start + 1; if (unlikely(s->flags & SLAB_POISON)) memset(start, POISON_INUSE, PAGE_SIZE << s->order); @@ -1136,7 +1117,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) last = p; } setup_object(s, page, last); - set_freepointer(s, last, page->end); + set_freepointer(s, last, NULL); page->freelist = start; page->inuse = 0; @@ -1152,7 +1133,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) void *p; 
slab_pad_check(s, page); - for_each_object(p, s, slab_address(page)) + for_each_object(p, s, page_address(page)) check_object(s, page, p, 0); ClearSlabDebug(page); } @@ -1162,7 +1143,6 @@ static void __free_slab(struct kmem_cache *s, struct page *page) NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, -pages); - page->mapping = NULL; __free_pages(page, s->order); } @@ -1366,7 +1346,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) ClearSlabFrozen(page); if (page->inuse) { - if (page->freelist != page->end) { + if (page->freelist) { add_partial(n, page, tail); stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); } else { @@ -1410,12 +1390,8 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) * Merge cpu freelist into freelist. Typically we get here * because both freelists are empty. So this is unlikely * to occur. - * - * We need to use _is_end here because deactivate slab may - * be called for a debug slab. Then c->freelist may contain - * a dummy pointer. */ - while (unlikely(!is_end(c->freelist))) { + while (unlikely(c->freelist)) { void **object; tail = 0; /* Hot objects. Put the slab first */ @@ -1517,7 +1493,7 @@ static void *__slab_alloc(struct kmem_cache *s, stat(c, ALLOC_REFILL); load_freelist: object = c->page->freelist; - if (unlikely(object == c->page->end)) + if (unlikely(!object)) goto another_slab; if (unlikely(SlabDebug(c->page))) goto debug; @@ -1525,7 +1501,7 @@ load_freelist: object = c->page->freelist; c->freelist = object[c->offset]; c->page->inuse = s->objects; - c->page->freelist = c->page->end; + c->page->freelist = NULL; c->node = page_to_nid(c->page); unlock_out: slab_unlock(c->page); @@ -1607,7 +1583,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, local_irq_save(flags); c = get_cpu_slab(s, smp_processor_id()); - if (unlikely(is_end(c->freelist) || !node_match(c, node))) + if (unlikely(!c->freelist || !node_match(c, node))) object = __slab_alloc(s, gfpflags, node, addr, c); @@ -1677,7 +1653,7 @@ checks_ok: * was not on the partial list before * then add it. */ - if (unlikely(prior == page->end)) { + if (unlikely(!prior)) { add_partial(get_node(s, page_to_nid(page)), page, 1); stat(c, FREE_ADD_PARTIAL); } @@ -1687,7 +1663,7 @@ out_unlock: return; slab_empty: - if (prior != page->end) { + if (prior) { /* * Slab still on the partial list. */ @@ -1910,7 +1886,7 @@ static void init_kmem_cache_cpu(struct kmem_cache *s, struct kmem_cache_cpu *c) { c->page = NULL; - c->freelist = (void *)PAGE_MAPPING_ANON; + c->freelist = NULL; c->node = 0; c->offset = s->offset / sizeof(void *); c->objsize = s->objsize; @@ -3199,7 +3175,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page, unsigned long *map) { void *p; - void *addr = slab_address(page); + void *addr = page_address(page); if (!check_slab(s, page) || !on_freelist(s, page, NULL)) @@ -3482,7 +3458,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, static void process_slab(struct loc_track *t, struct kmem_cache *s, struct page *page, enum track_item alloc) { - void *addr = slab_address(page); + void *addr = page_address(page); DECLARE_BITMAP(map, s->objects); void *p; -- cgit v1.2.3 From d9acf4b7b62d783d84273a61aed41a0f025b08ac Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 15 Feb 2008 15:22:21 -0800 Subject: slub: rename slab_objects to show_slab_objects The sysfs callback is better named show_slab_objects since it is always called from the xxx_show callbacks. 
We need the name for other purposes later. Reviewed-by: Pekka Enberg Signed-off-by: Christoph Lameter --- mm/slub.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a873953e5a1..e01d399894c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3567,7 +3567,7 @@ enum slab_stat_type { #define SO_CPU (1 << SL_CPU) #define SO_OBJECTS (1 << SL_OBJECTS) -static unsigned long slab_objects(struct kmem_cache *s, +static unsigned long show_slab_objects(struct kmem_cache *s, char *buf, unsigned long flags) { unsigned long total = 0; @@ -3730,25 +3730,25 @@ SLAB_ATTR_RO(aliases); static ssize_t slabs_show(struct kmem_cache *s, char *buf) { - return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU); + return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU); } SLAB_ATTR_RO(slabs); static ssize_t partial_show(struct kmem_cache *s, char *buf) { - return slab_objects(s, buf, SO_PARTIAL); + return show_slab_objects(s, buf, SO_PARTIAL); } SLAB_ATTR_RO(partial); static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf) { - return slab_objects(s, buf, SO_CPU); + return show_slab_objects(s, buf, SO_CPU); } SLAB_ATTR_RO(cpu_slabs); static ssize_t objects_show(struct kmem_cache *s, char *buf) { - return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS); + return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS); } SLAB_ATTR_RO(objects); -- cgit v1.2.3 From e153362a50a34439718a938a851bba977116e19a Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 15 Feb 2008 23:45:24 -0800 Subject: slub: Remove objsize check in kmem_cache_flags() There is no page->offset anymore and also no associated limit on the number of objects. The page->offset field was removed for 2.6.24. So the check in kmem_cache_flags() is now also obsolete (should have been dropped earlier, somehow a hunk vanished). Reviewed-by: Pekka Enberg Signed-by: Christoph Lameter --- mm/slub.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index e01d399894c..d7d0d866b6b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -997,30 +997,11 @@ static unsigned long kmem_cache_flags(unsigned long objsize, void (*ctor)(struct kmem_cache *, void *)) { /* - * The page->offset field is only 16 bit wide. This is an offset - * in units of words from the beginning of an object. If the slab - * size is bigger then we cannot move the free pointer behind the - * object anymore. - * - * On 32 bit platforms the limit is 256k. On 64bit platforms - * the limit is 512k. - * - * Debugging or ctor may create a need to move the free - * pointer. Fail if this happens. + * Enable debugging if selected on the kernel commandline. */ - if (objsize >= 65535 * sizeof(void *)) { - BUG_ON(flags & (SLAB_RED_ZONE | SLAB_POISON | - SLAB_STORE_USER | SLAB_DESTROY_BY_RCU)); - BUG_ON(ctor); - } else { - /* - * Enable debugging if selected on the kernel commandline. - */ - if (slub_debug && (!slub_debug_slabs || - strncmp(slub_debug_slabs, name, - strlen(slub_debug_slabs)) == 0)) - flags |= slub_debug; - } + if (slub_debug && (!slub_debug_slabs || + strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)) == 0)) + flags |= slub_debug; return flags; } -- cgit v1.2.3 From d692ef6dcd20da60786470654410e85f29c2ddd9 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 15 Feb 2008 23:45:24 -0800 Subject: slub: Remove useless checks in alloc_debug_processing Alloc debug processing is never called with a NULL object pointer. No reason to check for NULL. 
Reviewed-by: Pekka Enberg Signed-off-by: Christoph Lameter --- mm/slub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index d7d0d866b6b..0a5a1001590 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -852,7 +852,7 @@ static int alloc_debug_processing(struct kmem_cache *s, struct page *page, if (!check_slab(s, page)) goto bad; - if (object && !on_freelist(s, page, object)) { + if (!on_freelist(s, page, object)) { object_err(s, page, object, "Object already allocated"); goto bad; } @@ -862,7 +862,7 @@ static int alloc_debug_processing(struct kmem_cache *s, struct page *page, goto bad; } - if (object && !check_object(s, page, object, 0)) + if (!check_object(s, page, object, 0)) goto bad; /* Success perform special debug activities for allocs */ -- cgit v1.2.3 From 27d9e4e94862c89d171cf70911b4f11ad69fb54e Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 15 Feb 2008 23:45:25 -0800 Subject: slub: Use the objsize from the kmem_cache_cpu structure No need to access the kmem_cache structure. We have the same value in kmem_cache_cpu. Reviewed-by: Pekka Enberg Signed-off-by: Christoph Lameter --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 0a5a1001590..b49570ca08b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1681,8 +1681,8 @@ static __always_inline void slab_free(struct kmem_cache *s, unsigned long flags; local_irq_save(flags); - debug_check_no_locks_freed(object, s->objsize); c = get_cpu_slab(s, smp_processor_id()); + debug_check_no_locks_freed(object, c->objsize); if (likely(page == c->page && c->node >= 0)) { object[c->offset] = c->freelist; c->freelist = object; -- cgit v1.2.3 From ae20bfda6813387af18c7fdbc0f8b1fa7be2d05b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 15 Feb 2008 23:45:25 -0800 Subject: slub: Remove BUG_ON() from ksize and omit checks for !SLUB_DEBUG The BUG_ONs are useless since the pointer derefs will lead to NULL deref errors anyways. Some of the checks are not necessary if no debugging is possible. Signed-off-by: Christoph Lameter --- mm/slub.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index b49570ca08b..09b5dc82df5 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2610,19 +2610,17 @@ size_t ksize(const void *object) struct page *page; struct kmem_cache *s; - BUG_ON(!object); if (unlikely(object == ZERO_SIZE_PTR)) return 0; page = virt_to_head_page(object); - BUG_ON(!page); if (unlikely(!PageSlab(page))) return PAGE_SIZE << compound_order(page); s = page->slab; - BUG_ON(!s); +#ifdef CONFIG_SLUB_DEBUG /* * Debugging requires use of the padding between object * and whatever may come after it. @@ -2630,6 +2628,7 @@ size_t ksize(const void *object) if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) return s->objsize; +#endif /* * If we have the need to store the freelist pointer * back there or track user information then we can @@ -2637,7 +2636,6 @@ size_t ksize(const void *object) */ if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) return s->inuse; - /* * Else we can use all the padding etc for the allocation */ -- cgit v1.2.3 From d8b42bf54be18b5d0bad941b3a1d3e8f022651a7 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 15 Feb 2008 23:45:25 -0800 Subject: slub: Rearrange #ifdef CONFIG_SLUB_DEBUG in calculate_sizes() Group SLUB_DEBUG code together to reduce the number of #ifdefs. Move some debug checks under the #ifdef. 
Reviewed-by: Pekka Enberg Signed-off-by: Christoph Lameter --- mm/slub.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 09b5dc82df5..72f5f4ecd1d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2129,6 +2129,14 @@ static int calculate_sizes(struct kmem_cache *s) unsigned long size = s->objsize; unsigned long align = s->align; + /* + * Round up object size to the next word boundary. We can only + * place the free pointer at word boundaries and this determines + * the possible location of the free pointer. + */ + size = ALIGN(size, sizeof(void *)); + +#ifdef CONFIG_SLUB_DEBUG /* * Determine if we can poison the object itself. If the user of * the slab may touch the object after free or before allocation @@ -2140,14 +2148,7 @@ static int calculate_sizes(struct kmem_cache *s) else s->flags &= ~__OBJECT_POISON; - /* - * Round up object size to the next word boundary. We can only - * place the free pointer at word boundaries and this determines - * the possible location of the free pointer. - */ - size = ALIGN(size, sizeof(void *)); -#ifdef CONFIG_SLUB_DEBUG /* * If we are Redzoning then check if there is some space between the * end of the object and the free pointer. If not then add an -- cgit v1.2.3 From 6446faa2ff30ca77c5b25e886bbbfb81c63f1c91 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 15 Feb 2008 23:45:26 -0800 Subject: slub: Fix up comments Provide comments and fix up various spelling / style issues. Signed-off-by: Christoph Lameter --- include/linux/slub_def.h | 4 ++-- mm/slub.c | 49 +++++++++++++++++++++++++++--------------------- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 57deecc79d5..b00c1c73eb0 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -61,7 +61,7 @@ struct kmem_cache { int size; /* The size of an object including meta data */ int objsize; /* The size of an object without meta data */ int offset; /* Free pointer offset. */ - int order; + int order; /* Current preferred allocation order */ /* * Avoid an extra cache line for UP, SMP and for the node local to @@ -138,11 +138,11 @@ static __always_inline int kmalloc_index(size_t size) if (size <= 512) return 9; if (size <= 1024) return 10; if (size <= 2 * 1024) return 11; + if (size <= 4 * 1024) return 12; /* * The following is only needed to support architectures with a larger page * size than 4k. */ - if (size <= 4 * 1024) return 12; if (size <= 8 * 1024) return 13; if (size <= 16 * 1024) return 14; if (size <= 32 * 1024) return 15; diff --git a/mm/slub.c b/mm/slub.c index 72f5f4ecd1d..10d546954ef 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -291,6 +291,7 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) #endif } +/* Verify that a pointer has an address that is valid within a slab page */ static inline int check_valid_pointer(struct kmem_cache *s, struct page *page, const void *object) { @@ -619,7 +620,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, * A. Free pointer (if we cannot overwrite object on free) * B. Tracking data for SLAB_STORE_USER * C. Padding to reach required alignment boundary or at mininum - * one word if debuggin is on to be able to detect writes + * one word if debugging is on to be able to detect writes * before the word boundary. 
* * Padding is done using 0x5a (POISON_INUSE) @@ -1268,7 +1269,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) * may return off node objects because partial slabs are obtained * from other nodes and filled up. * - * If /sys/slab/xx/defrag_ratio is set to 100 (which makes + * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes * defrag_ratio = 1000) then every (well almost) allocation will * first attempt to defrag slab caches on other nodes. This means * scanning over all nodes to look for partial slabs which may be @@ -1343,9 +1344,11 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) * Adding an empty slab to the partial slabs in order * to avoid page allocator overhead. This slab needs * to come after the other slabs with objects in - * order to fill them up. That way the size of the - * partial list stays small. kmem_cache_shrink can - * reclaim empty slabs from the partial list. + * so that the others get filled first. That way the + * size of the partial list stays small. + * + * kmem_cache_shrink can reclaim any empty slabs from the + * partial list. */ add_partial(n, page, 1); slab_unlock(page); @@ -1368,7 +1371,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) if (c->freelist) stat(c, DEACTIVATE_REMOTE_FREES); /* - * Merge cpu freelist into freelist. Typically we get here + * Merge cpu freelist into slab freelist. Typically we get here * because both freelists are empty. So this is unlikely * to occur. */ @@ -1399,6 +1402,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) /* * Flush cpu slab. + * * Called from IPI handler with interrupts disabled. */ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) @@ -1457,7 +1461,8 @@ static inline int node_match(struct kmem_cache_cpu *c, int node) * rest of the freelist to the lockless freelist. * * And if we were unable to get a new slab from the partial slab lists then - * we need to allocate a new slab. This is slowest path since we may sleep. + * we need to allocate a new slab. This is the slowest path since it involves + * a call to the page allocator and the setup of a new slab. */ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) @@ -1471,7 +1476,9 @@ static void *__slab_alloc(struct kmem_cache *s, slab_lock(c->page); if (unlikely(!node_match(c, node))) goto another_slab; + stat(c, ALLOC_REFILL); + load_freelist: object = c->page->freelist; if (unlikely(!object)) @@ -1616,6 +1623,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, if (unlikely(SlabDebug(page))) goto debug; + checks_ok: prior = object[offset] = page->freelist; page->freelist = object; @@ -1630,8 +1638,7 @@ checks_ok: goto slab_empty; /* - * Objects left in the slab. If it - * was not on the partial list before + * Objects left in the slab. If it was not on the partial list before * then add it. */ if (unlikely(!prior)) { @@ -1845,13 +1852,11 @@ static unsigned long calculate_alignment(unsigned long flags, unsigned long align, unsigned long size) { /* - * If the user wants hardware cache aligned objects then - * follow that suggestion if the object is sufficiently - * large. + * If the user wants hardware cache aligned objects then follow that + * suggestion if the object is sufficiently large. * - * The hardware cache alignment cannot override the - * specified alignment though. If that is greater - * then use it. 
+ * The hardware cache alignment cannot override the specified + * alignment though. If that is greater then use it. */ if ((flags & SLAB_HWCACHE_ALIGN) && size > cache_line_size() / 2) @@ -2049,6 +2054,7 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, #endif init_kmem_cache_node(n); atomic_long_inc(&n->nr_slabs); + /* * lockdep requires consistent irq usage for each lock * so even though there cannot be a race this early in @@ -2301,7 +2307,7 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object) /* * We could also check if the object is on the slabs freelist. * But this would be too expensive and it seems that the main - * purpose of kmem_ptr_valid is to check if the object belongs + * purpose of kmem_ptr_valid() is to check if the object belongs * to a certain slab. */ return 1; @@ -2913,7 +2919,7 @@ void __init kmem_cache_init(void) /* * Patch up the size_index table if we have strange large alignment * requirements for the kmalloc array. This is only the case for - * mips it seems. The standard arches will not generate any code here. + * MIPS it seems. The standard arches will not generate any code here. * * Largest permitted alignment is 256 bytes due to the way we * handle the index determination for the smaller caches. @@ -2942,7 +2948,6 @@ void __init kmem_cache_init(void) kmem_size = sizeof(struct kmem_cache); #endif - printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," " CPUs=%d, Nodes=%d\n", @@ -3039,12 +3044,15 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, */ for_each_online_cpu(cpu) get_cpu_slab(s, cpu)->objsize = s->objsize; + s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); up_write(&slub_lock); + if (sysfs_slab_alias(s, name)) goto err; return s; } + s = kmalloc(kmem_size, GFP_KERNEL); if (s) { if (kmem_cache_open(s, GFP_KERNEL, name, @@ -3927,7 +3935,6 @@ SLAB_ATTR(remote_node_defrag_ratio); #endif #ifdef CONFIG_SLUB_STATS - static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) { unsigned long sum = 0; @@ -4111,8 +4118,8 @@ static struct kset *slab_kset; #define ID_STR_LENGTH 64 /* Create a unique string id for a slab cache: - * format - * :[flags-]size:[memory address of kmemcache] + * + * Format :[flags-]size */ static char *create_unique_id(struct kmem_cache *s) { -- cgit v1.2.3 From 7693143481730686362cc6360e3d47c012d9b2c8 Mon Sep 17 00:00:00 2001 From: Pekka J Enberg Date: Sat, 1 Mar 2008 13:43:54 -0800 Subject: slub: look up object from the freelist once We only need to look up object from c->page->freelist once in __slab_alloc(). Signed-off-by: Pekka Enberg Signed-off-by: Christoph Lameter --- mm/slub.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 10d546954ef..db8026ba049 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1486,7 +1486,6 @@ load_freelist: if (unlikely(SlabDebug(c->page))) goto debug; - object = c->page->freelist; c->freelist = object[c->offset]; c->page->inuse = s->objects; c->page->freelist = NULL; @@ -1542,7 +1541,6 @@ new_slab: return NULL; debug: - object = c->page->freelist; if (!alloc_debug_processing(s, c->page, object, addr)) goto another_slab; -- cgit v1.2.3 From f619cfe1bda809a97c407f4c723eb3235ecd64e5 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sat, 1 Mar 2008 13:56:40 -0800 Subject: slub: Add kmalloc_large_node() to support kmalloc_node fallback Slub is missing some NUMA support for large kmallocs. Provide that. 
Reviewed-by: Pekka Enberg Signed-off-by: Christoph Lameter --- mm/slub.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index db8026ba049..ecacacdce9d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2592,13 +2592,24 @@ void *__kmalloc(size_t size, gfp_t flags) } EXPORT_SYMBOL(__kmalloc); +static void *kmalloc_large_node(size_t size, gfp_t flags, int node) +{ + struct page *page = alloc_pages_node(node, flags | __GFP_COMP, + get_order(size)); + + if (page) + return page_address(page); + else + return NULL; +} + #ifdef CONFIG_NUMA void *__kmalloc_node(size_t size, gfp_t flags, int node) { struct kmem_cache *s; if (unlikely(size > PAGE_SIZE)) - return kmalloc_large(size, flags); + return kmalloc_large_node(size, flags, node); s = get_slab(size, flags); @@ -3146,7 +3157,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, struct kmem_cache *s; if (unlikely(size > PAGE_SIZE)) - return kmalloc_large(size, gfpflags); + return kmalloc_large_node(size, gfpflags, node); s = get_slab(size, gfpflags); -- cgit v1.2.3 From 62e5c4b4d6351707346695fd9e151b6cda85cbe1 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 2 Mar 2008 23:28:24 +0300 Subject: slub: fix possible NULL pointer dereference This patch fix possible NULL pointer dereference if kzalloc failed. To be able to return proper error code the function return type is changed to ssize_t (according to callees and sysfs definitions). Signed-off-by: Cyrill Gorcunov Signed-off-by: Christoph Lameter --- mm/slub.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index ecacacdce9d..0863fd38a5c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3564,8 +3564,8 @@ enum slab_stat_type { #define SO_CPU (1 << SL_CPU) #define SO_OBJECTS (1 << SL_OBJECTS) -static unsigned long show_slab_objects(struct kmem_cache *s, - char *buf, unsigned long flags) +static ssize_t show_slab_objects(struct kmem_cache *s, + char *buf, unsigned long flags) { unsigned long total = 0; int cpu; @@ -3575,6 +3575,8 @@ static unsigned long show_slab_objects(struct kmem_cache *s, unsigned long *per_cpu; nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL); + if (!nodes) + return -ENOMEM; per_cpu = nodes + nr_node_ids; for_each_possible_cpu(cpu) { -- cgit v1.2.3
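
A note on the final fix above: the show_slab_objects() change follows a common sysfs-show idiom — allocate scratch space, bail out with -ENOMEM when that allocation fails, and report the result as a signed ssize_t so callers can distinguish byte counts from error codes. The stand-alone user-space sketch below only illustrates that control flow under assumed names (show_counts, NR_NODES, with calloc standing in for kzalloc); it is not kernel code and is not part of the patches.

/*
 * Minimal user-space sketch of the error-handling shape used in
 * show_slab_objects() after the fix: allocate a per-node scratch array,
 * return -ENOMEM on allocation failure, otherwise return the number of
 * bytes written into the output buffer.  All names are illustrative.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#define NR_NODES 4
#define BUF_SIZE 128

static ssize_t show_counts(char *buf, size_t buflen,
			   const unsigned long *per_node)
{
	unsigned long *nodes;
	unsigned long total = 0;
	ssize_t len;
	int i;

	/* calloc() stands in for kzalloc(); the NULL check is the point. */
	nodes = calloc(NR_NODES, sizeof(unsigned long));
	if (!nodes)
		return -ENOMEM;

	for (i = 0; i < NR_NODES; i++) {
		nodes[i] = per_node[i];
		total += nodes[i];
	}

	/* Emit the total first, then one " N<node>=<count>" per busy node. */
	len = snprintf(buf, buflen, "%lu", total);
	for (i = 0; i < NR_NODES; i++)
		if (nodes[i])
			len += snprintf(buf + len, buflen - len,
					" N%d=%lu", i, nodes[i]);
	len += snprintf(buf + len, buflen - len, "\n");

	free(nodes);
	return len;
}

int main(void)
{
	const unsigned long per_node[NR_NODES] = { 12, 0, 7, 3 };
	char buf[BUF_SIZE];
	ssize_t ret = show_counts(buf, sizeof(buf), per_node);

	if (ret < 0)
		fprintf(stderr, "error: %s\n", strerror(-ret));
	else
		fputs(buf, stdout);
	return 0;
}

Built with any C99 toolchain this prints "22 N0=12 N2=7 N3=3", mirroring the total-plus-per-node layout that show_slab_objects() writes into its sysfs buffer; the kernel function differs only in where the counts come from and in using kzalloc/kfree.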