aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig4
-rw-r--r--mm/Makefile2
-rw-r--r--mm/bootmem.c41
-rw-r--r--mm/highmem.c23
-rw-r--r--mm/memory.c16
-rw-r--r--mm/mempolicy.c2
-rw-r--r--mm/mempool.c46
-rw-r--r--mm/mmap.c6
-rw-r--r--mm/mmzone.c50
-rw-r--r--mm/msync.c2
-rw-r--r--mm/page_alloc.c64
-rw-r--r--mm/slab.c318
-rw-r--r--mm/slob.c10
-rw-r--r--mm/swap.c2
-rw-r--r--mm/util.c10
-rw-r--r--mm/vmscan.c8
16 files changed, 494 insertions, 110 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index bd80460360d..332f5c29b53 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -138,8 +138,8 @@ config SPLIT_PTLOCK_CPUS
#
config MIGRATION
bool "Page migration"
- def_bool y if NUMA || SPARSEMEM || DISCONTIGMEM
- depends on SWAP
+ def_bool y if NUMA
+ depends on SWAP && NUMA
help
Allows the migration of the physical location of pages of processes
while the virtual addresses are not changed. This is useful for
diff --git a/mm/Makefile b/mm/Makefile
index f10c753dce6..0b8f73f2ed1 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -10,7 +10,7 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
page_alloc.o page-writeback.o pdflush.o \
readahead.o swap.o truncate.o vmscan.o \
- prio_tree.o util.o $(mmu-y)
+ prio_tree.o util.o mmzone.o $(mmu-y)
obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
obj-$(CONFIG_HUGETLBFS) += hugetlb.o
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 35c32290f71..d3e3bd2ffce 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -33,6 +33,7 @@ EXPORT_SYMBOL(max_pfn); /* This is exported so
* dma_get_required_mask(), which uses
* it, can be an inline function */
+static LIST_HEAD(bdata_list);
#ifdef CONFIG_CRASH_DUMP
/*
* If we have booted due to a crash, max_pfn will be a very low value. We need
@@ -52,6 +53,27 @@ unsigned long __init bootmem_bootmap_pages (unsigned long pages)
return mapsize;
}
+/*
+ * link bdata in order
+ */
+static void link_bootmem(bootmem_data_t *bdata)
+{
+ bootmem_data_t *ent;
+ if (list_empty(&bdata_list)) {
+ list_add(&bdata->list, &bdata_list);
+ return;
+ }
+ /* insert in order */
+ list_for_each_entry(ent, &bdata_list, list) {
+ if (bdata->node_boot_start < ent->node_boot_start) {
+ list_add_tail(&bdata->list, &ent->list);
+ return;
+ }
+ }
+ list_add_tail(&bdata->list, &bdata_list);
+ return;
+}
+
/*
* Called once to set up the allocator itself.
@@ -62,13 +84,11 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
bootmem_data_t *bdata = pgdat->bdata;
unsigned long mapsize = ((end - start)+7)/8;
- pgdat->pgdat_next = pgdat_list;
- pgdat_list = pgdat;
-
mapsize = ALIGN(mapsize, sizeof(long));
bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
bdata->node_boot_start = (start << PAGE_SHIFT);
bdata->node_low_pfn = end;
+ link_bootmem(bdata);
/*
* Initially all pages are reserved - setup_arch() has to
@@ -152,7 +172,7 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
*
* NOTE: This function is _not_ reentrant.
*/
-static void * __init
+void * __init
__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
unsigned long align, unsigned long goal, unsigned long limit)
{
@@ -383,12 +403,11 @@ unsigned long __init free_all_bootmem (void)
void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
{
- pg_data_t *pgdat = pgdat_list;
+ bootmem_data_t *bdata;
void *ptr;
- for_each_pgdat(pgdat)
- if ((ptr = __alloc_bootmem_core(pgdat->bdata, size,
- align, goal, 0)))
+ list_for_each_entry(bdata, &bdata_list, list)
+ if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0)))
return(ptr);
/*
@@ -416,11 +435,11 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigne
void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal)
{
- pg_data_t *pgdat = pgdat_list;
+ bootmem_data_t *bdata;
void *ptr;
- for_each_pgdat(pgdat)
- if ((ptr = __alloc_bootmem_core(pgdat->bdata, size,
+ list_for_each_entry(bdata, &bdata_list, list)
+ if ((ptr = __alloc_bootmem_core(bdata, size,
align, goal, LOW32LIMIT)))
return(ptr);
diff --git a/mm/highmem.c b/mm/highmem.c
index d0ea1eec6a9..55885f64af4 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -31,14 +31,9 @@
static mempool_t *page_pool, *isa_page_pool;
-static void *page_pool_alloc_isa(gfp_t gfp_mask, void *data)
+static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
{
- return alloc_page(gfp_mask | GFP_DMA);
-}
-
-static void page_pool_free(void *page, void *data)
-{
- __free_page(page);
+ return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
}
/*
@@ -51,11 +46,6 @@ static void page_pool_free(void *page, void *data)
*/
#ifdef CONFIG_HIGHMEM
-static void *page_pool_alloc(gfp_t gfp_mask, void *data)
-{
- return alloc_page(gfp_mask);
-}
-
static int pkmap_count[LAST_PKMAP];
static unsigned int last_pkmap_nr;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
@@ -229,7 +219,7 @@ static __init int init_emergency_pool(void)
if (!i.totalhigh)
return 0;
- page_pool = mempool_create(POOL_SIZE, page_pool_alloc, page_pool_free, NULL);
+ page_pool = mempool_create_page_pool(POOL_SIZE, 0);
if (!page_pool)
BUG();
printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
@@ -272,7 +262,8 @@ int init_emergency_isa_pool(void)
if (isa_page_pool)
return 0;
- isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc_isa, page_pool_free, NULL);
+ isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
+ mempool_free_pages, (void *) 0);
if (!isa_page_pool)
BUG();
@@ -337,7 +328,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
bio_put(bio);
}
-static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done,int err)
+static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
{
if (bio->bi_size)
return 1;
@@ -384,7 +375,7 @@ static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int
}
static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
- mempool_t *pool)
+ mempool_t *pool)
{
struct page *page;
struct bio *bio = NULL;
diff --git a/mm/memory.c b/mm/memory.c
index 80c3fb370f9..8d8f52569f3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -395,12 +395,16 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
return NULL;
}
-#ifdef CONFIG_DEBUG_VM
+ /*
+ * Add some anal sanity checks for now. Eventually,
+ * we should just do "return pfn_to_page(pfn)", but
+ * in the meantime we check that we get a valid pfn,
+ * and that the resulting page looks ok.
+ */
if (unlikely(!pfn_valid(pfn))) {
print_bad_pte(vma, pte, addr);
return NULL;
}
-#endif
/*
* NOTE! We still have PageReserved() pages in the page
@@ -1067,6 +1071,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
}
if (pages) {
pages[i] = page;
+
+ flush_anon_page(page, start);
flush_dcache_page(page);
}
if (vmas)
@@ -2348,10 +2354,8 @@ int make_pages_present(unsigned long addr, unsigned long end)
if (!vma)
return -1;
write = (vma->vm_flags & VM_WRITE) != 0;
- if (addr >= end)
- BUG();
- if (end > vma->vm_end)
- BUG();
+ BUG_ON(addr >= end);
+ BUG_ON(end > vma->vm_end);
len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE;
ret = get_user_pages(current, current->mm, addr,
len, write, 0, NULL, NULL);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4f71cfd29c6..dec8249e972 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -912,7 +912,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
/*
* Check if this process has the right to modify the specified
* process. The right exists if the process has administrative
- * capabilities, superuser priviledges or the same
+ * capabilities, superuser privileges or the same
* userid as the target process.
*/
if ((current->euid != task->suid) && (current->euid != task->uid) &&
diff --git a/mm/mempool.c b/mm/mempool.c
index f71893ed354..fe6e05289cc 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -183,8 +183,8 @@ EXPORT_SYMBOL(mempool_resize);
*/
void mempool_destroy(mempool_t *pool)
{
- if (pool->curr_nr != pool->min_nr)
- BUG(); /* There were outstanding elements */
+ /* Check for outstanding elements */
+ BUG_ON(pool->curr_nr != pool->min_nr);
free_pool(pool);
}
EXPORT_SYMBOL(mempool_destroy);
@@ -289,3 +289,45 @@ void mempool_free_slab(void *element, void *pool_data)
kmem_cache_free(mem, element);
}
EXPORT_SYMBOL(mempool_free_slab);
+
+/*
+ * A commonly used alloc and free fn that kmalloc/kfrees the amount of memory
+ * specfied by pool_data
+ */
+void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data)
+{
+ size_t size = (size_t)(long)pool_data;
+ return kmalloc(size, gfp_mask);
+}
+EXPORT_SYMBOL(mempool_kmalloc);
+
+void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data)
+{
+ size_t size = (size_t) pool_data;
+ return kzalloc(size, gfp_mask);
+}
+EXPORT_SYMBOL(mempool_kzalloc);
+
+void mempool_kfree(void *element, void *pool_data)
+{
+ kfree(element);
+}
+EXPORT_SYMBOL(mempool_kfree);
+
+/*
+ * A simple mempool-backed page allocator that allocates pages
+ * of the order specified by pool_data.
+ */
+void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data)
+{
+ int order = (int)(long)pool_data;
+ return alloc_pages(gfp_mask, order);
+}
+EXPORT_SYMBOL(mempool_alloc_pages);
+
+void mempool_free_pages(void *element, void *pool_data)
+{
+ int order = (int)(long)pool_data;
+ __free_pages(element, order);
+}
+EXPORT_SYMBOL(mempool_free_pages);
diff --git a/mm/mmap.c b/mm/mmap.c
index 0eb9894db6d..4f5b5709136 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1040,12 +1040,11 @@ munmap_back:
* specific mapper. the address has already been validated, but
* not unmapped, but the maps are removed from the list.
*/
- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (!vma) {
error = -ENOMEM;
goto unacct_error;
}
- memset(vma, 0, sizeof(*vma));
vma->vm_mm = mm;
vma->vm_start = addr;
@@ -1896,12 +1895,11 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
/*
* create a vma struct for an anonymous mapping
*/
- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (!vma) {
vm_unacct_memory(len >> PAGE_SHIFT);
return -ENOMEM;
}
- memset(vma, 0, sizeof(*vma));
vma->vm_mm = mm;
vma->vm_start = addr;
diff --git a/mm/mmzone.c b/mm/mmzone.c
new file mode 100644
index 00000000000..b022370e612
--- /dev/null
+++ b/mm/mmzone.c
@@ -0,0 +1,50 @@
+/*
+ * linux/mm/mmzone.c
+ *
+ * management codes for pgdats and zones.
+ */
+
+
+#include <linux/config.h>
+#include <linux/stddef.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+
+struct pglist_data *first_online_pgdat(void)
+{
+ return NODE_DATA(first_online_node);
+}
+
+EXPORT_SYMBOL(first_online_pgdat);
+
+struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
+{
+ int nid = next_online_node(pgdat->node_id);
+
+ if (nid == MAX_NUMNODES)
+ return NULL;
+ return NODE_DATA(nid);
+}
+EXPORT_SYMBOL(next_online_pgdat);
+
+
+/*
+ * next_zone - helper magic for for_each_zone()
+ */
+struct zone *next_zone(struct zone *zone)
+{
+ pg_data_t *pgdat = zone->zone_pgdat;
+
+ if (zone < pgdat->node_zones + MAX_NR_ZONES - 1)
+ zone++;
+ else {
+ pgdat = next_online_pgdat(pgdat);
+ if (pgdat)
+ zone = pgdat->node_zones;
+ else
+ zone = NULL;
+ }
+ return zone;
+}
+EXPORT_SYMBOL(next_zone);
+
diff --git a/mm/msync.c b/mm/msync.c
index 2672b8dc3d8..bc6c9537636 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -126,7 +126,7 @@ static unsigned long msync_page_range(struct vm_area_struct *vma,
* write out the dirty pages and wait on the writeout and check the result.
* Or the application may run fadvise(FADV_DONTNEED) against the fd to start
* async writeout immediately.
- * So my _not_ starting I/O in MS_ASYNC we provide complete flexibility to
+ * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
* applications.
*/
static int msync_interval(struct vm_area_struct *vma, unsigned long addr,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a5c3f8bd98a..dc523a1f270 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -49,7 +49,6 @@ nodemask_t node_online_map __read_mostly = { { [0] = 1UL } };
EXPORT_SYMBOL(node_online_map);
nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
EXPORT_SYMBOL(node_possible_map);
-struct pglist_data *pgdat_list __read_mostly;
unsigned long totalram_pages __read_mostly;
unsigned long totalhigh_pages __read_mostly;
long nr_swap_pages;
@@ -1201,7 +1200,7 @@ unsigned int nr_free_highpages (void)
pg_data_t *pgdat;
unsigned int pages = 0;
- for_each_pgdat(pgdat)
+ for_each_online_pgdat(pgdat)
pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
return pages;
@@ -1343,7 +1342,7 @@ void get_zone_counts(unsigned long *active,
*active = 0;
*inactive = 0;
*free = 0;
- for_each_pgdat(pgdat) {
+ for_each_online_pgdat(pgdat) {
unsigned long l, m, n;
__get_zone_counts(&l, &m, &n, pgdat);
*active += l;
@@ -2029,8 +2028,9 @@ static __meminit void zone_pcp_init(struct zone *zone)
setup_pageset(zone_pcp(zone,cpu), batch);
#endif
}
- printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n",
- zone->name, zone->present_pages, batch);
+ if (zone->present_pages)
+ printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n",
+ zone->name, zone->present_pages, batch);
}
static __meminit void init_currently_empty_zone(struct zone *zone,
@@ -2041,7 +2041,6 @@ static __meminit void init_currently_empty_zone(struct zone *zone,
zone_wait_table_init(zone, size);
pgdat->nr_zones = zone_idx(zone) + 1;
- zone->zone_mem_map = pfn_to_page(zone_start_pfn);
zone->zone_start_pfn = zone_start_pfn;
memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
@@ -2169,8 +2168,9 @@ static void *frag_start(struct seq_file *m, loff_t *pos)
{
pg_data_t *pgdat;
loff_t node = *pos;
-
- for (pgdat = pgdat_list; pgdat && node; pgdat = pgdat->pgdat_next)
+ for (pgdat = first_online_pgdat();
+ pgdat && node;
+ pgdat = next_online_pgdat(pgdat))
--node;
return pgdat;
@@ -2181,7 +2181,7 @@ static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
pg_data_t *pgdat = (pg_data_t *)arg;
(*pos)++;
- return pgdat->pgdat_next;
+ return next_online_pgdat(pgdat);
}
static void frag_stop(struct seq_file *m, void *arg)
@@ -2482,7 +2482,7 @@ static void setup_per_zone_lowmem_reserve(void)
struct pglist_data *pgdat;
int j, idx;
- for_each_pgdat(pgdat) {
+ for_each_online_pgdat(pgdat) {
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
unsigned long present_pages = zone->present_pages;
@@ -2701,8 +2701,7 @@ void *__init alloc_large_system_hash(const char *tablename,
else
numentries <<= (PAGE_SHIFT - scale);
}
- /* rounded up to nearest power of 2 in size */
- numentries = 1UL << (long_log2(numentries) + 1);
+ numentries = roundup_pow_of_two(numentries);
/* limit allocation size to 1/16 total memory by default */
if (max == 0) {
@@ -2745,3 +2744,44 @@ void *__init alloc_large_system_hash(const char *tablename,
return table;
}
+
+#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE
+/*
+ * pfn <-> page translation. out-of-line version.
+ * (see asm-generic/memory_model.h)
+ */
+#if defined(CONFIG_FLATMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+ return mem_map + (pfn - ARCH_PFN_OFFSET);
+}
+unsigned long page_to_pfn(struct page *page)
+{
+ return (page - mem_map) + ARCH_PFN_OFFSET;
+}
+#elif defined(CONFIG_DISCONTIGMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+ int nid = arch_pfn_to_nid(pfn);
+ return NODE_DATA(nid)->node_mem_map + arch_local_page_offset(pfn,nid);
+}
+unsigned long page_to_pfn(struct page *page)
+{
+ struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+ return (page - pgdat->node_mem_map) + pgdat->node_start_pfn;
+}
+#elif defined(CONFIG_SPARSEMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+ return __section_mem_map_addr(__pfn_to_section(pfn)) + pfn;
+}
+
+unsigned long page_to_pfn(struct page *page)
+{
+ long section_id = page_to_section(page);
+ return page - __section_mem_map_addr(__nr_to_section(section_id));
+}
+#endif /* CONFIG_FLATMEM/DISCONTIGMME/SPARSEMEM */
+EXPORT_SYMBOL(pfn_to_page);
+EXPORT_SYMBOL(page_to_pfn);
+#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
diff --git a/mm/slab.c b/mm/slab.c
index 26138c9f8f0..4cbf8bb1355 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -204,7 +204,8 @@
typedef unsigned int kmem_bufctl_t;
#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
-#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2)
+#define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2)
+#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3)
/* Max number of objs-per-slab for caches which use off-slab slabs.
* Needed to avoid a possible looping condition in cache_grow().
@@ -897,6 +898,30 @@ static struct array_cache *alloc_arraycache(int node, int entries,
return nc;
}
+/*
+ * Transfer objects in one arraycache to another.
+ * Locking must be handled by the caller.
+ *
+ * Return the number of entries transferred.
+ */
+static int transfer_objects(struct array_cache *to,
+ struct array_cache *from, unsigned int max)
+{
+ /* Figure out how many entries to transfer */
+ int nr = min(min(from->avail, max), to->limit - to->avail);
+
+ if (!nr)
+ return 0;
+
+ memcpy(to->entry + to->avail, from->entry + from->avail -nr,
+ sizeof(void *) *nr);
+
+ from->avail -= nr;
+ to->avail += nr;
+ to->touched = 1;
+ return nr;
+}
+
#ifdef CONFIG_NUMA
static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int);
static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
@@ -946,6 +971,13 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
if (ac->avail) {
spin_lock(&rl3->list_lock);
+ /*
+ * Stuff objects into the remote nodes shared array first.
+ * That way we could avoid the overhead of putting the objects
+ * into the free lists and getting them back later.
+ */
+ transfer_objects(rl3->shared, ac, ac->limit);
+
free_block(cachep, ac->entry, ac->avail, node);
ac->avail = 0;
spin_unlock(&rl3->list_lock);
@@ -961,8 +993,8 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
if (l3->alien) {
struct array_cache *ac = l3->alien[node];
- if (ac && ac->avail) {
- spin_lock_irq(&ac->lock);
+
+ if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
__drain_alien_cache(cachep, ac, node);
spin_unlock_irq(&ac->lock);
}
@@ -1989,10 +2021,9 @@ kmem_cache_create (const char *name, size_t size, size_t align,
align = ralign;
/* Get cache's description obj. */
- cachep = kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
+ cachep = kmem_cache_zalloc(&cache_cache, SLAB_KERNEL);
if (!cachep)
goto oops;
- memset(cachep, 0, sizeof(struct kmem_cache));
#if DEBUG
cachep->obj_size = size;
@@ -2399,7 +2430,7 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
/* Verify that the slab belongs to the intended node */
WARN_ON(slabp->nodeid != nodeid);
- if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) {
+ if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {
printk(KERN_ERR "slab: double free detected in cache "
"'%s', objp %p\n", cachep->name, objp);
BUG();
@@ -2605,6 +2636,9 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
*/
cachep->dtor(objp + obj_offset(cachep), cachep, 0);
}
+#ifdef CONFIG_DEBUG_SLAB_LEAK
+ slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
+#endif
if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
@@ -2677,20 +2711,10 @@ retry:
BUG_ON(ac->avail > 0 || !l3);
spin_lock(&l3->list_lock);
- if (l3->shared) {
- struct array_cache *shared_array = l3->shared;
- if (shared_array->avail) {
- if (batchcount > shared_array->avail)
- batchcount = shared_array->avail;
- shared_array->avail -= batchcount;
- ac->avail = batchcount;
- memcpy(ac->entry,
- &(shared_array->entry[shared_array->avail]),
- sizeof(void *) * batchcount);
- shared_array->touched = 1;
- goto alloc_done;
- }
- }
+ /* See if we can refill from the shared array */
+ if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
+ goto alloc_done;
+
while (batchcount > 0) {
struct list_head *entry;
struct slab *slabp;
@@ -2788,6 +2812,16 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
*dbg_redzone1(cachep, objp) = RED_ACTIVE;
*dbg_redzone2(cachep, objp) = RED_ACTIVE;
}
+#ifdef CONFIG_DEBUG_SLAB_LEAK
+ {
+ struct slab *slabp;
+ unsigned objnr;
+
+ slabp = page_get_slab(virt_to_page(objp));
+ objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
+ slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
+ }
+#endif
objp += obj_offset(cachep);
if (cachep->ctor && cachep->flags & SLAB_POISON) {
unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR;
@@ -3094,6 +3128,23 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
EXPORT_SYMBOL(kmem_cache_alloc);
/**
+ * kmem_cache_alloc - Allocate an object. The memory is set to zero.
+ * @cache: The cache to allocate from.
+ * @flags: See kmalloc().
+ *
+ * Allocate an object from this cache and set the allocated memory to zero.
+ * The flags are only relevant if the cache has no available objects.
+ */
+void *kmem_cache_zalloc(struct kmem_cache *cache, gfp_t flags)
+{
+ void *ret = __cache_alloc(cache, flags, __builtin_return_address(0));
+ if (ret)
+ memset(ret, 0, obj_size(cache));
+ return ret;
+}
+EXPORT_SYMBOL(kmem_cache_zalloc);
+
+/**
* kmem_ptr_validate - check if an untrusted pointer might
* be a slab entry.
* @cachep: the cache we're checking against
@@ -3220,22 +3271,23 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
return __cache_alloc(cachep, flags, caller);
}
-#ifndef CONFIG_DEBUG_SLAB
void *__kmalloc(size_t size, gfp_t flags)
{
+#ifndef CONFIG_DEBUG_SLAB
return __do_kmalloc(size, flags, NULL);
+#else
+ return __do_kmalloc(size, flags, __builtin_return_address(0));
+#endif
}
EXPORT_SYMBOL(__kmalloc);
-#else
-
+#ifdef CONFIG_DEBUG_SLAB
void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller)
{
return __do_kmalloc(size, flags, caller);
}
EXPORT_SYMBOL(__kmalloc_track_caller);
-
#endif
#ifdef CONFIG_SMP
@@ -3259,7 +3311,7 @@ void *__alloc_percpu(size_t size)
* and we have no way of figuring out how to fix the array
* that we have allocated then....
*/
- for_each_cpu(i) {
+ for_each_possible_cpu(i) {
int node = cpu_to_node(i);
if (node_online(node))
@@ -3346,7 +3398,7 @@ void free_percpu(const void *objp)
/*
* We allocate for all cpus so we cannot use for online cpu here.
*/
- for_each_cpu(i)
+ for_each_possible_cpu(i)
kfree(p->ptrs[i]);
kfree(p);
}
@@ -3366,63 +3418,86 @@ const char *kmem_cache_name(struct kmem_cache *cachep)
EXPORT_SYMBOL_GPL(kmem_cache_name);
/*
- * This initializes kmem_list3 for all nodes.
+ * This initializes kmem_list3 or resizes varioius caches for all nodes.
*/
static int alloc_kmemlist(struct kmem_cache *cachep)
{
int node;
struct kmem_list3 *l3;
- int err = 0;
+ struct array_cache *new_shared;
+ struct array_cache **new_alien;
for_each_online_node(node) {
- struct array_cache *nc = NULL, *new;
- struct array_cache **new_alien = NULL;
-#ifdef CONFIG_NUMA
+
new_alien = alloc_alien_cache(node, cachep->limit);
if (!new_alien)
goto fail;
-#endif
- new = alloc_arraycache(node, cachep->shared*cachep->batchcount,
+
+ new_shared = alloc_arraycache(node,
+ cachep->shared*cachep->batchcount,
0xbaadf00d);
- if (!new)
+ if (!new_shared) {
+ free_alien_cache(new_alien);
goto fail;
+ }
+
l3 = cachep->nodelists[node];
if (l3) {
+ struct array_cache *shared = l3->shared;
+
spin_lock_irq(&l3->list_lock);
- nc = cachep->nodelists[node]->shared;
- if (nc)
- free_block(cachep, nc->entry, nc->avail, node);
+ if (shared)
+ free_block(cachep, shared->entry,
+ shared->avail, node);
- l3->shared = new;
- if (!cachep->nodelists[node]->alien) {
+ l3->shared = new_shared;
+ if (!l3->alien) {
l3->alien = new_alien;
new_alien = NULL;
}
l3->free_limit = (1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
spin_unlock_irq(&l3->list_lock);
- kfree(nc);
+ kfree(shared);
free_alien_cache(new_alien);
continue;
}
l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node);
- if (!l3)
+ if (!l3) {
+ free_alien_cache(new_alien);
+ kfree(new_shared);
goto fail;
+ }
kmem_list3_init(l3);
l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
((unsigned long)cachep) % REAPTIMEOUT_LIST3;
- l3->shared = new;
+ l3->shared = new_shared;
l3->alien = new_alien;
l3->free_limit = (1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
cachep->nodelists[node] = l3;
}
- return err;
+ return 0;
+
fail:
- err = -ENOMEM;
- return err;
+ if (!cachep->next.next) {
+ /* Cache is not active yet. Roll back what we did */
+ node--;
+ while (node >= 0) {
+ if (cachep->nodelists[node]) {
+ l3 = cachep->nodelists[node];
+
+ kfree(l3->shared);
+ free_alien_cache(l3->alien);
+ kfree(l3);
+ cachep->nodelists[node] = NULL;
+ }
+ node--;
+ }
+ }
+ return -ENOMEM;
}
struct ccupdate_struct {
@@ -3899,6 +3974,159 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer,
res = count;
return res;
}
+
+#ifdef CONFIG_DEBUG_SLAB_LEAK
+
+static void *leaks_start(struct seq_file *m, loff_t *pos)
+{
+ loff_t n = *pos;
+ struct list_head *p;
+
+ mutex_lock(&cache_chain_mutex);
+ p = cache_chain.next;
+ while (n--) {
+ p = p->next;
+ if (p == &cache_chain)
+ return NULL;
+ }
+ return list_entry(p, struct kmem_cache, next);
+}
+
+static inline int add_caller(unsigned long *n, unsigned long v)
+{
+ unsigned long *p;
+ int l;
+ if (!v)
+ return 1;
+ l = n[1];
+ p = n + 2;
+ while (l) {
+ int i = l/2;
+ unsigned long *q = p + 2 * i;
+ if (*q == v) {
+ q[1]++;
+ return 1;
+ }
+ if (*q > v) {
+ l = i;
+ } else {
+ p = q + 2;
+ l -= i + 1;
+ }
+ }
+ if (++n[1] == n[0])
+ return 0;
+ memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n));
+ p[0] = v;
+ p[1] = 1;
+ return 1;
+}
+
+static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
+{
+ void *p;
+ int i;
+ if (n[0] == n[1])
+ return;
+ for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) {
+ if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
+ continue;
+ if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
+ return;
+ }
+}
+
+static void show_symbol(struct seq_file *m, unsigned long address)
+{
+#ifdef CONFIG_KALLSYMS
+ char *modname;
+ const char *name;
+ unsigned long offset, size;
+ char namebuf[KSYM_NAME_LEN+1];
+
+ name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
+
+ if (name) {
+ seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
+ if (modname)
+ seq_printf(m, " [%s]", modname);
+ return;
+ }
+#endif
+ seq_printf(m, "%p", (void *)address);
+}
+
+static int leaks_show(struct seq_file *m, void *p)
+{
+ struct kmem_cache *cachep = p;
+ struct list_head *q;
+ struct slab *slabp;
+ struct kmem_list3 *l3;
+ const char *name;
+ unsigned long *n = m->private;
+ int node;
+ int i;
+
+ if (!(cachep->flags & SLAB_STORE_USER))
+ return 0;
+ if (!(cachep->flags & SLAB_RED_ZONE))
+ return 0;
+
+ /* OK, we can do it */
+
+ n[1] = 0;
+
+ for_each_online_node(node) {
+ l3 = cachep->nodelists[node];
+ if (!l3)
+ continue;
+
+ check_irq_on();
+ spin_lock_irq(&l3->list_lock);
+
+ list_for_each(q, &l3->slabs_full) {
+ slabp = list_entry(q, struct slab, list);
+ handle_slab(n, cachep, slabp);
+ }
+ list_for_each(q, &l3->slabs_partial) {
+ slabp = list_entry(q, struct slab, list);
+ handle_slab(n, cachep, slabp);
+ }
+ spin_unlock_irq(&l3->list_lock);
+ }
+ name = cachep->name;
+ if (n[0] == n[1]) {
+ /* Increase the buffer size */
+ mutex_unlock(&cache_chain_mutex);
+ m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
+ if (!m->private) {
+ /* Too bad, we are really out */
+ m->private = n;
+ mutex_lock(&cache_chain_mutex);
+ return -ENOMEM;
+ }
+ *(unsigned long *)m->private = n[0] * 2;
+ kfree(n);
+ mutex_lock(&cache_chain_mutex);
+ /* Now make sure this entry will be retried */
+ m->count = m->size;
+ return 0;
+ }
+ for (i = 0; i < n[1]; i++) {
+ seq_printf(m, "%s: %lu ", name, n[2*i+3]);
+ show_symbol(m, n[2*i+2]);
+ seq_putc(m, '\n');
+ }
+ return 0;
+}
+
+struct seq_operations slabstats_op = {
+ .start = leaks_start,
+ .next = s_next,
+ .stop = s_stop,
+ .show = leaks_show,
+};
+#endif
#endif
/**
diff --git a/mm/slob.c b/mm/slob.c
index a1f42bdc024..9bcc7e2cabf 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -294,6 +294,16 @@ void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags)
}
EXPORT_SYMBOL(kmem_cache_alloc);
+void *kmem_cache_zalloc(struct kmem_cache *c, gfp_t flags)
+{
+ void *ret = kmem_cache_alloc(c, flags);
+ if (ret)
+ memset(ret, 0, c->size);
+
+ return ret;
+}
+EXPORT_SYMBOL(kmem_cache_zalloc);
+
void kmem_cache_free(struct kmem_cache *c, void *b)
{
if (c->dtor)
diff --git a/mm/swap.c b/mm/swap.c
index 91b7e2026f6..88895c249bc 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -512,7 +512,7 @@ long percpu_counter_sum(struct percpu_counter *fbc)
spin_lock(&fbc->lock);
ret = fbc->count;
- for_each_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
long *pcount = per_cpu_ptr(fbc->counters, cpu);
ret += *pcount;
}
diff --git a/mm/util.c b/mm/util.c
index 49e29f751b5..7368479220b 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -5,18 +5,18 @@
#include <asm/uaccess.h>
/**
- * kzalloc - allocate memory. The memory is set to zero.
+ * __kzalloc - allocate memory. The memory is set to zero.
* @size: how many bytes of memory are required.
* @flags: the type of memory to allocate.
*/
-void *kzalloc(size_t size, gfp_t flags)
+void *__kzalloc(size_t size, gfp_t flags)
{
- void *ret = kmalloc(size, flags);
+ void *ret = ____kmalloc(size, flags);
if (ret)
memset(ret, 0, size);
return ret;
}
-EXPORT_SYMBOL(kzalloc);
+EXPORT_SYMBOL(__kzalloc);
/*
* kstrdup - allocate space for and copy an existing string
@@ -33,7 +33,7 @@ char *kstrdup(const char *s, gfp_t gfp)
return NULL;
len = strlen(s) + 1;
- buf = kmalloc(len, gfp);
+ buf = ____kmalloc(len, gfp);
if (buf)
memcpy(buf, s, len);
return buf;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index fd572bbdc9f..acdf001d694 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1305,7 +1305,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
current->reclaim_state = &reclaim_state;
repeat:
- for_each_pgdat(pgdat) {
+ for_each_online_pgdat(pgdat) {
unsigned long freed;
freed = balance_pgdat(pgdat, nr_to_free, 0);
@@ -1335,7 +1335,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
cpumask_t mask;
if (action == CPU_ONLINE) {
- for_each_pgdat(pgdat) {
+ for_each_online_pgdat(pgdat) {
mask = node_to_cpumask(pgdat->node_id);
if (any_online_cpu(mask) != NR_CPUS)
/* One of our CPUs online: restore mask */
@@ -1351,12 +1351,14 @@ static int __init kswapd_init(void)
pg_data_t *pgdat;
swap_setup();
- for_each_pgdat(pgdat) {
+ for_each_online_pgdat(pgdat) {
pid_t pid;
pid = kernel_thread(kswapd, pgdat, CLONE_KERNEL);
BUG_ON(pid < 0);
+ read_lock(&tasklist_lock);
pgdat->kswapd = find_task_by_pid(pid);
+ read_unlock(&tasklist_lock);
}
total_memory = nr_free_pagecache_pages();
hotcpu_notifier(cpu_callback, 0);