diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/hugetlb.c | 4 | ||||
-rw-r--r-- | mm/madvise.c | 21 | ||||
-rw-r--r-- | mm/memory.c | 10 | ||||
-rw-r--r-- | mm/mempolicy.c | 18 | ||||
-rw-r--r-- | mm/page_alloc.c | 22 | ||||
-rw-r--r-- | mm/slab.c | 10 | ||||
-rw-r--r-- | mm/swap.c | 2 | ||||
-rw-r--r-- | mm/vmscan.c | 106 |
8 files changed, 129 insertions, 64 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 67f29516662..508707704d2 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -85,7 +85,7 @@ void free_huge_page(struct page *page) BUG_ON(page_count(page)); INIT_LIST_HEAD(&page->lru); - page[1].mapping = NULL; + page[1].lru.next = NULL; /* reset dtor */ spin_lock(&hugetlb_lock); enqueue_huge_page(page); @@ -105,7 +105,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr) } spin_unlock(&hugetlb_lock); set_page_count(page, 1); - page[1].mapping = (void *)free_huge_page; + page[1].lru.next = (void *)free_huge_page; /* set dtor */ for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i) clear_user_highpage(&page[i], addr); return page; diff --git a/mm/madvise.c b/mm/madvise.c index ae0ae3ea299..af3d573b014 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -22,16 +22,23 @@ static long madvise_behavior(struct vm_area_struct * vma, struct mm_struct * mm = vma->vm_mm; int error = 0; pgoff_t pgoff; - int new_flags = vma->vm_flags & ~VM_READHINTMASK; + int new_flags = vma->vm_flags; switch (behavior) { + case MADV_NORMAL: + new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ; + break; case MADV_SEQUENTIAL: - new_flags |= VM_SEQ_READ; + new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ; break; case MADV_RANDOM: - new_flags |= VM_RAND_READ; + new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ; break; - default: + case MADV_DONTFORK: + new_flags |= VM_DONTCOPY; + break; + case MADV_DOFORK: + new_flags &= ~VM_DONTCOPY; break; } @@ -177,6 +184,12 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, long error; switch (behavior) { + case MADV_DOFORK: + if (vma->vm_flags & VM_IO) { + error = -EINVAL; + break; + } + case MADV_DONTFORK: case MADV_NORMAL: case MADV_SEQUENTIAL: case MADV_RANDOM: diff --git a/mm/memory.c b/mm/memory.c index 2bee1f21aa8..9abc6008544 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -82,6 +82,16 @@ EXPORT_SYMBOL(num_physpages); EXPORT_SYMBOL(high_memory); EXPORT_SYMBOL(vmalloc_earlyreserve); +int randomize_va_space __read_mostly = 1; + +static int __init disable_randmaps(char *s) +{ + randomize_va_space = 0; + return 0; +} +__setup("norandmaps", disable_randmaps); + + /* * If a p?d_bad entry is found while walking page tables, report * the error, before resetting entry to p?d_none. Usually (but diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 3bd7fb7e4b7..323fdcf128c 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -132,19 +132,29 @@ static int mpol_check_policy(int mode, nodemask_t *nodes) } return nodes_subset(*nodes, node_online_map) ? 0 : -EINVAL; } + /* Generate a custom zonelist for the BIND policy. */ static struct zonelist *bind_zonelist(nodemask_t *nodes) { struct zonelist *zl; - int num, max, nd; + int num, max, nd, k; max = 1 + MAX_NR_ZONES * nodes_weight(*nodes); - zl = kmalloc(sizeof(void *) * max, GFP_KERNEL); + zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL); if (!zl) return NULL; num = 0; - for_each_node_mask(nd, *nodes) - zl->zones[num++] = &NODE_DATA(nd)->node_zones[policy_zone]; + /* First put in the highest zones from all nodes, then all the next + lower zones etc. Avoid empty zones because the memory allocator + doesn't like them. If you implement node hot removal you + have to fix that. */ + for (k = policy_zone; k >= 0; k--) { + for_each_node_mask(nd, *nodes) { + struct zone *z = &NODE_DATA(nd)->node_zones[k]; + if (z->present_pages > 0) + zl->zones[num++] = z; + } + } zl->zones[num] = NULL; return zl; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index dde04ff4be3..62c12252858 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -56,6 +56,7 @@ long nr_swap_pages; int percpu_pagelist_fraction; static void fastcall free_hot_cold_page(struct page *page, int cold); +static void __free_pages_ok(struct page *page, unsigned int order); /* * results with 256, 32 in the lowmem_reserve sysctl: @@ -169,20 +170,23 @@ static void bad_page(struct page *page) * All pages have PG_compound set. All pages have their ->private pointing at * the head page (even the head page has this). * - * The first tail page's ->mapping, if non-zero, holds the address of the - * compound page's put_page() function. - * - * The order of the allocation is stored in the first tail page's ->index - * This is only for debug at present. This usage means that zero-order pages - * may not be compound. + * The first tail page's ->lru.next holds the address of the compound page's + * put_page() function. Its ->lru.prev holds the order of allocation. + * This usage means that zero-order pages may not be compound. */ + +static void free_compound_page(struct page *page) +{ + __free_pages_ok(page, (unsigned long)page[1].lru.prev); +} + static void prep_compound_page(struct page *page, unsigned long order) { int i; int nr_pages = 1 << order; - page[1].mapping = NULL; - page[1].index = order; + page[1].lru.next = (void *)free_compound_page; /* set dtor */ + page[1].lru.prev = (void *)order; for (i = 0; i < nr_pages; i++) { struct page *p = page + i; @@ -196,7 +200,7 @@ static void destroy_compound_page(struct page *page, unsigned long order) int i; int nr_pages = 1 << order; - if (unlikely(page[1].index != order)) + if (unlikely((unsigned long)page[1].lru.prev != order)) bad_page(page); for (i = 0; i < nr_pages; i++) { diff --git a/mm/slab.c b/mm/slab.c index d66c2b0d971..add05d808a4 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1717,6 +1717,12 @@ kmem_cache_create (const char *name, size_t size, size_t align, BUG(); } + /* + * Prevent CPUs from coming and going. + * lock_cpu_hotplug() nests outside cache_chain_mutex + */ + lock_cpu_hotplug(); + mutex_lock(&cache_chain_mutex); list_for_each(p, &cache_chain) { @@ -1918,8 +1924,6 @@ kmem_cache_create (const char *name, size_t size, size_t align, cachep->dtor = dtor; cachep->name = name; - /* Don't let CPUs to come and go */ - lock_cpu_hotplug(); if (g_cpucache_up == FULL) { enable_cpucache(cachep); @@ -1978,12 +1982,12 @@ kmem_cache_create (const char *name, size_t size, size_t align, /* cache setup completed, link it into the list */ list_add(&cachep->next, &cache_chain); - unlock_cpu_hotplug(); oops: if (!cachep && (flags & SLAB_PANIC)) panic("kmem_cache_create(): failed to create slab `%s'\n", name); mutex_unlock(&cache_chain_mutex); + unlock_cpu_hotplug(); return cachep; } EXPORT_SYMBOL(kmem_cache_create); diff --git a/mm/swap.c b/mm/swap.c index 76247424dea..cce3dda59c5 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -40,7 +40,7 @@ static void put_compound_page(struct page *page) if (put_page_testzero(page)) { void (*dtor)(struct page *page); - dtor = (void (*)(struct page *))page[1].mapping; + dtor = (void (*)(struct page *))page[1].lru.next; (*dtor)(page); } } diff --git a/mm/vmscan.c b/mm/vmscan.c index 5a610804cd0..1838c15ca4f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -443,6 +443,10 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) BUG_ON(PageActive(page)); sc->nr_scanned++; + + if (!sc->may_swap && page_mapped(page)) + goto keep_locked; + /* Double the slab pressure for mapped and swapcache pages */ if (page_mapped(page) || PageSwapCache(page)) sc->nr_scanned++; @@ -632,7 +636,7 @@ static int swap_page(struct page *page) struct address_space *mapping = page_mapping(page); if (page_mapped(page) && mapping) - if (try_to_unmap(page, 0) != SWAP_SUCCESS) + if (try_to_unmap(page, 1) != SWAP_SUCCESS) goto unlock_retry; if (PageDirty(page)) { @@ -839,7 +843,7 @@ EXPORT_SYMBOL(migrate_page); * pages are swapped out. * * The function returns after 10 attempts or if no pages - * are movable anymore because t has become empty + * are movable anymore because to has become empty * or no retryable pages exist anymore. * * Return: Number of pages not migrated when "to" ran empty. @@ -928,12 +932,21 @@ redo: goto unlock_both; if (mapping->a_ops->migratepage) { + /* + * Most pages have a mapping and most filesystems + * should provide a migration function. Anonymous + * pages are part of swap space which also has its + * own migration function. This is the most common + * path for page migration. + */ rc = mapping->a_ops->migratepage(newpage, page); goto unlock_both; } /* - * Trigger writeout if page is dirty + * Default handling if a filesystem does not provide + * a migration function. We can only migrate clean + * pages so try to write out any dirty pages first. */ if (PageDirty(page)) { switch (pageout(page, mapping)) { @@ -949,9 +962,10 @@ redo: ; /* try to migrate the page below */ } } + /* - * If we have no buffer or can release the buffer - * then do a simple migration. + * Buffers are managed in a filesystem specific way. + * We must have no buffers or drop them. */ if (!page_has_buffers(page) || try_to_release_page(page, GFP_KERNEL)) { @@ -966,6 +980,11 @@ redo: * swap them out. */ if (pass > 4) { + /* + * Persistently unable to drop buffers..... As a + * measure of last resort we fall back to + * swap_page(). + */ unlock_page(newpage); newpage = NULL; rc = swap_page(page); @@ -1176,9 +1195,47 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) struct page *page; struct pagevec pvec; int reclaim_mapped = 0; - long mapped_ratio; - long distress; - long swap_tendency; + + if (unlikely(sc->may_swap)) { + long mapped_ratio; + long distress; + long swap_tendency; + + /* + * `distress' is a measure of how much trouble we're having + * reclaiming pages. 0 -> no problems. 100 -> great trouble. + */ + distress = 100 >> zone->prev_priority; + + /* + * The point of this algorithm is to decide when to start + * reclaiming mapped memory instead of just pagecache. Work out + * how much memory + * is mapped. + */ + mapped_ratio = (sc->nr_mapped * 100) / total_memory; + + /* + * Now decide how much we really want to unmap some pages. The + * mapped ratio is downgraded - just because there's a lot of + * mapped memory doesn't necessarily mean that page reclaim + * isn't succeeding. + * + * The distress ratio is important - we don't want to start + * going oom. + * + * A 100% value of vm_swappiness overrides this algorithm + * altogether. + */ + swap_tendency = mapped_ratio / 2 + distress + vm_swappiness; + + /* + * Now use this metric to decide whether to start moving mapped + * memory onto the inactive list. + */ + if (swap_tendency >= 100) + reclaim_mapped = 1; + } lru_add_drain(); spin_lock_irq(&zone->lru_lock); @@ -1188,37 +1245,6 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) zone->nr_active -= pgmoved; spin_unlock_irq(&zone->lru_lock); - /* - * `distress' is a measure of how much trouble we're having reclaiming - * pages. 0 -> no problems. 100 -> great trouble. - */ - distress = 100 >> zone->prev_priority; - - /* - * The point of this algorithm is to decide when to start reclaiming - * mapped memory instead of just pagecache. Work out how much memory - * is mapped. - */ - mapped_ratio = (sc->nr_mapped * 100) / total_memory; - - /* - * Now decide how much we really want to unmap some pages. The mapped - * ratio is downgraded - just because there's a lot of mapped memory - * doesn't necessarily mean that page reclaim isn't succeeding. - * - * The distress ratio is important - we don't want to start going oom. - * - * A 100% value of vm_swappiness overrides this algorithm altogether. - */ - swap_tendency = mapped_ratio / 2 + distress + vm_swappiness; - - /* - * Now use this metric to decide whether to start moving mapped memory - * onto the inactive list. - */ - if (swap_tendency >= 100) - reclaim_mapped = 1; - while (!list_empty(&l_hold)) { cond_resched(); page = lru_to_page(&l_hold); @@ -1595,9 +1621,7 @@ scan: sc.nr_reclaimed = 0; sc.priority = priority; sc.swap_cluster_max = nr_pages? nr_pages : SWAP_CLUSTER_MAX; - atomic_inc(&zone->reclaim_in_progress); shrink_zone(zone, &sc); - atomic_dec(&zone->reclaim_in_progress); reclaim_state->reclaimed_slab = 0; nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, lru_pages); |