diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/filemap.c | 103 | ||||
-rw-r--r-- | mm/hugetlb.c | 7 | ||||
-rw-r--r-- | mm/migrate.c | 39 | ||||
-rw-r--r-- | mm/oom_kill.c | 2 | ||||
-rw-r--r-- | mm/page_alloc.c | 5 | ||||
-rw-r--r-- | mm/vmalloc.c | 110 |
6 files changed, 183 insertions, 83 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 96ac6b0eb6c..698ea80f210 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1634,14 +1634,15 @@ EXPORT_SYMBOL(generic_file_readonly_mmap); static struct page *__read_cache_page(struct address_space *mapping, pgoff_t index, int (*filler)(void *,struct page*), - void *data) + void *data, + gfp_t gfp) { struct page *page; int err; repeat: page = find_get_page(mapping, index); if (!page) { - page = page_cache_alloc_cold(mapping); + page = __page_cache_alloc(gfp | __GFP_COLD); if (!page) return ERR_PTR(-ENOMEM); err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); @@ -1661,31 +1662,18 @@ repeat: return page; } -/** - * read_cache_page_async - read into page cache, fill it if needed - * @mapping: the page's address_space - * @index: the page index - * @filler: function to perform the read - * @data: destination for read data - * - * Same as read_cache_page, but don't wait for page to become unlocked - * after submitting it to the filler. - * - * Read into the page cache. If a page already exists, and PageUptodate() is - * not set, try to fill the page but don't wait for it to become unlocked. - * - * If the page does not get brought uptodate, return -EIO. - */ -struct page *read_cache_page_async(struct address_space *mapping, +static struct page *do_read_cache_page(struct address_space *mapping, pgoff_t index, int (*filler)(void *,struct page*), - void *data) + void *data, + gfp_t gfp) + { struct page *page; int err; retry: - page = __read_cache_page(mapping, index, filler, data); + page = __read_cache_page(mapping, index, filler, data, gfp); if (IS_ERR(page)) return page; if (PageUptodate(page)) @@ -1710,8 +1698,67 @@ out: mark_page_accessed(page); return page; } + +/** + * read_cache_page_async - read into page cache, fill it if needed + * @mapping: the page's address_space + * @index: the page index + * @filler: function to perform the read + * @data: destination for read data + * + * Same as read_cache_page, but don't wait for page to become unlocked + * after submitting it to the filler. + * + * Read into the page cache. If a page already exists, and PageUptodate() is + * not set, try to fill the page but don't wait for it to become unlocked. + * + * If the page does not get brought uptodate, return -EIO. + */ +struct page *read_cache_page_async(struct address_space *mapping, + pgoff_t index, + int (*filler)(void *,struct page*), + void *data) +{ + return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping)); +} EXPORT_SYMBOL(read_cache_page_async); +static struct page *wait_on_page_read(struct page *page) +{ + if (!IS_ERR(page)) { + wait_on_page_locked(page); + if (!PageUptodate(page)) { + page_cache_release(page); + page = ERR_PTR(-EIO); + } + } + return page; +} + +/** + * read_cache_page_gfp - read into page cache, using specified page allocation flags. + * @mapping: the page's address_space + * @index: the page index + * @gfp: the page allocator flags to use if allocating + * + * This is the same as "read_mapping_page(mapping, index, NULL)", but with + * any new page allocations done using the specified allocation flags. Note + * that the Radix tree operations will still use GFP_KERNEL, so you can't + * expect to do this atomically or anything like that - but you can pass in + * other page requirements. + * + * If the page does not get brought uptodate, return -EIO. + */ +struct page *read_cache_page_gfp(struct address_space *mapping, + pgoff_t index, + gfp_t gfp) +{ + filler_t *filler = (filler_t *)mapping->a_ops->readpage; + + return wait_on_page_read(do_read_cache_page(mapping, index, filler, NULL, gfp)); +} +EXPORT_SYMBOL(read_cache_page_gfp); + /** * read_cache_page - read into page cache, fill it if needed * @mapping: the page's address_space @@ -1729,18 +1776,7 @@ struct page *read_cache_page(struct address_space *mapping, int (*filler)(void *,struct page*), void *data) { - struct page *page; - - page = read_cache_page_async(mapping, index, filler, data); - if (IS_ERR(page)) - goto out; - wait_on_page_locked(page); - if (!PageUptodate(page)) { - page_cache_release(page); - page = ERR_PTR(-EIO); - } - out: - return page; + return wait_on_page_read(read_cache_page_async(mapping, index, filler, data)); } EXPORT_SYMBOL(read_cache_page); @@ -2196,6 +2232,9 @@ again: if (unlikely(status)) break; + if (mapping_writably_mapped(mapping)) + flush_dcache_page(page); + pagefault_disable(); copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); pagefault_enable(); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e91b81b6367..2d16fa6b8c2 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1515,10 +1515,9 @@ static struct attribute_group hstate_attr_group = { .attrs = hstate_attrs, }; -static int __init hugetlb_sysfs_add_hstate(struct hstate *h, - struct kobject *parent, - struct kobject **hstate_kobjs, - struct attribute_group *hstate_attr_group) +static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent, + struct kobject **hstate_kobjs, + struct attribute_group *hstate_attr_group) { int retval; int hi = h - hstates; diff --git a/mm/migrate.c b/mm/migrate.c index efddbf0926b..880bd592d38 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -912,6 +912,9 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task, goto out_pm; err = -ENODEV; + if (node < 0 || node >= MAX_NUMNODES) + goto out_pm; + if (!node_state(node, N_HIGH_MEMORY)) goto out_pm; @@ -999,33 +1002,27 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages, #define DO_PAGES_STAT_CHUNK_NR 16 const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR]; int chunk_status[DO_PAGES_STAT_CHUNK_NR]; - unsigned long i, chunk_nr = DO_PAGES_STAT_CHUNK_NR; - int err; - for (i = 0; i < nr_pages; i += chunk_nr) { - if (chunk_nr > nr_pages - i) - chunk_nr = nr_pages - i; + while (nr_pages) { + unsigned long chunk_nr; - err = copy_from_user(chunk_pages, &pages[i], - chunk_nr * sizeof(*chunk_pages)); - if (err) { - err = -EFAULT; - goto out; - } + chunk_nr = nr_pages; + if (chunk_nr > DO_PAGES_STAT_CHUNK_NR) + chunk_nr = DO_PAGES_STAT_CHUNK_NR; + + if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages))) + break; do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status); - err = copy_to_user(&status[i], chunk_status, - chunk_nr * sizeof(*chunk_status)); - if (err) { - err = -EFAULT; - goto out; - } - } - err = 0; + if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status))) + break; -out: - return err; + pages += chunk_nr; + status += chunk_nr; + nr_pages -= chunk_nr; + } + return nr_pages ? -EFAULT : 0; } /* diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f52481b1c1e..237050478f2 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -459,6 +459,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, list_for_each_entry(c, &p->children, sibling) { if (c->mm == p->mm) continue; + if (mem && !task_in_mem_cgroup(c, mem)) + continue; if (!oom_kill_task(c)) return 0; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d2a8889b4c5..8deb9d0fd5b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -556,8 +556,9 @@ static void free_pcppages_bulk(struct zone *zone, int count, page = list_entry(list->prev, struct page, lru); /* must delete as __free_one_page list manipulates */ list_del(&page->lru); - __free_one_page(page, zone, 0, migratetype); - trace_mm_page_pcpu_drain(page, 0, migratetype); + /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ + __free_one_page(page, zone, 0, page_private(page)); + trace_mm_page_pcpu_drain(page, 0, page_private(page)); } while (--count && --batch_free && !list_empty(list)); } spin_unlock(&zone->lock); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d55d905463e..ae007462b7f 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -509,6 +509,9 @@ static unsigned long lazy_max_pages(void) static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); +/* for per-CPU blocks */ +static void purge_fragmented_blocks_allcpus(void); + /* * Purges all lazily-freed vmap areas. * @@ -539,6 +542,9 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, } else spin_lock(&purge_lock); + if (sync) + purge_fragmented_blocks_allcpus(); + rcu_read_lock(); list_for_each_entry_rcu(va, &vmap_area_list, list) { if (va->flags & VM_LAZY_FREE) { @@ -667,8 +673,6 @@ static bool vmap_initialized __read_mostly = false; struct vmap_block_queue { spinlock_t lock; struct list_head free; - struct list_head dirty; - unsigned int nr_dirty; }; struct vmap_block { @@ -678,10 +682,9 @@ struct vmap_block { unsigned long free, dirty; DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS); DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); - union { - struct list_head free_list; - struct rcu_head rcu_head; - }; + struct list_head free_list; + struct rcu_head rcu_head; + struct list_head purge; }; /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */ @@ -757,7 +760,7 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask) vbq = &get_cpu_var(vmap_block_queue); vb->vbq = vbq; spin_lock(&vbq->lock); - list_add(&vb->free_list, &vbq->free); + list_add_rcu(&vb->free_list, &vbq->free); spin_unlock(&vbq->lock); put_cpu_var(vmap_block_queue); @@ -776,8 +779,6 @@ static void free_vmap_block(struct vmap_block *vb) struct vmap_block *tmp; unsigned long vb_idx; - BUG_ON(!list_empty(&vb->free_list)); - vb_idx = addr_to_vb_idx(vb->va->va_start); spin_lock(&vmap_block_tree_lock); tmp = radix_tree_delete(&vmap_block_tree, vb_idx); @@ -788,12 +789,61 @@ static void free_vmap_block(struct vmap_block *vb) call_rcu(&vb->rcu_head, rcu_free_vb); } +static void purge_fragmented_blocks(int cpu) +{ + LIST_HEAD(purge); + struct vmap_block *vb; + struct vmap_block *n_vb; + struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); + + rcu_read_lock(); + list_for_each_entry_rcu(vb, &vbq->free, free_list) { + + if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS)) + continue; + + spin_lock(&vb->lock); + if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) { + vb->free = 0; /* prevent further allocs after releasing lock */ + vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */ + bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS); + bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS); + spin_lock(&vbq->lock); + list_del_rcu(&vb->free_list); + spin_unlock(&vbq->lock); + spin_unlock(&vb->lock); + list_add_tail(&vb->purge, &purge); + } else + spin_unlock(&vb->lock); + } + rcu_read_unlock(); + + list_for_each_entry_safe(vb, n_vb, &purge, purge) { + list_del(&vb->purge); + free_vmap_block(vb); + } +} + +static void purge_fragmented_blocks_thiscpu(void) +{ + purge_fragmented_blocks(smp_processor_id()); +} + +static void purge_fragmented_blocks_allcpus(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + purge_fragmented_blocks(cpu); +} + static void *vb_alloc(unsigned long size, gfp_t gfp_mask) { struct vmap_block_queue *vbq; struct vmap_block *vb; unsigned long addr = 0; unsigned int order; + int purge = 0; BUG_ON(size & ~PAGE_MASK); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); @@ -806,24 +856,38 @@ again: int i; spin_lock(&vb->lock); + if (vb->free < 1UL << order) + goto next; + i = bitmap_find_free_region(vb->alloc_map, VMAP_BBMAP_BITS, order); - if (i >= 0) { - addr = vb->va->va_start + (i << PAGE_SHIFT); - BUG_ON(addr_to_vb_idx(addr) != - addr_to_vb_idx(vb->va->va_start)); - vb->free -= 1UL << order; - if (vb->free == 0) { - spin_lock(&vbq->lock); - list_del_init(&vb->free_list); - spin_unlock(&vbq->lock); + if (i < 0) { + if (vb->free + vb->dirty == VMAP_BBMAP_BITS) { + /* fragmented and no outstanding allocations */ + BUG_ON(vb->dirty != VMAP_BBMAP_BITS); + purge = 1; } - spin_unlock(&vb->lock); - break; + goto next; } + addr = vb->va->va_start + (i << PAGE_SHIFT); + BUG_ON(addr_to_vb_idx(addr) != + addr_to_vb_idx(vb->va->va_start)); + vb->free -= 1UL << order; + if (vb->free == 0) { + spin_lock(&vbq->lock); + list_del_rcu(&vb->free_list); + spin_unlock(&vbq->lock); + } + spin_unlock(&vb->lock); + break; +next: spin_unlock(&vb->lock); } + + if (purge) + purge_fragmented_blocks_thiscpu(); + put_cpu_var(vmap_block_queue); rcu_read_unlock(); @@ -860,11 +924,11 @@ static void vb_free(const void *addr, unsigned long size) BUG_ON(!vb); spin_lock(&vb->lock); - bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order); + BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order)); vb->dirty += 1UL << order; if (vb->dirty == VMAP_BBMAP_BITS) { - BUG_ON(vb->free || !list_empty(&vb->free_list)); + BUG_ON(vb->free); spin_unlock(&vb->lock); free_vmap_block(vb); } else @@ -1033,8 +1097,6 @@ void __init vmalloc_init(void) vbq = &per_cpu(vmap_block_queue, i); spin_lock_init(&vbq->lock); INIT_LIST_HEAD(&vbq->free); - INIT_LIST_HEAD(&vbq->dirty); - vbq->nr_dirty = 0; } /* Import existing vmlist entries. */ |