aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/filemap.c103
-rw-r--r--mm/hugetlb.c7
-rw-r--r--mm/migrate.c39
-rw-r--r--mm/oom_kill.c2
-rw-r--r--mm/page_alloc.c5
-rw-r--r--mm/vmalloc.c110
6 files changed, 183 insertions, 83 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 96ac6b0eb6c..698ea80f210 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1634,14 +1634,15 @@ EXPORT_SYMBOL(generic_file_readonly_mmap);
static struct page *__read_cache_page(struct address_space *mapping,
pgoff_t index,
int (*filler)(void *,struct page*),
- void *data)
+ void *data,
+ gfp_t gfp)
{
struct page *page;
int err;
repeat:
page = find_get_page(mapping, index);
if (!page) {
- page = page_cache_alloc_cold(mapping);
+ page = __page_cache_alloc(gfp | __GFP_COLD);
if (!page)
return ERR_PTR(-ENOMEM);
err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
@@ -1661,31 +1662,18 @@ repeat:
return page;
}
-/**
- * read_cache_page_async - read into page cache, fill it if needed
- * @mapping: the page's address_space
- * @index: the page index
- * @filler: function to perform the read
- * @data: destination for read data
- *
- * Same as read_cache_page, but don't wait for page to become unlocked
- * after submitting it to the filler.
- *
- * Read into the page cache. If a page already exists, and PageUptodate() is
- * not set, try to fill the page but don't wait for it to become unlocked.
- *
- * If the page does not get brought uptodate, return -EIO.
- */
-struct page *read_cache_page_async(struct address_space *mapping,
+static struct page *do_read_cache_page(struct address_space *mapping,
pgoff_t index,
int (*filler)(void *,struct page*),
- void *data)
+ void *data,
+ gfp_t gfp)
+
{
struct page *page;
int err;
retry:
- page = __read_cache_page(mapping, index, filler, data);
+ page = __read_cache_page(mapping, index, filler, data, gfp);
if (IS_ERR(page))
return page;
if (PageUptodate(page))
@@ -1710,8 +1698,67 @@ out:
mark_page_accessed(page);
return page;
}
+
+/**
+ * read_cache_page_async - read into page cache, fill it if needed
+ * @mapping: the page's address_space
+ * @index: the page index
+ * @filler: function to perform the read
+ * @data: destination for read data
+ *
+ * Same as read_cache_page, but don't wait for page to become unlocked
+ * after submitting it to the filler.
+ *
+ * Read into the page cache. If a page already exists, and PageUptodate() is
+ * not set, try to fill the page but don't wait for it to become unlocked.
+ *
+ * If the page does not get brought uptodate, return -EIO.
+ */
+struct page *read_cache_page_async(struct address_space *mapping,
+ pgoff_t index,
+ int (*filler)(void *,struct page*),
+ void *data)
+{
+ return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
+}
EXPORT_SYMBOL(read_cache_page_async);
+static struct page *wait_on_page_read(struct page *page)
+{
+ if (!IS_ERR(page)) {
+ wait_on_page_locked(page);
+ if (!PageUptodate(page)) {
+ page_cache_release(page);
+ page = ERR_PTR(-EIO);
+ }
+ }
+ return page;
+}
+
+/**
+ * read_cache_page_gfp - read into page cache, using specified page allocation flags.
+ * @mapping: the page's address_space
+ * @index: the page index
+ * @gfp: the page allocator flags to use if allocating
+ *
+ * This is the same as "read_mapping_page(mapping, index, NULL)", but with
+ * any new page allocations done using the specified allocation flags. Note
+ * that the Radix tree operations will still use GFP_KERNEL, so you can't
+ * expect to do this atomically or anything like that - but you can pass in
+ * other page requirements.
+ *
+ * If the page does not get brought uptodate, return -EIO.
+ */
+struct page *read_cache_page_gfp(struct address_space *mapping,
+ pgoff_t index,
+ gfp_t gfp)
+{
+ filler_t *filler = (filler_t *)mapping->a_ops->readpage;
+
+ return wait_on_page_read(do_read_cache_page(mapping, index, filler, NULL, gfp));
+}
+EXPORT_SYMBOL(read_cache_page_gfp);
+
/**
* read_cache_page - read into page cache, fill it if needed
* @mapping: the page's address_space
@@ -1729,18 +1776,7 @@ struct page *read_cache_page(struct address_space *mapping,
int (*filler)(void *,struct page*),
void *data)
{
- struct page *page;
-
- page = read_cache_page_async(mapping, index, filler, data);
- if (IS_ERR(page))
- goto out;
- wait_on_page_locked(page);
- if (!PageUptodate(page)) {
- page_cache_release(page);
- page = ERR_PTR(-EIO);
- }
- out:
- return page;
+ return wait_on_page_read(read_cache_page_async(mapping, index, filler, data));
}
EXPORT_SYMBOL(read_cache_page);
@@ -2196,6 +2232,9 @@ again:
if (unlikely(status))
break;
+ if (mapping_writably_mapped(mapping))
+ flush_dcache_page(page);
+
pagefault_disable();
copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
pagefault_enable();
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e91b81b6367..2d16fa6b8c2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1515,10 +1515,9 @@ static struct attribute_group hstate_attr_group = {
.attrs = hstate_attrs,
};
-static int __init hugetlb_sysfs_add_hstate(struct hstate *h,
- struct kobject *parent,
- struct kobject **hstate_kobjs,
- struct attribute_group *hstate_attr_group)
+static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent,
+ struct kobject **hstate_kobjs,
+ struct attribute_group *hstate_attr_group)
{
int retval;
int hi = h - hstates;
diff --git a/mm/migrate.c b/mm/migrate.c
index efddbf0926b..880bd592d38 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -912,6 +912,9 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
goto out_pm;
err = -ENODEV;
+ if (node < 0 || node >= MAX_NUMNODES)
+ goto out_pm;
+
if (!node_state(node, N_HIGH_MEMORY))
goto out_pm;
@@ -999,33 +1002,27 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
#define DO_PAGES_STAT_CHUNK_NR 16
const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
int chunk_status[DO_PAGES_STAT_CHUNK_NR];
- unsigned long i, chunk_nr = DO_PAGES_STAT_CHUNK_NR;
- int err;
- for (i = 0; i < nr_pages; i += chunk_nr) {
- if (chunk_nr > nr_pages - i)
- chunk_nr = nr_pages - i;
+ while (nr_pages) {
+ unsigned long chunk_nr;
- err = copy_from_user(chunk_pages, &pages[i],
- chunk_nr * sizeof(*chunk_pages));
- if (err) {
- err = -EFAULT;
- goto out;
- }
+ chunk_nr = nr_pages;
+ if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
+ chunk_nr = DO_PAGES_STAT_CHUNK_NR;
+
+ if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
+ break;
do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
- err = copy_to_user(&status[i], chunk_status,
- chunk_nr * sizeof(*chunk_status));
- if (err) {
- err = -EFAULT;
- goto out;
- }
- }
- err = 0;
+ if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
+ break;
-out:
- return err;
+ pages += chunk_nr;
+ status += chunk_nr;
+ nr_pages -= chunk_nr;
+ }
+ return nr_pages ? -EFAULT : 0;
}
/*
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f52481b1c1e..237050478f2 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -459,6 +459,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
list_for_each_entry(c, &p->children, sibling) {
if (c->mm == p->mm)
continue;
+ if (mem && !task_in_mem_cgroup(c, mem))
+ continue;
if (!oom_kill_task(c))
return 0;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d2a8889b4c5..8deb9d0fd5b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -556,8 +556,9 @@ static void free_pcppages_bulk(struct zone *zone, int count,
page = list_entry(list->prev, struct page, lru);
/* must delete as __free_one_page list manipulates */
list_del(&page->lru);
- __free_one_page(page, zone, 0, migratetype);
- trace_mm_page_pcpu_drain(page, 0, migratetype);
+ /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
+ __free_one_page(page, zone, 0, page_private(page));
+ trace_mm_page_pcpu_drain(page, 0, page_private(page));
} while (--count && --batch_free && !list_empty(list));
}
spin_unlock(&zone->lock);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d55d905463e..ae007462b7f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -509,6 +509,9 @@ static unsigned long lazy_max_pages(void)
static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
+/* for per-CPU blocks */
+static void purge_fragmented_blocks_allcpus(void);
+
/*
* Purges all lazily-freed vmap areas.
*
@@ -539,6 +542,9 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
} else
spin_lock(&purge_lock);
+ if (sync)
+ purge_fragmented_blocks_allcpus();
+
rcu_read_lock();
list_for_each_entry_rcu(va, &vmap_area_list, list) {
if (va->flags & VM_LAZY_FREE) {
@@ -667,8 +673,6 @@ static bool vmap_initialized __read_mostly = false;
struct vmap_block_queue {
spinlock_t lock;
struct list_head free;
- struct list_head dirty;
- unsigned int nr_dirty;
};
struct vmap_block {
@@ -678,10 +682,9 @@ struct vmap_block {
unsigned long free, dirty;
DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
- union {
- struct list_head free_list;
- struct rcu_head rcu_head;
- };
+ struct list_head free_list;
+ struct rcu_head rcu_head;
+ struct list_head purge;
};
/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
@@ -757,7 +760,7 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
vbq = &get_cpu_var(vmap_block_queue);
vb->vbq = vbq;
spin_lock(&vbq->lock);
- list_add(&vb->free_list, &vbq->free);
+ list_add_rcu(&vb->free_list, &vbq->free);
spin_unlock(&vbq->lock);
put_cpu_var(vmap_block_queue);
@@ -776,8 +779,6 @@ static void free_vmap_block(struct vmap_block *vb)
struct vmap_block *tmp;
unsigned long vb_idx;
- BUG_ON(!list_empty(&vb->free_list));
-
vb_idx = addr_to_vb_idx(vb->va->va_start);
spin_lock(&vmap_block_tree_lock);
tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
@@ -788,12 +789,61 @@ static void free_vmap_block(struct vmap_block *vb)
call_rcu(&vb->rcu_head, rcu_free_vb);
}
+static void purge_fragmented_blocks(int cpu)
+{
+ LIST_HEAD(purge);
+ struct vmap_block *vb;
+ struct vmap_block *n_vb;
+ struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+
+ if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
+ continue;
+
+ spin_lock(&vb->lock);
+ if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
+ vb->free = 0; /* prevent further allocs after releasing lock */
+ vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
+ bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
+ bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
+ spin_lock(&vbq->lock);
+ list_del_rcu(&vb->free_list);
+ spin_unlock(&vbq->lock);
+ spin_unlock(&vb->lock);
+ list_add_tail(&vb->purge, &purge);
+ } else
+ spin_unlock(&vb->lock);
+ }
+ rcu_read_unlock();
+
+ list_for_each_entry_safe(vb, n_vb, &purge, purge) {
+ list_del(&vb->purge);
+ free_vmap_block(vb);
+ }
+}
+
+static void purge_fragmented_blocks_thiscpu(void)
+{
+ purge_fragmented_blocks(smp_processor_id());
+}
+
+static void purge_fragmented_blocks_allcpus(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ purge_fragmented_blocks(cpu);
+}
+
static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
{
struct vmap_block_queue *vbq;
struct vmap_block *vb;
unsigned long addr = 0;
unsigned int order;
+ int purge = 0;
BUG_ON(size & ~PAGE_MASK);
BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
@@ -806,24 +856,38 @@ again:
int i;
spin_lock(&vb->lock);
+ if (vb->free < 1UL << order)
+ goto next;
+
i = bitmap_find_free_region(vb->alloc_map,
VMAP_BBMAP_BITS, order);
- if (i >= 0) {
- addr = vb->va->va_start + (i << PAGE_SHIFT);
- BUG_ON(addr_to_vb_idx(addr) !=
- addr_to_vb_idx(vb->va->va_start));
- vb->free -= 1UL << order;
- if (vb->free == 0) {
- spin_lock(&vbq->lock);
- list_del_init(&vb->free_list);
- spin_unlock(&vbq->lock);
+ if (i < 0) {
+ if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
+ /* fragmented and no outstanding allocations */
+ BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
+ purge = 1;
}
- spin_unlock(&vb->lock);
- break;
+ goto next;
}
+ addr = vb->va->va_start + (i << PAGE_SHIFT);
+ BUG_ON(addr_to_vb_idx(addr) !=
+ addr_to_vb_idx(vb->va->va_start));
+ vb->free -= 1UL << order;
+ if (vb->free == 0) {
+ spin_lock(&vbq->lock);
+ list_del_rcu(&vb->free_list);
+ spin_unlock(&vbq->lock);
+ }
+ spin_unlock(&vb->lock);
+ break;
+next:
spin_unlock(&vb->lock);
}
+
+ if (purge)
+ purge_fragmented_blocks_thiscpu();
+
put_cpu_var(vmap_block_queue);
rcu_read_unlock();
@@ -860,11 +924,11 @@ static void vb_free(const void *addr, unsigned long size)
BUG_ON(!vb);
spin_lock(&vb->lock);
- bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order);
+ BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
vb->dirty += 1UL << order;
if (vb->dirty == VMAP_BBMAP_BITS) {
- BUG_ON(vb->free || !list_empty(&vb->free_list));
+ BUG_ON(vb->free);
spin_unlock(&vb->lock);
free_vmap_block(vb);
} else
@@ -1033,8 +1097,6 @@ void __init vmalloc_init(void)
vbq = &per_cpu(vmap_block_queue, i);
spin_lock_init(&vbq->lock);
INIT_LIST_HEAD(&vbq->free);
- INIT_LIST_HEAD(&vbq->dirty);
- vbq->nr_dirty = 0;
}
/* Import existing vmlist entries. */