aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/memcontrol.c155
-rw-r--r--mm/memory.c2
-rw-r--r--mm/mlock.c48
-rw-r--r--mm/mmap.c79
-rw-r--r--mm/nommu.c24
-rw-r--r--mm/page-writeback.c21
-rw-r--r--mm/shmem.c2
-rw-r--r--mm/slub.c2
-rw-r--r--mm/swapfile.c5
-rw-r--r--mm/vmalloc.c20
10 files changed, 203 insertions, 155 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e2996b80601..8e4be9cb2a6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -202,6 +202,7 @@ pcg_default_flags[NR_CHARGE_TYPE] = {
static void mem_cgroup_get(struct mem_cgroup *mem);
static void mem_cgroup_put(struct mem_cgroup *mem);
+static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);
static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
struct page_cgroup *pc,
@@ -358,6 +359,10 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
return;
pc = lookup_page_cgroup(page);
+ /*
+ * Used bit is set without atomic ops but after smp_wmb().
+ * For making pc->mem_cgroup visible, insert smp_rmb() here.
+ */
smp_rmb();
/* unused page is not rotated. */
if (!PageCgroupUsed(pc))
@@ -374,7 +379,10 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
if (mem_cgroup_disabled())
return;
pc = lookup_page_cgroup(page);
- /* barrier to sync with "charge" */
+ /*
+ * Used bit is set without atomic ops but after smp_wmb().
+ * For making pc->mem_cgroup visible, insert smp_rmb() here.
+ */
smp_rmb();
if (!PageCgroupUsed(pc))
return;
@@ -559,6 +567,14 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
return NULL;
pc = lookup_page_cgroup(page);
+ /*
+ * Used bit is set without atomic ops but after smp_wmb().
+ * For making pc->mem_cgroup visible, insert smp_rmb() here.
+ */
+ smp_rmb();
+ if (!PageCgroupUsed(pc))
+ return NULL;
+
mz = page_cgroup_zoneinfo(pc);
if (!mz)
return NULL;
@@ -618,7 +634,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
* called with hierarchy_mutex held
*/
static struct mem_cgroup *
-mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
+__mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
{
struct cgroup *cgroup, *curr_cgroup, *root_cgroup;
@@ -629,19 +645,16 @@ mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
/*
* Walk down to children
*/
- mem_cgroup_put(curr);
cgroup = list_entry(curr_cgroup->children.next,
struct cgroup, sibling);
curr = mem_cgroup_from_cont(cgroup);
- mem_cgroup_get(curr);
goto done;
}
visit_parent:
if (curr_cgroup == root_cgroup) {
- mem_cgroup_put(curr);
- curr = root_mem;
- mem_cgroup_get(curr);
+ /* caller handles NULL case */
+ curr = NULL;
goto done;
}
@@ -649,11 +662,9 @@ visit_parent:
* Goto next sibling
*/
if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) {
- mem_cgroup_put(curr);
cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup,
sibling);
curr = mem_cgroup_from_cont(cgroup);
- mem_cgroup_get(curr);
goto done;
}
@@ -664,7 +675,6 @@ visit_parent:
goto visit_parent;
done:
- root_mem->last_scanned_child = curr;
return curr;
}
@@ -674,40 +684,46 @@ done:
* that to reclaim free pages from.
*/
static struct mem_cgroup *
-mem_cgroup_get_first_node(struct mem_cgroup *root_mem)
+mem_cgroup_get_next_node(struct mem_cgroup *root_mem)
{
struct cgroup *cgroup;
- struct mem_cgroup *ret;
+ struct mem_cgroup *orig, *next;
bool obsolete;
- obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child);
-
/*
* Scan all children under the mem_cgroup mem
*/
mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);
+
+ orig = root_mem->last_scanned_child;
+ obsolete = mem_cgroup_is_obsolete(orig);
+
if (list_empty(&root_mem->css.cgroup->children)) {
- ret = root_mem;
+ /*
+ * root_mem might have children before and last_scanned_child
+ * may point to one of them. We put it later.
+ */
+ if (orig)
+ VM_BUG_ON(!obsolete);
+ next = NULL;
goto done;
}
- if (!root_mem->last_scanned_child || obsolete) {
-
- if (obsolete && root_mem->last_scanned_child)
- mem_cgroup_put(root_mem->last_scanned_child);
-
+ if (!orig || obsolete) {
cgroup = list_first_entry(&root_mem->css.cgroup->children,
struct cgroup, sibling);
- ret = mem_cgroup_from_cont(cgroup);
- mem_cgroup_get(ret);
+ next = mem_cgroup_from_cont(cgroup);
} else
- ret = mem_cgroup_get_next_node(root_mem->last_scanned_child,
- root_mem);
+ next = __mem_cgroup_get_next_node(orig, root_mem);
done:
- root_mem->last_scanned_child = ret;
+ if (next)
+ mem_cgroup_get(next);
+ root_mem->last_scanned_child = next;
+ if (orig)
+ mem_cgroup_put(orig);
mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
- return ret;
+ return (next) ? next : root_mem;
}
static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
@@ -758,28 +774,25 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
* but there might be left over accounting, even after children
* have left.
*/
- ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
+ ret += try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
get_swappiness(root_mem));
if (mem_cgroup_check_under_limit(root_mem))
- return 0;
+ return 1; /* indicate reclaim has succeeded */
if (!root_mem->use_hierarchy)
return ret;
- next_mem = mem_cgroup_get_first_node(root_mem);
+ next_mem = mem_cgroup_get_next_node(root_mem);
while (next_mem != root_mem) {
if (mem_cgroup_is_obsolete(next_mem)) {
- mem_cgroup_put(next_mem);
- next_mem = mem_cgroup_get_first_node(root_mem);
+ next_mem = mem_cgroup_get_next_node(root_mem);
continue;
}
- ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
+ ret += try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
get_swappiness(next_mem));
if (mem_cgroup_check_under_limit(root_mem))
- return 0;
- mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);
- next_mem = mem_cgroup_get_next_node(next_mem, root_mem);
- mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
+ return 1; /* indicate reclaim has succeeded */
+ next_mem = mem_cgroup_get_next_node(root_mem);
}
return ret;
}
@@ -863,6 +876,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
noswap);
+ if (ret)
+ continue;
/*
* try_to_free_mem_cgroup_pages() might not give us a full
@@ -979,14 +994,15 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
if (pc->mem_cgroup != from)
goto out;
- css_put(&from->css);
res_counter_uncharge(&from->res, PAGE_SIZE);
mem_cgroup_charge_statistics(from, pc, false);
if (do_swap_account)
res_counter_uncharge(&from->memsw, PAGE_SIZE);
+ css_put(&from->css);
+
+ css_get(&to->css);
pc->mem_cgroup = to;
mem_cgroup_charge_statistics(to, pc, true);
- css_get(&to->css);
ret = 0;
out:
unlock_page_cgroup(pc);
@@ -1019,8 +1035,10 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
if (ret || !parent)
return ret;
- if (!get_page_unless_zero(page))
- return -EBUSY;
+ if (!get_page_unless_zero(page)) {
+ ret = -EBUSY;
+ goto uncharge;
+ }
ret = isolate_lru_page(page);
@@ -1029,19 +1047,23 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
ret = mem_cgroup_move_account(pc, child, parent);
- /* drop extra refcnt by try_charge() (move_account increment one) */
- css_put(&parent->css);
putback_lru_page(page);
if (!ret) {
put_page(page);
+ /* drop extra refcnt by try_charge() */
+ css_put(&parent->css);
return 0;
}
- /* uncharge if move fails */
+
cancel:
+ put_page(page);
+uncharge:
+ /* drop extra refcnt by try_charge() */
+ css_put(&parent->css);
+ /* uncharge if move fails */
res_counter_uncharge(&parent->res, PAGE_SIZE);
if (do_swap_account)
res_counter_uncharge(&parent->memsw, PAGE_SIZE);
- put_page(page);
return ret;
}
@@ -1663,7 +1685,7 @@ move_account:
/* This is for making all *used* pages to be on LRU. */
lru_add_drain_all();
ret = 0;
- for_each_node_state(node, N_POSSIBLE) {
+ for_each_node_state(node, N_HIGH_MEMORY) {
for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
enum lru_list l;
for_each_lru(l) {
@@ -1971,6 +1993,7 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
{
struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
struct mem_cgroup *parent;
+
if (val > 100)
return -EINVAL;
@@ -1978,15 +2001,22 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
return -EINVAL;
parent = mem_cgroup_from_cont(cgrp->parent);
+
+ cgroup_lock();
+
/* If under hierarchy, only empty-root can set this value */
if ((parent->use_hierarchy) ||
- (memcg->use_hierarchy && !list_empty(&cgrp->children)))
+ (memcg->use_hierarchy && !list_empty(&cgrp->children))) {
+ cgroup_unlock();
return -EINVAL;
+ }
spin_lock(&memcg->reclaim_param_lock);
memcg->swappiness = val;
spin_unlock(&memcg->reclaim_param_lock);
+ cgroup_unlock();
+
return 0;
}
@@ -2164,10 +2194,23 @@ static void mem_cgroup_get(struct mem_cgroup *mem)
static void mem_cgroup_put(struct mem_cgroup *mem)
{
- if (atomic_dec_and_test(&mem->refcnt))
+ if (atomic_dec_and_test(&mem->refcnt)) {
+ struct mem_cgroup *parent = parent_mem_cgroup(mem);
__mem_cgroup_free(mem);
+ if (parent)
+ mem_cgroup_put(parent);
+ }
}
+/*
+ * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled.
+ */
+static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem)
+{
+ if (!mem->res.parent)
+ return NULL;
+ return mem_cgroup_from_res_counter(mem->res.parent, res);
+}
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
static void __init enable_swap_cgroup(void)
@@ -2181,7 +2224,7 @@ static void __init enable_swap_cgroup(void)
}
#endif
-static struct cgroup_subsys_state *
+static struct cgroup_subsys_state * __ref
mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
{
struct mem_cgroup *mem, *parent;
@@ -2206,6 +2249,13 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
if (parent && parent->use_hierarchy) {
res_counter_init(&mem->res, &parent->res);
res_counter_init(&mem->memsw, &parent->memsw);
+ /*
+ * We increment refcnt of the parent to ensure that we can
+ * safely access it on res_counter_charge/uncharge.
+ * This refcnt will be decremented when freeing this
+ * mem_cgroup(see mem_cgroup_put).
+ */
+ mem_cgroup_get(parent);
} else {
res_counter_init(&mem->res, NULL);
res_counter_init(&mem->memsw, NULL);
@@ -2232,7 +2282,14 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
static void mem_cgroup_destroy(struct cgroup_subsys *ss,
struct cgroup *cont)
{
- mem_cgroup_put(mem_cgroup_from_cont(cont));
+ struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *last_scanned_child = mem->last_scanned_child;
+
+ if (last_scanned_child) {
+ VM_BUG_ON(!mem_cgroup_is_obsolete(last_scanned_child));
+ mem_cgroup_put(last_scanned_child);
+ }
+ mem_cgroup_put(mem);
}
static int mem_cgroup_populate(struct cgroup_subsys *ss,
diff --git a/mm/memory.c b/mm/memory.c
index 22bfa7a47a0..baa999e87cd 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1999,7 +1999,7 @@ gotten:
* Don't let another task, with possibly unlocked vma,
* keep the mlocked page.
*/
- if (vma->vm_flags & VM_LOCKED) {
+ if ((vma->vm_flags & VM_LOCKED) && old_page) {
lock_page(old_page); /* for LRU manipulation */
clear_page_mlock(old_page);
unlock_page(old_page);
diff --git a/mm/mlock.c b/mm/mlock.c
index 2904a347e47..037161d61b4 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -294,14 +294,10 @@ static inline int __mlock_posix_error_return(long retval)
*
* return number of pages [> 0] to be removed from locked_vm on success
* of "special" vmas.
- *
- * return negative error if vma spanning @start-@range disappears while
- * mmap semaphore is dropped. Unlikely?
*/
long mlock_vma_pages_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- struct mm_struct *mm = vma->vm_mm;
int nr_pages = (end - start) / PAGE_SIZE;
BUG_ON(!(vma->vm_flags & VM_LOCKED));
@@ -314,20 +310,11 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
is_vm_hugetlb_page(vma) ||
vma == get_gate_vma(current))) {
- long error;
- downgrade_write(&mm->mmap_sem);
-
- error = __mlock_vma_pages_range(vma, start, end, 1);
- up_read(&mm->mmap_sem);
- /* vma can change or disappear */
- down_write(&mm->mmap_sem);
- vma = find_vma(mm, start);
- /* non-NULL vma must contain @start, but need to check @end */
- if (!vma || end > vma->vm_end)
- return -ENOMEM;
+ __mlock_vma_pages_range(vma, start, end, 1);
- return 0; /* hide other errors from mmap(), et al */
+ /* Hide errors from mmap() and other callers */
+ return 0;
}
/*
@@ -438,41 +425,14 @@ success:
vma->vm_flags = newflags;
if (lock) {
- /*
- * mmap_sem is currently held for write. Downgrade the write
- * lock to a read lock so that other faults, mmap scans, ...
- * while we fault in all pages.
- */
- downgrade_write(&mm->mmap_sem);
-
ret = __mlock_vma_pages_range(vma, start, end, 1);
- /*
- * Need to reacquire mmap sem in write mode, as our callers
- * expect this. We have no support for atomically upgrading
- * a sem to write, so we need to check for ranges while sem
- * is unlocked.
- */
- up_read(&mm->mmap_sem);
- /* vma can change or disappear */
- down_write(&mm->mmap_sem);
- *prev = find_vma(mm, start);
- /* non-NULL *prev must contain @start, but need to check @end */
- if (!(*prev) || end > (*prev)->vm_end)
- ret = -ENOMEM;
- else if (ret > 0) {
+ if (ret > 0) {
mm->locked_vm -= ret;
ret = 0;
} else
ret = __mlock_posix_error_return(ret); /* translate if needed */
} else {
- /*
- * TODO: for unlocking, pages will already be resident, so
- * we don't need to wait for allocations/reclaim/pagein, ...
- * However, unlocking a very large region can still take a
- * while. Should we downgrade the semaphore for both lock
- * AND unlock ?
- */
__mlock_vma_pages_range(vma, start, end, 0);
}
diff --git a/mm/mmap.c b/mm/mmap.c
index 8d95902e9a3..214b6a258ee 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -658,6 +658,9 @@ again: remove_next = 1 + (end > next->vm_end);
validate_mm(mm);
}
+/* Flags that can be inherited from an existing mapping when merging */
+#define VM_MERGEABLE_FLAGS (VM_CAN_NONLINEAR)
+
/*
* If the vma has a ->close operation then the driver probably needs to release
* per-vma resources, so we don't attempt to merge those.
@@ -665,7 +668,7 @@ again: remove_next = 1 + (end > next->vm_end);
static inline int is_mergeable_vma(struct vm_area_struct *vma,
struct file *file, unsigned long vm_flags)
{
- if (vma->vm_flags != vm_flags)
+ if ((vma->vm_flags ^ vm_flags) & ~VM_MERGEABLE_FLAGS)
return 0;
if (vma->vm_file != file)
return 0;
@@ -1087,6 +1090,15 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
mapping_cap_account_dirty(vma->vm_file->f_mapping);
}
+/*
+ * We account for memory if it's a private writeable mapping,
+ * and VM_NORESERVE wasn't set.
+ */
+static inline int accountable_mapping(unsigned int vm_flags)
+{
+ return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
+}
+
unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, unsigned long flags,
unsigned int vm_flags, unsigned long pgoff,
@@ -1114,36 +1126,32 @@ munmap_back:
if (!may_expand_vm(mm, len >> PAGE_SHIFT))
return -ENOMEM;
- if (flags & MAP_NORESERVE)
+ /*
+ * Set 'VM_NORESERVE' if we should not account for the
+ * memory use of this mapping. We only honor MAP_NORESERVE
+ * if we're allowed to overcommit memory.
+ */
+ if ((flags & MAP_NORESERVE) && sysctl_overcommit_memory != OVERCOMMIT_NEVER)
+ vm_flags |= VM_NORESERVE;
+ if (!accountable)
vm_flags |= VM_NORESERVE;
- if (accountable && (!(flags & MAP_NORESERVE) ||
- sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
- if (vm_flags & VM_SHARED) {
- /* Check memory availability in shmem_file_setup? */
- vm_flags |= VM_ACCOUNT;
- } else if (vm_flags & VM_WRITE) {
- /*
- * Private writable mapping: check memory availability
- */
- charged = len >> PAGE_SHIFT;
- if (security_vm_enough_memory(charged))
- return -ENOMEM;
- vm_flags |= VM_ACCOUNT;
- }
+ /*
+ * Private writable mapping: check memory availability
+ */
+ if (accountable_mapping(vm_flags)) {
+ charged = len >> PAGE_SHIFT;
+ if (security_vm_enough_memory(charged))
+ return -ENOMEM;
+ vm_flags |= VM_ACCOUNT;
}
/*
- * Can we just expand an old private anonymous mapping?
- * The VM_SHARED test is necessary because shmem_zero_setup
- * will create the file object for a shared anonymous map below.
+ * Can we just expand an old mapping?
*/
- if (!file && !(vm_flags & VM_SHARED)) {
- vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
- NULL, NULL, pgoff, NULL);
- if (vma)
- goto out;
- }
+ vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
+ if (vma)
+ goto out;
/*
* Determine the object being mapped and call the appropriate
@@ -1186,14 +1194,6 @@ munmap_back:
goto free_vma;
}
- /* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform
- * shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
- * that memory reservation must be checked; but that reservation
- * belongs to shared memory object, not to vma: so now clear it.
- */
- if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT))
- vma->vm_flags &= ~VM_ACCOUNT;
-
/* Can addr have changed??
*
* Answer: Yes, several device drivers can do it in their
@@ -1206,17 +1206,8 @@ munmap_back:
if (vma_wants_writenotify(vma))
vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
- if (file && vma_merge(mm, prev, addr, vma->vm_end,
- vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
- mpol_put(vma_policy(vma));
- kmem_cache_free(vm_area_cachep, vma);
- fput(file);
- if (vm_flags & VM_EXECUTABLE)
- removed_exe_file_vma(mm);
- } else {
- vma_link(mm, vma, prev, rb_link, rb_parent);
- file = vma->vm_file;
- }
+ vma_link(mm, vma, prev, rb_link, rb_parent);
+ file = vma->vm_file;
/* Once vma denies write, undo our temporary denial count */
if (correct_wcount)
diff --git a/mm/nommu.c b/mm/nommu.c
index 8cee8c8ff0f..2fcf47d449b 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -10,7 +10,7 @@
* Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
* Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
* Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com>
- * Copyright (c) 2007-2008 Paul Mundt <lethal@linux-sh.org>
+ * Copyright (c) 2007-2009 Paul Mundt <lethal@linux-sh.org>
*/
#include <linux/module.h>
@@ -394,6 +394,24 @@ void vunmap(const void *addr)
}
EXPORT_SYMBOL(vunmap);
+void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
+{
+ BUG();
+ return NULL;
+}
+EXPORT_SYMBOL(vm_map_ram);
+
+void vm_unmap_ram(const void *mem, unsigned int count)
+{
+ BUG();
+}
+EXPORT_SYMBOL(vm_unmap_ram);
+
+void vm_unmap_aliases(void)
+{
+}
+EXPORT_SYMBOL_GPL(vm_unmap_aliases);
+
/*
* Implement a stub for vmalloc_sync_all() if the architecture chose not to
* have one.
@@ -1143,8 +1161,8 @@ error_free:
return ret;
enomem:
- printk("Allocation of length %lu from process %d failed\n",
- len, current->pid);
+ printk("Allocation of length %lu from process %d (%s) failed\n",
+ len, current->pid, current->comm);
show_free_areas();
return -ENOMEM;
}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b493db7841d..dc32dae01e5 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1051,13 +1051,22 @@ continue_unlock:
}
}
- if (wbc->sync_mode == WB_SYNC_NONE) {
- wbc->nr_to_write--;
- if (wbc->nr_to_write <= 0) {
- done = 1;
- break;
- }
+ if (nr_to_write > 0)
+ nr_to_write--;
+ else if (wbc->sync_mode == WB_SYNC_NONE) {
+ /*
+ * We stop writing back only if we are not
+ * doing integrity sync. In case of integrity
+ * sync we have to keep going because someone
+ * may be concurrently dirtying pages, and we
+ * might have synced a lot of newly appeared
+ * dirty pages, but have not synced all of the
+ * old dirty pages.
+ */
+ done = 1;
+ break;
}
+
if (wbc->nonblocking && bdi_write_congested(bdi)) {
wbc->encountered_congestion = 1;
done = 1;
diff --git a/mm/shmem.c b/mm/shmem.c
index 5d0de96c978..19d566ccdee 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2628,7 +2628,7 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
goto close_file;
#ifdef CONFIG_SHMEM
- SHMEM_I(inode)->flags = flags & VM_ACCOUNT;
+ SHMEM_I(inode)->flags = (flags & VM_NORESERVE) ? 0 : VM_ACCOUNT;
#endif
d_instantiate(dentry, inode);
inode->i_size = size;
diff --git a/mm/slub.c b/mm/slub.c
index 6392ae5cc6b..bdc9abb08a2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1996,7 +1996,7 @@ static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu)
{
if (c < per_cpu(kmem_cache_cpu, cpu) ||
- c > per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
+ c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
kfree(c);
return;
}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f48b831e5e5..7e6304dfafa 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -698,8 +698,10 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
pte_t *pte;
int ret = 1;
- if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr))
+ if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr)) {
ret = -ENOMEM;
+ goto out_nolock;
+ }
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
@@ -723,6 +725,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
activate_page(page);
out:
pte_unmap_unlock(pte, ptl);
+out_nolock:
return ret;
}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index c5db9a7264d..75f49d312e8 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -14,7 +14,6 @@
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <linux/mutex.h>
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -24,6 +23,7 @@
#include <linux/rbtree.h>
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
+#include <linux/bootmem.h>
#include <asm/atomic.h>
#include <asm/uaccess.h>
@@ -495,7 +495,7 @@ static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
int sync, int force_flush)
{
- static DEFINE_MUTEX(purge_lock);
+ static DEFINE_SPINLOCK(purge_lock);
LIST_HEAD(valist);
struct vmap_area *va;
int nr = 0;
@@ -506,10 +506,10 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
* the case that isn't actually used at the moment anyway.
*/
if (!sync && !force_flush) {
- if (!mutex_trylock(&purge_lock))
+ if (!spin_trylock(&purge_lock))
return;
} else
- mutex_lock(&purge_lock);
+ spin_lock(&purge_lock);
rcu_read_lock();
list_for_each_entry_rcu(va, &vmap_area_list, list) {
@@ -541,7 +541,7 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
__free_vmap_area(va);
spin_unlock(&vmap_area_lock);
}
- mutex_unlock(&purge_lock);
+ spin_unlock(&purge_lock);
}
/*
@@ -984,6 +984,8 @@ EXPORT_SYMBOL(vm_map_ram);
void __init vmalloc_init(void)
{
+ struct vmap_area *va;
+ struct vm_struct *tmp;
int i;
for_each_possible_cpu(i) {
@@ -996,6 +998,14 @@ void __init vmalloc_init(void)
vbq->nr_dirty = 0;
}
+ /* Import existing vmlist entries. */
+ for (tmp = vmlist; tmp; tmp = tmp->next) {
+ va = alloc_bootmem(sizeof(struct vmap_area));
+ va->flags = tmp->flags | VM_VM_AREA;
+ va->va_start = (unsigned long)tmp->addr;
+ va->va_end = va->va_start + tmp->size;
+ __insert_vmap_area(va);
+ }
vmap_initialized = true;
}