aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpuset.c22
-rw-r--r--kernel/delayacct.c6
-rw-r--r--kernel/exit.c17
-rw-r--r--kernel/fork.c3
-rw-r--r--kernel/pid.c4
-rw-r--r--kernel/power/Kconfig11
-rw-r--r--kernel/power/disk.c63
-rw-r--r--kernel/power/main.c19
-rw-r--r--kernel/power/power.h42
-rw-r--r--kernel/power/process.c6
-rw-r--r--kernel/power/snapshot.c298
-rw-r--r--kernel/power/swap.c18
-rw-r--r--kernel/power/swsusp.c139
-rw-r--r--kernel/power/user.c39
-rw-r--r--kernel/sched.c8
-rw-r--r--kernel/signal.c6
-rw-r--r--kernel/sys.c2
-rw-r--r--kernel/taskstats.c4
18 files changed, 498 insertions, 209 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f382b0f775e..d240349cbf0 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2351,6 +2351,8 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
* z's node is in our tasks mems_allowed, yes. If it's not a
* __GFP_HARDWALL request and this zone's nodes is in the nearest
* mem_exclusive cpuset ancestor to this tasks cpuset, yes.
+ * If the task has been OOM killed and has access to memory reserves
+ * as specified by the TIF_MEMDIE flag, yes.
* Otherwise, no.
*
* If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall()
@@ -2368,7 +2370,8 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
* calls get to this routine, we should just shut up and say 'yes'.
*
* GFP_USER allocations are marked with the __GFP_HARDWALL bit,
- * and do not allow allocations outside the current tasks cpuset.
+ * and do not allow allocations outside the current tasks cpuset
+ * unless the task has been OOM killed as is marked TIF_MEMDIE.
* GFP_KERNEL allocations are not so marked, so can escape to the
* nearest enclosing mem_exclusive ancestor cpuset.
*
@@ -2392,6 +2395,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
* affect that:
* in_interrupt - any node ok (current task context irrelevant)
* GFP_ATOMIC - any node ok
+ * TIF_MEMDIE - any node ok
* GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok
* GFP_USER - only nodes in current tasks mems allowed ok.
*
@@ -2413,6 +2417,12 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
if (node_isset(node, current->mems_allowed))
return 1;
+ /*
+ * Allow tasks that have access to memory reserves because they have
+ * been OOM killed to get memory anywhere.
+ */
+ if (unlikely(test_thread_flag(TIF_MEMDIE)))
+ return 1;
if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */
return 0;
@@ -2438,7 +2448,9 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
*
* If we're in interrupt, yes, we can always allocate.
* If __GFP_THISNODE is set, yes, we can always allocate. If zone
- * z's node is in our tasks mems_allowed, yes. Otherwise, no.
+ * z's node is in our tasks mems_allowed, yes. If the task has been
+ * OOM killed and has access to memory reserves as specified by the
+ * TIF_MEMDIE flag, yes. Otherwise, no.
*
* The __GFP_THISNODE placement logic is really handled elsewhere,
* by forcibly using a zonelist starting at a specified node, and by
@@ -2462,6 +2474,12 @@ int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
node = zone_to_nid(z);
if (node_isset(node, current->mems_allowed))
return 1;
+ /*
+ * Allow tasks that have access to memory reserves because they have
+ * been OOM killed to get memory anywhere.
+ */
+ if (unlikely(test_thread_flag(TIF_MEMDIE)))
+ return 1;
return 0;
}
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 766d5912b26..c0148ae992c 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -31,11 +31,7 @@ __setup("nodelayacct", delayacct_setup_disable);
void delayacct_init(void)
{
- delayacct_cache = kmem_cache_create("delayacct_cache",
- sizeof(struct task_delay_info),
- 0,
- SLAB_PANIC,
- NULL, NULL);
+ delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC);
delayacct_tsk_init(&init_task);
}
diff --git a/kernel/exit.c b/kernel/exit.c
index b55ed4cc910..92369240d91 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1033,6 +1033,8 @@ asmlinkage void sys_exit_group(int error_code)
static int eligible_child(pid_t pid, int options, struct task_struct *p)
{
+ int err;
+
if (pid > 0) {
if (p->pid != pid)
return 0;
@@ -1066,8 +1068,9 @@ static int eligible_child(pid_t pid, int options, struct task_struct *p)
if (delay_group_leader(p))
return 2;
- if (security_task_wait(p))
- return 0;
+ err = security_task_wait(p);
+ if (err)
+ return err;
return 1;
}
@@ -1449,6 +1452,7 @@ static long do_wait(pid_t pid, int options, struct siginfo __user *infop,
DECLARE_WAITQUEUE(wait, current);
struct task_struct *tsk;
int flag, retval;
+ int allowed, denied;
add_wait_queue(&current->signal->wait_chldexit,&wait);
repeat:
@@ -1457,6 +1461,7 @@ repeat:
* match our criteria, even if we are not able to reap it yet.
*/
flag = 0;
+ allowed = denied = 0;
current->state = TASK_INTERRUPTIBLE;
read_lock(&tasklist_lock);
tsk = current;
@@ -1472,6 +1477,12 @@ repeat:
if (!ret)
continue;
+ if (unlikely(ret < 0)) {
+ denied = ret;
+ continue;
+ }
+ allowed = 1;
+
switch (p->state) {
case TASK_TRACED:
/*
@@ -1570,6 +1581,8 @@ check_continued:
goto repeat;
}
retval = -ECHILD;
+ if (unlikely(denied) && !allowed)
+ retval = denied;
end:
current->state = TASK_RUNNING;
remove_wait_queue(&current->signal->wait_chldexit,&wait);
diff --git a/kernel/fork.c b/kernel/fork.c
index ffccefb28b6..b7d169def94 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1425,8 +1425,7 @@ static void sighand_ctor(void *data, struct kmem_cache *cachep, unsigned long fl
{
struct sighand_struct *sighand = data;
- if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
- SLAB_CTOR_CONSTRUCTOR)
+ if (flags & SLAB_CTOR_CONSTRUCTOR)
spin_lock_init(&sighand->siglock);
}
diff --git a/kernel/pid.c b/kernel/pid.c
index 78f2aee90f5..9c80bc23d6b 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -412,7 +412,5 @@ void __init pidmap_init(void)
set_bit(0, init_pid_ns.pidmap[0].page);
atomic_dec(&init_pid_ns.pidmap[0].nr_free);
- pid_cachep = kmem_cache_create("pid", sizeof(struct pid),
- __alignof__(struct pid),
- SLAB_PANIC, NULL, NULL);
+ pid_cachep = KMEM_CACHE(pid, SLAB_PANIC);
}
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 51a4dd0f1b7..877721708fa 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -78,17 +78,22 @@ config PM_SYSFS_DEPRECATED
are likely to be bus or driver specific.
config SOFTWARE_SUSPEND
- bool "Software Suspend"
+ bool "Software Suspend (Hibernation)"
depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP))
---help---
- Enable the suspend to disk (STD) functionality.
+ Enable the suspend to disk (STD) functionality, which is usually
+ called "hibernation" in user interfaces. STD checkpoints the
+ system and powers it off; and restores that checkpoint on reboot.
You can suspend your machine with 'echo disk > /sys/power/state'.
Alternatively, you can use the additional userland tools available
from <http://suspend.sf.net>.
In principle it does not require ACPI or APM, although for example
- ACPI will be used if available.
+ ACPI will be used for the final steps when it is available. One
+ of the reasons to use software suspend is that the firmware hooks
+ for suspend states like suspend-to-RAM (STR) often don't work very
+ well with Linux.
It creates an image which is saved in your active swap. Upon the next
boot, pass the 'resume=/dev/swappartition' argument to the kernel to
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 8df51c23bba..06331374d86 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -130,15 +130,25 @@ int pm_suspend_disk(void)
{
int error;
+ /* The snapshot device should not be opened while we're running */
+ if (!atomic_add_unless(&snapshot_device_available, -1, 0))
+ return -EBUSY;
+
+ /* Allocate memory management structures */
+ error = create_basic_memory_bitmaps();
+ if (error)
+ goto Exit;
+
error = prepare_processes();
if (error)
- return error;
+ goto Finish;
if (pm_disk_mode == PM_DISK_TESTPROC) {
printk("swsusp debug: Waiting for 5 seconds.\n");
mdelay(5000);
goto Thaw;
}
+
/* Free memory before shutting down devices. */
error = swsusp_shrink_memory();
if (error)
@@ -196,6 +206,10 @@ int pm_suspend_disk(void)
resume_console();
Thaw:
unprepare_processes();
+ Finish:
+ free_basic_memory_bitmaps();
+ Exit:
+ atomic_inc(&snapshot_device_available);
return error;
}
@@ -239,13 +253,21 @@ static int software_resume(void)
}
pr_debug("PM: Checking swsusp image.\n");
-
error = swsusp_check();
if (error)
- goto Done;
+ goto Unlock;
- pr_debug("PM: Preparing processes for restore.\n");
+ /* The snapshot device should not be opened while we're running */
+ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+ error = -EBUSY;
+ goto Unlock;
+ }
+ error = create_basic_memory_bitmaps();
+ if (error)
+ goto Finish;
+
+ pr_debug("PM: Preparing processes for restore.\n");
error = prepare_processes();
if (error) {
swsusp_close();
@@ -280,7 +302,11 @@ static int software_resume(void)
printk(KERN_ERR "PM: Restore failed, recovering.\n");
unprepare_processes();
Done:
+ free_basic_memory_bitmaps();
+ Finish:
+ atomic_inc(&snapshot_device_available);
/* For success case, the suspend path will release the lock */
+ Unlock:
mutex_unlock(&pm_mutex);
pr_debug("PM: Resume from disk failed.\n");
return 0;
@@ -324,7 +350,34 @@ static const char * const pm_disk_modes[] = {
static ssize_t disk_show(struct kset *kset, char *buf)
{
- return sprintf(buf, "%s\n", pm_disk_modes[pm_disk_mode]);
+ int i;
+ char *start = buf;
+
+ for (i = PM_DISK_PLATFORM; i < PM_DISK_MAX; i++) {
+ if (!pm_disk_modes[i])
+ continue;
+ switch (i) {
+ case PM_DISK_SHUTDOWN:
+ case PM_DISK_REBOOT:
+ case PM_DISK_TEST:
+ case PM_DISK_TESTPROC:
+ break;
+ default:
+ if (pm_ops && pm_ops->enter &&
+ (i == pm_ops->pm_disk_mode))
+ break;
+ /* not a valid mode, continue with loop */
+ continue;
+ }
+ if (i == pm_disk_mode)
+ buf += sprintf(buf, "[%s]", pm_disk_modes[i]);
+ else
+ buf += sprintf(buf, "%s", pm_disk_modes[i]);
+ if (i+1 != PM_DISK_MAX)
+ buf += sprintf(buf, " ");
+ }
+ buf += sprintf(buf, "\n");
+ return buf-start;
}
diff --git a/kernel/power/main.c b/kernel/power/main.c
index b21c2a56f96..f6dda685e7e 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -184,17 +184,21 @@ static void suspend_finish(suspend_state_t state)
static const char * const pm_states[PM_SUSPEND_MAX] = {
[PM_SUSPEND_STANDBY] = "standby",
[PM_SUSPEND_MEM] = "mem",
-#ifdef CONFIG_SOFTWARE_SUSPEND
[PM_SUSPEND_DISK] = "disk",
-#endif
};
static inline int valid_state(suspend_state_t state)
{
/* Suspend-to-disk does not really need low-level support.
- * It can work with reboot if needed. */
+ * It can work with shutdown/reboot if needed. If it isn't
+ * configured, then it cannot be supported.
+ */
if (state == PM_SUSPEND_DISK)
+#ifdef CONFIG_SOFTWARE_SUSPEND
return 1;
+#else
+ return 0;
+#endif
/* all other states need lowlevel support and need to be
* valid to the lowlevel implementation, no valid callback
@@ -244,15 +248,6 @@ static int enter_state(suspend_state_t state)
return error;
}
-/*
- * This is main interface to the outside world. It needs to be
- * called from process context.
- */
-int software_suspend(void)
-{
- return enter_state(PM_SUSPEND_DISK);
-}
-
/**
* pm_suspend - Externally visible function for suspending system.
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 33bd94ceba3..34b43542785 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -14,8 +14,18 @@ struct swsusp_info {
#ifdef CONFIG_SOFTWARE_SUSPEND
-extern int pm_suspend_disk(void);
+/*
+ * Keep some memory free so that I/O operations can succeed without paging
+ * [Might this be more than 4 MB?]
+ */
+#define PAGES_FOR_IO ((4096 * 1024) >> PAGE_SHIFT)
+/*
+ * Keep 1 MB of memory free so that device drivers can allocate some pages in
+ * their .suspend() routines without breaking the suspend to disk.
+ */
+#define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT)
+extern int pm_suspend_disk(void);
#else
static inline int pm_suspend_disk(void)
{
@@ -48,6 +58,8 @@ extern sector_t swsusp_resume_block;
extern asmlinkage int swsusp_arch_suspend(void);
extern asmlinkage int swsusp_arch_resume(void);
+extern int create_basic_memory_bitmaps(void);
+extern void free_basic_memory_bitmaps(void);
extern unsigned int count_data_pages(void);
/**
@@ -138,30 +150,12 @@ struct resume_swap_area {
#define PMOPS_ENTER 2
#define PMOPS_FINISH 3
-/**
- * The bitmap is used for tracing allocated swap pages
- *
- * The entire bitmap consists of a number of bitmap_page
- * structures linked with the help of the .next member.
- * Thus each page can be allocated individually, so we only
- * need to make 0-order memory allocations to create
- * the bitmap.
- */
-
-#define BITMAP_PAGE_SIZE (PAGE_SIZE - sizeof(void *))
-#define BITMAP_PAGE_CHUNKS (BITMAP_PAGE_SIZE / sizeof(long))
-#define BITS_PER_CHUNK (sizeof(long) * 8)
-#define BITMAP_PAGE_BITS (BITMAP_PAGE_CHUNKS * BITS_PER_CHUNK)
-
-struct bitmap_page {
- unsigned long chunks[BITMAP_PAGE_CHUNKS];
- struct bitmap_page *next;
-};
+/* If unset, the snapshot device cannot be open. */
+extern atomic_t snapshot_device_available;
-extern void free_bitmap(struct bitmap_page *bitmap);
-extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits);
-extern sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap);
-extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap);
+extern sector_t alloc_swapdev_block(int swap);
+extern void free_all_swap_pages(int swap);
+extern int swsusp_swap_in_use(void);
extern int swsusp_check(void);
extern int swsusp_shrink_memory(void);
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 6d566bf7085..0eb5c420e8e 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -47,8 +47,10 @@ void refrigerator(void)
recalc_sigpending(); /* We sent fake signal, clean it up */
spin_unlock_irq(&current->sighand->siglock);
- while (frozen(current)) {
- current->state = TASK_UNINTERRUPTIBLE;
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (!frozen(current))
+ break;
schedule();
}
pr_debug("%s left refrigerator\n", current->comm);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 704c25a3ffe..128da11f01c 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -21,6 +21,7 @@
#include <linux/kernel.h>
#include <linux/pm.h>
#include <linux/device.h>
+#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/syscalls.h>
#include <linux/console.h>
@@ -34,6 +35,10 @@
#include "power.h"
+static int swsusp_page_is_free(struct page *);
+static void swsusp_set_page_forbidden(struct page *);
+static void swsusp_unset_page_forbidden(struct page *);
+
/* List of PBEs needed for restoring the pages that were allocated before
* the suspend and included in the suspend image, but have also been
* allocated by the "resume" kernel, so their contents cannot be written
@@ -67,15 +72,15 @@ static void *get_image_page(gfp_t gfp_mask, int safe_needed)
res = (void *)get_zeroed_page(gfp_mask);
if (safe_needed)
- while (res && PageNosaveFree(virt_to_page(res))) {
+ while (res && swsusp_page_is_free(virt_to_page(res))) {
/* The page is unsafe, mark it for swsusp_free() */
- SetPageNosave(virt_to_page(res));
+ swsusp_set_page_forbidden(virt_to_page(res));
allocated_unsafe_pages++;
res = (void *)get_zeroed_page(gfp_mask);
}
if (res) {
- SetPageNosave(virt_to_page(res));
- SetPageNosaveFree(virt_to_page(res));
+ swsusp_set_page_forbidden(virt_to_page(res));
+ swsusp_set_page_free(virt_to_page(res));
}
return res;
}
@@ -91,8 +96,8 @@ static struct page *alloc_image_page(gfp_t gfp_mask)
page = alloc_page(gfp_mask);
if (page) {
- SetPageNosave(page);
- SetPageNosaveFree(page);
+ swsusp_set_page_forbidden(page);
+ swsusp_set_page_free(page);
}
return page;
}
@@ -110,9 +115,9 @@ static inline void free_image_page(void *addr, int clear_nosave_free)
page = virt_to_page(addr);
- ClearPageNosave(page);
+ swsusp_unset_page_forbidden(page);
if (clear_nosave_free)
- ClearPageNosaveFree(page);
+ swsusp_unset_page_free(page);
__free_page(page);
}
@@ -224,11 +229,6 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
* of type unsigned long each). It also contains the pfns that
* correspond to the start and end of the represented memory area and
* the number of bit chunks in the block.
- *
- * NOTE: Memory bitmaps are used for two types of operations only:
- * "set a bit" and "find the next bit set". Moreover, the searching
- * is always carried out after all of the "set a bit" operations
- * on given bitmap.
*/
#define BM_END_OF_MAP (~0UL)
@@ -443,15 +443,13 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
}
/**
- * memory_bm_set_bit - set the bit in the bitmap @bm that corresponds
+ * memory_bm_find_bit - find the bit in the bitmap @bm that corresponds
* to given pfn. The cur_zone_bm member of @bm and the cur_block member
* of @bm->cur_zone_bm are updated.
- *
- * If the bit cannot be set, the function returns -EINVAL .
*/
-static int
-memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
+static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
+ void **addr, unsigned int *bit_nr)
{
struct zone_bitmap *zone_bm;
struct bm_block *bb;
@@ -463,8 +461,8 @@ memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
/* We don't assume that the zones are sorted by pfns */
while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
zone_bm = zone_bm->next;
- if (unlikely(!zone_bm))
- return -EINVAL;
+
+ BUG_ON(!zone_bm);
}
bm->cur.zone_bm = zone_bm;
}
@@ -475,13 +473,40 @@ memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
while (pfn >= bb->end_pfn) {
bb = bb->next;
- if (unlikely(!bb))
- return -EINVAL;
+
+ BUG_ON(!bb);
}
zone_bm->cur_block = bb;
pfn -= bb->start_pfn;
- set_bit(pfn % BM_BITS_PER_CHUNK, bb->data + pfn / BM_BITS_PER_CHUNK);
- return 0;
+ *bit_nr = pfn % BM_BITS_PER_CHUNK;
+ *addr = bb->data + pfn / BM_BITS_PER_CHUNK;
+}
+
+static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
+{
+ void *addr;
+ unsigned int bit;
+
+ memory_bm_find_bit(bm, pfn, &addr, &bit);
+ set_bit(bit, addr);
+}
+
+static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
+{
+ void *addr;
+ unsigned int bit;
+
+ memory_bm_find_bit(bm, pfn, &addr, &bit);
+ clear_bit(bit, addr);
+}
+
+static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
+{
+ void *addr;
+ unsigned int bit;
+
+ memory_bm_find_bit(bm, pfn, &addr, &bit);
+ return test_bit(bit, addr);
}
/* Two auxiliary functions for memory_bm_next_pfn */
@@ -564,6 +589,199 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
}
/**
+ * This structure represents a range of page frames the contents of which
+ * should not be saved during the suspend.
+ */
+
+struct nosave_region {
+ struct list_head list;
+ unsigned long start_pfn;
+ unsigned long end_pfn;
+};
+
+static LIST_HEAD(nosave_regions);
+
+/**
+ * register_nosave_region - register a range of page frames the contents
+ * of which should not be saved during the suspend (to be used in the early
+ * initialization code)
+ */
+
+void __init
+register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
+{
+ struct nosave_region *region;
+
+ if (start_pfn >= end_pfn)
+ return;
+
+ if (!list_empty(&nosave_regions)) {
+ /* Try to extend the previous region (they should be sorted) */
+ region = list_entry(nosave_regions.prev,
+ struct nosave_region, list);
+ if (region->end_pfn == start_pfn) {
+ region->end_pfn = end_pfn;
+ goto Report;
+ }
+ }
+ /* This allocation cannot fail */
+ region = alloc_bootmem_low(sizeof(struct nosave_region));
+ region->start_pfn = start_pfn;
+ region->end_pfn = end_pfn;
+ list_add_tail(&region->list, &nosave_regions);
+ Report:
+ printk("swsusp: Registered nosave memory region: %016lx - %016lx\n",
+ start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
+}
+
+/*
+ * Set bits in this map correspond to the page frames the contents of which
+ * should not be saved during the suspend.
+ */
+static struct memory_bitmap *forbidden_pages_map;
+
+/* Set bits in this map correspond to free page frames. */
+static struct memory_bitmap *free_pages_map;
+
+/*
+ * Each page frame allocated for creating the image is marked by setting the
+ * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
+ */
+
+void swsusp_set_page_free(struct page *page)
+{
+ if (free_pages_map)
+ memory_bm_set_bit(free_pages_map, page_to_pfn(page));
+}
+
+static int swsusp_page_is_free(struct page *page)
+{
+ return free_pages_map ?
+ memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
+}
+
+void swsusp_unset_page_free(struct page *page)
+{
+ if (free_pages_map)
+ memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
+}
+
+static void swsusp_set_page_forbidden(struct page *page)
+{
+ if (forbidden_pages_map)
+ memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
+}
+
+int swsusp_page_is_forbidden(struct page *page)
+{
+ return forbidden_pages_map ?
+ memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
+}
+
+static void swsusp_unset_page_forbidden(struct page *page)
+{
+ if (forbidden_pages_map)
+ memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
+}
+
+/**
+ * mark_nosave_pages - set bits corresponding to the page frames the
+ * contents of which should not be saved in a given bitmap.
+ */
+
+static void mark_nosave_pages(struct memory_bitmap *bm)
+{
+ struct nosave_region *region;
+
+ if (list_empty(&nosave_regions))
+ return;
+
+ list_for_each_entry(region, &nosave_regions, list) {
+ unsigned long pfn;
+
+ printk("swsusp: Marking nosave pages: %016lx - %016lx\n",
+ region->start_pfn << PAGE_SHIFT,
+ region->end_pfn << PAGE_SHIFT);
+
+ for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
+ memory_bm_set_bit(bm, pfn);
+ }
+}
+
+/**
+ * create_basic_memory_bitmaps - create bitmaps needed for marking page
+ * frames that should not be saved and free page frames. The pointers
+ * forbidden_pages_map and free_pages_map are only modified if everything
+ * goes well, because we don't want the bits to be used before both bitmaps
+ * are set up.
+ */
+
+int create_basic_memory_bitmaps(void)
+{
+ struct memory_bitmap *bm1, *bm2;
+ int error = 0;
+
+ BUG_ON(forbidden_pages_map || free_pages_map);
+
+ bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
+ if (!bm1)
+ return -ENOMEM;
+
+ error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
+ if (error)
+ goto Free_first_object;
+
+ bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
+ if (!bm2)
+ goto Free_first_bitmap;
+
+ error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
+ if (error)
+ goto Free_second_object;
+
+ forbidden_pages_map = bm1;
+ free_pages_map = bm2;
+ mark_nosave_pages(forbidden_pages_map);
+
+ printk("swsusp: Basic memory bitmaps created\n");
+
+ return 0;
+
+ Free_second_object:
+ kfree(bm2);
+ Free_first_bitmap:
+ memory_bm_free(bm1, PG_UNSAFE_CLEAR);
+ Free_first_object:
+ kfree(bm1);
+ return -ENOMEM;
+}
+
+/**
+ * free_basic_memory_bitmaps - free memory bitmaps allocated by
+ * create_basic_memory_bitmaps(). The auxiliary pointers are necessary
+ * so that the bitmaps themselves are not referred to while they are being
+ * freed.
+ */
+
+void free_basic_memory_bitmaps(void)
+{
+ struct memory_bitmap *bm1, *bm2;
+
+ BUG_ON(!(forbidden_pages_map && free_pages_map));
+
+ bm1 = forbidden_pages_map;
+ bm2 = free_pages_map;
+ forbidden_pages_map = NULL;
+ free_pages_map = NULL;
+ memory_bm_free(bm1, PG_UNSAFE_CLEAR);
+ kfree(bm1);
+ memory_bm_free(bm2, PG_UNSAFE_CLEAR);
+ kfree(bm2);
+
+ printk("swsusp: Basic memory bitmaps freed\n");
+}
+
+/**
* snapshot_additional_pages - estimate the number of additional pages
* be needed for setting up the suspend image data structures for given
* zone (usually the returned value is greater than the exact number)
@@ -615,7 +833,8 @@ static struct page *saveable_highmem_page(unsigned long pfn)
BUG_ON(!PageHighMem(page));
- if (PageNosave(page) || PageReserved(page) || PageNosaveFree(page))
+ if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) ||
+ PageReserved(page))
return NULL;
return page;
@@ -670,7 +889,7 @@ static struct page *saveable_page(unsigned long pfn)
BUG_ON(PageHighMem(page));
- if (PageNosave(page) || PageNosaveFree(page))
+ if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
return NULL;
if (PageReserved(page) && pfn_is_nosave(pfn))
@@ -810,9 +1029,10 @@ void swsusp_free(void)
if (pfn_valid(pfn)) {
struct page *page = pfn_to_page(pfn);
- if (PageNosave(page) && PageNosaveFree(page)) {
- ClearPageNosave(page);
- ClearPageNosaveFree(page);
+ if (swsusp_page_is_forbidden(page) &&
+ swsusp_page_is_free(page)) {
+ swsusp_unset_page_forbidden(page);
+ swsusp_unset_page_free(page);
__free_page(page);
}
}
@@ -1135,7 +1355,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm)
max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
if (pfn_valid(pfn))
- ClearPageNosaveFree(pfn_to_page(pfn));
+ swsusp_unset_page_free(pfn_to_page(pfn));
}
/* Mark pages that correspond to the "original" pfns as "unsafe" */
@@ -1144,7 +1364,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm)
pfn = memory_bm_next_pfn(bm);
if (likely(pfn != BM_END_OF_MAP)) {
if (likely(pfn_valid(pfn)))
- SetPageNosaveFree(pfn_to_page(pfn));
+ swsusp_set_page_free(pfn_to_page(pfn));
else
return -EFAULT;
}
@@ -1310,14 +1530,14 @@ prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
struct page *page;
page = alloc_page(__GFP_HIGHMEM);
- if (!PageNosaveFree(page)) {
+ if (!swsusp_page_is_free(page)) {
/* The page is "safe", set its bit the bitmap */
memory_bm_set_bit(bm, page_to_pfn(page));
safe_highmem_pages++;
}
/* Mark the page as allocated */
- SetPageNosave(page);
- SetPageNosaveFree(page);
+ swsusp_set_page_forbidden(page);
+ swsusp_set_page_free(page);
}
memory_bm_position_reset(bm);
safe_highmem_bm = bm;
@@ -1349,7 +1569,7 @@ get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
struct highmem_pbe *pbe;
void *kaddr;
- if (PageNosave(page) && PageNosaveFree(page)) {
+ if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
/* We have allocated the "original" page frame and we can
* use it directly to store the loaded page.
*/
@@ -1511,14 +1731,14 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
error = -ENOMEM;
goto Free;
}
- if (!PageNosaveFree(virt_to_page(lp))) {
+ if (!swsusp_page_is_free(virt_to_page(lp))) {
/* The page is "safe", add it to the list */
lp->next = safe_pages_list;
safe_pages_list = lp;
}
/* Mark the page as allocated */
- SetPageNosave(virt_to_page(lp));
- SetPageNosaveFree(virt_to_page(lp));
+ swsusp_set_page_forbidden(virt_to_page(lp));
+ swsusp_set_page_free(virt_to_page(lp));
nr_pages--;
}
/* Free the reserved safe pages so that chain_alloc() can use them */
@@ -1547,7 +1767,7 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
if (PageHighMem(page))
return get_highmem_page_buffer(page, ca);
- if (PageNosave(page) && PageNosaveFree(page))
+ if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
/* We have allocated the "original" page frame and we can
* use it directly to store the loaded page.
*/
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index b18c155cbb6..e83ed9945a8 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -243,7 +243,6 @@ struct swap_map_page {
struct swap_map_handle {
struct swap_map_page *cur;
sector_t cur_swap;
- struct bitmap_page *bitmap;
unsigned int k;
};
@@ -252,9 +251,6 @@ static void release_swap_writer(struct swap_map_handle *handle)
if (handle->cur)
free_page((unsigned long)handle->cur);
handle->cur = NULL;
- if (handle->bitmap)
- free_bitmap(handle->bitmap);
- handle->bitmap = NULL;
}
static int get_swap_writer(struct swap_map_handle *handle)
@@ -262,12 +258,7 @@ static int get_swap_writer(struct swap_map_handle *handle)
handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
if (!handle->cur)
return -ENOMEM;
- handle->bitmap = alloc_bitmap(count_swap_pages(root_swap, 0));
- if (!handle->bitmap) {
- release_swap_writer(handle);
- return -ENOMEM;
- }
- handle->cur_swap = alloc_swapdev_block(root_swap, handle->bitmap);
+ handle->cur_swap = alloc_swapdev_block(root_swap);
if (!handle->cur_swap) {
release_swap_writer(handle);
return -ENOSPC;
@@ -284,7 +275,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
if (!handle->cur)
return -EINVAL;
- offset = alloc_swapdev_block(root_swap, handle->bitmap);
+ offset = alloc_swapdev_block(root_swap);
error = write_page(buf, offset, bio_chain);
if (error)
return error;
@@ -293,7 +284,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
error = wait_on_bio_chain(bio_chain);
if (error)
goto out;
- offset = alloc_swapdev_block(root_swap, handle->bitmap);
+ offset = alloc_swapdev_block(root_swap);
if (!offset)
return -ENOSPC;
handle->cur->next_swap = offset;
@@ -430,7 +421,8 @@ int swsusp_write(void)
}
}
if (error)
- free_all_swap_pages(root_swap, handle.bitmap);
+ free_all_swap_pages(root_swap);
+
release_swap_writer(&handle);
out:
swsusp_close();
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 175370824f3..5da304c8f1f 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -50,6 +50,7 @@
#include <linux/syscalls.h>
#include <linux/highmem.h>
#include <linux/time.h>
+#include <linux/rbtree.h>
#include "power.h"
@@ -74,72 +75,69 @@ static inline unsigned int count_highmem_pages(void) { return 0; }
/**
* The following functions are used for tracing the allocated
* swap pages, so that they can be freed in case of an error.
- *
- * The functions operate on a linked bitmap structure defined
- * in power.h
*/
-void free_bitmap(struct bitmap_page *bitmap)
-{
- struct bitmap_page *bp;
+struct swsusp_extent {
+ struct rb_node node;
+ unsigned long start;
+ unsigned long end;
+};
- while (bitmap) {
- bp = bitmap->next;
- free_page((unsigned long)bitmap);
- bitmap = bp;
- }
-}
+static struct rb_root swsusp_extents = RB_ROOT;
-struct bitmap_page *alloc_bitmap(unsigned int nr_bits)
+static int swsusp_extents_insert(unsigned long swap_offset)
{
- struct bitmap_page *bitmap, *bp;
- unsigned int n;
-
- if (!nr_bits)
- return NULL;
-
- bitmap = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL);
- bp = bitmap;
- for (n = BITMAP_PAGE_BITS; n < nr_bits; n += BITMAP_PAGE_BITS) {
- bp->next = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL);
- bp = bp->next;
- if (!bp) {
- free_bitmap(bitmap);
- return NULL;
+ struct rb_node **new = &(swsusp_extents.rb_node);
+ struct rb_node *parent = NULL;
+ struct swsusp_extent *ext;
+
+ /* Figure out where to put the new node */
+ while (*new) {
+ ext = container_of(*new, struct swsusp_extent, node);
+ parent = *new;
+ if (swap_offset < ext->start) {
+ /* Try to merge */
+ if (swap_offset == ext->start - 1) {
+ ext->start--;
+ return 0;
+ }
+ new = &((*new)->rb_left);
+ } else if (swap_offset > ext->end) {
+ /* Try to merge */
+ if (swap_offset == ext->end + 1) {
+ ext->end++;
+ return 0;
+ }
+ new = &((*new)->rb_right);
+ } else {
+ /* It already is in the tree */
+ return -EINVAL;
}
}
- return bitmap;
-}
-
-static int bitmap_set(struct bitmap_page *bitmap, unsigned long bit)
-{
- unsigned int n;
-
- n = BITMAP_PAGE_BITS;
- while (bitmap && n <= bit) {
- n += BITMAP_PAGE_BITS;
- bitmap = bitmap->next;
- }
- if (!bitmap)
- return -EINVAL;
- n -= BITMAP_PAGE_BITS;
- bit -= n;
- n = 0;
- while (bit >= BITS_PER_CHUNK) {
- bit -= BITS_PER_CHUNK;
- n++;
- }
- bitmap->chunks[n] |= (1UL << bit);
+ /* Add the new node and rebalance the tree. */
+ ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL);
+ if (!ext)
+ return -ENOMEM;
+
+ ext->start = swap_offset;
+ ext->end = swap_offset;
+ rb_link_node(&ext->node, parent, new);
+ rb_insert_color(&ext->node, &swsusp_extents);
return 0;
}
-sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap)
+/**
+ * alloc_swapdev_block - allocate a swap page and register that it has
+ * been allocated, so that it can be freed in case of an error.
+ */
+
+sector_t alloc_swapdev_block(int swap)
{
unsigned long offset;
offset = swp_offset(get_swap_page_of_type(swap));
if (offset) {
- if (bitmap_set(bitmap, offset))
+ if (swsusp_extents_insert(offset))
swap_free(swp_entry(swap, offset));
else
return swapdev_block(swap, offset);
@@ -147,23 +145,34 @@ sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap)
return 0;
}
-void free_all_swap_pages(int swap, struct bitmap_page *bitmap)
+/**
+ * free_all_swap_pages - free swap pages allocated for saving image data.
+ * It also frees the extents used to register which swap entres had been
+ * allocated.
+ */
+
+void free_all_swap_pages(int swap)
{
- unsigned int bit, n;
- unsigned long test;
-
- bit = 0;
- while (bitmap) {
- for (n = 0; n < BITMAP_PAGE_CHUNKS; n++)
- for (test = 1UL; test; test <<= 1) {
- if (bitmap->chunks[n] & test)
- swap_free(swp_entry(swap, bit));
- bit++;
- }
- bitmap = bitmap->next;
+ struct rb_node *node;
+
+ while ((node = swsusp_extents.rb_node)) {
+ struct swsusp_extent *ext;
+ unsigned long offset;
+
+ ext = container_of(node, struct swsusp_extent, node);
+ rb_erase(node, &swsusp_extents);
+ for (offset = ext->start; offset <= ext->end; offset++)
+ swap_free(swp_entry(swap, offset));
+
+ kfree(ext);
}
}
+int swsusp_swap_in_use(void)
+{
+ return (swsusp_extents.rb_node != NULL);
+}
+
/**
* swsusp_show_speed - print the time elapsed between two events represented by
* @start and @stop
@@ -224,7 +233,7 @@ int swsusp_shrink_memory(void)
long size, highmem_size;
highmem_size = count_highmem_pages();
- size = count_data_pages() + PAGES_FOR_IO;
+ size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES;
tmp = size;
size += highmem_size;
for_each_zone (zone)
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 7cf6713b232..040560d9c31 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -33,25 +33,29 @@
static struct snapshot_data {
struct snapshot_handle handle;
int swap;
- struct bitmap_page *bitmap;
int mode;
char frozen;
char ready;
char platform_suspend;
} snapshot_state;
-static atomic_t device_available = ATOMIC_INIT(1);
+atomic_t snapshot_device_available = ATOMIC_INIT(1);
static int snapshot_open(struct inode *inode, struct file *filp)
{
struct snapshot_data *data;
- if (!atomic_add_unless(&device_available, -1, 0))
+ if (!atomic_add_unless(&snapshot_device_available, -1, 0))
return -EBUSY;
- if ((filp->f_flags & O_ACCMODE) == O_RDWR)
+ if ((filp->f_flags & O_ACCMODE) == O_RDWR) {
+ atomic_inc(&snapshot_device_available);
return -ENOSYS;
-
+ }
+ if(create_basic_memory_bitmaps()) {
+ atomic_inc(&snapshot_device_available);
+ return -ENOMEM;
+ }
nonseekable_open(inode, filp);
data = &snapshot_state;
filp->private_data = data;
@@ -64,7 +68,6 @@ static int snapshot_open(struct inode *inode, struct file *filp)
data->swap = -1;
data->mode = O_WRONLY;
}
- data->bitmap = NULL;
data->frozen = 0;
data->ready = 0;
data->platform_suspend = 0;
@@ -77,16 +80,15 @@ static int snapshot_release(struct inode *inode, struct file *filp)
struct snapshot_data *data;
swsusp_free();
+ free_basic_memory_bitmaps();
data = filp->private_data;
- free_all_swap_pages(data->swap, data->bitmap);
- free_bitmap(data->bitmap);
+ free_all_swap_pages(data->swap);
if (data->frozen) {
mutex_lock(&pm_mutex);
thaw_processes();
- enable_nonboot_cpus();
mutex_unlock(&pm_mutex);
}
- atomic_inc(&device_available);
+ atomic_inc(&snapshot_device_available);
return 0;
}
@@ -294,14 +296,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
error = -ENODEV;
break;
}
- if (!data->bitmap) {
- data->bitmap = alloc_bitmap(count_swap_pages(data->swap, 0));
- if (!data->bitmap) {
- error = -ENOMEM;
- break;
- }
- }
- offset = alloc_swapdev_block(data->swap, data->bitmap);
+ offset = alloc_swapdev_block(data->swap);
if (offset) {
offset <<= PAGE_SHIFT;
error = put_user(offset, (sector_t __user *)arg);
@@ -315,13 +310,11 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
error = -ENODEV;
break;
}
- free_all_swap_pages(data->swap, data->bitmap);
- free_bitmap(data->bitmap);
- data->bitmap = NULL;
+ free_all_swap_pages(data->swap);
break;
case SNAPSHOT_SET_SWAP_FILE:
- if (!data->bitmap) {
+ if (!swsusp_swap_in_use()) {
/*
* User space encodes device types as two-byte values,
* so we need to recode them
@@ -420,7 +413,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
break;
case SNAPSHOT_SET_SWAP_AREA:
- if (data->bitmap) {
+ if (swsusp_swap_in_use()) {
error = -EPERM;
} else {
struct resume_swap_area swap_area;
diff --git a/kernel/sched.c b/kernel/sched.c
index 960d7c5fca3..0227f1625a7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5244,6 +5244,11 @@ int __init migration_init(void)
#endif
#ifdef CONFIG_SMP
+
+/* Number of possible processor ids */
+int nr_cpu_ids __read_mostly = NR_CPUS;
+EXPORT_SYMBOL(nr_cpu_ids);
+
#undef SCHED_DOMAIN_DEBUG
#ifdef SCHED_DOMAIN_DEBUG
static void sched_domain_debug(struct sched_domain *sd, int cpu)
@@ -6726,6 +6731,7 @@ int in_sched_functions(unsigned long addr)
void __init sched_init(void)
{
int i, j, k;
+ int highest_cpu = 0;
for_each_possible_cpu(i) {
struct prio_array *array;
@@ -6760,11 +6766,13 @@ void __init sched_init(void)
// delimiter for bitsearch
__set_bit(MAX_PRIO, array->bitmap);
}
+ highest_cpu = i;
}
set_load_weight(&init_task);
#ifdef CONFIG_SMP
+ nr_cpu_ids = highest_cpu + 1;
open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);
#endif
diff --git a/kernel/signal.c b/kernel/signal.c
index 3670225ecbc..2b4087d545a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2636,9 +2636,5 @@ __attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
void __init signals_init(void)
{
- sigqueue_cachep =
- kmem_cache_create("sigqueue",
- sizeof(struct sigqueue),
- __alignof__(struct sigqueue),
- SLAB_PANIC, NULL, NULL);
+ sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC);
}
diff --git a/kernel/sys.c b/kernel/sys.c
index 123b165080e..fe1f3ab2047 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -881,7 +881,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
#ifdef CONFIG_SOFTWARE_SUSPEND
case LINUX_REBOOT_CMD_SW_SUSPEND:
{
- int ret = software_suspend();
+ int ret = pm_suspend(PM_SUSPEND_DISK);
unlock_kernel();
return ret;
}
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index ad7d2392cb0..906cae77158 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -524,9 +524,7 @@ void __init taskstats_init_early(void)
{
unsigned int i;
- taskstats_cache = kmem_cache_create("taskstats_cache",
- sizeof(struct taskstats),
- 0, SLAB_PANIC, NULL, NULL);
+ taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC);
for_each_possible_cpu(i) {
INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
init_rwsem(&(per_cpu(listener_array, i).sem));