aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit_tree.c6
-rw-r--r--kernel/cgroup.c3
-rw-r--r--kernel/irq/handle.c2
-rw-r--r--kernel/kexec.c14
-rw-r--r--kernel/module.c1
-rw-r--r--kernel/params.c46
-rw-r--r--kernel/perf_counter.c95
-rw-r--r--kernel/power/Kconfig4
-rw-r--r--kernel/power/Makefile5
-rw-r--r--kernel/power/hibernate.c (renamed from kernel/power/disk.c)34
-rw-r--r--kernel/power/hibernate_nvs.c135
-rw-r--r--kernel/power/main.c521
-rw-r--r--kernel/power/power.h25
-rw-r--r--kernel/power/snapshot.c80
-rw-r--r--kernel/power/suspend.c300
-rw-r--r--kernel/power/suspend_test.c187
-rw-r--r--kernel/power/swsusp.c198
-rw-r--r--kernel/sched.c1
18 files changed, 878 insertions, 779 deletions
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 6e7351739a8..1f6396d7668 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -568,7 +568,7 @@ void audit_trim_trees(void)
if (err)
goto skip_it;
- root_mnt = collect_mounts(path.mnt, path.dentry);
+ root_mnt = collect_mounts(&path);
path_put(&path);
if (!root_mnt)
goto skip_it;
@@ -660,7 +660,7 @@ int audit_add_tree_rule(struct audit_krule *rule)
err = kern_path(tree->pathname, 0, &path);
if (err)
goto Err;
- mnt = collect_mounts(path.mnt, path.dentry);
+ mnt = collect_mounts(&path);
path_put(&path);
if (!mnt) {
err = -ENOMEM;
@@ -720,7 +720,7 @@ int audit_tag_tree(char *old, char *new)
err = kern_path(new, 0, &path);
if (err)
return err;
- tagged = collect_mounts(path.mnt, path.dentry);
+ tagged = collect_mounts(&path);
path_put(&path);
if (!tagged)
return -ENOMEM;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a7267bfd376..3fb789f6df9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -46,6 +46,7 @@
#include <linux/cgroupstats.h>
#include <linux/hash.h>
#include <linux/namei.h>
+#include <linux/smp_lock.h>
#include <asm/atomic.h>
@@ -900,6 +901,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
struct cgroup *cgrp = &root->top_cgroup;
struct cgroup_sb_opts opts;
+ lock_kernel();
mutex_lock(&cgrp->dentry->d_inode->i_mutex);
mutex_lock(&cgroup_mutex);
@@ -927,6 +929,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
kfree(opts.release_agent);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
+ unlock_kernel();
return ret;
}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 10457854123..065205bdd92 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -45,7 +45,7 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
static void __init init_irq_default_affinity(void)
{
- alloc_bootmem_cpumask_var(&irq_default_affinity);
+ alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
cpumask_setall(irq_default_affinity);
}
#else
diff --git a/kernel/kexec.c b/kernel/kexec.c
index e4983770913..ae1c35201cc 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1448,17 +1448,17 @@ int kernel_kexec(void)
goto Restore_console;
}
suspend_console();
- error = device_suspend(PMSG_FREEZE);
+ error = dpm_suspend_start(PMSG_FREEZE);
if (error)
goto Resume_console;
- /* At this point, device_suspend() has been called,
- * but *not* device_power_down(). We *must*
- * device_power_down() now. Otherwise, drivers for
+ /* At this point, dpm_suspend_start() has been called,
+ * but *not* dpm_suspend_noirq(). We *must* call
+ * dpm_suspend_noirq() now. Otherwise, drivers for
* some devices (e.g. interrupt controllers) become
* desynchronized with the actual state of the
* hardware at resume time, and evil weirdness ensues.
*/
- error = device_power_down(PMSG_FREEZE);
+ error = dpm_suspend_noirq(PMSG_FREEZE);
if (error)
goto Resume_devices;
error = disable_nonboot_cpus();
@@ -1486,9 +1486,9 @@ int kernel_kexec(void)
local_irq_enable();
Enable_cpus:
enable_nonboot_cpus();
- device_power_up(PMSG_RESTORE);
+ dpm_resume_noirq(PMSG_RESTORE);
Resume_devices:
- device_resume(PMSG_RESTORE);
+ dpm_resume_end(PMSG_RESTORE);
Resume_console:
resume_console();
thaw_processes();
diff --git a/kernel/module.c b/kernel/module.c
index 35f7de00bf0..e4ab36ce767 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2455,6 +2455,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
mutex_lock(&module_mutex);
/* Drop initial reference. */
module_put(mod);
+ trim_init_extable(mod);
module_free(mod, mod->module_init);
mod->module_init = NULL;
mod->init_size = 0;
diff --git a/kernel/params.c b/kernel/params.c
index de273ec85bd..7f6912ced2b 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -24,9 +24,6 @@
#include <linux/err.h>
#include <linux/slab.h>
-/* We abuse the high bits of "perm" to record whether we kmalloc'ed. */
-#define KPARAM_KMALLOCED 0x80000000
-
#if 0
#define DEBUGP printk
#else
@@ -220,13 +217,13 @@ int param_set_charp(const char *val, struct kernel_param *kp)
return -ENOSPC;
}
- if (kp->perm & KPARAM_KMALLOCED)
+ if (kp->flags & KPARAM_KMALLOCED)
kfree(*(char **)kp->arg);
/* This is a hack. We can't need to strdup in early boot, and we
* don't need to; this mangled commandline is preserved. */
if (slab_is_available()) {
- kp->perm |= KPARAM_KMALLOCED;
+ kp->flags |= KPARAM_KMALLOCED;
*(char **)kp->arg = kstrdup(val, GFP_KERNEL);
if (!kp->arg)
return -ENOMEM;
@@ -241,44 +238,63 @@ int param_get_charp(char *buffer, struct kernel_param *kp)
return sprintf(buffer, "%s", *((char **)kp->arg));
}
+/* Actually could be a bool or an int, for historical reasons. */
int param_set_bool(const char *val, struct kernel_param *kp)
{
+ bool v;
+
/* No equals means "set"... */
if (!val) val = "1";
/* One of =[yYnN01] */
switch (val[0]) {
case 'y': case 'Y': case '1':
- *(int *)kp->arg = 1;
- return 0;
+ v = true;
+ break;
case 'n': case 'N': case '0':
- *(int *)kp->arg = 0;
- return 0;
+ v = false;
+ break;
+ default:
+ return -EINVAL;
}
- return -EINVAL;
+
+ if (kp->flags & KPARAM_ISBOOL)
+ *(bool *)kp->arg = v;
+ else
+ *(int *)kp->arg = v;
+ return 0;
}
int param_get_bool(char *buffer, struct kernel_param *kp)
{
+ bool val;
+ if (kp->flags & KPARAM_ISBOOL)
+ val = *(bool *)kp->arg;
+ else
+ val = *(int *)kp->arg;
+
/* Y and N chosen as being relatively non-coder friendly */
- return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'Y' : 'N');
+ return sprintf(buffer, "%c", val ? 'Y' : 'N');
}
+/* This one must be bool. */
int param_set_invbool(const char *val, struct kernel_param *kp)
{
- int boolval, ret;
+ int ret;
+ bool boolval;
struct kernel_param dummy;
dummy.arg = &boolval;
+ dummy.flags = KPARAM_ISBOOL;
ret = param_set_bool(val, &dummy);
if (ret == 0)
- *(int *)kp->arg = !boolval;
+ *(bool *)kp->arg = !boolval;
return ret;
}
int param_get_invbool(char *buffer, struct kernel_param *kp)
{
- return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'N' : 'Y');
+ return sprintf(buffer, "%c", (*(bool *)kp->arg) ? 'N' : 'Y');
}
/* We break the rule and mangle the string. */
@@ -591,7 +607,7 @@ void destroy_params(const struct kernel_param *params, unsigned num)
unsigned int i;
for (i = 0; i < num; i++)
- if (params[i].perm & KPARAM_KMALLOCED)
+ if (params[i].flags & KPARAM_KMALLOCED)
kfree(*(char **)params[i].arg);
}
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ef5d8a5b245..29b685f551a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3570,12 +3570,8 @@ perf_counter_alloc(struct perf_counter_attr *attr,
if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
goto done;
- if (attr->type == PERF_TYPE_RAW) {
- pmu = hw_perf_counter_init(counter);
- goto done;
- }
-
switch (attr->type) {
+ case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:
case PERF_TYPE_HW_CACHE:
pmu = hw_perf_counter_init(counter);
@@ -3588,6 +3584,9 @@ perf_counter_alloc(struct perf_counter_attr *attr,
case PERF_TYPE_TRACEPOINT:
pmu = tp_perf_counter_init(counter);
break;
+
+ default:
+ break;
}
done:
err = 0;
@@ -3614,6 +3613,85 @@ done:
return counter;
}
+static int perf_copy_attr(struct perf_counter_attr __user *uattr,
+ struct perf_counter_attr *attr)
+{
+ int ret;
+ u32 size;
+
+ if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0))
+ return -EFAULT;
+
+ /*
+ * zero the full structure, so that a short copy will be nice.
+ */
+ memset(attr, 0, sizeof(*attr));
+
+ ret = get_user(size, &uattr->size);
+ if (ret)
+ return ret;
+
+ if (size > PAGE_SIZE) /* silly large */
+ goto err_size;
+
+ if (!size) /* abi compat */
+ size = PERF_ATTR_SIZE_VER0;
+
+ if (size < PERF_ATTR_SIZE_VER0)
+ goto err_size;
+
+ /*
+ * If we're handed a bigger struct than we know of,
+ * ensure all the unknown bits are 0.
+ */
+ if (size > sizeof(*attr)) {
+ unsigned long val;
+ unsigned long __user *addr;
+ unsigned long __user *end;
+
+ addr = PTR_ALIGN((void __user *)uattr + sizeof(*attr),
+ sizeof(unsigned long));
+ end = PTR_ALIGN((void __user *)uattr + size,
+ sizeof(unsigned long));
+
+ for (; addr < end; addr += sizeof(unsigned long)) {
+ ret = get_user(val, addr);
+ if (ret)
+ return ret;
+ if (val)
+ goto err_size;
+ }
+ }
+
+ ret = copy_from_user(attr, uattr, size);
+ if (ret)
+ return -EFAULT;
+
+ /*
+ * If the type exists, the corresponding creation will verify
+ * the attr->config.
+ */
+ if (attr->type >= PERF_TYPE_MAX)
+ return -EINVAL;
+
+ if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
+ return -EINVAL;
+
+ if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
+ return -EINVAL;
+
+ if (attr->read_format & ~(PERF_FORMAT_MAX-1))
+ return -EINVAL;
+
+out:
+ return ret;
+
+err_size:
+ put_user(sizeof(*attr), &uattr->size);
+ ret = -E2BIG;
+ goto out;
+}
+
/**
* sys_perf_counter_open - open a performance counter, associate it to a task/cpu
*
@@ -3623,7 +3701,7 @@ done:
* @group_fd: group leader counter fd
*/
SYSCALL_DEFINE5(perf_counter_open,
- const struct perf_counter_attr __user *, attr_uptr,
+ struct perf_counter_attr __user *, attr_uptr,
pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
{
struct perf_counter *counter, *group_leader;
@@ -3639,8 +3717,9 @@ SYSCALL_DEFINE5(perf_counter_open,
if (flags)
return -EINVAL;
- if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0)
- return -EFAULT;
+ ret = perf_copy_attr(attr_uptr, &attr);
+ if (ret)
+ return ret;
if (!attr.exclude_kernel) {
if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 23bd4daeb96..72067cbdb37 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -116,9 +116,13 @@ config SUSPEND_FREEZER
Turning OFF this setting is NOT recommended! If in doubt, say Y.
+config HIBERNATION_NVS
+ bool
+
config HIBERNATION
bool "Hibernation (aka 'suspend to disk')"
depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
+ select HIBERNATION_NVS if HAS_IOMEM
---help---
Enable the suspend to disk (STD) functionality, which is usually
called "hibernation" in user interfaces. STD checkpoints the
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 720ea4f781b..c3b81c30e5d 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -6,6 +6,9 @@ endif
obj-$(CONFIG_PM) += main.o
obj-$(CONFIG_PM_SLEEP) += console.o
obj-$(CONFIG_FREEZER) += process.o
-obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o
+obj-$(CONFIG_SUSPEND) += suspend.o
+obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
+obj-$(CONFIG_HIBERNATION) += swsusp.o hibernate.o snapshot.o swap.o user.o
+obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/disk.c b/kernel/power/hibernate.c
index 5cb080e7eeb..81d2e746489 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/hibernate.c
@@ -1,12 +1,12 @@
/*
- * kernel/power/disk.c - Suspend-to-disk support.
+ * kernel/power/hibernate.c - Hibernation (a.k.a suspend-to-disk) support.
*
* Copyright (c) 2003 Patrick Mochel
* Copyright (c) 2003 Open Source Development Lab
* Copyright (c) 2004 Pavel Machek <pavel@suse.cz>
+ * Copyright (c) 2009 Rafael J. Wysocki, Novell Inc.
*
* This file is released under the GPLv2.
- *
*/
#include <linux/suspend.h>
@@ -215,13 +215,13 @@ static int create_image(int platform_mode)
if (error)
return error;
- /* At this point, device_suspend() has been called, but *not*
- * device_power_down(). We *must* call device_power_down() now.
+ /* At this point, dpm_suspend_start() has been called, but *not*
+ * dpm_suspend_noirq(). We *must* call dpm_suspend_noirq() now.
* Otherwise, drivers for some devices (e.g. interrupt controllers)
* become desynchronized with the actual state of the hardware
* at resume time, and evil weirdness ensues.
*/
- error = device_power_down(PMSG_FREEZE);
+ error = dpm_suspend_noirq(PMSG_FREEZE);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
"aborting hibernation\n");
@@ -262,7 +262,7 @@ static int create_image(int platform_mode)
Power_up:
sysdev_resume();
- /* NOTE: device_power_up() is just a resume() for devices
+ /* NOTE: dpm_resume_noirq() is just a resume() for devices
* that suspended with irqs off ... no overall powerup.
*/
@@ -275,7 +275,7 @@ static int create_image(int platform_mode)
Platform_finish:
platform_finish(platform_mode);
- device_power_up(in_suspend ?
+ dpm_resume_noirq(in_suspend ?
(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
return error;
@@ -304,7 +304,7 @@ int hibernation_snapshot(int platform_mode)
goto Close;
suspend_console();
- error = device_suspend(PMSG_FREEZE);
+ error = dpm_suspend_start(PMSG_FREEZE);
if (error)
goto Recover_platform;
@@ -315,7 +315,7 @@ int hibernation_snapshot(int platform_mode)
/* Control returns here after successful restore */
Resume_devices:
- device_resume(in_suspend ?
+ dpm_resume_end(in_suspend ?
(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
resume_console();
Close:
@@ -339,7 +339,7 @@ static int resume_target_kernel(bool platform_mode)
{
int error;
- error = device_power_down(PMSG_QUIESCE);
+ error = dpm_suspend_noirq(PMSG_QUIESCE);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
"aborting resume\n");
@@ -394,7 +394,7 @@ static int resume_target_kernel(bool platform_mode)
Cleanup:
platform_restore_cleanup(platform_mode);
- device_power_up(PMSG_RECOVER);
+ dpm_resume_noirq(PMSG_RECOVER);
return error;
}
@@ -414,10 +414,10 @@ int hibernation_restore(int platform_mode)
pm_prepare_console();
suspend_console();
- error = device_suspend(PMSG_QUIESCE);
+ error = dpm_suspend_start(PMSG_QUIESCE);
if (!error) {
error = resume_target_kernel(platform_mode);
- device_resume(PMSG_RECOVER);
+ dpm_resume_end(PMSG_RECOVER);
}
resume_console();
pm_restore_console();
@@ -447,14 +447,14 @@ int hibernation_platform_enter(void)
entering_platform_hibernation = true;
suspend_console();
- error = device_suspend(PMSG_HIBERNATE);
+ error = dpm_suspend_start(PMSG_HIBERNATE);
if (error) {
if (hibernation_ops->recover)
hibernation_ops->recover();
goto Resume_devices;
}
- error = device_power_down(PMSG_HIBERNATE);
+ error = dpm_suspend_noirq(PMSG_HIBERNATE);
if (error)
goto Resume_devices;
@@ -479,11 +479,11 @@ int hibernation_platform_enter(void)
Platofrm_finish:
hibernation_ops->finish();
- device_power_up(PMSG_RESTORE);
+ dpm_suspend_noirq(PMSG_RESTORE);
Resume_devices:
entering_platform_hibernation = false;
- device_resume(PMSG_RESTORE);
+ dpm_resume_end(PMSG_RESTORE);
resume_console();
Close:
diff --git a/kernel/power/hibernate_nvs.c b/kernel/power/hibernate_nvs.c
new file mode 100644
index 00000000000..39ac698ef83
--- /dev/null
+++ b/kernel/power/hibernate_nvs.c
@@ -0,0 +1,135 @@
+/*
+ * linux/kernel/power/hibernate_nvs.c - Routines for handling NVS memory
+ *
+ * Copyright (C) 2008,2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/suspend.h>
+
+/*
+ * Platforms, like ACPI, may want us to save some memory used by them during
+ * hibernation and to restore the contents of this memory during the subsequent
+ * resume. The code below implements a mechanism allowing us to do that.
+ */
+
+struct nvs_page {
+ unsigned long phys_start;
+ unsigned int size;
+ void *kaddr;
+ void *data;
+ struct list_head node;
+};
+
+static LIST_HEAD(nvs_list);
+
+/**
+ * hibernate_nvs_register - register platform NVS memory region to save
+ * @start - physical address of the region
+ * @size - size of the region
+ *
+ * The NVS region need not be page-aligned (both ends) and we arrange
+ * things so that the data from page-aligned addresses in this region will
+ * be copied into separate RAM pages.
+ */
+int hibernate_nvs_register(unsigned long start, unsigned long size)
+{
+ struct nvs_page *entry, *next;
+
+ while (size > 0) {
+ unsigned int nr_bytes;
+
+ entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL);
+ if (!entry)
+ goto Error;
+
+ list_add_tail(&entry->node, &nvs_list);
+ entry->phys_start = start;
+ nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK);
+ entry->size = (size < nr_bytes) ? size : nr_bytes;
+
+ start += entry->size;
+ size -= entry->size;
+ }
+ return 0;
+
+ Error:
+ list_for_each_entry_safe(entry, next, &nvs_list, node) {
+ list_del(&entry->node);
+ kfree(entry);
+ }
+ return -ENOMEM;
+}
+
+/**
+ * hibernate_nvs_free - free data pages allocated for saving NVS regions
+ */
+void hibernate_nvs_free(void)
+{
+ struct nvs_page *entry;
+
+ list_for_each_entry(entry, &nvs_list, node)
+ if (entry->data) {
+ free_page((unsigned long)entry->data);
+ entry->data = NULL;
+ if (entry->kaddr) {
+ iounmap(entry->kaddr);
+ entry->kaddr = NULL;
+ }
+ }
+}
+
+/**
+ * hibernate_nvs_alloc - allocate memory necessary for saving NVS regions
+ */
+int hibernate_nvs_alloc(void)
+{
+ struct nvs_page *entry;
+
+ list_for_each_entry(entry, &nvs_list, node) {
+ entry->data = (void *)__get_free_page(GFP_KERNEL);
+ if (!entry->data) {
+ hibernate_nvs_free();
+ return -ENOMEM;
+ }
+ }
+ return 0;
+}
+
+/**
+ * hibernate_nvs_save - save NVS memory regions
+ */
+void hibernate_nvs_save(void)
+{
+ struct nvs_page *entry;
+
+ printk(KERN_INFO "PM: Saving platform NVS memory\n");
+
+ list_for_each_entry(entry, &nvs_list, node)
+ if (entry->data) {
+ entry->kaddr = ioremap(entry->phys_start, entry->size);
+ memcpy(entry->data, entry->kaddr, entry->size);
+ }
+}
+
+/**
+ * hibernate_nvs_restore - restore NVS memory regions
+ *
+ * This function is going to be called with interrupts disabled, so it
+ * cannot iounmap the virtual addresses used to access the NVS region.
+ */
+void hibernate_nvs_restore(void)
+{
+ struct nvs_page *entry;
+
+ printk(KERN_INFO "PM: Restoring platform NVS memory\n");
+
+ list_for_each_entry(entry, &nvs_list, node)
+ if (entry->data)
+ memcpy(entry->kaddr, entry->data, entry->size);
+}
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 868028280d1..f710e36930c 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -8,20 +8,9 @@
*
*/
-#include <linux/module.h>
-#include <linux/suspend.h>
#include <linux/kobject.h>
#include <linux/string.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/kmod.h>
-#include <linux/init.h>
-#include <linux/console.h>
-#include <linux/cpu.h>
#include <linux/resume-trace.h>
-#include <linux/freezer.h>
-#include <linux/vmstat.h>
-#include <linux/syscalls.h>
#include "power.h"
@@ -119,373 +108,6 @@ power_attr(pm_test);
#endif /* CONFIG_PM_SLEEP */
-#ifdef CONFIG_SUSPEND
-
-static int suspend_test(int level)
-{
-#ifdef CONFIG_PM_DEBUG
- if (pm_test_level == level) {
- printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
- mdelay(5000);
- return 1;
- }
-#endif /* !CONFIG_PM_DEBUG */
- return 0;
-}
-
-#ifdef CONFIG_PM_TEST_SUSPEND
-
-/*
- * We test the system suspend code by setting an RTC wakealarm a short
- * time in the future, then suspending. Suspending the devices won't
- * normally take long ... some systems only need a few milliseconds.
- *
- * The time it takes is system-specific though, so when we test this
- * during system bootup we allow a LOT of time.
- */
-#define TEST_SUSPEND_SECONDS 5
-
-static unsigned long suspend_test_start_time;
-
-static void suspend_test_start(void)
-{
- /* FIXME Use better timebase than "jiffies", ideally a clocksource.
- * What we want is a hardware counter that will work correctly even
- * during the irqs-are-off stages of the suspend/resume cycle...
- */
- suspend_test_start_time = jiffies;
-}
-
-static void suspend_test_finish(const char *label)
-{
- long nj = jiffies - suspend_test_start_time;
- unsigned msec;
-
- msec = jiffies_to_msecs(abs(nj));
- pr_info("PM: %s took %d.%03d seconds\n", label,
- msec / 1000, msec % 1000);
-
- /* Warning on suspend means the RTC alarm period needs to be
- * larger -- the system was sooo slooowwww to suspend that the
- * alarm (should have) fired before the system went to sleep!
- *
- * Warning on either suspend or resume also means the system
- * has some performance issues. The stack dump of a WARN_ON
- * is more likely to get the right attention than a printk...
- */
- WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label);
-}
-
-#else
-
-static void suspend_test_start(void)
-{
-}
-
-static void suspend_test_finish(const char *label)
-{
-}
-
-#endif
-
-/* This is just an arbitrary number */
-#define FREE_PAGE_NUMBER (100)
-
-static struct platform_suspend_ops *suspend_ops;
-
-/**
- * suspend_set_ops - Set the global suspend method table.
- * @ops: Pointer to ops structure.
- */
-
-void suspend_set_ops(struct platform_suspend_ops *ops)
-{
- mutex_lock(&pm_mutex);
- suspend_ops = ops;
- mutex_unlock(&pm_mutex);
-}
-
-/**
- * suspend_valid_only_mem - generic memory-only valid callback
- *
- * Platform drivers that implement mem suspend only and only need
- * to check for that in their .valid callback can use this instead
- * of rolling their own .valid callback.
- */
-int suspend_valid_only_mem(suspend_state_t state)
-{
- return state == PM_SUSPEND_MEM;
-}
-
-/**
- * suspend_prepare - Do prep work before entering low-power state.
- *
- * This is common code that is called for each state that we're entering.
- * Run suspend notifiers, allocate a console and stop all processes.
- */
-static int suspend_prepare(void)
-{
- int error;
- unsigned int free_pages;
-
- if (!suspend_ops || !suspend_ops->enter)
- return -EPERM;
-
- pm_prepare_console();
-
- error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
- if (error)
- goto Finish;
-
- error = usermodehelper_disable();
- if (error)
- goto Finish;
-
- if (suspend_freeze_processes()) {
- error = -EAGAIN;
- goto Thaw;
- }
-
- free_pages = global_page_state(NR_FREE_PAGES);
- if (free_pages < FREE_PAGE_NUMBER) {
- pr_debug("PM: free some memory\n");
- shrink_all_memory(FREE_PAGE_NUMBER - free_pages);
- if (nr_free_pages() < FREE_PAGE_NUMBER) {
- error = -ENOMEM;
- printk(KERN_ERR "PM: No enough memory\n");
- }
- }
- if (!error)
- return 0;
-
- Thaw:
- suspend_thaw_processes();
- usermodehelper_enable();
- Finish:
- pm_notifier_call_chain(PM_POST_SUSPEND);
- pm_restore_console();
- return error;
-}
-
-/* default implementation */
-void __attribute__ ((weak)) arch_suspend_disable_irqs(void)
-{
- local_irq_disable();
-}
-
-/* default implementation */
-void __attribute__ ((weak)) arch_suspend_enable_irqs(void)
-{
- local_irq_enable();
-}
-
-/**
- * suspend_enter - enter the desired system sleep state.
- * @state: state to enter
- *
- * This function should be called after devices have been suspended.
- */
-static int suspend_enter(suspend_state_t state)
-{
- int error;
-
- if (suspend_ops->prepare) {
- error = suspend_ops->prepare();
- if (error)
- return error;
- }
-
- error = device_power_down(PMSG_SUSPEND);
- if (error) {
- printk(KERN_ERR "PM: Some devices failed to power down\n");
- goto Platfrom_finish;
- }
-
- if (suspend_ops->prepare_late) {
- error = suspend_ops->prepare_late();
- if (error)
- goto Power_up_devices;
- }
-
- if (suspend_test(TEST_PLATFORM))
- goto Platform_wake;
-
- error = disable_nonboot_cpus();
- if (error || suspend_test(TEST_CPUS))
- goto Enable_cpus;
-
- arch_suspend_disable_irqs();
- BUG_ON(!irqs_disabled());
-
- error = sysdev_suspend(PMSG_SUSPEND);
- if (!error) {
- if (!suspend_test(TEST_CORE))
- error = suspend_ops->enter(state);
- sysdev_resume();
- }
-
- arch_suspend_enable_irqs();
- BUG_ON(irqs_disabled());
-
- Enable_cpus:
- enable_nonboot_cpus();
-
- Platform_wake:
- if (suspend_ops->wake)
- suspend_ops->wake();
-
- Power_up_devices:
- device_power_up(PMSG_RESUME);
-
- Platfrom_finish:
- if (suspend_ops->finish)
- suspend_ops->finish();
-
- return error;
-}
-
-/**
- * suspend_devices_and_enter - suspend devices and enter the desired system
- * sleep state.
- * @state: state to enter
- */
-int suspend_devices_and_enter(suspend_state_t state)
-{
- int error;
-
- if (!suspend_ops)
- return -ENOSYS;
-
- if (suspend_ops->begin) {
- error = suspend_ops->begin(state);
- if (error)
- goto Close;
- }
- suspend_console();
- suspend_test_start();
- error = device_suspend(PMSG_SUSPEND);
- if (error) {
- printk(KERN_ERR "PM: Some devices failed to suspend\n");
- goto Recover_platform;
- }
- suspend_test_finish("suspend devices");
- if (suspend_test(TEST_DEVICES))
- goto Recover_platform;
-
- suspend_enter(state);
-
- Resume_devices:
- suspend_test_start();
- device_resume(PMSG_RESUME);
- suspend_test_finish("resume devices");
- resume_console();
- Close:
- if (suspend_ops->end)
- suspend_ops->end();
- return error;
-
- Recover_platform:
- if (suspend_ops->recover)
- suspend_ops->recover();
- goto Resume_devices;
-}
-
-/**
- * suspend_finish - Do final work before exiting suspend sequence.
- *
- * Call platform code to clean up, restart processes, and free the
- * console that we've allocated. This is not called for suspend-to-disk.
- */
-static void suspend_finish(void)
-{
- suspend_thaw_processes();
- usermodehelper_enable();
- pm_notifier_call_chain(PM_POST_SUSPEND);
- pm_restore_console();
-}
-
-
-
-
-static const char * const pm_states[PM_SUSPEND_MAX] = {
- [PM_SUSPEND_STANDBY] = "standby",
- [PM_SUSPEND_MEM] = "mem",
-};
-
-static inline int valid_state(suspend_state_t state)
-{
- /* All states need lowlevel support and need to be valid
- * to the lowlevel implementation, no valid callback
- * implies that none are valid. */
- if (!suspend_ops || !suspend_ops->valid || !suspend_ops->valid(state))
- return 0;
- return 1;
-}
-
-
-/**
- * enter_state - Do common work of entering low-power state.
- * @state: pm_state structure for state we're entering.
- *
- * Make sure we're the only ones trying to enter a sleep state. Fail
- * if someone has beat us to it, since we don't want anything weird to
- * happen when we wake up.
- * Then, do the setup for suspend, enter the state, and cleaup (after
- * we've woken up).
- */
-static int enter_state(suspend_state_t state)
-{
- int error;
-
- if (!valid_state(state))
- return -ENODEV;
-
- if (!mutex_trylock(&pm_mutex))
- return -EBUSY;
-
- printk(KERN_INFO "PM: Syncing filesystems ... ");
- sys_sync();
- printk("done.\n");
-
- pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
- error = suspend_prepare();
- if (error)
- goto Unlock;
-
- if (suspend_test(TEST_FREEZER))
- goto Finish;
-
- pr_debug("PM: Entering %s sleep\n", pm_states[state]);
- error = suspend_devices_and_enter(state);
-
- Finish:
- pr_debug("PM: Finishing wakeup.\n");
- suspend_finish();
- Unlock:
- mutex_unlock(&pm_mutex);
- return error;
-}
-
-
-/**
- * pm_suspend - Externally visible function for suspending system.
- * @state: Enumerated value of state to enter.
- *
- * Determine whether or not value is within range, get state
- * structure, and enter (above).
- */
-
-int pm_suspend(suspend_state_t state)
-{
- if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX)
- return enter_state(state);
- return -EINVAL;
-}
-
-EXPORT_SYMBOL(pm_suspend);
-
-#endif /* CONFIG_SUSPEND */
-
struct kobject *power_kobj;
/**
@@ -498,7 +120,6 @@ struct kobject *power_kobj;
* store() accepts one of those strings, translates it into the
* proper enumerated value, and initiates a suspend transition.
*/
-
static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
@@ -596,7 +217,6 @@ static struct attribute_group attr_group = {
.attrs = g,
};
-
static int __init pm_init(void)
{
power_kobj = kobject_create_and_add("power", NULL);
@@ -606,144 +226,3 @@ static int __init pm_init(void)
}
core_initcall(pm_init);
-
-
-#ifdef CONFIG_PM_TEST_SUSPEND
-
-#include <linux/rtc.h>
-
-/*
- * To test system suspend, we need a hands-off mechanism to resume the
- * system. RTCs wake alarms are a common self-contained mechanism.
- */
-
-static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state)
-{
- static char err_readtime[] __initdata =
- KERN_ERR "PM: can't read %s time, err %d\n";
- static char err_wakealarm [] __initdata =
- KERN_ERR "PM: can't set %s wakealarm, err %d\n";
- static char err_suspend[] __initdata =
- KERN_ERR "PM: suspend test failed, error %d\n";
- static char info_test[] __initdata =
- KERN_INFO "PM: test RTC wakeup from '%s' suspend\n";
-
- unsigned long now;
- struct rtc_wkalrm alm;
- int status;
-
- /* this may fail if the RTC hasn't been initialized */
- status = rtc_read_time(rtc, &alm.time);
- if (status < 0) {
- printk(err_readtime, dev_name(&rtc->dev), status);
- return;
- }
- rtc_tm_to_time(&alm.time, &now);
-
- memset(&alm, 0, sizeof alm);
- rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time);
- alm.enabled = true;
-
- status = rtc_set_alarm(rtc, &alm);
- if (status < 0) {
- printk(err_wakealarm, dev_name(&rtc->dev), status);
- return;
- }
-
- if (state == PM_SUSPEND_MEM) {
- printk(info_test, pm_states[state]);
- status = pm_suspend(state);
- if (status == -ENODEV)
- state = PM_SUSPEND_STANDBY;
- }
- if (state == PM_SUSPEND_STANDBY) {
- printk(info_test, pm_states[state]);
- status = pm_suspend(state);
- }
- if (status < 0)
- printk(err_suspend, status);
-
- /* Some platforms can't detect that the alarm triggered the
- * wakeup, or (accordingly) disable it after it afterwards.
- * It's supposed to give oneshot behavior; cope.
- */
- alm.enabled = false;
- rtc_set_alarm(rtc, &alm);
-}
-
-static int __init has_wakealarm(struct device *dev, void *name_ptr)
-{
- struct rtc_device *candidate = to_rtc_device(dev);
-
- if (!candidate->ops->set_alarm)
- return 0;
- if (!device_may_wakeup(candidate->dev.parent))
- return 0;
-
- *(const char **)name_ptr = dev_name(dev);
- return 1;
-}
-
-/*
- * Kernel options like "test_suspend=mem" force suspend/resume sanity tests
- * at startup time. They're normally disabled, for faster boot and because
- * we can't know which states really work on this particular system.
- */
-static suspend_state_t test_state __initdata = PM_SUSPEND_ON;
-
-static char warn_bad_state[] __initdata =
- KERN_WARNING "PM: can't test '%s' suspend state\n";
-
-static int __init setup_test_suspend(char *value)
-{
- unsigned i;
-
- /* "=mem" ==> "mem" */
- value++;
- for (i = 0; i < PM_SUSPEND_MAX; i++) {
- if (!pm_states[i])
- continue;
- if (strcmp(pm_states[i], value) != 0)
- continue;
- test_state = (__force suspend_state_t) i;
- return 0;
- }
- printk(warn_bad_state, value);
- return 0;
-}
-__setup("test_suspend", setup_test_suspend);
-
-static int __init test_suspend(void)
-{
- static char warn_no_rtc[] __initdata =
- KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n";
-
- char *pony = NULL;
- struct rtc_device *rtc = NULL;
-
- /* PM is initialized by now; is that state testable? */
- if (test_state == PM_SUSPEND_ON)
- goto done;
- if (!valid_state(test_state)) {
- printk(warn_bad_state, pm_states[test_state]);
- goto done;
- }
-
- /* RTCs have initialized by now too ... can we use one? */
- class_find_device(rtc_class, NULL, &pony, has_wakealarm);
- if (pony)
- rtc = rtc_class_open(pony);
- if (!rtc) {
- printk(warn_no_rtc);
- goto done;
- }
-
- /* go for it */
- test_wakealarm(rtc, test_state);
- rtc_class_close(rtc);
-done:
- return 0;
-}
-late_initcall(test_suspend);
-
-#endif /* CONFIG_PM_TEST_SUSPEND */
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 46b5ec7a3af..26d5a26f82e 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -45,7 +45,7 @@ static inline char *check_image_kernel(struct swsusp_info *info)
*/
#define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT)
-/* kernel/power/disk.c */
+/* kernel/power/hibernate.c */
extern int hibernation_snapshot(int platform_mode);
extern int hibernation_restore(int platform_mode);
extern int hibernation_platform_enter(void);
@@ -74,7 +74,7 @@ extern asmlinkage int swsusp_arch_resume(void);
extern int create_basic_memory_bitmaps(void);
extern void free_basic_memory_bitmaps(void);
-extern unsigned int count_data_pages(void);
+extern int swsusp_shrink_memory(void);
/**
* Auxiliary structure used for reading the snapshot image data and
@@ -147,9 +147,8 @@ extern int swsusp_swap_in_use(void);
*/
#define SF_PLATFORM_MODE 1
-/* kernel/power/disk.c */
+/* kernel/power/hibernate.c */
extern int swsusp_check(void);
-extern int swsusp_shrink_memory(void);
extern void swsusp_free(void);
extern int swsusp_read(unsigned int *flags_p);
extern int swsusp_write(unsigned int flags);
@@ -161,22 +160,36 @@ extern void swsusp_show_speed(struct timeval *, struct timeval *,
unsigned int, char *);
#ifdef CONFIG_SUSPEND
-/* kernel/power/main.c */
+/* kernel/power/suspend.c */
+extern const char *const pm_states[];
+
+extern bool valid_state(suspend_state_t state);
extern int suspend_devices_and_enter(suspend_state_t state);
+extern int enter_state(suspend_state_t state);
#else /* !CONFIG_SUSPEND */
static inline int suspend_devices_and_enter(suspend_state_t state)
{
return -ENOSYS;
}
+static inline int enter_state(suspend_state_t state) { return -ENOSYS; }
+static inline bool valid_state(suspend_state_t state) { return false; }
#endif /* !CONFIG_SUSPEND */
+#ifdef CONFIG_PM_TEST_SUSPEND
+/* kernel/power/suspend_test.c */
+extern void suspend_test_start(void);
+extern void suspend_test_finish(const char *label);
+#else /* !CONFIG_PM_TEST_SUSPEND */
+static inline void suspend_test_start(void) {}
+static inline void suspend_test_finish(const char *label) {}
+#endif /* !CONFIG_PM_TEST_SUSPEND */
+
#ifdef CONFIG_PM_SLEEP
/* kernel/power/main.c */
extern int pm_notifier_call_chain(unsigned long val);
#endif
#ifdef CONFIG_HIGHMEM
-unsigned int count_highmem_pages(void);
int restore_highmem(void);
#else
static inline unsigned int count_highmem_pages(void) { return 0; }
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 33e2e4a819f..523a451b45d 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -39,6 +39,14 @@ static int swsusp_page_is_free(struct page *);
static void swsusp_set_page_forbidden(struct page *);
static void swsusp_unset_page_forbidden(struct page *);
+/*
+ * Preferred image size in bytes (tunable via /sys/power/image_size).
+ * When it is set to N, swsusp will do its best to ensure the image
+ * size will not exceed N bytes, but if that is impossible, it will
+ * try to create the smallest image possible.
+ */
+unsigned long image_size = 500 * 1024 * 1024;
+
/* List of PBEs needed for restoring the pages that were allocated before
* the suspend and included in the suspend image, but have also been
* allocated by the "resume" kernel, so their contents cannot be written
@@ -840,7 +848,7 @@ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
* pages.
*/
-unsigned int count_highmem_pages(void)
+static unsigned int count_highmem_pages(void)
{
struct zone *zone;
unsigned int n = 0;
@@ -902,7 +910,7 @@ static struct page *saveable_page(struct zone *zone, unsigned long pfn)
* pages.
*/
-unsigned int count_data_pages(void)
+static unsigned int count_data_pages(void)
{
struct zone *zone;
unsigned long pfn, max_zone_pfn;
@@ -1058,6 +1066,74 @@ void swsusp_free(void)
buffer = NULL;
}
+/**
+ * swsusp_shrink_memory - Try to free as much memory as needed
+ *
+ * ... but do not OOM-kill anyone
+ *
+ * Notice: all userland should be stopped before it is called, or
+ * livelock is possible.
+ */
+
+#define SHRINK_BITE 10000
+static inline unsigned long __shrink_memory(long tmp)
+{
+ if (tmp > SHRINK_BITE)
+ tmp = SHRINK_BITE;
+ return shrink_all_memory(tmp);
+}
+
+int swsusp_shrink_memory(void)
+{
+ long tmp;
+ struct zone *zone;
+ unsigned long pages = 0;
+ unsigned int i = 0;
+ char *p = "-\\|/";
+ struct timeval start, stop;
+
+ printk(KERN_INFO "PM: Shrinking memory... ");
+ do_gettimeofday(&start);
+ do {
+ long size, highmem_size;
+
+ highmem_size = count_highmem_pages();
+ size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES;
+ tmp = size;
+ size += highmem_size;
+ for_each_populated_zone(zone) {
+ tmp += snapshot_additional_pages(zone);
+ if (is_highmem(zone)) {
+ highmem_size -=
+ zone_page_state(zone, NR_FREE_PAGES);
+ } else {
+ tmp -= zone_page_state(zone, NR_FREE_PAGES);
+ tmp += zone->lowmem_reserve[ZONE_NORMAL];
+ }
+ }
+
+ if (highmem_size < 0)
+ highmem_size = 0;
+
+ tmp += highmem_size;
+ if (tmp > 0) {
+ tmp = __shrink_memory(tmp);
+ if (!tmp)
+ return -ENOMEM;
+ pages += tmp;
+ } else if (size > image_size / PAGE_SIZE) {
+ tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
+ pages += tmp;
+ }
+ printk("\b%c", p[i++%4]);
+ } while (tmp > 0);
+ do_gettimeofday(&stop);
+ printk("\bdone (%lu pages freed)\n", pages);
+ swsusp_show_speed(&start, &stop, pages, "Freed");
+
+ return 0;
+}
+
#ifdef CONFIG_HIGHMEM
/**
* count_pages_for_highmem - compute the number of non-highmem pages
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
new file mode 100644
index 00000000000..6f10dfc2d3e
--- /dev/null
+++ b/kernel/power/suspend.c
@@ -0,0 +1,300 @@
+/*
+ * kernel/power/suspend.c - Suspend to RAM and standby functionality.
+ *
+ * Copyright (c) 2003 Patrick Mochel
+ * Copyright (c) 2003 Open Source Development Lab
+ * Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/cpu.h>
+#include <linux/syscalls.h>
+
+#include "power.h"
+
+const char *const pm_states[PM_SUSPEND_MAX] = {
+ [PM_SUSPEND_STANDBY] = "standby",
+ [PM_SUSPEND_MEM] = "mem",
+};
+
+static struct platform_suspend_ops *suspend_ops;
+
+/**
+ * suspend_set_ops - Set the global suspend method table.
+ * @ops: Pointer to ops structure.
+ */
+void suspend_set_ops(struct platform_suspend_ops *ops)
+{
+ mutex_lock(&pm_mutex);
+ suspend_ops = ops;
+ mutex_unlock(&pm_mutex);
+}
+
+bool valid_state(suspend_state_t state)
+{
+ /*
+ * All states need lowlevel support and need to be valid to the lowlevel
+ * implementation, no valid callback implies that none are valid.
+ */
+ return suspend_ops && suspend_ops->valid && suspend_ops->valid(state);
+}
+
+/**
+ * suspend_valid_only_mem - generic memory-only valid callback
+ *
+ * Platform drivers that implement mem suspend only and only need
+ * to check for that in their .valid callback can use this instead
+ * of rolling their own .valid callback.
+ */
+int suspend_valid_only_mem(suspend_state_t state)
+{
+ return state == PM_SUSPEND_MEM;
+}
+
+static int suspend_test(int level)
+{
+#ifdef CONFIG_PM_DEBUG
+ if (pm_test_level == level) {
+ printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
+ mdelay(5000);
+ return 1;
+ }
+#endif /* !CONFIG_PM_DEBUG */
+ return 0;
+}
+
+/**
+ * suspend_prepare - Do prep work before entering low-power state.
+ *
+ * This is common code that is called for each state that we're entering.
+ * Run suspend notifiers, allocate a console and stop all processes.
+ */
+static int suspend_prepare(void)
+{
+ int error;
+
+ if (!suspend_ops || !suspend_ops->enter)
+ return -EPERM;
+
+ pm_prepare_console();
+
+ error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
+ if (error)
+ goto Finish;
+
+ error = usermodehelper_disable();
+ if (error)
+ goto Finish;
+
+ error = suspend_freeze_processes();
+ if (!error)
+ return 0;
+
+ suspend_thaw_processes();
+ usermodehelper_enable();
+ Finish:
+ pm_notifier_call_chain(PM_POST_SUSPEND);
+ pm_restore_console();
+ return error;
+}
+
+/* default implementation */
+void __attribute__ ((weak)) arch_suspend_disable_irqs(void)
+{
+ local_irq_disable();
+}
+
+/* default implementation */
+void __attribute__ ((weak)) arch_suspend_enable_irqs(void)
+{
+ local_irq_enable();
+}
+
+/**
+ * suspend_enter - enter the desired system sleep state.
+ * @state: state to enter
+ *
+ * This function should be called after devices have been suspended.
+ */
+static int suspend_enter(suspend_state_t state)
+{
+ int error;
+
+ if (suspend_ops->prepare) {
+ error = suspend_ops->prepare();
+ if (error)
+ return error;
+ }
+
+ error = dpm_suspend_noirq(PMSG_SUSPEND);
+ if (error) {
+ printk(KERN_ERR "PM: Some devices failed to power down\n");
+ goto Platfrom_finish;
+ }
+
+ if (suspend_ops->prepare_late) {
+ error = suspend_ops->prepare_late();
+ if (error)
+ goto Power_up_devices;
+ }
+
+ if (suspend_test(TEST_PLATFORM))
+ goto Platform_wake;
+
+ error = disable_nonboot_cpus();
+ if (error || suspend_test(TEST_CPUS))
+ goto Enable_cpus;
+
+ arch_suspend_disable_irqs();
+ BUG_ON(!irqs_disabled());
+
+ error = sysdev_suspend(PMSG_SUSPEND);
+ if (!error) {
+ if (!suspend_test(TEST_CORE))
+ error = suspend_ops->enter(state);
+ sysdev_resume();
+ }
+
+ arch_suspend_enable_irqs();
+ BUG_ON(irqs_disabled());
+
+ Enable_cpus:
+ enable_nonboot_cpus();
+
+ Platform_wake:
+ if (suspend_ops->wake)
+ suspend_ops->wake();
+
+ Power_up_devices:
+ dpm_resume_noirq(PMSG_RESUME);
+
+ Platfrom_finish:
+ if (suspend_ops->finish)
+ suspend_ops->finish();
+
+ return error;
+}
+
+/**
+ * suspend_devices_and_enter - suspend devices and enter the desired system
+ * sleep state.
+ * @state: state to enter
+ */
+int suspend_devices_and_enter(suspend_state_t state)
+{
+ int error;
+
+ if (!suspend_ops)
+ return -ENOSYS;
+
+ if (suspend_ops->begin) {
+ error = suspend_ops->begin(state);
+ if (error)
+ goto Close;
+ }
+ suspend_console();
+ suspend_test_start();
+ error = dpm_suspend_start(PMSG_SUSPEND);
+ if (error) {
+ printk(KERN_ERR "PM: Some devices failed to suspend\n");
+ goto Recover_platform;
+ }
+ suspend_test_finish("suspend devices");
+ if (suspend_test(TEST_DEVICES))
+ goto Recover_platform;
+
+ suspend_enter(state);
+
+ Resume_devices:
+ suspend_test_start();
+ dpm_resume_end(PMSG_RESUME);
+ suspend_test_finish("resume devices");
+ resume_console();
+ Close:
+ if (suspend_ops->end)
+ suspend_ops->end();
+ return error;
+
+ Recover_platform:
+ if (suspend_ops->recover)
+ suspend_ops->recover();
+ goto Resume_devices;
+}
+
+/**
+ * suspend_finish - Do final work before exiting suspend sequence.
+ *
+ * Call platform code to clean up, restart processes, and free the
+ * console that we've allocated. This is not called for suspend-to-disk.
+ */
+static void suspend_finish(void)
+{
+ suspend_thaw_processes();
+ usermodehelper_enable();
+ pm_notifier_call_chain(PM_POST_SUSPEND);
+ pm_restore_console();
+}
+
+/**
+ * enter_state - Do common work of entering low-power state.
+ * @state: pm_state structure for state we're entering.
+ *
+ * Make sure we're the only ones trying to enter a sleep state. Fail
+ * if someone has beat us to it, since we don't want anything weird to
+ * happen when we wake up.
+ * Then, do the setup for suspend, enter the state, and cleaup (after
+ * we've woken up).
+ */
+int enter_state(suspend_state_t state)
+{
+ int error;
+
+ if (!valid_state(state))
+ return -ENODEV;
+
+ if (!mutex_trylock(&pm_mutex))
+ return -EBUSY;
+
+ printk(KERN_INFO "PM: Syncing filesystems ... ");
+ sys_sync();
+ printk("done.\n");
+
+ pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
+ error = suspend_prepare();
+ if (error)
+ goto Unlock;
+
+ if (suspend_test(TEST_FREEZER))
+ goto Finish;
+
+ pr_debug("PM: Entering %s sleep\n", pm_states[state]);
+ error = suspend_devices_and_enter(state);
+
+ Finish:
+ pr_debug("PM: Finishing wakeup.\n");
+ suspend_finish();
+ Unlock:
+ mutex_unlock(&pm_mutex);
+ return error;
+}
+
+/**
+ * pm_suspend - Externally visible function for suspending system.
+ * @state: Enumerated value of state to enter.
+ *
+ * Determine whether or not value is within range, get state
+ * structure, and enter (above).
+ */
+int pm_suspend(suspend_state_t state)
+{
+ if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX)
+ return enter_state(state);
+ return -EINVAL;
+}
+EXPORT_SYMBOL(pm_suspend);
diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c
new file mode 100644
index 00000000000..17d8bb1acf9
--- /dev/null
+++ b/kernel/power/suspend_test.c
@@ -0,0 +1,187 @@
+/*
+ * kernel/power/suspend_test.c - Suspend to RAM and standby test facility.
+ *
+ * Copyright (c) 2009 Pavel Machek <pavel@ucw.cz>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/init.h>
+#include <linux/rtc.h>
+
+#include "power.h"
+
+/*
+ * We test the system suspend code by setting an RTC wakealarm a short
+ * time in the future, then suspending. Suspending the devices won't
+ * normally take long ... some systems only need a few milliseconds.
+ *
+ * The time it takes is system-specific though, so when we test this
+ * during system bootup we allow a LOT of time.
+ */
+#define TEST_SUSPEND_SECONDS 5
+
+static unsigned long suspend_test_start_time;
+
+void suspend_test_start(void)
+{
+ /* FIXME Use better timebase than "jiffies", ideally a clocksource.
+ * What we want is a hardware counter that will work correctly even
+ * during the irqs-are-off stages of the suspend/resume cycle...
+ */
+ suspend_test_start_time = jiffies;
+}
+
+void suspend_test_finish(const char *label)
+{
+ long nj = jiffies - suspend_test_start_time;
+ unsigned msec;
+
+ msec = jiffies_to_msecs(abs(nj));
+ pr_info("PM: %s took %d.%03d seconds\n", label,
+ msec / 1000, msec % 1000);
+
+ /* Warning on suspend means the RTC alarm period needs to be
+ * larger -- the system was sooo slooowwww to suspend that the
+ * alarm (should have) fired before the system went to sleep!
+ *
+ * Warning on either suspend or resume also means the system
+ * has some performance issues. The stack dump of a WARN_ON
+ * is more likely to get the right attention than a printk...
+ */
+ WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label);
+}
+
+/*
+ * To test system suspend, we need a hands-off mechanism to resume the
+ * system. RTCs wake alarms are a common self-contained mechanism.
+ */
+
+static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state)
+{
+ static char err_readtime[] __initdata =
+ KERN_ERR "PM: can't read %s time, err %d\n";
+ static char err_wakealarm [] __initdata =
+ KERN_ERR "PM: can't set %s wakealarm, err %d\n";
+ static char err_suspend[] __initdata =
+ KERN_ERR "PM: suspend test failed, error %d\n";
+ static char info_test[] __initdata =
+ KERN_INFO "PM: test RTC wakeup from '%s' suspend\n";
+
+ unsigned long now;
+ struct rtc_wkalrm alm;
+ int status;
+
+ /* this may fail if the RTC hasn't been initialized */
+ status = rtc_read_time(rtc, &alm.time);
+ if (status < 0) {
+ printk(err_readtime, dev_name(&rtc->dev), status);
+ return;
+ }
+ rtc_tm_to_time(&alm.time, &now);
+
+ memset(&alm, 0, sizeof alm);
+ rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time);
+ alm.enabled = true;
+
+ status = rtc_set_alarm(rtc, &alm);
+ if (status < 0) {
+ printk(err_wakealarm, dev_name(&rtc->dev), status);
+ return;
+ }
+
+ if (state == PM_SUSPEND_MEM) {
+ printk(info_test, pm_states[state]);
+ status = pm_suspend(state);
+ if (status == -ENODEV)
+ state = PM_SUSPEND_STANDBY;
+ }
+ if (state == PM_SUSPEND_STANDBY) {
+ printk(info_test, pm_states[state]);
+ status = pm_suspend(state);
+ }
+ if (status < 0)
+ printk(err_suspend, status);
+
+ /* Some platforms can't detect that the alarm triggered the
+ * wakeup, or (accordingly) disable it after it afterwards.
+ * It's supposed to give oneshot behavior; cope.
+ */
+ alm.enabled = false;
+ rtc_set_alarm(rtc, &alm);
+}
+
+static int __init has_wakealarm(struct device *dev, void *name_ptr)
+{
+ struct rtc_device *candidate = to_rtc_device(dev);
+
+ if (!candidate->ops->set_alarm)
+ return 0;
+ if (!device_may_wakeup(candidate->dev.parent))
+ return 0;
+
+ *(const char **)name_ptr = dev_name(dev);
+ return 1;
+}
+
+/*
+ * Kernel options like "test_suspend=mem" force suspend/resume sanity tests
+ * at startup time. They're normally disabled, for faster boot and because
+ * we can't know which states really work on this particular system.
+ */
+static suspend_state_t test_state __initdata = PM_SUSPEND_ON;
+
+static char warn_bad_state[] __initdata =
+ KERN_WARNING "PM: can't test '%s' suspend state\n";
+
+static int __init setup_test_suspend(char *value)
+{
+ unsigned i;
+
+ /* "=mem" ==> "mem" */
+ value++;
+ for (i = 0; i < PM_SUSPEND_MAX; i++) {
+ if (!pm_states[i])
+ continue;
+ if (strcmp(pm_states[i], value) != 0)
+ continue;
+ test_state = (__force suspend_state_t) i;
+ return 0;
+ }
+ printk(warn_bad_state, value);
+ return 0;
+}
+__setup("test_suspend", setup_test_suspend);
+
+static int __init test_suspend(void)
+{
+ static char warn_no_rtc[] __initdata =
+ KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n";
+
+ char *pony = NULL;
+ struct rtc_device *rtc = NULL;
+
+ /* PM is initialized by now; is that state testable? */
+ if (test_state == PM_SUSPEND_ON)
+ goto done;
+ if (!valid_state(test_state)) {
+ printk(warn_bad_state, pm_states[test_state]);
+ goto done;
+ }
+
+ /* RTCs have initialized by now too ... can we use one? */
+ class_find_device(rtc_class, NULL, &pony, has_wakealarm);
+ if (pony)
+ rtc = rtc_class_open(pony);
+ if (!rtc) {
+ printk(warn_no_rtc);
+ goto done;
+ }
+
+ /* go for it */
+ test_wakealarm(rtc, test_state);
+ rtc_class_close(rtc);
+done:
+ return 0;
+}
+late_initcall(test_suspend);
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 78c35047586..6a07f4dbf2f 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -55,14 +55,6 @@
#include "power.h"
-/*
- * Preferred image size in bytes (tunable via /sys/power/image_size).
- * When it is set to N, swsusp will do its best to ensure the image
- * size will not exceed N bytes, but if that is impossible, it will
- * try to create the smallest image possible.
- */
-unsigned long image_size = 500 * 1024 * 1024;
-
int in_suspend __nosavedata = 0;
/**
@@ -194,193 +186,3 @@ void swsusp_show_speed(struct timeval *start, struct timeval *stop,
centisecs / 100, centisecs % 100,
kps / 1000, (kps % 1000) / 10);
}
-
-/**
- * swsusp_shrink_memory - Try to free as much memory as needed
- *
- * ... but do not OOM-kill anyone
- *
- * Notice: all userland should be stopped before it is called, or
- * livelock is possible.
- */
-
-#define SHRINK_BITE 10000
-static inline unsigned long __shrink_memory(long tmp)
-{
- if (tmp > SHRINK_BITE)
- tmp = SHRINK_BITE;
- return shrink_all_memory(tmp);
-}
-
-int swsusp_shrink_memory(void)
-{
- long tmp;
- struct zone *zone;
- unsigned long pages = 0;
- unsigned int i = 0;
- char *p = "-\\|/";
- struct timeval start, stop;
-
- printk(KERN_INFO "PM: Shrinking memory... ");
- do_gettimeofday(&start);
- do {
- long size, highmem_size;
-
- highmem_size = count_highmem_pages();
- size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES;
- tmp = size;
- size += highmem_size;
- for_each_populated_zone(zone) {
- tmp += snapshot_additional_pages(zone);
- if (is_highmem(zone)) {
- highmem_size -=
- zone_page_state(zone, NR_FREE_PAGES);
- } else {
- tmp -= zone_page_state(zone, NR_FREE_PAGES);
- tmp += zone->lowmem_reserve[ZONE_NORMAL];
- }
- }
-
- if (highmem_size < 0)
- highmem_size = 0;
-
- tmp += highmem_size;
- if (tmp > 0) {
- tmp = __shrink_memory(tmp);
- if (!tmp)
- return -ENOMEM;
- pages += tmp;
- } else if (size > image_size / PAGE_SIZE) {
- tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
- pages += tmp;
- }
- printk("\b%c", p[i++%4]);
- } while (tmp > 0);
- do_gettimeofday(&stop);
- printk("\bdone (%lu pages freed)\n", pages);
- swsusp_show_speed(&start, &stop, pages, "Freed");
-
- return 0;
-}
-
-/*
- * Platforms, like ACPI, may want us to save some memory used by them during
- * hibernation and to restore the contents of this memory during the subsequent
- * resume. The code below implements a mechanism allowing us to do that.
- */
-
-struct nvs_page {
- unsigned long phys_start;
- unsigned int size;
- void *kaddr;
- void *data;
- struct list_head node;
-};
-
-static LIST_HEAD(nvs_list);
-
-/**
- * hibernate_nvs_register - register platform NVS memory region to save
- * @start - physical address of the region
- * @size - size of the region
- *
- * The NVS region need not be page-aligned (both ends) and we arrange
- * things so that the data from page-aligned addresses in this region will
- * be copied into separate RAM pages.
- */
-int hibernate_nvs_register(unsigned long start, unsigned long size)
-{
- struct nvs_page *entry, *next;
-
- while (size > 0) {
- unsigned int nr_bytes;
-
- entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL);
- if (!entry)
- goto Error;
-
- list_add_tail(&entry->node, &nvs_list);
- entry->phys_start = start;
- nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK);
- entry->size = (size < nr_bytes) ? size : nr_bytes;
-
- start += entry->size;
- size -= entry->size;
- }
- return 0;
-
- Error:
- list_for_each_entry_safe(entry, next, &nvs_list, node) {
- list_del(&entry->node);
- kfree(entry);
- }
- return -ENOMEM;
-}
-
-/**
- * hibernate_nvs_free - free data pages allocated for saving NVS regions
- */
-void hibernate_nvs_free(void)
-{
- struct nvs_page *entry;
-
- list_for_each_entry(entry, &nvs_list, node)
- if (entry->data) {
- free_page((unsigned long)entry->data);
- entry->data = NULL;
- if (entry->kaddr) {
- iounmap(entry->kaddr);
- entry->kaddr = NULL;
- }
- }
-}
-
-/**
- * hibernate_nvs_alloc - allocate memory necessary for saving NVS regions
- */
-int hibernate_nvs_alloc(void)
-{
- struct nvs_page *entry;
-
- list_for_each_entry(entry, &nvs_list, node) {
- entry->data = (void *)__get_free_page(GFP_KERNEL);
- if (!entry->data) {
- hibernate_nvs_free();
- return -ENOMEM;
- }
- }
- return 0;
-}
-
-/**
- * hibernate_nvs_save - save NVS memory regions
- */
-void hibernate_nvs_save(void)
-{
- struct nvs_page *entry;
-
- printk(KERN_INFO "PM: Saving platform NVS memory\n");
-
- list_for_each_entry(entry, &nvs_list, node)
- if (entry->data) {
- entry->kaddr = ioremap(entry->phys_start, entry->size);
- memcpy(entry->data, entry->kaddr, entry->size);
- }
-}
-
-/**
- * hibernate_nvs_restore - restore NVS memory regions
- *
- * This function is going to be called with interrupts disabled, so it
- * cannot iounmap the virtual addresses used to access the NVS region.
- */
-void hibernate_nvs_restore(void)
-{
- struct nvs_page *entry;
-
- printk(KERN_INFO "PM: Restoring platform NVS memory\n");
-
- list_for_each_entry(entry, &nvs_list, node)
- if (entry->data)
- memcpy(entry->kaddr, entry->data, entry->size);
-}
diff --git a/kernel/sched.c b/kernel/sched.c
index f04aa966450..8ec9d13140b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2192,6 +2192,7 @@ void kick_process(struct task_struct *p)
smp_send_reschedule(cpu);
preempt_enable();
}
+EXPORT_SYMBOL_GPL(kick_process);
/*
* Return a low guess at the load of a migration-source cpu weighted