From 0a0c5168df270a50e3518e4f12bddb31f8f5f38f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Mar 2009 22:33:49 +0100 Subject: PM: Introduce functions for suspending and resuming device interrupts Introduce helper functions allowing us to prevent device drivers from getting any interrupts (without disabling interrupts on the CPU) during suspend (or hibernation) and to make them start to receive interrupts again during the subsequent resume. These functions make it possible to keep timer interrupts enabled while the "late" suspend and "early" resume callbacks provided by device drivers are being executed. In turn, this allows device drivers' "late" suspend and "early" resume callbacks to sleep, execute ACPI callbacks etc. The functions introduced here will be used to rework the handling of interrupts during suspend (hibernation) and resume. Namely, interrupts will only be disabled on the CPU right before suspending sysdevs, while device drivers will be prevented from receiving interrupts, with the help of the new helper function, before their "late" suspend callbacks run (and analogously during resume). Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar --- include/linux/interrupt.h | 9 ++++++ include/linux/irq.h | 1 + kernel/irq/Makefile | 1 + kernel/irq/internals.h | 2 ++ kernel/irq/manage.c | 31 ++++++++++++++----- kernel/irq/pm.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 116 insertions(+), 7 deletions(-) create mode 100644 kernel/irq/pm.c diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 0c9cb63e689..c68bffd182b 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -117,6 +117,15 @@ extern void disable_irq_nosync(unsigned int irq); extern void disable_irq(unsigned int irq); extern void enable_irq(unsigned int irq); +/* The following three functions are for the core kernel use only. */ +extern void suspend_device_irqs(void); +extern void resume_device_irqs(void); +#ifdef CONFIG_PM_SLEEP +extern int check_wakeup_irqs(void); +#else +static inline int check_wakeup_irqs(void) { return 0; } +#endif + #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) extern cpumask_var_t irq_default_affinity; diff --git a/include/linux/irq.h b/include/linux/irq.h index 9c62fbe2ef3..974890b3c52 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -67,6 +67,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ #define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ #define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/ +#define IRQ_SUSPENDED 0x04000000 /* IRQ has gone through suspend sequence */ #ifdef CONFIG_IRQ_PER_CPU # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 4dd5b1edac9..3394f8f5296 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o +obj-$(CONFIG_PM_SLEEP) += pm.o diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index ee1aa9f8e8b..01ce20eab38 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -12,6 +12,8 @@ extern void compat_irq_chip_set_default_handler(struct irq_desc *desc); extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, unsigned long flags); +extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp); +extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume); extern struct lock_class_key irq_desc_lock_class; extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 6458e99984c..1516ab77355 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -162,6 +162,20 @@ static inline int setup_affinity(unsigned int irq, struct irq_desc *desc) } #endif +void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend) +{ + if (suspend) { + if (!desc->action || (desc->action->flags & IRQF_TIMER)) + return; + desc->status |= IRQ_SUSPENDED; + } + + if (!desc->depth++) { + desc->status |= IRQ_DISABLED; + desc->chip->disable(irq); + } +} + /** * disable_irq_nosync - disable an irq without waiting * @irq: Interrupt to disable @@ -182,10 +196,7 @@ void disable_irq_nosync(unsigned int irq) return; spin_lock_irqsave(&desc->lock, flags); - if (!desc->depth++) { - desc->status |= IRQ_DISABLED; - desc->chip->disable(irq); - } + __disable_irq(desc, irq, false); spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(disable_irq_nosync); @@ -215,15 +226,21 @@ void disable_irq(unsigned int irq) } EXPORT_SYMBOL(disable_irq); -static void __enable_irq(struct irq_desc *desc, unsigned int irq) +void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume) { + if (resume) + desc->status &= ~IRQ_SUSPENDED; + switch (desc->depth) { case 0: + err_out: WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", irq); break; case 1: { unsigned int status = desc->status & ~IRQ_DISABLED; + if (desc->status & IRQ_SUSPENDED) + goto err_out; /* Prevent probing on this irq: */ desc->status = status | IRQ_NOPROBE; check_irq_resend(desc, irq); @@ -253,7 +270,7 @@ void enable_irq(unsigned int irq) return; spin_lock_irqsave(&desc->lock, flags); - __enable_irq(desc, irq); + __enable_irq(desc, irq, false); spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(enable_irq); @@ -511,7 +528,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ if (shared && (desc->status & IRQ_SPURIOUS_DISABLED)) { desc->status &= ~IRQ_SPURIOUS_DISABLED; - __enable_irq(desc, irq); + __enable_irq(desc, irq, false); } spin_unlock_irqrestore(&desc->lock, flags); diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c new file mode 100644 index 00000000000..638d8bedec1 --- /dev/null +++ b/kernel/irq/pm.c @@ -0,0 +1,79 @@ +/* + * linux/kernel/irq/pm.c + * + * Copyright (C) 2009 Rafael J. Wysocki , Novell Inc. + * + * This file contains power management functions related to interrupts. + */ + +#include +#include +#include + +#include "internals.h" + +/** + * suspend_device_irqs - disable all currently enabled interrupt lines + * + * During system-wide suspend or hibernation device interrupts need to be + * disabled at the chip level and this function is provided for this purpose. + * It disables all interrupt lines that are enabled at the moment and sets the + * IRQ_SUSPENDED flag for them. + */ +void suspend_device_irqs(void) +{ + struct irq_desc *desc; + int irq; + + for_each_irq_desc(irq, desc) { + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + __disable_irq(desc, irq, true); + spin_unlock_irqrestore(&desc->lock, flags); + } + + for_each_irq_desc(irq, desc) + if (desc->status & IRQ_SUSPENDED) + synchronize_irq(irq); +} +EXPORT_SYMBOL_GPL(suspend_device_irqs); + +/** + * resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs() + * + * Enable all interrupt lines previously disabled by suspend_device_irqs() that + * have the IRQ_SUSPENDED flag set. + */ +void resume_device_irqs(void) +{ + struct irq_desc *desc; + int irq; + + for_each_irq_desc(irq, desc) { + unsigned long flags; + + if (!(desc->status & IRQ_SUSPENDED)) + continue; + + spin_lock_irqsave(&desc->lock, flags); + __enable_irq(desc, irq, true); + spin_unlock_irqrestore(&desc->lock, flags); + } +} +EXPORT_SYMBOL_GPL(resume_device_irqs); + +/** + * check_wakeup_irqs - check if any wake-up interrupts are pending + */ +int check_wakeup_irqs(void) +{ + struct irq_desc *desc; + int irq; + + for_each_irq_desc(irq, desc) + if ((desc->status & IRQ_WAKEUP) && (desc->status & IRQ_PENDING)) + return -EBUSY; + + return 0; +} -- cgit v1.2.3 From 2ed8d2b3a81bdbb0418301628ccdb008ac9f40b7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Mar 2009 22:34:06 +0100 Subject: PM: Rework handling of interrupts during suspend-resume Use the functions introduced in by the previous patch, suspend_device_irqs(), resume_device_irqs() and check_wakeup_irqs(), to rework the handling of interrupts during suspend (hibernation) and resume. Namely, interrupts will only be disabled on the CPU right before suspending sysdevs, while device drivers will be prevented from receiving interrupts, with the help of the new helper function, before their "late" suspend callbacks run (and analogously during resume). In addition, since the device interrups are now disabled before the CPU has turned all interrupts off and the CPU will ACK the interrupts setting the IRQ_PENDING bit for them, check in sysdev_suspend() if any wake-up interrupts are pending and abort suspend if that's the case. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar --- arch/x86/kernel/apm_32.c | 15 +++++++++++---- drivers/base/power/main.c | 20 +++++++++++--------- drivers/base/sys.c | 8 ++++++++ drivers/xen/manage.c | 16 +++++++++------- kernel/kexec.c | 8 ++++---- kernel/power/disk.c | 39 +++++++++++++++++++++++++++++---------- kernel/power/main.c | 17 +++++++++++------ 7 files changed, 83 insertions(+), 40 deletions(-) diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 10033fe718e..ac7783a6743 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1190,8 +1190,10 @@ static int suspend(int vetoable) struct apm_user *as; device_suspend(PMSG_SUSPEND); - local_irq_disable(); + device_power_down(PMSG_SUSPEND); + + local_irq_disable(); sysdev_suspend(PMSG_SUSPEND); local_irq_enable(); @@ -1209,9 +1211,12 @@ static int suspend(int vetoable) if (err != APM_SUCCESS) apm_error("suspend", err); err = (err == APM_SUCCESS) ? 0 : -EIO; + sysdev_resume(); - device_power_up(PMSG_RESUME); local_irq_enable(); + + device_power_up(PMSG_RESUME); + device_resume(PMSG_RESUME); queue_event(APM_NORMAL_RESUME, NULL); spin_lock(&user_list_lock); @@ -1228,8 +1233,9 @@ static void standby(void) { int err; - local_irq_disable(); device_power_down(PMSG_SUSPEND); + + local_irq_disable(); sysdev_suspend(PMSG_SUSPEND); local_irq_enable(); @@ -1239,8 +1245,9 @@ static void standby(void) local_irq_disable(); sysdev_resume(); - device_power_up(PMSG_RESUME); local_irq_enable(); + + device_power_up(PMSG_RESUME); } static apm_event_t get_event(void) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index e255341682c..69b4ddb7de3 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "../base.h" #include "power.h" @@ -349,7 +350,8 @@ static int resume_device_noirq(struct device *dev, pm_message_t state) * Execute the appropriate "noirq resume" callback for all devices marked * as DPM_OFF_IRQ. * - * Must be called with interrupts disabled and only one CPU running. + * Must be called under dpm_list_mtx. Device drivers should not receive + * interrupts while it's being executed. */ static void dpm_power_up(pm_message_t state) { @@ -370,14 +372,13 @@ static void dpm_power_up(pm_message_t state) * device_power_up - Turn on all devices that need special attention. * @state: PM transition of the system being carried out. * - * Power on system devices, then devices that required we shut them down - * with interrupts disabled. - * - * Must be called with interrupts disabled. + * Call the "early" resume handlers and enable device drivers to receive + * interrupts. */ void device_power_up(pm_message_t state) { dpm_power_up(state); + resume_device_irqs(); } EXPORT_SYMBOL_GPL(device_power_up); @@ -602,16 +603,17 @@ static int suspend_device_noirq(struct device *dev, pm_message_t state) * device_power_down - Shut down special devices. * @state: PM transition of the system being carried out. * - * Power down devices that require interrupts to be disabled. - * Then power down system devices. + * Prevent device drivers from receiving interrupts and call the "late" + * suspend handlers. * - * Must be called with interrupts disabled and only one CPU running. + * Must be called under dpm_list_mtx. */ int device_power_down(pm_message_t state) { struct device *dev; int error = 0; + suspend_device_irqs(); list_for_each_entry_reverse(dev, &dpm_list, power.entry) { error = suspend_device_noirq(dev, state); if (error) { @@ -621,7 +623,7 @@ int device_power_down(pm_message_t state) dev->power.status = DPM_OFF_IRQ; } if (error) - dpm_power_up(resume_event(state)); + device_power_up(resume_event(state)); return error; } EXPORT_SYMBOL_GPL(device_power_down); diff --git a/drivers/base/sys.c b/drivers/base/sys.c index cbd36cf59a0..76ce75bad91 100644 --- a/drivers/base/sys.c +++ b/drivers/base/sys.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "base.h" @@ -369,6 +370,13 @@ int sysdev_suspend(pm_message_t state) struct sysdev_driver *drv, *err_drv; int ret; + pr_debug("Checking wake-up interrupts\n"); + + /* Return error code if there are any wake-up interrupts pending */ + ret = check_wakeup_irqs(); + if (ret) + return ret; + pr_debug("Suspending System Devices\n"); list_for_each_entry_reverse(cls, &system_kset->list, kset.kobj.entry) { diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 3ccd348d112..0d61db1e7b4 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -39,12 +39,6 @@ static int xen_suspend(void *data) BUG_ON(!irqs_disabled()); - err = device_power_down(PMSG_SUSPEND); - if (err) { - printk(KERN_ERR "xen_suspend: device_power_down failed: %d\n", - err); - return err; - } err = sysdev_suspend(PMSG_SUSPEND); if (err) { printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n", @@ -69,7 +63,6 @@ static int xen_suspend(void *data) xen_mm_unpin_all(); sysdev_resume(); - device_power_up(PMSG_RESUME); if (!*cancelled) { xen_irq_resume(); @@ -108,6 +101,12 @@ static void do_suspend(void) /* XXX use normal device tree? */ xenbus_suspend(); + err = device_power_down(PMSG_SUSPEND); + if (err) { + printk(KERN_ERR "device_power_down failed: %d\n", err); + goto resume_devices; + } + err = stop_machine(xen_suspend, &cancelled, cpumask_of(0)); if (err) { printk(KERN_ERR "failed to start xen_suspend: %d\n", err); @@ -120,6 +119,9 @@ static void do_suspend(void) } else xenbus_suspend_cancel(); + device_power_up(PMSG_RESUME); + +resume_devices: device_resume(PMSG_RESUME); /* Make sure timer events get retriggered on all CPUs */ diff --git a/kernel/kexec.c b/kernel/kexec.c index c7fd6692939..dade9af6bf2 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1454,7 +1454,6 @@ int kernel_kexec(void) if (error) goto Resume_devices; device_pm_lock(); - local_irq_disable(); /* At this point, device_suspend() has been called, * but *not* device_power_down(). We *must* * device_power_down() now. Otherwise, drivers for @@ -1464,8 +1463,9 @@ int kernel_kexec(void) */ error = device_power_down(PMSG_FREEZE); if (error) - goto Enable_irqs; + goto Unlock_pm; + local_irq_disable(); /* Suspend system devices */ error = sysdev_suspend(PMSG_FREEZE); if (error) @@ -1484,9 +1484,9 @@ int kernel_kexec(void) if (kexec_image->preserve_context) { sysdev_resume(); Power_up_devices: - device_power_up(PMSG_RESTORE); - Enable_irqs: local_irq_enable(); + device_power_up(PMSG_RESTORE); + Unlock_pm: device_pm_unlock(); enable_nonboot_cpus(); Resume_devices: diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 4a4a206b197..320bb0949bd 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -214,7 +214,7 @@ static int create_image(int platform_mode) return error; device_pm_lock(); - local_irq_disable(); + /* At this point, device_suspend() has been called, but *not* * device_power_down(). We *must* call device_power_down() now. * Otherwise, drivers for some devices (e.g. interrupt controllers) @@ -225,8 +225,11 @@ static int create_image(int platform_mode) if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting hibernation\n"); - goto Enable_irqs; + goto Unlock; } + + local_irq_disable(); + sysdev_suspend(PMSG_FREEZE); if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " @@ -252,12 +255,16 @@ static int create_image(int platform_mode) /* NOTE: device_power_up() is just a resume() for devices * that suspended with irqs off ... no overall powerup. */ + Power_up_devices: + local_irq_enable(); + device_power_up(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); - Enable_irqs: - local_irq_enable(); + + Unlock: device_pm_unlock(); + return error; } @@ -336,13 +343,16 @@ static int resume_target_kernel(void) int error; device_pm_lock(); - local_irq_disable(); + error = device_power_down(PMSG_QUIESCE); if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting resume\n"); - goto Enable_irqs; + goto Unlock; } + + local_irq_disable(); + sysdev_suspend(PMSG_QUIESCE); /* We'll ignore saved state, but this gets preempt count (etc) right */ save_processor_state(); @@ -366,11 +376,16 @@ static int resume_target_kernel(void) swsusp_free(); restore_processor_state(); touch_softlockup_watchdog(); + sysdev_resume(); - device_power_up(PMSG_RECOVER); - Enable_irqs: + local_irq_enable(); + + device_power_up(PMSG_RECOVER); + + Unlock: device_pm_unlock(); + return error; } @@ -447,15 +462,16 @@ int hibernation_platform_enter(void) goto Finish; device_pm_lock(); - local_irq_disable(); + error = device_power_down(PMSG_HIBERNATE); if (!error) { + local_irq_disable(); sysdev_suspend(PMSG_HIBERNATE); hibernation_ops->enter(); /* We should never get here */ while (1); } - local_irq_enable(); + device_pm_unlock(); /* @@ -464,12 +480,15 @@ int hibernation_platform_enter(void) */ Finish: hibernation_ops->finish(); + Resume_devices: entering_platform_hibernation = false; device_resume(PMSG_RESTORE); resume_console(); + Close: hibernation_ops->end(); + return error; } diff --git a/kernel/power/main.c b/kernel/power/main.c index c9632f841f6..f0a466736c0 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -287,17 +287,19 @@ void __attribute__ ((weak)) arch_suspend_enable_irqs(void) */ static int suspend_enter(suspend_state_t state) { - int error = 0; + int error; device_pm_lock(); - arch_suspend_disable_irqs(); - BUG_ON(!irqs_disabled()); - if ((error = device_power_down(PMSG_SUSPEND))) { + error = device_power_down(PMSG_SUSPEND); + if (error) { printk(KERN_ERR "PM: Some devices failed to power down\n"); goto Done; } + arch_suspend_disable_irqs(); + BUG_ON(!irqs_disabled()); + error = sysdev_suspend(PMSG_SUSPEND); if (!error) { if (!suspend_test(TEST_CORE)) @@ -305,11 +307,14 @@ static int suspend_enter(suspend_state_t state) sysdev_resume(); } - device_power_up(PMSG_RESUME); - Done: arch_suspend_enable_irqs(); BUG_ON(irqs_disabled()); + + device_power_up(PMSG_RESUME); + + Done: device_pm_unlock(); + return error; } -- cgit v1.2.3 From 900af0d973856d6feb6fc088c2d0d3fde57707d3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Mar 2009 22:34:15 +0100 Subject: PM: Change suspend code ordering Change the ordering of the suspend core code so that the platform "prepare" callback is executed and the nonboot CPUs are disabled after calling device drivers' "late suspend" methods. This change will allow us to rework the PCI PM core so that the power state of devices is changed in the "late" phase of suspend (and analogously in the "early" phase of resume), which in turn will allow us to avoid the race condition where a device using shared interrupts is put into a low power state with interrupts enabled and then an interrupt (for another device) comes in and confuses its driver. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar --- kernel/power/main.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/kernel/power/main.c b/kernel/power/main.c index f0a466736c0..f172f41858b 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -297,6 +297,19 @@ static int suspend_enter(suspend_state_t state) goto Done; } + if (suspend_ops->prepare) { + error = suspend_ops->prepare(); + if (error) + goto Power_up_devices; + } + + if (suspend_test(TEST_PLATFORM)) + goto Platfrom_finish; + + error = disable_nonboot_cpus(); + if (error || suspend_test(TEST_CPUS)) + goto Enable_cpus; + arch_suspend_disable_irqs(); BUG_ON(!irqs_disabled()); @@ -310,6 +323,14 @@ static int suspend_enter(suspend_state_t state) arch_suspend_enable_irqs(); BUG_ON(irqs_disabled()); + Enable_cpus: + enable_nonboot_cpus(); + + Platfrom_finish: + if (suspend_ops->finish) + suspend_ops->finish(); + + Power_up_devices: device_power_up(PMSG_RESUME); Done: @@ -346,23 +367,8 @@ int suspend_devices_and_enter(suspend_state_t state) if (suspend_test(TEST_DEVICES)) goto Recover_platform; - if (suspend_ops->prepare) { - error = suspend_ops->prepare(); - if (error) - goto Resume_devices; - } - - if (suspend_test(TEST_PLATFORM)) - goto Finish; + suspend_enter(state); - error = disable_nonboot_cpus(); - if (!error && !suspend_test(TEST_CPUS)) - suspend_enter(state); - - enable_nonboot_cpus(); - Finish: - if (suspend_ops->finish) - suspend_ops->finish(); Resume_devices: suspend_test_start(); device_resume(PMSG_RESUME); -- cgit v1.2.3 From 4aecd6718939eb3c4145b248369b65f7483a8a02 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Mar 2009 22:34:26 +0100 Subject: PM: Change hibernation code ordering Change the ordering of the hibernation core code so that the platform "prepare" callbacks are executed and the nonboot CPUs are disabled after calling device drivers' "late suspend" methods. This change (along with the previous analogous change of the suspend core code) will allow us to rework the PCI PM core so that the power state of devices is changed in the "late" phase of suspend (and analogously in the "early" phase of resume), which in turn will allow us to avoid the race condition where a device using shared interrupts is put into a low power state with interrupts enabled and then an interrupt (for another device) comes in and confuses its driver. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar --- kernel/power/disk.c | 109 +++++++++++++++++++++++++++++----------------------- 1 file changed, 61 insertions(+), 48 deletions(-) diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 320bb0949bd..e886d1332a1 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -228,13 +228,22 @@ static int create_image(int platform_mode) goto Unlock; } + error = platform_pre_snapshot(platform_mode); + if (error || hibernation_test(TEST_PLATFORM)) + goto Platform_finish; + + error = disable_nonboot_cpus(); + if (error || hibernation_test(TEST_CPUS) + || hibernation_testmode(HIBERNATION_TEST)) + goto Enable_cpus; + local_irq_disable(); sysdev_suspend(PMSG_FREEZE); if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting hibernation\n"); - goto Power_up_devices; + goto Enable_irqs; } if (hibernation_test(TEST_CORE)) @@ -250,15 +259,22 @@ static int create_image(int platform_mode) restore_processor_state(); if (!in_suspend) platform_leave(platform_mode); + Power_up: sysdev_resume(); /* NOTE: device_power_up() is just a resume() for devices * that suspended with irqs off ... no overall powerup. */ - Power_up_devices: + Enable_irqs: local_irq_enable(); + Enable_cpus: + enable_nonboot_cpus(); + + Platform_finish: + platform_finish(platform_mode); + device_power_up(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); @@ -298,25 +314,9 @@ int hibernation_snapshot(int platform_mode) if (hibernation_test(TEST_DEVICES)) goto Recover_platform; - error = platform_pre_snapshot(platform_mode); - if (error || hibernation_test(TEST_PLATFORM)) - goto Finish; - - error = disable_nonboot_cpus(); - if (!error) { - if (hibernation_test(TEST_CPUS)) - goto Enable_cpus; - - if (hibernation_testmode(HIBERNATION_TEST)) - goto Enable_cpus; + error = create_image(platform_mode); + /* Control returns here after successful restore */ - error = create_image(platform_mode); - /* Control returns here after successful restore */ - } - Enable_cpus: - enable_nonboot_cpus(); - Finish: - platform_finish(platform_mode); Resume_devices: device_resume(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); @@ -338,7 +338,7 @@ int hibernation_snapshot(int platform_mode) * kernel. */ -static int resume_target_kernel(void) +static int resume_target_kernel(bool platform_mode) { int error; @@ -351,9 +351,20 @@ static int resume_target_kernel(void) goto Unlock; } + error = platform_pre_restore(platform_mode); + if (error) + goto Cleanup; + + error = disable_nonboot_cpus(); + if (error) + goto Enable_cpus; + local_irq_disable(); - sysdev_suspend(PMSG_QUIESCE); + error = sysdev_suspend(PMSG_QUIESCE); + if (error) + goto Enable_irqs; + /* We'll ignore saved state, but this gets preempt count (etc) right */ save_processor_state(); error = restore_highmem(); @@ -379,8 +390,15 @@ static int resume_target_kernel(void) sysdev_resume(); + Enable_irqs: local_irq_enable(); + Enable_cpus: + enable_nonboot_cpus(); + + Cleanup: + platform_restore_cleanup(platform_mode); + device_power_up(PMSG_RECOVER); Unlock: @@ -405,19 +423,10 @@ int hibernation_restore(int platform_mode) pm_prepare_console(); suspend_console(); error = device_suspend(PMSG_QUIESCE); - if (error) - goto Finish; - - error = platform_pre_restore(platform_mode); if (!error) { - error = disable_nonboot_cpus(); - if (!error) - error = resume_target_kernel(); - enable_nonboot_cpus(); + error = resume_target_kernel(platform_mode); + device_resume(PMSG_RECOVER); } - platform_restore_cleanup(platform_mode); - device_resume(PMSG_RECOVER); - Finish: resume_console(); pm_restore_console(); return error; @@ -453,34 +462,38 @@ int hibernation_platform_enter(void) goto Resume_devices; } + device_pm_lock(); + + error = device_power_down(PMSG_HIBERNATE); + if (error) + goto Unlock; + error = hibernation_ops->prepare(); if (error) - goto Resume_devices; + goto Platofrm_finish; error = disable_nonboot_cpus(); if (error) - goto Finish; - - device_pm_lock(); - - error = device_power_down(PMSG_HIBERNATE); - if (!error) { - local_irq_disable(); - sysdev_suspend(PMSG_HIBERNATE); - hibernation_ops->enter(); - /* We should never get here */ - while (1); - } + goto Platofrm_finish; - device_pm_unlock(); + local_irq_disable(); + sysdev_suspend(PMSG_HIBERNATE); + hibernation_ops->enter(); + /* We should never get here */ + while (1); /* * We don't need to reenable the nonboot CPUs or resume consoles, since * the system is going to be halted anyway. */ - Finish: + Platofrm_finish: hibernation_ops->finish(); + device_power_up(PMSG_RESTORE); + + Unlock: + device_pm_unlock(); + Resume_devices: entering_platform_hibernation = false; device_resume(PMSG_RESTORE); -- cgit v1.2.3 From 749b0afc3a9d90dda3319fd1464a3b32fc225361 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Mar 2009 22:34:35 +0100 Subject: kexec: Change kexec jump code ordering Change the ordering of the kexec jump code so that the nonboot CPUs are disabled after calling device drivers' "late suspend" methods. This change reflects the recent modifications of the power management code that is also used by kexec jump. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar --- kernel/kexec.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/kernel/kexec.c b/kernel/kexec.c index dade9af6bf2..93eed85fe01 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1450,9 +1450,6 @@ int kernel_kexec(void) error = device_suspend(PMSG_FREEZE); if (error) goto Resume_console; - error = disable_nonboot_cpus(); - if (error) - goto Resume_devices; device_pm_lock(); /* At this point, device_suspend() has been called, * but *not* device_power_down(). We *must* @@ -1463,13 +1460,15 @@ int kernel_kexec(void) */ error = device_power_down(PMSG_FREEZE); if (error) - goto Unlock_pm; - + goto Resume_devices; + error = disable_nonboot_cpus(); + if (error) + goto Enable_cpus; local_irq_disable(); /* Suspend system devices */ error = sysdev_suspend(PMSG_FREEZE); if (error) - goto Power_up_devices; + goto Enable_irqs; } else #endif { @@ -1483,13 +1482,13 @@ int kernel_kexec(void) #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { sysdev_resume(); - Power_up_devices: + Enable_irqs: local_irq_enable(); - device_power_up(PMSG_RESTORE); - Unlock_pm: - device_pm_unlock(); + Enable_cpus: enable_nonboot_cpus(); + device_power_up(PMSG_RESTORE); Resume_devices: + device_pm_unlock(); device_resume(PMSG_RESTORE); Resume_console: resume_console(); -- cgit v1.2.3 From 57ef80266e14ecc363380268fedc64e519047b4a Mon Sep 17 00:00:00 2001 From: Frans Pop Date: Mon, 16 Mar 2009 22:39:56 +0100 Subject: PCI PM: Consistently use variable name "error" for pm call return values I noticed two functions use a variable "i" to store the return value of PM function calls while the rest of the file uses "error". As "i" normally indicates a counter of some sort it seems better to keep this consistent. Signed-off-by: Frans Pop Signed-off-by: Jesse Barnes Signed-off-by: Rafael J. Wysocki --- drivers/pci/pci-driver.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 93eac142358..a5f11ad975b 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -352,17 +352,17 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) { struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; - int i = 0; + int error = 0; if (drv && drv->suspend) { pci_power_t prev = pci_dev->current_state; pci_dev->state_saved = false; - i = drv->suspend(pci_dev, state); - suspend_report_result(drv->suspend, i); - if (i) - return i; + error = drv->suspend(pci_dev, state); + suspend_report_result(drv->suspend, error); + if (error) + return error; if (pci_dev->state_saved) goto Fixup; @@ -385,20 +385,20 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) Fixup: pci_fixup_device(pci_fixup_suspend, pci_dev); - return i; + return error; } static int pci_legacy_suspend_late(struct device *dev, pm_message_t state) { struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; - int i = 0; + int error = 0; if (drv && drv->suspend_late) { - i = drv->suspend_late(pci_dev, state); - suspend_report_result(drv->suspend_late, i); + error = drv->suspend_late(pci_dev, state); + suspend_report_result(drv->suspend_late, error); } - return i; + return error; } static int pci_legacy_resume_early(struct device *dev) -- cgit v1.2.3 From f00a20ef46b1795c495869163a9a7333f899713a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Mar 2009 22:40:08 +0100 Subject: PCI PM: Use pci_set_power_state during early resume Once we have allowed timer interrupts to be enabled during the early phase of resuming devices, we are now able to use the generic pci_set_power_state() to put PCI devices into D0 at that time. Then, the platform-specific PM code will have a chance to handle devices that don't implement the native PCI PM or that require some additional, platform-specific operations to be carried out to power them up. Also, by doing this we can simplify the code quite a bit. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar Acked-by: Jesse Barnes --- drivers/pci/pci.c | 48 +++++++++--------------------------------------- 1 file changed, 9 insertions(+), 39 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 6d6120007af..3acb1da296d 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -426,7 +426,6 @@ static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable) * given PCI device * @dev: PCI device to handle. * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. - * @wait: If 'true', wait for the device to change its power state * * RETURN VALUE: * -EINVAL if the requested state is invalid. @@ -435,8 +434,7 @@ static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable) * 0 if device already is in the requested state. * 0 if device's power state has been successfully changed. */ -static int -pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state, bool wait) +static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) { u16 pmcsr; bool need_restore = false; @@ -481,10 +479,8 @@ pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state, bool wait) break; case PCI_UNKNOWN: /* Boot-up */ if ((pmcsr & PCI_PM_CTRL_STATE_MASK) == PCI_D3hot - && !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET)) { + && !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET)) need_restore = true; - wait = true; - } /* Fall-through: force to D0 */ default: pmcsr = 0; @@ -494,9 +490,6 @@ pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state, bool wait) /* enter specified state */ pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr); - if (!wait) - return 0; - /* Mandatory power management transition delays */ /* see PCI PM 1.1 5.6.1 table 18 */ if (state == PCI_D3hot || dev->current_state == PCI_D3hot) @@ -521,7 +514,7 @@ pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state, bool wait) if (need_restore) pci_restore_bars(dev); - if (wait && dev->bus->self) + if (dev->bus->self) pcie_aspm_pm_state_change(dev->bus->self); return 0; @@ -591,7 +584,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) if (state == PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3)) return 0; - error = pci_raw_set_power_state(dev, state, true); + error = pci_raw_set_power_state(dev, state); if (state > PCI_D0 && platform_pci_power_manageable(dev)) { /* Allow the platform to finalize the transition */ @@ -1390,37 +1383,14 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) */ int pci_restore_standard_config(struct pci_dev *dev) { - pci_power_t prev_state; - int error; - - pci_update_current_state(dev, PCI_D0); - - prev_state = dev->current_state; - if (prev_state == PCI_D0) - goto Restore; - - error = pci_raw_set_power_state(dev, PCI_D0, false); - if (error) - return error; + pci_update_current_state(dev, PCI_UNKNOWN); - /* - * This assumes that we won't get a bus in B2 or B3 from the BIOS, but - * we've made this assumption forever and it appears to be universally - * satisfied. - */ - switch(prev_state) { - case PCI_D3cold: - case PCI_D3hot: - mdelay(pci_pm_d3_delay); - break; - case PCI_D2: - udelay(PCI_PM_D2_DELAY); - break; + if (dev->current_state != PCI_D0) { + int error = pci_set_power_state(dev, PCI_D0); + if (error) + return error; } - pci_update_current_state(dev, PCI_D0); - - Restore: return dev->state_saved ? pci_restore_state(dev) : 0; } -- cgit v1.2.3 From 0128a89cf75124500b5b69f0c3c7b7c5aa60676f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Mar 2009 22:40:18 +0100 Subject: PCI PM: Move pci_restore_standard_config to pci-driver.c Move pci_restore_standard_config() from pci.c to pci-driver.c and make it static. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar Acked-by: Jesse Barnes --- drivers/pci/pci-driver.c | 17 +++++++++++++++++ drivers/pci/pci.c | 21 --------------------- drivers/pci/pci.h | 1 - 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index a5f11ad975b..8395206d1ae 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -423,6 +423,23 @@ static int pci_legacy_resume(struct device *dev) /* Auxiliary functions used by the new power management framework */ +/** + * pci_restore_standard_config - restore standard config registers of PCI device + * @pci_dev: PCI device to handle + */ +static int pci_restore_standard_config(struct pci_dev *pci_dev) +{ + pci_update_current_state(pci_dev, PCI_UNKNOWN); + + if (pci_dev->current_state != PCI_D0) { + int error = pci_set_power_state(pci_dev, PCI_D0); + if (error) + return error; + } + + return pci_dev->state_saved ? pci_restore_state(pci_dev) : 0; +} + static void pci_pm_default_resume_noirq(struct pci_dev *pci_dev) { pci_restore_standard_config(pci_dev); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 3acb1da296d..a4ecc2f1512 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1373,27 +1373,6 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) "unable to preallocate PCI-X save buffer\n"); } -/** - * pci_restore_standard_config - restore standard config registers of PCI device - * @dev: PCI device to handle - * - * This function assumes that the device's configuration space is accessible. - * If the device needs to be powered up, the function will wait for it to - * change the state. - */ -int pci_restore_standard_config(struct pci_dev *dev) -{ - pci_update_current_state(dev, PCI_UNKNOWN); - - if (dev->current_state != PCI_D0) { - int error = pci_set_power_state(dev, PCI_D0); - if (error) - return error; - } - - return dev->state_saved ? pci_restore_state(dev) : 0; -} - /** * pci_enable_ari - enable ARI forwarding if hardware support it * @dev: the PCI device diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 07c0aa5275e..149fff65891 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -49,7 +49,6 @@ extern void pci_disable_enabled_device(struct pci_dev *dev); extern void pci_pm_init(struct pci_dev *dev); extern void platform_pci_wakeup_init(struct pci_dev *dev); extern void pci_allocate_cap_save_buffers(struct pci_dev *dev); -extern int pci_restore_standard_config(struct pci_dev *dev); static inline bool pci_is_bridge(struct pci_dev *pci_dev) { -- cgit v1.2.3 From 46939f8b15e44f065d052e89ea4f2adc81fdc740 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Mar 2009 22:40:26 +0100 Subject: PCI PM: Put devices into low power states during late suspend (rev. 2) Once we have allowed timer interrupts to be enabled during the late phase of suspending devices, we are now able to use the generic pci_set_power_state() to put PCI devices into low power states at that time. We can also use some related platform callbacks, like the ones preparing devices for wake-up, during the late suspend. Doing this will allow us to avoid the race condition where a device using shared interrupts is put into a low power state with interrupts enabled and then an interrupt (for another device) comes in and confuses its driver. At the same time, devices that don't support the native PCI PM or that require some additional, platform-specific operations to be carried out to put them into low power states will be handled as appropriate. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar Acked-by: Jesse Barnes --- drivers/pci/pci-driver.c | 134 ++++++++++++++++++++++++++++------------------- 1 file changed, 81 insertions(+), 53 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 8395206d1ae..3c1831c82f5 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -352,53 +352,60 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) { struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; - int error = 0; + + pci_dev->state_saved = false; if (drv && drv->suspend) { pci_power_t prev = pci_dev->current_state; - - pci_dev->state_saved = false; + int error; error = drv->suspend(pci_dev, state); suspend_report_result(drv->suspend, error); if (error) return error; - if (pci_dev->state_saved) - goto Fixup; - - if (pci_dev->current_state != PCI_D0 + if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0 && pci_dev->current_state != PCI_UNKNOWN) { WARN_ONCE(pci_dev->current_state != prev, "PCI PM: Device state not saved by %pF\n", drv->suspend); - goto Fixup; } } - pci_save_state(pci_dev); - /* - * This is for compatibility with existing code with legacy PM support. - */ - pci_pm_set_unknown_state(pci_dev); - - Fixup: pci_fixup_device(pci_fixup_suspend, pci_dev); - return error; + return 0; } static int pci_legacy_suspend_late(struct device *dev, pm_message_t state) { struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; - int error = 0; if (drv && drv->suspend_late) { + pci_power_t prev = pci_dev->current_state; + int error; + error = drv->suspend_late(pci_dev, state); suspend_report_result(drv->suspend_late, error); + if (error) + return error; + + if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0 + && pci_dev->current_state != PCI_UNKNOWN) { + WARN_ONCE(pci_dev->current_state != prev, + "PCI PM: Device state not saved by %pF\n", + drv->suspend_late); + return 0; + } } - return error; + + if (!pci_dev->state_saved) + pci_save_state(pci_dev); + + pci_pm_set_unknown_state(pci_dev); + + return 0; } static int pci_legacy_resume_early(struct device *dev) @@ -460,7 +467,6 @@ static void pci_pm_default_suspend(struct pci_dev *pci_dev) /* Disable non-bridge devices without PM support */ if (!pci_is_bridge(pci_dev)) pci_disable_enabled_device(pci_dev); - pci_save_state(pci_dev); } static bool pci_has_legacy_pm_support(struct pci_dev *pci_dev) @@ -526,24 +532,14 @@ static int pci_pm_suspend(struct device *dev) if (error) return error; - if (pci_dev->state_saved) - goto Fixup; - - if (pci_dev->current_state != PCI_D0 + if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0 && pci_dev->current_state != PCI_UNKNOWN) { WARN_ONCE(pci_dev->current_state != prev, "PCI PM: State of device not saved by %pF\n", pm->suspend); - goto Fixup; } } - if (!pci_dev->state_saved) { - pci_save_state(pci_dev); - if (!pci_is_bridge(pci_dev)) - pci_prepare_to_sleep(pci_dev); - } - Fixup: pci_fixup_device(pci_fixup_suspend, pci_dev); @@ -553,21 +549,41 @@ static int pci_pm_suspend(struct device *dev) static int pci_pm_suspend_noirq(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct device_driver *drv = dev->driver; - int error = 0; + struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend_late(dev, PMSG_SUSPEND); - if (drv && drv->pm && drv->pm->suspend_noirq) { - error = drv->pm->suspend_noirq(dev); - suspend_report_result(drv->pm->suspend_noirq, error); + if (!pm) + return 0; + + if (pm->suspend_noirq) { + pci_power_t prev = pci_dev->current_state; + int error; + + error = pm->suspend_noirq(dev); + suspend_report_result(pm->suspend_noirq, error); + if (error) + return error; + + if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0 + && pci_dev->current_state != PCI_UNKNOWN) { + WARN_ONCE(pci_dev->current_state != prev, + "PCI PM: State of device not saved by %pF\n", + pm->suspend_noirq); + return 0; + } } - if (!error) - pci_pm_set_unknown_state(pci_dev); + if (!pci_dev->state_saved) { + pci_save_state(pci_dev); + if (!pci_is_bridge(pci_dev)) + pci_prepare_to_sleep(pci_dev); + } - return error; + pci_pm_set_unknown_state(pci_dev); + + return 0; } static int pci_pm_resume_noirq(struct device *dev) @@ -650,9 +666,6 @@ static int pci_pm_freeze(struct device *dev) return error; } - if (!pci_dev->state_saved) - pci_save_state(pci_dev); - return 0; } @@ -660,20 +673,25 @@ static int pci_pm_freeze_noirq(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; - int error = 0; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend_late(dev, PMSG_FREEZE); if (drv && drv->pm && drv->pm->freeze_noirq) { + int error; + error = drv->pm->freeze_noirq(dev); suspend_report_result(drv->pm->freeze_noirq, error); + if (error) + return error; } - if (!error) - pci_pm_set_unknown_state(pci_dev); + if (!pci_dev->state_saved) + pci_save_state(pci_dev); - return error; + pci_pm_set_unknown_state(pci_dev); + + return 0; } static int pci_pm_thaw_noirq(struct device *dev) @@ -716,7 +734,6 @@ static int pci_pm_poweroff(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - int error = 0; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_HIBERNATE); @@ -729,33 +746,44 @@ static int pci_pm_poweroff(struct device *dev) pci_dev->state_saved = false; if (pm->poweroff) { + int error; + error = pm->poweroff(dev); suspend_report_result(pm->poweroff, error); + if (error) + return error; } - if (!pci_dev->state_saved && !pci_is_bridge(pci_dev)) - pci_prepare_to_sleep(pci_dev); - Fixup: pci_fixup_device(pci_fixup_suspend, pci_dev); - return error; + return 0; } static int pci_pm_poweroff_noirq(struct device *dev) { + struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; - int error = 0; if (pci_has_legacy_pm_support(to_pci_dev(dev))) return pci_legacy_suspend_late(dev, PMSG_HIBERNATE); - if (drv && drv->pm && drv->pm->poweroff_noirq) { + if (!drv || !drv->pm) + return 0; + + if (drv->pm->poweroff_noirq) { + int error; + error = drv->pm->poweroff_noirq(dev); suspend_report_result(drv->pm->poweroff_noirq, error); + if (error) + return error; } - return error; + if (!pci_dev->state_saved && !pci_is_bridge(pci_dev)) + pci_prepare_to_sleep(pci_dev); + + return 0; } static int pci_pm_restore_noirq(struct device *dev) -- cgit v1.2.3 From 4a865905f685eaefaedf6ade362323dc52aa703b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Mar 2009 22:40:36 +0100 Subject: PCI PM: Make pci_set_power_state() handle devices with no PM support There is a problem with PCI devices without any PM support (either native or through the platform) that pci_set_power_state() always returns error code for them, even if they are being put into D0. However, such devices are always in D0, so pci_set_power_state() should return success when attempting to put such a device into D0. It also should update the current_state field for these devices as appropriate. This modification is necessary so that the standard configuration registers of these devices are successfully restored by pci_restore_standard_config() during the "early" phase of resume. In addition, pci_set_power_state() should check the value of current_state before calling the platform to change the power state of the device to avoid doing that unnecessarily. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar Acked-by: Jesse Barnes --- drivers/pci/pci.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index a4ecc2f1512..979ceb1d37e 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -439,6 +439,10 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) u16 pmcsr; bool need_restore = false; + /* Check if we're already there */ + if (dev->current_state == state) + return 0; + if (!dev->pm_cap) return -EIO; @@ -449,10 +453,7 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) * Can enter D0 from any state, but if we can only go deeper * to sleep if we're already in a low power state */ - if (dev->current_state == state) { - /* we're already there */ - return 0; - } else if (state != PCI_D0 && dev->current_state <= PCI_D3cold + if (state != PCI_D0 && dev->current_state <= PCI_D3cold && dev->current_state > state) { dev_err(&dev->dev, "invalid power transition " "(from state %d to %d)\n", dev->current_state, state); @@ -570,12 +571,17 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) */ return 0; - if (state == PCI_D0 && platform_pci_power_manageable(dev)) { + /* Check if we're already there */ + if (dev->current_state == state) + return 0; + + if (state == PCI_D0) { /* * Allow the platform to change the state, for example via ACPI * _PR0, _PS0 and some such, but do not trust it. */ - int ret = platform_pci_set_power_state(dev, PCI_D0); + int ret = platform_pci_power_manageable(dev) ? + platform_pci_set_power_state(dev, PCI_D0) : 0; if (!ret) pci_update_current_state(dev, PCI_D0); } -- cgit v1.2.3 From 931ff68a5a53fa84bcdf9b1b179a80e54e034bd0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Mar 2009 22:40:50 +0100 Subject: PCI PM: Restore config spaces of all devices during early resume At present the configuration spaces of PCI devices that have no drivers or no PM support in the drivers (either legacy or through a pm object) are not saved during suspend and, consequently, they are not restored during resume. This generally may lead to the state of the system being slightly inconsistent after the resume, so it's better to save and restore the configuration spaces of these devices as well. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar Acked-by: Jesse Barnes --- drivers/pci/pci-driver.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 3c1831c82f5..267de88551c 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -516,13 +516,13 @@ static int pci_pm_suspend(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_SUSPEND); + pci_dev->state_saved = false; + if (!pm) { pci_pm_default_suspend(pci_dev); goto Fixup; } - pci_dev->state_saved = false; - if (pm->suspend) { pci_power_t prev = pci_dev->current_state; int error; @@ -554,8 +554,10 @@ static int pci_pm_suspend_noirq(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend_late(dev, PMSG_SUSPEND); - if (!pm) + if (!pm) { + pci_save_state(pci_dev); return 0; + } if (pm->suspend_noirq) { pci_power_t prev = pci_dev->current_state; @@ -650,13 +652,13 @@ static int pci_pm_freeze(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_FREEZE); + pci_dev->state_saved = false; + if (!pm) { pci_pm_default_suspend(pci_dev); return 0; } - pci_dev->state_saved = false; - if (pm->freeze) { int error; @@ -738,13 +740,13 @@ static int pci_pm_poweroff(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_HIBERNATE); + pci_dev->state_saved = false; + if (!pm) { pci_pm_default_suspend(pci_dev); goto Fixup; } - pci_dev->state_saved = false; - if (pm->poweroff) { int error; -- cgit v1.2.3 From 0e5dd46b761195356065a30611f265adec286d0d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 26 Mar 2009 22:51:40 +0100 Subject: PCI PM: Introduce __pci_[start|complete]_power_transition() (rev. 2) The radeonfb driver needs to program the device's PMCSR directly due to some quirky hardware it has to handle (see http://bugzilla.kernel.org/show_bug.cgi?id=12846 for details) and after doing that it needs to call the platform (usually ACPI) to finish the power transition of the device. Currently it uses pci_set_power_state() for this purpose, however making a specific assumption about the internal behavior of this function, which has changed recently so that this assumption is no longer satisfied. For this reason, introduce __pci_complete_power_transition() that may be called by the radeonfb driver to complete the power transition of the device. For symmetry, introduce __pci_start_power_transition(). Signed-off-by: Rafael J. Wysocki Acked-by: Jesse Barnes --- drivers/pci/pci.c | 69 +++++++++++++++++++++++++++++++++++++++-------------- include/linux/pci.h | 1 + 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 979ceb1d37e..de54fd643ba 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -539,6 +539,53 @@ void pci_update_current_state(struct pci_dev *dev, pci_power_t state) } } +/** + * pci_platform_power_transition - Use platform to change device power state + * @dev: PCI device to handle. + * @state: State to put the device into. + */ +static int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state) +{ + int error; + + if (platform_pci_power_manageable(dev)) { + error = platform_pci_set_power_state(dev, state); + if (!error) + pci_update_current_state(dev, state); + } else { + error = -ENODEV; + /* Fall back to PCI_D0 if native PM is not supported */ + pci_update_current_state(dev, PCI_D0); + } + + return error; +} + +/** + * __pci_start_power_transition - Start power transition of a PCI device + * @dev: PCI device to handle. + * @state: State to put the device into. + */ +static void __pci_start_power_transition(struct pci_dev *dev, pci_power_t state) +{ + if (state == PCI_D0) + pci_platform_power_transition(dev, PCI_D0); +} + +/** + * __pci_complete_power_transition - Complete power transition of a PCI device + * @dev: PCI device to handle. + * @state: State to put the device into. + * + * This function should not be called directly by device drivers. + */ +int __pci_complete_power_transition(struct pci_dev *dev, pci_power_t state) +{ + return state > PCI_D0 ? + pci_platform_power_transition(dev, state) : -EINVAL; +} +EXPORT_SYMBOL_GPL(__pci_complete_power_transition); + /** * pci_set_power_state - Set the power state of a PCI device * @dev: PCI device to handle. @@ -575,16 +622,8 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) if (dev->current_state == state) return 0; - if (state == PCI_D0) { - /* - * Allow the platform to change the state, for example via ACPI - * _PR0, _PS0 and some such, but do not trust it. - */ - int ret = platform_pci_power_manageable(dev) ? - platform_pci_set_power_state(dev, PCI_D0) : 0; - if (!ret) - pci_update_current_state(dev, PCI_D0); - } + __pci_start_power_transition(dev, state); + /* This device is quirked not to be put into D3, so don't put it in D3 */ if (state == PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3)) @@ -592,14 +631,8 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) error = pci_raw_set_power_state(dev, state); - if (state > PCI_D0 && platform_pci_power_manageable(dev)) { - /* Allow the platform to finalize the transition */ - int ret = platform_pci_set_power_state(dev, state); - if (!ret) { - pci_update_current_state(dev, state); - error = 0; - } - } + if (!__pci_complete_power_transition(dev, state)) + error = 0; return error; } diff --git a/include/linux/pci.h b/include/linux/pci.h index 7bd624bfdcf..df364413261 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -689,6 +689,7 @@ size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size); /* Power management related routines */ int pci_save_state(struct pci_dev *dev); int pci_restore_state(struct pci_dev *dev); +int __pci_complete_power_transition(struct pci_dev *dev, pci_power_t state); int pci_set_power_state(struct pci_dev *dev, pci_power_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); bool pci_pme_capable(struct pci_dev *dev, pci_power_t state); -- cgit v1.2.3 From b8e676d2432b8ce96967a3fe6601a0a28e64fa10 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 26 Mar 2009 22:52:08 +0100 Subject: radeonfb: Use __pci_complete_power_transition() Use __pci_complete_power_transition() to finalize the transition into D2 after programming the PMCSR of the device directly. Signed-off-by: Rafael J. Wysocki Acked-by: Jesse Barnes --- drivers/video/aty/radeon_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/aty/radeon_pm.c b/drivers/video/aty/radeon_pm.c index c6d7cc76516..1de0c003246 100644 --- a/drivers/video/aty/radeon_pm.c +++ b/drivers/video/aty/radeon_pm.c @@ -2582,7 +2582,7 @@ static void radeon_set_suspend(struct radeonfb_info *rinfo, int suspend) * calling pci_set_power_state() */ radeonfb_whack_power_state(rinfo, PCI_D2); - pci_set_power_state(rinfo->pdev, PCI_D2); + __pci_complete_power_transition(rinfo->pdev, PCI_D2); } else { printk(KERN_DEBUG "radeonfb (%s): switching to D0 state...\n", pci_name(rinfo->pdev)); -- cgit v1.2.3 From 8efb8c76fcdccf5050c0ea059dac392789baaff2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Mar 2009 21:46:27 +0200 Subject: PCI PM: Make pci_prepare_to_sleep() disable wake-up if needed If the device is not supposed to wake up the system, ie. when device_may_wakeup(&dev->dev) returns 'false', pci_prepare_to_sleep() should pass 'false' to pci_enable_wake() so that it calls the platform to disable the wake-up capability of the device. Signed-off-by: Rafael J. Wysocki Acked-by: Jesse Barnes --- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index de54fd643ba..0195066251e 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1263,7 +1263,7 @@ int pci_prepare_to_sleep(struct pci_dev *dev) if (target_state == PCI_POWER_ERROR) return -EIO; - pci_enable_wake(dev, target_state, true); + pci_enable_wake(dev, target_state, device_may_wakeup(&dev->dev)); error = pci_set_power_state(dev, target_state); -- cgit v1.2.3