From 1eede070a59e1cc73da51e1aaa00d9ab86572cfc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 20 May 2008 23:00:01 +0200 Subject: Introduce new top level suspend and hibernation callbacks Introduce 'struct pm_ops' and 'struct pm_ext_ops' ('ext' meaning 'extended') representing suspend and hibernation operations for bus types, device classes, device types and device drivers. Modify the PM core to use 'struct pm_ops' and 'struct pm_ext_ops' objects, if defined, instead of the ->suspend(), ->resume(), ->suspend_late(), and ->resume_early() callbacks (the old callbacks will be considered as legacy and gradually phased out). The main purpose of doing this is to separate suspend (aka S2RAM and standby) callbacks from hibernation callbacks in such a way that the new callbacks won't take arguments and the semantics of each of them will be clearly specified. This has been requested for multiple times by many people, including Linus himself, and the reason is that within the current scheme if ->resume() is called, for example, it's difficult to say why it's been called (ie. is it a resume from RAM or from hibernation or a suspend/hibernation failure etc.?). The second purpose is to make the suspend/hibernation callbacks more flexible so that device drivers can handle more than they can within the current scheme. For example, some drivers may need to prevent new children of the device from being registered before their ->suspend() callbacks are executed or they may want to carry out some operations requiring the availability of some other devices, not directly bound via the parent-child relationship, in order to prepare for the execution of ->suspend(), etc. Ultimately, we'd like to stop using the freezing of tasks for suspend and therefore the drivers' suspend/hibernation code will have to take care of the handling of the user space during suspend/hibernation. That, in turn, would be difficult within the current scheme, without the new ->prepare() and ->complete() callbacks. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes --- arch/x86/kernel/apm_32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index bf9290e2901..c1735f61a2c 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1211,9 +1211,9 @@ static int suspend(int vetoable) if (err != APM_SUCCESS) apm_error("suspend", err); err = (err == APM_SUCCESS) ? 0 : -EIO; - device_power_up(); + device_power_up(PMSG_RESUME); local_irq_enable(); - device_resume(); + device_resume(PMSG_RESUME); queue_event(APM_NORMAL_RESUME, NULL); spin_lock(&user_list_lock); for (as = user_list; as != NULL; as = as->next) { @@ -1238,7 +1238,7 @@ static void standby(void) apm_error("standby", err); local_irq_disable(); - device_power_up(); + device_power_up(PMSG_RESUME); local_irq_enable(); } @@ -1324,7 +1324,7 @@ static void check_events(void) ignore_bounce = 1; if ((event != APM_NORMAL_RESUME) || (ignore_normal_resume == 0)) { - device_resume(); + device_resume(PMSG_RESUME); queue_event(event, NULL); } ignore_normal_resume = 0; -- cgit v1.2.3 From e3f2baebf4209b5927e23fa65d5977d31db936b3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 22 May 2008 14:35:11 -0700 Subject: PCI/x86: early dump pci conf space v2 Allows us to dump PCI space before any kernel changes have been made. Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- arch/x86/kernel/setup_64.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 6dff1286ad8..524b6850b2c 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -361,6 +361,11 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); +#ifdef CONFIG_PCI + if (pci_early_dump_regs) + early_dump_pci_devices(); +#endif + #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT if (init_ohci1394_dma_early) init_ohci1394_dma_on_all_controllers(); -- cgit v1.2.3 From d8f3de0d2412bb91639cfefc5b3c79dbf3812212 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 12 Jun 2008 23:24:06 +0200 Subject: Suspend-related patches for 2.6.27 ACPI PM: Add possibility to change suspend sequence There are some systems out there that don't work correctly with our current suspend/hibernation code ordering. Provide a workaround for these systems allowing them to pass 'acpi_sleep=old_ordering' in the kernel command line so that it will use the pre-ACPI 2.0 ("old") suspend code ordering. Unfortunately, this requires us to add a platform hook to the resuming of devices for recovering the platform in case one of the device drivers' .suspend() routines returns error code. Namely, ACPI 1.0 specifies that _PTS should be called before suspending devices, but _WAK still should be called before resuming them in order to undo the changes made by _PTS. However, if there is an error during suspending devices, they are automatically resumed without returning control to the PM core, so the _WAK has to be called from within device_resume() in that cases. The patch also reorders and refactors the ACPI suspend/hibernation code to avoid duplication as far as reasonably possible. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes --- arch/x86/kernel/acpi/sleep.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index afc25ee9964..882e970032d 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -124,6 +124,8 @@ static int __init acpi_sleep_setup(char *str) acpi_realmode_flags |= 2; if (strncmp(str, "s3_beep", 7) == 0) acpi_realmode_flags |= 4; + if (strncmp(str, "old_ordering", 12) == 0) + acpi_old_suspend_ordering(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); -- cgit v1.2.3 From 15650a2f644a2f15738cf22807c090d89328f500 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 16 Jun 2008 15:29:45 -0700 Subject: x86/PCI: fixup early quirk probing On x86, we do early PCI probing to apply some quirks for chipset bugs. However, in a recent cleanup (7bcbc78dea92fdf0947fa48e248da3c993a5690f) a thinko was introduced that causes us to probe all subfunctions of even single function devices (a function was factored out of an inner loop and a "break" became a "return"). Fix that up by making check_dev_quirk() return a value so we can keep the factored code intact. Acked-by: Ingo Molnar Signed-off-by: Jesse Barnes --- arch/x86/kernel/early-quirks.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 9f51e1ea9e8..8566fea647e 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -133,7 +133,18 @@ static struct chipset early_qrk[] __initdata = { {} }; -static void __init check_dev_quirk(int num, int slot, int func) +/** + * check_dev_quirk - apply early quirks to a given PCI device + * @num: bus number + * @slot: slot number + * @func: PCI function + * + * Check the vendor & device ID against the early quirks table. + * + * If the device is single function, let early_quirks() know so we don't + * poke at this device again. + */ +static int __init check_dev_quirk(int num, int slot, int func) { u16 class; u16 vendor; @@ -144,7 +155,7 @@ static void __init check_dev_quirk(int num, int slot, int func) class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE); if (class == 0xffff) - return; + return -1; /* no class, treat as single function */ vendor = read_pci_config_16(num, slot, func, PCI_VENDOR_ID); @@ -167,7 +178,9 @@ static void __init check_dev_quirk(int num, int slot, int func) type = read_pci_config_byte(num, slot, func, PCI_HEADER_TYPE); if (!(type & 0x80)) - return; + return -1; + + return 0; } void __init early_quirks(void) @@ -180,6 +193,9 @@ void __init early_quirks(void) /* Poor man's PCI discovery */ for (num = 0; num < 32; num++) for (slot = 0; slot < 32; slot++) - for (func = 0; func < 8; func++) - check_dev_quirk(num, slot, func); + for (func = 0; func < 8; func++) { + /* Only probe function 0 on single fn devices */ + if (check_dev_quirk(num, slot, func)) + break; + } } -- cgit v1.2.3 From 431ceb83f703a343bdd14350480a2224fa4bfedf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Jul 2008 22:08:04 +0200 Subject: x86: fix TSC build error on 32bit Dave Hansen reported a build error on 32bit which went unnoticed as newer gcc versions seem to optimize unused static functions away before compiling them. Make vread_tsc() depend on CONFIG_X86_64 Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 3c36f92160c..7603c055390 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -358,6 +358,7 @@ static cycle_t read_tsc(void) ret : clocksource_tsc.cycle_last; } +#ifdef CONFIG_X86_64 static cycle_t __vsyscall_fn vread_tsc(void) { cycle_t ret = (cycle_t)vget_cycles(); @@ -365,6 +366,7 @@ static cycle_t __vsyscall_fn vread_tsc(void) return ret >= __vsyscall_gtod_data.clock.cycle_last ? ret : __vsyscall_gtod_data.clock.cycle_last; } +#endif static struct clocksource clocksource_tsc = { .name = "tsc", -- cgit v1.2.3 From 5b53496a5ad79e91052f72761a7c5516b069bc99 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 17 Jun 2008 14:39:59 +0800 Subject: ACPI: Disable the C2C3_FFH access mode HW has no MWAIT support 991528d7348667924176f3e29addea0675298944 (ACPI: Processor native C-states using MWAIT) started passing C2C3_FFH to _PDC to tell the BIOS that Linux supports MWAIT for deep C-states. However, we should first double check with the hardware that it actually supports MWAIT before potentially exposing a BIOS bug of an MWAIT _CST on HW that doesn't support MWAIT. Signed-off-by: Zhao Yakui Signed-off-by: Li Shaohua Signed-off-by: Len Brown Signed-off-by: Andi Kleen --- arch/x86/kernel/acpi/processor.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index de2d2e4ebad..7c074eec39f 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -56,6 +56,12 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_ACPI)) buf[2] |= ACPI_PDC_T_FFH; + /* + * If mwait/monitor is unsupported, C2/C3_FFH will be disabled + */ + if (!cpu_has(c, X86_FEATURE_MWAIT)) + buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); + obj->type = ACPI_TYPE_BUFFER; obj->buffer.length = 12; obj->buffer.pointer = (u8 *) buf; -- cgit v1.2.3 From c1e3b377ad48febba6f91b8ae42c44ee4d4ab45e Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 24 Jun 2008 17:58:53 +0800 Subject: ACPI: Create "idle=halt" bootparam "idle=halt" limits the idle loop to using the halt instruction. No MWAIT, no IO accesses, no C-states deeper than C1. If something is broken in the idle code, "idle=halt" is a less severe workaround than "idle=poll" which disables all power savings. Signed-off-by: Zhao Yakui Signed-off-by: Len Brown Signed-off-by: Andi Kleen --- arch/x86/kernel/process.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7dceea94723..7fc72949876 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -7,6 +7,10 @@ #include #include #include +#include + +unsigned long idle_halt; +EXPORT_SYMBOL(idle_halt); struct kmem_cache *task_xstate_cachep; @@ -325,7 +329,18 @@ static int __init idle_setup(char *str) pm_idle = poll_idle; } else if (!strcmp(str, "mwait")) force_mwait = 1; - else + else if (!strcmp(str, "halt")) { + /* + * When the boot option of idle=halt is added, halt is + * forced to be used for CPU idle. In such case CPU C2/C3 + * won't be used again. + * To continue to load the CPU idle driver, don't touch + * the boot_option_idle_override. + */ + pm_idle = default_idle; + idle_halt = 1; + return 0; + } else return -1; boot_option_idle_override = 1; -- cgit v1.2.3 From da5e09a1b3e5a9fc0b15a3feb64e921ccc55ba74 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 24 Jun 2008 18:01:09 +0800 Subject: ACPI : Create "idle=nomwait" bootparam "idle=nomwait" disables the use of the MWAIT instruction from both C1 (C1_FFH) and deeper (C2C3_FFH) C-states. When MWAIT is unavailable, the BIOS and OS generally negotiate to use the HALT instruction for C1, and use IO accesses for deeper C-states. This option is useful for power and performance comparisons, and also to work around BIOS bugs where broken MWAIT support is advertised. http://bugzilla.kernel.org/show_bug.cgi?id=10807 http://bugzilla.kernel.org/show_bug.cgi?id=10914 Signed-off-by: Zhao Yakui Signed-off-by: Li Shaohua Signed-off-by: Len Brown Signed-off-by: Andi Kleen --- arch/x86/kernel/process.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7fc72949876..4d629c62f4f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -11,6 +11,8 @@ unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); +unsigned long idle_nomwait; +EXPORT_SYMBOL(idle_nomwait); struct kmem_cache *task_xstate_cachep; @@ -340,6 +342,15 @@ static int __init idle_setup(char *str) pm_idle = default_idle; idle_halt = 1; return 0; + } else if (!strcmp(str, "nomwait")) { + /* + * If the boot option of "idle=nomwait" is added, + * it means that mwait will be disabled for CPU C2/C3 + * states. In such case it won't touch the variable + * of boot_option_idle_override. + */ + idle_nomwait = 1; + return 0; } else return -1; -- cgit v1.2.3 From 8e9509c827a28e2f365c203c04224f9e9dd1b63a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 17 Jul 2008 13:26:50 +0200 Subject: ftrace: fix merge buglet -tip testing found a bootup hang here: initcall anon_inode_init+0x0/0x130 returned 0 after 0 msecs calling acpi_event_init+0x0/0x57 the bootup should have continued with: initcall acpi_event_init+0x0/0x57 returned 0 after 45 msecs but it hung hard there instead. bisection led to this commit: | commit 5806b81ac1c0c52665b91723fd4146a4f86e386b | Merge: d14c8a6... 6712e29... | Author: Ingo Molnar | Date: Mon Jul 14 16:11:52 2008 +0200 | Merge branch 'auto-ftrace-next' into tracing/for-linus turns out that i made this mistake in the merge: ifdef CONFIG_FTRACE # Do not profile debug utilities CFLAGS_REMOVE_tsc_64.o = -pg CFLAGS_REMOVE_tsc_32.o = -pg those two files got unified meanwhile - so the dont-profile annotation got lost. The proper rule is: CFLAGS_REMOVE_tsc.o = -pg i guess this could have been caught sooner if the CFLAGS_REMOVE* kbuild rule aborted the build if it met a target that does not exist anymore? Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5112c84f542..da140611bb5 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -8,8 +8,7 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) ifdef CONFIG_FTRACE # Do not profile debug utilities -CFLAGS_REMOVE_tsc_64.o = -pg -CFLAGS_REMOVE_tsc_32.o = -pg +CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg endif -- cgit v1.2.3 From 9354094a95aed456a46b353b1051a7e2fab29045 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 14 Jul 2008 23:29:01 -0700 Subject: x86: fix numaq_tsc_disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/kernel/numaq_32.c: In function ‘numaq_tsc_disable’: arch/x86/kernel/numaq_32.c:99: warning: ‘return’ with a value, in function returning void Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/numaq_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 5b20a5e7ac2..a23e8233b9a 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -96,7 +96,7 @@ int __init get_memcfg_numaq(void) void __init numaq_tsc_disable(void) { if (!found_numaq) - return -1; + return; if (num_online_nodes() > 1) { printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); -- cgit v1.2.3