aboutsummaryrefslogtreecommitdiff
path: root/arch/i386
diff options
context:
space:
mode:
Diffstat (limited to 'arch/i386')
-rw-r--r--arch/i386/Kconfig69
-rw-r--r--arch/i386/boot/Makefile4
-rw-r--r--arch/i386/boot/compressed/head.S7
-rw-r--r--arch/i386/boot/compressed/misc.c8
-rw-r--r--arch/i386/boot/edd.S2
-rw-r--r--arch/i386/boot/setup.S6
-rw-r--r--arch/i386/boot/tools/build.c8
-rw-r--r--arch/i386/crypto/aes.c2
-rw-r--r--arch/i386/defconfig1
-rw-r--r--arch/i386/kernel/Makefile1
-rw-r--r--arch/i386/kernel/acpi/Makefile4
-rw-r--r--arch/i386/kernel/acpi/boot.c277
-rw-r--r--arch/i386/kernel/acpi/cstate.c103
-rw-r--r--arch/i386/kernel/acpi/sleep.c27
-rw-r--r--arch/i386/kernel/acpi/wakeup.S5
-rw-r--r--arch/i386/kernel/apic.c82
-rw-r--r--arch/i386/kernel/apm.c13
-rw-r--r--arch/i386/kernel/cpu/common.c50
-rw-r--r--arch/i386/kernel/cpu/cpufreq/gx-suspmod.c2
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k7.c4
-rw-r--r--arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c2
-rw-r--r--arch/i386/kernel/cpu/intel.c14
-rw-r--r--arch/i386/kernel/cpu/intel_cacheinfo.c4
-rw-r--r--arch/i386/kernel/cpu/mcheck/k7.c2
-rw-r--r--arch/i386/kernel/cpu/mcheck/mce.c4
-rw-r--r--arch/i386/kernel/cpu/mcheck/p4.c4
-rw-r--r--arch/i386/kernel/cpu/mcheck/p5.c2
-rw-r--r--arch/i386/kernel/cpu/mcheck/p6.c2
-rw-r--r--arch/i386/kernel/cpu/mcheck/winchip.c2
-rw-r--r--arch/i386/kernel/cpu/mtrr/generic.c23
-rw-r--r--arch/i386/kernel/cpu/mtrr/main.c76
-rw-r--r--arch/i386/kernel/cpu/mtrr/mtrr.h1
-rw-r--r--arch/i386/kernel/crash.c223
-rw-r--r--arch/i386/kernel/dmi_scan.c391
-rw-r--r--arch/i386/kernel/efi.c4
-rw-r--r--arch/i386/kernel/head.S6
-rw-r--r--arch/i386/kernel/i8259.c12
-rw-r--r--arch/i386/kernel/io_apic.c45
-rw-r--r--arch/i386/kernel/irq.c72
-rw-r--r--arch/i386/kernel/kprobes.c133
-rw-r--r--arch/i386/kernel/machine_kexec.c226
-rw-r--r--arch/i386/kernel/mpparse.c2
-rw-r--r--arch/i386/kernel/process.c68
-rw-r--r--arch/i386/kernel/reboot.c82
-rw-r--r--arch/i386/kernel/relocate_kernel.S120
-rw-r--r--arch/i386/kernel/setup.c69
-rw-r--r--arch/i386/kernel/signal.c4
-rw-r--r--arch/i386/kernel/smp.c34
-rw-r--r--arch/i386/kernel/smpboot.c359
-rw-r--r--arch/i386/kernel/syscall_table.S7
-rw-r--r--arch/i386/kernel/sysenter.c12
-rw-r--r--arch/i386/kernel/time.c7
-rw-r--r--arch/i386/kernel/time_hpet.c2
-rw-r--r--arch/i386/kernel/timers/common.c2
-rw-r--r--arch/i386/kernel/timers/timer_cyclone.c4
-rw-r--r--arch/i386/kernel/timers/timer_hpet.c4
-rw-r--r--arch/i386/kernel/timers/timer_pit.c4
-rw-r--r--arch/i386/kernel/timers/timer_tsc.c5
-rw-r--r--arch/i386/kernel/traps.c51
-rw-r--r--arch/i386/kernel/vmlinux.lds.S62
-rw-r--r--arch/i386/mach-default/setup.c27
-rw-r--r--arch/i386/mach-default/topology.c15
-rw-r--r--arch/i386/mach-visws/mpparse.c5
-rw-r--r--arch/i386/mach-voyager/voyager_basic.c2
-rw-r--r--arch/i386/mm/discontig.c2
-rw-r--r--arch/i386/mm/fault.c7
-rw-r--r--arch/i386/mm/highmem.c18
-rw-r--r--arch/i386/mm/init.c2
-rw-r--r--arch/i386/mm/ioremap.c8
-rw-r--r--arch/i386/mm/pgtable.c30
-rw-r--r--arch/i386/pci/common.c9
-rw-r--r--arch/i386/pci/i386.c11
-rw-r--r--arch/i386/pci/irq.c67
-rw-r--r--arch/i386/pci/legacy.c2
-rw-r--r--arch/i386/pci/mmconfig.c39
-rw-r--r--arch/i386/pci/numa.c2
-rw-r--r--arch/i386/pci/pci.h1
-rw-r--r--arch/i386/pci/visws.c2
-rw-r--r--arch/i386/power/cpu.c24
79 files changed, 2247 insertions, 847 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index d4ae5f9ceae..a801d9d4860 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -510,28 +510,7 @@ config SCHED_SMT
cost of slightly increased overhead in some places. If unsure say
N here.
-config PREEMPT
- bool "Preemptible Kernel"
- help
- This option reduces the latency of the kernel when reacting to
- real-time or interactive events by allowing a low priority process to
- be preempted even if it is in kernel mode executing a system call.
- This allows applications to run more reliably even when the system is
- under load.
-
- Say Y here if you are building a kernel for a desktop, embedded
- or real-time system. Say N if you are unsure.
-
-config PREEMPT_BKL
- bool "Preempt The Big Kernel Lock"
- depends on PREEMPT
- default y
- help
- This option reduces the latency of the kernel by making the
- big kernel lock preemptible.
-
- Say Y here if you are building a kernel for a desktop system.
- Say N if you are unsure.
+source "kernel/Kconfig.preempt"
config X86_UP_APIC
bool "Local APIC support on uniprocessors"
@@ -963,6 +942,41 @@ config SECCOMP
source kernel/Kconfig.hz
+config PHYSICAL_START
+ hex "Physical address where the kernel is loaded" if EMBEDDED
+ default "0x100000"
+ help
+ This gives the physical address where the kernel is loaded.
+ Primarily used in the case of kexec on panic where the
+ fail safe kernel needs to run at a different address than
+ the panic-ed kernel.
+
+ Don't change this unless you know what you are doing.
+
+config KEXEC
+ bool "kexec system call (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ help
+ kexec is a system call that implements the ability to shutdown your
+ current kernel, and to start another kernel. It is like a reboot
+ but it is indepedent of the system firmware. And like a reboot
+ you can start any kernel with it, not just Linux.
+
+ The name comes from the similiarity to the exec system call.
+
+ It is an ongoing process to be certain the hardware in a machine
+ is properly shutdown, so do not be surprised if this code does not
+ initially work for you. It may help to enable device hotplugging
+ support. As of this writing the exact hardware interface is
+ strongly in flux, so no good recommendation can be made.
+
+config CRASH_DUMP
+ bool "kernel crash dumps (EXPERIMENTAL)"
+ depends on EMBEDDED
+ depends on EXPERIMENTAL
+ depends on HIGHMEM
+ help
+ Generate crash dump after being started by kexec.
endmenu
@@ -1250,6 +1264,15 @@ config SCx200
This support is also available as a module. If compiled as a
module, it will be called scx200.
+config HOTPLUG_CPU
+ bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
+ depends on SMP && HOTPLUG && EXPERIMENTAL
+ ---help---
+ Say Y here to experiment with turning CPUs off and on. CPUs
+ can be controlled through /sys/devices/system/cpu.
+
+ Say N.
+
source "drivers/pcmcia/Kconfig"
source "drivers/pci/hotplug/Kconfig"
@@ -1262,6 +1285,8 @@ source "fs/Kconfig.binfmt"
endmenu
+source "net/Kconfig"
+
source "drivers/Kconfig"
source "fs/Kconfig"
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile
index 43cd6220ee4..1e71382d413 100644
--- a/arch/i386/boot/Makefile
+++ b/arch/i386/boot/Makefile
@@ -25,8 +25,8 @@ SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
#RAMDISK := -DRAMDISK=512
-targets := vmlinux.bin bootsect bootsect.o setup setup.o \
- zImage bzImage
+targets := vmlinux.bin bootsect bootsect.o \
+ setup setup.o zImage bzImage
subdir- := compressed
hostprogs-y := tools/build
diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S
index c5e80b69e7d..b5893e4ecd3 100644
--- a/arch/i386/boot/compressed/head.S
+++ b/arch/i386/boot/compressed/head.S
@@ -25,6 +25,7 @@
#include <linux/linkage.h>
#include <asm/segment.h>
+#include <asm/page.h>
.globl startup_32
@@ -74,7 +75,7 @@ startup_32:
popl %esi # discard address
popl %esi # real mode pointer
xorl %ebx,%ebx
- ljmp $(__BOOT_CS), $0x100000
+ ljmp $(__BOOT_CS), $__PHYSICAL_START
/*
* We come here, if we were loaded high.
@@ -99,7 +100,7 @@ startup_32:
popl %ecx # lcount
popl %edx # high_buffer_start
popl %eax # hcount
- movl $0x100000,%edi
+ movl $__PHYSICAL_START,%edi
cli # make sure we don't get interrupted
ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine
@@ -124,5 +125,5 @@ move_routine_start:
movsl
movl %ebx,%esi # Restore setup pointer
xorl %ebx,%ebx
- ljmp $(__BOOT_CS), $0x100000
+ ljmp $(__BOOT_CS), $__PHYSICAL_START
move_routine_end:
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
index cedc55cc47d..82a807f9f5e 100644
--- a/arch/i386/boot/compressed/misc.c
+++ b/arch/i386/boot/compressed/misc.c
@@ -13,6 +13,7 @@
#include <linux/vmalloc.h>
#include <linux/tty.h>
#include <asm/io.h>
+#include <asm/page.h>
/*
* gzip declarations
@@ -308,7 +309,7 @@ static void setup_normal_output_buffer(void)
#else
if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
#endif
- output_data = (char *)0x100000; /* Points to 1M */
+ output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */
free_mem_end_ptr = (long)real_mode;
}
@@ -333,8 +334,8 @@ static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
low_buffer_size = low_buffer_end - LOW_BUFFER_START;
high_loaded = 1;
free_mem_end_ptr = (long)high_buffer_start;
- if ( (0x100000 + low_buffer_size) > ((ulg)high_buffer_start)) {
- high_buffer_start = (uch *)(0x100000 + low_buffer_size);
+ if ( (__PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) {
+ high_buffer_start = (uch *)(__PHYSICAL_START + low_buffer_size);
mv->hcount = 0; /* say: we need not to move high_buffer */
}
else mv->hcount = -1;
@@ -353,7 +354,6 @@ static void close_output_buffer_if_we_run_high(struct moveparams *mv)
}
}
-
asmlinkage int decompress_kernel(struct moveparams *mv, void *rmode)
{
real_mode = rmode;
diff --git a/arch/i386/boot/edd.S b/arch/i386/boot/edd.S
index 027d6b354ff..d8d69f2b911 100644
--- a/arch/i386/boot/edd.S
+++ b/arch/i386/boot/edd.S
@@ -6,7 +6,7 @@
* projects 1572D, 1484D, 1386D, 1226DT
* disk signature read by Matt Domsch <Matt_Domsch@dell.com>
* and Andrew Wilks <Andrew_Wilks@dell.com> September 2003, June 2004
- * legacy CHS retreival by Patrick J. LoPresti <patl@users.sourceforge.net>
+ * legacy CHS retrieval by Patrick J. LoPresti <patl@users.sourceforge.net>
* March 2004
* Command line option parsing, Matt Domsch, November 2004
*/
diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S
index caa1fde6904..8cb420f40c5 100644
--- a/arch/i386/boot/setup.S
+++ b/arch/i386/boot/setup.S
@@ -33,7 +33,7 @@
* Transcribed from Intel (as86) -> AT&T (gas) by Chris Noe, May 1999.
* <stiker@northlink.com>
*
- * Fix to work around buggy BIOSes which dont use carry bit correctly
+ * Fix to work around buggy BIOSes which don't use carry bit correctly
* and/or report extended memory in CX/DX for e801h memory size detection
* call. As a result the kernel got wrong figures. The int15/e801h docs
* from Ralf Brown interrupt list seem to indicate AX/BX should be used
@@ -357,7 +357,7 @@ bail820:
meme801:
stc # fix to work around buggy
- xorw %cx,%cx # BIOSes which dont clear/set
+ xorw %cx,%cx # BIOSes which don't clear/set
xorw %dx,%dx # carry on pass/error of
# e801h memory size call
# or merely pass cx,dx though
@@ -847,7 +847,7 @@ flush_instr:
#
# but we yet haven't reloaded the CS register, so the default size
# of the target offset still is 16 bit.
-# However, using an operand prefix (0x66), the CPU will properly
+# However, using an operand prefix (0x66), the CPU will properly
# take our 48 bit far pointer. (INTeL 80386 Programmer's Reference
# Manual, Mixing 16-bit and 32-bit code, page 16-6)
diff --git a/arch/i386/boot/tools/build.c b/arch/i386/boot/tools/build.c
index 26509b826ae..6835f6d47c3 100644
--- a/arch/i386/boot/tools/build.c
+++ b/arch/i386/boot/tools/build.c
@@ -1,6 +1,4 @@
/*
- * $Id: build.c,v 1.5 1997/05/19 12:29:58 mj Exp $
- *
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 1997 Martin Mares
*/
@@ -8,7 +6,8 @@
/*
* This file builds a disk-image from three different files:
*
- * - bootsect: exactly 512 bytes of 8086 machine code, loads the rest
+ * - bootsect: compatibility mbr which prints an error message if
+ * someone tries to boot the kernel directly.
* - setup: 8086 machine code, sets up system parm
* - system: 80386 code for actual system
*
@@ -71,7 +70,8 @@ void usage(void)
int main(int argc, char ** argv)
{
- unsigned int i, c, sz, setup_sectors;
+ unsigned int i, sz, setup_sectors;
+ int c;
u32 sys_size;
byte major_root, minor_root;
struct stat sb;
diff --git a/arch/i386/crypto/aes.c b/arch/i386/crypto/aes.c
index 1019430fc1f..88ee85c3b43 100644
--- a/arch/i386/crypto/aes.c
+++ b/arch/i386/crypto/aes.c
@@ -59,7 +59,7 @@ struct aes_ctx {
};
#define WPOLY 0x011b
-#define u32_in(x) le32_to_cpu(*(const u32 *)(x))
+#define u32_in(x) le32_to_cpup((const __le32 *)(x))
#define bytes2word(b0, b1, b2, b3) \
(((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0))
diff --git a/arch/i386/defconfig b/arch/i386/defconfig
index 28e62038379..ca07b95c06b 100644
--- a/arch/i386/defconfig
+++ b/arch/i386/defconfig
@@ -126,7 +126,6 @@ CONFIG_HAVE_DEC_LOCK=y
#
CONFIG_PM=y
CONFIG_SOFTWARE_SUSPEND=y
-# CONFIG_PM_DISK is not set
#
# ACPI (Advanced Configuration and Power Interface) Support
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 51ecd512603..4cc83b322b3 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups.o
+obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_X86_NUMAQ) += numaq.o
obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o
obj-$(CONFIG_KPROBES) += kprobes.o
diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile
index ee75cb286cf..5e291a20c03 100644
--- a/arch/i386/kernel/acpi/Makefile
+++ b/arch/i386/kernel/acpi/Makefile
@@ -2,3 +2,7 @@ obj-$(CONFIG_ACPI_BOOT) := boot.o
obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o
obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o
+ifneq ($(CONFIG_ACPI_PROCESSOR),)
+obj-y += cstate.o
+endif
+
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index 848bb97af7c..b7808a89d94 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -29,6 +29,7 @@
#include <linux/efi.h>
#include <linux/irq.h>
#include <linux/module.h>
+#include <linux/dmi.h>
#include <asm/pgtable.h>
#include <asm/io_apic.h>
@@ -158,9 +159,15 @@ char *__acpi_map_table(unsigned long phys, unsigned long size)
#endif
#ifdef CONFIG_PCI_MMCONFIG
-static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
+struct acpi_table_mcfg_config *pci_mmcfg_config;
+int pci_mmcfg_config_num;
+
+int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
{
struct acpi_table_mcfg *mcfg;
+ unsigned long i;
+ int config_size;
if (!phys_addr || !size)
return -EINVAL;
@@ -171,18 +178,38 @@ static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
return -ENODEV;
}
- if (mcfg->base_reserved) {
- printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n");
+ /* how many config structures do we have */
+ pci_mmcfg_config_num = 0;
+ i = size - sizeof(struct acpi_table_mcfg);
+ while (i >= sizeof(struct acpi_table_mcfg_config)) {
+ ++pci_mmcfg_config_num;
+ i -= sizeof(struct acpi_table_mcfg_config);
+ };
+ if (pci_mmcfg_config_num == 0) {
+ printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
return -ENODEV;
}
- pci_mmcfg_base_addr = mcfg->base_address;
+ config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
+ pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
+ if (!pci_mmcfg_config) {
+ printk(KERN_WARNING PREFIX
+ "No memory for MCFG config tables\n");
+ return -ENOMEM;
+ }
+
+ memcpy(pci_mmcfg_config, &mcfg->config, config_size);
+ for (i = 0; i < pci_mmcfg_config_num; ++i) {
+ if (mcfg->config[i].base_reserved) {
+ printk(KERN_ERR PREFIX
+ "MMCONFIG not in low 4GB of memory\n");
+ return -ENODEV;
+ }
+ }
return 0;
}
-#else
-#define acpi_parse_mcfg NULL
-#endif /* !CONFIG_PCI_MMCONFIG */
+#endif /* CONFIG_PCI_MMCONFIG */
#ifdef CONFIG_X86_LOCAL_APIC
static int __init
@@ -506,6 +533,22 @@ acpi_unmap_lsapic(int cpu)
EXPORT_SYMBOL(acpi_unmap_lsapic);
#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+int
+acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
+{
+ /* TBD */
+ return -EINVAL;
+}
+EXPORT_SYMBOL(acpi_register_ioapic);
+
+int
+acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
+{
+ /* TBD */
+ return -EINVAL;
+}
+EXPORT_SYMBOL(acpi_unregister_ioapic);
+
static unsigned long __init
acpi_scan_rsdp (
unsigned long start,
@@ -815,6 +858,219 @@ acpi_process_madt(void)
return;
}
+extern int acpi_force;
+
+#ifdef __i386__
+
+#ifdef CONFIG_ACPI_PCI
+static int __init disable_acpi_irq(struct dmi_system_id *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
+ d->ident);
+ acpi_noirq_set();
+ }
+ return 0;
+}
+
+static int __init disable_acpi_pci(struct dmi_system_id *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
+ d->ident);
+ acpi_disable_pci();
+ }
+ return 0;
+}
+#endif
+
+static int __init dmi_disable_acpi(struct dmi_system_id *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: acpi off\n",d->ident);
+ disable_acpi();
+ } else {
+ printk(KERN_NOTICE
+ "Warning: DMI blacklist says broken, but acpi forced\n");
+ }
+ return 0;
+}
+
+/*
+ * Limit ACPI to CPU enumeration for HT
+ */
+static int __init force_acpi_ht(struct dmi_system_id *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident);
+ disable_acpi();
+ acpi_ht = 1;
+ } else {
+ printk(KERN_NOTICE
+ "Warning: acpi=force overrules DMI blacklist: acpi=ht\n");
+ }
+ return 0;
+}
+
+/*
+ * If your system is blacklisted here, but you find that acpi=force
+ * works for you, please contact acpi-devel@sourceforge.net
+ */
+static struct dmi_system_id __initdata acpi_dmi_table[] = {
+ /*
+ * Boxes that need ACPI disabled
+ */
+ {
+ .callback = dmi_disable_acpi,
+ .ident = "IBM Thinkpad",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "2629H1G"),
+ },
+ },
+
+ /*
+ * Boxes that need acpi=ht
+ */
+ {
+ .callback = force_acpi_ht,
+ .ident = "FSC Primergy T850",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "DELL GX240",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
+ DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "HP VISUALIZE NT Workstation",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "Compaq Workstation W8000",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "ASUS P4B266",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "P4B266"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "ASUS P2B-DS",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "ASUS CUR-DLS",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "ABIT i440BX-W83977",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
+ DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "IBM Bladecenter",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "IBM eServer xSeries 360",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "IBM eserver xSeries 330",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "IBM eserver xSeries 440",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
+ },
+ },
+
+#ifdef CONFIG_ACPI_PCI
+ /*
+ * Boxes that need ACPI PCI IRQ routing disabled
+ */
+ {
+ .callback = disable_acpi_irq,
+ .ident = "ASUS A7V",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
+ DMI_MATCH(DMI_BOARD_NAME, "<A7V>"),
+ /* newer BIOS, Revision 1011, does work */
+ DMI_MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"),
+ },
+ },
+
+ /*
+ * Boxes that need ACPI PCI IRQ routing and PCI scan disabled
+ */
+ { /* _BBN 0 bug */
+ .callback = disable_acpi_pci,
+ .ident = "ASUS PR-DLS",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"),
+ DMI_MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"),
+ DMI_MATCH(DMI_BIOS_DATE, "03/21/2003")
+ },
+ },
+ {
+ .callback = disable_acpi_pci,
+ .ident = "Acer TravelMate 36x Laptop",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
+ },
+ },
+#endif
+ { }
+};
+
+#endif /* __i386__ */
+
/*
* acpi_boot_table_init() and acpi_boot_init()
* called from setup_arch(), always.
@@ -843,6 +1099,10 @@ acpi_boot_table_init(void)
{
int error;
+#ifdef __i386__
+ dmi_check_system(acpi_dmi_table);
+#endif
+
/*
* If acpi_disabled, bail out
* One exception: acpi=ht continues far enough to enumerate LAPICs
@@ -870,8 +1130,6 @@ acpi_boot_table_init(void)
*/
error = acpi_blacklisted();
if (error) {
- extern int acpi_force;
-
if (acpi_force) {
printk(KERN_WARNING PREFIX "acpi=force override\n");
} else {
@@ -907,7 +1165,6 @@ int __init acpi_boot_init(void)
acpi_process_madt();
acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
- acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
return 0;
}
diff --git a/arch/i386/kernel/acpi/cstate.c b/arch/i386/kernel/acpi/cstate.c
new file mode 100644
index 00000000000..4c3036ba65d
--- /dev/null
+++ b/arch/i386/kernel/acpi/cstate.c
@@ -0,0 +1,103 @@
+/*
+ * arch/i386/kernel/acpi/cstate.c
+ *
+ * Copyright (C) 2005 Intel Corporation
+ * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ * - Added _PDC for SMP C-states on Intel CPUs
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+
+#include <acpi/processor.h>
+#include <asm/acpi.h>
+
+static void acpi_processor_power_init_intel_pdc(struct acpi_processor_power
+ *pow)
+{
+ struct acpi_object_list *obj_list;
+ union acpi_object *obj;
+ u32 *buf;
+
+ /* allocate and initialize pdc. It will be used later. */
+ obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL);
+ if (!obj_list) {
+ printk(KERN_ERR "Memory allocation error\n");
+ return;
+ }
+
+ obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL);
+ if (!obj) {
+ printk(KERN_ERR "Memory allocation error\n");
+ kfree(obj_list);
+ return;
+ }
+
+ buf = kmalloc(12, GFP_KERNEL);
+ if (!buf) {
+ printk(KERN_ERR "Memory allocation error\n");
+ kfree(obj);
+ kfree(obj_list);
+ return;
+ }
+
+ buf[0] = ACPI_PDC_REVISION_ID;
+ buf[1] = 1;
+ buf[2] = ACPI_PDC_C_CAPABILITY_SMP;
+
+ obj->type = ACPI_TYPE_BUFFER;
+ obj->buffer.length = 12;
+ obj->buffer.pointer = (u8 *) buf;
+ obj_list->count = 1;
+ obj_list->pointer = obj;
+ pow->pdc = obj_list;
+
+ return;
+}
+
+/* Initialize _PDC data based on the CPU vendor */
+void acpi_processor_power_init_pdc(struct acpi_processor_power *pow,
+ unsigned int cpu)
+{
+ struct cpuinfo_x86 *c = cpu_data + cpu;
+
+ pow->pdc = NULL;
+ if (c->x86_vendor == X86_VENDOR_INTEL)
+ acpi_processor_power_init_intel_pdc(pow);
+
+ return;
+}
+
+EXPORT_SYMBOL(acpi_processor_power_init_pdc);
+
+/*
+ * Initialize bm_flags based on the CPU cache properties
+ * On SMP it depends on cache configuration
+ * - When cache is not shared among all CPUs, we flush cache
+ * before entering C3.
+ * - When cache is shared among all CPUs, we use bm_check
+ * mechanism as in UP case
+ *
+ * This routine is called only after all the CPUs are online
+ */
+void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
+ unsigned int cpu)
+{
+ struct cpuinfo_x86 *c = cpu_data + cpu;
+
+ flags->bm_check = 0;
+ if (num_online_cpus() == 1)
+ flags->bm_check = 1;
+ else if (c->x86_vendor == X86_VENDOR_INTEL) {
+ /*
+ * Today all CPUs that support C3 share cache.
+ * TBD: This needs to look at cache shared map, once
+ * multi-core detection patch makes to the base.
+ */
+ flags->bm_check = 1;
+ }
+}
+
+EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c
index 28bb0514bb6..c1af93032ff 100644
--- a/arch/i386/kernel/acpi/sleep.c
+++ b/arch/i386/kernel/acpi/sleep.c
@@ -7,6 +7,7 @@
#include <linux/acpi.h>
#include <linux/bootmem.h>
+#include <linux/dmi.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>
@@ -91,3 +92,29 @@ static int __init acpi_sleep_setup(char *str)
__setup("acpi_sleep=", acpi_sleep_setup);
+
+
+static __init int reset_videomode_after_s3(struct dmi_system_id *d)
+{
+ acpi_video_flags |= 2;
+ return 0;
+}
+
+static __initdata struct dmi_system_id acpisleep_dmi_table[] = {
+ { /* Reset video mode after returning from ACPI S3 sleep */
+ .callback = reset_videomode_after_s3,
+ .ident = "Toshiba Satellite 4030cdt",
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
+ },
+ },
+ { }
+};
+
+static int __init acpisleep_dmi_init(void)
+{
+ dmi_check_system(acpisleep_dmi_table);
+ return 0;
+}
+
+core_initcall(acpisleep_dmi_init);
diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S
index 39d32484f6f..44d886c745e 100644
--- a/arch/i386/kernel/acpi/wakeup.S
+++ b/arch/i386/kernel/acpi/wakeup.S
@@ -74,8 +74,9 @@ wakeup_code:
movw %ax,%fs
movw $0x0e00 + 'i', %fs:(0x12)
- # need a gdt
- lgdt real_save_gdt - wakeup_code
+ # need a gdt -- use lgdtl to force 32-bit operands, in case
+ # the GDT is located past 16 megabytes.
+ lgdtl real_save_gdt - wakeup_code
movl real_save_cr0 - wakeup_code, %eax
movl %eax, %cr0
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 8d993fa7175..bd1dbf3bd22 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -26,6 +26,7 @@
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>
#include <linux/sysdev.h>
+#include <linux/cpu.h>
#include <asm/atomic.h>
#include <asm/smp.h>
@@ -34,12 +35,18 @@
#include <asm/desc.h>
#include <asm/arch_hooks.h>
#include <asm/hpet.h>
+#include <asm/i8253.h>
#include <mach_apic.h>
#include "io_ports.h"
/*
+ * Knob to control our willingness to enable the local APIC.
+ */
+int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+
+/*
* Debug level
*/
int apic_verbosity;
@@ -205,7 +212,7 @@ void __init connect_bsp_APIC(void)
enable_apic_mode();
}
-void disconnect_bsp_APIC(void)
+void disconnect_bsp_APIC(int virt_wire_setup)
{
if (pic_mode) {
/*
@@ -219,6 +226,42 @@ void disconnect_bsp_APIC(void)
outb(0x70, 0x22);
outb(0x00, 0x23);
}
+ else {
+ /* Go back to Virtual Wire compatibility mode */
+ unsigned long value;
+
+ /* For the spurious interrupt use vector F, and enable it */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_VECTOR_MASK;
+ value |= APIC_SPIV_APIC_ENABLED;
+ value |= 0xf;
+ apic_write_around(APIC_SPIV, value);
+
+ if (!virt_wire_setup) {
+ /* For LVT0 make it edge triggered, active high, external and enabled */
+ value = apic_read(APIC_LVT0);
+ value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
+ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
+ apic_write_around(APIC_LVT0, value);
+ }
+ else {
+ /* Disable LVT0 */
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
+ }
+
+ /* For LVT1 make it edge triggered, active high, nmi and enabled */
+ value = apic_read(APIC_LVT1);
+ value &= ~(
+ APIC_MODE_MASK | APIC_SEND_PENDING |
+ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
+ apic_write_around(APIC_LVT1, value);
+ }
}
void disable_local_APIC(void)
@@ -363,7 +406,7 @@ void __init init_bsp_APIC(void)
apic_write_around(APIC_LVT1, value);
}
-void __init setup_local_APIC (void)
+void __devinit setup_local_APIC(void)
{
unsigned long oldvalue, value, ver, maxlvt;
@@ -634,7 +677,7 @@ static struct sys_device device_lapic = {
.cls = &lapic_sysclass,
};
-static void __init apic_pm_activate(void)
+static void __devinit apic_pm_activate(void)
{
apic_pm_state.active = 1;
}
@@ -665,26 +708,6 @@ static void apic_pm_activate(void) { }
* Original code written by Keir Fraser.
*/
-/*
- * Knob to control our willingness to enable the local APIC.
- */
-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
-
-static int __init lapic_disable(char *str)
-{
- enable_local_apic = -1;
- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
- return 0;
-}
-__setup("nolapic", lapic_disable);
-
-static int __init lapic_enable(char *str)
-{
- enable_local_apic = 1;
- return 0;
-}
-__setup("lapic", lapic_enable);
-
static int __init apic_set_verbosity(char *str)
{
if (strcmp("debug", str) == 0)
@@ -855,9 +878,8 @@ fake_ioapic_page:
* but we do not accept timer interrupts yet. We only allow the BP
* to calibrate.
*/
-static unsigned int __init get_8254_timer_count(void)
+static unsigned int __devinit get_8254_timer_count(void)
{
- extern spinlock_t i8253_lock;
unsigned long flags;
unsigned int count;
@@ -874,7 +896,7 @@ static unsigned int __init get_8254_timer_count(void)
}
/* next tick in 8254 can be caught by catching timer wraparound */
-static void __init wait_8254_wraparound(void)
+static void __devinit wait_8254_wraparound(void)
{
unsigned int curr_count, prev_count;
@@ -894,7 +916,7 @@ static void __init wait_8254_wraparound(void)
* Default initialization for 8254 timers. If we use other timers like HPET,
* we override this later
*/
-void (*wait_timer_tick)(void) __initdata = wait_8254_wraparound;
+void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound;
/*
* This function sets up the local APIC timer, with a timeout of
@@ -930,7 +952,7 @@ static void __setup_APIC_LVTT(unsigned int clocks)
apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
}
-static void __init setup_APIC_timer(unsigned int clocks)
+static void __devinit setup_APIC_timer(unsigned int clocks)
{
unsigned long flags;
@@ -1043,12 +1065,12 @@ void __init setup_boot_APIC_clock(void)
local_irq_enable();
}
-void __init setup_secondary_APIC_clock(void)
+void __devinit setup_secondary_APIC_clock(void)
{
setup_APIC_timer(calibration_result);
}
-void __init disable_APIC_timer(void)
+void __devinit disable_APIC_timer(void)
{
if (using_apic_timer) {
unsigned long v;
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 0ff65abcd56..064211d5f41 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -228,10 +228,10 @@
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/desc.h>
+#include <asm/i8253.h>
#include "io_ports.h"
-extern spinlock_t i8253_lock;
extern unsigned long get_cmos_time(void);
extern void machine_real_restart(unsigned char *, int);
@@ -346,10 +346,10 @@ extern int (*console_blank_hook)(int);
struct apm_user {
int magic;
struct apm_user * next;
- int suser: 1;
- int writer: 1;
- int reader: 1;
- int suspend_wait: 1;
+ unsigned int suser: 1;
+ unsigned int writer: 1;
+ unsigned int reader: 1;
+ unsigned int suspend_wait: 1;
int suspend_result;
int suspends_pending;
int standbys_pending;
@@ -1168,8 +1168,7 @@ static void get_time_diff(void)
static void reinit_timer(void)
{
#ifdef INIT_TIMER_AFTER_SUSPEND
- unsigned long flags;
- extern spinlock_t i8253_lock;
+ unsigned long flags;
spin_lock_irqsave(&i8253_lock, flags);
/* set the clock to 100 Hz */
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index b9954248d0a..4553ffd94b1 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -24,9 +24,9 @@ EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
-static int cachesize_override __initdata = -1;
-static int disable_x86_fxsr __initdata = 0;
-static int disable_x86_serial_nr __initdata = 1;
+static int cachesize_override __devinitdata = -1;
+static int disable_x86_fxsr __devinitdata = 0;
+static int disable_x86_serial_nr __devinitdata = 1;
struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
@@ -59,7 +59,7 @@ static int __init cachesize_setup(char *str)
}
__setup("cachesize=", cachesize_setup);
-int __init get_model_name(struct cpuinfo_x86 *c)
+int __devinit get_model_name(struct cpuinfo_x86 *c)
{
unsigned int *v;
char *p, *q;
@@ -89,7 +89,7 @@ int __init get_model_name(struct cpuinfo_x86 *c)
}
-void __init display_cacheinfo(struct cpuinfo_x86 *c)
+void __devinit display_cacheinfo(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ecx, edx, l2size;
@@ -130,7 +130,7 @@ void __init display_cacheinfo(struct cpuinfo_x86 *c)
/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
/* Look up CPU names by table lookup. */
-static char __init *table_lookup_model(struct cpuinfo_x86 *c)
+static char __devinit *table_lookup_model(struct cpuinfo_x86 *c)
{
struct cpu_model_info *info;
@@ -151,7 +151,7 @@ static char __init *table_lookup_model(struct cpuinfo_x86 *c)
}
-void __init get_cpu_vendor(struct cpuinfo_x86 *c, int early)
+void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
{
char *v = c->x86_vendor_id;
int i;
@@ -202,7 +202,7 @@ static inline int flag_is_changeable_p(u32 flag)
/* Probe for the CPUID instruction */
-static int __init have_cpuid_p(void)
+static int __devinit have_cpuid_p(void)
{
return flag_is_changeable_p(X86_EFLAGS_ID);
}
@@ -249,7 +249,7 @@ static void __init early_cpu_detect(void)
#endif
}
-void __init generic_identify(struct cpuinfo_x86 * c)
+void __devinit generic_identify(struct cpuinfo_x86 * c)
{
u32 tfms, xlvl;
int junk;
@@ -296,7 +296,7 @@ void __init generic_identify(struct cpuinfo_x86 * c)
}
}
-static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
/* Disable processor serial number */
@@ -324,7 +324,7 @@ __setup("serialnumber", x86_serial_nr_setup);
/*
* This does the hard work of actually picking apart the CPU stuff...
*/
-void __init identify_cpu(struct cpuinfo_x86 *c)
+void __devinit identify_cpu(struct cpuinfo_x86 *c)
{
int i;
@@ -432,10 +432,18 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_MCE
mcheck_init(c);
#endif
+ if (c == &boot_cpu_data)
+ sysenter_setup();
+ enable_sep_cpu();
+
+ if (c == &boot_cpu_data)
+ mtrr_bp_init();
+ else
+ mtrr_ap_init();
}
#ifdef CONFIG_X86_HT
-void __init detect_ht(struct cpuinfo_x86 *c)
+void __devinit detect_ht(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
int index_msb, tmp;
@@ -490,7 +498,7 @@ void __init detect_ht(struct cpuinfo_x86 *c)
}
#endif
-void __init print_cpu_info(struct cpuinfo_x86 *c)
+void __devinit print_cpu_info(struct cpuinfo_x86 *c)
{
char *vendor = NULL;
@@ -513,7 +521,7 @@ void __init print_cpu_info(struct cpuinfo_x86 *c)
printk("\n");
}
-cpumask_t cpu_initialized __initdata = CPU_MASK_NONE;
+cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE;
/* This is hacky. :)
* We're emulating future behavior.
@@ -560,7 +568,7 @@ void __init early_cpu_init(void)
* and IDT. We reload them nevertheless, this function acts as a
* 'CPU state barrier', nothing should get across.
*/
-void __init cpu_init (void)
+void __devinit cpu_init(void)
{
int cpu = smp_processor_id();
struct tss_struct * t = &per_cpu(init_tss, cpu);
@@ -648,3 +656,15 @@ void __init cpu_init (void)
clear_used_math();
mxcsr_feature_mask_init();
}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void __devinit cpu_uninit(void)
+{
+ int cpu = raw_smp_processor_id();
+ cpu_clear(cpu, cpu_initialized);
+
+ /* lazy TLB state */
+ per_cpu(cpu_tlbstate, cpu).state = 0;
+ per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
+}
+#endif
diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
index 1a49adb1f4a..e86ea486c31 100644
--- a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
@@ -190,7 +190,7 @@ static __init struct pci_dev *gx_detect_chipset(void)
/* detect which companion chip is used */
while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) {
- if ((pci_match_device (gx_chipset_tbl, gx_pci)) != NULL) {
+ if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) {
return gx_pci;
}
}
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
index 5c530064eb7..73a5dc5b26b 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
@@ -648,9 +648,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) {
}
#endif
- if (powernow_table)
- kfree(powernow_table);
-
+ kfree(powernow_table);
return 0;
}
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
index 7dcbf70fc16..327a55d4d1c 100644
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -375,7 +375,7 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
arg0.buffer.pointer = (u8 *) arg0_buf;
arg0_buf[0] = ACPI_PDC_REVISION_ID;
arg0_buf[1] = 1;
- arg0_buf[2] = ACPI_PDC_EST_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_MSR;
+ arg0_buf[2] = ACPI_PDC_EST_CAPABILITY_SMP_MSR;
p.pdc = &arg_list;
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 121aa2176e6..a2c33c1a46c 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -25,10 +25,10 @@ extern int trap_init_f00f_bug(void);
/*
* Alignment at which movsl is preferred for bulk memory copies.
*/
-struct movsl_mask movsl_mask;
+struct movsl_mask movsl_mask __read_mostly;
#endif
-void __init early_intel_workaround(struct cpuinfo_x86 *c)
+void __devinit early_intel_workaround(struct cpuinfo_x86 *c)
{
if (c->x86_vendor != X86_VENDOR_INTEL)
return;
@@ -43,7 +43,7 @@ void __init early_intel_workaround(struct cpuinfo_x86 *c)
* This is called before we do cpu ident work
*/
-int __init ppro_with_ram_bug(void)
+int __devinit ppro_with_ram_bug(void)
{
/* Uses data from early_cpu_detect now */
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -61,7 +61,7 @@ int __init ppro_with_ram_bug(void)
* P4 Xeon errata 037 workaround.
* Hardware prefetcher may cause stale data to be loaded into the cache.
*/
-static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c)
+static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
{
unsigned long lo, hi;
@@ -80,7 +80,7 @@ static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c)
/*
* find out the number of processor cores on the die
*/
-static int __init num_cpu_cores(struct cpuinfo_x86 *c)
+static int __devinit num_cpu_cores(struct cpuinfo_x86 *c)
{
unsigned int eax;
@@ -98,7 +98,7 @@ static int __init num_cpu_cores(struct cpuinfo_x86 *c)
return 1;
}
-static void __init init_intel(struct cpuinfo_x86 *c)
+static void __devinit init_intel(struct cpuinfo_x86 *c)
{
unsigned int l2 = 0;
char *p = NULL;
@@ -204,7 +204,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
return size;
}
-static struct cpu_dev intel_cpu_dev __initdata = {
+static struct cpu_dev intel_cpu_dev __devinitdata = {
.c_vendor = "Intel",
.c_ident = { "GenuineIntel" },
.c_models = {
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index a710dc4eb20..1d768b26326 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -28,7 +28,7 @@ struct _cache_table
};
/* all the cache descriptor types we care about (no TLB or trace cache entries) */
-static struct _cache_table cache_table[] __initdata =
+static struct _cache_table cache_table[] __devinitdata =
{
{ 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
{ 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
@@ -160,7 +160,7 @@ static int __init find_num_cache_leaves(void)
return retval;
}
-unsigned int __init init_intel_cacheinfo(struct cpuinfo_x86 *c)
+unsigned int __devinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c
index 8df52e86c4d..c4abe765739 100644
--- a/arch/i386/kernel/cpu/mcheck/k7.c
+++ b/arch/i386/kernel/cpu/mcheck/k7.c
@@ -69,7 +69,7 @@ static fastcall void k7_machine_check(struct pt_regs * regs, long error_code)
/* AMD K7 machine check is Intel like */
-void __init amd_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit amd_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c
index bf6d1aefafc..2cf25d2ba0f 100644
--- a/arch/i386/kernel/cpu/mcheck/mce.c
+++ b/arch/i386/kernel/cpu/mcheck/mce.c
@@ -16,7 +16,7 @@
#include "mce.h"
-int mce_disabled __initdata = 0;
+int mce_disabled __devinitdata = 0;
int nr_mce_banks;
EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
@@ -31,7 +31,7 @@ static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_
void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
/* This has to be run for each processor */
-void __init mcheck_init(struct cpuinfo_x86 *c)
+void __devinit mcheck_init(struct cpuinfo_x86 *c)
{
if (mce_disabled==1)
return;
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
index 8b16ceb929b..0abccb6fdf9 100644
--- a/arch/i386/kernel/cpu/mcheck/p4.c
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -78,7 +78,7 @@ fastcall void smp_thermal_interrupt(struct pt_regs *regs)
}
/* P4/Xeon Thermal regulation detect and init */
-static void __init intel_init_thermal(struct cpuinfo_x86 *c)
+static void __devinit intel_init_thermal(struct cpuinfo_x86 *c)
{
u32 l, h;
unsigned int cpu = smp_processor_id();
@@ -232,7 +232,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
}
-void __init intel_p4_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit intel_p4_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
diff --git a/arch/i386/kernel/cpu/mcheck/p5.c b/arch/i386/kernel/cpu/mcheck/p5.c
index c45a1b485c8..ec0614cd292 100644
--- a/arch/i386/kernel/cpu/mcheck/p5.c
+++ b/arch/i386/kernel/cpu/mcheck/p5.c
@@ -29,7 +29,7 @@ static fastcall void pentium_machine_check(struct pt_regs * regs, long error_cod
}
/* Set up machine check reporting for processors with Intel style MCE */
-void __init intel_p5_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit intel_p5_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c
index 46640f8c249..f01b73f947e 100644
--- a/arch/i386/kernel/cpu/mcheck/p6.c
+++ b/arch/i386/kernel/cpu/mcheck/p6.c
@@ -80,7 +80,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
}
/* Set up machine check reporting for processors with Intel style MCE */
-void __init intel_p6_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit intel_p6_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
diff --git a/arch/i386/kernel/cpu/mcheck/winchip.c b/arch/i386/kernel/cpu/mcheck/winchip.c
index 753fa7acb98..7bae68fa168 100644
--- a/arch/i386/kernel/cpu/mcheck/winchip.c
+++ b/arch/i386/kernel/cpu/mcheck/winchip.c
@@ -23,7 +23,7 @@ static fastcall void winchip_machine_check(struct pt_regs * regs, long error_cod
}
/* Set up machine check reporting on the Winchip C6 series */
-void __init winchip_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit winchip_mcheck_init(struct cpuinfo_x86 *c)
{
u32 lo, hi;
machine_check_vector = winchip_machine_check;
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index f468a979e9a..169ac8e0db6 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -67,14 +67,6 @@ void __init get_mtrr_state(void)
mtrr_state.enabled = (lo & 0xc00) >> 10;
}
-/* Free resources associated with a struct mtrr_state */
-void __init finalize_mtrr_state(void)
-{
- if (mtrr_state.var_ranges)
- kfree(mtrr_state.var_ranges);
- mtrr_state.var_ranges = NULL;
-}
-
/* Some BIOS's are fucked and don't set all MTRRs the same! */
void __init mtrr_state_warn(void)
{
@@ -335,6 +327,9 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
*/
{
unsigned long flags;
+ struct mtrr_var_range *vr;
+
+ vr = &mtrr_state.var_ranges[reg];
local_irq_save(flags);
prepare_set();
@@ -343,11 +338,15 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
/* The invalid bit is kept in the mask, so we simply clear the
relevant mask register to disable a range. */
mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0);
+ memset(vr, 0, sizeof(struct mtrr_var_range));
} else {
- mtrr_wrmsr(MTRRphysBase_MSR(reg), base << PAGE_SHIFT | type,
- (base & size_and_mask) >> (32 - PAGE_SHIFT));
- mtrr_wrmsr(MTRRphysMask_MSR(reg), -size << PAGE_SHIFT | 0x800,
- (-size & size_and_mask) >> (32 - PAGE_SHIFT));
+ vr->base_lo = base << PAGE_SHIFT | type;
+ vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT);
+ vr->mask_lo = -size << PAGE_SHIFT | 0x800;
+ vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT);
+
+ mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi);
+ mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi);
}
post_set();
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index d66b09e0c82..764cac64e21 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -332,6 +332,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
error = -EINVAL;
+ /* No CPU hotplug when we change MTRR entries */
+ lock_cpu_hotplug();
/* Search for existing MTRR */
down(&main_lock);
for (i = 0; i < num_var_ranges; ++i) {
@@ -372,6 +374,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
error = i;
out:
up(&main_lock);
+ unlock_cpu_hotplug();
return error;
}
@@ -461,6 +464,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
return -ENXIO;
max = num_var_ranges;
+ /* No CPU hotplug when we change MTRR entries */
+ lock_cpu_hotplug();
down(&main_lock);
if (reg < 0) {
/* Search for existing MTRR */
@@ -501,6 +506,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
error = reg;
out:
up(&main_lock);
+ unlock_cpu_hotplug();
return error;
}
/**
@@ -544,21 +550,9 @@ static void __init init_ifs(void)
centaur_init_mtrr();
}
-static void __init init_other_cpus(void)
-{
- if (use_intel())
- get_mtrr_state();
-
- /* bring up the other processors */
- set_mtrr(~0U,0,0,0);
-
- if (use_intel()) {
- finalize_mtrr_state();
- mtrr_state_warn();
- }
-}
-
-
+/* The suspend/resume methods are only for CPU without MTRR. CPU using generic
+ * MTRR driver doesn't require this
+ */
struct mtrr_value {
mtrr_type ltype;
unsigned long lbase;
@@ -611,13 +605,13 @@ static struct sysdev_driver mtrr_sysdev_driver = {
/**
- * mtrr_init - initialize mtrrs on the boot CPU
+ * mtrr_bp_init - initialize mtrrs on the boot CPU
*
* This needs to be called early; before any of the other CPUs are
* initialized (i.e. before smp_init()).
*
*/
-static int __init mtrr_init(void)
+void __init mtrr_bp_init(void)
{
init_ifs();
@@ -674,12 +668,48 @@ static int __init mtrr_init(void)
if (mtrr_if) {
set_num_var_ranges();
init_table();
- init_other_cpus();
-
- return sysdev_driver_register(&cpu_sysdev_class,
- &mtrr_sysdev_driver);
+ if (use_intel())
+ get_mtrr_state();
}
- return -ENXIO;
}
-subsys_initcall(mtrr_init);
+void mtrr_ap_init(void)
+{
+ unsigned long flags;
+
+ if (!mtrr_if || !use_intel())
+ return;
+ /*
+ * Ideally we should hold main_lock here to avoid mtrr entries changed,
+ * but this routine will be called in cpu boot time, holding the lock
+ * breaks it. This routine is called in two cases: 1.very earily time
+ * of software resume, when there absolutely isn't mtrr entry changes;
+ * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to
+ * prevent mtrr entry changes
+ */
+ local_irq_save(flags);
+
+ mtrr_if->set_all();
+
+ local_irq_restore(flags);
+}
+
+static int __init mtrr_init_finialize(void)
+{
+ if (!mtrr_if)
+ return 0;
+ if (use_intel())
+ mtrr_state_warn();
+ else {
+ /* The CPUs haven't MTRR and seemes not support SMP. They have
+ * specific drivers, we use a tricky method to support
+ * suspend/resume for them.
+ * TBD: is there any system with such CPU which supports
+ * suspend/resume? if no, we should remove the code.
+ */
+ sysdev_driver_register(&cpu_sysdev_class,
+ &mtrr_sysdev_driver);
+ }
+ return 0;
+}
+subsys_initcall(mtrr_init_finialize);
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
index de135124559..99c9f268204 100644
--- a/arch/i386/kernel/cpu/mtrr/mtrr.h
+++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -91,7 +91,6 @@ extern struct mtrr_ops * mtrr_if;
extern unsigned int num_var_ranges;
-void finalize_mtrr_state(void);
void mtrr_state_warn(void);
char *mtrr_attrib_to_str(int x);
void mtrr_wrmsr(unsigned, unsigned, unsigned);
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
new file mode 100644
index 00000000000..e5fab12f792
--- /dev/null
+++ b/arch/i386/kernel/crash.c
@@ -0,0 +1,223 @@
+/*
+ * Architecture specific (i386) functions for kexec based crash dumps.
+ *
+ * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
+ *
+ * Copyright (C) IBM Corporation, 2004. All rights reserved.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/irq.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/elf.h>
+#include <linux/elfcore.h>
+
+#include <asm/processor.h>
+#include <asm/hardirq.h>
+#include <asm/nmi.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <mach_ipi.h>
+
+
+note_buf_t crash_notes[NR_CPUS];
+/* This keeps a track of which one is crashing cpu. */
+static int crashing_cpu;
+
+static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
+ size_t data_len)
+{
+ struct elf_note note;
+
+ note.n_namesz = strlen(name) + 1;
+ note.n_descsz = data_len;
+ note.n_type = type;
+ memcpy(buf, &note, sizeof(note));
+ buf += (sizeof(note) +3)/4;
+ memcpy(buf, name, note.n_namesz);
+ buf += (note.n_namesz + 3)/4;
+ memcpy(buf, data, note.n_descsz);
+ buf += (note.n_descsz + 3)/4;
+
+ return buf;
+}
+
+static void final_note(u32 *buf)
+{
+ struct elf_note note;
+
+ note.n_namesz = 0;
+ note.n_descsz = 0;
+ note.n_type = 0;
+ memcpy(buf, &note, sizeof(note));
+}
+
+static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
+{
+ struct elf_prstatus prstatus;
+ u32 *buf;
+
+ if ((cpu < 0) || (cpu >= NR_CPUS))
+ return;
+
+ /* Using ELF notes here is opportunistic.
+ * I need a well defined structure format
+ * for the data I pass, and I need tags
+ * on the data to indicate what information I have
+ * squirrelled away. ELF notes happen to provide
+ * all of that that no need to invent something new.
+ */
+ buf = &crash_notes[cpu][0];
+ memset(&prstatus, 0, sizeof(prstatus));
+ prstatus.pr_pid = current->pid;
+ elf_core_copy_regs(&prstatus.pr_reg, regs);
+ buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
+ sizeof(prstatus));
+ final_note(buf);
+}
+
+static void crash_get_current_regs(struct pt_regs *regs)
+{
+ __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx));
+ __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx));
+ __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx));
+ __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi));
+ __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi));
+ __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp));
+ __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax));
+ __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp));
+ __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss));
+ __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs));
+ __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds));
+ __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes));
+ __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags));
+
+ regs->eip = (unsigned long)current_text_addr();
+}
+
+/* CPU does not save ss and esp on stack if execution is already
+ * running in kernel mode at the time of NMI occurrence. This code
+ * fixes it.
+ */
+static void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs)
+{
+ memcpy(newregs, oldregs, sizeof(*newregs));
+ newregs->esp = (unsigned long)&(oldregs->esp);
+ __asm__ __volatile__("xorl %eax, %eax;");
+ __asm__ __volatile__ ("movw %%ss, %%ax;" :"=a"(newregs->xss));
+}
+
+/* We may have saved_regs from where the error came from
+ * or it is NULL if via a direct panic().
+ */
+static void crash_save_self(struct pt_regs *saved_regs)
+{
+ struct pt_regs regs;
+ int cpu;
+
+ cpu = smp_processor_id();
+ if (saved_regs)
+ crash_setup_regs(&regs, saved_regs);
+ else
+ crash_get_current_regs(&regs);
+ crash_save_this_cpu(&regs, cpu);
+}
+
+#ifdef CONFIG_SMP
+static atomic_t waiting_for_crash_ipi;
+
+static int crash_nmi_callback(struct pt_regs *regs, int cpu)
+{
+ struct pt_regs fixed_regs;
+
+ /* Don't do anything if this handler is invoked on crashing cpu.
+ * Otherwise, system will completely hang. Crashing cpu can get
+ * an NMI if system was initially booted with nmi_watchdog parameter.
+ */
+ if (cpu == crashing_cpu)
+ return 1;
+ local_irq_disable();
+
+ if (!user_mode(regs)) {
+ crash_setup_regs(&fixed_regs, regs);
+ regs = &fixed_regs;
+ }
+ crash_save_this_cpu(regs, cpu);
+ disable_local_APIC();
+ atomic_dec(&waiting_for_crash_ipi);
+ /* Assume hlt works */
+ __asm__("hlt");
+ for(;;);
+
+ return 1;
+}
+
+/*
+ * By using the NMI code instead of a vector we just sneak thru the
+ * word generator coming out with just what we want. AND it does
+ * not matter if clustered_apic_mode is set or not.
+ */
+static void smp_send_nmi_allbutself(void)
+{
+ send_IPI_allbutself(APIC_DM_NMI);
+}
+
+static void nmi_shootdown_cpus(void)
+{
+ unsigned long msecs;
+
+ atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+ /* Would it be better to replace the trap vector here? */
+ set_nmi_callback(crash_nmi_callback);
+ /* Ensure the new callback function is set before sending
+ * out the NMI
+ */
+ wmb();
+
+ smp_send_nmi_allbutself();
+
+ msecs = 1000; /* Wait at most a second for the other cpus to stop */
+ while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
+ mdelay(1);
+ msecs--;
+ }
+
+ /* Leave the nmi callback set */
+ disable_local_APIC();
+}
+#else
+static void nmi_shootdown_cpus(void)
+{
+ /* There are no cpus to shootdown */
+}
+#endif
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ /* This function is only called after the system
+ * has paniced or is otherwise in a critical state.
+ * The minimum amount of code to allow a kexec'd kernel
+ * to run successfully needs to happen here.
+ *
+ * In practice this means shooting down the other cpus in
+ * an SMP system.
+ */
+ /* The kernel is broken so disable interrupts */
+ local_irq_disable();
+
+ /* Make a note of crashing cpu. Will be used in NMI callback.*/
+ crashing_cpu = smp_processor_id();
+ nmi_shootdown_cpus();
+ lapic_shutdown();
+#if defined(CONFIG_X86_IO_APIC)
+ disable_IO_APIC();
+#endif
+ crash_save_self(regs);
+}
diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c
index 6ed7e28f306..a3cdf894302 100644
--- a/arch/i386/kernel/dmi_scan.c
+++ b/arch/i386/kernel/dmi_scan.c
@@ -1,22 +1,15 @@
#include <linux/types.h>
-#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/acpi.h>
-#include <asm/io.h>
-#include <linux/pm.h>
-#include <asm/system.h>
#include <linux/dmi.h>
#include <linux/bootmem.h>
-struct dmi_header
-{
- u8 type;
- u8 length;
- u16 handle;
+struct dmi_header {
+ u8 type;
+ u8 length;
+ u16 handle;
};
#undef DMI_DEBUG
@@ -29,15 +22,13 @@ struct dmi_header
static char * __init dmi_string(struct dmi_header *dm, u8 s)
{
- u8 *bp=(u8 *)dm;
- bp+=dm->length;
- if(!s)
+ u8 *bp = ((u8 *) dm) + dm->length;
+
+ if (!s)
return "";
s--;
- while(s>0 && *bp)
- {
- bp+=strlen(bp);
- bp++;
+ while (s > 0 && *bp) {
+ bp += strlen(bp) + 1;
s--;
}
return bp;
@@ -47,16 +38,14 @@ static char * __init dmi_string(struct dmi_header *dm, u8 s)
* We have to be cautious here. We have seen BIOSes with DMI pointers
* pointing to completely the wrong place for example
*/
-
-static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dmi_header *))
+static int __init dmi_table(u32 base, int len, int num,
+ void (*decode)(struct dmi_header *))
{
- u8 *buf;
- struct dmi_header *dm;
- u8 *data;
- int i=0;
+ u8 *buf, *data;
+ int i = 0;
buf = bt_ioremap(base, len);
- if(buf==NULL)
+ if (buf == NULL)
return -1;
data = buf;
@@ -65,36 +54,34 @@ static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dm
* Stop when we see all the items the table claimed to have
* OR we run off the end of the table (also happens)
*/
-
- while(i<num && data-buf+sizeof(struct dmi_header)<=len)
- {
- dm=(struct dmi_header *)data;
+ while ((i < num) && (data - buf + sizeof(struct dmi_header)) <= len) {
+ struct dmi_header *dm = (struct dmi_header *)data;
/*
* We want to know the total length (formated area and strings)
* before decoding to make sure we won't run off the table in
* dmi_decode or dmi_string
*/
- data+=dm->length;
- while(data-buf<len-1 && (data[0] || data[1]))
+ data += dm->length;
+ while ((data - buf < len - 1) && (data[0] || data[1]))
data++;
- if(data-buf<len-1)
+ if (data - buf < len - 1)
decode(dm);
- data+=2;
+ data += 2;
i++;
}
bt_iounmap(buf, len);
return 0;
}
-
-inline static int __init dmi_checksum(u8 *buf)
+static int __init dmi_checksum(u8 *buf)
{
- u8 sum=0;
+ u8 sum = 0;
int a;
- for(a=0; a<15; a++)
- sum+=buf[a];
- return (sum==0);
+ for (a = 0; a < 15; a++)
+ sum += buf[a];
+
+ return sum == 0;
}
static int __init dmi_iterate(void (*decode)(struct dmi_header *))
@@ -110,28 +97,30 @@ static int __init dmi_iterate(void (*decode)(struct dmi_header *))
p = ioremap(0xF0000, 0x10000);
if (p == NULL)
return -1;
+
for (q = p; q < p + 0x10000; q += 16) {
memcpy_fromio(buf, q, 15);
- if(memcmp(buf, "_DMI_", 5)==0 && dmi_checksum(buf))
- {
- u16 num=buf[13]<<8|buf[12];
- u16 len=buf[7]<<8|buf[6];
- u32 base=buf[11]<<24|buf[10]<<16|buf[9]<<8|buf[8];
+ if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) {
+ u16 num = (buf[13] << 8) | buf[12];
+ u16 len = (buf[7] << 8) | buf[6];
+ u32 base = (buf[11] << 24) | (buf[10] << 16) |
+ (buf[9] << 8) | buf[8];
/*
* DMI version 0.0 means that the real version is taken from
* the SMBIOS version, which we don't know at this point.
*/
- if(buf[14]!=0)
+ if (buf[14] != 0)
printk(KERN_INFO "DMI %d.%d present.\n",
- buf[14]>>4, buf[14]&0x0F);
+ buf[14] >> 4, buf[14] & 0xF);
else
printk(KERN_INFO "DMI present.\n");
+
dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n",
num, len));
- dmi_printk((KERN_INFO "DMI table at 0x%08X.\n",
- base));
- if(dmi_table(base,len, num, decode)==0)
+ dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", base));
+
+ if (dmi_table(base,len, num, decode) == 0)
return 0;
}
}
@@ -143,16 +132,17 @@ static char *dmi_ident[DMI_STRING_MAX];
/*
* Save a DMI string
*/
-
static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
{
char *d = (char*)dm;
char *p = dmi_string(dm, d[string]);
- if(p==NULL || *p == 0)
+
+ if (p == NULL || *p == 0)
return;
if (dmi_ident[slot])
return;
- dmi_ident[slot] = alloc_bootmem(strlen(p)+1);
+
+ dmi_ident[slot] = alloc_bootmem(strlen(p) + 1);
if(dmi_ident[slot])
strcpy(dmi_ident[slot], p);
else
@@ -160,281 +150,47 @@ static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
}
/*
- * Ugly compatibility crap.
- */
-#define dmi_blacklist dmi_system_id
-#define NO_MATCH { DMI_NONE, NULL}
-#define MATCH DMI_MATCH
-
-/*
- * Toshiba keyboard likes to repeat keys when they are not repeated.
- */
-
-static __init int broken_toshiba_keyboard(struct dmi_blacklist *d)
-{
- printk(KERN_WARNING "Toshiba with broken keyboard detected. If your keyboard sometimes generates 3 keypresses instead of one, see http://davyd.ucc.asn.au/projects/toshiba/README\n");
- return 0;
-}
-
-
-#ifdef CONFIG_ACPI_SLEEP
-static __init int reset_videomode_after_s3(struct dmi_blacklist *d)
-{
- /* See acpi_wakeup.S */
- extern long acpi_video_flags;
- acpi_video_flags |= 2;
- return 0;
-}
-#endif
-
-
-#ifdef CONFIG_ACPI_BOOT
-extern int acpi_force;
-
-static __init __attribute__((unused)) int dmi_disable_acpi(struct dmi_blacklist *d)
-{
- if (!acpi_force) {
- printk(KERN_NOTICE "%s detected: acpi off\n",d->ident);
- disable_acpi();
- } else {
- printk(KERN_NOTICE
- "Warning: DMI blacklist says broken, but acpi forced\n");
- }
- return 0;
-}
-
-/*
- * Limit ACPI to CPU enumeration for HT
- */
-static __init __attribute__((unused)) int force_acpi_ht(struct dmi_blacklist *d)
-{
- if (!acpi_force) {
- printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident);
- disable_acpi();
- acpi_ht = 1;
- } else {
- printk(KERN_NOTICE
- "Warning: acpi=force overrules DMI blacklist: acpi=ht\n");
- }
- return 0;
-}
-#endif
-
-#ifdef CONFIG_ACPI_PCI
-static __init int disable_acpi_irq(struct dmi_blacklist *d)
-{
- if (!acpi_force) {
- printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
- d->ident);
- acpi_noirq_set();
- }
- return 0;
-}
-static __init int disable_acpi_pci(struct dmi_blacklist *d)
-{
- if (!acpi_force) {
- printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
- d->ident);
- acpi_disable_pci();
- }
- return 0;
-}
-#endif
-
-/*
- * Process the DMI blacklists
- */
-
-
-/*
- * This will be expanded over time to force things like the APM
- * interrupt mask settings according to the laptop
- */
-
-static __initdata struct dmi_blacklist dmi_blacklist[]={
-
- { broken_toshiba_keyboard, "Toshiba Satellite 4030cdt", { /* Keyboard generates spurious repeats */
- MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
- NO_MATCH, NO_MATCH, NO_MATCH
- } },
-#ifdef CONFIG_ACPI_SLEEP
- { reset_videomode_after_s3, "Toshiba Satellite 4030cdt", { /* Reset video mode after returning from ACPI S3 sleep */
- MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
- NO_MATCH, NO_MATCH, NO_MATCH
- } },
-#endif
-
-#ifdef CONFIG_ACPI_BOOT
- /*
- * If your system is blacklisted here, but you find that acpi=force
- * works for you, please contact acpi-devel@sourceforge.net
- */
-
- /*
- * Boxes that need ACPI disabled
- */
-
- { dmi_disable_acpi, "IBM Thinkpad", {
- MATCH(DMI_BOARD_VENDOR, "IBM"),
- MATCH(DMI_BOARD_NAME, "2629H1G"),
- NO_MATCH, NO_MATCH }},
-
- /*
- * Boxes that need acpi=ht
- */
-
- { force_acpi_ht, "FSC Primergy T850", {
- MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
- MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "DELL GX240", {
- MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
- MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "HP VISUALIZE NT Workstation", {
- MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
- MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "Compaq Workstation W8000", {
- MATCH(DMI_SYS_VENDOR, "Compaq"),
- MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "ASUS P4B266", {
- MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
- MATCH(DMI_BOARD_NAME, "P4B266"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "ASUS P2B-DS", {
- MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
- MATCH(DMI_BOARD_NAME, "P2B-DS"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "ASUS CUR-DLS", {
- MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
- MATCH(DMI_BOARD_NAME, "CUR-DLS"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "ABIT i440BX-W83977", {
- MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
- MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "IBM Bladecenter", {
- MATCH(DMI_BOARD_VENDOR, "IBM"),
- MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "IBM eServer xSeries 360", {
- MATCH(DMI_BOARD_VENDOR, "IBM"),
- MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "IBM eserver xSeries 330", {
- MATCH(DMI_BOARD_VENDOR, "IBM"),
- MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "IBM eserver xSeries 440", {
- MATCH(DMI_BOARD_VENDOR, "IBM"),
- MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
- NO_MATCH, NO_MATCH }},
-
-#endif // CONFIG_ACPI_BOOT
-
-#ifdef CONFIG_ACPI_PCI
- /*
- * Boxes that need ACPI PCI IRQ routing disabled
- */
-
- { disable_acpi_irq, "ASUS A7V", {
- MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
- MATCH(DMI_BOARD_NAME, "<A7V>"),
- /* newer BIOS, Revision 1011, does work */
- MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"),
- NO_MATCH }},
-
- /*
- * Boxes that need ACPI PCI IRQ routing and PCI scan disabled
- */
- { disable_acpi_pci, "ASUS PR-DLS", { /* _BBN 0 bug */
- MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
- MATCH(DMI_BOARD_NAME, "PR-DLS"),
- MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"),
- MATCH(DMI_BIOS_DATE, "03/21/2003") }},
-
- { disable_acpi_pci, "Acer TravelMate 36x Laptop", {
- MATCH(DMI_SYS_VENDOR, "Acer"),
- MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
- NO_MATCH, NO_MATCH
- } },
-
-#endif
-
- { NULL, }
-};
-
-/*
* Process a DMI table entry. Right now all we care about are the BIOS
* and machine entries. For 2.5 we should pull the smbus controller info
* out of here.
*/
-
static void __init dmi_decode(struct dmi_header *dm)
{
-#ifdef DMI_DEBUG
- u8 *data = (u8 *)dm;
-#endif
+ u8 *data __attribute__((__unused__)) = (u8 *)dm;
- switch(dm->type)
- {
- case 0:
- dmi_printk(("BIOS Vendor: %s\n",
- dmi_string(dm, data[4])));
- dmi_save_ident(dm, DMI_BIOS_VENDOR, 4);
- dmi_printk(("BIOS Version: %s\n",
- dmi_string(dm, data[5])));
- dmi_save_ident(dm, DMI_BIOS_VERSION, 5);
- dmi_printk(("BIOS Release: %s\n",
- dmi_string(dm, data[8])));
- dmi_save_ident(dm, DMI_BIOS_DATE, 8);
- break;
- case 1:
- dmi_printk(("System Vendor: %s\n",
- dmi_string(dm, data[4])));
- dmi_save_ident(dm, DMI_SYS_VENDOR, 4);
- dmi_printk(("Product Name: %s\n",
- dmi_string(dm, data[5])));
- dmi_save_ident(dm, DMI_PRODUCT_NAME, 5);
- dmi_printk(("Version: %s\n",
- dmi_string(dm, data[6])));
- dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6);
- dmi_printk(("Serial Number: %s\n",
- dmi_string(dm, data[7])));
- break;
- case 2:
- dmi_printk(("Board Vendor: %s\n",
- dmi_string(dm, data[4])));
- dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
- dmi_printk(("Board Name: %s\n",
- dmi_string(dm, data[5])));
- dmi_save_ident(dm, DMI_BOARD_NAME, 5);
- dmi_printk(("Board Version: %s\n",
- dmi_string(dm, data[6])));
- dmi_save_ident(dm, DMI_BOARD_VERSION, 6);
- break;
+ switch(dm->type) {
+ case 0:
+ dmi_printk(("BIOS Vendor: %s\n", dmi_string(dm, data[4])));
+ dmi_save_ident(dm, DMI_BIOS_VENDOR, 4);
+ dmi_printk(("BIOS Version: %s\n", dmi_string(dm, data[5])));
+ dmi_save_ident(dm, DMI_BIOS_VERSION, 5);
+ dmi_printk(("BIOS Release: %s\n", dmi_string(dm, data[8])));
+ dmi_save_ident(dm, DMI_BIOS_DATE, 8);
+ break;
+ case 1:
+ dmi_printk(("System Vendor: %s\n", dmi_string(dm, data[4])));
+ dmi_save_ident(dm, DMI_SYS_VENDOR, 4);
+ dmi_printk(("Product Name: %s\n", dmi_string(dm, data[5])));
+ dmi_save_ident(dm, DMI_PRODUCT_NAME, 5);
+ dmi_printk(("Version: %s\n", dmi_string(dm, data[6])));
+ dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6);
+ dmi_printk(("Serial Number: %s\n", dmi_string(dm, data[7])));
+ dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7);
+ break;
+ case 2:
+ dmi_printk(("Board Vendor: %s\n", dmi_string(dm, data[4])));
+ dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
+ dmi_printk(("Board Name: %s\n", dmi_string(dm, data[5])));
+ dmi_save_ident(dm, DMI_BOARD_NAME, 5);
+ dmi_printk(("Board Version: %s\n", dmi_string(dm, data[6])));
+ dmi_save_ident(dm, DMI_BOARD_VERSION, 6);
+ break;
}
}
void __init dmi_scan_machine(void)
{
- int err = dmi_iterate(dmi_decode);
- if(err == 0)
- dmi_check_system(dmi_blacklist);
- else
+ if (dmi_iterate(dmi_decode))
printk(KERN_INFO "DMI not present.\n");
}
@@ -470,7 +226,6 @@ fail: d++;
return count;
}
-
EXPORT_SYMBOL(dmi_check_system);
/**
@@ -480,8 +235,8 @@ EXPORT_SYMBOL(dmi_check_system);
* Returns one DMI data value, can be used to perform
* complex DMI data checks.
*/
-char * dmi_get_system_info(int field)
+char *dmi_get_system_info(int field)
{
return dmi_ident[field];
}
-
+EXPORT_SYMBOL(dmi_get_system_info);
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index f732f427b41..385883ea8c1 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -30,6 +30,7 @@
#include <linux/ioport.h>
#include <linux/module.h>
#include <linux/efi.h>
+#include <linux/kexec.h>
#include <asm/setup.h>
#include <asm/io.h>
@@ -598,6 +599,9 @@ efi_initialize_iomem_resources(struct resource *code_resource,
if (md->type == EFI_CONVENTIONAL_MEMORY) {
request_resource(res, code_resource);
request_resource(res, data_resource);
+#ifdef CONFIG_KEXEC
+ request_resource(res, &crashk_res);
+#endif
}
}
}
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index e966fc8c44c..4477bb10709 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -299,7 +299,6 @@ is386: movl $2,%ecx # set MP
movl %eax,%cr0
call check_x87
- incb ready
lgdt cpu_gdt_descr
lidt idt_descr
ljmp $(__KERNEL_CS),$1f
@@ -316,8 +315,9 @@ is386: movl $2,%ecx # set MP
lldt %ax
cld # gcc2 wants the direction flag cleared at all times
#ifdef CONFIG_SMP
- movb ready, %cl
- cmpb $1,%cl
+ movb ready, %cl
+ movb $1, ready
+ cmpb $0,%cl
je 1f # the first CPU calls start_kernel
# all other CPUs call initialize_secondary
call initialize_secondary
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
index 2c4813b47e5..178f4e9bac9 100644
--- a/arch/i386/kernel/i8259.c
+++ b/arch/i386/kernel/i8259.c
@@ -268,10 +268,22 @@ static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
return 0;
}
+static int i8259A_shutdown(struct sys_device *dev)
+{
+ /* Put the i8259A into a quiescent state that
+ * the kernel initialization code can get it
+ * out of.
+ */
+ outb(0xff, 0x21); /* mask all of 8259A-1 */
+ outb(0xff, 0xA1); /* mask all of 8259A-1 */
+ return 0;
+}
+
static struct sysdev_class i8259_sysdev_class = {
set_kset_name("i8259"),
.suspend = i8259A_suspend,
.resume = i8259A_resume,
+ .shutdown = i8259A_shutdown,
};
static struct sys_device device_i8259A = {
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 08540bc4ba3..6578f40bd50 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -37,6 +37,7 @@
#include <asm/smp.h>
#include <asm/desc.h>
#include <asm/timer.h>
+#include <asm/i8259.h>
#include <mach_apic.h>
@@ -573,12 +574,14 @@ static int balanced_irq(void *unused)
for ( ; ; ) {
set_current_state(TASK_INTERRUPTIBLE);
time_remaining = schedule_timeout(time_remaining);
- try_to_freeze(PF_FREEZE);
+ try_to_freeze();
if (time_after(jiffies,
prev_balance_time+balanced_irq_interval)) {
+ preempt_disable();
do_irq_balance();
prev_balance_time = jiffies;
time_remaining = balanced_irq_interval;
+ preempt_enable();
}
}
return 0;
@@ -630,10 +633,8 @@ static int __init balanced_irq_init(void)
printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
failed:
for (i = 0; i < NR_CPUS; i++) {
- if(irq_cpu_data[i].irq_delta)
- kfree(irq_cpu_data[i].irq_delta);
- if(irq_cpu_data[i].last_irq)
- kfree(irq_cpu_data[i].last_irq);
+ kfree(irq_cpu_data[i].irq_delta);
+ kfree(irq_cpu_data[i].last_irq);
}
return 0;
}
@@ -1566,7 +1567,6 @@ void print_all_local_APICs (void)
void /*__init*/ print_PIC(void)
{
- extern spinlock_t i8259A_lock;
unsigned int v;
unsigned long flags;
@@ -1634,12 +1634,43 @@ static void __init enable_IO_APIC(void)
*/
void disable_IO_APIC(void)
{
+ int pin;
/*
* Clear the IO-APIC before rebooting:
*/
clear_IO_APIC();
- disconnect_bsp_APIC();
+ /*
+ * If the i82559 is routed through an IOAPIC
+ * Put that IOAPIC in virtual wire mode
+ * so legacy interrups can be delivered.
+ */
+ pin = find_isa_irq_pin(0, mp_ExtINT);
+ if (pin != -1) {
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ memset(&entry, 0, sizeof(entry));
+ entry.mask = 0; /* Enabled */
+ entry.trigger = 0; /* Edge */
+ entry.irr = 0;
+ entry.polarity = 0; /* High */
+ entry.delivery_status = 0;
+ entry.dest_mode = 0; /* Physical */
+ entry.delivery_mode = 7; /* ExtInt */
+ entry.vector = 0;
+ entry.dest.physical.physical_dest = 0;
+
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+ disconnect_bsp_APIC(pin != -1);
}
/*
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 73945a3c53c..ce66dcc26d9 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -15,6 +15,9 @@
#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
EXPORT_PER_CPU_SYMBOL(irq_stat);
@@ -153,6 +156,11 @@ void irq_ctx_init(int cpu)
cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
}
+void irq_ctx_exit(int cpu)
+{
+ hardirq_ctx[cpu] = NULL;
+}
+
extern asmlinkage void __do_softirq(void);
asmlinkage void do_softirq(void)
@@ -210,9 +218,8 @@ int show_interrupts(struct seq_file *p, void *v)
if (i == 0) {
seq_printf(p, " ");
- for (j=0; j<NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "CPU%d ",j);
+ for_each_cpu(j)
+ seq_printf(p, "CPU%d ",j);
seq_putc(p, '\n');
}
@@ -225,9 +232,8 @@ int show_interrupts(struct seq_file *p, void *v)
#ifndef CONFIG_SMP
seq_printf(p, "%10u ", kstat_irqs(i));
#else
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ for_each_cpu(j)
+ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
#endif
seq_printf(p, " %14s", irq_desc[i].handler->typename);
seq_printf(p, " %s", action->name);
@@ -240,16 +246,14 @@ skip:
spin_unlock_irqrestore(&irq_desc[i].lock, flags);
} else if (i == NR_IRQS) {
seq_printf(p, "NMI: ");
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ", nmi_count(j));
+ for_each_cpu(j)
+ seq_printf(p, "%10u ", nmi_count(j));
seq_putc(p, '\n');
#ifdef CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ",
- per_cpu(irq_stat,j).apic_timer_irqs);
+ for_each_cpu(j)
+ seq_printf(p, "%10u ",
+ per_cpu(irq_stat,j).apic_timer_irqs);
seq_putc(p, '\n');
#endif
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
@@ -259,3 +263,45 @@ skip:
}
return 0;
}
+
+#ifdef CONFIG_HOTPLUG_CPU
+#include <mach_apic.h>
+
+void fixup_irqs(cpumask_t map)
+{
+ unsigned int irq;
+ static int warned;
+
+ for (irq = 0; irq < NR_IRQS; irq++) {
+ cpumask_t mask;
+ if (irq == 2)
+ continue;
+
+ cpus_and(mask, irq_affinity[irq], map);
+ if (any_online_cpu(mask) == NR_CPUS) {
+ printk("Breaking affinity for irq %i\n", irq);
+ mask = map;
+ }
+ if (irq_desc[irq].handler->set_affinity)
+ irq_desc[irq].handler->set_affinity(irq, mask);
+ else if (irq_desc[irq].action && !(warned++))
+ printk("Cannot set affinity for irq %i\n", irq);
+ }
+
+#if 0
+ barrier();
+ /* Ingo Molnar says: "after the IO-APIC masks have been redirected
+ [note the nop - the interrupt-enable boundary on x86 is two
+ instructions from sti] - to flush out pending hardirqs and
+ IPIs. After this point nothing is supposed to reach this CPU." */
+ __asm__ __volatile__("sti; nop; cli");
+ barrier();
+#else
+ /* That doesn't seem sufficient. Give it 1ms. */
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+#endif
+}
+#endif
+
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index 3762f6b35ab..a6d8c45961d 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -127,48 +127,23 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
regs->eip = (unsigned long)&p->ainsn.insn;
}
-struct task_struct *arch_get_kprobe_task(void *ptr)
-{
- return ((struct thread_info *) (((unsigned long) ptr) &
- (~(THREAD_SIZE -1))))->task;
-}
-
void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
{
unsigned long *sara = (unsigned long *)&regs->esp;
- struct kretprobe_instance *ri;
- static void *orig_ret_addr;
+ struct kretprobe_instance *ri;
+
+ if ((ri = get_free_rp_inst(rp)) != NULL) {
+ ri->rp = rp;
+ ri->task = current;
+ ri->ret_addr = (kprobe_opcode_t *) *sara;
- /*
- * Save the return address when the return probe hits
- * the first time, and use it to populate the (krprobe
- * instance)->ret_addr for subsequent return probes at
- * the same addrress since stack address would have
- * the kretprobe_trampoline by then.
- */
- if (((void*) *sara) != kretprobe_trampoline)
- orig_ret_addr = (void*) *sara;
-
- if ((ri = get_free_rp_inst(rp)) != NULL) {
- ri->rp = rp;
- ri->stack_addr = sara;
- ri->ret_addr = orig_ret_addr;
- add_rp_inst(ri);
/* Replace the return addr with trampoline addr */
*sara = (unsigned long) &kretprobe_trampoline;
- } else {
- rp->nmissed++;
- }
-}
-void arch_kprobe_flush_task(struct task_struct *tk)
-{
- struct kretprobe_instance *ri;
- while ((ri = get_rp_inst_tsk(tk)) != NULL) {
- *((unsigned long *)(ri->stack_addr)) =
- (unsigned long) ri->ret_addr;
- recycle_rp_inst(ri);
- }
+ add_rp_inst(ri);
+ } else {
+ rp->nmissed++;
+ }
}
/*
@@ -286,36 +261,59 @@ no_kprobe:
*/
int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
- struct task_struct *tsk;
- struct kretprobe_instance *ri;
- struct hlist_head *head;
- struct hlist_node *node;
- unsigned long *sara = ((unsigned long *) &regs->esp) - 1;
-
- tsk = arch_get_kprobe_task(sara);
- head = kretprobe_inst_table_head(tsk);
-
- hlist_for_each_entry(ri, node, head, hlist) {
- if (ri->stack_addr == sara && ri->rp) {
- if (ri->rp->handler)
- ri->rp->handler(ri, regs);
- }
- }
- return 0;
-}
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head;
+ struct hlist_node *node, *tmp;
+ unsigned long orig_ret_address = 0;
+ unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
-void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs,
- unsigned long flags)
-{
- struct kretprobe_instance *ri;
- /* RA already popped */
- unsigned long *sara = ((unsigned long *)&regs->esp) - 1;
+ head = kretprobe_inst_table_head(current);
- while ((ri = get_rp_inst(sara))) {
- regs->eip = (unsigned long)ri->ret_addr;
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because an multiple functions in the call path
+ * have a return probe installed on them, and/or more then one return
+ * return probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always inserted at the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the first instance's ret_addr will point to the
+ * real return address, and all the rest will point to
+ * kretprobe_trampoline
+ */
+ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ if (ri->rp && ri->rp->handler)
+ ri->rp->handler(ri, regs);
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
recycle_rp_inst(ri);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
}
- regs->eflags &= ~TF_MASK;
+
+ BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+ regs->eip = orig_ret_address;
+
+ unlock_kprobes();
+ preempt_enable_no_resched();
+
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we have handled unlocking
+ * and re-enabling preemption.
+ */
+ return 1;
}
/*
@@ -403,8 +401,7 @@ static inline int post_kprobe_handler(struct pt_regs *regs)
current_kprobe->post_handler(current_kprobe, regs, 0);
}
- if (current_kprobe->post_handler != trampoline_post_handler)
- resume_execution(current_kprobe, regs);
+ resume_execution(current_kprobe, regs);
regs->eflags |= kprobe_saved_eflags;
/*Restore back the original saved kprobes variables and continue. */
@@ -534,3 +531,13 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
}
return 0;
}
+
+static struct kprobe trampoline_p = {
+ .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+ .pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+ return register_kprobe(&trampoline_p);
+}
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c
new file mode 100644
index 00000000000..52ed18d8b51
--- /dev/null
+++ b/arch/i386/kernel/machine_kexec.c
@@ -0,0 +1,226 @@
+/*
+ * machine_kexec.c - handle transition of Linux booting another kernel
+ * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/delay.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/cpufeature.h>
+
+static inline unsigned long read_cr3(void)
+{
+ unsigned long cr3;
+ asm volatile("movl %%cr3,%0": "=r"(cr3));
+ return cr3;
+}
+
+#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
+
+#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L2_ATTR (_PAGE_PRESENT)
+
+#define LEVEL0_SIZE (1UL << 12UL)
+
+#ifndef CONFIG_X86_PAE
+#define LEVEL1_SIZE (1UL << 22UL)
+static u32 pgtable_level1[1024] PAGE_ALIGNED;
+
+static void identity_map_page(unsigned long address)
+{
+ unsigned long level1_index, level2_index;
+ u32 *pgtable_level2;
+
+ /* Find the current page table */
+ pgtable_level2 = __va(read_cr3());
+
+ /* Find the indexes of the physical address to identity map */
+ level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+ level2_index = address / LEVEL1_SIZE;
+
+ /* Identity map the page table entry */
+ pgtable_level1[level1_index] = address | L0_ATTR;
+ pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+
+ /* Flush the tlb so the new mapping takes effect.
+ * Global tlb entries are not flushed but that is not an issue.
+ */
+ load_cr3(pgtable_level2);
+}
+
+#else
+#define LEVEL1_SIZE (1UL << 21UL)
+#define LEVEL2_SIZE (1UL << 30UL)
+static u64 pgtable_level1[512] PAGE_ALIGNED;
+static u64 pgtable_level2[512] PAGE_ALIGNED;
+
+static void identity_map_page(unsigned long address)
+{
+ unsigned long level1_index, level2_index, level3_index;
+ u64 *pgtable_level3;
+
+ /* Find the current page table */
+ pgtable_level3 = __va(read_cr3());
+
+ /* Find the indexes of the physical address to identity map */
+ level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+ level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
+ level3_index = address / LEVEL2_SIZE;
+
+ /* Identity map the page table entry */
+ pgtable_level1[level1_index] = address | L0_ATTR;
+ pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+ set_64bit(&pgtable_level3[level3_index],
+ __pa(pgtable_level2) | L2_ATTR);
+
+ /* Flush the tlb so the new mapping takes effect.
+ * Global tlb entries are not flushed but that is not an issue.
+ */
+ load_cr3(pgtable_level3);
+}
+#endif
+
+
+static void set_idt(void *newidt, __u16 limit)
+{
+ unsigned char curidt[6];
+
+ /* ia32 supports unaliged loads & stores */
+ (*(__u16 *)(curidt)) = limit;
+ (*(__u32 *)(curidt +2)) = (unsigned long)(newidt);
+
+ __asm__ __volatile__ (
+ "lidt %0\n"
+ : "=m" (curidt)
+ );
+};
+
+
+static void set_gdt(void *newgdt, __u16 limit)
+{
+ unsigned char curgdt[6];
+
+ /* ia32 supports unaligned loads & stores */
+ (*(__u16 *)(curgdt)) = limit;
+ (*(__u32 *)(curgdt +2)) = (unsigned long)(newgdt);
+
+ __asm__ __volatile__ (
+ "lgdt %0\n"
+ : "=m" (curgdt)
+ );
+};
+
+static void load_segments(void)
+{
+#define __STR(X) #X
+#define STR(X) __STR(X)
+
+ __asm__ __volatile__ (
+ "\tljmp $"STR(__KERNEL_CS)",$1f\n"
+ "\t1:\n"
+ "\tmovl $"STR(__KERNEL_DS)",%eax\n"
+ "\tmovl %eax,%ds\n"
+ "\tmovl %eax,%es\n"
+ "\tmovl %eax,%fs\n"
+ "\tmovl %eax,%gs\n"
+ "\tmovl %eax,%ss\n"
+ );
+#undef STR
+#undef __STR
+}
+
+typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
+ unsigned long indirection_page,
+ unsigned long reboot_code_buffer,
+ unsigned long start_address,
+ unsigned int has_pae) ATTRIB_NORET;
+
+const extern unsigned char relocate_new_kernel[];
+extern void relocate_new_kernel_end(void);
+const extern unsigned int relocate_new_kernel_size;
+
+/*
+ * A architecture hook called to validate the
+ * proposed image and prepare the control pages
+ * as needed. The pages for KEXEC_CONTROL_CODE_SIZE
+ * have been allocated, but the segments have yet
+ * been copied into the kernel.
+ *
+ * Do what every setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ *
+ * Currently nothing.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+ return 0;
+}
+
+/*
+ * Undo anything leftover by machine_kexec_prepare
+ * when an image is freed.
+ */
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+NORET_TYPE void machine_kexec(struct kimage *image)
+{
+ unsigned long page_list;
+ unsigned long reboot_code_buffer;
+
+ relocate_new_kernel_t rnk;
+
+ /* Interrupts aren't acceptable while we reboot */
+ local_irq_disable();
+
+ /* Compute some offsets */
+ reboot_code_buffer = page_to_pfn(image->control_code_page)
+ << PAGE_SHIFT;
+ page_list = image->head;
+
+ /* Set up an identity mapping for the reboot_code_buffer */
+ identity_map_page(reboot_code_buffer);
+
+ /* copy it out */
+ memcpy((void *)reboot_code_buffer, relocate_new_kernel,
+ relocate_new_kernel_size);
+
+ /* The segment registers are funny things, they are
+ * automatically loaded from a table, in memory wherever you
+ * set them to a specific selector, but this table is never
+ * accessed again you set the segment to a different selector.
+ *
+ * The more common model is are caches where the behide
+ * the scenes work is done, but is also dropped at arbitrary
+ * times.
+ *
+ * I take advantage of this here by force loading the
+ * segments, before I zap the gdt with an invalid value.
+ */
+ load_segments();
+ /* The gdt & idt are now invalid.
+ * If you want to load them you must set up your own idt & gdt.
+ */
+ set_gdt(phys_to_virt(0),0);
+ set_idt(phys_to_virt(0),0);
+
+ /* now call it */
+ rnk = (relocate_new_kernel_t) reboot_code_buffer;
+ (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
+}
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
index 383a11600d2..af917f609c7 100644
--- a/arch/i386/kernel/mpparse.c
+++ b/arch/i386/kernel/mpparse.c
@@ -67,7 +67,6 @@ unsigned long mp_lapic_addr;
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
-unsigned int boot_cpu_logical_apicid = -1U;
/* Internal processor count */
static unsigned int __initdata num_processors;
@@ -180,7 +179,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
Dprintk(" Bootup CPU\n");
boot_cpu_physical_apicid = m->mpc_apicid;
- boot_cpu_logical_apicid = apicid;
}
if (num_processors >= NR_CPUS) {
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index aea2ce1145d..ba243a4cc11 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -13,6 +13,7 @@
#include <stdarg.h>
+#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
@@ -55,6 +56,9 @@
#include <linux/irq.h>
#include <linux/err.h>
+#include <asm/tlbflush.h>
+#include <asm/cpu.h>
+
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
static int hlt_counter;
@@ -143,14 +147,42 @@ static void poll_idle (void)
}
}
+#ifdef CONFIG_HOTPLUG_CPU
+#include <asm/nmi.h>
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+ /* This must be done before dead CPU ack */
+ cpu_exit_clear();
+ wbinvd();
+ mb();
+ /* Ack it */
+ __get_cpu_var(cpu_state) = CPU_DEAD;
+
+ /*
+ * With physical CPU hotplug, we should halt the cpu
+ */
+ local_irq_disable();
+ while (1)
+ __asm__ __volatile__("hlt":::"memory");
+}
+#else
+static inline void play_dead(void)
+{
+ BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
/*
* The idle thread. There's no useful work to be
* done, so just try to conserve power and have a
* low exit latency (ie sit in a loop waiting for
* somebody to say that they'd like to reschedule)
*/
-void cpu_idle (void)
+void cpu_idle(void)
{
+ int cpu = raw_smp_processor_id();
+
/* endless idle loop with no priority at all */
while (1) {
while (!need_resched()) {
@@ -165,6 +197,9 @@ void cpu_idle (void)
if (!idle)
idle = default_idle;
+ if (cpu_is_offline(cpu))
+ play_dead();
+
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
idle();
}
@@ -223,7 +258,7 @@ static void mwait_idle(void)
}
}
-void __init select_idle_routine(const struct cpuinfo_x86 *c)
+void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_MWAIT)) {
printk("monitor/mwait feature present.\n");
@@ -582,6 +617,33 @@ handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss)
}
/*
+ * This function selects if the context switch from prev to next
+ * has to tweak the TSC disable bit in the cr4.
+ */
+static inline void disable_tsc(struct task_struct *prev_p,
+ struct task_struct *next_p)
+{
+ struct thread_info *prev, *next;
+
+ /*
+ * gcc should eliminate the ->thread_info dereference if
+ * has_secure_computing returns 0 at compile time (SECCOMP=n).
+ */
+ prev = prev_p->thread_info;
+ next = next_p->thread_info;
+
+ if (has_secure_computing(prev) || has_secure_computing(next)) {
+ /* slow path here */
+ if (has_secure_computing(prev) &&
+ !has_secure_computing(next)) {
+ write_cr4(read_cr4() & ~X86_CR4_TSD);
+ } else if (!has_secure_computing(prev) &&
+ has_secure_computing(next))
+ write_cr4(read_cr4() | X86_CR4_TSD);
+ }
+}
+
+/*
* switch_to(x,yn) should switch tasks from x to y.
*
* We fsave/fwait so that an exception goes off at the right time
@@ -660,6 +722,8 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
handle_io_bitmap(next, tss);
+ disable_tsc(prev_p, next_p);
+
return prev_p;
}
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
index db912209a8d..b3e58484996 100644
--- a/arch/i386/kernel/reboot.c
+++ b/arch/i386/kernel/reboot.c
@@ -26,7 +26,6 @@ static int reboot_mode;
static int reboot_thru_bios;
#ifdef CONFIG_SMP
-int reboot_smp = 0;
static int reboot_cpu = -1;
/* shamelessly grabbed from lib/vsprintf.c for readability */
#define is_digit(c) ((c) >= '0' && (c) <= '9')
@@ -49,7 +48,6 @@ static int __init reboot_setup(char *str)
break;
#ifdef CONFIG_SMP
case 's': /* "smp" reboot by executing reset on BSP or other CPU*/
- reboot_smp = 1;
if (is_digit(*(str+1))) {
reboot_cpu = (int) (*(str+1) - '0');
if (is_digit(*(str+2)))
@@ -88,33 +86,9 @@ static int __init set_bios_reboot(struct dmi_system_id *d)
return 0;
}
-/*
- * Some machines require the "reboot=s" commandline option, this quirk makes that automatic.
- */
-static int __init set_smp_reboot(struct dmi_system_id *d)
-{
-#ifdef CONFIG_SMP
- if (!reboot_smp) {
- reboot_smp = 1;
- printk(KERN_INFO "%s series board detected. Selecting SMP-method for reboots.\n", d->ident);
- }
-#endif
- return 0;
-}
-
-/*
- * Some machines require the "reboot=b,s" commandline option, this quirk makes that automatic.
- */
-static int __init set_smp_bios_reboot(struct dmi_system_id *d)
-{
- set_smp_reboot(d);
- set_bios_reboot(d);
- return 0;
-}
-
static struct dmi_system_id __initdata reboot_dmi_table[] = {
{ /* Handle problems with rebooting on Dell 1300's */
- .callback = set_smp_bios_reboot,
+ .callback = set_bios_reboot,
.ident = "Dell PowerEdge 1300",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
@@ -301,41 +275,32 @@ void machine_real_restart(unsigned char *code, int length)
EXPORT_SYMBOL(machine_real_restart);
#endif
-void machine_restart(char * __unused)
+void machine_shutdown(void)
{
#ifdef CONFIG_SMP
- int cpuid;
-
- cpuid = GET_APIC_ID(apic_read(APIC_ID));
-
- if (reboot_smp) {
-
- /* check to see if reboot_cpu is valid
- if its not, default to the BSP */
- if ((reboot_cpu == -1) ||
- (reboot_cpu > (NR_CPUS -1)) ||
- !physid_isset(cpuid, phys_cpu_present_map))
- reboot_cpu = boot_cpu_physical_apicid;
-
- reboot_smp = 0; /* use this as a flag to only go through this once*/
- /* re-run this function on the other CPUs
- it will fall though this section since we have
- cleared reboot_smp, and do the reboot if it is the
- correct CPU, otherwise it halts. */
- if (reboot_cpu != cpuid)
- smp_call_function((void *)machine_restart , NULL, 1, 0);
+ int reboot_cpu_id;
+
+ /* The boot cpu is always logical cpu 0 */
+ reboot_cpu_id = 0;
+
+ /* See if there has been given a command line override */
+ if ((reboot_cpu_id != -1) && (reboot_cpu < NR_CPUS) &&
+ cpu_isset(reboot_cpu, cpu_online_map)) {
+ reboot_cpu_id = reboot_cpu;
}
- /* if reboot_cpu is still -1, then we want a tradional reboot,
- and if we are not running on the reboot_cpu,, halt */
- if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
- for (;;)
- __asm__ __volatile__ ("hlt");
+ /* Make certain the cpu I'm rebooting on is online */
+ if (!cpu_isset(reboot_cpu_id, cpu_online_map)) {
+ reboot_cpu_id = smp_processor_id();
}
- /*
- * Stop all CPUs and turn off local APICs and the IO-APIC, so
- * other OSs see a clean IRQ state.
+
+ /* Make certain I only run on the appropriate processor */
+ set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id));
+
+ /* O.K. Now that I'm on the appropriate processor, stop
+ * all of the others, and disable their local APICs.
*/
+
smp_send_stop();
#endif /* CONFIG_SMP */
@@ -344,6 +309,11 @@ void machine_restart(char * __unused)
#ifdef CONFIG_X86_IO_APIC
disable_IO_APIC();
#endif
+}
+
+void machine_restart(char * __unused)
+{
+ machine_shutdown();
if (!reboot_thru_bios) {
if (efi_enabled) {
diff --git a/arch/i386/kernel/relocate_kernel.S b/arch/i386/kernel/relocate_kernel.S
new file mode 100644
index 00000000000..d312616effa
--- /dev/null
+++ b/arch/i386/kernel/relocate_kernel.S
@@ -0,0 +1,120 @@
+/*
+ * relocate_kernel.S - put the kernel image in place to boot
+ * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/linkage.h>
+
+ /*
+ * Must be relocatable PIC code callable as a C function, that once
+ * it starts can not use the previous processes stack.
+ */
+ .globl relocate_new_kernel
+relocate_new_kernel:
+ /* read the arguments and say goodbye to the stack */
+ movl 4(%esp), %ebx /* page_list */
+ movl 8(%esp), %ebp /* reboot_code_buffer */
+ movl 12(%esp), %edx /* start address */
+ movl 16(%esp), %ecx /* cpu_has_pae */
+
+ /* zero out flags, and disable interrupts */
+ pushl $0
+ popfl
+
+ /* set a new stack at the bottom of our page... */
+ lea 4096(%ebp), %esp
+
+ /* store the parameters back on the stack */
+ pushl %edx /* store the start address */
+
+ /* Set cr0 to a known state:
+ * 31 0 == Paging disabled
+ * 18 0 == Alignment check disabled
+ * 16 0 == Write protect disabled
+ * 3 0 == No task switch
+ * 2 0 == Don't do FP software emulation.
+ * 0 1 == Proctected mode enabled
+ */
+ movl %cr0, %eax
+ andl $~((1<<31)|(1<<18)|(1<<16)|(1<<3)|(1<<2)), %eax
+ orl $(1<<0), %eax
+ movl %eax, %cr0
+
+ /* clear cr4 if applicable */
+ testl %ecx, %ecx
+ jz 1f
+ /* Set cr4 to a known state:
+ * Setting everything to zero seems safe.
+ */
+ movl %cr4, %eax
+ andl $0, %eax
+ movl %eax, %cr4
+
+ jmp 1f
+1:
+
+ /* Flush the TLB (needed?) */
+ xorl %eax, %eax
+ movl %eax, %cr3
+
+ /* Do the copies */
+ movl %ebx, %ecx
+ jmp 1f
+
+0: /* top, read another word from the indirection page */
+ movl (%ebx), %ecx
+ addl $4, %ebx
+1:
+ testl $0x1, %ecx /* is it a destination page */
+ jz 2f
+ movl %ecx, %edi
+ andl $0xfffff000, %edi
+ jmp 0b
+2:
+ testl $0x2, %ecx /* is it an indirection page */
+ jz 2f
+ movl %ecx, %ebx
+ andl $0xfffff000, %ebx
+ jmp 0b
+2:
+ testl $0x4, %ecx /* is it the done indicator */
+ jz 2f
+ jmp 3f
+2:
+ testl $0x8, %ecx /* is it the source indicator */
+ jz 0b /* Ignore it otherwise */
+ movl %ecx, %esi /* For every source page do a copy */
+ andl $0xfffff000, %esi
+
+ movl $1024, %ecx
+ rep ; movsl
+ jmp 0b
+
+3:
+
+ /* To be certain of avoiding problems with self-modifying code
+ * I need to execute a serializing instruction here.
+ * So I flush the TLB, it's handy, and not processor dependent.
+ */
+ xorl %eax, %eax
+ movl %eax, %cr3
+
+ /* set all of the registers to known values */
+ /* leave %esp alone */
+
+ xorl %eax, %eax
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ xorl %edx, %edx
+ xorl %esi, %esi
+ xorl %edi, %edi
+ xorl %ebp, %ebp
+ ret
+relocate_new_kernel_end:
+
+ .globl relocate_new_kernel_size
+relocate_new_kernel_size:
+ .long relocate_new_kernel_end - relocate_new_kernel
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 30406fd0b64..7306353c520 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -43,7 +43,12 @@
#include <linux/init.h>
#include <linux/edd.h>
#include <linux/nodemask.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+
#include <video/edid.h>
+
+#include <asm/apic.h>
#include <asm/e820.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
@@ -55,12 +60,15 @@
#include "setup_arch_pre.h"
#include <bios_ebda.h>
+/* Forward Declaration. */
+void __init find_max_pfn(void);
+
/* This value is set up by the early boot code to point to the value
immediately after the boot time page tables. It contains a *physical*
address, and must not be in the .bss segment! */
unsigned long init_pg_tables_end __initdata = ~0UL;
-int disable_pse __initdata = 0;
+int disable_pse __devinitdata = 0;
/*
* Machine setup..
@@ -732,6 +740,15 @@ static void __init parse_cmdline_early (char ** cmdline_p)
if (to != command_line)
to--;
if (!memcmp(from+7, "exactmap", 8)) {
+#ifdef CONFIG_CRASH_DUMP
+ /* If we are doing a crash dump, we
+ * still need to know the real mem
+ * size before original memory map is
+ * reset.
+ */
+ find_max_pfn();
+ saved_max_pfn = max_pfn;
+#endif
from += 8+7;
e820.nr_map = 0;
userdef = 1;
@@ -835,6 +852,44 @@ static void __init parse_cmdline_early (char ** cmdline_p)
#endif /* CONFIG_X86_LOCAL_APIC */
#endif /* CONFIG_ACPI_BOOT */
+#ifdef CONFIG_X86_LOCAL_APIC
+ /* enable local APIC */
+ else if (!memcmp(from, "lapic", 5))
+ lapic_enable();
+
+ /* disable local APIC */
+ else if (!memcmp(from, "nolapic", 6))
+ lapic_disable();
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_KEXEC
+ /* crashkernel=size@addr specifies the location to reserve for
+ * a crash kernel. By reserving this memory we guarantee
+ * that linux never set's it up as a DMA target.
+ * Useful for holding code to do something appropriate
+ * after a kernel panic.
+ */
+ else if (!memcmp(from, "crashkernel=", 12)) {
+ unsigned long size, base;
+ size = memparse(from+12, &from);
+ if (*from == '@') {
+ base = memparse(from+1, &from);
+ /* FIXME: Do I want a sanity check
+ * to validate the memory range?
+ */
+ crashk_res.start = base;
+ crashk_res.end = base + size - 1;
+ }
+ }
+#endif
+#ifdef CONFIG_CRASH_DUMP
+ /* elfcorehdr= specifies the location of elf core header
+ * stored by the crashed kernel.
+ */
+ else if (!memcmp(from, "elfcorehdr=", 11))
+ elfcorehdr_addr = memparse(from+11, &from);
+#endif
+
/*
* highmem=size forces highmem to be exactly 'size' bytes.
* This works even on boxes that have no highmem otherwise.
@@ -1113,8 +1168,8 @@ void __init setup_bootmem_allocator(void)
* the (very unlikely) case of us accidentally initializing the
* bootmem allocator with an invalid RAM area.
*/
- reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) +
- bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
+ reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
+ bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
/*
* reserve physical page 0 - it's a special BIOS page on many boxes,
@@ -1170,6 +1225,11 @@ void __init setup_bootmem_allocator(void)
}
}
#endif
+#ifdef CONFIG_KEXEC
+ if (crashk_res.start != crashk_res.end)
+ reserve_bootmem(crashk_res.start,
+ crashk_res.end - crashk_res.start + 1);
+#endif
}
/*
@@ -1223,6 +1283,9 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat
*/
request_resource(res, code_resource);
request_resource(res, data_resource);
+#ifdef CONFIG_KEXEC
+ request_resource(res, &crashk_res);
+#endif
}
}
}
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index b9b8f4e20fa..89ef7adc63a 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -608,10 +608,8 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset)
if (!user_mode(regs))
return 1;
- if (current->flags & PF_FREEZE) {
- refrigerator(0);
+ if (try_to_freeze())
goto no_signal;
- }
if (!oldset)
oldset = &current->blocked;
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 68be7d0c723..cec4bde6716 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -19,6 +19,7 @@
#include <linux/mc146818rtc.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
+#include <linux/cpu.h>
#include <linux/module.h>
#include <asm/mtrr.h>
@@ -164,7 +165,7 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
unsigned long flags;
local_irq_save(flags);
-
+ WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
/*
* Wait for idle.
*/
@@ -346,21 +347,21 @@ out:
static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
unsigned long va)
{
- cpumask_t tmp;
/*
* A couple of (to be removed) sanity checks:
*
- * - we do not send IPIs to not-yet booted CPUs.
* - current CPU must not be in mask
* - mask must exist :)
*/
BUG_ON(cpus_empty(cpumask));
-
- cpus_and(tmp, cpumask, cpu_online_map);
- BUG_ON(!cpus_equal(cpumask, tmp));
BUG_ON(cpu_isset(smp_processor_id(), cpumask));
BUG_ON(!mm);
+ /* If a CPU which we ran on has gone down, OK. */
+ cpus_and(cpumask, cpumask, cpu_online_map);
+ if (cpus_empty(cpumask))
+ return;
+
/*
* i'm not happy about this global shared spinlock in the
* MM hot path, but we'll see how contended it is.
@@ -476,6 +477,7 @@ void flush_tlb_all(void)
*/
void smp_send_reschedule(int cpu)
{
+ WARN_ON(cpu_is_offline(cpu));
send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
}
@@ -493,6 +495,16 @@ struct call_data_struct {
int wait;
};
+void lock_ipi_call_lock(void)
+{
+ spin_lock_irq(&call_lock);
+}
+
+void unlock_ipi_call_lock(void)
+{
+ spin_unlock_irq(&call_lock);
+}
+
static struct call_data_struct * call_data;
/*
@@ -516,10 +528,15 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
*/
{
struct call_data_struct data;
- int cpus = num_online_cpus()-1;
+ int cpus;
- if (!cpus)
+ /* Holding any lock stops cpus from going down. */
+ spin_lock(&call_lock);
+ cpus = num_online_cpus() - 1;
+ if (!cpus) {
+ spin_unlock(&call_lock);
return 0;
+ }
/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());
@@ -531,7 +548,6 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
if (wait)
atomic_set(&data.finished, 0);
- spin_lock(&call_lock);
call_data = &data;
mb();
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index c20d96d5c15..8ac8e9fd561 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -44,6 +44,9 @@
#include <linux/smp_lock.h>
#include <linux/irq.h>
#include <linux/bootmem.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
#include <linux/delay.h>
#include <linux/mc146818rtc.h>
@@ -56,20 +59,30 @@
#include <smpboot_hooks.h>
/* Set if we find a B stepping CPU */
-static int __initdata smp_b_stepping;
+static int __devinitdata smp_b_stepping;
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
#ifdef CONFIG_X86_HT
EXPORT_SYMBOL(smp_num_siblings);
#endif
-int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
+
+/* Package ID of each logical CPU */
+int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
EXPORT_SYMBOL(phys_proc_id);
-int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */
+
+/* Core ID of each logical CPU */
+int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
EXPORT_SYMBOL(cpu_core_id);
+cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(cpu_sibling_map);
+
+cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(cpu_core_map);
+
/* bitmap of online cpus */
-cpumask_t cpu_online_map;
+cpumask_t cpu_online_map __read_mostly;
EXPORT_SYMBOL(cpu_online_map);
cpumask_t cpu_callin_map;
@@ -77,11 +90,17 @@ cpumask_t cpu_callout_map;
EXPORT_SYMBOL(cpu_callout_map);
static cpumask_t smp_commenced_mask;
+/* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there
+ * is no way to resync one AP against BP. TBD: for prescott and above, we
+ * should use IA64's algorithm
+ */
+static int __devinitdata tsc_sync_disabled;
+
/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_data);
-u8 x86_cpu_to_apicid[NR_CPUS] =
+u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = 0xff };
EXPORT_SYMBOL(x86_cpu_to_apicid);
@@ -96,13 +115,16 @@ static int trampoline_exec;
static void map_cpu_to_logical_apicid(void);
+/* State of each CPU. */
+DEFINE_PER_CPU(int, cpu_state) = { 0 };
+
/*
* Currently trivial. Write the real->protected mode
* bootstrap into the page concerned. The caller
* has made sure it's suitably aligned.
*/
-static unsigned long __init setup_trampoline(void)
+static unsigned long __devinit setup_trampoline(void)
{
memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
return virt_to_phys(trampoline_base);
@@ -132,7 +154,7 @@ void __init smp_alloc_memory(void)
* a given CPU
*/
-static void __init smp_store_cpu_info(int id)
+static void __devinit smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = cpu_data + id;
@@ -326,7 +348,7 @@ extern void calibrate_delay(void);
static atomic_t init_deasserted;
-static void __init smp_callin(void)
+static void __devinit smp_callin(void)
{
int cpuid, phys_id;
unsigned long timeout;
@@ -411,16 +433,48 @@ static void __init smp_callin(void)
/*
* Synchronize the TSC with the BP
*/
- if (cpu_has_tsc && cpu_khz)
+ if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
synchronize_tsc_ap();
}
static int cpucount;
+static inline void
+set_cpu_sibling_map(int cpu)
+{
+ int i;
+
+ if (smp_num_siblings > 1) {
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_isset(i, cpu_callout_map))
+ continue;
+ if (cpu_core_id[cpu] == cpu_core_id[i]) {
+ cpu_set(i, cpu_sibling_map[cpu]);
+ cpu_set(cpu, cpu_sibling_map[i]);
+ }
+ }
+ } else {
+ cpu_set(cpu, cpu_sibling_map[cpu]);
+ }
+
+ if (current_cpu_data.x86_num_cores > 1) {
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_isset(i, cpu_callout_map))
+ continue;
+ if (phys_proc_id[cpu] == phys_proc_id[i]) {
+ cpu_set(i, cpu_core_map[cpu]);
+ cpu_set(cpu, cpu_core_map[i]);
+ }
+ }
+ } else {
+ cpu_core_map[cpu] = cpu_sibling_map[cpu];
+ }
+}
+
/*
* Activate a secondary processor.
*/
-static void __init start_secondary(void *unused)
+static void __devinit start_secondary(void *unused)
{
/*
* Dont put anything before smp_callin(), SMP
@@ -443,7 +497,23 @@ static void __init start_secondary(void *unused)
* the local TLBs too.
*/
local_flush_tlb();
+
+ /* This must be done before setting cpu_online_map */
+ set_cpu_sibling_map(raw_smp_processor_id());
+ wmb();
+
+ /*
+ * We need to hold call_lock, so there is no inconsistency
+ * between the time smp_call_function() determines number of
+ * IPI receipients, and the time when the determination is made
+ * for which cpus receive the IPI. Holding this
+ * lock helps us to not include this cpu in a currently in progress
+ * smp_call_function().
+ */
+ lock_ipi_call_lock();
cpu_set(smp_processor_id(), cpu_online_map);
+ unlock_ipi_call_lock();
+ per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
/* We can take interrupts now: we're officially "up". */
local_irq_enable();
@@ -458,7 +528,7 @@ static void __init start_secondary(void *unused)
* from the task structure
* This function must not return.
*/
-void __init initialize_secondary(void)
+void __devinit initialize_secondary(void)
{
/*
* We don't actually need to load the full TSS,
@@ -480,10 +550,10 @@ extern struct {
#ifdef CONFIG_NUMA
/* which logical CPUs are on which nodes */
-cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
+cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly =
{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
/* which node each logical CPU is on */
-int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
+int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
EXPORT_SYMBOL(cpu_2_node);
/* set up a mapping between cpu and node. */
@@ -511,7 +581,7 @@ static inline void unmap_cpu_to_node(int cpu)
#endif /* CONFIG_NUMA */
-u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
static void map_cpu_to_logical_apicid(void)
{
@@ -572,7 +642,7 @@ static inline void __inquire_remote_apic(int apicid)
* INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
* won't ... remember to clear down the APIC, etc later.
*/
-static int __init
+static int __devinit
wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
{
unsigned long send_status = 0, accept_status = 0;
@@ -618,7 +688,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
#endif /* WAKE_SECONDARY_VIA_NMI */
#ifdef WAKE_SECONDARY_VIA_INIT
-static int __init
+static int __devinit
wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
{
unsigned long send_status = 0, accept_status = 0;
@@ -753,8 +823,43 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
#endif /* WAKE_SECONDARY_VIA_INIT */
extern cpumask_t cpu_initialized;
+static inline int alloc_cpu_id(void)
+{
+ cpumask_t tmp_map;
+ int cpu;
+ cpus_complement(tmp_map, cpu_present_map);
+ cpu = first_cpu(tmp_map);
+ if (cpu >= NR_CPUS)
+ return -ENODEV;
+ return cpu;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS];
+static inline struct task_struct * alloc_idle_task(int cpu)
+{
+ struct task_struct *idle;
+
+ if ((idle = cpu_idle_tasks[cpu]) != NULL) {
+ /* initialize thread_struct. we really want to avoid destroy
+ * idle tread
+ */
+ idle->thread.esp = (unsigned long)(((struct pt_regs *)
+ (THREAD_SIZE + (unsigned long) idle->thread_info)) - 1);
+ init_idle(idle, cpu);
+ return idle;
+ }
+ idle = fork_idle(cpu);
+
+ if (!IS_ERR(idle))
+ cpu_idle_tasks[cpu] = idle;
+ return idle;
+}
+#else
+#define alloc_idle_task(cpu) fork_idle(cpu)
+#endif
-static int __init do_boot_cpu(int apicid)
+static int __devinit do_boot_cpu(int apicid, int cpu)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -763,16 +868,17 @@ static int __init do_boot_cpu(int apicid)
{
struct task_struct *idle;
unsigned long boot_error;
- int timeout, cpu;
+ int timeout;
unsigned long start_eip;
unsigned short nmi_high = 0, nmi_low = 0;
- cpu = ++cpucount;
+ ++cpucount;
+
/*
* We can't use kernel_thread since we must avoid to
* reschedule the child.
*/
- idle = fork_idle(cpu);
+ idle = alloc_idle_task(cpu);
if (IS_ERR(idle))
panic("failed fork for CPU %d", cpu);
idle->thread.eip = (unsigned long) start_secondary;
@@ -839,13 +945,16 @@ static int __init do_boot_cpu(int apicid)
inquire_remote_apic(apicid);
}
}
- x86_cpu_to_apicid[cpu] = apicid;
+
if (boot_error) {
/* Try to put things back the way they were before ... */
unmap_cpu_to_logical_apicid(cpu);
cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
cpucount--;
+ } else {
+ x86_cpu_to_apicid[cpu] = apicid;
+ cpu_set(cpu, cpu_present_map);
}
/* mark "stuck" area as not stuck */
@@ -854,6 +963,75 @@ static int __init do_boot_cpu(int apicid)
return boot_error;
}
+#ifdef CONFIG_HOTPLUG_CPU
+void cpu_exit_clear(void)
+{
+ int cpu = raw_smp_processor_id();
+
+ idle_task_exit();
+
+ cpucount --;
+ cpu_uninit();
+ irq_ctx_exit(cpu);
+
+ cpu_clear(cpu, cpu_callout_map);
+ cpu_clear(cpu, cpu_callin_map);
+ cpu_clear(cpu, cpu_present_map);
+
+ cpu_clear(cpu, smp_commenced_mask);
+ unmap_cpu_to_logical_apicid(cpu);
+}
+
+struct warm_boot_cpu_info {
+ struct completion *complete;
+ int apicid;
+ int cpu;
+};
+
+static void __devinit do_warm_boot_cpu(void *p)
+{
+ struct warm_boot_cpu_info *info = p;
+ do_boot_cpu(info->apicid, info->cpu);
+ complete(info->complete);
+}
+
+int __devinit smp_prepare_cpu(int cpu)
+{
+ DECLARE_COMPLETION(done);
+ struct warm_boot_cpu_info info;
+ struct work_struct task;
+ int apicid, ret;
+
+ lock_cpu_hotplug();
+ apicid = x86_cpu_to_apicid[cpu];
+ if (apicid == BAD_APICID) {
+ ret = -ENODEV;
+ goto exit;
+ }
+
+ info.complete = &done;
+ info.apicid = apicid;
+ info.cpu = cpu;
+ INIT_WORK(&task, do_warm_boot_cpu, &info);
+
+ tsc_sync_disabled = 1;
+
+ /* init low mem mapping */
+ memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+ sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS);
+ flush_tlb_all();
+ schedule_work(&task);
+ wait_for_completion(&done);
+
+ tsc_sync_disabled = 0;
+ zap_low_mappings();
+ ret = 0;
+exit:
+ unlock_cpu_hotplug();
+ return ret;
+}
+#endif
+
static void smp_tune_scheduling (void)
{
unsigned long cachesize; /* kB */
@@ -895,13 +1073,6 @@ void *xquad_portio;
EXPORT_SYMBOL(xquad_portio);
#endif
-cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
-#ifdef CONFIG_X86_HT
-EXPORT_SYMBOL(cpu_sibling_map);
-#endif
-cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_core_map);
-
static void __init smp_boot_cpus(unsigned int max_cpus)
{
int apicid, cpu, bit, kicked;
@@ -1013,7 +1184,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
if (max_cpus <= cpucount+1)
continue;
- if (do_boot_cpu(apicid))
+ if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
printk("CPU #%d not responding - cannot use it.\n",
apicid);
else
@@ -1065,44 +1236,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
cpus_clear(cpu_core_map[cpu]);
}
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- struct cpuinfo_x86 *c = cpu_data + cpu;
- int siblings = 0;
- int i;
- if (!cpu_isset(cpu, cpu_callout_map))
- continue;
-
- if (smp_num_siblings > 1) {
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- if (cpu_core_id[cpu] == cpu_core_id[i]) {
- siblings++;
- cpu_set(i, cpu_sibling_map[cpu]);
- }
- }
- } else {
- siblings++;
- cpu_set(cpu, cpu_sibling_map[cpu]);
- }
-
- if (siblings != smp_num_siblings) {
- printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
- smp_num_siblings = siblings;
- }
-
- if (c->x86_num_cores > 1) {
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- if (phys_proc_id[cpu] == phys_proc_id[i]) {
- cpu_set(i, cpu_core_map[cpu]);
- }
- }
- } else {
- cpu_core_map[cpu] = cpu_sibling_map[cpu];
- }
- }
+ cpu_set(0, cpu_sibling_map[0]);
+ cpu_set(0, cpu_core_map[0]);
smpboot_setup_io_apic();
@@ -1119,6 +1254,9 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
+ smp_commenced_mask = cpumask_of_cpu(0);
+ cpu_callin_map = cpumask_of_cpu(0);
+ mb();
smp_boot_cpus(max_cpus);
}
@@ -1126,23 +1264,98 @@ void __devinit smp_prepare_boot_cpu(void)
{
cpu_set(smp_processor_id(), cpu_online_map);
cpu_set(smp_processor_id(), cpu_callout_map);
+ cpu_set(smp_processor_id(), cpu_present_map);
+ per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
}
-int __devinit __cpu_up(unsigned int cpu)
+#ifdef CONFIG_HOTPLUG_CPU
+static void
+remove_siblinginfo(int cpu)
{
- /* This only works at boot for x86. See "rewrite" above. */
- if (cpu_isset(cpu, smp_commenced_mask)) {
- local_irq_enable();
- return -ENOSYS;
+ int sibling;
+
+ for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
+ cpu_clear(cpu, cpu_sibling_map[sibling]);
+ for_each_cpu_mask(sibling, cpu_core_map[cpu])
+ cpu_clear(cpu, cpu_core_map[sibling]);
+ cpus_clear(cpu_sibling_map[cpu]);
+ cpus_clear(cpu_core_map[cpu]);
+ phys_proc_id[cpu] = BAD_APICID;
+ cpu_core_id[cpu] = BAD_APICID;
+}
+
+int __cpu_disable(void)
+{
+ cpumask_t map = cpu_online_map;
+ int cpu = smp_processor_id();
+
+ /*
+ * Perhaps use cpufreq to drop frequency, but that could go
+ * into generic code.
+ *
+ * We won't take down the boot processor on i386 due to some
+ * interrupts only being able to be serviced by the BSP.
+ * Especially so if we're not using an IOAPIC -zwane
+ */
+ if (cpu == 0)
+ return -EBUSY;
+
+ /* We enable the timer again on the exit path of the death loop */
+ disable_APIC_timer();
+ /* Allow any queued timer interrupts to get serviced */
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+
+ remove_siblinginfo(cpu);
+
+ cpu_clear(cpu, map);
+ fixup_irqs(map);
+ /* It's now safe to remove this processor from the online map */
+ cpu_clear(cpu, cpu_online_map);
+ return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+ /* We don't do anything here: idle task is faking death itself. */
+ unsigned int i;
+
+ for (i = 0; i < 10; i++) {
+ /* They ack this in play_dead by setting CPU_DEAD */
+ if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
+ printk ("CPU %d is now offline\n", cpu);
+ return;
+ }
+ current->state = TASK_UNINTERRUPTIBLE;
+ schedule_timeout(HZ/10);
}
+ printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+}
+#else /* ... !CONFIG_HOTPLUG_CPU */
+int __cpu_disable(void)
+{
+ return -ENOSYS;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+ /* We said "no" in __cpu_disable */
+ BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+int __devinit __cpu_up(unsigned int cpu)
+{
/* In case one didn't come up */
if (!cpu_isset(cpu, cpu_callin_map)) {
+ printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
local_irq_enable();
return -EIO;
}
local_irq_enable();
+ per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
/* Unleash the CPU! */
cpu_set(cpu, smp_commenced_mask);
while (!cpu_isset(cpu, cpu_online_map))
@@ -1156,10 +1369,12 @@ void __init smp_cpus_done(unsigned int max_cpus)
setup_ioapic_dest();
#endif
zap_low_mappings();
+#ifndef CONFIG_HOTPLUG_CPU
/*
* Disable executability of the SMP trampoline:
*/
set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
+#endif
}
void __init smp_intr_init(void)
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index d408afaf649..468500a7e89 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -283,9 +283,14 @@ ENTRY(sys_call_table)
.long sys_mq_timedreceive /* 280 */
.long sys_mq_notify
.long sys_mq_getsetattr
- .long sys_ni_syscall /* reserved for kexec */
+ .long sys_kexec_load
.long sys_waitid
.long sys_ni_syscall /* 285 */ /* available */
.long sys_add_key
.long sys_request_key
.long sys_keyctl
+ .long sys_ioprio_set
+ .long sys_ioprio_get /* 290 */
+ .long sys_inotify_init
+ .long sys_inotify_add_watch
+ .long sys_inotify_rm_watch
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 960d8bd137d..0bada1870bd 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -21,11 +21,16 @@
extern asmlinkage void sysenter_entry(void);
-void enable_sep_cpu(void *info)
+void enable_sep_cpu(void)
{
int cpu = get_cpu();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ if (!boot_cpu_has(X86_FEATURE_SEP)) {
+ put_cpu();
+ return;
+ }
+
tss->ss1 = __KERNEL_CS;
tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
@@ -41,7 +46,7 @@ void enable_sep_cpu(void *info)
extern const char vsyscall_int80_start, vsyscall_int80_end;
extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
-static int __init sysenter_setup(void)
+int __init sysenter_setup(void)
{
void *page = (void *)get_zeroed_page(GFP_ATOMIC);
@@ -58,8 +63,5 @@ static int __init sysenter_setup(void)
&vsyscall_sysenter_start,
&vsyscall_sysenter_end - &vsyscall_sysenter_start);
- on_each_cpu(enable_sep_cpu, NULL, 1, 1);
return 0;
}
-
-__initcall(sysenter_setup);
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index e68d9fdb075..0ee9dee8af0 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -68,7 +68,8 @@
#include "io_ports.h"
-extern spinlock_t i8259A_lock;
+#include <asm/i8259.h>
+
int pit_latch_buggy; /* extern */
#include "do_timer.h"
@@ -85,10 +86,12 @@ extern unsigned long wall_jiffies;
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL(rtc_lock);
+#include <asm/i8253.h>
+
DEFINE_SPINLOCK(i8253_lock);
EXPORT_SYMBOL(i8253_lock);
-struct timer_opts *cur_timer = &timer_none;
+struct timer_opts *cur_timer __read_mostly = &timer_none;
/*
* This is a special lock that is owned by the CPU and holds the index
diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c
index 10a0cbb88e7..658c0629ba6 100644
--- a/arch/i386/kernel/time_hpet.c
+++ b/arch/i386/kernel/time_hpet.c
@@ -50,7 +50,7 @@ static void hpet_writel(unsigned long d, unsigned long a)
* comparator value and continue. Next tick can be caught by checking
* for a change in the comparator value. Used in apic.c.
*/
-static void __init wait_hpet_tick(void)
+static void __devinit wait_hpet_tick(void)
{
unsigned int start_cmp_val, end_cmp_val;
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c
index 37353bd3180..8163fe0cf1f 100644
--- a/arch/i386/kernel/timers/common.c
+++ b/arch/i386/kernel/timers/common.c
@@ -86,7 +86,7 @@ bad_ctc:
#define CALIBRATE_CNT_HPET (5 * hpet_tick)
#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC)
-unsigned long __init calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
+unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
{
unsigned long tsc_startlow, tsc_starthigh;
unsigned long tsc_endlow, tsc_endhigh;
diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
index f6f1206a11b..13892a65c94 100644
--- a/arch/i386/kernel/timers/timer_cyclone.c
+++ b/arch/i386/kernel/timers/timer_cyclone.c
@@ -17,9 +17,9 @@
#include <asm/io.h>
#include <asm/pgtable.h>
#include <asm/fixmap.h>
-#include "io_ports.h"
+#include <asm/i8253.h>
-extern spinlock_t i8253_lock;
+#include "io_ports.h"
/* Number of usecs that the last interrupt was delayed */
static int delay_at_last_interrupt;
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
index d766e0963ac..ef8dac5dd33 100644
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ b/arch/i386/kernel/timers/timer_hpet.c
@@ -18,7 +18,7 @@
#include "mach_timer.h"
#include <asm/hpet.h>
-static unsigned long hpet_usec_quotient; /* convert hpet clks to usec */
+static unsigned long __read_mostly hpet_usec_quotient; /* convert hpet clks to usec */
static unsigned long tsc_hpet_quotient; /* convert tsc to hpet clks */
static unsigned long hpet_last; /* hpet counter value at last tick*/
static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
@@ -180,7 +180,7 @@ static int __init init_hpet(char* override)
/************************************************************/
/* tsc timer_opts struct */
-static struct timer_opts timer_hpet = {
+static struct timer_opts timer_hpet __read_mostly = {
.name = "hpet",
.mark_offset = mark_offset_hpet,
.get_offset = get_offset_hpet,
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
index 967d5453cd0..06de036a820 100644
--- a/arch/i386/kernel/timers/timer_pit.c
+++ b/arch/i386/kernel/timers/timer_pit.c
@@ -15,9 +15,8 @@
#include <asm/smp.h>
#include <asm/io.h>
#include <asm/arch_hooks.h>
+#include <asm/i8253.h>
-extern spinlock_t i8259A_lock;
-extern spinlock_t i8253_lock;
#include "do_timer.h"
#include "io_ports.h"
@@ -166,7 +165,6 @@ struct init_timer_opts __initdata timer_pit_init = {
void setup_pit_timer(void)
{
- extern spinlock_t i8253_lock;
unsigned long flags;
spin_lock_irqsave(&i8253_lock, flags);
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
index 54c36b18202..8f4e4d5bc56 100644
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ b/arch/i386/kernel/timers/timer_tsc.c
@@ -24,6 +24,7 @@
#include "mach_timer.h"
#include <asm/hpet.h>
+#include <asm/i8253.h>
#ifdef CONFIG_HPET_TIMER
static unsigned long hpet_usec_quotient;
@@ -33,9 +34,7 @@ static struct timer_opts timer_tsc;
static inline void cpufreq_delayed_get(void);
-int tsc_disable __initdata = 0;
-
-extern spinlock_t i8253_lock;
+int tsc_disable __devinitdata = 0;
static int use_tsc;
/* Number of usecs that the last interrupt was delayed */
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index e4d4e2162c7..a61f33d06ea 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -27,6 +27,7 @@
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/kprobes.h>
+#include <linux/kexec.h>
#ifdef CONFIG_EISA
#include <linux/ioport.h>
@@ -234,22 +235,22 @@ void show_registers(struct pt_regs *regs)
* time of the fault..
*/
if (in_kernel) {
- u8 *eip;
+ u8 __user *eip;
printk("\nStack: ");
show_stack(NULL, (unsigned long*)esp);
printk("Code: ");
- eip = (u8 *)regs->eip - 43;
+ eip = (u8 __user *)regs->eip - 43;
for (i = 0; i < 64; i++, eip++) {
unsigned char c;
- if (eip < (u8 *)PAGE_OFFSET || __get_user(c, eip)) {
+ if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
printk(" Bad EIP value.");
break;
}
- if (eip == (u8 *)regs->eip)
+ if (eip == (u8 __user *)regs->eip)
printk("<%02x> ", c);
else
printk("%02x ", c);
@@ -273,13 +274,13 @@ static void handle_BUG(struct pt_regs *regs)
if (eip < PAGE_OFFSET)
goto no_bug;
- if (__get_user(ud2, (unsigned short *)eip))
+ if (__get_user(ud2, (unsigned short __user *)eip))
goto no_bug;
if (ud2 != 0x0b0f)
goto no_bug;
- if (__get_user(line, (unsigned short *)(eip + 2)))
+ if (__get_user(line, (unsigned short __user *)(eip + 2)))
goto bug;
- if (__get_user(file, (char **)(eip + 4)) ||
+ if (__get_user(file, (char * __user *)(eip + 4)) ||
(unsigned long)file < PAGE_OFFSET || __get_user(c, file))
file = "<bad filename>";
@@ -294,6 +295,9 @@ bug:
printk("Kernel BUG\n");
}
+/* This is gone through when something in the kernel
+ * has done something bad and is about to be terminated.
+*/
void die(const char * str, struct pt_regs * regs, long err)
{
static struct {
@@ -341,6 +345,10 @@ void die(const char * str, struct pt_regs * regs, long err)
bust_spinlocks(0);
die.lock_owner = -1;
spin_unlock_irq(&die.lock);
+
+ if (kexec_should_crash(current))
+ crash_kexec(regs);
+
if (in_interrupt())
panic("Fatal exception in interrupt");
@@ -361,6 +369,10 @@ static inline void die_if_kernel(const char * str, struct pt_regs * regs, long e
static void do_trap(int trapnr, int signr, char *str, int vm86,
struct pt_regs * regs, long error_code, siginfo_t *info)
{
+ struct task_struct *tsk = current;
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = trapnr;
+
if (regs->eflags & VM_MASK) {
if (vm86)
goto vm86_trap;
@@ -371,9 +383,6 @@ static void do_trap(int trapnr, int signr, char *str, int vm86,
goto kernel_trap;
trap_signal: {
- struct task_struct *tsk = current;
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = trapnr;
if (info)
force_sig_info(signr, info, tsk);
else
@@ -486,6 +495,9 @@ fastcall void do_general_protection(struct pt_regs * regs, long error_code)
}
put_cpu();
+ current->thread.error_code = error_code;
+ current->thread.trap_no = 13;
+
if (regs->eflags & VM_MASK)
goto gp_in_vm86;
@@ -570,6 +582,15 @@ void die_nmi (struct pt_regs *regs, const char *msg)
console_silent();
spin_unlock(&nmi_print_lock);
bust_spinlocks(0);
+
+ /* If we are in kernel we are probably nested up pretty bad
+ * and might aswell get out now while we still can.
+ */
+ if (!user_mode(regs)) {
+ current->thread.trap_no = 2;
+ crash_kexec(regs);
+ }
+
do_exit(SIGSEGV);
}
@@ -625,6 +646,14 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
nmi_enter();
cpu = smp_processor_id();
+
+#ifdef CONFIG_HOTPLUG_CPU
+ if (!cpu_online(cpu)) {
+ nmi_exit();
+ return;
+ }
+#endif
+
++nmi_count(cpu);
if (!nmi_callback(regs, cpu))
@@ -872,9 +901,9 @@ fastcall void do_simd_coprocessor_error(struct pt_regs * regs,
error_code);
return;
}
- die_if_kernel("cache flush denied", regs, error_code);
current->thread.trap_no = 19;
current->thread.error_code = error_code;
+ die_if_kernel("cache flush denied", regs, error_code);
force_sig(SIGSEGV, current);
}
}
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index e0512cc8bea..761972f8cb6 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -2,20 +2,23 @@
* Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
*/
+#define LOAD_OFFSET __PAGE_OFFSET
+
#include <asm-generic/vmlinux.lds.h>
#include <asm/thread_info.h>
#include <asm/page.h>
OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
OUTPUT_ARCH(i386)
-ENTRY(startup_32)
+ENTRY(phys_startup_32)
jiffies = jiffies_64;
SECTIONS
{
- . = __PAGE_OFFSET + 0x100000;
+ . = __KERNEL_START;
+ phys_startup_32 = startup_32 - LOAD_OFFSET;
/* read-only */
_text = .; /* Text and read-only data */
- .text : {
+ .text : AT(ADDR(.text) - LOAD_OFFSET) {
*(.text)
SCHED_TEXT
LOCK_TEXT
@@ -27,49 +30,58 @@ SECTIONS
. = ALIGN(16); /* Exception table */
__start___ex_table = .;
- __ex_table : { *(__ex_table) }
+ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
__stop___ex_table = .;
RODATA
/* writeable */
- .data : { /* Data */
+ .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
*(.data)
CONSTRUCTORS
}
. = ALIGN(4096);
__nosave_begin = .;
- .data_nosave : { *(.data.nosave) }
+ .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) }
. = ALIGN(4096);
__nosave_end = .;
. = ALIGN(4096);
- .data.page_aligned : { *(.data.idt) }
+ .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
+ *(.data.idt)
+ }
. = ALIGN(32);
- .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+ .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
+ *(.data.cacheline_aligned)
+ }
+ /* rarely changed data like cpu maps */
+ . = ALIGN(32);
+ .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) }
_edata = .; /* End of data section */
. = ALIGN(THREAD_SIZE); /* init_task */
- .data.init_task : { *(.data.init_task) }
+ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
+ *(.data.init_task)
+ }
/* will be freed after init */
. = ALIGN(4096); /* Init code and data */
__init_begin = .;
- .init.text : {
+ .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
_sinittext = .;
*(.init.text)
_einittext = .;
}
- .init.data : { *(.init.data) }
+ .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) }
. = ALIGN(16);
__setup_start = .;
- .init.setup : { *(.init.setup) }
+ .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) }
__setup_end = .;
__initcall_start = .;
- .initcall.init : {
+ .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
*(.initcall1.init)
*(.initcall2.init)
*(.initcall3.init)
@@ -80,33 +92,41 @@ SECTIONS
}
__initcall_end = .;
__con_initcall_start = .;
- .con_initcall.init : { *(.con_initcall.init) }
+ .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+ *(.con_initcall.init)
+ }
__con_initcall_end = .;
SECURITY_INIT
. = ALIGN(4);
__alt_instructions = .;
- .altinstructions : { *(.altinstructions) }
+ .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
+ *(.altinstructions)
+ }
__alt_instructions_end = .;
- .altinstr_replacement : { *(.altinstr_replacement) }
+ .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
+ *(.altinstr_replacement)
+ }
/* .exit.text is discard at runtime, not link time, to deal with references
from .altinstructions and .eh_frame */
- .exit.text : { *(.exit.text) }
- .exit.data : { *(.exit.data) }
+ .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
+ .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
. = ALIGN(4096);
__initramfs_start = .;
- .init.ramfs : { *(.init.ramfs) }
+ .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
__initramfs_end = .;
. = ALIGN(32);
__per_cpu_start = .;
- .data.percpu : { *(.data.percpu) }
+ .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
__per_cpu_end = .;
. = ALIGN(4096);
__init_end = .;
/* freed after init ends here */
__bss_start = .; /* BSS */
- .bss : {
+ .bss.page_aligned : AT(ADDR(.bss.page_aligned) - LOAD_OFFSET) {
*(.bss.page_aligned)
+ }
+ .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
*(.bss)
}
. = ALIGN(4);
diff --git a/arch/i386/mach-default/setup.c b/arch/i386/mach-default/setup.c
index 0aa08eaa893..e5a1a83d09e 100644
--- a/arch/i386/mach-default/setup.c
+++ b/arch/i386/mach-default/setup.c
@@ -10,6 +10,14 @@
#include <asm/acpi.h>
#include <asm/arch_hooks.h>
+#ifdef CONFIG_HOTPLUG_CPU
+#define DEFAULT_SEND_IPI (1)
+#else
+#define DEFAULT_SEND_IPI (0)
+#endif
+
+int no_broadcast=DEFAULT_SEND_IPI;
+
/**
* pre_intr_init_hook - initialisation prior to setting up interrupt vectors
*
@@ -104,3 +112,22 @@ void __init mca_nmi_hook(void)
printk("NMI generated from unknown source!\n");
}
#endif
+
+static __init int no_ipi_broadcast(char *str)
+{
+ get_option(&str, &no_broadcast);
+ printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" :
+ "IPI Broadcast");
+ return 1;
+}
+
+__setup("no_ipi_broadcast", no_ipi_broadcast);
+
+static int __init print_ipi_mode(void)
+{
+ printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" :
+ "Shortcut");
+ return 0;
+}
+
+late_initcall(print_ipi_mode);
diff --git a/arch/i386/mach-default/topology.c b/arch/i386/mach-default/topology.c
index 5b3e8817dae..23395fff35d 100644
--- a/arch/i386/mach-default/topology.c
+++ b/arch/i386/mach-default/topology.c
@@ -73,12 +73,11 @@ static int __init topology_init(void)
{
int i;
- for (i = 0; i < MAX_NUMNODES; i++) {
- if (node_online(i))
- arch_register_node(i);
- }
- for (i = 0; i < NR_CPUS; i++)
- if (cpu_possible(i)) arch_register_cpu(i);
+ for_each_online_node(i)
+ arch_register_node(i);
+
+ for_each_cpu(i)
+ arch_register_cpu(i);
return 0;
}
@@ -88,8 +87,8 @@ static int __init topology_init(void)
{
int i;
- for (i = 0; i < NR_CPUS; i++)
- if (cpu_possible(i)) arch_register_cpu(i);
+ for_each_cpu(i)
+ arch_register_cpu(i);
return 0;
}
diff --git a/arch/i386/mach-visws/mpparse.c b/arch/i386/mach-visws/mpparse.c
index 5a22082147f..5f3d7e6de37 100644
--- a/arch/i386/mach-visws/mpparse.c
+++ b/arch/i386/mach-visws/mpparse.c
@@ -23,7 +23,6 @@ unsigned long mp_lapic_addr;
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
-unsigned int boot_cpu_logical_apicid = -1U;
/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;
@@ -52,10 +51,8 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
(m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
m->mpc_apicver);
- if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+ if (m->mpc_cpuflag & CPU_BOOTPROCESSOR)
boot_cpu_physical_apicid = m->mpc_apicid;
- boot_cpu_logical_apicid = logical_apicid;
- }
ver = m->mpc_apicver;
if ((ver >= 0x14 && m->mpc_apicid >= 0xff) || m->mpc_apicid >= 0xf) {
diff --git a/arch/i386/mach-voyager/voyager_basic.c b/arch/i386/mach-voyager/voyager_basic.c
index 602aea240e9..3e439ce5e1b 100644
--- a/arch/i386/mach-voyager/voyager_basic.c
+++ b/arch/i386/mach-voyager/voyager_basic.c
@@ -30,6 +30,7 @@
#include <linux/irq.h>
#include <asm/tlbflush.h>
#include <asm/arch_hooks.h>
+#include <asm/i8253.h>
/*
* Power off function, if any
@@ -182,7 +183,6 @@ voyager_timer_interrupt(struct pt_regs *regs)
* and swiftly introduce it to something sharp and
* pointy. */
__u16 val;
- extern spinlock_t i8253_lock;
spin_lock(&i8253_lock);
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index f429c871e84..b358f0702a4 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -30,6 +30,8 @@
#include <linux/initrd.h>
#include <linux/nodemask.h>
#include <linux/module.h>
+#include <linux/kexec.h>
+
#include <asm/e820.h>
#include <asm/setup.h>
#include <asm/mmzone.h>
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index a509237c481..8e90339d6ea 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -146,7 +146,7 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
if (instr > limit)
break;
- if (__get_user(opcode, (unsigned char *) instr))
+ if (__get_user(opcode, (unsigned char __user *) instr))
break;
instr_hi = opcode & 0xf0;
@@ -173,7 +173,7 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
scan_more = 0;
if (instr > limit)
break;
- if (__get_user(opcode, (unsigned char *) instr))
+ if (__get_user(opcode, (unsigned char __user *) instr))
break;
prefetch = (instr_lo == 0xF) &&
(opcode == 0x0D || opcode == 0x18);
@@ -463,6 +463,9 @@ no_context:
printk(KERN_ALERT "*pte = %08lx\n", page);
}
#endif
+ tsk->thread.cr2 = address;
+ tsk->thread.trap_no = 14;
+ tsk->thread.error_code = error_code;
die("Oops", regs, error_code);
bust_spinlocks(0);
do_exit(SIGKILL);
diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c
index 4b7aaf99d7e..b6eb4dcb877 100644
--- a/arch/i386/mm/highmem.c
+++ b/arch/i386/mm/highmem.c
@@ -75,6 +75,24 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
preempt_check_resched();
}
+/* This is the same as kmap_atomic() but can map memory that doesn't
+ * have a struct page associated with it.
+ */
+void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
+{
+ enum fixed_addresses idx;
+ unsigned long vaddr;
+
+ inc_preempt_count();
+
+ idx = type + KM_TYPE_NR*smp_processor_id();
+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+ set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
+ __flush_tlb_one(vaddr);
+
+ return (void*) vaddr;
+}
+
struct page *kmap_atomic_to_page(void *ptr)
{
unsigned long idx, vaddr = (unsigned long)ptr;
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 3672e2ef51a..12216b52e28 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -352,7 +352,7 @@ static void __init pagetable_init (void)
#endif
}
-#if defined(CONFIG_PM_DISK) || defined(CONFIG_SOFTWARE_SUSPEND)
+#ifdef CONFIG_SOFTWARE_SUSPEND
/*
* Swap suspend & friends need this for resume because things like the intel-agp
* driver might have split up a kernel 4MB mapping.
diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c
index d393eefc705..f379b8d6755 100644
--- a/arch/i386/mm/ioremap.c
+++ b/arch/i386/mm/ioremap.c
@@ -228,7 +228,8 @@ EXPORT_SYMBOL(ioremap_nocache);
void iounmap(volatile void __iomem *addr)
{
struct vm_struct *p;
- if ((void __force *) addr <= high_memory)
+
+ if ((void __force *)addr <= high_memory)
return;
/*
@@ -241,9 +242,10 @@ void iounmap(volatile void __iomem *addr)
return;
write_lock(&vmlist_lock);
- p = __remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr));
+ p = __remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr));
if (!p) {
- printk("iounmap: bad address %p\n", addr);
+ printk(KERN_WARNING "iounmap: bad address %p\n", addr);
+ dump_stack();
goto out_unlock;
}
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index 270c59f099a..bd2f7afc7a2 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -32,9 +32,9 @@ void show_mem(void)
unsigned long i;
struct page_state ps;
- printk("Mem-info:\n");
+ printk(KERN_INFO "Mem-info:\n");
show_free_areas();
- printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+ printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
for_each_pgdat(pgdat) {
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
page = pgdat_page_nr(pgdat, i);
@@ -49,18 +49,18 @@ void show_mem(void)
shared += page_count(page) - 1;
}
}
- printk("%d pages of RAM\n", total);
- printk("%d pages of HIGHMEM\n",highmem);
- printk("%d reserved pages\n",reserved);
- printk("%d pages shared\n",shared);
- printk("%d pages swap cached\n",cached);
+ printk(KERN_INFO "%d pages of RAM\n", total);
+ printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
+ printk(KERN_INFO "%d reserved pages\n", reserved);
+ printk(KERN_INFO "%d pages shared\n", shared);
+ printk(KERN_INFO "%d pages swap cached\n", cached);
get_page_state(&ps);
- printk("%lu pages dirty\n", ps.nr_dirty);
- printk("%lu pages writeback\n", ps.nr_writeback);
- printk("%lu pages mapped\n", ps.nr_mapped);
- printk("%lu pages slab\n", ps.nr_slab);
- printk("%lu pages pagetables\n", ps.nr_page_table_pages);
+ printk(KERN_INFO "%lu pages dirty\n", ps.nr_dirty);
+ printk(KERN_INFO "%lu pages writeback\n", ps.nr_writeback);
+ printk(KERN_INFO "%lu pages mapped\n", ps.nr_mapped);
+ printk(KERN_INFO "%lu pages slab\n", ps.nr_slab);
+ printk(KERN_INFO "%lu pages pagetables\n", ps.nr_page_table_pages);
}
/*
@@ -113,16 +113,16 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
pmd_t *pmd;
if (vaddr & (PMD_SIZE-1)) { /* vaddr is misaligned */
- printk ("set_pmd_pfn: vaddr misaligned\n");
+ printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n");
return; /* BUG(); */
}
if (pfn & (PTRS_PER_PTE-1)) { /* pfn is misaligned */
- printk ("set_pmd_pfn: pfn misaligned\n");
+ printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n");
return; /* BUG(); */
}
pgd = swapper_pg_dir + pgd_index(vaddr);
if (pgd_none(*pgd)) {
- printk ("set_pmd_pfn: pgd_none\n");
+ printk(KERN_WARNING "set_pmd_pfn: pgd_none\n");
return; /* BUG(); */
}
pud = pud_offset(pgd, vaddr);
diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c
index 720975e1af5..70bcd53451f 100644
--- a/arch/i386/pci/common.c
+++ b/arch/i386/pci/common.c
@@ -25,7 +25,8 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
int pci_routeirq;
int pcibios_last_bus = -1;
-struct pci_bus *pci_root_bus = NULL;
+unsigned long pirq_table_addr;
+struct pci_bus *pci_root_bus;
struct pci_raw_ops *raw_pci_ops;
static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
@@ -133,7 +134,7 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
printk("PCI: Probing PCI hardware (bus %02x)\n", busnum);
- return pci_scan_bus(busnum, &pci_root_ops, NULL);
+ return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, NULL);
}
extern u8 pci_cache_line_size;
@@ -164,6 +165,7 @@ static int __init pcibios_init(void)
if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT))
pcibios_sort();
#endif
+ pci_assign_unassigned_resources();
return 0;
}
@@ -188,6 +190,9 @@ char * __devinit pcibios_setup(char *str)
} else if (!strcmp(str, "biosirq")) {
pci_probe |= PCI_BIOS_IRQ_SCAN;
return NULL;
+ } else if (!strncmp(str, "pirqaddr=", 9)) {
+ pirq_table_addr = simple_strtoul(str+9, NULL, 0);
+ return NULL;
}
#endif
#ifdef CONFIG_PCI_DIRECT
diff --git a/arch/i386/pci/i386.c b/arch/i386/pci/i386.c
index c205ea7e233..93a364c8215 100644
--- a/arch/i386/pci/i386.c
+++ b/arch/i386/pci/i386.c
@@ -106,11 +106,16 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
if ((dev = bus->self)) {
for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) {
r = &dev->resource[idx];
- if (!r->start)
+ if (!r->flags)
continue;
pr = pci_find_parent_resource(dev, r);
- if (!pr || request_resource(pr, r) < 0)
+ if (!r->start || !pr || request_resource(pr, r) < 0) {
printk(KERN_ERR "PCI: Cannot allocate resource region %d of bridge %s\n", idx, pci_name(dev));
+ /* Something is wrong with the region.
+ Invalidate the resource to prevent child
+ resource allocations in this range. */
+ r->flags = 0;
+ }
}
}
pcibios_allocate_bus_resources(&bus->children);
@@ -227,7 +232,7 @@ int pcibios_enable_resources(struct pci_dev *dev, int mask)
pci_read_config_word(dev, PCI_COMMAND, &cmd);
old_cmd = cmd;
- for(idx=0; idx<6; idx++) {
+ for(idx = 0; idx < PCI_NUM_RESOURCES; idx++) {
/* Only set up the requested stuff */
if (!(mask & (1<<idx)))
continue;
diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c
index 83458f81e66..766b104ac1a 100644
--- a/arch/i386/pci/irq.c
+++ b/arch/i386/pci/irq.c
@@ -58,6 +58,35 @@ struct irq_router_handler {
int (*pcibios_enable_irq)(struct pci_dev *dev) = NULL;
/*
+ * Check passed address for the PCI IRQ Routing Table signature
+ * and perform checksum verification.
+ */
+
+static inline struct irq_routing_table * pirq_check_routing_table(u8 *addr)
+{
+ struct irq_routing_table *rt;
+ int i;
+ u8 sum;
+
+ rt = (struct irq_routing_table *) addr;
+ if (rt->signature != PIRQ_SIGNATURE ||
+ rt->version != PIRQ_VERSION ||
+ rt->size % 16 ||
+ rt->size < sizeof(struct irq_routing_table))
+ return NULL;
+ sum = 0;
+ for (i=0; i < rt->size; i++)
+ sum += addr[i];
+ if (!sum) {
+ DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt);
+ return rt;
+ }
+ return NULL;
+}
+
+
+
+/*
* Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table.
*/
@@ -65,23 +94,17 @@ static struct irq_routing_table * __init pirq_find_routing_table(void)
{
u8 *addr;
struct irq_routing_table *rt;
- int i;
- u8 sum;
+ if (pirq_table_addr) {
+ rt = pirq_check_routing_table((u8 *) __va(pirq_table_addr));
+ if (rt)
+ return rt;
+ printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n");
+ }
for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) {
- rt = (struct irq_routing_table *) addr;
- if (rt->signature != PIRQ_SIGNATURE ||
- rt->version != PIRQ_VERSION ||
- rt->size % 16 ||
- rt->size < sizeof(struct irq_routing_table))
- continue;
- sum = 0;
- for(i=0; i<rt->size; i++)
- sum += addr[i];
- if (!sum) {
- DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt);
+ rt = pirq_check_routing_table(addr);
+ if (rt)
return rt;
- }
}
return NULL;
}
@@ -1028,24 +1051,28 @@ static int __init pcibios_irq_init(void)
subsys_initcall(pcibios_irq_init);
-static void pirq_penalize_isa_irq(int irq)
+static void pirq_penalize_isa_irq(int irq, int active)
{
/*
* If any ISAPnP device reports an IRQ in its list of possible
* IRQ's, we try to avoid assigning it to PCI devices.
*/
- if (irq < 16)
- pirq_penalty[irq] += 100;
+ if (irq < 16) {
+ if (active)
+ pirq_penalty[irq] += 1000;
+ else
+ pirq_penalty[irq] += 100;
+ }
}
-void pcibios_penalize_isa_irq(int irq)
+void pcibios_penalize_isa_irq(int irq, int active)
{
#ifdef CONFIG_ACPI_PCI
if (!acpi_noirq)
- acpi_penalize_isa_irq(irq);
+ acpi_penalize_isa_irq(irq, active);
else
#endif
- pirq_penalize_isa_irq(irq);
+ pirq_penalize_isa_irq(irq, active);
}
static int pirq_enable_irq(struct pci_dev *dev)
diff --git a/arch/i386/pci/legacy.c b/arch/i386/pci/legacy.c
index 1492e375386..149a9588c25 100644
--- a/arch/i386/pci/legacy.c
+++ b/arch/i386/pci/legacy.c
@@ -45,6 +45,8 @@ static int __init pci_legacy_init(void)
printk("PCI: Probing PCI hardware\n");
pci_root_bus = pcibios_scan_root(0);
+ if (pci_root_bus)
+ pci_bus_add_devices(pci_root_bus);
pcibios_fixup_peer_bridges();
diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c
index 021a50aa51f..60f0e7a1162 100644
--- a/arch/i386/pci/mmconfig.c
+++ b/arch/i386/pci/mmconfig.c
@@ -11,11 +11,9 @@
#include <linux/pci.h>
#include <linux/init.h>
+#include <linux/acpi.h>
#include "pci.h"
-/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
-u32 pci_mmcfg_base_addr;
-
#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
/* The base address of the last MMCONFIG device accessed */
@@ -24,10 +22,31 @@ static u32 mmcfg_last_accessed_device;
/*
* Functions for accessing PCI configuration space with MMCONFIG accesses
*/
+static u32 get_base_addr(unsigned int seg, int bus)
+{
+ int cfg_num = -1;
+ struct acpi_table_mcfg_config *cfg;
+
+ while (1) {
+ ++cfg_num;
+ if (cfg_num >= pci_mmcfg_config_num) {
+ /* something bad is going on, no cfg table is found. */
+ /* so we fall back to the old way we used to do this */
+ /* and just rely on the first entry to be correct. */
+ return pci_mmcfg_config[0].base_address;
+ }
+ cfg = &pci_mmcfg_config[cfg_num];
+ if (cfg->pci_segment_group_number != seg)
+ continue;
+ if ((cfg->start_bus_number <= bus) &&
+ (cfg->end_bus_number >= bus))
+ return cfg->base_address;
+ }
+}
-static inline void pci_exp_set_dev_base(int bus, int devfn)
+static inline void pci_exp_set_dev_base(unsigned int seg, int bus, int devfn)
{
- u32 dev_base = pci_mmcfg_base_addr | (bus << 20) | (devfn << 12);
+ u32 dev_base = get_base_addr(seg, bus) | (bus << 20) | (devfn << 12);
if (dev_base != mmcfg_last_accessed_device) {
mmcfg_last_accessed_device = dev_base;
set_fixmap_nocache(FIX_PCIE_MCFG, dev_base);
@@ -44,7 +63,7 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
spin_lock_irqsave(&pci_config_lock, flags);
- pci_exp_set_dev_base(bus, devfn);
+ pci_exp_set_dev_base(seg, bus, devfn);
switch (len) {
case 1:
@@ -73,7 +92,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
spin_lock_irqsave(&pci_config_lock, flags);
- pci_exp_set_dev_base(bus, devfn);
+ pci_exp_set_dev_base(seg, bus, devfn);
switch (len) {
case 1:
@@ -101,7 +120,11 @@ static int __init pci_mmcfg_init(void)
{
if ((pci_probe & PCI_PROBE_MMCONF) == 0)
goto out;
- if (!pci_mmcfg_base_addr)
+
+ acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+ if ((pci_mmcfg_config_num == 0) ||
+ (pci_mmcfg_config == NULL) ||
+ (pci_mmcfg_config[0].base_address == 0))
goto out;
/* Kludge for now. Don't use mmconfig on AMD systems because
diff --git a/arch/i386/pci/numa.c b/arch/i386/pci/numa.c
index 9e369546189..adbe17a38f6 100644
--- a/arch/i386/pci/numa.c
+++ b/arch/i386/pci/numa.c
@@ -115,6 +115,8 @@ static int __init pci_numa_init(void)
return 0;
pci_root_bus = pcibios_scan_root(0);
+ if (pci_root_bus)
+ pci_bus_add_devices(pci_root_bus);
if (num_online_nodes() > 1)
for_each_online_node(quad) {
if (quad == 0)
diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h
index a8fc80ca69f..a80f0f55ff5 100644
--- a/arch/i386/pci/pci.h
+++ b/arch/i386/pci/pci.h
@@ -27,6 +27,7 @@
#define PCI_ASSIGN_ALL_BUSSES 0x4000
extern unsigned int pci_probe;
+extern unsigned long pirq_table_addr;
/* pci-i386.c */
diff --git a/arch/i386/pci/visws.c b/arch/i386/pci/visws.c
index 6a924878443..314c933b6b8 100644
--- a/arch/i386/pci/visws.c
+++ b/arch/i386/pci/visws.c
@@ -21,7 +21,7 @@ static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; }
int (*pcibios_enable_irq)(struct pci_dev *dev) = &pci_visws_enable_irq;
-void __init pcibios_penalize_isa_irq(int irq) {}
+void __init pcibios_penalize_isa_irq(int irq, int active) {}
unsigned int pci_bus0, pci_bus1;
diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c
index 6f521cf19a1..c547c1af6fa 100644
--- a/arch/i386/power/cpu.c
+++ b/arch/i386/power/cpu.c
@@ -22,9 +22,11 @@
#include <linux/device.h>
#include <linux/suspend.h>
#include <linux/acpi.h>
+
#include <asm/uaccess.h>
#include <asm/acpi.h>
#include <asm/tlbflush.h>
+#include <asm/processor.h>
static struct saved_context saved_context;
@@ -33,8 +35,6 @@ unsigned long saved_context_esp, saved_context_ebp;
unsigned long saved_context_esi, saved_context_edi;
unsigned long saved_context_eflags;
-extern void enable_sep_cpu(void *);
-
void __save_processor_state(struct saved_context *ctxt)
{
kernel_fpu_begin();
@@ -44,7 +44,6 @@ void __save_processor_state(struct saved_context *ctxt)
*/
asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit));
asm volatile ("sidt %0" : "=m" (ctxt->idt_limit));
- asm volatile ("sldt %0" : "=m" (ctxt->ldt));
asm volatile ("str %0" : "=m" (ctxt->tr));
/*
@@ -107,7 +106,6 @@ static void fix_processor_context(void)
void __restore_processor_state(struct saved_context *ctxt)
{
-
/*
* control registers
*/
@@ -117,6 +115,13 @@ void __restore_processor_state(struct saved_context *ctxt)
asm volatile ("movl %0, %%cr0" :: "r" (ctxt->cr0));
/*
+ * now restore the descriptor tables to their proper values
+ * ltr is done i fix_processor_context().
+ */
+ asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit));
+ asm volatile ("lidt %0" :: "m" (ctxt->idt_limit));
+
+ /*
* segment registers
*/
asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
@@ -125,21 +130,14 @@ void __restore_processor_state(struct saved_context *ctxt)
asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
/*
- * now restore the descriptor tables to their proper values
- * ltr is done i fix_processor_context().
- */
- asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit));
- asm volatile ("lidt %0" :: "m" (ctxt->idt_limit));
- asm volatile ("lldt %0" :: "m" (ctxt->ldt));
-
- /*
* sysenter MSRs
*/
if (boot_cpu_has(X86_FEATURE_SEP))
- enable_sep_cpu(NULL);
+ enable_sep_cpu();
fix_processor_context();
do_fpu_end();
+ mtrr_ap_init();
}
void restore_processor_state(void)