aboutsummaryrefslogtreecommitdiff
path: root/arch/i386
diff options
context:
space:
mode:
Diffstat (limited to 'arch/i386')
-rw-r--r--arch/i386/Kconfig13
-rw-r--r--arch/i386/Makefile9
-rw-r--r--arch/i386/boot/Makefile36
-rw-r--r--arch/i386/boot/video.S2
-rw-r--r--arch/i386/kernel/acpi/boot.c8
-rw-r--r--arch/i386/kernel/cpu/common.c10
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k8.c16
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k8.h4
-rw-r--r--arch/i386/kernel/cpu/intel_cacheinfo.c77
-rw-r--r--arch/i386/kernel/cpu/mtrr/main.c13
-rw-r--r--arch/i386/kernel/dmi_scan.c90
-rw-r--r--arch/i386/kernel/efi.c23
-rw-r--r--arch/i386/kernel/io_apic.c4
-rw-r--r--arch/i386/kernel/kprobes.c253
-rw-r--r--arch/i386/kernel/microcode.c24
-rw-r--r--arch/i386/kernel/nmi.c9
-rw-r--r--arch/i386/kernel/process.c8
-rw-r--r--arch/i386/kernel/setup.c9
-rw-r--r--arch/i386/kernel/smpboot.c24
-rw-r--r--arch/i386/kernel/syscall_table.S2
-rw-r--r--arch/i386/kernel/timers/timer_pm.c104
-rw-r--r--arch/i386/kernel/traps.c34
-rw-r--r--arch/i386/mach-voyager/voyager_smp.c2
-rw-r--r--arch/i386/mm/discontig.c12
-rw-r--r--arch/i386/mm/pgtable.c2
-rw-r--r--arch/i386/oprofile/nmi_int.c2
26 files changed, 558 insertions, 232 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index b008fb0cd7b..f17bd1d2707 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -37,6 +37,10 @@ config GENERIC_IOMAP
bool
default y
+config GENERIC_HWEIGHT
+ bool
+ default y
+
config ARCH_MAY_HAVE_PC_FDC
bool
default y
@@ -227,6 +231,15 @@ config SCHED_SMT
cost of slightly increased overhead in some places. If unsure say
N here.
+config SCHED_MC
+ bool "Multi-core scheduler support"
+ depends on SMP
+ default y
+ help
+ Multi-core scheduler support improves the CPU scheduler's decision
+ making when dealing with multi-core CPU chips at a cost of slightly
+ increased overhead in some places. If unsure say N here.
+
source "kernel/Kconfig.preempt"
config X86_UP_APIC
diff --git a/arch/i386/Makefile b/arch/i386/Makefile
index c848a5b3039..3e4adb1e224 100644
--- a/arch/i386/Makefile
+++ b/arch/i386/Makefile
@@ -103,7 +103,7 @@ AFLAGS += $(mflags-y)
boot := arch/i386/boot
PHONY += zImage bzImage compressed zlilo bzlilo \
- zdisk bzdisk fdimage fdimage144 fdimage288 install
+ zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install
all: bzImage
@@ -122,7 +122,7 @@ zlilo bzlilo: vmlinux
zdisk bzdisk: vmlinux
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zdisk
-fdimage fdimage144 fdimage288: vmlinux
+fdimage fdimage144 fdimage288 isoimage: vmlinux
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
install:
@@ -139,6 +139,9 @@ define archhelp
echo ' install to $$(INSTALL_PATH) and run lilo'
echo ' bzdisk - Create a boot floppy in /dev/fd0'
echo ' fdimage - Create a boot floppy image'
+ echo ' isoimage - Create a boot CD-ROM image'
endef
-CLEAN_FILES += arch/$(ARCH)/boot/fdimage arch/$(ARCH)/boot/mtools.conf
+CLEAN_FILES += arch/$(ARCH)/boot/fdimage \
+ arch/$(ARCH)/boot/image.iso \
+ arch/$(ARCH)/boot/mtools.conf
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile
index f136752563b..33e55476381 100644
--- a/arch/i386/boot/Makefile
+++ b/arch/i386/boot/Makefile
@@ -62,8 +62,12 @@ $(obj)/setup $(obj)/bootsect: %: %.o FORCE
$(obj)/compressed/vmlinux: FORCE
$(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@
-# Set this if you want to pass append arguments to the zdisk/fdimage kernel
+# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel
FDARGS =
+# Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel
+FDINITRD =
+
+image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,)
$(obj)/mtools.conf: $(src)/mtools.conf.in
sed -e 's|@OBJ@|$(obj)|g' < $< > $@
@@ -72,8 +76,11 @@ $(obj)/mtools.conf: $(src)/mtools.conf.in
zdisk: $(BOOTIMAGE) $(obj)/mtools.conf
MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync
syslinux /dev/fd0 ; sync
- echo 'default linux $(FDARGS)' | \
+ echo '$(image_cmdline)' | \
MTOOLSRC=$(src)/mtools.conf mcopy - a:syslinux.cfg
+ if [ -f '$(FDINITRD)' ] ; then \
+ MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \
+ fi
MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) a:linux ; sync
# These require being root or having syslinux 2.02 or higher installed
@@ -81,18 +88,39 @@ fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf
dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440
MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync
syslinux $(obj)/fdimage ; sync
- echo 'default linux $(FDARGS)' | \
+ echo '$(image_cmdline)' | \
MTOOLSRC=$(obj)/mtools.conf mcopy - v:syslinux.cfg
+ if [ -f '$(FDINITRD)' ] ; then \
+ MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \
+ fi
MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) v:linux ; sync
fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880
MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync
syslinux $(obj)/fdimage ; sync
- echo 'default linux $(FDARGS)' | \
+ echo '$(image_cmdline)' | \
MTOOLSRC=$(obj)/mtools.conf mcopy - w:syslinux.cfg
+ if [ -f '$(FDINITRD)' ] ; then \
+ MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \
+ fi
MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) w:linux ; sync
+isoimage: $(BOOTIMAGE)
+ -rm -rf $(obj)/isoimage
+ mkdir $(obj)/isoimage
+ cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \
+ $(obj)/isoimage
+ cp $(BOOTIMAGE) $(obj)/isoimage/linux
+ echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
+ if [ -f '$(FDINITRD)' ] ; then \
+ cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \
+ fi
+ mkisofs -J -r -o $(obj)/image.iso -b isolinux.bin -c boot.cat \
+ -no-emul-boot -boot-load-size 4 -boot-info-table \
+ $(obj)/isoimage
+ rm -rf $(obj)/isoimage
+
zlilo: $(BOOTIMAGE)
if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi
if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S
index 2ac40c8244c..0000a267453 100644
--- a/arch/i386/boot/video.S
+++ b/arch/i386/boot/video.S
@@ -1924,6 +1924,7 @@ skip10: movb %ah, %al
ret
store_edid:
+#ifdef CONFIG_FB_FIRMWARE_EDID
pushw %es # just save all registers
pushw %ax
pushw %bx
@@ -1954,6 +1955,7 @@ store_edid:
popw %bx
popw %ax
popw %es
+#endif
ret
# VIDEO_SELECT-only variables
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index f1a21945963..033066176b3 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -668,10 +668,10 @@ unsigned long __init acpi_find_rsdp(void)
unsigned long rsdp_phys = 0;
if (efi_enabled) {
- if (efi.acpi20)
- return __pa(efi.acpi20);
- else if (efi.acpi)
- return __pa(efi.acpi);
+ if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
+ return efi.acpi20;
+ else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
+ return efi.acpi;
}
/*
* Scan memory looking for the RSDP signature. First search EBDA (low
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 7e3d6b6a4e9..a06a49075f1 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -266,7 +266,7 @@ static void __init early_cpu_detect(void)
void __cpuinit generic_identify(struct cpuinfo_x86 * c)
{
u32 tfms, xlvl;
- int junk;
+ int ebx;
if (have_cpuid_p()) {
/* Get vendor name */
@@ -282,7 +282,7 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c)
/* Intel-defined flags: level 0x00000001 */
if ( c->cpuid_level >= 0x00000001 ) {
u32 capability, excap;
- cpuid(0x00000001, &tfms, &junk, &excap, &capability);
+ cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
c->x86_capability[0] = capability;
c->x86_capability[4] = excap;
c->x86 = (tfms >> 8) & 15;
@@ -292,6 +292,11 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c)
if (c->x86 >= 0x6)
c->x86_model += ((tfms >> 16) & 0xF) << 4;
c->x86_mask = tfms & 15;
+#ifdef CONFIG_SMP
+ c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
+#else
+ c->apicid = (ebx >> 24) & 0xFF;
+#endif
} else {
/* Have CPUID level 0 only - unheard of */
c->x86 = 4;
@@ -474,7 +479,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
cpuid(1, &eax, &ebx, &ecx, &edx);
- c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
return;
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
index e5bc06480ff..712a26bd445 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
@@ -40,6 +40,7 @@
#ifdef CONFIG_X86_POWERNOW_K8_ACPI
#include <linux/acpi.h>
+#include <linux/mutex.h>
#include <acpi/processor.h>
#endif
@@ -49,7 +50,7 @@
#include "powernow-k8.h"
/* serialize freq changes */
-static DECLARE_MUTEX(fidvid_sem);
+static DEFINE_MUTEX(fidvid_mutex);
static struct powernow_k8_data *powernow_data[NR_CPUS];
@@ -943,17 +944,17 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate))
goto err_out;
- down(&fidvid_sem);
+ mutex_lock(&fidvid_mutex);
powernow_k8_acpi_pst_values(data, newstate);
if (transition_frequency(data, newstate)) {
printk(KERN_ERR PFX "transition frequency failed\n");
ret = 1;
- up(&fidvid_sem);
+ mutex_unlock(&fidvid_mutex);
goto err_out;
}
- up(&fidvid_sem);
+ mutex_unlock(&fidvid_mutex);
pol->cur = find_khz_freq_from_fid(data->currfid);
ret = 0;
@@ -1094,10 +1095,15 @@ static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol)
static unsigned int powernowk8_get (unsigned int cpu)
{
- struct powernow_k8_data *data = powernow_data[cpu];
+ struct powernow_k8_data *data;
cpumask_t oldmask = current->cpus_allowed;
unsigned int khz = 0;
+ data = powernow_data[first_cpu(cpu_core_map[cpu])];
+
+ if (!data)
+ return -EINVAL;
+
set_cpus_allowed(current, cpumask_of_cpu(cpu));
if (smp_processor_id() != cpu) {
printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu);
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
index 00ea899c17e..79a7c5c87ed 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
@@ -182,10 +182,6 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);
-#ifndef for_each_cpu_mask
-#define for_each_cpu_mask(i,mask) for (i=0;i<1;i++)
-#endif
-
#ifdef CONFIG_SMP
static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
{
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index ce61921369e..9df87b03612 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -173,6 +173,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
+ unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
+#ifdef CONFIG_SMP
+ unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data);
+#endif
if (c->cpuid_level > 3) {
static int is_initialized;
@@ -205,9 +209,15 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
break;
case 2:
new_l2 = this_leaf.size/1024;
+ num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+ index_msb = get_count_order(num_threads_sharing);
+ l2_id = c->apicid >> index_msb;
break;
case 3:
new_l3 = this_leaf.size/1024;
+ num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+ index_msb = get_count_order(num_threads_sharing);
+ l3_id = c->apicid >> index_msb;
break;
default:
break;
@@ -215,11 +225,19 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
}
}
}
- if (c->cpuid_level > 1) {
+ /*
+ * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
+ * trace cache
+ */
+ if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
/* supports eax=2 call */
int i, j, n;
int regs[4];
unsigned char *dp = (unsigned char *)regs;
+ int only_trace = 0;
+
+ if (num_cache_leaves != 0 && c->x86 == 15)
+ only_trace = 1;
/* Number of times to iterate */
n = cpuid_eax(2) & 0xFF;
@@ -241,6 +259,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
while (cache_table[k].descriptor != 0)
{
if (cache_table[k].descriptor == des) {
+ if (only_trace && cache_table[k].cache_type != LVL_TRACE)
+ break;
switch (cache_table[k].cache_type) {
case LVL_1_INST:
l1i += cache_table[k].size;
@@ -266,34 +286,45 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
}
}
}
+ }
- if (new_l1d)
- l1d = new_l1d;
+ if (new_l1d)
+ l1d = new_l1d;
- if (new_l1i)
- l1i = new_l1i;
+ if (new_l1i)
+ l1i = new_l1i;
- if (new_l2)
- l2 = new_l2;
+ if (new_l2) {
+ l2 = new_l2;
+#ifdef CONFIG_SMP
+ cpu_llc_id[cpu] = l2_id;
+#endif
+ }
- if (new_l3)
- l3 = new_l3;
+ if (new_l3) {
+ l3 = new_l3;
+#ifdef CONFIG_SMP
+ cpu_llc_id[cpu] = l3_id;
+#endif
+ }
- if ( trace )
- printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
- else if ( l1i )
- printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
- if ( l1d )
- printk(", L1 D cache: %dK\n", l1d);
- else
- printk("\n");
- if ( l2 )
- printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
- if ( l3 )
- printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
+ if (trace)
+ printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
+ else if ( l1i )
+ printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
- c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
- }
+ if (l1d)
+ printk(", L1 D cache: %dK\n", l1d);
+ else
+ printk("\n");
+
+ if (l2)
+ printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
+
+ if (l3)
+ printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
+
+ c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
return l2;
}
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 3b4618bed70..fff90bda473 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -36,6 +36,7 @@
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/cpu.h>
+#include <linux/mutex.h>
#include <asm/mtrr.h>
@@ -47,7 +48,7 @@
u32 num_var_ranges = 0;
unsigned int *usage_table;
-static DECLARE_MUTEX(mtrr_sem);
+static DEFINE_MUTEX(mtrr_mutex);
u32 size_or_mask, size_and_mask;
@@ -333,7 +334,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
/* No CPU hotplug when we change MTRR entries */
lock_cpu_hotplug();
/* Search for existing MTRR */
- down(&mtrr_sem);
+ mutex_lock(&mtrr_mutex);
for (i = 0; i < num_var_ranges; ++i) {
mtrr_if->get(i, &lbase, &lsize, &ltype);
if (base >= lbase + lsize)
@@ -371,7 +372,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
printk(KERN_INFO "mtrr: no more MTRRs available\n");
error = i;
out:
- up(&mtrr_sem);
+ mutex_unlock(&mtrr_mutex);
unlock_cpu_hotplug();
return error;
}
@@ -464,7 +465,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
max = num_var_ranges;
/* No CPU hotplug when we change MTRR entries */
lock_cpu_hotplug();
- down(&mtrr_sem);
+ mutex_lock(&mtrr_mutex);
if (reg < 0) {
/* Search for existing MTRR */
for (i = 0; i < max; ++i) {
@@ -503,7 +504,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
set_mtrr(reg, 0, 0, 0);
error = reg;
out:
- up(&mtrr_sem);
+ mutex_unlock(&mtrr_mutex);
unlock_cpu_hotplug();
return error;
}
@@ -685,7 +686,7 @@ void mtrr_ap_init(void)
if (!mtrr_if || !use_intel())
return;
/*
- * Ideally we should hold mtrr_sem here to avoid mtrr entries changed,
+ * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed,
* but this routine will be called in cpu boot time, holding the lock
* breaks it. This routine is called in two cases: 1.very earily time
* of software resume, when there absolutely isn't mtrr entry changes;
diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c
index ebc8dc116c4..5efceebc48d 100644
--- a/arch/i386/kernel/dmi_scan.c
+++ b/arch/i386/kernel/dmi_scan.c
@@ -3,6 +3,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/dmi.h>
+#include <linux/efi.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
#include <asm/dmi.h>
@@ -185,47 +186,72 @@ static void __init dmi_decode(struct dmi_header *dm)
}
}
-void __init dmi_scan_machine(void)
+static int __init dmi_present(char __iomem *p)
{
u8 buf[15];
- char __iomem *p, *q;
+ memcpy_fromio(buf, p, 15);
+ if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) {
+ u16 num = (buf[13] << 8) | buf[12];
+ u16 len = (buf[7] << 8) | buf[6];
+ u32 base = (buf[11] << 24) | (buf[10] << 16) |
+ (buf[9] << 8) | buf[8];
- /*
- * no iounmap() for that ioremap(); it would be a no-op, but it's
- * so early in setup that sucker gets confused into doing what
- * it shouldn't if we actually call it.
- */
- p = ioremap(0xF0000, 0x10000);
- if (p == NULL)
- goto out;
-
- for (q = p; q < p + 0x10000; q += 16) {
- memcpy_fromio(buf, q, 15);
- if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) {
- u16 num = (buf[13] << 8) | buf[12];
- u16 len = (buf[7] << 8) | buf[6];
- u32 base = (buf[11] << 24) | (buf[10] << 16) |
- (buf[9] << 8) | buf[8];
-
- /*
- * DMI version 0.0 means that the real version is taken from
- * the SMBIOS version, which we don't know at this point.
- */
- if (buf[14] != 0)
- printk(KERN_INFO "DMI %d.%d present.\n",
- buf[14] >> 4, buf[14] & 0xF);
- else
- printk(KERN_INFO "DMI present.\n");
+ /*
+ * DMI version 0.0 means that the real version is taken from
+ * the SMBIOS version, which we don't know at this point.
+ */
+ if (buf[14] != 0)
+ printk(KERN_INFO "DMI %d.%d present.\n",
+ buf[14] >> 4, buf[14] & 0xF);
+ else
+ printk(KERN_INFO "DMI present.\n");
+ if (dmi_table(base,len, num, dmi_decode) == 0)
+ return 0;
+ }
+ return 1;
+}
- if (dmi_table(base,len, num, dmi_decode) == 0)
+void __init dmi_scan_machine(void)
+{
+ char __iomem *p, *q;
+ int rc;
+
+ if (efi_enabled) {
+ if (efi.smbios == EFI_INVALID_TABLE_ADDR)
+ goto out;
+
+ /* This is called as a core_initcall() because it isn't
+ * needed during early boot. This also means we can
+ * iounmap the space when we're done with it.
+ */
+ p = dmi_ioremap(efi.smbios, 32);
+ if (p == NULL)
+ goto out;
+
+ rc = dmi_present(p + 0x10); /* offset of _DMI_ string */
+ dmi_iounmap(p, 32);
+ if (!rc)
+ return;
+ }
+ else {
+ /*
+ * no iounmap() for that ioremap(); it would be a no-op, but
+ * it's so early in setup that sucker gets confused into doing
+ * what it shouldn't if we actually call it.
+ */
+ p = dmi_ioremap(0xF0000, 0x10000);
+ if (p == NULL)
+ goto out;
+
+ for (q = p; q < p + 0x10000; q += 16) {
+ rc = dmi_present(q);
+ if (!rc)
return;
}
}
-
-out: printk(KERN_INFO "DMI not present or invalid.\n");
+ out: printk(KERN_INFO "DMI not present or invalid.\n");
}
-
/**
* dmi_check_system - check system DMI data
* @list: array of dmi_system_id structures to match against
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index 7ec6cfa01fb..9202b67c4b2 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -361,7 +361,7 @@ void __init efi_init(void)
*/
c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2);
if (c16) {
- for (i = 0; i < sizeof(vendor) && *c16; ++i)
+ for (i = 0; i < (sizeof(vendor) - 1) && *c16; ++i)
vendor[i] = *c16++;
vendor[i] = '\0';
} else
@@ -381,29 +381,38 @@ void __init efi_init(void)
if (config_tables == NULL)
printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n");
+ efi.mps = EFI_INVALID_TABLE_ADDR;
+ efi.acpi = EFI_INVALID_TABLE_ADDR;
+ efi.acpi20 = EFI_INVALID_TABLE_ADDR;
+ efi.smbios = EFI_INVALID_TABLE_ADDR;
+ efi.sal_systab = EFI_INVALID_TABLE_ADDR;
+ efi.boot_info = EFI_INVALID_TABLE_ADDR;
+ efi.hcdp = EFI_INVALID_TABLE_ADDR;
+ efi.uga = EFI_INVALID_TABLE_ADDR;
+
for (i = 0; i < num_config_tables; i++) {
if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
- efi.mps = (void *)config_tables[i].table;
+ efi.mps = config_tables[i].table;
printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table);
} else
if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
- efi.acpi20 = __va(config_tables[i].table);
+ efi.acpi20 = config_tables[i].table;
printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table);
} else
if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
- efi.acpi = __va(config_tables[i].table);
+ efi.acpi = config_tables[i].table;
printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table);
} else
if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
- efi.smbios = (void *) config_tables[i].table;
+ efi.smbios = config_tables[i].table;
printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table);
} else
if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
- efi.hcdp = (void *)config_tables[i].table;
+ efi.hcdp = config_tables[i].table;
printk(KERN_INFO " HCDP=0x%lx ", config_tables[i].table);
} else
if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) {
- efi.uga = (void *)config_tables[i].table;
+ efi.uga = config_tables[i].table;
printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table);
}
}
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 311b4e7266f..3b329af4afc 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -381,7 +381,7 @@ static void do_irq_balance(void)
unsigned long imbalance = 0;
cpumask_t allowed_mask, target_cpu_mask, tmp;
- for_each_cpu(i) {
+ for_each_possible_cpu(i) {
int package_index;
CPU_IRQ(i) = 0;
if (!cpu_online(i))
@@ -632,7 +632,7 @@ static int __init balanced_irq_init(void)
else
printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
failed:
- for_each_cpu(i) {
+ for_each_possible_cpu(i) {
kfree(irq_cpu_data[i].irq_delta);
irq_cpu_data[i].irq_delta = NULL;
kfree(irq_cpu_data[i].last_irq);
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index 7a59050242a..f19768789e8 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -35,12 +35,56 @@
#include <asm/cacheflush.h>
#include <asm/kdebug.h>
#include <asm/desc.h>
+#include <asm/uaccess.h>
void jprobe_return_end(void);
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+/* insert a jmp code */
+static inline void set_jmp_op(void *from, void *to)
+{
+ struct __arch_jmp_op {
+ char op;
+ long raddr;
+ } __attribute__((packed)) *jop;
+ jop = (struct __arch_jmp_op *)from;
+ jop->raddr = (long)(to) - ((long)(from) + 5);
+ jop->op = RELATIVEJUMP_INSTRUCTION;
+}
+
+/*
+ * returns non-zero if opcodes can be boosted.
+ */
+static inline int can_boost(kprobe_opcode_t opcode)
+{
+ switch (opcode & 0xf0 ) {
+ case 0x70:
+ return 0; /* can't boost conditional jump */
+ case 0x90:
+ /* can't boost call and pushf */
+ return opcode != 0x9a && opcode != 0x9c;
+ case 0xc0:
+ /* can't boost undefined opcodes and soft-interruptions */
+ return (0xc1 < opcode && opcode < 0xc6) ||
+ (0xc7 < opcode && opcode < 0xcc) || opcode == 0xcf;
+ case 0xd0:
+ /* can boost AA* and XLAT */
+ return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
+ case 0xe0:
+ /* can boost in/out and (may be) jmps */
+ return (0xe3 < opcode && opcode != 0xe8);
+ case 0xf0:
+ /* clear and set flags can be boost */
+ return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
+ default:
+ /* currently, can't boost 2 bytes opcodes */
+ return opcode != 0x0f;
+ }
+}
+
+
/*
* returns non-zero if opcode modifies the interrupt flag.
*/
@@ -65,6 +109,11 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
p->opcode = *p->addr;
+ if (can_boost(p->opcode)) {
+ p->ainsn.boostable = 0;
+ } else {
+ p->ainsn.boostable = -1;
+ }
return 0;
}
@@ -155,9 +204,13 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
{
struct kprobe *p;
int ret = 0;
- kprobe_opcode_t *addr = NULL;
- unsigned long *lp;
+ kprobe_opcode_t *addr;
struct kprobe_ctlblk *kcb;
+#ifdef CONFIG_PREEMPT
+ unsigned pre_preempt_count = preempt_count();
+#endif /* CONFIG_PREEMPT */
+
+ addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));
/*
* We don't want to be preempted for the entire
@@ -166,17 +219,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
preempt_disable();
kcb = get_kprobe_ctlblk();
- /* Check if the application is using LDT entry for its code segment and
- * calculate the address by reading the base address from the LDT entry.
- */
- if ((regs->xcs & 4) && (current->mm)) {
- lp = (unsigned long *) ((unsigned long)((regs->xcs >> 3) * 8)
- + (char *) current->mm->context.ldt);
- addr = (kprobe_opcode_t *) (get_desc_base(lp) + regs->eip -
- sizeof(kprobe_opcode_t));
- } else {
- addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));
- }
/* Check we're not actually recursing */
if (kprobe_running()) {
p = get_kprobe(addr);
@@ -252,6 +294,21 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
/* handler has already set things up, so skip ss setup */
return 1;
+ if (p->ainsn.boostable == 1 &&
+#ifdef CONFIG_PREEMPT
+ !(pre_preempt_count) && /*
+ * This enables booster when the direct
+ * execution path aren't preempted.
+ */
+#endif /* CONFIG_PREEMPT */
+ !p->post_handler && !p->break_handler ) {
+ /* Boost up -- we can execute copied instructions directly */
+ reset_current_kprobe();
+ regs->eip = (unsigned long)p->ainsn.insn;
+ preempt_enable_no_resched();
+ return 1;
+ }
+
ss_probe:
prepare_singlestep(p, regs);
kcb->kprobe_status = KPROBE_HIT_SS;
@@ -267,17 +324,44 @@ no_kprobe:
* here. When a retprobed function returns, this probe is hit and
* trampoline_probe_handler() runs, calling the kretprobe's handler.
*/
- void kretprobe_trampoline_holder(void)
+ void __kprobes kretprobe_trampoline_holder(void)
{
- asm volatile ( ".global kretprobe_trampoline\n"
+ asm volatile ( ".global kretprobe_trampoline\n"
"kretprobe_trampoline: \n"
- "nop\n");
- }
+ " pushf\n"
+ /* skip cs, eip, orig_eax, es, ds */
+ " subl $20, %esp\n"
+ " pushl %eax\n"
+ " pushl %ebp\n"
+ " pushl %edi\n"
+ " pushl %esi\n"
+ " pushl %edx\n"
+ " pushl %ecx\n"
+ " pushl %ebx\n"
+ " movl %esp, %eax\n"
+ " call trampoline_handler\n"
+ /* move eflags to cs */
+ " movl 48(%esp), %edx\n"
+ " movl %edx, 44(%esp)\n"
+ /* save true return address on eflags */
+ " movl %eax, 48(%esp)\n"
+ " popl %ebx\n"
+ " popl %ecx\n"
+ " popl %edx\n"
+ " popl %esi\n"
+ " popl %edi\n"
+ " popl %ebp\n"
+ " popl %eax\n"
+ /* skip eip, orig_eax, es, ds */
+ " addl $16, %esp\n"
+ " popf\n"
+ " ret\n");
+}
/*
- * Called when we hit the probe point at kretprobe_trampoline
+ * Called from kretprobe_trampoline
*/
-int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
{
struct kretprobe_instance *ri = NULL;
struct hlist_head *head;
@@ -306,8 +390,11 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
/* another task is sharing our hash bucket */
continue;
- if (ri->rp && ri->rp->handler)
+ if (ri->rp && ri->rp->handler){
+ __get_cpu_var(current_kprobe) = &ri->rp->kp;
ri->rp->handler(ri, regs);
+ __get_cpu_var(current_kprobe) = NULL;
+ }
orig_ret_address = (unsigned long)ri->ret_addr;
recycle_rp_inst(ri);
@@ -322,18 +409,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
}
BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
- regs->eip = orig_ret_address;
- reset_current_kprobe();
spin_unlock_irqrestore(&kretprobe_lock, flags);
- preempt_enable_no_resched();
- /*
- * By returning a non-zero value, we are telling
- * kprobe_handler() that we don't want the post_handler
- * to run (and have re-enabled preemption)
- */
- return 1;
+ return (void*)orig_ret_address;
}
/*
@@ -357,15 +436,17 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
* 2) If the single-stepped instruction was a call, the return address
* that is atop the stack is the address following the copied instruction.
* We need to make it the address following the original instruction.
+ *
+ * This function also checks instruction size for preparing direct execution.
*/
static void __kprobes resume_execution(struct kprobe *p,
struct pt_regs *regs, struct kprobe_ctlblk *kcb)
{
unsigned long *tos = (unsigned long *)&regs->esp;
- unsigned long next_eip = 0;
unsigned long copy_eip = (unsigned long)p->ainsn.insn;
unsigned long orig_eip = (unsigned long)p->addr;
+ regs->eflags &= ~TF_MASK;
switch (p->ainsn.insn[0]) {
case 0x9c: /* pushfl */
*tos &= ~(TF_MASK | IF_MASK);
@@ -375,37 +456,51 @@ static void __kprobes resume_execution(struct kprobe *p,
case 0xcb:
case 0xc2:
case 0xca:
- regs->eflags &= ~TF_MASK;
- /* eip is already adjusted, no more changes required*/
- return;
+ case 0xea: /* jmp absolute -- eip is correct */
+ /* eip is already adjusted, no more changes required */
+ p->ainsn.boostable = 1;
+ goto no_change;
case 0xe8: /* call relative - Fix return addr */
*tos = orig_eip + (*tos - copy_eip);
break;
case 0xff:
if ((p->ainsn.insn[1] & 0x30) == 0x10) {
/* call absolute, indirect */
- /* Fix return addr; eip is correct. */
- next_eip = regs->eip;
+ /*
+ * Fix return addr; eip is correct.
+ * But this is not boostable
+ */
*tos = orig_eip + (*tos - copy_eip);
+ goto no_change;
} else if (((p->ainsn.insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */
((p->ainsn.insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */
- /* eip is correct. */
- next_eip = regs->eip;
+ /* eip is correct. And this is boostable */
+ p->ainsn.boostable = 1;
+ goto no_change;
}
- break;
- case 0xea: /* jmp absolute -- eip is correct */
- next_eip = regs->eip;
- break;
default:
break;
}
- regs->eflags &= ~TF_MASK;
- if (next_eip) {
- regs->eip = next_eip;
- } else {
- regs->eip = orig_eip + (regs->eip - copy_eip);
+ if (p->ainsn.boostable == 0) {
+ if ((regs->eip > copy_eip) &&
+ (regs->eip - copy_eip) + 5 < MAX_INSN_SIZE) {
+ /*
+ * These instructions can be executed directly if it
+ * jumps back to correct address.
+ */
+ set_jmp_op((void *)regs->eip,
+ (void *)orig_eip + (regs->eip - copy_eip));
+ p->ainsn.boostable = 1;
+ } else {
+ p->ainsn.boostable = -1;
+ }
}
+
+ regs->eip = orig_eip + (regs->eip - copy_eip);
+
+no_change:
+ return;
}
/*
@@ -453,15 +548,57 @@ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
-
- if (kcb->kprobe_status & KPROBE_HIT_SS) {
- resume_execution(cur, regs, kcb);
+ switch(kcb->kprobe_status) {
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ /*
+ * We are here because the instruction being single
+ * stepped caused a page fault. We reset the current
+ * kprobe and the eip points back to the probe address
+ * and allow the page fault handler to continue as a
+ * normal page fault.
+ */
+ regs->eip = (unsigned long)cur->addr;
regs->eflags |= kcb->kprobe_old_eflags;
-
- reset_current_kprobe();
+ if (kcb->kprobe_status == KPROBE_REENTER)
+ restore_previous_kprobe(kcb);
+ else
+ reset_current_kprobe();
preempt_enable_no_resched();
+ break;
+ case KPROBE_HIT_ACTIVE:
+ case KPROBE_HIT_SSDONE:
+ /*
+ * We increment the nmissed count for accounting,
+ * we can also use npre/npostfault count for accouting
+ * these specific fault cases.
+ */
+ kprobes_inc_nmissed_count(cur);
+
+ /*
+ * We come here because instructions in the pre/post
+ * handler caused the page_fault, this could happen
+ * if handler tries to access user space by
+ * copy_from_user(), get_user() etc. Let the
+ * user-specified handler try to fix it first.
+ */
+ if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+ return 1;
+
+ /*
+ * In case the user-specified fault handler returned
+ * zero, try to fix up.
+ */
+ if (fixup_exception(regs))
+ return 1;
+
+ /*
+ * fixup_exception() could not handle it,
+ * Let do_page_fault() fix it.
+ */
+ break;
+ default:
+ break;
}
return 0;
}
@@ -475,6 +612,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
struct die_args *args = (struct die_args *)data;
int ret = NOTIFY_DONE;
+ if (args->regs && user_mode(args->regs))
+ return ret;
+
switch (val) {
case DIE_INT3:
if (kprobe_handler(args->regs))
@@ -564,12 +704,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}
-static struct kprobe trampoline_p = {
- .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
- .pre_handler = trampoline_probe_handler
-};
-
int __init arch_init_kprobes(void)
{
- return register_kprobe(&trampoline_p);
+ return 0;
}
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
index 55bc365b875..e7c138f66c5 100644
--- a/arch/i386/kernel/microcode.c
+++ b/arch/i386/kernel/microcode.c
@@ -81,6 +81,7 @@
#include <linux/miscdevice.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
+#include <linux/mutex.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
@@ -114,7 +115,7 @@ MODULE_LICENSE("GPL");
static DEFINE_SPINLOCK(microcode_update_lock);
/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
-static DECLARE_MUTEX(microcode_sem);
+static DEFINE_MUTEX(microcode_mutex);
static void __user *user_buffer; /* user area microcode data buffer */
static unsigned int user_buffer_size; /* it's size */
@@ -444,7 +445,7 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_
return -EINVAL;
}
- down(&microcode_sem);
+ mutex_lock(&microcode_mutex);
user_buffer = (void __user *) buf;
user_buffer_size = (int) len;
@@ -453,31 +454,14 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_
if (!ret)
ret = (ssize_t)len;
- up(&microcode_sem);
+ mutex_unlock(&microcode_mutex);
return ret;
}
-static int microcode_ioctl (struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
-{
- switch (cmd) {
- /*
- * XXX: will be removed after microcode_ctl
- * is updated to ignore failure of this ioctl()
- */
- case MICROCODE_IOCFREE:
- return 0;
- default:
- return -EINVAL;
- }
- return -EINVAL;
-}
-
static struct file_operations microcode_fops = {
.owner = THIS_MODULE,
.write = microcode_write,
- .ioctl = microcode_ioctl,
.open = microcode_open,
};
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 9074818b947..d43b498ec74 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -138,12 +138,12 @@ static int __init check_nmi_watchdog(void)
if (nmi_watchdog == NMI_LOCAL_APIC)
smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
- for_each_cpu(cpu)
+ for_each_possible_cpu(cpu)
prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
local_irq_enable();
mdelay((10*1000)/nmi_hz); // wait 10 ticks
- for_each_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
#ifdef CONFIG_SMP
/* Check cpu_callin_map here because that is set
after the timer is started. */
@@ -510,7 +510,7 @@ void touch_nmi_watchdog (void)
* Just reset the alert counters, (other CPUs might be
* spinning on locks we hold):
*/
- for_each_cpu(i)
+ for_each_possible_cpu(i)
alert_counter[i] = 0;
/*
@@ -529,7 +529,8 @@ void nmi_watchdog_tick (struct pt_regs * regs)
* always switch the stack NMI-atomically, it's safe to use
* smp_processor_id().
*/
- int sum, cpu = smp_processor_id();
+ unsigned int sum;
+ int cpu = smp_processor_id();
sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 299e6167408..24b3e745478 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -38,7 +38,6 @@
#include <linux/kallsyms.h>
#include <linux/ptrace.h>
#include <linux/random.h>
-#include <linux/kprobes.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -364,13 +363,6 @@ void exit_thread(void)
struct task_struct *tsk = current;
struct thread_struct *t = &tsk->thread;
- /*
- * Remove function-return probe instances associated with this task
- * and put them back on the free list. Do not insert an exit probe for
- * this function, it will be disabled by kprobe_flush_task if you do.
- */
- kprobe_flush_task(tsk);
-
/* The process may have allocated an io port bitmap... nuke it. */
if (unlikely(NULL != t->io_bitmap_ptr)) {
int cpu = get_cpu();
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index d313a11acaf..8c08660b4e5 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -46,6 +46,7 @@
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/dmi.h>
+#include <linux/pfn.h>
#include <video/edid.h>
@@ -1058,10 +1059,10 @@ static int __init
free_available_memory(unsigned long start, unsigned long end, void *arg)
{
/* check max_low_pfn */
- if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
+ if (start >= (max_low_pfn << PAGE_SHIFT))
return 0;
- if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
- end = (max_low_pfn + 1) << PAGE_SHIFT;
+ if (end >= (max_low_pfn << PAGE_SHIFT))
+ end = max_low_pfn << PAGE_SHIFT;
if (start < end)
free_bootmem(start, end - start);
@@ -1286,8 +1287,6 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat
probe_roms();
for (i = 0; i < e820.nr_map; i++) {
struct resource *res;
- if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
- continue;
res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
switch (e820.map[i].type) {
case E820_RAM: res->name = "System RAM"; break;
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 82371d83bfa..a6969903f2d 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -72,6 +72,9 @@ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
/* Core ID of each logical CPU */
int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
+/* Last level cache ID of each logical CPU */
+int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
+
/* representing HT siblings of each logical CPU */
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_sibling_map);
@@ -440,6 +443,18 @@ static void __devinit smp_callin(void)
static int cpucount;
+/* maps the cpu to the sched domain representing multi-core */
+cpumask_t cpu_coregroup_map(int cpu)
+{
+ struct cpuinfo_x86 *c = cpu_data + cpu;
+ /*
+ * For perf, we return last level cache shared map.
+ * TBD: when power saving sched policy is added, we will return
+ * cpu_core_map when power saving policy is enabled
+ */
+ return c->llc_shared_map;
+}
+
/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;
@@ -459,12 +474,16 @@ set_cpu_sibling_map(int cpu)
cpu_set(cpu, cpu_sibling_map[i]);
cpu_set(i, cpu_core_map[cpu]);
cpu_set(cpu, cpu_core_map[i]);
+ cpu_set(i, c[cpu].llc_shared_map);
+ cpu_set(cpu, c[i].llc_shared_map);
}
}
} else {
cpu_set(cpu, cpu_sibling_map[cpu]);
}
+ cpu_set(cpu, c[cpu].llc_shared_map);
+
if (current_cpu_data.x86_max_cores == 1) {
cpu_core_map[cpu] = cpu_sibling_map[cpu];
c[cpu].booted_cores = 1;
@@ -472,6 +491,11 @@ set_cpu_sibling_map(int cpu)
}
for_each_cpu_mask(i, cpu_sibling_setup_map) {
+ if (cpu_llc_id[cpu] != BAD_APICID &&
+ cpu_llc_id[cpu] == cpu_llc_id[i]) {
+ cpu_set(i, c[cpu].llc_shared_map);
+ cpu_set(cpu, c[i].llc_shared_map);
+ }
if (phys_proc_id[cpu] == phys_proc_id[i]) {
cpu_set(i, cpu_core_map[cpu]);
cpu_set(cpu, cpu_core_map[i]);
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index ac687d00a1c..326595f3fa4 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -310,3 +310,5 @@ ENTRY(sys_call_table)
.long sys_pselect6
.long sys_ppoll
.long sys_unshare /* 310 */
+ .long sys_set_robust_list
+ .long sys_get_robust_list
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
index 264edaaac31..144e94a0493 100644
--- a/arch/i386/kernel/timers/timer_pm.c
+++ b/arch/i386/kernel/timers/timer_pm.c
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/init.h>
+#include <linux/pci.h>
#include <asm/types.h>
#include <asm/timer.h>
#include <asm/smp.h>
@@ -45,24 +46,31 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
+static int pmtmr_need_workaround __read_mostly = 1;
+
/*helper function to safely read acpi pm timesource*/
static inline u32 read_pmtmr(void)
{
- u32 v1=0,v2=0,v3=0;
- /* It has been reported that because of various broken
- * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time
- * source is not latched, so you must read it multiple
- * times to insure a safe value is read.
- */
- do {
- v1 = inl(pmtmr_ioport);
- v2 = inl(pmtmr_ioport);
- v3 = inl(pmtmr_ioport);
- } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
- || (v3 > v1 && v3 < v2));
-
- /* mask the output to 24 bits */
- return v2 & ACPI_PM_MASK;
+ if (pmtmr_need_workaround) {
+ u32 v1, v2, v3;
+
+ /* It has been reported that because of various broken
+ * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time
+ * source is not latched, so you must read it multiple
+ * times to insure a safe value is read.
+ */
+ do {
+ v1 = inl(pmtmr_ioport);
+ v2 = inl(pmtmr_ioport);
+ v3 = inl(pmtmr_ioport);
+ } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
+ || (v3 > v1 && v3 < v2));
+
+ /* mask the output to 24 bits */
+ return v2 & ACPI_PM_MASK;
+ }
+
+ return inl(pmtmr_ioport) & ACPI_PM_MASK;
}
@@ -263,6 +271,72 @@ struct init_timer_opts __initdata timer_pmtmr_init = {
.opts = &timer_pmtmr,
};
+#ifdef CONFIG_PCI
+/*
+ * PIIX4 Errata:
+ *
+ * The power management timer may return improper results when read.
+ * Although the timer value settles properly after incrementing,
+ * while incrementing there is a 3 ns window every 69.8 ns where the
+ * timer value is indeterminate (a 4.2% chance that the data will be
+ * incorrect when read). As a result, the ACPI free running count up
+ * timer specification is violated due to erroneous reads.
+ */
+static int __init pmtmr_bug_check(void)
+{
+ static struct pci_device_id gray_list[] __initdata = {
+ /* these chipsets may have bug. */
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_82801DB_0) },
+ { },
+ };
+ struct pci_dev *dev;
+ int pmtmr_has_bug = 0;
+ u8 rev;
+
+ if (cur_timer != &timer_pmtmr || !pmtmr_need_workaround)
+ return 0;
+
+ dev = pci_get_device(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_82371AB_3, NULL);
+ if (dev) {
+ pci_read_config_byte(dev, PCI_REVISION_ID, &rev);
+ /* the bug has been fixed in PIIX4M */
+ if (rev < 3) {
+ printk(KERN_WARNING "* Found PM-Timer Bug on this "
+ "chipset. Due to workarounds for a bug,\n"
+ "* this time source is slow. Consider trying "
+ "other time sources (clock=)\n");
+ pmtmr_has_bug = 1;
+ }
+ pci_dev_put(dev);
+ }
+
+ if (pci_dev_present(gray_list)) {
+ printk(KERN_WARNING "* This chipset may have PM-Timer Bug. Due"
+ " to workarounds for a bug,\n"
+ "* this time source is slow. If you are sure your timer"
+ " does not have\n"
+ "* this bug, please use \"pmtmr_good\" to disable the "
+ "workaround\n");
+ pmtmr_has_bug = 1;
+ }
+
+ if (!pmtmr_has_bug)
+ pmtmr_need_workaround = 0;
+
+ return 0;
+}
+device_initcall(pmtmr_bug_check);
+#endif
+
+static int __init pmtr_good_setup(char *__str)
+{
+ pmtmr_need_workaround = 0;
+ return 1;
+}
+__setup("pmtmr_good", pmtr_good_setup);
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86");
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index de5386b01d3..6b63a5aa1e4 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -92,22 +92,21 @@ asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);
static int kstack_depth_to_print = 24;
-struct notifier_block *i386die_chain;
-static DEFINE_SPINLOCK(die_notifier_lock);
+ATOMIC_NOTIFIER_HEAD(i386die_chain);
int register_die_notifier(struct notifier_block *nb)
{
- int err = 0;
- unsigned long flags;
-
vmalloc_sync_all();
- spin_lock_irqsave(&die_notifier_lock, flags);
- err = notifier_chain_register(&i386die_chain, nb);
- spin_unlock_irqrestore(&die_notifier_lock, flags);
- return err;
+ return atomic_notifier_chain_register(&i386die_chain, nb);
}
EXPORT_SYMBOL(register_die_notifier);
+int unregister_die_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&i386die_chain, nb);
+}
+EXPORT_SYMBOL(unregister_die_notifier);
+
static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
{
return p > (void *)tinfo &&
@@ -386,8 +385,12 @@ void die(const char * str, struct pt_regs * regs, long err)
#endif
if (nl)
printk("\n");
- notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
- show_registers(regs);
+ if (notify_die(DIE_OOPS, str, regs, err,
+ current->thread.trap_no, SIGSEGV) !=
+ NOTIFY_STOP)
+ show_registers(regs);
+ else
+ regs = NULL;
} else
printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
@@ -395,6 +398,9 @@ void die(const char * str, struct pt_regs * regs, long err)
die.lock_owner = -1;
spin_unlock_irqrestore(&die.lock, flags);
+ if (!regs)
+ return;
+
if (kexec_should_crash(current))
crash_kexec(regs);
@@ -623,7 +629,7 @@ static DEFINE_SPINLOCK(nmi_print_lock);
void die_nmi (struct pt_regs *regs, const char *msg)
{
- if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) ==
+ if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
NOTIFY_STOP)
return;
@@ -662,7 +668,7 @@ static void default_do_nmi(struct pt_regs * regs)
reason = get_nmi_reason();
if (!(reason & 0xc0)) {
- if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT)
+ if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
== NOTIFY_STOP)
return;
#ifdef CONFIG_X86_LOCAL_APIC
@@ -678,7 +684,7 @@ static void default_do_nmi(struct pt_regs * regs)
unknown_nmi_error(reason, regs);
return;
}
- if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP)
+ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
return;
if (reason & 0x80)
mem_parity_error(reason, regs);
diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c
index 8165626a5c3..70e560a1b79 100644
--- a/arch/i386/mach-voyager/voyager_smp.c
+++ b/arch/i386/mach-voyager/voyager_smp.c
@@ -1700,7 +1700,7 @@ after_handle_vic_irq(unsigned int irq)
printk("VOYAGER SMP: CPU%d lost interrupt %d\n",
cpu, irq);
- for_each_cpu(real_cpu, mask) {
+ for_each_possible_cpu(real_cpu, mask) {
outb(VIC_CPU_MASQUERADE_ENABLE | real_cpu,
VIC_PROCESSOR_ID);
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index c4af9638dbf..fe6eb901326 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -31,6 +31,7 @@
#include <linux/nodemask.h>
#include <linux/module.h>
#include <linux/kexec.h>
+#include <linux/pfn.h>
#include <asm/e820.h>
#include <asm/setup.h>
@@ -352,17 +353,6 @@ void __init zone_sizes_init(void)
{
int nid;
- /*
- * Insert nodes into pgdat_list backward so they appear in order.
- * Clobber node 0's links and NULL out pgdat_list before starting.
- */
- pgdat_list = NULL;
- for (nid = MAX_NUMNODES - 1; nid >= 0; nid--) {
- if (!node_online(nid))
- continue;
- NODE_DATA(nid)->pgdat_next = pgdat_list;
- pgdat_list = NODE_DATA(nid);
- }
for_each_online_node(nid) {
unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index 9db3242103b..2889567e21a 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -36,7 +36,7 @@ void show_mem(void)
printk(KERN_INFO "Mem-info:\n");
show_free_areas();
printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
- for_each_pgdat(pgdat) {
+ for_each_online_pgdat(pgdat) {
pgdat_resize_lock(pgdat, &flags);
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
page = pgdat_page_nr(pgdat, i);
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c
index 1accce50c2c..1a2076ce6f6 100644
--- a/arch/i386/oprofile/nmi_int.c
+++ b/arch/i386/oprofile/nmi_int.c
@@ -122,7 +122,7 @@ static void nmi_save_registers(void * dummy)
static void free_msrs(void)
{
int i;
- for_each_cpu(i) {
+ for_each_possible_cpu(i) {
kfree(cpu_msrs[i].counters);
cpu_msrs[i].counters = NULL;
kfree(cpu_msrs[i].controls);