Diffstat (limited to 'arch/i386')
26 files changed, 558 insertions, 232 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index b008fb0cd7b..f17bd1d2707 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -37,6 +37,10 @@ config GENERIC_IOMAP bool default y +config GENERIC_HWEIGHT + bool + default y + config ARCH_MAY_HAVE_PC_FDC bool default y @@ -227,6 +231,15 @@ config SCHED_SMT cost of slightly increased overhead in some places. If unsure say N here. +config SCHED_MC + bool "Multi-core scheduler support" + depends on SMP + default y + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + source "kernel/Kconfig.preempt" config X86_UP_APIC diff --git a/arch/i386/Makefile b/arch/i386/Makefile index c848a5b3039..3e4adb1e224 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -103,7 +103,7 @@ AFLAGS += $(mflags-y) boot := arch/i386/boot PHONY += zImage bzImage compressed zlilo bzlilo \ - zdisk bzdisk fdimage fdimage144 fdimage288 install + zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install all: bzImage @@ -122,7 +122,7 @@ zlilo bzlilo: vmlinux zdisk bzdisk: vmlinux $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zdisk -fdimage fdimage144 fdimage288: vmlinux +fdimage fdimage144 fdimage288 isoimage: vmlinux $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@ install: @@ -139,6 +139,9 @@ define archhelp echo ' install to $$(INSTALL_PATH) and run lilo' echo ' bzdisk - Create a boot floppy in /dev/fd0' echo ' fdimage - Create a boot floppy image' + echo ' isoimage - Create a boot CD-ROM image' endef -CLEAN_FILES += arch/$(ARCH)/boot/fdimage arch/$(ARCH)/boot/mtools.conf +CLEAN_FILES += arch/$(ARCH)/boot/fdimage \ + arch/$(ARCH)/boot/image.iso \ + arch/$(ARCH)/boot/mtools.conf diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile index f136752563b..33e55476381 100644 --- a/arch/i386/boot/Makefile +++ b/arch/i386/boot/Makefile @@ -62,8 +62,12 @@ $(obj)/setup $(obj)/bootsect: %: %.o FORCE $(obj)/compressed/vmlinux: FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@ -# Set this if you want to pass append arguments to the zdisk/fdimage kernel +# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel FDARGS = +# Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel +FDINITRD = + +image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,) $(obj)/mtools.conf: $(src)/mtools.conf.in sed -e 's|@OBJ@|$(obj)|g' < $< > $@ @@ -72,8 +76,11 @@ $(obj)/mtools.conf: $(src)/mtools.conf.in zdisk: $(BOOTIMAGE) $(obj)/mtools.conf MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync syslinux /dev/fd0 ; sync - echo 'default linux $(FDARGS)' | \ + echo '$(image_cmdline)' | \ MTOOLSRC=$(src)/mtools.conf mcopy - a:syslinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \ + fi MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) a:linux ; sync # These require being root or having syslinux 2.02 or higher installed @@ -81,18 +88,39 @@ fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440 MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync syslinux $(obj)/fdimage ; sync - echo 'default linux $(FDARGS)' | \ + echo '$(image_cmdline)' | \ MTOOLSRC=$(obj)/mtools.conf mcopy - v:syslinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \ + fi MTOOLSRC=$(obj)/mtools.conf mcopy 
$(BOOTIMAGE) v:linux ; sync fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880 MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync syslinux $(obj)/fdimage ; sync - echo 'default linux $(FDARGS)' | \ + echo '$(image_cmdline)' | \ MTOOLSRC=$(obj)/mtools.conf mcopy - w:syslinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \ + fi MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) w:linux ; sync +isoimage: $(BOOTIMAGE) + -rm -rf $(obj)/isoimage + mkdir $(obj)/isoimage + cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \ + $(obj)/isoimage + cp $(BOOTIMAGE) $(obj)/isoimage/linux + echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \ + fi + mkisofs -J -r -o $(obj)/image.iso -b isolinux.bin -c boot.cat \ + -no-emul-boot -boot-load-size 4 -boot-info-table \ + $(obj)/isoimage + rm -rf $(obj)/isoimage + zlilo: $(BOOTIMAGE) if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S index 2ac40c8244c..0000a267453 100644 --- a/arch/i386/boot/video.S +++ b/arch/i386/boot/video.S @@ -1924,6 +1924,7 @@ skip10: movb %ah, %al ret store_edid: +#ifdef CONFIG_FB_FIRMWARE_EDID pushw %es # just save all registers pushw %ax pushw %bx @@ -1954,6 +1955,7 @@ store_edid: popw %bx popw %ax popw %es +#endif ret # VIDEO_SELECT-only variables diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index f1a21945963..033066176b3 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -668,10 +668,10 @@ unsigned long __init acpi_find_rsdp(void) unsigned long rsdp_phys = 0; if (efi_enabled) { - if (efi.acpi20) - return __pa(efi.acpi20); - else if (efi.acpi) - return __pa(efi.acpi); + if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) + return efi.acpi20; + else if (efi.acpi != EFI_INVALID_TABLE_ADDR) + return efi.acpi; } /* * Scan memory looking for the RSDP signature. 
First search EBDA (low diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 7e3d6b6a4e9..a06a49075f1 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -266,7 +266,7 @@ static void __init early_cpu_detect(void) void __cpuinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; - int junk; + int ebx; if (have_cpuid_p()) { /* Get vendor name */ @@ -282,7 +282,7 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c) /* Intel-defined flags: level 0x00000001 */ if ( c->cpuid_level >= 0x00000001 ) { u32 capability, excap; - cpuid(0x00000001, &tfms, &junk, &excap, &capability); + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); c->x86_capability[0] = capability; c->x86_capability[4] = excap; c->x86 = (tfms >> 8) & 15; @@ -292,6 +292,11 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c) if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; c->x86_mask = tfms & 15; +#ifdef CONFIG_SMP + c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); +#else + c->apicid = (ebx >> 24) & 0xFF; +#endif } else { /* Have CPUID level 0 only - unheard of */ c->x86 = 4; @@ -474,7 +479,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) cpuid(1, &eax, &ebx, &ecx, &edx); - c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index e5bc06480ff..712a26bd445 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -40,6 +40,7 @@ #ifdef CONFIG_X86_POWERNOW_K8_ACPI #include <linux/acpi.h> +#include <linux/mutex.h> #include <acpi/processor.h> #endif @@ -49,7 +50,7 @@ #include "powernow-k8.h" /* serialize freq changes */ -static DECLARE_MUTEX(fidvid_sem); +static DEFINE_MUTEX(fidvid_mutex); static struct powernow_k8_data *powernow_data[NR_CPUS]; @@ -943,17 +944,17 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate)) goto err_out; - down(&fidvid_sem); + mutex_lock(&fidvid_mutex); powernow_k8_acpi_pst_values(data, newstate); if (transition_frequency(data, newstate)) { printk(KERN_ERR PFX "transition frequency failed\n"); ret = 1; - up(&fidvid_sem); + mutex_unlock(&fidvid_mutex); goto err_out; } - up(&fidvid_sem); + mutex_unlock(&fidvid_mutex); pol->cur = find_khz_freq_from_fid(data->currfid); ret = 0; @@ -1094,10 +1095,15 @@ static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol) static unsigned int powernowk8_get (unsigned int cpu) { - struct powernow_k8_data *data = powernow_data[cpu]; + struct powernow_k8_data *data; cpumask_t oldmask = current->cpus_allowed; unsigned int khz = 0; + data = powernow_data[first_cpu(cpu_core_map[cpu])]; + + if (!data) + return -EINVAL; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) { printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h index 00ea899c17e..79a7c5c87ed 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h @@ -182,10 +182,6 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); -#ifndef for_each_cpu_mask -#define 
for_each_cpu_mask(i,mask) for (i=0;i<1;i++) -#endif - #ifdef CONFIG_SMP static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) { diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index ce61921369e..9df87b03612 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -173,6 +173,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ + unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; +#ifdef CONFIG_SMP + unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); +#endif if (c->cpuid_level > 3) { static int is_initialized; @@ -205,9 +209,15 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) break; case 2: new_l2 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l2_id = c->apicid >> index_msb; break; case 3: new_l3 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l3_id = c->apicid >> index_msb; break; default: break; @@ -215,11 +225,19 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) } } } - if (c->cpuid_level > 1) { + /* + * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for + * trace cache + */ + if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { /* supports eax=2 call */ int i, j, n; int regs[4]; unsigned char *dp = (unsigned char *)regs; + int only_trace = 0; + + if (num_cache_leaves != 0 && c->x86 == 15) + only_trace = 1; /* Number of times to iterate */ n = cpuid_eax(2) & 0xFF; @@ -241,6 +259,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) while (cache_table[k].descriptor != 0) { if (cache_table[k].descriptor == des) { + if (only_trace && cache_table[k].cache_type != LVL_TRACE) + break; switch (cache_table[k].cache_type) { case LVL_1_INST: l1i += cache_table[k].size; @@ -266,34 +286,45 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) } } } + } - if (new_l1d) - l1d = new_l1d; + if (new_l1d) + l1d = new_l1d; - if (new_l1i) - l1i = new_l1i; + if (new_l1i) + l1i = new_l1i; - if (new_l2) - l2 = new_l2; + if (new_l2) { + l2 = new_l2; +#ifdef CONFIG_SMP + cpu_llc_id[cpu] = l2_id; +#endif + } - if (new_l3) - l3 = new_l3; + if (new_l3) { + l3 = new_l3; +#ifdef CONFIG_SMP + cpu_llc_id[cpu] = l3_id; +#endif + } - if ( trace ) - printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); - else if ( l1i ) - printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); - if ( l1d ) - printk(", L1 D cache: %dK\n", l1d); - else - printk("\n"); - if ( l2 ) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); - if ( l3 ) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); + if (trace) + printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); + else if ( l1i ) + printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); - c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); - } + if (l1d) + printk(", L1 D cache: %dK\n", l1d); + else + printk("\n"); + + if (l2) + printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); + + if (l3) + printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); + + c->x86_cache_size = l3 ? l3 : (l2 ? 
l2 : (l1i+l1d)); return l2; } diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index 3b4618bed70..fff90bda473 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -36,6 +36,7 @@ #include <linux/pci.h> #include <linux/smp.h> #include <linux/cpu.h> +#include <linux/mutex.h> #include <asm/mtrr.h> @@ -47,7 +48,7 @@ u32 num_var_ranges = 0; unsigned int *usage_table; -static DECLARE_MUTEX(mtrr_sem); +static DEFINE_MUTEX(mtrr_mutex); u32 size_or_mask, size_and_mask; @@ -333,7 +334,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, /* No CPU hotplug when we change MTRR entries */ lock_cpu_hotplug(); /* Search for existing MTRR */ - down(&mtrr_sem); + mutex_lock(&mtrr_mutex); for (i = 0; i < num_var_ranges; ++i) { mtrr_if->get(i, &lbase, &lsize, &ltype); if (base >= lbase + lsize) @@ -371,7 +372,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, printk(KERN_INFO "mtrr: no more MTRRs available\n"); error = i; out: - up(&mtrr_sem); + mutex_unlock(&mtrr_mutex); unlock_cpu_hotplug(); return error; } @@ -464,7 +465,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) max = num_var_ranges; /* No CPU hotplug when we change MTRR entries */ lock_cpu_hotplug(); - down(&mtrr_sem); + mutex_lock(&mtrr_mutex); if (reg < 0) { /* Search for existing MTRR */ for (i = 0; i < max; ++i) { @@ -503,7 +504,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) set_mtrr(reg, 0, 0, 0); error = reg; out: - up(&mtrr_sem); + mutex_unlock(&mtrr_mutex); unlock_cpu_hotplug(); return error; } @@ -685,7 +686,7 @@ void mtrr_ap_init(void) if (!mtrr_if || !use_intel()) return; /* - * Ideally we should hold mtrr_sem here to avoid mtrr entries changed, + * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed, * but this routine will be called in cpu boot time, holding the lock * breaks it. This routine is called in two cases: 1.very earily time * of software resume, when there absolutely isn't mtrr entry changes; diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index ebc8dc116c4..5efceebc48d 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -3,6 +3,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/dmi.h> +#include <linux/efi.h> #include <linux/bootmem.h> #include <linux/slab.h> #include <asm/dmi.h> @@ -185,47 +186,72 @@ static void __init dmi_decode(struct dmi_header *dm) } } -void __init dmi_scan_machine(void) +static int __init dmi_present(char __iomem *p) { u8 buf[15]; - char __iomem *p, *q; + memcpy_fromio(buf, p, 15); + if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) { + u16 num = (buf[13] << 8) | buf[12]; + u16 len = (buf[7] << 8) | buf[6]; + u32 base = (buf[11] << 24) | (buf[10] << 16) | + (buf[9] << 8) | buf[8]; - /* - * no iounmap() for that ioremap(); it would be a no-op, but it's - * so early in setup that sucker gets confused into doing what - * it shouldn't if we actually call it. - */ - p = ioremap(0xF0000, 0x10000); - if (p == NULL) - goto out; - - for (q = p; q < p + 0x10000; q += 16) { - memcpy_fromio(buf, q, 15); - if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) { - u16 num = (buf[13] << 8) | buf[12]; - u16 len = (buf[7] << 8) | buf[6]; - u32 base = (buf[11] << 24) | (buf[10] << 16) | - (buf[9] << 8) | buf[8]; - - /* - * DMI version 0.0 means that the real version is taken from - * the SMBIOS version, which we don't know at this point. 
- */ - if (buf[14] != 0) - printk(KERN_INFO "DMI %d.%d present.\n", - buf[14] >> 4, buf[14] & 0xF); - else - printk(KERN_INFO "DMI present.\n"); + /* + * DMI version 0.0 means that the real version is taken from + * the SMBIOS version, which we don't know at this point. + */ + if (buf[14] != 0) + printk(KERN_INFO "DMI %d.%d present.\n", + buf[14] >> 4, buf[14] & 0xF); + else + printk(KERN_INFO "DMI present.\n"); + if (dmi_table(base,len, num, dmi_decode) == 0) + return 0; + } + return 1; +} - if (dmi_table(base,len, num, dmi_decode) == 0) +void __init dmi_scan_machine(void) +{ + char __iomem *p, *q; + int rc; + + if (efi_enabled) { + if (efi.smbios == EFI_INVALID_TABLE_ADDR) + goto out; + + /* This is called as a core_initcall() because it isn't + * needed during early boot. This also means we can + * iounmap the space when we're done with it. + */ + p = dmi_ioremap(efi.smbios, 32); + if (p == NULL) + goto out; + + rc = dmi_present(p + 0x10); /* offset of _DMI_ string */ + dmi_iounmap(p, 32); + if (!rc) + return; + } + else { + /* + * no iounmap() for that ioremap(); it would be a no-op, but + * it's so early in setup that sucker gets confused into doing + * what it shouldn't if we actually call it. + */ + p = dmi_ioremap(0xF0000, 0x10000); + if (p == NULL) + goto out; + + for (q = p; q < p + 0x10000; q += 16) { + rc = dmi_present(q); + if (!rc) return; } } - -out: printk(KERN_INFO "DMI not present or invalid.\n"); + out: printk(KERN_INFO "DMI not present or invalid.\n"); } - /** * dmi_check_system - check system DMI data * @list: array of dmi_system_id structures to match against diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index 7ec6cfa01fb..9202b67c4b2 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -361,7 +361,7 @@ void __init efi_init(void) */ c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2); if (c16) { - for (i = 0; i < sizeof(vendor) && *c16; ++i) + for (i = 0; i < (sizeof(vendor) - 1) && *c16; ++i) vendor[i] = *c16++; vendor[i] = '\0'; } else @@ -381,29 +381,38 @@ void __init efi_init(void) if (config_tables == NULL) printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n"); + efi.mps = EFI_INVALID_TABLE_ADDR; + efi.acpi = EFI_INVALID_TABLE_ADDR; + efi.acpi20 = EFI_INVALID_TABLE_ADDR; + efi.smbios = EFI_INVALID_TABLE_ADDR; + efi.sal_systab = EFI_INVALID_TABLE_ADDR; + efi.boot_info = EFI_INVALID_TABLE_ADDR; + efi.hcdp = EFI_INVALID_TABLE_ADDR; + efi.uga = EFI_INVALID_TABLE_ADDR; + for (i = 0; i < num_config_tables; i++) { if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { - efi.mps = (void *)config_tables[i].table; + efi.mps = config_tables[i].table; printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) { - efi.acpi20 = __va(config_tables[i].table); + efi.acpi20 = config_tables[i].table; printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { - efi.acpi = __va(config_tables[i].table); + efi.acpi = config_tables[i].table; printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { - efi.smbios = (void *) config_tables[i].table; + efi.smbios = config_tables[i].table; printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { - efi.hcdp = (void *)config_tables[i].table; + efi.hcdp = config_tables[i].table; printk(KERN_INFO " 
HCDP=0x%lx ", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) { - efi.uga = (void *)config_tables[i].table; + efi.uga = config_tables[i].table; printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table); } } diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 311b4e7266f..3b329af4afc 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -381,7 +381,7 @@ static void do_irq_balance(void) unsigned long imbalance = 0; cpumask_t allowed_mask, target_cpu_mask, tmp; - for_each_cpu(i) { + for_each_possible_cpu(i) { int package_index; CPU_IRQ(i) = 0; if (!cpu_online(i)) @@ -632,7 +632,7 @@ static int __init balanced_irq_init(void) else printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); failed: - for_each_cpu(i) { + for_each_possible_cpu(i) { kfree(irq_cpu_data[i].irq_delta); irq_cpu_data[i].irq_delta = NULL; kfree(irq_cpu_data[i].last_irq); diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 7a59050242a..f19768789e8 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -35,12 +35,56 @@ #include <asm/cacheflush.h> #include <asm/kdebug.h> #include <asm/desc.h> +#include <asm/uaccess.h> void jprobe_return_end(void); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); +/* insert a jmp code */ +static inline void set_jmp_op(void *from, void *to) +{ + struct __arch_jmp_op { + char op; + long raddr; + } __attribute__((packed)) *jop; + jop = (struct __arch_jmp_op *)from; + jop->raddr = (long)(to) - ((long)(from) + 5); + jop->op = RELATIVEJUMP_INSTRUCTION; +} + +/* + * returns non-zero if opcodes can be boosted. + */ +static inline int can_boost(kprobe_opcode_t opcode) +{ + switch (opcode & 0xf0 ) { + case 0x70: + return 0; /* can't boost conditional jump */ + case 0x90: + /* can't boost call and pushf */ + return opcode != 0x9a && opcode != 0x9c; + case 0xc0: + /* can't boost undefined opcodes and soft-interruptions */ + return (0xc1 < opcode && opcode < 0xc6) || + (0xc7 < opcode && opcode < 0xcc) || opcode == 0xcf; + case 0xd0: + /* can boost AA* and XLAT */ + return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); + case 0xe0: + /* can boost in/out and (may be) jmps */ + return (0xe3 < opcode && opcode != 0xe8); + case 0xf0: + /* clear and set flags can be boost */ + return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); + default: + /* currently, can't boost 2 bytes opcodes */ + return opcode != 0x0f; + } +} + + /* * returns non-zero if opcode modifies the interrupt flag. 
*/ @@ -65,6 +109,11 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); p->opcode = *p->addr; + if (can_boost(p->opcode)) { + p->ainsn.boostable = 0; + } else { + p->ainsn.boostable = -1; + } return 0; } @@ -155,9 +204,13 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0; - kprobe_opcode_t *addr = NULL; - unsigned long *lp; + kprobe_opcode_t *addr; struct kprobe_ctlblk *kcb; +#ifdef CONFIG_PREEMPT + unsigned pre_preempt_count = preempt_count(); +#endif /* CONFIG_PREEMPT */ + + addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); /* * We don't want to be preempted for the entire @@ -166,17 +219,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) preempt_disable(); kcb = get_kprobe_ctlblk(); - /* Check if the application is using LDT entry for its code segment and - * calculate the address by reading the base address from the LDT entry. - */ - if ((regs->xcs & 4) && (current->mm)) { - lp = (unsigned long *) ((unsigned long)((regs->xcs >> 3) * 8) - + (char *) current->mm->context.ldt); - addr = (kprobe_opcode_t *) (get_desc_base(lp) + regs->eip - - sizeof(kprobe_opcode_t)); - } else { - addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); - } /* Check we're not actually recursing */ if (kprobe_running()) { p = get_kprobe(addr); @@ -252,6 +294,21 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) /* handler has already set things up, so skip ss setup */ return 1; + if (p->ainsn.boostable == 1 && +#ifdef CONFIG_PREEMPT + !(pre_preempt_count) && /* + * This enables booster when the direct + * execution path aren't preempted. + */ +#endif /* CONFIG_PREEMPT */ + !p->post_handler && !p->break_handler ) { + /* Boost up -- we can execute copied instructions directly */ + reset_current_kprobe(); + regs->eip = (unsigned long)p->ainsn.insn; + preempt_enable_no_resched(); + return 1; + } + ss_probe: prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_HIT_SS; @@ -267,17 +324,44 @@ no_kprobe: * here. When a retprobed function returns, this probe is hit and * trampoline_probe_handler() runs, calling the kretprobe's handler. 
*/ - void kretprobe_trampoline_holder(void) + void __kprobes kretprobe_trampoline_holder(void) { - asm volatile ( ".global kretprobe_trampoline\n" + asm volatile ( ".global kretprobe_trampoline\n" "kretprobe_trampoline: \n" - "nop\n"); - } + " pushf\n" + /* skip cs, eip, orig_eax, es, ds */ + " subl $20, %esp\n" + " pushl %eax\n" + " pushl %ebp\n" + " pushl %edi\n" + " pushl %esi\n" + " pushl %edx\n" + " pushl %ecx\n" + " pushl %ebx\n" + " movl %esp, %eax\n" + " call trampoline_handler\n" + /* move eflags to cs */ + " movl 48(%esp), %edx\n" + " movl %edx, 44(%esp)\n" + /* save true return address on eflags */ + " movl %eax, 48(%esp)\n" + " popl %ebx\n" + " popl %ecx\n" + " popl %edx\n" + " popl %esi\n" + " popl %edi\n" + " popl %ebp\n" + " popl %eax\n" + /* skip eip, orig_eax, es, ds */ + " addl $16, %esp\n" + " popf\n" + " ret\n"); +} /* - * Called when we hit the probe point at kretprobe_trampoline + * Called from kretprobe_trampoline */ -int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +fastcall void *__kprobes trampoline_handler(struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head; @@ -306,8 +390,11 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) /* another task is sharing our hash bucket */ continue; - if (ri->rp && ri->rp->handler) + if (ri->rp && ri->rp->handler){ + __get_cpu_var(current_kprobe) = &ri->rp->kp; ri->rp->handler(ri, regs); + __get_cpu_var(current_kprobe) = NULL; + } orig_ret_address = (unsigned long)ri->ret_addr; recycle_rp_inst(ri); @@ -322,18 +409,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) } BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); - regs->eip = orig_ret_address; - reset_current_kprobe(); spin_unlock_irqrestore(&kretprobe_lock, flags); - preempt_enable_no_resched(); - /* - * By returning a non-zero value, we are telling - * kprobe_handler() that we don't want the post_handler - * to run (and have re-enabled preemption) - */ - return 1; + return (void*)orig_ret_address; } /* @@ -357,15 +436,17 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * 2) If the single-stepped instruction was a call, the return address * that is atop the stack is the address following the copied instruction. * We need to make it the address following the original instruction. + * + * This function also checks instruction size for preparing direct execution. */ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { unsigned long *tos = (unsigned long *)&regs->esp; - unsigned long next_eip = 0; unsigned long copy_eip = (unsigned long)p->ainsn.insn; unsigned long orig_eip = (unsigned long)p->addr; + regs->eflags &= ~TF_MASK; switch (p->ainsn.insn[0]) { case 0x9c: /* pushfl */ *tos &= ~(TF_MASK | IF_MASK); @@ -375,37 +456,51 @@ static void __kprobes resume_execution(struct kprobe *p, case 0xcb: case 0xc2: case 0xca: - regs->eflags &= ~TF_MASK; - /* eip is already adjusted, no more changes required*/ - return; + case 0xea: /* jmp absolute -- eip is correct */ + /* eip is already adjusted, no more changes required */ + p->ainsn.boostable = 1; + goto no_change; case 0xe8: /* call relative - Fix return addr */ *tos = orig_eip + (*tos - copy_eip); break; case 0xff: if ((p->ainsn.insn[1] & 0x30) == 0x10) { /* call absolute, indirect */ - /* Fix return addr; eip is correct. */ - next_eip = regs->eip; + /* + * Fix return addr; eip is correct. 
+ * But this is not boostable + */ *tos = orig_eip + (*tos - copy_eip); + goto no_change; } else if (((p->ainsn.insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ ((p->ainsn.insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ - /* eip is correct. */ - next_eip = regs->eip; + /* eip is correct. And this is boostable */ + p->ainsn.boostable = 1; + goto no_change; } - break; - case 0xea: /* jmp absolute -- eip is correct */ - next_eip = regs->eip; - break; default: break; } - regs->eflags &= ~TF_MASK; - if (next_eip) { - regs->eip = next_eip; - } else { - regs->eip = orig_eip + (regs->eip - copy_eip); + if (p->ainsn.boostable == 0) { + if ((regs->eip > copy_eip) && + (regs->eip - copy_eip) + 5 < MAX_INSN_SIZE) { + /* + * These instructions can be executed directly if it + * jumps back to correct address. + */ + set_jmp_op((void *)regs->eip, + (void *)orig_eip + (regs->eip - copy_eip)); + p->ainsn.boostable = 1; + } else { + p->ainsn.boostable = -1; + } } + + regs->eip = orig_eip + (regs->eip - copy_eip); + +no_change: + return; } /* @@ -453,15 +548,57 @@ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - - if (kcb->kprobe_status & KPROBE_HIT_SS) { - resume_execution(cur, regs, kcb); + switch(kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the eip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + regs->eip = (unsigned long)cur->addr; regs->eflags |= kcb->kprobe_old_eflags; - - reset_current_kprobe(); + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accouting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if (fixup_exception(regs)) + return 1; + + /* + * fixup_exception() could not handle it, + * Let do_page_fault() fix it. 
+ */ + break; + default: + break; } return 0; } @@ -475,6 +612,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; + if (args->regs && user_mode(args->regs)) + return ret; + switch (val) { case DIE_INT3: if (kprobe_handler(args->regs)) @@ -564,12 +704,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } -static struct kprobe trampoline_p = { - .addr = (kprobe_opcode_t *) &kretprobe_trampoline, - .pre_handler = trampoline_probe_handler -}; - int __init arch_init_kprobes(void) { - return register_kprobe(&trampoline_p); + return 0; } diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index 55bc365b875..e7c138f66c5 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -81,6 +81,7 @@ #include <linux/miscdevice.h> #include <linux/spinlock.h> #include <linux/mm.h> +#include <linux/mutex.h> #include <asm/msr.h> #include <asm/uaccess.h> @@ -114,7 +115,7 @@ MODULE_LICENSE("GPL"); static DEFINE_SPINLOCK(microcode_update_lock); /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -static DECLARE_MUTEX(microcode_sem); +static DEFINE_MUTEX(microcode_mutex); static void __user *user_buffer; /* user area microcode data buffer */ static unsigned int user_buffer_size; /* it's size */ @@ -444,7 +445,7 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_ return -EINVAL; } - down(&microcode_sem); + mutex_lock(&microcode_mutex); user_buffer = (void __user *) buf; user_buffer_size = (int) len; @@ -453,31 +454,14 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_ if (!ret) ret = (ssize_t)len; - up(&microcode_sem); + mutex_unlock(&microcode_mutex); return ret; } -static int microcode_ioctl (struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - /* - * XXX: will be removed after microcode_ctl - * is updated to ignore failure of this ioctl() - */ - case MICROCODE_IOCFREE: - return 0; - default: - return -EINVAL; - } - return -EINVAL; -} - static struct file_operations microcode_fops = { .owner = THIS_MODULE, .write = microcode_write, - .ioctl = microcode_ioctl, .open = microcode_open, }; diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 9074818b947..d43b498ec74 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -138,12 +138,12 @@ static int __init check_nmi_watchdog(void) if (nmi_watchdog == NMI_LOCAL_APIC) smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); - for_each_cpu(cpu) + for_each_possible_cpu(cpu) prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { #ifdef CONFIG_SMP /* Check cpu_callin_map here because that is set after the timer is started. */ @@ -510,7 +510,7 @@ void touch_nmi_watchdog (void) * Just reset the alert counters, (other CPUs might be * spinning on locks we hold): */ - for_each_cpu(i) + for_each_possible_cpu(i) alert_counter[i] = 0; /* @@ -529,7 +529,8 @@ void nmi_watchdog_tick (struct pt_regs * regs) * always switch the stack NMI-atomically, it's safe to use * smp_processor_id(). 
*/ - int sum, cpu = smp_processor_id(); + unsigned int sum; + int cpu = smp_processor_id(); sum = per_cpu(irq_stat, cpu).apic_timer_irqs; diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 299e6167408..24b3e745478 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -38,7 +38,6 @@ #include <linux/kallsyms.h> #include <linux/ptrace.h> #include <linux/random.h> -#include <linux/kprobes.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -364,13 +363,6 @@ void exit_thread(void) struct task_struct *tsk = current; struct thread_struct *t = &tsk->thread; - /* - * Remove function-return probe instances associated with this task - * and put them back on the free list. Do not insert an exit probe for - * this function, it will be disabled by kprobe_flush_task if you do. - */ - kprobe_flush_task(tsk); - /* The process may have allocated an io port bitmap... nuke it. */ if (unlikely(NULL != t->io_bitmap_ptr)) { int cpu = get_cpu(); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index d313a11acaf..8c08660b4e5 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -46,6 +46,7 @@ #include <linux/kexec.h> #include <linux/crash_dump.h> #include <linux/dmi.h> +#include <linux/pfn.h> #include <video/edid.h> @@ -1058,10 +1059,10 @@ static int __init free_available_memory(unsigned long start, unsigned long end, void *arg) { /* check max_low_pfn */ - if (start >= ((max_low_pfn + 1) << PAGE_SHIFT)) + if (start >= (max_low_pfn << PAGE_SHIFT)) return 0; - if (end >= ((max_low_pfn + 1) << PAGE_SHIFT)) - end = (max_low_pfn + 1) << PAGE_SHIFT; + if (end >= (max_low_pfn << PAGE_SHIFT)) + end = max_low_pfn << PAGE_SHIFT; if (start < end) free_bootmem(start, end - start); @@ -1286,8 +1287,6 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat probe_roms(); for (i = 0; i < e820.nr_map; i++) { struct resource *res; - if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) - continue; res = kzalloc(sizeof(struct resource), GFP_ATOMIC); switch (e820.map[i].type) { case E820_RAM: res->name = "System RAM"; break; diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 82371d83bfa..a6969903f2d 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -72,6 +72,9 @@ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; /* Core ID of each logical CPU */ int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; +/* Last level cache ID of each logical CPU */ +int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; + /* representing HT siblings of each logical CPU */ cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); @@ -440,6 +443,18 @@ static void __devinit smp_callin(void) static int cpucount; +/* maps the cpu to the sched domain representing multi-core */ +cpumask_t cpu_coregroup_map(int cpu) +{ + struct cpuinfo_x86 *c = cpu_data + cpu; + /* + * For perf, we return last level cache shared map. 
+ * TBD: when power saving sched policy is added, we will return + * cpu_core_map when power saving policy is enabled + */ + return c->llc_shared_map; +} + /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -459,12 +474,16 @@ set_cpu_sibling_map(int cpu) cpu_set(cpu, cpu_sibling_map[i]); cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); } } } else { cpu_set(cpu, cpu_sibling_map[cpu]); } + cpu_set(cpu, c[cpu].llc_shared_map); + if (current_cpu_data.x86_max_cores == 1) { cpu_core_map[cpu] = cpu_sibling_map[cpu]; c[cpu].booted_cores = 1; @@ -472,6 +491,11 @@ set_cpu_sibling_map(int cpu) } for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (cpu_llc_id[cpu] != BAD_APICID && + cpu_llc_id[cpu] == cpu_llc_id[i]) { + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); + } if (phys_proc_id[cpu] == phys_proc_id[i]) { cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index ac687d00a1c..326595f3fa4 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -310,3 +310,5 @@ ENTRY(sys_call_table) .long sys_pselect6 .long sys_ppoll .long sys_unshare /* 310 */ + .long sys_set_robust_list + .long sys_get_robust_list diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c index 264edaaac31..144e94a0493 100644 --- a/arch/i386/kernel/timers/timer_pm.c +++ b/arch/i386/kernel/timers/timer_pm.c @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/device.h> #include <linux/init.h> +#include <linux/pci.h> #include <asm/types.h> #include <asm/timer.h> #include <asm/smp.h> @@ -45,24 +46,31 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ +static int pmtmr_need_workaround __read_mostly = 1; + /*helper function to safely read acpi pm timesource*/ static inline u32 read_pmtmr(void) { - u32 v1=0,v2=0,v3=0; - /* It has been reported that because of various broken - * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time - * source is not latched, so you must read it multiple - * times to insure a safe value is read. - */ - do { - v1 = inl(pmtmr_ioport); - v2 = inl(pmtmr_ioport); - v3 = inl(pmtmr_ioport); - } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) - || (v3 > v1 && v3 < v2)); - - /* mask the output to 24 bits */ - return v2 & ACPI_PM_MASK; + if (pmtmr_need_workaround) { + u32 v1, v2, v3; + + /* It has been reported that because of various broken + * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time + * source is not latched, so you must read it multiple + * times to insure a safe value is read. + */ + do { + v1 = inl(pmtmr_ioport); + v2 = inl(pmtmr_ioport); + v3 = inl(pmtmr_ioport); + } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) + || (v3 > v1 && v3 < v2)); + + /* mask the output to 24 bits */ + return v2 & ACPI_PM_MASK; + } + + return inl(pmtmr_ioport) & ACPI_PM_MASK; } @@ -263,6 +271,72 @@ struct init_timer_opts __initdata timer_pmtmr_init = { .opts = &timer_pmtmr, }; +#ifdef CONFIG_PCI +/* + * PIIX4 Errata: + * + * The power management timer may return improper results when read. + * Although the timer value settles properly after incrementing, + * while incrementing there is a 3 ns window every 69.8 ns where the + * timer value is indeterminate (a 4.2% chance that the data will be + * incorrect when read). 
As a result, the ACPI free running count up + * timer specification is violated due to erroneous reads. + */ +static int __init pmtmr_bug_check(void) +{ + static struct pci_device_id gray_list[] __initdata = { + /* these chipsets may have bug. */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82801DB_0) }, + { }, + }; + struct pci_dev *dev; + int pmtmr_has_bug = 0; + u8 rev; + + if (cur_timer != &timer_pmtmr || !pmtmr_need_workaround) + return 0; + + dev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82371AB_3, NULL); + if (dev) { + pci_read_config_byte(dev, PCI_REVISION_ID, &rev); + /* the bug has been fixed in PIIX4M */ + if (rev < 3) { + printk(KERN_WARNING "* Found PM-Timer Bug on this " + "chipset. Due to workarounds for a bug,\n" + "* this time source is slow. Consider trying " + "other time sources (clock=)\n"); + pmtmr_has_bug = 1; + } + pci_dev_put(dev); + } + + if (pci_dev_present(gray_list)) { + printk(KERN_WARNING "* This chipset may have PM-Timer Bug. Due" + " to workarounds for a bug,\n" + "* this time source is slow. If you are sure your timer" + " does not have\n" + "* this bug, please use \"pmtmr_good\" to disable the " + "workaround\n"); + pmtmr_has_bug = 1; + } + + if (!pmtmr_has_bug) + pmtmr_need_workaround = 0; + + return 0; +} +device_initcall(pmtmr_bug_check); +#endif + +static int __init pmtr_good_setup(char *__str) +{ + pmtmr_need_workaround = 0; + return 1; +} +__setup("pmtmr_good", pmtr_good_setup); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86"); diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index de5386b01d3..6b63a5aa1e4 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -92,22 +92,21 @@ asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); static int kstack_depth_to_print = 24; -struct notifier_block *i386die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); +ATOMIC_NOTIFIER_HEAD(i386die_chain); int register_die_notifier(struct notifier_block *nb) { - int err = 0; - unsigned long flags; - vmalloc_sync_all(); - spin_lock_irqsave(&die_notifier_lock, flags); - err = notifier_chain_register(&i386die_chain, nb); - spin_unlock_irqrestore(&die_notifier_lock, flags); - return err; + return atomic_notifier_chain_register(&i386die_chain, nb); } EXPORT_SYMBOL(register_die_notifier); +int unregister_die_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&i386die_chain, nb); +} +EXPORT_SYMBOL(unregister_die_notifier); + static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) { return p > (void *)tinfo && @@ -386,8 +385,12 @@ void die(const char * str, struct pt_regs * regs, long err) #endif if (nl) printk("\n"); - notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); - show_registers(regs); + if (notify_die(DIE_OOPS, str, regs, err, + current->thread.trap_no, SIGSEGV) != + NOTIFY_STOP) + show_registers(regs); + else + regs = NULL; } else printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); @@ -395,6 +398,9 @@ void die(const char * str, struct pt_regs * regs, long err) die.lock_owner = -1; spin_unlock_irqrestore(&die.lock, flags); + if (!regs) + return; + if (kexec_should_crash(current)) crash_kexec(regs); @@ -623,7 +629,7 @@ static DEFINE_SPINLOCK(nmi_print_lock); void die_nmi (struct pt_regs *regs, const char *msg) { - if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) == + if 
(notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) return; @@ -662,7 +668,7 @@ static void default_do_nmi(struct pt_regs * regs) reason = get_nmi_reason(); if (!(reason & 0xc0)) { - if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT) + if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) == NOTIFY_STOP) return; #ifdef CONFIG_X86_LOCAL_APIC @@ -678,7 +684,7 @@ static void default_do_nmi(struct pt_regs * regs) unknown_nmi_error(reason, regs); return; } - if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP) + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) return; if (reason & 0x80) mem_parity_error(reason, regs); diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index 8165626a5c3..70e560a1b79 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c @@ -1700,7 +1700,7 @@ after_handle_vic_irq(unsigned int irq) printk("VOYAGER SMP: CPU%d lost interrupt %d\n", cpu, irq); - for_each_cpu(real_cpu, mask) { + for_each_possible_cpu(real_cpu, mask) { outb(VIC_CPU_MASQUERADE_ENABLE | real_cpu, VIC_PROCESSOR_ID); diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index c4af9638dbf..fe6eb901326 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -31,6 +31,7 @@ #include <linux/nodemask.h> #include <linux/module.h> #include <linux/kexec.h> +#include <linux/pfn.h> #include <asm/e820.h> #include <asm/setup.h> @@ -352,17 +353,6 @@ void __init zone_sizes_init(void) { int nid; - /* - * Insert nodes into pgdat_list backward so they appear in order. - * Clobber node 0's links and NULL out pgdat_list before starting. - */ - pgdat_list = NULL; - for (nid = MAX_NUMNODES - 1; nid >= 0; nid--) { - if (!node_online(nid)) - continue; - NODE_DATA(nid)->pgdat_next = pgdat_list; - pgdat_list = NODE_DATA(nid); - } for_each_online_node(nid) { unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index 9db3242103b..2889567e21a 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -36,7 +36,7 @@ void show_mem(void) printk(KERN_INFO "Mem-info:\n"); show_free_areas(); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat_page_nr(pgdat, i); diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c index 1accce50c2c..1a2076ce6f6 100644 --- a/arch/i386/oprofile/nmi_int.c +++ b/arch/i386/oprofile/nmi_int.c @@ -122,7 +122,7 @@ static void nmi_save_registers(void * dummy) static void free_msrs(void) { int i; - for_each_cpu(i) { + for_each_possible_cpu(i) { kfree(cpu_msrs[i].counters); cpu_msrs[i].counters = NULL; kfree(cpu_msrs[i].controls); |