diff options
Diffstat (limited to 'arch/i386/kernel')
54 files changed, 769 insertions, 912 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index f10de0f2c5e..60c3f76dfca 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -4,10 +4,10 @@ extra-y := head.o init_task.o vmlinux.lds -obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ +obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ - doublefault.o quirks.o i8237.o + quirks.o i8237.o obj-y += cpu/ obj-y += timers/ @@ -25,6 +25,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_X86_NUMAQ) += numaq.o obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o obj-$(CONFIG_KPROBES) += kprobes.o @@ -33,6 +34,8 @@ obj-y += sysenter.o vsyscall.o obj-$(CONFIG_ACPI_SRAT) += srat.o obj-$(CONFIG_HPET_TIMER) += time_hpet.o obj-$(CONFIG_EFI) += efi.o efi_stub.o +obj-$(CONFIG_DOUBLEFAULT) += doublefault.o +obj-$(CONFIG_VM86) += vm86.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o EXTRA_AFLAGS := -traditional diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile index 267ca48e1b6..d51c7313cae 100644 --- a/arch/i386/kernel/acpi/Makefile +++ b/arch/i386/kernel/acpi/Makefile @@ -3,6 +3,6 @@ obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o ifneq ($(CONFIG_ACPI_PROCESSOR),) -obj-y += cstate.o +obj-y += cstate.o processor.o endif diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 447fa9e33ff..f21fa0d4482 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -108,7 +108,7 @@ char *__acpi_map_table(unsigned long phys_addr, unsigned long size) if (!phys_addr || !size) return NULL; - if (phys_addr < (end_pfn_map << PAGE_SHIFT)) + if (phys_addr+size <= (end_pfn_map << PAGE_SHIFT) + PAGE_SIZE) return __va(phys_addr); return NULL; @@ -464,7 +464,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) * success: return IRQ number (>=0) * failure: return < 0 */ -int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low) +int acpi_register_gsi(u32 gsi, int triggering, int polarity) { unsigned int irq; unsigned int plat_gsi = gsi; @@ -476,14 +476,14 @@ int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low) if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { extern void eisa_set_level_irq(unsigned int irq); - if (edge_level == ACPI_LEVEL_SENSITIVE) + if (triggering == ACPI_LEVEL_SENSITIVE) eisa_set_level_irq(gsi); } #endif #ifdef CONFIG_X86_IO_APIC if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) { - plat_gsi = mp_register_gsi(gsi, edge_level, active_high_low); + plat_gsi = mp_register_gsi(gsi, triggering, polarity); } #endif acpi_gsi_to_irq(plat_gsi, &irq); diff --git a/arch/i386/kernel/acpi/cstate.c b/arch/i386/kernel/acpi/cstate.c index 4c3036ba65d..25db49ef177 100644 --- a/arch/i386/kernel/acpi/cstate.c +++ b/arch/i386/kernel/acpi/cstate.c @@ -14,64 +14,6 @@ #include <acpi/processor.h> #include <asm/acpi.h> -static void acpi_processor_power_init_intel_pdc(struct acpi_processor_power - *pow) -{ - struct acpi_object_list *obj_list; - union acpi_object *obj; - u32 *buf; - - /* allocate and initialize pdc. It will be used later. */ - obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL); - if (!obj_list) { - printk(KERN_ERR "Memory allocation error\n"); - return; - } - - obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL); - if (!obj) { - printk(KERN_ERR "Memory allocation error\n"); - kfree(obj_list); - return; - } - - buf = kmalloc(12, GFP_KERNEL); - if (!buf) { - printk(KERN_ERR "Memory allocation error\n"); - kfree(obj); - kfree(obj_list); - return; - } - - buf[0] = ACPI_PDC_REVISION_ID; - buf[1] = 1; - buf[2] = ACPI_PDC_C_CAPABILITY_SMP; - - obj->type = ACPI_TYPE_BUFFER; - obj->buffer.length = 12; - obj->buffer.pointer = (u8 *) buf; - obj_list->count = 1; - obj_list->pointer = obj; - pow->pdc = obj_list; - - return; -} - -/* Initialize _PDC data based on the CPU vendor */ -void acpi_processor_power_init_pdc(struct acpi_processor_power *pow, - unsigned int cpu) -{ - struct cpuinfo_x86 *c = cpu_data + cpu; - - pow->pdc = NULL; - if (c->x86_vendor == X86_VENDOR_INTEL) - acpi_processor_power_init_intel_pdc(pow); - - return; -} - -EXPORT_SYMBOL(acpi_processor_power_init_pdc); - /* * Initialize bm_flags based on the CPU cache properties * On SMP it depends on cache configuration diff --git a/arch/i386/kernel/acpi/processor.c b/arch/i386/kernel/acpi/processor.c new file mode 100644 index 00000000000..9f4cc02717e --- /dev/null +++ b/arch/i386/kernel/acpi/processor.c @@ -0,0 +1,75 @@ +/* + * arch/i386/kernel/acpi/processor.c + * + * Copyright (C) 2005 Intel Corporation + * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> + * - Added _PDC for platforms with Intel CPUs + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/acpi.h> + +#include <acpi/processor.h> +#include <asm/acpi.h> + +static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) +{ + struct acpi_object_list *obj_list; + union acpi_object *obj; + u32 *buf; + + /* allocate and initialize pdc. It will be used later. */ + obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL); + if (!obj_list) { + printk(KERN_ERR "Memory allocation error\n"); + return; + } + + obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL); + if (!obj) { + printk(KERN_ERR "Memory allocation error\n"); + kfree(obj_list); + return; + } + + buf = kmalloc(12, GFP_KERNEL); + if (!buf) { + printk(KERN_ERR "Memory allocation error\n"); + kfree(obj); + kfree(obj_list); + return; + } + + buf[0] = ACPI_PDC_REVISION_ID; + buf[1] = 1; + buf[2] = ACPI_PDC_C_CAPABILITY_SMP; + + if (cpu_has(c, X86_FEATURE_EST)) + buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; + + obj->type = ACPI_TYPE_BUFFER; + obj->buffer.length = 12; + obj->buffer.pointer = (u8 *) buf; + obj_list->count = 1; + obj_list->pointer = obj; + pr->pdc = obj_list; + + return; +} + +/* Initialize _PDC data based on the CPU vendor */ +void arch_acpi_processor_init_pdc(struct acpi_processor *pr) +{ + unsigned int cpu = pr->id; + struct cpuinfo_x86 *c = cpu_data + cpu; + + pr->pdc = NULL; + if (c->x86_vendor == X86_VENDOR_INTEL) + init_intel_pdc(pr, c); + + return; +} + +EXPORT_SYMBOL(arch_acpi_processor_init_pdc); diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 496a2c9909f..acd3f1e34ca 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -26,6 +26,7 @@ #include <linux/kernel_stat.h> #include <linux/sysdev.h> #include <linux/cpu.h> +#include <linux/module.h> #include <asm/atomic.h> #include <asm/smp.h> @@ -37,10 +38,17 @@ #include <asm/i8253.h> #include <mach_apic.h> +#include <mach_ipi.h> #include "io_ports.h" /* + * cpu_mask that denotes the CPUs that needs timer interrupt coming in as + * IPIs in place of local APIC timers + */ +static cpumask_t timer_bcast_ipi; + +/* * Knob to control our willingness to enable the local APIC. */ int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ @@ -92,10 +100,6 @@ void __init apic_intr_init(void) /* Using APIC to generate smp_local_timer_interrupt? */ int using_apic_timer = 0; -static DEFINE_PER_CPU(int, prof_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_old_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_counter) = 1; - static int enabled_via_apicbase; void enable_NMI_through_LVT0 (void * dummy) @@ -721,7 +725,7 @@ static int __init apic_set_verbosity(char *str) apic_verbosity = APIC_VERBOSE; else printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug", str); + " use apic=verbose or apic=debug\n", str); return 0; } @@ -935,11 +939,16 @@ void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound; static void __setup_APIC_LVTT(unsigned int clocks) { unsigned int lvtt_value, tmp_value, ver; + int cpu = smp_processor_id(); ver = GET_APIC_VERSION(apic_read(APIC_LVR)); lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; if (!APIC_INTEGRATED(ver)) lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); + + if (cpu_isset(cpu, timer_bcast_ipi)) + lvtt_value |= APIC_LVT_MASKED; + apic_write_around(APIC_LVTT, lvtt_value); /* @@ -1072,7 +1081,7 @@ void __devinit setup_secondary_APIC_clock(void) setup_APIC_timer(calibration_result); } -void __devinit disable_APIC_timer(void) +void disable_APIC_timer(void) { if (using_apic_timer) { unsigned long v; @@ -1084,7 +1093,10 @@ void __devinit disable_APIC_timer(void) void enable_APIC_timer(void) { - if (using_apic_timer) { + int cpu = smp_processor_id(); + + if (using_apic_timer && + !cpu_isset(cpu, timer_bcast_ipi)) { unsigned long v; v = apic_read(APIC_LVTT); @@ -1092,33 +1104,31 @@ void enable_APIC_timer(void) } } -/* - * the frequency of the profiling timer can be changed - * by writing a multiplier value into /proc/profile. - */ -int setup_profiling_timer(unsigned int multiplier) +void switch_APIC_timer_to_ipi(void *cpumask) { - int i; + cpumask_t mask = *(cpumask_t *)cpumask; + int cpu = smp_processor_id(); - /* - * Sanity check. [at least 500 APIC cycles should be - * between APIC interrupts as a rule of thumb, to avoid - * irqs flooding us] - */ - if ( (!multiplier) || (calibration_result/multiplier < 500)) - return -EINVAL; - - /* - * Set the new multiplier for each CPU. CPUs don't start using the - * new values until the next timer interrupt in which they do process - * accounting. At that time they also adjust their APIC timers - * accordingly. - */ - for (i = 0; i < NR_CPUS; ++i) - per_cpu(prof_multiplier, i) = multiplier; + if (cpu_isset(cpu, mask) && + !cpu_isset(cpu, timer_bcast_ipi)) { + disable_APIC_timer(); + cpu_set(cpu, timer_bcast_ipi); + } +} +EXPORT_SYMBOL(switch_APIC_timer_to_ipi); - return 0; +void switch_ipi_to_APIC_timer(void *cpumask) +{ + cpumask_t mask = *(cpumask_t *)cpumask; + int cpu = smp_processor_id(); + + if (cpu_isset(cpu, mask) && + cpu_isset(cpu, timer_bcast_ipi)) { + cpu_clear(cpu, timer_bcast_ipi); + enable_APIC_timer(); + } } +EXPORT_SYMBOL(switch_ipi_to_APIC_timer); #undef APIC_DIVISOR @@ -1134,32 +1144,10 @@ int setup_profiling_timer(unsigned int multiplier) inline void smp_local_timer_interrupt(struct pt_regs * regs) { - int cpu = smp_processor_id(); - profile_tick(CPU_PROFILING, regs); - if (--per_cpu(prof_counter, cpu) <= 0) { - /* - * The multiplier may have changed since the last time we got - * to this point as a result of the user writing to - * /proc/profile. In this case we need to adjust the APIC - * timer accordingly. - * - * Interrupts are already masked off at this point. - */ - per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu); - if (per_cpu(prof_counter, cpu) != - per_cpu(prof_old_multiplier, cpu)) { - __setup_APIC_LVTT( - calibration_result/ - per_cpu(prof_counter, cpu)); - per_cpu(prof_old_multiplier, cpu) = - per_cpu(prof_counter, cpu); - } - #ifdef CONFIG_SMP - update_process_times(user_mode_vm(regs)); + update_process_times(user_mode_vm(regs)); #endif - } /* * We take the 'long' return path, and there every subsystem @@ -1206,6 +1194,43 @@ fastcall void smp_apic_timer_interrupt(struct pt_regs *regs) irq_exit(); } +#ifndef CONFIG_SMP +static void up_apic_timer_interrupt_call(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + /* + * the NMI deadlock-detector uses this. + */ + per_cpu(irq_stat, cpu).apic_timer_irqs++; + + smp_local_timer_interrupt(regs); +} +#endif + +void smp_send_timer_broadcast_ipi(struct pt_regs *regs) +{ + cpumask_t mask; + + cpus_and(mask, cpu_online_map, timer_bcast_ipi); + if (!cpus_empty(mask)) { +#ifdef CONFIG_SMP + send_IPI_mask(mask, LOCAL_TIMER_VECTOR); +#else + /* + * We can directly call the apic timer interrupt handler + * in UP case. Minus all irq related functions + */ + up_apic_timer_interrupt_call(regs); +#endif + } +} + +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + /* * This interrupt should _never_ happen with our APIC/SMP architecture */ diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 1e60acbed3c..05312a8abb8 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -219,6 +219,7 @@ #include <linux/sched.h> #include <linux/pm.h> #include <linux/pm_legacy.h> +#include <linux/capability.h> #include <linux/device.h> #include <linux/kernel.h> #include <linux/smp.h> @@ -303,17 +304,6 @@ extern int (*console_blank_hook)(int); #include "apm.h" /* - * Define to make all _set_limit calls use 64k limits. The APM 1.1 BIOS is - * supposed to provide limit information that it recognizes. Many machines - * do this correctly, but many others do not restrict themselves to their - * claimed limit. When this happens, they will cause a segmentation - * violation in the kernel at boot time. Most BIOS's, however, will - * respect a 64k limit, so we use that. If you want to be pedantic and - * hold your BIOS to its claims, then undefine this. - */ -#define APM_RELAX_SEGMENTS - -/* * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend. * This patched by Chad Miller <cmiller@surfsouth.com>, original code by * David Chen <chen@ctpa04.mit.edu> @@ -1075,22 +1065,23 @@ static int apm_engage_power_management(u_short device, int enable) static int apm_console_blank(int blank) { - int error; - u_short state; + int error, i; + u_short state; + static const u_short dev[3] = { 0x100, 0x1FF, 0x101 }; state = blank ? APM_STATE_STANDBY : APM_STATE_READY; - /* Blank the first display device */ - error = set_power_state(0x100, state); - if ((error != APM_SUCCESS) && (error != APM_NO_ERROR)) { - /* try to blank them all instead */ - error = set_power_state(0x1ff, state); - if ((error != APM_SUCCESS) && (error != APM_NO_ERROR)) - /* try to blank device one instead */ - error = set_power_state(0x101, state); + + for (i = 0; i < ARRAY_SIZE(dev); i++) { + error = set_power_state(dev[i], state); + + if ((error == APM_SUCCESS) || (error == APM_NO_ERROR)) + return 1; + + if (error == APM_NOT_ENGAGED) + break; } - if ((error == APM_SUCCESS) || (error == APM_NO_ERROR)) - return 1; - if (error == APM_NOT_ENGAGED) { + + if (error == APM_NOT_ENGAGED && state != APM_STATE_READY) { static int tried; int eng_error; if (tried++ == 0) { @@ -2233,8 +2224,8 @@ static struct dmi_system_id __initdata apm_dmi_table[] = { static int __init apm_init(void) { struct proc_dir_entry *apm_proc; + struct desc_struct *gdt; int ret; - int i; dmi_check_system(apm_dmi_table); @@ -2301,7 +2292,9 @@ static int __init apm_init(void) apm_info.disabled = 1; return -ENODEV; } +#ifdef CONFIG_PM_LEGACY pm_active = 1; +#endif /* * Set up a segment that references the real mode segment 0x40 @@ -2312,45 +2305,30 @@ static int __init apm_init(void) set_base(bad_bios_desc, __va((unsigned long)0x40 << 4)); _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4)); + /* + * Set up the long jump entry point to the APM BIOS, which is called + * from inline assembly. + */ apm_bios_entry.offset = apm_info.bios.offset; apm_bios_entry.segment = APM_CS; - for (i = 0; i < NR_CPUS; i++) { - struct desc_struct *gdt = get_cpu_gdt_table(i); - set_base(gdt[APM_CS >> 3], - __va((unsigned long)apm_info.bios.cseg << 4)); - set_base(gdt[APM_CS_16 >> 3], - __va((unsigned long)apm_info.bios.cseg_16 << 4)); - set_base(gdt[APM_DS >> 3], - __va((unsigned long)apm_info.bios.dseg << 4)); -#ifndef APM_RELAX_SEGMENTS - if (apm_info.bios.version == 0x100) { -#endif - /* For ASUS motherboard, Award BIOS rev 110 (and others?) */ - _set_limit((char *)&gdt[APM_CS >> 3], 64 * 1024 - 1); - /* For some unknown machine. */ - _set_limit((char *)&gdt[APM_CS_16 >> 3], 64 * 1024 - 1); - /* For the DEC Hinote Ultra CT475 (and others?) */ - _set_limit((char *)&gdt[APM_DS >> 3], 64 * 1024 - 1); -#ifndef APM_RELAX_SEGMENTS - } else { - _set_limit((char *)&gdt[APM_CS >> 3], - (apm_info.bios.cseg_len - 1) & 0xffff); - _set_limit((char *)&gdt[APM_CS_16 >> 3], - (apm_info.bios.cseg_16_len - 1) & 0xffff); - _set_limit((char *)&gdt[APM_DS >> 3], - (apm_info.bios.dseg_len - 1) & 0xffff); - /* workaround for broken BIOSes */ - if (apm_info.bios.cseg_len <= apm_info.bios.offset) - _set_limit((char *)&gdt[APM_CS >> 3], 64 * 1024 -1); - if (apm_info.bios.dseg_len <= 0x40) { /* 0x40 * 4kB == 64kB */ - /* for the BIOS that assumes granularity = 1 */ - gdt[APM_DS >> 3].b |= 0x800000; - printk(KERN_NOTICE "apm: we set the granularity of dseg.\n"); - } - } -#endif - } + /* + * The APM 1.1 BIOS is supposed to provide limit information that it + * recognizes. Many machines do this correctly, but many others do + * not restrict themselves to their claimed limit. When this happens, + * they will cause a segmentation violation in the kernel at boot time. + * Most BIOS's, however, will respect a 64k limit, so we use that. + * + * Note we only set APM segments on CPU zero, since we pin the APM + * code to that CPU. + */ + gdt = get_cpu_gdt_table(0); + set_base(gdt[APM_CS >> 3], + __va((unsigned long)apm_info.bios.cseg << 4)); + set_base(gdt[APM_CS_16 >> 3], + __va((unsigned long)apm_info.bios.cseg_16 << 4)); + set_base(gdt[APM_DS >> 3], + __va((unsigned long)apm_info.bios.dseg << 4)); apm_proc = create_proc_info_entry("apm", 0, NULL, apm_get_info); if (apm_proc) @@ -2407,7 +2385,9 @@ static void __exit apm_exit(void) exit_kapmd = 1; while (kapmd_running) schedule(); +#ifdef CONFIG_PM_LEGACY pm_active = 0; +#endif } module_init(apm_init); diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index e344ef88cfc..333578a4e91 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -161,8 +161,13 @@ static void __init init_amd(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_K6_MTRR, c->x86_capability); break; } - break; + if (c->x86_model == 10) { + /* AMD Geode LX is model 10 */ + /* placeholder for any needed mods */ + break; + } + break; case 6: /* An Athlon/Duron */ /* Bit 15 of Athlon specific MSR 15, needs to be 0 @@ -211,6 +216,12 @@ static void __init init_amd(struct cpuinfo_x86 *c) c->x86_max_cores = 1; } + if (cpuid_eax(0x80000000) >= 0x80000007) { + c->x86_power = cpuid_edx(0x80000007); + if (c->x86_power & (1<<8)) + set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); + } + #ifdef CONFIG_X86_HT /* * On a AMD dual core setup the lower bits of the APIC id @@ -228,6 +239,7 @@ static void __init init_amd(struct cpuinfo_x86 *c) cpu, c->x86_max_cores, cpu_core_id[cpu]); } #endif + } static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) diff --git a/arch/i386/kernel/cpu/changelog b/arch/i386/kernel/cpu/changelog deleted file mode 100644 index cef76b80a71..00000000000 --- a/arch/i386/kernel/cpu/changelog +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Enhanced CPU type detection by Mike Jagdis, Patrick St. Jean - * and Martin Mares, November 1997. - * - * Force Cyrix 6x86(MX) and M II processors to report MTRR capability - * and Cyrix "coma bug" recognition by - * Zoltán Böszörményi <zboszor@mail.externet.hu> February 1999. - * - * Force Centaur C6 processors to report MTRR capability. - * Bart Hartgers <bart@etpmod.phys.tue.nl>, May 1999. - * - * Intel Mobile Pentium II detection fix. Sean Gilley, June 1999. - * - * IDT Winchip tweaks, misc clean ups. - * Dave Jones <davej@suse.de>, August 1999 - * - * Better detection of Centaur/IDT WinChip models. - * Bart Hartgers <bart@etpmod.phys.tue.nl>, August 1999. - * - * Cleaned up cache-detection code - * Dave Jones <davej@suse.de>, October 1999 - * - * Added proper L2 cache detection for Coppermine - * Dragan Stancevic <visitor@valinux.com>, October 1999 - * - * Added the original array for capability flags but forgot to credit - * myself :) (~1998) Fixed/cleaned up some cpu_model_info and other stuff - * Jauder Ho <jauderho@carumba.com>, January 2000 - * - * Detection for Celeron coppermine, identify_cpu() overhauled, - * and a few other clean ups. - * Dave Jones <davej@suse.de>, April 2000 - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes <gareth@valinux.com>, May 2000 - * - * Added proper Cascades CPU and L2 cache detection for Cascades - * and 8-way type cache happy bunch from Intel:^) - * Dragan Stancevic <visitor@valinux.com>, May 2000 - * - * Forward port AMD Duron errata T13 from 2.2.17pre - * Dave Jones <davej@suse.de>, August 2000 - * - * Forward port lots of fixes/improvements from 2.2.18pre - * Cyrix III, Pentium IV support. - * Dave Jones <davej@suse.de>, October 2000 - * - * Massive cleanup of CPU detection and bug handling; - * Transmeta CPU detection, - * H. Peter Anvin <hpa@zytor.com>, November 2000 - * - * VIA C3 Support. - * Dave Jones <davej@suse.de>, March 2001 - * - * AMD Athlon/Duron/Thunderbird bluesmoke support. - * Dave Jones <davej@suse.de>, April 2001. - * - * CacheSize bug workaround updates for AMD, Intel & VIA Cyrix. - * Dave Jones <davej@suse.de>, September, October 2001. - * - */ - diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 31e344b26ba..15aee26ec2b 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -18,9 +18,6 @@ #include "cpu.h" -DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]); -EXPORT_PER_CPU_SYMBOL(cpu_gdt_table); - DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); @@ -207,7 +204,10 @@ static int __devinit have_cpuid_p(void) /* Do minimum CPU detection early. Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. - The others are not touched to avoid unwanted side effects. */ + The others are not touched to avoid unwanted side effects. + + WARNING: this function is only called on the BP. Don't add code here + that is supposed to run on all CPUs. */ static void __init early_cpu_detect(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -239,12 +239,6 @@ static void __init early_cpu_detect(void) if (cap0 & (1<<19)) c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; } - - early_intel_workaround(c); - -#ifdef CONFIG_X86_HT - phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; -#endif } void __devinit generic_identify(struct cpuinfo_x86 * c) @@ -292,6 +286,12 @@ void __devinit generic_identify(struct cpuinfo_x86 * c) get_model_name(c); /* Default name */ } } + + early_intel_workaround(c); + +#ifdef CONFIG_X86_HT + phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; +#endif } static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) @@ -599,11 +599,6 @@ void __devinit cpu_init(void) load_idt(&idt_descr); /* - * Delete NT - */ - __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl"); - - /* * Set up and load the per-CPU TSS and LDT */ atomic_inc(&init_mm.mm_count); @@ -617,8 +612,10 @@ void __devinit cpu_init(void) load_TR_desc(); load_LDT(&init_mm.context); +#ifdef CONFIG_DOUBLEFAULT /* Set up doublefault TSS pointer in the GDT */ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); +#endif /* Clear %fs and %gs. */ asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 871366b83b3..3852d0a4c1b 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -40,8 +40,6 @@ #include <linux/acpi.h> #include <acpi/processor.h> -#include "speedstep-est-common.h" - #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); @@ -297,68 +295,6 @@ acpi_cpufreq_guess_freq ( } -/* - * acpi_processor_cpu_init_pdc_est - let BIOS know about the SMP capabilities - * of this driver - * @perf: processor-specific acpi_io_data struct - * @cpu: CPU being initialized - * - * To avoid issues with legacy OSes, some BIOSes require to be informed of - * the SMP capabilities of OS P-state driver. Here we set the bits in _PDC - * accordingly, for Enhanced Speedstep. Actual call to _PDC is done in - * driver/acpi/processor.c - */ -static void -acpi_processor_cpu_init_pdc_est( - struct acpi_processor_performance *perf, - unsigned int cpu, - struct acpi_object_list *obj_list - ) -{ - union acpi_object *obj; - u32 *buf; - struct cpuinfo_x86 *c = cpu_data + cpu; - dprintk("acpi_processor_cpu_init_pdc_est\n"); - - if (!cpu_has(c, X86_FEATURE_EST)) - return; - - /* Initialize pdc. It will be used later. */ - if (!obj_list) - return; - - if (!(obj_list->count && obj_list->pointer)) - return; - - obj = obj_list->pointer; - if ((obj->buffer.length == 12) && obj->buffer.pointer) { - buf = (u32 *)obj->buffer.pointer; - buf[0] = ACPI_PDC_REVISION_ID; - buf[1] = 1; - buf[2] = ACPI_PDC_EST_CAPABILITY_SMP; - perf->pdc = obj_list; - } - return; -} - - -/* CPU specific PDC initialization */ -static void -acpi_processor_cpu_init_pdc( - struct acpi_processor_performance *perf, - unsigned int cpu, - struct acpi_object_list *obj_list - ) -{ - struct cpuinfo_x86 *c = cpu_data + cpu; - dprintk("acpi_processor_cpu_init_pdc\n"); - perf->pdc = NULL; - if (cpu_has(c, X86_FEATURE_EST)) - acpi_processor_cpu_init_pdc_est(perf, cpu, obj_list); - return; -} - - static int acpi_cpufreq_cpu_init ( struct cpufreq_policy *policy) @@ -367,15 +303,9 @@ acpi_cpufreq_cpu_init ( unsigned int cpu = policy->cpu; struct cpufreq_acpi_io *data; unsigned int result = 0; - - union acpi_object arg0 = {ACPI_TYPE_BUFFER}; - u32 arg0_buf[3]; - struct acpi_object_list arg_list = {1, &arg0}; + struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; dprintk("acpi_cpufreq_cpu_init\n"); - /* setup arg_list for _PDC settings */ - arg0.buffer.length = 12; - arg0.buffer.pointer = (u8 *) arg0_buf; data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL); if (!data) @@ -383,14 +313,12 @@ acpi_cpufreq_cpu_init ( acpi_io_data[cpu] = data; - acpi_processor_cpu_init_pdc(&data->acpi_data, cpu, &arg_list); result = acpi_processor_register_performance(&data->acpi_data, cpu); - data->acpi_data.pdc = NULL; if (result) goto err_free; - if (is_const_loops_cpu(cpu)) { + if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; } diff --git a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c index 04a40534520..2b62dee35c6 100644 --- a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -177,9 +177,10 @@ static unsigned int nforce2_fsb_read(int bootfsb) */ static int nforce2_set_fsb(unsigned int fsb) { - u32 pll, temp = 0; + u32 temp = 0; unsigned int tfsb; int diff; + int pll = 0; if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) { printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index 68a1fc87f4c..e11a09207ec 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -45,7 +45,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 1.50.4" +#define VERSION "version 1.60.0" #include "powernow-k8.h" /* serialize freq changes */ @@ -216,10 +216,10 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) do { wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); - if (i++ > 100) { - printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); - return 1; - } + if (i++ > 100) { + printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); + return 1; + } } while (query_current_values_with_pending_wait(data)); if (savefid != data->currfid) { @@ -336,7 +336,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid /* Phase 2 - core frequency transition */ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) { - u32 vcoreqfid, vcocurrfid, vcofiddiff, savevid = data->currvid; + u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid; if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n", @@ -359,9 +359,11 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) : vcoreqfid - vcocurrfid; while (vcofiddiff > 2) { + (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2); + if (reqfid > data->currfid) { if (data->currfid > LO_FID_TABLE_TOP) { - if (write_new_fid(data, data->currfid + 2)) { + if (write_new_fid(data, data->currfid + fid_interval)) { return 1; } } else { @@ -371,7 +373,7 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) } } } else { - if (write_new_fid(data, data->currfid - 2)) + if (write_new_fid(data, data->currfid - fid_interval)) return 1; } @@ -464,7 +466,7 @@ static int check_supported_cpu(unsigned int cpu) set_cpus_allowed(current, cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) { - printk(KERN_ERR "limiting to cpu %u failed\n", cpu); + printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); goto out; } @@ -474,7 +476,7 @@ static int check_supported_cpu(unsigned int cpu) eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || ((eax & CPUID_XFAM) != CPUID_XFAM_K8) || - ((eax & CPUID_XMOD) > CPUID_XMOD_REV_F)) { + ((eax & CPUID_XMOD) > CPUID_XMOD_REV_G)) { printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); goto out; } @@ -517,22 +519,24 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 printk(KERN_ERR BFX "maxvid exceeded with pstate %d\n", j); return -ENODEV; } - if ((pst[j].fid > MAX_FID) - || (pst[j].fid & 1) - || (j && (pst[j].fid < HI_FID_TABLE_BOTTOM))) { + if (pst[j].fid > MAX_FID) { + printk(KERN_ERR BFX "maxfid exceeded with pstate %d\n", j); + return -ENODEV; + } + if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) { /* Only first fid is allowed to be in "low" range */ - printk(KERN_ERR PFX "two low fids - %d : 0x%x\n", j, pst[j].fid); + printk(KERN_ERR BFX "two low fids - %d : 0x%x\n", j, pst[j].fid); return -EINVAL; } if (pst[j].fid < lastfid) lastfid = pst[j].fid; } if (lastfid & 1) { - printk(KERN_ERR PFX "lastfid invalid\n"); + printk(KERN_ERR BFX "lastfid invalid\n"); return -EINVAL; } if (lastfid > LO_FID_TABLE_TOP) - printk(KERN_INFO PFX "first fid not from lo freq table\n"); + printk(KERN_INFO BFX "first fid not from lo freq table\n"); return 0; } @@ -631,7 +635,7 @@ static int find_psb_table(struct powernow_k8_data *data) dprintk("table vers: 0x%x\n", psb->tableversion); if (psb->tableversion != PSB_VERSION_1_4) { - printk(KERN_INFO BFX "PSB table is not v1.4\n"); + printk(KERN_ERR BFX "PSB table is not v1.4\n"); return -ENODEV; } @@ -689,7 +693,7 @@ static int find_psb_table(struct powernow_k8_data *data) * BIOS and Kernel Developer's Guide, which is available on * www.amd.com */ - printk(KERN_INFO PFX "BIOS error - no PSB or ACPI _PSS objects\n"); + printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n"); return -ENODEV; } @@ -912,7 +916,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); if (smp_processor_id() != pol->cpu) { - printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu); + printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); goto err_out; } @@ -976,12 +980,15 @@ static int powernowk8_verify(struct cpufreq_policy *pol) } /* per CPU init entry point to the driver */ -static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) +static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; cpumask_t oldmask = CPU_MASK_ALL; int rc, i; + if (!cpu_online(pol->cpu)) + return -ENODEV; + if (!check_supported_cpu(pol->cpu)) return -ENODEV; @@ -1021,7 +1028,7 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); if (smp_processor_id() != pol->cpu) { - printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu); + printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); goto err_out; } @@ -1134,7 +1141,7 @@ static struct cpufreq_driver cpufreq_amd64_driver = { }; /* driver entry point for init */ -static int __init powernowk8_init(void) +static int __cpuinit powernowk8_init(void) { unsigned int i, supported_cpus = 0; @@ -1162,10 +1169,9 @@ static void __exit powernowk8_exit(void) cpufreq_unregister_driver(&cpufreq_amd64_driver); } -MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com."); +MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>"); MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); MODULE_LICENSE("GPL"); late_initcall(powernowk8_init); module_exit(powernowk8_exit); - diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h index b1e85bb3639..d0de37d58e9 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h @@ -42,7 +42,7 @@ struct powernow_k8_data { #define CPUID_XFAM 0x0ff00000 /* extended family */ #define CPUID_XFAM_K8 0 #define CPUID_XMOD 0x000f0000 /* extended model */ -#define CPUID_XMOD_REV_F 0x00040000 +#define CPUID_XMOD_REV_G 0x00060000 #define CPUID_USE_XFAM_XMOD 0x00000f00 #define CPUID_GET_MAX_CAPABILITIES 0x80000000 #define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 @@ -86,13 +86,14 @@ struct powernow_k8_data { * low fid table * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry * in the low fid table - * - the parts can only step at 200 MHz intervals, so 1.9 GHz is never valid + * - the parts can only step at <= 200 MHz intervals, odd fid values are + * supported in revision G and later revisions. * - lowest frequency must be >= interprocessor hypertransport link speed * (only applies to MP systems obviously) */ /* fids (frequency identifiers) are arranged in 2 tables - lo and hi */ -#define LO_FID_TABLE_TOP 6 /* fid values marking the boundary */ +#define LO_FID_TABLE_TOP 7 /* fid values marking the boundary */ #define HI_FID_TABLE_BOTTOM 8 /* between the low and high tables */ #define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */ @@ -106,7 +107,7 @@ struct powernow_k8_data { #define MIN_FREQ 800 /* Min and max freqs, per spec */ #define MAX_FREQ 5000 -#define INVALID_FID_MASK 0xffffffc1 /* not a valid fid if these bits are set */ +#define INVALID_FID_MASK 0xffffffc0 /* not a valid fid if these bits are set */ #define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */ #define VID_OFF 0x3f diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index edb9873e27e..c173c0fa117 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -35,8 +35,6 @@ #include <asm/processor.h> #include <asm/cpufeature.h> -#include "speedstep-est-common.h" - #define PFX "speedstep-centrino: " #define MAINTAINER "Jeremy Fitzhardinge <jeremy@goop.org>" @@ -364,22 +362,10 @@ static struct acpi_processor_performance p; */ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) { - union acpi_object arg0 = {ACPI_TYPE_BUFFER}; - u32 arg0_buf[3]; - struct acpi_object_list arg_list = {1, &arg0}; unsigned long cur_freq; int result = 0, i; unsigned int cpu = policy->cpu; - /* _PDC settings */ - arg0.buffer.length = 12; - arg0.buffer.pointer = (u8 *) arg0_buf; - arg0_buf[0] = ACPI_PDC_REVISION_ID; - arg0_buf[1] = 1; - arg0_buf[2] = ACPI_PDC_EST_CAPABILITY_SMP_MSR; - - p.pdc = &arg_list; - /* register with ACPI core */ if (acpi_processor_register_performance(&p, cpu)) { dprintk(KERN_INFO PFX "obtaining ACPI data failed\n"); @@ -493,12 +479,13 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) unsigned l, h; int ret; int i; + struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; /* Only Intel makes Enhanced Speedstep-capable CPUs */ if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) return -ENODEV; - if (is_const_loops_cpu(policy->cpu)) { + if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { centrino_driver.flags |= CPUFREQ_CONST_LOOPS; } diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h b/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h deleted file mode 100644 index 5ce995c9d86..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Routines common for drivers handling Enhanced Speedstep Technology - * Copyright (C) 2004 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> - * - * Licensed under the terms of the GNU GPL License version 2 -- see - * COPYING for details. - */ - -static inline int is_const_loops_cpu(unsigned int cpu) -{ - struct cpuinfo_x86 *c = cpu_data + cpu; - - if (c->x86_vendor != X86_VENDOR_INTEL || !cpu_has(c, X86_FEATURE_EST)) - return 0; - - /* - * on P-4s, the TSC runs with constant frequency independent of cpu freq - * when we use EST - */ - if (c->x86 == 0xf) - return 1; - - return 0; -} - diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c index 5b7d18a06af..b425cd3d183 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c @@ -40,6 +40,7 @@ static struct pci_dev *speedstep_chipset_dev; */ static unsigned int speedstep_processor = 0; +static u32 pmbase; /* * There are only two frequency states for each processor. Values @@ -56,34 +57,47 @@ static struct cpufreq_frequency_table speedstep_freqs[] = { /** - * speedstep_set_state - set the SpeedStep state - * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) + * speedstep_find_register - read the PMBASE address * - * Tries to change the SpeedStep state. + * Returns: -ENODEV if no register could be found */ -static void speedstep_set_state (unsigned int state) +static int speedstep_find_register (void) { - u32 pmbase; - u8 pm2_blk; - u8 value; - unsigned long flags; - - if (!speedstep_chipset_dev || (state > 0x1)) - return; + if (!speedstep_chipset_dev) + return -ENODEV; /* get PMBASE */ pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase); if (!(pmbase & 0x01)) { printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); - return; + return -ENODEV; } pmbase &= 0xFFFFFFFE; if (!pmbase) { printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); - return; + return -ENODEV; } + dprintk("pmbase is 0x%x\n", pmbase); + return 0; +} + +/** + * speedstep_set_state - set the SpeedStep state + * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) + * + * Tries to change the SpeedStep state. + */ +static void speedstep_set_state (unsigned int state) +{ + u8 pm2_blk; + u8 value; + unsigned long flags; + + if (state > 0x1) + return; + /* Disable IRQs */ local_irq_save(flags); @@ -315,10 +329,11 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) cpus_allowed = current->cpus_allowed; set_cpus_allowed(current, policy->cpus); - /* detect low and high frequency */ + /* detect low and high frequency and transition latency */ result = speedstep_get_freqs(speedstep_processor, &speedstep_freqs[SPEEDSTEP_LOW].frequency, &speedstep_freqs[SPEEDSTEP_HIGH].frequency, + &policy->cpuinfo.transition_latency, &speedstep_set_state); set_cpus_allowed(current, cpus_allowed); if (result) @@ -335,7 +350,6 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; policy->cur = speed; result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); @@ -400,6 +414,9 @@ static int __init speedstep_init(void) return -EINVAL; } + if (speedstep_find_register()) + return -ENODEV; + return cpufreq_register_driver(&speedstep_driver); } diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c index d368b3f5fce..7c47005a180 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c @@ -320,11 +320,13 @@ EXPORT_SYMBOL_GPL(speedstep_detect_processor); unsigned int speedstep_get_freqs(unsigned int processor, unsigned int *low_speed, unsigned int *high_speed, + unsigned int *transition_latency, void (*set_state) (unsigned int state)) { unsigned int prev_speed; unsigned int ret = 0; unsigned long flags; + struct timeval tv1, tv2; if ((!processor) || (!low_speed) || (!high_speed) || (!set_state)) return -EINVAL; @@ -337,7 +339,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, return -EIO; dprintk("previous speed is %u\n", prev_speed); - + local_irq_save(flags); /* switch to low state */ @@ -350,8 +352,17 @@ unsigned int speedstep_get_freqs(unsigned int processor, dprintk("low speed is %u\n", *low_speed); + /* start latency measurement */ + if (transition_latency) + do_gettimeofday(&tv1); + /* switch to high state */ set_state(SPEEDSTEP_HIGH); + + /* end latency measurement */ + if (transition_latency) + do_gettimeofday(&tv2); + *high_speed = speedstep_get_processor_frequency(processor); if (!*high_speed) { ret = -EIO; @@ -369,6 +380,25 @@ unsigned int speedstep_get_freqs(unsigned int processor, if (*high_speed != prev_speed) set_state(SPEEDSTEP_LOW); + if (transition_latency) { + *transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC + + tv2.tv_usec - tv1.tv_usec; + dprintk("transition latency is %u uSec\n", *transition_latency); + + /* convert uSec to nSec and add 20% for safety reasons */ + *transition_latency *= 1200; + + /* check if the latency measurement is too high or too low + * and set it to a safe value (500uSec) in that case + */ + if (*transition_latency > 10000000 || *transition_latency < 50000) { + printk (KERN_WARNING "speedstep: frequency transition measured seems out of " + "range (%u nSec), falling back to a safe one of %u nSec.\n", + *transition_latency, 500000); + *transition_latency = 500000; + } + } + out: local_irq_restore(flags); return (ret); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h index 261a2c9b7f6..6a727fd3a77 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h @@ -44,4 +44,5 @@ extern unsigned int speedstep_get_processor_frequency(unsigned int processor); extern unsigned int speedstep_get_freqs(unsigned int processor, unsigned int *low_speed, unsigned int *high_speed, + unsigned int *transition_latency, void (*set_state) (unsigned int state)); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c index 2718fb6f6ab..28cc5d524af 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c @@ -269,6 +269,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) result = speedstep_get_freqs(speedstep_processor, &speedstep_freqs[SPEEDSTEP_LOW].frequency, &speedstep_freqs[SPEEDSTEP_HIGH].frequency, + NULL, &speedstep_set_state); if (result) { diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index ff87cc22b32..75015975d03 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c @@ -343,6 +343,31 @@ static void __init init_cyrix(struct cpuinfo_x86 *c) } /* + * Handle National Semiconductor branded processors + */ +static void __devinit init_nsc(struct cpuinfo_x86 *c) +{ + /* There may be GX1 processors in the wild that are branded + * NSC and not Cyrix. + * + * This function only handles the GX processor, and kicks every + * thing else to the Cyrix init function above - that should + * cover any processors that might have been branded differently + * after NSC aquired Cyrix. + * + * If this breaks your GX1 horribly, please e-mail + * info-linux@ldcmail.amd.com to tell us. + */ + + /* Handle the GX (Formally known as the GX2) */ + + if (c->x86 == 5 && c->x86_model == 5) + display_cacheinfo(c); + else + init_cyrix(c); +} + +/* * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected * by the fact that they preserve the flags across the division of 5/2. * PII and PPro exhibit this behavior too, but they have cpuid available. @@ -422,7 +447,7 @@ int __init cyrix_init_cpu(void) static struct cpu_dev nsc_cpu_dev __initdata = { .c_vendor = "NSC", .c_ident = { "Geode by NSC" }, - .c_init = init_cyrix, + .c_init = init_nsc, .c_identify = generic_identify, }; diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 5e2da704f0f..8c0120186b9 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -183,10 +183,13 @@ static void __devinit init_intel(struct cpuinfo_x86 *c) } #endif - if (c->x86 == 15) + if (c->x86 == 15) set_bit(X86_FEATURE_P4, c->x86_capability); if (c->x86 == 6) set_bit(X86_FEATURE_P3, c->x86_capability); + if ((c->x86 == 0xf && c->x86_model >= 0x03) || + (c->x86 == 0x6 && c->x86_model >= 0x0e)) + set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); } diff --git a/arch/i386/kernel/cpu/mtrr/changelog b/arch/i386/kernel/cpu/mtrr/changelog deleted file mode 100644 index af136853595..00000000000 --- a/arch/i386/kernel/cpu/mtrr/changelog +++ /dev/null @@ -1,229 +0,0 @@ - ChangeLog - - Prehistory Martin Tischhäuser <martin@ikcbarka.fzk.de> - Initial register-setting code (from proform-1.0). - 19971216 Richard Gooch <rgooch@atnf.csiro.au> - Original version for /proc/mtrr interface, SMP-safe. - v1.0 - 19971217 Richard Gooch <rgooch@atnf.csiro.au> - Bug fix for ioctls()'s. - Added sample code in Documentation/mtrr.txt - v1.1 - 19971218 Richard Gooch <rgooch@atnf.csiro.au> - Disallow overlapping regions. - 19971219 Jens Maurer <jmaurer@menuett.rhein-main.de> - Register-setting fixups. - v1.2 - 19971222 Richard Gooch <rgooch@atnf.csiro.au> - Fixups for kernel 2.1.75. - v1.3 - 19971229 David Wragg <dpw@doc.ic.ac.uk> - Register-setting fixups and conformity with Intel conventions. - 19971229 Richard Gooch <rgooch@atnf.csiro.au> - Cosmetic changes and wrote this ChangeLog ;-) - 19980106 Richard Gooch <rgooch@atnf.csiro.au> - Fixups for kernel 2.1.78. - v1.4 - 19980119 David Wragg <dpw@doc.ic.ac.uk> - Included passive-release enable code (elsewhere in PCI setup). - v1.5 - 19980131 Richard Gooch <rgooch@atnf.csiro.au> - Replaced global kernel lock with private spinlock. - v1.6 - 19980201 Richard Gooch <rgooch@atnf.csiro.au> - Added wait for other CPUs to complete changes. - v1.7 - 19980202 Richard Gooch <rgooch@atnf.csiro.au> - Bug fix in definition of <set_mtrr> for UP. - v1.8 - 19980319 Richard Gooch <rgooch@atnf.csiro.au> - Fixups for kernel 2.1.90. - 19980323 Richard Gooch <rgooch@atnf.csiro.au> - Move SMP BIOS fixup before secondary CPUs call <calibrate_delay> - v1.9 - 19980325 Richard Gooch <rgooch@atnf.csiro.au> - Fixed test for overlapping regions: confused by adjacent regions - 19980326 Richard Gooch <rgooch@atnf.csiro.au> - Added wbinvd in <set_mtrr_prepare>. - 19980401 Richard Gooch <rgooch@atnf.csiro.au> - Bug fix for non-SMP compilation. - 19980418 David Wragg <dpw@doc.ic.ac.uk> - Fixed-MTRR synchronisation for SMP and use atomic operations - instead of spinlocks. - 19980418 Richard Gooch <rgooch@atnf.csiro.au> - Differentiate different MTRR register classes for BIOS fixup. - v1.10 - 19980419 David Wragg <dpw@doc.ic.ac.uk> - Bug fix in variable MTRR synchronisation. - v1.11 - 19980419 Richard Gooch <rgooch@atnf.csiro.au> - Fixups for kernel 2.1.97. - v1.12 - 19980421 Richard Gooch <rgooch@atnf.csiro.au> - Safer synchronisation across CPUs when changing MTRRs. - v1.13 - 19980423 Richard Gooch <rgooch@atnf.csiro.au> - Bugfix for SMP systems without MTRR support. - v1.14 - 19980427 Richard Gooch <rgooch@atnf.csiro.au> - Trap calls to <mtrr_add> and <mtrr_del> on non-MTRR machines. - v1.15 - 19980427 Richard Gooch <rgooch@atnf.csiro.au> - Use atomic bitops for setting SMP change mask. - v1.16 - 19980428 Richard Gooch <rgooch@atnf.csiro.au> - Removed spurious diagnostic message. - v1.17 - 19980429 Richard Gooch <rgooch@atnf.csiro.au> - Moved register-setting macros into this file. - Moved setup code from init/main.c to i386-specific areas. - v1.18 - 19980502 Richard Gooch <rgooch@atnf.csiro.au> - Moved MTRR detection outside conditionals in <mtrr_init>. - v1.19 - 19980502 Richard Gooch <rgooch@atnf.csiro.au> - Documentation improvement: mention Pentium II and AGP. - v1.20 - 19980521 Richard Gooch <rgooch@atnf.csiro.au> - Only manipulate interrupt enable flag on local CPU. - Allow enclosed uncachable regions. - v1.21 - 19980611 Richard Gooch <rgooch@atnf.csiro.au> - Always define <main_lock>. - v1.22 - 19980901 Richard Gooch <rgooch@atnf.csiro.au> - Removed module support in order to tidy up code. - Added sanity check for <mtrr_add>/<mtrr_del> before <mtrr_init>. - Created addition queue for prior to SMP commence. - v1.23 - 19980902 Richard Gooch <rgooch@atnf.csiro.au> - Ported patch to kernel 2.1.120-pre3. - v1.24 - 19980910 Richard Gooch <rgooch@atnf.csiro.au> - Removed sanity checks and addition queue: Linus prefers an OOPS. - v1.25 - 19981001 Richard Gooch <rgooch@atnf.csiro.au> - Fixed harmless compiler warning in include/asm-i386/mtrr.h - Fixed version numbering and history for v1.23 -> v1.24. - v1.26 - 19990118 Richard Gooch <rgooch@atnf.csiro.au> - Added devfs support. - v1.27 - 19990123 Richard Gooch <rgooch@atnf.csiro.au> - Changed locking to spin with reschedule. - Made use of new <smp_call_function>. - v1.28 - 19990201 Zoltán Böszörményi <zboszor@mail.externet.hu> - Extended the driver to be able to use Cyrix style ARRs. - 19990204 Richard Gooch <rgooch@atnf.csiro.au> - Restructured Cyrix support. - v1.29 - 19990204 Zoltán Böszörményi <zboszor@mail.externet.hu> - Refined ARR support: enable MAPEN in set_mtrr_prepare() - and disable MAPEN in set_mtrr_done(). - 19990205 Richard Gooch <rgooch@atnf.csiro.au> - Minor cleanups. - v1.30 - 19990208 Zoltán Böszörményi <zboszor@mail.externet.hu> - Protect plain 6x86s (and other processors without the - Page Global Enable feature) against accessing CR4 in - set_mtrr_prepare() and set_mtrr_done(). - 19990210 Richard Gooch <rgooch@atnf.csiro.au> - Turned <set_mtrr_up> and <get_mtrr> into function pointers. - v1.31 - 19990212 Zoltán Böszörményi <zboszor@mail.externet.hu> - Major rewrite of cyrix_arr_init(): do not touch ARRs, - leave them as the BIOS have set them up. - Enable usage of all 8 ARRs. - Avoid multiplications by 3 everywhere and other - code clean ups/speed ups. - 19990213 Zoltán Böszörményi <zboszor@mail.externet.hu> - Set up other Cyrix processors identical to the boot cpu. - Since Cyrix don't support Intel APIC, this is l'art pour l'art. - Weigh ARRs by size: - If size <= 32M is given, set up ARR# we were given. - If size > 32M is given, set up ARR7 only if it is free, - fail otherwise. - 19990214 Zoltán Böszörményi <zboszor@mail.externet.hu> - Also check for size >= 256K if we are to set up ARR7, - mtrr_add() returns the value it gets from set_mtrr() - 19990218 Zoltán Böszörményi <zboszor@mail.externet.hu> - Remove Cyrix "coma bug" workaround from here. - Moved to linux/arch/i386/kernel/setup.c and - linux/include/asm-i386/bugs.h - 19990228 Richard Gooch <rgooch@atnf.csiro.au> - Added MTRRIOC_KILL_ENTRY ioctl(2) - Trap for counter underflow in <mtrr_file_del>. - Trap for 4 MiB aligned regions for PPro, stepping <= 7. - 19990301 Richard Gooch <rgooch@atnf.csiro.au> - Created <get_free_region> hook. - 19990305 Richard Gooch <rgooch@atnf.csiro.au> - Temporarily disable AMD support now MTRR capability flag is set. - v1.32 - 19990308 Zoltán Böszörményi <zboszor@mail.externet.hu> - Adjust my changes (19990212-19990218) to Richard Gooch's - latest changes. (19990228-19990305) - v1.33 - 19990309 Richard Gooch <rgooch@atnf.csiro.au> - Fixed typo in <printk> message. - 19990310 Richard Gooch <rgooch@atnf.csiro.au> - Support K6-II/III based on Alan Cox's <alan@redhat.com> patches. - v1.34 - 19990511 Bart Hartgers <bart@etpmod.phys.tue.nl> - Support Centaur C6 MCR's. - 19990512 Richard Gooch <rgooch@atnf.csiro.au> - Minor cleanups. - v1.35 - 19990707 Zoltán Böszörményi <zboszor@mail.externet.hu> - Check whether ARR3 is protected in cyrix_get_free_region() - and mtrr_del(). The code won't attempt to delete or change it - from now on if the BIOS protected ARR3. It silently skips ARR3 - in cyrix_get_free_region() or returns with an error code from - mtrr_del(). - 19990711 Zoltán Böszörményi <zboszor@mail.externet.hu> - Reset some bits in the CCRs in cyrix_arr_init() to disable SMM - if ARR3 isn't protected. This is needed because if SMM is active - and ARR3 isn't protected then deleting and setting ARR3 again - may lock up the processor. With SMM entirely disabled, it does - not happen. - 19990812 Zoltán Böszörményi <zboszor@mail.externet.hu> - Rearrange switch() statements so the driver accomodates to - the fact that the AMD Athlon handles its MTRRs the same way - as Intel does. - 19990814 Zoltán Böszörményi <zboszor@mail.externet.hu> - Double check for Intel in mtrr_add()'s big switch() because - that revision check is only valid for Intel CPUs. - 19990819 Alan Cox <alan@redhat.com> - Tested Zoltan's changes on a pre production Athlon - 100% - success. - 19991008 Manfred Spraul <manfreds@colorfullife.com> - replaced spin_lock_reschedule() with a normal semaphore. - v1.36 - 20000221 Richard Gooch <rgooch@atnf.csiro.au> - Compile fix if procfs and devfs not enabled. - Formatting changes. - v1.37 - 20001109 H. Peter Anvin <hpa@zytor.com> - Use the new centralized CPU feature detects. - - v1.38 - 20010309 Dave Jones <davej@suse.de> - Add support for Cyrix III. - - v1.39 - 20010312 Dave Jones <davej@suse.de> - Ugh, I broke AMD support. - Reworked fix by Troels Walsted Hansen <troels@thule.no> - - v1.40 - 20010327 Dave Jones <davej@suse.de> - Adapted Cyrix III support to include VIA C3. - - v2.0 - 20020306 Patrick Mochel <mochel@osdl.org> - Split mtrr.c -> mtrr/*.c - Converted to Linux Kernel Coding Style - Fixed several minor nits in form - Moved some SMP-only functions out, so they can be used - for power management in the future. - TODO: Fix user interface cruft. diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c index cf39e205d33..5ac051bb9d5 100644 --- a/arch/i386/kernel/cpu/mtrr/if.c +++ b/arch/i386/kernel/cpu/mtrr/if.c @@ -1,5 +1,6 @@ #include <linux/init.h> #include <linux/proc_fs.h> +#include <linux/capability.h> #include <linux/ctype.h> #include <linux/module.h> #include <linux/seq_file.h> diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index e7921315ae9..89a85af33d2 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -3,6 +3,7 @@ #include <linux/string.h> #include <asm/semaphore.h> #include <linux/seq_file.h> +#include <linux/cpufreq.h> /* * Get CPU information for use by the procfs. @@ -28,7 +29,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, - NULL, "fxsr_opt", NULL, NULL, NULL, "lm", "3dnowext", "3dnow", + NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow", /* Transmeta-defined */ "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, @@ -39,7 +40,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* Other (Linux-defined) */ "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "constant_tsc", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -56,11 +57,21 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", NULL, NULL, NULL, NULL, NULL, NULL, + "lahf_lm", "cmp_legacy", "svm", NULL, "cr8legacy", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; + static char *x86_power_flags[] = { + "ts", /* temperature sensor */ + "fid", /* frequency id control */ + "vid", /* voltage id control */ + "ttp", /* thermal trip */ + "tm", + "stc", + NULL, + /* nothing */ /* constant_tsc - moved to flags */ + }; struct cpuinfo_x86 *c = v; int i, n = c - cpu_data; int fpu_exception; @@ -86,8 +97,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "stepping\t: unknown\n"); if ( cpu_has(c, X86_FEATURE_TSC) ) { + unsigned int freq = cpufreq_quick_get(n); + if (!freq) + freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", - cpu_khz / 1000, (cpu_khz % 1000)); + freq / 1000, (freq % 1000)); } /* Cache size */ @@ -127,6 +141,17 @@ static int show_cpuinfo(struct seq_file *m, void *v) x86_cap_flags[i] != NULL ) seq_printf(m, " %s", x86_cap_flags[i]); + for (i = 0; i < 32; i++) + if (c->x86_power & (1 << i)) { + if (i < ARRAY_SIZE(x86_power_flags) && + x86_power_flags[i]) + seq_printf(m, "%s%s", + x86_power_flags[i][0]?" ":"", + x86_power_flags[i]); + else + seq_printf(m, " [%d]", i); + } + seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n", c->loops_per_jiffy/(500000/HZ), (c->loops_per_jiffy/(5000/HZ)) % 100); diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index 13bae799e62..006141d1c12 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c @@ -117,14 +117,13 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, { char __user *tmp = buf; u32 data[4]; - size_t rv; u32 reg = *ppos; int cpu = iminor(file->f_dentry->d_inode); if (count % 16) return -EINVAL; /* Invalid chunk size */ - for (rv = 0; count; count -= 16) { + for (; count; count -= 16) { do_cpuid(cpu, reg, data); if (copy_to_user(tmp, &data, 16)) return -EFAULT; diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 0248e084017..d49dbe8dc96 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -25,7 +25,6 @@ #include <mach_ipi.h> -note_buf_t crash_notes[NR_CPUS]; /* This keeps a track of which one is crashing cpu. */ static int crashing_cpu; @@ -72,7 +71,9 @@ static void crash_save_this_cpu(struct pt_regs *regs, int cpu) * squirrelled away. ELF notes happen to provide * all of that that no need to invent something new. */ - buf = &crash_notes[cpu][0]; + buf = (u32*)per_cpu_ptr(crash_notes, cpu); + if (!buf) + return; memset(&prstatus, 0, sizeof(prstatus)); prstatus.pr_pid = current->pid; elf_core_copy_regs(&prstatus.pr_reg, regs); @@ -81,51 +82,12 @@ static void crash_save_this_cpu(struct pt_regs *regs, int cpu) final_note(buf); } -static void crash_get_current_regs(struct pt_regs *regs) -{ - __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx)); - __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx)); - __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx)); - __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi)); - __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi)); - __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp)); - __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax)); - __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp)); - __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss)); - __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs)); - __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds)); - __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes)); - __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags)); - - regs->eip = (unsigned long)current_text_addr(); -} - -/* CPU does not save ss and esp on stack if execution is already - * running in kernel mode at the time of NMI occurrence. This code - * fixes it. - */ -static void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs) -{ - memcpy(newregs, oldregs, sizeof(*newregs)); - newregs->esp = (unsigned long)&(oldregs->esp); - __asm__ __volatile__("xorl %eax, %eax;"); - __asm__ __volatile__ ("movw %%ss, %%ax;" :"=a"(newregs->xss)); -} - -/* We may have saved_regs from where the error came from - * or it is NULL if via a direct panic(). - */ -static void crash_save_self(struct pt_regs *saved_regs) +static void crash_save_self(struct pt_regs *regs) { - struct pt_regs regs; int cpu; cpu = smp_processor_id(); - if (saved_regs) - crash_setup_regs(®s, saved_regs); - else - crash_get_current_regs(®s); - crash_save_this_cpu(®s, cpu); + crash_save_this_cpu(regs, cpu); } #ifdef CONFIG_SMP @@ -144,7 +106,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) local_irq_disable(); if (!user_mode(regs)) { - crash_setup_regs(&fixed_regs, regs); + crash_fixup_ss_esp(&fixed_regs, regs); regs = &fixed_regs; } crash_save_this_cpu(regs, cpu); diff --git a/arch/i386/kernel/crash_dump.c b/arch/i386/kernel/crash_dump.c new file mode 100644 index 00000000000..3f532df488b --- /dev/null +++ b/arch/i386/kernel/crash_dump.c @@ -0,0 +1,74 @@ +/* + * kernel/crash_dump.c - Memory preserving reboot related code. + * + * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) + * Copyright (C) IBM Corporation, 2004. All rights reserved + */ + +#include <linux/errno.h> +#include <linux/highmem.h> +#include <linux/crash_dump.h> + +#include <asm/uaccess.h> + +static void *kdump_buf_page; + +/** + * copy_oldmem_page - copy one page from "oldmem" + * @pfn: page frame number to be copied + * @buf: target memory address for the copy; this can be in kernel address + * space or user address space (see @userbuf) + * @csize: number of bytes to copy + * @offset: offset in bytes into the page (based on pfn) to begin the copy + * @userbuf: if set, @buf is in user address space, use copy_to_user(), + * otherwise @buf is in kernel address space, use memcpy(). + * + * Copy a page from "oldmem". For this page, there is no pte mapped + * in the current kernel. We stitch up a pte, similar to kmap_atomic. + * + * Calling copy_to_user() in atomic context is not desirable. Hence first + * copying the data to a pre-allocated kernel page and then copying to user + * space in non-atomic context. + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, int userbuf) +{ + void *vaddr; + + if (!csize) + return 0; + + vaddr = kmap_atomic_pfn(pfn, KM_PTE0); + + if (!userbuf) { + memcpy(buf, (vaddr + offset), csize); + kunmap_atomic(vaddr, KM_PTE0); + } else { + if (!kdump_buf_page) { + printk(KERN_WARNING "Kdump: Kdump buffer page not" + " allocated\n"); + return -EFAULT; + } + copy_page(kdump_buf_page, vaddr); + kunmap_atomic(vaddr, KM_PTE0); + if (copy_to_user(buf, (kdump_buf_page + offset), csize)) + return -EFAULT; + } + + return csize; +} + +static int __init kdump_buf_page_init(void) +{ + int ret = 0; + + kdump_buf_page = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!kdump_buf_page) { + printk(KERN_WARNING "Kdump: Failed to allocate kdump buffer" + " page\n"); + ret = -ENOMEM; + } + + return ret; +} +arch_initcall(kdump_buf_page_init); diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index 58516e2ac17..6a93d75db43 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -4,7 +4,7 @@ #include <linux/module.h> #include <linux/dmi.h> #include <linux/bootmem.h> - +#include <linux/slab.h> static char * __init dmi_string(struct dmi_header *dm, u8 s) { @@ -19,7 +19,7 @@ static char * __init dmi_string(struct dmi_header *dm, u8 s) } if (*bp != 0) { - str = alloc_bootmem(strlen(bp) + 1); + str = dmi_alloc(strlen(bp) + 1); if (str != NULL) strcpy(str, bp); else @@ -40,7 +40,7 @@ static int __init dmi_table(u32 base, int len, int num, u8 *buf, *data; int i = 0; - buf = bt_ioremap(base, len); + buf = dmi_ioremap(base, len); if (buf == NULL) return -1; @@ -65,7 +65,7 @@ static int __init dmi_table(u32 base, int len, int num, data += 2; i++; } - bt_iounmap(buf, len); + dmi_iounmap(buf, len); return 0; } @@ -112,7 +112,7 @@ static void __init dmi_save_devices(struct dmi_header *dm) if ((*d & 0x80) == 0) continue; - dev = alloc_bootmem(sizeof(*dev)); + dev = dmi_alloc(sizeof(*dev)); if (!dev) { printk(KERN_ERR "dmi_save_devices: out of memory.\n"); break; @@ -131,7 +131,7 @@ static void __init dmi_save_ipmi_device(struct dmi_header *dm) struct dmi_device *dev; void * data; - data = alloc_bootmem(dm->length); + data = dmi_alloc(dm->length); if (data == NULL) { printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); return; @@ -139,7 +139,7 @@ static void __init dmi_save_ipmi_device(struct dmi_header *dm) memcpy(data, dm, dm->length); - dev = alloc_bootmem(sizeof(*dev)); + dev = dmi_alloc(sizeof(*dev)); if (!dev) { printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); return; @@ -221,7 +221,7 @@ void __init dmi_scan_machine(void) } } -out: printk(KERN_INFO "DMI not present.\n"); +out: printk(KERN_INFO "DMI not present or invalid.\n"); } diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index e50b9315524..4d704724b2f 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -323,6 +323,7 @@ work_notifysig: # deal with pending signals and ALIGN work_notifysig_v86: +#ifdef CONFIG_VM86 pushl %ecx # save ti_flags for do_notify_resume call save_v86_state # %eax contains pt_regs pointer popl %ecx @@ -330,6 +331,7 @@ work_notifysig_v86: xorl %edx, %edx call do_notify_resume jmp resume_userspace +#endif # perform syscall exit tracing ALIGN @@ -657,6 +659,7 @@ ENTRY(spurious_interrupt_bug) pushl $do_spurious_interrupt_bug jmp error_code +.section .rodata,"a" #include "syscall_table.S" syscall_table_size=(.-sys_call_table) diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index e437fb36749..5884469f6bf 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -504,19 +504,24 @@ ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* 0x80 TSS descriptor */ .quad 0x0000000000000000 /* 0x88 LDT descriptor */ - /* Segments used for calling PnP BIOS */ - .quad 0x00c09a0000000000 /* 0x90 32-bit code */ - .quad 0x00809a0000000000 /* 0x98 16-bit code */ - .quad 0x0080920000000000 /* 0xa0 16-bit data */ - .quad 0x0080920000000000 /* 0xa8 16-bit data */ - .quad 0x0080920000000000 /* 0xb0 16-bit data */ + /* + * Segments used for calling PnP BIOS have byte granularity. + * They code segments and data segments have fixed 64k limits, + * the transfer segment sizes are set at run time. + */ + .quad 0x00409a000000ffff /* 0x90 32-bit code */ + .quad 0x00009a000000ffff /* 0x98 16-bit code */ + .quad 0x000092000000ffff /* 0xa0 16-bit data */ + .quad 0x0000920000000000 /* 0xa8 16-bit data */ + .quad 0x0000920000000000 /* 0xb0 16-bit data */ + /* * The APM segments have byte granularity and their bases - * and limits are set at run time. + * are set at run time. All have 64k limits. */ - .quad 0x00409a0000000000 /* 0xb8 APM CS code */ - .quad 0x00009a0000000000 /* 0xc0 APM CS 16 code (16 bit) */ - .quad 0x0040920000000000 /* 0xc8 APM DS data */ + .quad 0x00409a000000ffff /* 0xb8 APM CS code */ + .quad 0x00009a000000ffff /* 0xc0 APM CS 16 code (16 bit) */ + .quad 0x004092000000ffff /* 0xc8 APM DS data */ .quad 0x0000920000000000 /* 0xd0 - ESPFIX 16-bit SS */ .quad 0x0000000000000000 /* 0xd8 - unused */ @@ -525,3 +530,5 @@ ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* 0xf0 - unused */ .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ + /* Be sure this is zeroed to avoid false validations in Xen */ + .fill PAGE_SIZE_asm / 8 - GDT_ENTRIES,8,0 diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index 180f070d03c..3999bec50c3 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c @@ -3,8 +3,7 @@ #include <asm/checksum.h> #include <asm/desc.h> -/* This is definitely a GPL-only symbol */ -EXPORT_SYMBOL_GPL(cpu_gdt_table); +EXPORT_SYMBOL_GPL(cpu_gdt_descr); EXPORT_SYMBOL(__down_failed); EXPORT_SYMBOL(__down_failed_interruptible); diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c index 9caa8e8db80..cff95d10a4d 100644 --- a/arch/i386/kernel/init_task.c +++ b/arch/i386/kernel/init_task.c @@ -42,5 +42,5 @@ EXPORT_SYMBOL(init_task); * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. */ -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS; +DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 22c8675c79f..f2dd218d88c 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -1649,7 +1649,7 @@ static void __init enable_IO_APIC(void) for(apic = 0; apic < nr_ioapics; apic++) { int pin; /* See if any of the pins is in ExtINT mode */ - for(pin = 0; pin < nr_ioapic_registers[i]; pin++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { struct IO_APIC_route_entry entry; spin_lock_irqsave(&ioapic_lock, flags); *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); @@ -1722,8 +1722,8 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest.physical.physical_dest = 0; - + entry.dest.physical.physical_dest = + GET_APIC_ID(apic_read(APIC_ID)); /* * Add it to the IO-APIC irq-routing table: diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c index b59a34dbe26..79026f026b8 100644 --- a/arch/i386/kernel/ioport.c +++ b/arch/i386/kernel/ioport.c @@ -7,6 +7,7 @@ #include <linux/sched.h> #include <linux/kernel.h> +#include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/ioport.h> diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 1a201a93286..f3a9c78c4a2 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -19,7 +19,7 @@ #include <linux/cpu.h> #include <linux/delay.h> -DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp; +DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; EXPORT_PER_CPU_SYMBOL(irq_stat); #ifndef CONFIG_X86_LOCAL_APIC diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 19edcd526ba..6483eeb1a4e 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -58,13 +58,9 @@ static inline int is_IF_modifier(kprobe_opcode_t opcode) int __kprobes arch_prepare_kprobe(struct kprobe *p) { - return 0; -} - -void __kprobes arch_copy_kprobe(struct kprobe *p) -{ memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); p->opcode = *p->addr; + return 0; } void __kprobes arch_arm_kprobe(struct kprobe *p) @@ -81,10 +77,6 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -void __kprobes arch_remove_kprobe(struct kprobe *p) -{ -} - static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) { kcb->prev_kprobe.kp = kprobe_running(); @@ -196,6 +188,19 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) kcb->kprobe_status = KPROBE_REENTER; return 1; } else { + if (regs->eflags & VM_MASK) { + /* We are in virtual-8086 mode. Return 0 */ + goto no_kprobe; + } + if (*addr != BREAKPOINT_INSTRUCTION) { + /* The breakpoint instruction was removed by + * another cpu right after we hit, no further + * handling of this interrupt is appropriate + */ + regs->eip -= sizeof(kprobe_opcode_t); + ret = 1; + goto no_kprobe; + } p = __get_cpu_var(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { goto ss_probe; diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index 165f13158c6..d3fdf0057d8 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -70,6 +70,7 @@ */ //#define DEBUG /* pr_debug */ +#include <linux/capability.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/sched.h> @@ -165,7 +166,7 @@ static void collect_cpu_info (void *unused) wrmsr(MSR_IA32_UCODE_REV, 0, 0); /* see notes above for revision 1.07. Apparent chip bug */ - serialize_cpu(); + sync_core(); /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev); pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", @@ -379,7 +380,7 @@ static void do_update_one (void * unused) wrmsr(MSR_IA32_UCODE_REV, 0, 0); /* see notes above for revision 1.07. Apparent chip bug */ - serialize_cpu(); + sync_core(); /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 1ca5269b1e8..0102f3d50e5 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -38,6 +38,12 @@ int smp_found_config; unsigned int __initdata maxcpus = NR_CPUS; +#ifdef CONFIG_HOTPLUG_CPU +#define CPU_HOTPLUG_ENABLED (1) +#else +#define CPU_HOTPLUG_ENABLED (0) +#endif + /* * Various Linux-internal data structures created from the * MP-table. @@ -219,14 +225,18 @@ static void __devinit MP_processor_info (struct mpc_config_processor *m) cpu_set(num_processors, cpu_possible_map); num_processors++; - if ((num_processors > 8) && - ((APIC_XAPIC(ver) && - (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) || - (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))) - def_to_bigsmp = 1; - else - def_to_bigsmp = 0; - + if (CPU_HOTPLUG_ENABLED || (num_processors > 8)) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(ver)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: + def_to_bigsmp = 1; + } + } bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; } @@ -1070,7 +1080,7 @@ void __init mp_config_acpi_legacy_irqs (void) #define MAX_GSI_NUM 4096 -int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) +int mp_register_gsi (u32 gsi, int triggering, int polarity) { int ioapic = -1; int ioapic_pin = 0; @@ -1119,7 +1129,7 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); - if (edge_level) { + if (triggering == ACPI_LEVEL_SENSITIVE) { /* * For PCI devices assign IRQs in order, avoiding gaps * due to unused I/O APIC pins. @@ -1141,8 +1151,8 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) } io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, - edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1, - active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1); + triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, + polarity == ACPI_ACTIVE_HIGH ? 0 : 1); return gsi; } diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index 44470fea430..1d0a55e6876 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c @@ -172,7 +172,6 @@ static ssize_t msr_read(struct file *file, char __user * buf, { u32 __user *tmp = (u32 __user *) buf; u32 data[2]; - size_t rv; u32 reg = *ppos; int cpu = iminor(file->f_dentry->d_inode); int err; @@ -180,7 +179,7 @@ static ssize_t msr_read(struct file *file, char __user * buf, if (count % 8) return -EINVAL; /* Invalid chunk size */ - for (rv = 0; count; count -= 8) { + for (; count; count -= 8) { err = do_rdmsr(cpu, reg, &data[0], &data[1]); if (err) return err; diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index df6c2bcde06..2185377fdde 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -48,6 +48,7 @@ #include <asm/processor.h> #include <asm/i387.h> #include <asm/desc.h> +#include <asm/vm86.h> #ifdef CONFIG_MATH_EMULATION #include <asm/math_emu.h> #endif @@ -308,9 +309,7 @@ void show_regs(struct pt_regs * regs) cr0 = read_cr0(); cr2 = read_cr2(); cr3 = read_cr3(); - if (current_cpu_data.x86 > 4) { - cr4 = read_cr4(); - } + cr4 = read_cr4_safe(); printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); show_trace(NULL, ®s->esp); } @@ -404,17 +403,7 @@ void flush_thread(void) void release_thread(struct task_struct *dead_task) { - if (dead_task->mm) { - // temporary debugging check - if (dead_task->mm->context.size) { - printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", - dead_task->comm, - dead_task->mm->context.ldt, - dead_task->mm->context.size); - BUG(); - } - } - + BUG_ON(dead_task->mm); release_vm86_irqs(dead_task); } @@ -435,18 +424,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, struct task_struct *tsk; int err; - childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; - /* - * The below -8 is to reserve 8 bytes on top of the ring0 stack. - * This is necessary to guarantee that the entire "struct pt_regs" - * is accessable even if the CPU haven't stored the SS/ESP registers - * on the stack (interrupt gate does not save these registers - * when switching to the same priv ring). - * Therefore beware: accessing the xss/esp fields of the - * "struct pt_regs" is possible, but they may contain the - * completely wrong values. - */ - childregs = (struct pt_regs *) ((unsigned long) childregs - 8); + childregs = task_pt_regs(p); *childregs = *regs; childregs->eax = 0; childregs->esp = esp; @@ -551,10 +529,7 @@ EXPORT_SYMBOL(dump_thread); */ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) { - struct pt_regs ptregs; - - ptregs = *(struct pt_regs *) - ((unsigned long)tsk->thread_info+THREAD_SIZE - sizeof(ptregs)); + struct pt_regs ptregs = *task_pt_regs(tsk); ptregs.xcs &= 0xffff; ptregs.xds &= 0xffff; ptregs.xes &= 0xffff; @@ -610,8 +585,8 @@ static inline void disable_tsc(struct task_struct *prev_p, * gcc should eliminate the ->thread_info dereference if * has_secure_computing returns 0 at compile time (SECCOMP=n). */ - prev = prev_p->thread_info; - next = next_p->thread_info; + prev = task_thread_info(prev_p); + next = task_thread_info(next_p); if (has_secure_computing(prev) || has_secure_computing(next)) { /* slow path here */ @@ -796,7 +771,7 @@ unsigned long get_wchan(struct task_struct *p) int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; - stack_page = (unsigned long)p->thread_info; + stack_page = (unsigned long)task_stack_page(p); esp = p->thread.esp; if (!stack_page || esp < stack_page || esp > top_esp+stack_page) return 0; diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 5ffbb4b7ad0..5c1fb6aada5 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -32,9 +32,12 @@ * in exit.c or in signal.c. */ -/* determines which flags the user has access to. */ -/* 1 = access 0 = no access */ -#define FLAG_MASK 0x00044dd5 +/* + * Determines which flags the user has access to [1 = access, 0 = no access]. + * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9). + * Also masks reserved bits (31-22, 15, 5, 3, 1). + */ +#define FLAG_MASK 0x00054dd5 /* set's the trap flag. */ #define TRAP_FLAG 0x100 diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c index aaf89cb2bc5..87ccdac8492 100644 --- a/arch/i386/kernel/quirks.c +++ b/arch/i386/kernel/quirks.c @@ -25,8 +25,7 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) /* enable access to config space*/ pci_read_config_byte(dev, 0xf4, &config); - config |= 0x2; - pci_write_config_byte(dev, 0xf4, config); + pci_write_config_byte(dev, 0xf4, config|0x2); /* read xTPR register */ raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); @@ -42,9 +41,9 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) #endif } - config &= ~0x2; - /* disable access to config space*/ - pci_write_config_byte(dev, 0xf4, config); + /* put back the original value for config space*/ + if (!(config & 0x2)) + pci_write_config_byte(dev, 0xf4, config); } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index 2afe0f8d555..d207242976d 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -12,6 +12,7 @@ #include <linux/efi.h> #include <linux/dmi.h> #include <linux/ctype.h> +#include <linux/pm.h> #include <asm/uaccess.h> #include <asm/apic.h> #include <asm/desc.h> @@ -111,12 +112,12 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), }, }, - { /* Handle problems with rebooting on HP nc6120 */ + { /* Handle problems with rebooting on HP laptops */ .callback = set_bios_reboot, - .ident = "HP Compaq nc6120", + .ident = "HP Compaq Laptop", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nc6120"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"), }, }, { } @@ -355,10 +356,10 @@ void machine_halt(void) void machine_power_off(void) { - machine_shutdown(); - - if (pm_power_off) + if (pm_power_off) { + machine_shutdown(); pm_power_off(); + } } diff --git a/arch/i386/kernel/scx200.c b/arch/i386/kernel/scx200.c index 9c968ae67c4..321f5fd26e7 100644 --- a/arch/i386/kernel/scx200.c +++ b/arch/i386/kernel/scx200.c @@ -143,7 +143,7 @@ static int __init scx200_init(void) { printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n"); - return pci_module_init(&scx200_pci_driver); + return pci_register_driver(&scx200_pci_driver); } static void __exit scx200_cleanup(void) diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index fdfcb0cba9b..51e513b4f72 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -45,6 +45,7 @@ #include <linux/nodemask.h> #include <linux/kexec.h> #include <linux/crash_dump.h> +#include <linux/dmi.h> #include <video/edid.h> @@ -146,7 +147,6 @@ EXPORT_SYMBOL(ist_info); struct e820map e820; extern void early_cpu_init(void); -extern void dmi_scan_machine(void); extern void generic_apic_probe(char *); extern int root_mountflags; @@ -898,7 +898,7 @@ static void __init parse_cmdline_early (char ** cmdline_p) } } #endif -#ifdef CONFIG_CRASH_DUMP +#ifdef CONFIG_PROC_VMCORE /* elfcorehdr= specifies the location of elf core header * stored by the crashed kernel. */ @@ -954,6 +954,12 @@ efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) return 0; } +static int __init +efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) +{ + memory_present(0, start, end); + return 0; +} /* * Find the highest page frame number we have available @@ -965,6 +971,7 @@ void __init find_max_pfn(void) max_pfn = 0; if (efi_enabled) { efi_memmap_walk(efi_find_max_pfn, &max_pfn); + efi_memmap_walk(efi_memory_present_wrapper, NULL); return; } @@ -979,6 +986,7 @@ void __init find_max_pfn(void) continue; if (end > max_pfn) max_pfn = end; + memory_present(0, start, end); } } @@ -1576,7 +1584,7 @@ void __init setup_arch(char **cmdline_p) if (s) { extern void setup_early_printk(char *); - setup_early_printk(s); + setup_early_printk(strchr(s, '=') + 1); printk("early console enabled\n"); } } diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index adcd069db91..963616d364e 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -37,51 +37,17 @@ asmlinkage int sys_sigsuspend(int history0, int history1, old_sigset_t mask) { - struct pt_regs * regs = (struct pt_regs *) &history0; - sigset_t saveset; - mask &= _BLOCKABLE; spin_lock_irq(¤t->sighand->siglock); - saveset = current->blocked; + current->saved_sigmask = current->blocked; siginitset(¤t->blocked, mask); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - regs->eax = -EINTR; - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - if (do_signal(regs, &saveset)) - return -EINTR; - } -} - -asmlinkage int -sys_rt_sigsuspend(struct pt_regs regs) -{ - sigset_t saveset, newset; - - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (regs.ecx != sizeof(sigset_t)) - return -EINVAL; - - if (copy_from_user(&newset, (sigset_t __user *)regs.ebx, sizeof(newset))) - return -EFAULT; - sigdelsetmask(&newset, ~_BLOCKABLE); - - spin_lock_irq(¤t->sighand->siglock); - saveset = current->blocked; - current->blocked = newset; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - regs.eax = -EINTR; - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - if (do_signal(®s, &saveset)) - return -EINTR; - } + current->state = TASK_INTERRUPTIBLE; + schedule(); + set_thread_flag(TIF_RESTORE_SIGMASK); + return -ERESTARTNOHAND; } asmlinkage int @@ -433,11 +399,11 @@ static int setup_frame(int sig, struct k_sigaction *ka, current->comm, current->pid, frame, regs->eip, frame->pretcode); #endif - return 1; + return 0; give_sigsegv: force_sigsegv(sig, current); - return 0; + return -EFAULT; } static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, @@ -527,11 +493,11 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, current->comm, current->pid, frame, regs->eip, frame->pretcode); #endif - return 1; + return 0; give_sigsegv: force_sigsegv(sig, current); - return 0; + return -EFAULT; } /* @@ -581,7 +547,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, else ret = setup_frame(sig, ka, oldset, regs); - if (ret) { + if (ret == 0) { spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); if (!(ka->sa.sa_flags & SA_NODEFER)) @@ -598,11 +564,12 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, * want to handle. Thus you cannot kill init even with a SIGKILL even by * mistake. */ -int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) +static void fastcall do_signal(struct pt_regs *regs) { siginfo_t info; int signr; struct k_sigaction ka; + sigset_t *oldset; /* * We want the common case to go fast, which @@ -613,12 +580,14 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) * CS suffices. */ if (!user_mode(regs)) - return 1; + return; if (try_to_freeze()) goto no_signal; - if (!oldset) + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + oldset = ¤t->saved_sigmask; + else oldset = ¤t->blocked; signr = get_signal_to_deliver(&info, &ka, regs, NULL); @@ -628,38 +597,55 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) * have been cleared if the watchpoint triggered * inside the kernel. */ - if (unlikely(current->thread.debugreg[7])) { + if (unlikely(current->thread.debugreg[7])) set_debugreg(current->thread.debugreg[7], 7); - } /* Whee! Actually deliver the signal. */ - return handle_signal(signr, &info, &ka, oldset, regs); + if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { + /* a signal was successfully delivered; the saved + * sigmask will have been stored in the signal frame, + * and will be restored by sigreturn, so we can simply + * clear the TIF_RESTORE_SIGMASK flag */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + clear_thread_flag(TIF_RESTORE_SIGMASK); + } + + return; } - no_signal: +no_signal: /* Did we come from a system call? */ if (regs->orig_eax >= 0) { /* Restart the system call - no handlers present */ - if (regs->eax == -ERESTARTNOHAND || - regs->eax == -ERESTARTSYS || - regs->eax == -ERESTARTNOINTR) { + switch (regs->eax) { + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: regs->eax = regs->orig_eax; regs->eip -= 2; - } - if (regs->eax == -ERESTART_RESTARTBLOCK){ + break; + + case -ERESTART_RESTARTBLOCK: regs->eax = __NR_restart_syscall; regs->eip -= 2; + break; } } - return 0; + + /* if there's no signal to deliver, we just put the saved sigmask + * back */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) { + clear_thread_flag(TIF_RESTORE_SIGMASK); + sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); + } } /* * notification of userspace execution resumption - * - triggered by current->work.notify_resume + * - triggered by the TIF_WORK_MASK flags */ __attribute__((regparm(3))) -void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, +void do_notify_resume(struct pt_regs *regs, void *_unused, __u32 thread_info_flags) { /* Pending single-step? */ @@ -667,9 +653,10 @@ void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, regs->eflags |= TF_MASK; clear_thread_flag(TIF_SINGLESTEP); } + /* deal with pending signal delivery */ - if (thread_info_flags & _TIF_SIGPENDING) - do_signal(regs,oldset); + if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) + do_signal(regs); clear_thread_flag(TIF_IRET); } diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 9ed449af8e9..255adb49826 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -875,8 +875,7 @@ static inline struct task_struct * alloc_idle_task(int cpu) /* initialize thread_struct. we really want to avoid destroy * idle tread */ - idle->thread.esp = (unsigned long)(((struct pt_regs *) - (THREAD_SIZE + (unsigned long) idle->thread_info)) - 1); + idle->thread.esp = (unsigned long)task_pt_regs(idle); init_idle(idle, cpu); return idle; } @@ -903,6 +902,12 @@ static int __devinit do_boot_cpu(int apicid, int cpu) unsigned long start_eip; unsigned short nmi_high = 0, nmi_low = 0; + if (!cpu_gdt_descr[cpu].address && + !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { + printk("Failed to allocate GDT for CPU %d\n", cpu); + return 1; + } + ++cpucount; /* @@ -1090,6 +1095,7 @@ static void smp_tune_scheduling (void) cachesize = 16; /* Pentiums, 2x8kB cache */ bandwidth = 100; } + max_cache_size = cachesize * 1024; } } diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index 9b21a31d4f4..1b665928336 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -1,4 +1,3 @@ -.data ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ .long sys_exit @@ -294,3 +293,19 @@ ENTRY(sys_call_table) .long sys_inotify_init .long sys_inotify_add_watch .long sys_inotify_rm_watch + .long sys_migrate_pages + .long sys_openat /* 295 */ + .long sys_mkdirat + .long sys_mknodat + .long sys_fchownat + .long sys_futimesat + .long sys_newfstatat /* 300 */ + .long sys_unlinkat + .long sys_renameat + .long sys_linkat + .long sys_symlinkat + .long sys_readlinkat /* 305 */ + .long sys_fchmodat + .long sys_faccessat + .long sys_pselect6 + .long sys_ppoll diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 41c5b2dc620..a14d594bfbe 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -302,6 +302,12 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) do_timer_interrupt(irq, regs); write_sequnlock(&xtime_lock); + +#ifdef CONFIG_X86_LOCAL_APIC + if (using_apic_timer) + smp_send_timer_broadcast_ipi(regs); +#endif + return IRQ_HANDLED; } diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c index 9caeaa315cd..a529f0cdce1 100644 --- a/arch/i386/kernel/time_hpet.c +++ b/arch/i386/kernel/time_hpet.c @@ -259,8 +259,6 @@ __setup("hpet=", hpet_setup); #include <linux/mc146818rtc.h> #include <linux/rtc.h> -extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs); - #define DEFAULT_RTC_INT_FREQ 64 #define RTC_NUM_INTS 1 diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index d395e3b4248..47675bbbb31 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -330,7 +330,9 @@ int recalibrate_cpu_khz(void) unsigned int cpu_khz_old = cpu_khz; if (cpu_has_tsc) { + local_irq_disable(); init_cpu_khz(); + local_irq_enable(); cpu_data[0].loops_per_jiffy = cpufreq_scale(cpu_data[0].loops_per_jiffy, cpu_khz_old, diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index f0dffa03fbb..0aaebf3e1cf 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -112,33 +112,38 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) p < (void *)tinfo + THREAD_SIZE - 3; } +static void print_addr_and_symbol(unsigned long addr, char *log_lvl) +{ + printk(log_lvl); + printk(" [<%08lx>] ", addr); + print_symbol("%s", addr); + printk("\n"); +} + static inline unsigned long print_context_stack(struct thread_info *tinfo, - unsigned long *stack, unsigned long ebp) + unsigned long *stack, unsigned long ebp, + char *log_lvl) { unsigned long addr; #ifdef CONFIG_FRAME_POINTER while (valid_stack_ptr(tinfo, (void *)ebp)) { addr = *(unsigned long *)(ebp + 4); - printk(" [<%08lx>] ", addr); - print_symbol("%s", addr); - printk("\n"); + print_addr_and_symbol(addr, log_lvl); ebp = *(unsigned long *)ebp; } #else while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; - if (__kernel_text_address(addr)) { - printk(" [<%08lx>]", addr); - print_symbol(" %s", addr); - printk("\n"); - } + if (__kernel_text_address(addr)) + print_addr_and_symbol(addr, log_lvl); } #endif return ebp; } -void show_trace(struct task_struct *task, unsigned long * stack) +static void show_trace_log_lvl(struct task_struct *task, + unsigned long *stack, char *log_lvl) { unsigned long ebp; @@ -157,15 +162,21 @@ void show_trace(struct task_struct *task, unsigned long * stack) struct thread_info *context; context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - ebp = print_context_stack(context, stack, ebp); + ebp = print_context_stack(context, stack, ebp, log_lvl); stack = (unsigned long*)context->previous_esp; if (!stack) break; - printk(" =======================\n"); + printk(KERN_EMERG " =======================\n"); } } -void show_stack(struct task_struct *task, unsigned long *esp) +void show_trace(struct task_struct *task, unsigned long * stack) +{ + show_trace_log_lvl(task, stack, ""); +} + +static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp, + char *log_lvl) { unsigned long *stack; int i; @@ -178,15 +189,26 @@ void show_stack(struct task_struct *task, unsigned long *esp) } stack = esp; + printk(log_lvl); for(i = 0; i < kstack_depth_to_print; i++) { if (kstack_end(stack)) break; - if (i && ((i % 8) == 0)) - printk("\n "); + if (i && ((i % 8) == 0)) { + printk("\n"); + printk(log_lvl); + printk(" "); + } printk("%08lx ", *stack++); } - printk("\nCall Trace:\n"); - show_trace(task, esp); + printk("\n"); + printk(log_lvl); + printk("Call Trace:\n"); + show_trace_log_lvl(task, esp, log_lvl); +} + +void show_stack(struct task_struct *task, unsigned long *esp) +{ + show_stack_log_lvl(task, esp, ""); } /* @@ -216,18 +238,18 @@ void show_registers(struct pt_regs *regs) ss = regs->xss & 0xffff; } print_modules(); - printk("CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\nEFLAGS: %08lx" - " (%s) \n", + printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n" + "EFLAGS: %08lx (%s) \n", smp_processor_id(), 0xffff & regs->xcs, regs->eip, print_tainted(), regs->eflags, system_utsname.release); - print_symbol("EIP is at %s\n", regs->eip); - printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip); + printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", regs->eax, regs->ebx, regs->ecx, regs->edx); - printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", regs->esi, regs->edi, regs->ebp, esp); - printk("ds: %04x es: %04x ss: %04x\n", + printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", regs->xds & 0xffff, regs->xes & 0xffff, ss); - printk("Process %s (pid: %d, threadinfo=%p task=%p)", + printk(KERN_EMERG "Process %s (pid: %d, threadinfo=%p task=%p)", current->comm, current->pid, current_thread_info(), current); /* * When in-kernel, we also print out the stack and code at the @@ -236,10 +258,10 @@ void show_registers(struct pt_regs *regs) if (in_kernel) { u8 __user *eip; - printk("\nStack: "); - show_stack(NULL, (unsigned long*)esp); + printk("\n" KERN_EMERG "Stack: "); + show_stack_log_lvl(NULL, (unsigned long *)esp, KERN_EMERG); - printk("Code: "); + printk(KERN_EMERG "Code: "); eip = (u8 __user *)regs->eip - 43; for (i = 0; i < 64; i++, eip++) { @@ -280,15 +302,15 @@ static void handle_BUG(struct pt_regs *regs) (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) file = "<bad filename>"; - printk("------------[ cut here ]------------\n"); - printk(KERN_ALERT "kernel BUG at %s:%d!\n", file, line); + printk(KERN_EMERG "------------[ cut here ]------------\n"); + printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line); no_bug: return; /* Here we know it was a BUG but file-n-line is unavailable */ bug: - printk("Kernel BUG\n"); + printk(KERN_EMERG "Kernel BUG\n"); } /* This is gone through when something in the kernel @@ -306,28 +328,35 @@ void die(const char * str, struct pt_regs * regs, long err) .lock_owner_depth = 0 }; static int die_counter; + unsigned long flags; if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); - spin_lock_irq(&die.lock); + spin_lock_irqsave(&die.lock, flags); die.lock_owner = smp_processor_id(); die.lock_owner_depth = 0; bust_spinlocks(1); } + else + local_save_flags(flags); if (++die.lock_owner_depth < 3) { int nl = 0; handle_BUG(regs); - printk(KERN_ALERT "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); + printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); #ifdef CONFIG_PREEMPT - printk("PREEMPT "); + printk(KERN_EMERG "PREEMPT "); nl = 1; #endif #ifdef CONFIG_SMP + if (!nl) + printk(KERN_EMERG); printk("SMP "); nl = 1; #endif #ifdef CONFIG_DEBUG_PAGEALLOC + if (!nl) + printk(KERN_EMERG); printk("DEBUG_PAGEALLOC"); nl = 1; #endif @@ -336,11 +365,11 @@ void die(const char * str, struct pt_regs * regs, long err) notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); show_registers(regs); } else - printk(KERN_ERR "Recursive die() failure, output suppressed\n"); + printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); bust_spinlocks(0); die.lock_owner = -1; - spin_unlock_irq(&die.lock); + spin_unlock_irqrestore(&die.lock, flags); if (kexec_should_crash(current)) crash_kexec(regs); @@ -452,7 +481,7 @@ DO_VM86_ERROR( 3, SIGTRAP, "int3", int3) #endif DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow) DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) +DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip) DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) @@ -524,8 +553,10 @@ gp_in_kernel: static void mem_parity_error(unsigned char reason, struct pt_regs * regs) { - printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); - printk("You probably have a hardware problem with your RAM chips\n"); + printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying " + "to continue\n"); + printk(KERN_EMERG "You probably have a hardware problem with your RAM " + "chips\n"); /* Clear and disable the memory parity error line. */ clear_mem_error(reason); @@ -535,7 +566,7 @@ static void io_check_error(unsigned char reason, struct pt_regs * regs) { unsigned long i; - printk("NMI: IOCK error (debug interrupt?)\n"); + printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); show_registers(regs); /* Re-enable the IOCK line, wait for a few seconds */ @@ -577,11 +608,11 @@ void die_nmi (struct pt_regs *regs, const char *msg) * to get a message out. */ bust_spinlocks(1); - printk(msg); + printk(KERN_EMERG "%s", msg); printk(" on CPU%d, eip %08lx, registers:\n", smp_processor_id(), regs->eip); show_registers(regs); - printk("console shuts up ...\n"); + printk(KERN_EMERG "console shuts up ...\n"); console_silent(); spin_unlock(&nmi_print_lock); bust_spinlocks(0); @@ -987,8 +1018,8 @@ asmlinkage void math_state_restore(struct pt_regs regs) asmlinkage void math_emulate(long arg) { - printk("math-emulation not enabled and no coprocessor found.\n"); - printk("killing %s.\n",current->comm); + printk(KERN_EMERG "math-emulation not enabled and no coprocessor found.\n"); + printk(KERN_EMERG "killing %s.\n",current->comm); force_sig(SIGFPE,current); schedule(); } @@ -1075,9 +1106,9 @@ void __init trap_init(void) set_trap_gate(0,÷_error); set_intr_gate(1,&debug); set_intr_gate(2,&nmi); - set_system_intr_gate(3, &int3); /* int3-5 can be called from all */ + set_system_intr_gate(3, &int3); /* int3/4 can be called from all */ set_system_gate(4,&overflow); - set_system_gate(5,&bounds); + set_trap_gate(5,&bounds); set_trap_gate(6,&invalid_op); set_trap_gate(7,&device_not_available); set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS); @@ -1095,6 +1126,28 @@ void __init trap_init(void) #endif set_trap_gate(19,&simd_coprocessor_error); + if (cpu_has_fxsr) { + /* + * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. + * Generates a compile-time "error: zero width for bit-field" if + * the alignment is wrong. + */ + struct fxsrAlignAssert { + int _:!(offsetof(struct task_struct, + thread.i387.fxsave) & 15); + }; + + printk(KERN_INFO "Enabling fast FPU save and restore... "); + set_in_cr4(X86_CR4_OSFXSR); + printk("done.\n"); + } + if (cpu_has_xmm) { + printk(KERN_INFO "Enabling unmasked SIMD FPU exception " + "support... "); + set_in_cr4(X86_CR4_OSXMMEXCPT); + printk("done.\n"); + } + set_system_gate(SYSCALL_VECTOR,&system_call); /* diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index fc1993564f9..f51c894a7da 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -4,7 +4,7 @@ * Copyright (C) 1994 Linus Torvalds * * 29 dec 2001 - Fixed oopses caused by unchecked access to the vm86 - * stack - Manfred Spraul <manfreds@colorfullife.com> + * stack - Manfred Spraul <manfred@colorfullife.com> * * 22 mar 2002 - Manfred detected the stackfaults, but didn't handle * them correctly. Now the emulation will be in a @@ -30,6 +30,7 @@ * */ +#include <linux/capability.h> #include <linux/config.h> #include <linux/errno.h> #include <linux/interrupt.h> @@ -310,7 +311,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk "movl %1,%%ebp\n\t" "jmp resume_userspace" : /* no outputs */ - :"r" (&info->regs), "r" (tsk->thread_info) : "ax"); + :"r" (&info->regs), "r" (task_thread_info(tsk)) : "ax"); /* we never return here */ } |