aboutsummaryrefslogtreecommitdiff
path: root/arch/i386
diff options
context:
space:
mode:
Diffstat (limited to 'arch/i386')
-rw-r--r--arch/i386/Kconfig54
-rw-r--r--arch/i386/Kconfig.cpu2
-rw-r--r--arch/i386/boot/Makefile9
-rw-r--r--arch/i386/boot/compressed/misc.c32
-rw-r--r--arch/i386/boot/video.S19
-rw-r--r--arch/i386/crypto/aes-i586-asm.S29
-rw-r--r--arch/i386/crypto/aes.c20
-rw-r--r--arch/i386/kernel/Makefile8
-rw-r--r--arch/i386/kernel/alternative.c118
-rw-r--r--arch/i386/kernel/apic.c16
-rw-r--r--arch/i386/kernel/apm.c6
-rw-r--r--arch/i386/kernel/asm-offsets.c7
-rw-r--r--arch/i386/kernel/cpu/amd.c22
-rw-r--r--arch/i386/kernel/cpu/common.c25
-rw-r--r--arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c6
-rw-r--r--arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c6
-rw-r--r--arch/i386/kernel/cpu/cyrix.c2
-rw-r--r--arch/i386/kernel/cpu/intel.c6
-rw-r--r--arch/i386/kernel/cpu/intel_cacheinfo.c123
-rw-r--r--arch/i386/kernel/cpu/proc.c8
-rw-r--r--arch/i386/kernel/cpuid.c2
-rw-r--r--arch/i386/kernel/crash.c12
-rw-r--r--arch/i386/kernel/doublefault.c3
-rw-r--r--arch/i386/kernel/entry.S285
-rw-r--r--arch/i386/kernel/hpet.c67
-rw-r--r--arch/i386/kernel/i8253.c118
-rw-r--r--arch/i386/kernel/i8259.c2
-rw-r--r--arch/i386/kernel/io_apic.c49
-rw-r--r--arch/i386/kernel/irq.c14
-rw-r--r--arch/i386/kernel/kprobes.c95
-rw-r--r--arch/i386/kernel/machine_kexec.c4
-rw-r--r--arch/i386/kernel/msr.c2
-rw-r--r--arch/i386/kernel/nmi.c72
-rw-r--r--arch/i386/kernel/numaq.c10
-rw-r--r--arch/i386/kernel/process.c8
-rw-r--r--arch/i386/kernel/scx200.c66
-rw-r--r--arch/i386/kernel/setup.c126
-rw-r--r--arch/i386/kernel/signal.c4
-rw-r--r--arch/i386/kernel/smp.c12
-rw-r--r--arch/i386/kernel/smpboot.c52
-rw-r--r--arch/i386/kernel/sysenter.c128
-rw-r--r--arch/i386/kernel/time.c157
-rw-r--r--arch/i386/kernel/timers/Makefile9
-rw-r--r--arch/i386/kernel/timers/common.c172
-rw-r--r--arch/i386/kernel/timers/timer.c75
-rw-r--r--arch/i386/kernel/timers/timer_cyclone.c259
-rw-r--r--arch/i386/kernel/timers/timer_hpet.c217
-rw-r--r--arch/i386/kernel/timers/timer_none.c39
-rw-r--r--arch/i386/kernel/timers/timer_pit.c177
-rw-r--r--arch/i386/kernel/timers/timer_pm.c342
-rw-r--r--arch/i386/kernel/timers/timer_tsc.c617
-rw-r--r--arch/i386/kernel/topology.c28
-rw-r--r--arch/i386/kernel/traps.c70
-rw-r--r--arch/i386/kernel/tsc.c478
-rw-r--r--arch/i386/kernel/vmlinux.lds.S9
-rw-r--r--arch/i386/kernel/vsyscall-sysenter.S4
-rw-r--r--arch/i386/kernel/vsyscall.lds.S4
-rw-r--r--arch/i386/lib/delay.c65
-rw-r--r--arch/i386/lib/usercopy.c119
-rw-r--r--arch/i386/mach-default/setup.c43
-rw-r--r--arch/i386/mach-visws/setup.c49
-rw-r--r--arch/i386/mach-voyager/setup.c79
-rw-r--r--arch/i386/mach-voyager/voyager_smp.c2
-rw-r--r--arch/i386/mm/fault.c38
-rw-r--r--arch/i386/mm/init.c5
-rw-r--r--arch/i386/mm/pageattr.c4
-rw-r--r--arch/i386/oprofile/nmi_int.c4
-rw-r--r--arch/i386/oprofile/op_model_athlon.c1
-rw-r--r--arch/i386/oprofile/op_model_p4.c1
-rw-r--r--arch/i386/oprofile/op_model_ppro.c1
-rw-r--r--arch/i386/pci/i386.c2
-rw-r--r--arch/i386/pci/pcbios.c6
72 files changed, 2178 insertions, 2547 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 1596101cfaf..3bb221db164 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -14,6 +14,10 @@ config X86_32
486, 586, Pentiums, and various instruction-set-compatible chips by
AMD, Cyrix, and others.
+config GENERIC_TIME
+ bool
+ default y
+
config SEMAPHORE_SLEEPERS
bool
default y
@@ -229,7 +233,7 @@ config NR_CPUS
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
- depends on SMP
+ depends on X86_HT
help
SMT scheduler support improves the CPU scheduler's decision making
when dealing with Intel Pentium 4 chips with HyperThreading at a
@@ -238,7 +242,7 @@ config SCHED_SMT
config SCHED_MC
bool "Multi-core scheduler support"
- depends on SMP
+ depends on X86_HT
default y
help
Multi-core scheduler support improves the CPU scheduler's decision
@@ -324,6 +328,15 @@ config X86_MCE_P4THERMAL
Enabling this feature will cause a message to be printed when the P4
enters thermal throttling.
+config VM86
+ default y
+ bool "Enable VM86 support" if EMBEDDED
+ help
+ This option is required by programs like DOSEMU to run 16-bit legacy
+ code on X86 processors. It also may be needed by software like
+ XFree86 to initialize some video cards via BIOS. Disabling this
+ option saves about 6k.
+
config TOSHIBA
tristate "Toshiba Laptop support"
---help---
@@ -721,7 +734,7 @@ config KEXEC
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
- but it is indepedent of the system firmware. And like a reboot
+ but it is independent of the system firmware. And like a reboot
you can start any kernel with it, not just Linux.
The name comes from the similiarity to the exec system call.
@@ -767,6 +780,17 @@ config HOTPLUG_CPU
enable suspend on SMP systems. CPUs can be controlled through
/sys/devices/system/cpu.
+config COMPAT_VDSO
+ bool "Compat VDSO support"
+ default y
+ help
+ Map the VDSO to the predictable old-style address too.
+ ---help---
+ Say N here if you are running a sufficiently recent glibc
+ version (2.3.3 or later), to remove the high-mapped
+ VDSO mapping and to exclusively use the randomized VDSO.
+
+ If unsure, say Y.
endmenu
@@ -1046,13 +1070,27 @@ config SCx200
tristate "NatSemi SCx200 support"
depends on !X86_VOYAGER
help
- This provides basic support for the National Semiconductor SCx200
- processor. Right now this is just a driver for the GPIO pins.
+ This provides basic support for National Semiconductor's
+ (now AMD's) Geode processors. The driver probes for the
+ PCI-IDs of several on-chip devices, so its a good dependency
+ for other scx200_* drivers.
- If you don't know what to do here, say N.
+ If compiled as a module, the driver is named scx200.
- This support is also available as a module. If compiled as a
- module, it will be called scx200.
+config SCx200HR_TIMER
+ tristate "NatSemi SCx200 27MHz High-Resolution Timer Support"
+ depends on SCx200 && GENERIC_TIME
+ default y
+ help
+ This driver provides a clocksource built upon the on-chip
+ 27MHz high-resolution timer. Its also a workaround for
+ NSC Geode SC-1100's buggy TSC, which loses time when the
+ processor goes idle (as is done by the scheduler). The
+ other workaround is idle=poll boot option.
+
+config K8_NB
+ def_bool y
+ depends on AGP_AMD64
source "drivers/pcmcia/Kconfig"
diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu
index eb130482ba1..21c9a4e7110 100644
--- a/arch/i386/Kconfig.cpu
+++ b/arch/i386/Kconfig.cpu
@@ -41,7 +41,7 @@ config M386
- "GeodeGX1" for Geode GX1 (Cyrix MediaGX).
- "Geode GX/LX" For AMD Geode GX and LX processors.
- "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
- - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above).
+ - "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above).
If you don't know what to do, choose "386".
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile
index 33e55476381..e9794662606 100644
--- a/arch/i386/boot/Makefile
+++ b/arch/i386/boot/Makefile
@@ -109,8 +109,13 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
isoimage: $(BOOTIMAGE)
-rm -rf $(obj)/isoimage
mkdir $(obj)/isoimage
- cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \
- $(obj)/isoimage
+ for i in lib lib64 share end ; do \
+ if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
+ cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
+ break ; \
+ fi ; \
+ if [ $$i = end ] ; then exit 1 ; fi ; \
+ done
cp $(BOOTIMAGE) $(obj)/isoimage/linux
echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
if [ -f '$(FDINITRD)' ] ; then \
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
index f19f3a7492a..b2ccd543410 100644
--- a/arch/i386/boot/compressed/misc.c
+++ b/arch/i386/boot/compressed/misc.c
@@ -24,14 +24,6 @@
#undef memset
#undef memcpy
-
-/*
- * Why do we do this? Don't ask me..
- *
- * Incomprehensible are the ways of bootloaders.
- */
-static void* memset(void *, int, size_t);
-static void* memcpy(void *, __const void *, size_t);
#define memzero(s, n) memset ((s), 0, (n))
typedef unsigned char uch;
@@ -93,7 +85,7 @@ static unsigned char *real_mode; /* Pointer to real-mode data */
#endif
#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
-extern char input_data[];
+extern unsigned char input_data[];
extern int input_len;
static long bytes_out = 0;
@@ -103,6 +95,9 @@ static unsigned long output_ptr = 0;
static void *malloc(int size);
static void free(void *where);
+static void *memset(void *s, int c, unsigned n);
+static void *memcpy(void *dest, const void *src, unsigned n);
+
static void putstr(const char *);
extern int end;
@@ -205,7 +200,7 @@ static void putstr(const char *s)
outb_p(0xff & (pos >> 1), vidport+1);
}
-static void* memset(void* s, int c, size_t n)
+static void* memset(void* s, int c, unsigned n)
{
int i;
char *ss = (char*)s;
@@ -214,14 +209,13 @@ static void* memset(void* s, int c, size_t n)
return s;
}
-static void* memcpy(void* __dest, __const void* __src,
- size_t __n)
+static void* memcpy(void* dest, const void* src, unsigned n)
{
int i;
- char *d = (char *)__dest, *s = (char *)__src;
+ char *d = (char *)dest, *s = (char *)src;
- for (i=0;i<__n;i++) d[i] = s[i];
- return __dest;
+ for (i=0;i<n;i++) d[i] = s[i];
+ return dest;
}
/* ===========================================================================
@@ -309,7 +303,7 @@ static void setup_normal_output_buffer(void)
#else
if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
#endif
- output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */
+ output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
free_mem_end_ptr = (long)real_mode;
}
@@ -324,11 +318,9 @@ static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
#ifdef STANDARD_MEMORY_BIOS_CALL
if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
#else
- if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) <
- (3*1024))
- error("Less than 4MB of memory");
+ if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
#endif
- mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START;
+ mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
low_buffer_size = low_buffer_end - LOW_BUFFER_START;
diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S
index c9343c3a808..8c2a6faeeae 100644
--- a/arch/i386/boot/video.S
+++ b/arch/i386/boot/video.S
@@ -1929,7 +1929,7 @@ skip10: movb %ah, %al
ret
store_edid:
-#ifdef CONFIG_FB_FIRMWARE_EDID
+#ifdef CONFIG_FIRMWARE_EDID
pushw %es # just save all registers
pushw %ax
pushw %bx
@@ -1947,6 +1947,22 @@ store_edid:
rep
stosl
+ pushw %es # save ES
+ xorw %di, %di # Report Capability
+ pushw %di
+ popw %es # ES:DI must be 0:0
+ movw $0x4f15, %ax
+ xorw %bx, %bx
+ xorw %cx, %cx
+ int $0x10
+ popw %es # restore ES
+
+ cmpb $0x00, %ah # call successful
+ jne no_edid
+
+ cmpb $0x4f, %al # function supported
+ jne no_edid
+
movw $0x4f15, %ax # do VBE/DDC
movw $0x01, %bx
movw $0x00, %cx
@@ -1954,6 +1970,7 @@ store_edid:
movw $0x140, %di
int $0x10
+no_edid:
popw %di # restore all registers
popw %dx
popw %cx
diff --git a/arch/i386/crypto/aes-i586-asm.S b/arch/i386/crypto/aes-i586-asm.S
index 911b15377f2..f942f0c8f63 100644
--- a/arch/i386/crypto/aes-i586-asm.S
+++ b/arch/i386/crypto/aes-i586-asm.S
@@ -36,22 +36,19 @@
.file "aes-i586-asm.S"
.text
-// aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])//
-// aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])//
-
-#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
+#include <asm/asm-offsets.h>
-// offsets to parameters with one register pushed onto stack
-
-#define in_blk 8 // input byte array address parameter
-#define out_blk 12 // output byte array address parameter
-#define ctx 16 // AES context structure
+#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
-// offsets in context structure
+/* offsets to parameters with one register pushed onto stack */
+#define tfm 8
+#define out_blk 12
+#define in_blk 16
-#define ekey 0 // encryption key schedule base address
-#define nrnd 256 // number of rounds
-#define dkey 260 // decryption key schedule base address
+/* offsets in crypto_tfm structure */
+#define ekey (crypto_tfm_ctx_offset + 0)
+#define nrnd (crypto_tfm_ctx_offset + 256)
+#define dkey (crypto_tfm_ctx_offset + 260)
// register mapping for encrypt and decrypt subroutines
@@ -220,6 +217,7 @@
do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
// AES (Rijndael) Encryption Subroutine
+/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
.global aes_enc_blk
@@ -230,7 +228,7 @@
aes_enc_blk:
push %ebp
- mov ctx(%esp),%ebp // pointer to context
+ mov tfm(%esp),%ebp
// CAUTION: the order and the values used in these assigns
// rely on the register mappings
@@ -295,6 +293,7 @@ aes_enc_blk:
ret
// AES (Rijndael) Decryption Subroutine
+/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
.global aes_dec_blk
@@ -305,7 +304,7 @@ aes_enc_blk:
aes_dec_blk:
push %ebp
- mov ctx(%esp),%ebp // pointer to context
+ mov tfm(%esp),%ebp
// CAUTION: the order and the values used in these assigns
// rely on the register mappings
diff --git a/arch/i386/crypto/aes.c b/arch/i386/crypto/aes.c
index a50397b1d5c..d3806daa3de 100644
--- a/arch/i386/crypto/aes.c
+++ b/arch/i386/crypto/aes.c
@@ -45,8 +45,8 @@
#include <linux/crypto.h>
#include <linux/linkage.h>
-asmlinkage void aes_enc_blk(const u8 *src, u8 *dst, void *ctx);
-asmlinkage void aes_dec_blk(const u8 *src, u8 *dst, void *ctx);
+asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
#define AES_MIN_KEY_SIZE 16
#define AES_MAX_KEY_SIZE 32
@@ -378,12 +378,12 @@ static void gen_tabs(void)
k[8*(i)+11] = ss[3]; \
}
-static int
-aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags)
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len, u32 *flags)
{
int i;
u32 ss[8];
- struct aes_ctx *ctx = ctx_arg;
+ struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
const __le32 *key = (const __le32 *)in_key;
/* encryption schedule */
@@ -464,16 +464,16 @@ aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags)
return 0;
}
-static inline void aes_encrypt(void *ctx, u8 *dst, const u8 *src)
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
- aes_enc_blk(src, dst, ctx);
+ aes_enc_blk(tfm, dst, src);
}
-static inline void aes_decrypt(void *ctx, u8 *dst, const u8 *src)
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
- aes_dec_blk(src, dst, ctx);
+ aes_dec_blk(tfm, dst, src);
}
-
static struct crypto_alg aes_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-i586",
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 96fb8a020af..5e70c2fb273 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -7,10 +7,9 @@ extra-y := head.o init_task.o vmlinux.lds
obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
pci-dma.o i386_ksyms.o i387.o bootflag.o \
- quirks.o i8237.o topology.o alternative.o
+ quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
obj-y += cpu/
-obj-y += timers/
obj-y += acpi/
obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
obj-$(CONFIG_MCA) += mca.o
@@ -37,6 +36,8 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o
obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
obj-$(CONFIG_VM86) += vm86.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+obj-$(CONFIG_HPET_TIMER) += hpet.o
+obj-$(CONFIG_K8_NB) += k8.o
EXTRA_AFLAGS := -traditional
@@ -76,3 +77,6 @@ SYSCFLAGS_vsyscall-syms.o = -r
$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
$(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
$(call if_changed,syscall)
+
+k8-y += ../../x86_64/kernel/k8.o
+
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 5cbd6f99fb2..50eb0e03777 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -4,27 +4,41 @@
#include <asm/alternative.h>
#include <asm/sections.h>
-#define DEBUG 0
-#if DEBUG
-# define DPRINTK(fmt, args...) printk(fmt, args)
-#else
-# define DPRINTK(fmt, args...)
-#endif
+static int no_replacement = 0;
+static int smp_alt_once = 0;
+static int debug_alternative = 0;
+
+static int __init noreplacement_setup(char *s)
+{
+ no_replacement = 1;
+ return 1;
+}
+static int __init bootonly(char *str)
+{
+ smp_alt_once = 1;
+ return 1;
+}
+static int __init debug_alt(char *str)
+{
+ debug_alternative = 1;
+ return 1;
+}
+__setup("noreplacement", noreplacement_setup);
+__setup("smp-alt-boot", bootonly);
+__setup("debug-alternative", debug_alt);
+
+#define DPRINTK(fmt, args...) if (debug_alternative) \
+ printk(KERN_DEBUG fmt, args)
+
+#ifdef GENERIC_NOP1
/* Use inline assembly to define this because the nops are defined
as inline assembly strings in the include files and we cannot
get them easily into strings. */
asm("\t.data\nintelnops: "
GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
GENERIC_NOP7 GENERIC_NOP8);
-asm("\t.data\nk8nops: "
- K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
- K8_NOP7 K8_NOP8);
-asm("\t.data\nk7nops: "
- K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
- K7_NOP7 K7_NOP8);
-
-extern unsigned char intelnops[], k8nops[], k7nops[];
+extern unsigned char intelnops[];
static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
NULL,
intelnops,
@@ -36,6 +50,13 @@ static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
intelnops + 1 + 2 + 3 + 4 + 5 + 6,
intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
+#endif
+
+#ifdef K8_NOP1
+asm("\t.data\nk8nops: "
+ K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
+ K8_NOP7 K8_NOP8);
+extern unsigned char k8nops[];
static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
NULL,
k8nops,
@@ -47,6 +68,13 @@ static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
k8nops + 1 + 2 + 3 + 4 + 5 + 6,
k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
+#endif
+
+#ifdef K7_NOP1
+asm("\t.data\nk7nops: "
+ K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
+ K7_NOP7 K7_NOP8);
+extern unsigned char k7nops[];
static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
NULL,
k7nops,
@@ -58,6 +86,18 @@ static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
k7nops + 1 + 2 + 3 + 4 + 5 + 6,
k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
+#endif
+
+#ifdef CONFIG_X86_64
+
+extern char __vsyscall_0;
+static inline unsigned char** find_nop_table(void)
+{
+ return k8_nops;
+}
+
+#else /* CONFIG_X86_64 */
+
static struct nop {
int cpuid;
unsigned char **noptable;
@@ -67,14 +107,6 @@ static struct nop {
{ -1, NULL }
};
-
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
-extern u8 *__smp_locks[], *__smp_locks_end[];
-
-extern u8 __smp_alt_begin[], __smp_alt_end[];
-
-
static unsigned char** find_nop_table(void)
{
unsigned char **noptable = intel_nops;
@@ -89,6 +121,14 @@ static unsigned char** find_nop_table(void)
return noptable;
}
+#endif /* CONFIG_X86_64 */
+
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
+extern u8 *__smp_locks[], *__smp_locks_end[];
+
+extern u8 __smp_alt_begin[], __smp_alt_end[];
+
/* Replace instructions with better alternatives for this CPU type.
This runs before SMP is initialized to avoid SMP problems with
self modifying code. This implies that assymetric systems where
@@ -99,6 +139,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
unsigned char **noptable = find_nop_table();
struct alt_instr *a;
+ u8 *instr;
int diff, i, k;
DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
@@ -106,7 +147,16 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
BUG_ON(a->replacementlen > a->instrlen);
if (!boot_cpu_has(a->cpuid))
continue;
- memcpy(a->instr, a->replacement, a->replacementlen);
+ instr = a->instr;
+#ifdef CONFIG_X86_64
+ /* vsyscall code is not mapped yet. resolve it manually. */
+ if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
+ instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
+ DPRINTK("%s: vsyscall fixup: %p => %p\n",
+ __FUNCTION__, a->instr, instr);
+ }
+#endif
+ memcpy(instr, a->replacement, a->replacementlen);
diff = a->instrlen - a->replacementlen;
/* Pad the rest with nops */
for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
@@ -186,14 +236,6 @@ struct smp_alt_module {
static LIST_HEAD(smp_alt_modules);
static DEFINE_SPINLOCK(smp_alt);
-static int smp_alt_once = 0;
-static int __init bootonly(char *str)
-{
- smp_alt_once = 1;
- return 1;
-}
-__setup("smp-alt-boot", bootonly);
-
void alternatives_smp_module_add(struct module *mod, char *name,
void *locks, void *locks_end,
void *text, void *text_end)
@@ -201,6 +243,9 @@ void alternatives_smp_module_add(struct module *mod, char *name,
struct smp_alt_module *smp;
unsigned long flags;
+ if (no_replacement)
+ return;
+
if (smp_alt_once) {
if (boot_cpu_has(X86_FEATURE_UP))
alternatives_smp_unlock(locks, locks_end,
@@ -235,7 +280,7 @@ void alternatives_smp_module_del(struct module *mod)
struct smp_alt_module *item;
unsigned long flags;
- if (smp_alt_once)
+ if (no_replacement || smp_alt_once)
return;
spin_lock_irqsave(&smp_alt, flags);
@@ -256,7 +301,7 @@ void alternatives_smp_switch(int smp)
struct smp_alt_module *mod;
unsigned long flags;
- if (smp_alt_once)
+ if (no_replacement || smp_alt_once)
return;
BUG_ON(!smp && (num_online_cpus() > 1));
@@ -285,6 +330,13 @@ void alternatives_smp_switch(int smp)
void __init alternative_instructions(void)
{
+ if (no_replacement) {
+ printk(KERN_INFO "(SMP-)alternatives turned off\n");
+ free_init_pages("SMP alternatives",
+ (unsigned long)__smp_alt_begin,
+ (unsigned long)__smp_alt_end);
+ return;
+ }
apply_alternatives(__alt_instructions, __alt_instructions_end);
/* switch to patch-once-at-boottime-only mode and free the
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 5ab59c12335..7ce09492fc0 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -36,6 +36,7 @@
#include <asm/arch_hooks.h>
#include <asm/hpet.h>
#include <asm/i8253.h>
+#include <asm/nmi.h>
#include <mach_apic.h>
#include <mach_apicdef.h>
@@ -156,7 +157,7 @@ void clear_local_APIC(void)
maxlvt = get_maxlvt();
/*
- * Masking an LVT entry on a P6 can trigger a local APIC error
+ * Masking an LVT entry can trigger a local APIC error
* if the vector is zero. Mask LVTERR first to prevent this.
*/
if (maxlvt >= 3) {
@@ -1117,7 +1118,18 @@ void disable_APIC_timer(void)
unsigned long v;
v = apic_read(APIC_LVTT);
- apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
+ /*
+ * When an illegal vector value (0-15) is written to an LVT
+ * entry and delivery mode is Fixed, the APIC may signal an
+ * illegal vector error, with out regard to whether the mask
+ * bit is set or whether an interrupt is actually seen on input.
+ *
+ * Boot sequence might call this function when the LVTT has
+ * '0' vector value. So make sure vector field is set to
+ * valid value.
+ */
+ v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+ apic_write_around(APIC_LVTT, v);
}
}
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 9e819eb6822..7c5729d1fd0 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -764,9 +764,9 @@ static int apm_do_idle(void)
int idled = 0;
int polling;
- polling = test_thread_flag(TIF_POLLING_NRFLAG);
+ polling = !!(current_thread_info()->status & TS_POLLING);
if (polling) {
- clear_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status &= ~TS_POLLING;
smp_mb__after_clear_bit();
}
if (!need_resched()) {
@@ -774,7 +774,7 @@ static int apm_do_idle(void)
ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax);
}
if (polling)
- set_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status |= TS_POLLING;
if (!idled)
return 0;
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
index 36d66e2077d..c80271f8f08 100644
--- a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -4,6 +4,7 @@
* to extract and format the required data.
*/
+#include <linux/crypto.h>
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/personality.h>
@@ -13,6 +14,7 @@
#include <asm/fixmap.h>
#include <asm/processor.h>
#include <asm/thread_info.h>
+#include <asm/elf.h>
#define DEFINE(sym, val) \
asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -53,6 +55,7 @@ void foo(void)
OFFSET(TI_preempt_count, thread_info, preempt_count);
OFFSET(TI_addr_limit, thread_info, addr_limit);
OFFSET(TI_restart_block, thread_info, restart_block);
+ OFFSET(TI_sysenter_return, thread_info, sysenter_return);
BLANK();
OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
@@ -68,5 +71,7 @@ void foo(void)
sizeof(struct tss_struct));
DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
- DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL));
+ DEFINE(VDSO_PRELINK, VDSO_PRELINK);
+
+ OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
}
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 786d1a57048..e6a2d6b80cd 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -224,22 +224,26 @@ static void __init init_amd(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_HT
/*
- * On a AMD dual core setup the lower bits of the APIC id
- * distingush the cores. Assumes number of cores is a power
- * of two.
+ * On a AMD multi core setup the lower bits of the APIC id
+ * distingush the cores.
*/
if (c->x86_max_cores > 1) {
int cpu = smp_processor_id();
- unsigned bits = 0;
- while ((1 << bits) < c->x86_max_cores)
- bits++;
- cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
- phys_proc_id[cpu] >>= bits;
+ unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;
+
+ if (bits == 0) {
+ while ((1 << bits) < c->x86_max_cores)
+ bits++;
+ }
+ c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1);
+ c->phys_proc_id >>= bits;
printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
- cpu, c->x86_max_cores, cpu_core_id[cpu]);
+ cpu, c->x86_max_cores, c->cpu_core_id);
}
#endif
+ if (cpuid_eax(0x80000000) >= 0x80000006)
+ num_cache_leaves = 3;
}
static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 44f2c5f2dda..70c87de582c 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -294,7 +294,7 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c)
if (c->x86 >= 0x6)
c->x86_model += ((tfms >> 16) & 0xF) << 4;
c->x86_mask = tfms & 15;
-#ifdef CONFIG_SMP
+#ifdef CONFIG_X86_HT
c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
#else
c->apicid = (ebx >> 24) & 0xFF;
@@ -319,7 +319,7 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c)
early_intel_workaround(c);
#ifdef CONFIG_X86_HT
- phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
+ c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
#endif
}
@@ -477,11 +477,9 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
int index_msb, core_bits;
- int cpu = smp_processor_id();
cpuid(1, &eax, &ebx, &ecx, &edx);
-
if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
return;
@@ -492,16 +490,17 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
} else if (smp_num_siblings > 1 ) {
if (smp_num_siblings > NR_CPUS) {
- printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
+ printk(KERN_WARNING "CPU: Unsupported number of the "
+ "siblings %d", smp_num_siblings);
smp_num_siblings = 1;
return;
}
index_msb = get_count_order(smp_num_siblings);
- phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+ c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
- phys_proc_id[cpu]);
+ c->phys_proc_id);
smp_num_siblings = smp_num_siblings / c->x86_max_cores;
@@ -509,12 +508,12 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
core_bits = get_count_order(c->x86_max_cores);
- cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
+ c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
((1 << core_bits) - 1);
if (c->x86_max_cores > 1)
printk(KERN_INFO "CPU: Processor Core ID: %d\n",
- cpu_core_id[cpu]);
+ c->cpu_core_id);
}
}
#endif
@@ -613,6 +612,12 @@ void __cpuinit cpu_init(void)
set_in_cr4(X86_CR4_TSD);
}
+ /* The CPU hotplug case */
+ if (cpu_gdt_descr->address) {
+ gdt = (struct desc_struct *)cpu_gdt_descr->address;
+ memset(gdt, 0, PAGE_SIZE);
+ goto old_gdt;
+ }
/*
* This is a horrible hack to allocate the GDT. The problem
* is that cpu_init() is called really early for the boot CPU
@@ -631,7 +636,7 @@ void __cpuinit cpu_init(void)
local_irq_enable();
}
}
-
+old_gdt:
/*
* Initialize the per-CPU GDT with the boot GDT,
* and set up the GDT descriptor:
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index 05668e3598c..5fd65325b81 100644
--- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -371,11 +371,11 @@ static int acpi_cpufreq_early_init_acpi(void)
dprintk("acpi_cpufreq_early_init\n");
- for_each_cpu(i) {
+ for_each_possible_cpu(i) {
data = kzalloc(sizeof(struct acpi_processor_performance),
GFP_KERNEL);
if (!data) {
- for_each_cpu(j) {
+ for_each_possible_cpu(j) {
kfree(acpi_perf_data[j]);
acpi_perf_data[j] = NULL;
}
@@ -584,7 +584,7 @@ acpi_cpufreq_exit (void)
cpufreq_unregister_driver(&acpi_cpufreq_driver);
- for_each_cpu(i) {
+ for_each_possible_cpu(i) {
kfree(acpi_perf_data[i]);
acpi_perf_data[i] = NULL;
}
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
index 31c3a5baaa7..f7e4356f682 100644
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -361,11 +361,11 @@ static int centrino_cpu_early_init_acpi(void)
unsigned int i, j;
struct acpi_processor_performance *data;
- for_each_cpu(i) {
+ for_each_possible_cpu(i) {
data = kzalloc(sizeof(struct acpi_processor_performance),
GFP_KERNEL);
if (!data) {
- for_each_cpu(j) {
+ for_each_possible_cpu(j) {
kfree(acpi_perf_data[j]);
acpi_perf_data[j] = NULL;
}
@@ -805,7 +805,7 @@ static void __exit centrino_exit(void)
cpufreq_unregister_driver(&centrino_driver);
#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
- for_each_cpu(j) {
+ for_each_possible_cpu(j) {
kfree(acpi_perf_data[j]);
acpi_perf_data[j] = NULL;
}
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
index fc32c8028e2..f03b7f94c30 100644
--- a/arch/i386/kernel/cpu/cyrix.c
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -354,7 +354,7 @@ static void __init init_nsc(struct cpuinfo_x86 *c)
* This function only handles the GX processor, and kicks every
* thing else to the Cyrix init function above - that should
* cover any processors that might have been branded differently
- * after NSC aquired Cyrix.
+ * after NSC acquired Cyrix.
*
* If this breaks your GX1 horribly, please e-mail
* info-linux@ldcmail.amd.com to tell us.
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 5386b29bb5a..10afc645c54 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -122,6 +122,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
select_idle_routine(c);
l2 = init_intel_cacheinfo(c);
+ if (c->cpuid_level > 9 ) {
+ unsigned eax = cpuid_eax(10);
+ /* Check for version and the number of counters */
+ if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
+ set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
+ }
/* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index c8547a6fa7e..e9f0b928b0a 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -4,6 +4,7 @@
* Changes:
* Venkatesh Pallipadi : Adding cache identification through cpuid(4)
* Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
+ * Andi Kleen : CPUID4 emulation on AMD.
*/
#include <linux/init.h>
@@ -130,25 +131,111 @@ struct _cpuid4_info {
cpumask_t shared_cpu_map;
};
-static unsigned short num_cache_leaves;
+unsigned short num_cache_leaves;
+
+/* AMD doesn't have CPUID4. Emulate it here to report the same
+ information to the user. This makes some assumptions about the machine:
+ No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs.
+
+ In theory the TLBs could be reported as fake type (they are in "dummy").
+ Maybe later */
+union l1_cache {
+ struct {
+ unsigned line_size : 8;
+ unsigned lines_per_tag : 8;
+ unsigned assoc : 8;
+ unsigned size_in_kb : 8;
+ };
+ unsigned val;
+};
+
+union l2_cache {
+ struct {
+ unsigned line_size : 8;
+ unsigned lines_per_tag : 4;
+ unsigned assoc : 4;
+ unsigned size_in_kb : 16;
+ };
+ unsigned val;
+};
+
+static const unsigned short assocs[] = {
+ [1] = 1, [2] = 2, [4] = 4, [6] = 8,
+ [8] = 16,
+ [0xf] = 0xffff // ??
+ };
+static const unsigned char levels[] = { 1, 1, 2 };
+static const unsigned char types[] = { 1, 2, 3 };
+
+static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
+ union _cpuid4_leaf_ebx *ebx,
+ union _cpuid4_leaf_ecx *ecx)
+{
+ unsigned dummy;
+ unsigned line_size, lines_per_tag, assoc, size_in_kb;
+ union l1_cache l1i, l1d;
+ union l2_cache l2;
+
+ eax->full = 0;
+ ebx->full = 0;
+ ecx->full = 0;
+
+ cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
+ cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy);
+
+ if (leaf > 2 || !l1d.val || !l1i.val || !l2.val)
+ return;
+
+ eax->split.is_self_initializing = 1;
+ eax->split.type = types[leaf];
+ eax->split.level = levels[leaf];
+ eax->split.num_threads_sharing = 0;
+ eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
+
+ if (leaf <= 1) {
+ union l1_cache *l1 = leaf == 0 ? &l1d : &l1i;
+ assoc = l1->assoc;
+ line_size = l1->line_size;
+ lines_per_tag = l1->lines_per_tag;
+ size_in_kb = l1->size_in_kb;
+ } else {
+ assoc = l2.assoc;
+ line_size = l2.line_size;
+ lines_per_tag = l2.lines_per_tag;
+ /* cpu_data has errata corrections for K7 applied */
+ size_in_kb = current_cpu_data.x86_cache_size;
+ }
+
+ if (assoc == 0xf)
+ eax->split.is_fully_associative = 1;
+ ebx->split.coherency_line_size = line_size - 1;
+ ebx->split.ways_of_associativity = assocs[assoc] - 1;
+ ebx->split.physical_line_partition = lines_per_tag - 1;
+ ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
+ (ebx->split.ways_of_associativity + 1) - 1;
+}
static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
- unsigned int eax, ebx, ecx, edx;
- union _cpuid4_leaf_eax cache_eax;
+ union _cpuid4_leaf_eax eax;
+ union _cpuid4_leaf_ebx ebx;
+ union _cpuid4_leaf_ecx ecx;
+ unsigned edx;
- cpuid_count(4, index, &eax, &ebx, &ecx, &edx);
- cache_eax.full = eax;
- if (cache_eax.split.type == CACHE_TYPE_NULL)
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ amd_cpuid4(index, &eax, &ebx, &ecx);
+ else
+ cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ if (eax.split.type == CACHE_TYPE_NULL)
return -EIO; /* better error ? */
- this_leaf->eax.full = eax;
- this_leaf->ebx.full = ebx;
- this_leaf->ecx.full = ecx;
- this_leaf->size = (this_leaf->ecx.split.number_of_sets + 1) *
- (this_leaf->ebx.split.coherency_line_size + 1) *
- (this_leaf->ebx.split.physical_line_partition + 1) *
- (this_leaf->ebx.split.ways_of_associativity + 1);
+ this_leaf->eax = eax;
+ this_leaf->ebx = ebx;
+ this_leaf->ecx = ecx;
+ this_leaf->size = (ecx.split.number_of_sets + 1) *
+ (ebx.split.coherency_line_size + 1) *
+ (ebx.split.physical_line_partition + 1) *
+ (ebx.split.ways_of_associativity + 1);
return 0;
}
@@ -174,7 +261,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
-#ifdef CONFIG_SMP
+#ifdef CONFIG_X86_HT
unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data);
#endif
@@ -296,14 +383,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
if (new_l2) {
l2 = new_l2;
-#ifdef CONFIG_SMP
+#ifdef CONFIG_X86_HT
cpu_llc_id[cpu] = l2_id;
#endif
}
if (new_l3) {
l3 = new_l3;
-#ifdef CONFIG_SMP
+#ifdef CONFIG_X86_HT
cpu_llc_id[cpu] = l3_id;
#endif
}
@@ -642,7 +729,7 @@ static void __cpuexit cache_remove_dev(struct sys_device * sys_dev)
return;
}
-static int cacheinfo_cpu_callback(struct notifier_block *nfb,
+static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
@@ -660,7 +747,7 @@ static int cacheinfo_cpu_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
-static struct notifier_block cacheinfo_cpu_notifier =
+static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier =
{
.notifier_call = cacheinfo_cpu_callback,
};
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
index a19fcb262db..f54a15268ed 100644
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -18,7 +18,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
* applications want to get the raw CPUID data, they should access
* /dev/cpu/<cpu_nr>/cpuid instead.
*/
- static char *x86_cap_flags[] = {
+ static const char * const x86_cap_flags[] = {
/* Intel-defined */
"fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
"cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
@@ -62,7 +62,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
};
- static char *x86_power_flags[] = {
+ static const char * const x86_power_flags[] = {
"ts", /* temperature sensor */
"fid", /* frequency id control */
"vid", /* voltage id control */
@@ -109,9 +109,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
#ifdef CONFIG_X86_HT
if (c->x86_max_cores * smp_num_siblings > 1) {
- seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]);
+ seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n]));
- seq_printf(m, "core id\t\t: %d\n", cpu_core_id[n]);
+ seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
}
#endif
diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c
index 1d9a4abcdfc..f6dfa9fb675 100644
--- a/arch/i386/kernel/cpuid.c
+++ b/arch/i386/kernel/cpuid.c
@@ -183,7 +183,7 @@ static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long ac
return NOTIFY_OK;
}
-static struct notifier_block cpuid_class_cpu_notifier =
+static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier =
{
.notifier_call = cpuid_class_cpu_callback,
};
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
index 2b0cfce24a6..48f0f62f781 100644
--- a/arch/i386/kernel/crash.c
+++ b/arch/i386/kernel/crash.c
@@ -114,19 +114,15 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
atomic_dec(&waiting_for_crash_ipi);
/* Assume hlt works */
halt();
- for(;;);
+ for (;;)
+ cpu_relax();
return 1;
}
-/*
- * By using the NMI code instead of a vector we just sneak thru the
- * word generator coming out with just what we want. AND it does
- * not matter if clustered_apic_mode is set or not.
- */
static void smp_send_nmi_allbutself(void)
{
- send_IPI_allbutself(APIC_DM_NMI);
+ send_IPI_allbutself(NMI_VECTOR);
}
static void nmi_shootdown_cpus(void)
@@ -162,7 +158,7 @@ static void nmi_shootdown_cpus(void)
void machine_crash_shutdown(struct pt_regs *regs)
{
/* This function is only called after the system
- * has paniced or is otherwise in a critical state.
+ * has panicked or is otherwise in a critical state.
* The minimum amount of code to allow a kexec'd kernel
* to run successfully needs to happen here.
*
diff --git a/arch/i386/kernel/doublefault.c b/arch/i386/kernel/doublefault.c
index 5edb1d379ad..b4d14c2eb34 100644
--- a/arch/i386/kernel/doublefault.c
+++ b/arch/i386/kernel/doublefault.c
@@ -44,7 +44,8 @@ static void doublefault_fn(void)
}
}
- for (;;) /* nothing */;
+ for (;;)
+ cpu_relax();
}
struct tss_struct doublefault_tss __cacheline_aligned = {
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index cfc683f153b..fbdb933251b 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -48,6 +48,7 @@
#include <asm/smp.h>
#include <asm/page.h>
#include <asm/desc.h>
+#include <asm/dwarf2.h>
#include "irq_vectors.h"
#define nr_syscalls ((syscall_table_size)/4)
@@ -82,34 +83,76 @@ VM_MASK = 0x00020000
#define resume_kernel restore_nocheck
#endif
+#ifdef CONFIG_VM86
+#define resume_userspace_sig check_userspace
+#else
+#define resume_userspace_sig resume_userspace
+#endif
+
#define SAVE_ALL \
cld; \
pushl %es; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ /*CFI_REL_OFFSET es, 0;*/\
pushl %ds; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ /*CFI_REL_OFFSET ds, 0;*/\
pushl %eax; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET eax, 0;\
pushl %ebp; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET ebp, 0;\
pushl %edi; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET edi, 0;\
pushl %esi; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET esi, 0;\
pushl %edx; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET edx, 0;\
pushl %ecx; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET ecx, 0;\
pushl %ebx; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET ebx, 0;\
movl $(__USER_DS), %edx; \
movl %edx, %ds; \
movl %edx, %es;
#define RESTORE_INT_REGS \
popl %ebx; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE ebx;\
popl %ecx; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE ecx;\
popl %edx; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE edx;\
popl %esi; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE esi;\
popl %edi; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE edi;\
popl %ebp; \
- popl %eax
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE ebp;\
+ popl %eax; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE eax
#define RESTORE_REGS \
RESTORE_INT_REGS; \
1: popl %ds; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ /*CFI_RESTORE ds;*/\
2: popl %es; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ /*CFI_RESTORE es;*/\
.section .fixup,"ax"; \
3: movl $0,(%esp); \
jmp 1b; \
@@ -122,13 +165,43 @@ VM_MASK = 0x00020000
.long 2b,4b; \
.previous
+#define RING0_INT_FRAME \
+ CFI_STARTPROC simple;\
+ CFI_DEF_CFA esp, 3*4;\
+ /*CFI_OFFSET cs, -2*4;*/\
+ CFI_OFFSET eip, -3*4
+
+#define RING0_EC_FRAME \
+ CFI_STARTPROC simple;\
+ CFI_DEF_CFA esp, 4*4;\
+ /*CFI_OFFSET cs, -2*4;*/\
+ CFI_OFFSET eip, -3*4
+
+#define RING0_PTREGS_FRAME \
+ CFI_STARTPROC simple;\
+ CFI_DEF_CFA esp, OLDESP-EBX;\
+ /*CFI_OFFSET cs, CS-OLDESP;*/\
+ CFI_OFFSET eip, EIP-OLDESP;\
+ /*CFI_OFFSET es, ES-OLDESP;*/\
+ /*CFI_OFFSET ds, DS-OLDESP;*/\
+ CFI_OFFSET eax, EAX-OLDESP;\
+ CFI_OFFSET ebp, EBP-OLDESP;\
+ CFI_OFFSET edi, EDI-OLDESP;\
+ CFI_OFFSET esi, ESI-OLDESP;\
+ CFI_OFFSET edx, EDX-OLDESP;\
+ CFI_OFFSET ecx, ECX-OLDESP;\
+ CFI_OFFSET ebx, EBX-OLDESP
ENTRY(ret_from_fork)
+ CFI_STARTPROC
pushl %eax
+ CFI_ADJUST_CFA_OFFSET -4
call schedule_tail
GET_THREAD_INFO(%ebp)
popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
jmp syscall_exit
+ CFI_ENDPROC
/*
* Return to user mode is not as complex as all this looks,
@@ -139,10 +212,12 @@ ENTRY(ret_from_fork)
# userspace resumption stub bypassing syscall exit tracing
ALIGN
+ RING0_PTREGS_FRAME
ret_from_exception:
preempt_stop
ret_from_intr:
GET_THREAD_INFO(%ebp)
+check_userspace:
movl EFLAGS(%esp), %eax # mix EFLAGS and CS
movb CS(%esp), %al
testl $(VM_MASK | 3), %eax
@@ -171,20 +246,38 @@ need_resched:
call preempt_schedule_irq
jmp need_resched
#endif
+ CFI_ENDPROC
/* SYSENTER_RETURN points to after the "sysenter" instruction in
the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
# sysenter call handler stub
ENTRY(sysenter_entry)
+ CFI_STARTPROC simple
+ CFI_DEF_CFA esp, 0
+ CFI_REGISTER esp, ebp
movl TSS_sysenter_esp0(%esp),%esp
sysenter_past_esp:
sti
pushl $(__USER_DS)
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET ss, 0*/
pushl %ebp
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET esp, 0
pushfl
+ CFI_ADJUST_CFA_OFFSET 4
pushl $(__USER_CS)
- pushl $SYSENTER_RETURN
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET cs, 0*/
+ /*
+ * Push current_thread_info()->sysenter_return to the stack.
+ * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
+ * pushed above; +8 corresponds to copy_thread's esp0 setting.
+ */
+ pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET eip, 0
/*
* Load the potential sixth argument from user stack.
@@ -199,6 +292,7 @@ sysenter_past_esp:
.previous
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
GET_THREAD_INFO(%ebp)
@@ -219,11 +313,14 @@ sysenter_past_esp:
xorl %ebp,%ebp
sti
sysexit
+ CFI_ENDPROC
# system call handler stub
ENTRY(system_call)
+ RING0_INT_FRAME # can't unwind into user space anyway
pushl %eax # save orig_eax
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
GET_THREAD_INFO(%ebp)
testl $TF_MASK,EFLAGS(%esp)
@@ -256,10 +353,12 @@ restore_all:
movb CS(%esp), %al
andl $(VM_MASK | (4 << 8) | 3), %eax
cmpl $((4 << 8) | 3), %eax
+ CFI_REMEMBER_STATE
je ldt_ss # returning to user-space with LDT SS
restore_nocheck:
RESTORE_REGS
addl $4, %esp
+ CFI_ADJUST_CFA_OFFSET -4
1: iret
.section .fixup,"ax"
iret_exc:
@@ -273,6 +372,7 @@ iret_exc:
.long 1b,iret_exc
.previous
+ CFI_RESTORE_STATE
ldt_ss:
larl OLDSS(%esp), %eax
jnz restore_nocheck
@@ -285,11 +385,13 @@ ldt_ss:
* CPUs, which we can try to work around to make
* dosemu and wine happy. */
subl $8, %esp # reserve space for switch16 pointer
+ CFI_ADJUST_CFA_OFFSET 8
cli
movl %esp, %eax
/* Set up the 16bit stack frame with switch32 pointer on top,
* and a switch16 pointer on top of the current frame. */
call setup_x86_bogus_stack
+ CFI_ADJUST_CFA_OFFSET -8 # frame has moved
RESTORE_REGS
lss 20+4(%esp), %esp # switch to 16bit stack
1: iret
@@ -297,9 +399,11 @@ ldt_ss:
.align 4
.long 1b,iret_exc
.previous
+ CFI_ENDPROC
# perform work that needs to be done immediately before resumption
ALIGN
+ RING0_PTREGS_FRAME # can't unwind into user space anyway
work_pending:
testb $_TIF_NEED_RESCHED, %cl
jz work_notifysig
@@ -323,18 +427,20 @@ work_notifysig: # deal with pending signals and
# vm86-space
xorl %edx, %edx
call do_notify_resume
- jmp resume_userspace
+ jmp resume_userspace_sig
ALIGN
work_notifysig_v86:
#ifdef CONFIG_VM86
pushl %ecx # save ti_flags for do_notify_resume
+ CFI_ADJUST_CFA_OFFSET 4
call save_v86_state # %eax contains pt_regs pointer
popl %ecx
+ CFI_ADJUST_CFA_OFFSET -4
movl %eax, %esp
xorl %edx, %edx
call do_notify_resume
- jmp resume_userspace
+ jmp resume_userspace_sig
#endif
# perform syscall exit tracing
@@ -363,19 +469,21 @@ syscall_exit_work:
movl $1, %edx
call do_syscall_trace
jmp resume_userspace
+ CFI_ENDPROC
- ALIGN
+ RING0_INT_FRAME # can't unwind into user space anyway
syscall_fault:
pushl %eax # save orig_eax
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
GET_THREAD_INFO(%ebp)
movl $-EFAULT,EAX(%esp)
jmp resume_userspace
- ALIGN
syscall_badsys:
movl $-ENOSYS,EAX(%esp)
jmp resume_userspace
+ CFI_ENDPROC
#define FIXUP_ESPFIX_STACK \
movl %esp, %eax; \
@@ -387,16 +495,21 @@ syscall_badsys:
movl %eax, %esp;
#define UNWIND_ESPFIX_STACK \
pushl %eax; \
+ CFI_ADJUST_CFA_OFFSET 4; \
movl %ss, %eax; \
/* see if on 16bit stack */ \
cmpw $__ESPFIX_SS, %ax; \
- jne 28f; \
- movl $__KERNEL_DS, %edx; \
- movl %edx, %ds; \
- movl %edx, %es; \
+ je 28f; \
+27: popl %eax; \
+ CFI_ADJUST_CFA_OFFSET -4; \
+.section .fixup,"ax"; \
+28: movl $__KERNEL_DS, %eax; \
+ movl %eax, %ds; \
+ movl %eax, %es; \
/* switch to 32bit stack */ \
- FIXUP_ESPFIX_STACK \
-28: popl %eax;
+ FIXUP_ESPFIX_STACK; \
+ jmp 27b; \
+.previous
/*
* Build the entry stubs and pointer table with
@@ -408,9 +521,14 @@ ENTRY(interrupt)
vector=0
ENTRY(irq_entries_start)
+ RING0_INT_FRAME
.rept NR_IRQS
ALIGN
-1: pushl $vector-256
+ .if vector
+ CFI_ADJUST_CFA_OFFSET -4
+ .endif
+1: pushl $~(vector)
+ CFI_ADJUST_CFA_OFFSET 4
jmp common_interrupt
.data
.long 1b
@@ -424,60 +542,99 @@ common_interrupt:
movl %esp,%eax
call do_IRQ
jmp ret_from_intr
+ CFI_ENDPROC
#define BUILD_INTERRUPT(name, nr) \
ENTRY(name) \
- pushl $nr-256; \
- SAVE_ALL \
+ RING0_INT_FRAME; \
+ pushl $~(nr); \
+ CFI_ADJUST_CFA_OFFSET 4; \
+ SAVE_ALL; \
movl %esp,%eax; \
call smp_/**/name; \
- jmp ret_from_intr;
+ jmp ret_from_intr; \
+ CFI_ENDPROC
/* The include is where all of the SMP etc. interrupts come from */
#include "entry_arch.h"
ENTRY(divide_error)
+ RING0_INT_FRAME
pushl $0 # no error code
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_divide_error
+ CFI_ADJUST_CFA_OFFSET 4
ALIGN
error_code:
pushl %ds
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET ds, 0*/
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET eax, 0
xorl %eax, %eax
pushl %ebp
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebp, 0
pushl %edi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edi, 0
pushl %esi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET esi, 0
pushl %edx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edx, 0
decl %eax # eax = -1
pushl %ecx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ecx, 0
pushl %ebx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebx, 0
cld
pushl %es
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET es, 0*/
UNWIND_ESPFIX_STACK
popl %ecx
+ CFI_ADJUST_CFA_OFFSET -4
+ /*CFI_REGISTER es, ecx*/
movl ES(%esp), %edi # get the function address
movl ORIG_EAX(%esp), %edx # get the error code
movl %eax, ORIG_EAX(%esp)
movl %ecx, ES(%esp)
+ /*CFI_REL_OFFSET es, ES*/
movl $(__USER_DS), %ecx
movl %ecx, %ds
movl %ecx, %es
movl %esp,%eax # pt_regs pointer
call *%edi
jmp ret_from_exception
+ CFI_ENDPROC
ENTRY(coprocessor_error)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_coprocessor_error
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(simd_coprocessor_error)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_simd_coprocessor_error
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(device_not_available)
+ RING0_INT_FRAME
pushl $-1 # mark this as an int
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
movl %cr0, %eax
testl $0x4, %eax # EM (math emulation bit)
@@ -487,9 +644,12 @@ ENTRY(device_not_available)
jmp ret_from_exception
device_not_available_emulate:
pushl $0 # temporary storage for ORIG_EIP
+ CFI_ADJUST_CFA_OFFSET 4
call math_emulate
addl $4, %esp
+ CFI_ADJUST_CFA_OFFSET -4
jmp ret_from_exception
+ CFI_ENDPROC
/*
* Debug traps and NMI can happen at the one SYSENTER instruction
@@ -514,16 +674,19 @@ label: \
pushl $sysenter_past_esp
KPROBE_ENTRY(debug)
+ RING0_INT_FRAME
cmpl $sysenter_entry,(%esp)
jne debug_stack_correct
FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
debug_stack_correct:
pushl $-1 # mark this as an int
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
xorl %edx,%edx # error code 0
movl %esp,%eax # pt_regs pointer
call do_debug
jmp ret_from_exception
+ CFI_ENDPROC
.previous .text
/*
* NMI is doubly nasty. It can happen _while_ we're handling
@@ -534,14 +697,18 @@ debug_stack_correct:
* fault happened on the sysenter path.
*/
ENTRY(nmi)
+ RING0_INT_FRAME
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
movl %ss, %eax
cmpw $__ESPFIX_SS, %ax
popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
je nmi_16bit_stack
cmpl $sysenter_entry,(%esp)
je nmi_stack_fixup
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
movl %esp,%eax
/* Do not access memory above the end of our stack page,
* it might not exist.
@@ -549,16 +716,19 @@ ENTRY(nmi)
andl $(THREAD_SIZE-1),%eax
cmpl $(THREAD_SIZE-20),%eax
popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
jae nmi_stack_correct
cmpl $sysenter_entry,12(%esp)
je nmi_debug_stack_check
nmi_stack_correct:
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_nmi
jmp restore_all
+ CFI_ENDPROC
nmi_stack_fixup:
FIX_STACK(12,nmi_stack_correct, 1)
@@ -574,94 +744,177 @@ nmi_debug_stack_check:
jmp nmi_stack_correct
nmi_16bit_stack:
+ RING0_INT_FRAME
/* create the pointer to lss back */
pushl %ss
+ CFI_ADJUST_CFA_OFFSET 4
pushl %esp
+ CFI_ADJUST_CFA_OFFSET 4
movzwl %sp, %esp
addw $4, (%esp)
/* copy the iret frame of 12 bytes */
.rept 3
pushl 16(%esp)
+ CFI_ADJUST_CFA_OFFSET 4
.endr
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
FIXUP_ESPFIX_STACK # %eax == %esp
+ CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved
xorl %edx,%edx # zero error code
call do_nmi
RESTORE_REGS
lss 12+4(%esp), %esp # back to 16bit stack
1: iret
+ CFI_ENDPROC
.section __ex_table,"a"
.align 4
.long 1b,iret_exc
.previous
KPROBE_ENTRY(int3)
+ RING0_INT_FRAME
pushl $-1 # mark this as an int
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_int3
jmp ret_from_exception
+ CFI_ENDPROC
.previous .text
ENTRY(overflow)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_overflow
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(bounds)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_bounds
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(invalid_op)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_invalid_op
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(coprocessor_segment_overrun)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_coprocessor_segment_overrun
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(invalid_TSS)
+ RING0_EC_FRAME
pushl $do_invalid_TSS
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(segment_not_present)
+ RING0_EC_FRAME
pushl $do_segment_not_present
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(stack_segment)
+ RING0_EC_FRAME
pushl $do_stack_segment
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
KPROBE_ENTRY(general_protection)
+ RING0_EC_FRAME
pushl $do_general_protection
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
.previous .text
ENTRY(alignment_check)
+ RING0_EC_FRAME
pushl $do_alignment_check
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
KPROBE_ENTRY(page_fault)
+ RING0_EC_FRAME
pushl $do_page_fault
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
.previous .text
#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl machine_check_vector
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
#endif
ENTRY(spurious_interrupt_bug)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_spurious_interrupt_bug
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
+
+#ifdef CONFIG_STACK_UNWIND
+ENTRY(arch_unwind_init_running)
+ CFI_STARTPROC
+ movl 4(%esp), %edx
+ movl (%esp), %ecx
+ leal 4(%esp), %eax
+ movl %ebx, EBX(%edx)
+ xorl %ebx, %ebx
+ movl %ebx, ECX(%edx)
+ movl %ebx, EDX(%edx)
+ movl %esi, ESI(%edx)
+ movl %edi, EDI(%edx)
+ movl %ebp, EBP(%edx)
+ movl %ebx, EAX(%edx)
+ movl $__USER_DS, DS(%edx)
+ movl $__USER_DS, ES(%edx)
+ movl %ebx, ORIG_EAX(%edx)
+ movl %ecx, EIP(%edx)
+ movl 12(%esp), %ecx
+ movl $__KERNEL_CS, CS(%edx)
+ movl %ebx, EFLAGS(%edx)
+ movl %eax, OLDESP(%edx)
+ movl 8(%esp), %eax
+ movl %ecx, 8(%esp)
+ movl EBX(%edx), %ebx
+ movl $__KERNEL_DS, OLDSS(%edx)
+ jmpl *%eax
+ CFI_ENDPROC
+ENDPROC(arch_unwind_init_running)
+#endif
.section .rodata,"a"
#include "syscall_table.S"
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c
new file mode 100644
index 00000000000..c6737c35815
--- /dev/null
+++ b/arch/i386/kernel/hpet.c
@@ -0,0 +1,67 @@
+#include <linux/clocksource.h>
+#include <linux/errno.h>
+#include <linux/hpet.h>
+#include <linux/init.h>
+
+#include <asm/hpet.h>
+#include <asm/io.h>
+
+#define HPET_MASK CLOCKSOURCE_MASK(32)
+#define HPET_SHIFT 22
+
+/* FSEC = 10^-15 NSEC = 10^-9 */
+#define FSEC_PER_NSEC 1000000
+
+static void *hpet_ptr;
+
+static cycle_t read_hpet(void)
+{
+ return (cycle_t)readl(hpet_ptr);
+}
+
+static struct clocksource clocksource_hpet = {
+ .name = "hpet",
+ .rating = 250,
+ .read = read_hpet,
+ .mask = HPET_MASK,
+ .mult = 0, /* set below */
+ .shift = HPET_SHIFT,
+ .is_continuous = 1,
+};
+
+static int __init init_hpet_clocksource(void)
+{
+ unsigned long hpet_period;
+ void __iomem* hpet_base;
+ u64 tmp;
+
+ if (!hpet_address)
+ return -ENODEV;
+
+ /* calculate the hpet address: */
+ hpet_base =
+ (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+ hpet_ptr = hpet_base + HPET_COUNTER;
+
+ /* calculate the frequency: */
+ hpet_period = readl(hpet_base + HPET_PERIOD);
+
+ /*
+ * hpet period is in femto seconds per cycle
+ * so we need to convert this to ns/cyc units
+ * aproximated by mult/2^shift
+ *
+ * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
+ * fsec/cyc * 1ns/1000000fsec * 2^shift = mult
+ * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
+ * (fsec/cyc << shift)/1000000 = mult
+ * (hpet_period << shift)/FSEC_PER_NSEC = mult
+ */
+ tmp = (u64)hpet_period << HPET_SHIFT;
+ do_div(tmp, FSEC_PER_NSEC);
+ clocksource_hpet.mult = (u32)tmp;
+
+ return clocksource_register(&clocksource_hpet);
+}
+
+module_init(init_hpet_clocksource);
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c
new file mode 100644
index 00000000000..477b24daff5
--- /dev/null
+++ b/arch/i386/kernel/i8253.c
@@ -0,0 +1,118 @@
+/*
+ * i8253.c 8253/PIT functions
+ *
+ */
+#include <linux/clocksource.h>
+#include <linux/spinlock.h>
+#include <linux/jiffies.h>
+#include <linux/sysdev.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <asm/smp.h>
+#include <asm/delay.h>
+#include <asm/i8253.h>
+#include <asm/io.h>
+
+#include "io_ports.h"
+
+DEFINE_SPINLOCK(i8253_lock);
+EXPORT_SYMBOL(i8253_lock);
+
+void setup_pit_timer(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8253_lock, flags);
+ outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
+ udelay(10);
+ outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
+ udelay(10);
+ outb(LATCH >> 8 , PIT_CH0); /* MSB */
+ spin_unlock_irqrestore(&i8253_lock, flags);
+}
+
+/*
+ * Since the PIT overflows every tick, its not very useful
+ * to just read by itself. So use jiffies to emulate a free
+ * running counter:
+ */
+static cycle_t pit_read(void)
+{
+ unsigned long flags;
+ int count;
+ u32 jifs;
+ static int old_count;
+ static u32 old_jifs;
+
+ spin_lock_irqsave(&i8253_lock, flags);
+ /*
+ * Although our caller may have the read side of xtime_lock,
+ * this is now a seqlock, and we are cheating in this routine
+ * by having side effects on state that we cannot undo if
+ * there is a collision on the seqlock and our caller has to
+ * retry. (Namely, old_jifs and old_count.) So we must treat
+ * jiffies as volatile despite the lock. We read jiffies
+ * before latching the timer count to guarantee that although
+ * the jiffies value might be older than the count (that is,
+ * the counter may underflow between the last point where
+ * jiffies was incremented and the point where we latch the
+ * count), it cannot be newer.
+ */
+ jifs = jiffies;
+ outb_p(0x00, PIT_MODE); /* latch the count ASAP */
+ count = inb_p(PIT_CH0); /* read the latched count */
+ count |= inb_p(PIT_CH0) << 8;
+
+ /* VIA686a test code... reset the latch if count > max + 1 */
+ if (count > LATCH) {
+ outb_p(0x34, PIT_MODE);
+ outb_p(LATCH & 0xff, PIT_CH0);
+ outb(LATCH >> 8, PIT_CH0);
+ count = LATCH - 1;
+ }
+
+ /*
+ * It's possible for count to appear to go the wrong way for a
+ * couple of reasons:
+ *
+ * 1. The timer counter underflows, but we haven't handled the
+ * resulting interrupt and incremented jiffies yet.
+ * 2. Hardware problem with the timer, not giving us continuous time,
+ * the counter does small "jumps" upwards on some Pentium systems,
+ * (see c't 95/10 page 335 for Neptun bug.)
+ *
+ * Previous attempts to handle these cases intelligently were
+ * buggy, so we just do the simple thing now.
+ */
+ if (count > old_count && jifs == old_jifs) {
+ count = old_count;
+ }
+ old_count = count;
+ old_jifs = jifs;
+
+ spin_unlock_irqrestore(&i8253_lock, flags);
+
+ count = (LATCH - 1) - count;
+
+ return (cycle_t)(jifs * LATCH) + count;
+}
+
+static struct clocksource clocksource_pit = {
+ .name = "pit",
+ .rating = 110,
+ .read = pit_read,
+ .mask = CLOCKSOURCE_MASK(32),
+ .mult = 0,
+ .shift = 20,
+};
+
+static int __init init_pit_clocksource(void)
+{
+ if (num_possible_cpus() > 4) /* PIT does not scale! */
+ return 0;
+
+ clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
+ return clocksource_register(&clocksource_pit);
+}
+module_init(init_pit_clocksource);
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
index b7636b96e10..c1a42feba28 100644
--- a/arch/i386/kernel/i8259.c
+++ b/arch/i386/kernel/i8259.c
@@ -175,7 +175,7 @@ static void mask_and_ack_8259A(unsigned int irq)
* Lightweight spurious IRQ detection. We do not want
* to overdo spurious IRQ handling - it's usually a sign
* of hardware problems, so we only do the checks we can
- * do without slowing down good hardware unnecesserily.
+ * do without slowing down good hardware unnecessarily.
*
* Note that IRQ7 and IRQ15 (the two spurious IRQs
* usually resulting from the 8259A-1|2 PICs) occur
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index a62df3e764c..72ae414e4d4 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -38,6 +38,7 @@
#include <asm/desc.h>
#include <asm/timer.h>
#include <asm/i8259.h>
+#include <asm/nmi.h>
#include <mach_apic.h>
@@ -50,6 +51,7 @@ atomic_t irq_mis_count;
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
int timer_over_8254 __initdata = 1;
@@ -1161,10 +1163,17 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
int assign_irq_vector(int irq)
{
static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
+ unsigned long flags;
+ int vector;
+
+ BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
- BUG_ON(irq >= NR_IRQ_VECTORS);
- if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
+ spin_lock_irqsave(&vector_lock, flags);
+
+ if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
+ spin_unlock_irqrestore(&vector_lock, flags);
return IO_APIC_VECTOR(irq);
+ }
next:
current_vector += 8;
if (current_vector == SYSCALL_VECTOR)
@@ -1172,16 +1181,21 @@ next:
if (current_vector >= FIRST_SYSTEM_VECTOR) {
offset++;
- if (!(offset%8))
+ if (!(offset%8)) {
+ spin_unlock_irqrestore(&vector_lock, flags);
return -ENOSPC;
+ }
current_vector = FIRST_DEVICE_VECTOR + offset;
}
- vector_irq[current_vector] = irq;
+ vector = current_vector;
+ vector_irq[vector] = irq;
if (irq != AUTO_ASSIGN)
- IO_APIC_VECTOR(irq) = current_vector;
+ IO_APIC_VECTOR(irq) = vector;
- return current_vector;
+ spin_unlock_irqrestore(&vector_lock, flags);
+
+ return vector;
}
static struct hw_interrupt_type ioapic_level_type;
@@ -1193,21 +1207,14 @@ static struct hw_interrupt_type ioapic_edge_type;
static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
{
- if (use_pci_vector() && !platform_legacy_irq(irq)) {
- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
- irq_desc[vector].handler = &ioapic_level_type;
- else
- irq_desc[vector].handler = &ioapic_edge_type;
- set_intr_gate(vector, interrupt[vector]);
- } else {
- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
- irq_desc[irq].handler = &ioapic_level_type;
- else
- irq_desc[irq].handler = &ioapic_edge_type;
- set_intr_gate(vector, interrupt[irq]);
- }
+ unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
+
+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+ trigger == IOAPIC_LEVEL)
+ irq_desc[idx].handler = &ioapic_level_type;
+ else
+ irq_desc[idx].handler = &ioapic_edge_type;
+ set_intr_gate(vector, interrupt[idx]);
}
static void __init setup_IO_APIC_irqs(void)
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 248e922ee13..c703bc7b088 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -53,8 +53,8 @@ static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
*/
fastcall unsigned int do_IRQ(struct pt_regs *regs)
{
- /* high bits used in ret_from_ code */
- int irq = regs->orig_eax & 0xff;
+ /* high bit used in ret_from_ code */
+ int irq = ~regs->orig_eax;
#ifdef CONFIG_4KSTACKS
union irq_ctx *curctx, *irqctx;
u32 *isp;
@@ -95,6 +95,14 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs)
irqctx->tinfo.task = curctx->tinfo.task;
irqctx->tinfo.previous_esp = current_stack_pointer;
+ /*
+ * Copy the softirq bits in preempt_count so that the
+ * softirq checks work in the hardirq context.
+ */
+ irqctx->tinfo.preempt_count =
+ (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
+ (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
+
asm volatile(
" xchgl %%ebx,%%esp \n"
" call __do_IRQ \n"
@@ -219,7 +227,7 @@ int show_interrupts(struct seq_file *p, void *v)
if (i == 0) {
seq_printf(p, " ");
for_each_online_cpu(j)
- seq_printf(p, "CPU%d ",j);
+ seq_printf(p, "CPU%-8d",j);
seq_putc(p, '\n');
}
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index 395a9a6dff8..727e419ad78 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -57,34 +57,85 @@ static __always_inline void set_jmp_op(void *from, void *to)
/*
* returns non-zero if opcodes can be boosted.
*/
-static __always_inline int can_boost(kprobe_opcode_t opcode)
+static __always_inline int can_boost(kprobe_opcode_t *opcodes)
{
- switch (opcode & 0xf0 ) {
+#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \
+ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
+ (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
+ (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
+ (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
+ << (row % 32))
+ /*
+ * Undefined/reserved opcodes, conditional jump, Opcode Extension
+ * Groups, and some special opcodes can not be boost.
+ */
+ static const unsigned long twobyte_is_boostable[256 / 32] = {
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* ------------------------------- */
+ W(0x00, 0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0)| /* 00 */
+ W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */
+ W(0x20, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */
+ W(0x30, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
+ W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
+ W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */
+ W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1)| /* 60 */
+ W(0x70, 0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
+ W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 80 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
+ W(0xa0, 1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1)| /* a0 */
+ W(0xb0, 1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1), /* b0 */
+ W(0xc0, 1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1)| /* c0 */
+ W(0xd0, 0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1), /* d0 */
+ W(0xe0, 0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1)| /* e0 */
+ W(0xf0, 0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0) /* f0 */
+ /* ------------------------------- */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ };
+#undef W
+ kprobe_opcode_t opcode;
+ kprobe_opcode_t *orig_opcodes = opcodes;
+retry:
+ if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
+ return 0;
+ opcode = *(opcodes++);
+
+ /* 2nd-byte opcode */
+ if (opcode == 0x0f) {
+ if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
+ return 0;
+ return test_bit(*opcodes, twobyte_is_boostable);
+ }
+
+ switch (opcode & 0xf0) {
+ case 0x60:
+ if (0x63 < opcode && opcode < 0x67)
+ goto retry; /* prefixes */
+ /* can't boost Address-size override and bound */
+ return (opcode != 0x62 && opcode != 0x67);
case 0x70:
return 0; /* can't boost conditional jump */
- case 0x90:
- /* can't boost call and pushf */
- return opcode != 0x9a && opcode != 0x9c;
case 0xc0:
- /* can't boost undefined opcodes and soft-interruptions */
- return (0xc1 < opcode && opcode < 0xc6) ||
- (0xc7 < opcode && opcode < 0xcc) || opcode == 0xcf;
+ /* can't boost software-interruptions */
+ return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
case 0xd0:
/* can boost AA* and XLAT */
return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
case 0xe0:
- /* can boost in/out and (may be) jmps */
- return (0xe3 < opcode && opcode != 0xe8);
+ /* can boost in/out and absolute jmps */
+ return ((opcode & 0x04) || opcode == 0xea);
case 0xf0:
+ if ((opcode & 0x0c) == 0 && opcode != 0xf1)
+ goto retry; /* lock/rep(ne) prefix */
/* clear and set flags can be boost */
return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
default:
- /* currently, can't boost 2 bytes opcodes */
- return opcode != 0x0f;
+ if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
+ goto retry; /* prefixes */
+ /* can't boost CS override and call */
+ return (opcode != 0x2e && opcode != 0x9a);
}
}
-
/*
* returns non-zero if opcode modifies the interrupt flag.
*/
@@ -109,7 +160,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
p->opcode = *p->addr;
- if (can_boost(p->opcode)) {
+ if (can_boost(p->addr)) {
p->ainsn.boostable = 0;
} else {
p->ainsn.boostable = -1;
@@ -208,7 +259,9 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
struct kprobe_ctlblk *kcb;
#ifdef CONFIG_PREEMPT
unsigned pre_preempt_count = preempt_count();
-#endif /* CONFIG_PREEMPT */
+#else
+ unsigned pre_preempt_count = 1;
+#endif
addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));
@@ -285,22 +338,14 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
/* handler has already set things up, so skip ss setup */
return 1;
- if (p->ainsn.boostable == 1 &&
-#ifdef CONFIG_PREEMPT
- !(pre_preempt_count) && /*
- * This enables booster when the direct
- * execution path aren't preempted.
- */
-#endif /* CONFIG_PREEMPT */
- !p->post_handler && !p->break_handler ) {
+ss_probe:
+ if (pre_preempt_count && p->ainsn.boostable == 1 && !p->post_handler){
/* Boost up -- we can execute copied instructions directly */
reset_current_kprobe();
regs->eip = (unsigned long)p->ainsn.insn;
preempt_enable_no_resched();
return 1;
}
-
-ss_probe:
prepare_singlestep(p, regs);
kcb->kprobe_status = KPROBE_HIT_SS;
return 1;
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c
index f73d7374a2b..511abe52a94 100644
--- a/arch/i386/kernel/machine_kexec.c
+++ b/arch/i386/kernel/machine_kexec.c
@@ -133,9 +133,9 @@ typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
unsigned long start_address,
unsigned int has_pae) ATTRIB_NORET;
-const extern unsigned char relocate_new_kernel[];
+extern const unsigned char relocate_new_kernel[];
extern void relocate_new_kernel_end(void);
-const extern unsigned int relocate_new_kernel_size;
+extern const unsigned int relocate_new_kernel_size;
/*
* A architecture hook called to validate the
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c
index 7a328230e54..d022cb8fd72 100644
--- a/arch/i386/kernel/msr.c
+++ b/arch/i386/kernel/msr.c
@@ -266,7 +266,7 @@ static int msr_class_cpu_callback(struct notifier_block *nfb, unsigned long acti
return NOTIFY_OK;
}
-static struct notifier_block msr_class_cpu_notifier =
+static struct notifier_block __cpuinitdata msr_class_cpu_notifier =
{
.notifier_call = msr_class_cpu_callback,
};
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index d43b498ec74..a76e9314658 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -14,21 +14,17 @@
*/
#include <linux/config.h>
-#include <linux/mm.h>
#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/smp_lock.h>
#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
+#include <linux/percpu.h>
#include <asm/smp.h>
-#include <asm/div64.h>
#include <asm/nmi.h>
+#include <asm/intel_arch_perfmon.h>
#include "mach_traps.h"
@@ -100,6 +96,9 @@ int nmi_active;
(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
+#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
* the CPU is idle. To make sure the NMI watchdog really ticks on all
@@ -212,6 +211,8 @@ static int __init setup_nmi_watchdog(char *str)
__setup("nmi_watchdog=", setup_nmi_watchdog);
+static void disable_intel_arch_watchdog(void);
+
static void disable_lapic_nmi_watchdog(void)
{
if (nmi_active <= 0)
@@ -221,6 +222,10 @@ static void disable_lapic_nmi_watchdog(void)
wrmsr(MSR_K7_EVNTSEL0, 0, 0);
break;
case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ disable_intel_arch_watchdog();
+ break;
+ }
switch (boot_cpu_data.x86) {
case 6:
if (boot_cpu_data.x86_model > 0xd)
@@ -449,6 +454,53 @@ static int setup_p4_watchdog(void)
return 1;
}
+static void disable_intel_arch_watchdog(void)
+{
+ unsigned ebx;
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Unhalted Core Cycles Event or not.
+ * NOTE: Corresponding bit = 0 in ebp indicates event present.
+ */
+ ebx = cpuid_ebx(10);
+ if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+ wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
+}
+
+static int setup_intel_arch_watchdog(void)
+{
+ unsigned int evntsel;
+ unsigned ebx;
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Unhalted Core Cycles Event or not.
+ * NOTE: Corresponding bit = 0 in ebp indicates event present.
+ */
+ ebx = cpuid_ebx(10);
+ if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+ return 0;
+
+ nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+
+ clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
+ clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
+
+ evntsel = ARCH_PERFMON_EVENTSEL_INT
+ | ARCH_PERFMON_EVENTSEL_OS
+ | ARCH_PERFMON_EVENTSEL_USR
+ | ARCH_PERFMON_NMI_EVENT_SEL
+ | ARCH_PERFMON_NMI_EVENT_UMASK;
+
+ wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+ write_watchdog_counter("INTEL_ARCH_PERFCTR0");
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+ return 1;
+}
+
void setup_apic_nmi_watchdog (void)
{
switch (boot_cpu_data.x86_vendor) {
@@ -458,6 +510,11 @@ void setup_apic_nmi_watchdog (void)
setup_k7_watchdog();
break;
case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ if (!setup_intel_arch_watchdog())
+ return;
+ break;
+ }
switch (boot_cpu_data.x86) {
case 6:
if (boot_cpu_data.x86_model > 0xd)
@@ -561,7 +618,8 @@ void nmi_watchdog_tick (struct pt_regs * regs)
wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
- else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) {
+ else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 ||
+ nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
/* Only P6 based Pentium M need to re-unmask
* the apic vector but it doesn't hurt
* other P6 variant */
diff --git a/arch/i386/kernel/numaq.c b/arch/i386/kernel/numaq.c
index 5f5b075f860..0caf14652ba 100644
--- a/arch/i386/kernel/numaq.c
+++ b/arch/i386/kernel/numaq.c
@@ -79,10 +79,12 @@ int __init get_memcfg_numaq(void)
return 1;
}
-static int __init numaq_dsc_disable(void)
+static int __init numaq_tsc_disable(void)
{
- printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
- tsc_disable = 1;
+ if (num_online_nodes() > 1) {
+ printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
+ tsc_disable = 1;
+ }
return 0;
}
-core_initcall(numaq_dsc_disable);
+arch_initcall(numaq_tsc_disable);
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 6259afea46d..6946b06e278 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -102,7 +102,7 @@ void default_idle(void)
local_irq_enable();
if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
- clear_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status &= ~TS_POLLING;
smp_mb__after_clear_bit();
while (!need_resched()) {
local_irq_disable();
@@ -111,7 +111,7 @@ void default_idle(void)
else
local_irq_enable();
}
- set_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status |= TS_POLLING;
} else {
while (!need_resched())
cpu_relax();
@@ -174,7 +174,7 @@ void cpu_idle(void)
{
int cpu = smp_processor_id();
- set_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status |= TS_POLLING;
/* endless idle loop with no priority at all */
while (1) {
@@ -312,7 +312,7 @@ void show_regs(struct pt_regs * regs)
cr3 = read_cr3();
cr4 = read_cr4_safe();
printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
- show_trace(NULL, &regs->esp);
+ show_trace(NULL, regs, &regs->esp);
}
/*
diff --git a/arch/i386/kernel/scx200.c b/arch/i386/kernel/scx200.c
index 321f5fd26e7..9bf590cefc7 100644
--- a/arch/i386/kernel/scx200.c
+++ b/arch/i386/kernel/scx200.c
@@ -9,6 +9,7 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/scx200.h>
@@ -45,11 +46,19 @@ static struct pci_driver scx200_pci_driver = {
.probe = scx200_probe,
};
-static DEFINE_SPINLOCK(scx200_gpio_config_lock);
+static DEFINE_MUTEX(scx200_gpio_config_lock);
-static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+static void __devinit scx200_init_shadow(void)
{
int bank;
+
+ /* read the current values driven on the GPIO signals */
+ for (bank = 0; bank < 2; ++bank)
+ scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank);
+}
+
+static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
unsigned base;
if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE ||
@@ -63,10 +72,7 @@ static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_
}
scx200_gpio_base = base;
-
- /* read the current values driven on the GPIO signals */
- for (bank = 0; bank < 2; ++bank)
- scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank);
+ scx200_init_shadow();
} else {
/* find the base of the Configuration Block */
@@ -87,12 +93,11 @@ static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_
return 0;
}
-u32 scx200_gpio_configure(int index, u32 mask, u32 bits)
+u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits)
{
u32 config, new_config;
- unsigned long flags;
- spin_lock_irqsave(&scx200_gpio_config_lock, flags);
+ mutex_lock(&scx200_gpio_config_lock);
outl(index, scx200_gpio_base + 0x20);
config = inl(scx200_gpio_base + 0x24);
@@ -100,45 +105,11 @@ u32 scx200_gpio_configure(int index, u32 mask, u32 bits)
new_config = (config & mask) | bits;
outl(new_config, scx200_gpio_base + 0x24);
- spin_unlock_irqrestore(&scx200_gpio_config_lock, flags);
+ mutex_unlock(&scx200_gpio_config_lock);
return config;
}
-#if 0
-void scx200_gpio_dump(unsigned index)
-{
- u32 config = scx200_gpio_configure(index, ~0, 0);
- printk(KERN_DEBUG "GPIO%02u: 0x%08lx", index, (unsigned long)config);
-
- if (config & 1)
- printk(" OE"); /* output enabled */
- else
- printk(" TS"); /* tristate */
- if (config & 2)
- printk(" PP"); /* push pull */
- else
- printk(" OD"); /* open drain */
- if (config & 4)
- printk(" PUE"); /* pull up enabled */
- else
- printk(" PUD"); /* pull up disabled */
- if (config & 8)
- printk(" LOCKED"); /* locked */
- if (config & 16)
- printk(" LEVEL"); /* level input */
- else
- printk(" EDGE"); /* edge input */
- if (config & 32)
- printk(" HI"); /* trigger on rising edge */
- else
- printk(" LO"); /* trigger on falling edge */
- if (config & 64)
- printk(" DEBOUNCE"); /* debounce */
- printk("\n");
-}
-#endif /* 0 */
-
static int __init scx200_init(void)
{
printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n");
@@ -159,10 +130,3 @@ EXPORT_SYMBOL(scx200_gpio_base);
EXPORT_SYMBOL(scx200_gpio_shadow);
EXPORT_SYMBOL(scx200_gpio_configure);
EXPORT_SYMBOL(scx200_cb_base);
-
-/*
- Local variables:
- compile-command: "make -k -C ../../.. SUBDIRS=arch/i386/kernel modules"
- c-basic-offset: 8
- End:
-*/
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index e6023970aa4..4a65040cc62 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -48,7 +48,6 @@
#include <linux/crash_dump.h>
#include <linux/dmi.h>
#include <linux/pfn.h>
-#include <linux/suspend.h>
#include <video/edid.h>
@@ -61,7 +60,7 @@
#include <asm/io_apic.h>
#include <asm/ist.h>
#include <asm/io.h>
-#include "setup_arch_pre.h"
+#include <setup_arch.h>
#include <bios_ebda.h>
/* Forward Declaration. */
@@ -411,8 +410,8 @@ static void __init limit_regions(unsigned long long size)
}
}
-static void __init add_memory_region(unsigned long long start,
- unsigned long long size, int type)
+void __init add_memory_region(unsigned long long start,
+ unsigned long long size, int type)
{
int x;
@@ -475,7 +474,7 @@ static struct change_member *change_point[2*E820MAX] __initdata;
static struct e820entry *overlap_list[E820MAX] __initdata;
static struct e820entry new_bios[E820MAX] __initdata;
-static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
+int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
struct change_member *change_tmp;
unsigned long current_type, last_type;
@@ -644,7 +643,7 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
* thinkpad 560x, for example, does not cooperate with the memory
* detection code.)
*/
-static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
+int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
/* Only one memory region (or negative)? Ignore it */
if (nr_map < 2)
@@ -702,12 +701,6 @@ static inline void copy_edd(void)
}
#endif
-/*
- * Do NOT EVER look at the BIOS memory size location.
- * It does not work on many machines.
- */
-#define LOWMEMSIZE() (0x9f000)
-
static void __init parse_cmdline_early (char ** cmdline_p)
{
char c = ' ', *to = command_line, *from = saved_command_line;
@@ -1424,8 +1417,6 @@ static void __init register_memory(void)
pci_mem_start, gapstart, gapsize);
}
-static char * __init machine_specific_memory_setup(void);
-
#ifdef CONFIG_MCA
static void set_mca_bus(int x)
{
@@ -1435,111 +1426,6 @@ static void set_mca_bus(int x)
static void set_mca_bus(int x) { }
#endif
-#ifdef CONFIG_SOFTWARE_SUSPEND
-static void __init mark_nosave_page_range(unsigned long start, unsigned long end)
-{
- struct page *page;
- while (start <= end) {
- page = pfn_to_page(start);
- SetPageNosave(page);
- start++;
- }
-}
-
-static void __init e820_nosave_reserved_pages(void)
-{
- int i;
- unsigned long r_start = 0, r_end = 0;
-
- /* Assume e820 map is sorted */
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- unsigned long start, end;
-
- start = PFN_DOWN(ei->addr);
- end = PFN_UP(ei->addr + ei->size);
- if (start >= end)
- continue;
- if (ei->type == E820_RESERVED)
- continue;
- r_end = start;
- /*
- * Highmem 'Reserved' pages are marked as reserved, swsusp
- * will not save/restore them, so we ignore these pages here.
- */
- if (r_end > max_low_pfn)
- r_end = max_low_pfn;
- if (r_end > r_start)
- mark_nosave_page_range(r_start, r_end-1);
- if (r_end >= max_low_pfn)
- break;
- r_start = end;
- }
-}
-
-static void __init e820_save_acpi_pages(void)
-{
- int i;
-
- /* Assume e820 map is sorted */
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- unsigned long start, end;
-
- start = ei->addr;
- end = ei->addr + ei->size;
- if (start >= end)
- continue;
- if (ei->type != E820_ACPI && ei->type != E820_NVS)
- continue;
- /*
- * If the region is below max_low_pfn, it will be
- * saved/restored by swsusp follow 'RAM' type.
- */
- if (start < (max_low_pfn << PAGE_SHIFT))
- start = max_low_pfn << PAGE_SHIFT;
- /*
- * Highmem pages (ACPI NVS/Data) are reserved, but swsusp
- * highmem save/restore will not save/restore them. We marked
- * them as arch saveable pages here
- */
- if (end > start)
- swsusp_add_arch_pages(start, end);
- }
-}
-
-extern char __start_rodata, __end_rodata;
-/*
- * BIOS reserved region/hole - no save/restore
- * ACPI NVS - save/restore
- * ACPI Data - this is a little tricky, the mem could be used by OS after OS
- * reads tables from the region, but anyway save/restore the memory hasn't any
- * side effect and Linux runtime module load/unload might use it.
- * kernel rodata - no save/restore (kernel rodata isn't changed)
- */
-static int __init mark_nosave_pages(void)
-{
- unsigned long pfn_start, pfn_end;
-
- /* FIXME: provide a version for efi BIOS */
- if (efi_enabled)
- return 0;
- /* BIOS reserved regions & holes */
- e820_nosave_reserved_pages();
-
- /* kernel rodata */
- pfn_start = PFN_UP(virt_to_phys(&__start_rodata));
- pfn_end = PFN_DOWN(virt_to_phys(&__end_rodata));
- mark_nosave_page_range(pfn_start, pfn_end-1);
-
- /* record ACPI Data/NVS as saveable */
- e820_save_acpi_pages();
-
- return 0;
-}
-core_initcall(mark_nosave_pages);
-#endif
-
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -1689,6 +1575,7 @@ void __init setup_arch(char **cmdline_p)
conswitchp = &dummy_con;
#endif
#endif
+ tsc_init();
}
static __init int add_pcspkr(void)
@@ -1708,7 +1595,6 @@ static __init int add_pcspkr(void)
}
device_initcall(add_pcspkr);
-#include "setup_arch_post.h"
/*
* Local Variables:
* mode:c
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index 5c352c3a9e7..43002cfb40c 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -351,7 +351,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
goto give_sigsegv;
}
- restorer = &__kernel_sigreturn;
+ restorer = (void *)VDSO_SYM(&__kernel_sigreturn);
if (ka->sa.sa_flags & SA_RESTORER)
restorer = ka->sa.sa_restorer;
@@ -447,7 +447,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
goto give_sigsegv;
/* Set up to return from userspace. */
- restorer = &__kernel_rt_sigreturn;
+ restorer = (void *)VDSO_SYM(&__kernel_rt_sigreturn);
if (ka->sa.sa_flags & SA_RESTORER)
restorer = ka->sa.sa_restorer;
err |= __put_user(restorer, &frame->pretcode);
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index d134e9643a5..c10789d7a9d 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -114,7 +114,17 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_m
static inline int __prepare_ICR (unsigned int shortcut, int vector)
{
- return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
+ unsigned int icr = shortcut | APIC_DEST_LOGICAL;
+
+ switch (vector) {
+ default:
+ icr |= APIC_DM_FIXED | vector;
+ break;
+ case NMI_VECTOR:
+ icr |= APIC_DM_NMI;
+ break;
+ }
+ return icr;
}
static inline int __prepare_ICR2 (unsigned int mask)
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 825b2b4ca72..89e7315e539 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -52,6 +52,7 @@
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/arch_hooks.h>
+#include <asm/nmi.h>
#include <mach_apic.h>
#include <mach_wakecpu.h>
@@ -66,12 +67,6 @@ int smp_num_siblings = 1;
EXPORT_SYMBOL(smp_num_siblings);
#endif
-/* Package ID of each logical CPU */
-int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
-
-/* Core ID of each logical CPU */
-int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
-
/* Last level cache ID of each logical CPU */
int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
@@ -257,7 +252,7 @@ static void __init synchronize_tsc_bp (void)
* all APs synchronize but they loop on '== num_cpus'
*/
while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
- mb();
+ cpu_relax();
atomic_set(&tsc_count_stop, 0);
wmb();
/*
@@ -276,7 +271,7 @@ static void __init synchronize_tsc_bp (void)
* Wait for all APs to leave the synchronization point:
*/
while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
- mb();
+ cpu_relax();
atomic_set(&tsc_count_start, 0);
wmb();
atomic_inc(&tsc_count_stop);
@@ -333,19 +328,21 @@ static void __init synchronize_tsc_ap (void)
* this gets called, so we first wait for the BP to
* finish SMP initialization:
*/
- while (!atomic_read(&tsc_start_flag)) mb();
+ while (!atomic_read(&tsc_start_flag))
+ cpu_relax();
for (i = 0; i < NR_LOOPS; i++) {
atomic_inc(&tsc_count_start);
while (atomic_read(&tsc_count_start) != num_booting_cpus())
- mb();
+ cpu_relax();
rdtscll(tsc_values[smp_processor_id()]);
if (i == NR_LOOPS-1)
write_tsc(0, 0);
atomic_inc(&tsc_count_stop);
- while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
+ while (atomic_read(&tsc_count_stop) != num_booting_cpus())
+ cpu_relax();
}
}
#undef NR_LOOPS
@@ -451,10 +448,12 @@ cpumask_t cpu_coregroup_map(int cpu)
struct cpuinfo_x86 *c = cpu_data + cpu;
/*
* For perf, we return last level cache shared map.
- * TBD: when power saving sched policy is added, we will return
- * cpu_core_map when power saving policy is enabled
+ * And for power savings, we return cpu_core_map
*/
- return c->llc_shared_map;
+ if (sched_mc_power_savings || sched_smt_power_savings)
+ return cpu_core_map[cpu];
+ else
+ return c->llc_shared_map;
}
/* representing cpus for which sibling maps can be computed */
@@ -470,8 +469,8 @@ set_cpu_sibling_map(int cpu)
if (smp_num_siblings > 1) {
for_each_cpu_mask(i, cpu_sibling_setup_map) {
- if (phys_proc_id[cpu] == phys_proc_id[i] &&
- cpu_core_id[cpu] == cpu_core_id[i]) {
+ if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
+ c[cpu].cpu_core_id == c[i].cpu_core_id) {
cpu_set(i, cpu_sibling_map[cpu]);
cpu_set(cpu, cpu_sibling_map[i]);
cpu_set(i, cpu_core_map[cpu]);
@@ -498,7 +497,7 @@ set_cpu_sibling_map(int cpu)
cpu_set(i, c[cpu].llc_shared_map);
cpu_set(cpu, c[i].llc_shared_map);
}
- if (phys_proc_id[cpu] == phys_proc_id[i]) {
+ if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
cpu_set(i, cpu_core_map[cpu]);
cpu_set(cpu, cpu_core_map[i]);
/*
@@ -1053,6 +1052,7 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
struct warm_boot_cpu_info info;
struct work_struct task;
int apicid, ret;
+ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
apicid = x86_cpu_to_apicid[cpu];
if (apicid == BAD_APICID) {
@@ -1060,6 +1060,18 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
goto exit;
}
+ /*
+ * the CPU isn't initialized at boot time, allocate gdt table here.
+ * cpu_init will initialize it
+ */
+ if (!cpu_gdt_descr->address) {
+ cpu_gdt_descr->address = get_zeroed_page(GFP_KERNEL);
+ if (!cpu_gdt_descr->address)
+ printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
+ ret = -ENOMEM;
+ goto exit;
+ }
+
info.complete = &done;
info.apicid = apicid;
info.cpu = cpu;
@@ -1337,8 +1349,8 @@ remove_siblinginfo(int cpu)
cpu_clear(cpu, cpu_sibling_map[sibling]);
cpus_clear(cpu_sibling_map[cpu]);
cpus_clear(cpu_core_map[cpu]);
- phys_proc_id[cpu] = BAD_APICID;
- cpu_core_id[cpu] = BAD_APICID;
+ c[cpu].phys_proc_id = 0;
+ c[cpu].cpu_core_id = 0;
cpu_clear(cpu, cpu_sibling_setup_map);
}
@@ -1433,7 +1445,7 @@ int __devinit __cpu_up(unsigned int cpu)
/* Unleash the CPU! */
cpu_set(cpu, smp_commenced_mask);
while (!cpu_isset(cpu, cpu_online_map))
- mb();
+ cpu_relax();
return 0;
}
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 0bada1870bd..c60419dee01 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -2,6 +2,8 @@
* linux/arch/i386/kernel/sysenter.c
*
* (C) Copyright 2002 Linus Torvalds
+ * Portions based on the vdso-randomization code from exec-shield:
+ * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
*
* This file contains the needed initializations to support sysenter.
*/
@@ -13,12 +15,31 @@
#include <linux/gfp.h>
#include <linux/string.h>
#include <linux/elf.h>
+#include <linux/mm.h>
+#include <linux/module.h>
#include <asm/cpufeature.h>
#include <asm/msr.h>
#include <asm/pgtable.h>
#include <asm/unistd.h>
+/*
+ * Should the kernel map a VDSO page into processes and pass its
+ * address down to glibc upon exec()?
+ */
+unsigned int __read_mostly vdso_enabled = 1;
+
+EXPORT_SYMBOL_GPL(vdso_enabled);
+
+static int __init vdso_setup(char *s)
+{
+ vdso_enabled = simple_strtoul(s, NULL, 0);
+
+ return 1;
+}
+
+__setup("vdso=", vdso_setup);
+
extern asmlinkage void sysenter_entry(void);
void enable_sep_cpu(void)
@@ -45,23 +66,122 @@ void enable_sep_cpu(void)
*/
extern const char vsyscall_int80_start, vsyscall_int80_end;
extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
+static void *syscall_page;
int __init sysenter_setup(void)
{
- void *page = (void *)get_zeroed_page(GFP_ATOMIC);
+ syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
- __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC);
+#ifdef CONFIG_COMPAT_VDSO
+ __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY);
+ printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
+#else
+ /*
+ * In the non-compat case the ELF coredumping code needs the fixmap:
+ */
+ __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_KERNEL_RO);
+#endif
if (!boot_cpu_has(X86_FEATURE_SEP)) {
- memcpy(page,
+ memcpy(syscall_page,
&vsyscall_int80_start,
&vsyscall_int80_end - &vsyscall_int80_start);
return 0;
}
- memcpy(page,
+ memcpy(syscall_page,
&vsyscall_sysenter_start,
&vsyscall_sysenter_end - &vsyscall_sysenter_start);
return 0;
}
+
+static struct page *syscall_nopage(struct vm_area_struct *vma,
+ unsigned long adr, int *type)
+{
+ struct page *p = virt_to_page(adr - vma->vm_start + syscall_page);
+ get_page(p);
+ return p;
+}
+
+/* Prevent VMA merging */
+static void syscall_vma_close(struct vm_area_struct *vma)
+{
+}
+
+static struct vm_operations_struct syscall_vm_ops = {
+ .close = syscall_vma_close,
+ .nopage = syscall_nopage,
+};
+
+/* Defined in vsyscall-sysenter.S */
+extern void SYSENTER_RETURN;
+
+/* Setup a VMA at program startup for the vsyscall page */
+int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
+{
+ struct vm_area_struct *vma;
+ struct mm_struct *mm = current->mm;
+ unsigned long addr;
+ int ret;
+
+ down_write(&mm->mmap_sem);
+ addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+ if (IS_ERR_VALUE(addr)) {
+ ret = addr;
+ goto up_fail;
+ }
+
+ vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL);
+ if (!vma) {
+ ret = -ENOMEM;
+ goto up_fail;
+ }
+
+ vma->vm_start = addr;
+ vma->vm_end = addr + PAGE_SIZE;
+ /* MAYWRITE to allow gdb to COW and set breakpoints */
+ vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
+ vma->vm_flags |= mm->def_flags;
+ vma->vm_page_prot = protection_map[vma->vm_flags & 7];
+ vma->vm_ops = &syscall_vm_ops;
+ vma->vm_mm = mm;
+
+ ret = insert_vm_struct(mm, vma);
+ if (ret)
+ goto free_vma;
+
+ current->mm->context.vdso = (void *)addr;
+ current_thread_info()->sysenter_return =
+ (void *)VDSO_SYM(&SYSENTER_RETURN);
+ mm->total_vm++;
+up_fail:
+ up_write(&mm->mmap_sem);
+ return ret;
+
+free_vma:
+ kmem_cache_free(vm_area_cachep, vma);
+ return ret;
+}
+
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+ if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
+ return "[vdso]";
+ return NULL;
+}
+
+struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+{
+ return NULL;
+}
+
+int in_gate_area(struct task_struct *task, unsigned long addr)
+{
+ return 0;
+}
+
+int in_gate_area_no_task(unsigned long addr)
+{
+ return 0;
+}
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 9d307475985..5f43d041012 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -82,13 +82,6 @@ extern unsigned long wall_jiffies;
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL(rtc_lock);
-#include <asm/i8253.h>
-
-DEFINE_SPINLOCK(i8253_lock);
-EXPORT_SYMBOL(i8253_lock);
-
-struct timer_opts *cur_timer __read_mostly = &timer_none;
-
/*
* This is a special lock that is owned by the CPU and holds the index
* register we are working with. It is required for NMI access to the
@@ -118,99 +111,19 @@ void rtc_cmos_write(unsigned char val, unsigned char addr)
}
EXPORT_SYMBOL(rtc_cmos_write);
-/*
- * This version of gettimeofday has microsecond resolution
- * and better than microsecond precision on fast x86 machines with TSC.
- */
-void do_gettimeofday(struct timeval *tv)
-{
- unsigned long seq;
- unsigned long usec, sec;
- unsigned long max_ntp_tick;
-
- do {
- unsigned long lost;
-
- seq = read_seqbegin(&xtime_lock);
-
- usec = cur_timer->get_offset();
- lost = jiffies - wall_jiffies;
-
- /*
- * If time_adjust is negative then NTP is slowing the clock
- * so make sure not to go into next possible interval.
- * Better to lose some accuracy than have time go backwards..
- */
- if (unlikely(time_adjust < 0)) {
- max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
- usec = min(usec, max_ntp_tick);
-
- if (lost)
- usec += lost * max_ntp_tick;
- }
- else if (unlikely(lost))
- usec += lost * (USEC_PER_SEC / HZ);
-
- sec = xtime.tv_sec;
- usec += (xtime.tv_nsec / 1000);
- } while (read_seqretry(&xtime_lock, seq));
-
- while (usec >= 1000000) {
- usec -= 1000000;
- sec++;
- }
-
- tv->tv_sec = sec;
- tv->tv_usec = usec;
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
- time_t wtm_sec, sec = tv->tv_sec;
- long wtm_nsec, nsec = tv->tv_nsec;
-
- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
- return -EINVAL;
-
- write_seqlock_irq(&xtime_lock);
- /*
- * This is revolting. We need to set "xtime" correctly. However, the
- * value in this location is the value at the most recent update of
- * wall time. Discover what correction gettimeofday() would have
- * made, and then undo it!
- */
- nsec -= cur_timer->get_offset() * NSEC_PER_USEC;
- nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
-
- wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
- wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
- set_normalized_timespec(&xtime, sec, nsec);
- set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
- ntp_clear();
- write_sequnlock_irq(&xtime_lock);
- clock_was_set();
- return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
-
static int set_rtc_mmss(unsigned long nowtime)
{
int retval;
-
- WARN_ON(irqs_disabled());
+ unsigned long flags;
/* gets recalled with irq locally disabled */
- spin_lock_irq(&rtc_lock);
+ /* XXX - does irqsave resolve this? -johnstul */
+ spin_lock_irqsave(&rtc_lock, flags);
if (efi_enabled)
retval = efi_set_rtc_mmss(nowtime);
else
retval = mach_set_rtc_mmss(nowtime);
- spin_unlock_irq(&rtc_lock);
+ spin_unlock_irqrestore(&rtc_lock, flags);
return retval;
}
@@ -218,16 +131,6 @@ static int set_rtc_mmss(unsigned long nowtime)
int timer_ack;
-/* monotonic_clock(): returns # of nanoseconds passed since time_init()
- * Note: This function is required to return accurate
- * time even in the absence of multiple timer ticks.
- */
-unsigned long long monotonic_clock(void)
-{
- return cur_timer->monotonic_clock();
-}
-EXPORT_SYMBOL(monotonic_clock);
-
#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
unsigned long profile_pc(struct pt_regs *regs)
{
@@ -242,11 +145,21 @@ EXPORT_SYMBOL(profile_pc);
#endif
/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * This is the same as the above, except we _also_ save the current
+ * Time Stamp Counter value at the time of the timer interrupt, so that
+ * we later on can estimate the time of day more exactly.
*/
-static inline void do_timer_interrupt(int irq, struct pt_regs *regs)
+irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
+ /*
+ * Here we are in the timer irq handler. We just have irqs locally
+ * disabled but we don't know if the timer_bh is running on the other
+ * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
+ * the irq version of write_lock because as just said we have irq
+ * locally disabled. -arca
+ */
+ write_seqlock(&xtime_lock);
+
#ifdef CONFIG_X86_IO_APIC
if (timer_ack) {
/*
@@ -279,27 +192,6 @@ static inline void do_timer_interrupt(int irq, struct pt_regs *regs)
irq = inb_p( 0x61 ); /* read the current state */
outb_p( irq|0x80, 0x61 ); /* reset the IRQ */
}
-}
-
-/*
- * This is the same as the above, except we _also_ save the current
- * Time Stamp Counter value at the time of the timer interrupt, so that
- * we later on can estimate the time of day more exactly.
- */
-irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
- /*
- * Here we are in the timer irq handler. We just have irqs locally
- * disabled but we don't know if the timer_bh is running on the other
- * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
- * the irq version of write_lock because as just said we have irq
- * locally disabled. -arca
- */
- write_seqlock(&xtime_lock);
-
- cur_timer->mark_offset();
-
- do_timer_interrupt(irq, regs);
write_sequnlock(&xtime_lock);
@@ -380,7 +272,6 @@ void notify_arch_cmos_timer(void)
static long clock_cmos_diff, sleep_start;
-static struct timer_opts *last_timer;
static int timer_suspend(struct sys_device *dev, pm_message_t state)
{
/*
@@ -389,10 +280,6 @@ static int timer_suspend(struct sys_device *dev, pm_message_t state)
clock_cmos_diff = -get_cmos_time();
clock_cmos_diff += get_seconds();
sleep_start = get_cmos_time();
- last_timer = cur_timer;
- cur_timer = &timer_none;
- if (last_timer->suspend)
- last_timer->suspend(state);
return 0;
}
@@ -415,10 +302,6 @@ static int timer_resume(struct sys_device *dev)
jiffies_64 += sleep_length;
wall_jiffies += sleep_length;
write_sequnlock_irqrestore(&xtime_lock, flags);
- if (last_timer->resume)
- last_timer->resume();
- cur_timer = last_timer;
- last_timer = NULL;
touch_softlockup_watchdog();
return 0;
}
@@ -460,9 +343,6 @@ static void __init hpet_time_init(void)
printk("Using HPET for base-timer\n");
}
- cur_timer = select_timer();
- printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
-
time_init_hook();
}
#endif
@@ -484,8 +364,5 @@ void __init time_init(void)
set_normalized_timespec(&wall_to_monotonic,
-xtime.tv_sec, -xtime.tv_nsec);
- cur_timer = select_timer();
- printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
-
time_init_hook();
}
diff --git a/arch/i386/kernel/timers/Makefile b/arch/i386/kernel/timers/Makefile
deleted file mode 100644
index 8fa12be658d..00000000000
--- a/arch/i386/kernel/timers/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-#
-# Makefile for x86 timers
-#
-
-obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o
-
-obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o
-obj-$(CONFIG_HPET_TIMER) += timer_hpet.o
-obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c
deleted file mode 100644
index 8163fe0cf1f..00000000000
--- a/arch/i386/kernel/timers/common.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Common functions used across the timers go here
- */
-
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/jiffies.h>
-#include <linux/module.h>
-
-#include <asm/io.h>
-#include <asm/timer.h>
-#include <asm/hpet.h>
-
-#include "mach_timer.h"
-
-/* ------ Calibrate the TSC -------
- * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
- * Too much 64-bit arithmetic here to do this cleanly in C, and for
- * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
- * output busy loop as low as possible. We avoid reading the CTC registers
- * directly because of the awkward 8-bit access mechanism of the 82C54
- * device.
- */
-
-#define CALIBRATE_TIME (5 * 1000020/HZ)
-
-unsigned long calibrate_tsc(void)
-{
- mach_prepare_counter();
-
- {
- unsigned long startlow, starthigh;
- unsigned long endlow, endhigh;
- unsigned long count;
-
- rdtsc(startlow,starthigh);
- mach_countup(&count);
- rdtsc(endlow,endhigh);
-
-
- /* Error: ECTCNEVERSET */
- if (count <= 1)
- goto bad_ctc;
-
- /* 64-bit subtract - gcc just messes up with long longs */
- __asm__("subl %2,%0\n\t"
- "sbbl %3,%1"
- :"=a" (endlow), "=d" (endhigh)
- :"g" (startlow), "g" (starthigh),
- "0" (endlow), "1" (endhigh));
-
- /* Error: ECPUTOOFAST */
- if (endhigh)
- goto bad_ctc;
-
- /* Error: ECPUTOOSLOW */
- if (endlow <= CALIBRATE_TIME)
- goto bad_ctc;
-
- __asm__("divl %2"
- :"=a" (endlow), "=d" (endhigh)
- :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME));
-
- return endlow;
- }
-
- /*
- * The CTC wasn't reliable: we got a hit on the very first read,
- * or the CPU was so fast/slow that the quotient wouldn't fit in
- * 32 bits..
- */
-bad_ctc:
- return 0;
-}
-
-#ifdef CONFIG_HPET_TIMER
-/* ------ Calibrate the TSC using HPET -------
- * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq.
- * Second output is parameter 1 (when non NULL)
- * Set 2^32 * (1 / (tsc per HPET clk)) for delay_hpet().
- * calibrate_tsc() calibrates the processor TSC by comparing
- * it to the HPET timer of known frequency.
- * Too much 64-bit arithmetic here to do this cleanly in C
- */
-#define CALIBRATE_CNT_HPET (5 * hpet_tick)
-#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC)
-
-unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
-{
- unsigned long tsc_startlow, tsc_starthigh;
- unsigned long tsc_endlow, tsc_endhigh;
- unsigned long hpet_start, hpet_end;
- unsigned long result, remain;
-
- hpet_start = hpet_readl(HPET_COUNTER);
- rdtsc(tsc_startlow, tsc_starthigh);
- do {
- hpet_end = hpet_readl(HPET_COUNTER);
- } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET);
- rdtsc(tsc_endlow, tsc_endhigh);
-
- /* 64-bit subtract - gcc just messes up with long longs */
- __asm__("subl %2,%0\n\t"
- "sbbl %3,%1"
- :"=a" (tsc_endlow), "=d" (tsc_endhigh)
- :"g" (tsc_startlow), "g" (tsc_starthigh),
- "0" (tsc_endlow), "1" (tsc_endhigh));
-
- /* Error: ECPUTOOFAST */
- if (tsc_endhigh)
- goto bad_calibration;
-
- /* Error: ECPUTOOSLOW */
- if (tsc_endlow <= CALIBRATE_TIME_HPET)
- goto bad_calibration;
-
- ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET);
- if (remain > (tsc_endlow >> 1))
- result++; /* rounding the result */
-
- if (tsc_hpet_quotient_ptr) {
- unsigned long tsc_hpet_quotient;
-
- ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0,
- CALIBRATE_CNT_HPET);
- if (remain > (tsc_endlow >> 1))
- tsc_hpet_quotient++; /* rounding the result */
- *tsc_hpet_quotient_ptr = tsc_hpet_quotient;
- }
-
- return result;
-bad_calibration:
- /*
- * the CPU was so fast/slow that the quotient wouldn't fit in
- * 32 bits..
- */
- return 0;
-}
-#endif
-
-
-unsigned long read_timer_tsc(void)
-{
- unsigned long retval;
- rdtscl(retval);
- return retval;
-}
-
-
-/* calculate cpu_khz */
-void init_cpu_khz(void)
-{
- if (cpu_has_tsc) {
- unsigned long tsc_quotient = calibrate_tsc();
- if (tsc_quotient) {
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- __asm__("divl %2"
- :"=a" (cpu_khz), "=d" (edx)
- :"r" (tsc_quotient),
- "0" (eax), "1" (edx));
- printk("Detected %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
- }
- }
-}
-
diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c
deleted file mode 100644
index 7e39ed8e33f..00000000000
--- a/arch/i386/kernel/timers/timer.c
+++ /dev/null
@@ -1,75 +0,0 @@
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <asm/timer.h>
-
-#ifdef CONFIG_HPET_TIMER
-/*
- * HPET memory read is slower than tsc reads, but is more dependable as it
- * always runs at constant frequency and reduces complexity due to
- * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use
- * timer_pit when HPET is active. So, we default to timer_tsc.
- */
-#endif
-/* list of timers, ordered by preference, NULL terminated */
-static struct init_timer_opts* __initdata timers[] = {
-#ifdef CONFIG_X86_CYCLONE_TIMER
- &timer_cyclone_init,
-#endif
-#ifdef CONFIG_HPET_TIMER
- &timer_hpet_init,
-#endif
-#ifdef CONFIG_X86_PM_TIMER
- &timer_pmtmr_init,
-#endif
- &timer_tsc_init,
- &timer_pit_init,
- NULL,
-};
-
-static char clock_override[10] __initdata;
-
-static int __init clock_setup(char* str)
-{
- if (str)
- strlcpy(clock_override, str, sizeof(clock_override));
- return 1;
-}
-__setup("clock=", clock_setup);
-
-
-/* The chosen timesource has been found to be bad.
- * Fall back to a known good timesource (the PIT)
- */
-void clock_fallback(void)
-{
- cur_timer = &timer_pit;
-}
-
-/* iterates through the list of timers, returning the first
- * one that initializes successfully.
- */
-struct timer_opts* __init select_timer(void)
-{
- int i = 0;
-
- /* find most preferred working timer */
- while (timers[i]) {
- if (timers[i]->init)
- if (timers[i]->init(clock_override) == 0)
- return timers[i]->opts;
- ++i;
- }
-
- panic("select_timer: Cannot find a suitable timer\n");
- return NULL;
-}
-
-int read_current_timer(unsigned long *timer_val)
-{
- if (cur_timer->read_timer) {
- *timer_val = cur_timer->read_timer();
- return 0;
- }
- return -1;
-}
diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
deleted file mode 100644
index 13892a65c94..00000000000
--- a/arch/i386/kernel/timers/timer_cyclone.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/* Cyclone-timer:
- * This code implements timer_ops for the cyclone counter found
- * on IBM x440, x360, and other Summit based systems.
- *
- * Copyright (C) 2002 IBM, John Stultz (johnstul@us.ibm.com)
- */
-
-
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/jiffies.h>
-
-#include <asm/timer.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/fixmap.h>
-#include <asm/i8253.h>
-
-#include "io_ports.h"
-
-/* Number of usecs that the last interrupt was delayed */
-static int delay_at_last_interrupt;
-
-#define CYCLONE_CBAR_ADDR 0xFEB00CD0
-#define CYCLONE_PMCC_OFFSET 0x51A0
-#define CYCLONE_MPMC_OFFSET 0x51D0
-#define CYCLONE_MPCS_OFFSET 0x51A8
-#define CYCLONE_TIMER_FREQ 100000000
-#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */
-int use_cyclone = 0;
-
-static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */
-static u32 last_cyclone_low;
-static u32 last_cyclone_high;
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* helper macro to atomically read both cyclone counter registers */
-#define read_cyclone_counter(low,high) \
- do{ \
- high = cyclone_timer[1]; low = cyclone_timer[0]; \
- } while (high != cyclone_timer[1]);
-
-
-static void mark_offset_cyclone(void)
-{
- unsigned long lost, delay;
- unsigned long delta = last_cyclone_low;
- int count;
- unsigned long long this_offset, last_offset;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
-
- spin_lock(&i8253_lock);
- read_cyclone_counter(last_cyclone_low,last_cyclone_high);
-
- /* read values for delay_at_last_interrupt */
- outb_p(0x00, 0x43); /* latch the count ASAP */
-
- count = inb_p(0x40); /* read the latched count */
- count |= inb(0x40) << 8;
-
- /*
- * VIA686a test code... reset the latch if count > max + 1
- * from timer_pit.c - cjb
- */
- if (count > LATCH) {
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff, PIT_CH0);
- outb(LATCH >> 8, PIT_CH0);
- count = LATCH - 1;
- }
- spin_unlock(&i8253_lock);
-
- /* lost tick compensation */
- delta = last_cyclone_low - delta;
- delta /= (CYCLONE_TIMER_FREQ/1000000);
- delta += delay_at_last_interrupt;
- lost = delta/(1000000/HZ);
- delay = delta%(1000000/HZ);
- if (lost >= 2)
- jiffies_64 += lost-1;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
- monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK;
- write_sequnlock(&monotonic_lock);
-
- /* calculate delay_at_last_interrupt */
- count = ((LATCH-1) - count) * TICK_SIZE;
- delay_at_last_interrupt = (count + LATCH/2) / LATCH;
-
-
- /* catch corner case where tick rollover occured
- * between cyclone and pit reads (as noted when
- * usec delta is > 90% # of usecs/tick)
- */
- if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
- jiffies_64++;
-}
-
-static unsigned long get_offset_cyclone(void)
-{
- u32 offset;
-
- if(!cyclone_timer)
- return delay_at_last_interrupt;
-
- /* Read the cyclone timer */
- offset = cyclone_timer[0];
-
- /* .. relative to previous jiffy */
- offset = offset - last_cyclone_low;
-
- /* convert cyclone ticks to microseconds */
- /* XXX slow, can we speed this up? */
- offset = offset/(CYCLONE_TIMER_FREQ/1000000);
-
- /* our adjusted time offset in microseconds */
- return delay_at_last_interrupt + offset;
-}
-
-static unsigned long long monotonic_clock_cyclone(void)
-{
- u32 now_low, now_high;
- unsigned long long last_offset, this_offset, base;
- unsigned long long ret;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
-
- /* Read the cyclone counter */
- read_cyclone_counter(now_low,now_high);
- this_offset = ((unsigned long long)now_high<<32)|now_low;
-
- /* convert to nanoseconds */
- ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK);
- return ret * (1000000000 / CYCLONE_TIMER_FREQ);
-}
-
-static int __init init_cyclone(char* override)
-{
- u32* reg;
- u32 base; /* saved cyclone base address */
- u32 pageaddr; /* page that contains cyclone_timer register */
- u32 offset; /* offset from pageaddr to cyclone_timer register */
- int i;
-
- /* check clock override */
- if (override[0] && strncmp(override,"cyclone",7))
- return -ENODEV;
-
- /*make sure we're on a summit box*/
- if(!use_cyclone) return -ENODEV;
-
- printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
-
- /* find base address */
- pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK;
- offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n");
- return -ENODEV;
- }
- base = *reg;
- if(!base){
- printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
- return -ENODEV;
- }
-
- /* setup PMCC */
- pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n");
- return -ENODEV;
- }
- reg[0] = 0x00000001;
-
- /* setup MPCS */
- pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n");
- return -ENODEV;
- }
- reg[0] = 0x00000001;
-
- /* map in cyclone_timer */
- pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!cyclone_timer){
- printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n");
- return -ENODEV;
- }
-
- /*quick test to make sure its ticking*/
- for(i=0; i<3; i++){
- u32 old = cyclone_timer[0];
- int stall = 100;
- while(stall--) barrier();
- if(cyclone_timer[0] == old){
- printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n");
- cyclone_timer = 0;
- return -ENODEV;
- }
- }
-
- init_cpu_khz();
-
- /* Everything looks good! */
- return 0;
-}
-
-
-static void delay_cyclone(unsigned long loops)
-{
- unsigned long bclock, now;
- if(!cyclone_timer)
- return;
- bclock = cyclone_timer[0];
- do {
- rep_nop();
- now = cyclone_timer[0];
- } while ((now-bclock) < loops);
-}
-/************************************************************/
-
-/* cyclone timer_opts struct */
-static struct timer_opts timer_cyclone = {
- .name = "cyclone",
- .mark_offset = mark_offset_cyclone,
- .get_offset = get_offset_cyclone,
- .monotonic_clock = monotonic_clock_cyclone,
- .delay = delay_cyclone,
-};
-
-struct init_timer_opts __initdata timer_cyclone_init = {
- .init = init_cyclone,
- .opts = &timer_cyclone,
-};
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
deleted file mode 100644
index 17a6fe7166e..00000000000
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- */
-
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/jiffies.h>
-
-#include <asm/timer.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-
-#include "io_ports.h"
-#include "mach_timer.h"
-#include <asm/hpet.h>
-
-static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */
-static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */
-static unsigned long hpet_last; /* hpet counter value at last tick*/
-static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
-static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_khz * 10^3))
- * ns = cycles * (10^6 / cpu_khz)
- *
- * Then we use scaling math (suggested by george@mvista.com) to get:
- * ns = cycles * (10^6 * SC / cpu_khz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- *
- * We can use khz divisor instead of mhz to keep a better percision, since
- * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- * (mathieu.desnoyers@polymtl.ca)
- *
- * -johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-static unsigned long cyc2ns_scale __read_mostly;
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
-{
- cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
-static unsigned long long monotonic_clock_hpet(void)
-{
- unsigned long long last_offset, this_offset, base;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
- /* Read the Time Stamp Counter */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return base + cycles_2_ns(this_offset - last_offset);
-}
-
-static unsigned long get_offset_hpet(void)
-{
- register unsigned long eax, edx;
-
- eax = hpet_readl(HPET_COUNTER);
- eax -= hpet_last; /* hpet delta */
- eax = min(hpet_tick, eax);
- /*
- * Time offset = (hpet delta) * ( usecs per HPET clock )
- * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
- * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
- *
- * Where,
- * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
- *
- * Using a mull instead of a divl saves some cycles in critical path.
- */
- ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax);
-
- /* our adjusted time offset in microseconds */
- return edx;
-}
-
-static void mark_offset_hpet(void)
-{
- unsigned long long this_offset, last_offset;
- unsigned long offset;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- rdtsc(last_tsc_low, last_tsc_high);
-
- if (hpet_use_timer)
- offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
- else
- offset = hpet_readl(HPET_COUNTER);
- if (unlikely(((offset - hpet_last) >= (2*hpet_tick)) && (hpet_last != 0))) {
- int lost_ticks = ((offset - hpet_last) / hpet_tick) - 1;
- jiffies_64 += lost_ticks;
- }
- hpet_last = offset;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- monotonic_base += cycles_2_ns(this_offset - last_offset);
- write_sequnlock(&monotonic_lock);
-}
-
-static void delay_hpet(unsigned long loops)
-{
- unsigned long hpet_start, hpet_end;
- unsigned long eax;
-
- /* loops is the number of cpu cycles. Convert it to hpet clocks */
- ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops);
-
- hpet_start = hpet_readl(HPET_COUNTER);
- do {
- rep_nop();
- hpet_end = hpet_readl(HPET_COUNTER);
- } while ((hpet_end - hpet_start) < (loops));
-}
-
-static struct timer_opts timer_hpet;
-
-static int __init init_hpet(char* override)
-{
- unsigned long result, remain;
-
- /* check clock override */
- if (override[0] && strncmp(override,"hpet",4))
- return -ENODEV;
-
- if (!is_hpet_enabled())
- return -ENODEV;
-
- printk("Using HPET for gettimeofday\n");
- if (cpu_has_tsc) {
- unsigned long tsc_quotient = calibrate_tsc_hpet(&tsc_hpet_quotient);
- if (tsc_quotient) {
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- ASM_DIV64_REG(cpu_khz, edx, tsc_quotient,
- eax, edx);
- printk("Detected %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
- set_cyc2ns_scale(cpu_khz);
- }
- /* set this only when cpu_has_tsc */
- timer_hpet.read_timer = read_timer_tsc;
- }
-
- /*
- * Math to calculate hpet to usec multiplier
- * Look for the comments at get_offset_hpet()
- */
- ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC);
- if (remain > (hpet_tick >> 1))
- result++; /* rounding the result */
- hpet_usec_quotient = result;
-
- return 0;
-}
-
-static int hpet_resume(void)
-{
- write_seqlock(&monotonic_lock);
- /* Assume this is the last mark offset time */
- rdtsc(last_tsc_low, last_tsc_high);
-
- if (hpet_use_timer)
- hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick;
- else
- hpet_last = hpet_readl(HPET_COUNTER);
- write_sequnlock(&monotonic_lock);
- return 0;
-}
-/************************************************************/
-
-/* tsc timer_opts struct */
-static struct timer_opts timer_hpet __read_mostly = {
- .name = "hpet",
- .mark_offset = mark_offset_hpet,
- .get_offset = get_offset_hpet,
- .monotonic_clock = monotonic_clock_hpet,
- .delay = delay_hpet,
- .resume = hpet_resume,
-};
-
-struct init_timer_opts __initdata timer_hpet_init = {
- .init = init_hpet,
- .opts = &timer_hpet,
-};
diff --git a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c
deleted file mode 100644
index 4ea2f414dbb..00000000000
--- a/arch/i386/kernel/timers/timer_none.c
+++ /dev/null
@@ -1,39 +0,0 @@
-#include <linux/init.h>
-#include <asm/timer.h>
-
-static void mark_offset_none(void)
-{
- /* nothing needed */
-}
-
-static unsigned long get_offset_none(void)
-{
- return 0;
-}
-
-static unsigned long long monotonic_clock_none(void)
-{
- return 0;
-}
-
-static void delay_none(unsigned long loops)
-{
- int d0;
- __asm__ __volatile__(
- "\tjmp 1f\n"
- ".align 16\n"
- "1:\tjmp 2f\n"
- ".align 16\n"
- "2:\tdecl %0\n\tjns 2b"
- :"=&a" (d0)
- :"0" (loops));
-}
-
-/* none timer_opts struct */
-struct timer_opts timer_none = {
- .name = "none",
- .mark_offset = mark_offset_none,
- .get_offset = get_offset_none,
- .monotonic_clock = monotonic_clock_none,
- .delay = delay_none,
-};
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
deleted file mode 100644
index b9b6bd56b9b..00000000000
--- a/arch/i386/kernel/timers/timer_pit.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- */
-
-#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/sysdev.h>
-#include <linux/timex.h>
-#include <asm/delay.h>
-#include <asm/mpspec.h>
-#include <asm/timer.h>
-#include <asm/smp.h>
-#include <asm/io.h>
-#include <asm/arch_hooks.h>
-#include <asm/i8253.h>
-
-#include "do_timer.h"
-#include "io_ports.h"
-
-static int count_p; /* counter in get_offset_pit() */
-
-static int __init init_pit(char* override)
-{
- /* check clock override */
- if (override[0] && strncmp(override,"pit",3))
- printk(KERN_ERR "Warning: clock= override failed. Defaulting "
- "to PIT\n");
- init_cpu_khz();
- count_p = LATCH;
- return 0;
-}
-
-static void mark_offset_pit(void)
-{
- /* nothing needed */
-}
-
-static unsigned long long monotonic_clock_pit(void)
-{
- return 0;
-}
-
-static void delay_pit(unsigned long loops)
-{
- int d0;
- __asm__ __volatile__(
- "\tjmp 1f\n"
- ".align 16\n"
- "1:\tjmp 2f\n"
- ".align 16\n"
- "2:\tdecl %0\n\tjns 2b"
- :"=&a" (d0)
- :"0" (loops));
-}
-
-
-/* This function must be called with xtime_lock held.
- * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs
- *
- * However, the pc-audio speaker driver changes the divisor so that
- * it gets interrupted rather more often - it loads 64 into the
- * counter rather than 11932! This has an adverse impact on
- * do_gettimeoffset() -- it stops working! What is also not
- * good is that the interval that our timer function gets called
- * is no longer 10.0002 ms, but 9.9767 ms. To get around this
- * would require using a different timing source. Maybe someone
- * could use the RTC - I know that this can interrupt at frequencies
- * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix
- * it so that at startup, the timer code in sched.c would select
- * using either the RTC or the 8253 timer. The decision would be
- * based on whether there was any other device around that needed
- * to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz,
- * and then do some jiggery to have a version of do_timer that
- * advanced the clock by 1/1024 s. Every time that reached over 1/100
- * of a second, then do all the old code. If the time was kept correct
- * then do_gettimeoffset could just return 0 - there is no low order
- * divider that can be accessed.
- *
- * Ideally, you would be able to use the RTC for the speaker driver,
- * but it appears that the speaker driver really needs interrupt more
- * often than every 120 us or so.
- *
- * Anyway, this needs more thought.... pjsg (1993-08-28)
- *
- * If you are really that interested, you should be reading
- * comp.protocols.time.ntp!
- */
-
-static unsigned long get_offset_pit(void)
-{
- int count;
- unsigned long flags;
- static unsigned long jiffies_p = 0;
-
- /*
- * cache volatile jiffies temporarily; we have xtime_lock.
- */
- unsigned long jiffies_t;
-
- spin_lock_irqsave(&i8253_lock, flags);
- /* timer count may underflow right here */
- outb_p(0x00, PIT_MODE); /* latch the count ASAP */
-
- count = inb_p(PIT_CH0); /* read the latched count */
-
- /*
- * We do this guaranteed double memory access instead of a _p
- * postfix in the previous port access. Wheee, hackady hack
- */
- jiffies_t = jiffies;
-
- count |= inb_p(PIT_CH0) << 8;
-
- /* VIA686a test code... reset the latch if count > max + 1 */
- if (count > LATCH) {
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff, PIT_CH0);
- outb(LATCH >> 8, PIT_CH0);
- count = LATCH - 1;
- }
-
- /*
- * avoiding timer inconsistencies (they are rare, but they happen)...
- * there are two kinds of problems that must be avoided here:
- * 1. the timer counter underflows
- * 2. hardware problem with the timer, not giving us continuous time,
- * the counter does small "jumps" upwards on some Pentium systems,
- * (see c't 95/10 page 335 for Neptun bug.)
- */
-
- if( jiffies_t == jiffies_p ) {
- if( count > count_p ) {
- /* the nutcase */
- count = do_timer_overflow(count);
- }
- } else
- jiffies_p = jiffies_t;
-
- count_p = count;
-
- spin_unlock_irqrestore(&i8253_lock, flags);
-
- count = ((LATCH-1) - count) * TICK_SIZE;
- count = (count + LATCH/2) / LATCH;
-
- return count;
-}
-
-
-/* tsc timer_opts struct */
-struct timer_opts timer_pit = {
- .name = "pit",
- .mark_offset = mark_offset_pit,
- .get_offset = get_offset_pit,
- .monotonic_clock = monotonic_clock_pit,
- .delay = delay_pit,
-};
-
-struct init_timer_opts __initdata timer_pit_init = {
- .init = init_pit,
- .opts = &timer_pit,
-};
-
-void setup_pit_timer(void)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&i8253_lock, flags);
- outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
- udelay(10);
- outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
- udelay(10);
- outb(LATCH >> 8 , PIT_CH0); /* MSB */
- spin_unlock_irqrestore(&i8253_lock, flags);
-}
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
deleted file mode 100644
index 144e94a0493..00000000000
--- a/arch/i386/kernel/timers/timer_pm.c
+++ /dev/null
@@ -1,342 +0,0 @@
-/*
- * (C) Dominik Brodowski <linux@brodo.de> 2003
- *
- * Driver to use the Power Management Timer (PMTMR) available in some
- * southbridges as primary timing source for the Linux kernel.
- *
- * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
- * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
- *
- * This file is licensed under the GPL v2.
- */
-
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <asm/types.h>
-#include <asm/timer.h>
-#include <asm/smp.h>
-#include <asm/io.h>
-#include <asm/arch_hooks.h>
-
-#include <linux/timex.h>
-#include "mach_timer.h"
-
-/* Number of PMTMR ticks expected during calibration run */
-#define PMTMR_TICKS_PER_SEC 3579545
-#define PMTMR_EXPECTED_RATE \
- ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10))
-
-
-/* The I/O port the PMTMR resides at.
- * The location is detected during setup_arch(),
- * in arch/i386/acpi/boot.c */
-u32 pmtmr_ioport = 0;
-
-
-/* value of the Power timer at last timer interrupt */
-static u32 offset_tick;
-static u32 offset_delay;
-
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
-
-static int pmtmr_need_workaround __read_mostly = 1;
-
-/*helper function to safely read acpi pm timesource*/
-static inline u32 read_pmtmr(void)
-{
- if (pmtmr_need_workaround) {
- u32 v1, v2, v3;
-
- /* It has been reported that because of various broken
- * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time
- * source is not latched, so you must read it multiple
- * times to insure a safe value is read.
- */
- do {
- v1 = inl(pmtmr_ioport);
- v2 = inl(pmtmr_ioport);
- v3 = inl(pmtmr_ioport);
- } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
- || (v3 > v1 && v3 < v2));
-
- /* mask the output to 24 bits */
- return v2 & ACPI_PM_MASK;
- }
-
- return inl(pmtmr_ioport) & ACPI_PM_MASK;
-}
-
-
-/*
- * Some boards have the PMTMR running way too fast. We check
- * the PMTMR rate against PIT channel 2 to catch these cases.
- */
-static int verify_pmtmr_rate(void)
-{
- u32 value1, value2;
- unsigned long count, delta;
-
- mach_prepare_counter();
- value1 = read_pmtmr();
- mach_countup(&count);
- value2 = read_pmtmr();
- delta = (value2 - value1) & ACPI_PM_MASK;
-
- /* Check that the PMTMR delta is within 5% of what we expect */
- if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 ||
- delta > (PMTMR_EXPECTED_RATE * 21) / 20) {
- printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE);
- return -1;
- }
-
- return 0;
-}
-
-
-static int init_pmtmr(char* override)
-{
- u32 value1, value2;
- unsigned int i;
-
- if (override[0] && strncmp(override,"pmtmr",5))
- return -ENODEV;
-
- if (!pmtmr_ioport)
- return -ENODEV;
-
- /* we use the TSC for delay_pmtmr, so make sure it exists */
- if (!cpu_has_tsc)
- return -ENODEV;
-
- /* "verify" this timing source */
- value1 = read_pmtmr();
- for (i = 0; i < 10000; i++) {
- value2 = read_pmtmr();
- if (value2 == value1)
- continue;
- if (value2 > value1)
- goto pm_good;
- if ((value2 < value1) && ((value2) < 0xFFF))
- goto pm_good;
- printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2);
- return -EINVAL;
- }
- printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1);
- return -ENODEV;
-
-pm_good:
- if (verify_pmtmr_rate() != 0)
- return -ENODEV;
-
- init_cpu_khz();
- return 0;
-}
-
-static inline u32 cyc2us(u32 cycles)
-{
- /* The Power Management Timer ticks at 3.579545 ticks per microsecond.
- * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
- *
- * Even with HZ = 100, delta is at maximum 35796 ticks, so it can
- * easily be multiplied with 286 (=0x11E) without having to fear
- * u32 overflows.
- */
- cycles *= 286;
- return (cycles >> 10);
-}
-
-/*
- * this gets called during each timer interrupt
- * - Called while holding the writer xtime_lock
- */
-static void mark_offset_pmtmr(void)
-{
- u32 lost, delta, last_offset;
- static int first_run = 1;
- last_offset = offset_tick;
-
- write_seqlock(&monotonic_lock);
-
- offset_tick = read_pmtmr();
-
- /* calculate tick interval */
- delta = (offset_tick - last_offset) & ACPI_PM_MASK;
-
- /* convert to usecs */
- delta = cyc2us(delta);
-
- /* update the monotonic base value */
- monotonic_base += delta * NSEC_PER_USEC;
- write_sequnlock(&monotonic_lock);
-
- /* convert to ticks */
- delta += offset_delay;
- lost = delta / (USEC_PER_SEC / HZ);
- offset_delay = delta % (USEC_PER_SEC / HZ);
-
-
- /* compensate for lost ticks */
- if (lost >= 2)
- jiffies_64 += lost - 1;
-
- /* don't calculate delay for first run,
- or if we've got less then a tick */
- if (first_run || (lost < 1)) {
- first_run = 0;
- offset_delay = 0;
- }
-}
-
-static int pmtmr_resume(void)
-{
- write_seqlock(&monotonic_lock);
- /* Assume this is the last mark offset time */
- offset_tick = read_pmtmr();
- write_sequnlock(&monotonic_lock);
- return 0;
-}
-
-static unsigned long long monotonic_clock_pmtmr(void)
-{
- u32 last_offset, this_offset;
- unsigned long long base, ret;
- unsigned seq;
-
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = offset_tick;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
- /* Read the pmtmr */
- this_offset = read_pmtmr();
-
- /* convert to nanoseconds */
- ret = (this_offset - last_offset) & ACPI_PM_MASK;
- ret = base + (cyc2us(ret) * NSEC_PER_USEC);
- return ret;
-}
-
-static void delay_pmtmr(unsigned long loops)
-{
- unsigned long bclock, now;
-
- rdtscl(bclock);
- do
- {
- rep_nop();
- rdtscl(now);
- } while ((now-bclock) < loops);
-}
-
-
-/*
- * get the offset (in microseconds) from the last call to mark_offset()
- * - Called holding a reader xtime_lock
- */
-static unsigned long get_offset_pmtmr(void)
-{
- u32 now, offset, delta = 0;
-
- offset = offset_tick;
- now = read_pmtmr();
- delta = (now - offset)&ACPI_PM_MASK;
-
- return (unsigned long) offset_delay + cyc2us(delta);
-}
-
-
-/* acpi timer_opts struct */
-static struct timer_opts timer_pmtmr = {
- .name = "pmtmr",
- .mark_offset = mark_offset_pmtmr,
- .get_offset = get_offset_pmtmr,
- .monotonic_clock = monotonic_clock_pmtmr,
- .delay = delay_pmtmr,
- .read_timer = read_timer_tsc,
- .resume = pmtmr_resume,
-};
-
-struct init_timer_opts __initdata timer_pmtmr_init = {
- .init = init_pmtmr,
- .opts = &timer_pmtmr,
-};
-
-#ifdef CONFIG_PCI
-/*
- * PIIX4 Errata:
- *
- * The power management timer may return improper results when read.
- * Although the timer value settles properly after incrementing,
- * while incrementing there is a 3 ns window every 69.8 ns where the
- * timer value is indeterminate (a 4.2% chance that the data will be
- * incorrect when read). As a result, the ACPI free running count up
- * timer specification is violated due to erroneous reads.
- */
-static int __init pmtmr_bug_check(void)
-{
- static struct pci_device_id gray_list[] __initdata = {
- /* these chipsets may have bug. */
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
- PCI_DEVICE_ID_INTEL_82801DB_0) },
- { },
- };
- struct pci_dev *dev;
- int pmtmr_has_bug = 0;
- u8 rev;
-
- if (cur_timer != &timer_pmtmr || !pmtmr_need_workaround)
- return 0;
-
- dev = pci_get_device(PCI_VENDOR_ID_INTEL,
- PCI_DEVICE_ID_INTEL_82371AB_3, NULL);
- if (dev) {
- pci_read_config_byte(dev, PCI_REVISION_ID, &rev);
- /* the bug has been fixed in PIIX4M */
- if (rev < 3) {
- printk(KERN_WARNING "* Found PM-Timer Bug on this "
- "chipset. Due to workarounds for a bug,\n"
- "* this time source is slow. Consider trying "
- "other time sources (clock=)\n");
- pmtmr_has_bug = 1;
- }
- pci_dev_put(dev);
- }
-
- if (pci_dev_present(gray_list)) {
- printk(KERN_WARNING "* This chipset may have PM-Timer Bug. Due"
- " to workarounds for a bug,\n"
- "* this time source is slow. If you are sure your timer"
- " does not have\n"
- "* this bug, please use \"pmtmr_good\" to disable the "
- "workaround\n");
- pmtmr_has_bug = 1;
- }
-
- if (!pmtmr_has_bug)
- pmtmr_need_workaround = 0;
-
- return 0;
-}
-device_initcall(pmtmr_bug_check);
-#endif
-
-static int __init pmtr_good_setup(char *__str)
-{
- pmtmr_need_workaround = 0;
- return 1;
-}
-__setup("pmtmr_good", pmtr_good_setup);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86");
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
deleted file mode 100644
index f1187ddb0d0..00000000000
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ /dev/null
@@ -1,617 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- *
- * 2004-06-25 Jesper Juhl
- * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
- * failing to inline.
- */
-
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/cpufreq.h>
-#include <linux/string.h>
-#include <linux/jiffies.h>
-
-#include <asm/timer.h>
-#include <asm/io.h>
-/* processor.h for distable_tsc flag */
-#include <asm/processor.h>
-
-#include "io_ports.h"
-#include "mach_timer.h"
-
-#include <asm/hpet.h>
-#include <asm/i8253.h>
-
-#ifdef CONFIG_HPET_TIMER
-static unsigned long hpet_usec_quotient;
-static unsigned long hpet_last;
-static struct timer_opts timer_tsc;
-#endif
-
-static inline void cpufreq_delayed_get(void);
-
-int tsc_disable __devinitdata = 0;
-
-static int use_tsc;
-/* Number of usecs that the last interrupt was delayed */
-static int delay_at_last_interrupt;
-
-static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
-static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* Avoid compensating for lost ticks before TSCs are synched */
-static int detect_lost_ticks;
-static int __init start_lost_tick_compensation(void)
-{
- detect_lost_ticks = 1;
- return 0;
-}
-late_initcall(start_lost_tick_compensation);
-
-/* convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_khz * 10^3))
- * ns = cycles * (10^6 / cpu_khz)
- *
- * Then we use scaling math (suggested by george@mvista.com) to get:
- * ns = cycles * (10^6 * SC / cpu_khz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- *
- * We can use khz divisor instead of mhz to keep a better percision, since
- * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- * (mathieu.desnoyers@polymtl.ca)
- *
- * -johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-static unsigned long cyc2ns_scale __read_mostly;
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
-{
- cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
-static int count2; /* counter for mark_offset_tsc() */
-
-/* Cached *multiplier* to convert TSC counts to microseconds.
- * (see the equation below).
- * Equal to 2^32 * (1 / (clocks per usec) ).
- * Initialized in time_init.
- */
-static unsigned long fast_gettimeoffset_quotient;
-
-static unsigned long get_offset_tsc(void)
-{
- register unsigned long eax, edx;
-
- /* Read the Time Stamp Counter */
-
- rdtsc(eax,edx);
-
- /* .. relative to previous jiffy (32 bits is enough) */
- eax -= last_tsc_low; /* tsc_low delta */
-
- /*
- * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
- * = (tsc_low delta) * (usecs_per_clock)
- * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
- *
- * Using a mull instead of a divl saves up to 31 clock cycles
- * in the critical path.
- */
-
- __asm__("mull %2"
- :"=a" (eax), "=d" (edx)
- :"rm" (fast_gettimeoffset_quotient),
- "0" (eax));
-
- /* our adjusted time offset in microseconds */
- return delay_at_last_interrupt + edx;
-}
-
-static unsigned long long monotonic_clock_tsc(void)
-{
- unsigned long long last_offset, this_offset, base;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
- /* Read the Time Stamp Counter */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return base + cycles_2_ns(this_offset - last_offset);
-}
-
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long sched_clock(void)
-{
- unsigned long long this_offset;
-
- /*
- * In the NUMA case we dont use the TSC as they are not
- * synchronized across all CPUs.
- */
-#ifndef CONFIG_NUMA
- if (!use_tsc)
-#endif
- /* no locking but a rare wrong value is not a big deal */
- return jiffies_64 * (1000000000 / HZ);
-
- /* Read the Time Stamp Counter */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return cycles_2_ns(this_offset);
-}
-
-static void delay_tsc(unsigned long loops)
-{
- unsigned long bclock, now;
-
- rdtscl(bclock);
- do
- {
- rep_nop();
- rdtscl(now);
- } while ((now-bclock) < loops);
-}
-
-#ifdef CONFIG_HPET_TIMER
-static void mark_offset_tsc_hpet(void)
-{
- unsigned long long this_offset, last_offset;
- unsigned long offset, temp, hpet_current;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- /*
- * It is important that these two operations happen almost at
- * the same time. We do the RDTSC stuff first, since it's
- * faster. To avoid any inconsistencies, we need interrupts
- * disabled locally.
- */
- /*
- * Interrupts are just disabled locally since the timer irq
- * has the SA_INTERRUPT flag set. -arca
- */
- /* read Pentium cycle counter */
-
- hpet_current = hpet_readl(HPET_COUNTER);
- rdtsc(last_tsc_low, last_tsc_high);
-
- /* lost tick compensation */
- offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
- if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))
- && detect_lost_ticks) {
- int lost_ticks = (offset - hpet_last) / hpet_tick;
- jiffies_64 += lost_ticks;
- }
- hpet_last = hpet_current;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- monotonic_base += cycles_2_ns(this_offset - last_offset);
- write_sequnlock(&monotonic_lock);
-
- /* calculate delay_at_last_interrupt */
- /*
- * Time offset = (hpet delta) * ( usecs per HPET clock )
- * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
- * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
- * Where,
- * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
- */
- delay_at_last_interrupt = hpet_current - offset;
- ASM_MUL64_REG(temp, delay_at_last_interrupt,
- hpet_usec_quotient, delay_at_last_interrupt);
-}
-#endif
-
-
-#ifdef CONFIG_CPU_FREQ
-#include <linux/workqueue.h>
-
-static unsigned int cpufreq_delayed_issched = 0;
-static unsigned int cpufreq_init = 0;
-static struct work_struct cpufreq_delayed_get_work;
-
-static void handle_cpufreq_delayed_get(void *v)
-{
- unsigned int cpu;
- for_each_online_cpu(cpu) {
- cpufreq_get(cpu);
- }
- cpufreq_delayed_issched = 0;
-}
-
-/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
- * to verify the CPU frequency the timing core thinks the CPU is running
- * at is still correct.
- */
-static inline void cpufreq_delayed_get(void)
-{
- if (cpufreq_init && !cpufreq_delayed_issched) {
- cpufreq_delayed_issched = 1;
- printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
- schedule_work(&cpufreq_delayed_get_work);
- }
-}
-
-/* If the CPU frequency is scaled, TSC-based delays will need a different
- * loops_per_jiffy value to function properly.
- */
-
-static unsigned int ref_freq = 0;
-static unsigned long loops_per_jiffy_ref = 0;
-
-#ifndef CONFIG_SMP
-static unsigned long fast_gettimeoffset_ref = 0;
-static unsigned int cpu_khz_ref = 0;
-#endif
-
-static int
-time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
- void *data)
-{
- struct cpufreq_freqs *freq = data;
-
- if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
- write_seqlock_irq(&xtime_lock);
- if (!ref_freq) {
- if (!freq->old){
- ref_freq = freq->new;
- goto end;
- }
- ref_freq = freq->old;
- loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
-#ifndef CONFIG_SMP
- fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
- cpu_khz_ref = cpu_khz;
-#endif
- }
-
- if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
- (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
- (val == CPUFREQ_RESUMECHANGE)) {
- if (!(freq->flags & CPUFREQ_CONST_LOOPS))
- cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
-#ifndef CONFIG_SMP
- if (cpu_khz)
- cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
- if (use_tsc) {
- if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
- fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
- set_cyc2ns_scale(cpu_khz);
- }
- }
-#endif
- }
-
-end:
- if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
- write_sequnlock_irq(&xtime_lock);
-
- return 0;
-}
-
-static struct notifier_block time_cpufreq_notifier_block = {
- .notifier_call = time_cpufreq_notifier
-};
-
-
-static int __init cpufreq_tsc(void)
-{
- int ret;
- INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
- ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
- if (!ret)
- cpufreq_init = 1;
- return ret;
-}
-core_initcall(cpufreq_tsc);
-
-#else /* CONFIG_CPU_FREQ */
-static inline void cpufreq_delayed_get(void) { return; }
-#endif
-
-int recalibrate_cpu_khz(void)
-{
-#ifndef CONFIG_SMP
- unsigned int cpu_khz_old = cpu_khz;
-
- if (cpu_has_tsc) {
- local_irq_disable();
- init_cpu_khz();
- local_irq_enable();
- cpu_data[0].loops_per_jiffy =
- cpufreq_scale(cpu_data[0].loops_per_jiffy,
- cpu_khz_old,
- cpu_khz);
- return 0;
- } else
- return -ENODEV;
-#else
- return -ENODEV;
-#endif
-}
-EXPORT_SYMBOL(recalibrate_cpu_khz);
-
-static void mark_offset_tsc(void)
-{
- unsigned long lost,delay;
- unsigned long delta = last_tsc_low;
- int count;
- int countmp;
- static int count1 = 0;
- unsigned long long this_offset, last_offset;
- static int lost_count = 0;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- /*
- * It is important that these two operations happen almost at
- * the same time. We do the RDTSC stuff first, since it's
- * faster. To avoid any inconsistencies, we need interrupts
- * disabled locally.
- */
-
- /*
- * Interrupts are just disabled locally since the timer irq
- * has the SA_INTERRUPT flag set. -arca
- */
-
- /* read Pentium cycle counter */
-
- rdtsc(last_tsc_low, last_tsc_high);
-
- spin_lock(&i8253_lock);
- outb_p(0x00, PIT_MODE); /* latch the count ASAP */
-
- count = inb_p(PIT_CH0); /* read the latched count */
- count |= inb(PIT_CH0) << 8;
-
- /*
- * VIA686a test code... reset the latch if count > max + 1
- * from timer_pit.c - cjb
- */
- if (count > LATCH) {
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff, PIT_CH0);
- outb(LATCH >> 8, PIT_CH0);
- count = LATCH - 1;
- }
-
- spin_unlock(&i8253_lock);
-
- if (pit_latch_buggy) {
- /* get center value of last 3 time lutch */
- if ((count2 >= count && count >= count1)
- || (count1 >= count && count >= count2)) {
- count2 = count1; count1 = count;
- } else if ((count1 >= count2 && count2 >= count)
- || (count >= count2 && count2 >= count1)) {
- countmp = count;count = count2;
- count2 = count1;count1 = countmp;
- } else {
- count2 = count1; count1 = count; count = count1;
- }
- }
-
- /* lost tick compensation */
- delta = last_tsc_low - delta;
- {
- register unsigned long eax, edx;
- eax = delta;
- __asm__("mull %2"
- :"=a" (eax), "=d" (edx)
- :"rm" (fast_gettimeoffset_quotient),
- "0" (eax));
- delta = edx;
- }
- delta += delay_at_last_interrupt;
- lost = delta/(1000000/HZ);
- delay = delta%(1000000/HZ);
- if (lost >= 2 && detect_lost_ticks) {
- jiffies_64 += lost-1;
-
- /* sanity check to ensure we're not always losing ticks */
- if (lost_count++ > 100) {
- printk(KERN_WARNING "Losing too many ticks!\n");
- printk(KERN_WARNING "TSC cannot be used as a timesource. \n");
- printk(KERN_WARNING "Possible reasons for this are:\n");
- printk(KERN_WARNING " You're running with Speedstep,\n");
- printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n");
- printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n");
- printk(KERN_WARNING "Falling back to a sane timesource now.\n");
-
- clock_fallback();
- }
- /* ... but give the TSC a fair chance */
- if (lost_count > 25)
- cpufreq_delayed_get();
- } else
- lost_count = 0;
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- monotonic_base += cycles_2_ns(this_offset - last_offset);
- write_sequnlock(&monotonic_lock);
-
- /* calculate delay_at_last_interrupt */
- count = ((LATCH-1) - count) * TICK_SIZE;
- delay_at_last_interrupt = (count + LATCH/2) / LATCH;
-
- /* catch corner case where tick rollover occured
- * between tsc and pit reads (as noted when
- * usec delta is > 90% # of usecs/tick)
- */
- if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
- jiffies_64++;
-}
-
-static int __init init_tsc(char* override)
-{
-
- /* check clock override */
- if (override[0] && strncmp(override,"tsc",3)) {
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled()) {
- printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
- } else
-#endif
- {
- return -ENODEV;
- }
- }
-
- /*
- * If we have APM enabled or the CPU clock speed is variable
- * (CPU stops clock on HLT or slows clock to save power)
- * then the TSC timestamps may diverge by up to 1 jiffy from
- * 'real time' but nothing will break.
- * The most frequent case is that the CPU is "woken" from a halt
- * state by the timer interrupt itself, so we get 0 error. In the
- * rare cases where a driver would "wake" the CPU and request a
- * timestamp, the maximum error is < 1 jiffy. But timestamps are
- * still perfectly ordered.
- * Note that the TSC counter will be reset if APM suspends
- * to disk; this won't break the kernel, though, 'cuz we're
- * smart. See arch/i386/kernel/apm.c.
- */
- /*
- * Firstly we have to do a CPU check for chips with
- * a potentially buggy TSC. At this point we haven't run
- * the ident/bugs checks so we must run this hook as it
- * may turn off the TSC flag.
- *
- * NOTE: this doesn't yet handle SMP 486 machines where only
- * some CPU's have a TSC. Thats never worked and nobody has
- * moaned if you have the only one in the world - you fix it!
- */
-
- count2 = LATCH; /* initialize counter for mark_offset_tsc() */
-
- if (cpu_has_tsc) {
- unsigned long tsc_quotient;
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled() && hpet_use_timer) {
- unsigned long result, remain;
- printk("Using TSC for gettimeofday\n");
- tsc_quotient = calibrate_tsc_hpet(NULL);
- timer_tsc.mark_offset = &mark_offset_tsc_hpet;
- /*
- * Math to calculate hpet to usec multiplier
- * Look for the comments at get_offset_tsc_hpet()
- */
- ASM_DIV64_REG(result, remain, hpet_tick,
- 0, KERNEL_TICK_USEC);
- if (remain > (hpet_tick >> 1))
- result++; /* rounding the result */
-
- hpet_usec_quotient = result;
- } else
-#endif
- {
- tsc_quotient = calibrate_tsc();
- }
-
- if (tsc_quotient) {
- fast_gettimeoffset_quotient = tsc_quotient;
- use_tsc = 1;
- /*
- * We could be more selective here I suspect
- * and just enable this for the next intel chips ?
- */
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- __asm__("divl %2"
- :"=a" (cpu_khz), "=d" (edx)
- :"r" (tsc_quotient),
- "0" (eax), "1" (edx));
- printk("Detected %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
- set_cyc2ns_scale(cpu_khz);
- return 0;
- }
- }
- return -ENODEV;
-}
-
-static int tsc_resume(void)
-{
- write_seqlock(&monotonic_lock);
- /* Assume this is the last mark offset time */
- rdtsc(last_tsc_low, last_tsc_high);
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled() && hpet_use_timer)
- hpet_last = hpet_readl(HPET_COUNTER);
-#endif
- write_sequnlock(&monotonic_lock);
- return 0;
-}
-
-#ifndef CONFIG_X86_TSC
-/* disable flag for tsc. Takes effect by clearing the TSC cpu flag
- * in cpu/common.c */
-static int __init tsc_setup(char *str)
-{
- tsc_disable = 1;
- return 1;
-}
-#else
-static int __init tsc_setup(char *str)
-{
- printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
- "cannot disable TSC.\n");
- return 1;
-}
-#endif
-__setup("notsc", tsc_setup);
-
-
-
-/************************************************************/
-
-/* tsc timer_opts struct */
-static struct timer_opts timer_tsc = {
- .name = "tsc",
- .mark_offset = mark_offset_tsc,
- .get_offset = get_offset_tsc,
- .monotonic_clock = monotonic_clock_tsc,
- .delay = delay_tsc,
- .read_timer = read_timer_tsc,
- .resume = tsc_resume,
-};
-
-struct init_timer_opts __initdata timer_tsc_init = {
- .init = init_tsc,
- .opts = &timer_tsc,
-};
diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c
index 296355292c7..e2e281d4bcc 100644
--- a/arch/i386/kernel/topology.c
+++ b/arch/i386/kernel/topology.c
@@ -32,15 +32,8 @@
static struct i386_cpu cpu_devices[NR_CPUS];
-int arch_register_cpu(int num){
- struct node *parent = NULL;
-
-#ifdef CONFIG_NUMA
- int node = cpu_to_node(num);
- if (node_online(node))
- parent = &node_devices[node].node;
-#endif /* CONFIG_NUMA */
-
+int arch_register_cpu(int num)
+{
/*
* CPU0 cannot be offlined due to several
* restrictions and assumptions in kernel. This basically
@@ -50,21 +43,13 @@ int arch_register_cpu(int num){
if (!num)
cpu_devices[num].cpu.no_control = 1;
- return register_cpu(&cpu_devices[num].cpu, num, parent);
+ return register_cpu(&cpu_devices[num].cpu, num);
}
#ifdef CONFIG_HOTPLUG_CPU
void arch_unregister_cpu(int num) {
- struct node *parent = NULL;
-
-#ifdef CONFIG_NUMA
- int node = cpu_to_node(num);
- if (node_online(node))
- parent = &node_devices[node].node;
-#endif /* CONFIG_NUMA */
-
- return unregister_cpu(&cpu_devices[num].cpu, parent);
+ return unregister_cpu(&cpu_devices[num].cpu);
}
EXPORT_SYMBOL(arch_register_cpu);
EXPORT_SYMBOL(arch_unregister_cpu);
@@ -74,16 +59,13 @@ EXPORT_SYMBOL(arch_unregister_cpu);
#ifdef CONFIG_NUMA
#include <linux/mmzone.h>
-#include <asm/node.h>
-
-struct i386_node node_devices[MAX_NUMNODES];
static int __init topology_init(void)
{
int i;
for_each_online_node(i)
- arch_register_node(i);
+ register_one_node(i);
for_each_present_cpu(i)
arch_register_cpu(i);
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index dcc14477af1..78464097470 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -28,6 +28,7 @@
#include <linux/utsname.h>
#include <linux/kprobes.h>
#include <linux/kexec.h>
+#include <linux/unwind.h>
#ifdef CONFIG_EISA
#include <linux/ioport.h>
@@ -47,7 +48,7 @@
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/nmi.h>
-
+#include <asm/unwind.h>
#include <asm/smp.h>
#include <asm/arch_hooks.h>
#include <asm/kdebug.h>
@@ -92,6 +93,7 @@ asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);
static int kstack_depth_to_print = 24;
+static int call_trace = 1;
ATOMIC_NOTIFIER_HEAD(i386die_chain);
int register_die_notifier(struct notifier_block *nb)
@@ -170,7 +172,23 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
return ebp;
}
-static void show_trace_log_lvl(struct task_struct *task,
+static asmlinkage int show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
+{
+ int n = 0;
+ int printed = 0; /* nr of entries already printed on current line */
+
+ while (unwind(info) == 0 && UNW_PC(info)) {
+ ++n;
+ printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed);
+ if (arch_unw_user_mode(info))
+ break;
+ }
+ if (printed)
+ printk("\n");
+ return n;
+}
+
+static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, char *log_lvl)
{
unsigned long ebp;
@@ -178,6 +196,26 @@ static void show_trace_log_lvl(struct task_struct *task,
if (!task)
task = current;
+ if (call_trace >= 0) {
+ int unw_ret = 0;
+ struct unwind_frame_info info;
+
+ if (regs) {
+ if (unwind_init_frame_info(&info, task, regs) == 0)
+ unw_ret = show_trace_unwind(&info, log_lvl);
+ } else if (task == current)
+ unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
+ else {
+ if (unwind_init_blocked(&info, task) == 0)
+ unw_ret = show_trace_unwind(&info, log_lvl);
+ }
+ if (unw_ret > 0) {
+ if (call_trace > 0)
+ return;
+ printk("%sLegacy call trace:\n", log_lvl);
+ }
+ }
+
if (task == current) {
/* Grab ebp right from our regs */
asm ("movl %%ebp, %0" : "=r" (ebp) : );
@@ -198,13 +236,13 @@ static void show_trace_log_lvl(struct task_struct *task,
}
}
-void show_trace(struct task_struct *task, unsigned long * stack)
+void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
{
- show_trace_log_lvl(task, stack, "");
+ show_trace_log_lvl(task, regs, stack, "");
}
-static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp,
- char *log_lvl)
+static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *esp, char *log_lvl)
{
unsigned long *stack;
int i;
@@ -225,13 +263,13 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp,
printk("%08lx ", *stack++);
}
printk("\n%sCall Trace:\n", log_lvl);
- show_trace_log_lvl(task, esp, log_lvl);
+ show_trace_log_lvl(task, regs, esp, log_lvl);
}
void show_stack(struct task_struct *task, unsigned long *esp)
{
printk(" ");
- show_stack_log_lvl(task, esp, "");
+ show_stack_log_lvl(task, NULL, esp, "");
}
/*
@@ -241,7 +279,7 @@ void dump_stack(void)
{
unsigned long stack;
- show_trace(current, &stack);
+ show_trace(current, NULL, &stack);
}
EXPORT_SYMBOL(dump_stack);
@@ -285,7 +323,7 @@ void show_registers(struct pt_regs *regs)
u8 __user *eip;
printk("\n" KERN_EMERG "Stack: ");
- show_stack_log_lvl(NULL, (unsigned long *)esp, KERN_EMERG);
+ show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
printk(KERN_EMERG "Code: ");
@@ -1215,3 +1253,15 @@ static int __init kstack_setup(char *s)
return 1;
}
__setup("kstack=", kstack_setup);
+
+static int __init call_trace_setup(char *s)
+{
+ if (strcmp(s, "old") == 0)
+ call_trace = -1;
+ else if (strcmp(s, "both") == 0)
+ call_trace = 0;
+ else if (strcmp(s, "new") == 0)
+ call_trace = 1;
+ return 1;
+}
+__setup("call_trace=", call_trace_setup);
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
new file mode 100644
index 00000000000..7e0d8dab207
--- /dev/null
+++ b/arch/i386/kernel/tsc.c
@@ -0,0 +1,478 @@
+/*
+ * This code largely moved from arch/i386/kernel/timer/timer_tsc.c
+ * which was originally moved from arch/i386/kernel/time.c.
+ * See comments there for proper credits.
+ */
+
+#include <linux/clocksource.h>
+#include <linux/workqueue.h>
+#include <linux/cpufreq.h>
+#include <linux/jiffies.h>
+#include <linux/init.h>
+#include <linux/dmi.h>
+
+#include <asm/delay.h>
+#include <asm/tsc.h>
+#include <asm/delay.h>
+#include <asm/io.h>
+
+#include "mach_timer.h"
+
+/*
+ * On some systems the TSC frequency does not
+ * change with the cpu frequency. So we need
+ * an extra value to store the TSC freq
+ */
+unsigned int tsc_khz;
+
+int tsc_disable __cpuinitdata = 0;
+
+#ifdef CONFIG_X86_TSC
+static int __init tsc_setup(char *str)
+{
+ printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
+ "cannot disable TSC.\n");
+ return 1;
+}
+#else
+/*
+ * disable flag for tsc. Takes effect by clearing the TSC cpu flag
+ * in cpu/common.c
+ */
+static int __init tsc_setup(char *str)
+{
+ tsc_disable = 1;
+
+ return 1;
+}
+#endif
+
+__setup("notsc", tsc_setup);
+
+/*
+ * code to mark and check if the TSC is unstable
+ * due to cpufreq or due to unsynced TSCs
+ */
+static int tsc_unstable;
+
+static inline int check_tsc_unstable(void)
+{
+ return tsc_unstable;
+}
+
+void mark_tsc_unstable(void)
+{
+ tsc_unstable = 1;
+}
+EXPORT_SYMBOL_GPL(mark_tsc_unstable);
+
+/* Accellerators for sched_clock()
+ * convert from cycles(64bits) => nanoseconds (64bits)
+ * basic equation:
+ * ns = cycles / (freq / ns_per_sec)
+ * ns = cycles * (ns_per_sec / freq)
+ * ns = cycles * (10^9 / (cpu_khz * 10^3))
+ * ns = cycles * (10^6 / cpu_khz)
+ *
+ * Then we use scaling math (suggested by george@mvista.com) to get:
+ * ns = cycles * (10^6 * SC / cpu_khz) / SC
+ * ns = cycles * cyc2ns_scale / SC
+ *
+ * And since SC is a constant power of two, we can convert the div
+ * into a shift.
+ *
+ * We can use khz divisor instead of mhz to keep a better percision, since
+ * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
+ * (mathieu.desnoyers@polymtl.ca)
+ *
+ * -johnstul@us.ibm.com "math is hard, lets go shopping!"
+ */
+static unsigned long cyc2ns_scale __read_mostly;
+
+#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+
+static inline void set_cyc2ns_scale(unsigned long cpu_khz)
+{
+ cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
+}
+
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+ return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+}
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ */
+unsigned long long sched_clock(void)
+{
+ unsigned long long this_offset;
+
+ /*
+ * in the NUMA case we dont use the TSC as they are not
+ * synchronized across all CPUs.
+ */
+#ifndef CONFIG_NUMA
+ if (!cpu_khz || check_tsc_unstable())
+#endif
+ /* no locking but a rare wrong value is not a big deal */
+ return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
+
+ /* read the Time Stamp Counter: */
+ rdtscll(this_offset);
+
+ /* return the value in ns */
+ return cycles_2_ns(this_offset);
+}
+
+static unsigned long calculate_cpu_khz(void)
+{
+ unsigned long long start, end;
+ unsigned long count;
+ u64 delta64;
+ int i;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ /* run 3 times to ensure the cache is warm */
+ for (i = 0; i < 3; i++) {
+ mach_prepare_counter();
+ rdtscll(start);
+ mach_countup(&count);
+ rdtscll(end);
+ }
+ /*
+ * Error: ECTCNEVERSET
+ * The CTC wasn't reliable: we got a hit on the very first read,
+ * or the CPU was so fast/slow that the quotient wouldn't fit in
+ * 32 bits..
+ */
+ if (count <= 1)
+ goto err;
+
+ delta64 = end - start;
+
+ /* cpu freq too fast: */
+ if (delta64 > (1ULL<<32))
+ goto err;
+
+ /* cpu freq too slow: */
+ if (delta64 <= CALIBRATE_TIME_MSEC)
+ goto err;
+
+ delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */
+ do_div(delta64,CALIBRATE_TIME_MSEC);
+
+ local_irq_restore(flags);
+ return (unsigned long)delta64;
+err:
+ local_irq_restore(flags);
+ return 0;
+}
+
+int recalibrate_cpu_khz(void)
+{
+#ifndef CONFIG_SMP
+ unsigned long cpu_khz_old = cpu_khz;
+
+ if (cpu_has_tsc) {
+ cpu_khz = calculate_cpu_khz();
+ tsc_khz = cpu_khz;
+ cpu_data[0].loops_per_jiffy =
+ cpufreq_scale(cpu_data[0].loops_per_jiffy,
+ cpu_khz_old, cpu_khz);
+ return 0;
+ } else
+ return -ENODEV;
+#else
+ return -ENODEV;
+#endif
+}
+
+EXPORT_SYMBOL(recalibrate_cpu_khz);
+
+void tsc_init(void)
+{
+ if (!cpu_has_tsc || tsc_disable)
+ return;
+
+ cpu_khz = calculate_cpu_khz();
+ tsc_khz = cpu_khz;
+
+ if (!cpu_khz)
+ return;
+
+ printk("Detected %lu.%03lu MHz processor.\n",
+ (unsigned long)cpu_khz / 1000,
+ (unsigned long)cpu_khz % 1000);
+
+ set_cyc2ns_scale(cpu_khz);
+ use_tsc_delay();
+}
+
+#ifdef CONFIG_CPU_FREQ
+
+static unsigned int cpufreq_delayed_issched = 0;
+static unsigned int cpufreq_init = 0;
+static struct work_struct cpufreq_delayed_get_work;
+
+static void handle_cpufreq_delayed_get(void *v)
+{
+ unsigned int cpu;
+
+ for_each_online_cpu(cpu)
+ cpufreq_get(cpu);
+
+ cpufreq_delayed_issched = 0;
+}
+
+/*
+ * if we notice cpufreq oddness, schedule a call to cpufreq_get() as it tries
+ * to verify the CPU frequency the timing core thinks the CPU is running
+ * at is still correct.
+ */
+static inline void cpufreq_delayed_get(void)
+{
+ if (cpufreq_init && !cpufreq_delayed_issched) {
+ cpufreq_delayed_issched = 1;
+ printk(KERN_DEBUG "Checking if CPU frequency changed.\n");
+ schedule_work(&cpufreq_delayed_get_work);
+ }
+}
+
+/*
+ * if the CPU frequency is scaled, TSC-based delays will need a different
+ * loops_per_jiffy value to function properly.
+ */
+static unsigned int ref_freq = 0;
+static unsigned long loops_per_jiffy_ref = 0;
+static unsigned long cpu_khz_ref = 0;
+
+static int
+time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
+{
+ struct cpufreq_freqs *freq = data;
+
+ if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
+ write_seqlock_irq(&xtime_lock);
+
+ if (!ref_freq) {
+ if (!freq->old){
+ ref_freq = freq->new;
+ goto end;
+ }
+ ref_freq = freq->old;
+ loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
+ cpu_khz_ref = cpu_khz;
+ }
+
+ if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
+ (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
+ (val == CPUFREQ_RESUMECHANGE)) {
+ if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+ cpu_data[freq->cpu].loops_per_jiffy =
+ cpufreq_scale(loops_per_jiffy_ref,
+ ref_freq, freq->new);
+
+ if (cpu_khz) {
+
+ if (num_online_cpus() == 1)
+ cpu_khz = cpufreq_scale(cpu_khz_ref,
+ ref_freq, freq->new);
+ if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
+ tsc_khz = cpu_khz;
+ set_cyc2ns_scale(cpu_khz);
+ /*
+ * TSC based sched_clock turns
+ * to junk w/ cpufreq
+ */
+ mark_tsc_unstable();
+ }
+ }
+ }
+end:
+ if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
+ write_sequnlock_irq(&xtime_lock);
+
+ return 0;
+}
+
+static struct notifier_block time_cpufreq_notifier_block = {
+ .notifier_call = time_cpufreq_notifier
+};
+
+static int __init cpufreq_tsc(void)
+{
+ int ret;
+
+ INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
+ ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ if (!ret)
+ cpufreq_init = 1;
+
+ return ret;
+}
+
+core_initcall(cpufreq_tsc);
+
+#endif
+
+/* clock source code */
+
+static unsigned long current_tsc_khz = 0;
+static int tsc_update_callback(void);
+
+static cycle_t read_tsc(void)
+{
+ cycle_t ret;
+
+ rdtscll(ret);
+
+ return ret;
+}
+
+static struct clocksource clocksource_tsc = {
+ .name = "tsc",
+ .rating = 300,
+ .read = read_tsc,
+ .mask = CLOCKSOURCE_MASK(64),
+ .mult = 0, /* to be set */
+ .shift = 22,
+ .update_callback = tsc_update_callback,
+ .is_continuous = 1,
+};
+
+static int tsc_update_callback(void)
+{
+ int change = 0;
+
+ /* check to see if we should switch to the safe clocksource: */
+ if (clocksource_tsc.rating != 50 && check_tsc_unstable()) {
+ clocksource_tsc.rating = 50;
+ clocksource_reselect();
+ change = 1;
+ }
+
+ /* only update if tsc_khz has changed: */
+ if (current_tsc_khz != tsc_khz) {
+ current_tsc_khz = tsc_khz;
+ clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
+ clocksource_tsc.shift);
+ change = 1;
+ }
+
+ return change;
+}
+
+static int __init dmi_mark_tsc_unstable(struct dmi_system_id *d)
+{
+ printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
+ d->ident);
+ mark_tsc_unstable();
+ return 0;
+}
+
+/* List of systems that have known TSC problems */
+static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
+ {
+ .callback = dmi_mark_tsc_unstable,
+ .ident = "IBM Thinkpad 380XD",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "2635FA0"),
+ },
+ },
+ {}
+};
+
+#define TSC_FREQ_CHECK_INTERVAL (10*MSEC_PER_SEC) /* 10sec in MS */
+static struct timer_list verify_tsc_freq_timer;
+
+/* XXX - Probably should add locking */
+static void verify_tsc_freq(unsigned long unused)
+{
+ static u64 last_tsc;
+ static unsigned long last_jiffies;
+
+ u64 now_tsc, interval_tsc;
+ unsigned long now_jiffies, interval_jiffies;
+
+
+ if (check_tsc_unstable())
+ return;
+
+ rdtscll(now_tsc);
+ now_jiffies = jiffies;
+
+ if (!last_jiffies) {
+ goto out;
+ }
+
+ interval_jiffies = now_jiffies - last_jiffies;
+ interval_tsc = now_tsc - last_tsc;
+ interval_tsc *= HZ;
+ do_div(interval_tsc, cpu_khz*1000);
+
+ if (interval_tsc < (interval_jiffies * 3 / 4)) {
+ printk("TSC appears to be running slowly. "
+ "Marking it as unstable\n");
+ mark_tsc_unstable();
+ return;
+ }
+
+out:
+ last_tsc = now_tsc;
+ last_jiffies = now_jiffies;
+ /* set us up to go off on the next interval: */
+ mod_timer(&verify_tsc_freq_timer,
+ jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL));
+}
+
+/*
+ * Make an educated guess if the TSC is trustworthy and synchronized
+ * over all CPUs.
+ */
+static __init int unsynchronized_tsc(void)
+{
+ /*
+ * Intel systems are normally all synchronized.
+ * Exceptions must mark TSC as unstable:
+ */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ return 0;
+
+ /* assume multi socket systems are not synchronized: */
+ return num_possible_cpus() > 1;
+}
+
+static int __init init_tsc_clocksource(void)
+{
+
+ if (cpu_has_tsc && tsc_khz && !tsc_disable) {
+ /* check blacklist */
+ dmi_check_system(bad_tsc_dmi_table);
+
+ if (unsynchronized_tsc()) /* mark unstable if unsynced */
+ mark_tsc_unstable();
+ current_tsc_khz = tsc_khz;
+ clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
+ clocksource_tsc.shift);
+ /* lower the rating if we already know its unstable: */
+ if (check_tsc_unstable())
+ clocksource_tsc.rating = 50;
+
+ init_timer(&verify_tsc_freq_timer);
+ verify_tsc_freq_timer.function = verify_tsc_freq;
+ verify_tsc_freq_timer.expires =
+ jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL);
+ add_timer(&verify_tsc_freq_timer);
+
+ return clocksource_register(&clocksource_tsc);
+ }
+
+ return 0;
+}
+
+module_init(init_tsc_clocksource);
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 7512f39c9f2..2d4f1386e2b 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -71,6 +71,15 @@ SECTIONS
.data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) }
_edata = .; /* End of data section */
+#ifdef CONFIG_STACK_UNWIND
+ . = ALIGN(4);
+ .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
+ __start_unwind = .;
+ *(.eh_frame)
+ __end_unwind = .;
+ }
+#endif
+
. = ALIGN(THREAD_SIZE); /* init_task */
.data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
*(.data.init_task)
diff --git a/arch/i386/kernel/vsyscall-sysenter.S b/arch/i386/kernel/vsyscall-sysenter.S
index 3b62baa6a37..1a36d26e15e 100644
--- a/arch/i386/kernel/vsyscall-sysenter.S
+++ b/arch/i386/kernel/vsyscall-sysenter.S
@@ -42,10 +42,10 @@ __kernel_vsyscall:
/* 7: align return point with nop's to make disassembly easier */
.space 7,0x90
- /* 14: System call restart point is here! (SYSENTER_RETURN - 2) */
+ /* 14: System call restart point is here! (SYSENTER_RETURN-2) */
jmp .Lenter_kernel
/* 16: System call normal return point is here! */
- .globl SYSENTER_RETURN /* Symbol used by entry.S. */
+ .globl SYSENTER_RETURN /* Symbol used by sysenter.c */
SYSENTER_RETURN:
pop %ebp
.Lpop_ebp:
diff --git a/arch/i386/kernel/vsyscall.lds.S b/arch/i386/kernel/vsyscall.lds.S
index 98699ca6e52..e26975fc68b 100644
--- a/arch/i386/kernel/vsyscall.lds.S
+++ b/arch/i386/kernel/vsyscall.lds.S
@@ -7,7 +7,7 @@
SECTIONS
{
- . = VSYSCALL_BASE + SIZEOF_HEADERS;
+ . = VDSO_PRELINK + SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.dynsym : { *(.dynsym) }
@@ -20,7 +20,7 @@ SECTIONS
For the layouts to match, we need to skip more than enough
space for the dynamic symbol table et al. If this amount
is insufficient, ld -shared will barf. Just increase it here. */
- . = VSYSCALL_BASE + 0x400;
+ . = VDSO_PRELINK + 0x400;
.text : { *(.text) } :text =0x90909090
.note : { *(.note.*) } :text :note
diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c
index c49a6acbee5..3c0714c4b66 100644
--- a/arch/i386/lib/delay.c
+++ b/arch/i386/lib/delay.c
@@ -10,43 +10,92 @@
* we have to worry about.
*/
+#include <linux/module.h>
#include <linux/config.h>
#include <linux/sched.h>
#include <linux/delay.h>
-#include <linux/module.h>
+
#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/timer.h>
#ifdef CONFIG_SMP
-#include <asm/smp.h>
+# include <asm/smp.h>
#endif
-extern struct timer_opts* timer;
+/* simple loop based delay: */
+static void delay_loop(unsigned long loops)
+{
+ int d0;
+
+ __asm__ __volatile__(
+ "\tjmp 1f\n"
+ ".align 16\n"
+ "1:\tjmp 2f\n"
+ ".align 16\n"
+ "2:\tdecl %0\n\tjns 2b"
+ :"=&a" (d0)
+ :"0" (loops));
+}
+
+/* TSC based delay: */
+static void delay_tsc(unsigned long loops)
+{
+ unsigned long bclock, now;
+
+ rdtscl(bclock);
+ do {
+ rep_nop();
+ rdtscl(now);
+ } while ((now-bclock) < loops);
+}
+
+/*
+ * Since we calibrate only once at boot, this
+ * function should be set once at boot and not changed
+ */
+static void (*delay_fn)(unsigned long) = delay_loop;
+
+void use_tsc_delay(void)
+{
+ delay_fn = delay_tsc;
+}
+
+int read_current_timer(unsigned long *timer_val)
+{
+ if (delay_fn == delay_tsc) {
+ rdtscl(*timer_val);
+ return 0;
+ }
+ return -1;
+}
void __delay(unsigned long loops)
{
- cur_timer->delay(loops);
+ delay_fn(loops);
}
inline void __const_udelay(unsigned long xloops)
{
int d0;
+
xloops *= 4;
__asm__("mull %0"
:"=d" (xloops), "=&a" (d0)
- :"1" (xloops),"0" (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4)));
- __delay(++xloops);
+ :"1" (xloops), "0"
+ (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4)));
+
+ __delay(++xloops);
}
void __udelay(unsigned long usecs)
{
- __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
+ __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
}
void __ndelay(unsigned long nsecs)
{
- __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
+ __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
}
EXPORT_SYMBOL(__delay);
diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
index 6979297ce27..c5aa65f7c02 100644
--- a/arch/i386/lib/usercopy.c
+++ b/arch/i386/lib/usercopy.c
@@ -528,6 +528,97 @@ static unsigned long __copy_user_zeroing_intel_nocache(void *to,
return size;
}
+static unsigned long __copy_user_intel_nocache(void *to,
+ const void __user *from, unsigned long size)
+{
+ int d0, d1;
+
+ __asm__ __volatile__(
+ " .align 2,0x90\n"
+ "0: movl 32(%4), %%eax\n"
+ " cmpl $67, %0\n"
+ " jbe 2f\n"
+ "1: movl 64(%4), %%eax\n"
+ " .align 2,0x90\n"
+ "2: movl 0(%4), %%eax\n"
+ "21: movl 4(%4), %%edx\n"
+ " movnti %%eax, 0(%3)\n"
+ " movnti %%edx, 4(%3)\n"
+ "3: movl 8(%4), %%eax\n"
+ "31: movl 12(%4),%%edx\n"
+ " movnti %%eax, 8(%3)\n"
+ " movnti %%edx, 12(%3)\n"
+ "4: movl 16(%4), %%eax\n"
+ "41: movl 20(%4), %%edx\n"
+ " movnti %%eax, 16(%3)\n"
+ " movnti %%edx, 20(%3)\n"
+ "10: movl 24(%4), %%eax\n"
+ "51: movl 28(%4), %%edx\n"
+ " movnti %%eax, 24(%3)\n"
+ " movnti %%edx, 28(%3)\n"
+ "11: movl 32(%4), %%eax\n"
+ "61: movl 36(%4), %%edx\n"
+ " movnti %%eax, 32(%3)\n"
+ " movnti %%edx, 36(%3)\n"
+ "12: movl 40(%4), %%eax\n"
+ "71: movl 44(%4), %%edx\n"
+ " movnti %%eax, 40(%3)\n"
+ " movnti %%edx, 44(%3)\n"
+ "13: movl 48(%4), %%eax\n"
+ "81: movl 52(%4), %%edx\n"
+ " movnti %%eax, 48(%3)\n"
+ " movnti %%edx, 52(%3)\n"
+ "14: movl 56(%4), %%eax\n"
+ "91: movl 60(%4), %%edx\n"
+ " movnti %%eax, 56(%3)\n"
+ " movnti %%edx, 60(%3)\n"
+ " addl $-64, %0\n"
+ " addl $64, %4\n"
+ " addl $64, %3\n"
+ " cmpl $63, %0\n"
+ " ja 0b\n"
+ " sfence \n"
+ "5: movl %0, %%eax\n"
+ " shrl $2, %0\n"
+ " andl $3, %%eax\n"
+ " cld\n"
+ "6: rep; movsl\n"
+ " movl %%eax,%0\n"
+ "7: rep; movsb\n"
+ "8:\n"
+ ".section .fixup,\"ax\"\n"
+ "9: lea 0(%%eax,%0,4),%0\n"
+ "16: jmp 8b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 0b,16b\n"
+ " .long 1b,16b\n"
+ " .long 2b,16b\n"
+ " .long 21b,16b\n"
+ " .long 3b,16b\n"
+ " .long 31b,16b\n"
+ " .long 4b,16b\n"
+ " .long 41b,16b\n"
+ " .long 10b,16b\n"
+ " .long 51b,16b\n"
+ " .long 11b,16b\n"
+ " .long 61b,16b\n"
+ " .long 12b,16b\n"
+ " .long 71b,16b\n"
+ " .long 13b,16b\n"
+ " .long 81b,16b\n"
+ " .long 14b,16b\n"
+ " .long 91b,16b\n"
+ " .long 6b,9b\n"
+ " .long 7b,16b\n"
+ ".previous"
+ : "=&c"(size), "=&D" (d0), "=&S" (d1)
+ : "1"(to), "2"(from), "0"(size)
+ : "eax", "edx", "memory");
+ return size;
+}
+
#else
/*
@@ -694,6 +785,19 @@ unsigned long __copy_from_user_ll(void *to, const void __user *from,
}
EXPORT_SYMBOL(__copy_from_user_ll);
+unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from,
+ unsigned long n)
+{
+ BUG_ON((long)n < 0);
+ if (movsl_is_ok(to, from, n))
+ __copy_user(to, from, n);
+ else
+ n = __copy_user_intel((void __user *)to,
+ (const void *)from, n);
+ return n;
+}
+EXPORT_SYMBOL(__copy_from_user_ll_nozero);
+
unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
unsigned long n)
{
@@ -709,6 +813,21 @@ unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
return n;
}
+unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
+ unsigned long n)
+{
+ BUG_ON((long)n < 0);
+#ifdef CONFIG_X86_INTEL_USERCOPY
+ if ( n > 64 && cpu_has_xmm2)
+ n = __copy_user_intel_nocache(to, from, n);
+ else
+ __copy_user(to, from, n);
+#else
+ __copy_user(to, from, n);
+#endif
+ return n;
+}
+
/**
* copy_to_user: - Copy a block of data into user space.
* @to: Destination address, in user space.
diff --git a/arch/i386/mach-default/setup.c b/arch/i386/mach-default/setup.c
index b4a7455c699..004837c5879 100644
--- a/arch/i386/mach-default/setup.c
+++ b/arch/i386/mach-default/setup.c
@@ -8,6 +8,8 @@
#include <linux/interrupt.h>
#include <asm/acpi.h>
#include <asm/arch_hooks.h>
+#include <asm/e820.h>
+#include <asm/setup.h>
#ifdef CONFIG_HOTPLUG_CPU
#define DEFAULT_SEND_IPI (1)
@@ -130,3 +132,44 @@ static int __init print_ipi_mode(void)
}
late_initcall(print_ipi_mode);
+
+/**
+ * machine_specific_memory_setup - Hook for machine specific memory setup.
+ *
+ * Description:
+ * This is included late in kernel/setup.c so that it can make
+ * use of all of the static functions.
+ **/
+
+char * __init machine_specific_memory_setup(void)
+{
+ char *who;
+
+
+ who = "BIOS-e820";
+
+ /*
+ * Try to copy the BIOS-supplied E820-map.
+ *
+ * Otherwise fake a memory map; one section from 0k->640k,
+ * the next section from 1mb->appropriate_mem_k
+ */
+ sanitize_e820_map(E820_MAP, &E820_MAP_NR);
+ if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
+ unsigned long mem_size;
+
+ /* compare results from other methods and take the greater */
+ if (ALT_MEM_K < EXT_MEM_K) {
+ mem_size = EXT_MEM_K;
+ who = "BIOS-88";
+ } else {
+ mem_size = ALT_MEM_K;
+ who = "BIOS-e801";
+ }
+
+ e820.nr_map = 0;
+ add_memory_region(0, LOWMEMSIZE(), E820_RAM);
+ add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
+ }
+ return who;
+}
diff --git a/arch/i386/mach-visws/setup.c b/arch/i386/mach-visws/setup.c
index 07fac7e749c..8a9e1a6f745 100644
--- a/arch/i386/mach-visws/setup.c
+++ b/arch/i386/mach-visws/setup.c
@@ -10,6 +10,8 @@
#include <asm/fixmap.h>
#include <asm/arch_hooks.h>
#include <asm/io.h>
+#include <asm/e820.h>
+#include <asm/setup.h>
#include "cobalt.h"
#include "piix4.h"
@@ -133,3 +135,50 @@ void __init time_init_hook(void)
/* Wire cpu IDT entry to s/w handler (and Cobalt APIC to IDT) */
setup_irq(0, &irq0);
}
+
+/* Hook for machine specific memory setup. */
+
+#define MB (1024 * 1024)
+
+static unsigned long sgivwfb_mem_phys;
+static unsigned long sgivwfb_mem_size;
+
+long long mem_size __initdata = 0;
+
+char * __init machine_specific_memory_setup(void)
+{
+ long long gfx_mem_size = 8 * MB;
+
+ mem_size = ALT_MEM_K;
+
+ if (!mem_size) {
+ printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n");
+ mem_size = 128 * MB;
+ }
+
+ /*
+ * this hardcodes the graphics memory to 8 MB
+ * it really should be sized dynamically (or at least
+ * set as a boot param)
+ */
+ if (!sgivwfb_mem_size) {
+ printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n");
+ sgivwfb_mem_size = 8 * MB;
+ }
+
+ /*
+ * Trim to nearest MB
+ */
+ sgivwfb_mem_size &= ~((1 << 20) - 1);
+ sgivwfb_mem_phys = mem_size - gfx_mem_size;
+
+ add_memory_region(0, LOWMEMSIZE(), E820_RAM);
+ add_memory_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
+ add_memory_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
+
+ return "PROM";
+
+ /* Remove gcc warnings */
+ (void) sanitize_e820_map(NULL, NULL);
+ (void) copy_e820_map(NULL, 0);
+}
diff --git a/arch/i386/mach-voyager/setup.c b/arch/i386/mach-voyager/setup.c
index 7d8a3acb944..defc6ebbd56 100644
--- a/arch/i386/mach-voyager/setup.c
+++ b/arch/i386/mach-voyager/setup.c
@@ -5,8 +5,11 @@
#include <linux/config.h>
#include <linux/init.h>
#include <linux/interrupt.h>
-#include <asm/acpi.h>
#include <asm/arch_hooks.h>
+#include <asm/voyager.h>
+#include <asm/e820.h>
+#include <asm/io.h>
+#include <asm/setup.h>
void __init pre_intr_init_hook(void)
{
@@ -24,8 +27,7 @@ void __init intr_init_hook(void)
smp_intr_init();
#endif
- if (!acpi_ioapic)
- setup_irq(2, &irq2);
+ setup_irq(2, &irq2);
}
void __init pre_setup_arch_hook(void)
@@ -45,3 +47,74 @@ void __init time_init_hook(void)
{
setup_irq(0, &irq0);
}
+
+/* Hook for machine specific memory setup. */
+
+char * __init machine_specific_memory_setup(void)
+{
+ char *who;
+
+ who = "NOT VOYAGER";
+
+ if(voyager_level == 5) {
+ __u32 addr, length;
+ int i;
+
+ who = "Voyager-SUS";
+
+ e820.nr_map = 0;
+ for(i=0; voyager_memory_detect(i, &addr, &length); i++) {
+ add_memory_region(addr, length, E820_RAM);
+ }
+ return who;
+ } else if(voyager_level == 4) {
+ __u32 tom;
+ __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT)<<8;
+ /* select the DINO config space */
+ outb(VOYAGER_DINO, VOYAGER_CAT_CONFIG_PORT);
+ /* Read DINO top of memory register */
+ tom = ((inb(catbase + 0x4) & 0xf0) << 16)
+ + ((inb(catbase + 0x5) & 0x7f) << 24);
+
+ if(inb(catbase) != VOYAGER_DINO) {
+ printk(KERN_ERR "Voyager: Failed to get DINO for L4, setting tom to EXT_MEM_K\n");
+ tom = (EXT_MEM_K)<<10;
+ }
+ who = "Voyager-TOM";
+ add_memory_region(0, 0x9f000, E820_RAM);
+ /* map from 1M to top of memory */
+ add_memory_region(1*1024*1024, tom - 1*1024*1024, E820_RAM);
+ /* FIXME: Should check the ASICs to see if I need to
+ * take out the 8M window. Just do it at the moment
+ * */
+ add_memory_region(8*1024*1024, 8*1024*1024, E820_RESERVED);
+ return who;
+ }
+
+ who = "BIOS-e820";
+
+ /*
+ * Try to copy the BIOS-supplied E820-map.
+ *
+ * Otherwise fake a memory map; one section from 0k->640k,
+ * the next section from 1mb->appropriate_mem_k
+ */
+ sanitize_e820_map(E820_MAP, &E820_MAP_NR);
+ if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
+ unsigned long mem_size;
+
+ /* compare results from other methods and take the greater */
+ if (ALT_MEM_K < EXT_MEM_K) {
+ mem_size = EXT_MEM_K;
+ who = "BIOS-88";
+ } else {
+ mem_size = ALT_MEM_K;
+ who = "BIOS-e801";
+ }
+
+ e820.nr_map = 0;
+ add_memory_region(0, LOWMEMSIZE(), E820_RAM);
+ add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
+ }
+ return who;
+}
diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c
index 70e560a1b79..8242af9ebc6 100644
--- a/arch/i386/mach-voyager/voyager_smp.c
+++ b/arch/i386/mach-voyager/voyager_smp.c
@@ -661,6 +661,7 @@ do_boot_cpu(__u8 cpu)
print_cpu_info(&cpu_data[cpu]);
wmb();
cpu_set(cpu, cpu_callout_map);
+ cpu_set(cpu, cpu_present_map);
}
else {
printk("CPU%d FAILED TO BOOT: ", cpu);
@@ -1912,6 +1913,7 @@ void __devinit smp_prepare_boot_cpu(void)
cpu_set(smp_processor_id(), cpu_online_map);
cpu_set(smp_processor_id(), cpu_callout_map);
cpu_set(smp_processor_id(), cpu_possible_map);
+ cpu_set(smp_processor_id(), cpu_present_map);
}
int __devinit
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index bd6fe96cc16..6ee7faaf2c1 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -30,6 +30,40 @@
extern void die(const char *,struct pt_regs *,long);
+#ifdef CONFIG_KPROBES
+ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+int register_page_fault_notifier(struct notifier_block *nb)
+{
+ vmalloc_sync_all();
+ return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
+}
+
+int unregister_page_fault_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
+}
+
+static inline int notify_page_fault(enum die_val val, const char *str,
+ struct pt_regs *regs, long err, int trap, int sig)
+{
+ struct die_args args = {
+ .regs = regs,
+ .str = str,
+ .err = err,
+ .trapnr = trap,
+ .signr = sig
+ };
+ return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+}
+#else
+static inline int notify_page_fault(enum die_val val, const char *str,
+ struct pt_regs *regs, long err, int trap, int sig)
+{
+ return NOTIFY_DONE;
+}
+#endif
+
+
/*
* Unlock any spinlocks which will prevent us from getting the
* message out
@@ -324,7 +358,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
if (unlikely(address >= TASK_SIZE)) {
if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
return;
- if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
SIGSEGV) == NOTIFY_STOP)
return;
/*
@@ -334,7 +368,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
goto bad_area_nosemaphore;
}
- if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
SIGSEGV) == NOTIFY_STOP)
return;
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index bf19513f0ce..f84b16e007f 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -23,6 +23,7 @@
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
+#include <linux/poison.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
@@ -654,7 +655,7 @@ void __init mem_init(void)
*/
#ifdef CONFIG_MEMORY_HOTPLUG
#ifndef CONFIG_NEED_MULTIPLE_NODES
-int add_memory(u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size)
{
struct pglist_data *pgdata = &contig_page_data;
struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
@@ -753,7 +754,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
for (addr = begin; addr < end; addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
init_page_count(virt_to_page(addr));
- memset((void *)addr, 0xcc, PAGE_SIZE);
+ memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
free_page(addr);
totalram_pages++;
}
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index 0887b34bc59..353a836ed63 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -229,8 +229,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
if (PageHighMem(page))
return;
if (!enable)
- mutex_debug_check_no_locks_freed(page_address(page),
- numpages * PAGE_SIZE);
+ debug_check_no_locks_freed(page_address(page),
+ numpages * PAGE_SIZE);
/* the return value is ignored - the calls cannot fail,
* large pages are disabled at boot time.
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c
index ec0fd3cfa77..fa8a37bcb39 100644
--- a/arch/i386/oprofile/nmi_int.c
+++ b/arch/i386/oprofile/nmi_int.c
@@ -281,9 +281,9 @@ static int nmi_create_files(struct super_block * sb, struct dentry * root)
for (i = 0; i < model->num_counters; ++i) {
struct dentry * dir;
- char buf[2];
+ char buf[4];
- snprintf(buf, 2, "%d", i);
+ snprintf(buf, sizeof(buf), "%d", i);
dir = oprofilefs_mkdir(sb, root, buf);
oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
diff --git a/arch/i386/oprofile/op_model_athlon.c b/arch/i386/oprofile/op_model_athlon.c
index 3ad9a72a503..693bdea4a52 100644
--- a/arch/i386/oprofile/op_model_athlon.c
+++ b/arch/i386/oprofile/op_model_athlon.c
@@ -13,6 +13,7 @@
#include <linux/oprofile.h>
#include <asm/ptrace.h>
#include <asm/msr.h>
+#include <asm/nmi.h>
#include "op_x86_model.h"
#include "op_counter.h"
diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c
index ac8a066035c..7c61d357b82 100644
--- a/arch/i386/oprofile/op_model_p4.c
+++ b/arch/i386/oprofile/op_model_p4.c
@@ -14,6 +14,7 @@
#include <asm/ptrace.h>
#include <asm/fixmap.h>
#include <asm/apic.h>
+#include <asm/nmi.h>
#include "op_x86_model.h"
#include "op_counter.h"
diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c
index d719015fc04..5c3ab4b027a 100644
--- a/arch/i386/oprofile/op_model_ppro.c
+++ b/arch/i386/oprofile/op_model_ppro.c
@@ -14,6 +14,7 @@
#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/apic.h>
+#include <asm/nmi.h>
#include "op_x86_model.h"
#include "op_counter.h"
diff --git a/arch/i386/pci/i386.c b/arch/i386/pci/i386.c
index 7852827a599..a151f7a99f5 100644
--- a/arch/i386/pci/i386.c
+++ b/arch/i386/pci/i386.c
@@ -285,8 +285,6 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
/* Leave vm_pgoff as-is, the PCI space address is the physical
* address on this platform.
*/
- vma->vm_flags |= (VM_SHM | VM_LOCKED | VM_IO);
-
prot = pgprot_val(vma->vm_page_prot);
if (boot_cpu_data.x86 > 3)
prot |= _PAGE_PCD | _PAGE_PWT;
diff --git a/arch/i386/pci/pcbios.c b/arch/i386/pci/pcbios.c
index 1eec0868f4b..ed1512a175a 100644
--- a/arch/i386/pci/pcbios.c
+++ b/arch/i386/pci/pcbios.c
@@ -371,8 +371,7 @@ void __devinit pcibios_sort(void)
list_for_each(ln, &pci_devices) {
d = pci_dev_g(ln);
if (d->bus->number == bus && d->devfn == devfn) {
- list_del(&d->global_list);
- list_add_tail(&d->global_list, &sorted_devices);
+ list_move_tail(&d->global_list, &sorted_devices);
if (d == dev)
found = 1;
break;
@@ -390,8 +389,7 @@ void __devinit pcibios_sort(void)
if (!found) {
printk(KERN_WARNING "PCI: Device %s not found by BIOS\n",
pci_name(dev));
- list_del(&dev->global_list);
- list_add_tail(&dev->global_list, &sorted_devices);
+ list_move_tail(&dev->global_list, &sorted_devices);
}
}
list_splice(&sorted_devices, &pci_devices);