From a86f34b49f32b238d16b2e3bf6c9a5391a3f683f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 May 2007 19:27:04 +0200 Subject: [PATCH] x86: revert x86_64-mm-fix-the-irqbalance-quirk-for-e7320-e7520-e7525 Obsoleted by Ingo's genapic stuff. Cc: Ingo Molnar Cc: Suresh Siddha Cc: Andi Kleen Cc: "Li, Shaohua" Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/asm-i386/genapic.h | 2 +- include/asm-i386/irq.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/genapic.h b/include/asm-i386/genapic.h index fd2be593b06..8ffbb0f0745 100644 --- a/include/asm-i386/genapic.h +++ b/include/asm-i386/genapic.h @@ -122,6 +122,6 @@ struct genapic { APICFUNC(phys_pkg_id) \ } -extern struct genapic *genapic, apic_default; +extern struct genapic *genapic; #endif diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h index 11761cdaae1..9e15ce0006e 100644 --- a/include/asm-i386/irq.h +++ b/include/asm-i386/irq.h @@ -37,8 +37,6 @@ static __inline__ int irq_canonicalize(int irq) extern int irqbalance_disable(char *str); #endif -extern void quirk_intel_irqbalance(void); - #ifdef CONFIG_HOTPLUG_CPU extern void fixup_irqs(cpumask_t map); #endif -- cgit v1.2.3 From 3c43f03908de98fa8f7a9e8fc9411ebf4c2de298 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 May 2007 19:27:04 +0200 Subject: [PATCH] x86: default to physical mode on hotplug CPU kernels Default to physical mode on hotplug CPU kernels. Furher simplify and clean up the APIC initialization code. Signed-off-by: Ingo Molnar Signed-off-by: Andi Kleen Cc: Suresh Siddha Cc: Andi Kleen Cc: "Li, Shaohua" Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton --- include/asm-i386/genapic.h | 4 ++-- include/asm-i386/mach-bigsmp/mach_apic.h | 2 +- include/asm-i386/mach-default/mach_apic.h | 2 +- include/asm-i386/mach-es7000/mach_apic.h | 2 +- include/asm-i386/mach-generic/mach_apic.h | 2 +- include/asm-i386/mach-numaq/mach_apic.h | 2 +- include/asm-i386/mach-summit/mach_apic.h | 2 +- include/asm-i386/mach-visws/mach_apic.h | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/genapic.h b/include/asm-i386/genapic.h index 8ffbb0f0745..33e3ffe1766 100644 --- a/include/asm-i386/genapic.h +++ b/include/asm-i386/genapic.h @@ -36,7 +36,7 @@ struct genapic { void (*init_apic_ldr)(void); physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); - void (*clustered_apic_check)(void); + void (*setup_apic_routing)(void); int (*multi_timer_check)(int apic, int irq); int (*apicid_to_node)(int logical_apicid); int (*cpu_to_logical_apicid)(int cpu); @@ -99,7 +99,7 @@ struct genapic { APICFUNC(check_apicid_present) \ APICFUNC(init_apic_ldr) \ APICFUNC(ioapic_phys_id_map) \ - APICFUNC(clustered_apic_check) \ + APICFUNC(setup_apic_routing) \ APICFUNC(multi_timer_check) \ APICFUNC(apicid_to_node) \ APICFUNC(cpu_to_logical_apicid) \ diff --git a/include/asm-i386/mach-bigsmp/mach_apic.h b/include/asm-i386/mach-bigsmp/mach_apic.h index 18b19a77344..ebd319f838a 100644 --- a/include/asm-i386/mach-bigsmp/mach_apic.h +++ b/include/asm-i386/mach-bigsmp/mach_apic.h @@ -71,7 +71,7 @@ static inline void init_apic_ldr(void) apic_write_around(APIC_LDR, val); } -static inline void clustered_apic_check(void) +static inline void setup_apic_routing(void) { printk("Enabling APIC mode: %s. Using %d I/O APICs\n", "Physflat", nr_ioapics); diff --git a/include/asm-i386/mach-default/mach_apic.h b/include/asm-i386/mach-default/mach_apic.h index 3ef6292db78..6db1c3babe9 100644 --- a/include/asm-i386/mach-default/mach_apic.h +++ b/include/asm-i386/mach-default/mach_apic.h @@ -54,7 +54,7 @@ static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) return phys_map; } -static inline void clustered_apic_check(void) +static inline void setup_apic_routing(void) { printk("Enabling APIC mode: %s. Using %d I/O APICs\n", "Flat", nr_ioapics); diff --git a/include/asm-i386/mach-es7000/mach_apic.h b/include/asm-i386/mach-es7000/mach_apic.h index 26333685a7f..8e8b3949173 100644 --- a/include/asm-i386/mach-es7000/mach_apic.h +++ b/include/asm-i386/mach-es7000/mach_apic.h @@ -81,7 +81,7 @@ static inline void enable_apic_mode(void) } extern int apic_version [MAX_APICS]; -static inline void clustered_apic_check(void) +static inline void setup_apic_routing(void) { int apic = bios_cpu_apicid[smp_processor_id()]; printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", diff --git a/include/asm-i386/mach-generic/mach_apic.h b/include/asm-i386/mach-generic/mach_apic.h index d9dc039da94..a236e702152 100644 --- a/include/asm-i386/mach-generic/mach_apic.h +++ b/include/asm-i386/mach-generic/mach_apic.h @@ -13,7 +13,7 @@ #define apic_id_registered (genapic->apic_id_registered) #define init_apic_ldr (genapic->init_apic_ldr) #define ioapic_phys_id_map (genapic->ioapic_phys_id_map) -#define clustered_apic_check (genapic->clustered_apic_check) +#define setup_apic_routing (genapic->setup_apic_routing) #define multi_timer_check (genapic->multi_timer_check) #define apicid_to_node (genapic->apicid_to_node) #define cpu_to_logical_apicid (genapic->cpu_to_logical_apicid) diff --git a/include/asm-i386/mach-numaq/mach_apic.h b/include/asm-i386/mach-numaq/mach_apic.h index 9d158095da8..5e5e7dd2692 100644 --- a/include/asm-i386/mach-numaq/mach_apic.h +++ b/include/asm-i386/mach-numaq/mach_apic.h @@ -34,7 +34,7 @@ static inline void init_apic_ldr(void) /* Already done in NUMA-Q firmware */ } -static inline void clustered_apic_check(void) +static inline void setup_apic_routing(void) { printk("Enabling APIC mode: %s. Using %d I/O APICs\n", "NUMA-Q", nr_ioapics); diff --git a/include/asm-i386/mach-summit/mach_apic.h b/include/asm-i386/mach-summit/mach_apic.h index 43e5bd8f4a1..732f776aab8 100644 --- a/include/asm-i386/mach-summit/mach_apic.h +++ b/include/asm-i386/mach-summit/mach_apic.h @@ -80,7 +80,7 @@ static inline int apic_id_registered(void) return 1; } -static inline void clustered_apic_check(void) +static inline void setup_apic_routing(void) { printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", nr_ioapics); diff --git a/include/asm-i386/mach-visws/mach_apic.h b/include/asm-i386/mach-visws/mach_apic.h index 18afe6b6fc4..efac6f0d139 100644 --- a/include/asm-i386/mach-visws/mach_apic.h +++ b/include/asm-i386/mach-visws/mach_apic.h @@ -47,7 +47,7 @@ static inline void summit_check(char *oem, char *productid) { } -static inline void clustered_apic_check(void) +static inline void setup_apic_routing(void) { } -- cgit v1.2.3 From 00f1ea696702163b7411d2316264525996c66ed3 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 2 May 2007 19:27:04 +0200 Subject: [PATCH] x86: adjust inclusion of asm/fixmap.h Move inclusion of asm/fixmap.h to where it is really used rather than where it may have been used long ago (requires a few other adjustments to includes due to previous implicit dependencies). Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- include/asm-i386/hpet.h | 2 -- include/asm-i386/kexec.h | 5 ----- include/asm-i386/pgalloc.h | 1 - include/asm-i386/smp.h | 7 ++----- 4 files changed, 2 insertions(+), 13 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/hpet.h b/include/asm-i386/hpet.h index fc03cf9de5c..dddeedf504b 100644 --- a/include/asm-i386/hpet.h +++ b/include/asm-i386/hpet.h @@ -28,8 +28,6 @@ #include -#include - /* * Documentation on HPET can be found at: * http://www.intel.com/ial/home/sp/pcmmspec.htm diff --git a/include/asm-i386/kexec.h b/include/asm-i386/kexec.h index 4dfc9f5ed03..c5b4ab95bdc 100644 --- a/include/asm-i386/kexec.h +++ b/include/asm-i386/kexec.h @@ -21,7 +21,6 @@ #ifndef __ASSEMBLY__ -#include #include #include @@ -29,10 +28,6 @@ * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. * I.e. Maximum page that is mapped directly into kernel memory, * and kmap is not required. - * - * Someone correct me if FIXADDR_START - PAGEOFFSET is not the correct - * calculation for the amount of memory directly mappable into the - * kernel memory space. */ /* Maximum physical address we can use pages from */ diff --git a/include/asm-i386/pgalloc.h b/include/asm-i386/pgalloc.h index c8dc2d0141a..47430175b75 100644 --- a/include/asm-i386/pgalloc.h +++ b/include/asm-i386/pgalloc.h @@ -1,7 +1,6 @@ #ifndef _I386_PGALLOC_H #define _I386_PGALLOC_H -#include #include #include /* for struct page */ diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index 6bf0033a301..9cab1531c61 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -11,16 +11,13 @@ #include #endif -#ifdef CONFIG_X86_LOCAL_APIC -#ifndef __ASSEMBLY__ -#include +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__) #include #include +#include #ifdef CONFIG_X86_IO_APIC #include #endif -#include -#endif #endif #define BAD_APICID 0xFFu -- cgit v1.2.3 From f76c392380a40008ee6ecaea4e5a51a3a10282c4 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:05 +0200 Subject: [PATCH] i386: No need to use -traditional for processing asm in i386/kernel/ No need to use -traditional for processing asm in i386/kernel/ Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- include/asm-i386/percpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/percpu.h b/include/asm-i386/percpu.h index 510ae1d3486..a10e7c68ae9 100644 --- a/include/asm-i386/percpu.h +++ b/include/asm-i386/percpu.h @@ -20,10 +20,10 @@ #ifdef CONFIG_SMP #define PER_CPU(var, cpu) \ movl __per_cpu_offset(,cpu,4), cpu; \ - addl $per_cpu__/**/var, cpu; + addl $per_cpu__##var, cpu; #else /* ! SMP */ #define PER_CPU(var, cpu) \ - movl $per_cpu__/**/var, cpu; + movl $per_cpu__##var, cpu; #endif /* SMP */ #endif /* !__ASSEMBLY__ */ -- cgit v1.2.3 From f5e8861583a591020176c90c10c6a130fed4f3ec Mon Sep 17 00:00:00 2001 From: takada Date: Wed, 2 May 2007 19:27:05 +0200 Subject: [PATCH] i386: pit_latch_buggy has no effect Eliminated the arch/i386/kernel/timers in 2.6.18, use clocksoures instead. pit_latch_buggy was referred in timers/timer_tsc.c, and currently removed. Therefore nobody refer it. Until 2.6.17, MediaGX's TSC works correctly. after 2.6.18, warned "TSC appears to be running slowly. Marking it as unstable". So marked unstable TSC when CS55x0. Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/asm-i386/timer.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h index 12dd67bf760..153770e25fa 100644 --- a/include/asm-i386/timer.h +++ b/include/asm-i386/timer.h @@ -9,8 +9,6 @@ void setup_pit_timer(void); unsigned long long native_sched_clock(void); unsigned long native_calculate_cpu_khz(void); -/* Modifiers for buggy PIT handling */ -extern int pit_latch_buggy; extern int timer_ack; extern int no_timer_check; extern int no_sync_cmos_clock; -- cgit v1.2.3 From 0949be35095b53dbaa72db700cb5074c5c249629 Mon Sep 17 00:00:00 2001 From: Simon Arlott Date: Wed, 2 May 2007 19:27:05 +0200 Subject: [PATCH] i386: Add an option for the VIA C7 which sets appropriate L1 cache The VIA C7 is a 686 (with TSC) that supports MMX, SSE and SSE2, it also has a cache line length of 64 according to http://www.digit-life.com/articles2/cpu/rmma-via-c7.html. This patch sets gcc to -march=686 and select s the correct cache shift. Signed-off-by: Simon Arlott Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Dave Jones Cc: Alan Cox Signed-off-by: Andrew Morton --- include/asm-i386/module.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/module.h b/include/asm-i386/module.h index 02f8f541cbe..7e5fda6c397 100644 --- a/include/asm-i386/module.h +++ b/include/asm-i386/module.h @@ -54,6 +54,8 @@ struct mod_arch_specific #define MODULE_PROC_FAMILY "CYRIXIII " #elif defined CONFIG_MVIAC3_2 #define MODULE_PROC_FAMILY "VIAC3-2 " +#elif defined CONFIG_MVIAC7 +#define MODULE_PROC_FAMILY "VIAC7 " #elif defined CONFIG_MGEODEGX1 #define MODULE_PROC_FAMILY "GEODEGX1 " #elif defined CONFIG_MGEODE_LX -- cgit v1.2.3 From 6d1c426158131b11d05d66e7dd6bf91e5b1b4fc7 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 2 May 2007 19:27:06 +0200 Subject: [PATCH] i386: Update __copy_to_user_inatomic linuxdoc description Explicity specify that the caller should pin the user memory otherwise the function will sleep Signed-off-by: Aneesh Kumar K.V Signed-off-by: Andi Kleen --- include/asm-i386/uaccess.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h index 70829ae3ad5..e2aa5e0d0cc 100644 --- a/include/asm-i386/uaccess.h +++ b/include/asm-i386/uaccess.h @@ -397,7 +397,19 @@ unsigned long __must_check __copy_from_user_ll_nocache(void *to, unsigned long __must_check __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, unsigned long n); -/* +/** + * __copy_to_user_inatomic: - Copy a block of data into user space, with less checking. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. + * + * Copy data from kernel space to user space. Caller must check + * the specified block with access_ok() before calling this function. + * The caller should also make sure he pins the user space address + * so that the we don't result in page fault and sleep. + * * Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault * we return the initial request size (1, 2 or 4), as copy_*_user should do. * If a store crosses a page boundary and gets a fault, the x86 will not write -- cgit v1.2.3 From 973efae21beb2feda138f152ed06d4204774d93c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:06 +0200 Subject: [PATCH] i386: clean up mach_reboot_fixups The reboot_fixups stuff seems to be a bit of a mess, specifically the header is in linux/ when its a purely i386-specific piece of code. I'm not sure why it has its config option; its only currently needed for "geode-gx1/cs5530a", so perhaps whatever config option controls that hardware should enable this? Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- include/asm-i386/reboot_fixups.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/asm-i386/reboot_fixups.h (limited to 'include/asm-i386') diff --git a/include/asm-i386/reboot_fixups.h b/include/asm-i386/reboot_fixups.h new file mode 100644 index 00000000000..0cb7d87c2b6 --- /dev/null +++ b/include/asm-i386/reboot_fixups.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_REBOOT_FIXUPS_H +#define _LINUX_REBOOT_FIXUPS_H + +extern void mach_reboot_fixups(void); + +#endif /* _LINUX_REBOOT_FIXUPS_H */ -- cgit v1.2.3 From 1833d6bc72893265f22addd79cf52e6987496e0f Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 2 May 2007 19:27:08 +0200 Subject: [PATCH] i386: modpost apic related warning fixes o Modpost generates warnings for i386 if compiled with CONFIG_RELOCATABLE=y WARNING: vmlinux - Section mismatch: reference to .init.text:find_unisys_acpi_oem_table from .text between 'acpi_madt_oem_check' (at offset 0xc0101eda) and 'enable_apic_mode' WARNING: vmlinux - Section mismatch: reference to .init.text:acpi_get_table_header_early from .text between 'acpi_madt_oem_check' (at offset 0xc0101ef0) and 'enable_apic_mode' WARNING: vmlinux - Section mismatch: reference to .init.text:parse_unisys_oem from .text between 'acpi_madt_oem_check' (at offset 0xc0101f2e) and 'enable_apic_mode' WARNING: vmlinux - Section mismatch: reference to .init.text:setup_unisys from .text between 'acpi_madt_oem_check' (at offset 0xc0101f37) and 'enable_apic_mode'WARNING: vmlinux - Section mismatch: reference to .init.text:parse_unisys_oem from .text between 'mps_oem_check' (at offset 0xc0101ec7) and 'acpi_madt_oem_check' WARNING: vmlinux - Section mismatch: reference to .init.text:es7000_sw_apic from .text between 'enable_apic_mode' (at offset 0xc0101f48) and 'check_apicid_present' o Some functions which are inline (acpi_madt_oem_check) are not inlined by compiler as these functions are accessed using function pointer. These functions are put in .text section and they in-turn access __init type functions hence modpost generates warnings. o Do not iniline acpi_madt_oem_check, instead make it __init. Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Len Brown Signed-off-by: Andrew Morton --- include/asm-i386/mach-es7000/mach_apic.h | 7 ------- include/asm-i386/mach-es7000/mach_mpparse.h | 32 ----------------------------- 2 files changed, 39 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/mach-es7000/mach_apic.h b/include/asm-i386/mach-es7000/mach_apic.h index 8e8b3949173..2d978928a39 100644 --- a/include/asm-i386/mach-es7000/mach_apic.h +++ b/include/asm-i386/mach-es7000/mach_apic.h @@ -73,13 +73,6 @@ static inline void init_apic_ldr(void) apic_write_around(APIC_LDR, val); } -extern void es7000_sw_apic(void); -static inline void enable_apic_mode(void) -{ - es7000_sw_apic(); - return; -} - extern int apic_version [MAX_APICS]; static inline void setup_apic_routing(void) { diff --git a/include/asm-i386/mach-es7000/mach_mpparse.h b/include/asm-i386/mach-es7000/mach_mpparse.h index 24990e546da..b9fb784e1fd 100644 --- a/include/asm-i386/mach-es7000/mach_mpparse.h +++ b/include/asm-i386/mach-es7000/mach_mpparse.h @@ -18,18 +18,6 @@ extern int parse_unisys_oem (char *oemptr); extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); extern void setup_unisys(void); -static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid) -{ - if (mpc->mpc_oemptr) { - struct mp_config_oemtable *oem_table = - (struct mp_config_oemtable *)mpc->mpc_oemptr; - if (!strncmp(oem, "UNISYS", 6)) - return parse_unisys_oem((char *)oem_table); - } - return 0; -} - #ifdef CONFIG_ACPI static inline int es7000_check_dsdt(void) @@ -41,26 +29,6 @@ static inline int es7000_check_dsdt(void) return 1; return 0; } - -/* Hook from generic ACPI tables.c */ -static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - unsigned long oem_addr; - if (!find_unisys_acpi_oem_table(&oem_addr)) { - if (es7000_check_dsdt()) - return parse_unisys_oem((char *)oem_addr); - else { - setup_unisys(); - return 1; - } - } - return 0; -} -#else -static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 0; -} #endif #endif /* __ASM_MACH_MPPARSE_H */ -- cgit v1.2.3 From 5a90cf205c922707ffed2d8f87cefd942e96b0ba Mon Sep 17 00:00:00 2001 From: john stultz Date: Wed, 2 May 2007 19:27:08 +0200 Subject: [PATCH] x86: Log reason why TSC was marked unstable Change mark_tsc_unstable() so it takes a string argument, which holds the reason the TSC was marked unstable. This is then displayed the first time mark_tsc_unstable is called. This should help us better debug why the TSC was marked unstable on certain systems and allow us to make sure we're not being overly paranoid when throwing out this troublesome clocksource. Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/asm-i386/mach-summit/mach_mpparse.h | 4 ++-- include/asm-i386/tsc.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/mach-summit/mach_mpparse.h b/include/asm-i386/mach-summit/mach_mpparse.h index 94268399170..c2520539d93 100644 --- a/include/asm-i386/mach-summit/mach_mpparse.h +++ b/include/asm-i386/mach-summit/mach_mpparse.h @@ -30,7 +30,7 @@ static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, (!strncmp(productid, "VIGIL SMP", 9) || !strncmp(productid, "EXA", 3) || !strncmp(productid, "RUTHLESS SMP", 12))){ - mark_tsc_unstable(); + mark_tsc_unstable("Summit based system"); use_cyclone = 1; /*enable cyclone-timer*/ setup_summit(); return 1; @@ -44,7 +44,7 @@ static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) if (!strncmp(oem_id, "IBM", 3) && (!strncmp(oem_table_id, "SERVIGIL", 8) || !strncmp(oem_table_id, "EXA", 3))){ - mark_tsc_unstable(); + mark_tsc_unstable("Summit based system"); use_cyclone = 1; /*enable cyclone-timer*/ setup_summit(); return 1; diff --git a/include/asm-i386/tsc.h b/include/asm-i386/tsc.h index 84016ff481b..346976632e1 100644 --- a/include/asm-i386/tsc.h +++ b/include/asm-i386/tsc.h @@ -53,7 +53,7 @@ static __always_inline cycles_t get_cycles_sync(void) } extern void tsc_init(void); -extern void mark_tsc_unstable(void); +extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern void init_tsc_clocksource(void); -- cgit v1.2.3 From 692174b97d5b871f4b0f648b1fb17aa37b955876 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 May 2007 19:27:09 +0200 Subject: [PATCH] i386: Initialize esp0 properly all the time Whenever we schedule, __switch_to calls load_esp0 which does: tss->esp0 = thread->esp0; This is never initialized for the initial thread (ie "swapper"), so when we're scheduling that, we end up setting esp0 to 0. This is fine: the swapper never leaves ring 0, so this field is never used. lguest, however, gets upset that we're trying to used an unmapped page as our kernel stack. Rather than work around it there, let's initialize it. Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-i386/processor.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 11bf899de8a..01ae0ffcf23 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -421,6 +421,7 @@ struct thread_struct { }; #define INIT_THREAD { \ + .esp0 = sizeof(init_stack) + (long)&init_stack, \ .vm86_info = NULL, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ -- cgit v1.2.3 From 79e030114a8d97a1dcd593ab84fb986f8c91c536 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 2 May 2007 19:27:09 +0200 Subject: [PATCH] i386: Allow i386 crash kernels to handle x86_64 dumps The specific case I am encountering is kdump under Xen with a 64 bit hypervisor and 32 bit kernel/userspace. The dump created is 64 bit due to the hypervisor but the dump kernel is 32 bit for maximum compatibility. It's possibly less likely to be useful in a purely native scenario but I see no reason to disallow it. [akpm@linux-foundation.org: build fix] Signed-off-by: Ian Campbell Signed-off-by: Andi Kleen Acked-by: Vivek Goyal Cc: Horms Cc: Magnus Damm Cc: "Eric W. Biederman" Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-i386/kexec.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/kexec.h b/include/asm-i386/kexec.h index c5b4ab95bdc..bcb5b21de2d 100644 --- a/include/asm-i386/kexec.h +++ b/include/asm-i386/kexec.h @@ -42,6 +42,9 @@ /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_386 +/* We can also handle crash dumps from 64 bit kernel. */ +#define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64) + #define MAX_NOTE_BYTES 1024 /* CPU does not save ss and esp on stack if execution is already -- cgit v1.2.3 From ae1ee11be77f51cedb6c569887dddc70c163ab6d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] i386: Use per-cpu variables for GDT, PDA Allocating PDA and GDT at boot is a pain. Using simple per-cpu variables adds happiness (although we need the GDT page-aligned for Xen, which we do in a followup patch). [akpm@linux-foundation.org: build fix] Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-i386/desc.h | 1 + include/asm-i386/pda.h | 7 +++---- include/asm-i386/processor.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index 050831f34f7..53c5916687b 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -22,6 +22,7 @@ struct Xgt_desc_struct { extern struct Xgt_desc_struct idt_descr; DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); +DECLARE_PER_CPU(struct desc_struct, cpu_gdt[GDT_ENTRIES]); extern struct Xgt_desc_struct early_gdt_descr; static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) diff --git a/include/asm-i386/pda.h b/include/asm-i386/pda.h index b12d59a318b..aef7f732f77 100644 --- a/include/asm-i386/pda.h +++ b/include/asm-i386/pda.h @@ -8,6 +8,7 @@ #include #include +#include struct i386_pda { @@ -18,10 +19,8 @@ struct i386_pda struct pt_regs *irq_regs; }; -extern struct i386_pda *_cpu_pda[]; - -#define cpu_pda(i) (_cpu_pda[i]) - +DECLARE_PER_CPU(struct i386_pda, _cpu_pda); +#define cpu_pda(i) (&per_cpu(_cpu_pda, (i))) #define pda_offset(field) offsetof(struct i386_pda, field) extern void __bad_pda_field(void); diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 01ae0ffcf23..cd940befef5 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -743,7 +743,7 @@ extern unsigned long boot_option_idle_override; extern void enable_sep_cpu(void); extern int sysenter_setup(void); -extern int init_gdt(int cpu, struct task_struct *idle); +extern void init_gdt(int cpu, struct task_struct *idle); extern void cpu_set_gdt(int); extern void secondary_cpu_init(void); -- cgit v1.2.3 From bf50467204b435421d8de33ad080fa46c6f3d50b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] i386: Use per-cpu GDT immediately upon boot Now we are no longer dynamically allocating the GDT, we don't need the "cpu_gdt_table" at all: we can switch straight from "boot_gdt_table" to the per-cpu GDT. This means initializing the cpu_gdt array in C. The boot CPU uses the per-cpu var directly, then in smp_prepare_cpus() it switches to the per-cpu copy just allocated. For secondary CPUs, the early_gdt_descr is set to point directly to their per-cpu copy. For UP the code is very simple: it keeps using the "per-cpu" GDT as per SMP, but we never have to move. Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-i386/desc.h | 2 -- include/asm-i386/processor.h | 1 - 2 files changed, 3 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index 53c5916687b..a75ae6b9786 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -12,8 +12,6 @@ #include -extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; - struct Xgt_desc_struct { unsigned short size; unsigned long address __attribute__((packed)); diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index cd940befef5..b25a2f5b537 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -743,7 +743,6 @@ extern unsigned long boot_option_idle_override; extern void enable_sep_cpu(void); extern int sysenter_setup(void); -extern void init_gdt(int cpu, struct task_struct *idle); extern void cpu_set_gdt(int); extern void secondary_cpu_init(void); -- cgit v1.2.3 From d2cbcc49e2bfd6eaa44d7e4e5e5f171aaa5ec80d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] i386: clean up cpu_init() We now have cpu_init() and secondary_cpu_init() doing nothing but calling _cpu_init() with the same arguments. Rename _cpu_init() to cpu_init() and use it as a replcement for secondary_cpu_init(). Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-i386/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index b25a2f5b537..80f7e8a1e87 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -744,6 +744,6 @@ extern void enable_sep_cpu(void); extern int sysenter_setup(void); extern void cpu_set_gdt(int); -extern void secondary_cpu_init(void); +extern void cpu_init(void); #endif /* __ASM_I386_PROCESSOR_H */ -- cgit v1.2.3 From 90a0a06aa81692028864c21f981905fda46b1208 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] i386: rationalize paravirt wrappers paravirt.c used to implement native versions of all low-level functions. Far cleaner is to have the native versions exposed in the headers and as inline native_XXX, and if !CONFIG_PARAVIRT, then simply #define XXX native_XXX. There are several nice side effects: 1) write_dt_entry() now takes the correct "struct Xgt_desc_struct *" not "void *". 2) load_TLS is reintroduced to the for loop, not manually unrolled with a #error in case the bounds ever change. 3) Macros become inlines, with type checking. 4) Access to the native versions is trivial for KVM, lguest, Xen and others who might want it. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Avi Kivity Signed-off-by: Andrew Morton --- include/asm-i386/desc.h | 82 +++++++++++++++------- include/asm-i386/io.h | 15 ++-- include/asm-i386/irqflags.h | 61 +++++++++++----- include/asm-i386/msr.h | 163 +++++++++++++++++++++++++++++-------------- include/asm-i386/paravirt.h | 17 ++--- include/asm-i386/processor.h | 94 +++++++++++++++++++------ include/asm-i386/system.h | 139 ++++++++++++++++++++++-------------- 7 files changed, 384 insertions(+), 187 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index a75ae6b9786..13f701ea9a8 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -57,45 +57,33 @@ static inline void pack_gate(__u32 *a, __u32 *b, #ifdef CONFIG_PARAVIRT #include #else -#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) - -#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) -#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) +#define load_TR_desc() native_load_tr_desc() +#define load_gdt(dtr) native_load_gdt(dtr) +#define load_idt(dtr) native_load_idt(dtr) #define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr)) #define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt)) -#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr)) -#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr)) -#define store_tr(tr) __asm__ ("str %0":"=m" (tr)) +#define store_gdt(dtr) native_store_gdt(dtr) +#define store_idt(dtr) native_store_idt(dtr) +#define store_tr(tr) (tr = native_store_tr()) #define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt)) -#if TLS_SIZE != 24 -# error update this code. -#endif - -static inline void load_TLS(struct thread_struct *t, unsigned int cpu) -{ -#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] - C(0); C(1); C(2); -#undef C -} +#define load_TLS(t, cpu) native_load_tls(t, cpu) +#define set_ldt native_set_ldt #define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) #define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) #define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) +#endif -static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b) +static inline void write_dt_entry(struct desc_struct *dt, + int entry, u32 entry_low, u32 entry_high) { - __u32 *lp = (__u32 *)((char *)dt + entry*8); - *lp = entry_a; - *(lp+1) = entry_b; + dt[entry].a = entry_low; + dt[entry].b = entry_high; } -#define set_ldt native_set_ldt -#endif /* CONFIG_PARAVIRT */ - -static inline fastcall void native_set_ldt(const void *addr, - unsigned int entries) +static inline void native_set_ldt(const void *addr, unsigned int entries) { if (likely(entries == 0)) __asm__ __volatile__("lldt %w0"::"q" (0)); @@ -111,6 +99,48 @@ static inline fastcall void native_set_ldt(const void *addr, } } + +static inline void native_load_tr_desc(void) +{ + asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); +} + +static inline void native_load_gdt(const struct Xgt_desc_struct *dtr) +{ + asm volatile("lgdt %0"::"m" (*dtr)); +} + +static inline void native_load_idt(const struct Xgt_desc_struct *dtr) +{ + asm volatile("lidt %0"::"m" (*dtr)); +} + +static inline void native_store_gdt(struct Xgt_desc_struct *dtr) +{ + asm ("sgdt %0":"=m" (*dtr)); +} + +static inline void native_store_idt(struct Xgt_desc_struct *dtr) +{ + asm ("sidt %0":"=m" (*dtr)); +} + +static inline unsigned long native_store_tr(void) +{ + unsigned long tr; + asm ("str %0":"=r" (tr)); + return tr; +} + +static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) +{ + unsigned int i; + struct desc_struct *gdt = get_cpu_gdt_table(cpu); + + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) + gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; +} + static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg) { __u32 a, b; diff --git a/include/asm-i386/io.h b/include/asm-i386/io.h index 59fe616933c..e797586a5bf 100644 --- a/include/asm-i386/io.h +++ b/include/asm-i386/io.h @@ -250,19 +250,22 @@ static inline void flush_write_buffers(void) #endif /* __KERNEL__ */ +static inline void native_io_delay(void) +{ + asm volatile("outb %%al,$0x80" : : : "memory"); +} + #if defined(CONFIG_PARAVIRT) #include #else -#define __SLOW_DOWN_IO "outb %%al,$0x80;" - static inline void slow_down_io(void) { - __asm__ __volatile__( - __SLOW_DOWN_IO + native_io_delay(); #ifdef REALLY_SLOW_IO - __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO + native_io_delay(); + native_io_delay(); + native_io_delay(); #endif - : : ); } #endif diff --git a/include/asm-i386/irqflags.h b/include/asm-i386/irqflags.h index 17b18cf4fe9..c1cdd094938 100644 --- a/include/asm-i386/irqflags.h +++ b/include/asm-i386/irqflags.h @@ -10,6 +10,42 @@ #ifndef _ASM_IRQFLAGS_H #define _ASM_IRQFLAGS_H +#ifndef __ASSEMBLY__ +static inline unsigned long native_save_fl(void) +{ + unsigned long f; + asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); + return f; +} + +static inline void native_restore_fl(unsigned long f) +{ + asm volatile("pushl %0 ; popfl": /* no output */ + :"g" (f) + :"memory", "cc"); +} + +static inline void native_irq_disable(void) +{ + asm volatile("cli": : :"memory"); +} + +static inline void native_irq_enable(void) +{ + asm volatile("sti": : :"memory"); +} + +static inline void native_safe_halt(void) +{ + asm volatile("sti; hlt": : :"memory"); +} + +static inline void native_halt(void) +{ + asm volatile("hlt": : :"memory"); +} +#endif /* __ASSEMBLY__ */ + #ifdef CONFIG_PARAVIRT #include #else @@ -17,35 +53,22 @@ static inline unsigned long __raw_local_save_flags(void) { - unsigned long flags; - - __asm__ __volatile__( - "pushfl ; popl %0" - : "=g" (flags) - : /* no input */ - ); - - return flags; + return native_save_fl(); } static inline void raw_local_irq_restore(unsigned long flags) { - __asm__ __volatile__( - "pushl %0 ; popfl" - : /* no output */ - :"g" (flags) - :"memory", "cc" - ); + native_restore_fl(flags); } static inline void raw_local_irq_disable(void) { - __asm__ __volatile__("cli" : : : "memory"); + native_irq_disable(); } static inline void raw_local_irq_enable(void) { - __asm__ __volatile__("sti" : : : "memory"); + native_irq_enable(); } /* @@ -54,7 +77,7 @@ static inline void raw_local_irq_enable(void) */ static inline void raw_safe_halt(void) { - __asm__ __volatile__("sti; hlt" : : : "memory"); + native_safe_halt(); } /* @@ -63,7 +86,7 @@ static inline void raw_safe_halt(void) */ static inline void halt(void) { - __asm__ __volatile__("hlt": : :"memory"); + native_halt(); } /* diff --git a/include/asm-i386/msr.h b/include/asm-i386/msr.h index 2ad3f30b1a6..00acaa8b36b 100644 --- a/include/asm-i386/msr.h +++ b/include/asm-i386/msr.h @@ -1,6 +1,74 @@ #ifndef __ASM_MSR_H #define __ASM_MSR_H +#include + +static inline unsigned long long native_read_msr(unsigned int msr) +{ + unsigned long long val; + + asm volatile("rdmsr" : "=A" (val) : "c" (msr)); + return val; +} + +static inline unsigned long long native_read_msr_safe(unsigned int msr, + int *err) +{ + unsigned long long val; + + asm volatile("2: rdmsr ; xorl %0,%0\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: movl %3,%0 ; jmp 1b\n\t" + ".previous\n\t" + ".section __ex_table,\"a\"\n" + " .align 4\n\t" + " .long 2b,3b\n\t" + ".previous" + : "=r" (*err), "=A" (val) + : "c" (msr), "i" (-EFAULT)); + + return val; +} + +static inline void native_write_msr(unsigned int msr, unsigned long long val) +{ + asm volatile("wrmsr" : : "c" (msr), "A"(val)); +} + +static inline int native_write_msr_safe(unsigned int msr, + unsigned long long val) +{ + int err; + asm volatile("2: wrmsr ; xorl %0,%0\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: movl %4,%0 ; jmp 1b\n\t" + ".previous\n\t" + ".section __ex_table,\"a\"\n" + " .align 4\n\t" + " .long 2b,3b\n\t" + ".previous" + : "=a" (err) + : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)), + "i" (-EFAULT)); + return err; +} + +static inline unsigned long long native_read_tsc(void) +{ + unsigned long long val; + asm volatile("rdtsc" : "=A" (val)); + return val; +} + +static inline unsigned long long native_read_pmc(void) +{ + unsigned long long val; + asm volatile("rdpmc" : "=A" (val)); + return val; +} + #ifdef CONFIG_PARAVIRT #include #else @@ -11,22 +79,20 @@ * pointer indirection), this allows gcc to optimize better */ -#define rdmsr(msr,val1,val2) \ - __asm__ __volatile__("rdmsr" \ - : "=a" (val1), "=d" (val2) \ - : "c" (msr)) +#define rdmsr(msr,val1,val2) \ + do { \ + unsigned long long __val = native_read_msr(msr); \ + val1 = __val; \ + val2 = __val >> 32; \ + } while(0) -#define wrmsr(msr,val1,val2) \ - __asm__ __volatile__("wrmsr" \ - : /* no outputs */ \ - : "c" (msr), "a" (val1), "d" (val2)) +#define wrmsr(msr,val1,val2) \ + native_write_msr(msr, ((unsigned long long)val2 << 32) | val1) -#define rdmsrl(msr,val) do { \ - unsigned long l__,h__; \ - rdmsr (msr, l__, h__); \ - val = l__; \ - val |= ((u64)h__<<32); \ -} while(0) +#define rdmsrl(msr,val) \ + do { \ + (val) = native_read_msr(msr); \ + } while(0) static inline void wrmsrl (unsigned long msr, unsigned long long val) { @@ -37,50 +103,41 @@ static inline void wrmsrl (unsigned long msr, unsigned long long val) } /* wrmsr with exception handling */ -#define wrmsr_safe(msr,a,b) ({ int ret__; \ - asm volatile("2: wrmsr ; xorl %0,%0\n" \ - "1:\n\t" \ - ".section .fixup,\"ax\"\n\t" \ - "3: movl %4,%0 ; jmp 1b\n\t" \ - ".previous\n\t" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n\t" \ - " .long 2b,3b\n\t" \ - ".previous" \ - : "=a" (ret__) \ - : "c" (msr), "0" (a), "d" (b), "i" (-EFAULT));\ - ret__; }) +#define wrmsr_safe(msr,val1,val2) \ + (native_write_msr_safe(msr, ((unsigned long long)val2 << 32) | val1)) /* rdmsr with exception handling */ -#define rdmsr_safe(msr,a,b) ({ int ret__; \ - asm volatile("2: rdmsr ; xorl %0,%0\n" \ - "1:\n\t" \ - ".section .fixup,\"ax\"\n\t" \ - "3: movl %4,%0 ; jmp 1b\n\t" \ - ".previous\n\t" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n\t" \ - " .long 2b,3b\n\t" \ - ".previous" \ - : "=r" (ret__), "=a" (*(a)), "=d" (*(b)) \ - : "c" (msr), "i" (-EFAULT));\ - ret__; }) - -#define rdtsc(low,high) \ - __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) - -#define rdtscl(low) \ - __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx") - -#define rdtscll(val) \ - __asm__ __volatile__("rdtsc" : "=A" (val)) +#define rdmsr_safe(msr,p1,p2) \ + ({ \ + int __err; \ + unsigned long long __val = native_read_msr_safe(msr, &__err);\ + (*p1) = __val; \ + (*p2) = __val >> 32; \ + __err; \ + }) + +#define rdtsc(low,high) \ + do { \ + u64 _l = native_read_tsc(); \ + (low) = (u32)_l; \ + (high) = _l >> 32; \ + } while(0) + +#define rdtscl(low) \ + do { \ + (low) = native_read_tsc(); \ + } while(0) + +#define rdtscll(val) ((val) = native_read_tsc()) #define write_tsc(val1,val2) wrmsr(0x10, val1, val2) -#define rdpmc(counter,low,high) \ - __asm__ __volatile__("rdpmc" \ - : "=a" (low), "=d" (high) \ - : "c" (counter)) +#define rdpmc(counter,low,high) \ + do { \ + u64 _l = native_read_pmc(); \ + low = (u32)_l; \ + high = _l >> 32; \ + } while(0) #endif /* !CONFIG_PARAVIRT */ #ifdef CONFIG_SMP diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index e63f1e444fc..32acebce9ae 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -29,6 +29,7 @@ struct thread_struct; struct Xgt_desc_struct; struct tss_struct; struct mm_struct; +struct desc_struct; struct paravirt_ops { unsigned int kernel_rpl; @@ -105,14 +106,13 @@ struct paravirt_ops void (*set_ldt)(const void *desc, unsigned entries); unsigned long (*store_tr)(void); void (*load_tls)(struct thread_struct *t, unsigned int cpu); - void (*write_ldt_entry)(void *dt, int entrynum, - u32 low, u32 high); - void (*write_gdt_entry)(void *dt, int entrynum, - u32 low, u32 high); - void (*write_idt_entry)(void *dt, int entrynum, - u32 low, u32 high); - void (*load_esp0)(struct tss_struct *tss, - struct thread_struct *thread); + void (*write_ldt_entry)(struct desc_struct *, + int entrynum, u32 low, u32 high); + void (*write_gdt_entry)(struct desc_struct *, + int entrynum, u32 low, u32 high); + void (*write_idt_entry)(struct desc_struct *, + int entrynum, u32 low, u32 high); + void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t); void (*set_iopl_mask)(unsigned mask); @@ -232,6 +232,7 @@ static inline void halt(void) #define get_kernel_rpl() (paravirt_ops.kernel_rpl) +/* These should all do BUG_ON(_err), but our headers are too tangled. */ #define rdmsr(msr,val1,val2) do { \ int _err; \ u64 _l = paravirt_ops.read_msr(msr,&_err); \ diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 80f7e8a1e87..96edfdfe32d 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -147,7 +147,7 @@ static inline void detect_ht(struct cpuinfo_x86 *c) {} #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ -static inline fastcall void native_cpuid(unsigned int *eax, unsigned int *ebx, +static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { /* ecx is often an input as well as an output. */ @@ -545,13 +545,7 @@ static inline void rep_nop(void) #define cpu_relax() rep_nop() -#ifdef CONFIG_PARAVIRT -#include -#else -#define paravirt_enabled() 0 -#define __cpuid native_cpuid - -static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread) +static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread) { tss->esp0 = thread->esp0; /* This can only happen when SEP is enabled, no need to test "SEP"arately */ @@ -561,24 +555,60 @@ static inline void load_esp0(struct tss_struct *tss, struct thread_struct *threa } } -/* - * These special macros can be used to get or set a debugging register - */ -#define get_debugreg(var, register) \ - __asm__("movl %%db" #register ", %0" \ - :"=r" (var)) -#define set_debugreg(value, register) \ - __asm__("movl %0,%%db" #register \ - : /* no output */ \ - :"r" (value)) -#define set_iopl_mask native_set_iopl_mask -#endif /* CONFIG_PARAVIRT */ +static inline unsigned long native_get_debugreg(int regno) +{ + unsigned long val = 0; /* Damn you, gcc! */ + + switch (regno) { + case 0: + asm("movl %%db0, %0" :"=r" (val)); break; + case 1: + asm("movl %%db1, %0" :"=r" (val)); break; + case 2: + asm("movl %%db2, %0" :"=r" (val)); break; + case 3: + asm("movl %%db3, %0" :"=r" (val)); break; + case 6: + asm("movl %%db6, %0" :"=r" (val)); break; + case 7: + asm("movl %%db7, %0" :"=r" (val)); break; + default: + BUG(); + } + return val; +} + +static inline void native_set_debugreg(int regno, unsigned long value) +{ + switch (regno) { + case 0: + asm("movl %0,%%db0" : /* no output */ :"r" (value)); + break; + case 1: + asm("movl %0,%%db1" : /* no output */ :"r" (value)); + break; + case 2: + asm("movl %0,%%db2" : /* no output */ :"r" (value)); + break; + case 3: + asm("movl %0,%%db3" : /* no output */ :"r" (value)); + break; + case 6: + asm("movl %0,%%db6" : /* no output */ :"r" (value)); + break; + case 7: + asm("movl %0,%%db7" : /* no output */ :"r" (value)); + break; + default: + BUG(); + } +} /* * Set IOPL bits in EFLAGS from given mask */ -static fastcall inline void native_set_iopl_mask(unsigned mask) +static inline void native_set_iopl_mask(unsigned mask) { unsigned int reg; __asm__ __volatile__ ("pushfl;" @@ -591,6 +621,28 @@ static fastcall inline void native_set_iopl_mask(unsigned mask) : "i" (~X86_EFLAGS_IOPL), "r" (mask)); } +#ifdef CONFIG_PARAVIRT +#include +#else +#define paravirt_enabled() 0 +#define __cpuid native_cpuid + +static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread) +{ + native_load_esp0(tss, thread); +} + +/* + * These special macros can be used to get or set a debugging register + */ +#define get_debugreg(var, register) \ + (var) = native_get_debugreg(register) +#define set_debugreg(value, register) \ + native_set_debugreg(register, value) + +#define set_iopl_mask native_set_iopl_mask +#endif /* CONFIG_PARAVIRT */ + /* * Generic CPUID function * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h index a6d20d9a1a3..c3a58c08c49 100644 --- a/include/asm-i386/system.h +++ b/include/asm-i386/system.h @@ -88,65 +88,96 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" \ #define savesegment(seg, value) \ asm volatile("mov %%" #seg ",%0":"=rm" (value)) + +static inline void native_clts(void) +{ + asm volatile ("clts"); +} + +static inline unsigned long native_read_cr0(void) +{ + unsigned long val; + asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); + return val; +} + +static inline void native_write_cr0(unsigned long val) +{ + asm volatile("movl %0,%%cr0": :"r" (val)); +} + +static inline unsigned long native_read_cr2(void) +{ + unsigned long val; + asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); + return val; +} + +static inline void native_write_cr2(unsigned long val) +{ + asm volatile("movl %0,%%cr2": :"r" (val)); +} + +static inline unsigned long native_read_cr3(void) +{ + unsigned long val; + asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); + return val; +} + +static inline void native_write_cr3(unsigned long val) +{ + asm volatile("movl %0,%%cr3": :"r" (val)); +} + +static inline unsigned long native_read_cr4(void) +{ + unsigned long val; + asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); + return val; +} + +static inline unsigned long native_read_cr4_safe(void) +{ + unsigned long val; + /* This could fault if %cr4 does not exist */ + asm("1: movl %%cr4, %0 \n" + "2: \n" + ".section __ex_table,\"a\" \n" + ".long 1b,2b \n" + ".previous \n" + : "=r" (val): "0" (0)); + return val; +} + +static inline void native_write_cr4(unsigned long val) +{ + asm volatile("movl %0,%%cr4": :"r" (val)); +} + +static inline void native_wbinvd(void) +{ + asm volatile("wbinvd": : :"memory"); +} + + #ifdef CONFIG_PARAVIRT #include #else -#define read_cr0() ({ \ - unsigned int __dummy; \ - __asm__ __volatile__( \ - "movl %%cr0,%0\n\t" \ - :"=r" (__dummy)); \ - __dummy; \ -}) -#define write_cr0(x) \ - __asm__ __volatile__("movl %0,%%cr0": :"r" (x)) - -#define read_cr2() ({ \ - unsigned int __dummy; \ - __asm__ __volatile__( \ - "movl %%cr2,%0\n\t" \ - :"=r" (__dummy)); \ - __dummy; \ -}) -#define write_cr2(x) \ - __asm__ __volatile__("movl %0,%%cr2": :"r" (x)) - -#define read_cr3() ({ \ - unsigned int __dummy; \ - __asm__ ( \ - "movl %%cr3,%0\n\t" \ - :"=r" (__dummy)); \ - __dummy; \ -}) -#define write_cr3(x) \ - __asm__ __volatile__("movl %0,%%cr3": :"r" (x)) - -#define read_cr4() ({ \ - unsigned int __dummy; \ - __asm__( \ - "movl %%cr4,%0\n\t" \ - :"=r" (__dummy)); \ - __dummy; \ -}) -#define read_cr4_safe() ({ \ - unsigned int __dummy; \ - /* This could fault if %cr4 does not exist */ \ - __asm__("1: movl %%cr4, %0 \n" \ - "2: \n" \ - ".section __ex_table,\"a\" \n" \ - ".long 1b,2b \n" \ - ".previous \n" \ - : "=r" (__dummy): "0" (0)); \ - __dummy; \ -}) -#define write_cr4(x) \ - __asm__ __volatile__("movl %0,%%cr4": :"r" (x)) - -#define wbinvd() \ - __asm__ __volatile__ ("wbinvd": : :"memory") +#define read_cr0() (native_read_cr0()) +#define write_cr0(x) (native_write_cr0(x)) +#define read_cr2() (native_read_cr2()) +#define write_cr2(x) (native_write_cr2(x)) +#define read_cr3() (native_read_cr3()) +#define write_cr3(x) (native_write_cr3(x)) +#define read_cr4() (native_read_cr4()) +#define read_cr4_safe() (native_read_cr4_safe()) +#define write_cr4(x) (native_write_cr4(x)) +#define wbinvd() (native_wbinvd()) /* Clear the 'TS' bit */ -#define clts() __asm__ __volatile__ ("clts") +#define clts() (native_clts()) + #endif/* CONFIG_PARAVIRT */ /* Set the 'TS' bit */ -- cgit v1.2.3 From d01ad8dd56527be72947b4b9997bb2c05783c3ed Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] x86: Improve handling of kernel mappings in change_page_attr Fix various broken corner cases in i386 and x86-64 change_page_attr. AK: split off from tighten kernel image access rights Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- include/asm-i386/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index c3b58d473a5..143ddc42b86 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -159,6 +159,7 @@ void paging_init(void); extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC; #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) +#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD) #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) @@ -166,6 +167,7 @@ extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC; #define PAGE_KERNEL __pgprot(__PAGE_KERNEL) #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) +#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) -- cgit v1.2.3 From b4531e863dbd06b5d336afefdb37483b690dea59 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] i386: Use X86_EFLAGS_IF in irqflags.h. Move X86_EFLAGS_IF et al out to a new header: processor-flags.h, so we can include it from irqflags.h and use it in raw_irqs_disabled_flags(). As a side-effect, we could now use these flags in .S files. Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen --- include/asm-i386/irqflags.h | 3 ++- include/asm-i386/processor-flags.h | 26 ++++++++++++++++++++++++++ include/asm-i386/processor.h | 22 +--------------------- 3 files changed, 29 insertions(+), 22 deletions(-) create mode 100644 include/asm-i386/processor-flags.h (limited to 'include/asm-i386') diff --git a/include/asm-i386/irqflags.h b/include/asm-i386/irqflags.h index c1cdd094938..eff8585cb74 100644 --- a/include/asm-i386/irqflags.h +++ b/include/asm-i386/irqflags.h @@ -9,6 +9,7 @@ */ #ifndef _ASM_IRQFLAGS_H #define _ASM_IRQFLAGS_H +#include #ifndef __ASSEMBLY__ static inline unsigned long native_save_fl(void) @@ -119,7 +120,7 @@ static inline unsigned long __raw_local_irq_save(void) static inline int raw_irqs_disabled_flags(unsigned long flags) { - return !(flags & (1 << 9)); + return !(flags & X86_EFLAGS_IF); } static inline int raw_irqs_disabled(void) diff --git a/include/asm-i386/processor-flags.h b/include/asm-i386/processor-flags.h new file mode 100644 index 00000000000..b4711c222e2 --- /dev/null +++ b/include/asm-i386/processor-flags.h @@ -0,0 +1,26 @@ +#ifndef __ASM_I386_PROCESSOR_FLAGS_H +#define __ASM_I386_PROCESSOR_FLAGS_H +/* Various flags defined: can be included from assembler. */ + +/* + * EFLAGS bits + */ +#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ +#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ +#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ +#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ +#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ +#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ +#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ +#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ +#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ +#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ +#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ +#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ +#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ +#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ +#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ +#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ +#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ + +#endif /* __ASM_I386_PROCESSOR_FLAGS_H */ diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 96edfdfe32d..11838df8860 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -21,6 +21,7 @@ #include #include #include +#include /* flag for disabling the tsc */ extern int tsc_disable; @@ -126,27 +127,6 @@ extern void detect_ht(struct cpuinfo_x86 *c); static inline void detect_ht(struct cpuinfo_x86 *c) {} #endif -/* - * EFLAGS bits - */ -#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ -#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ -#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ -#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ -#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ -#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ -#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ -#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ -#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ -#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ -#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ -#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ -#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ -#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ -#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ -#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ -#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ - static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { -- cgit v1.2.3 From 4fbb5968810b237e81977f131986b9efd5245368 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 May 2007 19:27:11 +0200 Subject: [PATCH] i386: cleanup GDT Access Now we have an explicit per-cpu GDT variable, we don't need to keep the descriptors around to use them to find the GDT: expose cpu_gdt directly. We could go further and make load_gdt() pack the descriptor for us, or even assume it means "load the current cpu's GDT" which is what it always does. Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-i386/desc.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index 13f701ea9a8..4a974064e92 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -18,16 +18,13 @@ struct Xgt_desc_struct { unsigned short pad; } __attribute__ ((packed)); -extern struct Xgt_desc_struct idt_descr; -DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); DECLARE_PER_CPU(struct desc_struct, cpu_gdt[GDT_ENTRIES]); -extern struct Xgt_desc_struct early_gdt_descr; - static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) { - return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; + return per_cpu(cpu_gdt, cpu); } +extern struct Xgt_desc_struct idt_descr; extern struct desc_struct idt_table[]; extern void set_intr_gate(unsigned int irq, void * addr); -- cgit v1.2.3 From 01a2f435564b4baab61328b4018d36464468f57b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:11 +0200 Subject: [PATCH] i386: Add smp_ops interface Add a smp_ops interface. This abstracts the API defined by for use within arch/i386. The primary intent is that it be used by a paravirtualizing hypervisor to implement SMP, but it could also be used by non-APIC-using sub-architectures. This is related to CONFIG_PARAVIRT, but is implemented unconditionally since it is simpler that way and not a highly performance-sensitive interface. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Ingo Molnar Cc: James Bottomley --- include/asm-i386/smp.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index 9cab1531c61..2d083cb4ca9 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -49,6 +49,59 @@ extern void cpu_exit_clear(void); extern void cpu_uninit(void); #endif +struct smp_ops +{ + void (*smp_prepare_boot_cpu)(void); + void (*smp_prepare_cpus)(unsigned max_cpus); + int (*cpu_up)(unsigned cpu); + void (*smp_cpus_done)(unsigned max_cpus); + + void (*smp_send_stop)(void); + void (*smp_send_reschedule)(int cpu); + int (*smp_call_function_mask)(cpumask_t mask, + void (*func)(void *info), void *info, + int wait); +}; + +extern struct smp_ops smp_ops; + +static inline void smp_prepare_boot_cpu(void) +{ + smp_ops.smp_prepare_boot_cpu(); +} +static inline void smp_prepare_cpus(unsigned int max_cpus) +{ + smp_ops.smp_prepare_cpus(max_cpus); +} +static inline int __cpu_up(unsigned int cpu) +{ + return smp_ops.cpu_up(cpu); +} +static inline void smp_cpus_done(unsigned int max_cpus) +{ + smp_ops.smp_cpus_done(max_cpus); +} + +static inline void smp_send_stop(void) +{ + smp_ops.smp_send_stop(); +} +static inline void smp_send_reschedule(int cpu) +{ + smp_ops.smp_send_reschedule(cpu); +} +static inline int smp_call_function_mask(cpumask_t mask, + void (*func) (void *info), void *info, + int wait) +{ + return smp_ops.smp_call_function_mask(mask, func, info, wait); +} + +void native_smp_prepare_boot_cpu(void); +void native_smp_prepare_cpus(unsigned int max_cpus); +int native_cpu_up(unsigned int cpunum); +void native_smp_cpus_done(unsigned int max_cpus); + #ifndef CONFIG_PARAVIRT #define startup_ipi_hook(phys_apicid, start_eip, start_esp) \ do { } while (0) -- cgit v1.2.3 From 07f3331c6bfd27a06dfb0ca9fa4f06dec6606876 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:11 +0200 Subject: [PATCH] i386: Add machine_ops interface to abstract halting and rebooting machine_ops is an interface for the machine_* functions defined in . This is intended to allow hypervisors to intercept the reboot process, but it could be used to implement other x86 subarchtecture reboots. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- include/asm-i386/reboot.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/asm-i386/reboot.h (limited to 'include/asm-i386') diff --git a/include/asm-i386/reboot.h b/include/asm-i386/reboot.h new file mode 100644 index 00000000000..e9e3ffc22c0 --- /dev/null +++ b/include/asm-i386/reboot.h @@ -0,0 +1,20 @@ +#ifndef _ASM_REBOOT_H +#define _ASM_REBOOT_H + +struct pt_regs; + +struct machine_ops +{ + void (*restart)(char *cmd); + void (*halt)(void); + void (*power_off)(void); + void (*shutdown)(void); + void (*crash_shutdown)(struct pt_regs *); + void (*emergency_restart)(void); +}; + +extern struct machine_ops machine_ops; + +void machine_real_restart(unsigned char *code, int length); + +#endif /* _ASM_REBOOT_H */ -- cgit v1.2.3 From b92e9fac400d4ae5bc7a75c568e9844ec53ea329 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 2 May 2007 19:27:11 +0200 Subject: [PATCH] x86: fix amd64-agp aperture validation Under CONFIG_DISCONTIGMEM, assuming that a !pfn_valid() implies all subsequent pfn-s are also invalid is wrong. Thus replace this by explicitly checking against the E820 map. AK: make e820 on x86-64 not initdata Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Acked-by: Mark Langsdorf --- include/asm-i386/e820.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/e820.h b/include/asm-i386/e820.h index c5b8fc6109d..096a2a8eb1d 100644 --- a/include/asm-i386/e820.h +++ b/include/asm-i386/e820.h @@ -38,6 +38,7 @@ extern struct e820map e820; extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); +extern int e820_any_mapped(u64 start, u64 end, unsigned type); extern void find_max_pfn(void); extern void register_bootmem_low_pages(unsigned long max_low_pfn); extern void e820_register_memory(void); -- cgit v1.2.3 From 1353ebb4b48151e3810d9a60449edd43a90ea3c3 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] i386: Clean up asm-i386/bugs.h Most of asm-i386/bugs.h is code which should be in a C file, so put it there. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Linus Torvalds --- include/asm-i386/alternative.h | 1 + include/asm-i386/bugs.h | 194 +---------------------------------------- 2 files changed, 5 insertions(+), 190 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/alternative.h b/include/asm-i386/alternative.h index b8fa9557c53..dbc1a29284f 100644 --- a/include/asm-i386/alternative.h +++ b/include/asm-i386/alternative.h @@ -16,6 +16,7 @@ struct alt_instr { u8 pad; }; +extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); struct module; diff --git a/include/asm-i386/bugs.h b/include/asm-i386/bugs.h index c90c7c49930..df539b39044 100644 --- a/include/asm-i386/bugs.h +++ b/include/asm-i386/bugs.h @@ -1,198 +1,12 @@ -/* - * include/asm-i386/bugs.h - * - * Copyright (C) 1994 Linus Torvalds - * - * Cyrix stuff, June 1998 by: - * - Rafael R. Reilova (moved everything from head.S), - * - * - Channing Corn (tests & fixes), - * - Andrew D. Balsa (code cleanup). - */ - /* * This is included by init/main.c to check for architecture-dependent bugs. * * Needs: * void check_bugs(void); */ +#ifndef _ASM_I386_BUG_H +#define _ASM_I386_BUG_H -#include -#include -#include -#include -#include - -static int __init no_halt(char *s) -{ - boot_cpu_data.hlt_works_ok = 0; - return 1; -} - -__setup("no-hlt", no_halt); - -static int __init mca_pentium(char *s) -{ - mca_pentium_flag = 1; - return 1; -} - -__setup("mca-pentium", mca_pentium); - -static int __init no_387(char *s) -{ - boot_cpu_data.hard_math = 0; - write_cr0(0xE | read_cr0()); - return 1; -} - -__setup("no387", no_387); - -static double __initdata x = 4195835.0; -static double __initdata y = 3145727.0; - -/* - * This used to check for exceptions.. - * However, it turns out that to support that, - * the XMM trap handlers basically had to - * be buggy. So let's have a correct XMM trap - * handler, and forget about printing out - * some status at boot. - * - * We should really only care about bugs here - * anyway. Not features. - */ -static void __init check_fpu(void) -{ - if (!boot_cpu_data.hard_math) { -#ifndef CONFIG_MATH_EMULATION - printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); - printk(KERN_EMERG "Giving up.\n"); - for (;;) ; -#endif - return; - } - -/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */ - /* Test for the divl bug.. */ - __asm__("fninit\n\t" - "fldl %1\n\t" - "fdivl %2\n\t" - "fmull %2\n\t" - "fldl %1\n\t" - "fsubp %%st,%%st(1)\n\t" - "fistpl %0\n\t" - "fwait\n\t" - "fninit" - : "=m" (*&boot_cpu_data.fdiv_bug) - : "m" (*&x), "m" (*&y)); - if (boot_cpu_data.fdiv_bug) - printk("Hmm, FPU with FDIV bug.\n"); -} - -static void __init check_hlt(void) -{ - if (paravirt_enabled()) - return; - - printk(KERN_INFO "Checking 'hlt' instruction... "); - if (!boot_cpu_data.hlt_works_ok) { - printk("disabled\n"); - return; - } - halt(); - halt(); - halt(); - halt(); - printk("OK.\n"); -} - -/* - * Most 386 processors have a bug where a POPAD can lock the - * machine even from user space. - */ - -static void __init check_popad(void) -{ -#ifndef CONFIG_X86_POPAD_OK - int res, inp = (int) &res; - - printk(KERN_INFO "Checking for popad bug... "); - __asm__ __volatile__( - "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " - : "=&a" (res) - : "d" (inp) - : "ecx", "edi" ); - /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ - if (res != 12345678) printk( "Buggy.\n" ); - else printk( "OK.\n" ); -#endif -} - -/* - * Check whether we are able to run this kernel safely on SMP. - * - * - In order to run on a i386, we need to be compiled for i386 - * (for due to lack of "invlpg" and working WP on a i386) - * - In order to run on anything without a TSC, we need to be - * compiled for a i486. - * - In order to support the local APIC on a buggy Pentium machine, - * we need to be compiled with CONFIG_X86_GOOD_APIC disabled, - * which happens implicitly if compiled for a Pentium or lower - * (unless an advanced selection of CPU features is used) as an - * otherwise config implies a properly working local APIC without - * the need to do extra reads from the APIC. -*/ - -static void __init check_config(void) -{ -/* - * We'd better not be a i386 if we're configured to use some - * i486+ only features! (WP works in supervisor mode and the - * new "invlpg" and "bswap" instructions) - */ -#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP) - if (boot_cpu_data.x86 == 3) - panic("Kernel requires i486+ for 'invlpg' and other features"); -#endif - -/* - * If we configured ourselves for a TSC, we'd better have one! - */ -#ifdef CONFIG_X86_TSC - if (!cpu_has_tsc && !tsc_disable) - panic("Kernel compiled for Pentium+, requires TSC feature!"); -#endif - -/* - * If we were told we had a good local APIC, check for buggy Pentia, - * i.e. all B steppings and the C2 stepping of P54C when using their - * integrated APIC (see 11AP erratum in "Pentium Processor - * Specification Update"). - */ -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC) - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL - && cpu_has_apic - && boot_cpu_data.x86 == 5 - && boot_cpu_data.x86_model == 2 - && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11)) - panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!"); -#endif -} - -extern void alternative_instructions(void); +extern void __init check_bugs(void); -static void __init check_bugs(void) -{ - identify_cpu(&boot_cpu_data); -#ifndef CONFIG_SMP - printk("CPU: "); - print_cpu_info(&boot_cpu_data); -#endif - check_config(); - check_fpu(); - check_hlt(); - check_popad(); - init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); - alternative_instructions(); -} +#endif /* _ASM_I386_BUG_H */ -- cgit v1.2.3 From a6c4e076ee4c1ea670e4faa55814e63dd08e3f29 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] i386: clean up identify_cpu identify_cpu() is used to identify both the boot CPU and secondary CPUs, but it performs some actions which only apply to the boot CPU. Those functions are therefore really __init functions, but because they're called by identify_cpu(), they must be marked __cpuinit. This patch splits identify_cpu() into identify_boot_cpu() and identify_secondary_cpu(), and calls the appropriate init functions from each. Also, identify_boot_cpu() and all the functions it dominates are marked __init. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- include/asm-i386/processor.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 11838df8860..9d895cc2f31 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -116,7 +116,8 @@ extern char ignore_fpu_irq; void __init cpu_detect(struct cpuinfo_x86 *c); -extern void identify_cpu(struct cpuinfo_x86 *); +extern void identify_boot_cpu(void); +extern void identify_secondary_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern unsigned short num_cache_leaves; -- cgit v1.2.3 From d4f7a2c18e59e0304a1c733589ce14fc02fec1bd Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] i386: Relocate VDSO ELF headers to match mapped location with COMPAT_VDSO Some versions of libc can't deal with a VDSO which doesn't have its ELF headers matching its mapped address. COMPAT_VDSO maps the VDSO at a specific system-wide fixed address. Previously this was all done at build time, on the grounds that the fixed VDSO address is always at the top of the address space. However, a hypervisor may reserve some of that address space, pushing the fixmap address down. This patch does the adjustment dynamically at runtime, depending on the runtime location of the VDSO fixmap. [ Patch has been through several hands: Jan Beulich wrote the orignal version; Zach reworked it, and Jeremy converted it to relocate phdrs as well as sections. ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Zachary Amsden Cc: "Jan Beulich" Cc: Eric W. Biederman Cc: Andi Kleen Cc: Ingo Molnar Cc: Roland McGrath --- include/asm-i386/elf.h | 28 ++++++++++------------------ include/asm-i386/fixmap.h | 8 ++------ 2 files changed, 12 insertions(+), 24 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h index 952b3ee3c9b..d304ab4161f 100644 --- a/include/asm-i386/elf.h +++ b/include/asm-i386/elf.h @@ -133,39 +133,31 @@ extern int dump_task_extended_fpu (struct task_struct *, struct user_fxsr_struct #define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs) #define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO)) -#define VDSO_BASE ((unsigned long)current->mm->context.vdso) - -#ifdef CONFIG_COMPAT_VDSO -# define VDSO_COMPAT_BASE VDSO_HIGH_BASE -# define VDSO_PRELINK VDSO_HIGH_BASE -#else -# define VDSO_COMPAT_BASE VDSO_BASE -# define VDSO_PRELINK 0 -#endif +#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) +#define VDSO_PRELINK 0 #define VDSO_SYM(x) \ - (VDSO_COMPAT_BASE + (unsigned long)(x) - VDSO_PRELINK) + (VDSO_CURRENT_BASE + (unsigned long)(x) - VDSO_PRELINK) #define VDSO_HIGH_EHDR ((const struct elfhdr *) VDSO_HIGH_BASE) -#define VDSO_EHDR ((const struct elfhdr *) VDSO_COMPAT_BASE) +#define VDSO_EHDR ((const struct elfhdr *) VDSO_CURRENT_BASE) extern void __kernel_vsyscall; #define VDSO_ENTRY VDSO_SYM(&__kernel_vsyscall) -#ifndef CONFIG_COMPAT_VDSO -#define ARCH_HAS_SETUP_ADDITIONAL_PAGES struct linux_binprm; + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES extern int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack); -#endif extern unsigned int vdso_enabled; -#define ARCH_DLINFO \ -do if (vdso_enabled) { \ - NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ - NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_COMPAT_BASE); \ +#define ARCH_DLINFO \ +do if (vdso_enabled) { \ + NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ } while (0) #endif diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h index 3e9f610c35d..e5651b2a585 100644 --- a/include/asm-i386/fixmap.h +++ b/include/asm-i386/fixmap.h @@ -19,13 +19,9 @@ * Leave one empty page between vmalloc'ed areas and * the start of the fixmap. */ -#ifndef CONFIG_COMPAT_VDSO extern unsigned long __FIXADDR_TOP; -#else -#define __FIXADDR_TOP 0xfffff000 -#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) -#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) -#endif +#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) +#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) #ifndef __ASSEMBLY__ #include -- cgit v1.2.3 From 1dbf527c51c6c20c19869c8125cb5b87c3d09506 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] i386: Make COMPAT_VDSO runtime selectable. Now that relocation of the VDSO for COMPAT_VDSO users is done at runtime rather than compile time, it is possible to enable/disable compat mode at runtime. This patch allows you to enable COMPAT_VDSO mode with "vdso=2" on the kernel command line, or via sysctl. (Switching on a running system shouldn't be done lightly; any process which was relying on the compat VDSO will be upset if it goes away.) The COMPAT_VDSO config option still exists, but if enabled it just makes vdso_enabled default to VDSO_COMPAT. +From: Hugh Dickins Fix oops from i386-make-compat_vdso-runtime-selectable.patch. Even mingetty at system startup finds it easy to trigger an oops while reading /proc/PID/maps: though it has a good hold on the mm itself, that cannot stop exit_mm() from resetting tsk->mm to NULL. (It is usually show_map()'s call to get_gate_vma() which oopses, and I expect we could change that to check priv->tail_vma instead; but no matter, even m_start()'s call just after get_task_mm() is racy.) Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Zachary Amsden Cc: "Jan Beulich" Cc: Eric W. Biederman Cc: Andi Kleen Cc: Ingo Molnar Cc: Roland McGrath --- include/asm-i386/page.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index 7b19f454761..fd3f64ace24 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -143,9 +143,7 @@ extern int page_is_ram(unsigned long pagenr); #include #include -#ifndef CONFIG_COMPAT_VDSO #define __HAVE_ARCH_GATE_AREA 1 -#endif #endif /* __KERNEL__ */ #endif /* _I386_PAGE_H */ -- cgit v1.2.3 From f039b754714a422959027cb18bb33760eb8153f0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] x86: Don't use MWAIT on AMD Family 10 It doesn't put the CPU into deeper sleep states, so it's better to use the standard idle loop to save power. But allow to reenable it anyways for benchmarking. I also removed the obsolete idle=halt on i386 Cc: andreas.herrmann@amd.com Signed-off-by: Andi Kleen --- include/asm-i386/processor.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 9d895cc2f31..882d3f8fbba 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -779,4 +779,6 @@ extern int sysenter_setup(void); extern void cpu_set_gdt(int); extern void cpu_init(void); +extern int force_mwait; + #endif /* __ASM_I386_PROCESSOR_H */ -- cgit v1.2.3 From 4bc5aa91fb1e544ad37805520030a0d9fc6e11d3 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] x86: Clean up x86 control register and MSR macros (corrected) This patch is based on Rusty's recent cleanup of the EFLAGS-related macros; it extends the same kind of cleanup to control registers and MSRs. It also unifies these between i386 and x86-64; at least with regards to MSRs, the two had definitely gotten out of sync. Signed-off-by: H. Peter Anvin Signed-off-by: Andi Kleen --- include/asm-i386/Kbuild | 2 + include/asm-i386/msr-index.h | 273 +++++++++++++++++++++++++++++++++++++ include/asm-i386/msr.h | 237 +------------------------------- include/asm-i386/processor-flags.h | 65 +++++++++ include/asm-i386/processor.h | 35 ----- 5 files changed, 347 insertions(+), 265 deletions(-) create mode 100644 include/asm-i386/msr-index.h (limited to 'include/asm-i386') diff --git a/include/asm-i386/Kbuild b/include/asm-i386/Kbuild index 5ae93afc67e..cbf6e8f1087 100644 --- a/include/asm-i386/Kbuild +++ b/include/asm-i386/Kbuild @@ -3,8 +3,10 @@ include include/asm-generic/Kbuild.asm header-y += boot.h header-y += debugreg.h header-y += ldt.h +header-y += msr-index.h header-y += ptrace-abi.h header-y += ucontext.h +unifdef-y += msr.h unifdef-y += mtrr.h unifdef-y += vm86.h diff --git a/include/asm-i386/msr-index.h b/include/asm-i386/msr-index.h new file mode 100644 index 00000000000..f1190802283 --- /dev/null +++ b/include/asm-i386/msr-index.h @@ -0,0 +1,273 @@ +#ifndef __ASM_MSR_INDEX_H +#define __ASM_MSR_INDEX_H + +/* CPU model specific register (MSR) numbers */ + +/* x86-64 specific MSRs */ +#define MSR_EFER 0xc0000080 /* extended feature register */ +#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ +#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ +#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */ +#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ +#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ +#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ +#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ + +/* EFER bits: */ +#define _EFER_SCE 0 /* SYSCALL/SYSRET */ +#define _EFER_LME 8 /* Long mode enable */ +#define _EFER_LMA 10 /* Long mode active (read-only) */ +#define _EFER_NX 11 /* No execute enable */ + +#define EFER_SCE (1<<_EFER_SCE) +#define EFER_LME (1<<_EFER_LME) +#define EFER_LMA (1<<_EFER_LMA) +#define EFER_NX (1<<_EFER_NX) + +/* Intel MSRs. Some also available on other CPUs */ +#define MSR_IA32_PERFCTR0 0x000000c1 +#define MSR_IA32_PERFCTR1 0x000000c2 +#define MSR_FSB_FREQ 0x000000cd + +#define MSR_MTRRcap 0x000000fe +#define MSR_IA32_BBL_CR_CTL 0x00000119 + +#define MSR_IA32_SYSENTER_CS 0x00000174 +#define MSR_IA32_SYSENTER_ESP 0x00000175 +#define MSR_IA32_SYSENTER_EIP 0x00000176 + +#define MSR_IA32_MCG_CAP 0x00000179 +#define MSR_IA32_MCG_STATUS 0x0000017a +#define MSR_IA32_MCG_CTL 0x0000017b + +#define MSR_IA32_PEBS_ENABLE 0x000003f1 +#define MSR_IA32_DS_AREA 0x00000600 +#define MSR_IA32_PERF_CAPABILITIES 0x00000345 + +#define MSR_MTRRfix64K_00000 0x00000250 +#define MSR_MTRRfix16K_80000 0x00000258 +#define MSR_MTRRfix16K_A0000 0x00000259 +#define MSR_MTRRfix4K_C0000 0x00000268 +#define MSR_MTRRfix4K_C8000 0x00000269 +#define MSR_MTRRfix4K_D0000 0x0000026a +#define MSR_MTRRfix4K_D8000 0x0000026b +#define MSR_MTRRfix4K_E0000 0x0000026c +#define MSR_MTRRfix4K_E8000 0x0000026d +#define MSR_MTRRfix4K_F0000 0x0000026e +#define MSR_MTRRfix4K_F8000 0x0000026f +#define MSR_MTRRdefType 0x000002ff + +#define MSR_IA32_DEBUGCTLMSR 0x000001d9 +#define MSR_IA32_LASTBRANCHFROMIP 0x000001db +#define MSR_IA32_LASTBRANCHTOIP 0x000001dc +#define MSR_IA32_LASTINTFROMIP 0x000001dd +#define MSR_IA32_LASTINTTOIP 0x000001de + +#define MSR_IA32_MC0_CTL 0x00000400 +#define MSR_IA32_MC0_STATUS 0x00000401 +#define MSR_IA32_MC0_ADDR 0x00000402 +#define MSR_IA32_MC0_MISC 0x00000403 + +#define MSR_P6_PERFCTR0 0x000000c1 +#define MSR_P6_PERFCTR1 0x000000c2 +#define MSR_P6_EVNTSEL0 0x00000186 +#define MSR_P6_EVNTSEL1 0x00000187 + +/* K7/K8 MSRs. Not complete. See the architecture manual for a more + complete list. */ +#define MSR_K7_EVNTSEL0 0xc0010000 +#define MSR_K7_PERFCTR0 0xc0010004 +#define MSR_K7_EVNTSEL1 0xc0010001 +#define MSR_K7_PERFCTR1 0xc0010005 +#define MSR_K7_EVNTSEL2 0xc0010002 +#define MSR_K7_PERFCTR2 0xc0010006 +#define MSR_K7_EVNTSEL3 0xc0010003 +#define MSR_K7_PERFCTR3 0xc0010007 +#define MSR_K8_TOP_MEM1 0xc001001a +#define MSR_K7_CLK_CTL 0xc001001b +#define MSR_K8_TOP_MEM2 0xc001001d +#define MSR_K8_SYSCFG 0xc0010010 +#define MSR_K7_HWCR 0xc0010015 +#define MSR_K8_HWCR 0xc0010015 +#define MSR_K7_FID_VID_CTL 0xc0010041 +#define MSR_K7_FID_VID_STATUS 0xc0010042 +#define MSR_K8_ENABLE_C1E 0xc0010055 + +/* K6 MSRs */ +#define MSR_K6_EFER 0xc0000080 +#define MSR_K6_STAR 0xc0000081 +#define MSR_K6_WHCR 0xc0000082 +#define MSR_K6_UWCCR 0xc0000085 +#define MSR_K6_EPMR 0xc0000086 +#define MSR_K6_PSOR 0xc0000087 +#define MSR_K6_PFIR 0xc0000088 + +/* Centaur-Hauls/IDT defined MSRs. */ +#define MSR_IDT_FCR1 0x00000107 +#define MSR_IDT_FCR2 0x00000108 +#define MSR_IDT_FCR3 0x00000109 +#define MSR_IDT_FCR4 0x0000010a + +#define MSR_IDT_MCR0 0x00000110 +#define MSR_IDT_MCR1 0x00000111 +#define MSR_IDT_MCR2 0x00000112 +#define MSR_IDT_MCR3 0x00000113 +#define MSR_IDT_MCR4 0x00000114 +#define MSR_IDT_MCR5 0x00000115 +#define MSR_IDT_MCR6 0x00000116 +#define MSR_IDT_MCR7 0x00000117 +#define MSR_IDT_MCR_CTRL 0x00000120 + +/* VIA Cyrix defined MSRs*/ +#define MSR_VIA_FCR 0x00001107 +#define MSR_VIA_LONGHAUL 0x0000110a +#define MSR_VIA_RNG 0x0000110b +#define MSR_VIA_BCR2 0x00001147 + +/* Transmeta defined MSRs */ +#define MSR_TMTA_LONGRUN_CTRL 0x80868010 +#define MSR_TMTA_LONGRUN_FLAGS 0x80868011 +#define MSR_TMTA_LRTI_READOUT 0x80868018 +#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a + +/* Intel defined MSRs. */ +#define MSR_IA32_P5_MC_ADDR 0x00000000 +#define MSR_IA32_P5_MC_TYPE 0x00000001 +#define MSR_IA32_TSC 0x00000010 +#define MSR_IA32_PLATFORM_ID 0x00000017 +#define MSR_IA32_EBL_CR_POWERON 0x0000002a + +#define MSR_IA32_APICBASE 0x0000001b +#define MSR_IA32_APICBASE_BSP (1<<8) +#define MSR_IA32_APICBASE_ENABLE (1<<11) +#define MSR_IA32_APICBASE_BASE (0xfffff<<12) + +#define MSR_IA32_UCODE_WRITE 0x00000079 +#define MSR_IA32_UCODE_REV 0x0000008b + +#define MSR_IA32_PERF_STATUS 0x00000198 +#define MSR_IA32_PERF_CTL 0x00000199 + +#define MSR_IA32_MPERF 0x000000e7 +#define MSR_IA32_APERF 0x000000e8 + +#define MSR_IA32_THERM_CONTROL 0x0000019a +#define MSR_IA32_THERM_INTERRUPT 0x0000019b +#define MSR_IA32_THERM_STATUS 0x0000019c +#define MSR_IA32_MISC_ENABLE 0x000001a0 + +/* Intel Model 6 */ +#define MSR_P6_EVNTSEL0 0x00000186 +#define MSR_P6_EVNTSEL1 0x00000187 + +/* P4/Xeon+ specific */ +#define MSR_IA32_MCG_EAX 0x00000180 +#define MSR_IA32_MCG_EBX 0x00000181 +#define MSR_IA32_MCG_ECX 0x00000182 +#define MSR_IA32_MCG_EDX 0x00000183 +#define MSR_IA32_MCG_ESI 0x00000184 +#define MSR_IA32_MCG_EDI 0x00000185 +#define MSR_IA32_MCG_EBP 0x00000186 +#define MSR_IA32_MCG_ESP 0x00000187 +#define MSR_IA32_MCG_EFLAGS 0x00000188 +#define MSR_IA32_MCG_EIP 0x00000189 +#define MSR_IA32_MCG_RESERVED 0x0000018a + +/* Pentium IV performance counter MSRs */ +#define MSR_P4_BPU_PERFCTR0 0x00000300 +#define MSR_P4_BPU_PERFCTR1 0x00000301 +#define MSR_P4_BPU_PERFCTR2 0x00000302 +#define MSR_P4_BPU_PERFCTR3 0x00000303 +#define MSR_P4_MS_PERFCTR0 0x00000304 +#define MSR_P4_MS_PERFCTR1 0x00000305 +#define MSR_P4_MS_PERFCTR2 0x00000306 +#define MSR_P4_MS_PERFCTR3 0x00000307 +#define MSR_P4_FLAME_PERFCTR0 0x00000308 +#define MSR_P4_FLAME_PERFCTR1 0x00000309 +#define MSR_P4_FLAME_PERFCTR2 0x0000030a +#define MSR_P4_FLAME_PERFCTR3 0x0000030b +#define MSR_P4_IQ_PERFCTR0 0x0000030c +#define MSR_P4_IQ_PERFCTR1 0x0000030d +#define MSR_P4_IQ_PERFCTR2 0x0000030e +#define MSR_P4_IQ_PERFCTR3 0x0000030f +#define MSR_P4_IQ_PERFCTR4 0x00000310 +#define MSR_P4_IQ_PERFCTR5 0x00000311 +#define MSR_P4_BPU_CCCR0 0x00000360 +#define MSR_P4_BPU_CCCR1 0x00000361 +#define MSR_P4_BPU_CCCR2 0x00000362 +#define MSR_P4_BPU_CCCR3 0x00000363 +#define MSR_P4_MS_CCCR0 0x00000364 +#define MSR_P4_MS_CCCR1 0x00000365 +#define MSR_P4_MS_CCCR2 0x00000366 +#define MSR_P4_MS_CCCR3 0x00000367 +#define MSR_P4_FLAME_CCCR0 0x00000368 +#define MSR_P4_FLAME_CCCR1 0x00000369 +#define MSR_P4_FLAME_CCCR2 0x0000036a +#define MSR_P4_FLAME_CCCR3 0x0000036b +#define MSR_P4_IQ_CCCR0 0x0000036c +#define MSR_P4_IQ_CCCR1 0x0000036d +#define MSR_P4_IQ_CCCR2 0x0000036e +#define MSR_P4_IQ_CCCR3 0x0000036f +#define MSR_P4_IQ_CCCR4 0x00000370 +#define MSR_P4_IQ_CCCR5 0x00000371 +#define MSR_P4_ALF_ESCR0 0x000003ca +#define MSR_P4_ALF_ESCR1 0x000003cb +#define MSR_P4_BPU_ESCR0 0x000003b2 +#define MSR_P4_BPU_ESCR1 0x000003b3 +#define MSR_P4_BSU_ESCR0 0x000003a0 +#define MSR_P4_BSU_ESCR1 0x000003a1 +#define MSR_P4_CRU_ESCR0 0x000003b8 +#define MSR_P4_CRU_ESCR1 0x000003b9 +#define MSR_P4_CRU_ESCR2 0x000003cc +#define MSR_P4_CRU_ESCR3 0x000003cd +#define MSR_P4_CRU_ESCR4 0x000003e0 +#define MSR_P4_CRU_ESCR5 0x000003e1 +#define MSR_P4_DAC_ESCR0 0x000003a8 +#define MSR_P4_DAC_ESCR1 0x000003a9 +#define MSR_P4_FIRM_ESCR0 0x000003a4 +#define MSR_P4_FIRM_ESCR1 0x000003a5 +#define MSR_P4_FLAME_ESCR0 0x000003a6 +#define MSR_P4_FLAME_ESCR1 0x000003a7 +#define MSR_P4_FSB_ESCR0 0x000003a2 +#define MSR_P4_FSB_ESCR1 0x000003a3 +#define MSR_P4_IQ_ESCR0 0x000003ba +#define MSR_P4_IQ_ESCR1 0x000003bb +#define MSR_P4_IS_ESCR0 0x000003b4 +#define MSR_P4_IS_ESCR1 0x000003b5 +#define MSR_P4_ITLB_ESCR0 0x000003b6 +#define MSR_P4_ITLB_ESCR1 0x000003b7 +#define MSR_P4_IX_ESCR0 0x000003c8 +#define MSR_P4_IX_ESCR1 0x000003c9 +#define MSR_P4_MOB_ESCR0 0x000003aa +#define MSR_P4_MOB_ESCR1 0x000003ab +#define MSR_P4_MS_ESCR0 0x000003c0 +#define MSR_P4_MS_ESCR1 0x000003c1 +#define MSR_P4_PMH_ESCR0 0x000003ac +#define MSR_P4_PMH_ESCR1 0x000003ad +#define MSR_P4_RAT_ESCR0 0x000003bc +#define MSR_P4_RAT_ESCR1 0x000003bd +#define MSR_P4_SAAT_ESCR0 0x000003ae +#define MSR_P4_SAAT_ESCR1 0x000003af +#define MSR_P4_SSU_ESCR0 0x000003be +#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */ + +#define MSR_P4_TBPU_ESCR0 0x000003c2 +#define MSR_P4_TBPU_ESCR1 0x000003c3 +#define MSR_P4_TC_ESCR0 0x000003c4 +#define MSR_P4_TC_ESCR1 0x000003c5 +#define MSR_P4_U2L_ESCR0 0x000003b0 +#define MSR_P4_U2L_ESCR1 0x000003b1 + +/* Intel Core-based CPU performance counters */ +#define MSR_CORE_PERF_FIXED_CTR0 0x00000309 +#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a +#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b +#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d +#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e +#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 + +/* Geode defined MSRs */ +#define MSR_GEODE_BUSCONT_CONF0 0x00001900 + +#endif /* __ASM_MSR_INDEX_H */ diff --git a/include/asm-i386/msr.h b/include/asm-i386/msr.h index 00acaa8b36b..9559894c765 100644 --- a/include/asm-i386/msr.h +++ b/include/asm-i386/msr.h @@ -1,6 +1,11 @@ #ifndef __ASM_MSR_H #define __ASM_MSR_H +#include + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + #include static inline unsigned long long native_read_msr(unsigned int msr) @@ -153,234 +158,6 @@ static inline void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) wrmsr(msr_no, l, h); } #endif /* CONFIG_SMP */ - -/* symbolic names for some interesting MSRs */ -/* Intel defined MSRs. */ -#define MSR_IA32_P5_MC_ADDR 0 -#define MSR_IA32_P5_MC_TYPE 1 -#define MSR_IA32_PLATFORM_ID 0x17 -#define MSR_IA32_EBL_CR_POWERON 0x2a - -#define MSR_IA32_APICBASE 0x1b -#define MSR_IA32_APICBASE_BSP (1<<8) -#define MSR_IA32_APICBASE_ENABLE (1<<11) -#define MSR_IA32_APICBASE_BASE (0xfffff<<12) - -#define MSR_IA32_UCODE_WRITE 0x79 -#define MSR_IA32_UCODE_REV 0x8b - -#define MSR_P6_PERFCTR0 0xc1 -#define MSR_P6_PERFCTR1 0xc2 -#define MSR_FSB_FREQ 0xcd - - -#define MSR_IA32_BBL_CR_CTL 0x119 - -#define MSR_IA32_SYSENTER_CS 0x174 -#define MSR_IA32_SYSENTER_ESP 0x175 -#define MSR_IA32_SYSENTER_EIP 0x176 - -#define MSR_IA32_MCG_CAP 0x179 -#define MSR_IA32_MCG_STATUS 0x17a -#define MSR_IA32_MCG_CTL 0x17b - -/* P4/Xeon+ specific */ -#define MSR_IA32_MCG_EAX 0x180 -#define MSR_IA32_MCG_EBX 0x181 -#define MSR_IA32_MCG_ECX 0x182 -#define MSR_IA32_MCG_EDX 0x183 -#define MSR_IA32_MCG_ESI 0x184 -#define MSR_IA32_MCG_EDI 0x185 -#define MSR_IA32_MCG_EBP 0x186 -#define MSR_IA32_MCG_ESP 0x187 -#define MSR_IA32_MCG_EFLAGS 0x188 -#define MSR_IA32_MCG_EIP 0x189 -#define MSR_IA32_MCG_RESERVED 0x18A - -#define MSR_P6_EVNTSEL0 0x186 -#define MSR_P6_EVNTSEL1 0x187 - -#define MSR_IA32_PERF_STATUS 0x198 -#define MSR_IA32_PERF_CTL 0x199 - -#define MSR_IA32_MPERF 0xE7 -#define MSR_IA32_APERF 0xE8 - -#define MSR_IA32_THERM_CONTROL 0x19a -#define MSR_IA32_THERM_INTERRUPT 0x19b -#define MSR_IA32_THERM_STATUS 0x19c -#define MSR_IA32_MISC_ENABLE 0x1a0 - -#define MSR_IA32_DEBUGCTLMSR 0x1d9 -#define MSR_IA32_LASTBRANCHFROMIP 0x1db -#define MSR_IA32_LASTBRANCHTOIP 0x1dc -#define MSR_IA32_LASTINTFROMIP 0x1dd -#define MSR_IA32_LASTINTTOIP 0x1de - -#define MSR_IA32_MC0_CTL 0x400 -#define MSR_IA32_MC0_STATUS 0x401 -#define MSR_IA32_MC0_ADDR 0x402 -#define MSR_IA32_MC0_MISC 0x403 - -#define MSR_IA32_PEBS_ENABLE 0x3f1 -#define MSR_IA32_DS_AREA 0x600 -#define MSR_IA32_PERF_CAPABILITIES 0x345 - -/* Pentium IV performance counter MSRs */ -#define MSR_P4_BPU_PERFCTR0 0x300 -#define MSR_P4_BPU_PERFCTR1 0x301 -#define MSR_P4_BPU_PERFCTR2 0x302 -#define MSR_P4_BPU_PERFCTR3 0x303 -#define MSR_P4_MS_PERFCTR0 0x304 -#define MSR_P4_MS_PERFCTR1 0x305 -#define MSR_P4_MS_PERFCTR2 0x306 -#define MSR_P4_MS_PERFCTR3 0x307 -#define MSR_P4_FLAME_PERFCTR0 0x308 -#define MSR_P4_FLAME_PERFCTR1 0x309 -#define MSR_P4_FLAME_PERFCTR2 0x30a -#define MSR_P4_FLAME_PERFCTR3 0x30b -#define MSR_P4_IQ_PERFCTR0 0x30c -#define MSR_P4_IQ_PERFCTR1 0x30d -#define MSR_P4_IQ_PERFCTR2 0x30e -#define MSR_P4_IQ_PERFCTR3 0x30f -#define MSR_P4_IQ_PERFCTR4 0x310 -#define MSR_P4_IQ_PERFCTR5 0x311 -#define MSR_P4_BPU_CCCR0 0x360 -#define MSR_P4_BPU_CCCR1 0x361 -#define MSR_P4_BPU_CCCR2 0x362 -#define MSR_P4_BPU_CCCR3 0x363 -#define MSR_P4_MS_CCCR0 0x364 -#define MSR_P4_MS_CCCR1 0x365 -#define MSR_P4_MS_CCCR2 0x366 -#define MSR_P4_MS_CCCR3 0x367 -#define MSR_P4_FLAME_CCCR0 0x368 -#define MSR_P4_FLAME_CCCR1 0x369 -#define MSR_P4_FLAME_CCCR2 0x36a -#define MSR_P4_FLAME_CCCR3 0x36b -#define MSR_P4_IQ_CCCR0 0x36c -#define MSR_P4_IQ_CCCR1 0x36d -#define MSR_P4_IQ_CCCR2 0x36e -#define MSR_P4_IQ_CCCR3 0x36f -#define MSR_P4_IQ_CCCR4 0x370 -#define MSR_P4_IQ_CCCR5 0x371 -#define MSR_P4_ALF_ESCR0 0x3ca -#define MSR_P4_ALF_ESCR1 0x3cb -#define MSR_P4_BPU_ESCR0 0x3b2 -#define MSR_P4_BPU_ESCR1 0x3b3 -#define MSR_P4_BSU_ESCR0 0x3a0 -#define MSR_P4_BSU_ESCR1 0x3a1 -#define MSR_P4_CRU_ESCR0 0x3b8 -#define MSR_P4_CRU_ESCR1 0x3b9 -#define MSR_P4_CRU_ESCR2 0x3cc -#define MSR_P4_CRU_ESCR3 0x3cd -#define MSR_P4_CRU_ESCR4 0x3e0 -#define MSR_P4_CRU_ESCR5 0x3e1 -#define MSR_P4_DAC_ESCR0 0x3a8 -#define MSR_P4_DAC_ESCR1 0x3a9 -#define MSR_P4_FIRM_ESCR0 0x3a4 -#define MSR_P4_FIRM_ESCR1 0x3a5 -#define MSR_P4_FLAME_ESCR0 0x3a6 -#define MSR_P4_FLAME_ESCR1 0x3a7 -#define MSR_P4_FSB_ESCR0 0x3a2 -#define MSR_P4_FSB_ESCR1 0x3a3 -#define MSR_P4_IQ_ESCR0 0x3ba -#define MSR_P4_IQ_ESCR1 0x3bb -#define MSR_P4_IS_ESCR0 0x3b4 -#define MSR_P4_IS_ESCR1 0x3b5 -#define MSR_P4_ITLB_ESCR0 0x3b6 -#define MSR_P4_ITLB_ESCR1 0x3b7 -#define MSR_P4_IX_ESCR0 0x3c8 -#define MSR_P4_IX_ESCR1 0x3c9 -#define MSR_P4_MOB_ESCR0 0x3aa -#define MSR_P4_MOB_ESCR1 0x3ab -#define MSR_P4_MS_ESCR0 0x3c0 -#define MSR_P4_MS_ESCR1 0x3c1 -#define MSR_P4_PMH_ESCR0 0x3ac -#define MSR_P4_PMH_ESCR1 0x3ad -#define MSR_P4_RAT_ESCR0 0x3bc -#define MSR_P4_RAT_ESCR1 0x3bd -#define MSR_P4_SAAT_ESCR0 0x3ae -#define MSR_P4_SAAT_ESCR1 0x3af -#define MSR_P4_SSU_ESCR0 0x3be -#define MSR_P4_SSU_ESCR1 0x3bf /* guess: not defined in manual */ -#define MSR_P4_TBPU_ESCR0 0x3c2 -#define MSR_P4_TBPU_ESCR1 0x3c3 -#define MSR_P4_TC_ESCR0 0x3c4 -#define MSR_P4_TC_ESCR1 0x3c5 -#define MSR_P4_U2L_ESCR0 0x3b0 -#define MSR_P4_U2L_ESCR1 0x3b1 - -/* AMD Defined MSRs */ -#define MSR_K6_EFER 0xC0000080 -#define MSR_K6_STAR 0xC0000081 -#define MSR_K6_WHCR 0xC0000082 -#define MSR_K6_UWCCR 0xC0000085 -#define MSR_K6_EPMR 0xC0000086 -#define MSR_K6_PSOR 0xC0000087 -#define MSR_K6_PFIR 0xC0000088 - -#define MSR_K7_EVNTSEL0 0xC0010000 -#define MSR_K7_EVNTSEL1 0xC0010001 -#define MSR_K7_EVNTSEL2 0xC0010002 -#define MSR_K7_EVNTSEL3 0xC0010003 -#define MSR_K7_PERFCTR0 0xC0010004 -#define MSR_K7_PERFCTR1 0xC0010005 -#define MSR_K7_PERFCTR2 0xC0010006 -#define MSR_K7_PERFCTR3 0xC0010007 -#define MSR_K7_HWCR 0xC0010015 -#define MSR_K7_CLK_CTL 0xC001001b -#define MSR_K7_FID_VID_CTL 0xC0010041 -#define MSR_K7_FID_VID_STATUS 0xC0010042 - -#define MSR_K8_ENABLE_C1E 0xC0010055 - -/* extended feature register */ -#define MSR_EFER 0xc0000080 - -/* EFER bits: */ - -/* Execute Disable enable */ -#define _EFER_NX 11 -#define EFER_NX (1<<_EFER_NX) - -/* Centaur-Hauls/IDT defined MSRs. */ -#define MSR_IDT_FCR1 0x107 -#define MSR_IDT_FCR2 0x108 -#define MSR_IDT_FCR3 0x109 -#define MSR_IDT_FCR4 0x10a - -#define MSR_IDT_MCR0 0x110 -#define MSR_IDT_MCR1 0x111 -#define MSR_IDT_MCR2 0x112 -#define MSR_IDT_MCR3 0x113 -#define MSR_IDT_MCR4 0x114 -#define MSR_IDT_MCR5 0x115 -#define MSR_IDT_MCR6 0x116 -#define MSR_IDT_MCR7 0x117 -#define MSR_IDT_MCR_CTRL 0x120 - -/* VIA Cyrix defined MSRs*/ -#define MSR_VIA_FCR 0x1107 -#define MSR_VIA_LONGHAUL 0x110a -#define MSR_VIA_RNG 0x110b -#define MSR_VIA_BCR2 0x1147 - -/* Transmeta defined MSRs */ -#define MSR_TMTA_LONGRUN_CTRL 0x80868010 -#define MSR_TMTA_LONGRUN_FLAGS 0x80868011 -#define MSR_TMTA_LRTI_READOUT 0x80868018 -#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a - -/* Intel Core-based CPU performance counters */ -#define MSR_CORE_PERF_FIXED_CTR0 0x309 -#define MSR_CORE_PERF_FIXED_CTR1 0x30a -#define MSR_CORE_PERF_FIXED_CTR2 0x30b -#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x38d -#define MSR_CORE_PERF_GLOBAL_STATUS 0x38e -#define MSR_CORE_PERF_GLOBAL_CTRL 0x38f -#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x390 - -/* Geode defined MSRs */ -#define MSR_GEODE_BUSCONT_CONF0 0x1900 - +#endif +#endif #endif /* __ASM_MSR_H */ diff --git a/include/asm-i386/processor-flags.h b/include/asm-i386/processor-flags.h index b4711c222e2..5404e90edd5 100644 --- a/include/asm-i386/processor-flags.h +++ b/include/asm-i386/processor-flags.h @@ -23,4 +23,69 @@ #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ +/* + * Basic CPU control in CR0 + */ +#define X86_CR0_PE 0x00000001 /* Protection Enable */ +#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor */ +#define X86_CR0_EM 0x00000004 /* Emulation */ +#define X86_CR0_TS 0x00000008 /* Task Switched */ +#define X86_CR0_ET 0x00000010 /* Extension Type */ +#define X86_CR0_NE 0x00000020 /* Numeric Error */ +#define X86_CR0_WP 0x00010000 /* Write Protect */ +#define X86_CR0_AM 0x00040000 /* Alignment Mask */ +#define X86_CR0_NW 0x20000000 /* Not Write-through */ +#define X86_CR0_CD 0x40000000 /* Cache Disable */ +#define X86_CR0_PG 0x80000000 /* Paging */ + +/* + * Paging options in CR3 + */ +#define X86_CR3_PWT 0x00000008 /* Page Write Through */ +#define X86_CR3_PCD 0x00000010 /* Page Cache Disable */ + +/* + * Intel CPU features in CR4 + */ +#define X86_CR4_VME 0x00000001 /* enable vm86 extensions */ +#define X86_CR4_PVI 0x00000002 /* virtual interrupts flag enable */ +#define X86_CR4_TSD 0x00000004 /* disable time stamp at ipl 3 */ +#define X86_CR4_DE 0x00000008 /* enable debugging extensions */ +#define X86_CR4_PSE 0x00000010 /* enable page size extensions */ +#define X86_CR4_PAE 0x00000020 /* enable physical address extensions */ +#define X86_CR4_MCE 0x00000040 /* Machine check enable */ +#define X86_CR4_PGE 0x00000080 /* enable global pages */ +#define X86_CR4_PCE 0x00000100 /* enable performance counters at ipl 3 */ +#define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */ +#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ +#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ + +/* + * x86-64 Task Priority Register, CR8 + */ +#define X86_CR8_TPR 0x00000007 /* task priority register */ + +/* + * AMD and Transmeta use MSRs for configuration; see + */ + +/* + * NSC/Cyrix CPU configuration register indexes + */ +#define CX86_PCR0 0x20 +#define CX86_GCR 0xb8 +#define CX86_CCR0 0xc0 +#define CX86_CCR1 0xc1 +#define CX86_CCR2 0xc2 +#define CX86_CCR3 0xc3 +#define CX86_CCR4 0xe8 +#define CX86_CCR5 0xe9 +#define CX86_CCR6 0xea +#define CX86_CCR7 0xeb +#define CX86_PCR1 0xf0 +#define CX86_DIR0 0xfe +#define CX86_DIR1 0xff +#define CX86_ARR_BASE 0xc4 +#define CX86_RCR_BASE 0xdc + #endif /* __ASM_I386_PROCESSOR_FLAGS_H */ diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 882d3f8fbba..77e263267aa 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -142,21 +142,6 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, #define load_cr3(pgdir) write_cr3(__pa(pgdir)) -/* - * Intel CPU features in CR4 - */ -#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ -#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ -#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ -#define X86_CR4_DE 0x0008 /* enable debugging extensions */ -#define X86_CR4_PSE 0x0010 /* enable page size extensions */ -#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ -#define X86_CR4_MCE 0x0040 /* Machine check enable */ -#define X86_CR4_PGE 0x0080 /* enable global pages */ -#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ -#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ -#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ - /* * Save the cr4 feature set we're using (ie * Pentium 4MB enable and PPro Global page @@ -183,26 +168,6 @@ static inline void clear_in_cr4 (unsigned long mask) write_cr4(cr4); } -/* - * NSC/Cyrix CPU configuration register indexes - */ - -#define CX86_PCR0 0x20 -#define CX86_GCR 0xb8 -#define CX86_CCR0 0xc0 -#define CX86_CCR1 0xc1 -#define CX86_CCR2 0xc2 -#define CX86_CCR3 0xc3 -#define CX86_CCR4 0xe8 -#define CX86_CCR5 0xe9 -#define CX86_CCR6 0xea -#define CX86_CCR7 0xeb -#define CX86_PCR1 0xf0 -#define CX86_DIR0 0xfe -#define CX86_DIR1 0xff -#define CX86_ARR_BASE 0xc4 -#define CX86_RCR_BASE 0xdc - /* * NSC/Cyrix CPU indexed register access macros */ -- cgit v1.2.3 From d0175ab64412aabc93da8682aaa99124d6815056 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: Remove smp_alt_instructions The .smp_altinstructions section and its corresponding symbols are completely unused, so remove them. Also, remove stray #ifdef __KENREL__ in asm-i386/alternative.h Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Andi Kleen --- include/asm-i386/alternative.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/alternative.h b/include/asm-i386/alternative.h index dbc1a29284f..4d518eebe46 100644 --- a/include/asm-i386/alternative.h +++ b/include/asm-i386/alternative.h @@ -1,8 +1,6 @@ #ifndef _I386_ALTERNATIVE_H #define _I386_ALTERNATIVE_H -#ifdef __KERNEL__ - #include #include #include @@ -32,9 +30,7 @@ static inline void alternatives_smp_module_add(struct module *mod, char *name, void *text, void *text_end) {} static inline void alternatives_smp_module_del(struct module *mod) {} static inline void alternatives_smp_switch(int smp) {} -#endif - -#endif +#endif /* CONFIG_SMP */ /* * Alternative instructions for different CPU types or capabilities. -- cgit v1.2.3 From a75c54f933bd8db9f4a609bd128663c179b3e6a1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: i386 separate hardware-defined TSS from Linux additions On Thu, 2007-03-29 at 13:16 +0200, Andi Kleen wrote: > Please clean it up properly with two structs. Not sure about this, now I've done it. Running it here. If you like it, I can do x86-64 as well. == lguest defines its own TSS struct because the "struct tss_struct" contains linux-specific additions. Andi asked me to split the struct in processor.h. Unfortunately it makes usage a little awkward. Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen --- include/asm-i386/processor.h | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 77e263267aa..92226047464 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -291,7 +291,8 @@ typedef struct { struct thread_struct; -struct tss_struct { +/* This is the TSS defined by the hardware. */ +struct i386_hw_tss { unsigned short back_link,__blh; unsigned long esp0; unsigned short ss0,__ss0h; @@ -315,6 +316,11 @@ struct tss_struct { unsigned short gs, __gsh; unsigned short ldt, __ldth; unsigned short trace, io_bitmap_base; +} __attribute__((packed)); + +struct tss_struct { + struct i386_hw_tss x86_tss; + /* * The extra 1 is there because the CPU will access an * additional byte beyond the end of the IO permission @@ -381,10 +387,12 @@ struct thread_struct { * be within the limit. */ #define INIT_TSS { \ - .esp0 = sizeof(init_stack) + (long)&init_stack, \ - .ss0 = __KERNEL_DS, \ - .ss1 = __KERNEL_CS, \ - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ + .x86_tss = { \ + .esp0 = sizeof(init_stack) + (long)&init_stack, \ + .ss0 = __KERNEL_DS, \ + .ss1 = __KERNEL_CS, \ + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ + }, \ .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \ } @@ -493,10 +501,10 @@ static inline void rep_nop(void) static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread) { - tss->esp0 = thread->esp0; + tss->x86_tss.esp0 = thread->esp0; /* This can only happen when SEP is enabled, no need to test "SEP"arately */ - if (unlikely(tss->ss1 != thread->sysenter_cs)) { - tss->ss1 = thread->sysenter_cs; + if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { + tss->x86_tss.ss1 = thread->sysenter_cs; wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); } } -- cgit v1.2.3 From 45876233605c268e929a7875081e129debe34bdc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: PARAVIRT: use paravirt_nop to consistently mark no-op operations Add a _paravirt_nop function for use as a stub for no-op operations, and paravirt_nop #defined void * version to make using it easier (since all its uses are as a void *). This is useful to allow the patcher to automatically identify noop operations so it can simply nop out the callsite. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Acked-by: Ingo Molnar [mingo] but only as a cleanup of the current open-coded (void *) casts. My problem with this is that it loses the types. Not that there is much to check for, but still, this adds some assumptions about how function calls look like --- include/asm-i386/paravirt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 32acebce9ae..f0bdaea6235 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -434,6 +434,9 @@ static inline void pmd_clear(pmd_t *pmdp) #define arch_leave_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_NONE) #define arch_flush_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_FLUSH) +void _paravirt_nop(void); +#define paravirt_nop ((void *)_paravirt_nop) + /* These all sit in the .parainstructions section to tell us what to patch. */ struct paravirt_patch { u8 *instr; /* original instructions */ -- cgit v1.2.3 From 3dc494e86d1c93afd4c66385f270899dbfae483d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: PARAVIRT: Add pagetable accessors to pack and unpack pagetable entries Add a set of accessors to pack, unpack and modify page table entries (at all levels). This allows a paravirt implementation to control the contents of pgd/pmd/pte entries. For example, Xen uses this to convert the (pseudo-)physical address into a machine address when populating a pagetable entry, and converting back to pphys address when an entry is read. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Acked-by: Ingo Molnar --- include/asm-i386/page.h | 79 +++++++++++++++++++++++++++++++++------ include/asm-i386/paravirt.h | 52 ++++++++++++++++++++------ include/asm-i386/pgtable-2level.h | 26 ++++++++++--- include/asm-i386/pgtable-3level.h | 63 ++++++++++++++++++------------- include/asm-i386/pgtable.h | 2 + 5 files changed, 169 insertions(+), 53 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index fd3f64ace24..818ac8bf01e 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -12,7 +12,6 @@ #ifdef __KERNEL__ #ifndef __ASSEMBLY__ - #ifdef CONFIG_X86_USE_3DNOW #include @@ -42,26 +41,81 @@ * These are used to make use of C type-checking.. */ extern int nx_enabled; + #ifdef CONFIG_X86_PAE extern unsigned long long __supported_pte_mask; typedef struct { unsigned long pte_low, pte_high; } pte_t; typedef struct { unsigned long long pmd; } pmd_t; typedef struct { unsigned long long pgd; } pgd_t; typedef struct { unsigned long long pgprot; } pgprot_t; -#define pmd_val(x) ((x).pmd) -#define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) -#define __pmd(x) ((pmd_t) { (x) } ) + +static inline unsigned long long native_pgd_val(pgd_t pgd) +{ + return pgd.pgd; +} + +static inline unsigned long long native_pmd_val(pmd_t pmd) +{ + return pmd.pmd; +} + +static inline unsigned long long native_pte_val(pte_t pte) +{ + return pte.pte_low | ((unsigned long long)pte.pte_high << 32); +} + +static inline pgd_t native_make_pgd(unsigned long long val) +{ + return (pgd_t) { val }; +} + +static inline pmd_t native_make_pmd(unsigned long long val) +{ + return (pmd_t) { val }; +} + +static inline pte_t native_make_pte(unsigned long long val) +{ + return (pte_t) { .pte_low = val, .pte_high = (val >> 32) } ; +} + +#ifndef CONFIG_PARAVIRT +#define pmd_val(x) native_pmd_val(x) +#define __pmd(x) native_make_pmd(x) +#endif + #define HPAGE_SHIFT 21 #include -#else +#else /* !CONFIG_X86_PAE */ typedef struct { unsigned long pte_low; } pte_t; typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; #define boot_pte_t pte_t /* or would you rather have a typedef */ -#define pte_val(x) ((x).pte_low) + +static inline unsigned long native_pgd_val(pgd_t pgd) +{ + return pgd.pgd; +} + +static inline unsigned long native_pte_val(pte_t pte) +{ + return pte.pte_low; +} + +static inline pgd_t native_make_pgd(unsigned long val) +{ + return (pgd_t) { val }; +} + +static inline pte_t native_make_pte(unsigned long val) +{ + return (pte_t) { .pte_low = val }; +} + #define HPAGE_SHIFT 22 #include -#endif +#endif /* CONFIG_X86_PAE */ + #define PTE_MASK PAGE_MASK #ifdef CONFIG_HUGETLB_PAGE @@ -71,13 +125,16 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #endif -#define pgd_val(x) ((x).pgd) #define pgprot_val(x) ((x).pgprot) - -#define __pte(x) ((pte_t) { (x) } ) -#define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) +#ifndef CONFIG_PARAVIRT +#define pgd_val(x) native_pgd_val(x) +#define __pgd(x) native_make_pgd(x) +#define pte_val(x) native_pte_val(x) +#define __pte(x) native_make_pte(x) +#endif + #endif /* !__ASSEMBLY__ */ /* to align the pointer to the (next) page boundary */ diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index f0bdaea6235..0aacb13bb92 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -2,7 +2,6 @@ #define __ASM_PARAVIRT_H /* Various instructions on x86 need to be replaced for * para-virtualization: those hooks are defined here. */ -#include #include #include @@ -25,6 +24,8 @@ #define CLBR_ANY 0x7 #ifndef __ASSEMBLY__ +#include + struct thread_struct; struct Xgt_desc_struct; struct tss_struct; @@ -55,11 +56,6 @@ struct paravirt_ops int (*set_wallclock)(unsigned long); void (*time_init)(void); - /* All the function pointers here are declared as "fastcall" - so that we get a specific register-based calling - convention. This makes it easier to implement inline - assembler replacements. */ - void (*cpuid)(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); @@ -139,16 +135,33 @@ struct paravirt_ops void (*release_pd)(u32 pfn); void (*set_pte)(pte_t *ptep, pte_t pteval); - void (*set_pte_at)(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval); + void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); - void (*pte_update)(struct mm_struct *mm, u32 addr, pte_t *ptep); - void (*pte_update_defer)(struct mm_struct *mm, u32 addr, pte_t *ptep); + void (*pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + void (*pte_update_defer)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + + pte_t (*ptep_get_and_clear)(pte_t *ptep); + #ifdef CONFIG_X86_PAE void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); - void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); + void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); void (*set_pud)(pud_t *pudp, pud_t pudval); - void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void (*pmd_clear)(pmd_t *pmdp); + + unsigned long long (*pte_val)(pte_t); + unsigned long long (*pmd_val)(pmd_t); + unsigned long long (*pgd_val)(pgd_t); + + pte_t (*make_pte)(unsigned long long pte); + pmd_t (*make_pmd)(unsigned long long pmd); + pgd_t (*make_pgd)(unsigned long long pgd); +#else + unsigned long (*pte_val)(pte_t); + unsigned long (*pgd_val)(pgd_t); + + pte_t (*make_pte)(unsigned long pte); + pgd_t (*make_pgd)(unsigned long pgd); #endif void (*set_lazy_mode)(int mode); @@ -219,6 +232,8 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, #define read_cr4_safe(x) paravirt_ops.read_cr4_safe() #define write_cr4(x) paravirt_ops.write_cr4(x) +#define raw_ptep_get_and_clear(xp) (paravirt_ops.ptep_get_and_clear(xp)) + static inline void raw_safe_halt(void) { paravirt_ops.safe_halt(); @@ -304,6 +319,17 @@ static inline void halt(void) (paravirt_ops.write_idt_entry((dt), (entry), (low), (high))) #define set_iopl_mask(mask) (paravirt_ops.set_iopl_mask(mask)) +#define __pte(x) paravirt_ops.make_pte(x) +#define __pgd(x) paravirt_ops.make_pgd(x) + +#define pte_val(x) paravirt_ops.pte_val(x) +#define pgd_val(x) paravirt_ops.pgd_val(x) + +#ifdef CONFIG_X86_PAE +#define __pmd(x) paravirt_ops.make_pmd(x) +#define pmd_val(x) paravirt_ops.pmd_val(x) +#endif + /* The paravirtualized I/O functions */ static inline void slow_down_io(void) { paravirt_ops.io_delay(); @@ -344,6 +370,7 @@ static inline void setup_secondary_clock(void) } #endif + #ifdef CONFIG_SMP static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, unsigned long start_esp) @@ -371,7 +398,8 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) paravirt_ops.set_pte(ptep, pteval); } -static inline void set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) { paravirt_ops.set_pte_at(mm, addr, ptep, pteval); } diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 38c3fcc0676..043a2bcfa86 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -11,10 +11,23 @@ * within a page table are directly modified. Thus, the following * hook is made available. */ +static inline void native_set_pte(pte_t *ptep , pte_t pte) +{ + *ptep = pte; +} +static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep , pte_t pte) +{ + native_set_pte(ptep, pte); +} +static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; +} #ifndef CONFIG_PARAVIRT -#define set_pte(pteptr, pteval) (*(pteptr) = pteval) -#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) -#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) +#define set_pte(pteptr, pteval) native_set_pte(pteptr, pteval) +#define set_pte_at(mm,addr,ptep,pteval) native_set_pte_at(mm, addr, ptep, pteval) +#define set_pmd(pmdptr, pmdval) native_set_pmd(pmdptr, pmdval) #endif #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) @@ -23,11 +36,14 @@ #define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -#define raw_ptep_get_and_clear(xp) __pte(xchg(&(xp)->pte_low, 0)) +static inline pte_t native_ptep_get_and_clear(pte_t *xp) +{ + return __pte(xchg(&xp->pte_low, 0)); +} #define pte_page(x) pfn_to_page(pte_pfn(x)) #define pte_none(x) (!(x).pte_low) -#define pte_pfn(x) ((unsigned long)(((x).pte_low >> PAGE_SHIFT))) +#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) #define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) #define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index 7a2318f3830..be6017f37a9 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h @@ -42,20 +42,23 @@ static inline int pte_exec_kernel(pte_t pte) return pte_x(pte); } -#ifndef CONFIG_PARAVIRT /* Rules for using set_pte: the pte being assigned *must* be * either not present or in a state where the hardware will * not attempt to update the pte. In places where this is * not possible, use pte_get_and_clear to obtain the old pte * value and then use set_pte to update it. -ben */ -static inline void set_pte(pte_t *ptep, pte_t pte) +static inline void native_set_pte(pte_t *ptep, pte_t pte) { ptep->pte_high = pte.pte_high; smp_wmb(); ptep->pte_low = pte.pte_low; } -#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) +static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep , pte_t pte) +{ + native_set_pte(ptep, pte); +} /* * Since this is only called on user PTEs, and the page fault handler @@ -63,7 +66,8 @@ static inline void set_pte(pte_t *ptep, pte_t pte) * we are justified in merely clearing the PTE present bit, followed * by a set. The ordering here is important. */ -static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +static inline void native_set_pte_present(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) { ptep->pte_low = 0; smp_wmb(); @@ -72,32 +76,48 @@ static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte ptep->pte_low = pte.pte_low; } -#define set_pte_atomic(pteptr,pteval) \ - set_64bit((unsigned long long *)(pteptr),pte_val(pteval)) -#define set_pmd(pmdptr,pmdval) \ - set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval)) -#define set_pud(pudptr,pudval) \ - (*(pudptr) = (pudval)) +static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) +{ + set_64bit((unsigned long long *)(ptep),native_pte_val(pte)); +} +static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + set_64bit((unsigned long long *)(pmdp),native_pmd_val(pmd)); +} +static inline void native_set_pud(pud_t *pudp, pud_t pud) +{ + *pudp = pud; +} /* * For PTEs and PDEs, we must clear the P-bit first when clearing a page table * entry, so clear the bottom half first and enforce ordering with a compiler * barrier. */ -static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { ptep->pte_low = 0; smp_wmb(); ptep->pte_high = 0; } -static inline void pmd_clear(pmd_t *pmd) +static inline void native_pmd_clear(pmd_t *pmd) { u32 *tmp = (u32 *)pmd; *tmp = 0; smp_wmb(); *(tmp + 1) = 0; } + +#ifndef CONFIG_PARAVIRT +#define set_pte(ptep, pte) native_set_pte(ptep, pte) +#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) +#define set_pte_present(mm, addr, ptep, pte) native_set_pte_present(mm, addr, ptep, pte) +#define set_pte_atomic(ptep, pte) native_set_pte_atomic(ptep, pte) +#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd) +#define set_pud(pudp, pud) native_set_pud(pudp, pud) +#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) +#define pmd_clear(pmd) native_pmd_clear(pmd) #endif /* @@ -119,7 +139,7 @@ static inline void pud_clear (pud_t * pud) { } #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ pmd_index(address)) -static inline pte_t raw_ptep_get_and_clear(pte_t *ptep) +static inline pte_t native_ptep_get_and_clear(pte_t *ptep) { pte_t res; @@ -146,28 +166,21 @@ static inline int pte_none(pte_t pte) static inline unsigned long pte_pfn(pte_t pte) { - return (pte.pte_low >> PAGE_SHIFT) | - (pte.pte_high << (32 - PAGE_SHIFT)); + return pte_val(pte) >> PAGE_SHIFT; } extern unsigned long long __supported_pte_mask; static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - pte_t pte; - - pte.pte_high = (page_nr >> (32 - PAGE_SHIFT)) | \ - (pgprot_val(pgprot) >> 32); - pte.pte_high &= (__supported_pte_mask >> 32); - pte.pte_low = ((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) & \ - __supported_pte_mask; - return pte; + return __pte((((unsigned long long)page_nr << PAGE_SHIFT) | + pgprot_val(pgprot)) & __supported_pte_mask); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | \ - pgprot_val(pgprot)) & __supported_pte_mask); + return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | + pgprot_val(pgprot)) & __supported_pte_mask); } /* diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 143ddc42b86..147f2553784 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -266,6 +266,8 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p #define pte_update(mm, addr, ptep) do { } while (0) #define pte_update_defer(mm, addr, ptep) do { } while (0) #define paravirt_map_pt_hook(slot, va, pfn) do { } while (0) + +#define raw_ptep_get_and_clear(xp) native_ptep_get_and_clear(xp) #endif /* -- cgit v1.2.3 From b239fb2501117bf3aeb4dd6926edd855be92333d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: PARAVIRT: Hooks to set up initial pagetable This patch introduces paravirt_ops hooks to control how the kernel's initial pagetable is set up. In the case of a native boot, the very early bootstrap code creates a simple non-PAE pagetable to map the kernel and physical memory. When the VM subsystem is initialized, it creates a proper pagetable which respects the PAE mode, large pages, etc. When booting under a hypervisor, there are many possibilities for what paging environment the hypervisor establishes for the guest kernel, so the constructon of the kernel's pagetable depends on the hypervisor. In the case of Xen, the hypervisor boots the kernel with a fully constructed pagetable, which is already using PAE if necessary. Also, Xen requires particular care when constructing pagetables to make sure all pagetables are always mapped read-only. In order to make this easier, kernel's initial pagetable construction has been changed to only allocate and initialize a pagetable page if there's no page already present in the pagetable. This allows the Xen paravirt backend to make a copy of the hypervisor-provided pagetable, allowing the kernel to establish any more mappings it needs while keeping the existing ones. A slightly subtle point which is worth highlighting here is that Xen requires all kernel mappings to share the same pte_t pages between all pagetables, so that updating a kernel page's mapping in one pagetable is reflected in all other pagetables. This makes it possible to allocate a page and attach it to a pagetable without having to explicitly enumerate that page's mapping in all pagetables. And: +From: "Eric W. Biederman" If we don't set the leaf page table entries it is quite possible that will inherit and incorrect page table entry from the initial boot page table setup in head.S. So we need to redo the effort here, so we pick up PSE, PGE and the like. Hypervisors like Xen require that their page tables be read-only, which is slightly incompatible with our low identity mappings, however I discussed this with Jeremy he has modified the Xen early set_pte function to avoid problems in this area. Signed-off-by: Eric W. Biederman Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Acked-by: William Irwin Cc: Ingo Molnar --- include/asm-i386/paravirt.h | 17 ++++++++++++++++- include/asm-i386/pgtable.h | 16 ++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 0aacb13bb92..c49b44cdd8e 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -2,10 +2,11 @@ #define __ASM_PARAVIRT_H /* Various instructions on x86 need to be replaced for * para-virtualization: those hooks are defined here. */ + +#ifdef CONFIG_PARAVIRT #include #include -#ifdef CONFIG_PARAVIRT /* These are the most performance critical ops, so we want to be able to patch * callers */ #define PARAVIRT_IRQ_DISABLE 0 @@ -50,6 +51,9 @@ struct paravirt_ops char *(*memory_setup)(void); void (*init_IRQ)(void); + void (*pagetable_setup_start)(pgd_t *pgd_base); + void (*pagetable_setup_done)(pgd_t *pgd_base); + void (*banner)(void); unsigned long (*get_wallclock)(void); @@ -370,6 +374,17 @@ static inline void setup_secondary_clock(void) } #endif +static inline void paravirt_pagetable_setup_start(pgd_t *base) +{ + if (paravirt_ops.pagetable_setup_start) + (*paravirt_ops.pagetable_setup_start)(base); +} + +static inline void paravirt_pagetable_setup_done(pgd_t *base) +{ + if (paravirt_ops.pagetable_setup_done) + (*paravirt_ops.pagetable_setup_done)(base); +} #ifdef CONFIG_SMP static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 147f2553784..0790ad6ed44 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -514,6 +514,22 @@ do { \ * tables contain all the necessary information. */ #define update_mmu_cache(vma,address,pte) do { } while (0) + +void native_pagetable_setup_start(pgd_t *base); +void native_pagetable_setup_done(pgd_t *base); + +#ifndef CONFIG_PARAVIRT +static inline void paravirt_pagetable_setup_start(pgd_t *base) +{ + native_pagetable_setup_start(base); +} + +static inline void paravirt_pagetable_setup_done(pgd_t *base) +{ + native_pagetable_setup_done(base); +} +#endif /* !CONFIG_PARAVIRT */ + #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_FLATMEM -- cgit v1.2.3 From 90caccb9758e88db68a69553689baee38254287b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: PARAVIRT: Allocate a fixmap slot Allocate a fixmap slot for use by a paravirt_ops implementation. This is intended for early-boot bootstrap mappings. Once the zones and allocator have been set up, it would be better to use get_vm_area() to allocate some virtual space. Xen uses this to map the hypervisor's shared info page, which doesn't have a pseudo-physical page number, and therefore can't be mapped ordinarily. It is needed early because it contains the vcpu state, including the interrupt mask. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Acked-by: Ingo Molnar --- include/asm-i386/fixmap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h index e5651b2a585..80ea052ee3a 100644 --- a/include/asm-i386/fixmap.h +++ b/include/asm-i386/fixmap.h @@ -83,6 +83,9 @@ enum fixed_addresses { #endif #ifdef CONFIG_PCI_MMCONFIG FIX_PCIE_MCFG, +#endif +#ifdef CONFIG_PARAVIRT + FIX_PARAVIRT_BOOTMAP, #endif __end_of_permanent_fixed_addresses, /* temporary boot-time mappings, used before ioremap() is functional */ -- cgit v1.2.3 From 5311ab62cdc7788784971ed816ce85e926f3e994 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: PARAVIRT: Allow paravirt backend to choose kernel PMD sharing Normally when running in PAE mode, the 4th PMD maps the kernel address space, which can be shared among all processes (since they all need the same kernel mappings). Xen, however, does not allow guests to have the kernel pmd shared between page tables, so parameterize pgtable.c to allow both modes of operation. There are several side-effects of this. One is that vmalloc will update the kernel address space mappings, and those updates need to be propagated into all processes if the kernel mappings are not intrinsically shared. In the non-PAE case, this is done by maintaining a pgd_list of all processes; this list is used when all process pagetables must be updated. pgd_list is threaded via otherwise unused entries in the page structure for the pgd, which means that the pgd must be page-sized for this to work. Normally the PAE pgd is only 4x64 byte entries large, but Xen requires the PAE pgd to page aligned anyway, so this patch forces the pgd to be page aligned+sized when the kernel pmd is unshared, to accomodate both these requirements. Also, since there may be several distinct kernel pmds (if the user/kernel split is below 3G), there's no point in allocating them from a slab cache; they're just allocated with get_free_page and initialized appropriately. (Of course the could be cached if there is just a single kernel pmd - which is the default with a 3G user/kernel split - but it doesn't seem worthwhile to add yet another case into this code). [ Many thanks to wli for review comments. ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: William Lee Irwin III Signed-off-by: Andi Kleen Cc: Zachary Amsden Cc: Christoph Lameter Acked-by: Ingo Molnar Signed-off-by: Andrew Morton --- include/asm-i386/paravirt.h | 1 + include/asm-i386/pgtable-2level-defs.h | 2 ++ include/asm-i386/pgtable-2level.h | 2 -- include/asm-i386/pgtable-3level-defs.h | 6 ++++++ include/asm-i386/pgtable-3level.h | 2 -- include/asm-i386/pgtable.h | 2 ++ 6 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index c49b44cdd8e..f93599dc775 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -35,6 +35,7 @@ struct desc_struct; struct paravirt_ops { unsigned int kernel_rpl; + int shared_kernel_pmd; int paravirt_enabled; const char *name; diff --git a/include/asm-i386/pgtable-2level-defs.h b/include/asm-i386/pgtable-2level-defs.h index 02518079f81..0f71c9f13da 100644 --- a/include/asm-i386/pgtable-2level-defs.h +++ b/include/asm-i386/pgtable-2level-defs.h @@ -1,6 +1,8 @@ #ifndef _I386_PGTABLE_2LEVEL_DEFS_H #define _I386_PGTABLE_2LEVEL_DEFS_H +#define SHARED_KERNEL_PMD 0 + /* * traditional i386 two-level paging structure: */ diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 043a2bcfa86..781fe4bcc96 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -82,6 +82,4 @@ static inline int pte_exec_kernel(pte_t pte) #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -void vmalloc_sync_all(void); - #endif /* _I386_PGTABLE_2LEVEL_H */ diff --git a/include/asm-i386/pgtable-3level-defs.h b/include/asm-i386/pgtable-3level-defs.h index eb3a1ea8867..c0df89f66e8 100644 --- a/include/asm-i386/pgtable-3level-defs.h +++ b/include/asm-i386/pgtable-3level-defs.h @@ -1,6 +1,12 @@ #ifndef _I386_PGTABLE_3LEVEL_DEFS_H #define _I386_PGTABLE_3LEVEL_DEFS_H +#ifdef CONFIG_PARAVIRT +#define SHARED_KERNEL_PMD (paravirt_ops.shared_kernel_pmd) +#else +#define SHARED_KERNEL_PMD 1 +#endif + /* * PGDIR_SHIFT determines what a top-level page table entry can map */ diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index be6017f37a9..664bfee5a2f 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h @@ -200,6 +200,4 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) #define __pmd_free_tlb(tlb, x) do { } while (0) -#define vmalloc_sync_all() ((void)0) - #endif /* _I386_PGTABLE_3LEVEL_H */ diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 0790ad6ed44..5b88a6a1278 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -243,6 +243,8 @@ static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; re static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; } +extern void vmalloc_sync_all(void); + #ifdef CONFIG_X86_PAE # include #else -- cgit v1.2.3 From d6dd61c831226f9cd7750885da04d360d6455101 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] x86: PARAVIRT: add hooks to intercept mm creation and destruction Add hooks to allow a paravirt implementation to track the lifetime of an mm. Paravirtualization requires three hooks, but only two are needed in common code. They are: arch_dup_mmap, which is called when a new mmap is created at fork arch_exit_mmap, which is called when the last process reference to an mm is dropped, which typically happens on exit and exec. The third hook is activate_mm, which is called from the arch-specific activate_mm() macro/function, and so doesn't need stub versions for other architectures. It's called when an mm is first used. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: linux-arch@vger.kernel.org Cc: James Bottomley Acked-by: Ingo Molnar --- include/asm-i386/mmu_context.h | 17 +++++++++++++++-- include/asm-i386/paravirt.h | 23 +++++++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h index e6aa30f8de5..8198d1cca1f 100644 --- a/include/asm-i386/mmu_context.h +++ b/include/asm-i386/mmu_context.h @@ -5,6 +5,16 @@ #include #include #include +#include +#ifndef CONFIG_PARAVIRT +#include + +static inline void paravirt_activate_mm(struct mm_struct *prev, + struct mm_struct *next) +{ +} +#endif /* !CONFIG_PARAVIRT */ + /* * Used for LDT copy/destruction. @@ -65,7 +75,10 @@ static inline void switch_mm(struct mm_struct *prev, #define deactivate_mm(tsk, mm) \ asm("movl %0,%%gs": :"r" (0)); -#define activate_mm(prev, next) \ - switch_mm((prev),(next),NULL) +#define activate_mm(prev, next) \ + do { \ + paravirt_activate_mm(prev, next); \ + switch_mm((prev),(next),NULL); \ + } while(0); #endif diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index f93599dc775..61c03f1e0c2 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -119,6 +119,12 @@ struct paravirt_ops void (*io_delay)(void); + void (*activate_mm)(struct mm_struct *prev, + struct mm_struct *next); + void (*dup_mmap)(struct mm_struct *oldmm, + struct mm_struct *mm); + void (*exit_mmap)(struct mm_struct *mm); + #ifdef CONFIG_X86_LOCAL_APIC void (*apic_write)(unsigned long reg, unsigned long v); void (*apic_write_atomic)(unsigned long reg, unsigned long v); @@ -395,6 +401,23 @@ static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, } #endif +static inline void paravirt_activate_mm(struct mm_struct *prev, + struct mm_struct *next) +{ + paravirt_ops.activate_mm(prev, next); +} + +static inline void arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) +{ + paravirt_ops.dup_mmap(oldmm, mm); +} + +static inline void arch_exit_mmap(struct mm_struct *mm) +{ + paravirt_ops.exit_mmap(mm); +} + #define __flush_tlb() paravirt_ops.flush_tlb_user() #define __flush_tlb_global() paravirt_ops.flush_tlb_kernel() #define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr) -- cgit v1.2.3 From 98de032b681d8a7532d44dfc66aa5c0c1c755a9d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] i386: PARAVIRT: rename struct paravirt_patch to paravirt_patch_site for clarity Rename struct paravirt_patch to paravirt_patch_site, so that it clearly refers to a callsite, and not the patch which may be applied to that callsite. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Zachary Amsden --- include/asm-i386/alternative.h | 8 +++++--- include/asm-i386/paravirt.h | 5 ++++- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/alternative.h b/include/asm-i386/alternative.h index 4d518eebe46..5b59d07e9d2 100644 --- a/include/asm-i386/alternative.h +++ b/include/asm-i386/alternative.h @@ -115,12 +115,14 @@ static inline void alternatives_smp_switch(int smp) {} #define LOCK_PREFIX "" #endif -struct paravirt_patch; +struct paravirt_patch_site; #ifdef CONFIG_PARAVIRT -void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end); +void apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end); #else static inline void -apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end) +apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end) {} #define __start_parainstructions NULL #define __stop_parainstructions NULL diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 61c03f1e0c2..b4cc2fc4031 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -505,13 +505,16 @@ void _paravirt_nop(void); #define paravirt_nop ((void *)_paravirt_nop) /* These all sit in the .parainstructions section to tell us what to patch. */ -struct paravirt_patch { +struct paravirt_patch_site { u8 *instr; /* original instructions */ u8 instrtype; /* type of this instruction */ u8 len; /* length of original instruction */ u16 clobbers; /* what registers you may clobber */ }; +extern struct paravirt_patch_site __parainstructions[], + __parainstructions_end[]; + #define paravirt_alt(insn_string, typenum, clobber) \ "771:\n\t" insn_string "\n" "772:\n" \ ".pushsection .parainstructions,\"a\"\n" \ -- cgit v1.2.3 From d582203578a1f3d408e27bb9042e8635954cd320 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] i386: PARAVIRT: Use patch site IDs computed from offset in paravirt_ops structure Use patch type identifiers derived from the offset of the operation in the paravirt_ops structure. This avoids having to maintain a separate enum for patch site types. Also, since the identifier is derived from the offset into paravirt_ops, the offset can be derived from the identifier. This is used to remove replicated information in the various callsite macros, which has been a source of bugs in the past. This patch also drops the fused save_fl+cli operation, which doesn't really add much and makes things more complex - specifically because it breaks the 1:1 relationship between identifiers and offsets. If this operation turns out to be particularly beneficial, then the right answer is to define a new entrypoint for it. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Zachary Amsden --- include/asm-i386/paravirt.h | 177 +++++++++++++++++++++++--------------------- 1 file changed, 92 insertions(+), 85 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index b4cc2fc4031..1dbc01f4ed4 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -4,19 +4,8 @@ * para-virtualization: those hooks are defined here. */ #ifdef CONFIG_PARAVIRT -#include #include -/* These are the most performance critical ops, so we want to be able to patch - * callers */ -#define PARAVIRT_IRQ_DISABLE 0 -#define PARAVIRT_IRQ_ENABLE 1 -#define PARAVIRT_RESTORE_FLAGS 2 -#define PARAVIRT_SAVE_FLAGS 3 -#define PARAVIRT_SAVE_FLAGS_IRQ_DISABLE 4 -#define PARAVIRT_INTERRUPT_RETURN 5 -#define PARAVIRT_STI_SYSEXIT 6 - /* Bitmask of what can be clobbered: usually at least eax. */ #define CLBR_NONE 0x0 #define CLBR_EAX 0x1 @@ -191,6 +180,28 @@ struct paravirt_ops extern struct paravirt_ops paravirt_ops; +#define PARAVIRT_PATCH(x) \ + (offsetof(struct paravirt_ops, x) / sizeof(void *)) + +#define paravirt_type(type) \ + [paravirt_typenum] "i" (PARAVIRT_PATCH(type)) +#define paravirt_clobber(clobber) \ + [paravirt_clobber] "i" (clobber) + +#define PARAVIRT_CALL "call *paravirt_ops+%c[paravirt_typenum]*4;" + +#define _paravirt_alt(insn_string, type, clobber) \ + "771:\n\t" insn_string "\n" "772:\n" \ + ".pushsection .parainstructions,\"a\"\n" \ + " .long 771b\n" \ + " .byte " type "\n" \ + " .byte 772b-771b\n" \ + " .short " clobber "\n" \ + ".popsection\n" + +#define paravirt_alt(insn_string) \ + _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") + #define paravirt_enabled() (paravirt_ops.paravirt_enabled) static inline void load_esp0(struct tss_struct *tss, @@ -515,93 +526,89 @@ struct paravirt_patch_site { extern struct paravirt_patch_site __parainstructions[], __parainstructions_end[]; -#define paravirt_alt(insn_string, typenum, clobber) \ - "771:\n\t" insn_string "\n" "772:\n" \ - ".pushsection .parainstructions,\"a\"\n" \ - " .long 771b\n" \ - " .byte " __stringify(typenum) "\n" \ - " .byte 772b-771b\n" \ - " .short " __stringify(clobber) "\n" \ - ".popsection" - static inline unsigned long __raw_local_save_flags(void) { unsigned long f; - __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" - "call *%1;" - "popl %%edx; popl %%ecx", - PARAVIRT_SAVE_FLAGS, CLBR_NONE) - : "=a"(f): "m"(paravirt_ops.save_fl) - : "memory", "cc"); + asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;" + PARAVIRT_CALL + "popl %%edx; popl %%ecx") + : "=a"(f) + : paravirt_type(save_fl), + paravirt_clobber(CLBR_NONE) + : "memory", "cc"); return f; } static inline void raw_local_irq_restore(unsigned long f) { - __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" - "call *%1;" - "popl %%edx; popl %%ecx", - PARAVIRT_RESTORE_FLAGS, CLBR_EAX) - : "=a"(f) : "m" (paravirt_ops.restore_fl), "0"(f) - : "memory", "cc"); + asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;" + PARAVIRT_CALL + "popl %%edx; popl %%ecx") + : "=a"(f) + : "0"(f), + paravirt_type(restore_fl), + paravirt_clobber(CLBR_EAX) + : "memory", "cc"); } static inline void raw_local_irq_disable(void) { - __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" - "call *%0;" - "popl %%edx; popl %%ecx", - PARAVIRT_IRQ_DISABLE, CLBR_EAX) - : : "m" (paravirt_ops.irq_disable) - : "memory", "eax", "cc"); + asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;" + PARAVIRT_CALL + "popl %%edx; popl %%ecx") + : + : paravirt_type(irq_disable), + paravirt_clobber(CLBR_EAX) + : "memory", "eax", "cc"); } static inline void raw_local_irq_enable(void) { - __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" - "call *%0;" - "popl %%edx; popl %%ecx", - PARAVIRT_IRQ_ENABLE, CLBR_EAX) - : : "m" (paravirt_ops.irq_enable) - : "memory", "eax", "cc"); + asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;" + PARAVIRT_CALL + "popl %%edx; popl %%ecx") + : + : paravirt_type(irq_enable), + paravirt_clobber(CLBR_EAX) + : "memory", "eax", "cc"); } static inline unsigned long __raw_local_irq_save(void) { unsigned long f; - __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" - "call *%1; pushl %%eax;" - "call *%2; popl %%eax;" - "popl %%edx; popl %%ecx", - PARAVIRT_SAVE_FLAGS_IRQ_DISABLE, - CLBR_NONE) - : "=a"(f) - : "m" (paravirt_ops.save_fl), - "m" (paravirt_ops.irq_disable) - : "memory", "cc"); + f = __raw_local_save_flags(); + raw_local_irq_disable(); return f; } -#define CLI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \ - "call *paravirt_ops+%c[irq_disable];" \ - "popl %%edx; popl %%ecx", \ - PARAVIRT_IRQ_DISABLE, CLBR_EAX) +#define CLI_STRING \ + _paravirt_alt("pushl %%ecx; pushl %%edx;" \ + "call *paravirt_ops+%c[paravirt_cli_type]*4;" \ + "popl %%edx; popl %%ecx", \ + "%c[paravirt_cli_type]", "%c[paravirt_clobber]") + +#define STI_STRING \ + _paravirt_alt("pushl %%ecx; pushl %%edx;" \ + "call *paravirt_ops+%c[paravirt_sti_type]*4;" \ + "popl %%edx; popl %%ecx", \ + "%c[paravirt_sti_type]", "%c[paravirt_clobber]") -#define STI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \ - "call *paravirt_ops+%c[irq_enable];" \ - "popl %%edx; popl %%ecx", \ - PARAVIRT_IRQ_ENABLE, CLBR_EAX) #define CLI_STI_CLOBBERS , "%eax" -#define CLI_STI_INPUT_ARGS \ +#define CLI_STI_INPUT_ARGS \ , \ - [irq_disable] "i" (offsetof(struct paravirt_ops, irq_disable)), \ - [irq_enable] "i" (offsetof(struct paravirt_ops, irq_enable)) + [paravirt_cli_type] "i" (PARAVIRT_PATCH(irq_disable)), \ + [paravirt_sti_type] "i" (PARAVIRT_PATCH(irq_enable)), \ + paravirt_clobber(CLBR_EAX) + +#undef PARAVIRT_CALL #else /* __ASSEMBLY__ */ -#define PARA_PATCH(ptype, clobbers, ops) \ +#define PARA_PATCH(off) ((off) / 4) + +#define PARA_SITE(ptype, clobbers, ops) \ 771:; \ ops; \ 772:; \ @@ -612,25 +619,25 @@ static inline unsigned long __raw_local_irq_save(void) .short clobbers; \ .popsection -#define INTERRUPT_RETURN \ - PARA_PATCH(PARAVIRT_INTERRUPT_RETURN, CLBR_ANY, \ - jmp *%cs:paravirt_ops+PARAVIRT_iret) - -#define DISABLE_INTERRUPTS(clobbers) \ - PARA_PATCH(PARAVIRT_IRQ_DISABLE, clobbers, \ - pushl %ecx; pushl %edx; \ - call *paravirt_ops+PARAVIRT_irq_disable; \ - popl %edx; popl %ecx) \ - -#define ENABLE_INTERRUPTS(clobbers) \ - PARA_PATCH(PARAVIRT_IRQ_ENABLE, clobbers, \ - pushl %ecx; pushl %edx; \ - call *%cs:paravirt_ops+PARAVIRT_irq_enable; \ - popl %edx; popl %ecx) - -#define ENABLE_INTERRUPTS_SYSEXIT \ - PARA_PATCH(PARAVIRT_STI_SYSEXIT, CLBR_ANY, \ - jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit) +#define INTERRUPT_RETURN \ + PARA_SITE(PARA_PATCH(PARAVIRT_iret), CLBR_ANY, \ + jmp *%cs:paravirt_ops+PARAVIRT_iret) + +#define DISABLE_INTERRUPTS(clobbers) \ + PARA_SITE(PARA_PATCH(PARAVIRT_irq_disable), clobbers, \ + pushl %ecx; pushl %edx; \ + call *%cs:paravirt_ops+PARAVIRT_irq_disable; \ + popl %edx; popl %ecx) \ + +#define ENABLE_INTERRUPTS(clobbers) \ + PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable), clobbers, \ + pushl %ecx; pushl %edx; \ + call *%cs:paravirt_ops+PARAVIRT_irq_enable; \ + popl %edx; popl %ecx) + +#define ENABLE_INTERRUPTS_SYSEXIT \ + PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable_sysexit), CLBR_ANY, \ + jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit) #define GET_CR0_INTO_EAX \ call *paravirt_ops+PARAVIRT_read_cr0 -- cgit v1.2.3 From 42c24fa22e86365055fc931d833f26165e687c19 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] i386: PARAVIRT: Fix patch site clobbers to include return register Fix a few clobbers to include the return register. The clobbers set is the set of all registers modified (or may be modified) by the code snippet, regardless of whether it was deliberate or accidental. Also, make sure that callsites which are used in contexts which don't allow clobbers actually save and restore all clobberable registers. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Zachary Amsden --- include/asm-i386/paravirt.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 1dbc01f4ed4..87fd4317bee 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -535,7 +535,7 @@ static inline unsigned long __raw_local_save_flags(void) "popl %%edx; popl %%ecx") : "=a"(f) : paravirt_type(save_fl), - paravirt_clobber(CLBR_NONE) + paravirt_clobber(CLBR_EAX) : "memory", "cc"); return f; } @@ -620,27 +620,29 @@ static inline unsigned long __raw_local_irq_save(void) .popsection #define INTERRUPT_RETURN \ - PARA_SITE(PARA_PATCH(PARAVIRT_iret), CLBR_ANY, \ + PARA_SITE(PARA_PATCH(PARAVIRT_iret), CLBR_NONE, \ jmp *%cs:paravirt_ops+PARAVIRT_iret) #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(PARAVIRT_irq_disable), clobbers, \ - pushl %ecx; pushl %edx; \ + pushl %eax; pushl %ecx; pushl %edx; \ call *%cs:paravirt_ops+PARAVIRT_irq_disable; \ - popl %edx; popl %ecx) \ + popl %edx; popl %ecx; popl %eax) \ #define ENABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable), clobbers, \ - pushl %ecx; pushl %edx; \ + pushl %eax; pushl %ecx; pushl %edx; \ call *%cs:paravirt_ops+PARAVIRT_irq_enable; \ - popl %edx; popl %ecx) + popl %edx; popl %ecx; popl %eax) #define ENABLE_INTERRUPTS_SYSEXIT \ - PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable_sysexit), CLBR_ANY, \ + PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable_sysexit), CLBR_NONE, \ jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit) #define GET_CR0_INTO_EAX \ - call *paravirt_ops+PARAVIRT_read_cr0 + push %ecx; push %edx; \ + call *paravirt_ops+PARAVIRT_read_cr0; \ + pop %edx; pop %ecx #endif /* __ASSEMBLY__ */ #endif /* CONFIG_PARAVIRT */ -- cgit v1.2.3 From f8822f42019eceed19cc6c0f985a489e17796ed8 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] i386: PARAVIRT: Consistently wrap paravirt ops callsites to make them patchable Wrap a set of interesting paravirt_ops calls in a wrapper which makes the callsites available for patching. Unfortunately this is pretty ugly because there's no way to get gcc to generate a function call, but also wrap just the callsite itself with the necessary labels. This patch supports functions with 0-4 arguments, and either void or returning a value. 64-bit arguments must be split into a pair of 32-bit arguments (lower word first). Small structures are returned in registers. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Zachary Amsden Cc: Anthony Liguori --- include/asm-i386/paravirt.h | 686 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 560 insertions(+), 126 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 87fd4317bee..837457b42db 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -124,7 +124,7 @@ struct paravirt_ops void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); - void (*flush_tlb_single)(u32 addr); + void (*flush_tlb_single)(unsigned long addr); void (*map_pt_hook)(int type, pte_t *va, u32 pfn); @@ -188,7 +188,7 @@ extern struct paravirt_ops paravirt_ops; #define paravirt_clobber(clobber) \ [paravirt_clobber] "i" (clobber) -#define PARAVIRT_CALL "call *paravirt_ops+%c[paravirt_typenum]*4;" +#define PARAVIRT_CALL "call *(paravirt_ops+%c[paravirt_typenum]*4);" #define _paravirt_alt(insn_string, type, clobber) \ "771:\n\t" insn_string "\n" "772:\n" \ @@ -199,26 +199,234 @@ extern struct paravirt_ops paravirt_ops; " .short " clobber "\n" \ ".popsection\n" -#define paravirt_alt(insn_string) \ +#define paravirt_alt(insn_string) \ _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") -#define paravirt_enabled() (paravirt_ops.paravirt_enabled) +#define PVOP_CALL0(__rettype, __op) \ + ({ \ + __rettype __ret; \ + if (sizeof(__rettype) > sizeof(unsigned long)) { \ + unsigned long long __tmp; \ + unsigned long __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=A" (__tmp), "=c" (__ecx) \ + : paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } else { \ + unsigned long __tmp, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__tmp), "=d" (__edx), \ + "=c" (__ecx) \ + : paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } \ + __ret; \ + }) +#define PVOP_VCALL0(__op) \ + ({ \ + unsigned long __eax, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ + : paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + }) + +#define PVOP_CALL1(__rettype, __op, arg1) \ + ({ \ + __rettype __ret; \ + if (sizeof(__rettype) > sizeof(unsigned long)) { \ + unsigned long long __tmp; \ + unsigned long __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=A" (__tmp), "=c" (__ecx) \ + : "a" ((u32)(arg1)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } else { \ + unsigned long __tmp, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__tmp), "=d" (__edx), \ + "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } \ + __ret; \ + }) +#define PVOP_VCALL1(__op, arg1) \ + ({ \ + unsigned long __eax, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + }) + +#define PVOP_CALL2(__rettype, __op, arg1, arg2) \ + ({ \ + __rettype __ret; \ + if (sizeof(__rettype) > sizeof(unsigned long)) { \ + unsigned long long __tmp; \ + unsigned long __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=A" (__tmp), "=c" (__ecx) \ + : "a" ((u32)(arg1)), \ + "d" ((u32)(arg2)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } else { \ + unsigned long __tmp, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__tmp), "=d" (__edx), \ + "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + "1" ((u32)(arg2)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } \ + __ret; \ + }) +#define PVOP_VCALL2(__op, arg1, arg2) \ + ({ \ + unsigned long __eax, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + "1" ((u32)(arg2)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + }) + +#define PVOP_CALL3(__rettype, __op, arg1, arg2, arg3) \ + ({ \ + __rettype __ret; \ + if (sizeof(__rettype) > sizeof(unsigned long)) { \ + unsigned long long __tmp; \ + unsigned long __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=A" (__tmp), "=c" (__ecx) \ + : "a" ((u32)(arg1)), \ + "d" ((u32)(arg2)), \ + "1" ((u32)(arg3)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } else { \ + unsigned long __tmp, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__tmp), "=d" (__edx), \ + "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } \ + __ret; \ + }) +#define PVOP_VCALL3(__op, arg1, arg2, arg3) \ + ({ \ + unsigned long __eax, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + }) + +#define PVOP_CALL4(__rettype, __op, arg1, arg2, arg3, arg4) \ + ({ \ + __rettype __ret; \ + if (sizeof(__rettype) > sizeof(unsigned long)) { \ + unsigned long long __tmp; \ + unsigned long __ecx; \ + asm volatile("push %[_arg4]; " \ + paravirt_alt(PARAVIRT_CALL) \ + "lea 4(%%esp),%%esp" \ + : "=A" (__tmp), "=c" (__ecx) \ + : "a" ((u32)(arg1)), \ + "d" ((u32)(arg2)), \ + "1" ((u32)(arg3)), \ + [_arg4] "mr" ((u32)(arg4)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc",); \ + __ret = (__rettype)__tmp; \ + } else { \ + unsigned long __tmp, __edx, __ecx; \ + asm volatile("push %[_arg4]; " \ + paravirt_alt(PARAVIRT_CALL) \ + "lea 4(%%esp),%%esp" \ + : "=a" (__tmp), "=d" (__edx), "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), \ + [_arg4]"mr" ((u32)(arg4)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } \ + __ret; \ + }) +#define PVOP_VCALL4(__op, arg1, arg2, arg3, arg4) \ + ({ \ + unsigned long __eax, __edx, __ecx; \ + asm volatile("push %[_arg4]; " \ + paravirt_alt(PARAVIRT_CALL) \ + "lea 4(%%esp),%%esp" \ + : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), \ + [_arg4]"mr" ((u32)(arg4)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + }) + +static inline int paravirt_enabled(void) +{ + return paravirt_ops.paravirt_enabled; +} static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread) { - paravirt_ops.load_esp0(tss, thread); + PVOP_VCALL2(load_esp0, tss, thread); } #define ARCH_SETUP paravirt_ops.arch_setup(); static inline unsigned long get_wallclock(void) { - return paravirt_ops.get_wallclock(); + return PVOP_CALL0(unsigned long, get_wallclock); } static inline int set_wallclock(unsigned long nowtime) { - return paravirt_ops.set_wallclock(nowtime); + return PVOP_CALL1(int, set_wallclock, nowtime); } static inline void (*choose_time_init(void))(void) @@ -230,127 +438,208 @@ static inline void (*choose_time_init(void))(void) static inline void __cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { - paravirt_ops.cpuid(eax, ebx, ecx, edx); + PVOP_VCALL4(cpuid, eax, ebx, ecx, edx); } /* * These special macros can be used to get or set a debugging register */ -#define get_debugreg(var, reg) var = paravirt_ops.get_debugreg(reg) -#define set_debugreg(val, reg) paravirt_ops.set_debugreg(reg, val) +static inline unsigned long paravirt_get_debugreg(int reg) +{ + return PVOP_CALL1(unsigned long, get_debugreg, reg); +} +#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) +static inline void set_debugreg(unsigned long val, int reg) +{ + PVOP_VCALL2(set_debugreg, reg, val); +} -#define clts() paravirt_ops.clts() +static inline void clts(void) +{ + PVOP_VCALL0(clts); +} -#define read_cr0() paravirt_ops.read_cr0() -#define write_cr0(x) paravirt_ops.write_cr0(x) +static inline unsigned long read_cr0(void) +{ + return PVOP_CALL0(unsigned long, read_cr0); +} -#define read_cr2() paravirt_ops.read_cr2() -#define write_cr2(x) paravirt_ops.write_cr2(x) +static inline void write_cr0(unsigned long x) +{ + PVOP_VCALL1(write_cr0, x); +} + +static inline unsigned long read_cr2(void) +{ + return PVOP_CALL0(unsigned long, read_cr2); +} + +static inline void write_cr2(unsigned long x) +{ + PVOP_VCALL1(write_cr2, x); +} + +static inline unsigned long read_cr3(void) +{ + return PVOP_CALL0(unsigned long, read_cr3); +} -#define read_cr3() paravirt_ops.read_cr3() -#define write_cr3(x) paravirt_ops.write_cr3(x) +static inline void write_cr3(unsigned long x) +{ + PVOP_VCALL1(write_cr3, x); +} -#define read_cr4() paravirt_ops.read_cr4() -#define read_cr4_safe(x) paravirt_ops.read_cr4_safe() -#define write_cr4(x) paravirt_ops.write_cr4(x) +static inline unsigned long read_cr4(void) +{ + return PVOP_CALL0(unsigned long, read_cr4); +} +static inline unsigned long read_cr4_safe(void) +{ + return PVOP_CALL0(unsigned long, read_cr4_safe); +} -#define raw_ptep_get_and_clear(xp) (paravirt_ops.ptep_get_and_clear(xp)) +static inline void write_cr4(unsigned long x) +{ + PVOP_VCALL1(write_cr4, x); +} static inline void raw_safe_halt(void) { - paravirt_ops.safe_halt(); + PVOP_VCALL0(safe_halt); } static inline void halt(void) { - paravirt_ops.safe_halt(); + PVOP_VCALL0(safe_halt); +} + +static inline void wbinvd(void) +{ + PVOP_VCALL0(wbinvd); } -#define wbinvd() paravirt_ops.wbinvd() #define get_kernel_rpl() (paravirt_ops.kernel_rpl) +static inline u64 paravirt_read_msr(unsigned msr, int *err) +{ + return PVOP_CALL2(u64, read_msr, msr, err); +} +static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) +{ + return PVOP_CALL3(int, write_msr, msr, low, high); +} + /* These should all do BUG_ON(_err), but our headers are too tangled. */ -#define rdmsr(msr,val1,val2) do { \ - int _err; \ - u64 _l = paravirt_ops.read_msr(msr,&_err); \ - val1 = (u32)_l; \ - val2 = _l >> 32; \ +#define rdmsr(msr,val1,val2) do { \ + int _err; \ + u64 _l = paravirt_read_msr(msr, &_err); \ + val1 = (u32)_l; \ + val2 = _l >> 32; \ } while(0) -#define wrmsr(msr,val1,val2) do { \ - u64 _l = ((u64)(val2) << 32) | (val1); \ - paravirt_ops.write_msr((msr), _l); \ +#define wrmsr(msr,val1,val2) do { \ + paravirt_write_msr(msr, val1, val2); \ } while(0) -#define rdmsrl(msr,val) do { \ - int _err; \ - val = paravirt_ops.read_msr((msr),&_err); \ +#define rdmsrl(msr,val) do { \ + int _err; \ + val = paravirt_read_msr(msr, &_err); \ } while(0) -#define wrmsrl(msr,val) (paravirt_ops.write_msr((msr),(val))) -#define wrmsr_safe(msr,a,b) ({ \ - u64 _l = ((u64)(b) << 32) | (a); \ - paravirt_ops.write_msr((msr),_l); \ -}) +#define wrmsrl(msr,val) ((void)paravirt_write_msr(msr, val, 0)) +#define wrmsr_safe(msr,a,b) paravirt_write_msr(msr, a, b) /* rdmsr with exception handling */ -#define rdmsr_safe(msr,a,b) ({ \ - int _err; \ - u64 _l = paravirt_ops.read_msr(msr,&_err); \ - (*a) = (u32)_l; \ - (*b) = _l >> 32; \ +#define rdmsr_safe(msr,a,b) ({ \ + int _err; \ + u64 _l = paravirt_read_msr(msr, &_err); \ + (*a) = (u32)_l; \ + (*b) = _l >> 32; \ _err; }) -#define rdtsc(low,high) do { \ - u64 _l = paravirt_ops.read_tsc(); \ - low = (u32)_l; \ - high = _l >> 32; \ + +static inline u64 paravirt_read_tsc(void) +{ + return PVOP_CALL0(u64, read_tsc); +} +#define rdtsc(low,high) do { \ + u64 _l = paravirt_read_tsc(); \ + low = (u32)_l; \ + high = _l >> 32; \ } while(0) -#define rdtscl(low) do { \ - u64 _l = paravirt_ops.read_tsc(); \ - low = (int)_l; \ +#define rdtscl(low) do { \ + u64 _l = paravirt_read_tsc(); \ + low = (int)_l; \ } while(0) -#define rdtscll(val) (val = paravirt_ops.read_tsc()) +#define rdtscll(val) (val = paravirt_read_tsc()) #define get_scheduled_cycles(val) (val = paravirt_ops.get_scheduled_cycles()) #define calculate_cpu_khz() (paravirt_ops.get_cpu_khz()) #define write_tsc(val1,val2) wrmsr(0x10, val1, val2) -#define rdpmc(counter,low,high) do { \ - u64 _l = paravirt_ops.read_pmc(); \ - low = (u32)_l; \ - high = _l >> 32; \ -} while(0) +static inline unsigned long long paravirt_read_pmc(int counter) +{ + return PVOP_CALL1(u64, read_pmc, counter); +} -#define load_TR_desc() (paravirt_ops.load_tr_desc()) -#define load_gdt(dtr) (paravirt_ops.load_gdt(dtr)) -#define load_idt(dtr) (paravirt_ops.load_idt(dtr)) -#define set_ldt(addr, entries) (paravirt_ops.set_ldt((addr), (entries))) -#define store_gdt(dtr) (paravirt_ops.store_gdt(dtr)) -#define store_idt(dtr) (paravirt_ops.store_idt(dtr)) -#define store_tr(tr) ((tr) = paravirt_ops.store_tr()) -#define load_TLS(t,cpu) (paravirt_ops.load_tls((t),(cpu))) -#define write_ldt_entry(dt, entry, low, high) \ - (paravirt_ops.write_ldt_entry((dt), (entry), (low), (high))) -#define write_gdt_entry(dt, entry, low, high) \ - (paravirt_ops.write_gdt_entry((dt), (entry), (low), (high))) -#define write_idt_entry(dt, entry, low, high) \ - (paravirt_ops.write_idt_entry((dt), (entry), (low), (high))) -#define set_iopl_mask(mask) (paravirt_ops.set_iopl_mask(mask)) - -#define __pte(x) paravirt_ops.make_pte(x) -#define __pgd(x) paravirt_ops.make_pgd(x) - -#define pte_val(x) paravirt_ops.pte_val(x) -#define pgd_val(x) paravirt_ops.pgd_val(x) +#define rdpmc(counter,low,high) do { \ + u64 _l = paravirt_read_pmc(counter); \ + low = (u32)_l; \ + high = _l >> 32; \ +} while(0) -#ifdef CONFIG_X86_PAE -#define __pmd(x) paravirt_ops.make_pmd(x) -#define pmd_val(x) paravirt_ops.pmd_val(x) -#endif +static inline void load_TR_desc(void) +{ + PVOP_VCALL0(load_tr_desc); +} +static inline void load_gdt(const struct Xgt_desc_struct *dtr) +{ + PVOP_VCALL1(load_gdt, dtr); +} +static inline void load_idt(const struct Xgt_desc_struct *dtr) +{ + PVOP_VCALL1(load_idt, dtr); +} +static inline void set_ldt(const void *addr, unsigned entries) +{ + PVOP_VCALL2(set_ldt, addr, entries); +} +static inline void store_gdt(struct Xgt_desc_struct *dtr) +{ + PVOP_VCALL1(store_gdt, dtr); +} +static inline void store_idt(struct Xgt_desc_struct *dtr) +{ + PVOP_VCALL1(store_idt, dtr); +} +static inline unsigned long paravirt_store_tr(void) +{ + return PVOP_CALL0(unsigned long, store_tr); +} +#define store_tr(tr) ((tr) = paravirt_store_tr()) +static inline void load_TLS(struct thread_struct *t, unsigned cpu) +{ + PVOP_VCALL2(load_tls, t, cpu); +} +static inline void write_ldt_entry(void *dt, int entry, u32 low, u32 high) +{ + PVOP_VCALL4(write_ldt_entry, dt, entry, low, high); +} +static inline void write_gdt_entry(void *dt, int entry, u32 low, u32 high) +{ + PVOP_VCALL4(write_gdt_entry, dt, entry, low, high); +} +static inline void write_idt_entry(void *dt, int entry, u32 low, u32 high) +{ + PVOP_VCALL4(write_idt_entry, dt, entry, low, high); +} +static inline void set_iopl_mask(unsigned mask) +{ + PVOP_VCALL1(set_iopl_mask, mask); +} /* The paravirtualized I/O functions */ static inline void slow_down_io(void) { @@ -368,27 +657,27 @@ static inline void slow_down_io(void) { */ static inline void apic_write(unsigned long reg, unsigned long v) { - paravirt_ops.apic_write(reg,v); + PVOP_VCALL2(apic_write, reg, v); } static inline void apic_write_atomic(unsigned long reg, unsigned long v) { - paravirt_ops.apic_write_atomic(reg,v); + PVOP_VCALL2(apic_write_atomic, reg, v); } static inline unsigned long apic_read(unsigned long reg) { - return paravirt_ops.apic_read(reg); + return PVOP_CALL1(unsigned long, apic_read, reg); } static inline void setup_boot_clock(void) { - paravirt_ops.setup_boot_clock(); + PVOP_VCALL0(setup_boot_clock); } static inline void setup_secondary_clock(void) { - paravirt_ops.setup_secondary_clock(); + PVOP_VCALL0(setup_secondary_clock); } #endif @@ -408,93 +697,205 @@ static inline void paravirt_pagetable_setup_done(pgd_t *base) static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, unsigned long start_esp) { - return paravirt_ops.startup_ipi_hook(phys_apicid, start_eip, start_esp); + PVOP_VCALL3(startup_ipi_hook, phys_apicid, start_eip, start_esp); } #endif static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) { - paravirt_ops.activate_mm(prev, next); + PVOP_VCALL2(activate_mm, prev, next); } static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { - paravirt_ops.dup_mmap(oldmm, mm); + PVOP_VCALL2(dup_mmap, oldmm, mm); } static inline void arch_exit_mmap(struct mm_struct *mm) { - paravirt_ops.exit_mmap(mm); + PVOP_VCALL1(exit_mmap, mm); } -#define __flush_tlb() paravirt_ops.flush_tlb_user() -#define __flush_tlb_global() paravirt_ops.flush_tlb_kernel() -#define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr) +static inline void __flush_tlb(void) +{ + PVOP_VCALL0(flush_tlb_user); +} +static inline void __flush_tlb_global(void) +{ + PVOP_VCALL0(flush_tlb_kernel); +} +static inline void __flush_tlb_single(unsigned long addr) +{ + PVOP_VCALL1(flush_tlb_single, addr); +} -#define paravirt_map_pt_hook(type, va, pfn) paravirt_ops.map_pt_hook(type, va, pfn) +static inline void paravirt_map_pt_hook(int type, pte_t *va, u32 pfn) +{ + PVOP_VCALL3(map_pt_hook, type, va, pfn); +} -#define paravirt_alloc_pt(pfn) paravirt_ops.alloc_pt(pfn) -#define paravirt_release_pt(pfn) paravirt_ops.release_pt(pfn) +static inline void paravirt_alloc_pt(unsigned pfn) +{ + PVOP_VCALL1(alloc_pt, pfn); +} +static inline void paravirt_release_pt(unsigned pfn) +{ + PVOP_VCALL1(release_pt, pfn); +} -#define paravirt_alloc_pd(pfn) paravirt_ops.alloc_pd(pfn) -#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) \ - paravirt_ops.alloc_pd_clone(pfn, clonepfn, start, count) -#define paravirt_release_pd(pfn) paravirt_ops.release_pd(pfn) +static inline void paravirt_alloc_pd(unsigned pfn) +{ + PVOP_VCALL1(alloc_pd, pfn); +} -static inline void set_pte(pte_t *ptep, pte_t pteval) +static inline void paravirt_alloc_pd_clone(unsigned pfn, unsigned clonepfn, + unsigned start, unsigned count) +{ + PVOP_VCALL4(alloc_pd_clone, pfn, clonepfn, start, count); +} +static inline void paravirt_release_pd(unsigned pfn) { - paravirt_ops.set_pte(ptep, pteval); + PVOP_VCALL1(release_pd, pfn); } -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval) +static inline void pte_update(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - paravirt_ops.set_pte_at(mm, addr, ptep, pteval); + PVOP_VCALL3(pte_update, mm, addr, ptep); } -static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) +static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - paravirt_ops.set_pmd(pmdp, pmdval); + PVOP_VCALL3(pte_update_defer, mm, addr, ptep); } -static inline void pte_update(struct mm_struct *mm, u32 addr, pte_t *ptep) +#ifdef CONFIG_X86_PAE +static inline pte_t __pte(unsigned long long val) { - paravirt_ops.pte_update(mm, addr, ptep); + unsigned long long ret = PVOP_CALL2(unsigned long long, make_pte, + val, val >> 32); + return (pte_t) { ret, ret >> 32 }; } -static inline void pte_update_defer(struct mm_struct *mm, u32 addr, pte_t *ptep) +static inline pmd_t __pmd(unsigned long long val) { - paravirt_ops.pte_update_defer(mm, addr, ptep); + return (pmd_t) { PVOP_CALL2(unsigned long long, make_pmd, val, val >> 32) }; +} + +static inline pgd_t __pgd(unsigned long long val) +{ + return (pgd_t) { PVOP_CALL2(unsigned long long, make_pgd, val, val >> 32) }; +} + +static inline unsigned long long pte_val(pte_t x) +{ + return PVOP_CALL2(unsigned long long, pte_val, x.pte_low, x.pte_high); +} + +static inline unsigned long long pmd_val(pmd_t x) +{ + return PVOP_CALL2(unsigned long long, pmd_val, x.pmd, x.pmd >> 32); +} + +static inline unsigned long long pgd_val(pgd_t x) +{ + return PVOP_CALL2(unsigned long long, pgd_val, x.pgd, x.pgd >> 32); +} + +static inline void set_pte(pte_t *ptep, pte_t pteval) +{ + PVOP_VCALL3(set_pte, ptep, pteval.pte_low, pteval.pte_high); +} + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + /* 5 arg words */ + paravirt_ops.set_pte_at(mm, addr, ptep, pteval); } -#ifdef CONFIG_X86_PAE static inline void set_pte_atomic(pte_t *ptep, pte_t pteval) { - paravirt_ops.set_pte_atomic(ptep, pteval); + PVOP_VCALL3(set_pte_atomic, ptep, pteval.pte_low, pteval.pte_high); } -static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) { + /* 5 arg words */ paravirt_ops.set_pte_present(mm, addr, ptep, pte); } +static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + PVOP_VCALL3(set_pmd, pmdp, pmdval.pmd, pmdval.pmd >> 32); +} + static inline void set_pud(pud_t *pudp, pud_t pudval) { - paravirt_ops.set_pud(pudp, pudval); + PVOP_VCALL3(set_pud, pudp, pudval.pgd.pgd, pudval.pgd.pgd >> 32); } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - paravirt_ops.pte_clear(mm, addr, ptep); + PVOP_VCALL3(pte_clear, mm, addr, ptep); } static inline void pmd_clear(pmd_t *pmdp) { - paravirt_ops.pmd_clear(pmdp); + PVOP_VCALL1(pmd_clear, pmdp); +} + +static inline pte_t raw_ptep_get_and_clear(pte_t *p) +{ + unsigned long long val = PVOP_CALL1(unsigned long long, ptep_get_and_clear, p); + return (pte_t) { val, val >> 32 }; +} +#else /* !CONFIG_X86_PAE */ +static inline pte_t __pte(unsigned long val) +{ + return (pte_t) { PVOP_CALL1(unsigned long, make_pte, val) }; } -#endif + +static inline pgd_t __pgd(unsigned long val) +{ + return (pgd_t) { PVOP_CALL1(unsigned long, make_pgd, val) }; +} + +static inline unsigned long pte_val(pte_t x) +{ + return PVOP_CALL1(unsigned long, pte_val, x.pte_low); +} + +static inline unsigned long pgd_val(pgd_t x) +{ + return PVOP_CALL1(unsigned long, pgd_val, x.pgd); +} + +static inline void set_pte(pte_t *ptep, pte_t pteval) +{ + PVOP_VCALL2(set_pte, ptep, pteval.pte_low); +} + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + PVOP_VCALL4(set_pte_at, mm, addr, ptep, pteval.pte_low); +} + +static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + PVOP_VCALL2(set_pmd, pmdp, pmdval.pud.pgd.pgd); +} + +static inline pte_t raw_ptep_get_and_clear(pte_t *p) +{ + return (pte_t) { PVOP_CALL1(unsigned long, ptep_get_and_clear, p) }; +} +#endif /* CONFIG_X86_PAE */ /* Lazy mode for batching updates / context switch */ #define PARAVIRT_LAZY_NONE 0 @@ -503,14 +904,37 @@ static inline void pmd_clear(pmd_t *pmdp) #define PARAVIRT_LAZY_FLUSH 3 #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE -#define arch_enter_lazy_cpu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_CPU) -#define arch_leave_lazy_cpu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_NONE) -#define arch_flush_lazy_cpu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_FLUSH) +static inline void arch_enter_lazy_cpu_mode(void) +{ + PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_CPU); +} + +static inline void arch_leave_lazy_cpu_mode(void) +{ + PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_NONE); +} + +static inline void arch_flush_lazy_cpu_mode(void) +{ + PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_FLUSH); +} + #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE -#define arch_enter_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_MMU) -#define arch_leave_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_NONE) -#define arch_flush_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_FLUSH) +static inline void arch_enter_lazy_mmu_mode(void) +{ + PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_MMU); +} + +static inline void arch_leave_lazy_mmu_mode(void) +{ + PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_NONE); +} + +static inline void arch_flush_lazy_mmu_mode(void) +{ + PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_FLUSH); +} void _paravirt_nop(void); #define paravirt_nop ((void *)_paravirt_nop) @@ -603,6 +1027,16 @@ static inline unsigned long __raw_local_irq_save(void) paravirt_clobber(CLBR_EAX) #undef PARAVIRT_CALL +#undef PVOP_VCALL0 +#undef PVOP_CALL0 +#undef PVOP_VCALL1 +#undef PVOP_CALL1 +#undef PVOP_VCALL2 +#undef PVOP_CALL2 +#undef PVOP_VCALL3 +#undef PVOP_CALL3 +#undef PVOP_VCALL4 +#undef PVOP_CALL4 #else /* __ASSEMBLY__ */ -- cgit v1.2.3 From 294688c028e80fd467cdd22da79f62c5f311eaf5 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] i386: PARAVIRT: Document asm-i386/paravirt.h Clean things up, and broadly document: - the paravirt_ops functions themselves - the patching mechanism Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell --- include/asm-i386/paravirt.h | 131 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 121 insertions(+), 10 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 837457b42db..8bfaf10d996 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -21,6 +21,14 @@ struct Xgt_desc_struct; struct tss_struct; struct mm_struct; struct desc_struct; + +/* Lazy mode for batching updates / context switch */ +enum paravirt_lazy_mode { + PARAVIRT_LAZY_NONE = 0, + PARAVIRT_LAZY_MMU = 1, + PARAVIRT_LAZY_CPU = 2, +}; + struct paravirt_ops { unsigned int kernel_rpl; @@ -37,22 +45,33 @@ struct paravirt_ops */ unsigned (*patch)(u8 type, u16 clobber, void *firstinsn, unsigned len); + /* Basic arch-specific setup */ void (*arch_setup)(void); char *(*memory_setup)(void); void (*init_IRQ)(void); + void (*time_init)(void); + /* + * Called before/after init_mm pagetable setup. setup_start + * may reset %cr3, and may pre-install parts of the pagetable; + * pagetable setup is expected to preserve any existing + * mapping. + */ void (*pagetable_setup_start)(pgd_t *pgd_base); void (*pagetable_setup_done)(pgd_t *pgd_base); + /* Print a banner to identify the environment */ void (*banner)(void); + /* Set and set time of day */ unsigned long (*get_wallclock)(void); int (*set_wallclock)(unsigned long); - void (*time_init)(void); + /* cpuid emulation, mostly so that caps bits can be disabled */ void (*cpuid)(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); + /* hooks for various privileged instructions */ unsigned long (*get_debugreg)(int regno); void (*set_debugreg)(int regno, unsigned long value); @@ -71,15 +90,23 @@ struct paravirt_ops unsigned long (*read_cr4)(void); void (*write_cr4)(unsigned long); + /* + * Get/set interrupt state. save_fl and restore_fl are only + * expected to use X86_EFLAGS_IF; all other bits + * returned from save_fl are undefined, and may be ignored by + * restore_fl. + */ unsigned long (*save_fl)(void); void (*restore_fl)(unsigned long); void (*irq_disable)(void); void (*irq_enable)(void); void (*safe_halt)(void); void (*halt)(void); + void (*wbinvd)(void); - /* err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ + /* MSR, PMC and TSR operations. + err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ u64 (*read_msr)(unsigned int msr, int *err); int (*write_msr)(unsigned int msr, u64 val); @@ -88,6 +115,7 @@ struct paravirt_ops u64 (*get_scheduled_cycles)(void); unsigned long (*get_cpu_khz)(void); + /* Segment descriptor handling */ void (*load_tr_desc)(void); void (*load_gdt)(const struct Xgt_desc_struct *); void (*load_idt)(const struct Xgt_desc_struct *); @@ -105,9 +133,12 @@ struct paravirt_ops void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t); void (*set_iopl_mask)(unsigned mask); - void (*io_delay)(void); + /* + * Hooks for intercepting the creation/use/destruction of an + * mm_struct. + */ void (*activate_mm)(struct mm_struct *prev, struct mm_struct *next); void (*dup_mmap)(struct mm_struct *oldmm, @@ -115,30 +146,43 @@ struct paravirt_ops void (*exit_mmap)(struct mm_struct *mm); #ifdef CONFIG_X86_LOCAL_APIC + /* + * Direct APIC operations, principally for VMI. Ideally + * these shouldn't be in this interface. + */ void (*apic_write)(unsigned long reg, unsigned long v); void (*apic_write_atomic)(unsigned long reg, unsigned long v); unsigned long (*apic_read)(unsigned long reg); void (*setup_boot_clock)(void); void (*setup_secondary_clock)(void); + + void (*startup_ipi_hook)(int phys_apicid, + unsigned long start_eip, + unsigned long start_esp); #endif + /* TLB operations */ void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); void (*flush_tlb_single)(unsigned long addr); void (*map_pt_hook)(int type, pte_t *va, u32 pfn); + /* Hooks for allocating/releasing pagetable pages */ void (*alloc_pt)(u32 pfn); void (*alloc_pd)(u32 pfn); void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); void (*release_pt)(u32 pfn); void (*release_pd)(u32 pfn); + /* Pagetable manipulation functions */ void (*set_pte)(pte_t *ptep, pte_t pteval); - void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); + void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval); void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); void (*pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); - void (*pte_update_defer)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + void (*pte_update_defer)(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); pte_t (*ptep_get_and_clear)(pte_t *ptep); @@ -164,13 +208,12 @@ struct paravirt_ops pgd_t (*make_pgd)(unsigned long pgd); #endif - void (*set_lazy_mode)(int mode); + /* Set deferred update mode, used for batching operations. */ + void (*set_lazy_mode)(enum paravirt_lazy_mode mode); /* These two are jmp to, not actually called. */ void (*irq_enable_sysexit)(void); void (*iret)(void); - - void (*startup_ipi_hook)(int phys_apicid, unsigned long start_eip, unsigned long start_esp); }; /* Mark a paravirt probe function. */ @@ -188,8 +231,10 @@ extern struct paravirt_ops paravirt_ops; #define paravirt_clobber(clobber) \ [paravirt_clobber] "i" (clobber) -#define PARAVIRT_CALL "call *(paravirt_ops+%c[paravirt_typenum]*4);" - +/* + * Generate some code, and mark it as patchable by the + * apply_paravirt() alternate instruction patcher. + */ #define _paravirt_alt(insn_string, type, clobber) \ "771:\n\t" insn_string "\n" "772:\n" \ ".pushsection .parainstructions,\"a\"\n" \ @@ -199,9 +244,74 @@ extern struct paravirt_ops paravirt_ops; " .short " clobber "\n" \ ".popsection\n" +/* Generate patchable code, with the default asm parameters. */ #define paravirt_alt(insn_string) \ _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") +/* + * This generates an indirect call based on the operation type number. + * The type number, computed in PARAVIRT_PATCH, is derived from the + * offset into the paravirt_ops structure, and can therefore be freely + * converted back into a structure offset. + */ +#define PARAVIRT_CALL "call *(paravirt_ops+%c[paravirt_typenum]*4);" + +/* + * These macros are intended to wrap calls into a paravirt_ops + * operation, so that they can be later identified and patched at + * runtime. + * + * Normally, a call to a pv_op function is a simple indirect call: + * (paravirt_ops.operations)(args...). + * + * Unfortunately, this is a relatively slow operation for modern CPUs, + * because it cannot necessarily determine what the destination + * address is. In this case, the address is a runtime constant, so at + * the very least we can patch the call to e a simple direct call, or + * ideally, patch an inline implementation into the callsite. (Direct + * calls are essentially free, because the call and return addresses + * are completely predictable.) + * + * These macros rely on the standard gcc "regparm(3)" calling + * convention, in which the first three arguments are placed in %eax, + * %edx, %ecx (in that order), and the remaining arguments are placed + * on the stack. All caller-save registers (eax,edx,ecx) are expected + * to be modified (either clobbered or used for return values). + * + * The call instruction itself is marked by placing its start address + * and size into the .parainstructions section, so that + * apply_paravirt() in arch/i386/kernel/alternative.c can do the + * appropriate patching under the control of the backend paravirt_ops + * implementation. + * + * Unfortunately there's no way to get gcc to generate the args setup + * for the call, and then allow the call itself to be generated by an + * inline asm. Because of this, we must do the complete arg setup and + * return value handling from within these macros. This is fairly + * cumbersome. + * + * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. + * It could be extended to more arguments, but there would be little + * to be gained from that. For each number of arguments, there are + * the two VCALL and CALL variants for void and non-void functions. + * + * When there is a return value, the invoker of the macro must specify + * the return type. The macro then uses sizeof() on that type to + * determine whether its a 32 or 64 bit value, and places the return + * in the right register(s) (just %eax for 32-bit, and %edx:%eax for + * 64-bit). + * + * 64-bit arguments are passed as a pair of adjacent 32-bit arguments + * in low,high order. + * + * Small structures are passed and returned in registers. The macro + * calling convention can't directly deal with this, so the wrapper + * functions must do this. + * + * These PVOP_* macros are only defined within this header. This + * means that all uses must be wrapped in inline functions. This also + * makes sure the incoming and outgoing types are always correct. + */ #define PVOP_CALL0(__rettype, __op) \ ({ \ __rettype __ret; \ @@ -1026,6 +1136,7 @@ static inline unsigned long __raw_local_irq_save(void) [paravirt_sti_type] "i" (PARAVIRT_PATCH(irq_enable)), \ paravirt_clobber(CLBR_EAX) +/* Make sure as little as possible of this mess escapes. */ #undef PARAVIRT_CALL #undef PVOP_VCALL0 #undef PVOP_CALL0 -- cgit v1.2.3 From 63f70270ccd981ce40a8ff58c03a8c2e97e368be Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] i386: PARAVIRT: add common patching machinery Implement the actual patching machinery. paravirt_patch_default() contains the logic to automatically patch a callsite based on a few simple rules: - if the paravirt_op function is paravirt_nop, then patch nops - if the paravirt_op function is a jmp target, then jmp to it - if the paravirt_op function is callable and doesn't clobber too much for the callsite, call it directly paravirt_patch_default is suitable as a default implementation of paravirt_ops.patch, will remove most of the expensive indirect calls in favour of either a direct call or a pile of nops. Backends may implement their own patcher, however. There are several helper functions to help with this: paravirt_patch_nop nop out a callsite paravirt_patch_ignore leave the callsite as-is paravirt_patch_call patch a call if the caller and callee have compatible clobbers paravirt_patch_jmp patch in a jmp paravirt_patch_insns patch some literal instructions over the callsite, if they fit This patch also implements more direct patches for the native case, so that when running on native hardware many common operations are implemented inline. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Zachary Amsden Cc: Anthony Liguori Acked-by: Ingo Molnar --- include/asm-i386/paravirt.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 8bfaf10d996..4b3d5085867 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -248,6 +248,18 @@ extern struct paravirt_ops paravirt_ops; #define paravirt_alt(insn_string) \ _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") +unsigned paravirt_patch_nop(void); +unsigned paravirt_patch_ignore(unsigned len); +unsigned paravirt_patch_call(void *target, u16 tgt_clobbers, + void *site, u16 site_clobbers, + unsigned len); +unsigned paravirt_patch_jmp(void *target, void *site, unsigned len); +unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len); + +unsigned paravirt_patch_insns(void *site, unsigned len, + const char *start, const char *end); + + /* * This generates an indirect call based on the operation type number. * The type number, computed in PARAVIRT_PATCH, is derived from the -- cgit v1.2.3 From d4c104771a1c58e3de2a888b73b0ba1b54c0ae76 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: add flush_tlb_others paravirt_op This patch adds a pv_op for flush_tlb_others. Linux running on native hardware uses cross-CPU IPIs to flush the TLB on any CPU which may have a particular mm's pagetable entries cached in its TLB. This is inefficient in a paravirtualized environment, since the hypervisor knows which real CPUs actually contain cached mappings, which may be a small subset of a guest's VCPUs. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- include/asm-i386/paravirt.h | 9 +++++++++ include/asm-i386/tlbflush.h | 19 +++++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 4b3d5085867..f880b06d6d5 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -15,6 +15,7 @@ #ifndef __ASSEMBLY__ #include +#include struct thread_struct; struct Xgt_desc_struct; @@ -165,6 +166,8 @@ struct paravirt_ops void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); void (*flush_tlb_single)(unsigned long addr); + void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, + unsigned long va); void (*map_pt_hook)(int type, pte_t *va, u32 pfn); @@ -853,6 +856,12 @@ static inline void __flush_tlb_single(unsigned long addr) PVOP_VCALL1(flush_tlb_single, addr); } +static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, + unsigned long va) +{ + PVOP_VCALL3(flush_tlb_others, &cpumask, mm, va); +} + static inline void paravirt_map_pt_hook(int type, pte_t *va, u32 pfn) { PVOP_VCALL3(map_pt_hook, type, va, pfn); diff --git a/include/asm-i386/tlbflush.h b/include/asm-i386/tlbflush.h index 4dd82840d53..db7f77eacfa 100644 --- a/include/asm-i386/tlbflush.h +++ b/include/asm-i386/tlbflush.h @@ -79,11 +79,15 @@ * - flush_tlb_range(vma, start, end) flushes a range of pages * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables + * - flush_tlb_others(cpumask, mm, va) flushes a TLBs on other cpus * * ..but the i386 has somewhat limited tlb flushing capabilities, * and page-granular flushes are available only on i486 and up. */ +#define TLB_FLUSH_ALL 0xffffffff + + #ifndef CONFIG_SMP #define flush_tlb() __flush_tlb() @@ -110,7 +114,12 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, __flush_tlb(); } -#else +static inline void native_flush_tlb_others(const cpumask_t *cpumask, + struct mm_struct *mm, unsigned long va) +{ +} + +#else /* SMP */ #include @@ -129,6 +138,9 @@ static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long st flush_tlb_mm(vma->vm_mm); } +void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, + unsigned long va); + #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2 @@ -139,8 +151,11 @@ struct tlb_state char __cacheline_padding[L1_CACHE_BYTES-8]; }; DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); +#endif /* SMP */ - +#ifndef CONFIG_PARAVIRT +#define flush_tlb_others(mask, mm, va) \ + native_flush_tlb_others(&mask, mm, va) #endif #define flush_tlb_kernel_range(start, end) flush_tlb_all() -- cgit v1.2.3 From a27fe809b82c5e18932fcceded28d0d1481ce7bb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: revert map_pt_hook. Back out the map_pt_hook to clear the way for kmap_atomic_pte. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Zachary Amsden --- include/asm-i386/paravirt.h | 7 ------- include/asm-i386/pgtable.h | 23 ++++------------------- 2 files changed, 4 insertions(+), 26 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index f880b06d6d5..10f44af76b4 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -169,8 +169,6 @@ struct paravirt_ops void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, unsigned long va); - void (*map_pt_hook)(int type, pte_t *va, u32 pfn); - /* Hooks for allocating/releasing pagetable pages */ void (*alloc_pt)(u32 pfn); void (*alloc_pd)(u32 pfn); @@ -862,11 +860,6 @@ static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, PVOP_VCALL3(flush_tlb_others, &cpumask, mm, va); } -static inline void paravirt_map_pt_hook(int type, pte_t *va, u32 pfn) -{ - PVOP_VCALL3(map_pt_hook, type, va, pfn); -} - static inline void paravirt_alloc_pt(unsigned pfn) { PVOP_VCALL1(alloc_pt, pfn); diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 5b88a6a1278..6599f2aa91b 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -267,7 +267,6 @@ extern void vmalloc_sync_all(void); */ #define pte_update(mm, addr, ptep) do { } while (0) #define pte_update_defer(mm, addr, ptep) do { } while (0) -#define paravirt_map_pt_hook(slot, va, pfn) do { } while (0) #define raw_ptep_get_and_clear(xp) native_ptep_get_and_clear(xp) #endif @@ -476,24 +475,10 @@ extern pte_t *lookup_address(unsigned long address); #endif #if defined(CONFIG_HIGHPTE) -#define pte_offset_map(dir, address) \ -({ \ - pte_t *__ptep; \ - unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ - __ptep = (pte_t *)kmap_atomic(pfn_to_page(pfn),KM_PTE0);\ - paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \ - __ptep = __ptep + pte_index(address); \ - __ptep; \ -}) -#define pte_offset_map_nested(dir, address) \ -({ \ - pte_t *__ptep; \ - unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ - __ptep = (pte_t *)kmap_atomic(pfn_to_page(pfn),KM_PTE1);\ - paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \ - __ptep = __ptep + pte_index(address); \ - __ptep; \ -}) +#define pte_offset_map(dir, address) \ + ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) +#define pte_offset_map_nested(dir, address) \ + ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE1) + pte_index(address)) #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) #else -- cgit v1.2.3 From ce6234b5298902aaec831a67d5f8d9bd2ef5a488 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: add kmap_atomic_pte for mapping highpte pages Xen and VMI both have special requirements when mapping a highmem pte page into the kernel address space. These can be dealt with by adding a new kmap_atomic_pte() function for mapping highptes, and hooking it into the paravirt_ops infrastructure. Xen specifically wants to map the pte page RO, so this patch exposes a helper function, kmap_atomic_prot, which maps the page with the specified page protections. This also adds a kmap_flush_unused() function to clear out the cached kmap mappings. Xen needs this to clear out any potential stray RW mappings of pages which will become part of a pagetable. [ Zach - vmi.c will need some attention after this patch. It wasn't immediately obvious to me what needs to be done. ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Zachary Amsden --- include/asm-i386/highmem.h | 6 ++++++ include/asm-i386/paravirt.h | 15 +++++++++++++++ include/asm-i386/pgtable.h | 4 ++-- 3 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/highmem.h b/include/asm-i386/highmem.h index e9a34ebc25d..13cdcd66fff 100644 --- a/include/asm-i386/highmem.h +++ b/include/asm-i386/highmem.h @@ -24,6 +24,7 @@ #include #include #include +#include /* declarations for highmem.c */ extern unsigned long highstart_pfn, highend_pfn; @@ -67,11 +68,16 @@ extern void FASTCALL(kunmap_high(struct page *page)); void *kmap(struct page *page); void kunmap(struct page *page); +void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); void *kmap_atomic(struct page *page, enum km_type type); void kunmap_atomic(void *kvaddr, enum km_type type); void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); struct page *kmap_atomic_to_page(void *ptr); +#ifndef CONFIG_PARAVIRT +#define kmap_atomic_pte(page, type) kmap_atomic(page, type) +#endif + #define flush_cache_kmaps() do { } while (0) #endif /* __KERNEL__ */ diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 10f44af76b4..5048b41428f 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -16,7 +16,9 @@ #ifndef __ASSEMBLY__ #include #include +#include +struct page; struct thread_struct; struct Xgt_desc_struct; struct tss_struct; @@ -187,6 +189,10 @@ struct paravirt_ops pte_t (*ptep_get_and_clear)(pte_t *ptep); +#ifdef CONFIG_HIGHPTE + void *(*kmap_atomic_pte)(struct page *page, enum km_type type); +#endif + #ifdef CONFIG_X86_PAE void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); @@ -884,6 +890,15 @@ static inline void paravirt_release_pd(unsigned pfn) PVOP_VCALL1(release_pd, pfn); } +#ifdef CONFIG_HIGHPTE +static inline void *kmap_atomic_pte(struct page *page, enum km_type type) +{ + unsigned long ret; + ret = PVOP_CALL2(unsigned long, kmap_atomic_pte, page, type); + return (void *)ret; +} +#endif + static inline void pte_update(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 6599f2aa91b..befc697821e 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -476,9 +476,9 @@ extern pte_t *lookup_address(unsigned long address); #if defined(CONFIG_HIGHPTE) #define pte_offset_map(dir, address) \ - ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) #define pte_offset_map_nested(dir, address) \ - ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE1) + pte_index(address)) + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address)) #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) #else -- cgit v1.2.3 From 4e0fa85602a4fa219fc3a9c053d5140bf987d3e3 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: Use enums for paravirt lazy flush modi Remove #defines, add enum for PARAVIRT_LAZY_FLUSH. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- include/asm-i386/paravirt.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 5048b41428f..c5451923c79 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -30,6 +30,7 @@ enum paravirt_lazy_mode { PARAVIRT_LAZY_NONE = 0, PARAVIRT_LAZY_MMU = 1, PARAVIRT_LAZY_CPU = 2, + PARAVIRT_LAZY_FLUSH = 3, }; struct paravirt_ops @@ -1036,12 +1037,6 @@ static inline pte_t raw_ptep_get_and_clear(pte_t *p) } #endif /* CONFIG_X86_PAE */ -/* Lazy mode for batching updates / context switch */ -#define PARAVIRT_LAZY_NONE 0 -#define PARAVIRT_LAZY_MMU 1 -#define PARAVIRT_LAZY_CPU 2 -#define PARAVIRT_LAZY_FLUSH 3 - #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE static inline void arch_enter_lazy_cpu_mode(void) { -- cgit v1.2.3 From 1a45b7aaa5051489b46afbc48509bd91f8b4a1ba Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: Clean up paravirt patchable wrappers Replace all the open-coded macros for generating calls with a pair of more general macros (__PVOP_CALL/VCALL), and redefine all the PVOP_V?CALL[0-4] in terms of them. [ Andrew, Andi: this should slot in immediately after "Document asm-i386/paravirt.h" (paravirt_ops-document-asm-i386-paravirth.patch) ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Ingo Molnar --- include/asm-i386/paravirt.h | 248 +++++++++++--------------------------------- 1 file changed, 60 insertions(+), 188 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index c5451923c79..2ba18963e11 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -332,211 +332,81 @@ unsigned paravirt_patch_insns(void *site, unsigned len, * means that all uses must be wrapped in inline functions. This also * makes sure the incoming and outgoing types are always correct. */ -#define PVOP_CALL0(__rettype, __op) \ - ({ \ - __rettype __ret; \ - if (sizeof(__rettype) > sizeof(unsigned long)) { \ - unsigned long long __tmp; \ - unsigned long __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=A" (__tmp), "=c" (__ecx) \ - : paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } else { \ - unsigned long __tmp, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__tmp), "=d" (__edx), \ - "=c" (__ecx) \ - : paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } \ - __ret; \ - }) -#define PVOP_VCALL0(__op) \ +#define __PVOP_CALL(rettype, op, pre, post, ...) \ ({ \ + rettype __ret; \ unsigned long __eax, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ - : paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - }) - -#define PVOP_CALL1(__rettype, __op, arg1) \ - ({ \ - __rettype __ret; \ - if (sizeof(__rettype) > sizeof(unsigned long)) { \ - unsigned long long __tmp; \ - unsigned long __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=A" (__tmp), "=c" (__ecx) \ - : "a" ((u32)(arg1)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } else { \ - unsigned long __tmp, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__tmp), "=d" (__edx), \ - "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } \ - __ret; \ - }) -#define PVOP_VCALL1(__op, arg1) \ - ({ \ - unsigned long __eax, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - }) - -#define PVOP_CALL2(__rettype, __op, arg1, arg2) \ - ({ \ - __rettype __ret; \ - if (sizeof(__rettype) > sizeof(unsigned long)) { \ - unsigned long long __tmp; \ - unsigned long __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=A" (__tmp), "=c" (__ecx) \ - : "a" ((u32)(arg1)), \ - "d" ((u32)(arg2)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } else { \ - unsigned long __tmp, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__tmp), "=d" (__edx), \ - "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - "1" ((u32)(arg2)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } \ - __ret; \ - }) -#define PVOP_VCALL2(__op, arg1, arg2) \ - ({ \ - unsigned long __eax, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - "1" ((u32)(arg2)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - }) - -#define PVOP_CALL3(__rettype, __op, arg1, arg2, arg3) \ - ({ \ - __rettype __ret; \ - if (sizeof(__rettype) > sizeof(unsigned long)) { \ - unsigned long long __tmp; \ - unsigned long __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=A" (__tmp), "=c" (__ecx) \ - : "a" ((u32)(arg1)), \ - "d" ((u32)(arg2)), \ - "1" ((u32)(arg3)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } else { \ - unsigned long __tmp, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__tmp), "=d" (__edx), \ + if (sizeof(rettype) > sizeof(unsigned long)) { \ + asm volatile(pre \ + paravirt_alt(PARAVIRT_CALL) \ + post \ + : "=a" (__eax), "=d" (__edx), \ "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ + : paravirt_type(op), \ + paravirt_clobber(CLBR_ANY), \ + ##__VA_ARGS__ \ : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } \ - __ret; \ - }) -#define PVOP_VCALL3(__op, arg1, arg2, arg3) \ - ({ \ - unsigned long __eax, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - }) - -#define PVOP_CALL4(__rettype, __op, arg1, arg2, arg3, arg4) \ - ({ \ - __rettype __ret; \ - if (sizeof(__rettype) > sizeof(unsigned long)) { \ - unsigned long long __tmp; \ - unsigned long __ecx; \ - asm volatile("push %[_arg4]; " \ - paravirt_alt(PARAVIRT_CALL) \ - "lea 4(%%esp),%%esp" \ - : "=A" (__tmp), "=c" (__ecx) \ - : "a" ((u32)(arg1)), \ - "d" ((u32)(arg2)), \ - "1" ((u32)(arg3)), \ - [_arg4] "mr" ((u32)(arg4)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc",); \ - __ret = (__rettype)__tmp; \ + __ret = (rettype)((((u64)__edx) << 32) | __eax); \ } else { \ - unsigned long __tmp, __edx, __ecx; \ - asm volatile("push %[_arg4]; " \ + asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ - "lea 4(%%esp),%%esp" \ - : "=a" (__tmp), "=d" (__edx), "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), \ - [_arg4]"mr" ((u32)(arg4)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ + post \ + : "=a" (__eax), "=d" (__edx), \ + "=c" (__ecx) \ + : paravirt_type(op), \ + paravirt_clobber(CLBR_ANY), \ + ##__VA_ARGS__ \ : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ + __ret = (rettype)__eax; \ } \ __ret; \ }) -#define PVOP_VCALL4(__op, arg1, arg2, arg3, arg4) \ +#define __PVOP_VCALL(op, pre, post, ...) \ ({ \ unsigned long __eax, __edx, __ecx; \ - asm volatile("push %[_arg4]; " \ + asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ - "lea 4(%%esp),%%esp" \ + post \ : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), \ - [_arg4]"mr" ((u32)(arg4)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ + : paravirt_type(op), \ + paravirt_clobber(CLBR_ANY), \ + ##__VA_ARGS__ \ : "memory", "cc"); \ }) +#define PVOP_CALL0(rettype, op) \ + __PVOP_CALL(rettype, op, "", "") +#define PVOP_VCALL0(op) \ + __PVOP_VCALL(op, "", "") + +#define PVOP_CALL1(rettype, op, arg1) \ + __PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1))) +#define PVOP_VCALL1(op, arg1) \ + __PVOP_VCALL(op, "", "", "0" ((u32)(arg1))) + +#define PVOP_CALL2(rettype, op, arg1, arg2) \ + __PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2))) +#define PVOP_VCALL2(op, arg1, arg2) \ + __PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2))) + +#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ + __PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)), \ + "1"((u32)(arg2)), "2"((u32)(arg3))) +#define PVOP_VCALL3(op, arg1, arg2, arg3) \ + __PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1"((u32)(arg2)), \ + "2"((u32)(arg3))) + +#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ + __PVOP_CALL(rettype, op, \ + "push %[_arg4];", "lea 4(%%esp),%%esp;", \ + "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) +#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ + __PVOP_VCALL(op, \ + "push %[_arg4];", "lea 4(%%esp),%%esp;", \ + "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) + static inline int paravirt_enabled(void) { return paravirt_ops.paravirt_enabled; @@ -1162,6 +1032,8 @@ static inline unsigned long __raw_local_irq_save(void) /* Make sure as little as possible of this mess escapes. */ #undef PARAVIRT_CALL +#undef __PVOP_CALL +#undef __PVOP_VCALL #undef PVOP_VCALL0 #undef PVOP_CALL0 #undef PVOP_VCALL1 -- cgit v1.2.3 From 4cdd9c8931767e1c56a51a1078d33a8c340f4405 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: drop unused ptep_get_and_clear In shadow mode hypervisors, ptep_get_and_clear achieves the desired purpose of keeping the shadows in sync by issuing a native_get_and_clear, followed by a call to pte_update, which indicates the PTE has been modified. Direct mode hypervisors (Xen) have no need for this anyway, and will trap the update using writable pagetables. This means no hypervisor makes use of ptep_get_and_clear; there is no reason to have it in the paravirt-ops structure. Change confusing terminology about raw vs. native functions into consistent use of native_pte_xxx for operations which do not invoke paravirt-ops. Signed-off-by: Zachary Amsden Signed-off-by: Andi Kleen --- include/asm-i386/paravirt.h | 13 +------------ include/asm-i386/pgtable.h | 4 +--- 2 files changed, 2 insertions(+), 15 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 2ba18963e11..e2e7f98723c 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -188,8 +188,6 @@ struct paravirt_ops void (*pte_update_defer)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); - pte_t (*ptep_get_and_clear)(pte_t *ptep); - #ifdef CONFIG_HIGHPTE void *(*kmap_atomic_pte)(struct page *page, enum km_type type); #endif @@ -859,12 +857,8 @@ static inline void pmd_clear(pmd_t *pmdp) PVOP_VCALL1(pmd_clear, pmdp); } -static inline pte_t raw_ptep_get_and_clear(pte_t *p) -{ - unsigned long long val = PVOP_CALL1(unsigned long long, ptep_get_and_clear, p); - return (pte_t) { val, val >> 32 }; -} #else /* !CONFIG_X86_PAE */ + static inline pte_t __pte(unsigned long val) { return (pte_t) { PVOP_CALL1(unsigned long, make_pte, val) }; @@ -900,11 +894,6 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) { PVOP_VCALL2(set_pmd, pmdp, pmdval.pud.pgd.pgd); } - -static inline pte_t raw_ptep_get_and_clear(pte_t *p) -{ - return (pte_t) { PVOP_CALL1(unsigned long, ptep_get_and_clear, p) }; -} #endif /* CONFIG_X86_PAE */ #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index befc697821e..e7ddd234130 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -267,8 +267,6 @@ extern void vmalloc_sync_all(void); */ #define pte_update(mm, addr, ptep) do { } while (0) #define pte_update_defer(mm, addr, ptep) do { } while (0) - -#define raw_ptep_get_and_clear(xp) native_ptep_get_and_clear(xp) #endif /* @@ -335,7 +333,7 @@ do { \ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_t pte = raw_ptep_get_and_clear(ptep); + pte_t pte = native_ptep_get_and_clear(ptep); pte_update(mm, addr, ptep); return pte; } -- cgit v1.2.3 From 7a61d35d4b4056e7711031202da7605e052f4137 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: Page-align the GDT Xen wants a dedicated page for the GDT. I believe VMI likes it too. lguest, KVM and native don't care. Simple transformation to page-aligned "struct gdt_page". Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Acked-by: Jeremy Fitzhardinge --- include/asm-i386/desc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index 4a974064e92..c547403f341 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -18,10 +18,15 @@ struct Xgt_desc_struct { unsigned short pad; } __attribute__ ((packed)); -DECLARE_PER_CPU(struct desc_struct, cpu_gdt[GDT_ENTRIES]); +struct gdt_page +{ + struct desc_struct gdt[GDT_ENTRIES]; +} __attribute__((aligned(PAGE_SIZE))); +DECLARE_PER_CPU(struct gdt_page, gdt_page); + static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) { - return per_cpu(cpu_gdt, cpu); + return per_cpu(gdt_page, cpu).gdt; } extern struct Xgt_desc_struct idt_descr; -- cgit v1.2.3 From 7c3576d261ce046789a7db14f43303f8120910c7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] i386: Convert PDA into the percpu section Currently x86 (similar to x84-64) has a special per-cpu structure called "i386_pda" which can be easily and efficiently referenced via the %fs register. An ELF section is more flexible than a structure, allowing any piece of code to use this area. Indeed, such a section already exists: the per-cpu area. So this patch: (1) Removes the PDA and uses per-cpu variables for each current member. (2) Replaces the __KERNEL_PDA segment with __KERNEL_PERCPU. (3) Creates a per-cpu mirror of __per_cpu_offset called this_cpu_off, which can be used to calculate addresses for this CPU's variables. (4) Simplifies startup, because %fs doesn't need to be loaded with a special segment at early boot; it can be deferred until the first percpu area is allocated (or never for UP). The result is less code and one less x86-specific concept. Signed-off-by: Rusty Russell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Andi Kleen --- include/asm-i386/current.h | 5 +- include/asm-i386/irq_regs.h | 12 ++-- include/asm-i386/pda.h | 99 -------------------------------- include/asm-i386/percpu.h | 132 ++++++++++++++++++++++++++++++++++++++++--- include/asm-i386/processor.h | 2 +- include/asm-i386/segment.h | 6 +- include/asm-i386/smp.h | 4 +- 7 files changed, 139 insertions(+), 121 deletions(-) delete mode 100644 include/asm-i386/pda.h (limited to 'include/asm-i386') diff --git a/include/asm-i386/current.h b/include/asm-i386/current.h index 5252ee0f6d7..d3524853991 100644 --- a/include/asm-i386/current.h +++ b/include/asm-i386/current.h @@ -1,14 +1,15 @@ #ifndef _I386_CURRENT_H #define _I386_CURRENT_H -#include #include +#include struct task_struct; +DECLARE_PER_CPU(struct task_struct *, current_task); static __always_inline struct task_struct *get_current(void) { - return read_pda(pcurrent); + return x86_read_percpu(current_task); } #define current get_current() diff --git a/include/asm-i386/irq_regs.h b/include/asm-i386/irq_regs.h index a1b3f7f594a..3368b20c0b4 100644 --- a/include/asm-i386/irq_regs.h +++ b/include/asm-i386/irq_regs.h @@ -1,25 +1,27 @@ /* * Per-cpu current frame pointer - the location of the last exception frame on - * the stack, stored in the PDA. + * the stack, stored in the per-cpu area. * * Jeremy Fitzhardinge */ #ifndef _ASM_I386_IRQ_REGS_H #define _ASM_I386_IRQ_REGS_H -#include +#include + +DECLARE_PER_CPU(struct pt_regs *, irq_regs); static inline struct pt_regs *get_irq_regs(void) { - return read_pda(irq_regs); + return x86_read_percpu(irq_regs); } static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) { struct pt_regs *old_regs; - old_regs = read_pda(irq_regs); - write_pda(irq_regs, new_regs); + old_regs = get_irq_regs(); + x86_write_percpu(irq_regs, new_regs); return old_regs; } diff --git a/include/asm-i386/pda.h b/include/asm-i386/pda.h deleted file mode 100644 index aef7f732f77..00000000000 --- a/include/asm-i386/pda.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - Per-processor Data Areas - Jeremy Fitzhardinge 2006 - Based on asm-x86_64/pda.h by Andi Kleen. - */ -#ifndef _I386_PDA_H -#define _I386_PDA_H - -#include -#include -#include - -struct i386_pda -{ - struct i386_pda *_pda; /* pointer to self */ - - int cpu_number; - struct task_struct *pcurrent; /* current process */ - struct pt_regs *irq_regs; -}; - -DECLARE_PER_CPU(struct i386_pda, _cpu_pda); -#define cpu_pda(i) (&per_cpu(_cpu_pda, (i))) -#define pda_offset(field) offsetof(struct i386_pda, field) - -extern void __bad_pda_field(void); - -/* This variable is never instantiated. It is only used as a stand-in - for the real per-cpu PDA memory, so that gcc can understand what - memory operations the inline asms() below are performing. This - eliminates the need to make the asms volatile or have memory - clobbers, so gcc can readily analyse them. */ -extern struct i386_pda _proxy_pda; - -#define pda_to_op(op,field,val) \ - do { \ - typedef typeof(_proxy_pda.field) T__; \ - if (0) { T__ tmp__; tmp__ = (val); } \ - switch (sizeof(_proxy_pda.field)) { \ - case 1: \ - asm(op "b %1,%%fs:%c2" \ - : "+m" (_proxy_pda.field) \ - :"ri" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - case 2: \ - asm(op "w %1,%%fs:%c2" \ - : "+m" (_proxy_pda.field) \ - :"ri" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - case 4: \ - asm(op "l %1,%%fs:%c2" \ - : "+m" (_proxy_pda.field) \ - :"ri" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - default: __bad_pda_field(); \ - } \ - } while (0) - -#define pda_from_op(op,field) \ - ({ \ - typeof(_proxy_pda.field) ret__; \ - switch (sizeof(_proxy_pda.field)) { \ - case 1: \ - asm(op "b %%fs:%c1,%0" \ - : "=r" (ret__) \ - : "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - case 2: \ - asm(op "w %%fs:%c1,%0" \ - : "=r" (ret__) \ - : "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - case 4: \ - asm(op "l %%fs:%c1,%0" \ - : "=r" (ret__) \ - : "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - default: __bad_pda_field(); \ - } \ - ret__; }) - -/* Return a pointer to a pda field */ -#define pda_addr(field) \ - ((typeof(_proxy_pda.field) *)((unsigned char *)read_pda(_pda) + \ - pda_offset(field))) - -#define read_pda(field) pda_from_op("mov",field) -#define write_pda(field,val) pda_to_op("mov",field,val) -#define add_pda(field,val) pda_to_op("add",field,val) -#define sub_pda(field,val) pda_to_op("sub",field,val) -#define or_pda(field,val) pda_to_op("or",field,val) - -#endif /* _I386_PDA_H */ diff --git a/include/asm-i386/percpu.h b/include/asm-i386/percpu.h index a10e7c68ae9..c5f12f0d9c2 100644 --- a/include/asm-i386/percpu.h +++ b/include/asm-i386/percpu.h @@ -1,9 +1,30 @@ #ifndef __ARCH_I386_PERCPU__ #define __ARCH_I386_PERCPU__ -#ifndef __ASSEMBLY__ -#include -#else +#ifdef __ASSEMBLY__ + +/* + * PER_CPU finds an address of a per-cpu variable. + * + * Args: + * var - variable name + * reg - 32bit register + * + * The resulting address is stored in the "reg" argument. + * + * Example: + * PER_CPU(cpu_gdt_descr, %ebx) + */ +#ifdef CONFIG_SMP +#define PER_CPU(var, reg) \ + movl %fs:per_cpu__this_cpu_off, reg; \ + addl $per_cpu__##var, reg +#else /* ! SMP */ +#define PER_CPU(var, reg) \ + movl $per_cpu__##var, reg; +#endif /* SMP */ + +#else /* ...!ASSEMBLY */ /* * PER_CPU finds an address of a per-cpu variable. @@ -18,14 +39,107 @@ * PER_CPU(cpu_gdt_descr, %ebx) */ #ifdef CONFIG_SMP -#define PER_CPU(var, cpu) \ - movl __per_cpu_offset(,cpu,4), cpu; \ - addl $per_cpu__##var, cpu; -#else /* ! SMP */ -#define PER_CPU(var, cpu) \ - movl $per_cpu__##var, cpu; +/* Same as generic implementation except for optimized local access. */ +#define __GENERIC_PER_CPU + +/* This is used for other cpus to find our section. */ +extern unsigned long __per_cpu_offset[]; + +/* Separate out the type, so (int[3], foo) works. */ +#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name +#define DEFINE_PER_CPU(type, name) \ + __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name + +/* We can use this directly for local CPU (faster). */ +DECLARE_PER_CPU(unsigned long, this_cpu_off); + +/* var is in discarded region: offset to particular copy we want */ +#define per_cpu(var, cpu) (*({ \ + extern int simple_indentifier_##var(void); \ + RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); })) + +#define __raw_get_cpu_var(var) (*({ \ + extern int simple_indentifier_##var(void); \ + RELOC_HIDE(&per_cpu__##var, x86_read_percpu(this_cpu_off)); \ +})) + +#define __get_cpu_var(var) __raw_get_cpu_var(var) + +/* A macro to avoid #include hell... */ +#define percpu_modcopy(pcpudst, src, size) \ +do { \ + unsigned int __i; \ + for_each_possible_cpu(__i) \ + memcpy((pcpudst)+__per_cpu_offset[__i], \ + (src), (size)); \ +} while (0) + +#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) +#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) + +/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ +#define __percpu_seg "%%fs:" +#else /* !SMP */ +#include +#define __percpu_seg "" #endif /* SMP */ +/* For arch-specific code, we can use direct single-insn ops (they + * don't give an lvalue though). */ +extern void __bad_percpu_size(void); + +#define percpu_to_op(op,var,val) \ + do { \ + typedef typeof(var) T__; \ + if (0) { T__ tmp__; tmp__ = (val); } \ + switch (sizeof(var)) { \ + case 1: \ + asm(op "b %1,"__percpu_seg"%0" \ + : "+m" (var) \ + :"ri" ((T__)val)); \ + break; \ + case 2: \ + asm(op "w %1,"__percpu_seg"%0" \ + : "+m" (var) \ + :"ri" ((T__)val)); \ + break; \ + case 4: \ + asm(op "l %1,"__percpu_seg"%0" \ + : "+m" (var) \ + :"ri" ((T__)val)); \ + break; \ + default: __bad_percpu_size(); \ + } \ + } while (0) + +#define percpu_from_op(op,var) \ + ({ \ + typeof(var) ret__; \ + switch (sizeof(var)) { \ + case 1: \ + asm(op "b "__percpu_seg"%1,%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + case 2: \ + asm(op "w "__percpu_seg"%1,%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + case 4: \ + asm(op "l "__percpu_seg"%1,%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + default: __bad_percpu_size(); \ + } \ + ret__; }) + +#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) +#define x86_write_percpu(var,val) percpu_to_op("mov", per_cpu__##var, val) +#define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val) +#define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val) +#define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val) #endif /* !__ASSEMBLY__ */ #endif /* __ARCH_I386_PERCPU__ */ diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 92226047464..ced2da8a0d6 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -377,7 +377,7 @@ struct thread_struct { .vm86_info = NULL, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ - .fs = __KERNEL_PDA, \ + .fs = __KERNEL_PERCPU, \ } /* diff --git a/include/asm-i386/segment.h b/include/asm-i386/segment.h index 065f10bfa48..07e70624d87 100644 --- a/include/asm-i386/segment.h +++ b/include/asm-i386/segment.h @@ -39,7 +39,7 @@ * 25 - APM BIOS support * * 26 - ESPFIX small SS - * 27 - PDA [ per-cpu private data area ] + * 27 - per-cpu [ offset to per-cpu data area ] * 28 - unused * 29 - unused * 30 - unused @@ -74,8 +74,8 @@ #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) -#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15) -#define __KERNEL_PDA (GDT_ENTRY_PDA * 8) +#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) +#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) #define GDT_ENTRY_DOUBLEFAULT_TSS 31 diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index 2d083cb4ca9..090abc1da32 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -8,7 +8,6 @@ #include #include #include -#include #endif #if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__) @@ -112,7 +111,8 @@ do { } while (0) * from the initial startup. We map APIC_BASE very early in page_setup(), * so this is correct in the x86 case. */ -#define raw_smp_processor_id() (read_pda(cpu_number)) +DECLARE_PER_CPU(int, cpu_number); +#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) extern cpumask_t cpu_callout_map; extern cpumask_t cpu_callin_map; -- cgit v1.2.3 From 978c038ec944e4f2c940b0975c6acb433203a9be Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] i386: cleanups to help using per-cpu variables from asm This patch does a few small cleanups: - use PER_CPU_NAME to generate the names of per-cpu variables - use lea to add the per_cpu offset in PER_CPU(), because it doesn't affect condition flags - add PER_CPU_VAR which allows direct access to pre-cpu variables with the %fs: prefix on SMP. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Andi Kleen --- include/asm-i386/percpu.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/percpu.h b/include/asm-i386/percpu.h index c5f12f0d9c2..cdcb63db30a 100644 --- a/include/asm-i386/percpu.h +++ b/include/asm-i386/percpu.h @@ -16,12 +16,14 @@ * PER_CPU(cpu_gdt_descr, %ebx) */ #ifdef CONFIG_SMP -#define PER_CPU(var, reg) \ - movl %fs:per_cpu__this_cpu_off, reg; \ - addl $per_cpu__##var, reg +#define PER_CPU(var, reg) \ + movl %fs:per_cpu__##this_cpu_off, reg; \ + lea per_cpu__##var(reg), reg +#define PER_CPU_VAR(var) %fs:per_cpu__##var #else /* ! SMP */ -#define PER_CPU(var, reg) \ - movl $per_cpu__##var, reg; +#define PER_CPU(var, reg) \ + movl $per_cpu__##var, reg +#define PER_CPU_VAR(var) per_cpu__##var #endif /* SMP */ #else /* ...!ASSEMBLY */ -- cgit v1.2.3 From 1956c73bb5bf81ee577ed7d3c64e3cad876ad2a5 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] i386: Define per_cpu_offset Define per_cpu_offset in asm-i386/percpu.h when SMP defined, like asm-generic/percpu.h does for UP. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Andi Kleen --- include/asm-i386/percpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/percpu.h b/include/asm-i386/percpu.h index cdcb63db30a..f54830b5d5a 100644 --- a/include/asm-i386/percpu.h +++ b/include/asm-i386/percpu.h @@ -47,6 +47,8 @@ /* This is used for other cpus to find our section. */ extern unsigned long __per_cpu_offset[]; +#define per_cpu_offset(x) (__per_cpu_offset[x]) + /* Separate out the type, so (int[3], foo) works. */ #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name #define DEFINE_PER_CPU(type, name) \ -- cgit v1.2.3 From c5413fbe894924ddb8aa474a4d4da52e7a6c7e0b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] i386: Fix UP gdt bugs Fixes two problems with the GDT when compiling for uniprocessor: - There's no percpu segment, so trying to load its selector into %fs fails. Use a null selector instead. - The real gdt needs to be loaded at some point. Do it in cpu_init(). Signed-off-by: Chris Wright Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell --- include/asm-i386/processor.h | 1 + include/asm-i386/segment.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index ced2da8a0d6..70f3515c3db 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -750,6 +750,7 @@ extern void enable_sep_cpu(void); extern int sysenter_setup(void); extern void cpu_set_gdt(int); +extern void switch_to_new_gdt(void); extern void cpu_init(void); extern int force_mwait; diff --git a/include/asm-i386/segment.h b/include/asm-i386/segment.h index 07e70624d87..597a47c2515 100644 --- a/include/asm-i386/segment.h +++ b/include/asm-i386/segment.h @@ -75,7 +75,11 @@ #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) #define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) +#ifdef CONFIG_SMP #define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) +#else +#define __KERNEL_PERCPU 0 +#endif #define GDT_ENTRY_DOUBLEFAULT_TSS 31 -- cgit v1.2.3 From 57decbda6a2a7c400b2a3b3b12e52ccbdc977118 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] x86: update for i386 and x86-64 check_bugs Remove spurious comments, headers and keywords from x86-64 bugs.[ch]. Use identify_boot_cpu() AK: merged with other patch Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- include/asm-i386/bugs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/bugs.h b/include/asm-i386/bugs.h index df539b39044..d28979ff73b 100644 --- a/include/asm-i386/bugs.h +++ b/include/asm-i386/bugs.h @@ -7,6 +7,6 @@ #ifndef _ASM_I386_BUG_H #define _ASM_I386_BUG_H -extern void __init check_bugs(void); +void check_bugs(void); #endif /* _ASM_I386_BUG_H */ -- cgit v1.2.3 From e0bb8643974397a8d36670e06e6a54bb84f3289f Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] i386: Convert VMI timer to use clock events Convert VMI timer to use clock events, making it properly able to use the NO_HZ infrastructure. On UP systems, with no local APIC, we just continue to route these events through the PIT. On systems with a local APIC, or SMP, we provide a single source interrupt chip which creates the local timer IRQ. It actually gets delivered by the APIC hardware, but we don't want to use the same local APIC clocksource processing, so we create our own handler here. Signed-off-by: Zachary Amsden Signed-off-by: Andi Kleen CC: Dan Hecht CC: Ingo Molnar CC: Thomas Gleixner --- include/asm-i386/vmi_time.h | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/vmi_time.h b/include/asm-i386/vmi_time.h index c3a1fcf66c9..213930b995c 100644 --- a/include/asm-i386/vmi_time.h +++ b/include/asm-i386/vmi_time.h @@ -53,22 +53,8 @@ extern unsigned long long vmi_get_sched_cycles(void); extern unsigned long vmi_cpu_khz(void); #ifdef CONFIG_X86_LOCAL_APIC -extern void __init vmi_timer_setup_boot_alarm(void); -extern void __devinit vmi_timer_setup_secondary_alarm(void); -extern void apic_vmi_timer_interrupt(void); -#endif - -#ifdef CONFIG_NO_IDLE_HZ -extern int vmi_stop_hz_timer(void); -extern void vmi_account_time_restart_hz_timer(void); -#else -static inline int vmi_stop_hz_timer(void) -{ - return 0; -} -static inline void vmi_account_time_restart_hz_timer(void) -{ -} +extern void __devinit vmi_time_bsp_init(void); +extern void __devinit vmi_time_ap_init(void); #endif /* -- cgit v1.2.3 From 441d40dca024deb305a5e3d5003e8cd9d364d10f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] x86: PARAVIRT: Jeremy Fitzhardinge The other symbols used to delineate the alt-instructions sections have the form __foo/__foo_end. Rename parainstructions to match. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Rusty Russell Signed-off-by: Andrew Morton --- include/asm-i386/alternative.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/alternative.h b/include/asm-i386/alternative.h index 5b59d07e9d2..27746732958 100644 --- a/include/asm-i386/alternative.h +++ b/include/asm-i386/alternative.h @@ -124,8 +124,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, struct paravirt_patch_site *end) {} -#define __start_parainstructions NULL -#define __stop_parainstructions NULL +#define __parainstructions NULL +#define __parainstructions_end NULL #endif #endif /* _I386_ALTERNATIVE_H */ -- cgit v1.2.3 From 2b3b4835c94226681c496de9446d456dcf42ed08 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Wed, 2 May 2007 19:27:17 +0200 Subject: [PATCH] x86: Adds mtrr_save_fixed_ranges() for use in two later patches. In this current implementation which is used in other patches, mtrr_save_fixed_ranges() accepts a dummy void pointer because in the current implementation of one of these patches, this function may be called from smp_call_function_single() which requires that this function takes a void pointer argument. This function calls get_fixed_ranges(), passing mtrr_state.fixed_ranges which is the element of the static struct which stores our current backup of the fixed-range MTRR values which all CPUs shall be using. Because mtrr_save_fixed_ranges calls get_fixed_ranges after kernel initialisation time, __init needs to be removed from the declaration of get_fixed_ranges(). If CONFIG_MTRR is not set, we define mtrr_save_fixed_ranges as an empty statement because there is nothing to do. AK: Moved prototypes for x86-64 around to fix warnings Signed-off-by: Bernhard Kaindl Signed-off-by: Andi Kleen Cc: Andrew Morton Cc: Andi Kleen Cc: Dave Jones --- include/asm-i386/mtrr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/mtrr.h b/include/asm-i386/mtrr.h index 07f063ae26e..02a41b99bd7 100644 --- a/include/asm-i386/mtrr.h +++ b/include/asm-i386/mtrr.h @@ -69,6 +69,7 @@ struct mtrr_gentry /* The following functions are for use by other drivers */ # ifdef CONFIG_MTRR +extern void mtrr_save_fixed_ranges(void *); extern int mtrr_add (unsigned long base, unsigned long size, unsigned int type, char increment); extern int mtrr_add_page (unsigned long base, unsigned long size, @@ -79,6 +80,7 @@ extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); extern void mtrr_ap_init(void); extern void mtrr_bp_init(void); # else +#define mtrr_save_fixed_ranges(arg) do {} while (0) static __inline__ int mtrr_add (unsigned long base, unsigned long size, unsigned int type, char increment) { -- cgit v1.2.3 From 2b1f6278d77c1f2f669346fc2bb48012b5e9495a Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Wed, 2 May 2007 19:27:17 +0200 Subject: [PATCH] x86: Save the MTRRs of the BSP before booting an AP Applied fix by Andew Morton: http://lkml.org/lkml/2007/4/8/88 - Fix `make headers_check'. AMD and Intel x86 CPU manuals state that it is the responsibility of system software to initialize and maintain MTRR consistency across all processors in Multi-Processing Environments. Quote from page 188 of the AMD64 System Programming manual (Volume 2): 7.6.5 MTRRs in Multi-Processing Environments "In multi-processing environments, the MTRRs located in all processors must characterize memory in the same way. Generally, this means that identical values are written to the MTRRs used by the processors." (short omission here) "Failure to do so may result in coherency violations or loss of atomicity. Processor implementations do not check the MTRR settings in other processors to ensure consistency. It is the responsibility of system software to initialize and maintain MTRR consistency across all processors." Current Linux MTRR code already implements the above in the case that the BIOS does not properly initialize MTRRs on the secondary processors, but the case where the fixed-range MTRRs of the boot processor are changed after Linux started to boot, before the initialsation of a secondary processor, is not handled yet. In this case, secondary processors are currently initialized by Linux with MTRRs which the boot processor had very early, when mtrr_bp_init() did run, but not with the MTRRs which the boot processor uses at the time when that secondary processors is actually booted, causing differing MTRR contents on the secondary processors. Such situation happens on Acer Ferrari 1000 and 5000 notebooks where the BIOS enables and sets AMD-specific IORR bits in the fixed-range MTRRs of the boot processor when it transitions the system into ACPI mode. The SMI handler of the BIOS does this in SMM, entered while Linux ACPI code runs acpi_enable(). Other occasions where the SMI handler of the BIOS may change bits in the MTRRs could occur as well. To initialize newly booted secodary processors with the fixed-range MTRRs which the boot processor uses at that time, this patch saves the fixed-range MTRRs of the boot processor before new secondary processors are started. When the secondary processors run their Linux initialisation code, their fixed-range MTRRs will be updated with the saved fixed-range MTRRs. If CONFIG_MTRR is not set, we define mtrr_save_state as an empty statement because there is nothing to do. Possible TODOs: *) CPU-hotplugging outside of SMP suspend/resume is not yet tested with this patch. *) If, even in this case, an AP never runs i386/do_boot_cpu or x86_64/cpu_up, then the calls to mtrr_save_state() could be replaced by calls to mtrr_save_fixed_ranges(NULL) and mtrr_save_state() would not be needed. That would need either verification of the CPU-hotplug code or at least a test on a >2 CPU machine. *) The MTRRs of other running processors are not yet checked at this time but it might be interesting to syncronize the MTTRs of all processors before booting. That would be an incremental patch, but of rather low priority since there is no machine known so far which would require this. AK: moved prototypes on x86-64 around to fix warnings Signed-off-by: Bernhard Kaindl Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Dave Jones --- include/asm-i386/mtrr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/mtrr.h b/include/asm-i386/mtrr.h index 02a41b99bd7..7e9c7ccbdcf 100644 --- a/include/asm-i386/mtrr.h +++ b/include/asm-i386/mtrr.h @@ -70,6 +70,7 @@ struct mtrr_gentry /* The following functions are for use by other drivers */ # ifdef CONFIG_MTRR extern void mtrr_save_fixed_ranges(void *); +extern void mtrr_save_state(void); extern int mtrr_add (unsigned long base, unsigned long size, unsigned int type, char increment); extern int mtrr_add_page (unsigned long base, unsigned long size, @@ -81,6 +82,7 @@ extern void mtrr_ap_init(void); extern void mtrr_bp_init(void); # else #define mtrr_save_fixed_ranges(arg) do {} while (0) +#define mtrr_save_state() do {} while (0) static __inline__ int mtrr_add (unsigned long base, unsigned long size, unsigned int type, char increment) { -- cgit v1.2.3 From de938c51d5fec4ae03af64b06beb15d4423ec611 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Wed, 2 May 2007 19:27:17 +0200 Subject: [PATCH] i386: Enable support for fixed-range IORRs to keep RdMem & WrMem in sync If our copy of the MTRRs of the BSP has RdMem or WrMem set, and we are running on an AMD64/K8 system, the boot CPU must have had MtrrFixDramEn and MtrrFixDramModEn set (otherwise our RDMSR would have copied these bits cleared), so we set them on this CPU as well. This allows us to keep the AMD64/K8 RdMem and WrMem bits in sync across the CPUs of SMP systems in order to fullfill the duty of system software to "initialize and maintain MTRR consistency across all processors." as written in the AMD and Intel manuals. If an WRMSR instruction fails because MtrrFixDramModEn is not set, I expect that also the Intel-style MTRR bits are not updated. AK: minor cleanup, moved MSR defines around Signed-off-by: Bernhard Kaindl Signed-off-by: Andi Kleen Cc: Andrew Morton Cc: Andi Kleen Cc: Dave Jones --- include/asm-i386/msr-index.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/msr-index.h b/include/asm-i386/msr-index.h index f1190802283..a02eb299134 100644 --- a/include/asm-i386/msr-index.h +++ b/include/asm-i386/msr-index.h @@ -87,6 +87,11 @@ #define MSR_K7_CLK_CTL 0xc001001b #define MSR_K8_TOP_MEM2 0xc001001d #define MSR_K8_SYSCFG 0xc0010010 + +#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ +#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ +#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ + #define MSR_K7_HWCR 0xc0010015 #define MSR_K8_HWCR 0xc0010015 #define MSR_K7_FID_VID_CTL 0xc0010041 -- cgit v1.2.3 From f2b218dd6199983b120a96bc6531c1b81f4090d8 Mon Sep 17 00:00:00 2001 From: Fernando Luis VazquezCao Date: Wed, 2 May 2007 19:27:17 +0200 Subject: [PATCH] i386: safe_apic_wait_icr_idle - i386 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit apic_wait_icr_idle looks like this: static __inline__ void apic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } The busy loop in this function would not be problematic if the corresponding status bit in the ICR were always updated, but that does not seem to be the case under certain crash scenarios. Kdump uses an IPI to stop the other CPUs in the event of a crash, but when any of the other CPUs are locked-up inside the NMI handler the CPU that sends the IPI will end up looping forever in the ICR check, effectively hard-locking the whole system. Quoting from Intel's "MultiProcessor Specification" (Version 1.4), B-3: "A local APIC unit indicates successful dispatch of an IPI by resetting the Delivery Status bit in the Interrupt Command Register (ICR). The operating system polls the delivery status bit after sending an INIT or STARTUP IPI until the command has been dispatched. A period of 20 microseconds should be sufficient for IPI dispatch to complete under normal operating conditions. If the IPI is not successfully dispatched, the operating system can abort the command. Alternatively, the operating system can retry the IPI by writing the lower 32-bit double word of the ICR. This “time-out” mechanism can be implemented through an external interrupt, if interrupts are enabled on the processor, or through execution of an instruction or time-stamp counter spin loop." Intel's documentation suggests the implementation of a time-out mechanism, which, by the way, is already being open-coded in some parts of the kernel that tinker with ICR. Create a apic_wait_icr_idle replacement that implements the time-out mechanism and that can be used to solve the aforementioned problem. AK: moved both functions out of line AK: added improved loop from Keith Owens Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Andi Kleen --- include/asm-i386/apic.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h index a19810a08ae..1e8f6f252dd 100644 --- a/include/asm-i386/apic.h +++ b/include/asm-i386/apic.h @@ -2,6 +2,7 @@ #define __ASM_APIC_H #include +#include #include #include #include @@ -64,12 +65,8 @@ static __inline fastcall unsigned long native_apic_read(unsigned long reg) return *((volatile unsigned long *)(APIC_BASE+reg)); } -static __inline__ void apic_wait_icr_idle(void) -{ - while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY ) - cpu_relax(); -} - +void apic_wait_icr_idle(void); +unsigned long safe_apic_wait_icr_idle(void); int get_physical_broadcast(void); #ifdef CONFIG_X86_GOOD_APIC -- cgit v1.2.3 From c2c1accd4b2f9c82fb89d40611c7f581948db255 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Wed, 2 May 2007 19:27:19 +0200 Subject: [PATCH] i386: pte clear optimization When exiting from an address space, no special hypervisor notification of page table updates needs to occur; direct page table hypervisors, such as Xen, switch to another address space first (init_mm) and unprotects the page tables to avoid the cost of trapping to the hypervisor for each pte_clear. Shadow mode hypervisors, such as VMI and lhype don't need to do the extra work of calling through paravirt-ops, and can just directly clear the page table entries without notifiying the hypervisor, since all the page tables are about to be freed. So introduce native_pte_clear functions which bypass any paravirt-ops notification. This results in a significant performance win for VMI and removes some indirect calls from zap_pte_range. Note the 3-level paging already had a native_pte_clear function, thus demanding argument conformance and extra args for the 2-level definition. Signed-off-by: Zachary Amsden Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/asm-i386/pgtable-2level.h | 5 +++++ include/asm-i386/pgtable.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 781fe4bcc96..85d9005c0cd 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -36,6 +36,11 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) #define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) +static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp) +{ + *xp = __pte(0); +} + static inline pte_t native_ptep_get_and_clear(pte_t *xp) { return __pte(xchg(&xp->pte_low, 0)); diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index e7ddd234130..00e97a9d367 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -344,7 +344,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long pte_t pte; if (full) { pte = *ptep; - pte_clear(mm, addr, ptep); + native_pte_clear(mm, addr, ptep); } else { pte = ptep_get_and_clear(mm, addr, ptep); } -- cgit v1.2.3 From 142dd975911fdd82b1b6f6617cd20ac90a8ccf00 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Wed, 2 May 2007 19:27:19 +0200 Subject: [PATCH] i386: pte xchg optimization In situations where page table updates need only be made locally, and there is no cross-processor A/D bit races involved, we need not use the heavyweight xchg instruction to atomically fetch and clear page table entries. Instead, we can just read and clear them directly. This introduces a neat optimization for non-SMP kernels; drop the atomic xchg operations from page table updates. Thanks to Michel Lespinasse for noting this potential optimization. Signed-off-by: Zachary Amsden Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/asm-i386/pgtable-2level.h | 14 ++++++++++++++ include/asm-i386/pgtable-3level.h | 14 ++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 85d9005c0cd..3daab67cd36 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -41,10 +41,24 @@ static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pt *xp = __pte(0); } +/* local pte updates need not use xchg for locking */ +static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) +{ + pte_t res; + + res = *ptep; + native_pte_clear(NULL, 0, ptep); + return res; +} + +#ifdef CONFIG_SMP static inline pte_t native_ptep_get_and_clear(pte_t *xp) { return __pte(xchg(&xp->pte_low, 0)); } +#else +#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) +#endif #define pte_page(x) pfn_to_page(pte_pfn(x)) #define pte_none(x) (!(x).pte_low) diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index 664bfee5a2f..45b02418150 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h @@ -139,6 +139,17 @@ static inline void pud_clear (pud_t * pud) { } #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ pmd_index(address)) +/* local pte updates need not use xchg for locking */ +static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) +{ + pte_t res; + + res = *ptep; + native_pte_clear(NULL, 0, ptep); + return res; +} + +#ifdef CONFIG_SMP static inline pte_t native_ptep_get_and_clear(pte_t *ptep) { pte_t res; @@ -150,6 +161,9 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep) return res; } +#else +#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) +#endif #define __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t a, pte_t b) -- cgit v1.2.3 From 9e5e3162b2d5e4466187ecd63c9eec2de33cb7bc Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Wed, 2 May 2007 19:27:19 +0200 Subject: [PATCH] i386: pte simplify ops Add comment and condense code to make use of native_local_ptep_get_and_clear function. Also, it turns out the 2-level and 3-level paging definitions were identical, so move the common definition into pgtable.h Signed-off-by: Zachary Amsden Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/asm-i386/pgtable-2level.h | 10 ---------- include/asm-i386/pgtable-3level.h | 10 ---------- include/asm-i386/pgtable.h | 17 +++++++++++++++-- 3 files changed, 15 insertions(+), 22 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 3daab67cd36..a50fd1773de 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -41,16 +41,6 @@ static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pt *xp = __pte(0); } -/* local pte updates need not use xchg for locking */ -static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) -{ - pte_t res; - - res = *ptep; - native_pte_clear(NULL, 0, ptep); - return res; -} - #ifdef CONFIG_SMP static inline pte_t native_ptep_get_and_clear(pte_t *xp) { diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index 45b02418150..eb0f1d7e96a 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h @@ -139,16 +139,6 @@ static inline void pud_clear (pud_t * pud) { } #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ pmd_index(address)) -/* local pte updates need not use xchg for locking */ -static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) -{ - pte_t res; - - res = *ptep; - native_pte_clear(NULL, 0, ptep); - return res; -} - #ifdef CONFIG_SMP static inline pte_t native_ptep_get_and_clear(pte_t *ptep) { diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 00e97a9d367..c6b8b944120 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -269,6 +269,16 @@ extern void vmalloc_sync_all(void); #define pte_update_defer(mm, addr, ptep) do { } while (0) #endif +/* local pte updates need not use xchg for locking */ +static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) +{ + pte_t res = *ptep; + + /* Pure native function needs no input for mm, addr */ + native_pte_clear(NULL, 0, ptep); + return res; +} + /* * We only update the dirty/accessed state if we set * the dirty bit by hand in the kernel, since the hardware @@ -343,8 +353,11 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long { pte_t pte; if (full) { - pte = *ptep; - native_pte_clear(mm, addr, ptep); + /* + * Full address destruction in progress; paravirt does not + * care about updates and native needs no locking + */ + pte = native_local_ptep_get_and_clear(ptep); } else { pte = ptep_get_and_clear(mm, addr, ptep); } -- cgit v1.2.3 From 09198e68501a7e34737cd9264d266f42429abcdc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:20 +0200 Subject: [PATCH] i386: Clean up NMI watchdog code - Introduce a wd_ops structure - Convert the various nmi watchdogs over to it - This allows to split the perfctr reservation from the watchdog setup cleanly. - Do perfctr reservation globally as it should have always been - Remove dead code referenced only by unused EXPORT_SYMBOLs Signed-off-by: Andi Kleen --- include/asm-i386/nmi.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h index b04333ea6f3..fb1e133efd9 100644 --- a/include/asm-i386/nmi.h +++ b/include/asm-i386/nmi.h @@ -50,4 +50,12 @@ void __trigger_all_cpu_backtrace(void); #endif +void lapic_watchdog_stop(void); +int lapic_watchdog_init(unsigned nmi_hz); +int lapic_wd_event(unsigned nmi_hz); +unsigned lapic_adjust_nmi_hz(unsigned hz); +int lapic_watchdog_ok(void); +void disable_lapic_nmi_watchdog(void); +void enable_lapic_nmi_watchdog(void); + #endif /* ASM_NMI_H */ -- cgit v1.2.3 From c7f81c9453375d6416658995eafd3397cb9bba1d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:20 +0200 Subject: [PATCH] i386: Verify important CPUID bits in real mode Check some CPUID bits that are needed for compiler generated early in boot. When the system is still in real mode before changing the VESA BIOS mode it is possible to still display an visible error message on the screen. Similar to x86-64. Includes cleanups from Eric Biederman Signed-off-by: Andi Kleen --- include/asm-i386/cpufeature.h | 3 +++ include/asm-i386/required-features.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 include/asm-i386/required-features.h (limited to 'include/asm-i386') diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h index d1b8e4ab6c1..e66d004aa65 100644 --- a/include/asm-i386/cpufeature.h +++ b/include/asm-i386/cpufeature.h @@ -7,7 +7,10 @@ #ifndef __ASM_I386_CPUFEATURE_H #define __ASM_I386_CPUFEATURE_H +#ifndef __ASSEMBLY__ #include +#endif +#include #define NCAPINTS 7 /* N 32-bit words worth of info */ diff --git a/include/asm-i386/required-features.h b/include/asm-i386/required-features.h new file mode 100644 index 00000000000..9db866c1e64 --- /dev/null +++ b/include/asm-i386/required-features.h @@ -0,0 +1,34 @@ +#ifndef _ASM_REQUIRED_FEATURES_H +#define _ASM_REQUIRED_FEATURES_H 1 + +/* Define minimum CPUID feature set for kernel These bits are checked + really early to actually display a visible error message before the + kernel dies. Only add word 0 bits here + + Some requirements that are not in CPUID yet are also in the + CONFIG_X86_MINIMUM_CPU mode which is checked too. + + The real information is in arch/i386/Kconfig.cpu, this just converts + the CONFIGs into a bitmask */ + +#ifdef CONFIG_X86_PAE +#define NEED_PAE (1< Date: Wed, 2 May 2007 19:27:20 +0200 Subject: [PATCH] i386: Evaluate constant cpu features at runtime Redefine cpu_has() to evaluate cpu features already checked in early boot at compile time. This way the compiler might eliminate some dead code. Signed-off-by: Andi Kleen --- include/asm-i386/cpufeature.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h index e66d004aa65..20e849ae6dd 100644 --- a/include/asm-i386/cpufeature.h +++ b/include/asm-i386/cpufeature.h @@ -106,8 +106,12 @@ #define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */ #define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */ -#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability) -#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability) +#define cpu_has(c, bit) \ + ((__builtin_constant_p(bit) && (bit) < 32 && \ + (1UL << (bit)) & REQUIRED_MASK1) ? \ + 1 : \ + test_bit(bit, (c)->x86_capability)) +#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) #define cpu_has_vme boot_cpu_has(X86_FEATURE_VME) -- cgit v1.2.3 From e859dc553c857f4672b3bbb73ee9170a901f8712 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:20 +0200 Subject: [PATCH] i386: Implement alternative_io for i386 Ported from x86-64. Signed-off-by: Andi Kleen --- include/asm-i386/alternative.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/alternative.h b/include/asm-i386/alternative.h index 27746732958..0f70b379b02 100644 --- a/include/asm-i386/alternative.h +++ b/include/asm-i386/alternative.h @@ -82,6 +82,21 @@ static inline void alternatives_smp_switch(int smp) {} "663:\n\t" newinstr "\n664:\n" /* replacement */\ ".previous" :: "i" (feature), ##input) +/* Like alternative_input, but with a single output argument */ +#define alternative_io(oldinstr, newinstr, feature, output, input...) \ + asm volatile ("661:\n\t" oldinstr "\n662:\n" \ + ".section .altinstructions,\"a\"\n" \ + " .align 4\n" \ + " .long 661b\n" /* label */ \ + " .long 663f\n" /* new instruction */ \ + " .byte %c[feat]\n" /* feature bit */ \ + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ + ".section .altinstr_replacement,\"ax\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" : output : [feat] "i" (feature), ##input) + /* * Alternative inline assembly for SMP. * -- cgit v1.2.3 From 3aefbe0746580a710d4392a884ac1e4aac7c728f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:20 +0200 Subject: [PATCH] i386: Implement X86_FEATURE_SYNC_RDTSC on i386 Syncs up with x86-64. Signed-off-by: Andi Kleen --- include/asm-i386/cpufeature.h | 1 + include/asm-i386/tsc.h | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h index 20e849ae6dd..b8a3a5a85fd 100644 --- a/include/asm-i386/cpufeature.h +++ b/include/asm-i386/cpufeature.h @@ -79,6 +79,7 @@ #define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ #define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ #define X86_FEATURE_LAPIC_TIMER_BROKEN (3*32+ 14) /* lapic timer broken in C1 */ +#define X86_FEATURE_SYNC_RDTSC (3*32+15) /* RDTSC synchronizes the CPU */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ diff --git a/include/asm-i386/tsc.h b/include/asm-i386/tsc.h index 346976632e1..0181f9df753 100644 --- a/include/asm-i386/tsc.h +++ b/include/asm-i386/tsc.h @@ -35,7 +35,6 @@ static inline cycles_t get_cycles(void) static __always_inline cycles_t get_cycles_sync(void) { unsigned long long ret; -#ifdef X86_FEATURE_SYNC_RDTSC unsigned eax; /* @@ -44,9 +43,6 @@ static __always_inline cycles_t get_cycles_sync(void) */ alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC, "=a" (eax), "0" (1) : "ebx","ecx","edx","memory"); -#else - sync_core(); -#endif rdtscll(ret); return ret; -- cgit v1.2.3 From 9bccb23dc5fc2d268ab676e2d4212d29e230fd86 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:20 +0200 Subject: [PATCH] i386: Add X86_FEATURE_RDTSCP Following x86-64 Signed-off-by: Andi Kleen --- include/asm-i386/cpufeature.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h index b8a3a5a85fd..f514e906643 100644 --- a/include/asm-i386/cpufeature.h +++ b/include/asm-i386/cpufeature.h @@ -52,6 +52,7 @@ #define X86_FEATURE_MP (1*32+19) /* MP Capable. */ #define X86_FEATURE_NX (1*32+20) /* Execute Disable */ #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ +#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */ #define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ #define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ #define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */ -- cgit v1.2.3 From c5bcb5635a03da3158f121ae20ccbbf72b4fc62a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:21 +0200 Subject: [PATCH] x86: Use RDTSCP for synchronous get_cycles if possible RDTSCP is already synchronous and doesn't need an explicit CPUID. This is a little faster and more importantly avoids VMEXITs on Hypervisors. Original patch from Joerg Roedel, but reworked by AK Also includes miscompilation fix by Eric Biederman Cc: "Joerg Roedel" Signed-off-by: Andi Kleen --- include/asm-i386/tsc.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/asm-i386') diff --git a/include/asm-i386/tsc.h b/include/asm-i386/tsc.h index 0181f9df753..3f3c1fa000b 100644 --- a/include/asm-i386/tsc.h +++ b/include/asm-i386/tsc.h @@ -37,6 +37,15 @@ static __always_inline cycles_t get_cycles_sync(void) unsigned long long ret; unsigned eax; + /* + * Use RDTSCP if possible; it is guaranteed to be synchronous + * and doesn't cause a VMEXIT on Hypervisors + */ + alternative_io(ASM_NOP3, ".byte 0x0f,0x01,0xf9", X86_FEATURE_RDTSCP, + "=A" (ret), "0" (0ULL) : "ecx", "memory"); + if (ret) + return ret; + /* * Don't do an additional sync on CPUs where we know * RDTSC is already synchronous: -- cgit v1.2.3 From 02b64dab5675bc08048c7f70cbb0d8a417d20dbe Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 2 May 2007 19:27:21 +0200 Subject: [PATCH] i386: white space fixes in i387.h Signed-off-by: Jan Kiszka Signed-off-by: Andi Kleen --- include/asm-i386/i387.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/i387.h b/include/asm-i386/i387.h index 434936c732d..49dc8e14131 100644 --- a/include/asm-i386/i387.h +++ b/include/asm-i386/i387.h @@ -83,8 +83,8 @@ static inline void __save_init_fpu( struct task_struct *tsk ) #define __clear_fpu( tsk ) \ do { \ - if (task_thread_info(tsk)->status & TS_USEDFPU) { \ - asm volatile("fnclex ; fwait"); \ + if (task_thread_info(tsk)->status & TS_USEDFPU) { \ + asm volatile("fnclex ; fwait"); \ task_thread_info(tsk)->status &= ~TS_USEDFPU; \ stts(); \ } \ @@ -113,7 +113,7 @@ static inline void save_init_fpu( struct task_struct *tsk ) __clear_fpu( tsk ); \ preempt_enable(); \ } while (0) - \ + /* * FPU state interaction... */ -- cgit v1.2.3 From c41bf8fa5e777b6a8a19cf2484937a7167eac77f Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 2 May 2007 19:27:21 +0200 Subject: [PATCH] i386: avoid redundant preempt_disable in __unlazy_fpu There are two callers of __unlazy_fpu, unlazy_fpu and __switch_to, and none of them appear to require additional preempt_disable/enable here. Let's open-code save_init_fpu in __unlazy_fpu to save a few ops. Signed-off-by: Jan Kiszka Signed-off-by: Andi Kleen --- include/asm-i386/i387.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/asm-i386') diff --git a/include/asm-i386/i387.h b/include/asm-i386/i387.h index 49dc8e14131..cdd1e248e3b 100644 --- a/include/asm-i386/i387.h +++ b/include/asm-i386/i387.h @@ -74,11 +74,12 @@ static inline void __save_init_fpu( struct task_struct *tsk ) task_thread_info(tsk)->status &= ~TS_USEDFPU; } -#define __unlazy_fpu( tsk ) do { \ - if (task_thread_info(tsk)->status & TS_USEDFPU) \ - save_init_fpu( tsk ); \ - else \ - tsk->fpu_counter = 0; \ +#define __unlazy_fpu( tsk ) do { \ + if (task_thread_info(tsk)->status & TS_USEDFPU) { \ + __save_init_fpu(tsk); \ + stts(); \ + } else \ + tsk->fpu_counter = 0; \ } while (0) #define __clear_fpu( tsk ) \ -- cgit v1.2.3