diff options
Diffstat (limited to 'arch/x86')
416 files changed, 42140 insertions, 6131 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 49349ba77d8..350bee1d54d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -26,6 +26,7 @@ config X86 select HAVE_KPROBES select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_KRETPROBES + select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) @@ -115,6 +116,9 @@ config GENERIC_TIME_VSYSCALL config ARCH_HAS_CPU_RELAX def_bool y +config ARCH_HAS_DEFAULT_IDLE + def_bool y + config ARCH_HAS_CACHE_LINE_SIZE def_bool y @@ -1242,14 +1246,6 @@ config EFI resultant kernel should continue to boot on existing non-EFI platforms. -config IRQBALANCE - def_bool y - prompt "Enable kernel irq balancing" - depends on X86_32 && SMP && X86_IO_APIC - help - The default yes will allow the kernel to do irq load balancing. - Saying no will keep the kernel from doing irq load balancing. - config SECCOMP def_bool y prompt "Enable seccomp to safely compute untrusted bytecode" @@ -1642,6 +1638,8 @@ source "arch/x86/kernel/cpu/cpufreq/Kconfig" source "drivers/cpuidle/Kconfig" +source "drivers/idle/Kconfig" + endmenu diff --git a/arch/x86/Makefile b/arch/x86/Makefile index f5631da585b..d1a47adb5ae 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -110,16 +110,16 @@ KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) mcore-y := arch/x86/mach-default/ # Voyager subarch support -mflags-$(CONFIG_X86_VOYAGER) := -Iinclude/asm-x86/mach-voyager +mflags-$(CONFIG_X86_VOYAGER) := -Iarch/x86/include/asm/mach-voyager mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/ # generic subarchitecture -mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic +mflags-$(CONFIG_X86_GENERICARCH):= -Iarch/x86/include/asm/mach-generic fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/ # default subarch .h files -mflags-y += -Iinclude/asm-x86/mach-default +mflags-y += -Iarch/x86/include/asm/mach-default # 64 bit does not support subarch support - clear sub arch variables fcore-$(CONFIG_X86_64) := diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 5780d361105..da062216948 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -16,7 +16,7 @@ */ #undef CONFIG_PARAVIRT #ifdef CONFIG_X86_32 -#define ASM_X86__DESC_H 1 +#define _ASM_X86_DESC_H 1 #endif #ifdef CONFIG_X86_64 diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c index 49f26aaaebc..3fa979c9c36 100644 --- a/arch/x86/boot/video-bios.c +++ b/arch/x86/boot/video-bios.c @@ -17,7 +17,7 @@ #include "boot.h" #include "video.h" -__videocard video_bios; +static __videocard video_bios; /* Set a conventional BIOS mode */ static int set_bios_mode(u8 mode); @@ -119,7 +119,7 @@ static int bios_probe(void) return nmodes; } -__videocard video_bios = +static __videocard video_bios = { .card_name = "BIOS", .probe = bios_probe, diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 99b3079dc6a..75115849af3 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -20,7 +20,7 @@ static struct vesa_general_info vginfo; static struct vesa_mode_info vminfo; -__videocard video_vesa; +static __videocard video_vesa; #ifndef _WAKEUP static void vesa_store_mode_params_graphics(void); @@ -293,7 +293,7 @@ void vesa_store_edid(void) #endif /* not _WAKEUP */ -__videocard video_vesa = +static __videocard video_vesa = { .card_name = "VESA", .probe = vesa_probe, diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 52d0359719d..13b8c86ae98 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -287,7 +287,6 @@ CONFIG_MTRR=y # CONFIG_MTRR_SANITIZER is not set CONFIG_X86_PAT=y CONFIG_EFI=y -# CONFIG_IRQBALANCE is not set CONFIG_SECCOMP=y # CONFIG_HZ_100 is not set # CONFIG_HZ_250 is not set diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild new file mode 100644 index 00000000000..4a8e80cdcfa --- /dev/null +++ b/arch/x86/include/asm/Kbuild @@ -0,0 +1,24 @@ +include include/asm-generic/Kbuild.asm + +header-y += boot.h +header-y += bootparam.h +header-y += debugreg.h +header-y += ldt.h +header-y += msr-index.h +header-y += prctl.h +header-y += ptrace-abi.h +header-y += sigcontext32.h +header-y += ucontext.h +header-y += processor-flags.h + +unifdef-y += e820.h +unifdef-y += ist.h +unifdef-y += mce.h +unifdef-y += msr.h +unifdef-y += mtrr.h +unifdef-y += posix_types_32.h +unifdef-y += posix_types_64.h +unifdef-y += unistd_32.h +unifdef-y += unistd_64.h +unifdef-y += vm86.h +unifdef-y += vsyscall.h diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h new file mode 100644 index 00000000000..37822206083 --- /dev/null +++ b/arch/x86/include/asm/a.out-core.h @@ -0,0 +1,73 @@ +/* a.out coredump register dumper + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _ASM_X86_A_OUT_CORE_H +#define _ASM_X86_A_OUT_CORE_H + +#ifdef __KERNEL__ +#ifdef CONFIG_X86_32 + +#include <linux/user.h> +#include <linux/elfcore.h> + +/* + * fill in the user structure for an a.out core dump + */ +static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) +{ + u16 gs; + +/* changed the size calculations - should hopefully work better. lbt */ + dump->magic = CMAGIC; + dump->start_code = 0; + dump->start_stack = regs->sp & ~(PAGE_SIZE - 1); + dump->u_tsize = ((unsigned long)current->mm->end_code) >> PAGE_SHIFT; + dump->u_dsize = ((unsigned long)(current->mm->brk + (PAGE_SIZE - 1))) + >> PAGE_SHIFT; + dump->u_dsize -= dump->u_tsize; + dump->u_ssize = 0; + dump->u_debugreg[0] = current->thread.debugreg0; + dump->u_debugreg[1] = current->thread.debugreg1; + dump->u_debugreg[2] = current->thread.debugreg2; + dump->u_debugreg[3] = current->thread.debugreg3; + dump->u_debugreg[4] = 0; + dump->u_debugreg[5] = 0; + dump->u_debugreg[6] = current->thread.debugreg6; + dump->u_debugreg[7] = current->thread.debugreg7; + + if (dump->start_stack < TASK_SIZE) + dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) + >> PAGE_SHIFT; + + dump->regs.bx = regs->bx; + dump->regs.cx = regs->cx; + dump->regs.dx = regs->dx; + dump->regs.si = regs->si; + dump->regs.di = regs->di; + dump->regs.bp = regs->bp; + dump->regs.ax = regs->ax; + dump->regs.ds = (u16)regs->ds; + dump->regs.es = (u16)regs->es; + dump->regs.fs = (u16)regs->fs; + savesegment(gs, gs); + dump->regs.orig_ax = regs->orig_ax; + dump->regs.ip = regs->ip; + dump->regs.cs = (u16)regs->cs; + dump->regs.flags = regs->flags; + dump->regs.sp = regs->sp; + dump->regs.ss = (u16)regs->ss; + + dump->u_fpvalid = dump_fpu(regs, &dump->i387); +} + +#endif /* CONFIG_X86_32 */ +#endif /* __KERNEL__ */ +#endif /* _ASM_X86_A_OUT_CORE_H */ diff --git a/arch/x86/include/asm/a.out.h b/arch/x86/include/asm/a.out.h new file mode 100644 index 00000000000..4684f97a5bb --- /dev/null +++ b/arch/x86/include/asm/a.out.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_A_OUT_H +#define _ASM_X86_A_OUT_H + +struct exec +{ + unsigned int a_info; /* Use macros N_MAGIC, etc for access */ + unsigned a_text; /* length of text, in bytes */ + unsigned a_data; /* length of data, in bytes */ + unsigned a_bss; /* length of uninitialized data area for file, in bytes */ + unsigned a_syms; /* length of symbol table data in file, in bytes */ + unsigned a_entry; /* start address */ + unsigned a_trsize; /* length of relocation info for text, in bytes */ + unsigned a_drsize; /* length of relocation info for data, in bytes */ +}; + +#define N_TRSIZE(a) ((a).a_trsize) +#define N_DRSIZE(a) ((a).a_drsize) +#define N_SYMSIZE(a) ((a).a_syms) + +#endif /* _ASM_X86_A_OUT_H */ diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h new file mode 100644 index 00000000000..8d676d8ecde --- /dev/null +++ b/arch/x86/include/asm/acpi.h @@ -0,0 +1,178 @@ +#ifndef _ASM_X86_ACPI_H +#define _ASM_X86_ACPI_H + +/* + * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> + * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <acpi/pdc_intel.h> + +#include <asm/numa.h> +#include <asm/processor.h> +#include <asm/mmu.h> +#include <asm/mpspec.h> + +#define COMPILER_DEPENDENT_INT64 long long +#define COMPILER_DEPENDENT_UINT64 unsigned long long + +/* + * Calling conventions: + * + * ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads) + * ACPI_EXTERNAL_XFACE - External ACPI interfaces + * ACPI_INTERNAL_XFACE - Internal ACPI interfaces + * ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces + */ +#define ACPI_SYSTEM_XFACE +#define ACPI_EXTERNAL_XFACE +#define ACPI_INTERNAL_XFACE +#define ACPI_INTERNAL_VAR_XFACE + +/* Asm macros */ + +#define ACPI_ASM_MACROS +#define BREAKPOINT3 +#define ACPI_DISABLE_IRQS() local_irq_disable() +#define ACPI_ENABLE_IRQS() local_irq_enable() +#define ACPI_FLUSH_CPU_CACHE() wbinvd() + +int __acpi_acquire_global_lock(unsigned int *lock); +int __acpi_release_global_lock(unsigned int *lock); + +#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \ + ((Acq) = __acpi_acquire_global_lock(&facs->global_lock)) + +#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \ + ((Acq) = __acpi_release_global_lock(&facs->global_lock)) + +/* + * Math helper asm macros + */ +#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \ + asm("divl %2;" \ + : "=a"(q32), "=d"(r32) \ + : "r"(d32), \ + "0"(n_lo), "1"(n_hi)) + + +#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \ + asm("shrl $1,%2 ;" \ + "rcrl $1,%3;" \ + : "=r"(n_hi), "=r"(n_lo) \ + : "0"(n_hi), "1"(n_lo)) + +#ifdef CONFIG_ACPI +extern int acpi_lapic; +extern int acpi_ioapic; +extern int acpi_noirq; +extern int acpi_strict; +extern int acpi_disabled; +extern int acpi_ht; +extern int acpi_pci_disabled; +extern int acpi_skip_timer_override; +extern int acpi_use_timer_override; + +extern u8 acpi_sci_flags; +extern int acpi_sci_override_gsi; +void acpi_pic_sci_set_trigger(unsigned int, u16); + +static inline void disable_acpi(void) +{ + acpi_disabled = 1; + acpi_ht = 0; + acpi_pci_disabled = 1; + acpi_noirq = 1; +} + +/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */ +#define FIX_ACPI_PAGES 4 + +extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); + +static inline void acpi_noirq_set(void) { acpi_noirq = 1; } +static inline void acpi_disable_pci(void) +{ + acpi_pci_disabled = 1; + acpi_noirq_set(); +} +extern int acpi_irq_balance_set(char *str); + +/* routines for saving/restoring kernel state */ +extern int acpi_save_state_mem(void); +extern void acpi_restore_state_mem(void); + +extern unsigned long acpi_wakeup_address; + +/* early initialization routine */ +extern void acpi_reserve_bootmem(void); + +/* + * Check if the CPU can handle C2 and deeper + */ +static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) +{ + /* + * Early models (<=5) of AMD Opterons are not supposed to go into + * C2 state. + * + * Steppings 0x0A and later are good + */ + if (boot_cpu_data.x86 == 0x0F && + boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86_model <= 0x05 && + boot_cpu_data.x86_mask < 0x0A) + return 1; + else if (boot_cpu_has(X86_FEATURE_AMDC1E)) + return 1; + else + return max_cstate; +} + +#else /* !CONFIG_ACPI */ + +#define acpi_lapic 0 +#define acpi_ioapic 0 +static inline void acpi_noirq_set(void) { } +static inline void acpi_disable_pci(void) { } +static inline void disable_acpi(void) { } + +#endif /* !CONFIG_ACPI */ + +#define ARCH_HAS_POWER_INIT 1 + +struct bootnode; + +#ifdef CONFIG_ACPI_NUMA +extern int acpi_numa; +extern int acpi_scan_nodes(unsigned long start, unsigned long end); +#define NR_NODE_MEMBLKS (MAX_NUMNODES*2) +extern void acpi_fake_nodes(const struct bootnode *fake_nodes, + int num_nodes); +#else +static inline void acpi_fake_nodes(const struct bootnode *fake_nodes, + int num_nodes) +{ +} +#endif + +#define acpi_unlazy_tlb(x) leave_mm(x) + +#endif /* _ASM_X86_ACPI_H */ diff --git a/arch/x86/include/asm/agp.h b/arch/x86/include/asm/agp.h new file mode 100644 index 00000000000..9825cd64c9b --- /dev/null +++ b/arch/x86/include/asm/agp.h @@ -0,0 +1,35 @@ +#ifndef _ASM_X86_AGP_H +#define _ASM_X86_AGP_H + +#include <asm/pgtable.h> +#include <asm/cacheflush.h> + +/* + * Functions to keep the agpgart mappings coherent with the MMU. The + * GART gives the CPU a physical alias of pages in memory. The alias + * region is mapped uncacheable. Make sure there are no conflicting + * mappings with different cachability attributes for the same + * page. This avoids data corruption on some CPUs. + */ + +#define map_page_into_agp(page) set_pages_uc(page, 1) +#define unmap_page_from_agp(page) set_pages_wb(page, 1) + +/* + * Could use CLFLUSH here if the cpu supports it. But then it would + * need to be called for each cacheline of the whole page so it may + * not be worth it. Would need a page for it. + */ +#define flush_agp_cache() wbinvd() + +/* Convert a physical address to an address suitable for the GART. */ +#define phys_to_gart(x) (x) +#define gart_to_phys(x) (x) + +/* GATT allocation. Returns/accepts GATT kernel virtual address. */ +#define alloc_gatt_pages(order) \ + ((char *)__get_free_pages(GFP_KERNEL, (order))) +#define free_gatt_pages(table, order) \ + free_pages((unsigned long)(table), (order)) + +#endif /* _ASM_X86_AGP_H */ diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h new file mode 100644 index 00000000000..e2077d343c3 --- /dev/null +++ b/arch/x86/include/asm/alternative-asm.h @@ -0,0 +1,22 @@ +#ifdef __ASSEMBLY__ + +#ifdef CONFIG_X86_32 +# define X86_ALIGN .long +#else +# define X86_ALIGN .quad +#endif + +#ifdef CONFIG_SMP + .macro LOCK_PREFIX +1: lock + .section .smp_locks,"a" + .align 4 + X86_ALIGN 1b + .previous + .endm +#else + .macro LOCK_PREFIX + .endm +#endif + +#endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h new file mode 100644 index 00000000000..f6aa18eadf7 --- /dev/null +++ b/arch/x86/include/asm/alternative.h @@ -0,0 +1,183 @@ +#ifndef _ASM_X86_ALTERNATIVE_H +#define _ASM_X86_ALTERNATIVE_H + +#include <linux/types.h> +#include <linux/stddef.h> +#include <asm/asm.h> + +/* + * Alternative inline assembly for SMP. + * + * The LOCK_PREFIX macro defined here replaces the LOCK and + * LOCK_PREFIX macros used everywhere in the source tree. + * + * SMP alternatives use the same data structures as the other + * alternatives and the X86_FEATURE_UP flag to indicate the case of a + * UP system running a SMP kernel. The existing apply_alternatives() + * works fine for patching a SMP kernel for UP. + * + * The SMP alternative tables can be kept after boot and contain both + * UP and SMP versions of the instructions to allow switching back to + * SMP at runtime, when hotplugging in a new CPU, which is especially + * useful in virtualized environments. + * + * The very common lock prefix is handled as special case in a + * separate table which is a pure address list without replacement ptr + * and size information. That keeps the table sizes small. + */ + +#ifdef CONFIG_SMP +#define LOCK_PREFIX \ + ".section .smp_locks,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR "661f\n" /* address */ \ + ".previous\n" \ + "661:\n\tlock; " + +#else /* ! CONFIG_SMP */ +#define LOCK_PREFIX "" +#endif + +/* This must be included *after* the definition of LOCK_PREFIX */ +#include <asm/cpufeature.h> + +struct alt_instr { + u8 *instr; /* original instruction */ + u8 *replacement; + u8 cpuid; /* cpuid bit set for replacement */ + u8 instrlen; /* length of original instruction */ + u8 replacementlen; /* length of new instruction, <= instrlen */ + u8 pad1; +#ifdef CONFIG_X86_64 + u32 pad2; +#endif +}; + +extern void alternative_instructions(void); +extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); + +struct module; + +#ifdef CONFIG_SMP +extern void alternatives_smp_module_add(struct module *mod, char *name, + void *locks, void *locks_end, + void *text, void *text_end); +extern void alternatives_smp_module_del(struct module *mod); +extern void alternatives_smp_switch(int smp); +#else +static inline void alternatives_smp_module_add(struct module *mod, char *name, + void *locks, void *locks_end, + void *text, void *text_end) {} +static inline void alternatives_smp_module_del(struct module *mod) {} +static inline void alternatives_smp_switch(int smp) {} +#endif /* CONFIG_SMP */ + +const unsigned char *const *find_nop_table(void); + +/* + * Alternative instructions for different CPU types or capabilities. + * + * This allows to use optimized instructions even on generic binary + * kernels. + * + * length of oldinstr must be longer or equal the length of newinstr + * It can be padded with nops as needed. + * + * For non barrier like inlines please define new variants + * without volatile and memory clobber. + */ +#define alternative(oldinstr, newinstr, feature) \ + asm volatile ("661:\n\t" oldinstr "\n662:\n" \ + ".section .altinstructions,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR "661b\n" /* label */ \ + _ASM_PTR "663f\n" /* new instruction */ \ + " .byte %c0\n" /* feature bit */ \ + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ + ".section .altinstr_replacement,\"ax\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" :: "i" (feature) : "memory") + +/* + * Alternative inline assembly with input. + * + * Pecularities: + * No memory clobber here. + * Argument numbers start with 1. + * Best is to use constraints that are fixed size (like (%1) ... "r") + * If you use variable sized constraints like "m" or "g" in the + * replacement make sure to pad to the worst case length. + */ +#define alternative_input(oldinstr, newinstr, feature, input...) \ + asm volatile ("661:\n\t" oldinstr "\n662:\n" \ + ".section .altinstructions,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR "661b\n" /* label */ \ + _ASM_PTR "663f\n" /* new instruction */ \ + " .byte %c0\n" /* feature bit */ \ + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ + ".section .altinstr_replacement,\"ax\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" :: "i" (feature), ##input) + +/* Like alternative_input, but with a single output argument */ +#define alternative_io(oldinstr, newinstr, feature, output, input...) \ + asm volatile ("661:\n\t" oldinstr "\n662:\n" \ + ".section .altinstructions,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR "661b\n" /* label */ \ + _ASM_PTR "663f\n" /* new instruction */ \ + " .byte %c[feat]\n" /* feature bit */ \ + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ + ".section .altinstr_replacement,\"ax\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" : output : [feat] "i" (feature), ##input) + +/* + * use this macro(s) if you need more than one output parameter + * in alternative_io + */ +#define ASM_OUTPUT2(a, b) a, b + +struct paravirt_patch_site; +#ifdef CONFIG_PARAVIRT +void apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end); +#else +static inline void apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end) +{} +#define __parainstructions NULL +#define __parainstructions_end NULL +#endif + +extern void add_nops(void *insns, unsigned int len); + +/* + * Clear and restore the kernel write-protection flag on the local CPU. + * Allows the kernel to edit read-only pages. + * Side-effect: any interrupt handler running between save and restore will have + * the ability to write to read-only pages. + * + * Warning: + * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and + * no thread can be preempted in the instructions being modified (no iret to an + * invalid instruction possible) or if the instructions are changed from a + * consistent state to another consistent state atomically. + * More care must be taken when modifying code in the SMP case because of + * Intel's errata. + * On the local CPU you need to be protected again NMI or MCE handlers seeing an + * inconsistent instruction while you patch. + * The _early version expects the memory to already be RW. + */ + +extern void *text_poke(void *addr, const void *opcode, size_t len); +extern void *text_poke_early(void *addr, const void *opcode, size_t len); + +#endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h new file mode 100644 index 00000000000..f712344329b --- /dev/null +++ b/arch/x86/include/asm/amd_iommu.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <joerg.roedel@amd.com> + * Leo Duran <leo.duran@amd.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_X86_AMD_IOMMU_H +#define _ASM_X86_AMD_IOMMU_H + +#include <linux/irqreturn.h> + +#ifdef CONFIG_AMD_IOMMU +extern int amd_iommu_init(void); +extern int amd_iommu_init_dma_ops(void); +extern void amd_iommu_detect(void); +extern irqreturn_t amd_iommu_int_handler(int irq, void *data); +#else +static inline int amd_iommu_init(void) { return -ENODEV; } +static inline void amd_iommu_detect(void) { } +#endif + +#endif /* _ASM_X86_AMD_IOMMU_H */ diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h new file mode 100644 index 00000000000..1a30c0440c6 --- /dev/null +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -0,0 +1,404 @@ +/* + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <joerg.roedel@amd.com> + * Leo Duran <leo.duran@amd.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_X86_AMD_IOMMU_TYPES_H +#define _ASM_X86_AMD_IOMMU_TYPES_H + +#include <linux/types.h> +#include <linux/list.h> +#include <linux/spinlock.h> + +/* + * some size calculation constants + */ +#define DEV_TABLE_ENTRY_SIZE 32 +#define ALIAS_TABLE_ENTRY_SIZE 2 +#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) + +/* Length of the MMIO region for the AMD IOMMU */ +#define MMIO_REGION_LENGTH 0x4000 + +/* Capability offsets used by the driver */ +#define MMIO_CAP_HDR_OFFSET 0x00 +#define MMIO_RANGE_OFFSET 0x0c +#define MMIO_MISC_OFFSET 0x10 + +/* Masks, shifts and macros to parse the device range capability */ +#define MMIO_RANGE_LD_MASK 0xff000000 +#define MMIO_RANGE_FD_MASK 0x00ff0000 +#define MMIO_RANGE_BUS_MASK 0x0000ff00 +#define MMIO_RANGE_LD_SHIFT 24 +#define MMIO_RANGE_FD_SHIFT 16 +#define MMIO_RANGE_BUS_SHIFT 8 +#define MMIO_GET_LD(x) (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT) +#define MMIO_GET_FD(x) (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT) +#define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT) +#define MMIO_MSI_NUM(x) ((x) & 0x1f) + +/* Flag masks for the AMD IOMMU exclusion range */ +#define MMIO_EXCL_ENABLE_MASK 0x01ULL +#define MMIO_EXCL_ALLOW_MASK 0x02ULL + +/* Used offsets into the MMIO space */ +#define MMIO_DEV_TABLE_OFFSET 0x0000 +#define MMIO_CMD_BUF_OFFSET 0x0008 +#define MMIO_EVT_BUF_OFFSET 0x0010 +#define MMIO_CONTROL_OFFSET 0x0018 +#define MMIO_EXCL_BASE_OFFSET 0x0020 +#define MMIO_EXCL_LIMIT_OFFSET 0x0028 +#define MMIO_CMD_HEAD_OFFSET 0x2000 +#define MMIO_CMD_TAIL_OFFSET 0x2008 +#define MMIO_EVT_HEAD_OFFSET 0x2010 +#define MMIO_EVT_TAIL_OFFSET 0x2018 +#define MMIO_STATUS_OFFSET 0x2020 + +/* MMIO status bits */ +#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 + +/* event logging constants */ +#define EVENT_ENTRY_SIZE 0x10 +#define EVENT_TYPE_SHIFT 28 +#define EVENT_TYPE_MASK 0xf +#define EVENT_TYPE_ILL_DEV 0x1 +#define EVENT_TYPE_IO_FAULT 0x2 +#define EVENT_TYPE_DEV_TAB_ERR 0x3 +#define EVENT_TYPE_PAGE_TAB_ERR 0x4 +#define EVENT_TYPE_ILL_CMD 0x5 +#define EVENT_TYPE_CMD_HARD_ERR 0x6 +#define EVENT_TYPE_IOTLB_INV_TO 0x7 +#define EVENT_TYPE_INV_DEV_REQ 0x8 +#define EVENT_DEVID_MASK 0xffff +#define EVENT_DEVID_SHIFT 0 +#define EVENT_DOMID_MASK 0xffff +#define EVENT_DOMID_SHIFT 0 +#define EVENT_FLAGS_MASK 0xfff +#define EVENT_FLAGS_SHIFT 0x10 + +/* feature control bits */ +#define CONTROL_IOMMU_EN 0x00ULL +#define CONTROL_HT_TUN_EN 0x01ULL +#define CONTROL_EVT_LOG_EN 0x02ULL +#define CONTROL_EVT_INT_EN 0x03ULL +#define CONTROL_COMWAIT_EN 0x04ULL +#define CONTROL_PASSPW_EN 0x08ULL +#define CONTROL_RESPASSPW_EN 0x09ULL +#define CONTROL_COHERENT_EN 0x0aULL +#define CONTROL_ISOC_EN 0x0bULL +#define CONTROL_CMDBUF_EN 0x0cULL +#define CONTROL_PPFLOG_EN 0x0dULL +#define CONTROL_PPFINT_EN 0x0eULL + +/* command specific defines */ +#define CMD_COMPL_WAIT 0x01 +#define CMD_INV_DEV_ENTRY 0x02 +#define CMD_INV_IOMMU_PAGES 0x03 + +#define CMD_COMPL_WAIT_STORE_MASK 0x01 +#define CMD_COMPL_WAIT_INT_MASK 0x02 +#define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01 +#define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02 + +#define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL + +/* macros and definitions for device table entries */ +#define DEV_ENTRY_VALID 0x00 +#define DEV_ENTRY_TRANSLATION 0x01 +#define DEV_ENTRY_IR 0x3d +#define DEV_ENTRY_IW 0x3e +#define DEV_ENTRY_NO_PAGE_FAULT 0x62 +#define DEV_ENTRY_EX 0x67 +#define DEV_ENTRY_SYSMGT1 0x68 +#define DEV_ENTRY_SYSMGT2 0x69 +#define DEV_ENTRY_INIT_PASS 0xb8 +#define DEV_ENTRY_EINT_PASS 0xb9 +#define DEV_ENTRY_NMI_PASS 0xba +#define DEV_ENTRY_LINT0_PASS 0xbe +#define DEV_ENTRY_LINT1_PASS 0xbf +#define DEV_ENTRY_MODE_MASK 0x07 +#define DEV_ENTRY_MODE_SHIFT 0x09 + +/* constants to configure the command buffer */ +#define CMD_BUFFER_SIZE 8192 +#define CMD_BUFFER_ENTRIES 512 +#define MMIO_CMD_SIZE_SHIFT 56 +#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT) + +/* constants for event buffer handling */ +#define EVT_BUFFER_SIZE 8192 /* 512 entries */ +#define EVT_LEN_MASK (0x9ULL << 56) + +#define PAGE_MODE_1_LEVEL 0x01 +#define PAGE_MODE_2_LEVEL 0x02 +#define PAGE_MODE_3_LEVEL 0x03 + +#define IOMMU_PDE_NL_0 0x000ULL +#define IOMMU_PDE_NL_1 0x200ULL +#define IOMMU_PDE_NL_2 0x400ULL +#define IOMMU_PDE_NL_3 0x600ULL + +#define IOMMU_PTE_L2_INDEX(address) (((address) >> 30) & 0x1ffULL) +#define IOMMU_PTE_L1_INDEX(address) (((address) >> 21) & 0x1ffULL) +#define IOMMU_PTE_L0_INDEX(address) (((address) >> 12) & 0x1ffULL) + +#define IOMMU_MAP_SIZE_L1 (1ULL << 21) +#define IOMMU_MAP_SIZE_L2 (1ULL << 30) +#define IOMMU_MAP_SIZE_L3 (1ULL << 39) + +#define IOMMU_PTE_P (1ULL << 0) +#define IOMMU_PTE_TV (1ULL << 1) +#define IOMMU_PTE_U (1ULL << 59) +#define IOMMU_PTE_FC (1ULL << 60) +#define IOMMU_PTE_IR (1ULL << 61) +#define IOMMU_PTE_IW (1ULL << 62) + +#define IOMMU_L1_PDE(address) \ + ((address) | IOMMU_PDE_NL_1 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) +#define IOMMU_L2_PDE(address) \ + ((address) | IOMMU_PDE_NL_2 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) + +#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) +#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) +#define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) +#define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07) + +#define IOMMU_PROT_MASK 0x03 +#define IOMMU_PROT_IR 0x01 +#define IOMMU_PROT_IW 0x02 + +/* IOMMU capabilities */ +#define IOMMU_CAP_IOTLB 24 +#define IOMMU_CAP_NPCACHE 26 + +#define MAX_DOMAIN_ID 65536 + +/* FIXME: move this macro to <linux/pci.h> */ +#define PCI_BUS(x) (((x) >> 8) & 0xff) + +/* + * This structure contains generic data for IOMMU protection domains + * independent of their use. + */ +struct protection_domain { + spinlock_t lock; /* mostly used to lock the page table*/ + u16 id; /* the domain id written to the device table */ + int mode; /* paging mode (0-6 levels) */ + u64 *pt_root; /* page table root pointer */ + void *priv; /* private data */ +}; + +/* + * Data container for a dma_ops specific protection domain + */ +struct dma_ops_domain { + struct list_head list; + + /* generic protection domain information */ + struct protection_domain domain; + + /* size of the aperture for the mappings */ + unsigned long aperture_size; + + /* address we start to search for free addresses */ + unsigned long next_bit; + + /* address allocation bitmap */ + unsigned long *bitmap; + + /* + * Array of PTE pages for the aperture. In this array we save all the + * leaf pages of the domain page table used for the aperture. This way + * we don't need to walk the page table to find a specific PTE. We can + * just calculate its address in constant time. + */ + u64 **pte_pages; + + /* This will be set to true when TLB needs to be flushed */ + bool need_flush; + + /* + * if this is a preallocated domain, keep the device for which it was + * preallocated in this variable + */ + u16 target_dev; +}; + +/* + * Structure where we save information about one hardware AMD IOMMU in the + * system. + */ +struct amd_iommu { + struct list_head list; + + /* locks the accesses to the hardware */ + spinlock_t lock; + + /* Pointer to PCI device of this IOMMU */ + struct pci_dev *dev; + + /* + * Capability pointer. There could be more than one IOMMU per PCI + * device function if there are more than one AMD IOMMU capability + * pointers. + */ + u16 cap_ptr; + + /* physical address of MMIO space */ + u64 mmio_phys; + /* virtual address of MMIO space */ + u8 *mmio_base; + + /* capabilities of that IOMMU read from ACPI */ + u32 cap; + + /* pci domain of this IOMMU */ + u16 pci_seg; + + /* first device this IOMMU handles. read from PCI */ + u16 first_device; + /* last device this IOMMU handles. read from PCI */ + u16 last_device; + + /* start of exclusion range of that IOMMU */ + u64 exclusion_start; + /* length of exclusion range of that IOMMU */ + u64 exclusion_length; + + /* command buffer virtual address */ + u8 *cmd_buf; + /* size of command buffer */ + u32 cmd_buf_size; + + /* event buffer virtual address */ + u8 *evt_buf; + /* size of event buffer */ + u32 evt_buf_size; + /* MSI number for event interrupt */ + u16 evt_msi_num; + + /* if one, we need to send a completion wait command */ + int need_sync; + + /* true if interrupts for this IOMMU are already enabled */ + bool int_enabled; + + /* default dma_ops domain for that IOMMU */ + struct dma_ops_domain *default_dom; +}; + +/* + * List with all IOMMUs in the system. This list is not locked because it is + * only written and read at driver initialization or suspend time + */ +extern struct list_head amd_iommu_list; + +/* + * Structure defining one entry in the device table + */ +struct dev_table_entry { + u32 data[8]; +}; + +/* + * One entry for unity mappings parsed out of the ACPI table. + */ +struct unity_map_entry { + struct list_head list; + + /* starting device id this entry is used for (including) */ + u16 devid_start; + /* end device id this entry is used for (including) */ + u16 devid_end; + + /* start address to unity map (including) */ + u64 address_start; + /* end address to unity map (including) */ + u64 address_end; + + /* required protection */ + int prot; +}; + +/* + * List of all unity mappings. It is not locked because as runtime it is only + * read. It is created at ACPI table parsing time. + */ +extern struct list_head amd_iommu_unity_map; + +/* + * Data structures for device handling + */ + +/* + * Device table used by hardware. Read and write accesses by software are + * locked with the amd_iommu_pd_table lock. + */ +extern struct dev_table_entry *amd_iommu_dev_table; + +/* + * Alias table to find requestor ids to device ids. Not locked because only + * read on runtime. + */ +extern u16 *amd_iommu_alias_table; + +/* + * Reverse lookup table to find the IOMMU which translates a specific device. + */ +extern struct amd_iommu **amd_iommu_rlookup_table; + +/* size of the dma_ops aperture as power of 2 */ +extern unsigned amd_iommu_aperture_order; + +/* largest PCI device id we expect translation requests for */ +extern u16 amd_iommu_last_bdf; + +/* data structures for protection domain handling */ +extern struct protection_domain **amd_iommu_pd_table; + +/* allocation bitmap for domain ids */ +extern unsigned long *amd_iommu_pd_alloc_bitmap; + +/* will be 1 if device isolation is enabled */ +extern int amd_iommu_isolate; + +/* + * If true, the addresses will be flushed on unmap time, not when + * they are reused + */ +extern bool amd_iommu_unmap_flush; + +/* takes a PCI device id and prints it out in a readable form */ +static inline void print_devid(u16 devid, int nl) +{ + int bus = devid >> 8; + int dev = devid >> 3 & 0x1f; + int fn = devid & 0x07; + + printk("%02x:%02x.%x", bus, dev, fn); + if (nl) + printk("\n"); +} + +/* takes bus and device/function and returns the device id + * FIXME: should that be in generic PCI code? */ +static inline u16 calc_devid(u8 bus, u8 devfn) +{ + return (((u16)bus) << 8) | devfn; +} + +#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h new file mode 100644 index 00000000000..3b1510b4fc5 --- /dev/null +++ b/arch/x86/include/asm/apic.h @@ -0,0 +1,199 @@ +#ifndef _ASM_X86_APIC_H +#define _ASM_X86_APIC_H + +#include <linux/pm.h> +#include <linux/delay.h> + +#include <asm/alternative.h> +#include <asm/fixmap.h> +#include <asm/apicdef.h> +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/cpufeature.h> +#include <asm/msr.h> + +#define ARCH_APICTIMER_STOPS_ON_C3 1 + +/* + * Debugging macros + */ +#define APIC_QUIET 0 +#define APIC_VERBOSE 1 +#define APIC_DEBUG 2 + +/* + * Define the default level of output to be very little + * This can be turned up by using apic=verbose for more + * information and apic=debug for _lots_ of information. + * apic_verbosity is defined in apic.c + */ +#define apic_printk(v, s, a...) do { \ + if ((v) <= apic_verbosity) \ + printk(s, ##a); \ + } while (0) + + +extern void generic_apic_probe(void); + +#ifdef CONFIG_X86_LOCAL_APIC + +extern unsigned int apic_verbosity; +extern int local_apic_timer_c2_ok; + +extern int disable_apic; +/* + * Basic functions accessing APICs. + */ +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define setup_boot_clock setup_boot_APIC_clock +#define setup_secondary_clock setup_secondary_APIC_clock +#endif + +extern int is_vsmp_box(void); +extern void xapic_wait_icr_idle(void); +extern u32 safe_xapic_wait_icr_idle(void); +extern u64 xapic_icr_read(void); +extern void xapic_icr_write(u32, u32); +extern int setup_profiling_timer(unsigned int); + +static inline void native_apic_mem_write(u32 reg, u32 v) +{ + volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); + + alternative_io("movl %0, %1", "xchgl %0, %1", X86_FEATURE_11AP, + ASM_OUTPUT2("=r" (v), "=m" (*addr)), + ASM_OUTPUT2("0" (v), "m" (*addr))); +} + +static inline u32 native_apic_mem_read(u32 reg) +{ + return *((volatile u32 *)(APIC_BASE + reg)); +} + +static inline void native_apic_msr_write(u32 reg, u32 v) +{ + if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || + reg == APIC_LVR) + return; + + wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0); +} + +static inline u32 native_apic_msr_read(u32 reg) +{ + u32 low, high; + + if (reg == APIC_DFR) + return -1; + + rdmsr(APIC_BASE_MSR + (reg >> 4), low, high); + return low; +} + +#ifndef CONFIG_X86_32 +extern int x2apic, x2apic_preenabled; +extern void check_x2apic(void); +extern void enable_x2apic(void); +extern void enable_IR_x2apic(void); +extern void x2apic_icr_write(u32 low, u32 id); +static inline int x2apic_enabled(void) +{ + int msr, msr2; + + if (!cpu_has_x2apic) + return 0; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + if (msr & X2APIC_ENABLE) + return 1; + return 0; +} +#else +#define x2apic_enabled() 0 +#endif + +struct apic_ops { + u32 (*read)(u32 reg); + void (*write)(u32 reg, u32 v); + u64 (*icr_read)(void); + void (*icr_write)(u32 low, u32 high); + void (*wait_icr_idle)(void); + u32 (*safe_wait_icr_idle)(void); +}; + +extern struct apic_ops *apic_ops; + +#define apic_read (apic_ops->read) +#define apic_write (apic_ops->write) +#define apic_icr_read (apic_ops->icr_read) +#define apic_icr_write (apic_ops->icr_write) +#define apic_wait_icr_idle (apic_ops->wait_icr_idle) +#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle) + +extern int get_physical_broadcast(void); + +#ifdef CONFIG_X86_64 +static inline void ack_x2APIC_irq(void) +{ + /* Docs say use 0 for future compatibility */ + native_apic_msr_write(APIC_EOI, 0); +} +#endif + + +static inline void ack_APIC_irq(void) +{ + /* + * ack_APIC_irq() actually gets compiled as a single instruction + * ... yummie. + */ + + /* Docs say use 0 for future compatibility */ + apic_write(APIC_EOI, 0); +} + +extern int lapic_get_maxlvt(void); +extern void clear_local_APIC(void); +extern void connect_bsp_APIC(void); +extern void disconnect_bsp_APIC(int virt_wire_setup); +extern void disable_local_APIC(void); +extern void lapic_shutdown(void); +extern int verify_local_APIC(void); +extern void cache_APIC_registers(void); +extern void sync_Arb_IDs(void); +extern void init_bsp_APIC(void); +extern void setup_local_APIC(void); +extern void end_local_APIC_setup(void); +extern void init_apic_mappings(void); +extern void setup_boot_APIC_clock(void); +extern void setup_secondary_APIC_clock(void); +extern int APIC_init_uniprocessor(void); +extern void enable_NMI_through_LVT0(void); + +/* + * On 32bit this is mach-xxx local + */ +#ifdef CONFIG_X86_64 +extern void early_init_lapic_mapping(void); +extern int apic_is_clustered_box(void); +#else +static inline int apic_is_clustered_box(void) +{ + return 0; +} +#endif + +extern u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask); +extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask); + + +#else /* !CONFIG_X86_LOCAL_APIC */ +static inline void lapic_shutdown(void) { } +#define local_apic_timer_c2_ok 1 +static inline void init_apic_mappings(void) { } + +#endif /* !CONFIG_X86_LOCAL_APIC */ + +#endif /* _ASM_X86_APIC_H */ diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h new file mode 100644 index 00000000000..63134e31e8b --- /dev/null +++ b/arch/x86/include/asm/apicdef.h @@ -0,0 +1,417 @@ +#ifndef _ASM_X86_APICDEF_H +#define _ASM_X86_APICDEF_H + +/* + * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) + * + * Alan Cox <Alan.Cox@linux.org>, 1995. + * Ingo Molnar <mingo@redhat.com>, 1999, 2000 + */ + +#define APIC_DEFAULT_PHYS_BASE 0xfee00000 + +#define APIC_ID 0x20 + +#define APIC_LVR 0x30 +#define APIC_LVR_MASK 0xFF00FF +#define GET_APIC_VERSION(x) ((x) & 0xFFu) +#define GET_APIC_MAXLVT(x) (((x) >> 16) & 0xFFu) +#ifdef CONFIG_X86_32 +# define APIC_INTEGRATED(x) ((x) & 0xF0u) +#else +# define APIC_INTEGRATED(x) (1) +#endif +#define APIC_XAPIC(x) ((x) >= 0x14) +#define APIC_TASKPRI 0x80 +#define APIC_TPRI_MASK 0xFFu +#define APIC_ARBPRI 0x90 +#define APIC_ARBPRI_MASK 0xFFu +#define APIC_PROCPRI 0xA0 +#define APIC_EOI 0xB0 +#define APIC_EIO_ACK 0x0 +#define APIC_RRR 0xC0 +#define APIC_LDR 0xD0 +#define APIC_LDR_MASK (0xFFu << 24) +#define GET_APIC_LOGICAL_ID(x) (((x) >> 24) & 0xFFu) +#define SET_APIC_LOGICAL_ID(x) (((x) << 24)) +#define APIC_ALL_CPUS 0xFFu +#define APIC_DFR 0xE0 +#define APIC_DFR_CLUSTER 0x0FFFFFFFul +#define APIC_DFR_FLAT 0xFFFFFFFFul +#define APIC_SPIV 0xF0 +#define APIC_SPIV_FOCUS_DISABLED (1 << 9) +#define APIC_SPIV_APIC_ENABLED (1 << 8) +#define APIC_ISR 0x100 +#define APIC_ISR_NR 0x8 /* Number of 32 bit ISR registers. */ +#define APIC_TMR 0x180 +#define APIC_IRR 0x200 +#define APIC_ESR 0x280 +#define APIC_ESR_SEND_CS 0x00001 +#define APIC_ESR_RECV_CS 0x00002 +#define APIC_ESR_SEND_ACC 0x00004 +#define APIC_ESR_RECV_ACC 0x00008 +#define APIC_ESR_SENDILL 0x00020 +#define APIC_ESR_RECVILL 0x00040 +#define APIC_ESR_ILLREGA 0x00080 +#define APIC_ICR 0x300 +#define APIC_DEST_SELF 0x40000 +#define APIC_DEST_ALLINC 0x80000 +#define APIC_DEST_ALLBUT 0xC0000 +#define APIC_ICR_RR_MASK 0x30000 +#define APIC_ICR_RR_INVALID 0x00000 +#define APIC_ICR_RR_INPROG 0x10000 +#define APIC_ICR_RR_VALID 0x20000 +#define APIC_INT_LEVELTRIG 0x08000 +#define APIC_INT_ASSERT 0x04000 +#define APIC_ICR_BUSY 0x01000 +#define APIC_DEST_LOGICAL 0x00800 +#define APIC_DEST_PHYSICAL 0x00000 +#define APIC_DM_FIXED 0x00000 +#define APIC_DM_LOWEST 0x00100 +#define APIC_DM_SMI 0x00200 +#define APIC_DM_REMRD 0x00300 +#define APIC_DM_NMI 0x00400 +#define APIC_DM_INIT 0x00500 +#define APIC_DM_STARTUP 0x00600 +#define APIC_DM_EXTINT 0x00700 +#define APIC_VECTOR_MASK 0x000FF +#define APIC_ICR2 0x310 +#define GET_APIC_DEST_FIELD(x) (((x) >> 24) & 0xFF) +#define SET_APIC_DEST_FIELD(x) ((x) << 24) +#define APIC_LVTT 0x320 +#define APIC_LVTTHMR 0x330 +#define APIC_LVTPC 0x340 +#define APIC_LVT0 0x350 +#define APIC_LVT_TIMER_BASE_MASK (0x3 << 18) +#define GET_APIC_TIMER_BASE(x) (((x) >> 18) & 0x3) +#define SET_APIC_TIMER_BASE(x) (((x) << 18)) +#define APIC_TIMER_BASE_CLKIN 0x0 +#define APIC_TIMER_BASE_TMBASE 0x1 +#define APIC_TIMER_BASE_DIV 0x2 +#define APIC_LVT_TIMER_PERIODIC (1 << 17) +#define APIC_LVT_MASKED (1 << 16) +#define APIC_LVT_LEVEL_TRIGGER (1 << 15) +#define APIC_LVT_REMOTE_IRR (1 << 14) +#define APIC_INPUT_POLARITY (1 << 13) +#define APIC_SEND_PENDING (1 << 12) +#define APIC_MODE_MASK 0x700 +#define GET_APIC_DELIVERY_MODE(x) (((x) >> 8) & 0x7) +#define SET_APIC_DELIVERY_MODE(x, y) (((x) & ~0x700) | ((y) << 8)) +#define APIC_MODE_FIXED 0x0 +#define APIC_MODE_NMI 0x4 +#define APIC_MODE_EXTINT 0x7 +#define APIC_LVT1 0x360 +#define APIC_LVTERR 0x370 +#define APIC_TMICT 0x380 +#define APIC_TMCCT 0x390 +#define APIC_TDCR 0x3E0 +#define APIC_SELF_IPI 0x3F0 +#define APIC_TDR_DIV_TMBASE (1 << 2) +#define APIC_TDR_DIV_1 0xB +#define APIC_TDR_DIV_2 0x0 +#define APIC_TDR_DIV_4 0x1 +#define APIC_TDR_DIV_8 0x2 +#define APIC_TDR_DIV_16 0x3 +#define APIC_TDR_DIV_32 0x8 +#define APIC_TDR_DIV_64 0x9 +#define APIC_TDR_DIV_128 0xA +#define APIC_EILVT0 0x500 +#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ +#define APIC_EILVT_NR_AMD_10H 4 +#define APIC_EILVT_LVTOFF(x) (((x) >> 4) & 0xF) +#define APIC_EILVT_MSG_FIX 0x0 +#define APIC_EILVT_MSG_SMI 0x2 +#define APIC_EILVT_MSG_NMI 0x4 +#define APIC_EILVT_MSG_EXT 0x7 +#define APIC_EILVT_MASKED (1 << 16) +#define APIC_EILVT1 0x510 +#define APIC_EILVT2 0x520 +#define APIC_EILVT3 0x530 + +#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) +#define APIC_BASE_MSR 0x800 +#define X2APIC_ENABLE (1UL << 10) + +#ifdef CONFIG_X86_32 +# define MAX_IO_APICS 64 +#else +# define MAX_IO_APICS 128 +# define MAX_LOCAL_APIC 32768 +#endif + +/* + * All x86-64 systems are xAPIC compatible. + * In the following, "apicid" is a physical APIC ID. + */ +#define XAPIC_DEST_CPUS_SHIFT 4 +#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) +#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) +#define APIC_CLUSTER(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) +#define APIC_CLUSTERID(apicid) (APIC_CLUSTER(apicid) >> XAPIC_DEST_CPUS_SHIFT) +#define APIC_CPUID(apicid) ((apicid) & XAPIC_DEST_CPUS_MASK) +#define NUM_APIC_CLUSTERS ((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT) + +/* + * the local APIC register structure, memory mapped. Not terribly well + * tested, but we might eventually use this one in the future - the + * problem why we cannot use it right now is the P5 APIC, it has an + * errata which cannot take 8-bit reads and writes, only 32-bit ones ... + */ +#define u32 unsigned int + +struct local_apic { + +/*000*/ struct { u32 __reserved[4]; } __reserved_01; + +/*010*/ struct { u32 __reserved[4]; } __reserved_02; + +/*020*/ struct { /* APIC ID Register */ + u32 __reserved_1 : 24, + phys_apic_id : 4, + __reserved_2 : 4; + u32 __reserved[3]; + } id; + +/*030*/ const + struct { /* APIC Version Register */ + u32 version : 8, + __reserved_1 : 8, + max_lvt : 8, + __reserved_2 : 8; + u32 __reserved[3]; + } version; + +/*040*/ struct { u32 __reserved[4]; } __reserved_03; + +/*050*/ struct { u32 __reserved[4]; } __reserved_04; + +/*060*/ struct { u32 __reserved[4]; } __reserved_05; + +/*070*/ struct { u32 __reserved[4]; } __reserved_06; + +/*080*/ struct { /* Task Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } tpr; + +/*090*/ const + struct { /* Arbitration Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } apr; + +/*0A0*/ const + struct { /* Processor Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } ppr; + +/*0B0*/ struct { /* End Of Interrupt Register */ + u32 eoi; + u32 __reserved[3]; + } eoi; + +/*0C0*/ struct { u32 __reserved[4]; } __reserved_07; + +/*0D0*/ struct { /* Logical Destination Register */ + u32 __reserved_1 : 24, + logical_dest : 8; + u32 __reserved_2[3]; + } ldr; + +/*0E0*/ struct { /* Destination Format Register */ + u32 __reserved_1 : 28, + model : 4; + u32 __reserved_2[3]; + } dfr; + +/*0F0*/ struct { /* Spurious Interrupt Vector Register */ + u32 spurious_vector : 8, + apic_enabled : 1, + focus_cpu : 1, + __reserved_2 : 22; + u32 __reserved_3[3]; + } svr; + +/*100*/ struct { /* In Service Register */ +/*170*/ u32 bitfield; + u32 __reserved[3]; + } isr [8]; + +/*180*/ struct { /* Trigger Mode Register */ +/*1F0*/ u32 bitfield; + u32 __reserved[3]; + } tmr [8]; + +/*200*/ struct { /* Interrupt Request Register */ +/*270*/ u32 bitfield; + u32 __reserved[3]; + } irr [8]; + +/*280*/ union { /* Error Status Register */ + struct { + u32 send_cs_error : 1, + receive_cs_error : 1, + send_accept_error : 1, + receive_accept_error : 1, + __reserved_1 : 1, + send_illegal_vector : 1, + receive_illegal_vector : 1, + illegal_register_address : 1, + __reserved_2 : 24; + u32 __reserved_3[3]; + } error_bits; + struct { + u32 errors; + u32 __reserved_3[3]; + } all_errors; + } esr; + +/*290*/ struct { u32 __reserved[4]; } __reserved_08; + +/*2A0*/ struct { u32 __reserved[4]; } __reserved_09; + +/*2B0*/ struct { u32 __reserved[4]; } __reserved_10; + +/*2C0*/ struct { u32 __reserved[4]; } __reserved_11; + +/*2D0*/ struct { u32 __reserved[4]; } __reserved_12; + +/*2E0*/ struct { u32 __reserved[4]; } __reserved_13; + +/*2F0*/ struct { u32 __reserved[4]; } __reserved_14; + +/*300*/ struct { /* Interrupt Command Register 1 */ + u32 vector : 8, + delivery_mode : 3, + destination_mode : 1, + delivery_status : 1, + __reserved_1 : 1, + level : 1, + trigger : 1, + __reserved_2 : 2, + shorthand : 2, + __reserved_3 : 12; + u32 __reserved_4[3]; + } icr1; + +/*310*/ struct { /* Interrupt Command Register 2 */ + union { + u32 __reserved_1 : 24, + phys_dest : 4, + __reserved_2 : 4; + u32 __reserved_3 : 24, + logical_dest : 8; + } dest; + u32 __reserved_4[3]; + } icr2; + +/*320*/ struct { /* LVT - Timer */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + timer_mode : 1, + __reserved_3 : 14; + u32 __reserved_4[3]; + } lvt_timer; + +/*330*/ struct { /* LVT - Thermal Sensor */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_thermal; + +/*340*/ struct { /* LVT - Performance Counter */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_pc; + +/*350*/ struct { /* LVT - LINT0 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint0; + +/*360*/ struct { /* LVT - LINT1 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint1; + +/*370*/ struct { /* LVT - Error */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_error; + +/*380*/ struct { /* Timer Initial Count Register */ + u32 initial_count; + u32 __reserved_2[3]; + } timer_icr; + +/*390*/ const + struct { /* Timer Current Count Register */ + u32 curr_count; + u32 __reserved_2[3]; + } timer_ccr; + +/*3A0*/ struct { u32 __reserved[4]; } __reserved_16; + +/*3B0*/ struct { u32 __reserved[4]; } __reserved_17; + +/*3C0*/ struct { u32 __reserved[4]; } __reserved_18; + +/*3D0*/ struct { u32 __reserved[4]; } __reserved_19; + +/*3E0*/ struct { /* Timer Divide Configuration Register */ + u32 divisor : 4, + __reserved_1 : 28; + u32 __reserved_2[3]; + } timer_dcr; + +/*3F0*/ struct { u32 __reserved[4]; } __reserved_20; + +} __attribute__ ((packed)); + +#undef u32 + +#ifdef CONFIG_X86_32 + #define BAD_APICID 0xFFu +#else + #define BAD_APICID 0xFFFFu +#endif +#endif /* _ASM_X86_APICDEF_H */ diff --git a/arch/x86/include/asm/arch_hooks.h b/arch/x86/include/asm/arch_hooks.h new file mode 100644 index 00000000000..cbd4957838a --- /dev/null +++ b/arch/x86/include/asm/arch_hooks.h @@ -0,0 +1,26 @@ +#ifndef _ASM_X86_ARCH_HOOKS_H +#define _ASM_X86_ARCH_HOOKS_H + +#include <linux/interrupt.h> + +/* + * linux/include/asm/arch_hooks.h + * + * define the architecture specific hooks + */ + +/* these aren't arch hooks, they are generic routines + * that can be used by the hooks */ +extern void init_ISA_irqs(void); +extern irqreturn_t timer_interrupt(int irq, void *dev_id); + +/* these are the defined hooks */ +extern void intr_init_hook(void); +extern void pre_intr_init_hook(void); +extern void pre_setup_arch_hook(void); +extern void trap_init_hook(void); +extern void pre_time_init_hook(void); +extern void time_init_hook(void); +extern void mca_nmi_hook(void); + +#endif /* _ASM_X86_ARCH_HOOKS_H */ diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h new file mode 100644 index 00000000000..56be78f582f --- /dev/null +++ b/arch/x86/include/asm/asm.h @@ -0,0 +1,47 @@ +#ifndef _ASM_X86_ASM_H +#define _ASM_X86_ASM_H + +#ifdef __ASSEMBLY__ +# define __ASM_FORM(x) x +# define __ASM_EX_SEC .section __ex_table +#else +# define __ASM_FORM(x) " " #x " " +# define __ASM_EX_SEC " .section __ex_table,\"a\"\n" +#endif + +#ifdef CONFIG_X86_32 +# define __ASM_SEL(a,b) __ASM_FORM(a) +#else +# define __ASM_SEL(a,b) __ASM_FORM(b) +#endif + +#define __ASM_SIZE(inst) __ASM_SEL(inst##l, inst##q) +#define __ASM_REG(reg) __ASM_SEL(e##reg, r##reg) + +#define _ASM_PTR __ASM_SEL(.long, .quad) +#define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8) + +#define _ASM_MOV __ASM_SIZE(mov) +#define _ASM_INC __ASM_SIZE(inc) +#define _ASM_DEC __ASM_SIZE(dec) +#define _ASM_ADD __ASM_SIZE(add) +#define _ASM_SUB __ASM_SIZE(sub) +#define _ASM_XADD __ASM_SIZE(xadd) + +#define _ASM_AX __ASM_REG(ax) +#define _ASM_BX __ASM_REG(bx) +#define _ASM_CX __ASM_REG(cx) +#define _ASM_DX __ASM_REG(dx) +#define _ASM_SP __ASM_REG(sp) +#define _ASM_BP __ASM_REG(bp) +#define _ASM_SI __ASM_REG(si) +#define _ASM_DI __ASM_REG(di) + +/* Exception table entry */ +# define _ASM_EXTABLE(from,to) \ + __ASM_EX_SEC \ + _ASM_ALIGN "\n" \ + _ASM_PTR #from "," #to "\n" \ + " .previous\n" + +#endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h new file mode 100644 index 00000000000..4e1b8873c47 --- /dev/null +++ b/arch/x86/include/asm/atomic.h @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "atomic_32.h" +#else +# include "atomic_64.h" +#endif diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h new file mode 100644 index 00000000000..ad5b9f6ecdd --- /dev/null +++ b/arch/x86/include/asm/atomic_32.h @@ -0,0 +1,259 @@ +#ifndef _ASM_X86_ATOMIC_32_H +#define _ASM_X86_ATOMIC_32_H + +#include <linux/compiler.h> +#include <asm/processor.h> +#include <asm/cmpxchg.h> + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + */ + +/* + * Make sure gcc doesn't try to be clever and move things around + * on us. We need to use _exactly_ the address the user gave us, + * not some alias that contains the same information. + */ +typedef struct { + int counter; +} atomic_t; + +#define ATOMIC_INIT(i) { (i) } + +/** + * atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. + */ +#define atomic_read(v) ((v)->counter) + +/** + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +#define atomic_set(v, i) (((v)->counter) = (i)) + +/** + * atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. + */ +static inline void atomic_add(int i, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "addl %1,%0" + : "+m" (v->counter) + : "ir" (i)); +} + +/** + * atomic_sub - subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. + */ +static inline void atomic_sub(int i, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "subl %1,%0" + : "+m" (v->counter) + : "ir" (i)); +} + +/** + * atomic_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline int atomic_sub_and_test(int i, atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "subl %2,%0; sete %1" + : "+m" (v->counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +} + +/** + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. + */ +static inline void atomic_inc(atomic_t *v) +{ + asm volatile(LOCK_PREFIX "incl %0" + : "+m" (v->counter)); +} + +/** + * atomic_dec - decrement atomic variable + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1. + */ +static inline void atomic_dec(atomic_t *v) +{ + asm volatile(LOCK_PREFIX "decl %0" + : "+m" (v->counter)); +} + +/** + * atomic_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline int atomic_dec_and_test(atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "decl %0; sete %1" + : "+m" (v->counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * atomic_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline int atomic_inc_and_test(atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "incl %0; sete %1" + : "+m" (v->counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * atomic_add_negative - add and test if negative + * @v: pointer of type atomic_t + * @i: integer value to add + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline int atomic_add_negative(int i, atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "addl %2,%0; sets %1" + : "+m" (v->counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +} + +/** + * atomic_add_return - add integer and return + * @v: pointer of type atomic_t + * @i: integer value to add + * + * Atomically adds @i to @v and returns @i + @v + */ +static inline int atomic_add_return(int i, atomic_t *v) +{ + int __i; +#ifdef CONFIG_M386 + unsigned long flags; + if (unlikely(boot_cpu_data.x86 <= 3)) + goto no_xadd; +#endif + /* Modern 486+ processor */ + __i = i; + asm volatile(LOCK_PREFIX "xaddl %0, %1" + : "+r" (i), "+m" (v->counter) + : : "memory"); + return i + __i; + +#ifdef CONFIG_M386 +no_xadd: /* Legacy 386 processor */ + local_irq_save(flags); + __i = atomic_read(v); + atomic_set(v, i + __i); + local_irq_restore(flags); + return i + __i; +#endif +} + +/** + * atomic_sub_return - subtract integer and return + * @v: pointer of type atomic_t + * @i: integer value to subtract + * + * Atomically subtracts @i from @v and returns @v - @i + */ +static inline int atomic_sub_return(int i, atomic_t *v) +{ + return atomic_add_return(-i, v); +} + +#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) +#define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) + +/** + * atomic_add_unless - add unless the number is already a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as @v was not already @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static inline int atomic_add_unless(atomic_t *v, int a, int u) +{ + int c, old; + c = atomic_read(v); + for (;;) { + if (unlikely(c == (u))) + break; + old = atomic_cmpxchg((v), c, c + (a)); + if (likely(old == c)) + break; + c = old; + } + return c != (u); +} + +#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) + +#define atomic_inc_return(v) (atomic_add_return(1, v)) +#define atomic_dec_return(v) (atomic_sub_return(1, v)) + +/* These are x86-specific, used by some header files */ +#define atomic_clear_mask(mask, addr) \ + asm volatile(LOCK_PREFIX "andl %0,%1" \ + : : "r" (~(mask)), "m" (*(addr)) : "memory") + +#define atomic_set_mask(mask, addr) \ + asm volatile(LOCK_PREFIX "orl %0,%1" \ + : : "r" (mask), "m" (*(addr)) : "memory") + +/* Atomic operations are already serializing on x86 */ +#define smp_mb__before_atomic_dec() barrier() +#define smp_mb__after_atomic_dec() barrier() +#define smp_mb__before_atomic_inc() barrier() +#define smp_mb__after_atomic_inc() barrier() + +#include <asm-generic/atomic.h> +#endif /* _ASM_X86_ATOMIC_32_H */ diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h new file mode 100644 index 00000000000..279d2a731f3 --- /dev/null +++ b/arch/x86/include/asm/atomic_64.h @@ -0,0 +1,473 @@ +#ifndef _ASM_X86_ATOMIC_64_H +#define _ASM_X86_ATOMIC_64_H + +#include <asm/alternative.h> +#include <asm/cmpxchg.h> + +/* atomic_t should be 32 bit signed type */ + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + */ + +/* + * Make sure gcc doesn't try to be clever and move things around + * on us. We need to use _exactly_ the address the user gave us, + * not some alias that contains the same information. + */ +typedef struct { + int counter; +} atomic_t; + +#define ATOMIC_INIT(i) { (i) } + +/** + * atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. + */ +#define atomic_read(v) ((v)->counter) + +/** + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +#define atomic_set(v, i) (((v)->counter) = (i)) + +/** + * atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. + */ +static inline void atomic_add(int i, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "addl %1,%0" + : "=m" (v->counter) + : "ir" (i), "m" (v->counter)); +} + +/** + * atomic_sub - subtract the atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. + */ +static inline void atomic_sub(int i, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "subl %1,%0" + : "=m" (v->counter) + : "ir" (i), "m" (v->counter)); +} + +/** + * atomic_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline int atomic_sub_and_test(int i, atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "subl %2,%0; sete %1" + : "=m" (v->counter), "=qm" (c) + : "ir" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. + */ +static inline void atomic_inc(atomic_t *v) +{ + asm volatile(LOCK_PREFIX "incl %0" + : "=m" (v->counter) + : "m" (v->counter)); +} + +/** + * atomic_dec - decrement atomic variable + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1. + */ +static inline void atomic_dec(atomic_t *v) +{ + asm volatile(LOCK_PREFIX "decl %0" + : "=m" (v->counter) + : "m" (v->counter)); +} + +/** + * atomic_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline int atomic_dec_and_test(atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "decl %0; sete %1" + : "=m" (v->counter), "=qm" (c) + : "m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline int atomic_inc_and_test(atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "incl %0; sete %1" + : "=m" (v->counter), "=qm" (c) + : "m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline int atomic_add_negative(int i, atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "addl %2,%0; sets %1" + : "=m" (v->counter), "=qm" (c) + : "ir" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * atomic_add_return - add and return + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns @i + @v + */ +static inline int atomic_add_return(int i, atomic_t *v) +{ + int __i = i; + asm volatile(LOCK_PREFIX "xaddl %0, %1" + : "+r" (i), "+m" (v->counter) + : : "memory"); + return i + __i; +} + +static inline int atomic_sub_return(int i, atomic_t *v) +{ + return atomic_add_return(-i, v); +} + +#define atomic_inc_return(v) (atomic_add_return(1, v)) +#define atomic_dec_return(v) (atomic_sub_return(1, v)) + +/* An 64bit atomic type */ + +typedef struct { + long counter; +} atomic64_t; + +#define ATOMIC64_INIT(i) { (i) } + +/** + * atomic64_read - read atomic64 variable + * @v: pointer of type atomic64_t + * + * Atomically reads the value of @v. + * Doesn't imply a read memory barrier. + */ +#define atomic64_read(v) ((v)->counter) + +/** + * atomic64_set - set atomic64 variable + * @v: pointer to type atomic64_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +#define atomic64_set(v, i) (((v)->counter) = (i)) + +/** + * atomic64_add - add integer to atomic64 variable + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v. + */ +static inline void atomic64_add(long i, atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "addq %1,%0" + : "=m" (v->counter) + : "er" (i), "m" (v->counter)); +} + +/** + * atomic64_sub - subtract the atomic64 variable + * @i: integer value to subtract + * @v: pointer to type atomic64_t + * + * Atomically subtracts @i from @v. + */ +static inline void atomic64_sub(long i, atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "subq %1,%0" + : "=m" (v->counter) + : "er" (i), "m" (v->counter)); +} + +/** + * atomic64_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer to type atomic64_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline int atomic64_sub_and_test(long i, atomic64_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "subq %2,%0; sete %1" + : "=m" (v->counter), "=qm" (c) + : "er" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * atomic64_inc - increment atomic64 variable + * @v: pointer to type atomic64_t + * + * Atomically increments @v by 1. + */ +static inline void atomic64_inc(atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "incq %0" + : "=m" (v->counter) + : "m" (v->counter)); +} + +/** + * atomic64_dec - decrement atomic64 variable + * @v: pointer to type atomic64_t + * + * Atomically decrements @v by 1. + */ +static inline void atomic64_dec(atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "decq %0" + : "=m" (v->counter) + : "m" (v->counter)); +} + +/** + * atomic64_dec_and_test - decrement and test + * @v: pointer to type atomic64_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline int atomic64_dec_and_test(atomic64_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "decq %0; sete %1" + : "=m" (v->counter), "=qm" (c) + : "m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic64_inc_and_test - increment and test + * @v: pointer to type atomic64_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline int atomic64_inc_and_test(atomic64_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "incq %0; sete %1" + : "=m" (v->counter), "=qm" (c) + : "m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic64_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline int atomic64_add_negative(long i, atomic64_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "addq %2,%0; sets %1" + : "=m" (v->counter), "=qm" (c) + : "er" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * atomic64_add_return - add and return + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v and returns @i + @v + */ +static inline long atomic64_add_return(long i, atomic64_t *v) +{ + long __i = i; + asm volatile(LOCK_PREFIX "xaddq %0, %1;" + : "+r" (i), "+m" (v->counter) + : : "memory"); + return i + __i; +} + +static inline long atomic64_sub_return(long i, atomic64_t *v) +{ + return atomic64_add_return(-i, v); +} + +#define atomic64_inc_return(v) (atomic64_add_return(1, (v))) +#define atomic64_dec_return(v) (atomic64_sub_return(1, (v))) + +#define atomic64_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) +#define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) + +/** + * atomic_add_unless - add unless the number is a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static inline int atomic_add_unless(atomic_t *v, int a, int u) +{ + int c, old; + c = atomic_read(v); + for (;;) { + if (unlikely(c == (u))) + break; + old = atomic_cmpxchg((v), c, c + (a)); + if (likely(old == c)) + break; + c = old; + } + return c != (u); +} + +#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static inline int atomic64_add_unless(atomic64_t *v, long a, long u) +{ + long c, old; + c = atomic64_read(v); + for (;;) { + if (unlikely(c == (u))) + break; + old = atomic64_cmpxchg((v), c, c + (a)); + if (likely(old == c)) + break; + c = old; + } + return c != (u); +} + +/** + * atomic_inc_short - increment of a short integer + * @v: pointer to type int + * + * Atomically adds 1 to @v + * Returns the new value of @u + */ +static inline short int atomic_inc_short(short int *v) +{ + asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v)); + return *v; +} + +/** + * atomic_or_long - OR of two long integers + * @v1: pointer to type unsigned long + * @v2: pointer to type unsigned long + * + * Atomically ORs @v1 and @v2 + * Returns the result of the OR + */ +static inline void atomic_or_long(unsigned long *v1, unsigned long v2) +{ + asm(LOCK_PREFIX "orq %1, %0" : "+m" (*v1) : "r" (v2)); +} + +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + +/* These are x86-specific, used by some header files */ +#define atomic_clear_mask(mask, addr) \ + asm volatile(LOCK_PREFIX "andl %0,%1" \ + : : "r" (~(mask)), "m" (*(addr)) : "memory") + +#define atomic_set_mask(mask, addr) \ + asm volatile(LOCK_PREFIX "orl %0,%1" \ + : : "r" ((unsigned)(mask)), "m" (*(addr)) \ + : "memory") + +/* Atomic operations are already serializing on x86 */ +#define smp_mb__before_atomic_dec() barrier() +#define smp_mb__after_atomic_dec() barrier() +#define smp_mb__before_atomic_inc() barrier() +#define smp_mb__after_atomic_inc() barrier() + +#include <asm-generic/atomic.h> +#endif /* _ASM_X86_ATOMIC_64_H */ diff --git a/arch/x86/include/asm/auxvec.h b/arch/x86/include/asm/auxvec.h new file mode 100644 index 00000000000..1316b4c3542 --- /dev/null +++ b/arch/x86/include/asm/auxvec.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_AUXVEC_H +#define _ASM_X86_AUXVEC_H +/* + * Architecture-neutral AT_ values in 0-17, leave some room + * for more of them, start the x86-specific ones at 32. + */ +#ifdef __i386__ +#define AT_SYSINFO 32 +#endif +#define AT_SYSINFO_EHDR 33 + +#endif /* _ASM_X86_AUXVEC_H */ diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h new file mode 100644 index 00000000000..1d9543b9d35 --- /dev/null +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -0,0 +1,139 @@ +#ifndef __ASM_MACH_APIC_H +#define __ASM_MACH_APIC_H + +#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) +#define esr_disable (1) + +static inline int apic_id_registered(void) +{ + return (1); +} + +static inline cpumask_t target_cpus(void) +{ +#ifdef CONFIG_SMP + return cpu_online_map; +#else + return cpumask_of_cpu(0); +#endif +} + +#undef APIC_DEST_LOGICAL +#define APIC_DEST_LOGICAL 0 +#define APIC_DFR_VALUE (APIC_DFR_FLAT) +#define INT_DELIVERY_MODE (dest_Fixed) +#define INT_DEST_MODE (0) /* phys delivery to target proc */ +#define NO_BALANCE_IRQ (0) +#define WAKE_SECONDARY_VIA_INIT + + +static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return (0); +} + +static inline unsigned long check_apicid_present(int bit) +{ + return (1); +} + +static inline unsigned long calculate_ldr(int cpu) +{ + unsigned long val, id; + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + id = xapic_phys_to_log_apicid(cpu); + val |= SET_APIC_LOGICAL_ID(id); + return val; +} + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +static inline void init_apic_ldr(void) +{ + unsigned long val; + int cpu = smp_processor_id(); + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = calculate_ldr(cpu); + apic_write(APIC_LDR, val); +} + +static inline void setup_apic_routing(void) +{ + printk("Enabling APIC mode: %s. Using %d I/O APICs\n", + "Physflat", nr_ioapics); +} + +static inline int multi_timer_check(int apic, int irq) +{ + return (0); +} + +static inline int apicid_to_node(int logical_apicid) +{ + return apicid_2_node[hard_smp_processor_id()]; +} + +static inline int cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < NR_CPUS) + return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); + + return BAD_APICID; +} + +static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) +{ + return physid_mask_of_physid(phys_apicid); +} + +extern u8 cpu_2_logical_apicid[]; +/* Mapping from cpu number to logical apicid */ +static inline int cpu_to_logical_apicid(int cpu) +{ + if (cpu >= NR_CPUS) + return BAD_APICID; + return cpu_physical_id(cpu); +} + +static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* For clustered we don't have a good way to do this yet - hack */ + return physids_promote(0xFFL); +} + +static inline void setup_portio_remap(void) +{ +} + +static inline void enable_apic_mode(void) +{ +} + +static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return (1); +} + +/* As we are using single CPU as destination, pick only one CPU here */ +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +{ + int cpu; + int apicid; + + cpu = first_cpu(cpumask); + apicid = cpu_to_logical_apicid(cpu); + return apicid; +} + +static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +#endif /* __ASM_MACH_APIC_H */ diff --git a/arch/x86/include/asm/bigsmp/apicdef.h b/arch/x86/include/asm/bigsmp/apicdef.h new file mode 100644 index 00000000000..392c3f5ef2f --- /dev/null +++ b/arch/x86/include/asm/bigsmp/apicdef.h @@ -0,0 +1,13 @@ +#ifndef __ASM_MACH_APICDEF_H +#define __ASM_MACH_APICDEF_H + +#define APIC_ID_MASK (0xFF<<24) + +static inline unsigned get_apic_id(unsigned long x) +{ + return (((x)>>24)&0xFF); +} + +#define GET_APIC_ID(x) get_apic_id(x) + +#endif diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h new file mode 100644 index 00000000000..9404c535b7e --- /dev/null +++ b/arch/x86/include/asm/bigsmp/ipi.h @@ -0,0 +1,25 @@ +#ifndef __ASM_MACH_IPI_H +#define __ASM_MACH_IPI_H + +void send_IPI_mask_sequence(cpumask_t mask, int vector); + +static inline void send_IPI_mask(cpumask_t mask, int vector) +{ + send_IPI_mask_sequence(mask, vector); +} + +static inline void send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); +} + +static inline void send_IPI_all(int vector) +{ + send_IPI_mask(cpu_online_map, vector); +} + +#endif /* __ASM_MACH_IPI_H */ diff --git a/arch/x86/include/asm/bios_ebda.h b/arch/x86/include/asm/bios_ebda.h new file mode 100644 index 00000000000..3c7521063d3 --- /dev/null +++ b/arch/x86/include/asm/bios_ebda.h @@ -0,0 +1,36 @@ +#ifndef _ASM_X86_BIOS_EBDA_H +#define _ASM_X86_BIOS_EBDA_H + +#include <asm/io.h> + +/* + * there is a real-mode segmented pointer pointing to the + * 4K EBDA area at 0x40E. + */ +static inline unsigned int get_bios_ebda(void) +{ + unsigned int address = *(unsigned short *)phys_to_virt(0x40E); + address <<= 4; + return address; /* 0 means none */ +} + +void reserve_ebda_region(void); + +#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION +/* + * This is obviously not a great place for this, but we want to be + * able to scatter it around anywhere in the kernel. + */ +void check_for_bios_corruption(void); +void start_periodic_check_for_corruption(void); +#else +static inline void check_for_bios_corruption(void) +{ +} + +static inline void start_periodic_check_for_corruption(void) +{ +} +#endif + +#endif /* _ASM_X86_BIOS_EBDA_H */ diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h new file mode 100644 index 00000000000..36001032271 --- /dev/null +++ b/arch/x86/include/asm/bitops.h @@ -0,0 +1,451 @@ +#ifndef _ASM_X86_BITOPS_H +#define _ASM_X86_BITOPS_H + +/* + * Copyright 1992, Linus Torvalds. + */ + +#ifndef _LINUX_BITOPS_H +#error only <linux/bitops.h> can be included directly +#endif + +#include <linux/compiler.h> +#include <asm/alternative.h> + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic. All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. + * + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ + +#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) +/* Technically wrong, but this avoids compilation errors on some gcc + versions. */ +#define BITOP_ADDR(x) "=m" (*(volatile long *) (x)) +#else +#define BITOP_ADDR(x) "+m" (*(volatile long *) (x)) +#endif + +#define ADDR BITOP_ADDR(addr) + +/* + * We do the locked ops that don't return the old value as + * a mask operation on a byte. + */ +#define IS_IMMEDIATE(nr) (__builtin_constant_p(nr)) +#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3)) +#define CONST_MASK(nr) (1 << ((nr) & 7)) + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * + * Note: there are no guarantees that this function will not be reordered + * on non x86 architectures, so if you are writing portable code, + * make sure not to rely on its reordering guarantees. + * + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void set_bit(unsigned int nr, volatile unsigned long *addr) +{ + if (IS_IMMEDIATE(nr)) { + asm volatile(LOCK_PREFIX "orb %1,%0" + : CONST_MASK_ADDR(nr, addr) + : "iq" ((u8)CONST_MASK(nr)) + : "memory"); + } else { + asm volatile(LOCK_PREFIX "bts %1,%0" + : BITOP_ADDR(addr) : "Ir" (nr) : "memory"); + } +} + +/** + * __set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __set_bit(int nr, volatile unsigned long *addr) +{ + asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static inline void clear_bit(int nr, volatile unsigned long *addr) +{ + if (IS_IMMEDIATE(nr)) { + asm volatile(LOCK_PREFIX "andb %1,%0" + : CONST_MASK_ADDR(nr, addr) + : "iq" ((u8)~CONST_MASK(nr))); + } else { + asm volatile(LOCK_PREFIX "btr %1,%0" + : BITOP_ADDR(addr) + : "Ir" (nr)); + } +} + +/* + * clear_bit_unlock - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and implies release semantics before the memory + * operation. It can be used for an unlock. + */ +static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr) +{ + barrier(); + clear_bit(nr, addr); +} + +static inline void __clear_bit(int nr, volatile unsigned long *addr) +{ + asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); +} + +/* + * __clear_bit_unlock - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * __clear_bit() is non-atomic and implies release semantics before the memory + * operation. It can be used for an unlock if no other CPUs can concurrently + * modify other bits in the word. + * + * No memory barrier is required here, because x86 cannot reorder stores past + * older loads. Same principle as spin_unlock. + */ +static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr) +{ + barrier(); + __clear_bit(nr, addr); +} + +#define smp_mb__before_clear_bit() barrier() +#define smp_mb__after_clear_bit() barrier() + +/** + * __change_bit - Toggle a bit in memory + * @nr: the bit to change + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __change_bit(int nr, volatile unsigned long *addr) +{ + asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); +} + +/** + * change_bit - Toggle a bit in memory + * @nr: Bit to change + * @addr: Address to start counting from + * + * change_bit() is atomic and may not be reordered. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void change_bit(int nr, volatile unsigned long *addr) +{ + asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr)); +} + +/** + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_set_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm volatile(LOCK_PREFIX "bts %2,%1\n\t" + "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); + + return oldbit; +} + +/** + * test_and_set_bit_lock - Set a bit and return its old value for lock + * @nr: Bit to set + * @addr: Address to count from + * + * This is the same as test_and_set_bit on x86. + */ +static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr) +{ + return test_and_set_bit(nr, addr); +} + +/** + * __test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm("bts %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR + : "Ir" (nr)); + return oldbit; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm volatile(LOCK_PREFIX "btr %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); + + return oldbit; +} + +/** + * __test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm volatile("btr %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR + : "Ir" (nr)); + return oldbit; +} + +/* WARNING: non atomic and it can be reordered! */ +static inline int __test_and_change_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm volatile("btc %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR + : "Ir" (nr) : "memory"); + + return oldbit; +} + +/** + * test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_change_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm volatile(LOCK_PREFIX "btc %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); + + return oldbit; +} + +static inline int constant_test_bit(int nr, const volatile unsigned long *addr) +{ + return ((1UL << (nr % BITS_PER_LONG)) & + (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; +} + +static inline int variable_test_bit(int nr, volatile const unsigned long *addr) +{ + int oldbit; + + asm volatile("bt %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit) + : "m" (*(unsigned long *)addr), "Ir" (nr)); + + return oldbit; +} + +#if 0 /* Fool kernel-doc since it doesn't do macros yet */ +/** + * test_bit - Determine whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static int test_bit(int nr, const volatile unsigned long *addr); +#endif + +#define test_bit(nr, addr) \ + (__builtin_constant_p((nr)) \ + ? constant_test_bit((nr), (addr)) \ + : variable_test_bit((nr), (addr))) + +/** + * __ffs - find first set bit in word + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static inline unsigned long __ffs(unsigned long word) +{ + asm("bsf %1,%0" + : "=r" (word) + : "rm" (word)); + return word; +} + +/** + * ffz - find first zero bit in word + * @word: The word to search + * + * Undefined if no zero exists, so code should check against ~0UL first. + */ +static inline unsigned long ffz(unsigned long word) +{ + asm("bsf %1,%0" + : "=r" (word) + : "r" (~word)); + return word; +} + +/* + * __fls: find last set bit in word + * @word: The word to search + * + * Undefined if no set bit exists, so code should check against 0 first. + */ +static inline unsigned long __fls(unsigned long word) +{ + asm("bsr %1,%0" + : "=r" (word) + : "rm" (word)); + return word; +} + +#ifdef __KERNEL__ +/** + * ffs - find first set bit in word + * @x: the word to search + * + * This is defined the same way as the libc and compiler builtin ffs + * routines, therefore differs in spirit from the other bitops. + * + * ffs(value) returns 0 if value is 0 or the position of the first + * set bit if value is nonzero. The first (least significant) bit + * is at position 1. + */ +static inline int ffs(int x) +{ + int r; +#ifdef CONFIG_X86_CMOV + asm("bsfl %1,%0\n\t" + "cmovzl %2,%0" + : "=r" (r) : "rm" (x), "r" (-1)); +#else + asm("bsfl %1,%0\n\t" + "jnz 1f\n\t" + "movl $-1,%0\n" + "1:" : "=r" (r) : "rm" (x)); +#endif + return r + 1; +} + +/** + * fls - find last set bit in word + * @x: the word to search + * + * This is defined in a similar way as the libc and compiler builtin + * ffs, but returns the position of the most significant set bit. + * + * fls(value) returns 0 if value is 0 or the position of the last + * set bit if value is nonzero. The last (most significant) bit is + * at position 32. + */ +static inline int fls(int x) +{ + int r; +#ifdef CONFIG_X86_CMOV + asm("bsrl %1,%0\n\t" + "cmovzl %2,%0" + : "=&r" (r) : "rm" (x), "rm" (-1)); +#else + asm("bsrl %1,%0\n\t" + "jnz 1f\n\t" + "movl $-1,%0\n" + "1:" : "=r" (r) : "rm" (x)); +#endif + return r + 1; +} +#endif /* __KERNEL__ */ + +#undef ADDR + +#ifdef __KERNEL__ + +#include <asm-generic/bitops/sched.h> + +#define ARCH_HAS_FAST_MULTIPLIER 1 + +#include <asm-generic/bitops/hweight.h> + +#endif /* __KERNEL__ */ + +#include <asm-generic/bitops/fls64.h> + +#ifdef __KERNEL__ + +#include <asm-generic/bitops/ext2-non-atomic.h> + +#define ext2_set_bit_atomic(lock, nr, addr) \ + test_and_set_bit((nr), (unsigned long *)(addr)) +#define ext2_clear_bit_atomic(lock, nr, addr) \ + test_and_clear_bit((nr), (unsigned long *)(addr)) + +#include <asm-generic/bitops/minix.h> + +#endif /* __KERNEL__ */ +#endif /* _ASM_X86_BITOPS_H */ diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h new file mode 100644 index 00000000000..dd61616cb73 --- /dev/null +++ b/arch/x86/include/asm/boot.h @@ -0,0 +1,26 @@ +#ifndef _ASM_X86_BOOT_H +#define _ASM_X86_BOOT_H + +/* Don't touch these, unless you really know what you're doing. */ +#define DEF_SYSSEG 0x1000 +#define DEF_SYSSIZE 0x7F00 + +/* Internal svga startup constants */ +#define NORMAL_VGA 0xffff /* 80x25 mode */ +#define EXTENDED_VGA 0xfffe /* 80x50 mode */ +#define ASK_VGA 0xfffd /* ask for it at bootup */ + +/* Physical address where kernel should be loaded. */ +#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ + + (CONFIG_PHYSICAL_ALIGN - 1)) \ + & ~(CONFIG_PHYSICAL_ALIGN - 1)) + +#ifdef CONFIG_X86_64 +#define BOOT_HEAP_SIZE 0x7000 +#define BOOT_STACK_SIZE 0x4000 +#else +#define BOOT_HEAP_SIZE 0x4000 +#define BOOT_STACK_SIZE 0x1000 +#endif + +#endif /* _ASM_X86_BOOT_H */ diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h new file mode 100644 index 00000000000..433adaebf9b --- /dev/null +++ b/arch/x86/include/asm/bootparam.h @@ -0,0 +1,111 @@ +#ifndef _ASM_X86_BOOTPARAM_H +#define _ASM_X86_BOOTPARAM_H + +#include <linux/types.h> +#include <linux/screen_info.h> +#include <linux/apm_bios.h> +#include <linux/edd.h> +#include <asm/e820.h> +#include <asm/ist.h> +#include <video/edid.h> + +/* setup data types */ +#define SETUP_NONE 0 +#define SETUP_E820_EXT 1 + +/* extensible setup data list node */ +struct setup_data { + __u64 next; + __u32 type; + __u32 len; + __u8 data[0]; +}; + +struct setup_header { + __u8 setup_sects; + __u16 root_flags; + __u32 syssize; + __u16 ram_size; +#define RAMDISK_IMAGE_START_MASK 0x07FF +#define RAMDISK_PROMPT_FLAG 0x8000 +#define RAMDISK_LOAD_FLAG 0x4000 + __u16 vid_mode; + __u16 root_dev; + __u16 boot_flag; + __u16 jump; + __u32 header; + __u16 version; + __u32 realmode_swtch; + __u16 start_sys; + __u16 kernel_version; + __u8 type_of_loader; + __u8 loadflags; +#define LOADED_HIGH (1<<0) +#define QUIET_FLAG (1<<5) +#define KEEP_SEGMENTS (1<<6) +#define CAN_USE_HEAP (1<<7) + __u16 setup_move_size; + __u32 code32_start; + __u32 ramdisk_image; + __u32 ramdisk_size; + __u32 bootsect_kludge; + __u16 heap_end_ptr; + __u16 _pad1; + __u32 cmd_line_ptr; + __u32 initrd_addr_max; + __u32 kernel_alignment; + __u8 relocatable_kernel; + __u8 _pad2[3]; + __u32 cmdline_size; + __u32 hardware_subarch; + __u64 hardware_subarch_data; + __u32 payload_offset; + __u32 payload_length; + __u64 setup_data; +} __attribute__((packed)); + +struct sys_desc_table { + __u16 length; + __u8 table[14]; +}; + +struct efi_info { + __u32 efi_loader_signature; + __u32 efi_systab; + __u32 efi_memdesc_size; + __u32 efi_memdesc_version; + __u32 efi_memmap; + __u32 efi_memmap_size; + __u32 efi_systab_hi; + __u32 efi_memmap_hi; +}; + +/* The so-called "zeropage" */ +struct boot_params { + struct screen_info screen_info; /* 0x000 */ + struct apm_bios_info apm_bios_info; /* 0x040 */ + __u8 _pad2[12]; /* 0x054 */ + struct ist_info ist_info; /* 0x060 */ + __u8 _pad3[16]; /* 0x070 */ + __u8 hd0_info[16]; /* obsolete! */ /* 0x080 */ + __u8 hd1_info[16]; /* obsolete! */ /* 0x090 */ + struct sys_desc_table sys_desc_table; /* 0x0a0 */ + __u8 _pad4[144]; /* 0x0b0 */ + struct edid_info edid_info; /* 0x140 */ + struct efi_info efi_info; /* 0x1c0 */ + __u32 alt_mem_k; /* 0x1e0 */ + __u32 scratch; /* Scratch field! */ /* 0x1e4 */ + __u8 e820_entries; /* 0x1e8 */ + __u8 eddbuf_entries; /* 0x1e9 */ + __u8 edd_mbr_sig_buf_entries; /* 0x1ea */ + __u8 _pad6[6]; /* 0x1eb */ + struct setup_header hdr; /* setup header */ /* 0x1f1 */ + __u8 _pad7[0x290-0x1f1-sizeof(struct setup_header)]; + __u32 edd_mbr_sig_buffer[EDD_MBR_SIG_MAX]; /* 0x290 */ + struct e820entry e820_map[E820MAX]; /* 0x2d0 */ + __u8 _pad8[48]; /* 0xcd0 */ + struct edd_info eddbuf[EDDMAXNR]; /* 0xd00 */ + __u8 _pad9[276]; /* 0xeec */ +} __attribute__((packed)); + +#endif /* _ASM_X86_BOOTPARAM_H */ diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h new file mode 100644 index 00000000000..3def2065fce --- /dev/null +++ b/arch/x86/include/asm/bug.h @@ -0,0 +1,39 @@ +#ifndef _ASM_X86_BUG_H +#define _ASM_X86_BUG_H + +#ifdef CONFIG_BUG +#define HAVE_ARCH_BUG + +#ifdef CONFIG_DEBUG_BUGVERBOSE + +#ifdef CONFIG_X86_32 +# define __BUG_C0 "2:\t.long 1b, %c0\n" +#else +# define __BUG_C0 "2:\t.quad 1b, %c0\n" +#endif + +#define BUG() \ +do { \ + asm volatile("1:\tud2\n" \ + ".pushsection __bug_table,\"a\"\n" \ + __BUG_C0 \ + "\t.word %c1, 0\n" \ + "\t.org 2b+%c2\n" \ + ".popsection" \ + : : "i" (__FILE__), "i" (__LINE__), \ + "i" (sizeof(struct bug_entry))); \ + for (;;) ; \ +} while (0) + +#else +#define BUG() \ +do { \ + asm volatile("ud2"); \ + for (;;) ; \ +} while (0) +#endif + +#endif /* !CONFIG_BUG */ + +#include <asm-generic/bug.h> +#endif /* _ASM_X86_BUG_H */ diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h new file mode 100644 index 00000000000..08abf639075 --- /dev/null +++ b/arch/x86/include/asm/bugs.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_BUGS_H +#define _ASM_X86_BUGS_H + +extern void check_bugs(void); + +#if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32) +int ppro_with_ram_bug(void); +#else +static inline int ppro_with_ram_bug(void) { return 0; } +#endif + +#endif /* _ASM_X86_BUGS_H */ diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h new file mode 100644 index 00000000000..e02ae2d89ac --- /dev/null +++ b/arch/x86/include/asm/byteorder.h @@ -0,0 +1,81 @@ +#ifndef _ASM_X86_BYTEORDER_H +#define _ASM_X86_BYTEORDER_H + +#include <asm/types.h> +#include <linux/compiler.h> + +#ifdef __GNUC__ + +#ifdef __i386__ + +static inline __attribute_const__ __u32 ___arch__swab32(__u32 x) +{ +#ifdef CONFIG_X86_BSWAP + asm("bswap %0" : "=r" (x) : "0" (x)); +#else + asm("xchgb %b0,%h0\n\t" /* swap lower bytes */ + "rorl $16,%0\n\t" /* swap words */ + "xchgb %b0,%h0" /* swap higher bytes */ + : "=q" (x) + : "0" (x)); +#endif + return x; +} + +static inline __attribute_const__ __u64 ___arch__swab64(__u64 val) +{ + union { + struct { + __u32 a; + __u32 b; + } s; + __u64 u; + } v; + v.u = val; +#ifdef CONFIG_X86_BSWAP + asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1" + : "=r" (v.s.a), "=r" (v.s.b) + : "0" (v.s.a), "1" (v.s.b)); +#else + v.s.a = ___arch__swab32(v.s.a); + v.s.b = ___arch__swab32(v.s.b); + asm("xchgl %0,%1" + : "=r" (v.s.a), "=r" (v.s.b) + : "0" (v.s.a), "1" (v.s.b)); +#endif + return v.u; +} + +#else /* __i386__ */ + +static inline __attribute_const__ __u64 ___arch__swab64(__u64 x) +{ + asm("bswapq %0" + : "=r" (x) + : "0" (x)); + return x; +} + +static inline __attribute_const__ __u32 ___arch__swab32(__u32 x) +{ + asm("bswapl %0" + : "=r" (x) + : "0" (x)); + return x; +} + +#endif + +/* Do not define swab16. Gcc is smart enough to recognize "C" version and + convert it into rotation or exhange. */ + +#define __arch__swab64(x) ___arch__swab64(x) +#define __arch__swab32(x) ___arch__swab32(x) + +#define __BYTEORDER_HAS_U64__ + +#endif /* __GNUC__ */ + +#include <linux/byteorder/little_endian.h> + +#endif /* _ASM_X86_BYTEORDER_H */ diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h new file mode 100644 index 00000000000..5d367caa0e3 --- /dev/null +++ b/arch/x86/include/asm/cache.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_CACHE_H +#define _ASM_X86_CACHE_H + +/* L1 cache line size */ +#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#define __read_mostly __attribute__((__section__(".data.read_mostly"))) + +#ifdef CONFIG_X86_VSMP +/* vSMP Internode cacheline shift */ +#define INTERNODE_CACHE_SHIFT (12) +#ifdef CONFIG_SMP +#define __cacheline_aligned_in_smp \ + __attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT)))) \ + __attribute__((__section__(".data.page_aligned"))) +#endif +#endif + +#endif /* _ASM_X86_CACHE_H */ diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h new file mode 100644 index 00000000000..2f8466540fb --- /dev/null +++ b/arch/x86/include/asm/cacheflush.h @@ -0,0 +1,118 @@ +#ifndef _ASM_X86_CACHEFLUSH_H +#define _ASM_X86_CACHEFLUSH_H + +/* Keep includes the same across arches. */ +#include <linux/mm.h> + +/* Caches aren't brain-dead on the intel. */ +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) +#define flush_icache_range(start, end) do { } while (0) +#define flush_icache_page(vma, pg) do { } while (0) +#define flush_icache_user_range(vma, pg, adr, len) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) + +#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ + memcpy((dst), (src), (len)) +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + memcpy((dst), (src), (len)) + +#define PG_non_WB PG_arch_1 +PAGEFLAG(NonWB, non_WB) + +/* + * The set_memory_* API can be used to change various attributes of a virtual + * address range. The attributes include: + * Cachability : UnCached, WriteCombining, WriteBack + * Executability : eXeutable, NoteXecutable + * Read/Write : ReadOnly, ReadWrite + * Presence : NotPresent + * + * Within a catagory, the attributes are mutually exclusive. + * + * The implementation of this API will take care of various aspects that + * are associated with changing such attributes, such as: + * - Flushing TLBs + * - Flushing CPU caches + * - Making sure aliases of the memory behind the mapping don't violate + * coherency rules as defined by the CPU in the system. + * + * What this API does not do: + * - Provide exclusion between various callers - including callers that + * operation on other mappings of the same physical page + * - Restore default attributes when a page is freed + * - Guarantee that mappings other than the requested one are + * in any state, other than that these do not violate rules for + * the CPU you have. Do not depend on any effects on other mappings, + * CPUs other than the one you have may have more relaxed rules. + * The caller is required to take care of these. + */ + +int _set_memory_uc(unsigned long addr, int numpages); +int _set_memory_wc(unsigned long addr, int numpages); +int _set_memory_wb(unsigned long addr, int numpages); +int set_memory_uc(unsigned long addr, int numpages); +int set_memory_wc(unsigned long addr, int numpages); +int set_memory_wb(unsigned long addr, int numpages); +int set_memory_x(unsigned long addr, int numpages); +int set_memory_nx(unsigned long addr, int numpages); +int set_memory_ro(unsigned long addr, int numpages); +int set_memory_rw(unsigned long addr, int numpages); +int set_memory_np(unsigned long addr, int numpages); +int set_memory_4k(unsigned long addr, int numpages); + +int set_memory_array_uc(unsigned long *addr, int addrinarray); +int set_memory_array_wb(unsigned long *addr, int addrinarray); + +/* + * For legacy compatibility with the old APIs, a few functions + * are provided that work on a "struct page". + * These functions operate ONLY on the 1:1 kernel mapping of the + * memory that the struct page represents, and internally just + * call the set_memory_* function. See the description of the + * set_memory_* function for more details on conventions. + * + * These APIs should be considered *deprecated* and are likely going to + * be removed in the future. + * The reason for this is the implicit operation on the 1:1 mapping only, + * making this not a generally useful API. + * + * Specifically, many users of the old APIs had a virtual address, + * called virt_to_page() or vmalloc_to_page() on that address to + * get a struct page* that the old API required. + * To convert these cases, use set_memory_*() on the original + * virtual address, do not use these functions. + */ + +int set_pages_uc(struct page *page, int numpages); +int set_pages_wb(struct page *page, int numpages); +int set_pages_x(struct page *page, int numpages); +int set_pages_nx(struct page *page, int numpages); +int set_pages_ro(struct page *page, int numpages); +int set_pages_rw(struct page *page, int numpages); + + +void clflush_cache_range(void *addr, unsigned int size); + +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void); +extern const int rodata_test_data; +#endif + +#ifdef CONFIG_DEBUG_RODATA_TEST +int rodata_test(void); +#else +static inline int rodata_test(void) +{ + return 0; +} +#endif + +#endif /* _ASM_X86_CACHEFLUSH_H */ diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h new file mode 100644 index 00000000000..b03bedb62aa --- /dev/null +++ b/arch/x86/include/asm/calgary.h @@ -0,0 +1,72 @@ +/* + * Derived from include/asm-powerpc/iommu.h + * + * Copyright IBM Corporation, 2006-2007 + * + * Author: Jon Mason <jdmason@us.ibm.com> + * Author: Muli Ben-Yehuda <muli@il.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_X86_CALGARY_H +#define _ASM_X86_CALGARY_H + +#include <linux/spinlock.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/timer.h> +#include <asm/types.h> + +struct iommu_table { + struct cal_chipset_ops *chip_ops; /* chipset specific funcs */ + unsigned long it_base; /* mapped address of tce table */ + unsigned long it_hint; /* Hint for next alloc */ + unsigned long *it_map; /* A simple allocation bitmap for now */ + void __iomem *bbar; /* Bridge BAR */ + u64 tar_val; /* Table Address Register */ + struct timer_list watchdog_timer; + spinlock_t it_lock; /* Protects it_map */ + unsigned int it_size; /* Size of iommu table in entries */ + unsigned char it_busno; /* Bus number this table belongs to */ +}; + +struct cal_chipset_ops { + void (*handle_quirks)(struct iommu_table *tbl, struct pci_dev *dev); + void (*tce_cache_blast)(struct iommu_table *tbl); + void (*dump_error_regs)(struct iommu_table *tbl); +}; + +#define TCE_TABLE_SIZE_UNSPECIFIED ~0 +#define TCE_TABLE_SIZE_64K 0 +#define TCE_TABLE_SIZE_128K 1 +#define TCE_TABLE_SIZE_256K 2 +#define TCE_TABLE_SIZE_512K 3 +#define TCE_TABLE_SIZE_1M 4 +#define TCE_TABLE_SIZE_2M 5 +#define TCE_TABLE_SIZE_4M 6 +#define TCE_TABLE_SIZE_8M 7 + +extern int use_calgary; + +#ifdef CONFIG_CALGARY_IOMMU +extern int calgary_iommu_init(void); +extern void detect_calgary(void); +#else +static inline int calgary_iommu_init(void) { return 1; } +static inline void detect_calgary(void) { return; } +#endif + +#endif /* _ASM_X86_CALGARY_H */ diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h new file mode 100644 index 00000000000..2bc162e0ec6 --- /dev/null +++ b/arch/x86/include/asm/calling.h @@ -0,0 +1,170 @@ +/* + * Some macros to handle stack frames in assembly. + */ + +#define R15 0 +#define R14 8 +#define R13 16 +#define R12 24 +#define RBP 32 +#define RBX 40 + +/* arguments: interrupts/non tracing syscalls only save upto here*/ +#define R11 48 +#define R10 56 +#define R9 64 +#define R8 72 +#define RAX 80 +#define RCX 88 +#define RDX 96 +#define RSI 104 +#define RDI 112 +#define ORIG_RAX 120 /* + error_code */ +/* end of arguments */ + +/* cpu exception frame or undefined in case of fast syscall. */ +#define RIP 128 +#define CS 136 +#define EFLAGS 144 +#define RSP 152 +#define SS 160 + +#define ARGOFFSET R11 +#define SWFRAME ORIG_RAX + + .macro SAVE_ARGS addskip=0, norcx=0, nor891011=0 + subq $9*8+\addskip, %rsp + CFI_ADJUST_CFA_OFFSET 9*8+\addskip + movq %rdi, 8*8(%rsp) + CFI_REL_OFFSET rdi, 8*8 + movq %rsi, 7*8(%rsp) + CFI_REL_OFFSET rsi, 7*8 + movq %rdx, 6*8(%rsp) + CFI_REL_OFFSET rdx, 6*8 + .if \norcx + .else + movq %rcx, 5*8(%rsp) + CFI_REL_OFFSET rcx, 5*8 + .endif + movq %rax, 4*8(%rsp) + CFI_REL_OFFSET rax, 4*8 + .if \nor891011 + .else + movq %r8, 3*8(%rsp) + CFI_REL_OFFSET r8, 3*8 + movq %r9, 2*8(%rsp) + CFI_REL_OFFSET r9, 2*8 + movq %r10, 1*8(%rsp) + CFI_REL_OFFSET r10, 1*8 + movq %r11, (%rsp) + CFI_REL_OFFSET r11, 0*8 + .endif + .endm + +#define ARG_SKIP 9*8 + + .macro RESTORE_ARGS skiprax=0, addskip=0, skiprcx=0, skipr11=0, \ + skipr8910=0, skiprdx=0 + .if \skipr11 + .else + movq (%rsp), %r11 + CFI_RESTORE r11 + .endif + .if \skipr8910 + .else + movq 1*8(%rsp), %r10 + CFI_RESTORE r10 + movq 2*8(%rsp), %r9 + CFI_RESTORE r9 + movq 3*8(%rsp), %r8 + CFI_RESTORE r8 + .endif + .if \skiprax + .else + movq 4*8(%rsp), %rax + CFI_RESTORE rax + .endif + .if \skiprcx + .else + movq 5*8(%rsp), %rcx + CFI_RESTORE rcx + .endif + .if \skiprdx + .else + movq 6*8(%rsp), %rdx + CFI_RESTORE rdx + .endif + movq 7*8(%rsp), %rsi + CFI_RESTORE rsi + movq 8*8(%rsp), %rdi + CFI_RESTORE rdi + .if ARG_SKIP+\addskip > 0 + addq $ARG_SKIP+\addskip, %rsp + CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip) + .endif + .endm + + .macro LOAD_ARGS offset, skiprax=0 + movq \offset(%rsp), %r11 + movq \offset+8(%rsp), %r10 + movq \offset+16(%rsp), %r9 + movq \offset+24(%rsp), %r8 + movq \offset+40(%rsp), %rcx + movq \offset+48(%rsp), %rdx + movq \offset+56(%rsp), %rsi + movq \offset+64(%rsp), %rdi + .if \skiprax + .else + movq \offset+72(%rsp), %rax + .endif + .endm + +#define REST_SKIP 6*8 + + .macro SAVE_REST + subq $REST_SKIP, %rsp + CFI_ADJUST_CFA_OFFSET REST_SKIP + movq %rbx, 5*8(%rsp) + CFI_REL_OFFSET rbx, 5*8 + movq %rbp, 4*8(%rsp) + CFI_REL_OFFSET rbp, 4*8 + movq %r12, 3*8(%rsp) + CFI_REL_OFFSET r12, 3*8 + movq %r13, 2*8(%rsp) + CFI_REL_OFFSET r13, 2*8 + movq %r14, 1*8(%rsp) + CFI_REL_OFFSET r14, 1*8 + movq %r15, (%rsp) + CFI_REL_OFFSET r15, 0*8 + .endm + + .macro RESTORE_REST + movq (%rsp), %r15 + CFI_RESTORE r15 + movq 1*8(%rsp), %r14 + CFI_RESTORE r14 + movq 2*8(%rsp), %r13 + CFI_RESTORE r13 + movq 3*8(%rsp), %r12 + CFI_RESTORE r12 + movq 4*8(%rsp), %rbp + CFI_RESTORE rbp + movq 5*8(%rsp), %rbx + CFI_RESTORE rbx + addq $REST_SKIP, %rsp + CFI_ADJUST_CFA_OFFSET -(REST_SKIP) + .endm + + .macro SAVE_ALL + SAVE_ARGS + SAVE_REST + .endm + + .macro RESTORE_ALL addskip=0 + RESTORE_REST + RESTORE_ARGS 0, \addskip + .endm + + .macro icebp + .byte 0xf1 + .endm diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h new file mode 100644 index 00000000000..848850fd7d6 --- /dev/null +++ b/arch/x86/include/asm/checksum.h @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "checksum_32.h" +#else +# include "checksum_64.h" +#endif diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h new file mode 100644 index 00000000000..7c5ef8b14d9 --- /dev/null +++ b/arch/x86/include/asm/checksum_32.h @@ -0,0 +1,189 @@ +#ifndef _ASM_X86_CHECKSUM_32_H +#define _ASM_X86_CHECKSUM_32_H + +#include <linux/in6.h> + +#include <asm/uaccess.h> + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +asmlinkage __wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * the same as csum_partial, but copies from src while it + * checksums, and handles user-space pointer exceptions correctly, when needed. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ + +asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst, + int len, __wsum sum, + int *src_err_ptr, int *dst_err_ptr); + +/* + * Note: when you get a NULL pointer exception here this means someone + * passed in an incorrect kernel address to one of these functions. + * + * If you use these functions directly please don't forget the + * access_ok(). + */ +static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, + int len, __wsum sum) +{ + return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL); +} + +static inline __wsum csum_partial_copy_from_user(const void __user *src, + void *dst, + int len, __wsum sum, + int *err_ptr) +{ + might_sleep(); + return csum_partial_copy_generic((__force void *)src, dst, + len, sum, err_ptr, NULL); +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + * + * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by + * Arnt Gulbrandsen. + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned int sum; + + asm volatile("movl (%1), %0 ;\n" + "subl $4, %2 ;\n" + "jbe 2f ;\n" + "addl 4(%1), %0 ;\n" + "adcl 8(%1), %0 ;\n" + "adcl 12(%1), %0;\n" + "1: adcl 16(%1), %0 ;\n" + "lea 4(%1), %1 ;\n" + "decl %2 ;\n" + "jne 1b ;\n" + "adcl $0, %0 ;\n" + "movl %0, %2 ;\n" + "shrl $16, %0 ;\n" + "addw %w2, %w0 ;\n" + "adcl $0, %0 ;\n" + "notl %0 ;\n" + "2: ;\n" + /* Since the input registers which are loaded with iph and ihl + are modified, we must also specify them as outputs, or gcc + will assume they contain their original values. */ + : "=r" (sum), "=r" (iph), "=r" (ihl) + : "1" (iph), "2" (ihl) + : "memory"); + return (__force __sum16)sum; +} + +/* + * Fold a partial checksum + */ + +static inline __sum16 csum_fold(__wsum sum) +{ + asm("addl %1, %0 ;\n" + "adcl $0xffff, %0 ;\n" + : "=r" (sum) + : "r" ((__force u32)sum << 16), + "0" ((__force u32)sum & 0xffff0000)); + return (__force __sum16)(~(__force u32)sum >> 16); +} + +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + asm("addl %1, %0 ;\n" + "adcl %2, %0 ;\n" + "adcl %3, %0 ;\n" + "adcl $0, %0 ;\n" + : "=r" (sum) + : "g" (daddr), "g"(saddr), + "g" ((len + proto) << 8), "0" (sum)); + return sum; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ + +static inline __sum16 ip_compute_csum(const void *buff, int len) +{ + return csum_fold(csum_partial(buff, len, 0)); +} + +#define _HAVE_ARCH_IPV6_CSUM +static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, unsigned short proto, + __wsum sum) +{ + asm("addl 0(%1), %0 ;\n" + "adcl 4(%1), %0 ;\n" + "adcl 8(%1), %0 ;\n" + "adcl 12(%1), %0 ;\n" + "adcl 0(%2), %0 ;\n" + "adcl 4(%2), %0 ;\n" + "adcl 8(%2), %0 ;\n" + "adcl 12(%2), %0 ;\n" + "adcl %3, %0 ;\n" + "adcl %4, %0 ;\n" + "adcl $0, %0 ;\n" + : "=&r" (sum) + : "r" (saddr), "r" (daddr), + "r" (htonl(len)), "r" (htonl(proto)), "0" (sum)); + + return csum_fold(sum); +} + +/* + * Copy and checksum to user + */ +#define HAVE_CSUM_COPY_USER +static inline __wsum csum_and_copy_to_user(const void *src, + void __user *dst, + int len, __wsum sum, + int *err_ptr) +{ + might_sleep(); + if (access_ok(VERIFY_WRITE, dst, len)) + return csum_partial_copy_generic(src, (__force void *)dst, + len, sum, NULL, err_ptr); + + if (len) + *err_ptr = -EFAULT; + + return (__force __wsum)-1; /* invalid checksum */ +} + +#endif /* _ASM_X86_CHECKSUM_32_H */ diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h new file mode 100644 index 00000000000..9bfdc41629e --- /dev/null +++ b/arch/x86/include/asm/checksum_64.h @@ -0,0 +1,191 @@ +#ifndef _ASM_X86_CHECKSUM_64_H +#define _ASM_X86_CHECKSUM_64_H + +/* + * Checksums for x86-64 + * Copyright 2002 by Andi Kleen, SuSE Labs + * with some code from asm-x86/checksum.h + */ + +#include <linux/compiler.h> +#include <asm/uaccess.h> +#include <asm/byteorder.h> + +/** + * csum_fold - Fold and invert a 32bit checksum. + * sum: 32bit unfolded sum + * + * Fold a 32bit running checksum to 16bit and invert it. This is usually + * the last step before putting a checksum into a packet. + * Make sure not to mix with 64bit checksums. + */ +static inline __sum16 csum_fold(__wsum sum) +{ + asm(" addl %1,%0\n" + " adcl $0xffff,%0" + : "=r" (sum) + : "r" ((__force u32)sum << 16), + "0" ((__force u32)sum & 0xffff0000)); + return (__force __sum16)(~(__force u32)sum >> 16); +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + * + * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by + * Arnt Gulbrandsen. + */ + +/** + * ip_fast_csum - Compute the IPv4 header checksum efficiently. + * iph: ipv4 header + * ihl: length of header / 4 + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned int sum; + + asm(" movl (%1), %0\n" + " subl $4, %2\n" + " jbe 2f\n" + " addl 4(%1), %0\n" + " adcl 8(%1), %0\n" + " adcl 12(%1), %0\n" + "1: adcl 16(%1), %0\n" + " lea 4(%1), %1\n" + " decl %2\n" + " jne 1b\n" + " adcl $0, %0\n" + " movl %0, %2\n" + " shrl $16, %0\n" + " addw %w2, %w0\n" + " adcl $0, %0\n" + " notl %0\n" + "2:" + /* Since the input registers which are loaded with iph and ihl + are modified, we must also specify them as outputs, or gcc + will assume they contain their original values. */ + : "=r" (sum), "=r" (iph), "=r" (ihl) + : "1" (iph), "2" (ihl) + : "memory"); + return (__force __sum16)sum; +} + +/** + * csum_tcpup_nofold - Compute an IPv4 pseudo header checksum. + * @saddr: source address + * @daddr: destination address + * @len: length of packet + * @proto: ip protocol of packet + * @sum: initial sum to be added in (32bit unfolded) + * + * Returns the pseudo header checksum the input data. Result is + * 32bit unfolded. + */ +static inline __wsum +csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum) +{ + asm(" addl %1, %0\n" + " adcl %2, %0\n" + " adcl %3, %0\n" + " adcl $0, %0\n" + : "=r" (sum) + : "g" (daddr), "g" (saddr), + "g" ((len + proto)<<8), "0" (sum)); + return sum; +} + + +/** + * csum_tcpup_magic - Compute an IPv4 pseudo header checksum. + * @saddr: source address + * @daddr: destination address + * @len: length of packet + * @proto: ip protocol of packet + * @sum: initial sum to be added in (32bit unfolded) + * + * Returns the 16bit pseudo header checksum the input data already + * complemented and ready to be filled in. + */ +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} + +/** + * csum_partial - Compute an internet checksum. + * @buff: buffer to be checksummed + * @len: length of buffer. + * @sum: initial sum to be added in (32bit unfolded) + * + * Returns the 32bit unfolded internet checksum of the buffer. + * Before filling it in it needs to be csum_fold()'ed. + * buff should be aligned to a 64bit boundary if possible. + */ +extern __wsum csum_partial(const void *buff, int len, __wsum sum); + +#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1 +#define HAVE_CSUM_COPY_USER 1 + + +/* Do not call this directly. Use the wrappers below */ +extern __wsum csum_partial_copy_generic(const void *src, const void *dst, + int len, __wsum sum, + int *src_err_ptr, int *dst_err_ptr); + + +extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst, + int len, __wsum isum, int *errp); +extern __wsum csum_partial_copy_to_user(const void *src, void __user *dst, + int len, __wsum isum, int *errp); +extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, + int len, __wsum sum); + +/* Old names. To be removed. */ +#define csum_and_copy_to_user csum_partial_copy_to_user +#define csum_and_copy_from_user csum_partial_copy_from_user + +/** + * ip_compute_csum - Compute an 16bit IP checksum. + * @buff: buffer address. + * @len: length of buffer. + * + * Returns the 16bit folded/inverted checksum of the passed buffer. + * Ready to fill in. + */ +extern __sum16 ip_compute_csum(const void *buff, int len); + +/** + * csum_ipv6_magic - Compute checksum of an IPv6 pseudo header. + * @saddr: source address + * @daddr: destination address + * @len: length of packet + * @proto: protocol of packet + * @sum: initial sum (32bit unfolded) to be added in + * + * Computes an IPv6 pseudo header checksum. This sum is added the checksum + * into UDP/TCP packets and contains some link layer information. + * Returns the unfolded 32bit checksum. + */ + +struct in6_addr; + +#define _HAVE_ARCH_IPV6_CSUM 1 +extern __sum16 +csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, + __u32 len, unsigned short proto, __wsum sum); + +static inline unsigned add32_with_carry(unsigned a, unsigned b) +{ + asm("addl %2,%0\n\t" + "adcl $0,%0" + : "=r" (a) + : "0" (a), "r" (b)); + return a; +} + +#endif /* _ASM_X86_CHECKSUM_64_H */ diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h new file mode 100644 index 00000000000..a460fa088d4 --- /dev/null +++ b/arch/x86/include/asm/cmpxchg.h @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "cmpxchg_32.h" +#else +# include "cmpxchg_64.h" +#endif diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h new file mode 100644 index 00000000000..82ceb788a98 --- /dev/null +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -0,0 +1,344 @@ +#ifndef _ASM_X86_CMPXCHG_32_H +#define _ASM_X86_CMPXCHG_32_H + +#include <linux/bitops.h> /* for LOCK_PREFIX */ + +/* + * Note: if you use set64_bit(), __cmpxchg64(), or their variants, you + * you need to test for the feature in boot_cpu_data. + */ + +#define xchg(ptr, v) \ + ((__typeof__(*(ptr)))__xchg((unsigned long)(v), (ptr), sizeof(*(ptr)))) + +struct __xchg_dummy { + unsigned long a[100]; +}; +#define __xg(x) ((struct __xchg_dummy *)(x)) + +/* + * The semantics of XCHGCMP8B are a bit strange, this is why + * there is a loop and the loading of %%eax and %%edx has to + * be inside. This inlines well in most cases, the cached + * cost is around ~38 cycles. (in the future we might want + * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that + * might have an implicit FPU-save as a cost, so it's not + * clear which path to go.) + * + * cmpxchg8b must be used with the lock prefix here to allow + * the instruction to be executed atomically, see page 3-102 + * of the instruction set reference 24319102.pdf. We need + * the reader side to see the coherent 64bit value. + */ +static inline void __set_64bit(unsigned long long *ptr, + unsigned int low, unsigned int high) +{ + asm volatile("\n1:\t" + "movl (%0), %%eax\n\t" + "movl 4(%0), %%edx\n\t" + LOCK_PREFIX "cmpxchg8b (%0)\n\t" + "jnz 1b" + : /* no outputs */ + : "D"(ptr), + "b"(low), + "c"(high) + : "ax", "dx", "memory"); +} + +static inline void __set_64bit_constant(unsigned long long *ptr, + unsigned long long value) +{ + __set_64bit(ptr, (unsigned int)value, (unsigned int)(value >> 32)); +} + +#define ll_low(x) *(((unsigned int *)&(x)) + 0) +#define ll_high(x) *(((unsigned int *)&(x)) + 1) + +static inline void __set_64bit_var(unsigned long long *ptr, + unsigned long long value) +{ + __set_64bit(ptr, ll_low(value), ll_high(value)); +} + +#define set_64bit(ptr, value) \ + (__builtin_constant_p((value)) \ + ? __set_64bit_constant((ptr), (value)) \ + : __set_64bit_var((ptr), (value))) + +#define _set_64bit(ptr, value) \ + (__builtin_constant_p(value) \ + ? __set_64bit(ptr, (unsigned int)(value), \ + (unsigned int)((value) >> 32)) \ + : __set_64bit(ptr, ll_low((value)), ll_high((value)))) + +/* + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway + * Note 2: xchg has side effect, so that attribute volatile is necessary, + * but generally the primitive is invalid, *ptr is output argument. --ANK + */ +static inline unsigned long __xchg(unsigned long x, volatile void *ptr, + int size) +{ + switch (size) { + case 1: + asm volatile("xchgb %b0,%1" + : "=q" (x) + : "m" (*__xg(ptr)), "0" (x) + : "memory"); + break; + case 2: + asm volatile("xchgw %w0,%1" + : "=r" (x) + : "m" (*__xg(ptr)), "0" (x) + : "memory"); + break; + case 4: + asm volatile("xchgl %0,%1" + : "=r" (x) + : "m" (*__xg(ptr)), "0" (x) + : "memory"); + break; + } + return x; +} + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + */ + +#ifdef CONFIG_X86_CMPXCHG +#define __HAVE_ARCH_CMPXCHG 1 +#define cmpxchg(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr)))) +#define sync_cmpxchg(ptr, o, n) \ + ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr)))) +#define cmpxchg_local(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr)))) +#endif + +#ifdef CONFIG_X86_CMPXCHG64 +#define cmpxchg64(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \ + (unsigned long long)(n))) +#define cmpxchg64_local(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \ + (unsigned long long)(n))) +#endif + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + asm volatile(LOCK_PREFIX "cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + +/* + * Always use locked operations when touching memory shared with a + * hypervisor, since the system may be SMP even if the guest kernel + * isn't. + */ +static inline unsigned long __sync_cmpxchg(volatile void *ptr, + unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + asm volatile("lock; cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + asm volatile("lock; cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + asm volatile("lock; cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + +static inline unsigned long __cmpxchg_local(volatile void *ptr, + unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + asm volatile("cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + asm volatile("cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + asm volatile("cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + +static inline unsigned long long __cmpxchg64(volatile void *ptr, + unsigned long long old, + unsigned long long new) +{ + unsigned long long prev; + asm volatile(LOCK_PREFIX "cmpxchg8b %3" + : "=A"(prev) + : "b"((unsigned long)new), + "c"((unsigned long)(new >> 32)), + "m"(*__xg(ptr)), + "0"(old) + : "memory"); + return prev; +} + +static inline unsigned long long __cmpxchg64_local(volatile void *ptr, + unsigned long long old, + unsigned long long new) +{ + unsigned long long prev; + asm volatile("cmpxchg8b %3" + : "=A"(prev) + : "b"((unsigned long)new), + "c"((unsigned long)(new >> 32)), + "m"(*__xg(ptr)), + "0"(old) + : "memory"); + return prev; +} + +#ifndef CONFIG_X86_CMPXCHG +/* + * Building a kernel capable running on 80386. It may be necessary to + * simulate the cmpxchg on the 80386 CPU. For that purpose we define + * a function for each of the sizes we support. + */ + +extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8); +extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16); +extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32); + +static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + switch (size) { + case 1: + return cmpxchg_386_u8(ptr, old, new); + case 2: + return cmpxchg_386_u16(ptr, old, new); + case 4: + return cmpxchg_386_u32(ptr, old, new); + } + return old; +} + +#define cmpxchg(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + if (likely(boot_cpu_data.x86 > 3)) \ + __ret = (__typeof__(*(ptr)))__cmpxchg((ptr), \ + (unsigned long)(o), (unsigned long)(n), \ + sizeof(*(ptr))); \ + else \ + __ret = (__typeof__(*(ptr)))cmpxchg_386((ptr), \ + (unsigned long)(o), (unsigned long)(n), \ + sizeof(*(ptr))); \ + __ret; \ +}) +#define cmpxchg_local(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + if (likely(boot_cpu_data.x86 > 3)) \ + __ret = (__typeof__(*(ptr)))__cmpxchg_local((ptr), \ + (unsigned long)(o), (unsigned long)(n), \ + sizeof(*(ptr))); \ + else \ + __ret = (__typeof__(*(ptr)))cmpxchg_386((ptr), \ + (unsigned long)(o), (unsigned long)(n), \ + sizeof(*(ptr))); \ + __ret; \ +}) +#endif + +#ifndef CONFIG_X86_CMPXCHG64 +/* + * Building a kernel capable running on 80386 and 80486. It may be necessary + * to simulate the cmpxchg8b on the 80386 and 80486 CPU. + */ + +extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); + +#define cmpxchg64(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + if (likely(boot_cpu_data.x86 > 4)) \ + __ret = (__typeof__(*(ptr)))__cmpxchg64((ptr), \ + (unsigned long long)(o), \ + (unsigned long long)(n)); \ + else \ + __ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr), \ + (unsigned long long)(o), \ + (unsigned long long)(n)); \ + __ret; \ +}) +#define cmpxchg64_local(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + if (likely(boot_cpu_data.x86 > 4)) \ + __ret = (__typeof__(*(ptr)))__cmpxchg64_local((ptr), \ + (unsigned long long)(o), \ + (unsigned long long)(n)); \ + else \ + __ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr), \ + (unsigned long long)(o), \ + (unsigned long long)(n)); \ + __ret; \ +}) + +#endif + +#endif /* _ASM_X86_CMPXCHG_32_H */ diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h new file mode 100644 index 00000000000..52de72e0de8 --- /dev/null +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -0,0 +1,185 @@ +#ifndef _ASM_X86_CMPXCHG_64_H +#define _ASM_X86_CMPXCHG_64_H + +#include <asm/alternative.h> /* Provides LOCK_PREFIX */ + +#define xchg(ptr, v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v), \ + (ptr), sizeof(*(ptr)))) + +#define __xg(x) ((volatile long *)(x)) + +static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) +{ + *ptr = val; +} + +#define _set_64bit set_64bit + +/* + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway + * Note 2: xchg has side effect, so that attribute volatile is necessary, + * but generally the primitive is invalid, *ptr is output argument. --ANK + */ +static inline unsigned long __xchg(unsigned long x, volatile void *ptr, + int size) +{ + switch (size) { + case 1: + asm volatile("xchgb %b0,%1" + : "=q" (x) + : "m" (*__xg(ptr)), "0" (x) + : "memory"); + break; + case 2: + asm volatile("xchgw %w0,%1" + : "=r" (x) + : "m" (*__xg(ptr)), "0" (x) + : "memory"); + break; + case 4: + asm volatile("xchgl %k0,%1" + : "=r" (x) + : "m" (*__xg(ptr)), "0" (x) + : "memory"); + break; + case 8: + asm volatile("xchgq %0,%1" + : "=r" (x) + : "m" (*__xg(ptr)), "0" (x) + : "memory"); + break; + } + return x; +} + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + */ + +#define __HAVE_ARCH_CMPXCHG 1 + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 8: + asm volatile(LOCK_PREFIX "cmpxchgq %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + +/* + * Always use locked operations when touching memory shared with a + * hypervisor, since the system may be SMP even if the guest kernel + * isn't. + */ +static inline unsigned long __sync_cmpxchg(volatile void *ptr, + unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + asm volatile("lock; cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + asm volatile("lock; cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + asm volatile("lock; cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + +static inline unsigned long __cmpxchg_local(volatile void *ptr, + unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + asm volatile("cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + asm volatile("cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + asm volatile("cmpxchgl %k1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 8: + asm volatile("cmpxchgq %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + +#define cmpxchg(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr)))) +#define cmpxchg64(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg((ptr), (o), (n)); \ +}) +#define cmpxchg_local(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr)))) +#define sync_cmpxchg(ptr, o, n) \ + ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr)))) +#define cmpxchg64_local(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg_local((ptr), (o), (n)); \ +}) + +#endif /* _ASM_X86_CMPXCHG_64_H */ diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h new file mode 100644 index 00000000000..9a9c7bdc923 --- /dev/null +++ b/arch/x86/include/asm/compat.h @@ -0,0 +1,218 @@ +#ifndef _ASM_X86_COMPAT_H +#define _ASM_X86_COMPAT_H + +/* + * Architecture specific compatibility types + */ +#include <linux/types.h> +#include <linux/sched.h> +#include <asm/user32.h> + +#define COMPAT_USER_HZ 100 + +typedef u32 compat_size_t; +typedef s32 compat_ssize_t; +typedef s32 compat_time_t; +typedef s32 compat_clock_t; +typedef s32 compat_pid_t; +typedef u16 __compat_uid_t; +typedef u16 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; +typedef u16 compat_mode_t; +typedef u32 compat_ino_t; +typedef u16 compat_dev_t; +typedef s32 compat_off_t; +typedef s64 compat_loff_t; +typedef u16 compat_nlink_t; +typedef u16 compat_ipc_pid_t; +typedef s32 compat_daddr_t; +typedef u32 compat_caddr_t; +typedef __kernel_fsid_t compat_fsid_t; +typedef s32 compat_timer_t; +typedef s32 compat_key_t; + +typedef s32 compat_int_t; +typedef s32 compat_long_t; +typedef s64 __attribute__((aligned(4))) compat_s64; +typedef u32 compat_uint_t; +typedef u32 compat_ulong_t; +typedef u64 __attribute__((aligned(4))) compat_u64; + +struct compat_timespec { + compat_time_t tv_sec; + s32 tv_nsec; +}; + +struct compat_timeval { + compat_time_t tv_sec; + s32 tv_usec; +}; + +struct compat_stat { + compat_dev_t st_dev; + u16 __pad1; + compat_ino_t st_ino; + compat_mode_t st_mode; + compat_nlink_t st_nlink; + __compat_uid_t st_uid; + __compat_gid_t st_gid; + compat_dev_t st_rdev; + u16 __pad2; + u32 st_size; + u32 st_blksize; + u32 st_blocks; + u32 st_atime; + u32 st_atime_nsec; + u32 st_mtime; + u32 st_mtime_nsec; + u32 st_ctime; + u32 st_ctime_nsec; + u32 __unused4; + u32 __unused5; +}; + +struct compat_flock { + short l_type; + short l_whence; + compat_off_t l_start; + compat_off_t l_len; + compat_pid_t l_pid; +}; + +#define F_GETLK64 12 /* using 'struct flock64' */ +#define F_SETLK64 13 +#define F_SETLKW64 14 + +/* + * IA32 uses 4 byte alignment for 64 bit quantities, + * so we need to pack this structure. + */ +struct compat_flock64 { + short l_type; + short l_whence; + compat_loff_t l_start; + compat_loff_t l_len; + compat_pid_t l_pid; +} __attribute__((packed)); + +struct compat_statfs { + int f_type; + int f_bsize; + int f_blocks; + int f_bfree; + int f_bavail; + int f_files; + int f_ffree; + compat_fsid_t f_fsid; + int f_namelen; /* SunOS ignores this field. */ + int f_frsize; + int f_spare[5]; +}; + +#define COMPAT_RLIM_OLD_INFINITY 0x7fffffff +#define COMPAT_RLIM_INFINITY 0xffffffff + +typedef u32 compat_old_sigset_t; /* at least 32 bits */ + +#define _COMPAT_NSIG 64 +#define _COMPAT_NSIG_BPW 32 + +typedef u32 compat_sigset_word; + +#define COMPAT_OFF_T_MAX 0x7fffffff +#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL + +struct compat_ipc64_perm { + compat_key_t key; + __compat_uid32_t uid; + __compat_gid32_t gid; + __compat_uid32_t cuid; + __compat_gid32_t cgid; + unsigned short mode; + unsigned short __pad1; + unsigned short seq; + unsigned short __pad2; + compat_ulong_t unused1; + compat_ulong_t unused2; +}; + +struct compat_semid64_ds { + struct compat_ipc64_perm sem_perm; + compat_time_t sem_otime; + compat_ulong_t __unused1; + compat_time_t sem_ctime; + compat_ulong_t __unused2; + compat_ulong_t sem_nsems; + compat_ulong_t __unused3; + compat_ulong_t __unused4; +}; + +struct compat_msqid64_ds { + struct compat_ipc64_perm msg_perm; + compat_time_t msg_stime; + compat_ulong_t __unused1; + compat_time_t msg_rtime; + compat_ulong_t __unused2; + compat_time_t msg_ctime; + compat_ulong_t __unused3; + compat_ulong_t msg_cbytes; + compat_ulong_t msg_qnum; + compat_ulong_t msg_qbytes; + compat_pid_t msg_lspid; + compat_pid_t msg_lrpid; + compat_ulong_t __unused4; + compat_ulong_t __unused5; +}; + +struct compat_shmid64_ds { + struct compat_ipc64_perm shm_perm; + compat_size_t shm_segsz; + compat_time_t shm_atime; + compat_ulong_t __unused1; + compat_time_t shm_dtime; + compat_ulong_t __unused2; + compat_time_t shm_ctime; + compat_ulong_t __unused3; + compat_pid_t shm_cpid; + compat_pid_t shm_lpid; + compat_ulong_t shm_nattch; + compat_ulong_t __unused4; + compat_ulong_t __unused5; +}; + +/* + * The type of struct elf_prstatus.pr_reg in compatible core dumps. + */ +typedef struct user_regs_struct32 compat_elf_gregset_t; + +/* + * A pointer passed in from user mode. This should not + * be used for syscall parameters, just declare them + * as pointers because the syscall entry code will have + * appropriately converted them already. + */ +typedef u32 compat_uptr_t; + +static inline void __user *compat_ptr(compat_uptr_t uptr) +{ + return (void __user *)(unsigned long)uptr; +} + +static inline compat_uptr_t ptr_to_compat(void __user *uptr) +{ + return (u32)(unsigned long)uptr; +} + +static inline void __user *compat_alloc_user_space(long len) +{ + struct pt_regs *regs = task_pt_regs(current); + return (void __user *)regs->sp - len; +} + +static inline int is_compat_task(void) +{ + return current_thread_info()->status & TS_COMPAT; +} + +#endif /* _ASM_X86_COMPAT_H */ diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h new file mode 100644 index 00000000000..bae482df603 --- /dev/null +++ b/arch/x86/include/asm/cpu.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_CPU_H +#define _ASM_X86_CPU_H + +#include <linux/device.h> +#include <linux/cpu.h> +#include <linux/topology.h> +#include <linux/nodemask.h> +#include <linux/percpu.h> + +struct x86_cpu { + struct cpu cpu; +}; + +#ifdef CONFIG_HOTPLUG_CPU +extern int arch_register_cpu(int num); +extern void arch_unregister_cpu(int); +#endif + +DECLARE_PER_CPU(int, cpu_state); +#endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h new file mode 100644 index 00000000000..f73e95d75b4 --- /dev/null +++ b/arch/x86/include/asm/cpufeature.h @@ -0,0 +1,271 @@ +/* + * Defines x86 CPU feature bits + */ +#ifndef _ASM_X86_CPUFEATURE_H +#define _ASM_X86_CPUFEATURE_H + +#include <asm/required-features.h> + +#define NCAPINTS 9 /* N 32-bit words worth of info */ + +/* + * Note: If the comment begins with a quoted string, that string is used + * in /proc/cpuinfo instead of the macro name. If the string is "", + * this feature bit is not displayed in /proc/cpuinfo at all. + */ + +/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ +#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */ +#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */ +#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */ +#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */ +#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ +#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers */ +#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ +#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */ +#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ +#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */ +#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */ +#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */ +#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */ +#define X86_FEATURE_CMOV (0*32+15) /* CMOV instructions */ + /* (plus FCMOVcc, FCOMI with FPU) */ +#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ +#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ +#define X86_FEATURE_PN (0*32+18) /* Processor serial number */ +#define X86_FEATURE_CLFLSH (0*32+19) /* "clflush" CLFLUSH instruction */ +#define X86_FEATURE_DS (0*32+21) /* "dts" Debug Store */ +#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ +#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_XMM (0*32+25) /* "sse" */ +#define X86_FEATURE_XMM2 (0*32+26) /* "sse2" */ +#define X86_FEATURE_SELFSNOOP (0*32+27) /* "ss" CPU self snoop */ +#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_ACC (0*32+29) /* "tm" Automatic clock control */ +#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */ +#define X86_FEATURE_PBE (0*32+31) /* Pending Break Enable */ + +/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ +/* Don't duplicate feature flags which are redundant with Intel! */ +#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */ +#define X86_FEATURE_MP (1*32+19) /* MP Capable. */ +#define X86_FEATURE_NX (1*32+20) /* Execute Disable */ +#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT (1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_GBPAGES (1*32+26) /* "pdpe1gb" GB pages */ +#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */ +#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ +#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ +#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */ + +/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ +#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */ +#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */ +#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */ + +/* Other features, Linux-defined mapping, word 3 */ +/* This range is used for feature bits which conflict or are synthesized */ +#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ +/* cpu types for specific tunings: */ +#define X86_FEATURE_K8 (3*32+ 4) /* "" Opteron, Athlon64 */ +#define X86_FEATURE_K7 (3*32+ 5) /* "" Athlon */ +#define X86_FEATURE_P3 (3*32+ 6) /* "" P3 */ +#define X86_FEATURE_P4 (3*32+ 7) /* "" P4 */ +#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */ +#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ +#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */ +#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ +#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ +#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ +#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */ +#define X86_FEATURE_SYSENTER32 (3*32+15) /* "" sysenter in ia32 userspace */ +#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well */ +#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* "" Mfence synchronizes RDTSC */ +#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */ +#define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */ +#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ +#define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */ +#define X86_FEATURE_XTOPOLOGY (3*32+21) /* cpu topology enum extensions */ + +/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ +#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ +#define X86_FEATURE_PCLMULQDQ (4*32+ 1) /* PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 (4*32+ 2) /* 64-bit Debug Store */ +#define X86_FEATURE_MWAIT (4*32+ 3) /* "monitor" Monitor/Mwait support */ +#define X86_FEATURE_DSCPL (4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ +#define X86_FEATURE_VMX (4*32+ 5) /* Hardware virtualization */ +#define X86_FEATURE_SMX (4*32+ 6) /* Safer mode */ +#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */ +#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ +#define X86_FEATURE_CID (4*32+10) /* Context ID */ +#define X86_FEATURE_FMA (4*32+12) /* Fused multiply-add */ +#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ +#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ +#define X86_FEATURE_PDCM (4*32+15) /* Performance Capabilities */ +#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */ +#define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */ +#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ +#define X86_FEATURE_AES (4*32+25) /* AES instructions */ +#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ +#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ +#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ + +/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ +#define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */ +#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* "rng_en" RNG enabled */ +#define X86_FEATURE_XCRYPT (5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ +#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* "ace_en" on-CPU crypto enabled */ +#define X86_FEATURE_ACE2 (5*32+ 8) /* Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN (5*32+ 9) /* ACE v2 enabled */ +#define X86_FEATURE_PHE (5*32+10) /* PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN (5*32+11) /* PHE enabled */ +#define X86_FEATURE_PMM (5*32+12) /* PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN (5*32+13) /* PMM enabled */ + +/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ +#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */ +#define X86_FEATURE_SVM (6*32+ 2) /* Secure virtual machine */ +#define X86_FEATURE_EXTAPIC (6*32+ 3) /* Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY (6*32+ 4) /* CR8 in 32-bit mode */ +#define X86_FEATURE_ABM (6*32+ 5) /* Advanced bit manipulation */ +#define X86_FEATURE_SSE4A (6*32+ 6) /* SSE-4A */ +#define X86_FEATURE_MISALIGNSSE (6*32+ 7) /* Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */ +#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */ +#define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */ +#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */ +#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ + +/* + * Auxiliary flags: Linux defined - For features scattered in various + * CPUID levels like 0x6, 0xA etc + */ +#define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ + +/* Virtualization flags: Linux defined */ +#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ +#define X86_FEATURE_VNMI (8*32+ 1) /* Intel Virtual NMI */ +#define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */ +#define X86_FEATURE_EPT (8*32+ 3) /* Intel Extended Page Table */ +#define X86_FEATURE_VPID (8*32+ 4) /* Intel Virtual Processor ID */ + +#if defined(__KERNEL__) && !defined(__ASSEMBLY__) + +#include <linux/bitops.h> + +extern const char * const x86_cap_flags[NCAPINTS*32]; +extern const char * const x86_power_flags[32]; + +#define test_cpu_cap(c, bit) \ + test_bit(bit, (unsigned long *)((c)->x86_capability)) + +#define cpu_has(c, bit) \ + (__builtin_constant_p(bit) && \ + ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) || \ + (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) || \ + (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) || \ + (((bit)>>5)==3 && (1UL<<((bit)&31) & REQUIRED_MASK3)) || \ + (((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4)) || \ + (((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5)) || \ + (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \ + (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) ) \ + ? 1 : \ + test_cpu_cap(c, bit)) + +#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) + +#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) +#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability)) +#define setup_clear_cpu_cap(bit) do { \ + clear_cpu_cap(&boot_cpu_data, bit); \ + set_bit(bit, (unsigned long *)cleared_cpu_caps); \ +} while (0) +#define setup_force_cpu_cap(bit) do { \ + set_cpu_cap(&boot_cpu_data, bit); \ + clear_bit(bit, (unsigned long *)cleared_cpu_caps); \ +} while (0) + +#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) +#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME) +#define cpu_has_de boot_cpu_has(X86_FEATURE_DE) +#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE) +#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) +#define cpu_has_pae boot_cpu_has(X86_FEATURE_PAE) +#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE) +#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) +#define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP) +#define cpu_has_mtrr boot_cpu_has(X86_FEATURE_MTRR) +#define cpu_has_mmx boot_cpu_has(X86_FEATURE_MMX) +#define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) +#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) +#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2) +#define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3) +#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) +#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP) +#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) +#define cpu_has_k6_mtrr boot_cpu_has(X86_FEATURE_K6_MTRR) +#define cpu_has_cyrix_arr boot_cpu_has(X86_FEATURE_CYRIX_ARR) +#define cpu_has_centaur_mcr boot_cpu_has(X86_FEATURE_CENTAUR_MCR) +#define cpu_has_xstore boot_cpu_has(X86_FEATURE_XSTORE) +#define cpu_has_xstore_enabled boot_cpu_has(X86_FEATURE_XSTORE_EN) +#define cpu_has_xcrypt boot_cpu_has(X86_FEATURE_XCRYPT) +#define cpu_has_xcrypt_enabled boot_cpu_has(X86_FEATURE_XCRYPT_EN) +#define cpu_has_ace2 boot_cpu_has(X86_FEATURE_ACE2) +#define cpu_has_ace2_enabled boot_cpu_has(X86_FEATURE_ACE2_EN) +#define cpu_has_phe boot_cpu_has(X86_FEATURE_PHE) +#define cpu_has_phe_enabled boot_cpu_has(X86_FEATURE_PHE_EN) +#define cpu_has_pmm boot_cpu_has(X86_FEATURE_PMM) +#define cpu_has_pmm_enabled boot_cpu_has(X86_FEATURE_PMM_EN) +#define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) +#define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS) +#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) +#define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS) +#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) +#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) +#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) +#define cpu_has_xmm4_1 boot_cpu_has(X86_FEATURE_XMM4_1) +#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) +#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) +#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) + +#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) +# define cpu_has_invlpg 1 +#else +# define cpu_has_invlpg (boot_cpu_data.x86 > 3) +#endif + +#ifdef CONFIG_X86_64 + +#undef cpu_has_vme +#define cpu_has_vme 0 + +#undef cpu_has_pae +#define cpu_has_pae ___BUG___ + +#undef cpu_has_mp +#define cpu_has_mp 1 + +#undef cpu_has_k6_mtrr +#define cpu_has_k6_mtrr 0 + +#undef cpu_has_cyrix_arr +#define cpu_has_cyrix_arr 0 + +#undef cpu_has_centaur_mcr +#define cpu_has_centaur_mcr 0 + +#endif /* CONFIG_X86_64 */ + +#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ + +#endif /* _ASM_X86_CPUFEATURE_H */ diff --git a/arch/x86/include/asm/cputime.h b/arch/x86/include/asm/cputime.h new file mode 100644 index 00000000000..6d68ad7e0ea --- /dev/null +++ b/arch/x86/include/asm/cputime.h @@ -0,0 +1 @@ +#include <asm-generic/cputime.h> diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h new file mode 100644 index 00000000000..0930b4f8d67 --- /dev/null +++ b/arch/x86/include/asm/current.h @@ -0,0 +1,39 @@ +#ifndef _ASM_X86_CURRENT_H +#define _ASM_X86_CURRENT_H + +#ifdef CONFIG_X86_32 +#include <linux/compiler.h> +#include <asm/percpu.h> + +struct task_struct; + +DECLARE_PER_CPU(struct task_struct *, current_task); +static __always_inline struct task_struct *get_current(void) +{ + return x86_read_percpu(current_task); +} + +#else /* X86_32 */ + +#ifndef __ASSEMBLY__ +#include <asm/pda.h> + +struct task_struct; + +static __always_inline struct task_struct *get_current(void) +{ + return read_pda(pcurrent); +} + +#else /* __ASSEMBLY__ */ + +#include <asm/asm-offsets.h> +#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg + +#endif /* __ASSEMBLY__ */ + +#endif /* X86_32 */ + +#define current get_current() + +#endif /* _ASM_X86_CURRENT_H */ diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h new file mode 100644 index 00000000000..3ea6f37be9e --- /dev/null +++ b/arch/x86/include/asm/debugreg.h @@ -0,0 +1,70 @@ +#ifndef _ASM_X86_DEBUGREG_H +#define _ASM_X86_DEBUGREG_H + + +/* Indicate the register numbers for a number of the specific + debug registers. Registers 0-3 contain the addresses we wish to trap on */ +#define DR_FIRSTADDR 0 /* u_debugreg[DR_FIRSTADDR] */ +#define DR_LASTADDR 3 /* u_debugreg[DR_LASTADDR] */ + +#define DR_STATUS 6 /* u_debugreg[DR_STATUS] */ +#define DR_CONTROL 7 /* u_debugreg[DR_CONTROL] */ + +/* Define a few things for the status register. We can use this to determine + which debugging register was responsible for the trap. The other bits + are either reserved or not of interest to us. */ + +#define DR_TRAP0 (0x1) /* db0 */ +#define DR_TRAP1 (0x2) /* db1 */ +#define DR_TRAP2 (0x4) /* db2 */ +#define DR_TRAP3 (0x8) /* db3 */ + +#define DR_STEP (0x4000) /* single-step */ +#define DR_SWITCH (0x8000) /* task switch */ + +/* Now define a bunch of things for manipulating the control register. + The top two bytes of the control register consist of 4 fields of 4 + bits - each field corresponds to one of the four debug registers, + and indicates what types of access we trap on, and how large the data + field is that we are looking at */ + +#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */ +#define DR_CONTROL_SIZE 4 /* 4 control bits per register */ + +#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */ +#define DR_RW_WRITE (0x1) +#define DR_RW_READ (0x3) + +#define DR_LEN_1 (0x0) /* Settings for data length to trap on */ +#define DR_LEN_2 (0x4) +#define DR_LEN_4 (0xC) +#define DR_LEN_8 (0x8) + +/* The low byte to the control register determine which registers are + enabled. There are 4 fields of two bits. One bit is "local", meaning + that the processor will reset the bit after a task switch and the other + is global meaning that we have to explicitly reset the bit. With linux, + you can use either one, since we explicitly zero the register when we enter + kernel mode. */ + +#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ +#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ +#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ + +#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ +#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */ + +/* The second byte to the control register has a few special things. + We can slow the instruction pipeline for instructions coming via the + gdt or the ldt if we want to. I am not sure why this is an advantage */ + +#ifdef __i386__ +#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */ +#else +#define DR_CONTROL_RESERVED (0xFFFFFFFF0000FC00UL) /* Reserved */ +#endif + +#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ +#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ + +#endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h new file mode 100644 index 00000000000..409a649204a --- /dev/null +++ b/arch/x86/include/asm/delay.h @@ -0,0 +1,31 @@ +#ifndef _ASM_X86_DELAY_H +#define _ASM_X86_DELAY_H + +/* + * Copyright (C) 1993 Linus Torvalds + * + * Delay routines calling functions in arch/x86/lib/delay.c + */ + +/* Undefined functions to get compile-time errors */ +extern void __bad_udelay(void); +extern void __bad_ndelay(void); + +extern void __udelay(unsigned long usecs); +extern void __ndelay(unsigned long nsecs); +extern void __const_udelay(unsigned long xloops); +extern void __delay(unsigned long loops); + +/* 0x10c7 is 2**32 / 1000000 (rounded up) */ +#define udelay(n) (__builtin_constant_p(n) ? \ + ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \ + __udelay(n)) + +/* 0x5 is 2**32 / 1000000000 (rounded up) */ +#define ndelay(n) (__builtin_constant_p(n) ? \ + ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \ + __ndelay(n)) + +void use_tsc_delay(void); + +#endif /* _ASM_X86_DELAY_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h new file mode 100644 index 00000000000..e6b82b17b07 --- /dev/null +++ b/arch/x86/include/asm/desc.h @@ -0,0 +1,409 @@ +#ifndef _ASM_X86_DESC_H +#define _ASM_X86_DESC_H + +#ifndef __ASSEMBLY__ +#include <asm/desc_defs.h> +#include <asm/ldt.h> +#include <asm/mmu.h> +#include <linux/smp.h> + +static inline void fill_ldt(struct desc_struct *desc, + const struct user_desc *info) +{ + desc->limit0 = info->limit & 0x0ffff; + desc->base0 = info->base_addr & 0x0000ffff; + + desc->base1 = (info->base_addr & 0x00ff0000) >> 16; + desc->type = (info->read_exec_only ^ 1) << 1; + desc->type |= info->contents << 2; + desc->s = 1; + desc->dpl = 0x3; + desc->p = info->seg_not_present ^ 1; + desc->limit = (info->limit & 0xf0000) >> 16; + desc->avl = info->useable; + desc->d = info->seg_32bit; + desc->g = info->limit_in_pages; + desc->base2 = (info->base_addr & 0xff000000) >> 24; + /* + * Don't allow setting of the lm bit. It is useless anyway + * because 64bit system calls require __USER_CS: + */ + desc->l = 0; +} + +extern struct desc_ptr idt_descr; +extern gate_desc idt_table[]; + +struct gdt_page { + struct desc_struct gdt[GDT_ENTRIES]; +} __attribute__((aligned(PAGE_SIZE))); +DECLARE_PER_CPU(struct gdt_page, gdt_page); + +static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) +{ + return per_cpu(gdt_page, cpu).gdt; +} + +#ifdef CONFIG_X86_64 + +static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func, + unsigned dpl, unsigned ist, unsigned seg) +{ + gate->offset_low = PTR_LOW(func); + gate->segment = __KERNEL_CS; + gate->ist = ist; + gate->p = 1; + gate->dpl = dpl; + gate->zero0 = 0; + gate->zero1 = 0; + gate->type = type; + gate->offset_middle = PTR_MIDDLE(func); + gate->offset_high = PTR_HIGH(func); +} + +#else +static inline void pack_gate(gate_desc *gate, unsigned char type, + unsigned long base, unsigned dpl, unsigned flags, + unsigned short seg) +{ + gate->a = (seg << 16) | (base & 0xffff); + gate->b = (base & 0xffff0000) | + (((0x80 | type | (dpl << 5)) & 0xff) << 8); +} + +#endif + +static inline int desc_empty(const void *ptr) +{ + const u32 *desc = ptr; + return !(desc[0] | desc[1]); +} + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define load_TR_desc() native_load_tr_desc() +#define load_gdt(dtr) native_load_gdt(dtr) +#define load_idt(dtr) native_load_idt(dtr) +#define load_tr(tr) asm volatile("ltr %0"::"m" (tr)) +#define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt)) + +#define store_gdt(dtr) native_store_gdt(dtr) +#define store_idt(dtr) native_store_idt(dtr) +#define store_tr(tr) (tr = native_store_tr()) +#define store_ldt(ldt) asm("sldt %0":"=m" (ldt)) + +#define load_TLS(t, cpu) native_load_tls(t, cpu) +#define set_ldt native_set_ldt + +#define write_ldt_entry(dt, entry, desc) \ + native_write_ldt_entry(dt, entry, desc) +#define write_gdt_entry(dt, entry, desc, type) \ + native_write_gdt_entry(dt, entry, desc, type) +#define write_idt_entry(dt, entry, g) \ + native_write_idt_entry(dt, entry, g) + +static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) +{ +} + +static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) +{ +} +#endif /* CONFIG_PARAVIRT */ + +static inline void native_write_idt_entry(gate_desc *idt, int entry, + const gate_desc *gate) +{ + memcpy(&idt[entry], gate, sizeof(*gate)); +} + +static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, + const void *desc) +{ + memcpy(&ldt[entry], desc, 8); +} + +static inline void native_write_gdt_entry(struct desc_struct *gdt, int entry, + const void *desc, int type) +{ + unsigned int size; + switch (type) { + case DESC_TSS: + size = sizeof(tss_desc); + break; + case DESC_LDT: + size = sizeof(ldt_desc); + break; + default: + size = sizeof(struct desc_struct); + break; + } + memcpy(&gdt[entry], desc, size); +} + +static inline void pack_descriptor(struct desc_struct *desc, unsigned long base, + unsigned long limit, unsigned char type, + unsigned char flags) +{ + desc->a = ((base & 0xffff) << 16) | (limit & 0xffff); + desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) | + (limit & 0x000f0000) | ((type & 0xff) << 8) | + ((flags & 0xf) << 20); + desc->p = 1; +} + + +static inline void set_tssldt_descriptor(void *d, unsigned long addr, + unsigned type, unsigned size) +{ +#ifdef CONFIG_X86_64 + struct ldttss_desc64 *desc = d; + memset(desc, 0, sizeof(*desc)); + desc->limit0 = size & 0xFFFF; + desc->base0 = PTR_LOW(addr); + desc->base1 = PTR_MIDDLE(addr) & 0xFF; + desc->type = type; + desc->p = 1; + desc->limit1 = (size >> 16) & 0xF; + desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF; + desc->base3 = PTR_HIGH(addr); +#else + pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0); +#endif +} + +static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr) +{ + struct desc_struct *d = get_cpu_gdt_table(cpu); + tss_desc tss; + + /* + * sizeof(unsigned long) coming from an extra "long" at the end + * of the iobitmap. See tss_struct definition in processor.h + * + * -1? seg base+limit should be pointing to the address of the + * last valid byte + */ + set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS, + IO_BITMAP_OFFSET + IO_BITMAP_BYTES + + sizeof(unsigned long) - 1); + write_gdt_entry(d, entry, &tss, DESC_TSS); +} + +#define set_tss_desc(cpu, addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) + +static inline void native_set_ldt(const void *addr, unsigned int entries) +{ + if (likely(entries == 0)) + asm volatile("lldt %w0"::"q" (0)); + else { + unsigned cpu = smp_processor_id(); + ldt_desc ldt; + + set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT, + entries * LDT_ENTRY_SIZE - 1); + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, + &ldt, DESC_LDT); + asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); + } +} + +static inline void native_load_tr_desc(void) +{ + asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); +} + +static inline void native_load_gdt(const struct desc_ptr *dtr) +{ + asm volatile("lgdt %0"::"m" (*dtr)); +} + +static inline void native_load_idt(const struct desc_ptr *dtr) +{ + asm volatile("lidt %0"::"m" (*dtr)); +} + +static inline void native_store_gdt(struct desc_ptr *dtr) +{ + asm volatile("sgdt %0":"=m" (*dtr)); +} + +static inline void native_store_idt(struct desc_ptr *dtr) +{ + asm volatile("sidt %0":"=m" (*dtr)); +} + +static inline unsigned long native_store_tr(void) +{ + unsigned long tr; + asm volatile("str %0":"=r" (tr)); + return tr; +} + +static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) +{ + unsigned int i; + struct desc_struct *gdt = get_cpu_gdt_table(cpu); + + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) + gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; +} + +#define _LDT_empty(info) \ + ((info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->useable == 0) + +#ifdef CONFIG_X86_64 +#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0)) +#else +#define LDT_empty(info) (_LDT_empty(info)) +#endif + +static inline void clear_LDT(void) +{ + set_ldt(NULL, 0); +} + +/* + * load one particular LDT into the current CPU + */ +static inline void load_LDT_nolock(mm_context_t *pc) +{ + set_ldt(pc->ldt, pc->size); +} + +static inline void load_LDT(mm_context_t *pc) +{ + preempt_disable(); + load_LDT_nolock(pc); + preempt_enable(); +} + +static inline unsigned long get_desc_base(const struct desc_struct *desc) +{ + return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24); +} + +static inline unsigned long get_desc_limit(const struct desc_struct *desc) +{ + return desc->limit0 | (desc->limit << 16); +} + +static inline void _set_gate(int gate, unsigned type, void *addr, + unsigned dpl, unsigned ist, unsigned seg) +{ + gate_desc s; + pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg); + /* + * does not need to be atomic because it is only done once at + * setup time + */ + write_idt_entry(idt_table, gate, &s); +} + +/* + * This needs to use 'idt_table' rather than 'idt', and + * thus use the _nonmapped_ version of the IDT, as the + * Pentium F0 0F bugfix can have resulted in the mapped + * IDT being write-protected. + */ +static inline void set_intr_gate(unsigned int n, void *addr) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); +} + +#define SYS_VECTOR_FREE 0 +#define SYS_VECTOR_ALLOCED 1 + +extern int first_system_vector; +extern char system_vectors[]; + +static inline void alloc_system_vector(int vector) +{ + if (system_vectors[vector] == SYS_VECTOR_FREE) { + system_vectors[vector] = SYS_VECTOR_ALLOCED; + if (first_system_vector > vector) + first_system_vector = vector; + } else + BUG(); +} + +static inline void alloc_intr_gate(unsigned int n, void *addr) +{ + alloc_system_vector(n); + set_intr_gate(n, addr); +} + +/* + * This routine sets up an interrupt gate at directory privilege level 3. + */ +static inline void set_system_intr_gate(unsigned int n, void *addr) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS); +} + +static inline void set_system_trap_gate(unsigned int n, void *addr) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS); +} + +static inline void set_trap_gate(unsigned int n, void *addr) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS); +} + +static inline void set_task_gate(unsigned int n, unsigned int gdt_entry) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry<<3)); +} + +static inline void set_intr_gate_ist(int n, void *addr, unsigned ist) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS); +} + +static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); +} + +#else +/* + * GET_DESC_BASE reads the descriptor base of the specified segment. + * + * Args: + * idx - descriptor index + * gdt - GDT pointer + * base - 32bit register to which the base will be written + * lo_w - lo word of the "base" register + * lo_b - lo byte of the "base" register + * hi_b - hi byte of the low word of the "base" register + * + * Example: + * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) + * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax. + */ +#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \ + movb idx * 8 + 4(gdt), lo_b; \ + movb idx * 8 + 7(gdt), hi_b; \ + shll $16, base; \ + movw idx * 8 + 2(gdt), lo_w; + + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_DESC_H */ diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h new file mode 100644 index 00000000000..a6adefa28b9 --- /dev/null +++ b/arch/x86/include/asm/desc_defs.h @@ -0,0 +1,95 @@ +/* Written 2000 by Andi Kleen */ +#ifndef _ASM_X86_DESC_DEFS_H +#define _ASM_X86_DESC_DEFS_H + +/* + * Segment descriptor structure definitions, usable from both x86_64 and i386 + * archs. + */ + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> + +/* + * FIXME: Acessing the desc_struct through its fields is more elegant, + * and should be the one valid thing to do. However, a lot of open code + * still touches the a and b acessors, and doing this allow us to do it + * incrementally. We keep the signature as a struct, rather than an union, + * so we can get rid of it transparently in the future -- glommer + */ +/* 8 byte segment descriptor */ +struct desc_struct { + union { + struct { + unsigned int a; + unsigned int b; + }; + struct { + u16 limit0; + u16 base0; + unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1; + unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8; + }; + }; +} __attribute__((packed)); + +enum { + GATE_INTERRUPT = 0xE, + GATE_TRAP = 0xF, + GATE_CALL = 0xC, + GATE_TASK = 0x5, +}; + +/* 16byte gate */ +struct gate_struct64 { + u16 offset_low; + u16 segment; + unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; + u16 offset_middle; + u32 offset_high; + u32 zero1; +} __attribute__((packed)); + +#define PTR_LOW(x) ((unsigned long long)(x) & 0xFFFF) +#define PTR_MIDDLE(x) (((unsigned long long)(x) >> 16) & 0xFFFF) +#define PTR_HIGH(x) ((unsigned long long)(x) >> 32) + +enum { + DESC_TSS = 0x9, + DESC_LDT = 0x2, + DESCTYPE_S = 0x10, /* !system */ +}; + +/* LDT or TSS descriptor in the GDT. 16 bytes. */ +struct ldttss_desc64 { + u16 limit0; + u16 base0; + unsigned base1 : 8, type : 5, dpl : 2, p : 1; + unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; + u32 base3; + u32 zero1; +} __attribute__((packed)); + +#ifdef CONFIG_X86_64 +typedef struct gate_struct64 gate_desc; +typedef struct ldttss_desc64 ldt_desc; +typedef struct ldttss_desc64 tss_desc; +#define gate_offset(g) ((g).offset_low | ((unsigned long)(g).offset_middle << 16) | ((unsigned long)(g).offset_high << 32)) +#define gate_segment(g) ((g).segment) +#else +typedef struct desc_struct gate_desc; +typedef struct desc_struct ldt_desc; +typedef struct desc_struct tss_desc; +#define gate_offset(g) (((g).b & 0xffff0000) | ((g).a & 0x0000ffff)) +#define gate_segment(g) ((g).a >> 16) +#endif + +struct desc_ptr { + unsigned short size; + unsigned long address; +} __attribute__((packed)) ; + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_X86_DESC_DEFS_H */ diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h new file mode 100644 index 00000000000..3c034f48fdb --- /dev/null +++ b/arch/x86/include/asm/device.h @@ -0,0 +1,16 @@ +#ifndef _ASM_X86_DEVICE_H +#define _ASM_X86_DEVICE_H + +struct dev_archdata { +#ifdef CONFIG_ACPI + void *acpi_handle; +#endif +#ifdef CONFIG_X86_64 +struct dma_mapping_ops *dma_ops; +#endif +#ifdef CONFIG_DMAR + void *iommu; /* hook for IOMMU specific extension */ +#endif +}; + +#endif /* _ASM_X86_DEVICE_H */ diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h new file mode 100644 index 00000000000..9a2d644c08e --- /dev/null +++ b/arch/x86/include/asm/div64.h @@ -0,0 +1,60 @@ +#ifndef _ASM_X86_DIV64_H +#define _ASM_X86_DIV64_H + +#ifdef CONFIG_X86_32 + +#include <linux/types.h> + +/* + * do_div() is NOT a C function. It wants to return + * two values (the quotient and the remainder), but + * since that doesn't work very well in C, what it + * does is: + * + * - modifies the 64-bit dividend _in_place_ + * - returns the 32-bit remainder + * + * This ends up being the most efficient "calling + * convention" on x86. + */ +#define do_div(n, base) \ +({ \ + unsigned long __upper, __low, __high, __mod, __base; \ + __base = (base); \ + asm("":"=a" (__low), "=d" (__high) : "A" (n)); \ + __upper = __high; \ + if (__high) { \ + __upper = __high % (__base); \ + __high = __high / (__base); \ + } \ + asm("divl %2":"=a" (__low), "=d" (__mod) \ + : "rm" (__base), "0" (__low), "1" (__upper)); \ + asm("":"=A" (n) : "a" (__low), "d" (__high)); \ + __mod; \ +}) + +static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) +{ + union { + u64 v64; + u32 v32[2]; + } d = { dividend }; + u32 upper; + + upper = d.v32[1]; + d.v32[1] = 0; + if (upper >= divisor) { + d.v32[1] = upper / divisor; + upper %= divisor; + } + asm ("divl %2" : "=a" (d.v32[0]), "=d" (*remainder) : + "rm" (divisor), "0" (d.v32[0]), "1" (upper)); + return d.v64; +} +#define div_u64_rem div_u64_rem + +#else +# include <asm-generic/div64.h> +#endif /* CONFIG_X86_32 */ + +#endif /* _ASM_X86_DIV64_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h new file mode 100644 index 00000000000..4a5397bfce2 --- /dev/null +++ b/arch/x86/include/asm/dma-mapping.h @@ -0,0 +1,308 @@ +#ifndef _ASM_X86_DMA_MAPPING_H +#define _ASM_X86_DMA_MAPPING_H + +/* + * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for + * documentation. + */ + +#include <linux/scatterlist.h> +#include <asm/io.h> +#include <asm/swiotlb.h> +#include <asm-generic/dma-coherent.h> + +extern dma_addr_t bad_dma_address; +extern int iommu_merge; +extern struct device x86_dma_fallback_dev; +extern int panic_on_overflow; + +struct dma_mapping_ops { + int (*mapping_error)(struct device *dev, + dma_addr_t dma_addr); + void* (*alloc_coherent)(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp); + void (*free_coherent)(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle); + dma_addr_t (*map_single)(struct device *hwdev, phys_addr_t ptr, + size_t size, int direction); + void (*unmap_single)(struct device *dev, dma_addr_t addr, + size_t size, int direction); + void (*sync_single_for_cpu)(struct device *hwdev, + dma_addr_t dma_handle, size_t size, + int direction); + void (*sync_single_for_device)(struct device *hwdev, + dma_addr_t dma_handle, size_t size, + int direction); + void (*sync_single_range_for_cpu)(struct device *hwdev, + dma_addr_t dma_handle, unsigned long offset, + size_t size, int direction); + void (*sync_single_range_for_device)(struct device *hwdev, + dma_addr_t dma_handle, unsigned long offset, + size_t size, int direction); + void (*sync_sg_for_cpu)(struct device *hwdev, + struct scatterlist *sg, int nelems, + int direction); + void (*sync_sg_for_device)(struct device *hwdev, + struct scatterlist *sg, int nelems, + int direction); + int (*map_sg)(struct device *hwdev, struct scatterlist *sg, + int nents, int direction); + void (*unmap_sg)(struct device *hwdev, + struct scatterlist *sg, int nents, + int direction); + int (*dma_supported)(struct device *hwdev, u64 mask); + int is_phys; +}; + +extern struct dma_mapping_ops *dma_ops; + +static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) +{ +#ifdef CONFIG_X86_32 + return dma_ops; +#else + if (unlikely(!dev) || !dev->archdata.dma_ops) + return dma_ops; + else + return dev->archdata.dma_ops; +#endif /* _ASM_X86_DMA_MAPPING_H */ +} + +/* Make sure we keep the same behaviour */ +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ +#ifdef CONFIG_X86_32 + return 0; +#else + struct dma_mapping_ops *ops = get_dma_ops(dev); + if (ops->mapping_error) + return ops->mapping_error(dev, dma_addr); + + return (dma_addr == bad_dma_address); +#endif +} + +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) +#define dma_is_consistent(d, h) (1) + +extern int dma_supported(struct device *hwdev, u64 mask); +extern int dma_set_mask(struct device *dev, u64 mask); + +extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t flag); + +static inline dma_addr_t +dma_map_single(struct device *hwdev, void *ptr, size_t size, + int direction) +{ + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + + BUG_ON(!valid_dma_direction(direction)); + return ops->map_single(hwdev, virt_to_phys(ptr), size, direction); +} + +static inline void +dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, + int direction) +{ + struct dma_mapping_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(direction)); + if (ops->unmap_single) + ops->unmap_single(dev, addr, size, direction); +} + +static inline int +dma_map_sg(struct device *hwdev, struct scatterlist *sg, + int nents, int direction) +{ + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + + BUG_ON(!valid_dma_direction(direction)); + return ops->map_sg(hwdev, sg, nents, direction); +} + +static inline void +dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, + int direction) +{ + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + + BUG_ON(!valid_dma_direction(direction)); + if (ops->unmap_sg) + ops->unmap_sg(hwdev, sg, nents, direction); +} + +static inline void +dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle, + size_t size, int direction) +{ + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + + BUG_ON(!valid_dma_direction(direction)); + if (ops->sync_single_for_cpu) + ops->sync_single_for_cpu(hwdev, dma_handle, size, direction); + flush_write_buffers(); +} + +static inline void +dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle, + size_t size, int direction) +{ + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + + BUG_ON(!valid_dma_direction(direction)); + if (ops->sync_single_for_device) + ops->sync_single_for_device(hwdev, dma_handle, size, direction); + flush_write_buffers(); +} + +static inline void +dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle, + unsigned long offset, size_t size, int direction) +{ + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + + BUG_ON(!valid_dma_direction(direction)); + if (ops->sync_single_range_for_cpu) + ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, + size, direction); + flush_write_buffers(); +} + +static inline void +dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle, + unsigned long offset, size_t size, + int direction) +{ + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + + BUG_ON(!valid_dma_direction(direction)); + if (ops->sync_single_range_for_device) + ops->sync_single_range_for_device(hwdev, dma_handle, + offset, size, direction); + flush_write_buffers(); +} + +static inline void +dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, + int nelems, int direction) +{ + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + + BUG_ON(!valid_dma_direction(direction)); + if (ops->sync_sg_for_cpu) + ops->sync_sg_for_cpu(hwdev, sg, nelems, direction); + flush_write_buffers(); +} + +static inline void +dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, + int nelems, int direction) +{ + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + + BUG_ON(!valid_dma_direction(direction)); + if (ops->sync_sg_for_device) + ops->sync_sg_for_device(hwdev, sg, nelems, direction); + + flush_write_buffers(); +} + +static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, + size_t offset, size_t size, + int direction) +{ + struct dma_mapping_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(direction)); + return ops->map_single(dev, page_to_phys(page) + offset, + size, direction); +} + +static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, int direction) +{ + dma_unmap_single(dev, addr, size, direction); +} + +static inline void +dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction dir) +{ + flush_write_buffers(); +} + +static inline int dma_get_cache_alignment(void) +{ + /* no easy way to get cache size on all x86, so return the + * maximum possible, to be safe */ + return boot_cpu_data.x86_clflush_size; +} + +static inline unsigned long dma_alloc_coherent_mask(struct device *dev, + gfp_t gfp) +{ + unsigned long dma_mask = 0; + + dma_mask = dev->coherent_dma_mask; + if (!dma_mask) + dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK; + + return dma_mask; +} + +static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) +{ +#ifdef CONFIG_X86_64 + unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp); + + if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) + gfp |= GFP_DMA32; +#endif + return gfp; +} + +static inline void * +dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp) +{ + struct dma_mapping_ops *ops = get_dma_ops(dev); + void *memory; + + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + + if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) + return memory; + + if (!dev) { + dev = &x86_dma_fallback_dev; + gfp |= GFP_DMA; + } + + if (!is_device_dma_capable(dev)) + return NULL; + + if (!ops->alloc_coherent) + return NULL; + + return ops->alloc_coherent(dev, size, dma_handle, + dma_alloc_coherent_gfp_flags(dev, gfp)); +} + +static inline void dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t bus) +{ + struct dma_mapping_ops *ops = get_dma_ops(dev); + + WARN_ON(irqs_disabled()); /* for portability */ + + if (dma_release_from_coherent(dev, get_order(size), vaddr)) + return; + + if (ops->free_coherent) + ops->free_coherent(dev, size, vaddr, bus); +} + +#endif diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h new file mode 100644 index 00000000000..ca1098a7e58 --- /dev/null +++ b/arch/x86/include/asm/dma.h @@ -0,0 +1,318 @@ +/* + * linux/include/asm/dma.h: Defines for using and allocating dma channels. + * Written by Hennus Bergman, 1992. + * High DMA channel support & info by Hannu Savolainen + * and John Boyd, Nov. 1992. + */ + +#ifndef _ASM_X86_DMA_H +#define _ASM_X86_DMA_H + +#include <linux/spinlock.h> /* And spinlocks */ +#include <asm/io.h> /* need byte IO */ +#include <linux/delay.h> + +#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER +#define dma_outb outb_p +#else +#define dma_outb outb +#endif + +#define dma_inb inb + +/* + * NOTES about DMA transfers: + * + * controller 1: channels 0-3, byte operations, ports 00-1F + * controller 2: channels 4-7, word operations, ports C0-DF + * + * - ALL registers are 8 bits only, regardless of transfer size + * - channel 4 is not used - cascades 1 into 2. + * - channels 0-3 are byte - addresses/counts are for physical bytes + * - channels 5-7 are word - addresses/counts are for physical words + * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries + * - transfer count loaded to registers is 1 less than actual count + * - controller 2 offsets are all even (2x offsets for controller 1) + * - page registers for 5-7 don't use data bit 0, represent 128K pages + * - page registers for 0-3 use bit 0, represent 64K pages + * + * DMA transfers are limited to the lower 16MB of _physical_ memory. + * Note that addresses loaded into registers must be _physical_ addresses, + * not logical addresses (which may differ if paging is active). + * + * Address mapping for channels 0-3: + * + * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses) + * | ... | | ... | | ... | + * | ... | | ... | | ... | + * | ... | | ... | | ... | + * P7 ... P0 A7 ... A0 A7 ... A0 + * | Page | Addr MSB | Addr LSB | (DMA registers) + * + * Address mapping for channels 5-7: + * + * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses) + * | ... | \ \ ... \ \ \ ... \ \ + * | ... | \ \ ... \ \ \ ... \ (not used) + * | ... | \ \ ... \ \ \ ... \ + * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0 + * | Page | Addr MSB | Addr LSB | (DMA registers) + * + * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses + * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at + * the hardware level, so odd-byte transfers aren't possible). + * + * Transfer count (_not # bytes_) is limited to 64K, represented as actual + * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more, + * and up to 128K bytes may be transferred on channels 5-7 in one operation. + * + */ + +#define MAX_DMA_CHANNELS 8 + +#ifdef CONFIG_X86_32 + +/* The maximum address that we can perform a DMA transfer to on this platform */ +#define MAX_DMA_ADDRESS (PAGE_OFFSET + 0x1000000) + +#else + +/* 16MB ISA DMA zone */ +#define MAX_DMA_PFN ((16 * 1024 * 1024) >> PAGE_SHIFT) + +/* 4GB broken PCI/AGP hardware bus master zone */ +#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) + +/* Compat define for old dma zone */ +#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT)) + +#endif + +/* 8237 DMA controllers */ +#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */ +#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */ + +/* DMA controller registers */ +#define DMA1_CMD_REG 0x08 /* command register (w) */ +#define DMA1_STAT_REG 0x08 /* status register (r) */ +#define DMA1_REQ_REG 0x09 /* request register (w) */ +#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */ +#define DMA1_MODE_REG 0x0B /* mode register (w) */ +#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */ +#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */ +#define DMA1_RESET_REG 0x0D /* Master Clear (w) */ +#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */ +#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */ + +#define DMA2_CMD_REG 0xD0 /* command register (w) */ +#define DMA2_STAT_REG 0xD0 /* status register (r) */ +#define DMA2_REQ_REG 0xD2 /* request register (w) */ +#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */ +#define DMA2_MODE_REG 0xD6 /* mode register (w) */ +#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */ +#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */ +#define DMA2_RESET_REG 0xDA /* Master Clear (w) */ +#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */ +#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */ + +#define DMA_ADDR_0 0x00 /* DMA address registers */ +#define DMA_ADDR_1 0x02 +#define DMA_ADDR_2 0x04 +#define DMA_ADDR_3 0x06 +#define DMA_ADDR_4 0xC0 +#define DMA_ADDR_5 0xC4 +#define DMA_ADDR_6 0xC8 +#define DMA_ADDR_7 0xCC + +#define DMA_CNT_0 0x01 /* DMA count registers */ +#define DMA_CNT_1 0x03 +#define DMA_CNT_2 0x05 +#define DMA_CNT_3 0x07 +#define DMA_CNT_4 0xC2 +#define DMA_CNT_5 0xC6 +#define DMA_CNT_6 0xCA +#define DMA_CNT_7 0xCE + +#define DMA_PAGE_0 0x87 /* DMA page registers */ +#define DMA_PAGE_1 0x83 +#define DMA_PAGE_2 0x81 +#define DMA_PAGE_3 0x82 +#define DMA_PAGE_5 0x8B +#define DMA_PAGE_6 0x89 +#define DMA_PAGE_7 0x8A + +/* I/O to memory, no autoinit, increment, single mode */ +#define DMA_MODE_READ 0x44 +/* memory to I/O, no autoinit, increment, single mode */ +#define DMA_MODE_WRITE 0x48 +/* pass thru DREQ->HRQ, DACK<-HLDA only */ +#define DMA_MODE_CASCADE 0xC0 + +#define DMA_AUTOINIT 0x10 + + +extern spinlock_t dma_spin_lock; + +static inline unsigned long claim_dma_lock(void) +{ + unsigned long flags; + spin_lock_irqsave(&dma_spin_lock, flags); + return flags; +} + +static inline void release_dma_lock(unsigned long flags) +{ + spin_unlock_irqrestore(&dma_spin_lock, flags); +} + +/* enable/disable a specific DMA channel */ +static inline void enable_dma(unsigned int dmanr) +{ + if (dmanr <= 3) + dma_outb(dmanr, DMA1_MASK_REG); + else + dma_outb(dmanr & 3, DMA2_MASK_REG); +} + +static inline void disable_dma(unsigned int dmanr) +{ + if (dmanr <= 3) + dma_outb(dmanr | 4, DMA1_MASK_REG); + else + dma_outb((dmanr & 3) | 4, DMA2_MASK_REG); +} + +/* Clear the 'DMA Pointer Flip Flop'. + * Write 0 for LSB/MSB, 1 for MSB/LSB access. + * Use this once to initialize the FF to a known state. + * After that, keep track of it. :-) + * --- In order to do that, the DMA routines below should --- + * --- only be used while holding the DMA lock ! --- + */ +static inline void clear_dma_ff(unsigned int dmanr) +{ + if (dmanr <= 3) + dma_outb(0, DMA1_CLEAR_FF_REG); + else + dma_outb(0, DMA2_CLEAR_FF_REG); +} + +/* set mode (above) for a specific DMA channel */ +static inline void set_dma_mode(unsigned int dmanr, char mode) +{ + if (dmanr <= 3) + dma_outb(mode | dmanr, DMA1_MODE_REG); + else + dma_outb(mode | (dmanr & 3), DMA2_MODE_REG); +} + +/* Set only the page register bits of the transfer address. + * This is used for successive transfers when we know the contents of + * the lower 16 bits of the DMA current address register, but a 64k boundary + * may have been crossed. + */ +static inline void set_dma_page(unsigned int dmanr, char pagenr) +{ + switch (dmanr) { + case 0: + dma_outb(pagenr, DMA_PAGE_0); + break; + case 1: + dma_outb(pagenr, DMA_PAGE_1); + break; + case 2: + dma_outb(pagenr, DMA_PAGE_2); + break; + case 3: + dma_outb(pagenr, DMA_PAGE_3); + break; + case 5: + dma_outb(pagenr & 0xfe, DMA_PAGE_5); + break; + case 6: + dma_outb(pagenr & 0xfe, DMA_PAGE_6); + break; + case 7: + dma_outb(pagenr & 0xfe, DMA_PAGE_7); + break; + } +} + + +/* Set transfer address & page bits for specific DMA channel. + * Assumes dma flipflop is clear. + */ +static inline void set_dma_addr(unsigned int dmanr, unsigned int a) +{ + set_dma_page(dmanr, a>>16); + if (dmanr <= 3) { + dma_outb(a & 0xff, ((dmanr & 3) << 1) + IO_DMA1_BASE); + dma_outb((a >> 8) & 0xff, ((dmanr & 3) << 1) + IO_DMA1_BASE); + } else { + dma_outb((a >> 1) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE); + dma_outb((a >> 9) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE); + } +} + + +/* Set transfer size (max 64k for DMA0..3, 128k for DMA5..7) for + * a specific DMA channel. + * You must ensure the parameters are valid. + * NOTE: from a manual: "the number of transfers is one more + * than the initial word count"! This is taken into account. + * Assumes dma flip-flop is clear. + * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7. + */ +static inline void set_dma_count(unsigned int dmanr, unsigned int count) +{ + count--; + if (dmanr <= 3) { + dma_outb(count & 0xff, ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE); + dma_outb((count >> 8) & 0xff, + ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE); + } else { + dma_outb((count >> 1) & 0xff, + ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE); + dma_outb((count >> 9) & 0xff, + ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE); + } +} + + +/* Get DMA residue count. After a DMA transfer, this + * should return zero. Reading this while a DMA transfer is + * still in progress will return unpredictable results. + * If called before the channel has been used, it may return 1. + * Otherwise, it returns the number of _bytes_ left to transfer. + * + * Assumes DMA flip-flop is clear. + */ +static inline int get_dma_residue(unsigned int dmanr) +{ + unsigned int io_port; + /* using short to get 16-bit wrap around */ + unsigned short count; + + io_port = (dmanr <= 3) ? ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE + : ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE; + + count = 1 + dma_inb(io_port); + count += dma_inb(io_port) << 8; + + return (dmanr <= 3) ? count : (count << 1); +} + + +/* These are in kernel/dma.c: */ +extern int request_dma(unsigned int dmanr, const char *device_id); +extern void free_dma(unsigned int dmanr); + +/* From PCI */ + +#ifdef CONFIG_PCI +extern int isa_dma_bridge_buggy; +#else +#define isa_dma_bridge_buggy (0) +#endif + +#endif /* _ASM_X86_DMA_H */ diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h new file mode 100644 index 00000000000..bc68212c6bc --- /dev/null +++ b/arch/x86/include/asm/dmi.h @@ -0,0 +1,26 @@ +#ifndef _ASM_X86_DMI_H +#define _ASM_X86_DMI_H + +#include <asm/io.h> + +#define DMI_MAX_DATA 2048 + +extern int dmi_alloc_index; +extern char dmi_alloc_data[DMI_MAX_DATA]; + +/* This is so early that there is no good way to allocate dynamic memory. + Allocate data in an BSS array. */ +static inline void *dmi_alloc(unsigned len) +{ + int idx = dmi_alloc_index; + if ((dmi_alloc_index + len) > DMI_MAX_DATA) + return NULL; + dmi_alloc_index += len; + return dmi_alloc_data + idx; +} + +/* Use early IO mappings for DMI because it's initialized early */ +#define dmi_ioremap early_ioremap +#define dmi_iounmap early_iounmap + +#endif /* _ASM_X86_DMI_H */ diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h new file mode 100644 index 00000000000..72c5a190bf4 --- /dev/null +++ b/arch/x86/include/asm/ds.h @@ -0,0 +1,238 @@ +/* + * Debug Store (DS) support + * + * This provides a low-level interface to the hardware's Debug Store + * feature that is used for branch trace store (BTS) and + * precise-event based sampling (PEBS). + * + * It manages: + * - per-thread and per-cpu allocation of BTS and PEBS + * - buffer memory allocation (optional) + * - buffer overflow handling + * - buffer access + * + * It assumes: + * - get_task_struct on all parameter tasks + * - current is allowed to trace parameter tasks + * + * + * Copyright (C) 2007-2008 Intel Corporation. + * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008 + */ + +#ifndef _ASM_X86_DS_H +#define _ASM_X86_DS_H + +#ifdef CONFIG_X86_DS + +#include <linux/types.h> +#include <linux/init.h> + + +struct task_struct; + +/* + * Request BTS or PEBS + * + * Due to alignement constraints, the actual buffer may be slightly + * smaller than the requested or provided buffer. + * + * Returns 0 on success; -Eerrno otherwise + * + * task: the task to request recording for; + * NULL for per-cpu recording on the current cpu + * base: the base pointer for the (non-pageable) buffer; + * NULL if buffer allocation requested + * size: the size of the requested or provided buffer + * ovfl: pointer to a function to be called on buffer overflow; + * NULL if cyclic buffer requested + */ +typedef void (*ds_ovfl_callback_t)(struct task_struct *); +extern int ds_request_bts(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl); +extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl); + +/* + * Release BTS or PEBS resources + * + * Frees buffers allocated on ds_request. + * + * Returns 0 on success; -Eerrno otherwise + * + * task: the task to release resources for; + * NULL to release resources for the current cpu + */ +extern int ds_release_bts(struct task_struct *task); +extern int ds_release_pebs(struct task_struct *task); + +/* + * Return the (array) index of the write pointer. + * (assuming an array of BTS/PEBS records) + * + * Returns -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * pos (out): if not NULL, will hold the result + */ +extern int ds_get_bts_index(struct task_struct *task, size_t *pos); +extern int ds_get_pebs_index(struct task_struct *task, size_t *pos); + +/* + * Return the (array) index one record beyond the end of the array. + * (assuming an array of BTS/PEBS records) + * + * Returns -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * pos (out): if not NULL, will hold the result + */ +extern int ds_get_bts_end(struct task_struct *task, size_t *pos); +extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); + +/* + * Provide a pointer to the BTS/PEBS record at parameter index. + * (assuming an array of BTS/PEBS records) + * + * The pointer points directly into the buffer. The user is + * responsible for copying the record. + * + * Returns the size of a single record on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * index: the index of the requested record + * record (out): pointer to the requested record + */ +extern int ds_access_bts(struct task_struct *task, + size_t index, const void **record); +extern int ds_access_pebs(struct task_struct *task, + size_t index, const void **record); + +/* + * Write one or more BTS/PEBS records at the write pointer index and + * advance the write pointer. + * + * If size is not a multiple of the record size, trailing bytes are + * zeroed out. + * + * May result in one or more overflow notifications. + * + * If called during overflow handling, that is, with index >= + * interrupt threshold, the write will wrap around. + * + * An overflow notification is given if and when the interrupt + * threshold is reached during or after the write. + * + * Returns the number of bytes written or -Eerrno. + * + * task: the task to access; + * NULL to access the current cpu + * buffer: the buffer to write + * size: the size of the buffer + */ +extern int ds_write_bts(struct task_struct *task, + const void *buffer, size_t size); +extern int ds_write_pebs(struct task_struct *task, + const void *buffer, size_t size); + +/* + * Same as ds_write_bts/pebs, but omit ownership checks. + * + * This is needed to have some other task than the owner of the + * BTS/PEBS buffer or the parameter task itself write into the + * respective buffer. + */ +extern int ds_unchecked_write_bts(struct task_struct *task, + const void *buffer, size_t size); +extern int ds_unchecked_write_pebs(struct task_struct *task, + const void *buffer, size_t size); + +/* + * Reset the write pointer of the BTS/PEBS buffer. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + */ +extern int ds_reset_bts(struct task_struct *task); +extern int ds_reset_pebs(struct task_struct *task); + +/* + * Clear the BTS/PEBS buffer and reset the write pointer. + * The entire buffer will be zeroed out. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + */ +extern int ds_clear_bts(struct task_struct *task); +extern int ds_clear_pebs(struct task_struct *task); + +/* + * Provide the PEBS counter reset value. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * value (out): the counter reset value + */ +extern int ds_get_pebs_reset(struct task_struct *task, u64 *value); + +/* + * Set the PEBS counter reset value. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * value: the new counter reset value + */ +extern int ds_set_pebs_reset(struct task_struct *task, u64 value); + +/* + * Initialization + */ +struct cpuinfo_x86; +extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); + + + +/* + * The DS context - part of struct thread_struct. + */ +struct ds_context { + /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ + unsigned char *ds; + /* the owner of the BTS and PEBS configuration, respectively */ + struct task_struct *owner[2]; + /* buffer overflow notification function for BTS and PEBS */ + ds_ovfl_callback_t callback[2]; + /* the original buffer address */ + void *buffer[2]; + /* the number of allocated pages for on-request allocated buffers */ + unsigned int pages[2]; + /* use count */ + unsigned long count; + /* a pointer to the context location inside the thread_struct + * or the per_cpu context array */ + struct ds_context **this; + /* a pointer to the task owning this context, or NULL, if the + * context is owned by a cpu */ + struct task_struct *task; +}; + +/* called by exit_thread() to free leftover contexts */ +extern void ds_free(struct ds_context *context); + +#else /* CONFIG_X86_DS */ + +#define ds_init_intel(config) do {} while (0) + +#endif /* CONFIG_X86_DS */ +#endif /* _ASM_X86_DS_H */ diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h new file mode 100644 index 00000000000..804b6e6be92 --- /dev/null +++ b/arch/x86/include/asm/dwarf2.h @@ -0,0 +1,61 @@ +#ifndef _ASM_X86_DWARF2_H +#define _ASM_X86_DWARF2_H + +#ifndef __ASSEMBLY__ +#warning "asm/dwarf2.h should be only included in pure assembly files" +#endif + +/* + Macros for dwarf2 CFI unwind table entries. + See "as.info" for details on these pseudo ops. Unfortunately + they are only supported in very new binutils, so define them + away for older version. + */ + +#ifdef CONFIG_AS_CFI + +#define CFI_STARTPROC .cfi_startproc +#define CFI_ENDPROC .cfi_endproc +#define CFI_DEF_CFA .cfi_def_cfa +#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register +#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset +#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset +#define CFI_OFFSET .cfi_offset +#define CFI_REL_OFFSET .cfi_rel_offset +#define CFI_REGISTER .cfi_register +#define CFI_RESTORE .cfi_restore +#define CFI_REMEMBER_STATE .cfi_remember_state +#define CFI_RESTORE_STATE .cfi_restore_state +#define CFI_UNDEFINED .cfi_undefined + +#ifdef CONFIG_AS_CFI_SIGNAL_FRAME +#define CFI_SIGNAL_FRAME .cfi_signal_frame +#else +#define CFI_SIGNAL_FRAME +#endif + +#else + +/* Due to the structure of pre-exisiting code, don't use assembler line + comment character # to ignore the arguments. Instead, use a dummy macro. */ +.macro cfi_ignore a=0, b=0, c=0, d=0 +.endm + +#define CFI_STARTPROC cfi_ignore +#define CFI_ENDPROC cfi_ignore +#define CFI_DEF_CFA cfi_ignore +#define CFI_DEF_CFA_REGISTER cfi_ignore +#define CFI_DEF_CFA_OFFSET cfi_ignore +#define CFI_ADJUST_CFA_OFFSET cfi_ignore +#define CFI_OFFSET cfi_ignore +#define CFI_REL_OFFSET cfi_ignore +#define CFI_REGISTER cfi_ignore +#define CFI_RESTORE cfi_ignore +#define CFI_REMEMBER_STATE cfi_ignore +#define CFI_RESTORE_STATE cfi_ignore +#define CFI_UNDEFINED cfi_ignore +#define CFI_SIGNAL_FRAME cfi_ignore + +#endif + +#endif /* _ASM_X86_DWARF2_H */ diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h new file mode 100644 index 00000000000..3d8ceddbd40 --- /dev/null +++ b/arch/x86/include/asm/e820.h @@ -0,0 +1,146 @@ +#ifndef _ASM_X86_E820_H +#define _ASM_X86_E820_H +#define E820MAP 0x2d0 /* our map */ +#define E820MAX 128 /* number of entries in E820MAP */ + +/* + * Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the + * constrained space in the zeropage. If we have more nodes than + * that, and if we've booted off EFI firmware, then the EFI tables + * passed us from the EFI firmware can list more nodes. Size our + * internal memory map tables to have room for these additional + * nodes, based on up to three entries per node for which the + * kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT), + * plus E820MAX, allowing space for the possible duplicate E820 + * entries that might need room in the same arrays, prior to the + * call to sanitize_e820_map() to remove duplicates. The allowance + * of three memory map entries per node is "enough" entries for + * the initial hardware platform motivating this mechanism to make + * use of additional EFI map entries. Future platforms may want + * to allow more than three entries per node or otherwise refine + * this size. + */ + +/* + * Odd: 'make headers_check' complains about numa.h if I try + * to collapse the next two #ifdef lines to a single line: + * #if defined(__KERNEL__) && defined(CONFIG_EFI) + */ +#ifdef __KERNEL__ +#ifdef CONFIG_EFI +#include <linux/numa.h> +#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES) +#else /* ! CONFIG_EFI */ +#define E820_X_MAX E820MAX +#endif +#else /* ! __KERNEL__ */ +#define E820_X_MAX E820MAX +#endif + +#define E820NR 0x1e8 /* # entries in E820MAP */ + +#define E820_RAM 1 +#define E820_RESERVED 2 +#define E820_ACPI 3 +#define E820_NVS 4 +#define E820_UNUSABLE 5 + +/* reserved RAM used by kernel itself */ +#define E820_RESERVED_KERN 128 + +#ifndef __ASSEMBLY__ +struct e820entry { + __u64 addr; /* start of memory segment */ + __u64 size; /* size of memory segment */ + __u32 type; /* type of memory segment */ +} __attribute__((packed)); + +struct e820map { + __u32 nr_map; + struct e820entry map[E820_X_MAX]; +}; + +#ifdef __KERNEL__ +/* see comment in arch/x86/kernel/e820.c */ +extern struct e820map e820; +extern struct e820map e820_saved; + +extern unsigned long pci_mem_start; +extern int e820_any_mapped(u64 start, u64 end, unsigned type); +extern int e820_all_mapped(u64 start, u64 end, unsigned type); +extern void e820_add_region(u64 start, u64 size, int type); +extern void e820_print_map(char *who); +extern int +sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, int *pnr_map); +extern u64 e820_update_range(u64 start, u64 size, unsigned old_type, + unsigned new_type); +extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type, + int checktype); +extern void update_e820(void); +extern void e820_setup_gap(void); +extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, + unsigned long start_addr, unsigned long long end_addr); +struct setup_data; +extern void parse_e820_ext(struct setup_data *data, unsigned long pa_data); + +#if defined(CONFIG_X86_64) || \ + (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) +extern void e820_mark_nosave_regions(unsigned long limit_pfn); +#else +static inline void e820_mark_nosave_regions(unsigned long limit_pfn) +{ +} +#endif + +#ifdef CONFIG_MEMTEST +extern void early_memtest(unsigned long start, unsigned long end); +#else +static inline void early_memtest(unsigned long start, unsigned long end) +{ +} +#endif + +extern unsigned long end_user_pfn; + +extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); +extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); +extern void reserve_early(u64 start, u64 end, char *name); +extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); +extern void free_early(u64 start, u64 end); +extern void early_res_to_bootmem(u64 start, u64 end); +extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); + +extern unsigned long e820_end_of_ram_pfn(void); +extern unsigned long e820_end_of_low_ram_pfn(void); +extern int e820_find_active_region(const struct e820entry *ei, + unsigned long start_pfn, + unsigned long last_pfn, + unsigned long *ei_startpfn, + unsigned long *ei_endpfn); +extern void e820_register_active_regions(int nid, unsigned long start_pfn, + unsigned long end_pfn); +extern u64 e820_hole_size(u64 start, u64 end); +extern void finish_e820_parsing(void); +extern void e820_reserve_resources(void); +extern void e820_reserve_resources_late(void); +extern void setup_memory_map(void); +extern char *default_machine_specific_memory_setup(void); +extern char *machine_specific_memory_setup(void); +extern char *memory_setup(void); +#endif /* __KERNEL__ */ +#endif /* __ASSEMBLY__ */ + +#define ISA_START_ADDRESS 0xa0000 +#define ISA_END_ADDRESS 0x100000 +#define is_ISA_range(s, e) ((s) >= ISA_START_ADDRESS && (e) < ISA_END_ADDRESS) + +#define BIOS_BEGIN 0x000a0000 +#define BIOS_END 0x00100000 + +#ifdef __KERNEL__ +#include <linux/ioport.h> + +#define HIGH_MEMORY (1024*1024) +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_E820_H */ diff --git a/arch/x86/include/asm/edac.h b/arch/x86/include/asm/edac.h new file mode 100644 index 00000000000..e9b57ecc70c --- /dev/null +++ b/arch/x86/include/asm/edac.h @@ -0,0 +1,18 @@ +#ifndef _ASM_X86_EDAC_H +#define _ASM_X86_EDAC_H + +/* ECC atomic, DMA, SMP and interrupt safe scrub function */ + +static inline void atomic_scrub(void *va, u32 size) +{ + u32 i, *virt_addr = va; + + /* + * Very carefully read and write to memory atomically so we + * are interrupt, DMA and SMP safe. + */ + for (i = 0; i < size / 4; i++, virt_addr++) + asm volatile("lock; addl $0, %0"::"m" (*virt_addr)); +} + +#endif /* _ASM_X86_EDAC_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h new file mode 100644 index 00000000000..a2e545c91c3 --- /dev/null +++ b/arch/x86/include/asm/efi.h @@ -0,0 +1,110 @@ +#ifndef _ASM_X86_EFI_H +#define _ASM_X86_EFI_H + +#ifdef CONFIG_X86_32 + +extern unsigned long asmlinkage efi_call_phys(void *, ...); + +#define efi_call_phys0(f) efi_call_phys(f) +#define efi_call_phys1(f, a1) efi_call_phys(f, a1) +#define efi_call_phys2(f, a1, a2) efi_call_phys(f, a1, a2) +#define efi_call_phys3(f, a1, a2, a3) efi_call_phys(f, a1, a2, a3) +#define efi_call_phys4(f, a1, a2, a3, a4) \ + efi_call_phys(f, a1, a2, a3, a4) +#define efi_call_phys5(f, a1, a2, a3, a4, a5) \ + efi_call_phys(f, a1, a2, a3, a4, a5) +#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6) \ + efi_call_phys(f, a1, a2, a3, a4, a5, a6) +/* + * Wrap all the virtual calls in a way that forces the parameters on the stack. + */ + +#define efi_call_virt(f, args...) \ + ((efi_##f##_t __attribute__((regparm(0)))*)efi.systab->runtime->f)(args) + +#define efi_call_virt0(f) efi_call_virt(f) +#define efi_call_virt1(f, a1) efi_call_virt(f, a1) +#define efi_call_virt2(f, a1, a2) efi_call_virt(f, a1, a2) +#define efi_call_virt3(f, a1, a2, a3) efi_call_virt(f, a1, a2, a3) +#define efi_call_virt4(f, a1, a2, a3, a4) \ + efi_call_virt(f, a1, a2, a3, a4) +#define efi_call_virt5(f, a1, a2, a3, a4, a5) \ + efi_call_virt(f, a1, a2, a3, a4, a5) +#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ + efi_call_virt(f, a1, a2, a3, a4, a5, a6) + +#define efi_ioremap(addr, size) ioremap_cache(addr, size) + +#else /* !CONFIG_X86_32 */ + +#define MAX_EFI_IO_PAGES 100 + +extern u64 efi_call0(void *fp); +extern u64 efi_call1(void *fp, u64 arg1); +extern u64 efi_call2(void *fp, u64 arg1, u64 arg2); +extern u64 efi_call3(void *fp, u64 arg1, u64 arg2, u64 arg3); +extern u64 efi_call4(void *fp, u64 arg1, u64 arg2, u64 arg3, u64 arg4); +extern u64 efi_call5(void *fp, u64 arg1, u64 arg2, u64 arg3, + u64 arg4, u64 arg5); +extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, + u64 arg4, u64 arg5, u64 arg6); + +#define efi_call_phys0(f) \ + efi_call0((void *)(f)) +#define efi_call_phys1(f, a1) \ + efi_call1((void *)(f), (u64)(a1)) +#define efi_call_phys2(f, a1, a2) \ + efi_call2((void *)(f), (u64)(a1), (u64)(a2)) +#define efi_call_phys3(f, a1, a2, a3) \ + efi_call3((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3)) +#define efi_call_phys4(f, a1, a2, a3, a4) \ + efi_call4((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \ + (u64)(a4)) +#define efi_call_phys5(f, a1, a2, a3, a4, a5) \ + efi_call5((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \ + (u64)(a4), (u64)(a5)) +#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6) \ + efi_call6((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \ + (u64)(a4), (u64)(a5), (u64)(a6)) + +#define efi_call_virt0(f) \ + efi_call0((void *)(efi.systab->runtime->f)) +#define efi_call_virt1(f, a1) \ + efi_call1((void *)(efi.systab->runtime->f), (u64)(a1)) +#define efi_call_virt2(f, a1, a2) \ + efi_call2((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2)) +#define efi_call_virt3(f, a1, a2, a3) \ + efi_call3((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + (u64)(a3)) +#define efi_call_virt4(f, a1, a2, a3, a4) \ + efi_call4((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + (u64)(a3), (u64)(a4)) +#define efi_call_virt5(f, a1, a2, a3, a4, a5) \ + efi_call5((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + (u64)(a3), (u64)(a4), (u64)(a5)) +#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ + efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) + +extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size); + +#endif /* CONFIG_X86_32 */ + +extern void efi_reserve_early(void); +extern void efi_call_phys_prelog(void); +extern void efi_call_phys_epilog(void); + +#ifndef CONFIG_EFI +/* + * IF EFI is not configured, have the EFI calls return -ENOSYS. + */ +#define efi_call0(_f) (-ENOSYS) +#define efi_call1(_f, _a1) (-ENOSYS) +#define efi_call2(_f, _a1, _a2) (-ENOSYS) +#define efi_call3(_f, _a1, _a2, _a3) (-ENOSYS) +#define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS) +#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS) +#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS) +#endif /* CONFIG_EFI */ + +#endif /* _ASM_X86_EFI_H */ diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h new file mode 100644 index 00000000000..40ca1bea791 --- /dev/null +++ b/arch/x86/include/asm/elf.h @@ -0,0 +1,336 @@ +#ifndef _ASM_X86_ELF_H +#define _ASM_X86_ELF_H + +/* + * ELF register definitions.. + */ + +#include <asm/ptrace.h> +#include <asm/user.h> +#include <asm/auxvec.h> + +typedef unsigned long elf_greg_t; + +#define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t)) +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef struct user_i387_struct elf_fpregset_t; + +#ifdef __i386__ + +typedef struct user_fxsr_struct elf_fpxregset_t; + +#define R_386_NONE 0 +#define R_386_32 1 +#define R_386_PC32 2 +#define R_386_GOT32 3 +#define R_386_PLT32 4 +#define R_386_COPY 5 +#define R_386_GLOB_DAT 6 +#define R_386_JMP_SLOT 7 +#define R_386_RELATIVE 8 +#define R_386_GOTOFF 9 +#define R_386_GOTPC 10 +#define R_386_NUM 11 + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS32 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_386 + +#else + +/* x86-64 relocation types */ +#define R_X86_64_NONE 0 /* No reloc */ +#define R_X86_64_64 1 /* Direct 64 bit */ +#define R_X86_64_PC32 2 /* PC relative 32 bit signed */ +#define R_X86_64_GOT32 3 /* 32 bit GOT entry */ +#define R_X86_64_PLT32 4 /* 32 bit PLT address */ +#define R_X86_64_COPY 5 /* Copy symbol at runtime */ +#define R_X86_64_GLOB_DAT 6 /* Create GOT entry */ +#define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */ +#define R_X86_64_RELATIVE 8 /* Adjust by program base */ +#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative + offset to GOT */ +#define R_X86_64_32 10 /* Direct 32 bit zero extended */ +#define R_X86_64_32S 11 /* Direct 32 bit sign extended */ +#define R_X86_64_16 12 /* Direct 16 bit zero extended */ +#define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */ +#define R_X86_64_8 14 /* Direct 8 bit sign extended */ +#define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */ + +#define R_X86_64_NUM 16 + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS64 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_X86_64 + +#endif + +#include <asm/vdso.h> + +extern unsigned int vdso_enabled; + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch_ia32(x) \ + (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) + +#include <asm/processor.h> +#include <asm/system.h> + +#ifdef CONFIG_X86_32 +#include <asm/desc.h> + +#define elf_check_arch(x) elf_check_arch_ia32(x) + +/* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx + contains a pointer to a function which might be registered using `atexit'. + This provides a mean for the dynamic linker to call DT_FINI functions for + shared libraries that have been loaded before the code runs. + + A value of 0 tells we have no such handler. + + We might as well make sure everything else is cleared too (except for %esp), + just to make things more deterministic. + */ +#define ELF_PLAT_INIT(_r, load_addr) \ + do { \ + _r->bx = 0; _r->cx = 0; _r->dx = 0; \ + _r->si = 0; _r->di = 0; _r->bp = 0; \ + _r->ax = 0; \ +} while (0) + +/* + * regs is struct pt_regs, pr_reg is elf_gregset_t (which is + * now struct_user_regs, they are different) + */ + +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ +do { \ + pr_reg[0] = regs->bx; \ + pr_reg[1] = regs->cx; \ + pr_reg[2] = regs->dx; \ + pr_reg[3] = regs->si; \ + pr_reg[4] = regs->di; \ + pr_reg[5] = regs->bp; \ + pr_reg[6] = regs->ax; \ + pr_reg[7] = regs->ds & 0xffff; \ + pr_reg[8] = regs->es & 0xffff; \ + pr_reg[9] = regs->fs & 0xffff; \ + savesegment(gs, pr_reg[10]); \ + pr_reg[11] = regs->orig_ax; \ + pr_reg[12] = regs->ip; \ + pr_reg[13] = regs->cs & 0xffff; \ + pr_reg[14] = regs->flags; \ + pr_reg[15] = regs->sp; \ + pr_reg[16] = regs->ss & 0xffff; \ +} while (0); + +#define ELF_PLATFORM (utsname()->machine) +#define set_personality_64bit() do { } while (0) + +#else /* CONFIG_X86_32 */ + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) \ + ((x)->e_machine == EM_X86_64) + +#define compat_elf_check_arch(x) elf_check_arch_ia32(x) + +static inline void start_ia32_thread(struct pt_regs *regs, u32 ip, u32 sp) +{ + loadsegment(fs, 0); + loadsegment(ds, __USER32_DS); + loadsegment(es, __USER32_DS); + load_gs_index(0); + regs->ip = ip; + regs->sp = sp; + regs->flags = X86_EFLAGS_IF; + regs->cs = __USER32_CS; + regs->ss = __USER32_DS; +} + +static inline void elf_common_init(struct thread_struct *t, + struct pt_regs *regs, const u16 ds) +{ + regs->ax = regs->bx = regs->cx = regs->dx = 0; + regs->si = regs->di = regs->bp = 0; + regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; + regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; + t->fs = t->gs = 0; + t->fsindex = t->gsindex = 0; + t->ds = t->es = ds; +} + +#define ELF_PLAT_INIT(_r, load_addr) \ +do { \ + elf_common_init(¤t->thread, _r, 0); \ + clear_thread_flag(TIF_IA32); \ +} while (0) + +#define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ + elf_common_init(¤t->thread, regs, __USER_DS) + +#define compat_start_thread(regs, ip, sp) \ +do { \ + start_ia32_thread(regs, ip, sp); \ + set_fs(USER_DS); \ +} while (0) + +#define COMPAT_SET_PERSONALITY(ex) \ +do { \ + if (test_thread_flag(TIF_IA32)) \ + clear_thread_flag(TIF_ABI_PENDING); \ + else \ + set_thread_flag(TIF_ABI_PENDING); \ + current->personality |= force_personality32; \ +} while (0) + +#define COMPAT_ELF_PLATFORM ("i686") + +/* + * regs is struct pt_regs, pr_reg is elf_gregset_t (which is + * now struct_user_regs, they are different). Assumes current is the process + * getting dumped. + */ + +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ +do { \ + unsigned v; \ + (pr_reg)[0] = (regs)->r15; \ + (pr_reg)[1] = (regs)->r14; \ + (pr_reg)[2] = (regs)->r13; \ + (pr_reg)[3] = (regs)->r12; \ + (pr_reg)[4] = (regs)->bp; \ + (pr_reg)[5] = (regs)->bx; \ + (pr_reg)[6] = (regs)->r11; \ + (pr_reg)[7] = (regs)->r10; \ + (pr_reg)[8] = (regs)->r9; \ + (pr_reg)[9] = (regs)->r8; \ + (pr_reg)[10] = (regs)->ax; \ + (pr_reg)[11] = (regs)->cx; \ + (pr_reg)[12] = (regs)->dx; \ + (pr_reg)[13] = (regs)->si; \ + (pr_reg)[14] = (regs)->di; \ + (pr_reg)[15] = (regs)->orig_ax; \ + (pr_reg)[16] = (regs)->ip; \ + (pr_reg)[17] = (regs)->cs; \ + (pr_reg)[18] = (regs)->flags; \ + (pr_reg)[19] = (regs)->sp; \ + (pr_reg)[20] = (regs)->ss; \ + (pr_reg)[21] = current->thread.fs; \ + (pr_reg)[22] = current->thread.gs; \ + asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \ + asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \ + asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \ + asm("movl %%gs,%0" : "=r" (v)); (pr_reg)[26] = v; \ +} while (0); + +/* I'm not sure if we can use '-' here */ +#define ELF_PLATFORM ("x86_64") +extern void set_personality_64bit(void); +extern unsigned int sysctl_vsyscall32; +extern int force_personality32; + +#endif /* !CONFIG_X86_32 */ + +#define CORE_DUMP_USE_REGSET +#define USE_ELF_CORE_DUMP +#define ELF_EXEC_PAGESIZE 4096 + +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + +#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) + +/* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This could be done in user space, + but it's not easy, and we've already done it here. */ + +#define ELF_HWCAP (boot_cpu_data.x86_capability[0]) + +/* This yields a string that ld.so will use to load implementation + specific libraries for optimization. This is more specific in + intent than poking at uname or /proc/cpuinfo. + + For the moment, we have only optimizations for the Intel generations, + but that could change... */ + +#define SET_PERSONALITY(ex) set_personality_64bit() + +/* + * An executable for which elf_read_implies_exec() returns TRUE will + * have the READ_IMPLIES_EXEC personality flag set automatically. + */ +#define elf_read_implies_exec(ex, executable_stack) \ + (executable_stack != EXSTACK_DISABLE_X) + +struct task_struct; + +#define ARCH_DLINFO_IA32(vdso_enabled) \ +do { \ + if (vdso_enabled) { \ + NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ + } \ +} while (0) + +#ifdef CONFIG_X86_32 + +#define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO)) + +#define ARCH_DLINFO ARCH_DLINFO_IA32(vdso_enabled) + +/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ + +#else /* CONFIG_X86_32 */ + +#define VDSO_HIGH_BASE 0xffffe000U /* CONFIG_COMPAT_VDSO address */ + +/* 1GB for 64bit, 8MB for 32bit */ +#define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff) + +#define ARCH_DLINFO \ +do { \ + if (vdso_enabled) \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (unsigned long)current->mm->context.vdso); \ +} while (0) + +#define AT_SYSINFO 32 + +#define COMPAT_ARCH_DLINFO ARCH_DLINFO_IA32(sysctl_vsyscall32) + +#define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) + +#endif /* !CONFIG_X86_32 */ + +#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) + +#define VDSO_ENTRY \ + ((unsigned long)VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall)) + +struct linux_binprm; + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int executable_stack); + +extern int syscall32_setup_pages(struct linux_binprm *, int exstack); +#define compat_arch_setup_additional_pages syscall32_setup_pages + +extern unsigned long arch_randomize_brk(struct mm_struct *mm); +#define arch_randomize_brk arch_randomize_brk + +#endif /* _ASM_X86_ELF_H */ diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h new file mode 100644 index 00000000000..94826cf8745 --- /dev/null +++ b/arch/x86/include/asm/emergency-restart.h @@ -0,0 +1,18 @@ +#ifndef _ASM_X86_EMERGENCY_RESTART_H +#define _ASM_X86_EMERGENCY_RESTART_H + +enum reboot_type { + BOOT_TRIPLE = 't', + BOOT_KBD = 'k', +#ifdef CONFIG_X86_32 + BOOT_BIOS = 'b', +#endif + BOOT_ACPI = 'a', + BOOT_EFI = 'e' +}; + +extern enum reboot_type reboot_type; + +extern void machine_emergency_restart(void); + +#endif /* _ASM_X86_EMERGENCY_RESTART_H */ diff --git a/arch/x86/include/asm/errno.h b/arch/x86/include/asm/errno.h new file mode 100644 index 00000000000..4c82b503d92 --- /dev/null +++ b/arch/x86/include/asm/errno.h @@ -0,0 +1 @@ +#include <asm-generic/errno.h> diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h new file mode 100644 index 00000000000..380f0b4f17e --- /dev/null +++ b/arch/x86/include/asm/es7000/apic.h @@ -0,0 +1,193 @@ +#ifndef __ASM_ES7000_APIC_H +#define __ASM_ES7000_APIC_H + +#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) +#define esr_disable (1) + +static inline int apic_id_registered(void) +{ + return (1); +} + +static inline cpumask_t target_cpus(void) +{ +#if defined CONFIG_ES7000_CLUSTERED_APIC + return CPU_MASK_ALL; +#else + return cpumask_of_cpu(smp_processor_id()); +#endif +} + +#if defined CONFIG_ES7000_CLUSTERED_APIC +#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) +#define INT_DELIVERY_MODE (dest_LowestPrio) +#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */ +#define NO_BALANCE_IRQ (1) +#undef WAKE_SECONDARY_VIA_INIT +#define WAKE_SECONDARY_VIA_MIP +#else +#define APIC_DFR_VALUE (APIC_DFR_FLAT) +#define INT_DELIVERY_MODE (dest_Fixed) +#define INT_DEST_MODE (0) /* phys delivery to target procs */ +#define NO_BALANCE_IRQ (0) +#undef APIC_DEST_LOGICAL +#define APIC_DEST_LOGICAL 0x0 +#define WAKE_SECONDARY_VIA_INIT +#endif + +static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return 0; +} +static inline unsigned long check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +#define apicid_cluster(apicid) (apicid & 0xF0) + +static inline unsigned long calculate_ldr(int cpu) +{ + unsigned long id; + id = xapic_phys_to_log_apicid(cpu); + return (SET_APIC_LOGICAL_ID(id)); +} + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LdR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +static inline void init_apic_ldr(void) +{ + unsigned long val; + int cpu = smp_processor_id(); + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = calculate_ldr(cpu); + apic_write(APIC_LDR, val); +} + +#ifndef CONFIG_X86_GENERICARCH +extern void enable_apic_mode(void); +#endif + +extern int apic_version [MAX_APICS]; +static inline void setup_apic_routing(void) +{ + int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); + printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", + (apic_version[apic] == 0x14) ? + "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]); +} + +static inline int multi_timer_check(int apic, int irq) +{ + return 0; +} + +static inline int apicid_to_node(int logical_apicid) +{ + return 0; +} + + +static inline int cpu_present_to_apicid(int mps_cpu) +{ + if (!mps_cpu) + return boot_cpu_physical_apicid; + else if (mps_cpu < NR_CPUS) + return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) +{ + static int id = 0; + physid_mask_t mask; + mask = physid_mask_of_physid(id); + ++id; + return mask; +} + +extern u8 cpu_2_logical_apicid[]; +/* Mapping from cpu number to logical apicid */ +static inline int cpu_to_logical_apicid(int cpu) +{ +#ifdef CONFIG_SMP + if (cpu >= NR_CPUS) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +#else + return logical_smp_processor_id(); +#endif +} + +static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* For clustered we don't have a good way to do this yet - hack */ + return physids_promote(0xff); +} + + +static inline void setup_portio_remap(void) +{ +} + +extern unsigned int boot_cpu_physical_apicid; +static inline int check_phys_apicid_present(int cpu_physical_apicid) +{ + boot_cpu_physical_apicid = read_apic_id(); + return (1); +} + +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +{ + int num_bits_set; + int cpus_found = 0; + int cpu; + int apicid; + + num_bits_set = cpus_weight(cpumask); + /* Return id to all */ + if (num_bits_set == NR_CPUS) +#if defined CONFIG_ES7000_CLUSTERED_APIC + return 0xFF; +#else + return cpu_to_logical_apicid(0); +#endif + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of TARGET_CPUS. + */ + cpu = first_cpu(cpumask); + apicid = cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { + if (cpu_isset(cpu, cpumask)) { + int new_apicid = cpu_to_logical_apicid(cpu); + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)){ + printk ("%s: Not a valid mask!\n", __func__); +#if defined CONFIG_ES7000_CLUSTERED_APIC + return 0xFF; +#else + return cpu_to_logical_apicid(0); +#endif + } + apicid = new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + +static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +#endif /* __ASM_ES7000_APIC_H */ diff --git a/arch/x86/include/asm/es7000/apicdef.h b/arch/x86/include/asm/es7000/apicdef.h new file mode 100644 index 00000000000..8b234a3cb85 --- /dev/null +++ b/arch/x86/include/asm/es7000/apicdef.h @@ -0,0 +1,13 @@ +#ifndef __ASM_ES7000_APICDEF_H +#define __ASM_ES7000_APICDEF_H + +#define APIC_ID_MASK (0xFF<<24) + +static inline unsigned get_apic_id(unsigned long x) +{ + return (((x)>>24)&0xFF); +} + +#define GET_APIC_ID(x) get_apic_id(x) + +#endif diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h new file mode 100644 index 00000000000..632a955fcc0 --- /dev/null +++ b/arch/x86/include/asm/es7000/ipi.h @@ -0,0 +1,24 @@ +#ifndef __ASM_ES7000_IPI_H +#define __ASM_ES7000_IPI_H + +void send_IPI_mask_sequence(cpumask_t mask, int vector); + +static inline void send_IPI_mask(cpumask_t mask, int vector) +{ + send_IPI_mask_sequence(mask, vector); +} + +static inline void send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); +} + +static inline void send_IPI_all(int vector) +{ + send_IPI_mask(cpu_online_map, vector); +} + +#endif /* __ASM_ES7000_IPI_H */ diff --git a/arch/x86/include/asm/es7000/mpparse.h b/arch/x86/include/asm/es7000/mpparse.h new file mode 100644 index 00000000000..ed5a3caae14 --- /dev/null +++ b/arch/x86/include/asm/es7000/mpparse.h @@ -0,0 +1,30 @@ +#ifndef __ASM_ES7000_MPPARSE_H +#define __ASM_ES7000_MPPARSE_H + +#include <linux/acpi.h> + +extern int parse_unisys_oem (char *oemptr); +extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); +extern void setup_unisys(void); + +#ifndef CONFIG_X86_GENERICARCH +extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id); +extern int mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid); +#endif + +#ifdef CONFIG_ACPI + +static inline int es7000_check_dsdt(void) +{ + struct acpi_table_header header; + + if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && + !strncmp(header.oem_id, "UNISYS", 6)) + return 1; + return 0; +} +#endif + +#endif /* __ASM_MACH_MPPARSE_H */ diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h new file mode 100644 index 00000000000..3ffc5a7bf66 --- /dev/null +++ b/arch/x86/include/asm/es7000/wakecpu.h @@ -0,0 +1,59 @@ +#ifndef __ASM_ES7000_WAKECPU_H +#define __ASM_ES7000_WAKECPU_H + +/* + * This file copes with machines that wakeup secondary CPUs by the + * INIT, INIT, STARTUP sequence. + */ + +#ifdef CONFIG_ES7000_CLUSTERED_APIC +#define WAKE_SECONDARY_VIA_MIP +#else +#define WAKE_SECONDARY_VIA_INIT +#endif + +#ifdef WAKE_SECONDARY_VIA_MIP +extern int es7000_start_cpu(int cpu, unsigned long eip); +static inline int +wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) +{ + int boot_error = 0; + boot_error = es7000_start_cpu(phys_apicid, start_eip); + return boot_error; +} +#endif + +#define TRAMPOLINE_LOW phys_to_virt(0x467) +#define TRAMPOLINE_HIGH phys_to_virt(0x469) + +#define boot_cpu_apicid boot_cpu_physical_apicid + +static inline void wait_for_init_deassert(atomic_t *deassert) +{ +#ifdef WAKE_SECONDARY_VIA_INIT + while (!atomic_read(deassert)) + cpu_relax(); +#endif + return; +} + +/* Nothing to do for most platforms, since cleared by the INIT cycle */ +static inline void smp_callin_clear_local_apic(void) +{ +} + +static inline void store_NMI_vector(unsigned short *high, unsigned short *low) +{ +} + +static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) +{ +} + +#if APIC_DEBUG + #define inquire_remote_apic(apicid) __inquire_remote_apic(apicid) +#else + #define inquire_remote_apic(apicid) {} +#endif + +#endif /* __ASM_MACH_WAKECPU_H */ diff --git a/arch/x86/include/asm/fb.h b/arch/x86/include/asm/fb.h new file mode 100644 index 00000000000..53018464aea --- /dev/null +++ b/arch/x86/include/asm/fb.h @@ -0,0 +1,21 @@ +#ifndef _ASM_X86_FB_H +#define _ASM_X86_FB_H + +#include <linux/fb.h> +#include <linux/fs.h> +#include <asm/page.h> + +static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, + unsigned long off) +{ + if (boot_cpu_data.x86 > 3) + pgprot_val(vma->vm_page_prot) |= _PAGE_PCD; +} + +#ifdef CONFIG_X86_32 +extern int fb_is_primary_device(struct fb_info *info); +#else +static inline int fb_is_primary_device(struct fb_info *info) { return 0; } +#endif + +#endif /* _ASM_X86_FB_H */ diff --git a/arch/x86/include/asm/fcntl.h b/arch/x86/include/asm/fcntl.h new file mode 100644 index 00000000000..46ab12db573 --- /dev/null +++ b/arch/x86/include/asm/fcntl.h @@ -0,0 +1 @@ +#include <asm-generic/fcntl.h> diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h new file mode 100644 index 00000000000..8668a94f850 --- /dev/null +++ b/arch/x86/include/asm/fixmap.h @@ -0,0 +1,68 @@ +#ifndef _ASM_X86_FIXMAP_H +#define _ASM_X86_FIXMAP_H + +#ifdef CONFIG_X86_32 +# include "fixmap_32.h" +#else +# include "fixmap_64.h" +#endif + +extern int fixmaps_set; + +void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); +void native_set_fixmap(enum fixed_addresses idx, + unsigned long phys, pgprot_t flags); + +#ifndef CONFIG_PARAVIRT +static inline void __set_fixmap(enum fixed_addresses idx, + unsigned long phys, pgprot_t flags) +{ + native_set_fixmap(idx, phys, flags); +} +#endif + +#define set_fixmap(idx, phys) \ + __set_fixmap(idx, phys, PAGE_KERNEL) + +/* + * Some hardware wants to get fixmapped without caching. + */ +#define set_fixmap_nocache(idx, phys) \ + __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) + +#define clear_fixmap(idx) \ + __set_fixmap(idx, 0, __pgprot(0)) + +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) +#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) + +extern void __this_fixmap_does_not_exist(void); + +/* + * 'index to address' translation. If anyone tries to use the idx + * directly without translation, we catch the bug with a NULL-deference + * kernel oops. Illegal ranges of incoming indices are caught too. + */ +static __always_inline unsigned long fix_to_virt(const unsigned int idx) +{ + /* + * this branch gets completely eliminated after inlining, + * except when someone tries to use fixaddr indices in an + * illegal way. (such as mixing up address types or using + * out-of-range indices). + * + * If it doesn't get removed, the linker will complain + * loudly with a reasonably clear error message.. + */ + if (idx >= __end_of_fixed_addresses) + __this_fixmap_does_not_exist(); + + return __fix_to_virt(idx); +} + +static inline unsigned long virt_to_fix(const unsigned long vaddr) +{ + BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); + return __virt_to_fix(vaddr); +} +#endif /* _ASM_X86_FIXMAP_H */ diff --git a/arch/x86/include/asm/fixmap_32.h b/arch/x86/include/asm/fixmap_32.h new file mode 100644 index 00000000000..09f29ab5c13 --- /dev/null +++ b/arch/x86/include/asm/fixmap_32.h @@ -0,0 +1,123 @@ +/* + * fixmap.h: compile-time virtual memory allocation + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998 Ingo Molnar + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + */ + +#ifndef _ASM_X86_FIXMAP_32_H +#define _ASM_X86_FIXMAP_32_H + + +/* used by vmalloc.c, vsyscall.lds.S. + * + * Leave one empty page between vmalloc'ed areas and + * the start of the fixmap. + */ +extern unsigned long __FIXADDR_TOP; +#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) +#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) + +#ifndef __ASSEMBLY__ +#include <linux/kernel.h> +#include <asm/acpi.h> +#include <asm/apicdef.h> +#include <asm/page.h> +#ifdef CONFIG_HIGHMEM +#include <linux/threads.h> +#include <asm/kmap_types.h> +#endif + +/* + * Here we define all the compile-time 'special' virtual + * addresses. The point is to have a constant address at + * compile time, but to set the physical address only + * in the boot process. We allocate these special addresses + * from the end of virtual memory (0xfffff000) backwards. + * Also this lets us do fail-safe vmalloc(), we + * can guarantee that these special addresses and + * vmalloc()-ed addresses never overlap. + * + * these 'compile-time allocated' memory buffers are + * fixed-size 4k pages. (or larger if used with an increment + * highger than 1) use fixmap_set(idx,phys) to associate + * physical memory with fixmap indices. + * + * TLB entries of such buffers will not be flushed across + * task switches. + */ +enum fixed_addresses { + FIX_HOLE, + FIX_VDSO, + FIX_DBGP_BASE, + FIX_EARLYCON_MEM_BASE, +#ifdef CONFIG_X86_LOCAL_APIC + FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ +#endif +#ifdef CONFIG_X86_IO_APIC + FIX_IO_APIC_BASE_0, + FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, +#endif +#ifdef CONFIG_X86_VISWS_APIC + FIX_CO_CPU, /* Cobalt timer */ + FIX_CO_APIC, /* Cobalt APIC Redirection Table */ + FIX_LI_PCIA, /* Lithium PCI Bridge A */ + FIX_LI_PCIB, /* Lithium PCI Bridge B */ +#endif +#ifdef CONFIG_X86_F00F_BUG + FIX_F00F_IDT, /* Virtual mapping for IDT */ +#endif +#ifdef CONFIG_X86_CYCLONE_TIMER + FIX_CYCLONE_TIMER, /*cyclone timer register*/ +#endif +#ifdef CONFIG_HIGHMEM + FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, +#endif +#ifdef CONFIG_PCI_MMCONFIG + FIX_PCIE_MCFG, +#endif +#ifdef CONFIG_PARAVIRT + FIX_PARAVIRT_BOOTMAP, +#endif + __end_of_permanent_fixed_addresses, + /* + * 256 temporary boot-time mappings, used by early_ioremap(), + * before ioremap() is functional. + * + * We round it up to the next 256 pages boundary so that we + * can have a single pgd entry and a single pte table: + */ +#define NR_FIX_BTMAPS 64 +#define FIX_BTMAPS_SLOTS 4 + FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - + (__end_of_permanent_fixed_addresses & 255), + FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, + FIX_WP_TEST, +#ifdef CONFIG_ACPI + FIX_ACPI_BEGIN, + FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, +#endif +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT + FIX_OHCI1394_BASE, +#endif + __end_of_fixed_addresses +}; + +extern void reserve_top_address(unsigned long reserve); + + +#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) + +#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +#define __FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) +#define FIXADDR_BOOT_START (FIXADDR_TOP - __FIXADDR_BOOT_SIZE) + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_X86_FIXMAP_32_H */ diff --git a/arch/x86/include/asm/fixmap_64.h b/arch/x86/include/asm/fixmap_64.h new file mode 100644 index 00000000000..00a30ab9b1a --- /dev/null +++ b/arch/x86/include/asm/fixmap_64.h @@ -0,0 +1,83 @@ +/* + * fixmap.h: compile-time virtual memory allocation + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998 Ingo Molnar + */ + +#ifndef _ASM_X86_FIXMAP_64_H +#define _ASM_X86_FIXMAP_64_H + +#include <linux/kernel.h> +#include <asm/acpi.h> +#include <asm/apicdef.h> +#include <asm/page.h> +#include <asm/vsyscall.h> +#include <asm/efi.h> + +/* + * Here we define all the compile-time 'special' virtual + * addresses. The point is to have a constant address at + * compile time, but to set the physical address only + * in the boot process. + * + * These 'compile-time allocated' memory buffers are + * fixed-size 4k pages (or larger if used with an increment + * higher than 1). Use set_fixmap(idx,phys) to associate + * physical memory with fixmap indices. + * + * TLB entries of such buffers will not be flushed across + * task switches. + */ + +enum fixed_addresses { + VSYSCALL_LAST_PAGE, + VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, + VSYSCALL_HPET, + FIX_DBGP_BASE, + FIX_EARLYCON_MEM_BASE, + FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ + FIX_IO_APIC_BASE_0, + FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, + FIX_EFI_IO_MAP_LAST_PAGE, + FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE + + MAX_EFI_IO_PAGES - 1, +#ifdef CONFIG_PARAVIRT + FIX_PARAVIRT_BOOTMAP, +#endif + __end_of_permanent_fixed_addresses, +#ifdef CONFIG_ACPI + FIX_ACPI_BEGIN, + FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, +#endif +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT + FIX_OHCI1394_BASE, +#endif + /* + * 256 temporary boot-time mappings, used by early_ioremap(), + * before ioremap() is functional. + * + * We round it up to the next 256 pages boundary so that we + * can have a single pgd entry and a single pte table: + */ +#define NR_FIX_BTMAPS 64 +#define FIX_BTMAPS_SLOTS 4 + FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - + (__end_of_permanent_fixed_addresses & 255), + FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, + __end_of_fixed_addresses +}; + +#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) +#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + +/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */ +#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) +#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) + +#endif /* _ASM_X86_FIXMAP_64_H */ diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h new file mode 100644 index 00000000000..dbe82a5c5ea --- /dev/null +++ b/arch/x86/include/asm/floppy.h @@ -0,0 +1,281 @@ +/* + * Architecture specific parts of the Floppy driver + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1995 + */ +#ifndef _ASM_X86_FLOPPY_H +#define _ASM_X86_FLOPPY_H + +#include <linux/vmalloc.h> + +/* + * The DMA channel used by the floppy controller cannot access data at + * addresses >= 16MB + * + * Went back to the 1MB limit, as some people had problems with the floppy + * driver otherwise. It doesn't matter much for performance anyway, as most + * floppy accesses go through the track buffer. + */ +#define _CROSS_64KB(a, s, vdma) \ + (!(vdma) && \ + ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64)) + +#define CROSS_64KB(a, s) _CROSS_64KB(a, s, use_virtual_dma & 1) + + +#define SW fd_routine[use_virtual_dma & 1] +#define CSW fd_routine[can_use_virtual_dma & 1] + + +#define fd_inb(port) inb_p(port) +#define fd_outb(value, port) outb_p(value, port) + +#define fd_request_dma() CSW._request_dma(FLOPPY_DMA, "floppy") +#define fd_free_dma() CSW._free_dma(FLOPPY_DMA) +#define fd_enable_irq() enable_irq(FLOPPY_IRQ) +#define fd_disable_irq() disable_irq(FLOPPY_IRQ) +#define fd_free_irq() free_irq(FLOPPY_IRQ, NULL) +#define fd_get_dma_residue() SW._get_dma_residue(FLOPPY_DMA) +#define fd_dma_mem_alloc(size) SW._dma_mem_alloc(size) +#define fd_dma_setup(addr, size, mode, io) SW._dma_setup(addr, size, mode, io) + +#define FLOPPY_CAN_FALLBACK_ON_NODMA + +static int virtual_dma_count; +static int virtual_dma_residue; +static char *virtual_dma_addr; +static int virtual_dma_mode; +static int doing_pdma; + +static irqreturn_t floppy_hardint(int irq, void *dev_id) +{ + unsigned char st; + +#undef TRACE_FLPY_INT + +#ifdef TRACE_FLPY_INT + static int calls; + static int bytes; + static int dma_wait; +#endif + if (!doing_pdma) + return floppy_interrupt(irq, dev_id); + +#ifdef TRACE_FLPY_INT + if (!calls) + bytes = virtual_dma_count; +#endif + + { + int lcount; + char *lptr; + + st = 1; + for (lcount = virtual_dma_count, lptr = virtual_dma_addr; + lcount; lcount--, lptr++) { + st = inb(virtual_dma_port + 4) & 0xa0; + if (st != 0xa0) + break; + if (virtual_dma_mode) + outb_p(*lptr, virtual_dma_port + 5); + else + *lptr = inb_p(virtual_dma_port + 5); + } + virtual_dma_count = lcount; + virtual_dma_addr = lptr; + st = inb(virtual_dma_port + 4); + } + +#ifdef TRACE_FLPY_INT + calls++; +#endif + if (st == 0x20) + return IRQ_HANDLED; + if (!(st & 0x20)) { + virtual_dma_residue += virtual_dma_count; + virtual_dma_count = 0; +#ifdef TRACE_FLPY_INT + printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", + virtual_dma_count, virtual_dma_residue, calls, bytes, + dma_wait); + calls = 0; + dma_wait = 0; +#endif + doing_pdma = 0; + floppy_interrupt(irq, dev_id); + return IRQ_HANDLED; + } +#ifdef TRACE_FLPY_INT + if (!virtual_dma_count) + dma_wait++; +#endif + return IRQ_HANDLED; +} + +static void fd_disable_dma(void) +{ + if (!(can_use_virtual_dma & 1)) + disable_dma(FLOPPY_DMA); + doing_pdma = 0; + virtual_dma_residue += virtual_dma_count; + virtual_dma_count = 0; +} + +static int vdma_request_dma(unsigned int dmanr, const char *device_id) +{ + return 0; +} + +static void vdma_nop(unsigned int dummy) +{ +} + + +static int vdma_get_dma_residue(unsigned int dummy) +{ + return virtual_dma_count + virtual_dma_residue; +} + + +static int fd_request_irq(void) +{ + if (can_use_virtual_dma) + return request_irq(FLOPPY_IRQ, floppy_hardint, + IRQF_DISABLED, "floppy", NULL); + else + return request_irq(FLOPPY_IRQ, floppy_interrupt, + IRQF_DISABLED, "floppy", NULL); +} + +static unsigned long dma_mem_alloc(unsigned long size) +{ + return __get_dma_pages(GFP_KERNEL|__GFP_NORETRY, get_order(size)); +} + + +static unsigned long vdma_mem_alloc(unsigned long size) +{ + return (unsigned long)vmalloc(size); + +} + +#define nodma_mem_alloc(size) vdma_mem_alloc(size) + +static void _fd_dma_mem_free(unsigned long addr, unsigned long size) +{ + if ((unsigned long)addr >= (unsigned long)high_memory) + vfree((void *)addr); + else + free_pages(addr, get_order(size)); +} + +#define fd_dma_mem_free(addr, size) _fd_dma_mem_free(addr, size) + +static void _fd_chose_dma_mode(char *addr, unsigned long size) +{ + if (can_use_virtual_dma == 2) { + if ((unsigned long)addr >= (unsigned long)high_memory || + isa_virt_to_bus(addr) >= 0x1000000 || + _CROSS_64KB(addr, size, 0)) + use_virtual_dma = 1; + else + use_virtual_dma = 0; + } else { + use_virtual_dma = can_use_virtual_dma & 1; + } +} + +#define fd_chose_dma_mode(addr, size) _fd_chose_dma_mode(addr, size) + + +static int vdma_dma_setup(char *addr, unsigned long size, int mode, int io) +{ + doing_pdma = 1; + virtual_dma_port = io; + virtual_dma_mode = (mode == DMA_MODE_WRITE); + virtual_dma_addr = addr; + virtual_dma_count = size; + virtual_dma_residue = 0; + return 0; +} + +static int hard_dma_setup(char *addr, unsigned long size, int mode, int io) +{ +#ifdef FLOPPY_SANITY_CHECK + if (CROSS_64KB(addr, size)) { + printk("DMA crossing 64-K boundary %p-%p\n", addr, addr+size); + return -1; + } +#endif + /* actual, physical DMA */ + doing_pdma = 0; + clear_dma_ff(FLOPPY_DMA); + set_dma_mode(FLOPPY_DMA, mode); + set_dma_addr(FLOPPY_DMA, isa_virt_to_bus(addr)); + set_dma_count(FLOPPY_DMA, size); + enable_dma(FLOPPY_DMA); + return 0; +} + +static struct fd_routine_l { + int (*_request_dma)(unsigned int dmanr, const char *device_id); + void (*_free_dma)(unsigned int dmanr); + int (*_get_dma_residue)(unsigned int dummy); + unsigned long (*_dma_mem_alloc)(unsigned long size); + int (*_dma_setup)(char *addr, unsigned long size, int mode, int io); +} fd_routine[] = { + { + request_dma, + free_dma, + get_dma_residue, + dma_mem_alloc, + hard_dma_setup + }, + { + vdma_request_dma, + vdma_nop, + vdma_get_dma_residue, + vdma_mem_alloc, + vdma_dma_setup + } +}; + + +static int FDC1 = 0x3f0; +static int FDC2 = -1; + +/* + * Floppy types are stored in the rtc's CMOS RAM and so rtc_lock + * is needed to prevent corrupted CMOS RAM in case "insmod floppy" + * coincides with another rtc CMOS user. Paul G. + */ +#define FLOPPY0_TYPE \ +({ \ + unsigned long flags; \ + unsigned char val; \ + spin_lock_irqsave(&rtc_lock, flags); \ + val = (CMOS_READ(0x10) >> 4) & 15; \ + spin_unlock_irqrestore(&rtc_lock, flags); \ + val; \ +}) + +#define FLOPPY1_TYPE \ +({ \ + unsigned long flags; \ + unsigned char val; \ + spin_lock_irqsave(&rtc_lock, flags); \ + val = CMOS_READ(0x10) & 15; \ + spin_unlock_irqrestore(&rtc_lock, flags); \ + val; \ +}) + +#define N_FDC 2 +#define N_DRIVE 8 + +#define EXTRA_FLOPPY_PARAMS + +#endif /* _ASM_X86_FLOPPY_H */ diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h new file mode 100644 index 00000000000..06850a7194e --- /dev/null +++ b/arch/x86/include/asm/frame.h @@ -0,0 +1,27 @@ +#ifdef __ASSEMBLY__ + +#include <asm/dwarf2.h> + +/* The annotation hides the frame from the unwinder and makes it look + like a ordinary ebp save/restore. This avoids some special cases for + frame pointer later */ +#ifdef CONFIG_FRAME_POINTER + .macro FRAME + pushl %ebp + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebp,0 + movl %esp,%ebp + .endm + .macro ENDFRAME + popl %ebp + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ebp + .endm +#else + .macro FRAME + .endm + .macro ENDFRAME + .endm +#endif + +#endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h new file mode 100644 index 00000000000..47f7e65e6c1 --- /dev/null +++ b/arch/x86/include/asm/ftrace.h @@ -0,0 +1,24 @@ +#ifndef _ASM_X86_FTRACE_H +#define _ASM_X86_FTRACE_H + +#ifdef CONFIG_FTRACE +#define MCOUNT_ADDR ((long)(mcount)) +#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ + +#ifndef __ASSEMBLY__ +extern void mcount(void); + +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + /* + * call mcount is "e8 <4 byte offset>" + * The addr points to the 4 byte offset and the caller of this + * function wants the pointer to e8. Simply subtract one. + */ + return addr - 1; +} +#endif + +#endif /* CONFIG_FTRACE */ + +#endif /* _ASM_X86_FTRACE_H */ diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h new file mode 100644 index 00000000000..1f11ce44e95 --- /dev/null +++ b/arch/x86/include/asm/futex.h @@ -0,0 +1,140 @@ +#ifndef _ASM_X86_FUTEX_H +#define _ASM_X86_FUTEX_H + +#ifdef __KERNEL__ + +#include <linux/futex.h> +#include <linux/uaccess.h> + +#include <asm/asm.h> +#include <asm/errno.h> +#include <asm/processor.h> +#include <asm/system.h> + +#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ + asm volatile("1:\t" insn "\n" \ + "2:\t.section .fixup,\"ax\"\n" \ + "3:\tmov\t%3, %1\n" \ + "\tjmp\t2b\n" \ + "\t.previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ + : "i" (-EFAULT), "0" (oparg), "1" (0)) + +#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ + asm volatile("1:\tmovl %2, %0\n" \ + "\tmovl\t%0, %3\n" \ + "\t" insn "\n" \ + "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ + "\tjnz\t1b\n" \ + "3:\t.section .fixup,\"ax\"\n" \ + "4:\tmov\t%5, %1\n" \ + "\tjmp\t3b\n" \ + "\t.previous\n" \ + _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE(2b, 4b) \ + : "=&a" (oldval), "=&r" (ret), \ + "+m" (*uaddr), "=&r" (tem) \ + : "r" (oparg), "i" (-EFAULT), "1" (0)) + +static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + +#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) + /* Real i386 machines can only support FUTEX_OP_SET */ + if (op != FUTEX_OP_SET && boot_cpu_data.x86 == 3) + return -ENOSYS; +#endif + + pagefault_disable(); + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, + uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + + pagefault_enable(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: + ret = (oldval == cmparg); + break; + case FUTEX_OP_CMP_NE: + ret = (oldval != cmparg); + break; + case FUTEX_OP_CMP_LT: + ret = (oldval < cmparg); + break; + case FUTEX_OP_CMP_GE: + ret = (oldval >= cmparg); + break; + case FUTEX_OP_CMP_LE: + ret = (oldval <= cmparg); + break; + case FUTEX_OP_CMP_GT: + ret = (oldval > cmparg); + break; + default: + ret = -ENOSYS; + } + } + return ret; +} + +static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, + int newval) +{ + +#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) + /* Real i386 machines have no cmpxchg instruction */ + if (boot_cpu_data.x86 == 3) + return -ENOSYS; +#endif + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" + "2:\t.section .fixup, \"ax\"\n" + "3:\tmov %2, %0\n" + "\tjmp 2b\n" + "\t.previous\n" + _ASM_EXTABLE(1b, 3b) + : "=a" (oldval), "+m" (*uaddr) + : "i" (-EFAULT), "r" (newval), "0" (oldval) + : "memory" + ); + + return oldval; +} + +#endif +#endif /* _ASM_X86_FUTEX_H */ diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h new file mode 100644 index 00000000000..74252264433 --- /dev/null +++ b/arch/x86/include/asm/gart.h @@ -0,0 +1,73 @@ +#ifndef _ASM_X86_GART_H +#define _ASM_X86_GART_H + +#include <asm/e820.h> + +extern void set_up_gart_resume(u32, u32); + +extern int fallback_aper_order; +extern int fallback_aper_force; +extern int fix_aperture; + +/* PTE bits. */ +#define GPTE_VALID 1 +#define GPTE_COHERENT 2 + +/* Aperture control register bits. */ +#define GARTEN (1<<0) +#define DISGARTCPU (1<<4) +#define DISGARTIO (1<<5) + +/* GART cache control register bits. */ +#define INVGART (1<<0) +#define GARTPTEERR (1<<1) + +/* K8 On-cpu GART registers */ +#define AMD64_GARTAPERTURECTL 0x90 +#define AMD64_GARTAPERTUREBASE 0x94 +#define AMD64_GARTTABLEBASE 0x98 +#define AMD64_GARTCACHECTL 0x9c +#define AMD64_GARTEN (1<<0) + +extern int agp_amd64_init(void); + +static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) +{ + u32 tmp, ctl; + + /* address of the mappings table */ + addr >>= 12; + tmp = (u32) addr<<4; + tmp &= ~0xf; + pci_write_config_dword(dev, AMD64_GARTTABLEBASE, tmp); + + /* Enable GART translation for this hammer. */ + pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); + ctl |= GARTEN; + ctl &= ~(DISGARTCPU | DISGARTIO); + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); +} + +static inline int aperture_valid(u64 aper_base, u32 aper_size, u32 min_size) +{ + if (!aper_base) + return 0; + + if (aper_base + aper_size > 0x100000000ULL) { + printk(KERN_INFO "Aperture beyond 4GB. Ignoring.\n"); + return 0; + } + if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { + printk(KERN_INFO "Aperture pointing to e820 RAM. Ignoring.\n"); + return 0; + } + if (aper_size < min_size) { + printk(KERN_INFO "Aperture too small (%d MB) than (%d MB)\n", + aper_size>>20, min_size>>20); + return 0; + } + + return 1; +} + +#endif /* _ASM_X86_GART_H */ diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h new file mode 100644 index 00000000000..d48bee663a6 --- /dev/null +++ b/arch/x86/include/asm/genapic.h @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "genapic_32.h" +#else +# include "genapic_64.h" +#endif diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h new file mode 100644 index 00000000000..5cbd4fcc06f --- /dev/null +++ b/arch/x86/include/asm/genapic_32.h @@ -0,0 +1,126 @@ +#ifndef _ASM_X86_GENAPIC_32_H +#define _ASM_X86_GENAPIC_32_H + +#include <asm/mpspec.h> + +/* + * Generic APIC driver interface. + * + * An straight forward mapping of the APIC related parts of the + * x86 subarchitecture interface to a dynamic object. + * + * This is used by the "generic" x86 subarchitecture. + * + * Copyright 2003 Andi Kleen, SuSE Labs. + */ + +struct mpc_config_bus; +struct mp_config_table; +struct mpc_config_processor; + +struct genapic { + char *name; + int (*probe)(void); + + int (*apic_id_registered)(void); + cpumask_t (*target_cpus)(void); + int int_delivery_mode; + int int_dest_mode; + int ESR_DISABLE; + int apic_destination_logical; + unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); + unsigned long (*check_apicid_present)(int apicid); + int no_balance_irq; + int no_ioapic_check; + void (*init_apic_ldr)(void); + physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); + + void (*setup_apic_routing)(void); + int (*multi_timer_check)(int apic, int irq); + int (*apicid_to_node)(int logical_apicid); + int (*cpu_to_logical_apicid)(int cpu); + int (*cpu_present_to_apicid)(int mps_cpu); + physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); + void (*setup_portio_remap)(void); + int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); + void (*enable_apic_mode)(void); + u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); + + /* mpparse */ + /* When one of the next two hooks returns 1 the genapic + is switched to this. Essentially they are additional probe + functions. */ + int (*mps_oem_check)(struct mp_config_table *mpc, char *oem, + char *productid); + int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); + + unsigned (*get_apic_id)(unsigned long x); + unsigned long apic_id_mask; + unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); + cpumask_t (*vector_allocation_domain)(int cpu); + +#ifdef CONFIG_SMP + /* ipi */ + void (*send_IPI_mask)(cpumask_t mask, int vector); + void (*send_IPI_allbutself)(int vector); + void (*send_IPI_all)(int vector); +#endif +}; + +#define APICFUNC(x) .x = x, + +/* More functions could be probably marked IPIFUNC and save some space + in UP GENERICARCH kernels, but I don't have the nerve right now + to untangle this mess. -AK */ +#ifdef CONFIG_SMP +#define IPIFUNC(x) APICFUNC(x) +#else +#define IPIFUNC(x) +#endif + +#define APIC_INIT(aname, aprobe) \ +{ \ + .name = aname, \ + .probe = aprobe, \ + .int_delivery_mode = INT_DELIVERY_MODE, \ + .int_dest_mode = INT_DEST_MODE, \ + .no_balance_irq = NO_BALANCE_IRQ, \ + .ESR_DISABLE = esr_disable, \ + .apic_destination_logical = APIC_DEST_LOGICAL, \ + APICFUNC(apic_id_registered) \ + APICFUNC(target_cpus) \ + APICFUNC(check_apicid_used) \ + APICFUNC(check_apicid_present) \ + APICFUNC(init_apic_ldr) \ + APICFUNC(ioapic_phys_id_map) \ + APICFUNC(setup_apic_routing) \ + APICFUNC(multi_timer_check) \ + APICFUNC(apicid_to_node) \ + APICFUNC(cpu_to_logical_apicid) \ + APICFUNC(cpu_present_to_apicid) \ + APICFUNC(apicid_to_cpu_present) \ + APICFUNC(setup_portio_remap) \ + APICFUNC(check_phys_apicid_present) \ + APICFUNC(mps_oem_check) \ + APICFUNC(get_apic_id) \ + .apic_id_mask = APIC_ID_MASK, \ + APICFUNC(cpu_mask_to_apicid) \ + APICFUNC(vector_allocation_domain) \ + APICFUNC(acpi_madt_oem_check) \ + IPIFUNC(send_IPI_mask) \ + IPIFUNC(send_IPI_allbutself) \ + IPIFUNC(send_IPI_all) \ + APICFUNC(enable_apic_mode) \ + APICFUNC(phys_pkg_id) \ +} + +extern struct genapic *genapic; + +enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; +#define get_uv_system_type() UV_NONE +#define is_uv_system() 0 +#define uv_wakeup_secondary(a, b) 1 +#define uv_system_init() do {} while (0) + + +#endif /* _ASM_X86_GENAPIC_32_H */ diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h new file mode 100644 index 00000000000..13c4e96199e --- /dev/null +++ b/arch/x86/include/asm/genapic_64.h @@ -0,0 +1,58 @@ +#ifndef _ASM_X86_GENAPIC_64_H +#define _ASM_X86_GENAPIC_64_H + +/* + * Copyright 2004 James Cleverdon, IBM. + * Subject to the GNU Public License, v.2 + * + * Generic APIC sub-arch data struct. + * + * Hacked for x86-64 by James Cleverdon from i386 architecture code by + * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and + * James Cleverdon. + */ + +struct genapic { + char *name; + int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); + u32 int_delivery_mode; + u32 int_dest_mode; + int (*apic_id_registered)(void); + cpumask_t (*target_cpus)(void); + cpumask_t (*vector_allocation_domain)(int cpu); + void (*init_apic_ldr)(void); + /* ipi */ + void (*send_IPI_mask)(cpumask_t mask, int vector); + void (*send_IPI_allbutself)(int vector); + void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); + /* */ + unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); + unsigned int (*phys_pkg_id)(int index_msb); + unsigned int (*get_apic_id)(unsigned long x); + unsigned long (*set_apic_id)(unsigned int id); + unsigned long apic_id_mask; +}; + +extern struct genapic *genapic; + +extern struct genapic apic_flat; +extern struct genapic apic_physflat; +extern struct genapic apic_x2apic_cluster; +extern struct genapic apic_x2apic_phys; +extern int acpi_madt_oem_check(char *, char *); + +extern void apic_send_IPI_self(int vector); +enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; +extern enum uv_system_type get_uv_system_type(void); +extern int is_uv_system(void); + +extern struct genapic apic_x2apic_uv_x; +DECLARE_PER_CPU(int, x2apic_extra_bits); +extern void uv_cpu_init(void); +extern void uv_system_init(void); +extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); + +extern void setup_apic_routing(void); + +#endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h new file mode 100644 index 00000000000..ad3c2ed7548 --- /dev/null +++ b/arch/x86/include/asm/geode.h @@ -0,0 +1,253 @@ +/* + * AMD Geode definitions + * Copyright (C) 2006, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef _ASM_X86_GEODE_H +#define _ASM_X86_GEODE_H + +#include <asm/processor.h> +#include <linux/io.h> + +/* Generic southbridge functions */ + +#define GEODE_DEV_PMS 0 +#define GEODE_DEV_ACPI 1 +#define GEODE_DEV_GPIO 2 +#define GEODE_DEV_MFGPT 3 + +extern int geode_get_dev_base(unsigned int dev); + +/* Useful macros */ +#define geode_pms_base() geode_get_dev_base(GEODE_DEV_PMS) +#define geode_acpi_base() geode_get_dev_base(GEODE_DEV_ACPI) +#define geode_gpio_base() geode_get_dev_base(GEODE_DEV_GPIO) +#define geode_mfgpt_base() geode_get_dev_base(GEODE_DEV_MFGPT) + +/* MSRS */ + +#define MSR_GLIU_P2D_RO0 0x10000029 + +#define MSR_LX_GLD_MSR_CONFIG 0x48002001 +#define MSR_LX_MSR_PADSEL 0x48002011 /* NOT 0x48000011; the data + * sheet has the wrong value */ +#define MSR_GLCP_SYS_RSTPLL 0x4C000014 +#define MSR_GLCP_DOTPLL 0x4C000015 + +#define MSR_LBAR_SMB 0x5140000B +#define MSR_LBAR_GPIO 0x5140000C +#define MSR_LBAR_MFGPT 0x5140000D +#define MSR_LBAR_ACPI 0x5140000E +#define MSR_LBAR_PMS 0x5140000F + +#define MSR_DIVIL_SOFT_RESET 0x51400017 + +#define MSR_PIC_YSEL_LOW 0x51400020 +#define MSR_PIC_YSEL_HIGH 0x51400021 +#define MSR_PIC_ZSEL_LOW 0x51400022 +#define MSR_PIC_ZSEL_HIGH 0x51400023 +#define MSR_PIC_IRQM_LPC 0x51400025 + +#define MSR_MFGPT_IRQ 0x51400028 +#define MSR_MFGPT_NR 0x51400029 +#define MSR_MFGPT_SETUP 0x5140002B + +#define MSR_LX_SPARE_MSR 0x80000011 /* DC-specific */ + +#define MSR_GX_GLD_MSR_CONFIG 0xC0002001 +#define MSR_GX_MSR_PADSEL 0xC0002011 + +/* Resource Sizes */ + +#define LBAR_GPIO_SIZE 0xFF +#define LBAR_MFGPT_SIZE 0x40 +#define LBAR_ACPI_SIZE 0x40 +#define LBAR_PMS_SIZE 0x80 + +/* ACPI registers (PMS block) */ + +/* + * PM1_EN is only valid when VSA is enabled for 16 bit reads. + * When VSA is not enabled, *always* read both PM1_STS and PM1_EN + * with a 32 bit read at offset 0x0 + */ + +#define PM1_STS 0x00 +#define PM1_EN 0x02 +#define PM1_CNT 0x08 +#define PM2_CNT 0x0C +#define PM_TMR 0x10 +#define PM_GPE0_STS 0x18 +#define PM_GPE0_EN 0x1C + +/* PMC registers (PMS block) */ + +#define PM_SSD 0x00 +#define PM_SCXA 0x04 +#define PM_SCYA 0x08 +#define PM_OUT_SLPCTL 0x0C +#define PM_SCLK 0x10 +#define PM_SED 0x1 +#define PM_SCXD 0x18 +#define PM_SCYD 0x1C +#define PM_IN_SLPCTL 0x20 +#define PM_WKD 0x30 +#define PM_WKXD 0x34 +#define PM_RD 0x38 +#define PM_WKXA 0x3C +#define PM_FSD 0x40 +#define PM_TSD 0x44 +#define PM_PSD 0x48 +#define PM_NWKD 0x4C +#define PM_AWKD 0x50 +#define PM_SSC 0x54 + +/* VSA2 magic values */ + +#define VSA_VRC_INDEX 0xAC1C +#define VSA_VRC_DATA 0xAC1E +#define VSA_VR_UNLOCK 0xFC53 /* unlock virtual register */ +#define VSA_VR_SIGNATURE 0x0003 +#define VSA_VR_MEM_SIZE 0x0200 +#define AMD_VSA_SIG 0x4132 /* signature is ascii 'VSA2' */ +#define GSW_VSA_SIG 0x534d /* General Software signature */ +/* GPIO */ + +#define GPIO_OUTPUT_VAL 0x00 +#define GPIO_OUTPUT_ENABLE 0x04 +#define GPIO_OUTPUT_OPEN_DRAIN 0x08 +#define GPIO_OUTPUT_INVERT 0x0C +#define GPIO_OUTPUT_AUX1 0x10 +#define GPIO_OUTPUT_AUX2 0x14 +#define GPIO_PULL_UP 0x18 +#define GPIO_PULL_DOWN 0x1C +#define GPIO_INPUT_ENABLE 0x20 +#define GPIO_INPUT_INVERT 0x24 +#define GPIO_INPUT_FILTER 0x28 +#define GPIO_INPUT_EVENT_COUNT 0x2C +#define GPIO_READ_BACK 0x30 +#define GPIO_INPUT_AUX1 0x34 +#define GPIO_EVENTS_ENABLE 0x38 +#define GPIO_LOCK_ENABLE 0x3C +#define GPIO_POSITIVE_EDGE_EN 0x40 +#define GPIO_NEGATIVE_EDGE_EN 0x44 +#define GPIO_POSITIVE_EDGE_STS 0x48 +#define GPIO_NEGATIVE_EDGE_STS 0x4C + +#define GPIO_MAP_X 0xE0 +#define GPIO_MAP_Y 0xE4 +#define GPIO_MAP_Z 0xE8 +#define GPIO_MAP_W 0xEC + +static inline u32 geode_gpio(unsigned int nr) +{ + BUG_ON(nr > 28); + return 1 << nr; +} + +extern void geode_gpio_set(u32, unsigned int); +extern void geode_gpio_clear(u32, unsigned int); +extern int geode_gpio_isset(u32, unsigned int); +extern void geode_gpio_setup_event(unsigned int, int, int); +extern void geode_gpio_set_irq(unsigned int, unsigned int); + +static inline void geode_gpio_event_irq(unsigned int gpio, int pair) +{ + geode_gpio_setup_event(gpio, pair, 0); +} + +static inline void geode_gpio_event_pme(unsigned int gpio, int pair) +{ + geode_gpio_setup_event(gpio, pair, 1); +} + +/* Specific geode tests */ + +static inline int is_geode_gx(void) +{ + return ((boot_cpu_data.x86_vendor == X86_VENDOR_NSC) && + (boot_cpu_data.x86 == 5) && + (boot_cpu_data.x86_model == 5)); +} + +static inline int is_geode_lx(void) +{ + return ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && + (boot_cpu_data.x86 == 5) && + (boot_cpu_data.x86_model == 10)); +} + +static inline int is_geode(void) +{ + return (is_geode_gx() || is_geode_lx()); +} + +#ifdef CONFIG_MGEODE_LX +extern int geode_has_vsa2(void); +#else +static inline int geode_has_vsa2(void) +{ + return 0; +} +#endif + +/* MFGPTs */ + +#define MFGPT_MAX_TIMERS 8 +#define MFGPT_TIMER_ANY (-1) + +#define MFGPT_DOMAIN_WORKING 1 +#define MFGPT_DOMAIN_STANDBY 2 +#define MFGPT_DOMAIN_ANY (MFGPT_DOMAIN_WORKING | MFGPT_DOMAIN_STANDBY) + +#define MFGPT_CMP1 0 +#define MFGPT_CMP2 1 + +#define MFGPT_EVENT_IRQ 0 +#define MFGPT_EVENT_NMI 1 +#define MFGPT_EVENT_RESET 3 + +#define MFGPT_REG_CMP1 0 +#define MFGPT_REG_CMP2 2 +#define MFGPT_REG_COUNTER 4 +#define MFGPT_REG_SETUP 6 + +#define MFGPT_SETUP_CNTEN (1 << 15) +#define MFGPT_SETUP_CMP2 (1 << 14) +#define MFGPT_SETUP_CMP1 (1 << 13) +#define MFGPT_SETUP_SETUP (1 << 12) +#define MFGPT_SETUP_STOPEN (1 << 11) +#define MFGPT_SETUP_EXTEN (1 << 10) +#define MFGPT_SETUP_REVEN (1 << 5) +#define MFGPT_SETUP_CLKSEL (1 << 4) + +static inline void geode_mfgpt_write(int timer, u16 reg, u16 value) +{ + u32 base = geode_get_dev_base(GEODE_DEV_MFGPT); + outw(value, base + reg + (timer * 8)); +} + +static inline u16 geode_mfgpt_read(int timer, u16 reg) +{ + u32 base = geode_get_dev_base(GEODE_DEV_MFGPT); + return inw(base + reg + (timer * 8)); +} + +extern int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable); +extern int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable); +extern int geode_mfgpt_alloc_timer(int timer, int domain); + +#define geode_mfgpt_setup_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 1) +#define geode_mfgpt_release_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 0) + +#ifdef CONFIG_GEODE_MFGPT_TIMER +extern int __init mfgpt_timer_setup(void); +#else +static inline int mfgpt_timer_setup(void) { return 0; } +#endif + +#endif /* _ASM_X86_GEODE_H */ diff --git a/arch/x86/include/asm/gpio.h b/arch/x86/include/asm/gpio.h new file mode 100644 index 00000000000..49dbfdfa50f --- /dev/null +++ b/arch/x86/include/asm/gpio.h @@ -0,0 +1,56 @@ +/* + * Generic GPIO API implementation for x86. + * + * Derived from the generic GPIO API for powerpc: + * + * Copyright (c) 2007-2008 MontaVista Software, Inc. + * + * Author: Anton Vorontsov <avorontsov@ru.mvista.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _ASM_X86_GPIO_H +#define _ASM_X86_GPIO_H + +#include <asm-generic/gpio.h> + +#ifdef CONFIG_GPIOLIB + +/* + * Just call gpiolib. + */ +static inline int gpio_get_value(unsigned int gpio) +{ + return __gpio_get_value(gpio); +} + +static inline void gpio_set_value(unsigned int gpio, int value) +{ + __gpio_set_value(gpio, value); +} + +static inline int gpio_cansleep(unsigned int gpio) +{ + return __gpio_cansleep(gpio); +} + +/* + * Not implemented, yet. + */ +static inline int gpio_to_irq(unsigned int gpio) +{ + return -ENOSYS; +} + +static inline int irq_to_gpio(unsigned int irq) +{ + return -EINVAL; +} + +#endif /* CONFIG_GPIOLIB */ + +#endif /* _ASM_X86_GPIO_H */ diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h new file mode 100644 index 00000000000..000787df66e --- /dev/null +++ b/arch/x86/include/asm/hardirq.h @@ -0,0 +1,11 @@ +#ifdef CONFIG_X86_32 +# include "hardirq_32.h" +#else +# include "hardirq_64.h" +#endif + +extern u64 arch_irq_stat_cpu(unsigned int cpu); +#define arch_irq_stat_cpu arch_irq_stat_cpu + +extern u64 arch_irq_stat(void); +#define arch_irq_stat arch_irq_stat diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h new file mode 100644 index 00000000000..5ca135e72f2 --- /dev/null +++ b/arch/x86/include/asm/hardirq_32.h @@ -0,0 +1,28 @@ +#ifndef _ASM_X86_HARDIRQ_32_H +#define _ASM_X86_HARDIRQ_32_H + +#include <linux/threads.h> +#include <linux/irq.h> + +typedef struct { + unsigned int __softirq_pending; + unsigned long idle_timestamp; + unsigned int __nmi_count; /* arch dependent */ + unsigned int apic_timer_irqs; /* arch dependent */ + unsigned int irq0_irqs; + unsigned int irq_resched_count; + unsigned int irq_call_count; + unsigned int irq_tlb_count; + unsigned int irq_thermal_count; + unsigned int irq_spurious_count; +} ____cacheline_aligned irq_cpustat_t; + +DECLARE_PER_CPU(irq_cpustat_t, irq_stat); + +#define __ARCH_IRQ_STAT +#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) + +void ack_bad_irq(unsigned int irq); +#include <linux/irq_cpustat.h> + +#endif /* _ASM_X86_HARDIRQ_32_H */ diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h new file mode 100644 index 00000000000..1ba381fc51d --- /dev/null +++ b/arch/x86/include/asm/hardirq_64.h @@ -0,0 +1,23 @@ +#ifndef _ASM_X86_HARDIRQ_64_H +#define _ASM_X86_HARDIRQ_64_H + +#include <linux/threads.h> +#include <linux/irq.h> +#include <asm/pda.h> +#include <asm/apic.h> + +/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ +#define MAX_HARDIRQS_PER_CPU NR_VECTORS + +#define __ARCH_IRQ_STAT 1 + +#define local_softirq_pending() read_pda(__softirq_pending) + +#define __ARCH_SET_SOFTIRQ_PENDING 1 + +#define set_softirq_pending(x) write_pda(__softirq_pending, (x)) +#define or_softirq_pending(x) or_pda(__softirq_pending, (x)) + +extern void ack_bad_irq(unsigned int irq); + +#endif /* _ASM_X86_HARDIRQ_64_H */ diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h new file mode 100644 index 00000000000..a3b3b7c3027 --- /dev/null +++ b/arch/x86/include/asm/highmem.h @@ -0,0 +1,82 @@ +/* + * highmem.h: virtual kernel memory mappings for high memory + * + * Used in CONFIG_HIGHMEM systems for memory pages which + * are not addressable by direct kernel virtual addresses. + * + * Copyright (C) 1999 Gerhard Wichert, Siemens AG + * Gerhard.Wichert@pdb.siemens.de + * + * + * Redesigned the x86 32-bit VM architecture to deal with + * up to 16 Terabyte physical memory. With current x86 CPUs + * we now support up to 64 Gigabytes physical RAM. + * + * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> + */ + +#ifndef _ASM_X86_HIGHMEM_H +#define _ASM_X86_HIGHMEM_H + +#ifdef __KERNEL__ + +#include <linux/interrupt.h> +#include <linux/threads.h> +#include <asm/kmap_types.h> +#include <asm/tlbflush.h> +#include <asm/paravirt.h> + +/* declarations for highmem.c */ +extern unsigned long highstart_pfn, highend_pfn; + +extern pte_t *kmap_pte; +extern pgprot_t kmap_prot; +extern pte_t *pkmap_page_table; + +/* + * Right now we initialize only a single pte table. It can be extended + * easily, subsequent pte tables have to be allocated in one physical + * chunk of RAM. + */ +/* + * Ordering is: + * + * FIXADDR_TOP + * fixed_addresses + * FIXADDR_START + * temp fixed addresses + * FIXADDR_BOOT_START + * Persistent kmap area + * PKMAP_BASE + * VMALLOC_END + * Vmalloc area + * VMALLOC_START + * high_memory + */ +#define LAST_PKMAP_MASK (LAST_PKMAP-1) +#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) + +extern void *kmap_high(struct page *page); +extern void kunmap_high(struct page *page); + +void *kmap(struct page *page); +void kunmap(struct page *page); +void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); +void *kmap_atomic(struct page *page, enum km_type type); +void kunmap_atomic(void *kvaddr, enum km_type type); +void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); +struct page *kmap_atomic_to_page(void *ptr); + +#ifndef CONFIG_PARAVIRT +#define kmap_atomic_pte(page, type) kmap_atomic(page, type) +#endif + +#define flush_cache_kmaps() do { } while (0) + +extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, + unsigned long end_pfn); + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_HIGHMEM_H */ diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h new file mode 100644 index 00000000000..1c22cb05ad6 --- /dev/null +++ b/arch/x86/include/asm/hpet.h @@ -0,0 +1,114 @@ +#ifndef _ASM_X86_HPET_H +#define _ASM_X86_HPET_H + +#include <linux/msi.h> + +#ifdef CONFIG_HPET_TIMER + +#define HPET_MMAP_SIZE 1024 + +#define HPET_ID 0x000 +#define HPET_PERIOD 0x004 +#define HPET_CFG 0x010 +#define HPET_STATUS 0x020 +#define HPET_COUNTER 0x0f0 + +#define HPET_Tn_CFG(n) (0x100 + 0x20 * n) +#define HPET_Tn_CMP(n) (0x108 + 0x20 * n) +#define HPET_Tn_ROUTE(n) (0x110 + 0x20 * n) + +#define HPET_T0_CFG 0x100 +#define HPET_T0_CMP 0x108 +#define HPET_T0_ROUTE 0x110 +#define HPET_T1_CFG 0x120 +#define HPET_T1_CMP 0x128 +#define HPET_T1_ROUTE 0x130 +#define HPET_T2_CFG 0x140 +#define HPET_T2_CMP 0x148 +#define HPET_T2_ROUTE 0x150 + +#define HPET_ID_REV 0x000000ff +#define HPET_ID_NUMBER 0x00001f00 +#define HPET_ID_64BIT 0x00002000 +#define HPET_ID_LEGSUP 0x00008000 +#define HPET_ID_VENDOR 0xffff0000 +#define HPET_ID_NUMBER_SHIFT 8 +#define HPET_ID_VENDOR_SHIFT 16 + +#define HPET_ID_VENDOR_8086 0x8086 + +#define HPET_CFG_ENABLE 0x001 +#define HPET_CFG_LEGACY 0x002 +#define HPET_LEGACY_8254 2 +#define HPET_LEGACY_RTC 8 + +#define HPET_TN_LEVEL 0x0002 +#define HPET_TN_ENABLE 0x0004 +#define HPET_TN_PERIODIC 0x0008 +#define HPET_TN_PERIODIC_CAP 0x0010 +#define HPET_TN_64BIT_CAP 0x0020 +#define HPET_TN_SETVAL 0x0040 +#define HPET_TN_32BIT 0x0100 +#define HPET_TN_ROUTE 0x3e00 +#define HPET_TN_FSB 0x4000 +#define HPET_TN_FSB_CAP 0x8000 +#define HPET_TN_ROUTE_SHIFT 9 + +/* Max HPET Period is 10^8 femto sec as in HPET spec */ +#define HPET_MAX_PERIOD 100000000UL +/* + * Min HPET period is 10^5 femto sec just for safety. If it is less than this, + * then 32 bit HPET counter wrapsaround in less than 0.5 sec. + */ +#define HPET_MIN_PERIOD 100000UL + +/* hpet memory map physical address */ +extern unsigned long hpet_address; +extern unsigned long force_hpet_address; +extern int hpet_force_user; +extern int is_hpet_enabled(void); +extern int hpet_enable(void); +extern void hpet_disable(void); +extern unsigned long hpet_readl(unsigned long a); +extern void force_hpet_resume(void); + +extern void hpet_msi_unmask(unsigned int irq); +extern void hpet_msi_mask(unsigned int irq); +extern void hpet_msi_write(unsigned int irq, struct msi_msg *msg); +extern void hpet_msi_read(unsigned int irq, struct msi_msg *msg); + +#ifdef CONFIG_PCI_MSI +extern int arch_setup_hpet_msi(unsigned int irq); +#else +static inline int arch_setup_hpet_msi(unsigned int irq) +{ + return -EINVAL; +} +#endif + +#ifdef CONFIG_HPET_EMULATE_RTC + +#include <linux/interrupt.h> + +typedef irqreturn_t (*rtc_irq_handler)(int interrupt, void *cookie); +extern int hpet_mask_rtc_irq_bit(unsigned long bit_mask); +extern int hpet_set_rtc_irq_bit(unsigned long bit_mask); +extern int hpet_set_alarm_time(unsigned char hrs, unsigned char min, + unsigned char sec); +extern int hpet_set_periodic_freq(unsigned long freq); +extern int hpet_rtc_dropped_irq(void); +extern int hpet_rtc_timer_init(void); +extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id); +extern int hpet_register_irq_handler(rtc_irq_handler handler); +extern void hpet_unregister_irq_handler(rtc_irq_handler handler); + +#endif /* CONFIG_HPET_EMULATE_RTC */ + +#else /* CONFIG_HPET_TIMER */ + +static inline int hpet_enable(void) { return 0; } +static inline int is_hpet_enabled(void) { return 0; } +#define hpet_readl(a) 0 + +#endif +#endif /* _ASM_X86_HPET_H */ diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h new file mode 100644 index 00000000000..439a9acc132 --- /dev/null +++ b/arch/x86/include/asm/hugetlb.h @@ -0,0 +1,93 @@ +#ifndef _ASM_X86_HUGETLB_H +#define _ASM_X86_HUGETLB_H + +#include <asm/page.h> + + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, + unsigned long len) { + return 0; +} + +/* + * If the arch doesn't supply something else, assume that hugepage + * size aligned regions are ok without further preparation. + */ +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) +{ + struct hstate *h = hstate_file(file); + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (addr & ~huge_page_mask(h)) + return -EINVAL; + return 0; +} + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) { +} + +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, + unsigned long ceiling) +{ + free_pgd_range(tlb, addr, end, floor, ceiling); +} + +static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + set_pte_at(mm, addr, ptep, pte); +} + +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + return *ptep; +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +#endif /* _ASM_X86_HUGETLB_H */ diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h new file mode 100644 index 00000000000..b97aecb0b61 --- /dev/null +++ b/arch/x86/include/asm/hw_irq.h @@ -0,0 +1,131 @@ +#ifndef _ASM_X86_HW_IRQ_H +#define _ASM_X86_HW_IRQ_H + +/* + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar + * + * moved some of the old arch/i386/kernel/irq.h to here. VY + * + * IRQ/IPI changes taken from work by Thomas Radke + * <tomsoft@informatik.tu-chemnitz.de> + * + * hacked by Andi Kleen for x86-64. + * unified by tglx + */ + +#include <asm/irq_vectors.h> + +#ifndef __ASSEMBLY__ + +#include <linux/percpu.h> +#include <linux/profile.h> +#include <linux/smp.h> + +#include <asm/atomic.h> +#include <asm/irq.h> +#include <asm/sections.h> + +#define platform_legacy_irq(irq) ((irq) < 16) + +/* Interrupt handlers registered during init_IRQ */ +extern void apic_timer_interrupt(void); +extern void error_interrupt(void); +extern void spurious_interrupt(void); +extern void thermal_interrupt(void); +extern void reschedule_interrupt(void); + +extern void invalidate_interrupt(void); +extern void invalidate_interrupt0(void); +extern void invalidate_interrupt1(void); +extern void invalidate_interrupt2(void); +extern void invalidate_interrupt3(void); +extern void invalidate_interrupt4(void); +extern void invalidate_interrupt5(void); +extern void invalidate_interrupt6(void); +extern void invalidate_interrupt7(void); + +extern void irq_move_cleanup_interrupt(void); +extern void threshold_interrupt(void); + +extern void call_function_interrupt(void); +extern void call_function_single_interrupt(void); + +/* PIC specific functions */ +extern void disable_8259A_irq(unsigned int irq); +extern void enable_8259A_irq(unsigned int irq); +extern int i8259A_irq_pending(unsigned int irq); +extern void make_8259A_irq(unsigned int irq); +extern void init_8259A(int aeoi); + +/* IOAPIC */ +#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs)) +extern unsigned long io_apic_irqs; + +extern void init_VISWS_APIC_irqs(void); +extern void setup_IO_APIC(void); +extern void disable_IO_APIC(void); +extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); +extern void setup_ioapic_dest(void); + +#ifdef CONFIG_X86_64 +extern void enable_IO_APIC(void); +#endif + +/* IPI functions */ +#ifdef CONFIG_X86_32 +extern void send_IPI_self(int vector); +#endif +extern void send_IPI(int dest, int vector); + +/* Statistics */ +extern atomic_t irq_err_count; +extern atomic_t irq_mis_count; + +/* EISA */ +extern void eisa_set_level_irq(unsigned int irq); + +/* Voyager functions */ +extern asmlinkage void vic_cpi_interrupt(void); +extern asmlinkage void vic_sys_interrupt(void); +extern asmlinkage void vic_cmn_interrupt(void); +extern asmlinkage void qic_timer_interrupt(void); +extern asmlinkage void qic_invalidate_interrupt(void); +extern asmlinkage void qic_reschedule_interrupt(void); +extern asmlinkage void qic_enable_irq_interrupt(void); +extern asmlinkage void qic_call_function_interrupt(void); + +/* SMP */ +extern void smp_apic_timer_interrupt(struct pt_regs *); +extern void smp_spurious_interrupt(struct pt_regs *); +extern void smp_error_interrupt(struct pt_regs *); +#ifdef CONFIG_X86_SMP +extern void smp_reschedule_interrupt(struct pt_regs *); +extern void smp_call_function_interrupt(struct pt_regs *); +extern void smp_call_function_single_interrupt(struct pt_regs *); +#ifdef CONFIG_X86_32 +extern void smp_invalidate_interrupt(struct pt_regs *); +#else +extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *); +#endif +#endif + +#ifdef CONFIG_X86_32 +extern void (*const interrupt[NR_VECTORS])(void); +#endif + +typedef int vector_irq_t[NR_VECTORS]; +DECLARE_PER_CPU(vector_irq_t, vector_irq); + +#ifdef CONFIG_X86_IO_APIC +extern void lock_vector_lock(void); +extern void unlock_vector_lock(void); +extern void __setup_vector_irq(int cpu); +#else +static inline void lock_vector_lock(void) {} +static inline void unlock_vector_lock(void) {} +static inline void __setup_vector_irq(int cpu) {} +#endif + +#endif /* !ASSEMBLY_ */ + +#endif /* _ASM_X86_HW_IRQ_H */ diff --git a/arch/x86/include/asm/hypertransport.h b/arch/x86/include/asm/hypertransport.h new file mode 100644 index 00000000000..334b1a885d9 --- /dev/null +++ b/arch/x86/include/asm/hypertransport.h @@ -0,0 +1,45 @@ +#ifndef _ASM_X86_HYPERTRANSPORT_H +#define _ASM_X86_HYPERTRANSPORT_H + +/* + * Constants for x86 Hypertransport Interrupts. + */ + +#define HT_IRQ_LOW_BASE 0xf8000000 + +#define HT_IRQ_LOW_VECTOR_SHIFT 16 +#define HT_IRQ_LOW_VECTOR_MASK 0x00ff0000 +#define HT_IRQ_LOW_VECTOR(v) \ + (((v) << HT_IRQ_LOW_VECTOR_SHIFT) & HT_IRQ_LOW_VECTOR_MASK) + +#define HT_IRQ_LOW_DEST_ID_SHIFT 8 +#define HT_IRQ_LOW_DEST_ID_MASK 0x0000ff00 +#define HT_IRQ_LOW_DEST_ID(v) \ + (((v) << HT_IRQ_LOW_DEST_ID_SHIFT) & HT_IRQ_LOW_DEST_ID_MASK) + +#define HT_IRQ_LOW_DM_PHYSICAL 0x0000000 +#define HT_IRQ_LOW_DM_LOGICAL 0x0000040 + +#define HT_IRQ_LOW_RQEOI_EDGE 0x0000000 +#define HT_IRQ_LOW_RQEOI_LEVEL 0x0000020 + + +#define HT_IRQ_LOW_MT_FIXED 0x0000000 +#define HT_IRQ_LOW_MT_ARBITRATED 0x0000004 +#define HT_IRQ_LOW_MT_SMI 0x0000008 +#define HT_IRQ_LOW_MT_NMI 0x000000c +#define HT_IRQ_LOW_MT_INIT 0x0000010 +#define HT_IRQ_LOW_MT_STARTUP 0x0000014 +#define HT_IRQ_LOW_MT_EXTINT 0x0000018 +#define HT_IRQ_LOW_MT_LINT1 0x000008c +#define HT_IRQ_LOW_MT_LINT0 0x0000098 + +#define HT_IRQ_LOW_IRQ_MASKED 0x0000001 + + +#define HT_IRQ_HIGH_DEST_ID_SHIFT 0 +#define HT_IRQ_HIGH_DEST_ID_MASK 0x00ffffff +#define HT_IRQ_HIGH_DEST_ID(v) \ + ((((v) >> 8) << HT_IRQ_HIGH_DEST_ID_SHIFT) & HT_IRQ_HIGH_DEST_ID_MASK) + +#endif /* _ASM_X86_HYPERTRANSPORT_H */ diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h new file mode 100644 index 00000000000..48f0004db8c --- /dev/null +++ b/arch/x86/include/asm/i387.h @@ -0,0 +1,400 @@ +/* + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes <gareth@valinux.com>, May 2000 + * x86-64 work by Andi Kleen 2002 + */ + +#ifndef _ASM_X86_I387_H +#define _ASM_X86_I387_H + +#include <linux/sched.h> +#include <linux/kernel_stat.h> +#include <linux/regset.h> +#include <linux/hardirq.h> +#include <asm/asm.h> +#include <asm/processor.h> +#include <asm/sigcontext.h> +#include <asm/user.h> +#include <asm/uaccess.h> +#include <asm/xsave.h> + +extern unsigned int sig_xstate_size; +extern void fpu_init(void); +extern void mxcsr_feature_mask_init(void); +extern int init_fpu(struct task_struct *child); +extern asmlinkage void math_state_restore(void); +extern void init_thread_xstate(void); +extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); + +extern user_regset_active_fn fpregs_active, xfpregs_active; +extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; +extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set; + +extern struct _fpx_sw_bytes fx_sw_reserved; +#ifdef CONFIG_IA32_EMULATION +extern unsigned int sig_xstate_ia32_size; +extern struct _fpx_sw_bytes fx_sw_reserved_ia32; +struct _fpstate_ia32; +struct _xstate_ia32; +extern int save_i387_xstate_ia32(void __user *buf); +extern int restore_i387_xstate_ia32(void __user *buf); +#endif + +#define X87_FSW_ES (1 << 7) /* Exception Summary */ + +#ifdef CONFIG_X86_64 + +/* Ignore delayed exceptions from user space */ +static inline void tolerant_fwait(void) +{ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); +} + +static inline int fxrstor_checking(struct i387_fxsave_struct *fx) +{ + int err; + + asm volatile("1: rex64/fxrstor (%[fx])\n\t" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [err] "=r" (err) +#if 0 /* See comment in __save_init_fpu() below. */ + : [fx] "r" (fx), "m" (*fx), "0" (0)); +#else + : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0)); +#endif + return err; +} + +static inline int restore_fpu_checking(struct task_struct *tsk) +{ + if (task_thread_info(tsk)->status & TS_XSAVE) + return xrstor_checking(&tsk->thread.xstate->xsave); + else + return fxrstor_checking(&tsk->thread.xstate->fxsave); +} + +/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception + is pending. Clear the x87 state here by setting it to fixed + values. The kernel data segment can be sometimes 0 and sometimes + new user value. Both should be ok. + Use the PDA as safe address because it should be already in L1. */ +static inline void clear_fpu_state(struct task_struct *tsk) +{ + struct xsave_struct *xstate = &tsk->thread.xstate->xsave; + struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; + + /* + * xsave header may indicate the init state of the FP. + */ + if ((task_thread_info(tsk)->status & TS_XSAVE) && + !(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) + return; + + if (unlikely(fx->swd & X87_FSW_ES)) + asm volatile("fnclex"); + alternative_input(ASM_NOP8 ASM_NOP2, + " emms\n" /* clear stack tags */ + " fildl %%gs:0", /* load to clear state */ + X86_FEATURE_FXSAVE_LEAK); +} + +static inline int fxsave_user(struct i387_fxsave_struct __user *fx) +{ + int err; + + asm volatile("1: rex64/fxsave (%[fx])\n\t" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [err] "=r" (err), "=m" (*fx) +#if 0 /* See comment in __fxsave_clear() below. */ + : [fx] "r" (fx), "0" (0)); +#else + : [fx] "cdaSDb" (fx), "0" (0)); +#endif + if (unlikely(err) && + __clear_user(fx, sizeof(struct i387_fxsave_struct))) + err = -EFAULT; + /* No need to clear here because the caller clears USED_MATH */ + return err; +} + +static inline void fxsave(struct task_struct *tsk) +{ + /* Using "rex64; fxsave %0" is broken because, if the memory operand + uses any extended registers for addressing, a second REX prefix + will be generated (to the assembler, rex64 followed by semicolon + is a separate instruction), and hence the 64-bitness is lost. */ +#if 0 + /* Using "fxsaveq %0" would be the ideal choice, but is only supported + starting with gas 2.16. */ + __asm__ __volatile__("fxsaveq %0" + : "=m" (tsk->thread.xstate->fxsave)); +#elif 0 + /* Using, as a workaround, the properly prefixed form below isn't + accepted by any binutils version so far released, complaining that + the same type of prefix is used twice if an extended register is + needed for addressing (fix submitted to mainline 2005-11-21). */ + __asm__ __volatile__("rex64/fxsave %0" + : "=m" (tsk->thread.xstate->fxsave)); +#else + /* This, however, we can work around by forcing the compiler to select + an addressing mode that doesn't require extended registers. */ + __asm__ __volatile__("rex64/fxsave (%1)" + : "=m" (tsk->thread.xstate->fxsave) + : "cdaSDb" (&tsk->thread.xstate->fxsave)); +#endif +} + +static inline void __save_init_fpu(struct task_struct *tsk) +{ + if (task_thread_info(tsk)->status & TS_XSAVE) + xsave(tsk); + else + fxsave(tsk); + + clear_fpu_state(tsk); + task_thread_info(tsk)->status &= ~TS_USEDFPU; +} + +#else /* CONFIG_X86_32 */ + +extern void finit(void); + +static inline void tolerant_fwait(void) +{ + asm volatile("fnclex ; fwait"); +} + +static inline void restore_fpu(struct task_struct *tsk) +{ + if (task_thread_info(tsk)->status & TS_XSAVE) { + xrstor_checking(&tsk->thread.xstate->xsave); + return; + } + /* + * The "nop" is needed to make the instructions the same + * length. + */ + alternative_input( + "nop ; frstor %1", + "fxrstor %1", + X86_FEATURE_FXSR, + "m" (tsk->thread.xstate->fxsave)); +} + +/* We need a safe address that is cheap to find and that is already + in L1 during context switch. The best choices are unfortunately + different for UP and SMP */ +#ifdef CONFIG_SMP +#define safe_address (__per_cpu_offset[0]) +#else +#define safe_address (kstat_cpu(0).cpustat.user) +#endif + +/* + * These must be called with preempt disabled + */ +static inline void __save_init_fpu(struct task_struct *tsk) +{ + if (task_thread_info(tsk)->status & TS_XSAVE) { + struct xsave_struct *xstate = &tsk->thread.xstate->xsave; + struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; + + xsave(tsk); + + /* + * xsave header may indicate the init state of the FP. + */ + if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) + goto end; + + if (unlikely(fx->swd & X87_FSW_ES)) + asm volatile("fnclex"); + + /* + * we can do a simple return here or be paranoid :) + */ + goto clear_state; + } + + /* Use more nops than strictly needed in case the compiler + varies code */ + alternative_input( + "fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4, + "fxsave %[fx]\n" + "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", + X86_FEATURE_FXSR, + [fx] "m" (tsk->thread.xstate->fxsave), + [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory"); +clear_state: + /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception + is pending. Clear the x87 state here by setting it to fixed + values. safe_address is a random variable that should be in L1 */ + alternative_input( + GENERIC_NOP8 GENERIC_NOP2, + "emms\n\t" /* clear stack tags */ + "fildl %[addr]", /* set F?P to defined value */ + X86_FEATURE_FXSAVE_LEAK, + [addr] "m" (safe_address)); +end: + task_thread_info(tsk)->status &= ~TS_USEDFPU; +} + +#endif /* CONFIG_X86_64 */ + +/* + * Signal frame handlers... + */ +extern int save_i387_xstate(void __user *buf); +extern int restore_i387_xstate(void __user *buf); + +static inline void __unlazy_fpu(struct task_struct *tsk) +{ + if (task_thread_info(tsk)->status & TS_USEDFPU) { + __save_init_fpu(tsk); + stts(); + } else + tsk->fpu_counter = 0; +} + +static inline void __clear_fpu(struct task_struct *tsk) +{ + if (task_thread_info(tsk)->status & TS_USEDFPU) { + tolerant_fwait(); + task_thread_info(tsk)->status &= ~TS_USEDFPU; + stts(); + } +} + +static inline void kernel_fpu_begin(void) +{ + struct thread_info *me = current_thread_info(); + preempt_disable(); + if (me->status & TS_USEDFPU) + __save_init_fpu(me->task); + else + clts(); +} + +static inline void kernel_fpu_end(void) +{ + stts(); + preempt_enable(); +} + +/* + * Some instructions like VIA's padlock instructions generate a spurious + * DNA fault but don't modify SSE registers. And these instructions + * get used from interrupt context aswell. To prevent these kernel instructions + * in interrupt context interact wrongly with other user/kernel fpu usage, we + * should use them only in the context of irq_ts_save/restore() + */ +static inline int irq_ts_save(void) +{ + /* + * If we are in process context, we are ok to take a spurious DNA fault. + * Otherwise, doing clts() in process context require pre-emption to + * be disabled or some heavy lifting like kernel_fpu_begin() + */ + if (!in_interrupt()) + return 0; + + if (read_cr0() & X86_CR0_TS) { + clts(); + return 1; + } + + return 0; +} + +static inline void irq_ts_restore(int TS_state) +{ + if (TS_state) + stts(); +} + +#ifdef CONFIG_X86_64 + +static inline void save_init_fpu(struct task_struct *tsk) +{ + __save_init_fpu(tsk); + stts(); +} + +#define unlazy_fpu __unlazy_fpu +#define clear_fpu __clear_fpu + +#else /* CONFIG_X86_32 */ + +/* + * These disable preemption on their own and are safe + */ +static inline void save_init_fpu(struct task_struct *tsk) +{ + preempt_disable(); + __save_init_fpu(tsk); + stts(); + preempt_enable(); +} + +static inline void unlazy_fpu(struct task_struct *tsk) +{ + preempt_disable(); + __unlazy_fpu(tsk); + preempt_enable(); +} + +static inline void clear_fpu(struct task_struct *tsk) +{ + preempt_disable(); + __clear_fpu(tsk); + preempt_enable(); +} + +#endif /* CONFIG_X86_64 */ + +/* + * i387 state interaction + */ +static inline unsigned short get_fpu_cwd(struct task_struct *tsk) +{ + if (cpu_has_fxsr) { + return tsk->thread.xstate->fxsave.cwd; + } else { + return (unsigned short)tsk->thread.xstate->fsave.cwd; + } +} + +static inline unsigned short get_fpu_swd(struct task_struct *tsk) +{ + if (cpu_has_fxsr) { + return tsk->thread.xstate->fxsave.swd; + } else { + return (unsigned short)tsk->thread.xstate->fsave.swd; + } +} + +static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) +{ + if (cpu_has_xmm) { + return tsk->thread.xstate->fxsave.mxcsr; + } else { + return MXCSR_DEFAULT; + } +} + +#endif /* _ASM_X86_I387_H */ diff --git a/arch/x86/include/asm/i8253.h b/arch/x86/include/asm/i8253.h new file mode 100644 index 00000000000..1edbf89680f --- /dev/null +++ b/arch/x86/include/asm/i8253.h @@ -0,0 +1,18 @@ +#ifndef _ASM_X86_I8253_H +#define _ASM_X86_I8253_H + +/* i8253A PIT registers */ +#define PIT_MODE 0x43 +#define PIT_CH0 0x40 +#define PIT_CH2 0x42 + +extern spinlock_t i8253_lock; + +extern struct clock_event_device *global_clock_event; + +extern void setup_pit_timer(void); + +#define inb_pit inb_p +#define outb_pit outb_p + +#endif /* _ASM_X86_I8253_H */ diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h new file mode 100644 index 00000000000..58d7091eeb1 --- /dev/null +++ b/arch/x86/include/asm/i8259.h @@ -0,0 +1,63 @@ +#ifndef _ASM_X86_I8259_H +#define _ASM_X86_I8259_H + +#include <linux/delay.h> + +extern unsigned int cached_irq_mask; + +#define __byte(x, y) (((unsigned char *)&(y))[x]) +#define cached_master_mask (__byte(0, cached_irq_mask)) +#define cached_slave_mask (__byte(1, cached_irq_mask)) + +/* i8259A PIC registers */ +#define PIC_MASTER_CMD 0x20 +#define PIC_MASTER_IMR 0x21 +#define PIC_MASTER_ISR PIC_MASTER_CMD +#define PIC_MASTER_POLL PIC_MASTER_ISR +#define PIC_MASTER_OCW3 PIC_MASTER_ISR +#define PIC_SLAVE_CMD 0xa0 +#define PIC_SLAVE_IMR 0xa1 + +/* i8259A PIC related value */ +#define PIC_CASCADE_IR 2 +#define MASTER_ICW4_DEFAULT 0x01 +#define SLAVE_ICW4_DEFAULT 0x01 +#define PIC_ICW4_AEOI 2 + +extern spinlock_t i8259A_lock; + +extern void init_8259A(int auto_eoi); +extern void enable_8259A_irq(unsigned int irq); +extern void disable_8259A_irq(unsigned int irq); +extern unsigned int startup_8259A_irq(unsigned int irq); + +/* the PIC may need a careful delay on some platforms, hence specific calls */ +static inline unsigned char inb_pic(unsigned int port) +{ + unsigned char value = inb(port); + + /* + * delay for some accesses to PIC on motherboard or in chipset + * must be at least one microsecond, so be safe here: + */ + udelay(2); + + return value; +} + +static inline void outb_pic(unsigned char value, unsigned int port) +{ + outb(value, port); + /* + * delay for some accesses to PIC on motherboard or in chipset + * must be at least one microsecond, so be safe here: + */ + udelay(2); +} + +extern struct irq_chip i8259A_chip; + +extern void mask_8259A(void); +extern void unmask_8259A(void); + +#endif /* _ASM_X86_I8259_H */ diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h new file mode 100644 index 00000000000..97989c0e534 --- /dev/null +++ b/arch/x86/include/asm/ia32.h @@ -0,0 +1,170 @@ +#ifndef _ASM_X86_IA32_H +#define _ASM_X86_IA32_H + + +#ifdef CONFIG_IA32_EMULATION + +#include <linux/compat.h> + +/* + * 32 bit structures for IA32 support. + */ + +#include <asm/sigcontext32.h> + +/* signal.h */ +struct sigaction32 { + unsigned int sa_handler; /* Really a pointer, but need to deal + with 32 bits */ + unsigned int sa_flags; + unsigned int sa_restorer; /* Another 32 bit pointer */ + compat_sigset_t sa_mask; /* A 32 bit mask */ +}; + +struct old_sigaction32 { + unsigned int sa_handler; /* Really a pointer, but need to deal + with 32 bits */ + compat_old_sigset_t sa_mask; /* A 32 bit mask */ + unsigned int sa_flags; + unsigned int sa_restorer; /* Another 32 bit pointer */ +}; + +typedef struct sigaltstack_ia32 { + unsigned int ss_sp; + int ss_flags; + unsigned int ss_size; +} stack_ia32_t; + +struct ucontext_ia32 { + unsigned int uc_flags; + unsigned int uc_link; + stack_ia32_t uc_stack; + struct sigcontext_ia32 uc_mcontext; + compat_sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +/* This matches struct stat64 in glibc2.2, hence the absolutely + * insane amounts of padding around dev_t's. + */ +struct stat64 { + unsigned long long st_dev; + unsigned char __pad0[4]; + +#define STAT64_HAS_BROKEN_ST_INO 1 + unsigned int __st_ino; + + unsigned int st_mode; + unsigned int st_nlink; + + unsigned int st_uid; + unsigned int st_gid; + + unsigned long long st_rdev; + unsigned char __pad3[4]; + + long long st_size; + unsigned int st_blksize; + + long long st_blocks;/* Number 512-byte blocks allocated */ + + unsigned st_atime; + unsigned st_atime_nsec; + unsigned st_mtime; + unsigned st_mtime_nsec; + unsigned st_ctime; + unsigned st_ctime_nsec; + + unsigned long long st_ino; +} __attribute__((packed)); + +typedef struct compat_siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[((128 / sizeof(int)) - 3)]; + + /* kill() */ + struct { + unsigned int _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + compat_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + compat_sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ + int _overrun_incr; /* amount to add to overrun */ + } _timer; + + /* POSIX.1b signals */ + struct { + unsigned int _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + compat_sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + unsigned int _pid; /* which child */ + unsigned int _uid; /* sender's uid */ + int _status; /* exit code */ + compat_clock_t _utime; + compat_clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + unsigned int _addr; /* faulting insn/memory ref. */ + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} compat_siginfo_t; + +struct sigframe32 { + u32 pretcode; + int sig; + struct sigcontext_ia32 sc; + struct _fpstate_ia32 fpstate; + unsigned int extramask[_COMPAT_NSIG_WORDS-1]; +}; + +struct rt_sigframe32 { + u32 pretcode; + int sig; + u32 pinfo; + u32 puc; + compat_siginfo_t info; + struct ucontext_ia32 uc; + struct _fpstate_ia32 fpstate; +}; + +struct ustat32 { + __u32 f_tfree; + compat_ino_t f_tinode; + char f_fname[6]; + char f_fpack[6]; +}; + +#define IA32_STACK_TOP IA32_PAGE_OFFSET + +#ifdef __KERNEL__ +struct linux_binprm; +extern int ia32_setup_arg_pages(struct linux_binprm *bprm, + unsigned long stack_top, int exec_stack); +struct mm_struct; +extern void ia32_pick_mmap_layout(struct mm_struct *mm); + +#endif + +#endif /* !CONFIG_IA32_SUPPORT */ + +#endif /* _ASM_X86_IA32_H */ diff --git a/arch/x86/include/asm/ia32_unistd.h b/arch/x86/include/asm/ia32_unistd.h new file mode 100644 index 00000000000..976f6ecd2ce --- /dev/null +++ b/arch/x86/include/asm/ia32_unistd.h @@ -0,0 +1,18 @@ +#ifndef _ASM_X86_IA32_UNISTD_H +#define _ASM_X86_IA32_UNISTD_H + +/* + * This file contains the system call numbers of the ia32 port, + * this is for the kernel only. + * Only add syscalls here where some part of the kernel needs to know + * the number. This should be otherwise in sync with asm-x86/unistd_32.h. -AK + */ + +#define __NR_ia32_restart_syscall 0 +#define __NR_ia32_exit 1 +#define __NR_ia32_read 3 +#define __NR_ia32_write 4 +#define __NR_ia32_sigreturn 119 +#define __NR_ia32_rt_sigreturn 173 + +#endif /* _ASM_X86_IA32_UNISTD_H */ diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h new file mode 100644 index 00000000000..44c89c3a23e --- /dev/null +++ b/arch/x86/include/asm/idle.h @@ -0,0 +1,16 @@ +#ifndef _ASM_X86_IDLE_H +#define _ASM_X86_IDLE_H + +#define IDLE_START 1 +#define IDLE_END 2 + +struct notifier_block; +void idle_notifier_register(struct notifier_block *n); +void idle_notifier_unregister(struct notifier_block *n); + +void enter_idle(void); +void exit_idle(void); + +void c1e_remove_cpu(int cpu); + +#endif /* _ASM_X86_IDLE_H */ diff --git a/arch/x86/include/asm/intel_arch_perfmon.h b/arch/x86/include/asm/intel_arch_perfmon.h new file mode 100644 index 00000000000..fa0fd068bc2 --- /dev/null +++ b/arch/x86/include/asm/intel_arch_perfmon.h @@ -0,0 +1,31 @@ +#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H +#define _ASM_X86_INTEL_ARCH_PERFMON_H + +#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 +#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 + +#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 +#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 + +#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) +#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) +#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) +#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) + +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ + (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) + +union cpuid10_eax { + struct { + unsigned int version_id:8; + unsigned int num_counters:8; + unsigned int bit_width:8; + unsigned int mask_length:8; + } split; + unsigned int full; +}; + +#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h new file mode 100644 index 00000000000..5618a103f39 --- /dev/null +++ b/arch/x86/include/asm/io.h @@ -0,0 +1,91 @@ +#ifndef _ASM_X86_IO_H +#define _ASM_X86_IO_H + +#define ARCH_HAS_IOREMAP_WC + +#include <linux/compiler.h> + +#define build_mmio_read(name, size, type, reg, barrier) \ +static inline type name(const volatile void __iomem *addr) \ +{ type ret; asm volatile("mov" size " %1,%0":reg (ret) \ +:"m" (*(volatile type __force *)addr) barrier); return ret; } + +#define build_mmio_write(name, size, type, reg, barrier) \ +static inline void name(type val, volatile void __iomem *addr) \ +{ asm volatile("mov" size " %0,%1": :reg (val), \ +"m" (*(volatile type __force *)addr) barrier); } + +build_mmio_read(readb, "b", unsigned char, "=q", :"memory") +build_mmio_read(readw, "w", unsigned short, "=r", :"memory") +build_mmio_read(readl, "l", unsigned int, "=r", :"memory") + +build_mmio_read(__readb, "b", unsigned char, "=q", ) +build_mmio_read(__readw, "w", unsigned short, "=r", ) +build_mmio_read(__readl, "l", unsigned int, "=r", ) + +build_mmio_write(writeb, "b", unsigned char, "q", :"memory") +build_mmio_write(writew, "w", unsigned short, "r", :"memory") +build_mmio_write(writel, "l", unsigned int, "r", :"memory") + +build_mmio_write(__writeb, "b", unsigned char, "q", ) +build_mmio_write(__writew, "w", unsigned short, "r", ) +build_mmio_write(__writel, "l", unsigned int, "r", ) + +#define readb_relaxed(a) __readb(a) +#define readw_relaxed(a) __readw(a) +#define readl_relaxed(a) __readl(a) +#define __raw_readb __readb +#define __raw_readw __readw +#define __raw_readl __readl + +#define __raw_writeb __writeb +#define __raw_writew __writew +#define __raw_writel __writel + +#define mmiowb() barrier() + +#ifdef CONFIG_X86_64 +build_mmio_read(readq, "q", unsigned long, "=r", :"memory") +build_mmio_read(__readq, "q", unsigned long, "=r", ) +build_mmio_write(writeq, "q", unsigned long, "r", :"memory") +build_mmio_write(__writeq, "q", unsigned long, "r", ) + +#define readq_relaxed(a) __readq(a) +#define __raw_readq __readq +#define __raw_writeq writeq + +/* Let people know we have them */ +#define readq readq +#define writeq writeq +#endif + +extern int iommu_bio_merge; + +#ifdef CONFIG_X86_32 +# include "io_32.h" +#else +# include "io_64.h" +#endif + +extern void *xlate_dev_mem_ptr(unsigned long phys); +extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr); + +extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, + unsigned long prot_val); +extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size); + +/* + * early_ioremap() and early_iounmap() are for temporary early boot-time + * mappings, before the real ioremap() is functional. + * A boot-time mapping is currently limited to at most 16 pages. + */ +extern void early_ioremap_init(void); +extern void early_ioremap_clear(void); +extern void early_ioremap_reset(void); +extern void *early_ioremap(unsigned long offset, unsigned long size); +extern void *early_memremap(unsigned long offset, unsigned long size); +extern void early_iounmap(void *addr, unsigned long size); +extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); + + +#endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/include/asm/io_32.h b/arch/x86/include/asm/io_32.h new file mode 100644 index 00000000000..d8e242e1b39 --- /dev/null +++ b/arch/x86/include/asm/io_32.h @@ -0,0 +1,284 @@ +#ifndef _ASM_X86_IO_32_H +#define _ASM_X86_IO_32_H + +#include <linux/string.h> +#include <linux/compiler.h> + +/* + * This file contains the definitions for the x86 IO instructions + * inb/inw/inl/outb/outw/outl and the "string versions" of the same + * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" + * versions of the single-IO instructions (inb_p/inw_p/..). + * + * This file is not meant to be obfuscating: it's just complicated + * to (a) handle it all in a way that makes gcc able to optimize it + * as well as possible and (b) trying to avoid writing the same thing + * over and over again with slight variations and possibly making a + * mistake somewhere. + */ + +/* + * Thanks to James van Artsdalen for a better timing-fix than + * the two short jumps: using outb's to a nonexistent port seems + * to guarantee better timings even on fast machines. + * + * On the other hand, I'd like to be sure of a non-existent port: + * I feel a bit unsafe about using 0x80 (should be safe, though) + * + * Linus + */ + + /* + * Bit simplified and optimized by Jan Hubicka + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. + * + * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, + * isa_read[wl] and isa_write[wl] fixed + * - Arnaldo Carvalho de Melo <acme@conectiva.com.br> + */ + +#define IO_SPACE_LIMIT 0xffff + +#define XQUAD_PORTIO_BASE 0xfe400000 +#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ + +#ifdef __KERNEL__ + +#include <asm-generic/iomap.h> + +#include <linux/vmalloc.h> + +/* + * Convert a virtual cached pointer to an uncached pointer + */ +#define xlate_dev_kmem_ptr(p) p + +/** + * virt_to_phys - map virtual addresses to physical + * @address: address to remap + * + * The returned physical address is the physical (CPU) mapping for + * the memory address given. It is only valid to use this function on + * addresses directly mapped or allocated via kmalloc. + * + * This function does not give bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ + +static inline unsigned long virt_to_phys(volatile void *address) +{ + return __pa(address); +} + +/** + * phys_to_virt - map physical address to virtual + * @address: address to remap + * + * The returned virtual address is a current CPU mapping for + * the memory address given. It is only valid to use this function on + * addresses that have a kernel mapping + * + * This function does not handle bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ + +static inline void *phys_to_virt(unsigned long address) +{ + return __va(address); +} + +/* + * Change "struct page" to physical address. + */ +#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) + +/** + * ioremap - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * If the area you are trying to map is a PCI BAR you should have a + * look at pci_iomap(). + */ +extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, + unsigned long prot_val); + +/* + * The default ioremap() behavior is non-cached: + */ +static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) +{ + return ioremap_nocache(offset, size); +} + +extern void iounmap(volatile void __iomem *addr); + +/* + * ISA I/O bus memory addresses are 1:1 with the physical address. + */ +#define isa_virt_to_bus virt_to_phys +#define isa_page_to_bus page_to_phys +#define isa_bus_to_virt phys_to_virt + +/* + * However PCI ones are not necessarily 1:1 and therefore these interfaces + * are forbidden in portable PCI drivers. + * + * Allow them on x86 for legacy drivers, though. + */ +#define virt_to_bus virt_to_phys +#define bus_to_virt phys_to_virt + +static inline void +memset_io(volatile void __iomem *addr, unsigned char val, int count) +{ + memset((void __force *)addr, val, count); +} + +static inline void +memcpy_fromio(void *dst, const volatile void __iomem *src, int count) +{ + __memcpy(dst, (const void __force *)src, count); +} + +static inline void +memcpy_toio(volatile void __iomem *dst, const void *src, int count) +{ + __memcpy((void __force *)dst, src, count); +} + +/* + * ISA space is 'always mapped' on a typical x86 system, no need to + * explicitly ioremap() it. The fact that the ISA IO space is mapped + * to PAGE_OFFSET is pure coincidence - it does not mean ISA values + * are physical addresses. The following constant pointer can be + * used as the IO-area pointer (it can be iounmapped as well, so the + * analogy with PCI is quite large): + */ +#define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET)) + +/* + * Cache management + * + * This needed for two cases + * 1. Out of order aware processors + * 2. Accidentally out of order processors (PPro errata #51) + */ + +#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) + +static inline void flush_write_buffers(void) +{ + asm volatile("lock; addl $0,0(%%esp)": : :"memory"); +} + +#else + +#define flush_write_buffers() do { } while (0) + +#endif + +#endif /* __KERNEL__ */ + +extern void native_io_delay(void); + +extern int io_delay_type; +extern void io_delay_init(void); + +#if defined(CONFIG_PARAVIRT) +#include <asm/paravirt.h> +#else + +static inline void slow_down_io(void) +{ + native_io_delay(); +#ifdef REALLY_SLOW_IO + native_io_delay(); + native_io_delay(); + native_io_delay(); +#endif +} + +#endif + +#define __BUILDIO(bwl, bw, type) \ +static inline void out##bwl(unsigned type value, int port) \ +{ \ + out##bwl##_local(value, port); \ +} \ + \ +static inline unsigned type in##bwl(int port) \ +{ \ + return in##bwl##_local(port); \ +} + +#define BUILDIO(bwl, bw, type) \ +static inline void out##bwl##_local(unsigned type value, int port) \ +{ \ + asm volatile("out" #bwl " %" #bw "0, %w1" \ + : : "a"(value), "Nd"(port)); \ +} \ + \ +static inline unsigned type in##bwl##_local(int port) \ +{ \ + unsigned type value; \ + asm volatile("in" #bwl " %w1, %" #bw "0" \ + : "=a"(value) : "Nd"(port)); \ + return value; \ +} \ + \ +static inline void out##bwl##_local_p(unsigned type value, int port) \ +{ \ + out##bwl##_local(value, port); \ + slow_down_io(); \ +} \ + \ +static inline unsigned type in##bwl##_local_p(int port) \ +{ \ + unsigned type value = in##bwl##_local(port); \ + slow_down_io(); \ + return value; \ +} \ + \ +__BUILDIO(bwl, bw, type) \ + \ +static inline void out##bwl##_p(unsigned type value, int port) \ +{ \ + out##bwl(value, port); \ + slow_down_io(); \ +} \ + \ +static inline unsigned type in##bwl##_p(int port) \ +{ \ + unsigned type value = in##bwl(port); \ + slow_down_io(); \ + return value; \ +} \ + \ +static inline void outs##bwl(int port, const void *addr, unsigned long count) \ +{ \ + asm volatile("rep; outs" #bwl \ + : "+S"(addr), "+c"(count) : "d"(port)); \ +} \ + \ +static inline void ins##bwl(int port, void *addr, unsigned long count) \ +{ \ + asm volatile("rep; ins" #bwl \ + : "+D"(addr), "+c"(count) : "d"(port)); \ +} + +BUILDIO(b, b, char) +BUILDIO(w, w, short) +BUILDIO(l, , int) + +#endif /* _ASM_X86_IO_32_H */ diff --git a/arch/x86/include/asm/io_64.h b/arch/x86/include/asm/io_64.h new file mode 100644 index 00000000000..fea325a1122 --- /dev/null +++ b/arch/x86/include/asm/io_64.h @@ -0,0 +1,244 @@ +#ifndef _ASM_X86_IO_64_H +#define _ASM_X86_IO_64_H + + +/* + * This file contains the definitions for the x86 IO instructions + * inb/inw/inl/outb/outw/outl and the "string versions" of the same + * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" + * versions of the single-IO instructions (inb_p/inw_p/..). + * + * This file is not meant to be obfuscating: it's just complicated + * to (a) handle it all in a way that makes gcc able to optimize it + * as well as possible and (b) trying to avoid writing the same thing + * over and over again with slight variations and possibly making a + * mistake somewhere. + */ + +/* + * Thanks to James van Artsdalen for a better timing-fix than + * the two short jumps: using outb's to a nonexistent port seems + * to guarantee better timings even on fast machines. + * + * On the other hand, I'd like to be sure of a non-existent port: + * I feel a bit unsafe about using 0x80 (should be safe, though) + * + * Linus + */ + + /* + * Bit simplified and optimized by Jan Hubicka + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. + * + * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, + * isa_read[wl] and isa_write[wl] fixed + * - Arnaldo Carvalho de Melo <acme@conectiva.com.br> + */ + +extern void native_io_delay(void); + +extern int io_delay_type; +extern void io_delay_init(void); + +#if defined(CONFIG_PARAVIRT) +#include <asm/paravirt.h> +#else + +static inline void slow_down_io(void) +{ + native_io_delay(); +#ifdef REALLY_SLOW_IO + native_io_delay(); + native_io_delay(); + native_io_delay(); +#endif +} +#endif + +/* + * Talk about misusing macros.. + */ +#define __OUT1(s, x) \ +static inline void out##s(unsigned x value, unsigned short port) { + +#define __OUT2(s, s1, s2) \ +asm volatile ("out" #s " %" s1 "0,%" s2 "1" + +#ifndef REALLY_SLOW_IO +#define REALLY_SLOW_IO +#define UNSET_REALLY_SLOW_IO +#endif + +#define __OUT(s, s1, x) \ + __OUT1(s, x) __OUT2(s, s1, "w") : : "a" (value), "Nd" (port)); \ + } \ + __OUT1(s##_p, x) __OUT2(s, s1, "w") : : "a" (value), "Nd" (port)); \ + slow_down_io(); \ +} + +#define __IN1(s) \ +static inline RETURN_TYPE in##s(unsigned short port) \ +{ \ + RETURN_TYPE _v; + +#define __IN2(s, s1, s2) \ + asm volatile ("in" #s " %" s2 "1,%" s1 "0" + +#define __IN(s, s1, i...) \ + __IN1(s) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i); \ + return _v; \ + } \ + __IN1(s##_p) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i); \ + slow_down_io(); \ + return _v; } + +#ifdef UNSET_REALLY_SLOW_IO +#undef REALLY_SLOW_IO +#endif + +#define __INS(s) \ +static inline void ins##s(unsigned short port, void *addr, \ + unsigned long count) \ +{ \ + asm volatile ("rep ; ins" #s \ + : "=D" (addr), "=c" (count) \ + : "d" (port), "0" (addr), "1" (count)); \ +} + +#define __OUTS(s) \ +static inline void outs##s(unsigned short port, const void *addr, \ + unsigned long count) \ +{ \ + asm volatile ("rep ; outs" #s \ + : "=S" (addr), "=c" (count) \ + : "d" (port), "0" (addr), "1" (count)); \ +} + +#define RETURN_TYPE unsigned char +__IN(b, "") +#undef RETURN_TYPE +#define RETURN_TYPE unsigned short +__IN(w, "") +#undef RETURN_TYPE +#define RETURN_TYPE unsigned int +__IN(l, "") +#undef RETURN_TYPE + +__OUT(b, "b", char) +__OUT(w, "w", short) +__OUT(l, , int) + +__INS(b) +__INS(w) +__INS(l) + +__OUTS(b) +__OUTS(w) +__OUTS(l) + +#define IO_SPACE_LIMIT 0xffff + +#if defined(__KERNEL__) && defined(__x86_64__) + +#include <linux/vmalloc.h> + +#ifndef __i386__ +/* + * Change virtual addresses to physical addresses and vv. + * These are pretty trivial + */ +static inline unsigned long virt_to_phys(volatile void *address) +{ + return __pa(address); +} + +static inline void *phys_to_virt(unsigned long address) +{ + return __va(address); +} +#endif + +/* + * Change "struct page" to physical address. + */ +#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) + +#include <asm-generic/iomap.h> + +/* + * This one maps high address device memory and turns off caching for that area. + * it's useful if some control registers are in such an area and write combining + * or read caching is not desirable: + */ +extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, + unsigned long prot_val); + +/* + * The default ioremap() behavior is non-cached: + */ +static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) +{ + return ioremap_nocache(offset, size); +} + +extern void iounmap(volatile void __iomem *addr); + +extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); + +/* + * ISA I/O bus memory addresses are 1:1 with the physical address. + */ +#define isa_virt_to_bus virt_to_phys +#define isa_page_to_bus page_to_phys +#define isa_bus_to_virt phys_to_virt + +/* + * However PCI ones are not necessarily 1:1 and therefore these interfaces + * are forbidden in portable PCI drivers. + * + * Allow them on x86 for legacy drivers, though. + */ +#define virt_to_bus virt_to_phys +#define bus_to_virt phys_to_virt + +void __memcpy_fromio(void *, unsigned long, unsigned); +void __memcpy_toio(unsigned long, const void *, unsigned); + +static inline void memcpy_fromio(void *to, const volatile void __iomem *from, + unsigned len) +{ + __memcpy_fromio(to, (unsigned long)from, len); +} + +static inline void memcpy_toio(volatile void __iomem *to, const void *from, + unsigned len) +{ + __memcpy_toio((unsigned long)to, from, len); +} + +void memset_io(volatile void __iomem *a, int b, size_t c); + +/* + * ISA space is 'always mapped' on a typical x86 system, no need to + * explicitly ioremap() it. The fact that the ISA IO space is mapped + * to PAGE_OFFSET is pure coincidence - it does not mean ISA values + * are physical addresses. The following constant pointer can be + * used as the IO-area pointer (it can be iounmapped as well, so the + * analogy with PCI is quite large): + */ +#define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET)) + +#define flush_write_buffers() + +#define BIO_VMERGE_BOUNDARY iommu_bio_merge + +/* + * Convert a virtual cached pointer to an uncached pointer + */ +#define xlate_dev_kmem_ptr(p) p + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_IO_64_H */ diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h new file mode 100644 index 00000000000..6afd9933a7d --- /dev/null +++ b/arch/x86/include/asm/io_apic.h @@ -0,0 +1,204 @@ +#ifndef _ASM_X86_IO_APIC_H +#define _ASM_X86_IO_APIC_H + +#include <linux/types.h> +#include <asm/mpspec.h> +#include <asm/apicdef.h> +#include <asm/irq_vectors.h> + +/* + * Intel IO-APIC support for SMP and UP systems. + * + * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar + */ + +/* I/O Unit Redirection Table */ +#define IO_APIC_REDIR_VECTOR_MASK 0x000FF +#define IO_APIC_REDIR_DEST_LOGICAL 0x00800 +#define IO_APIC_REDIR_DEST_PHYSICAL 0x00000 +#define IO_APIC_REDIR_SEND_PENDING (1 << 12) +#define IO_APIC_REDIR_REMOTE_IRR (1 << 14) +#define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15) +#define IO_APIC_REDIR_MASKED (1 << 16) + +/* + * The structure of the IO-APIC: + */ +union IO_APIC_reg_00 { + u32 raw; + struct { + u32 __reserved_2 : 14, + LTS : 1, + delivery_type : 1, + __reserved_1 : 8, + ID : 8; + } __attribute__ ((packed)) bits; +}; + +union IO_APIC_reg_01 { + u32 raw; + struct { + u32 version : 8, + __reserved_2 : 7, + PRQ : 1, + entries : 8, + __reserved_1 : 8; + } __attribute__ ((packed)) bits; +}; + +union IO_APIC_reg_02 { + u32 raw; + struct { + u32 __reserved_2 : 24, + arbitration : 4, + __reserved_1 : 4; + } __attribute__ ((packed)) bits; +}; + +union IO_APIC_reg_03 { + u32 raw; + struct { + u32 boot_DT : 1, + __reserved_1 : 31; + } __attribute__ ((packed)) bits; +}; + +enum ioapic_irq_destination_types { + dest_Fixed = 0, + dest_LowestPrio = 1, + dest_SMI = 2, + dest__reserved_1 = 3, + dest_NMI = 4, + dest_INIT = 5, + dest__reserved_2 = 6, + dest_ExtINT = 7 +}; + +struct IO_APIC_route_entry { + __u32 vector : 8, + delivery_mode : 3, /* 000: FIXED + * 001: lowest prio + * 111: ExtINT + */ + dest_mode : 1, /* 0: physical, 1: logical */ + delivery_status : 1, + polarity : 1, + irr : 1, + trigger : 1, /* 0: edge, 1: level */ + mask : 1, /* 0: enabled, 1: disabled */ + __reserved_2 : 15; + + __u32 __reserved_3 : 24, + dest : 8; +} __attribute__ ((packed)); + +struct IR_IO_APIC_route_entry { + __u64 vector : 8, + zero : 3, + index2 : 1, + delivery_status : 1, + polarity : 1, + irr : 1, + trigger : 1, + mask : 1, + reserved : 31, + format : 1, + index : 15; +} __attribute__ ((packed)); + +#ifdef CONFIG_X86_IO_APIC + +/* + * # of IO-APICs and # of IRQ routing registers + */ +extern int nr_ioapics; +extern int nr_ioapic_registers[MAX_IO_APICS]; + +/* + * MP-BIOS irq configuration table structures: + */ + +#define MP_MAX_IOAPIC_PIN 127 + +struct mp_config_ioapic { + unsigned long mp_apicaddr; + unsigned int mp_apicid; + unsigned char mp_type; + unsigned char mp_apicver; + unsigned char mp_flags; +}; + +struct mp_config_intsrc { + unsigned int mp_dstapic; + unsigned char mp_type; + unsigned char mp_irqtype; + unsigned short mp_irqflag; + unsigned char mp_srcbus; + unsigned char mp_srcbusirq; + unsigned char mp_dstirq; +}; + +/* I/O APIC entries */ +extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; + +/* # of MP IRQ source entries */ +extern int mp_irq_entries; + +/* MP IRQ source entries */ +extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; + +/* non-0 if default (table-less) MP configuration */ +extern int mpc_default_type; + +/* Older SiS APIC requires we rewrite the index register */ +extern int sis_apic_bug; + +/* 1 if "noapic" boot option passed */ +extern int skip_ioapic_setup; + +/* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ +extern int timer_through_8259; + +static inline void disable_ioapic_setup(void) +{ + skip_ioapic_setup = 1; +} + +/* + * If we use the IO-APIC for IRQ routing, disable automatic + * assignment of PCI IRQ's. + */ +#define io_apic_assign_pci_irqs \ + (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) + +#ifdef CONFIG_ACPI +extern int io_apic_get_unique_id(int ioapic, int apic_id); +extern int io_apic_get_version(int ioapic); +extern int io_apic_get_redir_entries(int ioapic); +extern int io_apic_set_pci_routing(int ioapic, int pin, int irq, + int edge_level, int active_high_low); +#endif /* CONFIG_ACPI */ + +extern int (*ioapic_renumber_irq)(int ioapic, int irq); +extern void ioapic_init_mappings(void); + +#ifdef CONFIG_X86_64 +extern int save_mask_IO_APIC_setup(void); +extern void restore_IO_APIC_setup(void); +extern void reinit_intr_remapped_IO_APIC(int); +#endif + +extern int probe_nr_irqs(void); + +#else /* !CONFIG_X86_IO_APIC */ +#define io_apic_assign_pci_irqs 0 +static const int timer_through_8259 = 0; +static inline void ioapic_init_mappings(void) { } + +static inline int probe_nr_irqs(void) +{ + return NR_IRQS; +} +#endif + +#endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/ioctl.h b/arch/x86/include/asm/ioctl.h new file mode 100644 index 00000000000..b279fe06dfe --- /dev/null +++ b/arch/x86/include/asm/ioctl.h @@ -0,0 +1 @@ +#include <asm-generic/ioctl.h> diff --git a/arch/x86/include/asm/ioctls.h b/arch/x86/include/asm/ioctls.h new file mode 100644 index 00000000000..0d5b23b7b06 --- /dev/null +++ b/arch/x86/include/asm/ioctls.h @@ -0,0 +1,94 @@ +#ifndef _ASM_X86_IOCTLS_H +#define _ASM_X86_IOCTLS_H + +#include <asm/ioctl.h> + +/* 0x54 is just a magic number to make these relatively unique ('T') */ + +#define TCGETS 0x5401 +#define TCSETS 0x5402 /* Clashes with SNDCTL_TMR_START sound ioctl */ +#define TCSETSW 0x5403 +#define TCSETSF 0x5404 +#define TCGETA 0x5405 +#define TCSETA 0x5406 +#define TCSETAW 0x5407 +#define TCSETAF 0x5408 +#define TCSBRK 0x5409 +#define TCXONC 0x540A +#define TCFLSH 0x540B +#define TIOCEXCL 0x540C +#define TIOCNXCL 0x540D +#define TIOCSCTTY 0x540E +#define TIOCGPGRP 0x540F +#define TIOCSPGRP 0x5410 +#define TIOCOUTQ 0x5411 +#define TIOCSTI 0x5412 +#define TIOCGWINSZ 0x5413 +#define TIOCSWINSZ 0x5414 +#define TIOCMGET 0x5415 +#define TIOCMBIS 0x5416 +#define TIOCMBIC 0x5417 +#define TIOCMSET 0x5418 +#define TIOCGSOFTCAR 0x5419 +#define TIOCSSOFTCAR 0x541A +#define FIONREAD 0x541B +#define TIOCINQ FIONREAD +#define TIOCLINUX 0x541C +#define TIOCCONS 0x541D +#define TIOCGSERIAL 0x541E +#define TIOCSSERIAL 0x541F +#define TIOCPKT 0x5420 +#define FIONBIO 0x5421 +#define TIOCNOTTY 0x5422 +#define TIOCSETD 0x5423 +#define TIOCGETD 0x5424 +#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ +/* #define TIOCTTYGSTRUCT 0x5426 - Former debugging-only ioctl */ +#define TIOCSBRK 0x5427 /* BSD compatibility */ +#define TIOCCBRK 0x5428 /* BSD compatibility */ +#define TIOCGSID 0x5429 /* Return the session ID of FD */ +#define TCGETS2 _IOR('T', 0x2A, struct termios2) +#define TCSETS2 _IOW('T', 0x2B, struct termios2) +#define TCSETSW2 _IOW('T', 0x2C, struct termios2) +#define TCSETSF2 _IOW('T', 0x2D, struct termios2) +#define TIOCGRS485 0x542E +#define TIOCSRS485 0x542F +#define TIOCGPTN _IOR('T', 0x30, unsigned int) + /* Get Pty Number (of pty-mux device) */ +#define TIOCSPTLCK _IOW('T', 0x31, int) /* Lock/unlock Pty */ +#define TCGETX 0x5432 /* SYS5 TCGETX compatibility */ +#define TCSETX 0x5433 +#define TCSETXF 0x5434 +#define TCSETXW 0x5435 + +#define FIONCLEX 0x5450 +#define FIOCLEX 0x5451 +#define FIOASYNC 0x5452 +#define TIOCSERCONFIG 0x5453 +#define TIOCSERGWILD 0x5454 +#define TIOCSERSWILD 0x5455 +#define TIOCGLCKTRMIOS 0x5456 +#define TIOCSLCKTRMIOS 0x5457 +#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ +#define TIOCSERGETLSR 0x5459 /* Get line status register */ +#define TIOCSERGETMULTI 0x545A /* Get multiport config */ +#define TIOCSERSETMULTI 0x545B /* Set multiport config */ + +#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ +#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ +#define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ +#define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ +#define FIOQSIZE 0x5460 + +/* Used for packet mode */ +#define TIOCPKT_DATA 0 +#define TIOCPKT_FLUSHREAD 1 +#define TIOCPKT_FLUSHWRITE 2 +#define TIOCPKT_STOP 4 +#define TIOCPKT_START 8 +#define TIOCPKT_NOSTOP 16 +#define TIOCPKT_DOSTOP 32 + +#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ + +#endif /* _ASM_X86_IOCTLS_H */ diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h new file mode 100644 index 00000000000..98e28ea8cd1 --- /dev/null +++ b/arch/x86/include/asm/iommu.h @@ -0,0 +1,50 @@ +#ifndef _ASM_X86_IOMMU_H +#define _ASM_X86_IOMMU_H + +extern void pci_iommu_shutdown(void); +extern void no_iommu_init(void); +extern struct dma_mapping_ops nommu_dma_ops; +extern int force_iommu, no_iommu; +extern int iommu_detected; +extern int dmar_disabled; +extern int forbid_dac; + +extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len); + +/* 10 seconds */ +#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) + +#ifdef CONFIG_GART_IOMMU +extern int gart_iommu_aperture; +extern int gart_iommu_aperture_allowed; +extern int gart_iommu_aperture_disabled; + +extern void early_gart_iommu_check(void); +extern void gart_iommu_init(void); +extern void gart_iommu_shutdown(void); +extern void __init gart_parse_options(char *); +extern void gart_iommu_hole_init(void); + +#else +#define gart_iommu_aperture 0 +#define gart_iommu_aperture_allowed 0 +#define gart_iommu_aperture_disabled 1 + +static inline void early_gart_iommu_check(void) +{ +} +static inline void gart_iommu_init(void) +{ +} +static inline void gart_iommu_shutdown(void) +{ +} +static inline void gart_parse_options(char *options) +{ +} +static inline void gart_iommu_hole_init(void) +{ +} +#endif + +#endif /* _ASM_X86_IOMMU_H */ diff --git a/arch/x86/include/asm/ipcbuf.h b/arch/x86/include/asm/ipcbuf.h new file mode 100644 index 00000000000..ee678fd5159 --- /dev/null +++ b/arch/x86/include/asm/ipcbuf.h @@ -0,0 +1,28 @@ +#ifndef _ASM_X86_IPCBUF_H +#define _ASM_X86_IPCBUF_H + +/* + * The ipc64_perm structure for x86 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 32-bit mode_t and seq + * - 2 miscellaneous 32-bit values + */ + +struct ipc64_perm { + __kernel_key_t key; + __kernel_uid32_t uid; + __kernel_gid32_t gid; + __kernel_uid32_t cuid; + __kernel_gid32_t cgid; + __kernel_mode_t mode; + unsigned short __pad1; + unsigned short seq; + unsigned short __pad2; + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* _ASM_X86_IPCBUF_H */ diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h new file mode 100644 index 00000000000..f89dffb28aa --- /dev/null +++ b/arch/x86/include/asm/ipi.h @@ -0,0 +1,138 @@ +#ifndef _ASM_X86_IPI_H +#define _ASM_X86_IPI_H + +/* + * Copyright 2004 James Cleverdon, IBM. + * Subject to the GNU Public License, v.2 + * + * Generic APIC InterProcessor Interrupt code. + * + * Moved to include file by James Cleverdon from + * arch/x86-64/kernel/smp.c + * + * Copyrights from kernel/smp.c: + * + * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> + * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> + * (c) 2002,2003 Andi Kleen, SuSE Labs. + * Subject to the GNU Public License, v.2 + */ + +#include <asm/hw_irq.h> +#include <asm/apic.h> +#include <asm/smp.h> + +/* + * the following functions deal with sending IPIs between CPUs. + * + * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. + */ + +static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector, + unsigned int dest) +{ + unsigned int icr = shortcut | dest; + + switch (vector) { + default: + icr |= APIC_DM_FIXED | vector; + break; + case NMI_VECTOR: + icr |= APIC_DM_NMI; + break; + } + return icr; +} + +static inline int __prepare_ICR2(unsigned int mask) +{ + return SET_APIC_DEST_FIELD(mask); +} + +static inline void __xapic_wait_icr_idle(void) +{ + while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY) + cpu_relax(); +} + +static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, + unsigned int dest) +{ + /* + * Subtle. In the case of the 'never do double writes' workaround + * we have to lock out interrupts to be safe. As we don't care + * of the value read we use an atomic rmw access to avoid costly + * cli/sti. Otherwise we use an even cheaper single atomic write + * to the APIC. + */ + unsigned int cfg; + + /* + * Wait for idle. + */ + __xapic_wait_icr_idle(); + + /* + * No need to touch the target chip field + */ + cfg = __prepare_ICR(shortcut, vector, dest); + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + native_apic_mem_write(APIC_ICR, cfg); +} + +/* + * This is used to send an IPI with no shorthand notation (the destination is + * specified in bits 56 to 63 of the ICR). + */ +static inline void __send_IPI_dest_field(unsigned int mask, int vector, + unsigned int dest) +{ + unsigned long cfg; + + /* + * Wait for idle. + */ + if (unlikely(vector == NMI_VECTOR)) + safe_apic_wait_icr_idle(); + else + __xapic_wait_icr_idle(); + + /* + * prepare target chip field + */ + cfg = __prepare_ICR2(mask); + native_apic_mem_write(APIC_ICR2, cfg); + + /* + * program the ICR + */ + cfg = __prepare_ICR(0, vector, dest); + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + native_apic_mem_write(APIC_ICR, cfg); +} + +static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) +{ + unsigned long flags; + unsigned long query_cpu; + + /* + * Hack. The clustered APIC addressing mode doesn't allow us to send + * to an arbitrary mask, so I do a unicast to each CPU instead. + * - mbligh + */ + local_irq_save(flags); + for_each_cpu_mask_nr(query_cpu, mask) { + __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } + local_irq_restore(flags); +} + +#endif /* _ASM_X86_IPI_H */ diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h new file mode 100644 index 00000000000..bae0eda9548 --- /dev/null +++ b/arch/x86/include/asm/irq.h @@ -0,0 +1,50 @@ +#ifndef _ASM_X86_IRQ_H +#define _ASM_X86_IRQ_H +/* + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar + * + * IRQ/IPI changes taken from work by Thomas Radke + * <tomsoft@informatik.tu-chemnitz.de> + */ + +#include <asm/apicdef.h> +#include <asm/irq_vectors.h> + +static inline int irq_canonicalize(int irq) +{ + return ((irq == 2) ? 9 : irq); +} + +#ifdef CONFIG_X86_LOCAL_APIC +# define ARCH_HAS_NMI_WATCHDOG +#endif + +#ifdef CONFIG_4KSTACKS + extern void irq_ctx_init(int cpu); + extern void irq_ctx_exit(int cpu); +# define __ARCH_HAS_DO_SOFTIRQ +#else +# define irq_ctx_init(cpu) do { } while (0) +# define irq_ctx_exit(cpu) do { } while (0) +# ifdef CONFIG_X86_64 +# define __ARCH_HAS_DO_SOFTIRQ +# endif +#endif + +#ifdef CONFIG_IRQBALANCE +extern int irqbalance_disable(char *str); +#endif + +#ifdef CONFIG_HOTPLUG_CPU +#include <linux/cpumask.h> +extern void fixup_irqs(cpumask_t map); +#endif + +extern unsigned int do_IRQ(struct pt_regs *regs); +extern void init_IRQ(void); +extern void native_init_IRQ(void); + +/* Interrupt vector management */ +extern DECLARE_BITMAP(used_vectors, NR_VECTORS); + +#endif /* _ASM_X86_IRQ_H */ diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h new file mode 100644 index 00000000000..89c898ab298 --- /dev/null +++ b/arch/x86/include/asm/irq_regs.h @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "irq_regs_32.h" +#else +# include "irq_regs_64.h" +#endif diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h new file mode 100644 index 00000000000..af2f02d27fc --- /dev/null +++ b/arch/x86/include/asm/irq_regs_32.h @@ -0,0 +1,29 @@ +/* + * Per-cpu current frame pointer - the location of the last exception frame on + * the stack, stored in the per-cpu area. + * + * Jeremy Fitzhardinge <jeremy@goop.org> + */ +#ifndef _ASM_X86_IRQ_REGS_32_H +#define _ASM_X86_IRQ_REGS_32_H + +#include <asm/percpu.h> + +DECLARE_PER_CPU(struct pt_regs *, irq_regs); + +static inline struct pt_regs *get_irq_regs(void) +{ + return x86_read_percpu(irq_regs); +} + +static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) +{ + struct pt_regs *old_regs; + + old_regs = get_irq_regs(); + x86_write_percpu(irq_regs, new_regs); + + return old_regs; +} + +#endif /* _ASM_X86_IRQ_REGS_32_H */ diff --git a/arch/x86/include/asm/irq_regs_64.h b/arch/x86/include/asm/irq_regs_64.h new file mode 100644 index 00000000000..3dd9c0b7027 --- /dev/null +++ b/arch/x86/include/asm/irq_regs_64.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h new file mode 100644 index 00000000000..20e1fd588db --- /dev/null +++ b/arch/x86/include/asm/irq_remapping.h @@ -0,0 +1,8 @@ +#ifndef _ASM_X86_IRQ_REMAPPING_H +#define _ASM_X86_IRQ_REMAPPING_H + +extern int x2apic; + +#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8) + +#endif /* _ASM_X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h new file mode 100644 index 00000000000..d843ed0e9b2 --- /dev/null +++ b/arch/x86/include/asm/irq_vectors.h @@ -0,0 +1,164 @@ +#ifndef _ASM_X86_IRQ_VECTORS_H +#define _ASM_X86_IRQ_VECTORS_H + +#include <linux/threads.h> + +#define NMI_VECTOR 0x02 + +/* + * IDT vectors usable for external interrupt sources start + * at 0x20: + */ +#define FIRST_EXTERNAL_VECTOR 0x20 + +#ifdef CONFIG_X86_32 +# define SYSCALL_VECTOR 0x80 +#else +# define IA32_SYSCALL_VECTOR 0x80 +#endif + +/* + * Reserve the lowest usable priority level 0x20 - 0x2f for triggering + * cleanup after irq migration. + */ +#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR + +/* + * Vectors 0x30-0x3f are used for ISA interrupts. + */ +#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) +#define IRQ1_VECTOR (IRQ0_VECTOR + 1) +#define IRQ2_VECTOR (IRQ0_VECTOR + 2) +#define IRQ3_VECTOR (IRQ0_VECTOR + 3) +#define IRQ4_VECTOR (IRQ0_VECTOR + 4) +#define IRQ5_VECTOR (IRQ0_VECTOR + 5) +#define IRQ6_VECTOR (IRQ0_VECTOR + 6) +#define IRQ7_VECTOR (IRQ0_VECTOR + 7) +#define IRQ8_VECTOR (IRQ0_VECTOR + 8) +#define IRQ9_VECTOR (IRQ0_VECTOR + 9) +#define IRQ10_VECTOR (IRQ0_VECTOR + 10) +#define IRQ11_VECTOR (IRQ0_VECTOR + 11) +#define IRQ12_VECTOR (IRQ0_VECTOR + 12) +#define IRQ13_VECTOR (IRQ0_VECTOR + 13) +#define IRQ14_VECTOR (IRQ0_VECTOR + 14) +#define IRQ15_VECTOR (IRQ0_VECTOR + 15) + +/* + * Special IRQ vectors used by the SMP architecture, 0xf0-0xff + * + * some of the following vectors are 'rare', they are merged + * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. + * TLB, reschedule and local APIC vectors are performance-critical. + * + * Vectors 0xf0-0xfa are free (reserved for future Linux use). + */ +#ifdef CONFIG_X86_32 + +# define SPURIOUS_APIC_VECTOR 0xff +# define ERROR_APIC_VECTOR 0xfe +# define INVALIDATE_TLB_VECTOR 0xfd +# define RESCHEDULE_VECTOR 0xfc +# define CALL_FUNCTION_VECTOR 0xfb +# define CALL_FUNCTION_SINGLE_VECTOR 0xfa +# define THERMAL_APIC_VECTOR 0xf0 + +#else + +#define SPURIOUS_APIC_VECTOR 0xff +#define ERROR_APIC_VECTOR 0xfe +#define RESCHEDULE_VECTOR 0xfd +#define CALL_FUNCTION_VECTOR 0xfc +#define CALL_FUNCTION_SINGLE_VECTOR 0xfb +#define THERMAL_APIC_VECTOR 0xfa +#define THRESHOLD_APIC_VECTOR 0xf9 +#define UV_BAU_MESSAGE 0xf8 +#define INVALIDATE_TLB_VECTOR_END 0xf7 +#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ + +#define NUM_INVALIDATE_TLB_VECTORS 8 + +#endif + +/* + * Local APIC timer IRQ vector is on a different priority level, + * to work around the 'lost local interrupt if more than 2 IRQ + * sources per level' errata. + */ +#define LOCAL_TIMER_VECTOR 0xef + +/* + * First APIC vector available to drivers: (vectors 0x30-0xee) we + * start at 0x31(0x41) to spread out vectors evenly between priority + * levels. (0x80 is the syscall vector) + */ +#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) + +#define NR_VECTORS 256 + +#define FPU_IRQ 13 + +#define FIRST_VM86_IRQ 3 +#define LAST_VM86_IRQ 15 +#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) + +#ifdef CONFIG_X86_64 +# if NR_CPUS < MAX_IO_APICS +# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) +# else +# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) +# endif + +#elif !defined(CONFIG_X86_VOYAGER) + +# if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS) + +# define NR_IRQS 224 + +# else /* IO_APIC || PARAVIRT */ + +# define NR_IRQS 16 + +# endif + +#else /* !VISWS && !VOYAGER */ + +# define NR_IRQS 224 + +#endif /* VISWS */ + +/* Voyager specific defines */ +/* These define the CPIs we use in linux */ +#define VIC_CPI_LEVEL0 0 +#define VIC_CPI_LEVEL1 1 +/* now the fake CPIs */ +#define VIC_TIMER_CPI 2 +#define VIC_INVALIDATE_CPI 3 +#define VIC_RESCHEDULE_CPI 4 +#define VIC_ENABLE_IRQ_CPI 5 +#define VIC_CALL_FUNCTION_CPI 6 +#define VIC_CALL_FUNCTION_SINGLE_CPI 7 + +/* Now the QIC CPIs: Since we don't need the two initial levels, + * these are 2 less than the VIC CPIs */ +#define QIC_CPI_OFFSET 1 +#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET) +#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET) +#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET) +#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET) +#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET) +#define QIC_CALL_FUNCTION_SINGLE_CPI (VIC_CALL_FUNCTION_SINGLE_CPI - QIC_CPI_OFFSET) + +#define VIC_START_FAKE_CPI VIC_TIMER_CPI +#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_SINGLE_CPI + +/* this is the SYS_INT CPI. */ +#define VIC_SYS_INT 8 +#define VIC_CMN_INT 15 + +/* This is the boot CPI for alternate processors. It gets overwritten + * by the above once the system has activated all available processors */ +#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0 +#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) + + +#endif /* _ASM_X86_IRQ_VECTORS_H */ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h new file mode 100644 index 00000000000..2bdab21f089 --- /dev/null +++ b/arch/x86/include/asm/irqflags.h @@ -0,0 +1,211 @@ +#ifndef _X86_IRQFLAGS_H_ +#define _X86_IRQFLAGS_H_ + +#include <asm/processor-flags.h> + +#ifndef __ASSEMBLY__ +/* + * Interrupt control: + */ + +static inline unsigned long native_save_fl(void) +{ + unsigned long flags; + + asm volatile("# __raw_save_flags\n\t" + "pushf ; pop %0" + : "=g" (flags) + : /* no input */ + : "memory"); + + return flags; +} + +static inline void native_restore_fl(unsigned long flags) +{ + asm volatile("push %0 ; popf" + : /* no output */ + :"g" (flags) + :"memory", "cc"); +} + +static inline void native_irq_disable(void) +{ + asm volatile("cli": : :"memory"); +} + +static inline void native_irq_enable(void) +{ + asm volatile("sti": : :"memory"); +} + +static inline void native_safe_halt(void) +{ + asm volatile("sti; hlt": : :"memory"); +} + +static inline void native_halt(void) +{ + asm volatile("hlt": : :"memory"); +} + +#endif + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#ifndef __ASSEMBLY__ + +static inline unsigned long __raw_local_save_flags(void) +{ + return native_save_fl(); +} + +static inline void raw_local_irq_restore(unsigned long flags) +{ + native_restore_fl(flags); +} + +static inline void raw_local_irq_disable(void) +{ + native_irq_disable(); +} + +static inline void raw_local_irq_enable(void) +{ + native_irq_enable(); +} + +/* + * Used in the idle loop; sti takes one instruction cycle + * to complete: + */ +static inline void raw_safe_halt(void) +{ + native_safe_halt(); +} + +/* + * Used when interrupts are already enabled or to + * shutdown the processor: + */ +static inline void halt(void) +{ + native_halt(); +} + +/* + * For spinlocks, etc: + */ +static inline unsigned long __raw_local_irq_save(void) +{ + unsigned long flags = __raw_local_save_flags(); + + raw_local_irq_disable(); + + return flags; +} +#else + +#define ENABLE_INTERRUPTS(x) sti +#define DISABLE_INTERRUPTS(x) cli + +#ifdef CONFIG_X86_64 +#define SWAPGS swapgs +/* + * Currently paravirt can't handle swapgs nicely when we + * don't have a stack we can rely on (such as a user space + * stack). So we either find a way around these or just fault + * and emulate if a guest tries to call swapgs directly. + * + * Either way, this is a good way to document that we don't + * have a reliable stack. x86_64 only. + */ +#define SWAPGS_UNSAFE_STACK swapgs + +#define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */ + +#define INTERRUPT_RETURN iretq +#define USERGS_SYSRET64 \ + swapgs; \ + sysretq; +#define USERGS_SYSRET32 \ + swapgs; \ + sysretl +#define ENABLE_INTERRUPTS_SYSEXIT32 \ + swapgs; \ + sti; \ + sysexit + +#else +#define INTERRUPT_RETURN iret +#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit +#define GET_CR0_INTO_EAX movl %cr0, %eax +#endif + + +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_PARAVIRT */ + +#ifndef __ASSEMBLY__ +#define raw_local_save_flags(flags) \ + do { (flags) = __raw_local_save_flags(); } while (0) + +#define raw_local_irq_save(flags) \ + do { (flags) = __raw_local_irq_save(); } while (0) + +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & X86_EFLAGS_IF); +} + +static inline int raw_irqs_disabled(void) +{ + unsigned long flags = __raw_local_save_flags(); + + return raw_irqs_disabled_flags(flags); +} + +#else + +#ifdef CONFIG_X86_64 +#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk +#define ARCH_LOCKDEP_SYS_EXIT_IRQ \ + TRACE_IRQS_ON; \ + sti; \ + SAVE_REST; \ + LOCKDEP_SYS_EXIT; \ + RESTORE_REST; \ + cli; \ + TRACE_IRQS_OFF; + +#else +#define ARCH_LOCKDEP_SYS_EXIT \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call lockdep_sys_exit; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + +#define ARCH_LOCKDEP_SYS_EXIT_IRQ +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS +# define TRACE_IRQS_ON call trace_hardirqs_on_thunk; +# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; +#else +# define TRACE_IRQS_ON +# define TRACE_IRQS_OFF +#endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define LOCKDEP_SYS_EXIT ARCH_LOCKDEP_SYS_EXIT +# define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ +# else +# define LOCKDEP_SYS_EXIT +# define LOCKDEP_SYS_EXIT_IRQ +# endif + +#endif /* __ASSEMBLY__ */ +#endif diff --git a/arch/x86/include/asm/ist.h b/arch/x86/include/asm/ist.h new file mode 100644 index 00000000000..7e5dff1de0e --- /dev/null +++ b/arch/x86/include/asm/ist.h @@ -0,0 +1,34 @@ +#ifndef _ASM_X86_IST_H +#define _ASM_X86_IST_H + +/* + * Include file for the interface to IST BIOS + * Copyright 2002 Andy Grover <andrew.grover@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + + +#include <linux/types.h> + +struct ist_info { + __u32 signature; + __u32 command; + __u32 event; + __u32 perf_level; +}; + +#ifdef __KERNEL__ + +extern struct ist_info ist_info; + +#endif /* __KERNEL__ */ +#endif /* _ASM_X86_IST_H */ diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h new file mode 100644 index 00000000000..54c8cc53b24 --- /dev/null +++ b/arch/x86/include/asm/k8.h @@ -0,0 +1,15 @@ +#ifndef _ASM_X86_K8_H +#define _ASM_X86_K8_H + +#include <linux/pci.h> + +extern struct pci_device_id k8_nb_ids[]; + +extern int early_is_k8_nb(u32 value); +extern struct pci_dev **k8_northbridges; +extern int num_k8_northbridges; +extern int cache_k8_northbridges(void); +extern void k8_flush_garts(void); +extern int k8_scan_nodes(unsigned long start, unsigned long end); + +#endif /* _ASM_X86_K8_H */ diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h new file mode 100644 index 00000000000..fa7c0b97476 --- /dev/null +++ b/arch/x86/include/asm/kdebug.h @@ -0,0 +1,37 @@ +#ifndef _ASM_X86_KDEBUG_H +#define _ASM_X86_KDEBUG_H + +#include <linux/notifier.h> + +struct pt_regs; + +/* Grossly misnamed. */ +enum die_val { + DIE_OOPS = 1, + DIE_INT3, + DIE_DEBUG, + DIE_PANIC, + DIE_NMI, + DIE_DIE, + DIE_NMIWATCHDOG, + DIE_KERNELDEBUG, + DIE_TRAP, + DIE_GPF, + DIE_CALL, + DIE_NMI_IPI, + DIE_PAGE_FAULT, + DIE_NMIUNKNOWN, +}; + +extern void printk_address(unsigned long address, int reliable); +extern void die(const char *, struct pt_regs *,long); +extern int __must_check __die(const char *, struct pt_regs *, long); +extern void show_registers(struct pt_regs *regs); +extern void show_trace(struct task_struct *t, struct pt_regs *regs, + unsigned long *sp, unsigned long bp); +extern void __show_regs(struct pt_regs *regs, int all); +extern void show_regs(struct pt_regs *regs); +extern unsigned long oops_begin(void); +extern void oops_end(unsigned long, struct pt_regs *, int signr); + +#endif /* _ASM_X86_KDEBUG_H */ diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h new file mode 100644 index 00000000000..a1f22771a15 --- /dev/null +++ b/arch/x86/include/asm/kexec.h @@ -0,0 +1,175 @@ +#ifndef _ASM_X86_KEXEC_H +#define _ASM_X86_KEXEC_H + +#ifdef CONFIG_X86_32 +# define PA_CONTROL_PAGE 0 +# define VA_CONTROL_PAGE 1 +# define PA_PGD 2 +# define VA_PGD 3 +# define PA_PTE_0 4 +# define VA_PTE_0 5 +# define PA_PTE_1 6 +# define VA_PTE_1 7 +# define PA_SWAP_PAGE 8 +# ifdef CONFIG_X86_PAE +# define PA_PMD_0 9 +# define VA_PMD_0 10 +# define PA_PMD_1 11 +# define VA_PMD_1 12 +# define PAGES_NR 13 +# else +# define PAGES_NR 9 +# endif +#else +# define PA_CONTROL_PAGE 0 +# define VA_CONTROL_PAGE 1 +# define PA_PGD 2 +# define VA_PGD 3 +# define PA_PUD_0 4 +# define VA_PUD_0 5 +# define PA_PMD_0 6 +# define VA_PMD_0 7 +# define PA_PTE_0 8 +# define VA_PTE_0 9 +# define PA_PUD_1 10 +# define VA_PUD_1 11 +# define PA_PMD_1 12 +# define VA_PMD_1 13 +# define PA_PTE_1 14 +# define VA_PTE_1 15 +# define PA_TABLE_PAGE 16 +# define PAGES_NR 17 +#endif + +#ifdef CONFIG_X86_32 +# define KEXEC_CONTROL_CODE_MAX_SIZE 2048 +#endif + +#ifndef __ASSEMBLY__ + +#include <linux/string.h> + +#include <asm/page.h> +#include <asm/ptrace.h> + +/* + * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. + * I.e. Maximum page that is mapped directly into kernel memory, + * and kmap is not required. + * + * So far x86_64 is limited to 40 physical address bits. + */ +#ifdef CONFIG_X86_32 +/* Maximum physical address we can use pages from */ +# define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) +/* Maximum address we can reach in physical address mode */ +# define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) +/* Maximum address we can use for the control code buffer */ +# define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE + +# define KEXEC_CONTROL_PAGE_SIZE 4096 + +/* The native architecture */ +# define KEXEC_ARCH KEXEC_ARCH_386 + +/* We can also handle crash dumps from 64 bit kernel. */ +# define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64) +#else +/* Maximum physical address we can use pages from */ +# define KEXEC_SOURCE_MEMORY_LIMIT (0xFFFFFFFFFFUL) +/* Maximum address we can reach in physical address mode */ +# define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL) +/* Maximum address we can use for the control pages */ +# define KEXEC_CONTROL_MEMORY_LIMIT (0xFFFFFFFFFFUL) + +/* Allocate one page for the pdp and the second for the code */ +# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL) + +/* The native architecture */ +# define KEXEC_ARCH KEXEC_ARCH_X86_64 +#endif + +/* + * CPU does not save ss and sp on stack if execution is already + * running in kernel mode at the time of NMI occurrence. This code + * fixes it. + */ +static inline void crash_fixup_ss_esp(struct pt_regs *newregs, + struct pt_regs *oldregs) +{ +#ifdef CONFIG_X86_32 + newregs->sp = (unsigned long)&(oldregs->sp); + asm volatile("xorl %%eax, %%eax\n\t" + "movw %%ss, %%ax\n\t" + :"=a"(newregs->ss)); +#endif +} + +/* + * This function is responsible for capturing register states if coming + * via panic otherwise just fix up the ss and sp if coming via kernel + * mode exception. + */ +static inline void crash_setup_regs(struct pt_regs *newregs, + struct pt_regs *oldregs) +{ + if (oldregs) { + memcpy(newregs, oldregs, sizeof(*newregs)); + crash_fixup_ss_esp(newregs, oldregs); + } else { +#ifdef CONFIG_X86_32 + asm volatile("movl %%ebx,%0" : "=m"(newregs->bx)); + asm volatile("movl %%ecx,%0" : "=m"(newregs->cx)); + asm volatile("movl %%edx,%0" : "=m"(newregs->dx)); + asm volatile("movl %%esi,%0" : "=m"(newregs->si)); + asm volatile("movl %%edi,%0" : "=m"(newregs->di)); + asm volatile("movl %%ebp,%0" : "=m"(newregs->bp)); + asm volatile("movl %%eax,%0" : "=m"(newregs->ax)); + asm volatile("movl %%esp,%0" : "=m"(newregs->sp)); + asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss)); + asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs)); + asm volatile("movl %%ds, %%eax;" :"=a"(newregs->ds)); + asm volatile("movl %%es, %%eax;" :"=a"(newregs->es)); + asm volatile("pushfl; popl %0" :"=m"(newregs->flags)); +#else + asm volatile("movq %%rbx,%0" : "=m"(newregs->bx)); + asm volatile("movq %%rcx,%0" : "=m"(newregs->cx)); + asm volatile("movq %%rdx,%0" : "=m"(newregs->dx)); + asm volatile("movq %%rsi,%0" : "=m"(newregs->si)); + asm volatile("movq %%rdi,%0" : "=m"(newregs->di)); + asm volatile("movq %%rbp,%0" : "=m"(newregs->bp)); + asm volatile("movq %%rax,%0" : "=m"(newregs->ax)); + asm volatile("movq %%rsp,%0" : "=m"(newregs->sp)); + asm volatile("movq %%r8,%0" : "=m"(newregs->r8)); + asm volatile("movq %%r9,%0" : "=m"(newregs->r9)); + asm volatile("movq %%r10,%0" : "=m"(newregs->r10)); + asm volatile("movq %%r11,%0" : "=m"(newregs->r11)); + asm volatile("movq %%r12,%0" : "=m"(newregs->r12)); + asm volatile("movq %%r13,%0" : "=m"(newregs->r13)); + asm volatile("movq %%r14,%0" : "=m"(newregs->r14)); + asm volatile("movq %%r15,%0" : "=m"(newregs->r15)); + asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss)); + asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs)); + asm volatile("pushfq; popq %0" :"=m"(newregs->flags)); +#endif + newregs->ip = (unsigned long)current_text_addr(); + } +} + +#ifdef CONFIG_X86_32 +asmlinkage unsigned long +relocate_kernel(unsigned long indirection_page, + unsigned long control_page, + unsigned long start_address, + unsigned int has_pae, + unsigned int preserve_context); +#else +NORET_TYPE void +relocate_kernel(unsigned long indirection_page, + unsigned long page_list, + unsigned long start_address) ATTRIB_NORET; +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_KEXEC_H */ diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h new file mode 100644 index 00000000000..e6c6c808489 --- /dev/null +++ b/arch/x86/include/asm/kgdb.h @@ -0,0 +1,79 @@ +#ifndef _ASM_X86_KGDB_H +#define _ASM_X86_KGDB_H + +/* + * Copyright (C) 2001-2004 Amit S. Kale + * Copyright (C) 2008 Wind River Systems, Inc. + */ + +/* + * BUFMAX defines the maximum number of characters in inbound/outbound + * buffers at least NUMREGBYTES*2 are needed for register packets + * Longer buffer is needed to list all threads + */ +#define BUFMAX 1024 + +/* + * Note that this register image is in a different order than + * the register image that Linux produces at interrupt time. + * + * Linux's register image is defined by struct pt_regs in ptrace.h. + * Just why GDB uses a different order is a historical mystery. + */ +#ifdef CONFIG_X86_32 +enum regnames { + GDB_AX, /* 0 */ + GDB_CX, /* 1 */ + GDB_DX, /* 2 */ + GDB_BX, /* 3 */ + GDB_SP, /* 4 */ + GDB_BP, /* 5 */ + GDB_SI, /* 6 */ + GDB_DI, /* 7 */ + GDB_PC, /* 8 also known as eip */ + GDB_PS, /* 9 also known as eflags */ + GDB_CS, /* 10 */ + GDB_SS, /* 11 */ + GDB_DS, /* 12 */ + GDB_ES, /* 13 */ + GDB_FS, /* 14 */ + GDB_GS, /* 15 */ +}; +#define NUMREGBYTES ((GDB_GS+1)*4) +#else /* ! CONFIG_X86_32 */ +enum regnames64 { + GDB_AX, /* 0 */ + GDB_BX, /* 1 */ + GDB_CX, /* 2 */ + GDB_DX, /* 3 */ + GDB_SI, /* 4 */ + GDB_DI, /* 5 */ + GDB_BP, /* 6 */ + GDB_SP, /* 7 */ + GDB_R8, /* 8 */ + GDB_R9, /* 9 */ + GDB_R10, /* 10 */ + GDB_R11, /* 11 */ + GDB_R12, /* 12 */ + GDB_R13, /* 13 */ + GDB_R14, /* 14 */ + GDB_R15, /* 15 */ + GDB_PC, /* 16 */ +}; + +enum regnames32 { + GDB_PS = 34, + GDB_CS, + GDB_SS, +}; +#define NUMREGBYTES ((GDB_SS+1)*4) +#endif /* CONFIG_X86_32 */ + +static inline void arch_kgdb_breakpoint(void) +{ + asm(" int $3"); +} +#define BREAK_INSTR_SIZE 1 +#define CACHE_FLUSH_IS_SAFE 1 + +#endif /* _ASM_X86_KGDB_H */ diff --git a/arch/x86/include/asm/kmap_types.h b/arch/x86/include/asm/kmap_types.h new file mode 100644 index 00000000000..5759c165a5c --- /dev/null +++ b/arch/x86/include/asm/kmap_types.h @@ -0,0 +1,29 @@ +#ifndef _ASM_X86_KMAP_TYPES_H +#define _ASM_X86_KMAP_TYPES_H + +#if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM) +# define D(n) __KM_FENCE_##n , +#else +# define D(n) +#endif + +enum km_type { +D(0) KM_BOUNCE_READ, +D(1) KM_SKB_SUNRPC_DATA, +D(2) KM_SKB_DATA_SOFTIRQ, +D(3) KM_USER0, +D(4) KM_USER1, +D(5) KM_BIO_SRC_IRQ, +D(6) KM_BIO_DST_IRQ, +D(7) KM_PTE0, +D(8) KM_PTE1, +D(9) KM_IRQ0, +D(10) KM_IRQ1, +D(11) KM_SOFTIRQ0, +D(12) KM_SOFTIRQ1, +D(13) KM_TYPE_NR +}; + +#undef D + +#endif /* _ASM_X86_KMAP_TYPES_H */ diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h new file mode 100644 index 00000000000..4fe681de1e7 --- /dev/null +++ b/arch/x86/include/asm/kprobes.h @@ -0,0 +1,88 @@ +#ifndef _ASM_X86_KPROBES_H +#define _ASM_X86_KPROBES_H +/* + * Kernel Probes (KProbes) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004 + * + * See arch/x86/kernel/kprobes.c for x86 kprobes history. + */ +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/percpu.h> + +#define __ARCH_WANT_KPROBES_INSN_SLOT + +struct pt_regs; +struct kprobe; + +typedef u8 kprobe_opcode_t; +#define BREAKPOINT_INSTRUCTION 0xcc +#define RELATIVEJUMP_INSTRUCTION 0xe9 +#define MAX_INSN_SIZE 16 +#define MAX_STACK_SIZE 64 +#define MIN_STACK_SIZE(ADDR) \ + (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ + THREAD_SIZE - (unsigned long)(ADDR))) \ + ? (MAX_STACK_SIZE) \ + : (((unsigned long)current_thread_info()) + \ + THREAD_SIZE - (unsigned long)(ADDR))) + +#define flush_insn_slot(p) do { } while (0) + +extern const int kretprobe_blacklist_size; + +void arch_remove_kprobe(struct kprobe *p); +void kretprobe_trampoline(void); + +/* Architecture specific copy of original instruction*/ +struct arch_specific_insn { + /* copy of the original instruction */ + kprobe_opcode_t *insn; + /* + * boostable = -1: This instruction type is not boostable. + * boostable = 0: This instruction type is boostable. + * boostable = 1: This instruction has been boosted: we have + * added a relative jump after the instruction copy in insn, + * so no single-step and fixup are needed (unless there's + * a post_handler or break_handler). + */ + int boostable; +}; + +struct prev_kprobe { + struct kprobe *kp; + unsigned long status; + unsigned long old_flags; + unsigned long saved_flags; +}; + +/* per-cpu kprobe control block */ +struct kprobe_ctlblk { + unsigned long kprobe_status; + unsigned long kprobe_old_flags; + unsigned long kprobe_saved_flags; + unsigned long *jprobe_saved_sp; + struct pt_regs jprobe_saved_regs; + kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; + struct prev_kprobe prev_kprobe; +}; + +extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); +extern int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); +#endif /* _ASM_X86_KPROBES_H */ diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h new file mode 100644 index 00000000000..b95162af0bf --- /dev/null +++ b/arch/x86/include/asm/kvm.h @@ -0,0 +1,211 @@ +#ifndef _ASM_X86_KVM_H +#define _ASM_X86_KVM_H + +/* + * KVM x86 specific structures and definitions + * + */ + +#include <asm/types.h> +#include <linux/ioctl.h> + +/* Architectural interrupt line count. */ +#define KVM_NR_INTERRUPTS 256 + +struct kvm_memory_alias { + __u32 slot; /* this has a different namespace than memory slots */ + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; + __u64 target_phys_addr; +}; + +/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */ +struct kvm_pic_state { + __u8 last_irr; /* edge detection */ + __u8 irr; /* interrupt request register */ + __u8 imr; /* interrupt mask register */ + __u8 isr; /* interrupt service register */ + __u8 priority_add; /* highest irq priority */ + __u8 irq_base; + __u8 read_reg_select; + __u8 poll; + __u8 special_mask; + __u8 init_state; + __u8 auto_eoi; + __u8 rotate_on_auto_eoi; + __u8 special_fully_nested_mode; + __u8 init4; /* true if 4 byte init */ + __u8 elcr; /* PIIX edge/trigger selection */ + __u8 elcr_mask; +}; + +#define KVM_IOAPIC_NUM_PINS 24 +struct kvm_ioapic_state { + __u64 base_address; + __u32 ioregsel; + __u32 id; + __u32 irr; + __u32 pad; + union { + __u64 bits; + struct { + __u8 vector; + __u8 delivery_mode:3; + __u8 dest_mode:1; + __u8 delivery_status:1; + __u8 polarity:1; + __u8 remote_irr:1; + __u8 trig_mode:1; + __u8 mask:1; + __u8 reserve:7; + __u8 reserved[4]; + __u8 dest_id; + } fields; + } redirtbl[KVM_IOAPIC_NUM_PINS]; +}; + +#define KVM_IRQCHIP_PIC_MASTER 0 +#define KVM_IRQCHIP_PIC_SLAVE 1 +#define KVM_IRQCHIP_IOAPIC 2 + +/* for KVM_GET_REGS and KVM_SET_REGS */ +struct kvm_regs { + /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ + __u64 rax, rbx, rcx, rdx; + __u64 rsi, rdi, rsp, rbp; + __u64 r8, r9, r10, r11; + __u64 r12, r13, r14, r15; + __u64 rip, rflags; +}; + +/* for KVM_GET_LAPIC and KVM_SET_LAPIC */ +#define KVM_APIC_REG_SIZE 0x400 +struct kvm_lapic_state { + char regs[KVM_APIC_REG_SIZE]; +}; + +struct kvm_segment { + __u64 base; + __u32 limit; + __u16 selector; + __u8 type; + __u8 present, dpl, db, s, l, g, avl; + __u8 unusable; + __u8 padding; +}; + +struct kvm_dtable { + __u64 base; + __u16 limit; + __u16 padding[3]; +}; + + +/* for KVM_GET_SREGS and KVM_SET_SREGS */ +struct kvm_sregs { + /* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */ + struct kvm_segment cs, ds, es, fs, gs, ss; + struct kvm_segment tr, ldt; + struct kvm_dtable gdt, idt; + __u64 cr0, cr2, cr3, cr4, cr8; + __u64 efer; + __u64 apic_base; + __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; +}; + +/* for KVM_GET_FPU and KVM_SET_FPU */ +struct kvm_fpu { + __u8 fpr[8][16]; + __u16 fcw; + __u16 fsw; + __u8 ftwx; /* in fxsave format */ + __u8 pad1; + __u16 last_opcode; + __u64 last_ip; + __u64 last_dp; + __u8 xmm[16][16]; + __u32 mxcsr; + __u32 pad2; +}; + +struct kvm_msr_entry { + __u32 index; + __u32 reserved; + __u64 data; +}; + +/* for KVM_GET_MSRS and KVM_SET_MSRS */ +struct kvm_msrs { + __u32 nmsrs; /* number of msrs in entries */ + __u32 pad; + + struct kvm_msr_entry entries[0]; +}; + +/* for KVM_GET_MSR_INDEX_LIST */ +struct kvm_msr_list { + __u32 nmsrs; /* number of msrs in entries */ + __u32 indices[0]; +}; + + +struct kvm_cpuid_entry { + __u32 function; + __u32 eax; + __u32 ebx; + __u32 ecx; + __u32 edx; + __u32 padding; +}; + +/* for KVM_SET_CPUID */ +struct kvm_cpuid { + __u32 nent; + __u32 padding; + struct kvm_cpuid_entry entries[0]; +}; + +struct kvm_cpuid_entry2 { + __u32 function; + __u32 index; + __u32 flags; + __u32 eax; + __u32 ebx; + __u32 ecx; + __u32 edx; + __u32 padding[3]; +}; + +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 +#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 +#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 + +/* for KVM_SET_CPUID2 */ +struct kvm_cpuid2 { + __u32 nent; + __u32 padding; + struct kvm_cpuid_entry2 entries[0]; +}; + +/* for KVM_GET_PIT and KVM_SET_PIT */ +struct kvm_pit_channel_state { + __u32 count; /* can be 65536 */ + __u16 latched_count; + __u8 count_latched; + __u8 status_latched; + __u8 status; + __u8 read_state; + __u8 write_state; + __u8 write_latch; + __u8 rw_mode; + __u8 mode; + __u8 bcd; + __u8 gate; + __s64 count_load_time; +}; + +struct kvm_pit_state { + struct kvm_pit_channel_state channels[3]; +}; +#endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h new file mode 100644 index 00000000000..65679d00633 --- /dev/null +++ b/arch/x86/include/asm/kvm_host.h @@ -0,0 +1,752 @@ +/* + * Kernel-based Virtual Machine driver for Linux + * + * This header defines architecture specific interfaces, x86 version + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef _ASM_X86_KVM_HOST_H +#define _ASM_X86_KVM_HOST_H + +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/mmu_notifier.h> + +#include <linux/kvm.h> +#include <linux/kvm_para.h> +#include <linux/kvm_types.h> + +#include <asm/pvclock-abi.h> +#include <asm/desc.h> + +#define KVM_MAX_VCPUS 16 +#define KVM_MEMORY_SLOTS 32 +/* memory slots that does not exposed to userspace */ +#define KVM_PRIVATE_MEM_SLOTS 4 + +#define KVM_PIO_PAGE_OFFSET 1 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 2 + +#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) +#define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) +#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ + 0xFFFFFF0000000000ULL) + +#define KVM_GUEST_CR0_MASK \ + (X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE \ + | X86_CR0_NW | X86_CR0_CD) +#define KVM_VM_CR0_ALWAYS_ON \ + (X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE | X86_CR0_TS \ + | X86_CR0_MP) +#define KVM_GUEST_CR4_MASK \ + (X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE) +#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) +#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) + +#define INVALID_PAGE (~(hpa_t)0) +#define UNMAPPED_GVA (~(gpa_t)0) + +/* shadow tables are PAE even on non-PAE hosts */ +#define KVM_HPAGE_SHIFT 21 +#define KVM_HPAGE_SIZE (1UL << KVM_HPAGE_SHIFT) +#define KVM_HPAGE_MASK (~(KVM_HPAGE_SIZE - 1)) + +#define KVM_PAGES_PER_HPAGE (KVM_HPAGE_SIZE / PAGE_SIZE) + +#define DE_VECTOR 0 +#define DB_VECTOR 1 +#define BP_VECTOR 3 +#define OF_VECTOR 4 +#define BR_VECTOR 5 +#define UD_VECTOR 6 +#define NM_VECTOR 7 +#define DF_VECTOR 8 +#define TS_VECTOR 10 +#define NP_VECTOR 11 +#define SS_VECTOR 12 +#define GP_VECTOR 13 +#define PF_VECTOR 14 +#define MF_VECTOR 16 +#define MC_VECTOR 18 + +#define SELECTOR_TI_MASK (1 << 2) +#define SELECTOR_RPL_MASK 0x03 + +#define IOPL_SHIFT 12 + +#define KVM_ALIAS_SLOTS 4 + +#define KVM_PERMILLE_MMU_PAGES 20 +#define KVM_MIN_ALLOC_MMU_PAGES 64 +#define KVM_MMU_HASH_SHIFT 10 +#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) +#define KVM_MIN_FREE_MMU_PAGES 5 +#define KVM_REFILL_PAGES 25 +#define KVM_MAX_CPUID_ENTRIES 40 +#define KVM_NR_VAR_MTRR 8 + +extern spinlock_t kvm_lock; +extern struct list_head vm_list; + +struct kvm_vcpu; +struct kvm; + +enum kvm_reg { + VCPU_REGS_RAX = 0, + VCPU_REGS_RCX = 1, + VCPU_REGS_RDX = 2, + VCPU_REGS_RBX = 3, + VCPU_REGS_RSP = 4, + VCPU_REGS_RBP = 5, + VCPU_REGS_RSI = 6, + VCPU_REGS_RDI = 7, +#ifdef CONFIG_X86_64 + VCPU_REGS_R8 = 8, + VCPU_REGS_R9 = 9, + VCPU_REGS_R10 = 10, + VCPU_REGS_R11 = 11, + VCPU_REGS_R12 = 12, + VCPU_REGS_R13 = 13, + VCPU_REGS_R14 = 14, + VCPU_REGS_R15 = 15, +#endif + VCPU_REGS_RIP, + NR_VCPU_REGS +}; + +enum { + VCPU_SREG_ES, + VCPU_SREG_CS, + VCPU_SREG_SS, + VCPU_SREG_DS, + VCPU_SREG_FS, + VCPU_SREG_GS, + VCPU_SREG_TR, + VCPU_SREG_LDTR, +}; + +#include <asm/kvm_x86_emulate.h> + +#define KVM_NR_MEM_OBJS 40 + +struct kvm_guest_debug { + int enabled; + unsigned long bp[4]; + int singlestep; +}; + +/* + * We don't want allocation failures within the mmu code, so we preallocate + * enough memory for a single page fault in a cache. + */ +struct kvm_mmu_memory_cache { + int nobjs; + void *objects[KVM_NR_MEM_OBJS]; +}; + +#define NR_PTE_CHAIN_ENTRIES 5 + +struct kvm_pte_chain { + u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES]; + struct hlist_node link; +}; + +/* + * kvm_mmu_page_role, below, is defined as: + * + * bits 0:3 - total guest paging levels (2-4, or zero for real mode) + * bits 4:7 - page table level for this shadow (1-4) + * bits 8:9 - page table quadrant for 2-level guests + * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode) + * bits 17:19 - common access permissions for all ptes in this shadow page + */ +union kvm_mmu_page_role { + unsigned word; + struct { + unsigned glevels:4; + unsigned level:4; + unsigned quadrant:2; + unsigned pad_for_nice_hex_output:6; + unsigned metaphysical:1; + unsigned access:3; + unsigned invalid:1; + }; +}; + +struct kvm_mmu_page { + struct list_head link; + struct hlist_node hash_link; + + /* + * The following two entries are used to key the shadow page in the + * hash table. + */ + gfn_t gfn; + union kvm_mmu_page_role role; + + u64 *spt; + /* hold the gfn of each spte inside spt */ + gfn_t *gfns; + unsigned long slot_bitmap; /* One bit set per slot which has memory + * in this shadow page. + */ + int multimapped; /* More than one parent_pte? */ + int root_count; /* Currently serving as active root */ + bool unsync; + bool unsync_children; + union { + u64 *parent_pte; /* !multimapped */ + struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ + }; + DECLARE_BITMAP(unsync_child_bitmap, 512); +}; + +struct kvm_pv_mmu_op_buffer { + void *ptr; + unsigned len; + unsigned processed; + char buf[512] __aligned(sizeof(long)); +}; + +/* + * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level + * 32-bit). The kvm_mmu structure abstracts the details of the current mmu + * mode. + */ +struct kvm_mmu { + void (*new_cr3)(struct kvm_vcpu *vcpu); + int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); + void (*free)(struct kvm_vcpu *vcpu); + gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva); + void (*prefetch_page)(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *page); + int (*sync_page)(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp); + void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); + hpa_t root_hpa; + int root_level; + int shadow_root_level; + + u64 *pae_root; +}; + +struct kvm_vcpu_arch { + u64 host_tsc; + int interrupt_window_open; + unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ + DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS); + /* + * rip and regs accesses must go through + * kvm_{register,rip}_{read,write} functions. + */ + unsigned long regs[NR_VCPU_REGS]; + u32 regs_avail; + u32 regs_dirty; + + unsigned long cr0; + unsigned long cr2; + unsigned long cr3; + unsigned long cr4; + unsigned long cr8; + u64 pdptrs[4]; /* pae */ + u64 shadow_efer; + u64 apic_base; + struct kvm_lapic *apic; /* kernel irqchip context */ + int mp_state; + int sipi_vector; + u64 ia32_misc_enable_msr; + bool tpr_access_reporting; + + struct kvm_mmu mmu; + /* only needed in kvm_pv_mmu_op() path, but it's hot so + * put it here to avoid allocation */ + struct kvm_pv_mmu_op_buffer mmu_op_buffer; + + struct kvm_mmu_memory_cache mmu_pte_chain_cache; + struct kvm_mmu_memory_cache mmu_rmap_desc_cache; + struct kvm_mmu_memory_cache mmu_page_cache; + struct kvm_mmu_memory_cache mmu_page_header_cache; + + gfn_t last_pt_write_gfn; + int last_pt_write_count; + u64 *last_pte_updated; + gfn_t last_pte_gfn; + + struct { + gfn_t gfn; /* presumed gfn during guest pte update */ + pfn_t pfn; /* pfn corresponding to that gfn */ + int largepage; + unsigned long mmu_seq; + } update_pte; + + struct i387_fxsave_struct host_fx_image; + struct i387_fxsave_struct guest_fx_image; + + gva_t mmio_fault_cr2; + struct kvm_pio_request pio; + void *pio_data; + + struct kvm_queued_exception { + bool pending; + bool has_error_code; + u8 nr; + u32 error_code; + } exception; + + struct kvm_queued_interrupt { + bool pending; + u8 nr; + } interrupt; + + struct { + int active; + u8 save_iopl; + struct kvm_save_segment { + u16 selector; + unsigned long base; + u32 limit; + u32 ar; + } tr, es, ds, fs, gs; + } rmode; + int halt_request; /* real mode on Intel only */ + + int cpuid_nent; + struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES]; + /* emulate context */ + + struct x86_emulate_ctxt emulate_ctxt; + + gpa_t time; + struct pvclock_vcpu_time_info hv_clock; + unsigned int hv_clock_tsc_khz; + unsigned int time_offset; + struct page *time_page; + + bool nmi_pending; + bool nmi_injected; + + u64 mtrr[0x100]; +}; + +struct kvm_mem_alias { + gfn_t base_gfn; + unsigned long npages; + gfn_t target_gfn; +}; + +struct kvm_arch{ + int naliases; + struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; + + unsigned int n_free_mmu_pages; + unsigned int n_requested_mmu_pages; + unsigned int n_alloc_mmu_pages; + struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; + /* + * Hash table of struct kvm_mmu_page. + */ + struct list_head active_mmu_pages; + struct list_head assigned_dev_head; + struct dmar_domain *intel_iommu_domain; + struct kvm_pic *vpic; + struct kvm_ioapic *vioapic; + struct kvm_pit *vpit; + struct hlist_head irq_ack_notifier_list; + + int round_robin_prev_vcpu; + unsigned int tss_addr; + struct page *apic_access_page; + + gpa_t wall_clock; + + struct page *ept_identity_pagetable; + bool ept_identity_pagetable_done; +}; + +struct kvm_vm_stat { + u32 mmu_shadow_zapped; + u32 mmu_pte_write; + u32 mmu_pte_updated; + u32 mmu_pde_zapped; + u32 mmu_flooded; + u32 mmu_recycled; + u32 mmu_cache_miss; + u32 mmu_unsync; + u32 remote_tlb_flush; + u32 lpages; +}; + +struct kvm_vcpu_stat { + u32 pf_fixed; + u32 pf_guest; + u32 tlb_flush; + u32 invlpg; + + u32 exits; + u32 io_exits; + u32 mmio_exits; + u32 signal_exits; + u32 irq_window_exits; + u32 nmi_window_exits; + u32 halt_exits; + u32 halt_wakeup; + u32 request_irq_exits; + u32 irq_exits; + u32 host_state_reload; + u32 efer_reload; + u32 fpu_reload; + u32 insn_emulation; + u32 insn_emulation_fail; + u32 hypercalls; + u32 irq_injections; +}; + +struct descriptor_table { + u16 limit; + unsigned long base; +} __attribute__((packed)); + +struct kvm_x86_ops { + int (*cpu_has_kvm_support)(void); /* __init */ + int (*disabled_by_bios)(void); /* __init */ + void (*hardware_enable)(void *dummy); /* __init */ + void (*hardware_disable)(void *dummy); + void (*check_processor_compatibility)(void *rtn); + int (*hardware_setup)(void); /* __init */ + void (*hardware_unsetup)(void); /* __exit */ + bool (*cpu_has_accelerated_tpr)(void); + + /* Create, but do not attach this VCPU */ + struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); + void (*vcpu_free)(struct kvm_vcpu *vcpu); + int (*vcpu_reset)(struct kvm_vcpu *vcpu); + + void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); + void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); + void (*vcpu_put)(struct kvm_vcpu *vcpu); + + int (*set_guest_debug)(struct kvm_vcpu *vcpu, + struct kvm_debug_guest *dbg); + void (*guest_debug_pre)(struct kvm_vcpu *vcpu); + int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); + int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); + u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); + void (*get_segment)(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg); + int (*get_cpl)(struct kvm_vcpu *vcpu); + void (*set_segment)(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg); + void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); + void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); + void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); + void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); + void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); + void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); + void (*get_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); + void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); + void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); + void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); + unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr); + void (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value, + int *exception); + void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); + unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); + void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); + + void (*tlb_flush)(struct kvm_vcpu *vcpu); + + void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); + int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); + void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); + void (*patch_hypercall)(struct kvm_vcpu *vcpu, + unsigned char *hypercall_addr); + int (*get_irq)(struct kvm_vcpu *vcpu); + void (*set_irq)(struct kvm_vcpu *vcpu, int vec); + void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, + bool has_error_code, u32 error_code); + bool (*exception_injected)(struct kvm_vcpu *vcpu); + void (*inject_pending_irq)(struct kvm_vcpu *vcpu); + void (*inject_pending_vectors)(struct kvm_vcpu *vcpu, + struct kvm_run *run); + + int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); + int (*get_tdp_level)(void); +}; + +extern struct kvm_x86_ops *kvm_x86_ops; + +int kvm_mmu_module_init(void); +void kvm_mmu_module_exit(void); + +void kvm_mmu_destroy(struct kvm_vcpu *vcpu); +int kvm_mmu_create(struct kvm_vcpu *vcpu); +int kvm_mmu_setup(struct kvm_vcpu *vcpu); +void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); +void kvm_mmu_set_base_ptes(u64 base_pte); +void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, + u64 dirty_mask, u64 nx_mask, u64 x_mask); + +int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); +void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); +void kvm_mmu_zap_all(struct kvm *kvm); +unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); + +int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); + +int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, + const void *val, int bytes); +int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, + gpa_t addr, unsigned long *ret); + +extern bool tdp_enabled; + +enum emulation_result { + EMULATE_DONE, /* no further processing */ + EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ + EMULATE_FAIL, /* can't emulate this instruction */ +}; + +#define EMULTYPE_NO_DECODE (1 << 0) +#define EMULTYPE_TRAP_UD (1 << 1) +int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, + unsigned long cr2, u16 error_code, int emulation_type); +void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); +void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); +void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); +void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, + unsigned long *rflags); + +unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr); +void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, + unsigned long *rflags); +void kvm_enable_efer_bits(u64); +int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); +int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); + +struct x86_emulate_ctxt; + +int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, + int size, unsigned port); +int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, + int size, unsigned long count, int down, + gva_t address, int rep, unsigned port); +void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); +int kvm_emulate_halt(struct kvm_vcpu *vcpu); +int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); +int emulate_clts(struct kvm_vcpu *vcpu); +int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, + unsigned long *dest); +int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, + unsigned long value); + +void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); +int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, + int type_bits, int seg); + +int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); + +void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); +void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); +void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); +unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); +void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); +void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); + +int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); +int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); + +void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); +void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); +void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, + u32 error_code); + +void kvm_pic_set_irq(void *opaque, int irq, int level); + +void kvm_inject_nmi(struct kvm_vcpu *vcpu); + +void fx_init(struct kvm_vcpu *vcpu); + +int emulator_read_std(unsigned long addr, + void *val, + unsigned int bytes, + struct kvm_vcpu *vcpu); +int emulator_write_emulated(unsigned long addr, + const void *val, + unsigned int bytes, + struct kvm_vcpu *vcpu); + +unsigned long segment_base(u16 selector); + +void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); +void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, + const u8 *new, int bytes); +int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); +void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); +int kvm_mmu_load(struct kvm_vcpu *vcpu); +void kvm_mmu_unload(struct kvm_vcpu *vcpu); +void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); + +int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); + +int kvm_fix_hypercall(struct kvm_vcpu *vcpu); + +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code); +void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); + +void kvm_enable_tdp(void); +void kvm_disable_tdp(void); + +int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); +int complete_pio(struct kvm_vcpu *vcpu); + +static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) +{ + struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); + + return (struct kvm_mmu_page *)page_private(page); +} + +static inline u16 kvm_read_fs(void) +{ + u16 seg; + asm("mov %%fs, %0" : "=g"(seg)); + return seg; +} + +static inline u16 kvm_read_gs(void) +{ + u16 seg; + asm("mov %%gs, %0" : "=g"(seg)); + return seg; +} + +static inline u16 kvm_read_ldt(void) +{ + u16 ldt; + asm("sldt %0" : "=g"(ldt)); + return ldt; +} + +static inline void kvm_load_fs(u16 sel) +{ + asm("mov %0, %%fs" : : "rm"(sel)); +} + +static inline void kvm_load_gs(u16 sel) +{ + asm("mov %0, %%gs" : : "rm"(sel)); +} + +static inline void kvm_load_ldt(u16 sel) +{ + asm("lldt %0" : : "rm"(sel)); +} + +static inline void kvm_get_idt(struct descriptor_table *table) +{ + asm("sidt %0" : "=m"(*table)); +} + +static inline void kvm_get_gdt(struct descriptor_table *table) +{ + asm("sgdt %0" : "=m"(*table)); +} + +static inline unsigned long kvm_read_tr_base(void) +{ + u16 tr; + asm("str %0" : "=g"(tr)); + return segment_base(tr); +} + +#ifdef CONFIG_X86_64 +static inline unsigned long read_msr(unsigned long msr) +{ + u64 value; + + rdmsrl(msr, value); + return value; +} +#endif + +static inline void kvm_fx_save(struct i387_fxsave_struct *image) +{ + asm("fxsave (%0)":: "r" (image)); +} + +static inline void kvm_fx_restore(struct i387_fxsave_struct *image) +{ + asm("fxrstor (%0)":: "r" (image)); +} + +static inline void kvm_fx_finit(void) +{ + asm("finit"); +} + +static inline u32 get_rdx_init_val(void) +{ + return 0x600; /* P6 family */ +} + +static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) +{ + kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); +} + +#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" +#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" +#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" +#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" +#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" +#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" +#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" +#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" +#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" +#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" +#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" + +#define MSR_IA32_TIME_STAMP_COUNTER 0x010 + +#define TSS_IOPB_BASE_OFFSET 0x66 +#define TSS_BASE_SIZE 0x68 +#define TSS_IOPB_SIZE (65536 / 8) +#define TSS_REDIRECTION_SIZE (256 / 8) +#define RMODE_TSS_SIZE \ + (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1) + +enum { + TASK_SWITCH_CALL = 0, + TASK_SWITCH_IRET = 1, + TASK_SWITCH_JMP = 2, + TASK_SWITCH_GATE = 3, +}; + +/* + * Hardware virtualization extension instructions may fault if a + * reboot turns off virtualization while processes are running. + * Trap the fault and ignore the instruction if that happens. + */ +asmlinkage void kvm_handle_fault_on_reboot(void); + +#define __kvm_handle_fault_on_reboot(insn) \ + "666: " insn "\n\t" \ + ".pushsection .fixup, \"ax\" \n" \ + "667: \n\t" \ + __ASM_SIZE(push) " $666b \n\t" \ + "jmp kvm_handle_fault_on_reboot \n\t" \ + ".popsection \n\t" \ + ".pushsection __ex_table, \"a\" \n\t" \ + _ASM_PTR " 666b, 667b \n\t" \ + ".popsection" + +#define KVM_ARCH_WANT_MMU_NOTIFIER +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +int kvm_age_hva(struct kvm *kvm, unsigned long hva); + +#endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h new file mode 100644 index 00000000000..b8a3305ae09 --- /dev/null +++ b/arch/x86/include/asm/kvm_para.h @@ -0,0 +1,147 @@ +#ifndef _ASM_X86_KVM_PARA_H +#define _ASM_X86_KVM_PARA_H + +/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It + * should be used to determine that a VM is running under KVM. + */ +#define KVM_CPUID_SIGNATURE 0x40000000 + +/* This CPUID returns a feature bitmap in eax. Before enabling a particular + * paravirtualization, the appropriate feature bit should be checked. + */ +#define KVM_CPUID_FEATURES 0x40000001 +#define KVM_FEATURE_CLOCKSOURCE 0 +#define KVM_FEATURE_NOP_IO_DELAY 1 +#define KVM_FEATURE_MMU_OP 2 + +#define MSR_KVM_WALL_CLOCK 0x11 +#define MSR_KVM_SYSTEM_TIME 0x12 + +#define KVM_MAX_MMU_OP_BATCH 32 + +/* Operations for KVM_HC_MMU_OP */ +#define KVM_MMU_OP_WRITE_PTE 1 +#define KVM_MMU_OP_FLUSH_TLB 2 +#define KVM_MMU_OP_RELEASE_PT 3 + +/* Payload for KVM_HC_MMU_OP */ +struct kvm_mmu_op_header { + __u32 op; + __u32 pad; +}; + +struct kvm_mmu_op_write_pte { + struct kvm_mmu_op_header header; + __u64 pte_phys; + __u64 pte_val; +}; + +struct kvm_mmu_op_flush_tlb { + struct kvm_mmu_op_header header; +}; + +struct kvm_mmu_op_release_pt { + struct kvm_mmu_op_header header; + __u64 pt_phys; +}; + +#ifdef __KERNEL__ +#include <asm/processor.h> + +extern void kvmclock_init(void); + + +/* This instruction is vmcall. On non-VT architectures, it will generate a + * trap that we will then rewrite to the appropriate instruction. + */ +#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" + +/* For KVM hypercalls, a three-byte sequence of either the vmrun or the vmmrun + * instruction. The hypervisor may replace it with something else but only the + * instructions are guaranteed to be supported. + * + * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively. + * The hypercall number should be placed in rax and the return value will be + * placed in rax. No other registers will be clobbered unless explicited + * noted by the particular hypercall. + */ + +static inline long kvm_hypercall0(unsigned int nr) +{ + long ret; + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr) + : "memory"); + return ret; +} + +static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) +{ + long ret; + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr), "b"(p1) + : "memory"); + return ret; +} + +static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, + unsigned long p2) +{ + long ret; + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr), "b"(p1), "c"(p2) + : "memory"); + return ret; +} + +static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3) +{ + long ret; + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr), "b"(p1), "c"(p2), "d"(p3) + : "memory"); + return ret; +} + +static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4) +{ + long ret; + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4) + : "memory"); + return ret; +} + +static inline int kvm_para_available(void) +{ + unsigned int eax, ebx, ecx, edx; + char signature[13]; + + cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); + memcpy(signature + 0, &ebx, 4); + memcpy(signature + 4, &ecx, 4); + memcpy(signature + 8, &edx, 4); + signature[12] = 0; + + if (strcmp(signature, "KVMKVMKVM") == 0) + return 1; + + return 0; +} + +static inline unsigned int kvm_arch_para_features(void) +{ + return cpuid_eax(KVM_CPUID_FEATURES); +} + +#endif + +#endif /* _ASM_X86_KVM_PARA_H */ diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h new file mode 100644 index 00000000000..25179a29f20 --- /dev/null +++ b/arch/x86/include/asm/kvm_x86_emulate.h @@ -0,0 +1,184 @@ +/****************************************************************************** + * x86_emulate.h + * + * Generic x86 (32-bit and 64-bit) instruction decoder and emulator. + * + * Copyright (c) 2005 Keir Fraser + * + * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 + */ + +#ifndef _ASM_X86_KVM_X86_EMULATE_H +#define _ASM_X86_KVM_X86_EMULATE_H + +struct x86_emulate_ctxt; + +/* + * x86_emulate_ops: + * + * These operations represent the instruction emulator's interface to memory. + * There are two categories of operation: those that act on ordinary memory + * regions (*_std), and those that act on memory regions known to require + * special treatment or emulation (*_emulated). + * + * The emulator assumes that an instruction accesses only one 'emulated memory' + * location, that this location is the given linear faulting address (cr2), and + * that this is one of the instruction's data operands. Instruction fetches and + * stack operations are assumed never to access emulated memory. The emulator + * automatically deduces which operand of a string-move operation is accessing + * emulated memory, and assumes that the other operand accesses normal memory. + * + * NOTES: + * 1. The emulator isn't very smart about emulated vs. standard memory. + * 'Emulated memory' access addresses should be checked for sanity. + * 'Normal memory' accesses may fault, and the caller must arrange to + * detect and handle reentrancy into the emulator via recursive faults. + * Accesses may be unaligned and may cross page boundaries. + * 2. If the access fails (cannot emulate, or a standard access faults) then + * it is up to the memop to propagate the fault to the guest VM via + * some out-of-band mechanism, unknown to the emulator. The memop signals + * failure by returning X86EMUL_PROPAGATE_FAULT to the emulator, which will + * then immediately bail. + * 3. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only + * cmpxchg8b_emulated need support 8-byte accesses. + * 4. The emulator cannot handle 64-bit mode emulation on an x86/32 system. + */ +/* Access completed successfully: continue emulation as normal. */ +#define X86EMUL_CONTINUE 0 +/* Access is unhandleable: bail from emulation and return error to caller. */ +#define X86EMUL_UNHANDLEABLE 1 +/* Terminate emulation but return success to the caller. */ +#define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ +#define X86EMUL_RETRY_INSTR 2 /* retry the instruction for some reason */ +#define X86EMUL_CMPXCHG_FAILED 2 /* cmpxchg did not see expected value */ +struct x86_emulate_ops { + /* + * read_std: Read bytes of standard (non-emulated/special) memory. + * Used for instruction fetch, stack operations, and others. + * @addr: [IN ] Linear address from which to read. + * @val: [OUT] Value read from memory, zero-extended to 'u_long'. + * @bytes: [IN ] Number of bytes to read from memory. + */ + int (*read_std)(unsigned long addr, void *val, + unsigned int bytes, struct kvm_vcpu *vcpu); + + /* + * read_emulated: Read bytes from emulated/special memory area. + * @addr: [IN ] Linear address from which to read. + * @val: [OUT] Value read from memory, zero-extended to 'u_long'. + * @bytes: [IN ] Number of bytes to read from memory. + */ + int (*read_emulated)(unsigned long addr, + void *val, + unsigned int bytes, + struct kvm_vcpu *vcpu); + + /* + * write_emulated: Read bytes from emulated/special memory area. + * @addr: [IN ] Linear address to which to write. + * @val: [IN ] Value to write to memory (low-order bytes used as + * required). + * @bytes: [IN ] Number of bytes to write to memory. + */ + int (*write_emulated)(unsigned long addr, + const void *val, + unsigned int bytes, + struct kvm_vcpu *vcpu); + + /* + * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an + * emulated/special memory area. + * @addr: [IN ] Linear address to access. + * @old: [IN ] Value expected to be current at @addr. + * @new: [IN ] Value to write to @addr. + * @bytes: [IN ] Number of bytes to access using CMPXCHG. + */ + int (*cmpxchg_emulated)(unsigned long addr, + const void *old, + const void *new, + unsigned int bytes, + struct kvm_vcpu *vcpu); + +}; + +/* Type, address-of, and value of an instruction's operand. */ +struct operand { + enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; + unsigned int bytes; + unsigned long val, orig_val, *ptr; +}; + +struct fetch_cache { + u8 data[15]; + unsigned long start; + unsigned long end; +}; + +struct decode_cache { + u8 twobyte; + u8 b; + u8 lock_prefix; + u8 rep_prefix; + u8 op_bytes; + u8 ad_bytes; + u8 rex_prefix; + struct operand src; + struct operand dst; + bool has_seg_override; + u8 seg_override; + unsigned int d; + unsigned long regs[NR_VCPU_REGS]; + unsigned long eip; + /* modrm */ + u8 modrm; + u8 modrm_mod; + u8 modrm_reg; + u8 modrm_rm; + u8 use_modrm_ea; + bool rip_relative; + unsigned long modrm_ea; + void *modrm_ptr; + unsigned long modrm_val; + struct fetch_cache fetch; +}; + +struct x86_emulate_ctxt { + /* Register state before/after emulation. */ + struct kvm_vcpu *vcpu; + + /* Linear faulting address (if emulating a page-faulting instruction) */ + unsigned long eflags; + + /* Emulated execution mode, represented by an X86EMUL_MODE value. */ + int mode; + + u32 cs_base; + + /* decode cache */ + + struct decode_cache decode; +}; + +/* Repeat String Operation Prefix */ +#define REPE_PREFIX 1 +#define REPNE_PREFIX 2 + +/* Execution mode, passed to the emulator. */ +#define X86EMUL_MODE_REAL 0 /* Real mode. */ +#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */ +#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ +#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ + +/* Host execution mode. */ +#if defined(__i386__) +#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 +#elif defined(CONFIG_X86_64) +#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 +#endif + +int x86_decode_insn(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops); +int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops); + +#endif /* _ASM_X86_KVM_X86_EMULATE_H */ diff --git a/arch/x86/include/asm/ldt.h b/arch/x86/include/asm/ldt.h new file mode 100644 index 00000000000..46727eb37bf --- /dev/null +++ b/arch/x86/include/asm/ldt.h @@ -0,0 +1,40 @@ +/* + * ldt.h + * + * Definitions of structures used with the modify_ldt system call. + */ +#ifndef _ASM_X86_LDT_H +#define _ASM_X86_LDT_H + +/* Maximum number of LDT entries supported. */ +#define LDT_ENTRIES 8192 +/* The size of each LDT entry. */ +#define LDT_ENTRY_SIZE 8 + +#ifndef __ASSEMBLY__ +/* + * Note on 64bit base and limit is ignored and you cannot set DS/ES/CS + * not to the default values if you still want to do syscalls. This + * call is more for 32bit mode therefore. + */ +struct user_desc { + unsigned int entry_number; + unsigned int base_addr; + unsigned int limit; + unsigned int seg_32bit:1; + unsigned int contents:2; + unsigned int read_exec_only:1; + unsigned int limit_in_pages:1; + unsigned int seg_not_present:1; + unsigned int useable:1; +#ifdef __x86_64__ + unsigned int lm:1; +#endif +}; + +#define MODIFY_LDT_CONTENTS_DATA 0 +#define MODIFY_LDT_CONTENTS_STACK 1 +#define MODIFY_LDT_CONTENTS_CODE 2 + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_X86_LDT_H */ diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h new file mode 100644 index 00000000000..d28a507cef3 --- /dev/null +++ b/arch/x86/include/asm/lguest.h @@ -0,0 +1,94 @@ +#ifndef _ASM_X86_LGUEST_H +#define _ASM_X86_LGUEST_H + +#define GDT_ENTRY_LGUEST_CS 10 +#define GDT_ENTRY_LGUEST_DS 11 +#define LGUEST_CS (GDT_ENTRY_LGUEST_CS * 8) +#define LGUEST_DS (GDT_ENTRY_LGUEST_DS * 8) + +#ifndef __ASSEMBLY__ +#include <asm/desc.h> + +#define GUEST_PL 1 + +/* Every guest maps the core switcher code. */ +#define SHARED_SWITCHER_PAGES \ + DIV_ROUND_UP(end_switcher_text - start_switcher_text, PAGE_SIZE) +/* Pages for switcher itself, then two pages per cpu */ +#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * NR_CPUS) + +/* We map at -4M for ease of mapping into the guest (one PTE page). */ +#define SWITCHER_ADDR 0xFFC00000 + +/* Found in switcher.S */ +extern unsigned long default_idt_entries[]; + +/* Declarations for definitions in lguest_guest.S */ +extern char lguest_noirq_start[], lguest_noirq_end[]; +extern const char lgstart_cli[], lgend_cli[]; +extern const char lgstart_sti[], lgend_sti[]; +extern const char lgstart_popf[], lgend_popf[]; +extern const char lgstart_pushf[], lgend_pushf[]; +extern const char lgstart_iret[], lgend_iret[]; + +extern void lguest_iret(void); +extern void lguest_init(void); + +struct lguest_regs { + /* Manually saved part. */ + unsigned long eax, ebx, ecx, edx; + unsigned long esi, edi, ebp; + unsigned long gs; + unsigned long fs, ds, es; + unsigned long trapnum, errcode; + /* Trap pushed part */ + unsigned long eip; + unsigned long cs; + unsigned long eflags; + unsigned long esp; + unsigned long ss; +}; + +/* This is a guest-specific page (mapped ro) into the guest. */ +struct lguest_ro_state { + /* Host information we need to restore when we switch back. */ + u32 host_cr3; + struct desc_ptr host_idt_desc; + struct desc_ptr host_gdt_desc; + u32 host_sp; + + /* Fields which are used when guest is running. */ + struct desc_ptr guest_idt_desc; + struct desc_ptr guest_gdt_desc; + struct x86_hw_tss guest_tss; + struct desc_struct guest_idt[IDT_ENTRIES]; + struct desc_struct guest_gdt[GDT_ENTRIES]; +}; + +struct lg_cpu_arch { + /* The GDT entries copied into lguest_ro_state when running. */ + struct desc_struct gdt[GDT_ENTRIES]; + + /* The IDT entries: some copied into lguest_ro_state when running. */ + struct desc_struct idt[IDT_ENTRIES]; + + /* The address of the last guest-visible pagefault (ie. cr2). */ + unsigned long last_pagefault; +}; + +static inline void lguest_set_ts(void) +{ + u32 cr0; + + cr0 = read_cr0(); + if (!(cr0 & 8)) + write_cr0(cr0 | 8); +} + +/* Full 4G segment descriptors, suitable for CS and DS. */ +#define FULL_EXEC_SEGMENT ((struct desc_struct){ { {0x0000ffff, 0x00cf9b00} } }) +#define FULL_SEGMENT ((struct desc_struct){ { {0x0000ffff, 0x00cf9300} } }) + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_LGUEST_H */ diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h new file mode 100644 index 00000000000..43894428c3c --- /dev/null +++ b/arch/x86/include/asm/lguest_hcall.h @@ -0,0 +1,71 @@ +/* Architecture specific portion of the lguest hypercalls */ +#ifndef _ASM_X86_LGUEST_HCALL_H +#define _ASM_X86_LGUEST_HCALL_H + +#define LHCALL_FLUSH_ASYNC 0 +#define LHCALL_LGUEST_INIT 1 +#define LHCALL_SHUTDOWN 2 +#define LHCALL_LOAD_GDT 3 +#define LHCALL_NEW_PGTABLE 4 +#define LHCALL_FLUSH_TLB 5 +#define LHCALL_LOAD_IDT_ENTRY 6 +#define LHCALL_SET_STACK 7 +#define LHCALL_TS 8 +#define LHCALL_SET_CLOCKEVENT 9 +#define LHCALL_HALT 10 +#define LHCALL_SET_PTE 14 +#define LHCALL_SET_PMD 15 +#define LHCALL_LOAD_TLS 16 +#define LHCALL_NOTIFY 17 + +#define LGUEST_TRAP_ENTRY 0x1F + +/* Argument number 3 to LHCALL_LGUEST_SHUTDOWN */ +#define LGUEST_SHUTDOWN_POWEROFF 1 +#define LGUEST_SHUTDOWN_RESTART 2 + +#ifndef __ASSEMBLY__ +#include <asm/hw_irq.h> + +/*G:031 But first, how does our Guest contact the Host to ask for privileged + * operations? There are two ways: the direct way is to make a "hypercall", + * to make requests of the Host Itself. + * + * Our hypercall mechanism uses the highest unused trap code (traps 32 and + * above are used by real hardware interrupts). Fifteen hypercalls are + * available: the hypercall number is put in the %eax register, and the + * arguments (when required) are placed in %edx, %ebx and %ecx. If a return + * value makes sense, it's returned in %eax. + * + * Grossly invalid calls result in Sudden Death at the hands of the vengeful + * Host, rather than returning failure. This reflects Winston Churchill's + * definition of a gentleman: "someone who is only rude intentionally". */ +static inline unsigned long +hcall(unsigned long call, + unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ + /* "int" is the Intel instruction to trigger a trap. */ + asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY) + /* The call in %eax (aka "a") might be overwritten */ + : "=a"(call) + /* The arguments are in %eax, %edx, %ebx & %ecx */ + : "a"(call), "d"(arg1), "b"(arg2), "c"(arg3) + /* "memory" means this might write somewhere in memory. + * This isn't true for all calls, but it's safe to tell + * gcc that it might happen so it doesn't get clever. */ + : "memory"); + return call; +} +/*:*/ + +/* Can't use our min() macro here: needs to be a constant */ +#define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32) + +#define LHCALL_RING_SIZE 64 +struct hcall_args { + /* These map directly onto eax, ebx, ecx, edx in struct lguest_regs */ + unsigned long arg0, arg2, arg3, arg1; +}; + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_X86_LGUEST_HCALL_H */ diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h new file mode 100644 index 00000000000..f61ee8f937e --- /dev/null +++ b/arch/x86/include/asm/linkage.h @@ -0,0 +1,61 @@ +#ifndef _ASM_X86_LINKAGE_H +#define _ASM_X86_LINKAGE_H + +#undef notrace +#define notrace __attribute__((no_instrument_function)) + +#ifdef CONFIG_X86_64 +#define __ALIGN .p2align 4,,15 +#define __ALIGN_STR ".p2align 4,,15" +#endif + +#ifdef CONFIG_X86_32 +#define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) +/* + * For 32-bit UML - mark functions implemented in assembly that use + * regparm input parameters: + */ +#define asmregparm __attribute__((regparm(3))) + +/* + * Make sure the compiler doesn't do anything stupid with the + * arguments on the stack - they are owned by the *caller*, not + * the callee. This just fools gcc into not spilling into them, + * and keeps it from doing tailcall recursion and/or using the + * stack slots for temporaries, since they are live and "used" + * all the way to the end of the function. + * + * NOTE! On x86-64, all the arguments are in registers, so this + * only matters on a 32-bit kernel. + */ +#define asmlinkage_protect(n, ret, args...) \ + __asmlinkage_protect##n(ret, ##args) +#define __asmlinkage_protect_n(ret, args...) \ + __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args) +#define __asmlinkage_protect0(ret) \ + __asmlinkage_protect_n(ret) +#define __asmlinkage_protect1(ret, arg1) \ + __asmlinkage_protect_n(ret, "g" (arg1)) +#define __asmlinkage_protect2(ret, arg1, arg2) \ + __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2)) +#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ + __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3)) +#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ + __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ + "g" (arg4)) +#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ + __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ + "g" (arg4), "g" (arg5)) +#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ + __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ + "g" (arg4), "g" (arg5), "g" (arg6)) + +#endif + +#ifdef CONFIG_X86_ALIGNMENT_16 +#define __ALIGN .align 16,0x90 +#define __ALIGN_STR ".align 16,0x90" +#endif + +#endif /* _ASM_X86_LINKAGE_H */ + diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h new file mode 100644 index 00000000000..47b9b6f1905 --- /dev/null +++ b/arch/x86/include/asm/local.h @@ -0,0 +1,235 @@ +#ifndef _ASM_X86_LOCAL_H +#define _ASM_X86_LOCAL_H + +#include <linux/percpu.h> + +#include <asm/system.h> +#include <asm/atomic.h> +#include <asm/asm.h> + +typedef struct { + atomic_long_t a; +} local_t; + +#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } + +#define local_read(l) atomic_long_read(&(l)->a) +#define local_set(l, i) atomic_long_set(&(l)->a, (i)) + +static inline void local_inc(local_t *l) +{ + asm volatile(_ASM_INC "%0" + : "+m" (l->a.counter)); +} + +static inline void local_dec(local_t *l) +{ + asm volatile(_ASM_DEC "%0" + : "+m" (l->a.counter)); +} + +static inline void local_add(long i, local_t *l) +{ + asm volatile(_ASM_ADD "%1,%0" + : "+m" (l->a.counter) + : "ir" (i)); +} + +static inline void local_sub(long i, local_t *l) +{ + asm volatile(_ASM_SUB "%1,%0" + : "+m" (l->a.counter) + : "ir" (i)); +} + +/** + * local_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @l: pointer to type local_t + * + * Atomically subtracts @i from @l and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline int local_sub_and_test(long i, local_t *l) +{ + unsigned char c; + + asm volatile(_ASM_SUB "%2,%0; sete %1" + : "+m" (l->a.counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +} + +/** + * local_dec_and_test - decrement and test + * @l: pointer to type local_t + * + * Atomically decrements @l by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline int local_dec_and_test(local_t *l) +{ + unsigned char c; + + asm volatile(_ASM_DEC "%0; sete %1" + : "+m" (l->a.counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * local_inc_and_test - increment and test + * @l: pointer to type local_t + * + * Atomically increments @l by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline int local_inc_and_test(local_t *l) +{ + unsigned char c; + + asm volatile(_ASM_INC "%0; sete %1" + : "+m" (l->a.counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * local_add_negative - add and test if negative + * @i: integer value to add + * @l: pointer to type local_t + * + * Atomically adds @i to @l and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline int local_add_negative(long i, local_t *l) +{ + unsigned char c; + + asm volatile(_ASM_ADD "%2,%0; sets %1" + : "+m" (l->a.counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +} + +/** + * local_add_return - add and return + * @i: integer value to add + * @l: pointer to type local_t + * + * Atomically adds @i to @l and returns @i + @l + */ +static inline long local_add_return(long i, local_t *l) +{ + long __i; +#ifdef CONFIG_M386 + unsigned long flags; + if (unlikely(boot_cpu_data.x86 <= 3)) + goto no_xadd; +#endif + /* Modern 486+ processor */ + __i = i; + asm volatile(_ASM_XADD "%0, %1;" + : "+r" (i), "+m" (l->a.counter) + : : "memory"); + return i + __i; + +#ifdef CONFIG_M386 +no_xadd: /* Legacy 386 processor */ + local_irq_save(flags); + __i = local_read(l); + local_set(l, i + __i); + local_irq_restore(flags); + return i + __i; +#endif +} + +static inline long local_sub_return(long i, local_t *l) +{ + return local_add_return(-i, l); +} + +#define local_inc_return(l) (local_add_return(1, l)) +#define local_dec_return(l) (local_sub_return(1, l)) + +#define local_cmpxchg(l, o, n) \ + (cmpxchg_local(&((l)->a.counter), (o), (n))) +/* Always has a lock prefix */ +#define local_xchg(l, n) (xchg(&((l)->a.counter), (n))) + +/** + * local_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to l... + * @u: ...unless l is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. + */ +#define local_add_unless(l, a, u) \ +({ \ + long c, old; \ + c = local_read((l)); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = local_cmpxchg((l), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + +/* On x86_32, these are no better than the atomic variants. + * On x86-64 these are better than the atomic variants on SMP kernels + * because they dont use a lock prefix. + */ +#define __local_inc(l) local_inc(l) +#define __local_dec(l) local_dec(l) +#define __local_add(i, l) local_add((i), (l)) +#define __local_sub(i, l) local_sub((i), (l)) + +/* Use these for per-cpu local_t variables: on some archs they are + * much more efficient than these naive implementations. Note they take + * a variable, not an address. + * + * X86_64: This could be done better if we moved the per cpu data directly + * after GS. + */ + +/* Need to disable preemption for the cpu local counters otherwise we could + still access a variable of a previous CPU in a non atomic way. */ +#define cpu_local_wrap_v(l) \ +({ \ + local_t res__; \ + preempt_disable(); \ + res__ = (l); \ + preempt_enable(); \ + res__; \ +}) +#define cpu_local_wrap(l) \ +({ \ + preempt_disable(); \ + (l); \ + preempt_enable(); \ +}) \ + +#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var((l)))) +#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var((l)), (i))) +#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var((l)))) +#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var((l)))) +#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var((l)))) +#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var((l)))) + +#define __cpu_local_inc(l) cpu_local_inc((l)) +#define __cpu_local_dec(l) cpu_local_dec((l)) +#define __cpu_local_add(i, l) cpu_local_add((i), (l)) +#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) + +#endif /* _ASM_X86_LOCAL_H */ diff --git a/arch/x86/include/asm/mach-default/apm.h b/arch/x86/include/asm/mach-default/apm.h new file mode 100644 index 00000000000..20370c6db74 --- /dev/null +++ b/arch/x86/include/asm/mach-default/apm.h @@ -0,0 +1,73 @@ +/* + * Machine specific APM BIOS functions for generic. + * Split out from apm.c by Osamu Tomita <tomita@cinet.co.jp> + */ + +#ifndef _ASM_X86_MACH_DEFAULT_APM_H +#define _ASM_X86_MACH_DEFAULT_APM_H + +#ifdef APM_ZERO_SEGS +# define APM_DO_ZERO_SEGS \ + "pushl %%ds\n\t" \ + "pushl %%es\n\t" \ + "xorl %%edx, %%edx\n\t" \ + "mov %%dx, %%ds\n\t" \ + "mov %%dx, %%es\n\t" \ + "mov %%dx, %%fs\n\t" \ + "mov %%dx, %%gs\n\t" +# define APM_DO_POP_SEGS \ + "popl %%es\n\t" \ + "popl %%ds\n\t" +#else +# define APM_DO_ZERO_SEGS +# define APM_DO_POP_SEGS +#endif + +static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, + u32 *eax, u32 *ebx, u32 *ecx, + u32 *edx, u32 *esi) +{ + /* + * N.B. We do NOT need a cld after the BIOS call + * because we always save and restore the flags. + */ + __asm__ __volatile__(APM_DO_ZERO_SEGS + "pushl %%edi\n\t" + "pushl %%ebp\n\t" + "lcall *%%cs:apm_bios_entry\n\t" + "setc %%al\n\t" + "popl %%ebp\n\t" + "popl %%edi\n\t" + APM_DO_POP_SEGS + : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx), + "=S" (*esi) + : "a" (func), "b" (ebx_in), "c" (ecx_in) + : "memory", "cc"); +} + +static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, + u32 ecx_in, u32 *eax) +{ + int cx, dx, si; + u8 error; + + /* + * N.B. We do NOT need a cld after the BIOS call + * because we always save and restore the flags. + */ + __asm__ __volatile__(APM_DO_ZERO_SEGS + "pushl %%edi\n\t" + "pushl %%ebp\n\t" + "lcall *%%cs:apm_bios_entry\n\t" + "setc %%bl\n\t" + "popl %%ebp\n\t" + "popl %%edi\n\t" + APM_DO_POP_SEGS + : "=a" (*eax), "=b" (error), "=c" (cx), "=d" (dx), + "=S" (si) + : "a" (func), "b" (ebx_in), "c" (ecx_in) + : "memory", "cc"); + return error; +} + +#endif /* _ASM_X86_MACH_DEFAULT_APM_H */ diff --git a/arch/x86/include/asm/mach-default/do_timer.h b/arch/x86/include/asm/mach-default/do_timer.h new file mode 100644 index 00000000000..23ecda0b28a --- /dev/null +++ b/arch/x86/include/asm/mach-default/do_timer.h @@ -0,0 +1,16 @@ +/* defines for inline arch setup functions */ +#include <linux/clockchips.h> + +#include <asm/i8259.h> +#include <asm/i8253.h> + +/** + * do_timer_interrupt_hook - hook into timer tick + * + * Call the pit clock event handler. see asm/i8253.h + **/ + +static inline void do_timer_interrupt_hook(void) +{ + global_clock_event->event_handler(global_clock_event); +} diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/mach-default/entry_arch.h new file mode 100644 index 00000000000..6b1add8e31d --- /dev/null +++ b/arch/x86/include/asm/mach-default/entry_arch.h @@ -0,0 +1,36 @@ +/* + * This file is designed to contain the BUILD_INTERRUPT specifications for + * all of the extra named interrupt vectors used by the architecture. + * Usually this is the Inter Process Interrupts (IPIs) + */ + +/* + * The following vectors are part of the Linux architecture, there + * is no hardware IRQ pin equivalent for them, they are triggered + * through the ICC by us (IPIs) + */ +#ifdef CONFIG_X86_SMP +BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) +BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR) +BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) +BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) +BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) +#endif + +/* + * every pentium local APIC has two 'local interrupts', with a + * soft-definable vector attached to both interrupts, one of + * which is a timer interrupt, the other one is error counter + * overflow. Linux uses the local APIC timer interrupt to get + * a much simpler SMP time architecture: + */ +#ifdef CONFIG_X86_LOCAL_APIC +BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) +BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) +BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) + +#ifdef CONFIG_X86_MCE_P4THERMAL +BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) +#endif + +#endif diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h new file mode 100644 index 00000000000..ff3a6c236c0 --- /dev/null +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -0,0 +1,156 @@ +#ifndef _ASM_X86_MACH_DEFAULT_MACH_APIC_H +#define _ASM_X86_MACH_DEFAULT_MACH_APIC_H + +#ifdef CONFIG_X86_LOCAL_APIC + +#include <mach_apicdef.h> +#include <asm/smp.h> + +#define APIC_DFR_VALUE (APIC_DFR_FLAT) + +static inline cpumask_t target_cpus(void) +{ +#ifdef CONFIG_SMP + return cpu_online_map; +#else + return cpumask_of_cpu(0); +#endif +} + +#define NO_BALANCE_IRQ (0) +#define esr_disable (0) + +#ifdef CONFIG_X86_64 +#include <asm/genapic.h> +#define INT_DELIVERY_MODE (genapic->int_delivery_mode) +#define INT_DEST_MODE (genapic->int_dest_mode) +#define TARGET_CPUS (genapic->target_cpus()) +#define apic_id_registered (genapic->apic_id_registered) +#define init_apic_ldr (genapic->init_apic_ldr) +#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) +#define phys_pkg_id (genapic->phys_pkg_id) +#define vector_allocation_domain (genapic->vector_allocation_domain) +#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) +#define send_IPI_self (genapic->send_IPI_self) +extern void setup_apic_routing(void); +#else +#define INT_DELIVERY_MODE dest_LowestPrio +#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ +#define TARGET_CPUS (target_cpus()) +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +static inline void init_apic_ldr(void) +{ + unsigned long val; + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); + apic_write(APIC_LDR, val); +} + +static inline int apic_id_registered(void) +{ + return physid_isset(read_apic_id(), phys_cpu_present_map); +} + +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +{ + return cpus_addr(cpumask)[0]; +} + +static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +static inline void setup_apic_routing(void) +{ +#ifdef CONFIG_X86_IO_APIC + printk("Enabling APIC mode: %s. Using %d I/O APICs\n", + "Flat", nr_ioapics); +#endif +} + +static inline int apicid_to_node(int logical_apicid) +{ +#ifdef CONFIG_SMP + return apicid_2_node[hard_smp_processor_id()]; +#else + return 0; +#endif +} + +static inline cpumask_t vector_allocation_domain(int cpu) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; +} +#endif + +static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return physid_isset(apicid, bitmap); +} + +static inline unsigned long check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +{ + return phys_map; +} + +static inline int multi_timer_check(int apic, int irq) +{ + return 0; +} + +/* Mapping from cpu number to logical apicid */ +static inline int cpu_to_logical_apicid(int cpu) +{ + return 1 << cpu; +} + +static inline int cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < NR_CPUS && cpu_present(mps_cpu)) + return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) +{ + return physid_mask_of_physid(phys_apicid); +} + +static inline void setup_portio_remap(void) +{ +} + +static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); +} + +static inline void enable_apic_mode(void) +{ +} +#endif /* CONFIG_X86_LOCAL_APIC */ +#endif /* _ASM_X86_MACH_DEFAULT_MACH_APIC_H */ diff --git a/arch/x86/include/asm/mach-default/mach_apicdef.h b/arch/x86/include/asm/mach-default/mach_apicdef.h new file mode 100644 index 00000000000..53179936d6c --- /dev/null +++ b/arch/x86/include/asm/mach-default/mach_apicdef.h @@ -0,0 +1,24 @@ +#ifndef _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H +#define _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H + +#include <asm/apic.h> + +#ifdef CONFIG_X86_64 +#define APIC_ID_MASK (genapic->apic_id_mask) +#define GET_APIC_ID(x) (genapic->get_apic_id(x)) +#define SET_APIC_ID(x) (genapic->set_apic_id(x)) +#else +#define APIC_ID_MASK (0xF<<24) +static inline unsigned get_apic_id(unsigned long x) +{ + unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); + if (APIC_XAPIC(ver)) + return (((x)>>24)&0xFF); + else + return (((x)>>24)&0xF); +} + +#define GET_APIC_ID(x) get_apic_id(x) +#endif + +#endif /* _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H */ diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h new file mode 100644 index 00000000000..fabca01ebac --- /dev/null +++ b/arch/x86/include/asm/mach-default/mach_ipi.h @@ -0,0 +1,64 @@ +#ifndef _ASM_X86_MACH_DEFAULT_MACH_IPI_H +#define _ASM_X86_MACH_DEFAULT_MACH_IPI_H + +/* Avoid include hell */ +#define NMI_VECTOR 0x02 + +void send_IPI_mask_bitmask(cpumask_t mask, int vector); +void __send_IPI_shortcut(unsigned int shortcut, int vector); + +extern int no_broadcast; + +#ifdef CONFIG_X86_64 +#include <asm/genapic.h> +#define send_IPI_mask (genapic->send_IPI_mask) +#else +static inline void send_IPI_mask(cpumask_t mask, int vector) +{ + send_IPI_mask_bitmask(mask, vector); +} +#endif + +static inline void __local_send_IPI_allbutself(int vector) +{ + if (no_broadcast || vector == NMI_VECTOR) { + cpumask_t mask = cpu_online_map; + + cpu_clear(smp_processor_id(), mask); + send_IPI_mask(mask, vector); + } else + __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); +} + +static inline void __local_send_IPI_all(int vector) +{ + if (no_broadcast || vector == NMI_VECTOR) + send_IPI_mask(cpu_online_map, vector); + else + __send_IPI_shortcut(APIC_DEST_ALLINC, vector); +} + +#ifdef CONFIG_X86_64 +#define send_IPI_allbutself (genapic->send_IPI_allbutself) +#define send_IPI_all (genapic->send_IPI_all) +#else +static inline void send_IPI_allbutself(int vector) +{ + /* + * if there are no other CPUs in the system then we get an APIC send + * error if we try to broadcast, thus avoid sending IPIs in this case. + */ + if (!(num_online_cpus() > 1)) + return; + + __local_send_IPI_allbutself(vector); + return; +} + +static inline void send_IPI_all(int vector) +{ + __local_send_IPI_all(vector); +} +#endif + +#endif /* _ASM_X86_MACH_DEFAULT_MACH_IPI_H */ diff --git a/arch/x86/include/asm/mach-default/mach_mpparse.h b/arch/x86/include/asm/mach-default/mach_mpparse.h new file mode 100644 index 00000000000..8c1ea21238a --- /dev/null +++ b/arch/x86/include/asm/mach-default/mach_mpparse.h @@ -0,0 +1,17 @@ +#ifndef _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H +#define _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H + +static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid) +{ + return 0; +} + +/* Hook from generic ACPI tables.c */ +static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return 0; +} + + +#endif /* _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H */ diff --git a/arch/x86/include/asm/mach-default/mach_mpspec.h b/arch/x86/include/asm/mach-default/mach_mpspec.h new file mode 100644 index 00000000000..e85ede686be --- /dev/null +++ b/arch/x86/include/asm/mach-default/mach_mpspec.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_MACH_DEFAULT_MACH_MPSPEC_H +#define _ASM_X86_MACH_DEFAULT_MACH_MPSPEC_H + +#define MAX_IRQ_SOURCES 256 + +#if CONFIG_BASE_SMALL == 0 +#define MAX_MP_BUSSES 256 +#else +#define MAX_MP_BUSSES 32 +#endif + +#endif /* _ASM_X86_MACH_DEFAULT_MACH_MPSPEC_H */ diff --git a/arch/x86/include/asm/mach-default/mach_timer.h b/arch/x86/include/asm/mach-default/mach_timer.h new file mode 100644 index 00000000000..853728519ae --- /dev/null +++ b/arch/x86/include/asm/mach-default/mach_timer.h @@ -0,0 +1,48 @@ +/* + * Machine specific calibrate_tsc() for generic. + * Split out from timer_tsc.c by Osamu Tomita <tomita@cinet.co.jp> + */ +/* ------ Calibrate the TSC ------- + * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). + * Too much 64-bit arithmetic here to do this cleanly in C, and for + * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2) + * output busy loop as low as possible. We avoid reading the CTC registers + * directly because of the awkward 8-bit access mechanism of the 82C54 + * device. + */ +#ifndef _ASM_X86_MACH_DEFAULT_MACH_TIMER_H +#define _ASM_X86_MACH_DEFAULT_MACH_TIMER_H + +#define CALIBRATE_TIME_MSEC 30 /* 30 msecs */ +#define CALIBRATE_LATCH \ + ((CLOCK_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000) + +static inline void mach_prepare_counter(void) +{ + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Now let's take care of CTC channel 2 + * + * Set the Gate high, program CTC channel 2 for mode 0, + * (interrupt on terminal count mode), binary count, + * load 5 * LATCH count, (LSB and MSB) to begin countdown. + * + * Some devices need a delay here. + */ + outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */ + outb_p(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */ + outb_p(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */ +} + +static inline void mach_countup(unsigned long *count_p) +{ + unsigned long count = 0; + do { + count++; + } while ((inb_p(0x61) & 0x20) == 0); + *count_p = count; +} + +#endif /* _ASM_X86_MACH_DEFAULT_MACH_TIMER_H */ diff --git a/arch/x86/include/asm/mach-default/mach_traps.h b/arch/x86/include/asm/mach-default/mach_traps.h new file mode 100644 index 00000000000..f7920601e47 --- /dev/null +++ b/arch/x86/include/asm/mach-default/mach_traps.h @@ -0,0 +1,33 @@ +/* + * Machine specific NMI handling for generic. + * Split out from traps.c by Osamu Tomita <tomita@cinet.co.jp> + */ +#ifndef _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H +#define _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H + +#include <asm/mc146818rtc.h> + +static inline unsigned char get_nmi_reason(void) +{ + return inb(0x61); +} + +static inline void reassert_nmi(void) +{ + int old_reg = -1; + + if (do_i_have_lock_cmos()) + old_reg = current_lock_cmos_reg(); + else + lock_cmos(0); /* register doesn't matter here */ + outb(0x8f, 0x70); + inb(0x71); /* dummy */ + outb(0x0f, 0x70); + inb(0x71); /* dummy */ + if (old_reg >= 0) + outb(old_reg, 0x70); + else + unlock_cmos(); +} + +#endif /* _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H */ diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h new file mode 100644 index 00000000000..d5c0b826a4f --- /dev/null +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -0,0 +1,42 @@ +#ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H +#define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H + +/* + * This file copes with machines that wakeup secondary CPUs by the + * INIT, INIT, STARTUP sequence. + */ + +#define WAKE_SECONDARY_VIA_INIT + +#define TRAMPOLINE_LOW phys_to_virt(0x467) +#define TRAMPOLINE_HIGH phys_to_virt(0x469) + +#define boot_cpu_apicid boot_cpu_physical_apicid + +static inline void wait_for_init_deassert(atomic_t *deassert) +{ + while (!atomic_read(deassert)) + cpu_relax(); + return; +} + +/* Nothing to do for most platforms, since cleared by the INIT cycle */ +static inline void smp_callin_clear_local_apic(void) +{ +} + +static inline void store_NMI_vector(unsigned short *high, unsigned short *low) +{ +} + +static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) +{ +} + +#if APIC_DEBUG + #define inquire_remote_apic(apicid) __inquire_remote_apic(apicid) +#else + #define inquire_remote_apic(apicid) {} +#endif + +#endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */ diff --git a/arch/x86/include/asm/mach-default/pci-functions.h b/arch/x86/include/asm/mach-default/pci-functions.h new file mode 100644 index 00000000000..ed0bab42735 --- /dev/null +++ b/arch/x86/include/asm/mach-default/pci-functions.h @@ -0,0 +1,19 @@ +/* + * PCI BIOS function numbering for conventional PCI BIOS + * systems + */ + +#define PCIBIOS_PCI_FUNCTION_ID 0xb1XX +#define PCIBIOS_PCI_BIOS_PRESENT 0xb101 +#define PCIBIOS_FIND_PCI_DEVICE 0xb102 +#define PCIBIOS_FIND_PCI_CLASS_CODE 0xb103 +#define PCIBIOS_GENERATE_SPECIAL_CYCLE 0xb106 +#define PCIBIOS_READ_CONFIG_BYTE 0xb108 +#define PCIBIOS_READ_CONFIG_WORD 0xb109 +#define PCIBIOS_READ_CONFIG_DWORD 0xb10a +#define PCIBIOS_WRITE_CONFIG_BYTE 0xb10b +#define PCIBIOS_WRITE_CONFIG_WORD 0xb10c +#define PCIBIOS_WRITE_CONFIG_DWORD 0xb10d +#define PCIBIOS_GET_ROUTING_OPTIONS 0xb10e +#define PCIBIOS_SET_PCI_HW_INT 0xb10f + diff --git a/arch/x86/include/asm/mach-default/setup_arch.h b/arch/x86/include/asm/mach-default/setup_arch.h new file mode 100644 index 00000000000..38846208b54 --- /dev/null +++ b/arch/x86/include/asm/mach-default/setup_arch.h @@ -0,0 +1,3 @@ +/* Hook to call BIOS initialisation function */ + +/* no action for generic */ diff --git a/arch/x86/include/asm/mach-default/smpboot_hooks.h b/arch/x86/include/asm/mach-default/smpboot_hooks.h new file mode 100644 index 00000000000..dbab36d64d4 --- /dev/null +++ b/arch/x86/include/asm/mach-default/smpboot_hooks.h @@ -0,0 +1,59 @@ +/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws + * which needs to alter them. */ + +static inline void smpboot_clear_io_apic_irqs(void) +{ +#ifdef CONFIG_X86_IO_APIC + io_apic_irqs = 0; +#endif +} + +static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) +{ + CMOS_WRITE(0xa, 0xf); + local_flush_tlb(); + pr_debug("1.\n"); + *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; + pr_debug("2.\n"); + *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; + pr_debug("3.\n"); +} + +static inline void smpboot_restore_warm_reset_vector(void) +{ + /* + * Install writable page 0 entry to set BIOS data area. + */ + local_flush_tlb(); + + /* + * Paranoid: Set warm reset code and vector here back + * to default values. + */ + CMOS_WRITE(0, 0xf); + + *((volatile long *) phys_to_virt(0x467)) = 0; +} + +static inline void __init smpboot_setup_io_apic(void) +{ +#ifdef CONFIG_X86_IO_APIC + /* + * Here we can be sure that there is an IO-APIC in the system. Let's + * go and set it up: + */ + if (!skip_ioapic_setup && nr_ioapics) + setup_IO_APIC(); + else { + nr_ioapics = 0; + localise_nmi_watchdog(); + } +#endif +} + +static inline void smpboot_clear_io_apic(void) +{ +#ifdef CONFIG_X86_IO_APIC + nr_ioapics = 0; +#endif +} diff --git a/arch/x86/include/asm/mach-generic/gpio.h b/arch/x86/include/asm/mach-generic/gpio.h new file mode 100644 index 00000000000..995c45efdb3 --- /dev/null +++ b/arch/x86/include/asm/mach-generic/gpio.h @@ -0,0 +1,15 @@ +#ifndef _ASM_X86_MACH_GENERIC_GPIO_H +#define _ASM_X86_MACH_GENERIC_GPIO_H + +int gpio_request(unsigned gpio, const char *label); +void gpio_free(unsigned gpio); +int gpio_direction_input(unsigned gpio); +int gpio_direction_output(unsigned gpio, int value); +int gpio_get_value(unsigned gpio); +void gpio_set_value(unsigned gpio, int value); +int gpio_to_irq(unsigned gpio); +int irq_to_gpio(unsigned irq); + +#include <asm-generic/gpio.h> /* cansleep wrappers */ + +#endif /* _ASM_X86_MACH_GENERIC_GPIO_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h new file mode 100644 index 00000000000..5180bd7478f --- /dev/null +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -0,0 +1,33 @@ +#ifndef _ASM_X86_MACH_GENERIC_MACH_APIC_H +#define _ASM_X86_MACH_GENERIC_MACH_APIC_H + +#include <asm/genapic.h> + +#define esr_disable (genapic->ESR_DISABLE) +#define NO_BALANCE_IRQ (genapic->no_balance_irq) +#define INT_DELIVERY_MODE (genapic->int_delivery_mode) +#define INT_DEST_MODE (genapic->int_dest_mode) +#undef APIC_DEST_LOGICAL +#define APIC_DEST_LOGICAL (genapic->apic_destination_logical) +#define TARGET_CPUS (genapic->target_cpus()) +#define apic_id_registered (genapic->apic_id_registered) +#define init_apic_ldr (genapic->init_apic_ldr) +#define ioapic_phys_id_map (genapic->ioapic_phys_id_map) +#define setup_apic_routing (genapic->setup_apic_routing) +#define multi_timer_check (genapic->multi_timer_check) +#define apicid_to_node (genapic->apicid_to_node) +#define cpu_to_logical_apicid (genapic->cpu_to_logical_apicid) +#define cpu_present_to_apicid (genapic->cpu_present_to_apicid) +#define apicid_to_cpu_present (genapic->apicid_to_cpu_present) +#define setup_portio_remap (genapic->setup_portio_remap) +#define check_apicid_present (genapic->check_apicid_present) +#define check_phys_apicid_present (genapic->check_phys_apicid_present) +#define check_apicid_used (genapic->check_apicid_used) +#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) +#define vector_allocation_domain (genapic->vector_allocation_domain) +#define enable_apic_mode (genapic->enable_apic_mode) +#define phys_pkg_id (genapic->phys_pkg_id) + +extern void generic_bigsmp_probe(void); + +#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_apicdef.h b/arch/x86/include/asm/mach-generic/mach_apicdef.h new file mode 100644 index 00000000000..68041f3802f --- /dev/null +++ b/arch/x86/include/asm/mach-generic/mach_apicdef.h @@ -0,0 +1,11 @@ +#ifndef _ASM_X86_MACH_GENERIC_MACH_APICDEF_H +#define _ASM_X86_MACH_GENERIC_MACH_APICDEF_H + +#ifndef APIC_DEFINITION +#include <asm/genapic.h> + +#define GET_APIC_ID (genapic->get_apic_id) +#define APIC_ID_MASK (genapic->apic_id_mask) +#endif + +#endif /* _ASM_X86_MACH_GENERIC_MACH_APICDEF_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_ipi.h b/arch/x86/include/asm/mach-generic/mach_ipi.h new file mode 100644 index 00000000000..ffd637e3c3d --- /dev/null +++ b/arch/x86/include/asm/mach-generic/mach_ipi.h @@ -0,0 +1,10 @@ +#ifndef _ASM_X86_MACH_GENERIC_MACH_IPI_H +#define _ASM_X86_MACH_GENERIC_MACH_IPI_H + +#include <asm/genapic.h> + +#define send_IPI_mask (genapic->send_IPI_mask) +#define send_IPI_allbutself (genapic->send_IPI_allbutself) +#define send_IPI_all (genapic->send_IPI_all) + +#endif /* _ASM_X86_MACH_GENERIC_MACH_IPI_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_mpparse.h b/arch/x86/include/asm/mach-generic/mach_mpparse.h new file mode 100644 index 00000000000..048f1d46853 --- /dev/null +++ b/arch/x86/include/asm/mach-generic/mach_mpparse.h @@ -0,0 +1,10 @@ +#ifndef _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H +#define _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H + + +extern int mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid); + +extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id); + +#endif /* _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_mpspec.h b/arch/x86/include/asm/mach-generic/mach_mpspec.h new file mode 100644 index 00000000000..bbab5ccfd4f --- /dev/null +++ b/arch/x86/include/asm/mach-generic/mach_mpspec.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H +#define _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H + +#define MAX_IRQ_SOURCES 256 + +/* Summit or generic (i.e. installer) kernels need lots of bus entries. */ +/* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */ +#define MAX_MP_BUSSES 260 + +extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid); +#endif /* _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H */ diff --git a/arch/x86/include/asm/mach-rdc321x/gpio.h b/arch/x86/include/asm/mach-rdc321x/gpio.h new file mode 100644 index 00000000000..c210ab5788b --- /dev/null +++ b/arch/x86/include/asm/mach-rdc321x/gpio.h @@ -0,0 +1,60 @@ +#ifndef _ASM_X86_MACH_RDC321X_GPIO_H +#define _ASM_X86_MACH_RDC321X_GPIO_H + +#include <linux/kernel.h> + +extern int rdc_gpio_get_value(unsigned gpio); +extern void rdc_gpio_set_value(unsigned gpio, int value); +extern int rdc_gpio_direction_input(unsigned gpio); +extern int rdc_gpio_direction_output(unsigned gpio, int value); +extern int rdc_gpio_request(unsigned gpio, const char *label); +extern void rdc_gpio_free(unsigned gpio); +extern void __init rdc321x_gpio_setup(void); + +/* Wrappers for the arch-neutral GPIO API */ + +static inline int gpio_request(unsigned gpio, const char *label) +{ + return rdc_gpio_request(gpio, label); +} + +static inline void gpio_free(unsigned gpio) +{ + might_sleep(); + rdc_gpio_free(gpio); +} + +static inline int gpio_direction_input(unsigned gpio) +{ + return rdc_gpio_direction_input(gpio); +} + +static inline int gpio_direction_output(unsigned gpio, int value) +{ + return rdc_gpio_direction_output(gpio, value); +} + +static inline int gpio_get_value(unsigned gpio) +{ + return rdc_gpio_get_value(gpio); +} + +static inline void gpio_set_value(unsigned gpio, int value) +{ + rdc_gpio_set_value(gpio, value); +} + +static inline int gpio_to_irq(unsigned gpio) +{ + return gpio; +} + +static inline int irq_to_gpio(unsigned irq) +{ + return irq; +} + +/* For cansleep */ +#include <asm-generic/gpio.h> + +#endif /* _ASM_X86_MACH_RDC321X_GPIO_H */ diff --git a/arch/x86/include/asm/mach-rdc321x/rdc321x_defs.h b/arch/x86/include/asm/mach-rdc321x/rdc321x_defs.h new file mode 100644 index 00000000000..c8e9c8bed3d --- /dev/null +++ b/arch/x86/include/asm/mach-rdc321x/rdc321x_defs.h @@ -0,0 +1,12 @@ +#define PFX "rdc321x: " + +/* General purpose configuration and data registers */ +#define RDC3210_CFGREG_ADDR 0x0CF8 +#define RDC3210_CFGREG_DATA 0x0CFC + +#define RDC321X_GPIO_CTRL_REG1 0x48 +#define RDC321X_GPIO_CTRL_REG2 0x84 +#define RDC321X_GPIO_DATA_REG1 0x4c +#define RDC321X_GPIO_DATA_REG2 0x88 + +#define RDC321X_MAX_GPIO 58 diff --git a/arch/x86/include/asm/mach-voyager/do_timer.h b/arch/x86/include/asm/mach-voyager/do_timer.h new file mode 100644 index 00000000000..9e5a459fd15 --- /dev/null +++ b/arch/x86/include/asm/mach-voyager/do_timer.h @@ -0,0 +1,17 @@ +/* defines for inline arch setup functions */ +#include <linux/clockchips.h> + +#include <asm/voyager.h> +#include <asm/i8253.h> + +/** + * do_timer_interrupt_hook - hook into timer tick + * + * Call the pit clock event handler. see asm/i8253.h + **/ +static inline void do_timer_interrupt_hook(void) +{ + global_clock_event->event_handler(global_clock_event); + voyager_timer_interrupt(); +} + diff --git a/arch/x86/include/asm/mach-voyager/entry_arch.h b/arch/x86/include/asm/mach-voyager/entry_arch.h new file mode 100644 index 00000000000..ae52624b593 --- /dev/null +++ b/arch/x86/include/asm/mach-voyager/entry_arch.h @@ -0,0 +1,26 @@ +/* -*- mode: c; c-basic-offset: 8 -*- */ + +/* Copyright (C) 2002 + * + * Author: James.Bottomley@HansenPartnership.com + * + * linux/arch/i386/voyager/entry_arch.h + * + * This file builds the VIC and QIC CPI gates + */ + +/* initialise the voyager interrupt gates + * + * This uses the macros in irq.h to set up assembly jump gates. The + * calls are then redirected to the same routine with smp_ prefixed */ +BUILD_INTERRUPT(vic_sys_interrupt, VIC_SYS_INT) +BUILD_INTERRUPT(vic_cmn_interrupt, VIC_CMN_INT) +BUILD_INTERRUPT(vic_cpi_interrupt, VIC_CPI_LEVEL0); + +/* do all the QIC interrupts */ +BUILD_INTERRUPT(qic_timer_interrupt, QIC_TIMER_CPI); +BUILD_INTERRUPT(qic_invalidate_interrupt, QIC_INVALIDATE_CPI); +BUILD_INTERRUPT(qic_reschedule_interrupt, QIC_RESCHEDULE_CPI); +BUILD_INTERRUPT(qic_enable_irq_interrupt, QIC_ENABLE_IRQ_CPI); +BUILD_INTERRUPT(qic_call_function_interrupt, QIC_CALL_FUNCTION_CPI); +BUILD_INTERRUPT(qic_call_function_single_interrupt, QIC_CALL_FUNCTION_SINGLE_CPI); diff --git a/arch/x86/include/asm/mach-voyager/setup_arch.h b/arch/x86/include/asm/mach-voyager/setup_arch.h new file mode 100644 index 00000000000..71729ca05cd --- /dev/null +++ b/arch/x86/include/asm/mach-voyager/setup_arch.h @@ -0,0 +1,12 @@ +#include <asm/voyager.h> +#include <asm/setup.h> +#define VOYAGER_BIOS_INFO ((struct voyager_bios_info *) \ + (&boot_params.apm_bios_info)) + +/* Hook to call BIOS initialisation function */ + +/* for voyager, pass the voyager BIOS/SUS info area to the detection + * routines */ + +#define ARCH_SETUP voyager_detect(VOYAGER_BIOS_INFO); + diff --git a/arch/x86/include/asm/math_emu.h b/arch/x86/include/asm/math_emu.h new file mode 100644 index 00000000000..5a65b107ad5 --- /dev/null +++ b/arch/x86/include/asm/math_emu.h @@ -0,0 +1,31 @@ +#ifndef _ASM_X86_MATH_EMU_H +#define _ASM_X86_MATH_EMU_H + +/* This structure matches the layout of the data saved to the stack + following a device-not-present interrupt, part of it saved + automatically by the 80386/80486. + */ +struct info { + long ___orig_eip; + long ___ebx; + long ___ecx; + long ___edx; + long ___esi; + long ___edi; + long ___ebp; + long ___eax; + long ___ds; + long ___es; + long ___fs; + long ___orig_eax; + long ___eip; + long ___cs; + long ___eflags; + long ___esp; + long ___ss; + long ___vm86_es; /* This and the following only in vm86 mode */ + long ___vm86_ds; + long ___vm86_fs; + long ___vm86_gs; +}; +#endif /* _ASM_X86_MATH_EMU_H */ diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h new file mode 100644 index 00000000000..01fdf5674e2 --- /dev/null +++ b/arch/x86/include/asm/mc146818rtc.h @@ -0,0 +1,104 @@ +/* + * Machine dependent access functions for RTC registers. + */ +#ifndef _ASM_X86_MC146818RTC_H +#define _ASM_X86_MC146818RTC_H + +#include <asm/io.h> +#include <asm/system.h> +#include <asm/processor.h> +#include <linux/mc146818rtc.h> + +#ifndef RTC_PORT +#define RTC_PORT(x) (0x70 + (x)) +#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */ +#endif + +#if defined(CONFIG_X86_32) && defined(__HAVE_ARCH_CMPXCHG) +/* + * This lock provides nmi access to the CMOS/RTC registers. It has some + * special properties. It is owned by a CPU and stores the index register + * currently being accessed (if owned). The idea here is that it works + * like a normal lock (normally). However, in an NMI, the NMI code will + * first check to see if its CPU owns the lock, meaning that the NMI + * interrupted during the read/write of the device. If it does, it goes ahead + * and performs the access and then restores the index register. If it does + * not, it locks normally. + * + * Note that since we are working with NMIs, we need this lock even in + * a non-SMP machine just to mark that the lock is owned. + * + * This only works with compare-and-swap. There is no other way to + * atomically claim the lock and set the owner. + */ +#include <linux/smp.h> +extern volatile unsigned long cmos_lock; + +/* + * All of these below must be called with interrupts off, preempt + * disabled, etc. + */ + +static inline void lock_cmos(unsigned char reg) +{ + unsigned long new; + new = ((smp_processor_id() + 1) << 8) | reg; + for (;;) { + if (cmos_lock) { + cpu_relax(); + continue; + } + if (__cmpxchg(&cmos_lock, 0, new, sizeof(cmos_lock)) == 0) + return; + } +} + +static inline void unlock_cmos(void) +{ + cmos_lock = 0; +} + +static inline int do_i_have_lock_cmos(void) +{ + return (cmos_lock >> 8) == (smp_processor_id() + 1); +} + +static inline unsigned char current_lock_cmos_reg(void) +{ + return cmos_lock & 0xff; +} + +#define lock_cmos_prefix(reg) \ + do { \ + unsigned long cmos_flags; \ + local_irq_save(cmos_flags); \ + lock_cmos(reg) + +#define lock_cmos_suffix(reg) \ + unlock_cmos(); \ + local_irq_restore(cmos_flags); \ + } while (0) +#else +#define lock_cmos_prefix(reg) do {} while (0) +#define lock_cmos_suffix(reg) do {} while (0) +#define lock_cmos(reg) +#define unlock_cmos() +#define do_i_have_lock_cmos() 0 +#define current_lock_cmos_reg() 0 +#endif + +/* + * The yet supported machines all access the RTC index register via + * an ISA port access but the way to access the date register differs ... + */ +#define CMOS_READ(addr) rtc_cmos_read(addr) +#define CMOS_WRITE(val, addr) rtc_cmos_write(val, addr) +unsigned char rtc_cmos_read(unsigned char addr); +void rtc_cmos_write(unsigned char val, unsigned char addr); + +extern int mach_set_rtc_mmss(unsigned long nowtime); +extern unsigned long mach_get_cmos_time(void); + +#define RTC_IRQ 8 + +#endif /* _ASM_X86_MC146818RTC_H */ diff --git a/arch/x86/include/asm/mca.h b/arch/x86/include/asm/mca.h new file mode 100644 index 00000000000..eedbb6cc1ef --- /dev/null +++ b/arch/x86/include/asm/mca.h @@ -0,0 +1,43 @@ +/* -*- mode: c; c-basic-offset: 8 -*- */ + +/* Platform specific MCA defines */ +#ifndef _ASM_X86_MCA_H +#define _ASM_X86_MCA_H + +/* Maximal number of MCA slots - actually, some machines have less, but + * they all have sufficient number of POS registers to cover 8. + */ +#define MCA_MAX_SLOT_NR 8 + +/* Most machines have only one MCA bus. The only multiple bus machines + * I know have at most two */ +#define MAX_MCA_BUSSES 2 + +#define MCA_PRIMARY_BUS 0 +#define MCA_SECONDARY_BUS 1 + +/* Dummy slot numbers on primary MCA for integrated functions */ +#define MCA_INTEGSCSI (MCA_MAX_SLOT_NR) +#define MCA_INTEGVIDEO (MCA_MAX_SLOT_NR+1) +#define MCA_MOTHERBOARD (MCA_MAX_SLOT_NR+2) + +/* Dummy POS values for integrated functions */ +#define MCA_DUMMY_POS_START 0x10000 +#define MCA_INTEGSCSI_POS (MCA_DUMMY_POS_START+1) +#define MCA_INTEGVIDEO_POS (MCA_DUMMY_POS_START+2) +#define MCA_MOTHERBOARD_POS (MCA_DUMMY_POS_START+3) + +/* MCA registers */ + +#define MCA_MOTHERBOARD_SETUP_REG 0x94 +#define MCA_ADAPTER_SETUP_REG 0x96 +#define MCA_POS_REG(n) (0x100+(n)) + +#define MCA_ENABLED 0x01 /* POS 2, set if adapter enabled */ + +/* Max number of adapters, including both slots and various integrated + * things. + */ +#define MCA_NUMADAPTERS (MCA_MAX_SLOT_NR+3) + +#endif /* _ASM_X86_MCA_H */ diff --git a/arch/x86/include/asm/mca_dma.h b/arch/x86/include/asm/mca_dma.h new file mode 100644 index 00000000000..45271aef82d --- /dev/null +++ b/arch/x86/include/asm/mca_dma.h @@ -0,0 +1,201 @@ +#ifndef _ASM_X86_MCA_DMA_H +#define _ASM_X86_MCA_DMA_H + +#include <asm/io.h> +#include <linux/ioport.h> + +/* + * Microchannel specific DMA stuff. DMA on an MCA machine is fairly similar to + * standard PC dma, but it certainly has its quirks. DMA register addresses + * are in a different place and there are some added functions. Most of this + * should be pretty obvious on inspection. Note that the user must divide + * count by 2 when using 16-bit dma; that is not handled by these functions. + * + * Ramen Noodles are yummy. + * + * 1998 Tymm Twillman <tymm@computer.org> + */ + +/* + * Registers that are used by the DMA controller; FN is the function register + * (tell the controller what to do) and EXE is the execution register (how + * to do it) + */ + +#define MCA_DMA_REG_FN 0x18 +#define MCA_DMA_REG_EXE 0x1A + +/* + * Functions that the DMA controller can do + */ + +#define MCA_DMA_FN_SET_IO 0x00 +#define MCA_DMA_FN_SET_ADDR 0x20 +#define MCA_DMA_FN_GET_ADDR 0x30 +#define MCA_DMA_FN_SET_COUNT 0x40 +#define MCA_DMA_FN_GET_COUNT 0x50 +#define MCA_DMA_FN_GET_STATUS 0x60 +#define MCA_DMA_FN_SET_MODE 0x70 +#define MCA_DMA_FN_SET_ARBUS 0x80 +#define MCA_DMA_FN_MASK 0x90 +#define MCA_DMA_FN_RESET_MASK 0xA0 +#define MCA_DMA_FN_MASTER_CLEAR 0xD0 + +/* + * Modes (used by setting MCA_DMA_FN_MODE in the function register) + * + * Note that the MODE_READ is read from memory (write to device), and + * MODE_WRITE is vice-versa. + */ + +#define MCA_DMA_MODE_XFER 0x04 /* read by default */ +#define MCA_DMA_MODE_READ 0x04 /* same as XFER */ +#define MCA_DMA_MODE_WRITE 0x08 /* OR with MODE_XFER to use */ +#define MCA_DMA_MODE_IO 0x01 /* DMA from IO register */ +#define MCA_DMA_MODE_16 0x40 /* 16 bit xfers */ + + +/** + * mca_enable_dma - channel to enable DMA on + * @dmanr: DMA channel + * + * Enable the MCA bus DMA on a channel. This can be called from + * IRQ context. + */ + +static inline void mca_enable_dma(unsigned int dmanr) +{ + outb(MCA_DMA_FN_RESET_MASK | dmanr, MCA_DMA_REG_FN); +} + +/** + * mca_disble_dma - channel to disable DMA on + * @dmanr: DMA channel + * + * Enable the MCA bus DMA on a channel. This can be called from + * IRQ context. + */ + +static inline void mca_disable_dma(unsigned int dmanr) +{ + outb(MCA_DMA_FN_MASK | dmanr, MCA_DMA_REG_FN); +} + +/** + * mca_set_dma_addr - load a 24bit DMA address + * @dmanr: DMA channel + * @a: 24bit bus address + * + * Load the address register in the DMA controller. This has a 24bit + * limitation (16Mb). + */ + +static inline void mca_set_dma_addr(unsigned int dmanr, unsigned int a) +{ + outb(MCA_DMA_FN_SET_ADDR | dmanr, MCA_DMA_REG_FN); + outb(a & 0xff, MCA_DMA_REG_EXE); + outb((a >> 8) & 0xff, MCA_DMA_REG_EXE); + outb((a >> 16) & 0xff, MCA_DMA_REG_EXE); +} + +/** + * mca_get_dma_addr - load a 24bit DMA address + * @dmanr: DMA channel + * + * Read the address register in the DMA controller. This has a 24bit + * limitation (16Mb). The return is a bus address. + */ + +static inline unsigned int mca_get_dma_addr(unsigned int dmanr) +{ + unsigned int addr; + + outb(MCA_DMA_FN_GET_ADDR | dmanr, MCA_DMA_REG_FN); + addr = inb(MCA_DMA_REG_EXE); + addr |= inb(MCA_DMA_REG_EXE) << 8; + addr |= inb(MCA_DMA_REG_EXE) << 16; + + return addr; +} + +/** + * mca_set_dma_count - load a 16bit transfer count + * @dmanr: DMA channel + * @count: count + * + * Set the DMA count for this channel. This can be up to 64Kbytes. + * Setting a count of zero will not do what you expect. + */ + +static inline void mca_set_dma_count(unsigned int dmanr, unsigned int count) +{ + count--; /* transfers one more than count -- correct for this */ + + outb(MCA_DMA_FN_SET_COUNT | dmanr, MCA_DMA_REG_FN); + outb(count & 0xff, MCA_DMA_REG_EXE); + outb((count >> 8) & 0xff, MCA_DMA_REG_EXE); +} + +/** + * mca_get_dma_residue - get the remaining bytes to transfer + * @dmanr: DMA channel + * + * This function returns the number of bytes left to transfer + * on this DMA channel. + */ + +static inline unsigned int mca_get_dma_residue(unsigned int dmanr) +{ + unsigned short count; + + outb(MCA_DMA_FN_GET_COUNT | dmanr, MCA_DMA_REG_FN); + count = 1 + inb(MCA_DMA_REG_EXE); + count += inb(MCA_DMA_REG_EXE) << 8; + + return count; +} + +/** + * mca_set_dma_io - set the port for an I/O transfer + * @dmanr: DMA channel + * @io_addr: an I/O port number + * + * Unlike the ISA bus DMA controllers the DMA on MCA bus can transfer + * with an I/O port target. + */ + +static inline void mca_set_dma_io(unsigned int dmanr, unsigned int io_addr) +{ + /* + * DMA from a port address -- set the io address + */ + + outb(MCA_DMA_FN_SET_IO | dmanr, MCA_DMA_REG_FN); + outb(io_addr & 0xff, MCA_DMA_REG_EXE); + outb((io_addr >> 8) & 0xff, MCA_DMA_REG_EXE); +} + +/** + * mca_set_dma_mode - set the DMA mode + * @dmanr: DMA channel + * @mode: mode to set + * + * The DMA controller supports several modes. The mode values you can + * set are- + * + * %MCA_DMA_MODE_READ when reading from the DMA device. + * + * %MCA_DMA_MODE_WRITE to writing to the DMA device. + * + * %MCA_DMA_MODE_IO to do DMA to or from an I/O port. + * + * %MCA_DMA_MODE_16 to do 16bit transfers. + */ + +static inline void mca_set_dma_mode(unsigned int dmanr, unsigned int mode) +{ + outb(MCA_DMA_FN_SET_MODE | dmanr, MCA_DMA_REG_FN); + outb(mode, MCA_DMA_REG_EXE); +} + +#endif /* _ASM_X86_MCA_DMA_H */ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h new file mode 100644 index 00000000000..1d6e17c2f23 --- /dev/null +++ b/arch/x86/include/asm/mce.h @@ -0,0 +1,130 @@ +#ifndef _ASM_X86_MCE_H +#define _ASM_X86_MCE_H + +#ifdef __x86_64__ + +#include <asm/ioctls.h> +#include <asm/types.h> + +/* + * Machine Check support for x86 + */ + +#define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */ + +#define MCG_STATUS_RIPV (1UL<<0) /* restart ip valid */ +#define MCG_STATUS_EIPV (1UL<<1) /* ip points to correct instruction */ +#define MCG_STATUS_MCIP (1UL<<2) /* machine check in progress */ + +#define MCI_STATUS_VAL (1UL<<63) /* valid error */ +#define MCI_STATUS_OVER (1UL<<62) /* previous errors lost */ +#define MCI_STATUS_UC (1UL<<61) /* uncorrected error */ +#define MCI_STATUS_EN (1UL<<60) /* error enabled */ +#define MCI_STATUS_MISCV (1UL<<59) /* misc error reg. valid */ +#define MCI_STATUS_ADDRV (1UL<<58) /* addr reg. valid */ +#define MCI_STATUS_PCC (1UL<<57) /* processor context corrupt */ + +/* Fields are zero when not available */ +struct mce { + __u64 status; + __u64 misc; + __u64 addr; + __u64 mcgstatus; + __u64 ip; + __u64 tsc; /* cpu time stamp counter */ + __u64 res1; /* for future extension */ + __u64 res2; /* dito. */ + __u8 cs; /* code segment */ + __u8 bank; /* machine check bank */ + __u8 cpu; /* cpu that raised the error */ + __u8 finished; /* entry is valid */ + __u32 pad; +}; + +/* + * This structure contains all data related to the MCE log. Also + * carries a signature to make it easier to find from external + * debugging tools. Each entry is only valid when its finished flag + * is set. + */ + +#define MCE_LOG_LEN 32 + +struct mce_log { + char signature[12]; /* "MACHINECHECK" */ + unsigned len; /* = MCE_LOG_LEN */ + unsigned next; + unsigned flags; + unsigned pad0; + struct mce entry[MCE_LOG_LEN]; +}; + +#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ + +#define MCE_LOG_SIGNATURE "MACHINECHECK" + +#define MCE_GET_RECORD_LEN _IOR('M', 1, int) +#define MCE_GET_LOG_LEN _IOR('M', 2, int) +#define MCE_GETCLEAR_FLAGS _IOR('M', 3, int) + +/* Software defined banks */ +#define MCE_EXTENDED_BANK 128 +#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0 + +#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) /* MCE_AMD */ +#define K8_MCE_THRESHOLD_BANK_0 (MCE_THRESHOLD_BASE + 0 * 9) +#define K8_MCE_THRESHOLD_BANK_1 (MCE_THRESHOLD_BASE + 1 * 9) +#define K8_MCE_THRESHOLD_BANK_2 (MCE_THRESHOLD_BASE + 2 * 9) +#define K8_MCE_THRESHOLD_BANK_3 (MCE_THRESHOLD_BASE + 3 * 9) +#define K8_MCE_THRESHOLD_BANK_4 (MCE_THRESHOLD_BASE + 4 * 9) +#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) +#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) + +#endif /* __x86_64__ */ + +#ifdef __KERNEL__ + +#ifdef CONFIG_X86_32 +extern int mce_disabled; +#else /* CONFIG_X86_32 */ + +#include <asm/atomic.h> + +void mce_log(struct mce *m); +DECLARE_PER_CPU(struct sys_device, device_mce); +extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); + +#ifdef CONFIG_X86_MCE_INTEL +void mce_intel_feature_init(struct cpuinfo_x86 *c); +#else +static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { } +#endif + +#ifdef CONFIG_X86_MCE_AMD +void mce_amd_feature_init(struct cpuinfo_x86 *c); +#else +static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } +#endif + +void mce_log_therm_throt_event(unsigned int cpu, __u64 status); + +extern atomic_t mce_entry; + +extern void do_machine_check(struct pt_regs *, long); +extern int mce_notify_user(void); + +#endif /* !CONFIG_X86_32 */ + + + +#ifdef CONFIG_X86_MCE +extern void mcheck_init(struct cpuinfo_x86 *c); +#else +#define mcheck_init(c) do { } while (0) +#endif +extern void stop_mce(void); +extern void restart_mce(void); + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h new file mode 100644 index 00000000000..c882664716c --- /dev/null +++ b/arch/x86/include/asm/microcode.h @@ -0,0 +1,47 @@ +#ifndef _ASM_X86_MICROCODE_H +#define _ASM_X86_MICROCODE_H + +struct cpu_signature { + unsigned int sig; + unsigned int pf; + unsigned int rev; +}; + +struct device; + +struct microcode_ops { + int (*request_microcode_user) (int cpu, const void __user *buf, size_t size); + int (*request_microcode_fw) (int cpu, struct device *device); + + void (*apply_microcode) (int cpu); + + int (*collect_cpu_info) (int cpu, struct cpu_signature *csig); + void (*microcode_fini_cpu) (int cpu); +}; + +struct ucode_cpu_info { + struct cpu_signature cpu_sig; + int valid; + void *mc; +}; +extern struct ucode_cpu_info ucode_cpu_info[]; + +#ifdef CONFIG_MICROCODE_INTEL +extern struct microcode_ops * __init init_intel_microcode(void); +#else +static inline struct microcode_ops * __init init_intel_microcode(void) +{ + return NULL; +} +#endif /* CONFIG_MICROCODE_INTEL */ + +#ifdef CONFIG_MICROCODE_AMD +extern struct microcode_ops * __init init_amd_microcode(void); +#else +static inline struct microcode_ops * __init init_amd_microcode(void) +{ + return NULL; +} +#endif + +#endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h new file mode 100644 index 00000000000..90bc4108a4f --- /dev/null +++ b/arch/x86/include/asm/mman.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_MMAN_H +#define _ASM_X86_MMAN_H + +#include <asm-generic/mman.h> + +#define MAP_32BIT 0x40 /* only give out 32bit addresses */ + +#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ +#define MAP_LOCKED 0x2000 /* pages are locked */ +#define MAP_NORESERVE 0x4000 /* don't check for reservations */ +#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ + +#define MCL_CURRENT 1 /* lock all current mappings */ +#define MCL_FUTURE 2 /* lock all future mappings */ + +#endif /* _ASM_X86_MMAN_H */ diff --git a/arch/x86/include/asm/mmconfig.h b/arch/x86/include/asm/mmconfig.h new file mode 100644 index 00000000000..9b119da1d10 --- /dev/null +++ b/arch/x86/include/asm/mmconfig.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_MMCONFIG_H +#define _ASM_X86_MMCONFIG_H + +#ifdef CONFIG_PCI_MMCONFIG +extern void __cpuinit fam10h_check_enable_mmcfg(void); +extern void __cpuinit check_enable_amd_mmconf_dmi(void); +#else +static inline void fam10h_check_enable_mmcfg(void) { } +static inline void check_enable_amd_mmconf_dmi(void) { } +#endif + +#endif /* _ASM_X86_MMCONFIG_H */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h new file mode 100644 index 00000000000..80a1dee5bea --- /dev/null +++ b/arch/x86/include/asm/mmu.h @@ -0,0 +1,26 @@ +#ifndef _ASM_X86_MMU_H +#define _ASM_X86_MMU_H + +#include <linux/spinlock.h> +#include <linux/mutex.h> + +/* + * The x86 doesn't have a mmu context, but + * we put the segment information here. + */ +typedef struct { + void *ldt; + int size; + struct mutex lock; + void *vdso; +} mm_context_t; + +#ifdef CONFIG_SMP +void leave_mm(int cpu); +#else +static inline void leave_mm(int cpu) +{ +} +#endif + +#endif /* _ASM_X86_MMU_H */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h new file mode 100644 index 00000000000..8aeeb3fd73d --- /dev/null +++ b/arch/x86/include/asm/mmu_context.h @@ -0,0 +1,37 @@ +#ifndef _ASM_X86_MMU_CONTEXT_H +#define _ASM_X86_MMU_CONTEXT_H + +#include <asm/desc.h> +#include <asm/atomic.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> +#include <asm/paravirt.h> +#ifndef CONFIG_PARAVIRT +#include <asm-generic/mm_hooks.h> + +static inline void paravirt_activate_mm(struct mm_struct *prev, + struct mm_struct *next) +{ +} +#endif /* !CONFIG_PARAVIRT */ + +/* + * Used for LDT copy/destruction. + */ +int init_new_context(struct task_struct *tsk, struct mm_struct *mm); +void destroy_context(struct mm_struct *mm); + +#ifdef CONFIG_X86_32 +# include "mmu_context_32.h" +#else +# include "mmu_context_64.h" +#endif + +#define activate_mm(prev, next) \ +do { \ + paravirt_activate_mm((prev), (next)); \ + switch_mm((prev), (next), NULL); \ +} while (0); + + +#endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h new file mode 100644 index 00000000000..8e10015781f --- /dev/null +++ b/arch/x86/include/asm/mmu_context_32.h @@ -0,0 +1,56 @@ +#ifndef _ASM_X86_MMU_CONTEXT_32_H +#define _ASM_X86_MMU_CONTEXT_32_H + +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ +#ifdef CONFIG_SMP + unsigned cpu = smp_processor_id(); + if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) + per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_LAZY; +#endif +} + +static inline void switch_mm(struct mm_struct *prev, + struct mm_struct *next, + struct task_struct *tsk) +{ + int cpu = smp_processor_id(); + + if (likely(prev != next)) { + /* stop flush ipis for the previous mm */ + cpu_clear(cpu, prev->cpu_vm_mask); +#ifdef CONFIG_SMP + per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; + per_cpu(cpu_tlbstate, cpu).active_mm = next; +#endif + cpu_set(cpu, next->cpu_vm_mask); + + /* Re-load page tables */ + load_cr3(next->pgd); + + /* + * load the LDT, if the LDT is different: + */ + if (unlikely(prev->context.ldt != next->context.ldt)) + load_LDT_nolock(&next->context); + } +#ifdef CONFIG_SMP + else { + per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; + BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next); + + if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { + /* We were in lazy tlb mode and leave_mm disabled + * tlb flush IPI delivery. We must reload %cr3. + */ + load_cr3(next->pgd); + load_LDT_nolock(&next->context); + } + } +#endif +} + +#define deactivate_mm(tsk, mm) \ + asm("movl %0,%%gs": :"r" (0)); + +#endif /* _ASM_X86_MMU_CONTEXT_32_H */ diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h new file mode 100644 index 00000000000..677d36e9540 --- /dev/null +++ b/arch/x86/include/asm/mmu_context_64.h @@ -0,0 +1,54 @@ +#ifndef _ASM_X86_MMU_CONTEXT_64_H +#define _ASM_X86_MMU_CONTEXT_64_H + +#include <asm/pda.h> + +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ +#ifdef CONFIG_SMP + if (read_pda(mmu_state) == TLBSTATE_OK) + write_pda(mmu_state, TLBSTATE_LAZY); +#endif +} + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + unsigned cpu = smp_processor_id(); + if (likely(prev != next)) { + /* stop flush ipis for the previous mm */ + cpu_clear(cpu, prev->cpu_vm_mask); +#ifdef CONFIG_SMP + write_pda(mmu_state, TLBSTATE_OK); + write_pda(active_mm, next); +#endif + cpu_set(cpu, next->cpu_vm_mask); + load_cr3(next->pgd); + + if (unlikely(next->context.ldt != prev->context.ldt)) + load_LDT_nolock(&next->context); + } +#ifdef CONFIG_SMP + else { + write_pda(mmu_state, TLBSTATE_OK); + if (read_pda(active_mm) != next) + BUG(); + if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { + /* We were in lazy tlb mode and leave_mm disabled + * tlb flush IPI delivery. We must reload CR3 + * to make sure to use no freed page tables. + */ + load_cr3(next->pgd); + load_LDT_nolock(&next->context); + } + } +#endif +} + +#define deactivate_mm(tsk, mm) \ +do { \ + load_gs_index(0); \ + asm volatile("movl %0,%%fs"::"r"(0)); \ +} while (0) + +#endif /* _ASM_X86_MMU_CONTEXT_64_H */ diff --git a/arch/x86/include/asm/mmx.h b/arch/x86/include/asm/mmx.h new file mode 100644 index 00000000000..5cbf3135b97 --- /dev/null +++ b/arch/x86/include/asm/mmx.h @@ -0,0 +1,14 @@ +#ifndef _ASM_X86_MMX_H +#define _ASM_X86_MMX_H + +/* + * MMX 3Dnow! helper operations + */ + +#include <linux/types.h> + +extern void *_mmx_memcpy(void *to, const void *from, size_t size); +extern void mmx_clear_page(void *page); +extern void mmx_copy_page(void *to, void *from); + +#endif /* _ASM_X86_MMX_H */ diff --git a/arch/x86/include/asm/mmzone.h b/arch/x86/include/asm/mmzone.h new file mode 100644 index 00000000000..64217ea16a3 --- /dev/null +++ b/arch/x86/include/asm/mmzone.h @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "mmzone_32.h" +#else +# include "mmzone_64.h" +#endif diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h new file mode 100644 index 00000000000..485bdf059ff --- /dev/null +++ b/arch/x86/include/asm/mmzone_32.h @@ -0,0 +1,134 @@ +/* + * Written by Pat Gaughen (gone@us.ibm.com) Mar 2002 + * + */ + +#ifndef _ASM_X86_MMZONE_32_H +#define _ASM_X86_MMZONE_32_H + +#include <asm/smp.h> + +#ifdef CONFIG_NUMA +extern struct pglist_data *node_data[]; +#define NODE_DATA(nid) (node_data[nid]) + +#include <asm/numaq.h> +/* summit or generic arch */ +#include <asm/srat.h> + +extern int get_memcfg_numa_flat(void); +/* + * This allows any one NUMA architecture to be compiled + * for, and still fall back to the flat function if it + * fails. + */ +static inline void get_memcfg_numa(void) +{ + + if (get_memcfg_numaq()) + return; + if (get_memcfg_from_srat()) + return; + get_memcfg_numa_flat(); +} + +extern int early_pfn_to_nid(unsigned long pfn); + +#else /* !CONFIG_NUMA */ + +#define get_memcfg_numa get_memcfg_numa_flat + +#endif /* CONFIG_NUMA */ + +#ifdef CONFIG_DISCONTIGMEM + +/* + * generic node memory support, the following assumptions apply: + * + * 1) memory comes in 64Mb contigious chunks which are either present or not + * 2) we will not have more than 64Gb in total + * + * for now assume that 64Gb is max amount of RAM for whole system + * 64Gb / 4096bytes/page = 16777216 pages + */ +#define MAX_NR_PAGES 16777216 +#define MAX_ELEMENTS 1024 +#define PAGES_PER_ELEMENT (MAX_NR_PAGES/MAX_ELEMENTS) + +extern s8 physnode_map[]; + +static inline int pfn_to_nid(unsigned long pfn) +{ +#ifdef CONFIG_NUMA + return((int) physnode_map[(pfn) / PAGES_PER_ELEMENT]); +#else + return 0; +#endif +} + +/* + * Following are macros that each numa implmentation must define. + */ + +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) +#define node_end_pfn(nid) \ +({ \ + pg_data_t *__pgdat = NODE_DATA(nid); \ + __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ +}) + +static inline int pfn_valid(int pfn) +{ + int nid = pfn_to_nid(pfn); + + if (nid >= 0) + return (pfn < node_end_pfn(nid)); + return 0; +} + +#endif /* CONFIG_DISCONTIGMEM */ + +#ifdef CONFIG_NEED_MULTIPLE_NODES + +/* + * Following are macros that are specific to this numa platform. + */ +#define reserve_bootmem(addr, size, flags) \ + reserve_bootmem_node(NODE_DATA(0), (addr), (size), (flags)) +#define alloc_bootmem(x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_nopanic(x) \ + __alloc_bootmem_node_nopanic(NODE_DATA(0), (x), SMP_CACHE_BYTES, \ + __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_low(x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, 0) +#define alloc_bootmem_pages(x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_pages_nopanic(x) \ + __alloc_bootmem_node_nopanic(NODE_DATA(0), (x), PAGE_SIZE, \ + __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_low_pages(x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) +#define alloc_bootmem_node(pgdat, x) \ +({ \ + struct pglist_data __maybe_unused \ + *__alloc_bootmem_node__pgdat = (pgdat); \ + __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, \ + __pa(MAX_DMA_ADDRESS)); \ +}) +#define alloc_bootmem_pages_node(pgdat, x) \ +({ \ + struct pglist_data __maybe_unused \ + *__alloc_bootmem_node__pgdat = (pgdat); \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, \ + __pa(MAX_DMA_ADDRESS)); \ +}) +#define alloc_bootmem_low_pages_node(pgdat, x) \ +({ \ + struct pglist_data __maybe_unused \ + *__alloc_bootmem_node__pgdat = (pgdat); \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0); \ +}) +#endif /* CONFIG_NEED_MULTIPLE_NODES */ + +#endif /* _ASM_X86_MMZONE_32_H */ diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h new file mode 100644 index 00000000000..a5b3817d4b9 --- /dev/null +++ b/arch/x86/include/asm/mmzone_64.h @@ -0,0 +1,51 @@ +/* K8 NUMA support */ +/* Copyright 2002,2003 by Andi Kleen, SuSE Labs */ +/* 2.5 Version loosely based on the NUMAQ Code by Pat Gaughen. */ +#ifndef _ASM_X86_MMZONE_64_H +#define _ASM_X86_MMZONE_64_H + + +#ifdef CONFIG_NUMA + +#include <linux/mmdebug.h> + +#include <asm/smp.h> + +/* Simple perfect hash to map physical addresses to node numbers */ +struct memnode { + int shift; + unsigned int mapsize; + s16 *map; + s16 embedded_map[64 - 8]; +} ____cacheline_aligned; /* total size = 128 bytes */ +extern struct memnode memnode; +#define memnode_shift memnode.shift +#define memnodemap memnode.map +#define memnodemapsize memnode.mapsize + +extern struct pglist_data *node_data[]; + +static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) +{ + unsigned nid; + VIRTUAL_BUG_ON(!memnodemap); + nid = memnodemap[addr >> memnode_shift]; + VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); + return nid; +} + +#define NODE_DATA(nid) (node_data[nid]) + +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) +#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ + NODE_DATA(nid)->node_spanned_pages) + +extern int early_pfn_to_nid(unsigned long pfn); + +#ifdef CONFIG_NUMA_EMU +#define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024) +#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) +#endif + +#endif +#endif /* _ASM_X86_MMZONE_64_H */ diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h new file mode 100644 index 00000000000..47d62743c4d --- /dev/null +++ b/arch/x86/include/asm/module.h @@ -0,0 +1,80 @@ +#ifndef _ASM_X86_MODULE_H +#define _ASM_X86_MODULE_H + +/* x86_32/64 are simple */ +struct mod_arch_specific {}; + +#ifdef CONFIG_X86_32 +# define Elf_Shdr Elf32_Shdr +# define Elf_Sym Elf32_Sym +# define Elf_Ehdr Elf32_Ehdr +#else +# define Elf_Shdr Elf64_Shdr +# define Elf_Sym Elf64_Sym +# define Elf_Ehdr Elf64_Ehdr +#endif + +#ifdef CONFIG_X86_64 +/* X86_64 does not define MODULE_PROC_FAMILY */ +#elif defined CONFIG_M386 +#define MODULE_PROC_FAMILY "386 " +#elif defined CONFIG_M486 +#define MODULE_PROC_FAMILY "486 " +#elif defined CONFIG_M586 +#define MODULE_PROC_FAMILY "586 " +#elif defined CONFIG_M586TSC +#define MODULE_PROC_FAMILY "586TSC " +#elif defined CONFIG_M586MMX +#define MODULE_PROC_FAMILY "586MMX " +#elif defined CONFIG_MCORE2 +#define MODULE_PROC_FAMILY "CORE2 " +#elif defined CONFIG_M686 +#define MODULE_PROC_FAMILY "686 " +#elif defined CONFIG_MPENTIUMII +#define MODULE_PROC_FAMILY "PENTIUMII " +#elif defined CONFIG_MPENTIUMIII +#define MODULE_PROC_FAMILY "PENTIUMIII " +#elif defined CONFIG_MPENTIUMM +#define MODULE_PROC_FAMILY "PENTIUMM " +#elif defined CONFIG_MPENTIUM4 +#define MODULE_PROC_FAMILY "PENTIUM4 " +#elif defined CONFIG_MK6 +#define MODULE_PROC_FAMILY "K6 " +#elif defined CONFIG_MK7 +#define MODULE_PROC_FAMILY "K7 " +#elif defined CONFIG_MK8 +#define MODULE_PROC_FAMILY "K8 " +#elif defined CONFIG_X86_ELAN +#define MODULE_PROC_FAMILY "ELAN " +#elif defined CONFIG_MCRUSOE +#define MODULE_PROC_FAMILY "CRUSOE " +#elif defined CONFIG_MEFFICEON +#define MODULE_PROC_FAMILY "EFFICEON " +#elif defined CONFIG_MWINCHIPC6 +#define MODULE_PROC_FAMILY "WINCHIPC6 " +#elif defined CONFIG_MWINCHIP3D +#define MODULE_PROC_FAMILY "WINCHIP3D " +#elif defined CONFIG_MCYRIXIII +#define MODULE_PROC_FAMILY "CYRIXIII " +#elif defined CONFIG_MVIAC3_2 +#define MODULE_PROC_FAMILY "VIAC3-2 " +#elif defined CONFIG_MVIAC7 +#define MODULE_PROC_FAMILY "VIAC7 " +#elif defined CONFIG_MGEODEGX1 +#define MODULE_PROC_FAMILY "GEODEGX1 " +#elif defined CONFIG_MGEODE_LX +#define MODULE_PROC_FAMILY "GEODE " +#else +#error unknown processor family +#endif + +#ifdef CONFIG_X86_32 +# ifdef CONFIG_4KSTACKS +# define MODULE_STACKSIZE "4KSTACKS " +# else +# define MODULE_STACKSIZE "" +# endif +# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE +#endif + +#endif /* _ASM_X86_MODULE_H */ diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h new file mode 100644 index 00000000000..91885c28f66 --- /dev/null +++ b/arch/x86/include/asm/mpspec.h @@ -0,0 +1,145 @@ +#ifndef _ASM_X86_MPSPEC_H +#define _ASM_X86_MPSPEC_H + +#include <linux/init.h> + +#include <asm/mpspec_def.h> + +extern int apic_version[MAX_APICS]; + +#ifdef CONFIG_X86_32 +#include <mach_mpspec.h> + +extern unsigned int def_to_bigsmp; +extern u8 apicid_2_node[]; +extern int pic_mode; + +#ifdef CONFIG_X86_NUMAQ +extern int mp_bus_id_to_node[MAX_MP_BUSSES]; +extern int mp_bus_id_to_local[MAX_MP_BUSSES]; +extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; +#endif + +#define MAX_APICID 256 + +#else + +#define MAX_MP_BUSSES 256 +/* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */ +#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4) + +#endif + +extern void early_find_smp_config(void); +extern void early_get_smp_config(void); + +#if defined(CONFIG_MCA) || defined(CONFIG_EISA) +extern int mp_bus_id_to_type[MAX_MP_BUSSES]; +#endif + +extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); + +extern unsigned int boot_cpu_physical_apicid; +extern unsigned int max_physical_apicid; +extern int smp_found_config; +extern int mpc_default_type; +extern unsigned long mp_lapic_addr; + +extern void find_smp_config(void); +extern void get_smp_config(void); +#ifdef CONFIG_X86_MPPARSE +extern void early_reserve_e820_mpc_new(void); +#else +static inline void early_reserve_e820_mpc_new(void) { } +#endif + +void __cpuinit generic_processor_info(int apicid, int version); +#ifdef CONFIG_ACPI +extern void mp_register_ioapic(int id, u32 address, u32 gsi_base); +extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, + u32 gsi); +extern void mp_config_acpi_legacy_irqs(void); +extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low); +#ifdef CONFIG_X86_IO_APIC +extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, + u32 gsi, int triggering, int polarity); +#else +static inline int +mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, + u32 gsi, int triggering, int polarity) +{ + return 0; +} +#endif +#endif /* CONFIG_ACPI */ + +#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) + +struct physid_mask { + unsigned long mask[PHYSID_ARRAY_SIZE]; +}; + +typedef struct physid_mask physid_mask_t; + +#define physid_set(physid, map) set_bit(physid, (map).mask) +#define physid_clear(physid, map) clear_bit(physid, (map).mask) +#define physid_isset(physid, map) test_bit(physid, (map).mask) +#define physid_test_and_set(physid, map) \ + test_and_set_bit(physid, (map).mask) + +#define physids_and(dst, src1, src2) \ + bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS) + +#define physids_or(dst, src1, src2) \ + bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS) + +#define physids_clear(map) \ + bitmap_zero((map).mask, MAX_APICS) + +#define physids_complement(dst, src) \ + bitmap_complement((dst).mask, (src).mask, MAX_APICS) + +#define physids_empty(map) \ + bitmap_empty((map).mask, MAX_APICS) + +#define physids_equal(map1, map2) \ + bitmap_equal((map1).mask, (map2).mask, MAX_APICS) + +#define physids_weight(map) \ + bitmap_weight((map).mask, MAX_APICS) + +#define physids_shift_right(d, s, n) \ + bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS) + +#define physids_shift_left(d, s, n) \ + bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) + +#define physids_coerce(map) ((map).mask[0]) + +#define physids_promote(physids) \ + ({ \ + physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ + __physid_mask.mask[0] = physids; \ + __physid_mask; \ + }) + +/* Note: will create very large stack frames if physid_mask_t is big */ +#define physid_mask_of_physid(physid) \ + ({ \ + physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ + physid_set(physid, __physid_mask); \ + __physid_mask; \ + }) + +static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) +{ + physids_clear(*map); + physid_set(physid, *map); +} + +#define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} } +#define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} } + +extern physid_mask_t phys_cpu_present_map; + +#endif /* _ASM_X86_MPSPEC_H */ diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h new file mode 100644 index 00000000000..e3ace7d1d35 --- /dev/null +++ b/arch/x86/include/asm/mpspec_def.h @@ -0,0 +1,180 @@ +#ifndef _ASM_X86_MPSPEC_DEF_H +#define _ASM_X86_MPSPEC_DEF_H + +/* + * Structure definitions for SMP machines following the + * Intel Multiprocessing Specification 1.1 and 1.4. + */ + +/* + * This tag identifies where the SMP configuration + * information is. + */ + +#define SMP_MAGIC_IDENT (('_'<<24) | ('P'<<16) | ('M'<<8) | '_') + +#ifdef CONFIG_X86_32 +# define MAX_MPC_ENTRY 1024 +# define MAX_APICS 256 +#else +# if NR_CPUS <= 255 +# define MAX_APICS 255 +# else +# define MAX_APICS 32768 +# endif +#endif + +struct intel_mp_floating { + char mpf_signature[4]; /* "_MP_" */ + unsigned int mpf_physptr; /* Configuration table address */ + unsigned char mpf_length; /* Our length (paragraphs) */ + unsigned char mpf_specification;/* Specification version */ + unsigned char mpf_checksum; /* Checksum (makes sum 0) */ + unsigned char mpf_feature1; /* Standard or configuration ? */ + unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */ + unsigned char mpf_feature3; /* Unused (0) */ + unsigned char mpf_feature4; /* Unused (0) */ + unsigned char mpf_feature5; /* Unused (0) */ +}; + +#define MPC_SIGNATURE "PCMP" + +struct mp_config_table { + char mpc_signature[4]; + unsigned short mpc_length; /* Size of table */ + char mpc_spec; /* 0x01 */ + char mpc_checksum; + char mpc_oem[8]; + char mpc_productid[12]; + unsigned int mpc_oemptr; /* 0 if not present */ + unsigned short mpc_oemsize; /* 0 if not present */ + unsigned short mpc_oemcount; + unsigned int mpc_lapic; /* APIC address */ + unsigned int reserved; +}; + +/* Followed by entries */ + +#define MP_PROCESSOR 0 +#define MP_BUS 1 +#define MP_IOAPIC 2 +#define MP_INTSRC 3 +#define MP_LINTSRC 4 +/* Used by IBM NUMA-Q to describe node locality */ +#define MP_TRANSLATION 192 + +#define CPU_ENABLED 1 /* Processor is available */ +#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */ + +#define CPU_STEPPING_MASK 0x000F +#define CPU_MODEL_MASK 0x00F0 +#define CPU_FAMILY_MASK 0x0F00 + +struct mpc_config_processor { + unsigned char mpc_type; + unsigned char mpc_apicid; /* Local APIC number */ + unsigned char mpc_apicver; /* Its versions */ + unsigned char mpc_cpuflag; + unsigned int mpc_cpufeature; + unsigned int mpc_featureflag; /* CPUID feature value */ + unsigned int mpc_reserved[2]; +}; + +struct mpc_config_bus { + unsigned char mpc_type; + unsigned char mpc_busid; + unsigned char mpc_bustype[6]; +}; + +/* List of Bus Type string values, Intel MP Spec. */ +#define BUSTYPE_EISA "EISA" +#define BUSTYPE_ISA "ISA" +#define BUSTYPE_INTERN "INTERN" /* Internal BUS */ +#define BUSTYPE_MCA "MCA" +#define BUSTYPE_VL "VL" /* Local bus */ +#define BUSTYPE_PCI "PCI" +#define BUSTYPE_PCMCIA "PCMCIA" +#define BUSTYPE_CBUS "CBUS" +#define BUSTYPE_CBUSII "CBUSII" +#define BUSTYPE_FUTURE "FUTURE" +#define BUSTYPE_MBI "MBI" +#define BUSTYPE_MBII "MBII" +#define BUSTYPE_MPI "MPI" +#define BUSTYPE_MPSA "MPSA" +#define BUSTYPE_NUBUS "NUBUS" +#define BUSTYPE_TC "TC" +#define BUSTYPE_VME "VME" +#define BUSTYPE_XPRESS "XPRESS" + +#define MPC_APIC_USABLE 0x01 + +struct mpc_config_ioapic { + unsigned char mpc_type; + unsigned char mpc_apicid; + unsigned char mpc_apicver; + unsigned char mpc_flags; + unsigned int mpc_apicaddr; +}; + +struct mpc_config_intsrc { + unsigned char mpc_type; + unsigned char mpc_irqtype; + unsigned short mpc_irqflag; + unsigned char mpc_srcbus; + unsigned char mpc_srcbusirq; + unsigned char mpc_dstapic; + unsigned char mpc_dstirq; +}; + +enum mp_irq_source_types { + mp_INT = 0, + mp_NMI = 1, + mp_SMI = 2, + mp_ExtINT = 3 +}; + +#define MP_IRQDIR_DEFAULT 0 +#define MP_IRQDIR_HIGH 1 +#define MP_IRQDIR_LOW 3 + +#define MP_APIC_ALL 0xFF + +struct mpc_config_lintsrc { + unsigned char mpc_type; + unsigned char mpc_irqtype; + unsigned short mpc_irqflag; + unsigned char mpc_srcbusid; + unsigned char mpc_srcbusirq; + unsigned char mpc_destapic; + unsigned char mpc_destapiclint; +}; + +#define MPC_OEM_SIGNATURE "_OEM" + +struct mp_config_oemtable { + char oem_signature[4]; + unsigned short oem_length; /* Size of table */ + char oem_rev; /* 0x01 */ + char oem_checksum; + char mpc_oem[8]; +}; + +/* + * Default configurations + * + * 1 2 CPU ISA 82489DX + * 2 2 CPU EISA 82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining + * 3 2 CPU EISA 82489DX + * 4 2 CPU MCA 82489DX + * 5 2 CPU ISA+PCI + * 6 2 CPU EISA+PCI + * 7 2 CPU MCA+PCI + */ + +enum mp_bustype { + MP_BUS_ISA = 1, + MP_BUS_EISA, + MP_BUS_PCI, + MP_BUS_MCA, +}; +#endif /* _ASM_X86_MPSPEC_DEF_H */ diff --git a/arch/x86/include/asm/msgbuf.h b/arch/x86/include/asm/msgbuf.h new file mode 100644 index 00000000000..7e4e9481f51 --- /dev/null +++ b/arch/x86/include/asm/msgbuf.h @@ -0,0 +1,39 @@ +#ifndef _ASM_X86_MSGBUF_H +#define _ASM_X86_MSGBUF_H + +/* + * The msqid64_ds structure for i386 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space on i386 is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + * + * Pad space on x8664 is left for: + * - 2 miscellaneous 64-bit values + */ +struct msqid64_ds { + struct ipc64_perm msg_perm; + __kernel_time_t msg_stime; /* last msgsnd time */ +#ifdef __i386__ + unsigned long __unused1; +#endif + __kernel_time_t msg_rtime; /* last msgrcv time */ +#ifdef __i386__ + unsigned long __unused2; +#endif + __kernel_time_t msg_ctime; /* last change time */ +#ifdef __i386__ + unsigned long __unused3; +#endif + unsigned long msg_cbytes; /* current number of bytes on queue */ + unsigned long msg_qnum; /* number of messages in queue */ + unsigned long msg_qbytes; /* max number of bytes on queue */ + __kernel_pid_t msg_lspid; /* pid of last msgsnd */ + __kernel_pid_t msg_lrpid; /* last receive pid */ + unsigned long __unused4; + unsigned long __unused5; +}; + +#endif /* _ASM_X86_MSGBUF_H */ diff --git a/arch/x86/include/asm/msidef.h b/arch/x86/include/asm/msidef.h new file mode 100644 index 00000000000..6706b3006f1 --- /dev/null +++ b/arch/x86/include/asm/msidef.h @@ -0,0 +1,55 @@ +#ifndef _ASM_X86_MSIDEF_H +#define _ASM_X86_MSIDEF_H + +/* + * Constants for Intel APIC based MSI messages. + */ + +/* + * Shifts for MSI data + */ + +#define MSI_DATA_VECTOR_SHIFT 0 +#define MSI_DATA_VECTOR_MASK 0x000000ff +#define MSI_DATA_VECTOR(v) (((v) << MSI_DATA_VECTOR_SHIFT) & \ + MSI_DATA_VECTOR_MASK) + +#define MSI_DATA_DELIVERY_MODE_SHIFT 8 +#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_MODE_SHIFT) +#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_MODE_SHIFT) + +#define MSI_DATA_LEVEL_SHIFT 14 +#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT) +#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT) + +#define MSI_DATA_TRIGGER_SHIFT 15 +#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT) +#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT) + +/* + * Shift/mask fields for msi address + */ + +#define MSI_ADDR_BASE_HI 0 +#define MSI_ADDR_BASE_LO 0xfee00000 + +#define MSI_ADDR_DEST_MODE_SHIFT 2 +#define MSI_ADDR_DEST_MODE_PHYSICAL (0 << MSI_ADDR_DEST_MODE_SHIFT) +#define MSI_ADDR_DEST_MODE_LOGICAL (1 << MSI_ADDR_DEST_MODE_SHIFT) + +#define MSI_ADDR_REDIRECTION_SHIFT 3 +#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT) + /* dedicated cpu */ +#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT) + /* lowest priority */ + +#define MSI_ADDR_DEST_ID_SHIFT 12 +#define MSI_ADDR_DEST_ID_MASK 0x00ffff0 +#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ + MSI_ADDR_DEST_ID_MASK) + +#define MSI_ADDR_IR_EXT_INT (1 << 4) +#define MSI_ADDR_IR_SHV (1 << 3) +#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13) +#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5) +#endif /* _ASM_X86_MSIDEF_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h new file mode 100644 index 00000000000..e38859d577a --- /dev/null +++ b/arch/x86/include/asm/msr-index.h @@ -0,0 +1,332 @@ +#ifndef _ASM_X86_MSR_INDEX_H +#define _ASM_X86_MSR_INDEX_H + +/* CPU model specific register (MSR) numbers */ + +/* x86-64 specific MSRs */ +#define MSR_EFER 0xc0000080 /* extended feature register */ +#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ +#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ +#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */ +#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ +#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ +#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ +#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ + +/* EFER bits: */ +#define _EFER_SCE 0 /* SYSCALL/SYSRET */ +#define _EFER_LME 8 /* Long mode enable */ +#define _EFER_LMA 10 /* Long mode active (read-only) */ +#define _EFER_NX 11 /* No execute enable */ + +#define EFER_SCE (1<<_EFER_SCE) +#define EFER_LME (1<<_EFER_LME) +#define EFER_LMA (1<<_EFER_LMA) +#define EFER_NX (1<<_EFER_NX) + +/* Intel MSRs. Some also available on other CPUs */ +#define MSR_IA32_PERFCTR0 0x000000c1 +#define MSR_IA32_PERFCTR1 0x000000c2 +#define MSR_FSB_FREQ 0x000000cd + +#define MSR_MTRRcap 0x000000fe +#define MSR_IA32_BBL_CR_CTL 0x00000119 + +#define MSR_IA32_SYSENTER_CS 0x00000174 +#define MSR_IA32_SYSENTER_ESP 0x00000175 +#define MSR_IA32_SYSENTER_EIP 0x00000176 + +#define MSR_IA32_MCG_CAP 0x00000179 +#define MSR_IA32_MCG_STATUS 0x0000017a +#define MSR_IA32_MCG_CTL 0x0000017b + +#define MSR_IA32_PEBS_ENABLE 0x000003f1 +#define MSR_IA32_DS_AREA 0x00000600 +#define MSR_IA32_PERF_CAPABILITIES 0x00000345 + +#define MSR_MTRRfix64K_00000 0x00000250 +#define MSR_MTRRfix16K_80000 0x00000258 +#define MSR_MTRRfix16K_A0000 0x00000259 +#define MSR_MTRRfix4K_C0000 0x00000268 +#define MSR_MTRRfix4K_C8000 0x00000269 +#define MSR_MTRRfix4K_D0000 0x0000026a +#define MSR_MTRRfix4K_D8000 0x0000026b +#define MSR_MTRRfix4K_E0000 0x0000026c +#define MSR_MTRRfix4K_E8000 0x0000026d +#define MSR_MTRRfix4K_F0000 0x0000026e +#define MSR_MTRRfix4K_F8000 0x0000026f +#define MSR_MTRRdefType 0x000002ff + +#define MSR_IA32_CR_PAT 0x00000277 + +#define MSR_IA32_DEBUGCTLMSR 0x000001d9 +#define MSR_IA32_LASTBRANCHFROMIP 0x000001db +#define MSR_IA32_LASTBRANCHTOIP 0x000001dc +#define MSR_IA32_LASTINTFROMIP 0x000001dd +#define MSR_IA32_LASTINTTOIP 0x000001de + +/* DEBUGCTLMSR bits (others vary by model): */ +#define _DEBUGCTLMSR_LBR 0 /* last branch recording */ +#define _DEBUGCTLMSR_BTF 1 /* single-step on branches */ + +#define DEBUGCTLMSR_LBR (1UL << _DEBUGCTLMSR_LBR) +#define DEBUGCTLMSR_BTF (1UL << _DEBUGCTLMSR_BTF) + +#define MSR_IA32_MC0_CTL 0x00000400 +#define MSR_IA32_MC0_STATUS 0x00000401 +#define MSR_IA32_MC0_ADDR 0x00000402 +#define MSR_IA32_MC0_MISC 0x00000403 + +#define MSR_P6_PERFCTR0 0x000000c1 +#define MSR_P6_PERFCTR1 0x000000c2 +#define MSR_P6_EVNTSEL0 0x00000186 +#define MSR_P6_EVNTSEL1 0x00000187 + +/* AMD64 MSRs. Not complete. See the architecture manual for a more + complete list. */ + +#define MSR_AMD64_NB_CFG 0xc001001f +#define MSR_AMD64_IBSFETCHCTL 0xc0011030 +#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 +#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 +#define MSR_AMD64_IBSOPCTL 0xc0011033 +#define MSR_AMD64_IBSOPRIP 0xc0011034 +#define MSR_AMD64_IBSOPDATA 0xc0011035 +#define MSR_AMD64_IBSOPDATA2 0xc0011036 +#define MSR_AMD64_IBSOPDATA3 0xc0011037 +#define MSR_AMD64_IBSDCLINAD 0xc0011038 +#define MSR_AMD64_IBSDCPHYSAD 0xc0011039 +#define MSR_AMD64_IBSCTL 0xc001103a + +/* Fam 10h MSRs */ +#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 +#define FAM10H_MMIO_CONF_ENABLE (1<<0) +#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf +#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 +#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff +#define FAM10H_MMIO_CONF_BASE_SHIFT 20 + +/* K8 MSRs */ +#define MSR_K8_TOP_MEM1 0xc001001a +#define MSR_K8_TOP_MEM2 0xc001001d +#define MSR_K8_SYSCFG 0xc0010010 +#define MSR_K8_HWCR 0xc0010015 +#define MSR_K8_INT_PENDING_MSG 0xc0010055 +/* C1E active bits in int pending message */ +#define K8_INTP_C1E_ACTIVE_MASK 0x18000000 +#define MSR_K8_TSEG_ADDR 0xc0010112 +#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ +#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ +#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ + +/* K7 MSRs */ +#define MSR_K7_EVNTSEL0 0xc0010000 +#define MSR_K7_PERFCTR0 0xc0010004 +#define MSR_K7_EVNTSEL1 0xc0010001 +#define MSR_K7_PERFCTR1 0xc0010005 +#define MSR_K7_EVNTSEL2 0xc0010002 +#define MSR_K7_PERFCTR2 0xc0010006 +#define MSR_K7_EVNTSEL3 0xc0010003 +#define MSR_K7_PERFCTR3 0xc0010007 +#define MSR_K7_CLK_CTL 0xc001001b +#define MSR_K7_HWCR 0xc0010015 +#define MSR_K7_FID_VID_CTL 0xc0010041 +#define MSR_K7_FID_VID_STATUS 0xc0010042 + +/* K6 MSRs */ +#define MSR_K6_EFER 0xc0000080 +#define MSR_K6_STAR 0xc0000081 +#define MSR_K6_WHCR 0xc0000082 +#define MSR_K6_UWCCR 0xc0000085 +#define MSR_K6_EPMR 0xc0000086 +#define MSR_K6_PSOR 0xc0000087 +#define MSR_K6_PFIR 0xc0000088 + +/* Centaur-Hauls/IDT defined MSRs. */ +#define MSR_IDT_FCR1 0x00000107 +#define MSR_IDT_FCR2 0x00000108 +#define MSR_IDT_FCR3 0x00000109 +#define MSR_IDT_FCR4 0x0000010a + +#define MSR_IDT_MCR0 0x00000110 +#define MSR_IDT_MCR1 0x00000111 +#define MSR_IDT_MCR2 0x00000112 +#define MSR_IDT_MCR3 0x00000113 +#define MSR_IDT_MCR4 0x00000114 +#define MSR_IDT_MCR5 0x00000115 +#define MSR_IDT_MCR6 0x00000116 +#define MSR_IDT_MCR7 0x00000117 +#define MSR_IDT_MCR_CTRL 0x00000120 + +/* VIA Cyrix defined MSRs*/ +#define MSR_VIA_FCR 0x00001107 +#define MSR_VIA_LONGHAUL 0x0000110a +#define MSR_VIA_RNG 0x0000110b +#define MSR_VIA_BCR2 0x00001147 + +/* Transmeta defined MSRs */ +#define MSR_TMTA_LONGRUN_CTRL 0x80868010 +#define MSR_TMTA_LONGRUN_FLAGS 0x80868011 +#define MSR_TMTA_LRTI_READOUT 0x80868018 +#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a + +/* Intel defined MSRs. */ +#define MSR_IA32_P5_MC_ADDR 0x00000000 +#define MSR_IA32_P5_MC_TYPE 0x00000001 +#define MSR_IA32_TSC 0x00000010 +#define MSR_IA32_PLATFORM_ID 0x00000017 +#define MSR_IA32_EBL_CR_POWERON 0x0000002a +#define MSR_IA32_FEATURE_CONTROL 0x0000003a + +#define FEATURE_CONTROL_LOCKED (1<<0) +#define FEATURE_CONTROL_VMXON_ENABLED (1<<2) + +#define MSR_IA32_APICBASE 0x0000001b +#define MSR_IA32_APICBASE_BSP (1<<8) +#define MSR_IA32_APICBASE_ENABLE (1<<11) +#define MSR_IA32_APICBASE_BASE (0xfffff<<12) + +#define MSR_IA32_UCODE_WRITE 0x00000079 +#define MSR_IA32_UCODE_REV 0x0000008b + +#define MSR_IA32_PERF_STATUS 0x00000198 +#define MSR_IA32_PERF_CTL 0x00000199 + +#define MSR_IA32_MPERF 0x000000e7 +#define MSR_IA32_APERF 0x000000e8 + +#define MSR_IA32_THERM_CONTROL 0x0000019a +#define MSR_IA32_THERM_INTERRUPT 0x0000019b +#define MSR_IA32_THERM_STATUS 0x0000019c +#define MSR_IA32_MISC_ENABLE 0x000001a0 + +/* Intel Model 6 */ +#define MSR_P6_EVNTSEL0 0x00000186 +#define MSR_P6_EVNTSEL1 0x00000187 + +/* P4/Xeon+ specific */ +#define MSR_IA32_MCG_EAX 0x00000180 +#define MSR_IA32_MCG_EBX 0x00000181 +#define MSR_IA32_MCG_ECX 0x00000182 +#define MSR_IA32_MCG_EDX 0x00000183 +#define MSR_IA32_MCG_ESI 0x00000184 +#define MSR_IA32_MCG_EDI 0x00000185 +#define MSR_IA32_MCG_EBP 0x00000186 +#define MSR_IA32_MCG_ESP 0x00000187 +#define MSR_IA32_MCG_EFLAGS 0x00000188 +#define MSR_IA32_MCG_EIP 0x00000189 +#define MSR_IA32_MCG_RESERVED 0x0000018a + +/* Pentium IV performance counter MSRs */ +#define MSR_P4_BPU_PERFCTR0 0x00000300 +#define MSR_P4_BPU_PERFCTR1 0x00000301 +#define MSR_P4_BPU_PERFCTR2 0x00000302 +#define MSR_P4_BPU_PERFCTR3 0x00000303 +#define MSR_P4_MS_PERFCTR0 0x00000304 +#define MSR_P4_MS_PERFCTR1 0x00000305 +#define MSR_P4_MS_PERFCTR2 0x00000306 +#define MSR_P4_MS_PERFCTR3 0x00000307 +#define MSR_P4_FLAME_PERFCTR0 0x00000308 +#define MSR_P4_FLAME_PERFCTR1 0x00000309 +#define MSR_P4_FLAME_PERFCTR2 0x0000030a +#define MSR_P4_FLAME_PERFCTR3 0x0000030b +#define MSR_P4_IQ_PERFCTR0 0x0000030c +#define MSR_P4_IQ_PERFCTR1 0x0000030d +#define MSR_P4_IQ_PERFCTR2 0x0000030e +#define MSR_P4_IQ_PERFCTR3 0x0000030f +#define MSR_P4_IQ_PERFCTR4 0x00000310 +#define MSR_P4_IQ_PERFCTR5 0x00000311 +#define MSR_P4_BPU_CCCR0 0x00000360 +#define MSR_P4_BPU_CCCR1 0x00000361 +#define MSR_P4_BPU_CCCR2 0x00000362 +#define MSR_P4_BPU_CCCR3 0x00000363 +#define MSR_P4_MS_CCCR0 0x00000364 +#define MSR_P4_MS_CCCR1 0x00000365 +#define MSR_P4_MS_CCCR2 0x00000366 +#define MSR_P4_MS_CCCR3 0x00000367 +#define MSR_P4_FLAME_CCCR0 0x00000368 +#define MSR_P4_FLAME_CCCR1 0x00000369 +#define MSR_P4_FLAME_CCCR2 0x0000036a +#define MSR_P4_FLAME_CCCR3 0x0000036b +#define MSR_P4_IQ_CCCR0 0x0000036c +#define MSR_P4_IQ_CCCR1 0x0000036d +#define MSR_P4_IQ_CCCR2 0x0000036e +#define MSR_P4_IQ_CCCR3 0x0000036f +#define MSR_P4_IQ_CCCR4 0x00000370 +#define MSR_P4_IQ_CCCR5 0x00000371 +#define MSR_P4_ALF_ESCR0 0x000003ca +#define MSR_P4_ALF_ESCR1 0x000003cb +#define MSR_P4_BPU_ESCR0 0x000003b2 +#define MSR_P4_BPU_ESCR1 0x000003b3 +#define MSR_P4_BSU_ESCR0 0x000003a0 +#define MSR_P4_BSU_ESCR1 0x000003a1 +#define MSR_P4_CRU_ESCR0 0x000003b8 +#define MSR_P4_CRU_ESCR1 0x000003b9 +#define MSR_P4_CRU_ESCR2 0x000003cc +#define MSR_P4_CRU_ESCR3 0x000003cd +#define MSR_P4_CRU_ESCR4 0x000003e0 +#define MSR_P4_CRU_ESCR5 0x000003e1 +#define MSR_P4_DAC_ESCR0 0x000003a8 +#define MSR_P4_DAC_ESCR1 0x000003a9 +#define MSR_P4_FIRM_ESCR0 0x000003a4 +#define MSR_P4_FIRM_ESCR1 0x000003a5 +#define MSR_P4_FLAME_ESCR0 0x000003a6 +#define MSR_P4_FLAME_ESCR1 0x000003a7 +#define MSR_P4_FSB_ESCR0 0x000003a2 +#define MSR_P4_FSB_ESCR1 0x000003a3 +#define MSR_P4_IQ_ESCR0 0x000003ba +#define MSR_P4_IQ_ESCR1 0x000003bb +#define MSR_P4_IS_ESCR0 0x000003b4 +#define MSR_P4_IS_ESCR1 0x000003b5 +#define MSR_P4_ITLB_ESCR0 0x000003b6 +#define MSR_P4_ITLB_ESCR1 0x000003b7 +#define MSR_P4_IX_ESCR0 0x000003c8 +#define MSR_P4_IX_ESCR1 0x000003c9 +#define MSR_P4_MOB_ESCR0 0x000003aa +#define MSR_P4_MOB_ESCR1 0x000003ab +#define MSR_P4_MS_ESCR0 0x000003c0 +#define MSR_P4_MS_ESCR1 0x000003c1 +#define MSR_P4_PMH_ESCR0 0x000003ac +#define MSR_P4_PMH_ESCR1 0x000003ad +#define MSR_P4_RAT_ESCR0 0x000003bc +#define MSR_P4_RAT_ESCR1 0x000003bd +#define MSR_P4_SAAT_ESCR0 0x000003ae +#define MSR_P4_SAAT_ESCR1 0x000003af +#define MSR_P4_SSU_ESCR0 0x000003be +#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */ + +#define MSR_P4_TBPU_ESCR0 0x000003c2 +#define MSR_P4_TBPU_ESCR1 0x000003c3 +#define MSR_P4_TC_ESCR0 0x000003c4 +#define MSR_P4_TC_ESCR1 0x000003c5 +#define MSR_P4_U2L_ESCR0 0x000003b0 +#define MSR_P4_U2L_ESCR1 0x000003b1 + +/* Intel Core-based CPU performance counters */ +#define MSR_CORE_PERF_FIXED_CTR0 0x00000309 +#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a +#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b +#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d +#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e +#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 + +/* Geode defined MSRs */ +#define MSR_GEODE_BUSCONT_CONF0 0x00001900 + +/* Intel VT MSRs */ +#define MSR_IA32_VMX_BASIC 0x00000480 +#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 +#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 +#define MSR_IA32_VMX_EXIT_CTLS 0x00000483 +#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 +#define MSR_IA32_VMX_MISC 0x00000485 +#define MSR_IA32_VMX_CR0_FIXED0 0x00000486 +#define MSR_IA32_VMX_CR0_FIXED1 0x00000487 +#define MSR_IA32_VMX_CR4_FIXED0 0x00000488 +#define MSR_IA32_VMX_CR4_FIXED1 0x00000489 +#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a +#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b +#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c + +#endif /* _ASM_X86_MSR_INDEX_H */ diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h new file mode 100644 index 00000000000..46be2fa7ac2 --- /dev/null +++ b/arch/x86/include/asm/msr.h @@ -0,0 +1,247 @@ +#ifndef _ASM_X86_MSR_H +#define _ASM_X86_MSR_H + +#include <asm/msr-index.h> + +#ifndef __ASSEMBLY__ +# include <linux/types.h> +#endif + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + +#include <asm/asm.h> +#include <asm/errno.h> + +static inline unsigned long long native_read_tscp(unsigned int *aux) +{ + unsigned long low, high; + asm volatile(".byte 0x0f,0x01,0xf9" + : "=a" (low), "=d" (high), "=c" (*aux)); + return low | ((u64)high << 32); +} + +/* + * i386 calling convention returns 64-bit value in edx:eax, while + * x86_64 returns at rax. Also, the "A" constraint does not really + * mean rdx:rax in x86_64, so we need specialized behaviour for each + * architecture + */ +#ifdef CONFIG_X86_64 +#define DECLARE_ARGS(val, low, high) unsigned low, high +#define EAX_EDX_VAL(val, low, high) ((low) | ((u64)(high) << 32)) +#define EAX_EDX_ARGS(val, low, high) "a" (low), "d" (high) +#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high) +#else +#define DECLARE_ARGS(val, low, high) unsigned long long val +#define EAX_EDX_VAL(val, low, high) (val) +#define EAX_EDX_ARGS(val, low, high) "A" (val) +#define EAX_EDX_RET(val, low, high) "=A" (val) +#endif + +static inline unsigned long long native_read_msr(unsigned int msr) +{ + DECLARE_ARGS(val, low, high); + + asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr)); + return EAX_EDX_VAL(val, low, high); +} + +static inline unsigned long long native_read_msr_safe(unsigned int msr, + int *err) +{ + DECLARE_ARGS(val, low, high); + + asm volatile("2: rdmsr ; xor %[err],%[err]\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: mov %[fault],%[err] ; jmp 1b\n\t" + ".previous\n\t" + _ASM_EXTABLE(2b, 3b) + : [err] "=r" (*err), EAX_EDX_RET(val, low, high) + : "c" (msr), [fault] "i" (-EFAULT)); + return EAX_EDX_VAL(val, low, high); +} + +static inline unsigned long long native_read_msr_amd_safe(unsigned int msr, + int *err) +{ + DECLARE_ARGS(val, low, high); + + asm volatile("2: rdmsr ; xor %0,%0\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: mov %3,%0 ; jmp 1b\n\t" + ".previous\n\t" + _ASM_EXTABLE(2b, 3b) + : "=r" (*err), EAX_EDX_RET(val, low, high) + : "c" (msr), "D" (0x9c5a203a), "i" (-EFAULT)); + return EAX_EDX_VAL(val, low, high); +} + +static inline void native_write_msr(unsigned int msr, + unsigned low, unsigned high) +{ + asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory"); +} + +static inline int native_write_msr_safe(unsigned int msr, + unsigned low, unsigned high) +{ + int err; + asm volatile("2: wrmsr ; xor %[err],%[err]\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: mov %[fault],%[err] ; jmp 1b\n\t" + ".previous\n\t" + _ASM_EXTABLE(2b, 3b) + : [err] "=a" (err) + : "c" (msr), "0" (low), "d" (high), + [fault] "i" (-EFAULT) + : "memory"); + return err; +} + +extern unsigned long long native_read_tsc(void); + +static __always_inline unsigned long long __native_read_tsc(void) +{ + DECLARE_ARGS(val, low, high); + + rdtsc_barrier(); + asm volatile("rdtsc" : EAX_EDX_RET(val, low, high)); + rdtsc_barrier(); + + return EAX_EDX_VAL(val, low, high); +} + +static inline unsigned long long native_read_pmc(int counter) +{ + DECLARE_ARGS(val, low, high); + + asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter)); + return EAX_EDX_VAL(val, low, high); +} + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#include <linux/errno.h> +/* + * Access to machine-specific registers (available on 586 and better only) + * Note: the rd* operations modify the parameters directly (without using + * pointer indirection), this allows gcc to optimize better + */ + +#define rdmsr(msr, val1, val2) \ +do { \ + u64 __val = native_read_msr((msr)); \ + (val1) = (u32)__val; \ + (val2) = (u32)(__val >> 32); \ +} while (0) + +static inline void wrmsr(unsigned msr, unsigned low, unsigned high) +{ + native_write_msr(msr, low, high); +} + +#define rdmsrl(msr, val) \ + ((val) = native_read_msr((msr))) + +#define wrmsrl(msr, val) \ + native_write_msr((msr), (u32)((u64)(val)), (u32)((u64)(val) >> 32)) + +/* wrmsr with exception handling */ +static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high) +{ + return native_write_msr_safe(msr, low, high); +} + +/* rdmsr with exception handling */ +#define rdmsr_safe(msr, p1, p2) \ +({ \ + int __err; \ + u64 __val = native_read_msr_safe((msr), &__err); \ + (*p1) = (u32)__val; \ + (*p2) = (u32)(__val >> 32); \ + __err; \ +}) + +static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) +{ + int err; + + *p = native_read_msr_safe(msr, &err); + return err; +} +static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) +{ + int err; + + *p = native_read_msr_amd_safe(msr, &err); + return err; +} + +#define rdtscl(low) \ + ((low) = (u32)native_read_tsc()) + +#define rdtscll(val) \ + ((val) = native_read_tsc()) + +#define rdpmc(counter, low, high) \ +do { \ + u64 _l = native_read_pmc((counter)); \ + (low) = (u32)_l; \ + (high) = (u32)(_l >> 32); \ +} while (0) + +#define rdtscp(low, high, aux) \ +do { \ + unsigned long long _val = native_read_tscp(&(aux)); \ + (low) = (u32)_val; \ + (high) = (u32)(_val >> 32); \ +} while (0) + +#define rdtscpll(val, aux) (val) = native_read_tscp(&(aux)) + +#endif /* !CONFIG_PARAVIRT */ + + +#define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \ + (u32)((val) >> 32)) + +#define write_tsc(val1, val2) wrmsr(0x10, (val1), (val2)) + +#define write_rdtscp_aux(val) wrmsr(0xc0000103, (val), 0) + +#ifdef CONFIG_SMP +int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); +int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); +int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); +int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); +#else /* CONFIG_SMP */ +static inline int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) +{ + rdmsr(msr_no, *l, *h); + return 0; +} +static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +{ + wrmsr(msr_no, l, h); + return 0; +} +static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, + u32 *l, u32 *h) +{ + return rdmsr_safe(msr_no, l, h); +} +static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +{ + return wrmsr_safe(msr_no, l, h); +} +#endif /* CONFIG_SMP */ +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ + + +#endif /* _ASM_X86_MSR_H */ diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h new file mode 100644 index 00000000000..7c1e4258b31 --- /dev/null +++ b/arch/x86/include/asm/mtrr.h @@ -0,0 +1,173 @@ +/* Generic MTRR (Memory Type Range Register) ioctls. + + Copyright (C) 1997-1999 Richard Gooch + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Richard Gooch may be reached by email at rgooch@atnf.csiro.au + The postal address is: + Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. +*/ +#ifndef _ASM_X86_MTRR_H +#define _ASM_X86_MTRR_H + +#include <linux/ioctl.h> +#include <linux/errno.h> + +#define MTRR_IOCTL_BASE 'M' + +struct mtrr_sentry { + unsigned long base; /* Base address */ + unsigned int size; /* Size of region */ + unsigned int type; /* Type of region */ +}; + +/* Warning: this structure has a different order from i386 + on x86-64. The 32bit emulation code takes care of that. + But you need to use this for 64bit, otherwise your X server + will break. */ + +#ifdef __i386__ +struct mtrr_gentry { + unsigned int regnum; /* Register number */ + unsigned long base; /* Base address */ + unsigned int size; /* Size of region */ + unsigned int type; /* Type of region */ +}; + +#else /* __i386__ */ + +struct mtrr_gentry { + unsigned long base; /* Base address */ + unsigned int size; /* Size of region */ + unsigned int regnum; /* Register number */ + unsigned int type; /* Type of region */ +}; +#endif /* !__i386__ */ + +/* These are the various ioctls */ +#define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) +#define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry) +#define MTRRIOC_DEL_ENTRY _IOW(MTRR_IOCTL_BASE, 2, struct mtrr_sentry) +#define MTRRIOC_GET_ENTRY _IOWR(MTRR_IOCTL_BASE, 3, struct mtrr_gentry) +#define MTRRIOC_KILL_ENTRY _IOW(MTRR_IOCTL_BASE, 4, struct mtrr_sentry) +#define MTRRIOC_ADD_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 5, struct mtrr_sentry) +#define MTRRIOC_SET_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 6, struct mtrr_sentry) +#define MTRRIOC_DEL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 7, struct mtrr_sentry) +#define MTRRIOC_GET_PAGE_ENTRY _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry) +#define MTRRIOC_KILL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry) + +/* These are the region types */ +#define MTRR_TYPE_UNCACHABLE 0 +#define MTRR_TYPE_WRCOMB 1 +/*#define MTRR_TYPE_ 2*/ +/*#define MTRR_TYPE_ 3*/ +#define MTRR_TYPE_WRTHROUGH 4 +#define MTRR_TYPE_WRPROT 5 +#define MTRR_TYPE_WRBACK 6 +#define MTRR_NUM_TYPES 7 + +#ifdef __KERNEL__ + +/* The following functions are for use by other drivers */ +# ifdef CONFIG_MTRR +extern u8 mtrr_type_lookup(u64 addr, u64 end); +extern void mtrr_save_fixed_ranges(void *); +extern void mtrr_save_state(void); +extern int mtrr_add(unsigned long base, unsigned long size, + unsigned int type, bool increment); +extern int mtrr_add_page(unsigned long base, unsigned long size, + unsigned int type, bool increment); +extern int mtrr_del(int reg, unsigned long base, unsigned long size); +extern int mtrr_del_page(int reg, unsigned long base, unsigned long size); +extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); +extern void mtrr_ap_init(void); +extern void mtrr_bp_init(void); +extern int mtrr_trim_uncached_memory(unsigned long end_pfn); +extern int amd_special_default_mtrr(void); +# else +static inline u8 mtrr_type_lookup(u64 addr, u64 end) +{ + /* + * Return no-MTRRs: + */ + return 0xff; +} +#define mtrr_save_fixed_ranges(arg) do {} while (0) +#define mtrr_save_state() do {} while (0) +static inline int mtrr_add(unsigned long base, unsigned long size, + unsigned int type, bool increment) +{ + return -ENODEV; +} +static inline int mtrr_add_page(unsigned long base, unsigned long size, + unsigned int type, bool increment) +{ + return -ENODEV; +} +static inline int mtrr_del(int reg, unsigned long base, unsigned long size) +{ + return -ENODEV; +} +static inline int mtrr_del_page(int reg, unsigned long base, unsigned long size) +{ + return -ENODEV; +} +static inline int mtrr_trim_uncached_memory(unsigned long end_pfn) +{ + return 0; +} +static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) +{ +} + +#define mtrr_ap_init() do {} while (0) +#define mtrr_bp_init() do {} while (0) +# endif + +#ifdef CONFIG_COMPAT +#include <linux/compat.h> + +struct mtrr_sentry32 { + compat_ulong_t base; /* Base address */ + compat_uint_t size; /* Size of region */ + compat_uint_t type; /* Type of region */ +}; + +struct mtrr_gentry32 { + compat_ulong_t regnum; /* Register number */ + compat_uint_t base; /* Base address */ + compat_uint_t size; /* Size of region */ + compat_uint_t type; /* Type of region */ +}; + +#define MTRR_IOCTL_BASE 'M' + +#define MTRRIOC32_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry32) +#define MTRRIOC32_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry32) +#define MTRRIOC32_DEL_ENTRY _IOW(MTRR_IOCTL_BASE, 2, struct mtrr_sentry32) +#define MTRRIOC32_GET_ENTRY _IOWR(MTRR_IOCTL_BASE, 3, struct mtrr_gentry32) +#define MTRRIOC32_KILL_ENTRY _IOW(MTRR_IOCTL_BASE, 4, struct mtrr_sentry32) +#define MTRRIOC32_ADD_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 5, struct mtrr_sentry32) +#define MTRRIOC32_SET_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 6, struct mtrr_sentry32) +#define MTRRIOC32_DEL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 7, struct mtrr_sentry32) +#define MTRRIOC32_GET_PAGE_ENTRY _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry32) +#define MTRRIOC32_KILL_PAGE_ENTRY \ + _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry32) +#endif /* CONFIG_COMPAT */ + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_MTRR_H */ diff --git a/arch/x86/include/asm/mutex.h b/arch/x86/include/asm/mutex.h new file mode 100644 index 00000000000..a731b9c573a --- /dev/null +++ b/arch/x86/include/asm/mutex.h @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "mutex_32.h" +#else +# include "mutex_64.h" +#endif diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h new file mode 100644 index 00000000000..03f90c8a5a7 --- /dev/null +++ b/arch/x86/include/asm/mutex_32.h @@ -0,0 +1,125 @@ +/* + * Assembly implementation of the mutex fastpath, based on atomic + * decrement/increment. + * + * started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + */ +#ifndef _ASM_X86_MUTEX_32_H +#define _ASM_X86_MUTEX_32_H + +#include <asm/alternative.h> + +/** + * __mutex_fastpath_lock - try to take the lock by moving the count + * from 1 to a 0 value + * @count: pointer of type atomic_t + * @fn: function to call if the original value was not 1 + * + * Change the count from 1 to a value lower than 1, and call <fn> if it + * wasn't 1 originally. This function MUST leave the value lower than 1 + * even when the "1" assertion wasn't true. + */ +#define __mutex_fastpath_lock(count, fail_fn) \ +do { \ + unsigned int dummy; \ + \ + typecheck(atomic_t *, count); \ + typecheck_fn(void (*)(atomic_t *), fail_fn); \ + \ + asm volatile(LOCK_PREFIX " decl (%%eax)\n" \ + " jns 1f \n" \ + " call " #fail_fn "\n" \ + "1:\n" \ + : "=a" (dummy) \ + : "a" (count) \ + : "memory", "ecx", "edx"); \ +} while (0) + + +/** + * __mutex_fastpath_lock_retval - try to take the lock by moving the count + * from 1 to a 0 value + * @count: pointer of type atomic_t + * @fail_fn: function to call if the original value was not 1 + * + * Change the count from 1 to a value lower than 1, and call <fail_fn> if it + * wasn't 1 originally. This function returns 0 if the fastpath succeeds, + * or anything the slow path function returns + */ +static inline int __mutex_fastpath_lock_retval(atomic_t *count, + int (*fail_fn)(atomic_t *)) +{ + if (unlikely(atomic_dec_return(count) < 0)) + return fail_fn(count); + else + return 0; +} + +/** + * __mutex_fastpath_unlock - try to promote the mutex from 0 to 1 + * @count: pointer of type atomic_t + * @fail_fn: function to call if the original value was not 0 + * + * try to promote the mutex from 0 to 1. if it wasn't 0, call <fail_fn>. + * In the failure case, this function is allowed to either set the value + * to 1, or to set it to a value lower than 1. + * + * If the implementation sets it to a value of lower than 1, the + * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs + * to return 0 otherwise. + */ +#define __mutex_fastpath_unlock(count, fail_fn) \ +do { \ + unsigned int dummy; \ + \ + typecheck(atomic_t *, count); \ + typecheck_fn(void (*)(atomic_t *), fail_fn); \ + \ + asm volatile(LOCK_PREFIX " incl (%%eax)\n" \ + " jg 1f\n" \ + " call " #fail_fn "\n" \ + "1:\n" \ + : "=a" (dummy) \ + : "a" (count) \ + : "memory", "ecx", "edx"); \ +} while (0) + +#define __mutex_slowpath_needs_to_unlock() 1 + +/** + * __mutex_fastpath_trylock - try to acquire the mutex, without waiting + * + * @count: pointer of type atomic_t + * @fail_fn: fallback function + * + * Change the count from 1 to a value lower than 1, and return 0 (failure) + * if it wasn't 1 originally, or return 1 (success) otherwise. This function + * MUST leave the value lower than 1 even when the "1" assertion wasn't true. + * Additionally, if the value was < 0 originally, this function must not leave + * it to 0 on failure. + */ +static inline int __mutex_fastpath_trylock(atomic_t *count, + int (*fail_fn)(atomic_t *)) +{ + /* + * We have two variants here. The cmpxchg based one is the best one + * because it never induce a false contention state. It is included + * here because architectures using the inc/dec algorithms over the + * xchg ones are much more likely to support cmpxchg natively. + * + * If not we fall back to the spinlock based variant - that is + * just as efficient (and simpler) as a 'destructive' probing of + * the mutex state would be. + */ +#ifdef __HAVE_ARCH_CMPXCHG + if (likely(atomic_cmpxchg(count, 1, 0) == 1)) + return 1; + return 0; +#else + return fail_fn(count); +#endif +} + +#endif /* _ASM_X86_MUTEX_32_H */ diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h new file mode 100644 index 00000000000..68a87b0f8e2 --- /dev/null +++ b/arch/x86/include/asm/mutex_64.h @@ -0,0 +1,100 @@ +/* + * Assembly implementation of the mutex fastpath, based on atomic + * decrement/increment. + * + * started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + */ +#ifndef _ASM_X86_MUTEX_64_H +#define _ASM_X86_MUTEX_64_H + +/** + * __mutex_fastpath_lock - decrement and call function if negative + * @v: pointer of type atomic_t + * @fail_fn: function to call if the result is negative + * + * Atomically decrements @v and calls <fail_fn> if the result is negative. + */ +#define __mutex_fastpath_lock(v, fail_fn) \ +do { \ + unsigned long dummy; \ + \ + typecheck(atomic_t *, v); \ + typecheck_fn(void (*)(atomic_t *), fail_fn); \ + \ + asm volatile(LOCK_PREFIX " decl (%%rdi)\n" \ + " jns 1f \n" \ + " call " #fail_fn "\n" \ + "1:" \ + : "=D" (dummy) \ + : "D" (v) \ + : "rax", "rsi", "rdx", "rcx", \ + "r8", "r9", "r10", "r11", "memory"); \ +} while (0) + +/** + * __mutex_fastpath_lock_retval - try to take the lock by moving the count + * from 1 to a 0 value + * @count: pointer of type atomic_t + * @fail_fn: function to call if the original value was not 1 + * + * Change the count from 1 to a value lower than 1, and call <fail_fn> if + * it wasn't 1 originally. This function returns 0 if the fastpath succeeds, + * or anything the slow path function returns + */ +static inline int __mutex_fastpath_lock_retval(atomic_t *count, + int (*fail_fn)(atomic_t *)) +{ + if (unlikely(atomic_dec_return(count) < 0)) + return fail_fn(count); + else + return 0; +} + +/** + * __mutex_fastpath_unlock - increment and call function if nonpositive + * @v: pointer of type atomic_t + * @fail_fn: function to call if the result is nonpositive + * + * Atomically increments @v and calls <fail_fn> if the result is nonpositive. + */ +#define __mutex_fastpath_unlock(v, fail_fn) \ +do { \ + unsigned long dummy; \ + \ + typecheck(atomic_t *, v); \ + typecheck_fn(void (*)(atomic_t *), fail_fn); \ + \ + asm volatile(LOCK_PREFIX " incl (%%rdi)\n" \ + " jg 1f\n" \ + " call " #fail_fn "\n" \ + "1:" \ + : "=D" (dummy) \ + : "D" (v) \ + : "rax", "rsi", "rdx", "rcx", \ + "r8", "r9", "r10", "r11", "memory"); \ +} while (0) + +#define __mutex_slowpath_needs_to_unlock() 1 + +/** + * __mutex_fastpath_trylock - try to acquire the mutex, without waiting + * + * @count: pointer of type atomic_t + * @fail_fn: fallback function + * + * Change the count from 1 to 0 and return 1 (success), or return 0 (failure) + * if it wasn't 1 originally. [the fallback function is never used on + * x86_64, because all x86_64 CPUs have a CMPXCHG instruction.] + */ +static inline int __mutex_fastpath_trylock(atomic_t *count, + int (*fail_fn)(atomic_t *)) +{ + if (likely(atomic_cmpxchg(count, 1, 0) == 1)) + return 1; + else + return 0; +} + +#endif /* _ASM_X86_MUTEX_64_H */ diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h new file mode 100644 index 00000000000..c45a0a568df --- /dev/null +++ b/arch/x86/include/asm/nmi.h @@ -0,0 +1,81 @@ +#ifndef _ASM_X86_NMI_H +#define _ASM_X86_NMI_H + +#include <linux/pm.h> +#include <asm/irq.h> +#include <asm/io.h> + +#ifdef ARCH_HAS_NMI_WATCHDOG + +/** + * do_nmi_callback + * + * Check to see if a callback exists and execute it. Return 1 + * if the handler exists and was handled successfully. + */ +int do_nmi_callback(struct pt_regs *regs, int cpu); + +extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); +extern int check_nmi_watchdog(void); +extern int nmi_watchdog_enabled; +extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); +extern int avail_to_resrv_perfctr_nmi(unsigned int); +extern int reserve_perfctr_nmi(unsigned int); +extern void release_perfctr_nmi(unsigned int); +extern int reserve_evntsel_nmi(unsigned int); +extern void release_evntsel_nmi(unsigned int); + +extern void setup_apic_nmi_watchdog(void *); +extern void stop_apic_nmi_watchdog(void *); +extern void disable_timer_nmi_watchdog(void); +extern void enable_timer_nmi_watchdog(void); +extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason); +extern void cpu_nmi_set_wd_enabled(void); + +extern atomic_t nmi_active; +extern unsigned int nmi_watchdog; +#define NMI_NONE 0 +#define NMI_IO_APIC 1 +#define NMI_LOCAL_APIC 2 +#define NMI_INVALID 3 + +struct ctl_table; +struct file; +extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, + void __user *, size_t *, loff_t *); +extern int unknown_nmi_panic; + +void __trigger_all_cpu_backtrace(void); +#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() + +static inline void localise_nmi_watchdog(void) +{ + if (nmi_watchdog == NMI_IO_APIC) + nmi_watchdog = NMI_LOCAL_APIC; +} + +/* check if nmi_watchdog is active (ie was specified at boot) */ +static inline int nmi_watchdog_active(void) +{ + /* + * actually it should be: + * return (nmi_watchdog == NMI_LOCAL_APIC || + * nmi_watchdog == NMI_IO_APIC) + * but since they are power of two we could use a + * cheaper way --cvg + */ + return nmi_watchdog & 0x3; +} +#endif + +void lapic_watchdog_stop(void); +int lapic_watchdog_init(unsigned nmi_hz); +int lapic_wd_event(unsigned nmi_hz); +unsigned lapic_adjust_nmi_hz(unsigned hz); +int lapic_watchdog_ok(void); +void disable_lapic_nmi_watchdog(void); +void enable_lapic_nmi_watchdog(void); +void stop_nmi(void); +void restart_nmi(void); + +#endif /* _ASM_X86_NMI_H */ diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h new file mode 100644 index 00000000000..ad2668ee1aa --- /dev/null +++ b/arch/x86/include/asm/nops.h @@ -0,0 +1,118 @@ +#ifndef _ASM_X86_NOPS_H +#define _ASM_X86_NOPS_H + +/* Define nops for use with alternative() */ + +/* generic versions from gas + 1: nop + the following instructions are NOT nops in 64-bit mode, + for 64-bit mode use K8 or P6 nops instead + 2: movl %esi,%esi + 3: leal 0x00(%esi),%esi + 4: leal 0x00(,%esi,1),%esi + 6: leal 0x00000000(%esi),%esi + 7: leal 0x00000000(,%esi,1),%esi +*/ +#define GENERIC_NOP1 ".byte 0x90\n" +#define GENERIC_NOP2 ".byte 0x89,0xf6\n" +#define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n" +#define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n" +#define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4 +#define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n" +#define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n" +#define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7 + +/* Opteron 64bit nops + 1: nop + 2: osp nop + 3: osp osp nop + 4: osp osp osp nop +*/ +#define K8_NOP1 GENERIC_NOP1 +#define K8_NOP2 ".byte 0x66,0x90\n" +#define K8_NOP3 ".byte 0x66,0x66,0x90\n" +#define K8_NOP4 ".byte 0x66,0x66,0x66,0x90\n" +#define K8_NOP5 K8_NOP3 K8_NOP2 +#define K8_NOP6 K8_NOP3 K8_NOP3 +#define K8_NOP7 K8_NOP4 K8_NOP3 +#define K8_NOP8 K8_NOP4 K8_NOP4 + +/* K7 nops + uses eax dependencies (arbitary choice) + 1: nop + 2: movl %eax,%eax + 3: leal (,%eax,1),%eax + 4: leal 0x00(,%eax,1),%eax + 6: leal 0x00000000(%eax),%eax + 7: leal 0x00000000(,%eax,1),%eax +*/ +#define K7_NOP1 GENERIC_NOP1 +#define K7_NOP2 ".byte 0x8b,0xc0\n" +#define K7_NOP3 ".byte 0x8d,0x04,0x20\n" +#define K7_NOP4 ".byte 0x8d,0x44,0x20,0x00\n" +#define K7_NOP5 K7_NOP4 ASM_NOP1 +#define K7_NOP6 ".byte 0x8d,0x80,0,0,0,0\n" +#define K7_NOP7 ".byte 0x8D,0x04,0x05,0,0,0,0\n" +#define K7_NOP8 K7_NOP7 ASM_NOP1 + +/* P6 nops + uses eax dependencies (Intel-recommended choice) + 1: nop + 2: osp nop + 3: nopl (%eax) + 4: nopl 0x00(%eax) + 5: nopl 0x00(%eax,%eax,1) + 6: osp nopl 0x00(%eax,%eax,1) + 7: nopl 0x00000000(%eax) + 8: nopl 0x00000000(%eax,%eax,1) +*/ +#define P6_NOP1 GENERIC_NOP1 +#define P6_NOP2 ".byte 0x66,0x90\n" +#define P6_NOP3 ".byte 0x0f,0x1f,0x00\n" +#define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n" +#define P6_NOP5 ".byte 0x0f,0x1f,0x44,0x00,0\n" +#define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n" +#define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n" +#define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n" + +#if defined(CONFIG_MK7) +#define ASM_NOP1 K7_NOP1 +#define ASM_NOP2 K7_NOP2 +#define ASM_NOP3 K7_NOP3 +#define ASM_NOP4 K7_NOP4 +#define ASM_NOP5 K7_NOP5 +#define ASM_NOP6 K7_NOP6 +#define ASM_NOP7 K7_NOP7 +#define ASM_NOP8 K7_NOP8 +#elif defined(CONFIG_X86_P6_NOP) +#define ASM_NOP1 P6_NOP1 +#define ASM_NOP2 P6_NOP2 +#define ASM_NOP3 P6_NOP3 +#define ASM_NOP4 P6_NOP4 +#define ASM_NOP5 P6_NOP5 +#define ASM_NOP6 P6_NOP6 +#define ASM_NOP7 P6_NOP7 +#define ASM_NOP8 P6_NOP8 +#elif defined(CONFIG_X86_64) +#define ASM_NOP1 K8_NOP1 +#define ASM_NOP2 K8_NOP2 +#define ASM_NOP3 K8_NOP3 +#define ASM_NOP4 K8_NOP4 +#define ASM_NOP5 K8_NOP5 +#define ASM_NOP6 K8_NOP6 +#define ASM_NOP7 K8_NOP7 +#define ASM_NOP8 K8_NOP8 +#else +#define ASM_NOP1 GENERIC_NOP1 +#define ASM_NOP2 GENERIC_NOP2 +#define ASM_NOP3 GENERIC_NOP3 +#define ASM_NOP4 GENERIC_NOP4 +#define ASM_NOP5 GENERIC_NOP5 +#define ASM_NOP6 GENERIC_NOP6 +#define ASM_NOP7 GENERIC_NOP7 +#define ASM_NOP8 GENERIC_NOP8 +#endif + +#define ASM_NOP_MAX 8 + +#endif /* _ASM_X86_NOPS_H */ diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h new file mode 100644 index 00000000000..27da400d313 --- /dev/null +++ b/arch/x86/include/asm/numa.h @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "numa_32.h" +#else +# include "numa_64.h" +#endif diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h new file mode 100644 index 00000000000..e9f5db79624 --- /dev/null +++ b/arch/x86/include/asm/numa_32.h @@ -0,0 +1,11 @@ +#ifndef _ASM_X86_NUMA_32_H +#define _ASM_X86_NUMA_32_H + +extern int pxm_to_nid(int pxm); +extern void numa_remove_cpu(int cpu); + +#ifdef CONFIG_NUMA +extern void set_highmem_pages_init(void); +#endif + +#endif /* _ASM_X86_NUMA_32_H */ diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h new file mode 100644 index 00000000000..064ed6df4cb --- /dev/null +++ b/arch/x86/include/asm/numa_64.h @@ -0,0 +1,43 @@ +#ifndef _ASM_X86_NUMA_64_H +#define _ASM_X86_NUMA_64_H + +#include <linux/nodemask.h> +#include <asm/apicdef.h> + +struct bootnode { + u64 start; + u64 end; +}; + +extern int compute_hash_shift(struct bootnode *nodes, int numblks, + int *nodeids); + +#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) + +extern void numa_init_array(void); +extern int numa_off; + +extern void srat_reserve_add_area(int nodeid); +extern int hotadd_percent; + +extern s16 apicid_to_node[MAX_LOCAL_APIC]; + +extern unsigned long numa_free_all_bootmem(void); +extern void setup_node_bootmem(int nodeid, unsigned long start, + unsigned long end); + +#ifdef CONFIG_NUMA +extern void __init init_cpu_to_node(void); +extern void __cpuinit numa_set_node(int cpu, int node); +extern void __cpuinit numa_clear_node(int cpu); +extern void __cpuinit numa_add_cpu(int cpu); +extern void __cpuinit numa_remove_cpu(int cpu); +#else +static inline void init_cpu_to_node(void) { } +static inline void numa_set_node(int cpu, int node) { } +static inline void numa_clear_node(int cpu) { } +static inline void numa_add_cpu(int cpu, int node) { } +static inline void numa_remove_cpu(int cpu) { } +#endif + +#endif /* _ASM_X86_NUMA_64_H */ diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h new file mode 100644 index 00000000000..1e8bd30b4c1 --- /dev/null +++ b/arch/x86/include/asm/numaq.h @@ -0,0 +1,169 @@ +/* + * Written by: Patricia Gaughen, IBM Corporation + * + * Copyright (C) 2002, IBM Corp. + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Send feedback to <gone@us.ibm.com> + */ + +#ifndef _ASM_X86_NUMAQ_H +#define _ASM_X86_NUMAQ_H + +#ifdef CONFIG_X86_NUMAQ + +extern int found_numaq; +extern int get_memcfg_numaq(void); + +/* + * SYS_CFG_DATA_PRIV_ADDR, struct eachquadmem, and struct sys_cfg_data are the + */ +#define SYS_CFG_DATA_PRIV_ADDR 0x0009d000 /* place for scd in private + quad space */ + +/* + * Communication area for each processor on lynxer-processor tests. + * + * NOTE: If you change the size of this eachproc structure you need + * to change the definition for EACH_QUAD_SIZE. + */ +struct eachquadmem { + unsigned int priv_mem_start; /* Starting address of this */ + /* quad's private memory. */ + /* This is always 0. */ + /* In MB. */ + unsigned int priv_mem_size; /* Size of this quad's */ + /* private memory. */ + /* In MB. */ + unsigned int low_shrd_mem_strp_start;/* Starting address of this */ + /* quad's low shared block */ + /* (untranslated). */ + /* In MB. */ + unsigned int low_shrd_mem_start; /* Starting address of this */ + /* quad's low shared memory */ + /* (untranslated). */ + /* In MB. */ + unsigned int low_shrd_mem_size; /* Size of this quad's low */ + /* shared memory. */ + /* In MB. */ + unsigned int lmmio_copb_start; /* Starting address of this */ + /* quad's local memory */ + /* mapped I/O in the */ + /* compatibility OPB. */ + /* In MB. */ + unsigned int lmmio_copb_size; /* Size of this quad's local */ + /* memory mapped I/O in the */ + /* compatibility OPB. */ + /* In MB. */ + unsigned int lmmio_nopb_start; /* Starting address of this */ + /* quad's local memory */ + /* mapped I/O in the */ + /* non-compatibility OPB. */ + /* In MB. */ + unsigned int lmmio_nopb_size; /* Size of this quad's local */ + /* memory mapped I/O in the */ + /* non-compatibility OPB. */ + /* In MB. */ + unsigned int io_apic_0_start; /* Starting address of I/O */ + /* APIC 0. */ + unsigned int io_apic_0_sz; /* Size I/O APIC 0. */ + unsigned int io_apic_1_start; /* Starting address of I/O */ + /* APIC 1. */ + unsigned int io_apic_1_sz; /* Size I/O APIC 1. */ + unsigned int hi_shrd_mem_start; /* Starting address of this */ + /* quad's high shared memory.*/ + /* In MB. */ + unsigned int hi_shrd_mem_size; /* Size of this quad's high */ + /* shared memory. */ + /* In MB. */ + unsigned int mps_table_addr; /* Address of this quad's */ + /* MPS tables from BIOS, */ + /* in system space.*/ + unsigned int lcl_MDC_pio_addr; /* Port-I/O address for */ + /* local access of MDC. */ + unsigned int rmt_MDC_mmpio_addr; /* MM-Port-I/O address for */ + /* remote access of MDC. */ + unsigned int mm_port_io_start; /* Starting address of this */ + /* quad's memory mapped Port */ + /* I/O space. */ + unsigned int mm_port_io_size; /* Size of this quad's memory*/ + /* mapped Port I/O space. */ + unsigned int mm_rmt_io_apic_start; /* Starting address of this */ + /* quad's memory mapped */ + /* remote I/O APIC space. */ + unsigned int mm_rmt_io_apic_size; /* Size of this quad's memory*/ + /* mapped remote I/O APIC */ + /* space. */ + unsigned int mm_isa_start; /* Starting address of this */ + /* quad's memory mapped ISA */ + /* space (contains MDC */ + /* memory space). */ + unsigned int mm_isa_size; /* Size of this quad's memory*/ + /* mapped ISA space (contains*/ + /* MDC memory space). */ + unsigned int rmt_qmi_addr; /* Remote addr to access QMI.*/ + unsigned int lcl_qmi_addr; /* Local addr to access QMI. */ +}; + +/* + * Note: This structure must be NOT be changed unless the multiproc and + * OS are changed to reflect the new structure. + */ +struct sys_cfg_data { + unsigned int quad_id; + unsigned int bsp_proc_id; /* Boot Strap Processor in this quad. */ + unsigned int scd_version; /* Version number of this table. */ + unsigned int first_quad_id; + unsigned int quads_present31_0; /* 1 bit for each quad */ + unsigned int quads_present63_32; /* 1 bit for each quad */ + unsigned int config_flags; + unsigned int boot_flags; + unsigned int csr_start_addr; /* Absolute value (not in MB) */ + unsigned int csr_size; /* Absolute value (not in MB) */ + unsigned int lcl_apic_start_addr; /* Absolute value (not in MB) */ + unsigned int lcl_apic_size; /* Absolute value (not in MB) */ + unsigned int low_shrd_mem_base; /* 0 or 512MB or 1GB */ + unsigned int low_shrd_mem_quad_offset; /* 0,128M,256M,512M,1G */ + /* may not be totally populated */ + unsigned int split_mem_enbl; /* 0 for no low shared memory */ + unsigned int mmio_sz; /* Size of total system memory mapped I/O */ + /* (in MB). */ + unsigned int quad_spin_lock; /* Spare location used for quad */ + /* bringup. */ + unsigned int nonzero55; /* For checksumming. */ + unsigned int nonzeroaa; /* For checksumming. */ + unsigned int scd_magic_number; + unsigned int system_type; + unsigned int checksum; + /* + * memory configuration area for each quad + */ + struct eachquadmem eq[MAX_NUMNODES]; /* indexed by quad id */ +}; + +void numaq_tsc_disable(void); + +#else +static inline int get_memcfg_numaq(void) +{ + return 0; +} +#endif /* CONFIG_X86_NUMAQ */ +#endif /* _ASM_X86_NUMAQ_H */ + diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h new file mode 100644 index 00000000000..0bf2a06b7a4 --- /dev/null +++ b/arch/x86/include/asm/numaq/apic.h @@ -0,0 +1,136 @@ +#ifndef __ASM_NUMAQ_APIC_H +#define __ASM_NUMAQ_APIC_H + +#include <asm/io.h> +#include <linux/mmzone.h> +#include <linux/nodemask.h> + +#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) + +static inline cpumask_t target_cpus(void) +{ + return CPU_MASK_ALL; +} + +#define NO_BALANCE_IRQ (1) +#define esr_disable (1) + +#define INT_DELIVERY_MODE dest_LowestPrio +#define INT_DEST_MODE 0 /* physical delivery on LOCAL quad */ + +static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return physid_isset(apicid, bitmap); +} +static inline unsigned long check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} +#define apicid_cluster(apicid) (apicid & 0xF0) + +static inline int apic_id_registered(void) +{ + return 1; +} + +static inline void init_apic_ldr(void) +{ + /* Already done in NUMA-Q firmware */ +} + +static inline void setup_apic_routing(void) +{ + printk("Enabling APIC mode: %s. Using %d I/O APICs\n", + "NUMA-Q", nr_ioapics); +} + +/* + * Skip adding the timer int on secondary nodes, which causes + * a small but painful rift in the time-space continuum. + */ +static inline int multi_timer_check(int apic, int irq) +{ + return apic != 0 && irq == 0; +} + +static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* We don't have a good way to do this yet - hack */ + return physids_promote(0xFUL); +} + +/* Mapping from cpu number to logical apicid */ +extern u8 cpu_2_logical_apicid[]; +static inline int cpu_to_logical_apicid(int cpu) +{ + if (cpu >= NR_CPUS) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +} + +/* + * Supporting over 60 cpus on NUMA-Q requires a locality-dependent + * cpu to APIC ID relation to properly interact with the intelligent + * mode of the cluster controller. + */ +static inline int cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < 60) + return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); + else + return BAD_APICID; +} + +static inline int apicid_to_node(int logical_apicid) +{ + return logical_apicid >> 4; +} + +static inline physid_mask_t apicid_to_cpu_present(int logical_apicid) +{ + int node = apicid_to_node(logical_apicid); + int cpu = __ffs(logical_apicid & 0xf); + + return physid_mask_of_physid(cpu + 4*node); +} + +extern void *xquad_portio; + +static inline void setup_portio_remap(void) +{ + int num_quads = num_online_nodes(); + + if (num_quads <= 1) + return; + + printk("Remapping cross-quad port I/O for %d quads\n", num_quads); + xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); + printk("xquad_portio vaddr 0x%08lx, len %08lx\n", + (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); +} + +static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return (1); +} + +static inline void enable_apic_mode(void) +{ +} + +/* + * We use physical apicids here, not logical, so just return the default + * physical broadcast to stop people from breaking us + */ +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +{ + return (int) 0xF; +} + +/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ +static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +#endif /* __ASM_NUMAQ_APIC_H */ diff --git a/arch/x86/include/asm/numaq/apicdef.h b/arch/x86/include/asm/numaq/apicdef.h new file mode 100644 index 00000000000..e012a46cc22 --- /dev/null +++ b/arch/x86/include/asm/numaq/apicdef.h @@ -0,0 +1,14 @@ +#ifndef __ASM_NUMAQ_APICDEF_H +#define __ASM_NUMAQ_APICDEF_H + + +#define APIC_ID_MASK (0xF<<24) + +static inline unsigned get_apic_id(unsigned long x) +{ + return (((x)>>24)&0x0F); +} + +#define GET_APIC_ID(x) get_apic_id(x) + +#endif diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h new file mode 100644 index 00000000000..935588d286c --- /dev/null +++ b/arch/x86/include/asm/numaq/ipi.h @@ -0,0 +1,25 @@ +#ifndef __ASM_NUMAQ_IPI_H +#define __ASM_NUMAQ_IPI_H + +void send_IPI_mask_sequence(cpumask_t, int vector); + +static inline void send_IPI_mask(cpumask_t mask, int vector) +{ + send_IPI_mask_sequence(mask, vector); +} + +static inline void send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); +} + +static inline void send_IPI_all(int vector) +{ + send_IPI_mask(cpu_online_map, vector); +} + +#endif /* __ASM_NUMAQ_IPI_H */ diff --git a/arch/x86/include/asm/numaq/mpparse.h b/arch/x86/include/asm/numaq/mpparse.h new file mode 100644 index 00000000000..252292e077b --- /dev/null +++ b/arch/x86/include/asm/numaq/mpparse.h @@ -0,0 +1,7 @@ +#ifndef __ASM_NUMAQ_MPPARSE_H +#define __ASM_NUMAQ_MPPARSE_H + +extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid); + +#endif /* __ASM_NUMAQ_MPPARSE_H */ diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h new file mode 100644 index 00000000000..c577bda5b1c --- /dev/null +++ b/arch/x86/include/asm/numaq/wakecpu.h @@ -0,0 +1,43 @@ +#ifndef __ASM_NUMAQ_WAKECPU_H +#define __ASM_NUMAQ_WAKECPU_H + +/* This file copes with machines that wakeup secondary CPUs by NMIs */ + +#define WAKE_SECONDARY_VIA_NMI + +#define TRAMPOLINE_LOW phys_to_virt(0x8) +#define TRAMPOLINE_HIGH phys_to_virt(0xa) + +#define boot_cpu_apicid boot_cpu_logical_apicid + +/* We don't do anything here because we use NMI's to boot instead */ +static inline void wait_for_init_deassert(atomic_t *deassert) +{ +} + +/* + * Because we use NMIs rather than the INIT-STARTUP sequence to + * bootstrap the CPUs, the APIC may be in a weird state. Kick it. + */ +static inline void smp_callin_clear_local_apic(void) +{ + clear_local_APIC(); +} + +static inline void store_NMI_vector(unsigned short *high, unsigned short *low) +{ + printk("Storing NMI vector\n"); + *high = *((volatile unsigned short *) TRAMPOLINE_HIGH); + *low = *((volatile unsigned short *) TRAMPOLINE_LOW); +} + +static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) +{ + printk("Restoring NMI vector\n"); + *((volatile unsigned short *) TRAMPOLINE_HIGH) = *high; + *((volatile unsigned short *) TRAMPOLINE_LOW) = *low; +} + +#define inquire_remote_apic(apicid) {} + +#endif /* __ASM_NUMAQ_WAKECPU_H */ diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h new file mode 100644 index 00000000000..834a30295fa --- /dev/null +++ b/arch/x86/include/asm/olpc.h @@ -0,0 +1,132 @@ +/* OLPC machine specific definitions */ + +#ifndef _ASM_X86_OLPC_H +#define _ASM_X86_OLPC_H + +#include <asm/geode.h> + +struct olpc_platform_t { + int flags; + uint32_t boardrev; + int ecver; +}; + +#define OLPC_F_PRESENT 0x01 +#define OLPC_F_DCON 0x02 +#define OLPC_F_VSA 0x04 + +#ifdef CONFIG_OLPC + +extern struct olpc_platform_t olpc_platform_info; + +/* + * OLPC board IDs contain the major build number within the mask 0x0ff0, + * and the minor build number withing 0x000f. Pre-builds have a minor + * number less than 8, and normal builds start at 8. For example, 0x0B10 + * is a PreB1, and 0x0C18 is a C1. + */ + +static inline uint32_t olpc_board(uint8_t id) +{ + return (id << 4) | 0x8; +} + +static inline uint32_t olpc_board_pre(uint8_t id) +{ + return id << 4; +} + +static inline int machine_is_olpc(void) +{ + return (olpc_platform_info.flags & OLPC_F_PRESENT) ? 1 : 0; +} + +/* + * The DCON is OLPC's Display Controller. It has a number of unique + * features that we might want to take advantage of.. + */ +static inline int olpc_has_dcon(void) +{ + return (olpc_platform_info.flags & OLPC_F_DCON) ? 1 : 0; +} + +/* + * The VSA is software from AMD that typical Geode bioses will include. + * It is used to emulate the PCI bus, VGA, etc. OLPC's Open Firmware does + * not include the VSA; instead, PCI is emulated by the kernel. + * + * The VSA is described further in arch/x86/pci/olpc.c. + */ +static inline int olpc_has_vsa(void) +{ + return (olpc_platform_info.flags & OLPC_F_VSA) ? 1 : 0; +} + +/* + * The "Mass Production" version of OLPC's XO is identified as being model + * C2. During the prototype phase, the following models (in chronological + * order) were created: A1, B1, B2, B3, B4, C1. The A1 through B2 models + * were based on Geode GX CPUs, and models after that were based upon + * Geode LX CPUs. There were also some hand-assembled models floating + * around, referred to as PreB1, PreB2, etc. + */ +static inline int olpc_board_at_least(uint32_t rev) +{ + return olpc_platform_info.boardrev >= rev; +} + +#else + +static inline int machine_is_olpc(void) +{ + return 0; +} + +static inline int olpc_has_dcon(void) +{ + return 0; +} + +static inline int olpc_has_vsa(void) +{ + return 0; +} + +#endif + +/* EC related functions */ + +extern int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen, + unsigned char *outbuf, size_t outlen); + +extern int olpc_ec_mask_set(uint8_t bits); +extern int olpc_ec_mask_unset(uint8_t bits); + +/* EC commands */ + +#define EC_FIRMWARE_REV 0x08 + +/* SCI source values */ + +#define EC_SCI_SRC_EMPTY 0x00 +#define EC_SCI_SRC_GAME 0x01 +#define EC_SCI_SRC_BATTERY 0x02 +#define EC_SCI_SRC_BATSOC 0x04 +#define EC_SCI_SRC_BATERR 0x08 +#define EC_SCI_SRC_EBOOK 0x10 +#define EC_SCI_SRC_WLAN 0x20 +#define EC_SCI_SRC_ACPWR 0x40 +#define EC_SCI_SRC_ALL 0x7F + +/* GPIO assignments */ + +#define OLPC_GPIO_MIC_AC geode_gpio(1) +#define OLPC_GPIO_DCON_IRQ geode_gpio(7) +#define OLPC_GPIO_THRM_ALRM geode_gpio(10) +#define OLPC_GPIO_SMB_CLK geode_gpio(14) +#define OLPC_GPIO_SMB_DATA geode_gpio(15) +#define OLPC_GPIO_WORKAUX geode_gpio(24) +#define OLPC_GPIO_LID geode_gpio(26) +#define OLPC_GPIO_ECSCI geode_gpio(27) + +#endif /* _ASM_X86_OLPC_H */ diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h new file mode 100644 index 00000000000..e9873a2e869 --- /dev/null +++ b/arch/x86/include/asm/page.h @@ -0,0 +1,209 @@ +#ifndef _ASM_X86_PAGE_H +#define _ASM_X86_PAGE_H + +#include <linux/const.h> + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#ifdef __KERNEL__ + +#define __PHYSICAL_MASK ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1) +#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) + +/* Cast PAGE_MASK to a signed type so that it is sign-extended if + virtual addresses are 32-bits but physical addresses are larger + (ie, 32-bit PAE). */ +#define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) + +/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */ +#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK) + +/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */ +#define PTE_FLAGS_MASK (~PTE_PFN_MASK) + +#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) +#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) + +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) + +#define HUGE_MAX_HSTATE 2 + +#ifndef __ASSEMBLY__ +#include <linux/types.h> +#endif + +#ifdef CONFIG_X86_64 +#include <asm/page_64.h> +#else +#include <asm/page_32.h> +#endif /* CONFIG_X86_64 */ + +#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) + +#define VM_DATA_DEFAULT_FLAGS \ + (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ + VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + + +#ifndef __ASSEMBLY__ + +typedef struct { pgdval_t pgd; } pgd_t; +typedef struct { pgprotval_t pgprot; } pgprot_t; + +extern int page_is_ram(unsigned long pagenr); +extern int pagerange_is_ram(unsigned long start, unsigned long end); +extern int devmem_is_allowed(unsigned long pagenr); +extern void map_devmem(unsigned long pfn, unsigned long size, + pgprot_t vma_prot); +extern void unmap_devmem(unsigned long pfn, unsigned long size, + pgprot_t vma_prot); + +extern unsigned long max_low_pfn_mapped; +extern unsigned long max_pfn_mapped; + +struct page; + +static inline void clear_user_page(void *page, unsigned long vaddr, + struct page *pg) +{ + clear_page(page); +} + +static inline void copy_user_page(void *to, void *from, unsigned long vaddr, + struct page *topage) +{ + copy_page(to, from); +} + +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) +#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + +static inline pgd_t native_make_pgd(pgdval_t val) +{ + return (pgd_t) { val }; +} + +static inline pgdval_t native_pgd_val(pgd_t pgd) +{ + return pgd.pgd; +} + +#if PAGETABLE_LEVELS >= 3 +#if PAGETABLE_LEVELS == 4 +typedef struct { pudval_t pud; } pud_t; + +static inline pud_t native_make_pud(pmdval_t val) +{ + return (pud_t) { val }; +} + +static inline pudval_t native_pud_val(pud_t pud) +{ + return pud.pud; +} +#else /* PAGETABLE_LEVELS == 3 */ +#include <asm-generic/pgtable-nopud.h> + +static inline pudval_t native_pud_val(pud_t pud) +{ + return native_pgd_val(pud.pgd); +} +#endif /* PAGETABLE_LEVELS == 4 */ + +typedef struct { pmdval_t pmd; } pmd_t; + +static inline pmd_t native_make_pmd(pmdval_t val) +{ + return (pmd_t) { val }; +} + +static inline pmdval_t native_pmd_val(pmd_t pmd) +{ + return pmd.pmd; +} +#else /* PAGETABLE_LEVELS == 2 */ +#include <asm-generic/pgtable-nopmd.h> + +static inline pmdval_t native_pmd_val(pmd_t pmd) +{ + return native_pgd_val(pmd.pud.pgd); +} +#endif /* PAGETABLE_LEVELS >= 3 */ + +static inline pte_t native_make_pte(pteval_t val) +{ + return (pte_t) { .pte = val }; +} + +static inline pteval_t native_pte_val(pte_t pte) +{ + return pte.pte; +} + +static inline pteval_t native_pte_flags(pte_t pte) +{ + return native_pte_val(pte) & PTE_FLAGS_MASK; +} + +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else /* !CONFIG_PARAVIRT */ + +#define pgd_val(x) native_pgd_val(x) +#define __pgd(x) native_make_pgd(x) + +#ifndef __PAGETABLE_PUD_FOLDED +#define pud_val(x) native_pud_val(x) +#define __pud(x) native_make_pud(x) +#endif + +#ifndef __PAGETABLE_PMD_FOLDED +#define pmd_val(x) native_pmd_val(x) +#define __pmd(x) native_make_pmd(x) +#endif + +#define pte_val(x) native_pte_val(x) +#define pte_flags(x) native_pte_flags(x) +#define __pte(x) native_make_pte(x) + +#endif /* CONFIG_PARAVIRT */ + +#define __pa(x) __phys_addr((unsigned long)(x)) +#define __pa_nodebug(x) __phys_addr_nodebug((unsigned long)(x)) +/* __pa_symbol should be used for C visible symbols. + This seems to be the official gcc blessed way to do such arithmetic. */ +#define __pa_symbol(x) __pa(__phys_reloc_hide((unsigned long)(x))) + +#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) + +#define __boot_va(x) __va(x) +#define __boot_pa(x) __pa(x) + +/* + * virt_to_page(kaddr) returns a valid pointer if and only if + * virt_addr_valid(kaddr) returns true. + */ +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +extern bool __virt_addr_valid(unsigned long kaddr); +#define virt_addr_valid(kaddr) __virt_addr_valid((unsigned long) (kaddr)) + +#endif /* __ASSEMBLY__ */ + +#include <asm-generic/memory_model.h> +#include <asm-generic/page.h> + +#define __HAVE_ARCH_GATE_AREA 1 + +#endif /* __KERNEL__ */ +#endif /* _ASM_X86_PAGE_H */ diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h new file mode 100644 index 00000000000..bcde0d7b432 --- /dev/null +++ b/arch/x86/include/asm/page_32.h @@ -0,0 +1,136 @@ +#ifndef _ASM_X86_PAGE_32_H +#define _ASM_X86_PAGE_32_H + +/* + * This handles the memory map. + * + * A __PAGE_OFFSET of 0xC0000000 means that the kernel has + * a virtual address space of one gigabyte, which limits the + * amount of physical memory you can use to about 950MB. + * + * If you want more physical memory than this then see the CONFIG_HIGHMEM4G + * and CONFIG_HIGHMEM64G options in the kernel configuration. + */ +#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) + +#ifdef CONFIG_4KSTACKS +#define THREAD_ORDER 0 +#else +#define THREAD_ORDER 1 +#endif +#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) + +#define STACKFAULT_STACK 0 +#define DOUBLEFAULT_STACK 1 +#define NMI_STACK 0 +#define DEBUG_STACK 0 +#define MCE_STACK 0 +#define N_EXCEPTION_STACKS 1 + +#ifdef CONFIG_X86_PAE +/* 44=32+12, the limit we can fit into an unsigned long pfn */ +#define __PHYSICAL_MASK_SHIFT 44 +#define __VIRTUAL_MASK_SHIFT 32 +#define PAGETABLE_LEVELS 3 + +#ifndef __ASSEMBLY__ +typedef u64 pteval_t; +typedef u64 pmdval_t; +typedef u64 pudval_t; +typedef u64 pgdval_t; +typedef u64 pgprotval_t; + +typedef union { + struct { + unsigned long pte_low, pte_high; + }; + pteval_t pte; +} pte_t; +#endif /* __ASSEMBLY__ + */ +#else /* !CONFIG_X86_PAE */ +#define __PHYSICAL_MASK_SHIFT 32 +#define __VIRTUAL_MASK_SHIFT 32 +#define PAGETABLE_LEVELS 2 + +#ifndef __ASSEMBLY__ +typedef unsigned long pteval_t; +typedef unsigned long pmdval_t; +typedef unsigned long pudval_t; +typedef unsigned long pgdval_t; +typedef unsigned long pgprotval_t; + +typedef union { + pteval_t pte; + pteval_t pte_low; +} pte_t; + +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_X86_PAE */ + +#ifndef __ASSEMBLY__ +typedef struct page *pgtable_t; +#endif + +#ifdef CONFIG_HUGETLB_PAGE +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA +#endif + +#ifndef __ASSEMBLY__ +#define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET) +#ifdef CONFIG_DEBUG_VIRTUAL +extern unsigned long __phys_addr(unsigned long); +#else +#define __phys_addr(x) __phys_addr_nodebug(x) +#endif +#define __phys_reloc_hide(x) RELOC_HIDE((x), 0) + +#ifdef CONFIG_FLATMEM +#define pfn_valid(pfn) ((pfn) < max_mapnr) +#endif /* CONFIG_FLATMEM */ + +extern int nx_enabled; + +/* + * This much address space is reserved for vmalloc() and iomap() + * as well as fixmap mappings. + */ +extern unsigned int __VMALLOC_RESERVE; +extern int sysctl_legacy_va_layout; + +extern void find_low_pfn_range(void); +extern unsigned long init_memory_mapping(unsigned long start, + unsigned long end); +extern void initmem_init(unsigned long, unsigned long); +extern void free_initmem(void); +extern void setup_bootmem_allocator(void); + + +#ifdef CONFIG_X86_USE_3DNOW +#include <asm/mmx.h> + +static inline void clear_page(void *page) +{ + mmx_clear_page(page); +} + +static inline void copy_page(void *to, void *from) +{ + mmx_copy_page(to, from); +} +#else /* !CONFIG_X86_USE_3DNOW */ +#include <linux/string.h> + +static inline void clear_page(void *page) +{ + memset(page, 0, PAGE_SIZE); +} + +static inline void copy_page(void *to, void *from) +{ + memcpy(to, from, PAGE_SIZE); +} +#endif /* CONFIG_X86_3DNOW */ +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_X86_PAGE_32_H */ diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h new file mode 100644 index 00000000000..5ebca29f44f --- /dev/null +++ b/arch/x86/include/asm/page_64.h @@ -0,0 +1,105 @@ +#ifndef _ASM_X86_PAGE_64_H +#define _ASM_X86_PAGE_64_H + +#define PAGETABLE_LEVELS 4 + +#define THREAD_ORDER 1 +#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) +#define CURRENT_MASK (~(THREAD_SIZE - 1)) + +#define EXCEPTION_STACK_ORDER 0 +#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) + +#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) +#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) + +#define IRQSTACK_ORDER 2 +#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) + +#define STACKFAULT_STACK 1 +#define DOUBLEFAULT_STACK 2 +#define NMI_STACK 3 +#define DEBUG_STACK 4 +#define MCE_STACK 5 +#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ + +#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) +#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) + +/* + * Set __PAGE_OFFSET to the most negative possible address + + * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a + * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's + * what Xen requires. + */ +#define __PAGE_OFFSET _AC(0xffff880000000000, UL) + +#define __PHYSICAL_START CONFIG_PHYSICAL_START +#define __KERNEL_ALIGN 0x200000 + +/* + * Make sure kernel is aligned to 2MB address. Catching it at compile + * time is better. Change your config file and compile the kernel + * for a 2MB aligned address (CONFIG_PHYSICAL_START) + */ +#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0 +#error "CONFIG_PHYSICAL_START must be a multiple of 2MB" +#endif + +#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) +#define __START_KERNEL_map _AC(0xffffffff80000000, UL) + +/* See Documentation/x86_64/mm.txt for a description of the memory map. */ +#define __PHYSICAL_MASK_SHIFT 46 +#define __VIRTUAL_MASK_SHIFT 48 + +/* + * Kernel image size is limited to 512 MB (see level2_kernel_pgt in + * arch/x86/kernel/head_64.S), and it is mapped here: + */ +#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) +#define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL) + +#ifndef __ASSEMBLY__ +void clear_page(void *page); +void copy_page(void *to, void *from); + +/* duplicated to the one in bootmem.h */ +extern unsigned long max_pfn; +extern unsigned long phys_base; + +extern unsigned long __phys_addr(unsigned long); +#define __phys_reloc_hide(x) (x) + +/* + * These are used to make use of C type-checking.. + */ +typedef unsigned long pteval_t; +typedef unsigned long pmdval_t; +typedef unsigned long pudval_t; +typedef unsigned long pgdval_t; +typedef unsigned long pgprotval_t; + +typedef struct page *pgtable_t; + +typedef struct { pteval_t pte; } pte_t; + +#define vmemmap ((struct page *)VMEMMAP_START) + +extern unsigned long init_memory_mapping(unsigned long start, + unsigned long end); + +extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); +extern void free_initmem(void); + +extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); +extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); + +#endif /* !__ASSEMBLY__ */ + +#ifdef CONFIG_FLATMEM +#define pfn_valid(pfn) ((pfn) < max_pfn) +#endif + + +#endif /* _ASM_X86_PAGE_64_H */ diff --git a/arch/x86/include/asm/param.h b/arch/x86/include/asm/param.h new file mode 100644 index 00000000000..6f0d0422f4c --- /dev/null +++ b/arch/x86/include/asm/param.h @@ -0,0 +1,22 @@ +#ifndef _ASM_X86_PARAM_H +#define _ASM_X86_PARAM_H + +#ifdef __KERNEL__ +# define HZ CONFIG_HZ /* Internal kernel timer frequency */ +# define USER_HZ 100 /* some user interfaces are */ +# define CLOCKS_PER_SEC (USER_HZ) /* in "ticks" like times() */ +#endif + +#ifndef HZ +#define HZ 100 +#endif + +#define EXEC_PAGESIZE 4096 + +#ifndef NOGROUP +#define NOGROUP (-1) +#endif + +#define MAXHOSTNAMELEN 64 /* max length of hostname */ + +#endif /* _ASM_X86_PARAM_H */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h new file mode 100644 index 00000000000..ba3e2ff6aed --- /dev/null +++ b/arch/x86/include/asm/paravirt.h @@ -0,0 +1,1650 @@ +#ifndef _ASM_X86_PARAVIRT_H +#define _ASM_X86_PARAVIRT_H +/* Various instructions on x86 need to be replaced for + * para-virtualization: those hooks are defined here. */ + +#ifdef CONFIG_PARAVIRT +#include <asm/page.h> +#include <asm/asm.h> + +/* Bitmask of what can be clobbered: usually at least eax. */ +#define CLBR_NONE 0 +#define CLBR_EAX (1 << 0) +#define CLBR_ECX (1 << 1) +#define CLBR_EDX (1 << 2) + +#ifdef CONFIG_X86_64 +#define CLBR_RSI (1 << 3) +#define CLBR_RDI (1 << 4) +#define CLBR_R8 (1 << 5) +#define CLBR_R9 (1 << 6) +#define CLBR_R10 (1 << 7) +#define CLBR_R11 (1 << 8) +#define CLBR_ANY ((1 << 9) - 1) +#include <asm/desc_defs.h> +#else +/* CLBR_ANY should match all regs platform has. For i386, that's just it */ +#define CLBR_ANY ((1 << 3) - 1) +#endif /* X86_64 */ + +#ifndef __ASSEMBLY__ +#include <linux/types.h> +#include <linux/cpumask.h> +#include <asm/kmap_types.h> +#include <asm/desc_defs.h> + +struct page; +struct thread_struct; +struct desc_ptr; +struct tss_struct; +struct mm_struct; +struct desc_struct; + +/* general info */ +struct pv_info { + unsigned int kernel_rpl; + int shared_kernel_pmd; + int paravirt_enabled; + const char *name; +}; + +struct pv_init_ops { + /* + * Patch may replace one of the defined code sequences with + * arbitrary code, subject to the same register constraints. + * This generally means the code is not free to clobber any + * registers other than EAX. The patch function should return + * the number of bytes of code generated, as we nop pad the + * rest in generic code. + */ + unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, + unsigned long addr, unsigned len); + + /* Basic arch-specific setup */ + void (*arch_setup)(void); + char *(*memory_setup)(void); + void (*post_allocator_init)(void); + + /* Print a banner to identify the environment */ + void (*banner)(void); +}; + + +struct pv_lazy_ops { + /* Set deferred update mode, used for batching operations. */ + void (*enter)(void); + void (*leave)(void); +}; + +struct pv_time_ops { + void (*time_init)(void); + + /* Set and set time of day */ + unsigned long (*get_wallclock)(void); + int (*set_wallclock)(unsigned long); + + unsigned long long (*sched_clock)(void); + unsigned long (*get_tsc_khz)(void); +}; + +struct pv_cpu_ops { + /* hooks for various privileged instructions */ + unsigned long (*get_debugreg)(int regno); + void (*set_debugreg)(int regno, unsigned long value); + + void (*clts)(void); + + unsigned long (*read_cr0)(void); + void (*write_cr0)(unsigned long); + + unsigned long (*read_cr4_safe)(void); + unsigned long (*read_cr4)(void); + void (*write_cr4)(unsigned long); + +#ifdef CONFIG_X86_64 + unsigned long (*read_cr8)(void); + void (*write_cr8)(unsigned long); +#endif + + /* Segment descriptor handling */ + void (*load_tr_desc)(void); + void (*load_gdt)(const struct desc_ptr *); + void (*load_idt)(const struct desc_ptr *); + void (*store_gdt)(struct desc_ptr *); + void (*store_idt)(struct desc_ptr *); + void (*set_ldt)(const void *desc, unsigned entries); + unsigned long (*store_tr)(void); + void (*load_tls)(struct thread_struct *t, unsigned int cpu); +#ifdef CONFIG_X86_64 + void (*load_gs_index)(unsigned int idx); +#endif + void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, + const void *desc); + void (*write_gdt_entry)(struct desc_struct *, + int entrynum, const void *desc, int size); + void (*write_idt_entry)(gate_desc *, + int entrynum, const gate_desc *gate); + void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); + void (*free_ldt)(struct desc_struct *ldt, unsigned entries); + + void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); + + void (*set_iopl_mask)(unsigned mask); + + void (*wbinvd)(void); + void (*io_delay)(void); + + /* cpuid emulation, mostly so that caps bits can be disabled */ + void (*cpuid)(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx); + + /* MSR, PMC and TSR operations. + err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ + u64 (*read_msr_amd)(unsigned int msr, int *err); + u64 (*read_msr)(unsigned int msr, int *err); + int (*write_msr)(unsigned int msr, unsigned low, unsigned high); + + u64 (*read_tsc)(void); + u64 (*read_pmc)(int counter); + unsigned long long (*read_tscp)(unsigned int *aux); + + /* + * Atomically enable interrupts and return to userspace. This + * is only ever used to return to 32-bit processes; in a + * 64-bit kernel, it's used for 32-on-64 compat processes, but + * never native 64-bit processes. (Jump, not call.) + */ + void (*irq_enable_sysexit)(void); + + /* + * Switch to usermode gs and return to 64-bit usermode using + * sysret. Only used in 64-bit kernels to return to 64-bit + * processes. Usermode register state, including %rsp, must + * already be restored. + */ + void (*usergs_sysret64)(void); + + /* + * Switch to usermode gs and return to 32-bit usermode using + * sysret. Used to return to 32-on-64 compat processes. + * Other usermode register state, including %esp, must already + * be restored. + */ + void (*usergs_sysret32)(void); + + /* Normal iret. Jump to this with the standard iret stack + frame set up. */ + void (*iret)(void); + + void (*swapgs)(void); + + struct pv_lazy_ops lazy_mode; +}; + +struct pv_irq_ops { + void (*init_IRQ)(void); + + /* + * Get/set interrupt state. save_fl and restore_fl are only + * expected to use X86_EFLAGS_IF; all other bits + * returned from save_fl are undefined, and may be ignored by + * restore_fl. + */ + unsigned long (*save_fl)(void); + void (*restore_fl)(unsigned long); + void (*irq_disable)(void); + void (*irq_enable)(void); + void (*safe_halt)(void); + void (*halt)(void); + +#ifdef CONFIG_X86_64 + void (*adjust_exception_frame)(void); +#endif +}; + +struct pv_apic_ops { +#ifdef CONFIG_X86_LOCAL_APIC + void (*setup_boot_clock)(void); + void (*setup_secondary_clock)(void); + + void (*startup_ipi_hook)(int phys_apicid, + unsigned long start_eip, + unsigned long start_esp); +#endif +}; + +struct pv_mmu_ops { + /* + * Called before/after init_mm pagetable setup. setup_start + * may reset %cr3, and may pre-install parts of the pagetable; + * pagetable setup is expected to preserve any existing + * mapping. + */ + void (*pagetable_setup_start)(pgd_t *pgd_base); + void (*pagetable_setup_done)(pgd_t *pgd_base); + + unsigned long (*read_cr2)(void); + void (*write_cr2)(unsigned long); + + unsigned long (*read_cr3)(void); + void (*write_cr3)(unsigned long); + + /* + * Hooks for intercepting the creation/use/destruction of an + * mm_struct. + */ + void (*activate_mm)(struct mm_struct *prev, + struct mm_struct *next); + void (*dup_mmap)(struct mm_struct *oldmm, + struct mm_struct *mm); + void (*exit_mmap)(struct mm_struct *mm); + + + /* TLB operations */ + void (*flush_tlb_user)(void); + void (*flush_tlb_kernel)(void); + void (*flush_tlb_single)(unsigned long addr); + void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, + unsigned long va); + + /* Hooks for allocating and freeing a pagetable top-level */ + int (*pgd_alloc)(struct mm_struct *mm); + void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd); + + /* + * Hooks for allocating/releasing pagetable pages when they're + * attached to a pagetable + */ + void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn); + void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn); + void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count); + void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn); + void (*release_pte)(unsigned long pfn); + void (*release_pmd)(unsigned long pfn); + void (*release_pud)(unsigned long pfn); + + /* Pagetable manipulation functions */ + void (*set_pte)(pte_t *ptep, pte_t pteval); + void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval); + void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); + void (*pte_update)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + void (*pte_update_defer)(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); + + pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); + + pteval_t (*pte_val)(pte_t); + pteval_t (*pte_flags)(pte_t); + pte_t (*make_pte)(pteval_t pte); + + pgdval_t (*pgd_val)(pgd_t); + pgd_t (*make_pgd)(pgdval_t pgd); + +#if PAGETABLE_LEVELS >= 3 +#ifdef CONFIG_X86_PAE + void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); + void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); + void (*pte_clear)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + void (*pmd_clear)(pmd_t *pmdp); + +#endif /* CONFIG_X86_PAE */ + + void (*set_pud)(pud_t *pudp, pud_t pudval); + + pmdval_t (*pmd_val)(pmd_t); + pmd_t (*make_pmd)(pmdval_t pmd); + +#if PAGETABLE_LEVELS == 4 + pudval_t (*pud_val)(pud_t); + pud_t (*make_pud)(pudval_t pud); + + void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); +#endif /* PAGETABLE_LEVELS == 4 */ +#endif /* PAGETABLE_LEVELS >= 3 */ + +#ifdef CONFIG_HIGHPTE + void *(*kmap_atomic_pte)(struct page *page, enum km_type type); +#endif + + struct pv_lazy_ops lazy_mode; + + /* dom0 ops */ + + /* Sometimes the physical address is a pfn, and sometimes its + an mfn. We can tell which is which from the index. */ + void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, + unsigned long phys, pgprot_t flags); +}; + +struct raw_spinlock; +struct pv_lock_ops { + int (*spin_is_locked)(struct raw_spinlock *lock); + int (*spin_is_contended)(struct raw_spinlock *lock); + void (*spin_lock)(struct raw_spinlock *lock); + void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags); + int (*spin_trylock)(struct raw_spinlock *lock); + void (*spin_unlock)(struct raw_spinlock *lock); +}; + +/* This contains all the paravirt structures: we get a convenient + * number for each function using the offset which we use to indicate + * what to patch. */ +struct paravirt_patch_template { + struct pv_init_ops pv_init_ops; + struct pv_time_ops pv_time_ops; + struct pv_cpu_ops pv_cpu_ops; + struct pv_irq_ops pv_irq_ops; + struct pv_apic_ops pv_apic_ops; + struct pv_mmu_ops pv_mmu_ops; + struct pv_lock_ops pv_lock_ops; +}; + +extern struct pv_info pv_info; +extern struct pv_init_ops pv_init_ops; +extern struct pv_time_ops pv_time_ops; +extern struct pv_cpu_ops pv_cpu_ops; +extern struct pv_irq_ops pv_irq_ops; +extern struct pv_apic_ops pv_apic_ops; +extern struct pv_mmu_ops pv_mmu_ops; +extern struct pv_lock_ops pv_lock_ops; + +#define PARAVIRT_PATCH(x) \ + (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) + +#define paravirt_type(op) \ + [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ + [paravirt_opptr] "m" (op) +#define paravirt_clobber(clobber) \ + [paravirt_clobber] "i" (clobber) + +/* + * Generate some code, and mark it as patchable by the + * apply_paravirt() alternate instruction patcher. + */ +#define _paravirt_alt(insn_string, type, clobber) \ + "771:\n\t" insn_string "\n" "772:\n" \ + ".pushsection .parainstructions,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR " 771b\n" \ + " .byte " type "\n" \ + " .byte 772b-771b\n" \ + " .short " clobber "\n" \ + ".popsection\n" + +/* Generate patchable code, with the default asm parameters. */ +#define paravirt_alt(insn_string) \ + _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") + +/* Simple instruction patching code. */ +#define DEF_NATIVE(ops, name, code) \ + extern const char start_##ops##_##name[], end_##ops##_##name[]; \ + asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") + +unsigned paravirt_patch_nop(void); +unsigned paravirt_patch_ignore(unsigned len); +unsigned paravirt_patch_call(void *insnbuf, + const void *target, u16 tgt_clobbers, + unsigned long addr, u16 site_clobbers, + unsigned len); +unsigned paravirt_patch_jmp(void *insnbuf, const void *target, + unsigned long addr, unsigned len); +unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, + unsigned long addr, unsigned len); + +unsigned paravirt_patch_insns(void *insnbuf, unsigned len, + const char *start, const char *end); + +unsigned native_patch(u8 type, u16 clobbers, void *ibuf, + unsigned long addr, unsigned len); + +int paravirt_disable_iospace(void); + +/* + * This generates an indirect call based on the operation type number. + * The type number, computed in PARAVIRT_PATCH, is derived from the + * offset into the paravirt_patch_template structure, and can therefore be + * freely converted back into a structure offset. + */ +#define PARAVIRT_CALL "call *%[paravirt_opptr];" + +/* + * These macros are intended to wrap calls through one of the paravirt + * ops structs, so that they can be later identified and patched at + * runtime. + * + * Normally, a call to a pv_op function is a simple indirect call: + * (pv_op_struct.operations)(args...). + * + * Unfortunately, this is a relatively slow operation for modern CPUs, + * because it cannot necessarily determine what the destination + * address is. In this case, the address is a runtime constant, so at + * the very least we can patch the call to e a simple direct call, or + * ideally, patch an inline implementation into the callsite. (Direct + * calls are essentially free, because the call and return addresses + * are completely predictable.) + * + * For i386, these macros rely on the standard gcc "regparm(3)" calling + * convention, in which the first three arguments are placed in %eax, + * %edx, %ecx (in that order), and the remaining arguments are placed + * on the stack. All caller-save registers (eax,edx,ecx) are expected + * to be modified (either clobbered or used for return values). + * X86_64, on the other hand, already specifies a register-based calling + * conventions, returning at %rax, with parameteres going on %rdi, %rsi, + * %rdx, and %rcx. Note that for this reason, x86_64 does not need any + * special handling for dealing with 4 arguments, unlike i386. + * However, x86_64 also have to clobber all caller saved registers, which + * unfortunately, are quite a bit (r8 - r11) + * + * The call instruction itself is marked by placing its start address + * and size into the .parainstructions section, so that + * apply_paravirt() in arch/i386/kernel/alternative.c can do the + * appropriate patching under the control of the backend pv_init_ops + * implementation. + * + * Unfortunately there's no way to get gcc to generate the args setup + * for the call, and then allow the call itself to be generated by an + * inline asm. Because of this, we must do the complete arg setup and + * return value handling from within these macros. This is fairly + * cumbersome. + * + * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. + * It could be extended to more arguments, but there would be little + * to be gained from that. For each number of arguments, there are + * the two VCALL and CALL variants for void and non-void functions. + * + * When there is a return value, the invoker of the macro must specify + * the return type. The macro then uses sizeof() on that type to + * determine whether its a 32 or 64 bit value, and places the return + * in the right register(s) (just %eax for 32-bit, and %edx:%eax for + * 64-bit). For x86_64 machines, it just returns at %rax regardless of + * the return value size. + * + * 64-bit arguments are passed as a pair of adjacent 32-bit arguments + * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments + * in low,high order + * + * Small structures are passed and returned in registers. The macro + * calling convention can't directly deal with this, so the wrapper + * functions must do this. + * + * These PVOP_* macros are only defined within this header. This + * means that all uses must be wrapped in inline functions. This also + * makes sure the incoming and outgoing types are always correct. + */ +#ifdef CONFIG_X86_32 +#define PVOP_VCALL_ARGS unsigned long __eax, __edx, __ecx +#define PVOP_CALL_ARGS PVOP_VCALL_ARGS +#define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ + "=c" (__ecx) +#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS +#define EXTRA_CLOBBERS +#define VEXTRA_CLOBBERS +#else +#define PVOP_VCALL_ARGS unsigned long __edi, __esi, __edx, __ecx +#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax +#define PVOP_VCALL_CLOBBERS "=D" (__edi), \ + "=S" (__esi), "=d" (__edx), \ + "=c" (__ecx) + +#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) + +#define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" +#define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" +#endif + +#ifdef CONFIG_PARAVIRT_DEBUG +#define PVOP_TEST_NULL(op) BUG_ON(op == NULL) +#else +#define PVOP_TEST_NULL(op) ((void)op) +#endif + +#define __PVOP_CALL(rettype, op, pre, post, ...) \ + ({ \ + rettype __ret; \ + PVOP_CALL_ARGS; \ + PVOP_TEST_NULL(op); \ + /* This is 32-bit specific, but is okay in 64-bit */ \ + /* since this condition will never hold */ \ + if (sizeof(rettype) > sizeof(unsigned long)) { \ + asm volatile(pre \ + paravirt_alt(PARAVIRT_CALL) \ + post \ + : PVOP_CALL_CLOBBERS \ + : paravirt_type(op), \ + paravirt_clobber(CLBR_ANY), \ + ##__VA_ARGS__ \ + : "memory", "cc" EXTRA_CLOBBERS); \ + __ret = (rettype)((((u64)__edx) << 32) | __eax); \ + } else { \ + asm volatile(pre \ + paravirt_alt(PARAVIRT_CALL) \ + post \ + : PVOP_CALL_CLOBBERS \ + : paravirt_type(op), \ + paravirt_clobber(CLBR_ANY), \ + ##__VA_ARGS__ \ + : "memory", "cc" EXTRA_CLOBBERS); \ + __ret = (rettype)__eax; \ + } \ + __ret; \ + }) +#define __PVOP_VCALL(op, pre, post, ...) \ + ({ \ + PVOP_VCALL_ARGS; \ + PVOP_TEST_NULL(op); \ + asm volatile(pre \ + paravirt_alt(PARAVIRT_CALL) \ + post \ + : PVOP_VCALL_CLOBBERS \ + : paravirt_type(op), \ + paravirt_clobber(CLBR_ANY), \ + ##__VA_ARGS__ \ + : "memory", "cc" VEXTRA_CLOBBERS); \ + }) + +#define PVOP_CALL0(rettype, op) \ + __PVOP_CALL(rettype, op, "", "") +#define PVOP_VCALL0(op) \ + __PVOP_VCALL(op, "", "") + +#define PVOP_CALL1(rettype, op, arg1) \ + __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1))) +#define PVOP_VCALL1(op, arg1) \ + __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1))) + +#define PVOP_CALL2(rettype, op, arg1, arg2) \ + __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ + "1" ((unsigned long)(arg2))) +#define PVOP_VCALL2(op, arg1, arg2) \ + __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ + "1" ((unsigned long)(arg2))) + +#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ + __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ + "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) +#define PVOP_VCALL3(op, arg1, arg2, arg3) \ + __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ + "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) + +/* This is the only difference in x86_64. We can make it much simpler */ +#ifdef CONFIG_X86_32 +#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ + __PVOP_CALL(rettype, op, \ + "push %[_arg4];", "lea 4(%%esp),%%esp;", \ + "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) +#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ + __PVOP_VCALL(op, \ + "push %[_arg4];", "lea 4(%%esp),%%esp;", \ + "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) +#else +#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ + __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ + "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ + "3"((unsigned long)(arg4))) +#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ + __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ + "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ + "3"((unsigned long)(arg4))) +#endif + +static inline int paravirt_enabled(void) +{ + return pv_info.paravirt_enabled; +} + +static inline void load_sp0(struct tss_struct *tss, + struct thread_struct *thread) +{ + PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread); +} + +#define ARCH_SETUP pv_init_ops.arch_setup(); +static inline unsigned long get_wallclock(void) +{ + return PVOP_CALL0(unsigned long, pv_time_ops.get_wallclock); +} + +static inline int set_wallclock(unsigned long nowtime) +{ + return PVOP_CALL1(int, pv_time_ops.set_wallclock, nowtime); +} + +static inline void (*choose_time_init(void))(void) +{ + return pv_time_ops.time_init; +} + +/* The paravirtualized CPUID instruction. */ +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx); +} + +/* + * These special macros can be used to get or set a debugging register + */ +static inline unsigned long paravirt_get_debugreg(int reg) +{ + return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg); +} +#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) +static inline void set_debugreg(unsigned long val, int reg) +{ + PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val); +} + +static inline void clts(void) +{ + PVOP_VCALL0(pv_cpu_ops.clts); +} + +static inline unsigned long read_cr0(void) +{ + return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0); +} + +static inline void write_cr0(unsigned long x) +{ + PVOP_VCALL1(pv_cpu_ops.write_cr0, x); +} + +static inline unsigned long read_cr2(void) +{ + return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2); +} + +static inline void write_cr2(unsigned long x) +{ + PVOP_VCALL1(pv_mmu_ops.write_cr2, x); +} + +static inline unsigned long read_cr3(void) +{ + return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3); +} + +static inline void write_cr3(unsigned long x) +{ + PVOP_VCALL1(pv_mmu_ops.write_cr3, x); +} + +static inline unsigned long read_cr4(void) +{ + return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4); +} +static inline unsigned long read_cr4_safe(void) +{ + return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe); +} + +static inline void write_cr4(unsigned long x) +{ + PVOP_VCALL1(pv_cpu_ops.write_cr4, x); +} + +#ifdef CONFIG_X86_64 +static inline unsigned long read_cr8(void) +{ + return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8); +} + +static inline void write_cr8(unsigned long x) +{ + PVOP_VCALL1(pv_cpu_ops.write_cr8, x); +} +#endif + +static inline void raw_safe_halt(void) +{ + PVOP_VCALL0(pv_irq_ops.safe_halt); +} + +static inline void halt(void) +{ + PVOP_VCALL0(pv_irq_ops.safe_halt); +} + +static inline void wbinvd(void) +{ + PVOP_VCALL0(pv_cpu_ops.wbinvd); +} + +#define get_kernel_rpl() (pv_info.kernel_rpl) + +static inline u64 paravirt_read_msr(unsigned msr, int *err) +{ + return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); +} +static inline u64 paravirt_read_msr_amd(unsigned msr, int *err) +{ + return PVOP_CALL2(u64, pv_cpu_ops.read_msr_amd, msr, err); +} +static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) +{ + return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); +} + +/* These should all do BUG_ON(_err), but our headers are too tangled. */ +#define rdmsr(msr, val1, val2) \ +do { \ + int _err; \ + u64 _l = paravirt_read_msr(msr, &_err); \ + val1 = (u32)_l; \ + val2 = _l >> 32; \ +} while (0) + +#define wrmsr(msr, val1, val2) \ +do { \ + paravirt_write_msr(msr, val1, val2); \ +} while (0) + +#define rdmsrl(msr, val) \ +do { \ + int _err; \ + val = paravirt_read_msr(msr, &_err); \ +} while (0) + +#define wrmsrl(msr, val) wrmsr(msr, (u32)((u64)(val)), ((u64)(val))>>32) +#define wrmsr_safe(msr, a, b) paravirt_write_msr(msr, a, b) + +/* rdmsr with exception handling */ +#define rdmsr_safe(msr, a, b) \ +({ \ + int _err; \ + u64 _l = paravirt_read_msr(msr, &_err); \ + (*a) = (u32)_l; \ + (*b) = _l >> 32; \ + _err; \ +}) + +static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) +{ + int err; + + *p = paravirt_read_msr(msr, &err); + return err; +} +static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) +{ + int err; + + *p = paravirt_read_msr_amd(msr, &err); + return err; +} + +static inline u64 paravirt_read_tsc(void) +{ + return PVOP_CALL0(u64, pv_cpu_ops.read_tsc); +} + +#define rdtscl(low) \ +do { \ + u64 _l = paravirt_read_tsc(); \ + low = (int)_l; \ +} while (0) + +#define rdtscll(val) (val = paravirt_read_tsc()) + +static inline unsigned long long paravirt_sched_clock(void) +{ + return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); +} +#define calibrate_tsc() (pv_time_ops.get_tsc_khz()) + +static inline unsigned long long paravirt_read_pmc(int counter) +{ + return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter); +} + +#define rdpmc(counter, low, high) \ +do { \ + u64 _l = paravirt_read_pmc(counter); \ + low = (u32)_l; \ + high = _l >> 32; \ +} while (0) + +static inline unsigned long long paravirt_rdtscp(unsigned int *aux) +{ + return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux); +} + +#define rdtscp(low, high, aux) \ +do { \ + int __aux; \ + unsigned long __val = paravirt_rdtscp(&__aux); \ + (low) = (u32)__val; \ + (high) = (u32)(__val >> 32); \ + (aux) = __aux; \ +} while (0) + +#define rdtscpll(val, aux) \ +do { \ + unsigned long __aux; \ + val = paravirt_rdtscp(&__aux); \ + (aux) = __aux; \ +} while (0) + +static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) +{ + PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries); +} + +static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) +{ + PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries); +} + +static inline void load_TR_desc(void) +{ + PVOP_VCALL0(pv_cpu_ops.load_tr_desc); +} +static inline void load_gdt(const struct desc_ptr *dtr) +{ + PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr); +} +static inline void load_idt(const struct desc_ptr *dtr) +{ + PVOP_VCALL1(pv_cpu_ops.load_idt, dtr); +} +static inline void set_ldt(const void *addr, unsigned entries) +{ + PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries); +} +static inline void store_gdt(struct desc_ptr *dtr) +{ + PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr); +} +static inline void store_idt(struct desc_ptr *dtr) +{ + PVOP_VCALL1(pv_cpu_ops.store_idt, dtr); +} +static inline unsigned long paravirt_store_tr(void) +{ + return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr); +} +#define store_tr(tr) ((tr) = paravirt_store_tr()) +static inline void load_TLS(struct thread_struct *t, unsigned cpu) +{ + PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu); +} + +#ifdef CONFIG_X86_64 +static inline void load_gs_index(unsigned int gs) +{ + PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs); +} +#endif + +static inline void write_ldt_entry(struct desc_struct *dt, int entry, + const void *desc) +{ + PVOP_VCALL3(pv_cpu_ops.write_ldt_entry, dt, entry, desc); +} + +static inline void write_gdt_entry(struct desc_struct *dt, int entry, + void *desc, int type) +{ + PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, desc, type); +} + +static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) +{ + PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g); +} +static inline void set_iopl_mask(unsigned mask) +{ + PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask); +} + +/* The paravirtualized I/O functions */ +static inline void slow_down_io(void) +{ + pv_cpu_ops.io_delay(); +#ifdef REALLY_SLOW_IO + pv_cpu_ops.io_delay(); + pv_cpu_ops.io_delay(); + pv_cpu_ops.io_delay(); +#endif +} + +#ifdef CONFIG_X86_LOCAL_APIC +static inline void setup_boot_clock(void) +{ + PVOP_VCALL0(pv_apic_ops.setup_boot_clock); +} + +static inline void setup_secondary_clock(void) +{ + PVOP_VCALL0(pv_apic_ops.setup_secondary_clock); +} +#endif + +static inline void paravirt_post_allocator_init(void) +{ + if (pv_init_ops.post_allocator_init) + (*pv_init_ops.post_allocator_init)(); +} + +static inline void paravirt_pagetable_setup_start(pgd_t *base) +{ + (*pv_mmu_ops.pagetable_setup_start)(base); +} + +static inline void paravirt_pagetable_setup_done(pgd_t *base) +{ + (*pv_mmu_ops.pagetable_setup_done)(base); +} + +#ifdef CONFIG_SMP +static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, + unsigned long start_esp) +{ + PVOP_VCALL3(pv_apic_ops.startup_ipi_hook, + phys_apicid, start_eip, start_esp); +} +#endif + +static inline void paravirt_activate_mm(struct mm_struct *prev, + struct mm_struct *next) +{ + PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next); +} + +static inline void arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) +{ + PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm); +} + +static inline void arch_exit_mmap(struct mm_struct *mm) +{ + PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm); +} + +static inline void __flush_tlb(void) +{ + PVOP_VCALL0(pv_mmu_ops.flush_tlb_user); +} +static inline void __flush_tlb_global(void) +{ + PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel); +} +static inline void __flush_tlb_single(unsigned long addr) +{ + PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); +} + +static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, + unsigned long va) +{ + PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); +} + +static inline int paravirt_pgd_alloc(struct mm_struct *mm) +{ + return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm); +} + +static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd); +} + +static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) +{ + PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn); +} +static inline void paravirt_release_pte(unsigned long pfn) +{ + PVOP_VCALL1(pv_mmu_ops.release_pte, pfn); +} + +static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) +{ + PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn); +} + +static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn, + unsigned long start, unsigned long count) +{ + PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count); +} +static inline void paravirt_release_pmd(unsigned long pfn) +{ + PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn); +} + +static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) +{ + PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn); +} +static inline void paravirt_release_pud(unsigned long pfn) +{ + PVOP_VCALL1(pv_mmu_ops.release_pud, pfn); +} + +#ifdef CONFIG_HIGHPTE +static inline void *kmap_atomic_pte(struct page *page, enum km_type type) +{ + unsigned long ret; + ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type); + return (void *)ret; +} +#endif + +static inline void pte_update(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep); +} + +static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep); +} + +static inline pte_t __pte(pteval_t val) +{ + pteval_t ret; + + if (sizeof(pteval_t) > sizeof(long)) + ret = PVOP_CALL2(pteval_t, + pv_mmu_ops.make_pte, + val, (u64)val >> 32); + else + ret = PVOP_CALL1(pteval_t, + pv_mmu_ops.make_pte, + val); + + return (pte_t) { .pte = ret }; +} + +static inline pteval_t pte_val(pte_t pte) +{ + pteval_t ret; + + if (sizeof(pteval_t) > sizeof(long)) + ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val, + pte.pte, (u64)pte.pte >> 32); + else + ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val, + pte.pte); + + return ret; +} + +static inline pteval_t pte_flags(pte_t pte) +{ + pteval_t ret; + + if (sizeof(pteval_t) > sizeof(long)) + ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags, + pte.pte, (u64)pte.pte >> 32); + else + ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags, + pte.pte); + +#ifdef CONFIG_PARAVIRT_DEBUG + BUG_ON(ret & PTE_PFN_MASK); +#endif + return ret; +} + +static inline pgd_t __pgd(pgdval_t val) +{ + pgdval_t ret; + + if (sizeof(pgdval_t) > sizeof(long)) + ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd, + val, (u64)val >> 32); + else + ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd, + val); + + return (pgd_t) { ret }; +} + +static inline pgdval_t pgd_val(pgd_t pgd) +{ + pgdval_t ret; + + if (sizeof(pgdval_t) > sizeof(long)) + ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val, + pgd.pgd, (u64)pgd.pgd >> 32); + else + ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val, + pgd.pgd); + + return ret; +} + +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION +static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pteval_t ret; + + ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start, + mm, addr, ptep); + + return (pte_t) { .pte = ret }; +} + +static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + if (sizeof(pteval_t) > sizeof(long)) + /* 5 arg words */ + pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte); + else + PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit, + mm, addr, ptep, pte.pte); +} + +static inline void set_pte(pte_t *ptep, pte_t pte) +{ + if (sizeof(pteval_t) > sizeof(long)) + PVOP_VCALL3(pv_mmu_ops.set_pte, ptep, + pte.pte, (u64)pte.pte >> 32); + else + PVOP_VCALL2(pv_mmu_ops.set_pte, ptep, + pte.pte); +} + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + if (sizeof(pteval_t) > sizeof(long)) + /* 5 arg words */ + pv_mmu_ops.set_pte_at(mm, addr, ptep, pte); + else + PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte); +} + +static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + pmdval_t val = native_pmd_val(pmd); + + if (sizeof(pmdval_t) > sizeof(long)) + PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp, val, (u64)val >> 32); + else + PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val); +} + +#if PAGETABLE_LEVELS >= 3 +static inline pmd_t __pmd(pmdval_t val) +{ + pmdval_t ret; + + if (sizeof(pmdval_t) > sizeof(long)) + ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd, + val, (u64)val >> 32); + else + ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd, + val); + + return (pmd_t) { ret }; +} + +static inline pmdval_t pmd_val(pmd_t pmd) +{ + pmdval_t ret; + + if (sizeof(pmdval_t) > sizeof(long)) + ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val, + pmd.pmd, (u64)pmd.pmd >> 32); + else + ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val, + pmd.pmd); + + return ret; +} + +static inline void set_pud(pud_t *pudp, pud_t pud) +{ + pudval_t val = native_pud_val(pud); + + if (sizeof(pudval_t) > sizeof(long)) + PVOP_VCALL3(pv_mmu_ops.set_pud, pudp, + val, (u64)val >> 32); + else + PVOP_VCALL2(pv_mmu_ops.set_pud, pudp, + val); +} +#if PAGETABLE_LEVELS == 4 +static inline pud_t __pud(pudval_t val) +{ + pudval_t ret; + + if (sizeof(pudval_t) > sizeof(long)) + ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud, + val, (u64)val >> 32); + else + ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud, + val); + + return (pud_t) { ret }; +} + +static inline pudval_t pud_val(pud_t pud) +{ + pudval_t ret; + + if (sizeof(pudval_t) > sizeof(long)) + ret = PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val, + pud.pud, (u64)pud.pud >> 32); + else + ret = PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val, + pud.pud); + + return ret; +} + +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + pgdval_t val = native_pgd_val(pgd); + + if (sizeof(pgdval_t) > sizeof(long)) + PVOP_VCALL3(pv_mmu_ops.set_pgd, pgdp, + val, (u64)val >> 32); + else + PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, + val); +} + +static inline void pgd_clear(pgd_t *pgdp) +{ + set_pgd(pgdp, __pgd(0)); +} + +static inline void pud_clear(pud_t *pudp) +{ + set_pud(pudp, __pud(0)); +} + +#endif /* PAGETABLE_LEVELS == 4 */ + +#endif /* PAGETABLE_LEVELS >= 3 */ + +#ifdef CONFIG_X86_PAE +/* Special-case pte-setting operations for PAE, which can't update a + 64-bit pte atomically */ +static inline void set_pte_atomic(pte_t *ptep, pte_t pte) +{ + PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep, + pte.pte, pte.pte >> 32); +} + +static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + /* 5 arg words */ + pv_mmu_ops.set_pte_present(mm, addr, ptep, pte); +} + +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep); +} + +static inline void pmd_clear(pmd_t *pmdp) +{ + PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp); +} +#else /* !CONFIG_X86_PAE */ +static inline void set_pte_atomic(pte_t *ptep, pte_t pte) +{ + set_pte(ptep, pte); +} + +static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + set_pte(ptep, pte); +} + +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + set_pte_at(mm, addr, ptep, __pte(0)); +} + +static inline void pmd_clear(pmd_t *pmdp) +{ + set_pmd(pmdp, __pmd(0)); +} +#endif /* CONFIG_X86_PAE */ + +/* Lazy mode for batching updates / context switch */ +enum paravirt_lazy_mode { + PARAVIRT_LAZY_NONE, + PARAVIRT_LAZY_MMU, + PARAVIRT_LAZY_CPU, +}; + +enum paravirt_lazy_mode paravirt_get_lazy_mode(void); +void paravirt_enter_lazy_cpu(void); +void paravirt_leave_lazy_cpu(void); +void paravirt_enter_lazy_mmu(void); +void paravirt_leave_lazy_mmu(void); +void paravirt_leave_lazy(enum paravirt_lazy_mode mode); + +#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE +static inline void arch_enter_lazy_cpu_mode(void) +{ + PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); +} + +static inline void arch_leave_lazy_cpu_mode(void) +{ + PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); +} + +static inline void arch_flush_lazy_cpu_mode(void) +{ + if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) { + arch_leave_lazy_cpu_mode(); + arch_enter_lazy_cpu_mode(); + } +} + + +#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE +static inline void arch_enter_lazy_mmu_mode(void) +{ + PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter); +} + +static inline void arch_leave_lazy_mmu_mode(void) +{ + PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); +} + +static inline void arch_flush_lazy_mmu_mode(void) +{ + if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) { + arch_leave_lazy_mmu_mode(); + arch_enter_lazy_mmu_mode(); + } +} + +static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, + unsigned long phys, pgprot_t flags) +{ + pv_mmu_ops.set_fixmap(idx, phys, flags); +} + +void _paravirt_nop(void); +#define paravirt_nop ((void *)_paravirt_nop) + +void paravirt_use_bytelocks(void); + +#ifdef CONFIG_SMP + +static inline int __raw_spin_is_locked(struct raw_spinlock *lock) +{ + return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock); +} + +static inline int __raw_spin_is_contended(struct raw_spinlock *lock) +{ + return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); +} + +static __always_inline void __raw_spin_lock(struct raw_spinlock *lock) +{ + PVOP_VCALL1(pv_lock_ops.spin_lock, lock); +} + +static __always_inline void __raw_spin_lock_flags(struct raw_spinlock *lock, + unsigned long flags) +{ + PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags); +} + +static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock) +{ + return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); +} + +static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) +{ + PVOP_VCALL1(pv_lock_ops.spin_unlock, lock); +} + +#endif + +/* These all sit in the .parainstructions section to tell us what to patch. */ +struct paravirt_patch_site { + u8 *instr; /* original instructions */ + u8 instrtype; /* type of this instruction */ + u8 len; /* length of original instruction */ + u16 clobbers; /* what registers you may clobber */ +}; + +extern struct paravirt_patch_site __parainstructions[], + __parainstructions_end[]; + +#ifdef CONFIG_X86_32 +#define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;" +#define PV_RESTORE_REGS "popl %%edx; popl %%ecx" +#define PV_FLAGS_ARG "0" +#define PV_EXTRA_CLOBBERS +#define PV_VEXTRA_CLOBBERS +#else +/* We save some registers, but all of them, that's too much. We clobber all + * caller saved registers but the argument parameter */ +#define PV_SAVE_REGS "pushq %%rdi;" +#define PV_RESTORE_REGS "popq %%rdi;" +#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx", "rsi" +#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx", "rsi" +#define PV_FLAGS_ARG "D" +#endif + +static inline unsigned long __raw_local_save_flags(void) +{ + unsigned long f; + + asm volatile(paravirt_alt(PV_SAVE_REGS + PARAVIRT_CALL + PV_RESTORE_REGS) + : "=a"(f) + : paravirt_type(pv_irq_ops.save_fl), + paravirt_clobber(CLBR_EAX) + : "memory", "cc" PV_VEXTRA_CLOBBERS); + return f; +} + +static inline void raw_local_irq_restore(unsigned long f) +{ + asm volatile(paravirt_alt(PV_SAVE_REGS + PARAVIRT_CALL + PV_RESTORE_REGS) + : "=a"(f) + : PV_FLAGS_ARG(f), + paravirt_type(pv_irq_ops.restore_fl), + paravirt_clobber(CLBR_EAX) + : "memory", "cc" PV_EXTRA_CLOBBERS); +} + +static inline void raw_local_irq_disable(void) +{ + asm volatile(paravirt_alt(PV_SAVE_REGS + PARAVIRT_CALL + PV_RESTORE_REGS) + : + : paravirt_type(pv_irq_ops.irq_disable), + paravirt_clobber(CLBR_EAX) + : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); +} + +static inline void raw_local_irq_enable(void) +{ + asm volatile(paravirt_alt(PV_SAVE_REGS + PARAVIRT_CALL + PV_RESTORE_REGS) + : + : paravirt_type(pv_irq_ops.irq_enable), + paravirt_clobber(CLBR_EAX) + : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); +} + +static inline unsigned long __raw_local_irq_save(void) +{ + unsigned long f; + + f = __raw_local_save_flags(); + raw_local_irq_disable(); + return f; +} + + +/* Make sure as little as possible of this mess escapes. */ +#undef PARAVIRT_CALL +#undef __PVOP_CALL +#undef __PVOP_VCALL +#undef PVOP_VCALL0 +#undef PVOP_CALL0 +#undef PVOP_VCALL1 +#undef PVOP_CALL1 +#undef PVOP_VCALL2 +#undef PVOP_CALL2 +#undef PVOP_VCALL3 +#undef PVOP_CALL3 +#undef PVOP_VCALL4 +#undef PVOP_CALL4 + +#else /* __ASSEMBLY__ */ + +#define _PVSITE(ptype, clobbers, ops, word, algn) \ +771:; \ + ops; \ +772:; \ + .pushsection .parainstructions,"a"; \ + .align algn; \ + word 771b; \ + .byte ptype; \ + .byte 772b-771b; \ + .short clobbers; \ + .popsection + + +#ifdef CONFIG_X86_64 +#define PV_SAVE_REGS \ + push %rax; \ + push %rcx; \ + push %rdx; \ + push %rsi; \ + push %rdi; \ + push %r8; \ + push %r9; \ + push %r10; \ + push %r11 +#define PV_RESTORE_REGS \ + pop %r11; \ + pop %r10; \ + pop %r9; \ + pop %r8; \ + pop %rdi; \ + pop %rsi; \ + pop %rdx; \ + pop %rcx; \ + pop %rax +#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) +#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) +#define PARA_INDIRECT(addr) *addr(%rip) +#else +#define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx +#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax +#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) +#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) +#define PARA_INDIRECT(addr) *%cs:addr +#endif + +#define INTERRUPT_RETURN \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret)) + +#define DISABLE_INTERRUPTS(clobbers) \ + PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ + PV_SAVE_REGS; \ + call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ + PV_RESTORE_REGS;) \ + +#define ENABLE_INTERRUPTS(clobbers) \ + PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ + PV_SAVE_REGS; \ + call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ + PV_RESTORE_REGS;) + +#define USERGS_SYSRET32 \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \ + CLBR_NONE, \ + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32)) + +#ifdef CONFIG_X86_32 +#define GET_CR0_INTO_EAX \ + push %ecx; push %edx; \ + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ + pop %edx; pop %ecx + +#define ENABLE_INTERRUPTS_SYSEXIT \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ + CLBR_NONE, \ + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) + + +#else /* !CONFIG_X86_32 */ + +/* + * If swapgs is used while the userspace stack is still current, + * there's no way to call a pvop. The PV replacement *must* be + * inlined, or the swapgs instruction must be trapped and emulated. + */ +#define SWAPGS_UNSAFE_STACK \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ + swapgs) + +#define SWAPGS \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ + PV_SAVE_REGS; \ + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ + PV_RESTORE_REGS \ + ) + +#define GET_CR2_INTO_RCX \ + call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); \ + movq %rax, %rcx; \ + xorq %rax, %rax; + +#define PARAVIRT_ADJUST_EXCEPTION_FRAME \ + PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \ + CLBR_NONE, \ + call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame)) + +#define USERGS_SYSRET64 \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ + CLBR_NONE, \ + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) + +#define ENABLE_INTERRUPTS_SYSEXIT32 \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ + CLBR_NONE, \ + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) +#endif /* CONFIG_X86_32 */ + +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_PARAVIRT */ +#endif /* _ASM_X86_PARAVIRT_H */ diff --git a/arch/x86/include/asm/parport.h b/arch/x86/include/asm/parport.h new file mode 100644 index 00000000000..3c4ffeb467e --- /dev/null +++ b/arch/x86/include/asm/parport.h @@ -0,0 +1,10 @@ +#ifndef _ASM_X86_PARPORT_H +#define _ASM_X86_PARPORT_H + +static int __devinit parport_pc_find_isa_ports(int autoirq, int autodma); +static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma) +{ + return parport_pc_find_isa_ports(autoirq, autodma); +} + +#endif /* _ASM_X86_PARPORT_H */ diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h new file mode 100644 index 00000000000..b8493b3b989 --- /dev/null +++ b/arch/x86/include/asm/pat.h @@ -0,0 +1,22 @@ +#ifndef _ASM_X86_PAT_H +#define _ASM_X86_PAT_H + +#include <linux/types.h> + +#ifdef CONFIG_X86_PAT +extern int pat_enabled; +extern void validate_pat_support(struct cpuinfo_x86 *c); +#else +static const int pat_enabled; +static inline void validate_pat_support(struct cpuinfo_x86 *c) { } +#endif + +extern void pat_init(void); + +extern int reserve_memtype(u64 start, u64 end, + unsigned long req_type, unsigned long *ret_type); +extern int free_memtype(u64 start, u64 end); + +extern void pat_disable(char *reason); + +#endif /* _ASM_X86_PAT_H */ diff --git a/arch/x86/include/asm/pci-direct.h b/arch/x86/include/asm/pci-direct.h new file mode 100644 index 00000000000..b1e7a45d868 --- /dev/null +++ b/arch/x86/include/asm/pci-direct.h @@ -0,0 +1,21 @@ +#ifndef _ASM_X86_PCI_DIRECT_H +#define _ASM_X86_PCI_DIRECT_H + +#include <linux/types.h> + +/* Direct PCI access. This is used for PCI accesses in early boot before + the PCI subsystem works. */ + +extern u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset); +extern u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset); +extern u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset); +extern void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val); +extern void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val); +extern void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val); + +extern int early_pci_allowed(void); + +extern unsigned int pci_early_dump_regs; +extern void early_dump_pci_device(u8 bus, u8 slot, u8 func); +extern void early_dump_pci_devices(void); +#endif /* _ASM_X86_PCI_DIRECT_H */ diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h new file mode 100644 index 00000000000..875b38edf19 --- /dev/null +++ b/arch/x86/include/asm/pci.h @@ -0,0 +1,114 @@ +#ifndef _ASM_X86_PCI_H +#define _ASM_X86_PCI_H + +#include <linux/mm.h> /* for struct page */ +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <asm/scatterlist.h> +#include <asm/io.h> + +#ifdef __KERNEL__ + +struct pci_sysdata { + int domain; /* PCI domain */ + int node; /* NUMA node */ +#ifdef CONFIG_X86_64 + void *iommu; /* IOMMU private data */ +#endif +}; + +extern int pci_routeirq; + +/* scan a bus after allocating a pci_sysdata for it */ +extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, + int node); +extern struct pci_bus *pci_scan_bus_with_sysdata(int busno); + +static inline int pci_domain_nr(struct pci_bus *bus) +{ + struct pci_sysdata *sd = bus->sysdata; + return sd->domain; +} + +static inline int pci_proc_domain(struct pci_bus *bus) +{ + return pci_domain_nr(bus); +} + + +/* Can be used to override the logic in pci_scan_bus for skipping + already-configured bus numbers - to be used for buggy BIOSes + or architectures with incomplete PCI setup by the loader */ + +#ifdef CONFIG_PCI +extern unsigned int pcibios_assign_all_busses(void); +#else +#define pcibios_assign_all_busses() 0 +#endif +#define pcibios_scan_all_fns(a, b) 0 + +extern unsigned long pci_mem_start; +#define PCIBIOS_MIN_IO 0x1000 +#define PCIBIOS_MIN_MEM (pci_mem_start) + +#define PCIBIOS_MIN_CARDBUS_IO 0x4000 + +void pcibios_config_init(void); +struct pci_bus *pcibios_scan_root(int bus); + +void pcibios_set_master(struct pci_dev *dev); +void pcibios_penalize_isa_irq(int irq, int active); +struct irq_routing_table *pcibios_get_irq_routing_table(void); +int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); + + +#define HAVE_PCI_MMAP +extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, + int write_combine); + + +#ifdef CONFIG_PCI +extern void early_quirks(void); +static inline void pci_dma_burst_advice(struct pci_dev *pdev, + enum pci_dma_burst_strategy *strat, + unsigned long *strategy_parameter) +{ + *strat = PCI_DMA_BURST_INFINITY; + *strategy_parameter = ~0UL; +} +#else +static inline void early_quirks(void) { } +#endif + +#endif /* __KERNEL__ */ + +#ifdef CONFIG_X86_32 +# include "pci_32.h" +#else +# include "pci_64.h" +#endif + +/* implement the pci_ DMA API in terms of the generic device dma_ one */ +#include <asm-generic/pci-dma-compat.h> + +/* generic pci stuff */ +#include <asm-generic/pci.h> + +#ifdef CONFIG_NUMA +/* Returns the node based on pci bus */ +static inline int __pcibus_to_node(struct pci_bus *bus) +{ + struct pci_sysdata *sd = bus->sysdata; + + return sd->node; +} + +static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus) +{ + return node_to_cpumask(__pcibus_to_node(bus)); +} +#endif + +#endif /* _ASM_X86_PCI_H */ diff --git a/arch/x86/include/asm/pci_32.h b/arch/x86/include/asm/pci_32.h new file mode 100644 index 00000000000..6f1213a6ef4 --- /dev/null +++ b/arch/x86/include/asm/pci_32.h @@ -0,0 +1,34 @@ +#ifndef _ASM_X86_PCI_32_H +#define _ASM_X86_PCI_32_H + + +#ifdef __KERNEL__ + + +/* Dynamic DMA mapping stuff. + * i386 has everything mapped statically. + */ + +struct pci_dev; + +/* The PCI address space does equal the physical memory + * address space. The networking and block device layers use + * this boolean for bounce buffer decisions. + */ +#define PCI_DMA_BUS_IS_PHYS (1) + +/* pci_unmap_{page,single} is a nop so... */ +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME[0]; +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) unsigned LEN_NAME[0]; +#define pci_unmap_addr(PTR, ADDR_NAME) sizeof((PTR)->ADDR_NAME) +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + do { break; } while (pci_unmap_addr(PTR, ADDR_NAME)) +#define pci_unmap_len(PTR, LEN_NAME) sizeof((PTR)->LEN_NAME) +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ + do { break; } while (pci_unmap_len(PTR, LEN_NAME)) + + +#endif /* __KERNEL__ */ + + +#endif /* _ASM_X86_PCI_32_H */ diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h new file mode 100644 index 00000000000..5b28995d664 --- /dev/null +++ b/arch/x86/include/asm/pci_64.h @@ -0,0 +1,66 @@ +#ifndef _ASM_X86_PCI_64_H +#define _ASM_X86_PCI_64_H + +#ifdef __KERNEL__ + +#ifdef CONFIG_CALGARY_IOMMU +static inline void *pci_iommu(struct pci_bus *bus) +{ + struct pci_sysdata *sd = bus->sysdata; + return sd->iommu; +} + +static inline void set_pci_iommu(struct pci_bus *bus, void *val) +{ + struct pci_sysdata *sd = bus->sysdata; + sd->iommu = val; +} +#endif /* CONFIG_CALGARY_IOMMU */ + +extern int (*pci_config_read)(int seg, int bus, int dev, int fn, + int reg, int len, u32 *value); +extern int (*pci_config_write)(int seg, int bus, int dev, int fn, + int reg, int len, u32 value); + +extern void dma32_reserve_bootmem(void); +extern void pci_iommu_alloc(void); + +/* The PCI address space does equal the physical memory + * address space. The networking and block device layers use + * this boolean for bounce buffer decisions + * + * On AMD64 it mostly equals, but we set it to zero if a hardware + * IOMMU (gart) of sotware IOMMU (swiotlb) is available. + */ +#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) + +#if defined(CONFIG_GART_IOMMU) || defined(CONFIG_CALGARY_IOMMU) + +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ + dma_addr_t ADDR_NAME; +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ + __u32 LEN_NAME; +#define pci_unmap_addr(PTR, ADDR_NAME) \ + ((PTR)->ADDR_NAME) +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + (((PTR)->ADDR_NAME) = (VAL)) +#define pci_unmap_len(PTR, LEN_NAME) \ + ((PTR)->LEN_NAME) +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ + (((PTR)->LEN_NAME) = (VAL)) + +#else +/* No IOMMU */ + +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) +#define pci_unmap_addr(PTR, ADDR_NAME) (0) +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) +#define pci_unmap_len(PTR, LEN_NAME) (0) +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) + +#endif + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_PCI_64_H */ diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h new file mode 100644 index 00000000000..2fbfff88df3 --- /dev/null +++ b/arch/x86/include/asm/pda.h @@ -0,0 +1,137 @@ +#ifndef _ASM_X86_PDA_H +#define _ASM_X86_PDA_H + +#ifndef __ASSEMBLY__ +#include <linux/stddef.h> +#include <linux/types.h> +#include <linux/cache.h> +#include <asm/page.h> + +/* Per processor datastructure. %gs points to it while the kernel runs */ +struct x8664_pda { + struct task_struct *pcurrent; /* 0 Current process */ + unsigned long data_offset; /* 8 Per cpu data offset from linker + address */ + unsigned long kernelstack; /* 16 top of kernel stack for current */ + unsigned long oldrsp; /* 24 user rsp for system call */ + int irqcount; /* 32 Irq nesting counter. Starts -1 */ + unsigned int cpunumber; /* 36 Logical CPU number */ +#ifdef CONFIG_CC_STACKPROTECTOR + unsigned long stack_canary; /* 40 stack canary value */ + /* gcc-ABI: this canary MUST be at + offset 40!!! */ +#endif + char *irqstackptr; + short nodenumber; /* number of current node (32k max) */ + short in_bootmem; /* pda lives in bootmem */ + unsigned int __softirq_pending; + unsigned int __nmi_count; /* number of NMI on this CPUs */ + short mmu_state; + short isidle; + struct mm_struct *active_mm; + unsigned apic_timer_irqs; + unsigned irq0_irqs; + unsigned irq_resched_count; + unsigned irq_call_count; + unsigned irq_tlb_count; + unsigned irq_thermal_count; + unsigned irq_threshold_count; + unsigned irq_spurious_count; +} ____cacheline_aligned_in_smp; + +extern struct x8664_pda **_cpu_pda; +extern void pda_init(int); + +#define cpu_pda(i) (_cpu_pda[i]) + +/* + * There is no fast way to get the base address of the PDA, all the accesses + * have to mention %fs/%gs. So it needs to be done this Torvaldian way. + */ +extern void __bad_pda_field(void) __attribute__((noreturn)); + +/* + * proxy_pda doesn't actually exist, but tell gcc it is accessed for + * all PDA accesses so it gets read/write dependencies right. + */ +extern struct x8664_pda _proxy_pda; + +#define pda_offset(field) offsetof(struct x8664_pda, field) + +#define pda_to_op(op, field, val) \ +do { \ + typedef typeof(_proxy_pda.field) T__; \ + if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \ + switch (sizeof(_proxy_pda.field)) { \ + case 2: \ + asm(op "w %1,%%gs:%c2" : \ + "+m" (_proxy_pda.field) : \ + "ri" ((T__)val), \ + "i"(pda_offset(field))); \ + break; \ + case 4: \ + asm(op "l %1,%%gs:%c2" : \ + "+m" (_proxy_pda.field) : \ + "ri" ((T__)val), \ + "i" (pda_offset(field))); \ + break; \ + case 8: \ + asm(op "q %1,%%gs:%c2": \ + "+m" (_proxy_pda.field) : \ + "ri" ((T__)val), \ + "i"(pda_offset(field))); \ + break; \ + default: \ + __bad_pda_field(); \ + } \ +} while (0) + +#define pda_from_op(op, field) \ +({ \ + typeof(_proxy_pda.field) ret__; \ + switch (sizeof(_proxy_pda.field)) { \ + case 2: \ + asm(op "w %%gs:%c1,%0" : \ + "=r" (ret__) : \ + "i" (pda_offset(field)), \ + "m" (_proxy_pda.field)); \ + break; \ + case 4: \ + asm(op "l %%gs:%c1,%0": \ + "=r" (ret__): \ + "i" (pda_offset(field)), \ + "m" (_proxy_pda.field)); \ + break; \ + case 8: \ + asm(op "q %%gs:%c1,%0": \ + "=r" (ret__) : \ + "i" (pda_offset(field)), \ + "m" (_proxy_pda.field)); \ + break; \ + default: \ + __bad_pda_field(); \ + } \ + ret__; \ +}) + +#define read_pda(field) pda_from_op("mov", field) +#define write_pda(field, val) pda_to_op("mov", field, val) +#define add_pda(field, val) pda_to_op("add", field, val) +#define sub_pda(field, val) pda_to_op("sub", field, val) +#define or_pda(field, val) pda_to_op("or", field, val) + +/* This is not atomic against other CPUs -- CPU preemption needs to be off */ +#define test_and_clear_bit_pda(bit, field) \ +({ \ + int old__; \ + asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0" \ + : "=r" (old__), "+m" (_proxy_pda.field) \ + : "dIr" (bit), "i" (pda_offset(field)) : "memory");\ + old__; \ +}) + +#endif + +#define PDA_STACKOFFSET (5*8) + +#endif /* _ASM_X86_PDA_H */ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h new file mode 100644 index 00000000000..ece72053ba6 --- /dev/null +++ b/arch/x86/include/asm/percpu.h @@ -0,0 +1,218 @@ +#ifndef _ASM_X86_PERCPU_H +#define _ASM_X86_PERCPU_H + +#ifdef CONFIG_X86_64 +#include <linux/compiler.h> + +/* Same as asm-generic/percpu.h, except that we store the per cpu offset + in the PDA. Longer term the PDA and every per cpu variable + should be just put into a single section and referenced directly + from %gs */ + +#ifdef CONFIG_SMP +#include <asm/pda.h> + +#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) +#define __my_cpu_offset read_pda(data_offset) + +#define per_cpu_offset(x) (__per_cpu_offset(x)) + +#endif +#include <asm-generic/percpu.h> + +DECLARE_PER_CPU(struct x8664_pda, pda); + +/* + * These are supposed to be implemented as a single instruction which + * operates on the per-cpu data base segment. x86-64 doesn't have + * that yet, so this is a fairly inefficient workaround for the + * meantime. The single instruction is atomic with respect to + * preemption and interrupts, so we need to explicitly disable + * interrupts here to achieve the same effect. However, because it + * can be used from within interrupt-disable/enable, we can't actually + * disable interrupts; disabling preemption is enough. + */ +#define x86_read_percpu(var) \ + ({ \ + typeof(per_cpu_var(var)) __tmp; \ + preempt_disable(); \ + __tmp = __get_cpu_var(var); \ + preempt_enable(); \ + __tmp; \ + }) + +#define x86_write_percpu(var, val) \ + do { \ + preempt_disable(); \ + __get_cpu_var(var) = (val); \ + preempt_enable(); \ + } while(0) + +#else /* CONFIG_X86_64 */ + +#ifdef __ASSEMBLY__ + +/* + * PER_CPU finds an address of a per-cpu variable. + * + * Args: + * var - variable name + * reg - 32bit register + * + * The resulting address is stored in the "reg" argument. + * + * Example: + * PER_CPU(cpu_gdt_descr, %ebx) + */ +#ifdef CONFIG_SMP +#define PER_CPU(var, reg) \ + movl %fs:per_cpu__##this_cpu_off, reg; \ + lea per_cpu__##var(reg), reg +#define PER_CPU_VAR(var) %fs:per_cpu__##var +#else /* ! SMP */ +#define PER_CPU(var, reg) \ + movl $per_cpu__##var, reg +#define PER_CPU_VAR(var) per_cpu__##var +#endif /* SMP */ + +#else /* ...!ASSEMBLY */ + +/* + * PER_CPU finds an address of a per-cpu variable. + * + * Args: + * var - variable name + * cpu - 32bit register containing the current CPU number + * + * The resulting address is stored in the "cpu" argument. + * + * Example: + * PER_CPU(cpu_gdt_descr, %ebx) + */ +#ifdef CONFIG_SMP + +#define __my_cpu_offset x86_read_percpu(this_cpu_off) + +/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ +#define __percpu_seg "%%fs:" + +#else /* !SMP */ + +#define __percpu_seg "" + +#endif /* SMP */ + +#include <asm-generic/percpu.h> + +/* We can use this directly for local CPU (faster). */ +DECLARE_PER_CPU(unsigned long, this_cpu_off); + +/* For arch-specific code, we can use direct single-insn ops (they + * don't give an lvalue though). */ +extern void __bad_percpu_size(void); + +#define percpu_to_op(op, var, val) \ +do { \ + typedef typeof(var) T__; \ + if (0) { \ + T__ tmp__; \ + tmp__ = (val); \ + } \ + switch (sizeof(var)) { \ + case 1: \ + asm(op "b %1,"__percpu_seg"%0" \ + : "+m" (var) \ + : "ri" ((T__)val)); \ + break; \ + case 2: \ + asm(op "w %1,"__percpu_seg"%0" \ + : "+m" (var) \ + : "ri" ((T__)val)); \ + break; \ + case 4: \ + asm(op "l %1,"__percpu_seg"%0" \ + : "+m" (var) \ + : "ri" ((T__)val)); \ + break; \ + default: __bad_percpu_size(); \ + } \ +} while (0) + +#define percpu_from_op(op, var) \ +({ \ + typeof(var) ret__; \ + switch (sizeof(var)) { \ + case 1: \ + asm(op "b "__percpu_seg"%1,%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + case 2: \ + asm(op "w "__percpu_seg"%1,%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + case 4: \ + asm(op "l "__percpu_seg"%1,%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + default: __bad_percpu_size(); \ + } \ + ret__; \ +}) + +#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) +#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val) +#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val) +#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) +#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) +#endif /* !__ASSEMBLY__ */ +#endif /* !CONFIG_X86_64 */ + +#ifdef CONFIG_SMP + +/* + * Define the "EARLY_PER_CPU" macros. These are used for some per_cpu + * variables that are initialized and accessed before there are per_cpu + * areas allocated. + */ + +#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ + DEFINE_PER_CPU(_type, _name) = _initvalue; \ + __typeof__(_type) _name##_early_map[NR_CPUS] __initdata = \ + { [0 ... NR_CPUS-1] = _initvalue }; \ + __typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map + +#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ + EXPORT_PER_CPU_SYMBOL(_name) + +#define DECLARE_EARLY_PER_CPU(_type, _name) \ + DECLARE_PER_CPU(_type, _name); \ + extern __typeof__(_type) *_name##_early_ptr; \ + extern __typeof__(_type) _name##_early_map[] + +#define early_per_cpu_ptr(_name) (_name##_early_ptr) +#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) +#define early_per_cpu(_name, _cpu) \ + (early_per_cpu_ptr(_name) ? \ + early_per_cpu_ptr(_name)[_cpu] : \ + per_cpu(_name, _cpu)) + +#else /* !CONFIG_SMP */ +#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ + DEFINE_PER_CPU(_type, _name) = _initvalue + +#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ + EXPORT_PER_CPU_SYMBOL(_name) + +#define DECLARE_EARLY_PER_CPU(_type, _name) \ + DECLARE_PER_CPU(_type, _name) + +#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu) +#define early_per_cpu_ptr(_name) NULL +/* no early_per_cpu_map() */ + +#endif /* !CONFIG_SMP */ + +#endif /* _ASM_X86_PERCPU_H */ diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h new file mode 100644 index 00000000000..cb7c151a8bf --- /dev/null +++ b/arch/x86/include/asm/pgalloc.h @@ -0,0 +1,114 @@ +#ifndef _ASM_X86_PGALLOC_H +#define _ASM_X86_PGALLOC_H + +#include <linux/threads.h> +#include <linux/mm.h> /* for struct page */ +#include <linux/pagemap.h> + +static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; } + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define paravirt_pgd_alloc(mm) __paravirt_pgd_alloc(mm) +static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) {} +static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {} +static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {} +static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn, + unsigned long start, unsigned long count) {} +static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) {} +static inline void paravirt_release_pte(unsigned long pfn) {} +static inline void paravirt_release_pmd(unsigned long pfn) {} +static inline void paravirt_release_pud(unsigned long pfn) {} +#endif + +/* + * Allocate and free page tables. + */ +extern pgd_t *pgd_alloc(struct mm_struct *); +extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); + +extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long); +extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long); + +/* Should really implement gc for free page table pages. This could be + done with a reference count in struct page. */ + +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +{ + BUG_ON((unsigned long)pte & (PAGE_SIZE-1)); + free_page((unsigned long)pte); +} + +static inline void pte_free(struct mm_struct *mm, struct page *pte) +{ + __free_page(pte); +} + +extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte); + +static inline void pmd_populate_kernel(struct mm_struct *mm, + pmd_t *pmd, pte_t *pte) +{ + paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); + set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); +} + +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, + struct page *pte) +{ + unsigned long pfn = page_to_pfn(pte); + + paravirt_alloc_pte(mm, pfn); + set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE)); +} + +#define pmd_pgtable(pmd) pmd_page(pmd) + +#if PAGETABLE_LEVELS > 2 +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); +} + +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) +{ + BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); + free_page((unsigned long)pmd); +} + +extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); + +#ifdef CONFIG_X86_PAE +extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd); +#else /* !CONFIG_X86_PAE */ +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); + set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd))); +} +#endif /* CONFIG_X86_PAE */ + +#if PAGETABLE_LEVELS > 3 +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +{ + paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); + set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud))); +} + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); +} + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); + free_page((unsigned long)pud); +} + +extern void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud); +#endif /* PAGETABLE_LEVELS > 3 */ +#endif /* PAGETABLE_LEVELS > 2 */ + +#endif /* _ASM_X86_PGALLOC_H */ diff --git a/arch/x86/include/asm/pgtable-2level-defs.h b/arch/x86/include/asm/pgtable-2level-defs.h new file mode 100644 index 00000000000..d77db8990ea --- /dev/null +++ b/arch/x86/include/asm/pgtable-2level-defs.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_PGTABLE_2LEVEL_DEFS_H +#define _ASM_X86_PGTABLE_2LEVEL_DEFS_H + +#define SHARED_KERNEL_PMD 0 + +/* + * traditional i386 two-level paging structure: + */ + +#define PGDIR_SHIFT 22 +#define PTRS_PER_PGD 1024 + +/* + * the i386 is two-level, so we don't really have any + * PMD directory physically. + */ + +#define PTRS_PER_PTE 1024 + +#endif /* _ASM_X86_PGTABLE_2LEVEL_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h new file mode 100644 index 00000000000..b17edfd2362 --- /dev/null +++ b/arch/x86/include/asm/pgtable-2level.h @@ -0,0 +1,79 @@ +#ifndef _ASM_X86_PGTABLE_2LEVEL_H +#define _ASM_X86_PGTABLE_2LEVEL_H + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +static inline void native_set_pte(pte_t *ptep , pte_t pte) +{ + *ptep = pte; +} + +static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; +} + +static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) +{ + native_set_pte(ptep, pte); +} + +static inline void native_set_pte_present(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, pte_t pte) +{ + native_set_pte(ptep, pte); +} + +static inline void native_pmd_clear(pmd_t *pmdp) +{ + native_set_pmd(pmdp, __pmd(0)); +} + +static inline void native_pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t *xp) +{ + *xp = native_make_pte(0); +} + +#ifdef CONFIG_SMP +static inline pte_t native_ptep_get_and_clear(pte_t *xp) +{ + return __pte(xchg(&xp->pte_low, 0)); +} +#else +#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) +#endif + +#define pte_none(x) (!(x).pte_low) + +/* + * Bits 0, 6 and 7 are taken, split up the 29 bits of offset + * into this range: + */ +#define PTE_FILE_MAX_BITS 29 + +#define pte_to_pgoff(pte) \ + ((((pte).pte_low >> 1) & 0x1f) + (((pte).pte_low >> 8) << 5)) + +#define pgoff_to_pte(off) \ + ((pte_t) { .pte_low = (((off) & 0x1f) << 1) + \ + (((off) >> 5) << 8) + _PAGE_FILE }) + +/* Encode and de-code a swap entry */ +#define __swp_type(x) (((x).val >> 1) & 0x1f) +#define __swp_offset(x) ((x).val >> 8) +#define __swp_entry(type, offset) \ + ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) +#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) + +#endif /* _ASM_X86_PGTABLE_2LEVEL_H */ diff --git a/arch/x86/include/asm/pgtable-3level-defs.h b/arch/x86/include/asm/pgtable-3level-defs.h new file mode 100644 index 00000000000..62561367653 --- /dev/null +++ b/arch/x86/include/asm/pgtable-3level-defs.h @@ -0,0 +1,28 @@ +#ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H +#define _ASM_X86_PGTABLE_3LEVEL_DEFS_H + +#ifdef CONFIG_PARAVIRT +#define SHARED_KERNEL_PMD (pv_info.shared_kernel_pmd) +#else +#define SHARED_KERNEL_PMD 1 +#endif + +/* + * PGDIR_SHIFT determines what a top-level page table entry can map + */ +#define PGDIR_SHIFT 30 +#define PTRS_PER_PGD 4 + +/* + * PMD_SHIFT determines the size of the area a middle-level + * page table can map + */ +#define PMD_SHIFT 21 +#define PTRS_PER_PMD 512 + +/* + * entries per page directory level + */ +#define PTRS_PER_PTE 512 + +#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h new file mode 100644 index 00000000000..fb16cec702e --- /dev/null +++ b/arch/x86/include/asm/pgtable-3level.h @@ -0,0 +1,175 @@ +#ifndef _ASM_X86_PGTABLE_3LEVEL_H +#define _ASM_X86_PGTABLE_3LEVEL_H + +/* + * Intel Physical Address Extension (PAE) Mode - three-level page + * tables on PPro+ CPUs. + * + * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> + */ + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %p(%08lx%08lx).\n", \ + __FILE__, __LINE__, &(e), (e).pte_high, (e).pte_low) +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %p(%016Lx).\n", \ + __FILE__, __LINE__, &(e), pmd_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %p(%016Lx).\n", \ + __FILE__, __LINE__, &(e), pgd_val(e)) + +static inline int pud_none(pud_t pud) +{ + return pud_val(pud) == 0; +} + +static inline int pud_bad(pud_t pud) +{ + return (pud_val(pud) & ~(PTE_PFN_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0; +} + +static inline int pud_present(pud_t pud) +{ + return pud_val(pud) & _PAGE_PRESENT; +} + +/* Rules for using set_pte: the pte being assigned *must* be + * either not present or in a state where the hardware will + * not attempt to update the pte. In places where this is + * not possible, use pte_get_and_clear to obtain the old pte + * value and then use set_pte to update it. -ben + */ +static inline void native_set_pte(pte_t *ptep, pte_t pte) +{ + ptep->pte_high = pte.pte_high; + smp_wmb(); + ptep->pte_low = pte.pte_low; +} + +/* + * Since this is only called on user PTEs, and the page fault handler + * must handle the already racy situation of simultaneous page faults, + * we are justified in merely clearing the PTE present bit, followed + * by a set. The ordering here is important. + */ +static inline void native_set_pte_present(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, pte_t pte) +{ + ptep->pte_low = 0; + smp_wmb(); + ptep->pte_high = pte.pte_high; + smp_wmb(); + ptep->pte_low = pte.pte_low; +} + +static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) +{ + set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); +} + +static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + set_64bit((unsigned long long *)(pmdp), native_pmd_val(pmd)); +} + +static inline void native_set_pud(pud_t *pudp, pud_t pud) +{ + set_64bit((unsigned long long *)(pudp), native_pud_val(pud)); +} + +/* + * For PTEs and PDEs, we must clear the P-bit first when clearing a page table + * entry, so clear the bottom half first and enforce ordering with a compiler + * barrier. + */ +static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + ptep->pte_low = 0; + smp_wmb(); + ptep->pte_high = 0; +} + +static inline void native_pmd_clear(pmd_t *pmd) +{ + u32 *tmp = (u32 *)pmd; + *tmp = 0; + smp_wmb(); + *(tmp + 1) = 0; +} + +static inline void pud_clear(pud_t *pudp) +{ + unsigned long pgd; + + set_pud(pudp, __pud(0)); + + /* + * According to Intel App note "TLBs, Paging-Structure Caches, + * and Their Invalidation", April 2007, document 317080-001, + * section 8.1: in PAE mode we explicitly have to flush the + * TLB via cr3 if the top-level pgd is changed... + * + * Make sure the pud entry we're updating is within the + * current pgd to avoid unnecessary TLB flushes. + */ + pgd = read_cr3(); + if (__pa(pudp) >= pgd && __pa(pudp) < + (pgd + sizeof(pgd_t)*PTRS_PER_PGD)) + write_cr3(pgd); +} + +#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_PFN_MASK)) + +#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_PFN_MASK)) + + +/* Find an entry in the second-level page table.. */ +#define pmd_offset(pud, address) ((pmd_t *)pud_page(*(pud)) + \ + pmd_index(address)) + +#ifdef CONFIG_SMP +static inline pte_t native_ptep_get_and_clear(pte_t *ptep) +{ + pte_t res; + + /* xchg acts as a barrier before the setting of the high bits */ + res.pte_low = xchg(&ptep->pte_low, 0); + res.pte_high = ptep->pte_high; + ptep->pte_high = 0; + + return res; +} +#else +#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) +#endif + +#define __HAVE_ARCH_PTE_SAME +static inline int pte_same(pte_t a, pte_t b) +{ + return a.pte_low == b.pte_low && a.pte_high == b.pte_high; +} + +static inline int pte_none(pte_t pte) +{ + return !pte.pte_low && !pte.pte_high; +} + +/* + * Bits 0, 6 and 7 are taken in the low part of the pte, + * put the 32 bits of offset into the high part. + */ +#define pte_to_pgoff(pte) ((pte).pte_high) +#define pgoff_to_pte(off) \ + ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } }) +#define PTE_FILE_MAX_BITS 32 + +/* Encode and de-code a swap entry */ +#define __swp_type(x) (((x).val) & 0x1f) +#define __swp_offset(x) ((x).val >> 5) +#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5}) +#define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high }) +#define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } }) + +#endif /* _ASM_X86_PGTABLE_3LEVEL_H */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h new file mode 100644 index 00000000000..c012f3b1167 --- /dev/null +++ b/arch/x86/include/asm/pgtable.h @@ -0,0 +1,562 @@ +#ifndef _ASM_X86_PGTABLE_H +#define _ASM_X86_PGTABLE_H + +#define FIRST_USER_ADDRESS 0 + +#define _PAGE_BIT_PRESENT 0 /* is present */ +#define _PAGE_BIT_RW 1 /* writeable */ +#define _PAGE_BIT_USER 2 /* userspace addressable */ +#define _PAGE_BIT_PWT 3 /* page write through */ +#define _PAGE_BIT_PCD 4 /* page cache disabled */ +#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */ +#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */ +#define _PAGE_BIT_FILE 6 +#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ +#define _PAGE_BIT_PAT 7 /* on 4KB pages */ +#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ +#define _PAGE_BIT_UNUSED1 9 /* available for programmer */ +#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ +#define _PAGE_BIT_UNUSED3 11 +#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ +#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 +#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 +#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ + +#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) +#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) +#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) +#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT) +#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD) +#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) +#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) +#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) +#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) +#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) +#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) +#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) +#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) +#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) +#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) +#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) +#define __HAVE_ARCH_PTE_SPECIAL + +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) +#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) +#else +#define _PAGE_NX (_AT(pteval_t, 0)) +#endif + +/* If _PAGE_PRESENT is clear, we use these: */ +#define _PAGE_FILE _PAGE_DIRTY /* nonlinear file mapping, + * saved PTE; unset:swap */ +#define _PAGE_PROTNONE _PAGE_PSE /* if the user mapped it with PROT_NONE; + pte_present gives true */ + +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_DIRTY) +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ + _PAGE_DIRTY) + +/* Set of bits not changed in pte_modify */ +#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ + _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) + +#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) +#define _PAGE_CACHE_WB (0) +#define _PAGE_CACHE_WC (_PAGE_PWT) +#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD) +#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT) + +#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_NX) + +#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \ + _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_NX) +#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ + _PAGE_ACCESSED) +#define PAGE_COPY PAGE_COPY_NOEXEC +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_NX) +#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ + _PAGE_ACCESSED) + +#define __PAGE_KERNEL_EXEC \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL) +#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) + +#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) +#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) +#define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT) +#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC) +#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) +#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) +#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) +#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) +#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) +#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) +#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) + +#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP) +#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP) +#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP) +#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP) + +#define PAGE_KERNEL __pgprot(__PAGE_KERNEL) +#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) +#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) +#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) +#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC) +#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) +#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS) +#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE) +#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) +#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE) +#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) +#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL) +#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE) + +#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) +#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE) +#define PAGE_KERNEL_IO_UC_MINUS __pgprot(__PAGE_KERNEL_IO_UC_MINUS) +#define PAGE_KERNEL_IO_WC __pgprot(__PAGE_KERNEL_IO_WC) + +/* xwr */ +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY +#define __P100 PAGE_READONLY_EXEC +#define __P101 PAGE_READONLY_EXEC +#define __P110 PAGE_COPY_EXEC +#define __P111 PAGE_COPY_EXEC + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED +#define __S100 PAGE_READONLY_EXEC +#define __S101 PAGE_READONLY_EXEC +#define __S110 PAGE_SHARED_EXEC +#define __S111 PAGE_SHARED_EXEC + +/* + * early identity mapping pte attrib macros. + */ +#ifdef CONFIG_X86_64 +#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC +#else +/* + * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection + * bits are combined, this will alow user to access the high address mapped + * VDSO in the presence of CONFIG_COMPAT_VDSO + */ +#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ +#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ +#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ +#endif + +#ifndef __ASSEMBLY__ + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + +extern spinlock_t pgd_lock; +extern struct list_head pgd_list; + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +static inline int pte_dirty(pte_t pte) +{ + return pte_flags(pte) & _PAGE_DIRTY; +} + +static inline int pte_young(pte_t pte) +{ + return pte_flags(pte) & _PAGE_ACCESSED; +} + +static inline int pte_write(pte_t pte) +{ + return pte_flags(pte) & _PAGE_RW; +} + +static inline int pte_file(pte_t pte) +{ + return pte_flags(pte) & _PAGE_FILE; +} + +static inline int pte_huge(pte_t pte) +{ + return pte_flags(pte) & _PAGE_PSE; +} + +static inline int pte_global(pte_t pte) +{ + return pte_flags(pte) & _PAGE_GLOBAL; +} + +static inline int pte_exec(pte_t pte) +{ + return !(pte_flags(pte) & _PAGE_NX); +} + +static inline int pte_special(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SPECIAL; +} + +static inline unsigned long pte_pfn(pte_t pte) +{ + return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT; +} + +#define pte_page(pte) pfn_to_page(pte_pfn(pte)) + +static inline int pmd_large(pmd_t pte) +{ + return (pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == + (_PAGE_PSE | _PAGE_PRESENT); +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_DIRTY); +} + +static inline pte_t pte_mkold(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_ACCESSED); +} + +static inline pte_t pte_wrprotect(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_RW); +} + +static inline pte_t pte_mkexec(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_NX); +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_DIRTY); +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_ACCESSED); +} + +static inline pte_t pte_mkwrite(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_RW); +} + +static inline pte_t pte_mkhuge(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_PSE); +} + +static inline pte_t pte_clrhuge(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_PSE); +} + +static inline pte_t pte_mkglobal(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_GLOBAL); +} + +static inline pte_t pte_clrglobal(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_GLOBAL); +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_SPECIAL); +} + +extern pteval_t __supported_pte_mask; + +static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) +{ + return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) | + pgprot_val(pgprot)) & __supported_pte_mask); +} + +static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) +{ + return __pmd((((phys_addr_t)page_nr << PAGE_SHIFT) | + pgprot_val(pgprot)) & __supported_pte_mask); +} + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + pteval_t val = pte_val(pte); + + /* + * Chop off the NX bit (if present), and add the NX portion of + * the newprot (if present): + */ + val &= _PAGE_CHG_MASK; + val |= pgprot_val(newprot) & (~_PAGE_CHG_MASK) & __supported_pte_mask; + + return __pte(val); +} + +/* mprotect needs to preserve PAT bits when updating vm_page_prot */ +#define pgprot_modify pgprot_modify +static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) +{ + pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK; + pgprotval_t addbits = pgprot_val(newprot); + return __pgprot(preservebits | addbits); +} + +#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK) + +#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) + +#ifndef __ASSEMBLY__ +#define __HAVE_PHYS_MEM_ACCESS_PROT +struct file; +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); +int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t *vma_prot); +#endif + +/* Install a pte for a particular vaddr in kernel space. */ +void set_pte_vaddr(unsigned long vaddr, pte_t pte); + +#ifdef CONFIG_X86_32 +extern void native_pagetable_setup_start(pgd_t *base); +extern void native_pagetable_setup_done(pgd_t *base); +#else +static inline void native_pagetable_setup_start(pgd_t *base) {} +static inline void native_pagetable_setup_done(pgd_t *base) {} +#endif + +struct seq_file; +extern void arch_report_meminfo(struct seq_file *m); + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else /* !CONFIG_PARAVIRT */ +#define set_pte(ptep, pte) native_set_pte(ptep, pte) +#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) + +#define set_pte_present(mm, addr, ptep, pte) \ + native_set_pte_present(mm, addr, ptep, pte) +#define set_pte_atomic(ptep, pte) \ + native_set_pte_atomic(ptep, pte) + +#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd) + +#ifndef __PAGETABLE_PUD_FOLDED +#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) +#define pgd_clear(pgd) native_pgd_clear(pgd) +#endif + +#ifndef set_pud +# define set_pud(pudp, pud) native_set_pud(pudp, pud) +#endif + +#ifndef __PAGETABLE_PMD_FOLDED +#define pud_clear(pud) native_pud_clear(pud) +#endif + +#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) +#define pmd_clear(pmd) native_pmd_clear(pmd) + +#define pte_update(mm, addr, ptep) do { } while (0) +#define pte_update_defer(mm, addr, ptep) do { } while (0) + +static inline void __init paravirt_pagetable_setup_start(pgd_t *base) +{ + native_pagetable_setup_start(base); +} + +static inline void __init paravirt_pagetable_setup_done(pgd_t *base) +{ + native_pagetable_setup_done(base); +} +#endif /* CONFIG_PARAVIRT */ + +#endif /* __ASSEMBLY__ */ + +#ifdef CONFIG_X86_32 +# include "pgtable_32.h" +#else +# include "pgtable_64.h" +#endif + +/* + * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] + * + * this macro returns the index of the entry in the pgd page which would + * control the given virtual address + */ +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) + +/* + * pgd_offset() returns a (pgd_t *) + * pgd_index() is used get the offset into the pgd page's array of pgd_t's; + */ +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) +/* + * a shortcut which implies the use of the kernel's pgd, instead + * of a process's + */ +#define pgd_offset_k(address) pgd_offset(&init_mm, (address)) + + +#define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) +#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) + +#ifndef __ASSEMBLY__ + +enum { + PG_LEVEL_NONE, + PG_LEVEL_4K, + PG_LEVEL_2M, + PG_LEVEL_1G, + PG_LEVEL_NUM +}; + +#ifdef CONFIG_PROC_FS +extern void update_page_count(int level, unsigned long pages); +#else +static inline void update_page_count(int level, unsigned long pages) { } +#endif + +/* + * Helper function that returns the kernel pagetable entry controlling + * the virtual address 'address'. NULL means no pagetable entry present. + * NOTE: the return type is pte_t but if the pmd is PSE then we return it + * as a pte too. + */ +extern pte_t *lookup_address(unsigned long address, unsigned int *level); + +/* local pte updates need not use xchg for locking */ +static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) +{ + pte_t res = *ptep; + + /* Pure native function needs no input for mm, addr */ + native_pte_clear(NULL, 0, ptep); + return res; +} + +static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep , pte_t pte) +{ + native_set_pte(ptep, pte); +} + +#ifndef CONFIG_PARAVIRT +/* + * Rules for using pte_update - it must be called after any PTE update which + * has not been done using the set_pte / clear_pte interfaces. It is used by + * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE + * updates should either be sets, clears, or set_pte_atomic for P->P + * transitions, which means this hook should only be called for user PTEs. + * This hook implies a P->P protection or access change has taken place, which + * requires a subsequent TLB flush. The notification can optionally be delayed + * until the TLB flush event by using the pte_update_defer form of the + * interface, but care must be taken to assure that the flush happens while + * still holding the same page table lock so that the shadow and primary pages + * do not become out of sync on SMP. + */ +#define pte_update(mm, addr, ptep) do { } while (0) +#define pte_update_defer(mm, addr, ptep) do { } while (0) +#endif + +/* + * We only update the dirty/accessed state if we set + * the dirty bit by hand in the kernel, since the hardware + * will do the accessed bit for us, and we don't want to + * race with other CPU's that might be updating the dirty + * bit at the same time. + */ +struct vm_area_struct; + +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +extern int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty); + +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +extern int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); + +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +extern int ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep); + +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pte_t pte = native_ptep_get_and_clear(ptep); + pte_update(mm, addr, ptep); + return pte; +} + +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL +static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + int full) +{ + pte_t pte; + if (full) { + /* + * Full address destruction in progress; paravirt does not + * care about updates and native needs no locking + */ + pte = native_local_ptep_get_and_clear(ptep); + } else { + pte = ptep_get_and_clear(mm, addr, ptep); + } + return pte; +} + +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +static inline void ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte); + pte_update(mm, addr, ptep); +} + +/* + * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); + * + * dst - pointer to pgd range anwhere on a pgd page + * src - "" + * count - the number of pgds to copy. + * + * dst and src can be on the same page, but the range must not overlap, + * and must not cross a page boundary. + */ +static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) +{ + memcpy(dst, src, count * sizeof(pgd_t)); +} + + +#include <asm-generic/pgtable.h> +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_PGTABLE_H */ diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h new file mode 100644 index 00000000000..f9d5889b336 --- /dev/null +++ b/arch/x86/include/asm/pgtable_32.h @@ -0,0 +1,191 @@ +#ifndef _ASM_X86_PGTABLE_32_H +#define _ASM_X86_PGTABLE_32_H + + +/* + * The Linux memory management assumes a three-level page table setup. On + * the i386, we use that, but "fold" the mid level into the top-level page + * table, so that we physically have the same two-level page table as the + * i386 mmu expects. + * + * This file contains the functions and defines necessary to modify and use + * the i386 page table tree. + */ +#ifndef __ASSEMBLY__ +#include <asm/processor.h> +#include <asm/fixmap.h> +#include <linux/threads.h> +#include <asm/paravirt.h> + +#include <linux/bitops.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/spinlock.h> + +struct mm_struct; +struct vm_area_struct; + +extern pgd_t swapper_pg_dir[1024]; + +static inline void pgtable_cache_init(void) { } +static inline void check_pgt_cache(void) { } +void paging_init(void); + +extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); + +/* + * The Linux x86 paging architecture is 'compile-time dual-mode', it + * implements both the traditional 2-level x86 page tables and the + * newer 3-level PAE-mode page tables. + */ +#ifdef CONFIG_X86_PAE +# include <asm/pgtable-3level-defs.h> +# define PMD_SIZE (1UL << PMD_SHIFT) +# define PMD_MASK (~(PMD_SIZE - 1)) +#else +# include <asm/pgtable-2level-defs.h> +#endif + +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE - 1)) + +/* Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8MB value just means that there will be a 8MB "hole" after the + * physical memory until the kernel virtual memory starts. That means that + * any out-of-bounds memory accesses will hopefully be caught. + * The vmalloc() routines leaves a hole of 4kB between each vmalloced + * area for the same reason. ;) + */ +#define VMALLOC_OFFSET (8 * 1024 * 1024) +#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) +#ifdef CONFIG_X86_PAE +#define LAST_PKMAP 512 +#else +#define LAST_PKMAP 1024 +#endif + +#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE * (LAST_PKMAP + 1)) \ + & PMD_MASK) + +#ifdef CONFIG_HIGHMEM +# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) +#else +# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) +#endif + +#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) + +/* + * Define this if things work differently on an i386 and an i486: + * it will (on an i486) warn about kernel memory accesses that are + * done without a 'access_ok(VERIFY_WRITE,..)' + */ +#undef TEST_ACCESS_OK + +/* The boot page tables (all created as a single array) */ +extern unsigned long pg0[]; + +#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) + +/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */ +#define pmd_none(x) (!(unsigned long)pmd_val((x))) +#define pmd_present(x) (pmd_val((x)) & _PAGE_PRESENT) +#define pmd_bad(x) ((pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) + +#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) + +#ifdef CONFIG_X86_PAE +# include <asm/pgtable-3level.h> +#else +# include <asm/pgtable-2level.h> +#endif + +/* + * Macro to mark a page protection value as "uncacheable". + * On processors which do not support it, this is a no-op. + */ +#define pgprot_noncached(prot) \ + ((boot_cpu_data.x86 > 3) \ + ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) \ + : (prot)) + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + + +static inline int pud_large(pud_t pud) { return 0; } + +/* + * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] + * + * this macro returns the index of the entry in the pmd page which would + * control the given virtual address + */ +#define pmd_index(address) \ + (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) + +/* + * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] + * + * this macro returns the index of the entry in the pte page which would + * control the given virtual address + */ +#define pte_index(address) \ + (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset_kernel(dir, address) \ + ((pte_t *)pmd_page_vaddr(*(dir)) + pte_index((address))) + +#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT)) + +#define pmd_page_vaddr(pmd) \ + ((unsigned long)__va(pmd_val((pmd)) & PTE_PFN_MASK)) + +#if defined(CONFIG_HIGHPTE) +#define pte_offset_map(dir, address) \ + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \ + pte_index((address))) +#define pte_offset_map_nested(dir, address) \ + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ + pte_index((address))) +#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0) +#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) +#else +#define pte_offset_map(dir, address) \ + ((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address))) +#define pte_offset_map_nested(dir, address) pte_offset_map((dir), (address)) +#define pte_unmap(pte) do { } while (0) +#define pte_unmap_nested(pte) do { } while (0) +#endif + +/* Clear a kernel PTE and flush it from the TLB */ +#define kpte_clear_flush(ptep, vaddr) \ +do { \ + pte_clear(&init_mm, (vaddr), (ptep)); \ + __flush_tlb_one((vaddr)); \ +} while (0) + +/* + * The i386 doesn't have any external MMU info: the kernel page + * tables contain all the necessary information. + */ +#define update_mmu_cache(vma, address, pte) do { } while (0) + +#endif /* !__ASSEMBLY__ */ + +/* + * kern_addr_valid() is (1) for FLATMEM and (0) for + * SPARSEMEM and DISCONTIGMEM + */ +#ifdef CONFIG_FLATMEM +#define kern_addr_valid(addr) (1) +#else +#define kern_addr_valid(kaddr) (0) +#endif + +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + +#endif /* _ASM_X86_PGTABLE_32_H */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h new file mode 100644 index 00000000000..545a0e042bb --- /dev/null +++ b/arch/x86/include/asm/pgtable_64.h @@ -0,0 +1,285 @@ +#ifndef _ASM_X86_PGTABLE_64_H +#define _ASM_X86_PGTABLE_64_H + +#include <linux/const.h> +#ifndef __ASSEMBLY__ + +/* + * This file contains the functions and defines necessary to modify and use + * the x86-64 page table tree. + */ +#include <asm/processor.h> +#include <linux/bitops.h> +#include <linux/threads.h> +#include <asm/pda.h> + +extern pud_t level3_kernel_pgt[512]; +extern pud_t level3_ident_pgt[512]; +extern pmd_t level2_kernel_pgt[512]; +extern pmd_t level2_fixmap_pgt[512]; +extern pmd_t level2_ident_pgt[512]; +extern pgd_t init_level4_pgt[]; + +#define swapper_pg_dir init_level4_pgt + +extern void paging_init(void); + +#endif /* !__ASSEMBLY__ */ + +#define SHARED_KERNEL_PMD 0 + +/* + * PGDIR_SHIFT determines what a top-level page table entry can map + */ +#define PGDIR_SHIFT 39 +#define PTRS_PER_PGD 512 + +/* + * 3rd level page + */ +#define PUD_SHIFT 30 +#define PTRS_PER_PUD 512 + +/* + * PMD_SHIFT determines the size of the area a middle-level + * page table can map + */ +#define PMD_SHIFT 21 +#define PTRS_PER_PMD 512 + +/* + * entries per page directory level + */ +#define PTRS_PER_PTE 512 + +#ifndef __ASSEMBLY__ + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %p(%016lx).\n", \ + __FILE__, __LINE__, &(e), pte_val(e)) +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %p(%016lx).\n", \ + __FILE__, __LINE__, &(e), pmd_val(e)) +#define pud_ERROR(e) \ + printk("%s:%d: bad pud %p(%016lx).\n", \ + __FILE__, __LINE__, &(e), pud_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %p(%016lx).\n", \ + __FILE__, __LINE__, &(e), pgd_val(e)) + +#define pgd_none(x) (!pgd_val(x)) +#define pud_none(x) (!pud_val(x)) + +struct mm_struct; + +void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); + + +static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + *ptep = native_make_pte(0); +} + +static inline void native_set_pte(pte_t *ptep, pte_t pte) +{ + *ptep = pte; +} + +static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) +{ + native_set_pte(ptep, pte); +} + +static inline pte_t native_ptep_get_and_clear(pte_t *xp) +{ +#ifdef CONFIG_SMP + return native_make_pte(xchg(&xp->pte, 0)); +#else + /* native_local_ptep_get_and_clear, + but duplicated because of cyclic dependency */ + pte_t ret = *xp; + native_pte_clear(NULL, 0, xp); + return ret; +#endif +} + +static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; +} + +static inline void native_pmd_clear(pmd_t *pmd) +{ + native_set_pmd(pmd, native_make_pmd(0)); +} + +static inline void native_set_pud(pud_t *pudp, pud_t pud) +{ + *pudp = pud; +} + +static inline void native_pud_clear(pud_t *pud) +{ + native_set_pud(pud, native_make_pud(0)); +} + +static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + *pgdp = pgd; +} + +static inline void native_pgd_clear(pgd_t *pgd) +{ + native_set_pgd(pgd, native_make_pgd(0)); +} + +#define pte_same(a, b) ((a).pte == (b).pte) + +#endif /* !__ASSEMBLY__ */ + +#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE - 1)) +#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE - 1)) +#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE - 1)) + + +#define MAXMEM _AC(0x00003fffffffffff, UL) +#define VMALLOC_START _AC(0xffffc20000000000, UL) +#define VMALLOC_END _AC(0xffffe1ffffffffff, UL) +#define VMEMMAP_START _AC(0xffffe20000000000, UL) +#define MODULES_VADDR _AC(0xffffffffa0000000, UL) +#define MODULES_END _AC(0xffffffffff000000, UL) +#define MODULES_LEN (MODULES_END - MODULES_VADDR) + +#ifndef __ASSEMBLY__ + +static inline int pgd_bad(pgd_t pgd) +{ + return (pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; +} + +static inline int pud_bad(pud_t pud) +{ + return (pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; +} + +static inline int pmd_bad(pmd_t pmd) +{ + return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; +} + +#define pte_none(x) (!pte_val((x))) +#define pte_present(x) (pte_val((x)) & (_PAGE_PRESENT | _PAGE_PROTNONE)) + +#define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ + +/* + * Macro to mark a page protection value as "uncacheable". + */ +#define pgprot_noncached(prot) \ + (__pgprot(pgprot_val((prot)) | _PAGE_PCD | _PAGE_PWT)) + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ + +/* + * Level 4 access. + */ +#define pgd_page_vaddr(pgd) \ + ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_PFN_MASK)) +#define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT)) +#define pgd_present(pgd) (pgd_val(pgd) & _PAGE_PRESENT) +static inline int pgd_large(pgd_t pgd) { return 0; } +#define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE) + +/* PUD - Level3 access */ +/* to find an entry in a page-table-directory. */ +#define pud_page_vaddr(pud) \ + ((unsigned long)__va(pud_val((pud)) & PHYSICAL_PAGE_MASK)) +#define pud_page(pud) (pfn_to_page(pud_val((pud)) >> PAGE_SHIFT)) +#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +#define pud_offset(pgd, address) \ + ((pud_t *)pgd_page_vaddr(*(pgd)) + pud_index((address))) +#define pud_present(pud) (pud_val((pud)) & _PAGE_PRESENT) + +static inline int pud_large(pud_t pte) +{ + return (pud_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == + (_PAGE_PSE | _PAGE_PRESENT); +} + +/* PMD - Level 2 access */ +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_PFN_MASK)) +#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT)) + +#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +#define pmd_offset(dir, address) ((pmd_t *)pud_page_vaddr(*(dir)) + \ + pmd_index(address)) +#define pmd_none(x) (!pmd_val((x))) +#define pmd_present(x) (pmd_val((x)) & _PAGE_PRESENT) +#define pfn_pmd(nr, prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val((prot)))) +#define pmd_pfn(x) ((pmd_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT) + +#define pte_to_pgoff(pte) ((pte_val((pte)) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) +#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | \ + _PAGE_FILE }) +#define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT + +/* PTE - Level 1 access. */ + +/* page, protection -> pte */ +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn((page)), (pgprot)) + +#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \ + pte_index((address))) + +/* x86-64 always has all page tables mapped. */ +#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) +#define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address)) +#define pte_unmap(pte) /* NOP */ +#define pte_unmap_nested(pte) /* NOP */ + +#define update_mmu_cache(vma, address, pte) do { } while (0) + +extern int direct_gbpages; + +/* Encode and de-code a swap entry */ +#define __swp_type(x) (((x).val >> 1) & 0x3f) +#define __swp_offset(x) ((x).val >> 8) +#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | \ + ((offset) << 8) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) +#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) + +extern int kern_addr_valid(unsigned long addr); +extern void cleanup_highmap(void); + +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + +#define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + +#define pgtable_cache_init() do { } while (0) +#define check_pgt_cache() do { } while (0) + +#define PAGE_AGP PAGE_KERNEL_NOCACHE +#define HAVE_PAGE_AGP 1 + +/* fs/proc/kcore.c */ +#define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK) +#define kc_offset_to_vaddr(o) \ + (((o) & (1UL << (__VIRTUAL_MASK_SHIFT - 1))) \ + ? ((o) | ~__VIRTUAL_MASK) \ + : (o)) + +#define __HAVE_ARCH_PTE_SAME +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_X86_PGTABLE_64_H */ diff --git a/arch/x86/include/asm/poll.h b/arch/x86/include/asm/poll.h new file mode 100644 index 00000000000..c98509d3149 --- /dev/null +++ b/arch/x86/include/asm/poll.h @@ -0,0 +1 @@ +#include <asm-generic/poll.h> diff --git a/arch/x86/include/asm/posix_types.h b/arch/x86/include/asm/posix_types.h new file mode 100644 index 00000000000..bb7133dc155 --- /dev/null +++ b/arch/x86/include/asm/posix_types.h @@ -0,0 +1,13 @@ +#ifdef __KERNEL__ +# ifdef CONFIG_X86_32 +# include "posix_types_32.h" +# else +# include "posix_types_64.h" +# endif +#else +# ifdef __i386__ +# include "posix_types_32.h" +# else +# include "posix_types_64.h" +# endif +#endif diff --git a/arch/x86/include/asm/posix_types_32.h b/arch/x86/include/asm/posix_types_32.h new file mode 100644 index 00000000000..f7d9adf82e5 --- /dev/null +++ b/arch/x86/include/asm/posix_types_32.h @@ -0,0 +1,85 @@ +#ifndef _ASM_X86_POSIX_TYPES_32_H +#define _ASM_X86_POSIX_TYPES_32_H + +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. + */ + +typedef unsigned long __kernel_ino_t; +typedef unsigned short __kernel_mode_t; +typedef unsigned short __kernel_nlink_t; +typedef long __kernel_off_t; +typedef int __kernel_pid_t; +typedef unsigned short __kernel_ipc_pid_t; +typedef unsigned short __kernel_uid_t; +typedef unsigned short __kernel_gid_t; +typedef unsigned int __kernel_size_t; +typedef int __kernel_ssize_t; +typedef int __kernel_ptrdiff_t; +typedef long __kernel_time_t; +typedef long __kernel_suseconds_t; +typedef long __kernel_clock_t; +typedef int __kernel_timer_t; +typedef int __kernel_clockid_t; +typedef int __kernel_daddr_t; +typedef char * __kernel_caddr_t; +typedef unsigned short __kernel_uid16_t; +typedef unsigned short __kernel_gid16_t; +typedef unsigned int __kernel_uid32_t; +typedef unsigned int __kernel_gid32_t; + +typedef unsigned short __kernel_old_uid_t; +typedef unsigned short __kernel_old_gid_t; +typedef unsigned short __kernel_old_dev_t; + +#ifdef __GNUC__ +typedef long long __kernel_loff_t; +#endif + +typedef struct { + int val[2]; +} __kernel_fsid_t; + +#if defined(__KERNEL__) + +#undef __FD_SET +#define __FD_SET(fd,fdsetp) \ + asm volatile("btsl %1,%0": \ + "+m" (*(__kernel_fd_set *)(fdsetp)) \ + : "r" ((int)(fd))) + +#undef __FD_CLR +#define __FD_CLR(fd,fdsetp) \ + asm volatile("btrl %1,%0": \ + "+m" (*(__kernel_fd_set *)(fdsetp)) \ + : "r" ((int) (fd))) + +#undef __FD_ISSET +#define __FD_ISSET(fd,fdsetp) \ + (__extension__ \ + ({ \ + unsigned char __result; \ + asm volatile("btl %1,%2 ; setb %0" \ + : "=q" (__result) \ + : "r" ((int)(fd)), \ + "m" (*(__kernel_fd_set *)(fdsetp))); \ + __result; \ +})) + +#undef __FD_ZERO +#define __FD_ZERO(fdsetp) \ +do { \ + int __d0, __d1; \ + asm volatile("cld ; rep ; stosl" \ + : "=m" (*(__kernel_fd_set *)(fdsetp)), \ + "=&c" (__d0), "=&D" (__d1) \ + : "a" (0), "1" (__FDSET_LONGS), \ + "2" ((__kernel_fd_set *)(fdsetp)) \ + : "memory"); \ +} while (0) + +#endif /* defined(__KERNEL__) */ + +#endif /* _ASM_X86_POSIX_TYPES_32_H */ diff --git a/arch/x86/include/asm/posix_types_64.h b/arch/x86/include/asm/posix_types_64.h new file mode 100644 index 00000000000..eb8d2d92b63 --- /dev/null +++ b/arch/x86/include/asm/posix_types_64.h @@ -0,0 +1,119 @@ +#ifndef _ASM_X86_POSIX_TYPES_64_H +#define _ASM_X86_POSIX_TYPES_64_H + +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. + */ + +typedef unsigned long __kernel_ino_t; +typedef unsigned int __kernel_mode_t; +typedef unsigned long __kernel_nlink_t; +typedef long __kernel_off_t; +typedef int __kernel_pid_t; +typedef int __kernel_ipc_pid_t; +typedef unsigned int __kernel_uid_t; +typedef unsigned int __kernel_gid_t; +typedef unsigned long __kernel_size_t; +typedef long __kernel_ssize_t; +typedef long __kernel_ptrdiff_t; +typedef long __kernel_time_t; +typedef long __kernel_suseconds_t; +typedef long __kernel_clock_t; +typedef int __kernel_timer_t; +typedef int __kernel_clockid_t; +typedef int __kernel_daddr_t; +typedef char * __kernel_caddr_t; +typedef unsigned short __kernel_uid16_t; +typedef unsigned short __kernel_gid16_t; + +#ifdef __GNUC__ +typedef long long __kernel_loff_t; +#endif + +typedef struct { + int val[2]; +} __kernel_fsid_t; + +typedef unsigned short __kernel_old_uid_t; +typedef unsigned short __kernel_old_gid_t; +typedef __kernel_uid_t __kernel_uid32_t; +typedef __kernel_gid_t __kernel_gid32_t; + +typedef unsigned long __kernel_old_dev_t; + +#ifdef __KERNEL__ + +#undef __FD_SET +static inline void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp) +{ + unsigned long _tmp = fd / __NFDBITS; + unsigned long _rem = fd % __NFDBITS; + fdsetp->fds_bits[_tmp] |= (1UL<<_rem); +} + +#undef __FD_CLR +static inline void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp) +{ + unsigned long _tmp = fd / __NFDBITS; + unsigned long _rem = fd % __NFDBITS; + fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem); +} + +#undef __FD_ISSET +static inline int __FD_ISSET(unsigned long fd, __const__ __kernel_fd_set *p) +{ + unsigned long _tmp = fd / __NFDBITS; + unsigned long _rem = fd % __NFDBITS; + return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0; +} + +/* + * This will unroll the loop for the normal constant cases (8 or 32 longs, + * for 256 and 1024-bit fd_sets respectively) + */ +#undef __FD_ZERO +static inline void __FD_ZERO(__kernel_fd_set *p) +{ + unsigned long *tmp = p->fds_bits; + int i; + + if (__builtin_constant_p(__FDSET_LONGS)) { + switch (__FDSET_LONGS) { + case 32: + tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; + tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; + tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0; + tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0; + tmp[16] = 0; tmp[17] = 0; tmp[18] = 0; tmp[19] = 0; + tmp[20] = 0; tmp[21] = 0; tmp[22] = 0; tmp[23] = 0; + tmp[24] = 0; tmp[25] = 0; tmp[26] = 0; tmp[27] = 0; + tmp[28] = 0; tmp[29] = 0; tmp[30] = 0; tmp[31] = 0; + return; + case 16: + tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; + tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; + tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0; + tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0; + return; + case 8: + tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; + tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; + return; + case 4: + tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; + return; + } + } + i = __FDSET_LONGS; + while (i) { + i--; + *tmp = 0; + tmp++; + } +} + +#endif /* defined(__KERNEL__) */ + +#endif /* _ASM_X86_POSIX_TYPES_64_H */ diff --git a/arch/x86/include/asm/prctl.h b/arch/x86/include/asm/prctl.h new file mode 100644 index 00000000000..fe681147a4f --- /dev/null +++ b/arch/x86/include/asm/prctl.h @@ -0,0 +1,10 @@ +#ifndef _ASM_X86_PRCTL_H +#define _ASM_X86_PRCTL_H + +#define ARCH_SET_GS 0x1001 +#define ARCH_SET_FS 0x1002 +#define ARCH_GET_FS 0x1003 +#define ARCH_GET_GS 0x1004 + + +#endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/include/asm/processor-cyrix.h b/arch/x86/include/asm/processor-cyrix.h new file mode 100644 index 00000000000..1198f2a0e42 --- /dev/null +++ b/arch/x86/include/asm/processor-cyrix.h @@ -0,0 +1,38 @@ +/* + * NSC/Cyrix CPU indexed register access. Must be inlined instead of + * macros to ensure correct access ordering + * Access order is always 0x22 (=offset), 0x23 (=value) + * + * When using the old macros a line like + * setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); + * gets expanded to: + * do { + * outb((CX86_CCR2), 0x22); + * outb((({ + * outb((CX86_CCR2), 0x22); + * inb(0x23); + * }) | 0x88), 0x23); + * } while (0); + * + * which in fact violates the access order (= 0x22, 0x22, 0x23, 0x23). + */ + +static inline u8 getCx86(u8 reg) +{ + outb(reg, 0x22); + return inb(0x23); +} + +static inline void setCx86(u8 reg, u8 data) +{ + outb(reg, 0x22); + outb(data, 0x23); +} + +#define getCx86_old(reg) ({ outb((reg), 0x22); inb(0x23); }) + +#define setCx86_old(reg, data) do { \ + outb((reg), 0x22); \ + outb((data), 0x23); \ +} while (0) + diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h new file mode 100644 index 00000000000..7a3e836eb2a --- /dev/null +++ b/arch/x86/include/asm/processor-flags.h @@ -0,0 +1,100 @@ +#ifndef _ASM_X86_PROCESSOR_FLAGS_H +#define _ASM_X86_PROCESSOR_FLAGS_H +/* Various flags defined: can be included from assembler. */ + +/* + * EFLAGS bits + */ +#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ +#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ +#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ +#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ +#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ +#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ +#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ +#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ +#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ +#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ +#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ +#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ +#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ +#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ +#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ +#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ +#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ + +/* + * Basic CPU control in CR0 + */ +#define X86_CR0_PE 0x00000001 /* Protection Enable */ +#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor */ +#define X86_CR0_EM 0x00000004 /* Emulation */ +#define X86_CR0_TS 0x00000008 /* Task Switched */ +#define X86_CR0_ET 0x00000010 /* Extension Type */ +#define X86_CR0_NE 0x00000020 /* Numeric Error */ +#define X86_CR0_WP 0x00010000 /* Write Protect */ +#define X86_CR0_AM 0x00040000 /* Alignment Mask */ +#define X86_CR0_NW 0x20000000 /* Not Write-through */ +#define X86_CR0_CD 0x40000000 /* Cache Disable */ +#define X86_CR0_PG 0x80000000 /* Paging */ + +/* + * Paging options in CR3 + */ +#define X86_CR3_PWT 0x00000008 /* Page Write Through */ +#define X86_CR3_PCD 0x00000010 /* Page Cache Disable */ + +/* + * Intel CPU features in CR4 + */ +#define X86_CR4_VME 0x00000001 /* enable vm86 extensions */ +#define X86_CR4_PVI 0x00000002 /* virtual interrupts flag enable */ +#define X86_CR4_TSD 0x00000004 /* disable time stamp at ipl 3 */ +#define X86_CR4_DE 0x00000008 /* enable debugging extensions */ +#define X86_CR4_PSE 0x00000010 /* enable page size extensions */ +#define X86_CR4_PAE 0x00000020 /* enable physical address extensions */ +#define X86_CR4_MCE 0x00000040 /* Machine check enable */ +#define X86_CR4_PGE 0x00000080 /* enable global pages */ +#define X86_CR4_PCE 0x00000100 /* enable performance counters at ipl 3 */ +#define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */ +#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ +#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ +#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ + +/* + * x86-64 Task Priority Register, CR8 + */ +#define X86_CR8_TPR 0x0000000F /* task priority register */ + +/* + * AMD and Transmeta use MSRs for configuration; see <asm/msr-index.h> + */ + +/* + * NSC/Cyrix CPU configuration register indexes + */ +#define CX86_PCR0 0x20 +#define CX86_GCR 0xb8 +#define CX86_CCR0 0xc0 +#define CX86_CCR1 0xc1 +#define CX86_CCR2 0xc2 +#define CX86_CCR3 0xc3 +#define CX86_CCR4 0xe8 +#define CX86_CCR5 0xe9 +#define CX86_CCR6 0xea +#define CX86_CCR7 0xeb +#define CX86_PCR1 0xf0 +#define CX86_DIR0 0xfe +#define CX86_DIR1 0xff +#define CX86_ARR_BASE 0xc4 +#define CX86_RCR_BASE 0xdc + +#ifdef __KERNEL__ +#ifdef CONFIG_VM86 +#define X86_VM_MASK X86_EFLAGS_VM +#else +#define X86_VM_MASK 0 /* No VM86 support */ +#endif +#endif + +#endif /* _ASM_X86_PROCESSOR_FLAGS_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h new file mode 100644 index 00000000000..5ca01e38326 --- /dev/null +++ b/arch/x86/include/asm/processor.h @@ -0,0 +1,936 @@ +#ifndef _ASM_X86_PROCESSOR_H +#define _ASM_X86_PROCESSOR_H + +#include <asm/processor-flags.h> + +/* Forward declaration, a strange C thing */ +struct task_struct; +struct mm_struct; + +#include <asm/vm86.h> +#include <asm/math_emu.h> +#include <asm/segment.h> +#include <asm/types.h> +#include <asm/sigcontext.h> +#include <asm/current.h> +#include <asm/cpufeature.h> +#include <asm/system.h> +#include <asm/page.h> +#include <asm/percpu.h> +#include <asm/msr.h> +#include <asm/desc_defs.h> +#include <asm/nops.h> +#include <asm/ds.h> + +#include <linux/personality.h> +#include <linux/cpumask.h> +#include <linux/cache.h> +#include <linux/threads.h> +#include <linux/init.h> + +/* + * Default implementation of macro that returns current + * instruction pointer ("program counter"). + */ +static inline void *current_text_addr(void) +{ + void *pc; + + asm volatile("mov $1f, %0; 1:":"=r" (pc)); + + return pc; +} + +#ifdef CONFIG_X86_VSMP +# define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT) +# define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT) +#else +# define ARCH_MIN_TASKALIGN 16 +# define ARCH_MIN_MMSTRUCT_ALIGN 0 +#endif + +/* + * CPU type and hardware bug flags. Kept separately for each CPU. + * Members of this structure are referenced in head.S, so think twice + * before touching them. [mj] + */ + +struct cpuinfo_x86 { + __u8 x86; /* CPU family */ + __u8 x86_vendor; /* CPU vendor */ + __u8 x86_model; + __u8 x86_mask; +#ifdef CONFIG_X86_32 + char wp_works_ok; /* It doesn't on 386's */ + + /* Problems on some 486Dx4's and old 386's: */ + char hlt_works_ok; + char hard_math; + char rfu; + char fdiv_bug; + char f00f_bug; + char coma_bug; + char pad0; +#else + /* Number of 4K pages in DTLB/ITLB combined(in pages): */ + int x86_tlbsize; + __u8 x86_virt_bits; + __u8 x86_phys_bits; +#endif + /* CPUID returned core id bits: */ + __u8 x86_coreid_bits; + /* Max extended CPUID function supported: */ + __u32 extended_cpuid_level; + /* Maximum supported CPUID level, -1=no CPUID: */ + int cpuid_level; + __u32 x86_capability[NCAPINTS]; + char x86_vendor_id[16]; + char x86_model_id[64]; + /* in KB - valid for CPUS which support this call: */ + int x86_cache_size; + int x86_cache_alignment; /* In bytes */ + int x86_power; + unsigned long loops_per_jiffy; +#ifdef CONFIG_SMP + /* cpus sharing the last level cache: */ + cpumask_t llc_shared_map; +#endif + /* cpuid returned max cores value: */ + u16 x86_max_cores; + u16 apicid; + u16 initial_apicid; + u16 x86_clflush_size; +#ifdef CONFIG_SMP + /* number of cores as seen by the OS: */ + u16 booted_cores; + /* Physical processor id: */ + u16 phys_proc_id; + /* Core id: */ + u16 cpu_core_id; + /* Index into per_cpu list: */ + u16 cpu_index; +#endif +} __attribute__((__aligned__(SMP_CACHE_BYTES))); + +#define X86_VENDOR_INTEL 0 +#define X86_VENDOR_CYRIX 1 +#define X86_VENDOR_AMD 2 +#define X86_VENDOR_UMC 3 +#define X86_VENDOR_CENTAUR 5 +#define X86_VENDOR_TRANSMETA 7 +#define X86_VENDOR_NSC 8 +#define X86_VENDOR_NUM 9 + +#define X86_VENDOR_UNKNOWN 0xff + +/* + * capabilities of CPUs + */ +extern struct cpuinfo_x86 boot_cpu_data; +extern struct cpuinfo_x86 new_cpu_data; + +extern struct tss_struct doublefault_tss; +extern __u32 cleared_cpu_caps[NCAPINTS]; + +#ifdef CONFIG_SMP +DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); +#define cpu_data(cpu) per_cpu(cpu_info, cpu) +#define current_cpu_data __get_cpu_var(cpu_info) +#else +#define cpu_data(cpu) boot_cpu_data +#define current_cpu_data boot_cpu_data +#endif + +extern const struct seq_operations cpuinfo_op; + +static inline int hlt_works(int cpu) +{ +#ifdef CONFIG_X86_32 + return cpu_data(cpu).hlt_works_ok; +#else + return 1; +#endif +} + +#define cache_line_size() (boot_cpu_data.x86_cache_alignment) + +extern void cpu_detect(struct cpuinfo_x86 *c); + +extern struct pt_regs *idle_regs(struct pt_regs *); + +extern void early_cpu_init(void); +extern void identify_boot_cpu(void); +extern void identify_secondary_cpu(struct cpuinfo_x86 *); +extern void print_cpu_info(struct cpuinfo_x86 *); +extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); +extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); +extern unsigned short num_cache_leaves; + +extern void detect_extended_topology(struct cpuinfo_x86 *c); +extern void detect_ht(struct cpuinfo_x86 *c); + +static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + asm("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +static inline void load_cr3(pgd_t *pgdir) +{ + write_cr3(__pa(pgdir)); +} + +#ifdef CONFIG_X86_32 +/* This is the TSS defined by the hardware. */ +struct x86_hw_tss { + unsigned short back_link, __blh; + unsigned long sp0; + unsigned short ss0, __ss0h; + unsigned long sp1; + /* ss1 caches MSR_IA32_SYSENTER_CS: */ + unsigned short ss1, __ss1h; + unsigned long sp2; + unsigned short ss2, __ss2h; + unsigned long __cr3; + unsigned long ip; + unsigned long flags; + unsigned long ax; + unsigned long cx; + unsigned long dx; + unsigned long bx; + unsigned long sp; + unsigned long bp; + unsigned long si; + unsigned long di; + unsigned short es, __esh; + unsigned short cs, __csh; + unsigned short ss, __ssh; + unsigned short ds, __dsh; + unsigned short fs, __fsh; + unsigned short gs, __gsh; + unsigned short ldt, __ldth; + unsigned short trace; + unsigned short io_bitmap_base; + +} __attribute__((packed)); +#else +struct x86_hw_tss { + u32 reserved1; + u64 sp0; + u64 sp1; + u64 sp2; + u64 reserved2; + u64 ist[7]; + u32 reserved3; + u32 reserved4; + u16 reserved5; + u16 io_bitmap_base; + +} __attribute__((packed)) ____cacheline_aligned; +#endif + +/* + * IO-bitmap sizes: + */ +#define IO_BITMAP_BITS 65536 +#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) +#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) +#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap) +#define INVALID_IO_BITMAP_OFFSET 0x8000 +#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000 + +struct tss_struct { + /* + * The hardware state: + */ + struct x86_hw_tss x86_tss; + + /* + * The extra 1 is there because the CPU will access an + * additional byte beyond the end of the IO permission + * bitmap. The extra byte must be all 1 bits, and must + * be within the limit. + */ + unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; + /* + * Cache the current maximum and the last task that used the bitmap: + */ + unsigned long io_bitmap_max; + struct thread_struct *io_bitmap_owner; + + /* + * .. and then another 0x100 bytes for the emergency kernel stack: + */ + unsigned long stack[64]; + +} ____cacheline_aligned; + +DECLARE_PER_CPU(struct tss_struct, init_tss); + +/* + * Save the original ist values for checking stack pointers during debugging + */ +struct orig_ist { + unsigned long ist[7]; +}; + +#define MXCSR_DEFAULT 0x1f80 + +struct i387_fsave_struct { + u32 cwd; /* FPU Control Word */ + u32 swd; /* FPU Status Word */ + u32 twd; /* FPU Tag Word */ + u32 fip; /* FPU IP Offset */ + u32 fcs; /* FPU IP Selector */ + u32 foo; /* FPU Operand Pointer Offset */ + u32 fos; /* FPU Operand Pointer Selector */ + + /* 8*10 bytes for each FP-reg = 80 bytes: */ + u32 st_space[20]; + + /* Software status information [not touched by FSAVE ]: */ + u32 status; +}; + +struct i387_fxsave_struct { + u16 cwd; /* Control Word */ + u16 swd; /* Status Word */ + u16 twd; /* Tag Word */ + u16 fop; /* Last Instruction Opcode */ + union { + struct { + u64 rip; /* Instruction Pointer */ + u64 rdp; /* Data Pointer */ + }; + struct { + u32 fip; /* FPU IP Offset */ + u32 fcs; /* FPU IP Selector */ + u32 foo; /* FPU Operand Offset */ + u32 fos; /* FPU Operand Selector */ + }; + }; + u32 mxcsr; /* MXCSR Register State */ + u32 mxcsr_mask; /* MXCSR Mask */ + + /* 8*16 bytes for each FP-reg = 128 bytes: */ + u32 st_space[32]; + + /* 16*16 bytes for each XMM-reg = 256 bytes: */ + u32 xmm_space[64]; + + u32 padding[12]; + + union { + u32 padding1[12]; + u32 sw_reserved[12]; + }; + +} __attribute__((aligned(16))); + +struct i387_soft_struct { + u32 cwd; + u32 swd; + u32 twd; + u32 fip; + u32 fcs; + u32 foo; + u32 fos; + /* 8*10 bytes for each FP-reg = 80 bytes: */ + u32 st_space[20]; + u8 ftop; + u8 changed; + u8 lookahead; + u8 no_update; + u8 rm; + u8 alimit; + struct info *info; + u32 entry_eip; +}; + +struct xsave_hdr_struct { + u64 xstate_bv; + u64 reserved1[2]; + u64 reserved2[5]; +} __attribute__((packed)); + +struct xsave_struct { + struct i387_fxsave_struct i387; + struct xsave_hdr_struct xsave_hdr; + /* new processor state extensions will go here */ +} __attribute__ ((packed, aligned (64))); + +union thread_xstate { + struct i387_fsave_struct fsave; + struct i387_fxsave_struct fxsave; + struct i387_soft_struct soft; + struct xsave_struct xsave; +}; + +#ifdef CONFIG_X86_64 +DECLARE_PER_CPU(struct orig_ist, orig_ist); +#endif + +extern void print_cpu_info(struct cpuinfo_x86 *); +extern unsigned int xstate_size; +extern void free_thread_xstate(struct task_struct *); +extern struct kmem_cache *task_xstate_cachep; +extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); +extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); +extern unsigned short num_cache_leaves; + +struct thread_struct { + /* Cached TLS descriptors: */ + struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; + unsigned long sp0; + unsigned long sp; +#ifdef CONFIG_X86_32 + unsigned long sysenter_cs; +#else + unsigned long usersp; /* Copy from PDA */ + unsigned short es; + unsigned short ds; + unsigned short fsindex; + unsigned short gsindex; +#endif + unsigned long ip; + unsigned long fs; + unsigned long gs; + /* Hardware debugging registers: */ + unsigned long debugreg0; + unsigned long debugreg1; + unsigned long debugreg2; + unsigned long debugreg3; + unsigned long debugreg6; + unsigned long debugreg7; + /* Fault info: */ + unsigned long cr2; + unsigned long trap_no; + unsigned long error_code; + /* floating point and extended processor state */ + union thread_xstate *xstate; +#ifdef CONFIG_X86_32 + /* Virtual 86 mode info */ + struct vm86_struct __user *vm86_info; + unsigned long screen_bitmap; + unsigned long v86flags; + unsigned long v86mask; + unsigned long saved_sp0; + unsigned int saved_fs; + unsigned int saved_gs; +#endif + /* IO permissions: */ + unsigned long *io_bitmap_ptr; + unsigned long iopl; + /* Max allowed port in the bitmap, in bytes: */ + unsigned io_bitmap_max; +/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ + unsigned long debugctlmsr; +#ifdef CONFIG_X86_DS +/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */ + struct ds_context *ds_ctx; +#endif /* CONFIG_X86_DS */ +#ifdef CONFIG_X86_PTRACE_BTS +/* the signal to send on a bts buffer overflow */ + unsigned int bts_ovfl_signal; +#endif /* CONFIG_X86_PTRACE_BTS */ +}; + +static inline unsigned long native_get_debugreg(int regno) +{ + unsigned long val = 0; /* Damn you, gcc! */ + + switch (regno) { + case 0: + asm("mov %%db0, %0" :"=r" (val)); + break; + case 1: + asm("mov %%db1, %0" :"=r" (val)); + break; + case 2: + asm("mov %%db2, %0" :"=r" (val)); + break; + case 3: + asm("mov %%db3, %0" :"=r" (val)); + break; + case 6: + asm("mov %%db6, %0" :"=r" (val)); + break; + case 7: + asm("mov %%db7, %0" :"=r" (val)); + break; + default: + BUG(); + } + return val; +} + +static inline void native_set_debugreg(int regno, unsigned long value) +{ + switch (regno) { + case 0: + asm("mov %0, %%db0" ::"r" (value)); + break; + case 1: + asm("mov %0, %%db1" ::"r" (value)); + break; + case 2: + asm("mov %0, %%db2" ::"r" (value)); + break; + case 3: + asm("mov %0, %%db3" ::"r" (value)); + break; + case 6: + asm("mov %0, %%db6" ::"r" (value)); + break; + case 7: + asm("mov %0, %%db7" ::"r" (value)); + break; + default: + BUG(); + } +} + +/* + * Set IOPL bits in EFLAGS from given mask + */ +static inline void native_set_iopl_mask(unsigned mask) +{ +#ifdef CONFIG_X86_32 + unsigned int reg; + + asm volatile ("pushfl;" + "popl %0;" + "andl %1, %0;" + "orl %2, %0;" + "pushl %0;" + "popfl" + : "=&r" (reg) + : "i" (~X86_EFLAGS_IOPL), "r" (mask)); +#endif +} + +static inline void +native_load_sp0(struct tss_struct *tss, struct thread_struct *thread) +{ + tss->x86_tss.sp0 = thread->sp0; +#ifdef CONFIG_X86_32 + /* Only happens when SEP is enabled, no need to test "SEP"arately: */ + if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { + tss->x86_tss.ss1 = thread->sysenter_cs; + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); + } +#endif +} + +static inline void native_swapgs(void) +{ +#ifdef CONFIG_X86_64 + asm volatile("swapgs" ::: "memory"); +#endif +} + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define __cpuid native_cpuid +#define paravirt_enabled() 0 + +/* + * These special macros can be used to get or set a debugging register + */ +#define get_debugreg(var, register) \ + (var) = native_get_debugreg(register) +#define set_debugreg(value, register) \ + native_set_debugreg(register, value) + +static inline void load_sp0(struct tss_struct *tss, + struct thread_struct *thread) +{ + native_load_sp0(tss, thread); +} + +#define set_iopl_mask native_set_iopl_mask +#endif /* CONFIG_PARAVIRT */ + +/* + * Save the cr4 feature set we're using (ie + * Pentium 4MB enable and PPro Global page + * enable), so that any CPU's that boot up + * after us can get the correct flags. + */ +extern unsigned long mmu_cr4_features; + +static inline void set_in_cr4(unsigned long mask) +{ + unsigned cr4; + + mmu_cr4_features |= mask; + cr4 = read_cr4(); + cr4 |= mask; + write_cr4(cr4); +} + +static inline void clear_in_cr4(unsigned long mask) +{ + unsigned cr4; + + mmu_cr4_features &= ~mask; + cr4 = read_cr4(); + cr4 &= ~mask; + write_cr4(cr4); +} + +typedef struct { + unsigned long seg; +} mm_segment_t; + + +/* + * create a kernel thread without removing it from tasklists + */ +extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); + +/* Free all resources held by a thread. */ +extern void release_thread(struct task_struct *); + +/* Prepare to copy thread state - unlazy all lazy state */ +extern void prepare_to_copy(struct task_struct *tsk); + +unsigned long get_wchan(struct task_struct *p); + +/* + * Generic CPUID function + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx + * resulting in stale register contents being returned. + */ +static inline void cpuid(unsigned int op, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + *eax = op; + *ecx = 0; + __cpuid(eax, ebx, ecx, edx); +} + +/* Some CPUID calls want 'count' to be placed in ecx */ +static inline void cpuid_count(unsigned int op, int count, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + *eax = op; + *ecx = count; + __cpuid(eax, ebx, ecx, edx); +} + +/* + * CPUID functions returning a single datum + */ +static inline unsigned int cpuid_eax(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return eax; +} + +static inline unsigned int cpuid_ebx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return ebx; +} + +static inline unsigned int cpuid_ecx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return ecx; +} + +static inline unsigned int cpuid_edx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return edx; +} + +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ +static inline void rep_nop(void) +{ + asm volatile("rep; nop" ::: "memory"); +} + +static inline void cpu_relax(void) +{ + rep_nop(); +} + +/* Stop speculative execution: */ +static inline void sync_core(void) +{ + int tmp; + + asm volatile("cpuid" : "=a" (tmp) : "0" (1) + : "ebx", "ecx", "edx", "memory"); +} + +static inline void __monitor(const void *eax, unsigned long ecx, + unsigned long edx) +{ + /* "monitor %eax, %ecx, %edx;" */ + asm volatile(".byte 0x0f, 0x01, 0xc8;" + :: "a" (eax), "c" (ecx), "d"(edx)); +} + +static inline void __mwait(unsigned long eax, unsigned long ecx) +{ + /* "mwait %eax, %ecx;" */ + asm volatile(".byte 0x0f, 0x01, 0xc9;" + :: "a" (eax), "c" (ecx)); +} + +static inline void __sti_mwait(unsigned long eax, unsigned long ecx) +{ + trace_hardirqs_on(); + /* "mwait %eax, %ecx;" */ + asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" + :: "a" (eax), "c" (ecx)); +} + +extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); + +extern void select_idle_routine(const struct cpuinfo_x86 *c); + +extern unsigned long boot_option_idle_override; +extern unsigned long idle_halt; +extern unsigned long idle_nomwait; + +/* + * on systems with caches, caches must be flashed as the absolute + * last instruction before going into a suspended halt. Otherwise, + * dirty data can linger in the cache and become stale on resume, + * leading to strange errors. + * + * perform a variety of operations to guarantee that the compiler + * will not reorder instructions. wbinvd itself is serializing + * so the processor will not reorder. + * + * Systems without cache can just go into halt. + */ +static inline void wbinvd_halt(void) +{ + mb(); + /* check for clflush to determine if wbinvd is legal */ + if (cpu_has_clflush) + asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory"); + else + while (1) + halt(); +} + +extern void enable_sep_cpu(void); +extern int sysenter_setup(void); + +/* Defined in head.S */ +extern struct desc_ptr early_gdt_descr; + +extern void cpu_set_gdt(int); +extern void switch_to_new_gdt(void); +extern void cpu_init(void); +extern void init_gdt(int cpu); + +static inline void update_debugctlmsr(unsigned long debugctlmsr) +{ +#ifndef CONFIG_X86_DEBUGCTLMSR + if (boot_cpu_data.x86 < 6) + return; +#endif + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); +} + +/* + * from system description table in BIOS. Mostly for MCA use, but + * others may find it useful: + */ +extern unsigned int machine_id; +extern unsigned int machine_submodel_id; +extern unsigned int BIOS_revision; + +/* Boot loader type from the setup header: */ +extern int bootloader_type; + +extern char ignore_fpu_irq; + +#define HAVE_ARCH_PICK_MMAP_LAYOUT 1 +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +#ifdef CONFIG_X86_32 +# define BASE_PREFETCH ASM_NOP4 +# define ARCH_HAS_PREFETCH +#else +# define BASE_PREFETCH "prefetcht0 (%1)" +#endif + +/* + * Prefetch instructions for Pentium III (+) and AMD Athlon (+) + * + * It's not worth to care about 3dnow prefetches for the K6 + * because they are microcoded there and very slow. + */ +static inline void prefetch(const void *x) +{ + alternative_input(BASE_PREFETCH, + "prefetchnta (%1)", + X86_FEATURE_XMM, + "r" (x)); +} + +/* + * 3dnow prefetch to get an exclusive cache line. + * Useful for spinlocks to avoid one state transition in the + * cache coherency protocol: + */ +static inline void prefetchw(const void *x) +{ + alternative_input(BASE_PREFETCH, + "prefetchw (%1)", + X86_FEATURE_3DNOW, + "r" (x)); +} + +static inline void spin_lock_prefetch(const void *x) +{ + prefetchw(x); +} + +#ifdef CONFIG_X86_32 +/* + * User space process size: 3GB (default). + */ +#define TASK_SIZE PAGE_OFFSET +#define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX STACK_TOP + +#define INIT_THREAD { \ + .sp0 = sizeof(init_stack) + (long)&init_stack, \ + .vm86_info = NULL, \ + .sysenter_cs = __KERNEL_CS, \ + .io_bitmap_ptr = NULL, \ + .fs = __KERNEL_PERCPU, \ +} + +/* + * Note that the .io_bitmap member must be extra-big. This is because + * the CPU will access an additional byte beyond the end of the IO + * permission bitmap. The extra byte must be all 1 bits, and must + * be within the limit. + */ +#define INIT_TSS { \ + .x86_tss = { \ + .sp0 = sizeof(init_stack) + (long)&init_stack, \ + .ss0 = __KERNEL_DS, \ + .ss1 = __KERNEL_CS, \ + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ + }, \ + .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \ +} + +extern unsigned long thread_saved_pc(struct task_struct *tsk); + +#define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long)) +#define KSTK_TOP(info) \ +({ \ + unsigned long *__ptr = (unsigned long *)(info); \ + (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \ +}) + +/* + * The below -8 is to reserve 8 bytes on top of the ring0 stack. + * This is necessary to guarantee that the entire "struct pt_regs" + * is accessable even if the CPU haven't stored the SS/ESP registers + * on the stack (interrupt gate does not save these registers + * when switching to the same priv ring). + * Therefore beware: accessing the ss/esp fields of the + * "struct pt_regs" is possible, but they may contain the + * completely wrong values. + */ +#define task_pt_regs(task) \ +({ \ + struct pt_regs *__regs__; \ + __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \ + __regs__ - 1; \ +}) + +#define KSTK_ESP(task) (task_pt_regs(task)->sp) + +#else +/* + * User space process size. 47bits minus one guard page. + */ +#define TASK_SIZE64 ((1UL << 47) - PAGE_SIZE) + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ + 0xc0000000 : 0xFFFFe000) + +#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \ + IA32_PAGE_OFFSET : TASK_SIZE64) +#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \ + IA32_PAGE_OFFSET : TASK_SIZE64) + +#define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX TASK_SIZE64 + +#define INIT_THREAD { \ + .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ +} + +#define INIT_TSS { \ + .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ +} + +/* + * Return saved PC of a blocked thread. + * What is this good for? it will be always the scheduler or ret_from_fork. + */ +#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8)) + +#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) +#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */ +#endif /* CONFIG_X86_64 */ + +extern void start_thread(struct pt_regs *regs, unsigned long new_ip, + unsigned long new_sp); + +/* + * This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) + +#define KSTK_EIP(task) (task_pt_regs(task)->ip) + +/* Get/set a process' ability to use the timestamp counter instruction */ +#define GET_TSC_CTL(adr) get_tsc_mode((adr)) +#define SET_TSC_CTL(val) set_tsc_mode((val)) + +extern int get_tsc_mode(unsigned long adr); +extern int set_tsc_mode(unsigned int val); + +#endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h new file mode 100644 index 00000000000..d6a22f92ba7 --- /dev/null +++ b/arch/x86/include/asm/proto.h @@ -0,0 +1,32 @@ +#ifndef _ASM_X86_PROTO_H +#define _ASM_X86_PROTO_H + +#include <asm/ldt.h> + +/* misc architecture specific prototypes */ + +extern void early_idt_handler(void); + +extern void system_call(void); +extern void syscall_init(void); + +extern void ia32_syscall(void); +extern void ia32_cstar_target(void); +extern void ia32_sysenter_target(void); + +extern void syscall32_cpu_init(void); + +extern void check_efer(void); + +#ifdef CONFIG_X86_BIOS_REBOOT +extern int reboot_force; +#else +static const int reboot_force = 0; +#endif + +long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); + +#define round_up(x, y) (((x) + (y) - 1) & ~((y) - 1)) +#define round_down(x, y) ((x) & ~((y) - 1)) + +#endif /* _ASM_X86_PROTO_H */ diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/asm/ptrace-abi.h new file mode 100644 index 00000000000..25f1bb8fc62 --- /dev/null +++ b/arch/x86/include/asm/ptrace-abi.h @@ -0,0 +1,145 @@ +#ifndef _ASM_X86_PTRACE_ABI_H +#define _ASM_X86_PTRACE_ABI_H + +#ifdef __i386__ + +#define EBX 0 +#define ECX 1 +#define EDX 2 +#define ESI 3 +#define EDI 4 +#define EBP 5 +#define EAX 6 +#define DS 7 +#define ES 8 +#define FS 9 +#define GS 10 +#define ORIG_EAX 11 +#define EIP 12 +#define CS 13 +#define EFL 14 +#define UESP 15 +#define SS 16 +#define FRAME_SIZE 17 + +#else /* __i386__ */ + +#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) +#define R15 0 +#define R14 8 +#define R13 16 +#define R12 24 +#define RBP 32 +#define RBX 40 +/* arguments: interrupts/non tracing syscalls only save upto here*/ +#define R11 48 +#define R10 56 +#define R9 64 +#define R8 72 +#define RAX 80 +#define RCX 88 +#define RDX 96 +#define RSI 104 +#define RDI 112 +#define ORIG_RAX 120 /* = ERROR */ +/* end of arguments */ +/* cpu exception frame or undefined in case of fast syscall. */ +#define RIP 128 +#define CS 136 +#define EFLAGS 144 +#define RSP 152 +#define SS 160 +#define ARGOFFSET R11 +#endif /* __ASSEMBLY__ */ + +/* top of stack page */ +#define FRAME_SIZE 168 + +#endif /* !__i386__ */ + +/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */ +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 +#define PTRACE_GETFPXREGS 18 +#define PTRACE_SETFPXREGS 19 + +#define PTRACE_OLDSETOPTIONS 21 + +/* only useful for access 32bit programs / kernels */ +#define PTRACE_GET_THREAD_AREA 25 +#define PTRACE_SET_THREAD_AREA 26 + +#ifdef __x86_64__ +# define PTRACE_ARCH_PRCTL 30 +#endif + +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 + +#define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ + +#ifdef CONFIG_X86_PTRACE_BTS + +#ifndef __ASSEMBLY__ +#include <asm/types.h> + +/* configuration/status structure used in PTRACE_BTS_CONFIG and + PTRACE_BTS_STATUS commands. +*/ +struct ptrace_bts_config { + /* requested or actual size of BTS buffer in bytes */ + __u32 size; + /* bitmask of below flags */ + __u32 flags; + /* buffer overflow signal */ + __u32 signal; + /* actual size of bts_struct in bytes */ + __u32 bts_size; +}; +#endif /* __ASSEMBLY__ */ + +#define PTRACE_BTS_O_TRACE 0x1 /* branch trace */ +#define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */ +#define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow + instead of wrapping around */ +#define PTRACE_BTS_O_ALLOC 0x8 /* (re)allocate buffer */ + +#define PTRACE_BTS_CONFIG 40 +/* Configure branch trace recording. + ADDR points to a struct ptrace_bts_config. + DATA gives the size of that buffer. + A new buffer is allocated, if requested in the flags. + An overflow signal may only be requested for new buffers. + Returns the number of bytes read. +*/ +#define PTRACE_BTS_STATUS 41 +/* Return the current configuration in a struct ptrace_bts_config + pointed to by ADDR; DATA gives the size of that buffer. + Returns the number of bytes written. +*/ +#define PTRACE_BTS_SIZE 42 +/* Return the number of available BTS records for draining. + DATA and ADDR are ignored. +*/ +#define PTRACE_BTS_GET 43 +/* Get a single BTS record. + DATA defines the index into the BTS array, where 0 is the newest + entry, and higher indices refer to older entries. + ADDR is pointing to struct bts_struct (see asm/ds.h). +*/ +#define PTRACE_BTS_CLEAR 44 +/* Clear the BTS buffer. + DATA and ADDR are ignored. +*/ +#define PTRACE_BTS_DRAIN 45 +/* Read all available BTS records and clear the buffer. + ADDR points to an array of struct bts_struct. + DATA gives the size of that buffer. + BTS records are read from oldest to newest. + Returns number of BTS records drained. +*/ +#endif /* CONFIG_X86_PTRACE_BTS */ + +#endif /* _ASM_X86_PTRACE_ABI_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h new file mode 100644 index 00000000000..d1531c8480b --- /dev/null +++ b/arch/x86/include/asm/ptrace.h @@ -0,0 +1,280 @@ +#ifndef _ASM_X86_PTRACE_H +#define _ASM_X86_PTRACE_H + +#include <linux/compiler.h> /* For __user */ +#include <asm/ptrace-abi.h> +#include <asm/processor-flags.h> + +#ifdef __KERNEL__ +#include <asm/ds.h> /* the DS BTS struct is used for ptrace too */ +#include <asm/segment.h> +#endif + +#ifndef __ASSEMBLY__ + +#ifdef __i386__ +/* this struct defines the way the registers are stored on the + stack during a system call. */ + +#ifndef __KERNEL__ + +struct pt_regs { + long ebx; + long ecx; + long edx; + long esi; + long edi; + long ebp; + long eax; + int xds; + int xes; + int xfs; + /* int gs; */ + long orig_eax; + long eip; + int xcs; + long eflags; + long esp; + int xss; +}; + +#else /* __KERNEL__ */ + +struct pt_regs { + unsigned long bx; + unsigned long cx; + unsigned long dx; + unsigned long si; + unsigned long di; + unsigned long bp; + unsigned long ax; + unsigned long ds; + unsigned long es; + unsigned long fs; + /* int gs; */ + unsigned long orig_ax; + unsigned long ip; + unsigned long cs; + unsigned long flags; + unsigned long sp; + unsigned long ss; +}; + +#endif /* __KERNEL__ */ + +#else /* __i386__ */ + +#ifndef __KERNEL__ + +struct pt_regs { + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; + unsigned long rbp; + unsigned long rbx; +/* arguments: non interrupts/non tracing syscalls only save upto here*/ + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long rax; + unsigned long rcx; + unsigned long rdx; + unsigned long rsi; + unsigned long rdi; + unsigned long orig_rax; +/* end of arguments */ +/* cpu exception frame or undefined */ + unsigned long rip; + unsigned long cs; + unsigned long eflags; + unsigned long rsp; + unsigned long ss; +/* top of stack page */ +}; + +#else /* __KERNEL__ */ + +struct pt_regs { + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; + unsigned long bp; + unsigned long bx; +/* arguments: non interrupts/non tracing syscalls only save upto here*/ + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long ax; + unsigned long cx; + unsigned long dx; + unsigned long si; + unsigned long di; + unsigned long orig_ax; +/* end of arguments */ +/* cpu exception frame or undefined */ + unsigned long ip; + unsigned long cs; + unsigned long flags; + unsigned long sp; + unsigned long ss; +/* top of stack page */ +}; + +#endif /* __KERNEL__ */ +#endif /* !__i386__ */ + + +#ifdef CONFIG_X86_PTRACE_BTS +/* a branch trace record entry + * + * In order to unify the interface between various processor versions, + * we use the below data structure for all processors. + */ +enum bts_qualifier { + BTS_INVALID = 0, + BTS_BRANCH, + BTS_TASK_ARRIVES, + BTS_TASK_DEPARTS +}; + +struct bts_struct { + __u64 qualifier; + union { + /* BTS_BRANCH */ + struct { + __u64 from_ip; + __u64 to_ip; + } lbr; + /* BTS_TASK_ARRIVES or + BTS_TASK_DEPARTS */ + __u64 jiffies; + } variant; +}; +#endif /* CONFIG_X86_PTRACE_BTS */ + +#ifdef __KERNEL__ + +#include <linux/init.h> + +struct cpuinfo_x86; +struct task_struct; + +#ifdef CONFIG_X86_PTRACE_BTS +extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *); +extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier); +#else +#define ptrace_bts_init_intel(config) do {} while (0) +#endif /* CONFIG_X86_PTRACE_BTS */ + +extern unsigned long profile_pc(struct pt_regs *regs); + +extern unsigned long +convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); +extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, + int error_code, int si_code); +void signal_fault(struct pt_regs *regs, void __user *frame, char *where); + +extern long syscall_trace_enter(struct pt_regs *); +extern void syscall_trace_leave(struct pt_regs *); + +static inline unsigned long regs_return_value(struct pt_regs *regs) +{ + return regs->ax; +} + +/* + * user_mode_vm(regs) determines whether a register set came from user mode. + * This is true if V8086 mode was enabled OR if the register set was from + * protected mode with RPL-3 CS value. This tricky test checks that with + * one comparison. Many places in the kernel can bypass this full check + * if they have already ruled out V8086 mode, so user_mode(regs) can be used. + */ +static inline int user_mode(struct pt_regs *regs) +{ +#ifdef CONFIG_X86_32 + return (regs->cs & SEGMENT_RPL_MASK) == USER_RPL; +#else + return !!(regs->cs & 3); +#endif +} + +static inline int user_mode_vm(struct pt_regs *regs) +{ +#ifdef CONFIG_X86_32 + return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= + USER_RPL; +#else + return user_mode(regs); +#endif +} + +static inline int v8086_mode(struct pt_regs *regs) +{ +#ifdef CONFIG_X86_32 + return (regs->flags & X86_VM_MASK); +#else + return 0; /* No V86 mode support in long mode */ +#endif +} + +/* + * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode + * when it traps. So regs will be the current sp. + * + * This is valid only for kernel mode traps. + */ +static inline unsigned long kernel_trap_sp(struct pt_regs *regs) +{ +#ifdef CONFIG_X86_32 + return (unsigned long)regs; +#else + return regs->sp; +#endif +} + +static inline unsigned long instruction_pointer(struct pt_regs *regs) +{ + return regs->ip; +} + +static inline unsigned long frame_pointer(struct pt_regs *regs) +{ + return regs->bp; +} + +static inline unsigned long user_stack_pointer(struct pt_regs *regs) +{ + return regs->sp; +} + +/* + * These are defined as per linux/ptrace.h, which see. + */ +#define arch_has_single_step() (1) +extern void user_enable_single_step(struct task_struct *); +extern void user_disable_single_step(struct task_struct *); + +extern void user_enable_block_step(struct task_struct *); +#ifdef CONFIG_X86_DEBUGCTLMSR +#define arch_has_block_step() (1) +#else +#define arch_has_block_step() (boot_cpu_data.x86 >= 6) +#endif + +struct user_desc; +extern int do_get_thread_area(struct task_struct *p, int idx, + struct user_desc __user *info); +extern int do_set_thread_area(struct task_struct *p, int idx, + struct user_desc __user *info, int can_allocate); + +#define __ARCH_WANT_COMPAT_SYS_PTRACE + +#endif /* __KERNEL__ */ + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_X86_PTRACE_H */ diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h new file mode 100644 index 00000000000..6d93508f262 --- /dev/null +++ b/arch/x86/include/asm/pvclock-abi.h @@ -0,0 +1,42 @@ +#ifndef _ASM_X86_PVCLOCK_ABI_H +#define _ASM_X86_PVCLOCK_ABI_H +#ifndef __ASSEMBLY__ + +/* + * These structs MUST NOT be changed. + * They are the ABI between hypervisor and guest OS. + * Both Xen and KVM are using this. + * + * pvclock_vcpu_time_info holds the system time and the tsc timestamp + * of the last update. So the guest can use the tsc delta to get a + * more precise system time. There is one per virtual cpu. + * + * pvclock_wall_clock references the point in time when the system + * time was zero (usually boot time), thus the guest calculates the + * current wall clock by adding the system time. + * + * Protocol for the "version" fields is: hypervisor raises it (making + * it uneven) before it starts updating the fields and raises it again + * (making it even) when it is done. Thus the guest can make sure the + * time values it got are consistent by checking the version before + * and after reading them. + */ + +struct pvclock_vcpu_time_info { + u32 version; + u32 pad0; + u64 tsc_timestamp; + u64 system_time; + u32 tsc_to_system_mul; + s8 tsc_shift; + u8 pad[3]; +} __attribute__((__packed__)); /* 32 bytes */ + +struct pvclock_wall_clock { + u32 version; + u32 sec; + u32 nsec; +} __attribute__((__packed__)); + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_PVCLOCK_ABI_H */ diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h new file mode 100644 index 00000000000..53235fd5f8c --- /dev/null +++ b/arch/x86/include/asm/pvclock.h @@ -0,0 +1,14 @@ +#ifndef _ASM_X86_PVCLOCK_H +#define _ASM_X86_PVCLOCK_H + +#include <linux/clocksource.h> +#include <asm/pvclock-abi.h> + +/* some helper functions for xen and kvm pv clock sources */ +cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); +unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); +void pvclock_read_wallclock(struct pvclock_wall_clock *wall, + struct pvclock_vcpu_time_info *vcpu, + struct timespec *ts); + +#endif /* _ASM_X86_PVCLOCK_H */ diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h new file mode 100644 index 00000000000..df7710354f8 --- /dev/null +++ b/arch/x86/include/asm/reboot.h @@ -0,0 +1,21 @@ +#ifndef _ASM_X86_REBOOT_H +#define _ASM_X86_REBOOT_H + +struct pt_regs; + +struct machine_ops { + void (*restart)(char *cmd); + void (*halt)(void); + void (*power_off)(void); + void (*shutdown)(void); + void (*crash_shutdown)(struct pt_regs *); + void (*emergency_restart)(void); +}; + +extern struct machine_ops machine_ops; + +void native_machine_crash_shutdown(struct pt_regs *regs); +void native_machine_shutdown(void); +void machine_real_restart(const unsigned char *code, int length); + +#endif /* _ASM_X86_REBOOT_H */ diff --git a/arch/x86/include/asm/reboot_fixups.h b/arch/x86/include/asm/reboot_fixups.h new file mode 100644 index 00000000000..765debe4c54 --- /dev/null +++ b/arch/x86/include/asm/reboot_fixups.h @@ -0,0 +1,6 @@ +#ifndef _ASM_X86_REBOOT_FIXUPS_H +#define _ASM_X86_REBOOT_FIXUPS_H + +extern void mach_reboot_fixups(void); + +#endif /* _ASM_X86_REBOOT_FIXUPS_H */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h new file mode 100644 index 00000000000..d5cd6c58688 --- /dev/null +++ b/arch/x86/include/asm/required-features.h @@ -0,0 +1,82 @@ +#ifndef _ASM_X86_REQUIRED_FEATURES_H +#define _ASM_X86_REQUIRED_FEATURES_H + +/* Define minimum CPUID feature set for kernel These bits are checked + really early to actually display a visible error message before the + kernel dies. Make sure to assign features to the proper mask! + + Some requirements that are not in CPUID yet are also in the + CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too. + + The real information is in arch/x86/Kconfig.cpu, this just converts + the CONFIGs into a bitmask */ + +#ifndef CONFIG_MATH_EMULATION +# define NEED_FPU (1<<(X86_FEATURE_FPU & 31)) +#else +# define NEED_FPU 0 +#endif + +#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) +# define NEED_PAE (1<<(X86_FEATURE_PAE & 31)) +#else +# define NEED_PAE 0 +#endif + +#ifdef CONFIG_X86_CMPXCHG64 +# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31)) +#else +# define NEED_CX8 0 +#endif + +#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64) +# define NEED_CMOV (1<<(X86_FEATURE_CMOV & 31)) +#else +# define NEED_CMOV 0 +#endif + +#ifdef CONFIG_X86_USE_3DNOW +# define NEED_3DNOW (1<<(X86_FEATURE_3DNOW & 31)) +#else +# define NEED_3DNOW 0 +#endif + +#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64) +# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31)) +#else +# define NEED_NOPL 0 +#endif + +#ifdef CONFIG_X86_64 +#define NEED_PSE 0 +#define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) +#define NEED_PGE (1<<(X86_FEATURE_PGE & 31)) +#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) +#define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) +#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) +#define NEED_LM (1<<(X86_FEATURE_LM & 31)) +#else +#define NEED_PSE 0 +#define NEED_MSR 0 +#define NEED_PGE 0 +#define NEED_FXSR 0 +#define NEED_XMM 0 +#define NEED_XMM2 0 +#define NEED_LM 0 +#endif + +#define REQUIRED_MASK0 (NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\ + NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\ + NEED_XMM|NEED_XMM2) +#define SSE_MASK (NEED_XMM|NEED_XMM2) + +#define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW) + +#define REQUIRED_MASK2 0 +#define REQUIRED_MASK3 (NEED_NOPL) +#define REQUIRED_MASK4 0 +#define REQUIRED_MASK5 0 +#define REQUIRED_MASK6 0 +#define REQUIRED_MASK7 0 + +#endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/include/asm/resource.h b/arch/x86/include/asm/resource.h new file mode 100644 index 00000000000..04bc4db8921 --- /dev/null +++ b/arch/x86/include/asm/resource.h @@ -0,0 +1 @@ +#include <asm-generic/resource.h> diff --git a/arch/x86/include/asm/resume-trace.h b/arch/x86/include/asm/resume-trace.h new file mode 100644 index 00000000000..3ff1c2cb1da --- /dev/null +++ b/arch/x86/include/asm/resume-trace.h @@ -0,0 +1,21 @@ +#ifndef _ASM_X86_RESUME_TRACE_H +#define _ASM_X86_RESUME_TRACE_H + +#include <asm/asm.h> + +#define TRACE_RESUME(user) \ +do { \ + if (pm_trace_enabled) { \ + const void *tracedata; \ + asm volatile(_ASM_MOV " $1f,%0\n" \ + ".section .tracedata,\"a\"\n" \ + "1:\t.word %c1\n\t" \ + _ASM_PTR " %c2\n" \ + ".previous" \ + :"=r" (tracedata) \ + : "i" (__LINE__), "i" (__FILE__)); \ + generate_resume_trace(tracedata, user); \ + } \ +} while (0) + +#endif /* _ASM_X86_RESUME_TRACE_H */ diff --git a/arch/x86/include/asm/rio.h b/arch/x86/include/asm/rio.h new file mode 100644 index 00000000000..97bab6388a9 --- /dev/null +++ b/arch/x86/include/asm/rio.h @@ -0,0 +1,63 @@ +/* + * Derived from include/asm-x86/mach-summit/mach_mpparse.h + * and include/asm-x86/mach-default/bios_ebda.h + * + * Author: Laurent Vivier <Laurent.Vivier@bull.net> + */ + +#ifndef _ASM_X86_RIO_H +#define _ASM_X86_RIO_H + +#define RIO_TABLE_VERSION 3 + +struct rio_table_hdr { + u8 version; /* Version number of this data structure */ + u8 num_scal_dev; /* # of Scalability devices */ + u8 num_rio_dev; /* # of RIO I/O devices */ +} __attribute__((packed)); + +struct scal_detail { + u8 node_id; /* Scalability Node ID */ + u32 CBAR; /* Address of 1MB register space */ + u8 port0node; /* Node ID port connected to: 0xFF=None */ + u8 port0port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 port1node; /* Node ID port connected to: 0xFF = None */ + u8 port1port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 port2node; /* Node ID port connected to: 0xFF = None */ + u8 port2port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 chassis_num; /* 1 based Chassis number (1 = boot node) */ +} __attribute__((packed)); + +struct rio_detail { + u8 node_id; /* RIO Node ID */ + u32 BBAR; /* Address of 1MB register space */ + u8 type; /* Type of device */ + u8 owner_id; /* Node ID of Hurricane that owns this */ + /* node */ + u8 port0node; /* Node ID port connected to: 0xFF=None */ + u8 port0port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 port1node; /* Node ID port connected to: 0xFF=None */ + u8 port1port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 first_slot; /* Lowest slot number below this Calgary */ + u8 status; /* Bit 0 = 1 : the XAPIC is used */ + /* = 0 : the XAPIC is not used, ie: */ + /* ints fwded to another XAPIC */ + /* Bits1:7 Reserved */ + u8 WP_index; /* instance index - lower ones have */ + /* lower slot numbers/PCI bus numbers */ + u8 chassis_num; /* 1 based Chassis number */ +} __attribute__((packed)); + +enum { + HURR_SCALABILTY = 0, /* Hurricane Scalability info */ + HURR_RIOIB = 2, /* Hurricane RIOIB info */ + COMPAT_CALGARY = 4, /* Compatibility Calgary */ + ALT_CALGARY = 5, /* Second Planar Calgary */ +}; + +#endif /* _ASM_X86_RIO_H */ diff --git a/arch/x86/include/asm/rtc.h b/arch/x86/include/asm/rtc.h new file mode 100644 index 00000000000..f71c3b0ed36 --- /dev/null +++ b/arch/x86/include/asm/rtc.h @@ -0,0 +1 @@ +#include <asm-generic/rtc.h> diff --git a/arch/x86/include/asm/rwlock.h b/arch/x86/include/asm/rwlock.h new file mode 100644 index 00000000000..6a8c0d64510 --- /dev/null +++ b/arch/x86/include/asm/rwlock.h @@ -0,0 +1,8 @@ +#ifndef _ASM_X86_RWLOCK_H +#define _ASM_X86_RWLOCK_H + +#define RW_LOCK_BIAS 0x01000000 + +/* Actual code is in asm/spinlock.h or in arch/x86/lib/rwlock.S */ + +#endif /* _ASM_X86_RWLOCK_H */ diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h new file mode 100644 index 00000000000..ca7517d3377 --- /dev/null +++ b/arch/x86/include/asm/rwsem.h @@ -0,0 +1,265 @@ +/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for i486+ + * + * Written by David Howells (dhowells@redhat.com). + * + * Derived from asm-x86/semaphore.h + * + * + * The MSW of the count is the negated number of active writers and waiting + * lockers, and the LSW is the total number of active locks + * + * The lock count is initialized to 0 (no active and no waiting lockers). + * + * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an + * uncontended lock. This can be determined because XADD returns the old value. + * Readers increment by 1 and see a positive value when uncontended, negative + * if there are writers (and maybe) readers waiting (in which case it goes to + * sleep). + * + * The value of WAITING_BIAS supports up to 32766 waiting processes. This can + * be extended to 65534 by manually checking the whole MSW rather than relying + * on the S flag. + * + * The value of ACTIVE_BIAS supports up to 65535 active processes. + * + * This should be totally fair - if anything is waiting, a process that wants a + * lock will go to the back of the queue. When the currently active lock is + * released, if there's a writer at the front of the queue, then that and only + * that will be woken up; if there's a bunch of consequtive readers at the + * front, then they'll all be woken up, but no other readers will be. + */ + +#ifndef _ASM_X86_RWSEM_H +#define _ASM_X86_RWSEM_H + +#ifndef _LINUX_RWSEM_H +#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" +#endif + +#ifdef __KERNEL__ + +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/lockdep.h> + +struct rwsem_waiter; + +extern asmregparm struct rw_semaphore * + rwsem_down_read_failed(struct rw_semaphore *sem); +extern asmregparm struct rw_semaphore * + rwsem_down_write_failed(struct rw_semaphore *sem); +extern asmregparm struct rw_semaphore * + rwsem_wake(struct rw_semaphore *); +extern asmregparm struct rw_semaphore * + rwsem_downgrade_wake(struct rw_semaphore *sem); + +/* + * the semaphore definition + */ + +#define RWSEM_UNLOCKED_VALUE 0x00000000 +#define RWSEM_ACTIVE_BIAS 0x00000001 +#define RWSEM_ACTIVE_MASK 0x0000ffff +#define RWSEM_WAITING_BIAS (-0x00010000) +#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS +#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) + +struct rw_semaphore { + signed long count; + spinlock_t wait_lock; + struct list_head wait_list; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +#else +# define __RWSEM_DEP_MAP_INIT(lockname) +#endif + + +#define __RWSEM_INITIALIZER(name) \ +{ \ + RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \ + LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) \ +} + +#define DECLARE_RWSEM(name) \ + struct rw_semaphore name = __RWSEM_INITIALIZER(name) + +extern void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key); + +#define init_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_rwsem((sem), #sem, &__key); \ +} while (0) + +/* + * lock for reading + */ +static inline void __down_read(struct rw_semaphore *sem) +{ + asm volatile("# beginning down_read\n\t" + LOCK_PREFIX " incl (%%eax)\n\t" + /* adds 0x00000001, returns the old value */ + " jns 1f\n" + " call call_rwsem_down_read_failed\n" + "1:\n\t" + "# ending down_read\n\t" + : "+m" (sem->count) + : "a" (sem) + : "memory", "cc"); +} + +/* + * trylock for reading -- returns 1 if successful, 0 if contention + */ +static inline int __down_read_trylock(struct rw_semaphore *sem) +{ + __s32 result, tmp; + asm volatile("# beginning __down_read_trylock\n\t" + " movl %0,%1\n\t" + "1:\n\t" + " movl %1,%2\n\t" + " addl %3,%2\n\t" + " jle 2f\n\t" + LOCK_PREFIX " cmpxchgl %2,%0\n\t" + " jnz 1b\n\t" + "2:\n\t" + "# ending __down_read_trylock\n\t" + : "+m" (sem->count), "=&a" (result), "=&r" (tmp) + : "i" (RWSEM_ACTIVE_READ_BIAS) + : "memory", "cc"); + return result >= 0 ? 1 : 0; +} + +/* + * lock for writing + */ +static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +{ + int tmp; + + tmp = RWSEM_ACTIVE_WRITE_BIAS; + asm volatile("# beginning down_write\n\t" + LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" + /* subtract 0x0000ffff, returns the old value */ + " testl %%edx,%%edx\n\t" + /* was the count 0 before? */ + " jz 1f\n" + " call call_rwsem_down_write_failed\n" + "1:\n" + "# ending down_write" + : "+m" (sem->count), "=d" (tmp) + : "a" (sem), "1" (tmp) + : "memory", "cc"); +} + +static inline void __down_write(struct rw_semaphore *sem) +{ + __down_write_nested(sem, 0); +} + +/* + * trylock for writing -- returns 1 if successful, 0 if contention + */ +static inline int __down_write_trylock(struct rw_semaphore *sem) +{ + signed long ret = cmpxchg(&sem->count, + RWSEM_UNLOCKED_VALUE, + RWSEM_ACTIVE_WRITE_BIAS); + if (ret == RWSEM_UNLOCKED_VALUE) + return 1; + return 0; +} + +/* + * unlock after reading + */ +static inline void __up_read(struct rw_semaphore *sem) +{ + __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; + asm volatile("# beginning __up_read\n\t" + LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" + /* subtracts 1, returns the old value */ + " jns 1f\n\t" + " call call_rwsem_wake\n" + "1:\n" + "# ending __up_read\n" + : "+m" (sem->count), "=d" (tmp) + : "a" (sem), "1" (tmp) + : "memory", "cc"); +} + +/* + * unlock after writing + */ +static inline void __up_write(struct rw_semaphore *sem) +{ + asm volatile("# beginning __up_write\n\t" + " movl %2,%%edx\n\t" + LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" + /* tries to transition + 0xffff0001 -> 0x00000000 */ + " jz 1f\n" + " call call_rwsem_wake\n" + "1:\n\t" + "# ending __up_write\n" + : "+m" (sem->count) + : "a" (sem), "i" (-RWSEM_ACTIVE_WRITE_BIAS) + : "memory", "cc", "edx"); +} + +/* + * downgrade write lock to read lock + */ +static inline void __downgrade_write(struct rw_semaphore *sem) +{ + asm volatile("# beginning __downgrade_write\n\t" + LOCK_PREFIX " addl %2,(%%eax)\n\t" + /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ + " jns 1f\n\t" + " call call_rwsem_downgrade_wake\n" + "1:\n\t" + "# ending __downgrade_write\n" + : "+m" (sem->count) + : "a" (sem), "i" (-RWSEM_WAITING_BIAS) + : "memory", "cc"); +} + +/* + * implement atomic add functionality + */ +static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) +{ + asm volatile(LOCK_PREFIX "addl %1,%0" + : "+m" (sem->count) + : "ir" (delta)); +} + +/* + * implement exchange and add functionality + */ +static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) +{ + int tmp = delta; + + asm volatile(LOCK_PREFIX "xadd %0,%1" + : "+r" (tmp), "+m" (sem->count) + : : "memory"); + + return tmp + delta; +} + +static inline int rwsem_is_locked(struct rw_semaphore *sem) +{ + return (sem->count != 0); +} + +#endif /* __KERNEL__ */ +#endif /* _ASM_X86_RWSEM_H */ diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h new file mode 100644 index 00000000000..263d397d2ee --- /dev/null +++ b/arch/x86/include/asm/scatterlist.h @@ -0,0 +1,33 @@ +#ifndef _ASM_X86_SCATTERLIST_H +#define _ASM_X86_SCATTERLIST_H + +#include <asm/types.h> + +struct scatterlist { +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; + unsigned int offset; + unsigned int length; + dma_addr_t dma_address; + unsigned int dma_length; +}; + +#define ARCH_HAS_SG_CHAIN +#define ISA_DMA_THRESHOLD (0x00ffffff) + +/* + * These macros should be used after a pci_map_sg call has been done + * to get bus addresses of each of the SG entries and their lengths. + * You should only work with the number of sg entries pci_map_sg + * returns. + */ +#define sg_dma_address(sg) ((sg)->dma_address) +#ifdef CONFIG_X86_32 +# define sg_dma_len(sg) ((sg)->length) +#else +# define sg_dma_len(sg) ((sg)->dma_length) +#endif + +#endif /* _ASM_X86_SCATTERLIST_H */ diff --git a/arch/x86/include/asm/seccomp.h b/arch/x86/include/asm/seccomp.h new file mode 100644 index 00000000000..c62e58a5a90 --- /dev/null +++ b/arch/x86/include/asm/seccomp.h @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "seccomp_32.h" +#else +# include "seccomp_64.h" +#endif diff --git a/arch/x86/include/asm/seccomp_32.h b/arch/x86/include/asm/seccomp_32.h new file mode 100644 index 00000000000..a6ad87b352c --- /dev/null +++ b/arch/x86/include/asm/seccomp_32.h @@ -0,0 +1,17 @@ +#ifndef _ASM_X86_SECCOMP_32_H +#define _ASM_X86_SECCOMP_32_H + +#include <linux/thread_info.h> + +#ifdef TIF_32BIT +#error "unexpected TIF_32BIT on i386" +#endif + +#include <linux/unistd.h> + +#define __NR_seccomp_read __NR_read +#define __NR_seccomp_write __NR_write +#define __NR_seccomp_exit __NR_exit +#define __NR_seccomp_sigreturn __NR_sigreturn + +#endif /* _ASM_X86_SECCOMP_32_H */ diff --git a/arch/x86/include/asm/seccomp_64.h b/arch/x86/include/asm/seccomp_64.h new file mode 100644 index 00000000000..4171bb794e9 --- /dev/null +++ b/arch/x86/include/asm/seccomp_64.h @@ -0,0 +1,25 @@ +#ifndef _ASM_X86_SECCOMP_64_H +#define _ASM_X86_SECCOMP_64_H + +#include <linux/thread_info.h> + +#ifdef TIF_32BIT +#error "unexpected TIF_32BIT on x86_64" +#else +#define TIF_32BIT TIF_IA32 +#endif + +#include <linux/unistd.h> +#include <asm/ia32_unistd.h> + +#define __NR_seccomp_read __NR_read +#define __NR_seccomp_write __NR_write +#define __NR_seccomp_exit __NR_exit +#define __NR_seccomp_sigreturn __NR_rt_sigreturn + +#define __NR_seccomp_read_32 __NR_ia32_read +#define __NR_seccomp_write_32 __NR_ia32_write +#define __NR_seccomp_exit_32 __NR_ia32_exit +#define __NR_seccomp_sigreturn_32 __NR_ia32_sigreturn + +#endif /* _ASM_X86_SECCOMP_64_H */ diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h new file mode 100644 index 00000000000..2b8c5160388 --- /dev/null +++ b/arch/x86/include/asm/sections.h @@ -0,0 +1 @@ +#include <asm-generic/sections.h> diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h new file mode 100644 index 00000000000..1dc1b51ac62 --- /dev/null +++ b/arch/x86/include/asm/segment.h @@ -0,0 +1,209 @@ +#ifndef _ASM_X86_SEGMENT_H +#define _ASM_X86_SEGMENT_H + +/* Constructor for a conventional segment GDT (or LDT) entry */ +/* This is a macro so it can be used in initializers */ +#define GDT_ENTRY(flags, base, limit) \ + ((((base) & 0xff000000ULL) << (56-24)) | \ + (((flags) & 0x0000f0ffULL) << 40) | \ + (((limit) & 0x000f0000ULL) << (48-16)) | \ + (((base) & 0x00ffffffULL) << 16) | \ + (((limit) & 0x0000ffffULL))) + +/* Simple and small GDT entries for booting only */ + +#define GDT_ENTRY_BOOT_CS 2 +#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8) + +#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1) +#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8) + +#define GDT_ENTRY_BOOT_TSS (GDT_ENTRY_BOOT_CS + 2) +#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS * 8) + +#ifdef CONFIG_X86_32 +/* + * The layout of the per-CPU GDT under Linux: + * + * 0 - null + * 1 - reserved + * 2 - reserved + * 3 - reserved + * + * 4 - unused <==== new cacheline + * 5 - unused + * + * ------- start of TLS (Thread-Local Storage) segments: + * + * 6 - TLS segment #1 [ glibc's TLS segment ] + * 7 - TLS segment #2 [ Wine's %fs Win32 segment ] + * 8 - TLS segment #3 + * 9 - reserved + * 10 - reserved + * 11 - reserved + * + * ------- start of kernel segments: + * + * 12 - kernel code segment <==== new cacheline + * 13 - kernel data segment + * 14 - default user CS + * 15 - default user DS + * 16 - TSS + * 17 - LDT + * 18 - PNPBIOS support (16->32 gate) + * 19 - PNPBIOS support + * 20 - PNPBIOS support + * 21 - PNPBIOS support + * 22 - PNPBIOS support + * 23 - APM BIOS support + * 24 - APM BIOS support + * 25 - APM BIOS support + * + * 26 - ESPFIX small SS + * 27 - per-cpu [ offset to per-cpu data area ] + * 28 - unused + * 29 - unused + * 30 - unused + * 31 - TSS for double fault handler + */ +#define GDT_ENTRY_TLS_MIN 6 +#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) + +#define GDT_ENTRY_DEFAULT_USER_CS 14 + +#define GDT_ENTRY_DEFAULT_USER_DS 15 + +#define GDT_ENTRY_KERNEL_BASE 12 + +#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0) + +#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1) + +#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4) +#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5) + +#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 6) +#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 11) + +#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) +#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) + +#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) +#ifdef CONFIG_SMP +#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) +#else +#define __KERNEL_PERCPU 0 +#endif + +#define GDT_ENTRY_DOUBLEFAULT_TSS 31 + +/* + * The GDT has 32 entries + */ +#define GDT_ENTRIES 32 + +/* The PnP BIOS entries in the GDT */ +#define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0) +#define GDT_ENTRY_PNPBIOS_CS16 (GDT_ENTRY_PNPBIOS_BASE + 1) +#define GDT_ENTRY_PNPBIOS_DS (GDT_ENTRY_PNPBIOS_BASE + 2) +#define GDT_ENTRY_PNPBIOS_TS1 (GDT_ENTRY_PNPBIOS_BASE + 3) +#define GDT_ENTRY_PNPBIOS_TS2 (GDT_ENTRY_PNPBIOS_BASE + 4) + +/* The PnP BIOS selectors */ +#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32 * 8) /* segment for calling fn */ +#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16 * 8) /* code segment for BIOS */ +#define PNP_DS (GDT_ENTRY_PNPBIOS_DS * 8) /* data segment for BIOS */ +#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */ +#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */ + +/* Bottom two bits of selector give the ring privilege level */ +#define SEGMENT_RPL_MASK 0x3 +/* Bit 2 is table indicator (LDT/GDT) */ +#define SEGMENT_TI_MASK 0x4 + +/* User mode is privilege level 3 */ +#define USER_RPL 0x3 +/* LDT segment has TI set, GDT has it cleared */ +#define SEGMENT_LDT 0x4 +#define SEGMENT_GDT 0x0 + +/* + * Matching rules for certain types of segments. + */ + +/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ +#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) + + +#else +#include <asm/cache.h> + +#define GDT_ENTRY_KERNEL32_CS 1 +#define GDT_ENTRY_KERNEL_CS 2 +#define GDT_ENTRY_KERNEL_DS 3 + +#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS * 8) + +/* + * we cannot use the same code segment descriptor for user and kernel + * -- not even in the long flat mode, because of different DPL /kkeil + * The segment offset needs to contain a RPL. Grr. -AK + * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) + */ +#define GDT_ENTRY_DEFAULT_USER32_CS 4 +#define GDT_ENTRY_DEFAULT_USER_DS 5 +#define GDT_ENTRY_DEFAULT_USER_CS 6 +#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS * 8 + 3) +#define __USER32_DS __USER_DS + +#define GDT_ENTRY_TSS 8 /* needs two entries */ +#define GDT_ENTRY_LDT 10 /* needs two entries */ +#define GDT_ENTRY_TLS_MIN 12 +#define GDT_ENTRY_TLS_MAX 14 + +#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */ +#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3) + +/* TLS indexes for 64bit - hardcoded in arch_prctl */ +#define FS_TLS 0 +#define GS_TLS 1 + +#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3) +#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3) + +#define GDT_ENTRIES 16 + +#endif + +#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) +#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) +#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS* 8 + 3) +#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS* 8 + 3) +#ifndef CONFIG_PARAVIRT +#define get_kernel_rpl() 0 +#endif + +/* User mode is privilege level 3 */ +#define USER_RPL 0x3 +/* LDT segment has TI set, GDT has it cleared */ +#define SEGMENT_LDT 0x4 +#define SEGMENT_GDT 0x0 + +/* Bottom two bits of selector give the ring privilege level */ +#define SEGMENT_RPL_MASK 0x3 +/* Bit 2 is table indicator (LDT/GDT) */ +#define SEGMENT_TI_MASK 0x4 + +#define IDT_ENTRIES 256 +#define NUM_EXCEPTION_VECTORS 32 +#define GDT_SIZE (GDT_ENTRIES * 8) +#define GDT_ENTRY_TLS_ENTRIES 3 +#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ +extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10]; +#endif +#endif + +#endif /* _ASM_X86_SEGMENT_H */ diff --git a/arch/x86/include/asm/sembuf.h b/arch/x86/include/asm/sembuf.h new file mode 100644 index 00000000000..ee50c801f7b --- /dev/null +++ b/arch/x86/include/asm/sembuf.h @@ -0,0 +1,24 @@ +#ifndef _ASM_X86_SEMBUF_H +#define _ASM_X86_SEMBUF_H + +/* + * The semid64_ds structure for x86 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ +struct semid64_ds { + struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ + __kernel_time_t sem_otime; /* last semop time */ + unsigned long __unused1; + __kernel_time_t sem_ctime; /* last change time */ + unsigned long __unused2; + unsigned long sem_nsems; /* no. of semaphores in array */ + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* _ASM_X86_SEMBUF_H */ diff --git a/arch/x86/include/asm/serial.h b/arch/x86/include/asm/serial.h new file mode 100644 index 00000000000..628c801535e --- /dev/null +++ b/arch/x86/include/asm/serial.h @@ -0,0 +1,29 @@ +#ifndef _ASM_X86_SERIAL_H +#define _ASM_X86_SERIAL_H + +/* + * This assumes you have a 1.8432 MHz clock for your UART. + * + * It'd be nice if someone built a serial card with a 24.576 MHz + * clock, since the 16550A is capable of handling a top speed of 1.5 + * megabits/second; but this requires the faster clock. + */ +#define BASE_BAUD ( 1843200 / 16 ) + +/* Standard COM flags (except for COM4, because of the 8514 problem) */ +#ifdef CONFIG_SERIAL_DETECT_IRQ +#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ) +#define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_AUTO_IRQ) +#else +#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST) +#define STD_COM4_FLAGS ASYNC_BOOT_AUTOCONF +#endif + +#define SERIAL_PORT_DFNS \ + /* UART CLK PORT IRQ FLAGS */ \ + { 0, BASE_BAUD, 0x3F8, 4, STD_COM_FLAGS }, /* ttyS0 */ \ + { 0, BASE_BAUD, 0x2F8, 3, STD_COM_FLAGS }, /* ttyS1 */ \ + { 0, BASE_BAUD, 0x3E8, 4, STD_COM_FLAGS }, /* ttyS2 */ \ + { 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS }, /* ttyS3 */ + +#endif /* _ASM_X86_SERIAL_H */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h new file mode 100644 index 00000000000..f12d3723746 --- /dev/null +++ b/arch/x86/include/asm/setup.h @@ -0,0 +1,105 @@ +#ifndef _ASM_X86_SETUP_H +#define _ASM_X86_SETUP_H + +#define COMMAND_LINE_SIZE 2048 + +#ifndef __ASSEMBLY__ + +/* Interrupt control for vSMPowered x86_64 systems */ +void vsmp_init(void); + +#ifdef CONFIG_X86_VISWS +extern void visws_early_detect(void); +extern int is_visws_box(void); +#else +static inline void visws_early_detect(void) { } +static inline int is_visws_box(void) { return 0; } +#endif + +/* + * Any setup quirks to be performed? + */ +struct mpc_config_processor; +struct mpc_config_bus; +struct mp_config_oemtable; +struct x86_quirks { + int (*arch_pre_time_init)(void); + int (*arch_time_init)(void); + int (*arch_pre_intr_init)(void); + int (*arch_intr_init)(void); + int (*arch_trap_init)(void); + char * (*arch_memory_setup)(void); + int (*mach_get_smp_config)(unsigned int early); + int (*mach_find_smp_config)(unsigned int reserve); + + int *mpc_record; + int (*mpc_apic_id)(struct mpc_config_processor *m); + void (*mpc_oem_bus_info)(struct mpc_config_bus *m, char *name); + void (*mpc_oem_pci_bus)(struct mpc_config_bus *m); + void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, + unsigned short oemsize); + int (*setup_ioapic_ids)(void); +}; + +extern struct x86_quirks *x86_quirks; +extern unsigned long saved_video_mode; + +#ifndef CONFIG_PARAVIRT +#define paravirt_post_allocator_init() do {} while (0) +#endif +#endif /* __ASSEMBLY__ */ + +#ifdef __KERNEL__ + +#ifdef __i386__ + +#include <linux/pfn.h> +/* + * Reserved space for vmalloc and iomap - defined in asm/page.h + */ +#define MAXMEM_PFN PFN_DOWN(MAXMEM) +#define MAX_NONPAE_PFN (1 << 20) + +#endif /* __i386__ */ + +#define PARAM_SIZE 4096 /* sizeof(struct boot_params) */ + +#define OLD_CL_MAGIC 0xA33F +#define OLD_CL_ADDRESS 0x020 /* Relative to real mode data */ +#define NEW_CL_POINTER 0x228 /* Relative to real mode data */ + +#ifndef __ASSEMBLY__ +#include <asm/bootparam.h> + +#ifndef _SETUP + +/* + * This is set up by the setup-routine at boot-time + */ +extern struct boot_params boot_params; + +/* + * Do NOT EVER look at the BIOS memory size location. + * It does not work on many machines. + */ +#define LOWMEMSIZE() (0x9f000) + +#ifdef __i386__ + +void __init i386_start_kernel(void); +extern void probe_roms(void); + +extern unsigned long init_pg_tables_start; +extern unsigned long init_pg_tables_end; + +#else +void __init x86_64_init_pda(void); +void __init x86_64_start_kernel(char *real_mode); +void __init x86_64_start_reservations(char *real_mode_data); + +#endif /* __i386__ */ +#endif /* _SETUP */ +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_SETUP_H */ diff --git a/arch/x86/include/asm/shmbuf.h b/arch/x86/include/asm/shmbuf.h new file mode 100644 index 00000000000..b51413b7497 --- /dev/null +++ b/arch/x86/include/asm/shmbuf.h @@ -0,0 +1,51 @@ +#ifndef _ASM_X86_SHMBUF_H +#define _ASM_X86_SHMBUF_H + +/* + * The shmid64_ds structure for x86 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space on 32 bit is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + * + * Pad space on 64 bit is left for: + * - 2 miscellaneous 64-bit values + */ + +struct shmid64_ds { + struct ipc64_perm shm_perm; /* operation perms */ + size_t shm_segsz; /* size of segment (bytes) */ + __kernel_time_t shm_atime; /* last attach time */ +#ifdef __i386__ + unsigned long __unused1; +#endif + __kernel_time_t shm_dtime; /* last detach time */ +#ifdef __i386__ + unsigned long __unused2; +#endif + __kernel_time_t shm_ctime; /* last change time */ +#ifdef __i386__ + unsigned long __unused3; +#endif + __kernel_pid_t shm_cpid; /* pid of creator */ + __kernel_pid_t shm_lpid; /* pid of last operator */ + unsigned long shm_nattch; /* no. of current attaches */ + unsigned long __unused4; + unsigned long __unused5; +}; + +struct shminfo64 { + unsigned long shmmax; + unsigned long shmmin; + unsigned long shmmni; + unsigned long shmseg; + unsigned long shmall; + unsigned long __unused1; + unsigned long __unused2; + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* _ASM_X86_SHMBUF_H */ diff --git a/arch/x86/include/asm/shmparam.h b/arch/x86/include/asm/shmparam.h new file mode 100644 index 00000000000..0880cf0917b --- /dev/null +++ b/arch/x86/include/asm/shmparam.h @@ -0,0 +1,6 @@ +#ifndef _ASM_X86_SHMPARAM_H +#define _ASM_X86_SHMPARAM_H + +#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ + +#endif /* _ASM_X86_SHMPARAM_H */ diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h new file mode 100644 index 00000000000..0afcb5e58ac --- /dev/null +++ b/arch/x86/include/asm/sigcontext.h @@ -0,0 +1,284 @@ +#ifndef _ASM_X86_SIGCONTEXT_H +#define _ASM_X86_SIGCONTEXT_H + +#include <linux/compiler.h> +#include <asm/types.h> + +#define FP_XSTATE_MAGIC1 0x46505853U +#define FP_XSTATE_MAGIC2 0x46505845U +#define FP_XSTATE_MAGIC2_SIZE sizeof(FP_XSTATE_MAGIC2) + +/* + * bytes 464..511 in the current 512byte layout of fxsave/fxrstor frame + * are reserved for SW usage. On cpu's supporting xsave/xrstor, these bytes + * are used to extended the fpstate pointer in the sigcontext, which now + * includes the extended state information along with fpstate information. + * + * Presence of FP_XSTATE_MAGIC1 at the beginning of this SW reserved + * area and FP_XSTATE_MAGIC2 at the end of memory layout + * (extended_size - FP_XSTATE_MAGIC2_SIZE) indicates the presence of the + * extended state information in the memory layout pointed by the fpstate + * pointer in sigcontext. + */ +struct _fpx_sw_bytes { + __u32 magic1; /* FP_XSTATE_MAGIC1 */ + __u32 extended_size; /* total size of the layout referred by + * fpstate pointer in the sigcontext. + */ + __u64 xstate_bv; + /* feature bit mask (including fp/sse/extended + * state) that is present in the memory + * layout. + */ + __u32 xstate_size; /* actual xsave state size, based on the + * features saved in the layout. + * 'extended_size' will be greater than + * 'xstate_size'. + */ + __u32 padding[7]; /* for future use. */ +}; + +#ifdef __i386__ +/* + * As documented in the iBCS2 standard.. + * + * The first part of "struct _fpstate" is just the normal i387 + * hardware setup, the extra "status" word is used to save the + * coprocessor status word before entering the handler. + * + * Pentium III FXSR, SSE support + * Gareth Hughes <gareth@valinux.com>, May 2000 + * + * The FPU state data structure has had to grow to accommodate the + * extended FPU state required by the Streaming SIMD Extensions. + * There is no documented standard to accomplish this at the moment. + */ +struct _fpreg { + unsigned short significand[4]; + unsigned short exponent; +}; + +struct _fpxreg { + unsigned short significand[4]; + unsigned short exponent; + unsigned short padding[3]; +}; + +struct _xmmreg { + unsigned long element[4]; +}; + +struct _fpstate { + /* Regular FPU environment */ + unsigned long cw; + unsigned long sw; + unsigned long tag; + unsigned long ipoff; + unsigned long cssel; + unsigned long dataoff; + unsigned long datasel; + struct _fpreg _st[8]; + unsigned short status; + unsigned short magic; /* 0xffff = regular FPU data only */ + + /* FXSR FPU environment */ + unsigned long _fxsr_env[6]; /* FXSR FPU env is ignored */ + unsigned long mxcsr; + unsigned long reserved; + struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */ + struct _xmmreg _xmm[8]; + unsigned long padding1[44]; + + union { + unsigned long padding2[12]; + struct _fpx_sw_bytes sw_reserved; /* represents the extended + * state info */ + }; +}; + +#define X86_FXSR_MAGIC 0x0000 + +#ifdef __KERNEL__ +struct sigcontext { + unsigned short gs, __gsh; + unsigned short fs, __fsh; + unsigned short es, __esh; + unsigned short ds, __dsh; + unsigned long di; + unsigned long si; + unsigned long bp; + unsigned long sp; + unsigned long bx; + unsigned long dx; + unsigned long cx; + unsigned long ax; + unsigned long trapno; + unsigned long err; + unsigned long ip; + unsigned short cs, __csh; + unsigned long flags; + unsigned long sp_at_signal; + unsigned short ss, __ssh; + + /* + * fpstate is really (struct _fpstate *) or (struct _xstate *) + * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved + * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end + * of extended memory layout. See comments at the defintion of + * (struct _fpx_sw_bytes) + */ + void __user *fpstate; /* zero when no FPU/extended context */ + unsigned long oldmask; + unsigned long cr2; +}; +#else /* __KERNEL__ */ +/* + * User-space might still rely on the old definition: + */ +struct sigcontext { + unsigned short gs, __gsh; + unsigned short fs, __fsh; + unsigned short es, __esh; + unsigned short ds, __dsh; + unsigned long edi; + unsigned long esi; + unsigned long ebp; + unsigned long esp; + unsigned long ebx; + unsigned long edx; + unsigned long ecx; + unsigned long eax; + unsigned long trapno; + unsigned long err; + unsigned long eip; + unsigned short cs, __csh; + unsigned long eflags; + unsigned long esp_at_signal; + unsigned short ss, __ssh; + struct _fpstate __user *fpstate; + unsigned long oldmask; + unsigned long cr2; +}; +#endif /* !__KERNEL__ */ + +#else /* __i386__ */ + +/* FXSAVE frame */ +/* Note: reserved1/2 may someday contain valuable data. Always save/restore + them when you change signal frames. */ +struct _fpstate { + __u16 cwd; + __u16 swd; + __u16 twd; /* Note this is not the same as the + 32bit/x87/FSAVE twd */ + __u16 fop; + __u64 rip; + __u64 rdp; + __u32 mxcsr; + __u32 mxcsr_mask; + __u32 st_space[32]; /* 8*16 bytes for each FP-reg */ + __u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg */ + __u32 reserved2[12]; + union { + __u32 reserved3[12]; + struct _fpx_sw_bytes sw_reserved; /* represents the extended + * state information */ + }; +}; + +#ifdef __KERNEL__ +struct sigcontext { + unsigned long r8; + unsigned long r9; + unsigned long r10; + unsigned long r11; + unsigned long r12; + unsigned long r13; + unsigned long r14; + unsigned long r15; + unsigned long di; + unsigned long si; + unsigned long bp; + unsigned long bx; + unsigned long dx; + unsigned long ax; + unsigned long cx; + unsigned long sp; + unsigned long ip; + unsigned long flags; + unsigned short cs; + unsigned short gs; + unsigned short fs; + unsigned short __pad0; + unsigned long err; + unsigned long trapno; + unsigned long oldmask; + unsigned long cr2; + + /* + * fpstate is really (struct _fpstate *) or (struct _xstate *) + * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved + * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end + * of extended memory layout. See comments at the defintion of + * (struct _fpx_sw_bytes) + */ + void __user *fpstate; /* zero when no FPU/extended context */ + unsigned long reserved1[8]; +}; +#else /* __KERNEL__ */ +/* + * User-space might still rely on the old definition: + */ +struct sigcontext { + unsigned long r8; + unsigned long r9; + unsigned long r10; + unsigned long r11; + unsigned long r12; + unsigned long r13; + unsigned long r14; + unsigned long r15; + unsigned long rdi; + unsigned long rsi; + unsigned long rbp; + unsigned long rbx; + unsigned long rdx; + unsigned long rax; + unsigned long rcx; + unsigned long rsp; + unsigned long rip; + unsigned long eflags; /* RFLAGS */ + unsigned short cs; + unsigned short gs; + unsigned short fs; + unsigned short __pad0; + unsigned long err; + unsigned long trapno; + unsigned long oldmask; + unsigned long cr2; + struct _fpstate __user *fpstate; /* zero when no FPU context */ + unsigned long reserved1[8]; +}; +#endif /* !__KERNEL__ */ + +#endif /* !__i386__ */ + +struct _xsave_hdr { + __u64 xstate_bv; + __u64 reserved1[2]; + __u64 reserved2[5]; +}; + +/* + * Extended state pointed by the fpstate pointer in the sigcontext. + * In addition to the fpstate, information encoded in the xstate_hdr + * indicates the presence of other extended state information + * supported by the processor and OS. + */ +struct _xstate { + struct _fpstate fpstate; + struct _xsave_hdr xstate_hdr; + /* new processor state extensions go here */ +}; + +#endif /* _ASM_X86_SIGCONTEXT_H */ diff --git a/arch/x86/include/asm/sigcontext32.h b/arch/x86/include/asm/sigcontext32.h new file mode 100644 index 00000000000..6126188cf3a --- /dev/null +++ b/arch/x86/include/asm/sigcontext32.h @@ -0,0 +1,75 @@ +#ifndef _ASM_X86_SIGCONTEXT32_H +#define _ASM_X86_SIGCONTEXT32_H + +/* signal context for 32bit programs. */ + +#define X86_FXSR_MAGIC 0x0000 + +struct _fpreg { + unsigned short significand[4]; + unsigned short exponent; +}; + +struct _fpxreg { + unsigned short significand[4]; + unsigned short exponent; + unsigned short padding[3]; +}; + +struct _xmmreg { + __u32 element[4]; +}; + +/* FSAVE frame with extensions */ +struct _fpstate_ia32 { + /* Regular FPU environment */ + __u32 cw; + __u32 sw; + __u32 tag; /* not compatible to 64bit twd */ + __u32 ipoff; + __u32 cssel; + __u32 dataoff; + __u32 datasel; + struct _fpreg _st[8]; + unsigned short status; + unsigned short magic; /* 0xffff = regular FPU data only */ + + /* FXSR FPU environment */ + __u32 _fxsr_env[6]; + __u32 mxcsr; + __u32 reserved; + struct _fpxreg _fxsr_st[8]; + struct _xmmreg _xmm[8]; /* It's actually 16 */ + __u32 padding[44]; + union { + __u32 padding2[12]; + struct _fpx_sw_bytes sw_reserved; + }; +}; + +struct sigcontext_ia32 { + unsigned short gs, __gsh; + unsigned short fs, __fsh; + unsigned short es, __esh; + unsigned short ds, __dsh; + unsigned int di; + unsigned int si; + unsigned int bp; + unsigned int sp; + unsigned int bx; + unsigned int dx; + unsigned int cx; + unsigned int ax; + unsigned int trapno; + unsigned int err; + unsigned int ip; + unsigned short cs, __csh; + unsigned int flags; + unsigned int sp_at_signal; + unsigned short ss, __ssh; + unsigned int fpstate; /* really (struct _fpstate_ia32 *) */ + unsigned int oldmask; + unsigned int cr2; +}; + +#endif /* _ASM_X86_SIGCONTEXT32_H */ diff --git a/arch/x86/include/asm/siginfo.h b/arch/x86/include/asm/siginfo.h new file mode 100644 index 00000000000..fc1aa553564 --- /dev/null +++ b/arch/x86/include/asm/siginfo.h @@ -0,0 +1,10 @@ +#ifndef _ASM_X86_SIGINFO_H +#define _ASM_X86_SIGINFO_H + +#ifdef __x86_64__ +# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +#endif + +#include <asm-generic/siginfo.h> + +#endif /* _ASM_X86_SIGINFO_H */ diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h new file mode 100644 index 00000000000..96ac44f275d --- /dev/null +++ b/arch/x86/include/asm/signal.h @@ -0,0 +1,262 @@ +#ifndef _ASM_X86_SIGNAL_H +#define _ASM_X86_SIGNAL_H + +#ifndef __ASSEMBLY__ +#include <linux/types.h> +#include <linux/time.h> +#include <linux/compiler.h> + +/* Avoid too many header ordering problems. */ +struct siginfo; + +#ifdef __KERNEL__ +#include <linux/linkage.h> + +/* Most things should be clean enough to redefine this at will, if care + is taken to make libc match. */ + +#define _NSIG 64 + +#ifdef __i386__ +# define _NSIG_BPW 32 +#else +# define _NSIG_BPW 64 +#endif + +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + +typedef unsigned long old_sigset_t; /* at least 32 bits */ + +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; + +#else +/* Here we must cater to libcs that poke about in kernel headers. */ + +#define NSIG 32 +typedef unsigned long sigset_t; + +#endif /* __KERNEL__ */ +#endif /* __ASSEMBLY__ */ + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +/* +#define SIGLOST 29 +*/ +#define SIGPWR 30 +#define SIGSYS 31 +#define SIGUNUSED 31 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX _NSIG + +/* + * SA_FLAGS values: + * + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. + */ +#define SA_NOCLDSTOP 0x00000001u +#define SA_NOCLDWAIT 0x00000002u +#define SA_SIGINFO 0x00000004u +#define SA_ONSTACK 0x08000000u +#define SA_RESTART 0x10000000u +#define SA_NODEFER 0x40000000u +#define SA_RESETHAND 0x80000000u + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND + +#define SA_RESTORER 0x04000000 + +/* + * sigaltstack controls + */ +#define SS_ONSTACK 1 +#define SS_DISABLE 2 + +#define MINSIGSTKSZ 2048 +#define SIGSTKSZ 8192 + +#include <asm-generic/signal.h> + +#ifndef __ASSEMBLY__ + +#ifdef __i386__ +# ifdef __KERNEL__ +struct old_sigaction { + __sighandler_t sa_handler; + old_sigset_t sa_mask; + unsigned long sa_flags; + __sigrestore_t sa_restorer; +}; + +struct sigaction { + __sighandler_t sa_handler; + unsigned long sa_flags; + __sigrestore_t sa_restorer; + sigset_t sa_mask; /* mask last for extensibility */ +}; + +struct k_sigaction { + struct sigaction sa; +}; + +extern void do_notify_resume(struct pt_regs *, void *, __u32); + +# else /* __KERNEL__ */ +/* Here we must cater to libcs that poke about in kernel headers. */ + +struct sigaction { + union { + __sighandler_t _sa_handler; + void (*_sa_sigaction)(int, struct siginfo *, void *); + } _u; + sigset_t sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +}; + +#define sa_handler _u._sa_handler +#define sa_sigaction _u._sa_sigaction + +# endif /* ! __KERNEL__ */ +#else /* __i386__ */ + +struct sigaction { + __sighandler_t sa_handler; + unsigned long sa_flags; + __sigrestore_t sa_restorer; + sigset_t sa_mask; /* mask last for extensibility */ +}; + +struct k_sigaction { + struct sigaction sa; +}; + +#endif /* !__i386__ */ + +typedef struct sigaltstack { + void __user *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + +#ifdef __KERNEL__ +#include <asm/sigcontext.h> + +#ifdef __i386__ + +#define __HAVE_ARCH_SIG_BITOPS + +#define sigaddset(set,sig) \ + (__builtin_constant_p(sig) \ + ? __const_sigaddset((set), (sig)) \ + : __gen_sigaddset((set), (sig))) + +static inline void __gen_sigaddset(sigset_t *set, int _sig) +{ + asm("btsl %1,%0" : "+m"(*set) : "Ir"(_sig - 1) : "cc"); +} + +static inline void __const_sigaddset(sigset_t *set, int _sig) +{ + unsigned long sig = _sig - 1; + set->sig[sig / _NSIG_BPW] |= 1 << (sig % _NSIG_BPW); +} + +#define sigdelset(set, sig) \ + (__builtin_constant_p(sig) \ + ? __const_sigdelset((set), (sig)) \ + : __gen_sigdelset((set), (sig))) + + +static inline void __gen_sigdelset(sigset_t *set, int _sig) +{ + asm("btrl %1,%0" : "+m"(*set) : "Ir"(_sig - 1) : "cc"); +} + +static inline void __const_sigdelset(sigset_t *set, int _sig) +{ + unsigned long sig = _sig - 1; + set->sig[sig / _NSIG_BPW] &= ~(1 << (sig % _NSIG_BPW)); +} + +static inline int __const_sigismember(sigset_t *set, int _sig) +{ + unsigned long sig = _sig - 1; + return 1 & (set->sig[sig / _NSIG_BPW] >> (sig % _NSIG_BPW)); +} + +static inline int __gen_sigismember(sigset_t *set, int _sig) +{ + int ret; + asm("btl %2,%1\n\tsbbl %0,%0" + : "=r"(ret) : "m"(*set), "Ir"(_sig-1) : "cc"); + return ret; +} + +#define sigismember(set, sig) \ + (__builtin_constant_p(sig) \ + ? __const_sigismember((set), (sig)) \ + : __gen_sigismember((set), (sig))) + +static inline int sigfindinword(unsigned long word) +{ + asm("bsfl %1,%0" : "=r"(word) : "rm"(word) : "cc"); + return word; +} + +struct pt_regs; + +#else /* __i386__ */ + +#undef __HAVE_ARCH_SIG_BITOPS + +#endif /* !__i386__ */ + +#define ptrace_signal_deliver(regs, cookie) do { } while (0) + +#endif /* __KERNEL__ */ +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_SIGNAL_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h new file mode 100644 index 00000000000..2766021aef8 --- /dev/null +++ b/arch/x86/include/asm/smp.h @@ -0,0 +1,229 @@ +#ifndef _ASM_X86_SMP_H +#define _ASM_X86_SMP_H +#ifndef __ASSEMBLY__ +#include <linux/cpumask.h> +#include <linux/init.h> +#include <asm/percpu.h> + +/* + * We need the APIC definitions automatically as part of 'smp.h' + */ +#ifdef CONFIG_X86_LOCAL_APIC +# include <asm/mpspec.h> +# include <asm/apic.h> +# ifdef CONFIG_X86_IO_APIC +# include <asm/io_apic.h> +# endif +#endif +#include <asm/pda.h> +#include <asm/thread_info.h> + +extern cpumask_t cpu_callout_map; +extern cpumask_t cpu_initialized; +extern cpumask_t cpu_callin_map; + +extern void (*mtrr_hook)(void); +extern void zap_low_mappings(void); + +extern int __cpuinit get_local_pda(int cpu); + +extern int smp_num_siblings; +extern unsigned int num_processors; +extern cpumask_t cpu_initialized; + +DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); +DECLARE_PER_CPU(cpumask_t, cpu_core_map); +DECLARE_PER_CPU(u16, cpu_llc_id); +#ifdef CONFIG_X86_32 +DECLARE_PER_CPU(int, cpu_number); +#endif + +DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); +DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); + +/* Static state in head.S used to set up a CPU */ +extern struct { + void *sp; + unsigned short ss; +} stack_start; + +struct smp_ops { + void (*smp_prepare_boot_cpu)(void); + void (*smp_prepare_cpus)(unsigned max_cpus); + void (*smp_cpus_done)(unsigned max_cpus); + + void (*smp_send_stop)(void); + void (*smp_send_reschedule)(int cpu); + + int (*cpu_up)(unsigned cpu); + int (*cpu_disable)(void); + void (*cpu_die)(unsigned int cpu); + void (*play_dead)(void); + + void (*send_call_func_ipi)(cpumask_t mask); + void (*send_call_func_single_ipi)(int cpu); +}; + +/* Globals due to paravirt */ +extern void set_cpu_sibling_map(int cpu); + +#ifdef CONFIG_SMP +#ifndef CONFIG_PARAVIRT +#define startup_ipi_hook(phys_apicid, start_eip, start_esp) do { } while (0) +#endif +extern struct smp_ops smp_ops; + +static inline void smp_send_stop(void) +{ + smp_ops.smp_send_stop(); +} + +static inline void smp_prepare_boot_cpu(void) +{ + smp_ops.smp_prepare_boot_cpu(); +} + +static inline void smp_prepare_cpus(unsigned int max_cpus) +{ + smp_ops.smp_prepare_cpus(max_cpus); +} + +static inline void smp_cpus_done(unsigned int max_cpus) +{ + smp_ops.smp_cpus_done(max_cpus); +} + +static inline int __cpu_up(unsigned int cpu) +{ + return smp_ops.cpu_up(cpu); +} + +static inline int __cpu_disable(void) +{ + return smp_ops.cpu_disable(); +} + +static inline void __cpu_die(unsigned int cpu) +{ + smp_ops.cpu_die(cpu); +} + +static inline void play_dead(void) +{ + smp_ops.play_dead(); +} + +static inline void smp_send_reschedule(int cpu) +{ + smp_ops.smp_send_reschedule(cpu); +} + +static inline void arch_send_call_function_single_ipi(int cpu) +{ + smp_ops.send_call_func_single_ipi(cpu); +} + +static inline void arch_send_call_function_ipi(cpumask_t mask) +{ + smp_ops.send_call_func_ipi(mask); +} + +void cpu_disable_common(void); +void native_smp_prepare_boot_cpu(void); +void native_smp_prepare_cpus(unsigned int max_cpus); +void native_smp_cpus_done(unsigned int max_cpus); +int native_cpu_up(unsigned int cpunum); +int native_cpu_disable(void); +void native_cpu_die(unsigned int cpu); +void native_play_dead(void); +void play_dead_common(void); + +void native_send_call_func_ipi(cpumask_t mask); +void native_send_call_func_single_ipi(int cpu); + +extern void prefill_possible_map(void); + +void smp_store_cpu_info(int id); +#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) + +/* We don't mark CPUs online until __cpu_up(), so we need another measure */ +static inline int num_booting_cpus(void) +{ + return cpus_weight(cpu_callout_map); +} +#else +static inline void prefill_possible_map(void) +{ +} +#endif /* CONFIG_SMP */ + +extern unsigned disabled_cpus __cpuinitdata; + +#ifdef CONFIG_X86_32_SMP +/* + * This function is needed by all SMP systems. It must _always_ be valid + * from the initial startup. We map APIC_BASE very early in page_setup(), + * so this is correct in the x86 case. + */ +#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) +extern int safe_smp_processor_id(void); + +#elif defined(CONFIG_X86_64_SMP) +#define raw_smp_processor_id() read_pda(cpunumber) + +#define stack_smp_processor_id() \ +({ \ + struct thread_info *ti; \ + __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \ + ti->cpu; \ +}) +#define safe_smp_processor_id() smp_processor_id() + +#else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */ +#define cpu_physical_id(cpu) boot_cpu_physical_apicid +#define safe_smp_processor_id() 0 +#define stack_smp_processor_id() 0 +#endif + +#ifdef CONFIG_X86_LOCAL_APIC + +#ifndef CONFIG_X86_64 +static inline int logical_smp_processor_id(void) +{ + /* we don't want to mark this access volatile - bad code generation */ + return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); +} + +#include <mach_apicdef.h> +static inline unsigned int read_apic_id(void) +{ + unsigned int reg; + + reg = *(u32 *)(APIC_BASE + APIC_ID); + + return GET_APIC_ID(reg); +} +#endif + + +# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64) +extern int hard_smp_processor_id(void); +# else +#include <mach_apicdef.h> +static inline int hard_smp_processor_id(void) +{ + /* we don't want to mark this access volatile - bad code generation */ + return read_apic_id(); +} +# endif /* APIC_DEFINITION */ + +#else /* CONFIG_X86_LOCAL_APIC */ + +# ifndef CONFIG_SMP +# define hard_smp_processor_id() 0 +# endif + +#endif /* CONFIG_X86_LOCAL_APIC */ + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_SMP_H */ diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h new file mode 100644 index 00000000000..8ab9cc8b2ec --- /dev/null +++ b/arch/x86/include/asm/socket.h @@ -0,0 +1,57 @@ +#ifndef _ASM_X86_SOCKET_H +#define _ASM_X86_SOCKET_H + +#include <asm/sockios.h> + +/* For setsockopt(2) */ +#define SOL_SOCKET 1 + +#define SO_DEBUG 1 +#define SO_REUSEADDR 2 +#define SO_TYPE 3 +#define SO_ERROR 4 +#define SO_DONTROUTE 5 +#define SO_BROADCAST 6 +#define SO_SNDBUF 7 +#define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 +#define SO_KEEPALIVE 9 +#define SO_OOBINLINE 10 +#define SO_NO_CHECK 11 +#define SO_PRIORITY 12 +#define SO_LINGER 13 +#define SO_BSDCOMPAT 14 +/* To add :#define SO_REUSEPORT 15 */ +#define SO_PASSCRED 16 +#define SO_PEERCRED 17 +#define SO_RCVLOWAT 18 +#define SO_SNDLOWAT 19 +#define SO_RCVTIMEO 20 +#define SO_SNDTIMEO 21 + +/* Security levels - as per NRL IPv6 - don't actually do anything */ +#define SO_SECURITY_AUTHENTICATION 22 +#define SO_SECURITY_ENCRYPTION_TRANSPORT 23 +#define SO_SECURITY_ENCRYPTION_NETWORK 24 + +#define SO_BINDTODEVICE 25 + +/* Socket filtering */ +#define SO_ATTACH_FILTER 26 +#define SO_DETACH_FILTER 27 + +#define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP + +#define SO_ACCEPTCONN 30 + +#define SO_PEERSEC 31 +#define SO_PASSSEC 34 +#define SO_TIMESTAMPNS 35 +#define SCM_TIMESTAMPNS SO_TIMESTAMPNS + +#define SO_MARK 36 + +#endif /* _ASM_X86_SOCKET_H */ diff --git a/arch/x86/include/asm/sockios.h b/arch/x86/include/asm/sockios.h new file mode 100644 index 00000000000..49cc72b5d3c --- /dev/null +++ b/arch/x86/include/asm/sockios.h @@ -0,0 +1,13 @@ +#ifndef _ASM_X86_SOCKIOS_H +#define _ASM_X86_SOCKIOS_H + +/* Socket-level I/O control calls. */ +#define FIOSETOWN 0x8901 +#define SIOCSPGRP 0x8902 +#define FIOGETOWN 0x8903 +#define SIOCGPGRP 0x8904 +#define SIOCATMARK 0x8905 +#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ +#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ + +#endif /* _ASM_X86_SOCKIOS_H */ diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h new file mode 100644 index 00000000000..be44f7dab39 --- /dev/null +++ b/arch/x86/include/asm/sparsemem.h @@ -0,0 +1,34 @@ +#ifndef _ASM_X86_SPARSEMEM_H +#define _ASM_X86_SPARSEMEM_H + +#ifdef CONFIG_SPARSEMEM +/* + * generic non-linear memory support: + * + * 1) we will not split memory into more chunks than will fit into the flags + * field of the struct page + * + * SECTION_SIZE_BITS 2^n: size of each section + * MAX_PHYSADDR_BITS 2^n: max size of physical address space + * MAX_PHYSMEM_BITS 2^n: how much memory we can have in that space + * + */ + +#ifdef CONFIG_X86_32 +# ifdef CONFIG_X86_PAE +# define SECTION_SIZE_BITS 29 +# define MAX_PHYSADDR_BITS 36 +# define MAX_PHYSMEM_BITS 36 +# else +# define SECTION_SIZE_BITS 26 +# define MAX_PHYSADDR_BITS 32 +# define MAX_PHYSMEM_BITS 32 +# endif +#else /* CONFIG_X86_32 */ +# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ +# define MAX_PHYSADDR_BITS 44 +# define MAX_PHYSMEM_BITS 44 +#endif + +#endif /* CONFIG_SPARSEMEM */ +#endif /* _ASM_X86_SPARSEMEM_H */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h new file mode 100644 index 00000000000..d17c91981da --- /dev/null +++ b/arch/x86/include/asm/spinlock.h @@ -0,0 +1,364 @@ +#ifndef _ASM_X86_SPINLOCK_H +#define _ASM_X86_SPINLOCK_H + +#include <asm/atomic.h> +#include <asm/rwlock.h> +#include <asm/page.h> +#include <asm/processor.h> +#include <linux/compiler.h> +#include <asm/paravirt.h> +/* + * Your basic SMP spinlocks, allowing only a single CPU anywhere + * + * Simple spin lock operations. There are two variants, one clears IRQ's + * on the local processor, one does not. + * + * These are fair FIFO ticket locks, which are currently limited to 256 + * CPUs. + * + * (the type definitions are in asm/spinlock_types.h) + */ + +#ifdef CONFIG_X86_32 +# define LOCK_PTR_REG "a" +# define REG_PTR_MODE "k" +#else +# define LOCK_PTR_REG "D" +# define REG_PTR_MODE "q" +#endif + +#if defined(CONFIG_X86_32) && \ + (defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)) +/* + * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock + * (PPro errata 66, 92) + */ +# define UNLOCK_LOCK_PREFIX LOCK_PREFIX +#else +# define UNLOCK_LOCK_PREFIX +#endif + +/* + * Ticket locks are conceptually two parts, one indicating the current head of + * the queue, and the other indicating the current tail. The lock is acquired + * by atomically noting the tail and incrementing it by one (thus adding + * ourself to the queue and noting our position), then waiting until the head + * becomes equal to the the initial value of the tail. + * + * We use an xadd covering *both* parts of the lock, to increment the tail and + * also load the position of the head, which takes care of memory ordering + * issues and should be optimal for the uncontended case. Note the tail must be + * in the high part, because a wide xadd increment of the low part would carry + * up and contaminate the high part. + * + * With fewer than 2^8 possible CPUs, we can use x86's partial registers to + * save some instructions and make the code more elegant. There really isn't + * much between them in performance though, especially as locks are out of line. + */ +#if (NR_CPUS < 256) +#define TICKET_SHIFT 8 + +static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) +{ + short inc = 0x0100; + + asm volatile ( + LOCK_PREFIX "xaddw %w0, %1\n" + "1:\t" + "cmpb %h0, %b0\n\t" + "je 2f\n\t" + "rep ; nop\n\t" + "movb %1, %b0\n\t" + /* don't need lfence here, because loads are in-order */ + "jmp 1b\n" + "2:" + : "+Q" (inc), "+m" (lock->slock) + : + : "memory", "cc"); +} + +static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) +{ + int tmp, new; + + asm volatile("movzwl %2, %0\n\t" + "cmpb %h0,%b0\n\t" + "leal 0x100(%" REG_PTR_MODE "0), %1\n\t" + "jne 1f\n\t" + LOCK_PREFIX "cmpxchgw %w1,%2\n\t" + "1:" + "sete %b1\n\t" + "movzbl %b1,%0\n\t" + : "=&a" (tmp), "=&q" (new), "+m" (lock->slock) + : + : "memory", "cc"); + + return tmp; +} + +static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) +{ + asm volatile(UNLOCK_LOCK_PREFIX "incb %0" + : "+m" (lock->slock) + : + : "memory", "cc"); +} +#else +#define TICKET_SHIFT 16 + +static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) +{ + int inc = 0x00010000; + int tmp; + + asm volatile(LOCK_PREFIX "xaddl %0, %1\n" + "movzwl %w0, %2\n\t" + "shrl $16, %0\n\t" + "1:\t" + "cmpl %0, %2\n\t" + "je 2f\n\t" + "rep ; nop\n\t" + "movzwl %1, %2\n\t" + /* don't need lfence here, because loads are in-order */ + "jmp 1b\n" + "2:" + : "+r" (inc), "+m" (lock->slock), "=&r" (tmp) + : + : "memory", "cc"); +} + +static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) +{ + int tmp; + int new; + + asm volatile("movl %2,%0\n\t" + "movl %0,%1\n\t" + "roll $16, %0\n\t" + "cmpl %0,%1\n\t" + "leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t" + "jne 1f\n\t" + LOCK_PREFIX "cmpxchgl %1,%2\n\t" + "1:" + "sete %b1\n\t" + "movzbl %b1,%0\n\t" + : "=&a" (tmp), "=&q" (new), "+m" (lock->slock) + : + : "memory", "cc"); + + return tmp; +} + +static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) +{ + asm volatile(UNLOCK_LOCK_PREFIX "incw %0" + : "+m" (lock->slock) + : + : "memory", "cc"); +} +#endif + +static inline int __ticket_spin_is_locked(raw_spinlock_t *lock) +{ + int tmp = ACCESS_ONCE(lock->slock); + + return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1)); +} + +static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) +{ + int tmp = ACCESS_ONCE(lock->slock); + + return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1; +} + +#ifdef CONFIG_PARAVIRT +/* + * Define virtualization-friendly old-style lock byte lock, for use in + * pv_lock_ops if desired. + * + * This differs from the pre-2.6.24 spinlock by always using xchgb + * rather than decb to take the lock; this allows it to use a + * zero-initialized lock structure. It also maintains a 1-byte + * contention counter, so that we can implement + * __byte_spin_is_contended. + */ +struct __byte_spinlock { + s8 lock; + s8 spinners; +}; + +static inline int __byte_spin_is_locked(raw_spinlock_t *lock) +{ + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; + return bl->lock != 0; +} + +static inline int __byte_spin_is_contended(raw_spinlock_t *lock) +{ + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; + return bl->spinners != 0; +} + +static inline void __byte_spin_lock(raw_spinlock_t *lock) +{ + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; + s8 val = 1; + + asm("1: xchgb %1, %0\n" + " test %1,%1\n" + " jz 3f\n" + " " LOCK_PREFIX "incb %2\n" + "2: rep;nop\n" + " cmpb $1, %0\n" + " je 2b\n" + " " LOCK_PREFIX "decb %2\n" + " jmp 1b\n" + "3:" + : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory"); +} + +static inline int __byte_spin_trylock(raw_spinlock_t *lock) +{ + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; + u8 old = 1; + + asm("xchgb %1,%0" + : "+m" (bl->lock), "+q" (old) : : "memory"); + + return old == 0; +} + +static inline void __byte_spin_unlock(raw_spinlock_t *lock) +{ + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; + smp_wmb(); + bl->lock = 0; +} +#else /* !CONFIG_PARAVIRT */ +static inline int __raw_spin_is_locked(raw_spinlock_t *lock) +{ + return __ticket_spin_is_locked(lock); +} + +static inline int __raw_spin_is_contended(raw_spinlock_t *lock) +{ + return __ticket_spin_is_contended(lock); +} + +static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) +{ + __ticket_spin_lock(lock); +} + +static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) +{ + return __ticket_spin_trylock(lock); +} + +static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) +{ + __ticket_spin_unlock(lock); +} + +static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock, + unsigned long flags) +{ + __raw_spin_lock(lock); +} + +#endif /* CONFIG_PARAVIRT */ + +static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) +{ + while (__raw_spin_is_locked(lock)) + cpu_relax(); +} + +/* + * Read-write spinlocks, allowing multiple readers + * but only one writer. + * + * NOTE! it is quite common to have readers in interrupts + * but no interrupt writers. For those circumstances we + * can "mix" irq-safe locks - any writer needs to get a + * irq-safe write-lock, but readers can get non-irqsafe + * read-locks. + * + * On x86, we implement read-write locks as a 32-bit counter + * with the high bit (sign) being the "contended" bit. + */ + +/** + * read_can_lock - would read_trylock() succeed? + * @lock: the rwlock in question. + */ +static inline int __raw_read_can_lock(raw_rwlock_t *lock) +{ + return (int)(lock)->lock > 0; +} + +/** + * write_can_lock - would write_trylock() succeed? + * @lock: the rwlock in question. + */ +static inline int __raw_write_can_lock(raw_rwlock_t *lock) +{ + return (lock)->lock == RW_LOCK_BIAS; +} + +static inline void __raw_read_lock(raw_rwlock_t *rw) +{ + asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t" + "jns 1f\n" + "call __read_lock_failed\n\t" + "1:\n" + ::LOCK_PTR_REG (rw) : "memory"); +} + +static inline void __raw_write_lock(raw_rwlock_t *rw) +{ + asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t" + "jz 1f\n" + "call __write_lock_failed\n\t" + "1:\n" + ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory"); +} + +static inline int __raw_read_trylock(raw_rwlock_t *lock) +{ + atomic_t *count = (atomic_t *)lock; + + atomic_dec(count); + if (atomic_read(count) >= 0) + return 1; + atomic_inc(count); + return 0; +} + +static inline int __raw_write_trylock(raw_rwlock_t *lock) +{ + atomic_t *count = (atomic_t *)lock; + + if (atomic_sub_and_test(RW_LOCK_BIAS, count)) + return 1; + atomic_add(RW_LOCK_BIAS, count); + return 0; +} + +static inline void __raw_read_unlock(raw_rwlock_t *rw) +{ + asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory"); +} + +static inline void __raw_write_unlock(raw_rwlock_t *rw) +{ + asm volatile(LOCK_PREFIX "addl %1, %0" + : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory"); +} + +#define _raw_spin_relax(lock) cpu_relax() +#define _raw_read_relax(lock) cpu_relax() +#define _raw_write_relax(lock) cpu_relax() + +#endif /* _ASM_X86_SPINLOCK_H */ diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h new file mode 100644 index 00000000000..845f81c8709 --- /dev/null +++ b/arch/x86/include/asm/spinlock_types.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_SPINLOCK_TYPES_H +#define _ASM_X86_SPINLOCK_TYPES_H + +#ifndef __LINUX_SPINLOCK_TYPES_H +# error "please don't include this file directly" +#endif + +typedef struct raw_spinlock { + unsigned int slock; +} raw_spinlock_t; + +#define __RAW_SPIN_LOCK_UNLOCKED { 0 } + +typedef struct { + unsigned int lock; +} raw_rwlock_t; + +#define __RAW_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } + +#endif /* _ASM_X86_SPINLOCK_TYPES_H */ diff --git a/arch/x86/include/asm/srat.h b/arch/x86/include/asm/srat.h new file mode 100644 index 00000000000..b508d639d1a --- /dev/null +++ b/arch/x86/include/asm/srat.h @@ -0,0 +1,39 @@ +/* + * Some of the code in this file has been gleaned from the 64 bit + * discontigmem support code base. + * + * Copyright (C) 2002, IBM Corp. + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Send feedback to Pat Gaughen <gone@us.ibm.com> + */ + +#ifndef _ASM_X86_SRAT_H +#define _ASM_X86_SRAT_H + +#ifdef CONFIG_ACPI_NUMA +extern int get_memcfg_from_srat(void); +#else +static inline int get_memcfg_from_srat(void) +{ + return 0; +} +#endif + +#endif /* _ASM_X86_SRAT_H */ diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h new file mode 100644 index 00000000000..f517944b2b1 --- /dev/null +++ b/arch/x86/include/asm/stacktrace.h @@ -0,0 +1,21 @@ +#ifndef _ASM_X86_STACKTRACE_H +#define _ASM_X86_STACKTRACE_H + +extern int kstack_depth_to_print; + +/* Generic stack tracer with callbacks */ + +struct stacktrace_ops { + void (*warning)(void *data, char *msg); + /* msg must contain %s for the symbol */ + void (*warning_symbol)(void *data, char *msg, unsigned long symbol); + void (*address)(void *data, unsigned long address, int reliable); + /* On negative return stop dumping */ + int (*stack)(void *data, char *name); +}; + +void dump_trace(struct task_struct *tsk, struct pt_regs *regs, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data); + +#endif /* _ASM_X86_STACKTRACE_H */ diff --git a/arch/x86/include/asm/stat.h b/arch/x86/include/asm/stat.h new file mode 100644 index 00000000000..e0b1d9bbcbc --- /dev/null +++ b/arch/x86/include/asm/stat.h @@ -0,0 +1,114 @@ +#ifndef _ASM_X86_STAT_H +#define _ASM_X86_STAT_H + +#define STAT_HAVE_NSEC 1 + +#ifdef __i386__ +struct stat { + unsigned long st_dev; + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned long st_rdev; + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long __unused4; + unsigned long __unused5; +}; + +#define STAT64_HAS_BROKEN_ST_INO 1 + +/* This matches struct stat64 in glibc2.1, hence the absolutely + * insane amounts of padding around dev_t's. + */ +struct stat64 { + unsigned long long st_dev; + unsigned char __pad0[4]; + + unsigned long __st_ino; + + unsigned int st_mode; + unsigned int st_nlink; + + unsigned long st_uid; + unsigned long st_gid; + + unsigned long long st_rdev; + unsigned char __pad3[4]; + + long long st_size; + unsigned long st_blksize; + + /* Number 512-byte blocks allocated. */ + unsigned long long st_blocks; + + unsigned long st_atime; + unsigned long st_atime_nsec; + + unsigned long st_mtime; + unsigned int st_mtime_nsec; + + unsigned long st_ctime; + unsigned long st_ctime_nsec; + + unsigned long long st_ino; +}; + +#else /* __i386__ */ + +struct stat { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + + unsigned int st_mode; + unsigned int st_uid; + unsigned int st_gid; + unsigned int __pad0; + unsigned long st_rdev; + long st_size; + long st_blksize; + long st_blocks; /* Number 512-byte blocks allocated. */ + + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + long __unused[3]; +}; +#endif + +/* for 32bit emulation and 32 bit kernels */ +struct __old_kernel_stat { + unsigned short st_dev; + unsigned short st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; +#ifdef __i386__ + unsigned long st_size; + unsigned long st_atime; + unsigned long st_mtime; + unsigned long st_ctime; +#else + unsigned int st_size; + unsigned int st_atime; + unsigned int st_mtime; + unsigned int st_ctime; +#endif +}; + +#endif /* _ASM_X86_STAT_H */ diff --git a/arch/x86/include/asm/statfs.h b/arch/x86/include/asm/statfs.h new file mode 100644 index 00000000000..2d0adbf99a8 --- /dev/null +++ b/arch/x86/include/asm/statfs.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_STATFS_H +#define _ASM_X86_STATFS_H + +/* + * We need compat_statfs64 to be packed, because the i386 ABI won't + * add padding at the end to bring it to a multiple of 8 bytes, but + * the x86_64 ABI will. + */ +#define ARCH_PACK_COMPAT_STATFS64 __attribute__((packed,aligned(4))) + +#include <asm-generic/statfs.h> +#endif /* _ASM_X86_STATFS_H */ diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h new file mode 100644 index 00000000000..6dfd6d9373a --- /dev/null +++ b/arch/x86/include/asm/string.h @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "string_32.h" +#else +# include "string_64.h" +#endif diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h new file mode 100644 index 00000000000..0e0e3ba827f --- /dev/null +++ b/arch/x86/include/asm/string_32.h @@ -0,0 +1,326 @@ +#ifndef _ASM_X86_STRING_32_H +#define _ASM_X86_STRING_32_H + +#ifdef __KERNEL__ + +/* Let gcc decide whether to inline or use the out of line functions */ + +#define __HAVE_ARCH_STRCPY +extern char *strcpy(char *dest, const char *src); + +#define __HAVE_ARCH_STRNCPY +extern char *strncpy(char *dest, const char *src, size_t count); + +#define __HAVE_ARCH_STRCAT +extern char *strcat(char *dest, const char *src); + +#define __HAVE_ARCH_STRNCAT +extern char *strncat(char *dest, const char *src, size_t count); + +#define __HAVE_ARCH_STRCMP +extern int strcmp(const char *cs, const char *ct); + +#define __HAVE_ARCH_STRNCMP +extern int strncmp(const char *cs, const char *ct, size_t count); + +#define __HAVE_ARCH_STRCHR +extern char *strchr(const char *s, int c); + +#define __HAVE_ARCH_STRLEN +extern size_t strlen(const char *s); + +static __always_inline void *__memcpy(void *to, const void *from, size_t n) +{ + int d0, d1, d2; + asm volatile("rep ; movsl\n\t" + "movl %4,%%ecx\n\t" + "andl $3,%%ecx\n\t" + "jz 1f\n\t" + "rep ; movsb\n\t" + "1:" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from) + : "memory"); + return to; +} + +/* + * This looks ugly, but the compiler can optimize it totally, + * as the count is constant. + */ +static __always_inline void *__constant_memcpy(void *to, const void *from, + size_t n) +{ + long esi, edi; + if (!n) + return to; + + switch (n) { + case 1: + *(char *)to = *(char *)from; + return to; + case 2: + *(short *)to = *(short *)from; + return to; + case 4: + *(int *)to = *(int *)from; + return to; + + case 3: + *(short *)to = *(short *)from; + *((char *)to + 2) = *((char *)from + 2); + return to; + case 5: + *(int *)to = *(int *)from; + *((char *)to + 4) = *((char *)from + 4); + return to; + case 6: + *(int *)to = *(int *)from; + *((short *)to + 2) = *((short *)from + 2); + return to; + case 8: + *(int *)to = *(int *)from; + *((int *)to + 1) = *((int *)from + 1); + return to; + } + + esi = (long)from; + edi = (long)to; + if (n >= 5 * 4) { + /* large block: use rep prefix */ + int ecx; + asm volatile("rep ; movsl" + : "=&c" (ecx), "=&D" (edi), "=&S" (esi) + : "0" (n / 4), "1" (edi), "2" (esi) + : "memory" + ); + } else { + /* small block: don't clobber ecx + smaller code */ + if (n >= 4 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + if (n >= 3 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + if (n >= 2 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + if (n >= 1 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + } + switch (n % 4) { + /* tail */ + case 0: + return to; + case 1: + asm volatile("movsb" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + return to; + case 2: + asm volatile("movsw" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + return to; + default: + asm volatile("movsw\n\tmovsb" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + return to; + } +} + +#define __HAVE_ARCH_MEMCPY + +#ifdef CONFIG_X86_USE_3DNOW + +#include <asm/mmx.h> + +/* + * This CPU favours 3DNow strongly (eg AMD Athlon) + */ + +static inline void *__constant_memcpy3d(void *to, const void *from, size_t len) +{ + if (len < 512) + return __constant_memcpy(to, from, len); + return _mmx_memcpy(to, from, len); +} + +static inline void *__memcpy3d(void *to, const void *from, size_t len) +{ + if (len < 512) + return __memcpy(to, from, len); + return _mmx_memcpy(to, from, len); +} + +#define memcpy(t, f, n) \ + (__builtin_constant_p((n)) \ + ? __constant_memcpy3d((t), (f), (n)) \ + : __memcpy3d((t), (f), (n))) + +#else + +/* + * No 3D Now! + */ + +#define memcpy(t, f, n) \ + (__builtin_constant_p((n)) \ + ? __constant_memcpy((t), (f), (n)) \ + : __memcpy((t), (f), (n))) + +#endif + +#define __HAVE_ARCH_MEMMOVE +void *memmove(void *dest, const void *src, size_t n); + +#define memcmp __builtin_memcmp + +#define __HAVE_ARCH_MEMCHR +extern void *memchr(const void *cs, int c, size_t count); + +static inline void *__memset_generic(void *s, char c, size_t count) +{ + int d0, d1; + asm volatile("rep\n\t" + "stosb" + : "=&c" (d0), "=&D" (d1) + : "a" (c), "1" (s), "0" (count) + : "memory"); + return s; +} + +/* we might want to write optimized versions of these later */ +#define __constant_count_memset(s, c, count) __memset_generic((s), (c), (count)) + +/* + * memset(x, 0, y) is a reasonably common thing to do, so we want to fill + * things 32 bits at a time even when we don't know the size of the + * area at compile-time.. + */ +static __always_inline +void *__constant_c_memset(void *s, unsigned long c, size_t count) +{ + int d0, d1; + asm volatile("rep ; stosl\n\t" + "testb $2,%b3\n\t" + "je 1f\n\t" + "stosw\n" + "1:\ttestb $1,%b3\n\t" + "je 2f\n\t" + "stosb\n" + "2:" + : "=&c" (d0), "=&D" (d1) + : "a" (c), "q" (count), "0" (count/4), "1" ((long)s) + : "memory"); + return s; +} + +/* Added by Gertjan van Wingerde to make minix and sysv module work */ +#define __HAVE_ARCH_STRNLEN +extern size_t strnlen(const char *s, size_t count); +/* end of additional stuff */ + +#define __HAVE_ARCH_STRSTR +extern char *strstr(const char *cs, const char *ct); + +/* + * This looks horribly ugly, but the compiler can optimize it totally, + * as we by now know that both pattern and count is constant.. + */ +static __always_inline +void *__constant_c_and_count_memset(void *s, unsigned long pattern, + size_t count) +{ + switch (count) { + case 0: + return s; + case 1: + *(unsigned char *)s = pattern & 0xff; + return s; + case 2: + *(unsigned short *)s = pattern & 0xffff; + return s; + case 3: + *(unsigned short *)s = pattern & 0xffff; + *((unsigned char *)s + 2) = pattern & 0xff; + return s; + case 4: + *(unsigned long *)s = pattern; + return s; + } + +#define COMMON(x) \ + asm volatile("rep ; stosl" \ + x \ + : "=&c" (d0), "=&D" (d1) \ + : "a" (eax), "0" (count/4), "1" ((long)s) \ + : "memory") + + { + int d0, d1; +#if __GNUC__ == 4 && __GNUC_MINOR__ == 0 + /* Workaround for broken gcc 4.0 */ + register unsigned long eax asm("%eax") = pattern; +#else + unsigned long eax = pattern; +#endif + + switch (count % 4) { + case 0: + COMMON(""); + return s; + case 1: + COMMON("\n\tstosb"); + return s; + case 2: + COMMON("\n\tstosw"); + return s; + default: + COMMON("\n\tstosw\n\tstosb"); + return s; + } + } + +#undef COMMON +} + +#define __constant_c_x_memset(s, c, count) \ + (__builtin_constant_p(count) \ + ? __constant_c_and_count_memset((s), (c), (count)) \ + : __constant_c_memset((s), (c), (count))) + +#define __memset(s, c, count) \ + (__builtin_constant_p(count) \ + ? __constant_count_memset((s), (c), (count)) \ + : __memset_generic((s), (c), (count))) + +#define __HAVE_ARCH_MEMSET +#define memset(s, c, count) \ + (__builtin_constant_p(c) \ + ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ + (count)) \ + : __memset((s), (c), (count))) + +/* + * find the first occurrence of byte 'c', or 1 past the area if none + */ +#define __HAVE_ARCH_MEMSCAN +extern void *memscan(void *addr, int c, size_t size); + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_STRING_32_H */ diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h new file mode 100644 index 00000000000..2afe164bf1e --- /dev/null +++ b/arch/x86/include/asm/string_64.h @@ -0,0 +1,60 @@ +#ifndef _ASM_X86_STRING_64_H +#define _ASM_X86_STRING_64_H + +#ifdef __KERNEL__ + +/* Written 2002 by Andi Kleen */ + +/* Only used for special circumstances. Stolen from i386/string.h */ +static __always_inline void *__inline_memcpy(void *to, const void *from, size_t n) +{ + unsigned long d0, d1, d2; + asm volatile("rep ; movsl\n\t" + "testb $2,%b4\n\t" + "je 1f\n\t" + "movsw\n" + "1:\ttestb $1,%b4\n\t" + "je 2f\n\t" + "movsb\n" + "2:" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + : "0" (n / 4), "q" (n), "1" ((long)to), "2" ((long)from) + : "memory"); + return to; +} + +/* Even with __builtin_ the compiler may decide to use the out of line + function. */ + +#define __HAVE_ARCH_MEMCPY 1 +#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 +extern void *memcpy(void *to, const void *from, size_t len); +#else +extern void *__memcpy(void *to, const void *from, size_t len); +#define memcpy(dst, src, len) \ +({ \ + size_t __len = (len); \ + void *__ret; \ + if (__builtin_constant_p(len) && __len >= 64) \ + __ret = __memcpy((dst), (src), __len); \ + else \ + __ret = __builtin_memcpy((dst), (src), __len); \ + __ret; \ +}) +#endif + +#define __HAVE_ARCH_MEMSET +void *memset(void *s, int c, size_t n); + +#define __HAVE_ARCH_MEMMOVE +void *memmove(void *dest, const void *src, size_t count); + +int memcmp(const void *cs, const void *ct, size_t count); +size_t strlen(const char *s); +char *strcpy(char *dest, const char *src); +char *strcat(char *dest, const char *src); +int strcmp(const char *cs, const char *ct); + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_STRING_64_H */ diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h new file mode 100644 index 00000000000..9b3070f1c2a --- /dev/null +++ b/arch/x86/include/asm/summit/apic.h @@ -0,0 +1,184 @@ +#ifndef __ASM_SUMMIT_APIC_H +#define __ASM_SUMMIT_APIC_H + +#include <asm/smp.h> + +#define esr_disable (1) +#define NO_BALANCE_IRQ (0) + +/* In clustered mode, the high nibble of APIC ID is a cluster number. + * The low nibble is a 4-bit bitmap. */ +#define XAPIC_DEST_CPUS_SHIFT 4 +#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) +#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) + +#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) + +static inline cpumask_t target_cpus(void) +{ + /* CPU_MASK_ALL (0xff) has undefined behaviour with + * dest_LowestPrio mode logical clustered apic interrupt routing + * Just start on cpu 0. IRQ balancing will spread load + */ + return cpumask_of_cpu(0); +} + +#define INT_DELIVERY_MODE (dest_LowestPrio) +#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ + +static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return 0; +} + +/* we don't use the phys_cpu_present_map to indicate apicid presence */ +static inline unsigned long check_apicid_present(int bit) +{ + return 1; +} + +#define apicid_cluster(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) + +extern u8 cpu_2_logical_apicid[]; + +static inline void init_apic_ldr(void) +{ + unsigned long val, id; + int count = 0; + u8 my_id = (u8)hard_smp_processor_id(); + u8 my_cluster = (u8)apicid_cluster(my_id); +#ifdef CONFIG_SMP + u8 lid; + int i; + + /* Create logical APIC IDs by counting CPUs already in cluster. */ + for (count = 0, i = NR_CPUS; --i >= 0; ) { + lid = cpu_2_logical_apicid[i]; + if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) + ++count; + } +#endif + /* We only have a 4 wide bitmap in cluster mode. If a deranged + * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ + BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); + id = my_cluster | (1UL << count); + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + val |= SET_APIC_LOGICAL_ID(id); + apic_write(APIC_LDR, val); +} + +static inline int multi_timer_check(int apic, int irq) +{ + return 0; +} + +static inline int apic_id_registered(void) +{ + return 1; +} + +static inline void setup_apic_routing(void) +{ + printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", + nr_ioapics); +} + +static inline int apicid_to_node(int logical_apicid) +{ +#ifdef CONFIG_SMP + return apicid_2_node[hard_smp_processor_id()]; +#else + return 0; +#endif +} + +/* Mapping from cpu number to logical apicid */ +static inline int cpu_to_logical_apicid(int cpu) +{ +#ifdef CONFIG_SMP + if (cpu >= NR_CPUS) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +#else + return logical_smp_processor_id(); +#endif +} + +static inline int cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < NR_CPUS) + return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_id_map) +{ + /* For clustered we don't have a good way to do this yet - hack */ + return physids_promote(0x0F); +} + +static inline physid_mask_t apicid_to_cpu_present(int apicid) +{ + return physid_mask_of_physid(0); +} + +static inline void setup_portio_remap(void) +{ +} + +static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return 1; +} + +static inline void enable_apic_mode(void) +{ +} + +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +{ + int num_bits_set; + int cpus_found = 0; + int cpu; + int apicid; + + num_bits_set = cpus_weight(cpumask); + /* Return id to all */ + if (num_bits_set == NR_CPUS) + return (int) 0xFF; + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of TARGET_CPUS. + */ + cpu = first_cpu(cpumask); + apicid = cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { + if (cpu_isset(cpu, cpumask)) { + int new_apicid = cpu_to_logical_apicid(cpu); + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)){ + printk ("%s: Not a valid mask!\n", __func__); + return 0xFF; + } + apicid = apicid | new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + +/* cpuid returns the value latched in the HW at reset, not the APIC ID + * register's value. For any box whose BIOS changes APIC IDs, like + * clustered APIC systems, we must use hard_smp_processor_id. + * + * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. + */ +static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +{ + return hard_smp_processor_id() >> index_msb; +} + +#endif /* __ASM_SUMMIT_APIC_H */ diff --git a/arch/x86/include/asm/summit/apicdef.h b/arch/x86/include/asm/summit/apicdef.h new file mode 100644 index 00000000000..f3fbca1f61c --- /dev/null +++ b/arch/x86/include/asm/summit/apicdef.h @@ -0,0 +1,13 @@ +#ifndef __ASM_SUMMIT_APICDEF_H +#define __ASM_SUMMIT_APICDEF_H + +#define APIC_ID_MASK (0xFF<<24) + +static inline unsigned get_apic_id(unsigned long x) +{ + return (x>>24)&0xFF; +} + +#define GET_APIC_ID(x) get_apic_id(x) + +#endif diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h new file mode 100644 index 00000000000..53bd1e7bd7b --- /dev/null +++ b/arch/x86/include/asm/summit/ipi.h @@ -0,0 +1,25 @@ +#ifndef __ASM_SUMMIT_IPI_H +#define __ASM_SUMMIT_IPI_H + +void send_IPI_mask_sequence(cpumask_t mask, int vector); + +static inline void send_IPI_mask(cpumask_t mask, int vector) +{ + send_IPI_mask_sequence(mask, vector); +} + +static inline void send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); +} + +static inline void send_IPI_all(int vector) +{ + send_IPI_mask(cpu_online_map, vector); +} + +#endif /* __ASM_SUMMIT_IPI_H */ diff --git a/arch/x86/include/asm/summit/mpparse.h b/arch/x86/include/asm/summit/mpparse.h new file mode 100644 index 00000000000..013ce6fab2d --- /dev/null +++ b/arch/x86/include/asm/summit/mpparse.h @@ -0,0 +1,109 @@ +#ifndef __ASM_SUMMIT_MPPARSE_H +#define __ASM_SUMMIT_MPPARSE_H + +#include <asm/tsc.h> + +extern int use_cyclone; + +#ifdef CONFIG_X86_SUMMIT_NUMA +extern void setup_summit(void); +#else +#define setup_summit() {} +#endif + +static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid) +{ + if (!strncmp(oem, "IBM ENSW", 8) && + (!strncmp(productid, "VIGIL SMP", 9) + || !strncmp(productid, "EXA", 3) + || !strncmp(productid, "RUTHLESS SMP", 12))){ + mark_tsc_unstable("Summit based system"); + use_cyclone = 1; /*enable cyclone-timer*/ + setup_summit(); + return 1; + } + return 0; +} + +/* Hook from generic ACPI tables.c */ +static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (!strncmp(oem_id, "IBM", 3) && + (!strncmp(oem_table_id, "SERVIGIL", 8) + || !strncmp(oem_table_id, "EXA", 3))){ + mark_tsc_unstable("Summit based system"); + use_cyclone = 1; /*enable cyclone-timer*/ + setup_summit(); + return 1; + } + return 0; +} + +struct rio_table_hdr { + unsigned char version; /* Version number of this data structure */ + /* Version 3 adds chassis_num & WP_index */ + unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */ + unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */ +} __attribute__((packed)); + +struct scal_detail { + unsigned char node_id; /* Scalability Node ID */ + unsigned long CBAR; /* Address of 1MB register space */ + unsigned char port0node; /* Node ID port connected to: 0xFF=None */ + unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char port1node; /* Node ID port connected to: 0xFF = None */ + unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char port2node; /* Node ID port connected to: 0xFF = None */ + unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */ +} __attribute__((packed)); + +struct rio_detail { + unsigned char node_id; /* RIO Node ID */ + unsigned long BBAR; /* Address of 1MB register space */ + unsigned char type; /* Type of device */ + unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/ + /* For CYC: Node ID of Twister that owns this CYC */ + unsigned char port0node; /* Node ID port connected to: 0xFF=None */ + unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char port1node; /* Node ID port connected to: 0xFF=None */ + unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */ + /* For CYC: 0 */ + unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */ + /* = 0 : the XAPIC is not used, ie:*/ + /* ints fwded to another XAPIC */ + /* Bits1:7 Reserved */ + /* For CYC: Bits0:7 Reserved */ + unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */ + /* lower slot numbers/PCI bus numbers */ + /* For CYC: No meaning */ + unsigned char chassis_num; /* 1 based Chassis number */ + /* For LookOut WPEGs this field indicates the */ + /* Expansion Chassis #, enumerated from Boot */ + /* Node WPEG external port, then Boot Node CYC */ + /* external port, then Next Vigil chassis WPEG */ + /* external port, etc. */ + /* Shared Lookouts have only 1 chassis number (the */ + /* first one assigned) */ +} __attribute__((packed)); + + +typedef enum { + CompatTwister = 0, /* Compatibility Twister */ + AltTwister = 1, /* Alternate Twister of internal 8-way */ + CompatCyclone = 2, /* Compatibility Cyclone */ + AltCyclone = 3, /* Alternate Cyclone of internal 8-way */ + CompatWPEG = 4, /* Compatibility WPEG */ + AltWPEG = 5, /* Second Planar WPEG */ + LookOutAWPEG = 6, /* LookOut WPEG */ + LookOutBWPEG = 7, /* LookOut WPEG */ +} node_type; + +static inline int is_WPEG(struct rio_detail *rio){ + return (rio->type == CompatWPEG || rio->type == AltWPEG || + rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); +} + +#endif /* __ASM_SUMMIT_MPPARSE_H */ diff --git a/arch/x86/include/asm/suspend.h b/arch/x86/include/asm/suspend.h new file mode 100644 index 00000000000..9bd521fe457 --- /dev/null +++ b/arch/x86/include/asm/suspend.h @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "suspend_32.h" +#else +# include "suspend_64.h" +#endif diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h new file mode 100644 index 00000000000..a5074bd0f8b --- /dev/null +++ b/arch/x86/include/asm/suspend_32.h @@ -0,0 +1,51 @@ +/* + * Copyright 2001-2002 Pavel Machek <pavel@suse.cz> + * Based on code + * Copyright 2001 Patrick Mochel <mochel@osdl.org> + */ +#ifndef _ASM_X86_SUSPEND_32_H +#define _ASM_X86_SUSPEND_32_H + +#include <asm/desc.h> +#include <asm/i387.h> + +static inline int arch_prepare_suspend(void) { return 0; } + +/* image of the saved processor state */ +struct saved_context { + u16 es, fs, gs, ss; + unsigned long cr0, cr2, cr3, cr4; + struct desc_ptr gdt; + struct desc_ptr idt; + u16 ldt; + u16 tss; + unsigned long tr; + unsigned long safety; + unsigned long return_address; +} __attribute__((packed)); + +#ifdef CONFIG_ACPI +extern unsigned long saved_eip; +extern unsigned long saved_esp; +extern unsigned long saved_ebp; +extern unsigned long saved_ebx; +extern unsigned long saved_esi; +extern unsigned long saved_edi; + +static inline void acpi_save_register_state(unsigned long return_point) +{ + saved_eip = return_point; + asm volatile("movl %%esp,%0" : "=m" (saved_esp)); + asm volatile("movl %%ebp,%0" : "=m" (saved_ebp)); + asm volatile("movl %%ebx,%0" : "=m" (saved_ebx)); + asm volatile("movl %%edi,%0" : "=m" (saved_edi)); + asm volatile("movl %%esi,%0" : "=m" (saved_esi)); +} + +#define acpi_restore_register_state() do {} while (0) + +/* routines for saving/restoring kernel state */ +extern int acpi_save_state_mem(void); +#endif + +#endif /* _ASM_X86_SUSPEND_32_H */ diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h new file mode 100644 index 00000000000..06284f42b75 --- /dev/null +++ b/arch/x86/include/asm/suspend_64.h @@ -0,0 +1,52 @@ +/* + * Copyright 2001-2003 Pavel Machek <pavel@suse.cz> + * Based on code + * Copyright 2001 Patrick Mochel <mochel@osdl.org> + */ +#ifndef _ASM_X86_SUSPEND_64_H +#define _ASM_X86_SUSPEND_64_H + +#include <asm/desc.h> +#include <asm/i387.h> + +static inline int arch_prepare_suspend(void) +{ + return 0; +} + +/* + * Image of the saved processor state, used by the low level ACPI suspend to + * RAM code and by the low level hibernation code. + * + * If you modify it, fix arch/x86/kernel/acpi/wakeup_64.S and make sure that + * __save/__restore_processor_state(), defined in arch/x86/kernel/suspend_64.c, + * still work as required. + */ +struct saved_context { + struct pt_regs regs; + u16 ds, es, fs, gs, ss; + unsigned long gs_base, gs_kernel_base, fs_base; + unsigned long cr0, cr2, cr3, cr4, cr8; + unsigned long efer; + u16 gdt_pad; + u16 gdt_limit; + unsigned long gdt_base; + u16 idt_pad; + u16 idt_limit; + unsigned long idt_base; + u16 ldt; + u16 tss; + unsigned long tr; + unsigned long safety; + unsigned long return_address; +} __attribute__((packed)); + +#define loaddebug(thread,register) \ + set_debugreg((thread)->debugreg##register, register) + +/* routines for saving/restoring kernel state */ +extern int acpi_save_state_mem(void); +extern char core_restore_code; +extern char restore_registers; + +#endif /* _ASM_X86_SUSPEND_64_H */ diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h new file mode 100644 index 00000000000..51fb2c76ad7 --- /dev/null +++ b/arch/x86/include/asm/swiotlb.h @@ -0,0 +1,58 @@ +#ifndef _ASM_X86_SWIOTLB_H +#define _ASM_X86_SWIOTLB_H + +#include <asm/dma-mapping.h> + +/* SWIOTLB interface */ + +extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr, + size_t size, int dir); +extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags); +extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir); +extern void swiotlb_sync_single_for_cpu(struct device *hwdev, + dma_addr_t dev_addr, + size_t size, int dir); +extern void swiotlb_sync_single_for_device(struct device *hwdev, + dma_addr_t dev_addr, + size_t size, int dir); +extern void swiotlb_sync_single_range_for_cpu(struct device *hwdev, + dma_addr_t dev_addr, + unsigned long offset, + size_t size, int dir); +extern void swiotlb_sync_single_range_for_device(struct device *hwdev, + dma_addr_t dev_addr, + unsigned long offset, + size_t size, int dir); +extern void swiotlb_sync_sg_for_cpu(struct device *hwdev, + struct scatterlist *sg, int nelems, + int dir); +extern void swiotlb_sync_sg_for_device(struct device *hwdev, + struct scatterlist *sg, int nelems, + int dir); +extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, + int nents, int direction); +extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, + int nents, int direction); +extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); +extern void swiotlb_free_coherent(struct device *hwdev, size_t size, + void *vaddr, dma_addr_t dma_handle); +extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); +extern void swiotlb_init(void); + +extern int swiotlb_force; + +#ifdef CONFIG_SWIOTLB +extern int swiotlb; +extern void pci_swiotlb_init(void); +#else +#define swiotlb 0 +static inline void pci_swiotlb_init(void) +{ +} +#endif + +static inline void dma_mark_clean(void *addr, size_t size) {} + +#endif /* _ASM_X86_SWIOTLB_H */ diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h new file mode 100644 index 00000000000..9d09b4073b6 --- /dev/null +++ b/arch/x86/include/asm/sync_bitops.h @@ -0,0 +1,130 @@ +#ifndef _ASM_X86_SYNC_BITOPS_H +#define _ASM_X86_SYNC_BITOPS_H + +/* + * Copyright 1992, Linus Torvalds. + */ + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic. All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. + * + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ + +#define ADDR (*(volatile long *)addr) + +/** + * sync_set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void sync_set_bit(int nr, volatile unsigned long *addr) +{ + asm volatile("lock; btsl %1,%0" + : "+m" (ADDR) + : "Ir" (nr) + : "memory"); +} + +/** + * sync_clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * sync_clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static inline void sync_clear_bit(int nr, volatile unsigned long *addr) +{ + asm volatile("lock; btrl %1,%0" + : "+m" (ADDR) + : "Ir" (nr) + : "memory"); +} + +/** + * sync_change_bit - Toggle a bit in memory + * @nr: Bit to change + * @addr: Address to start counting from + * + * sync_change_bit() is atomic and may not be reordered. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void sync_change_bit(int nr, volatile unsigned long *addr) +{ + asm volatile("lock; btcl %1,%0" + : "+m" (ADDR) + : "Ir" (nr) + : "memory"); +} + +/** + * sync_test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int sync_test_and_set_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm volatile("lock; btsl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "+m" (ADDR) + : "Ir" (nr) : "memory"); + return oldbit; +} + +/** + * sync_test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int sync_test_and_clear_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm volatile("lock; btrl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "+m" (ADDR) + : "Ir" (nr) : "memory"); + return oldbit; +} + +/** + * sync_test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int sync_test_and_change_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm volatile("lock; btcl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "+m" (ADDR) + : "Ir" (nr) : "memory"); + return oldbit; +} + +#define sync_test_bit(nr, addr) test_bit(nr, addr) + +#undef ADDR + +#endif /* _ASM_X86_SYNC_BITOPS_H */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h new file mode 100644 index 00000000000..d82f39bb790 --- /dev/null +++ b/arch/x86/include/asm/syscall.h @@ -0,0 +1,213 @@ +/* + * Access to user system call parameters and results + * + * Copyright (C) 2008 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * See asm-generic/syscall.h for descriptions of what we must do here. + */ + +#ifndef _ASM_X86_SYSCALL_H +#define _ASM_X86_SYSCALL_H + +#include <linux/sched.h> +#include <linux/err.h> + +static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + /* + * We always sign-extend a -1 value being set here, + * so this is always either -1L or a syscall number. + */ + return regs->orig_ax; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + regs->ax = regs->orig_ax; +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + unsigned long error = regs->ax; +#ifdef CONFIG_IA32_EMULATION + /* + * TS_COMPAT is set for 32-bit syscall entries and then + * remains set until we return to user mode. + */ + if (task_thread_info(task)->status & TS_COMPAT) + /* + * Sign-extend the value so (int)-EFOO becomes (long)-EFOO + * and will match correctly in comparisons. + */ + error = (long) (int) error; +#endif + return IS_ERR_VALUE(error) ? error : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->ax; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + regs->ax = (long) error ?: val; +} + +#ifdef CONFIG_X86_32 + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + BUG_ON(i + n > 6); + memcpy(args, ®s->bx + i, n * sizeof(args[0])); +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ + BUG_ON(i + n > 6); + memcpy(®s->bx + i, args, n * sizeof(args[0])); +} + +#else /* CONFIG_X86_64 */ + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ +# ifdef CONFIG_IA32_EMULATION + if (task_thread_info(task)->status & TS_COMPAT) + switch (i) { + case 0: + if (!n--) break; + *args++ = regs->bx; + case 1: + if (!n--) break; + *args++ = regs->cx; + case 2: + if (!n--) break; + *args++ = regs->dx; + case 3: + if (!n--) break; + *args++ = regs->si; + case 4: + if (!n--) break; + *args++ = regs->di; + case 5: + if (!n--) break; + *args++ = regs->bp; + case 6: + if (!n--) break; + default: + BUG(); + break; + } + else +# endif + switch (i) { + case 0: + if (!n--) break; + *args++ = regs->di; + case 1: + if (!n--) break; + *args++ = regs->si; + case 2: + if (!n--) break; + *args++ = regs->dx; + case 3: + if (!n--) break; + *args++ = regs->r10; + case 4: + if (!n--) break; + *args++ = regs->r8; + case 5: + if (!n--) break; + *args++ = regs->r9; + case 6: + if (!n--) break; + default: + BUG(); + break; + } +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ +# ifdef CONFIG_IA32_EMULATION + if (task_thread_info(task)->status & TS_COMPAT) + switch (i) { + case 0: + if (!n--) break; + regs->bx = *args++; + case 1: + if (!n--) break; + regs->cx = *args++; + case 2: + if (!n--) break; + regs->dx = *args++; + case 3: + if (!n--) break; + regs->si = *args++; + case 4: + if (!n--) break; + regs->di = *args++; + case 5: + if (!n--) break; + regs->bp = *args++; + case 6: + if (!n--) break; + default: + BUG(); + break; + } + else +# endif + switch (i) { + case 0: + if (!n--) break; + regs->di = *args++; + case 1: + if (!n--) break; + regs->si = *args++; + case 2: + if (!n--) break; + regs->dx = *args++; + case 3: + if (!n--) break; + regs->r10 = *args++; + case 4: + if (!n--) break; + regs->r8 = *args++; + case 5: + if (!n--) break; + regs->r9 = *args++; + case 6: + if (!n--) break; + default: + BUG(); + break; + } +} + +#endif /* CONFIG_X86_32 */ + +#endif /* _ASM_X86_SYSCALL_H */ diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h new file mode 100644 index 00000000000..87803da4401 --- /dev/null +++ b/arch/x86/include/asm/syscalls.h @@ -0,0 +1,93 @@ +/* + * syscalls.h - Linux syscall interfaces (arch-specific) + * + * Copyright (c) 2008 Jaswinder Singh + * + * This file is released under the GPLv2. + * See the file COPYING for more details. + */ + +#ifndef _ASM_X86_SYSCALLS_H +#define _ASM_X86_SYSCALLS_H + +#include <linux/compiler.h> +#include <linux/linkage.h> +#include <linux/types.h> +#include <linux/signal.h> + +/* Common in X86_32 and X86_64 */ +/* kernel/ioport.c */ +asmlinkage long sys_ioperm(unsigned long, unsigned long, int); + +/* X86_32 only */ +#ifdef CONFIG_X86_32 +/* kernel/process_32.c */ +asmlinkage int sys_fork(struct pt_regs); +asmlinkage int sys_clone(struct pt_regs); +asmlinkage int sys_vfork(struct pt_regs); +asmlinkage int sys_execve(struct pt_regs); + +/* kernel/signal_32.c */ +asmlinkage int sys_sigsuspend(int, int, old_sigset_t); +asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, + struct old_sigaction __user *); +asmlinkage int sys_sigaltstack(unsigned long); +asmlinkage unsigned long sys_sigreturn(unsigned long); +asmlinkage int sys_rt_sigreturn(unsigned long); + +/* kernel/ioport.c */ +asmlinkage long sys_iopl(unsigned long); + +/* kernel/ldt.c */ +asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); + +/* kernel/sys_i386_32.c */ +asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long); +struct mmap_arg_struct; +asmlinkage int old_mmap(struct mmap_arg_struct __user *); +struct sel_arg_struct; +asmlinkage int old_select(struct sel_arg_struct __user *); +asmlinkage int sys_ipc(uint, int, int, int, void __user *, long); +struct old_utsname; +asmlinkage int sys_uname(struct old_utsname __user *); +struct oldold_utsname; +asmlinkage int sys_olduname(struct oldold_utsname __user *); + +/* kernel/tls.c */ +asmlinkage int sys_set_thread_area(struct user_desc __user *); +asmlinkage int sys_get_thread_area(struct user_desc __user *); + +/* kernel/vm86_32.c */ +asmlinkage int sys_vm86old(struct pt_regs); +asmlinkage int sys_vm86(struct pt_regs); + +#else /* CONFIG_X86_32 */ + +/* X86_64 only */ +/* kernel/process_64.c */ +asmlinkage long sys_fork(struct pt_regs *); +asmlinkage long sys_clone(unsigned long, unsigned long, + void __user *, void __user *, + struct pt_regs *); +asmlinkage long sys_vfork(struct pt_regs *); +asmlinkage long sys_execve(char __user *, char __user * __user *, + char __user * __user *, + struct pt_regs *); + +/* kernel/ioport.c */ +asmlinkage long sys_iopl(unsigned int, struct pt_regs *); + +/* kernel/signal_64.c */ +asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, + struct pt_regs *); +asmlinkage long sys_rt_sigreturn(struct pt_regs *); + +/* kernel/sys_x86_64.c */ +asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long); +struct new_utsname; +asmlinkage long sys_uname(struct new_utsname __user *); + +#endif /* CONFIG_X86_32 */ +#endif /* _ASM_X86_SYSCALLS_H */ diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h new file mode 100644 index 00000000000..2ed3f0f44ff --- /dev/null +++ b/arch/x86/include/asm/system.h @@ -0,0 +1,425 @@ +#ifndef _ASM_X86_SYSTEM_H +#define _ASM_X86_SYSTEM_H + +#include <asm/asm.h> +#include <asm/segment.h> +#include <asm/cpufeature.h> +#include <asm/cmpxchg.h> +#include <asm/nops.h> + +#include <linux/kernel.h> +#include <linux/irqflags.h> + +/* entries in ARCH_DLINFO: */ +#ifdef CONFIG_IA32_EMULATION +# define AT_VECTOR_SIZE_ARCH 2 +#else +# define AT_VECTOR_SIZE_ARCH 1 +#endif + +#ifdef CONFIG_X86_32 + +struct task_struct; /* one of the stranger aspects of C forward declarations */ +struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next); + +/* + * Saving eflags is important. It switches not only IOPL between tasks, + * it also protects other tasks from NT leaking through sysenter etc. + */ +#define switch_to(prev, next, last) \ +do { \ + /* \ + * Context-switching clobbers all registers, so we clobber \ + * them explicitly, via unused output variables. \ + * (EAX and EBP is not listed because EBP is saved/restored \ + * explicitly for wchan access and EAX is the return value of \ + * __switch_to()) \ + */ \ + unsigned long ebx, ecx, edx, esi, edi; \ + \ + asm volatile("pushfl\n\t" /* save flags */ \ + "pushl %%ebp\n\t" /* save EBP */ \ + "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ + "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ + "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ + "pushl %[next_ip]\n\t" /* restore EIP */ \ + "jmp __switch_to\n" /* regparm call */ \ + "1:\t" \ + "popl %%ebp\n\t" /* restore EBP */ \ + "popfl\n" /* restore flags */ \ + \ + /* output parameters */ \ + : [prev_sp] "=m" (prev->thread.sp), \ + [prev_ip] "=m" (prev->thread.ip), \ + "=a" (last), \ + \ + /* clobbered output registers: */ \ + "=b" (ebx), "=c" (ecx), "=d" (edx), \ + "=S" (esi), "=D" (edi) \ + \ + /* input parameters: */ \ + : [next_sp] "m" (next->thread.sp), \ + [next_ip] "m" (next->thread.ip), \ + \ + /* regparm parameters for __switch_to(): */ \ + [prev] "a" (prev), \ + [next] "d" (next) \ + \ + : /* reloaded segment registers */ \ + "memory"); \ +} while (0) + +/* + * disable hlt during certain critical i/o operations + */ +#define HAVE_DISABLE_HLT +#else +#define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t" +#define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t" + +/* frame pointer must be last for get_wchan */ +#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" +#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" + +#define __EXTRA_CLOBBER \ + , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ + "r12", "r13", "r14", "r15" + +/* Save restore flags to clear handle leaking NT */ +#define switch_to(prev, next, last) \ + asm volatile(SAVE_CONTEXT \ + "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ + "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ + "call __switch_to\n\t" \ + ".globl thread_return\n" \ + "thread_return:\n\t" \ + "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ + "movq %P[thread_info](%%rsi),%%r8\n\t" \ + LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ + "movq %%rax,%%rdi\n\t" \ + "jc ret_from_fork\n\t" \ + RESTORE_CONTEXT \ + : "=a" (last) \ + : [next] "S" (next), [prev] "D" (prev), \ + [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ + [ti_flags] "i" (offsetof(struct thread_info, flags)), \ + [tif_fork] "i" (TIF_FORK), \ + [thread_info] "i" (offsetof(struct task_struct, stack)), \ + [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ + : "memory", "cc" __EXTRA_CLOBBER) +#endif + +#ifdef __KERNEL__ +#define _set_base(addr, base) do { unsigned long __pr; \ +__asm__ __volatile__ ("movw %%dx,%1\n\t" \ + "rorl $16,%%edx\n\t" \ + "movb %%dl,%2\n\t" \ + "movb %%dh,%3" \ + :"=&d" (__pr) \ + :"m" (*((addr)+2)), \ + "m" (*((addr)+4)), \ + "m" (*((addr)+7)), \ + "0" (base) \ + ); } while (0) + +#define _set_limit(addr, limit) do { unsigned long __lr; \ +__asm__ __volatile__ ("movw %%dx,%1\n\t" \ + "rorl $16,%%edx\n\t" \ + "movb %2,%%dh\n\t" \ + "andb $0xf0,%%dh\n\t" \ + "orb %%dh,%%dl\n\t" \ + "movb %%dl,%2" \ + :"=&d" (__lr) \ + :"m" (*(addr)), \ + "m" (*((addr)+6)), \ + "0" (limit) \ + ); } while (0) + +#define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base)) +#define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1)) + +extern void native_load_gs_index(unsigned); + +/* + * Load a segment. Fall back on loading the zero + * segment if something goes wrong.. + */ +#define loadsegment(seg, value) \ + asm volatile("\n" \ + "1:\t" \ + "movl %k0,%%" #seg "\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3:\t" \ + "movl %k1, %%" #seg "\n\t" \ + "jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b,3b) \ + : :"r" (value), "r" (0) : "memory") + + +/* + * Save a segment register away + */ +#define savesegment(seg, value) \ + asm("mov %%" #seg ",%0":"=r" (value) : : "memory") + +static inline unsigned long get_limit(unsigned long segment) +{ + unsigned long __limit; + asm("lsll %1,%0" : "=r" (__limit) : "r" (segment)); + return __limit + 1; +} + +static inline void native_clts(void) +{ + asm volatile("clts"); +} + +/* + * Volatile isn't enough to prevent the compiler from reordering the + * read/write functions for the control registers and messing everything up. + * A memory clobber would solve the problem, but would prevent reordering of + * all loads stores around it, which can hurt performance. Solution is to + * use a variable and mimic reads and writes to it to enforce serialization + */ +static unsigned long __force_order; + +static inline unsigned long native_read_cr0(void) +{ + unsigned long val; + asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order)); + return val; +} + +static inline void native_write_cr0(unsigned long val) +{ + asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order)); +} + +static inline unsigned long native_read_cr2(void) +{ + unsigned long val; + asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order)); + return val; +} + +static inline void native_write_cr2(unsigned long val) +{ + asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order)); +} + +static inline unsigned long native_read_cr3(void) +{ + unsigned long val; + asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order)); + return val; +} + +static inline void native_write_cr3(unsigned long val) +{ + asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order)); +} + +static inline unsigned long native_read_cr4(void) +{ + unsigned long val; + asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); + return val; +} + +static inline unsigned long native_read_cr4_safe(void) +{ + unsigned long val; + /* This could fault if %cr4 does not exist. In x86_64, a cr4 always + * exists, so it will never fail. */ +#ifdef CONFIG_X86_32 + asm volatile("1: mov %%cr4, %0\n" + "2:\n" + _ASM_EXTABLE(1b, 2b) + : "=r" (val), "=m" (__force_order) : "0" (0)); +#else + val = native_read_cr4(); +#endif + return val; +} + +static inline void native_write_cr4(unsigned long val) +{ + asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order)); +} + +#ifdef CONFIG_X86_64 +static inline unsigned long native_read_cr8(void) +{ + unsigned long cr8; + asm volatile("movq %%cr8,%0" : "=r" (cr8)); + return cr8; +} + +static inline void native_write_cr8(unsigned long val) +{ + asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); +} +#endif + +static inline void native_wbinvd(void) +{ + asm volatile("wbinvd": : :"memory"); +} + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define read_cr0() (native_read_cr0()) +#define write_cr0(x) (native_write_cr0(x)) +#define read_cr2() (native_read_cr2()) +#define write_cr2(x) (native_write_cr2(x)) +#define read_cr3() (native_read_cr3()) +#define write_cr3(x) (native_write_cr3(x)) +#define read_cr4() (native_read_cr4()) +#define read_cr4_safe() (native_read_cr4_safe()) +#define write_cr4(x) (native_write_cr4(x)) +#define wbinvd() (native_wbinvd()) +#ifdef CONFIG_X86_64 +#define read_cr8() (native_read_cr8()) +#define write_cr8(x) (native_write_cr8(x)) +#define load_gs_index native_load_gs_index +#endif + +/* Clear the 'TS' bit */ +#define clts() (native_clts()) + +#endif/* CONFIG_PARAVIRT */ + +#define stts() write_cr0(read_cr0() | X86_CR0_TS) + +#endif /* __KERNEL__ */ + +static inline void clflush(volatile void *__p) +{ + asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); +} + +#define nop() asm volatile ("nop") + +void disable_hlt(void); +void enable_hlt(void); + +void cpu_idle_wait(void); + +extern unsigned long arch_align_stack(unsigned long sp); +extern void free_init_pages(char *what, unsigned long begin, unsigned long end); + +void default_idle(void); + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + */ +#ifdef CONFIG_X86_32 +/* + * Some non-Intel clones support out of order store. wmb() ceases to be a + * nop for these. + */ +#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) +#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) +#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) +#else +#define mb() asm volatile("mfence":::"memory") +#define rmb() asm volatile("lfence":::"memory") +#define wmb() asm volatile("sfence" ::: "memory") +#endif + +/** + * read_barrier_depends - Flush all pending reads that subsequents reads + * depend on. + * + * No data-dependent reads from memory-like regions are ever reordered + * over this barrier. All reads preceding this primitive are guaranteed + * to access memory (but not necessarily other CPUs' caches) before any + * reads following this primitive that depend on the data return by + * any of the preceding reads. This primitive is much lighter weight than + * rmb() on most CPUs, and is never heavier weight than is + * rmb(). + * + * These ordering constraints are respected by both the local CPU + * and the compiler. + * + * Ordering is not guaranteed by anything other than these primitives, + * not even by data dependencies. See the documentation for + * memory_barrier() for examples and URLs to more information. + * + * For example, the following code would force ordering (the initial + * value of "a" is zero, "b" is one, and "p" is "&a"): + * + * <programlisting> + * CPU 0 CPU 1 + * + * b = 2; + * memory_barrier(); + * p = &b; q = p; + * read_barrier_depends(); + * d = *q; + * </programlisting> + * + * because the read of "*q" depends on the read of "p" and these + * two reads are separated by a read_barrier_depends(). However, + * the following code, with the same initial values for "a" and "b": + * + * <programlisting> + * CPU 0 CPU 1 + * + * a = 2; + * memory_barrier(); + * b = 3; y = b; + * read_barrier_depends(); + * x = a; + * </programlisting> + * + * does not enforce ordering, since there is no data dependency between + * the read of "a" and the read of "b". Therefore, on some CPUs, such + * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() + * in cases like this where there are no data dependencies. + **/ + +#define read_barrier_depends() do { } while (0) + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#ifdef CONFIG_X86_PPRO_FENCE +# define smp_rmb() rmb() +#else +# define smp_rmb() barrier() +#endif +#ifdef CONFIG_X86_OOSTORE +# define smp_wmb() wmb() +#else +# define smp_wmb() barrier() +#endif +#define smp_read_barrier_depends() read_barrier_depends() +#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() do { } while (0) +#define set_mb(var, value) do { var = value; barrier(); } while (0) +#endif + +/* + * Stop RDTSC speculation. This is needed when you need to use RDTSC + * (or get_cycles or vread that possibly accesses the TSC) in a defined + * code region. + * + * (Could use an alternative three way for this if there was one.) + */ +static inline void rdtsc_barrier(void) +{ + alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); + alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); +} + +#endif /* _ASM_X86_SYSTEM_H */ diff --git a/arch/x86/include/asm/system_64.h b/arch/x86/include/asm/system_64.h new file mode 100644 index 00000000000..1159e091ad0 --- /dev/null +++ b/arch/x86/include/asm/system_64.h @@ -0,0 +1,22 @@ +#ifndef _ASM_X86_SYSTEM_64_H +#define _ASM_X86_SYSTEM_64_H + +#include <asm/segment.h> +#include <asm/cmpxchg.h> + + +static inline unsigned long read_cr8(void) +{ + unsigned long cr8; + asm volatile("movq %%cr8,%0" : "=r" (cr8)); + return cr8; +} + +static inline void write_cr8(unsigned long val) +{ + asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); +} + +#include <linux/irqflags.h> + +#endif /* _ASM_X86_SYSTEM_64_H */ diff --git a/arch/x86/include/asm/tce.h b/arch/x86/include/asm/tce.h new file mode 100644 index 00000000000..7a6677c1a71 --- /dev/null +++ b/arch/x86/include/asm/tce.h @@ -0,0 +1,48 @@ +/* + * This file is derived from asm-powerpc/tce.h. + * + * Copyright (C) IBM Corporation, 2006 + * + * Author: Muli Ben-Yehuda <muli@il.ibm.com> + * Author: Jon Mason <jdmason@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_X86_TCE_H +#define _ASM_X86_TCE_H + +extern unsigned int specified_table_size; +struct iommu_table; + +#define TCE_ENTRY_SIZE 8 /* in bytes */ + +#define TCE_READ_SHIFT 0 +#define TCE_WRITE_SHIFT 1 +#define TCE_HUBID_SHIFT 2 /* unused */ +#define TCE_RSVD_SHIFT 8 /* unused */ +#define TCE_RPN_SHIFT 12 +#define TCE_UNUSED_SHIFT 48 /* unused */ + +#define TCE_RPN_MASK 0x0000fffffffff000ULL + +extern void tce_build(struct iommu_table *tbl, unsigned long index, + unsigned int npages, unsigned long uaddr, int direction); +extern void tce_free(struct iommu_table *tbl, long index, unsigned int npages); +extern void * __init alloc_tce_table(void); +extern void __init free_tce_table(void *tbl); +extern int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar); + +#endif /* _ASM_X86_TCE_H */ diff --git a/arch/x86/include/asm/termbits.h b/arch/x86/include/asm/termbits.h new file mode 100644 index 00000000000..af1b70ea440 --- /dev/null +++ b/arch/x86/include/asm/termbits.h @@ -0,0 +1,198 @@ +#ifndef _ASM_X86_TERMBITS_H +#define _ASM_X86_TERMBITS_H + +#include <linux/posix_types.h> + +typedef unsigned char cc_t; +typedef unsigned int speed_t; +typedef unsigned int tcflag_t; + +#define NCCS 19 +struct termios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ +}; + +struct termios2 { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +struct ktermios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +/* c_cc characters */ +#define VINTR 0 +#define VQUIT 1 +#define VERASE 2 +#define VKILL 3 +#define VEOF 4 +#define VTIME 5 +#define VMIN 6 +#define VSWTC 7 +#define VSTART 8 +#define VSTOP 9 +#define VSUSP 10 +#define VEOL 11 +#define VREPRINT 12 +#define VDISCARD 13 +#define VWERASE 14 +#define VLNEXT 15 +#define VEOL2 16 + +/* c_iflag bits */ +#define IGNBRK 0000001 +#define BRKINT 0000002 +#define IGNPAR 0000004 +#define PARMRK 0000010 +#define INPCK 0000020 +#define ISTRIP 0000040 +#define INLCR 0000100 +#define IGNCR 0000200 +#define ICRNL 0000400 +#define IUCLC 0001000 +#define IXON 0002000 +#define IXANY 0004000 +#define IXOFF 0010000 +#define IMAXBEL 0020000 +#define IUTF8 0040000 + +/* c_oflag bits */ +#define OPOST 0000001 +#define OLCUC 0000002 +#define ONLCR 0000004 +#define OCRNL 0000010 +#define ONOCR 0000020 +#define ONLRET 0000040 +#define OFILL 0000100 +#define OFDEL 0000200 +#define NLDLY 0000400 +#define NL0 0000000 +#define NL1 0000400 +#define CRDLY 0003000 +#define CR0 0000000 +#define CR1 0001000 +#define CR2 0002000 +#define CR3 0003000 +#define TABDLY 0014000 +#define TAB0 0000000 +#define TAB1 0004000 +#define TAB2 0010000 +#define TAB3 0014000 +#define XTABS 0014000 +#define BSDLY 0020000 +#define BS0 0000000 +#define BS1 0020000 +#define VTDLY 0040000 +#define VT0 0000000 +#define VT1 0040000 +#define FFDLY 0100000 +#define FF0 0000000 +#define FF1 0100000 + +/* c_cflag bit meaning */ +#define CBAUD 0010017 +#define B0 0000000 /* hang up */ +#define B50 0000001 +#define B75 0000002 +#define B110 0000003 +#define B134 0000004 +#define B150 0000005 +#define B200 0000006 +#define B300 0000007 +#define B600 0000010 +#define B1200 0000011 +#define B1800 0000012 +#define B2400 0000013 +#define B4800 0000014 +#define B9600 0000015 +#define B19200 0000016 +#define B38400 0000017 +#define EXTA B19200 +#define EXTB B38400 +#define CSIZE 0000060 +#define CS5 0000000 +#define CS6 0000020 +#define CS7 0000040 +#define CS8 0000060 +#define CSTOPB 0000100 +#define CREAD 0000200 +#define PARENB 0000400 +#define PARODD 0001000 +#define HUPCL 0002000 +#define CLOCAL 0004000 +#define CBAUDEX 0010000 +#define BOTHER 0010000 /* non standard rate */ +#define B57600 0010001 +#define B115200 0010002 +#define B230400 0010003 +#define B460800 0010004 +#define B500000 0010005 +#define B576000 0010006 +#define B921600 0010007 +#define B1000000 0010010 +#define B1152000 0010011 +#define B1500000 0010012 +#define B2000000 0010013 +#define B2500000 0010014 +#define B3000000 0010015 +#define B3500000 0010016 +#define B4000000 0010017 +#define CIBAUD 002003600000 /* input baud rate */ +#define CMSPAR 010000000000 /* mark or space (stick) parity */ +#define CRTSCTS 020000000000 /* flow control */ + +#define IBSHIFT 16 /* Shift from CBAUD to CIBAUD */ + +/* c_lflag bits */ +#define ISIG 0000001 +#define ICANON 0000002 +#define XCASE 0000004 +#define ECHO 0000010 +#define ECHOE 0000020 +#define ECHOK 0000040 +#define ECHONL 0000100 +#define NOFLSH 0000200 +#define TOSTOP 0000400 +#define ECHOCTL 0001000 +#define ECHOPRT 0002000 +#define ECHOKE 0004000 +#define FLUSHO 0010000 +#define PENDIN 0040000 +#define IEXTEN 0100000 + +/* tcflow() and TCXONC use these */ +#define TCOOFF 0 +#define TCOON 1 +#define TCIOFF 2 +#define TCION 3 + +/* tcflush() and TCFLSH use these */ +#define TCIFLUSH 0 +#define TCOFLUSH 1 +#define TCIOFLUSH 2 + +/* tcsetattr uses these */ +#define TCSANOW 0 +#define TCSADRAIN 1 +#define TCSAFLUSH 2 + +#endif /* _ASM_X86_TERMBITS_H */ diff --git a/arch/x86/include/asm/termios.h b/arch/x86/include/asm/termios.h new file mode 100644 index 00000000000..f72956331c4 --- /dev/null +++ b/arch/x86/include/asm/termios.h @@ -0,0 +1,113 @@ +#ifndef _ASM_X86_TERMIOS_H +#define _ASM_X86_TERMIOS_H + +#include <asm/termbits.h> +#include <asm/ioctls.h> + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* modem lines */ +#define TIOCM_LE 0x001 +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_ST 0x008 +#define TIOCM_SR 0x010 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RNG 0x080 +#define TIOCM_DSR 0x100 +#define TIOCM_CD TIOCM_CAR +#define TIOCM_RI TIOCM_RNG +#define TIOCM_OUT1 0x2000 +#define TIOCM_OUT2 0x4000 +#define TIOCM_LOOP 0x8000 + +/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ + +#ifdef __KERNEL__ + +#include <asm/uaccess.h> + +/* intr=^C quit=^\ erase=del kill=^U + eof=^D vtime=\0 vmin=\1 sxtc=\0 + start=^Q stop=^S susp=^Z eol=\0 + reprint=^R discard=^U werase=^W lnext=^V + eol2=\0 +*/ +#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" + +/* + * Translate a "termio" structure into a "termios". Ugh. + */ +#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \ + unsigned short __tmp; \ + get_user(__tmp,&(termio)->x); \ + *(unsigned short *) &(termios)->x = __tmp; \ +} + +static inline int user_termio_to_kernel_termios(struct ktermios *termios, + struct termio __user *termio) +{ + SET_LOW_TERMIOS_BITS(termios, termio, c_iflag); + SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); + SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); + SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); + return copy_from_user(termios->c_cc, termio->c_cc, NCC); +} + +/* + * Translate a "termios" structure into a "termio". Ugh. + */ +static inline int kernel_termios_to_user_termio(struct termio __user *termio, + struct ktermios *termios) +{ + put_user((termios)->c_iflag, &(termio)->c_iflag); + put_user((termios)->c_oflag, &(termio)->c_oflag); + put_user((termios)->c_cflag, &(termio)->c_cflag); + put_user((termios)->c_lflag, &(termio)->c_lflag); + put_user((termios)->c_line, &(termio)->c_line); + return copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); +} + +static inline int user_termios_to_kernel_termios(struct ktermios *k, + struct termios2 __user *u) +{ + return copy_from_user(k, u, sizeof(struct termios2)); +} + +static inline int kernel_termios_to_user_termios(struct termios2 __user *u, + struct ktermios *k) +{ + return copy_to_user(u, k, sizeof(struct termios2)); +} + +static inline int user_termios_to_kernel_termios_1(struct ktermios *k, + struct termios __user *u) +{ + return copy_from_user(k, u, sizeof(struct termios)); +} + +static inline int kernel_termios_to_user_termios_1(struct termios __user *u, + struct ktermios *k) +{ + return copy_to_user(u, k, sizeof(struct termios)); +} + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_TERMIOS_H */ diff --git a/arch/x86/include/asm/therm_throt.h b/arch/x86/include/asm/therm_throt.h new file mode 100644 index 00000000000..c62349ee786 --- /dev/null +++ b/arch/x86/include/asm/therm_throt.h @@ -0,0 +1,9 @@ +#ifndef _ASM_X86_THERM_THROT_H +#define _ASM_X86_THERM_THROT_H + +#include <asm/atomic.h> + +extern atomic_t therm_throt_en; +int therm_throt_process(int curr); + +#endif /* _ASM_X86_THERM_THROT_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h new file mode 100644 index 00000000000..e44d379faad --- /dev/null +++ b/arch/x86/include/asm/thread_info.h @@ -0,0 +1,264 @@ +/* thread_info.h: low-level thread information + * + * Copyright (C) 2002 David Howells (dhowells@redhat.com) + * - Incorporating suggestions made by Linus Torvalds and Dave Miller + */ + +#ifndef _ASM_X86_THREAD_INFO_H +#define _ASM_X86_THREAD_INFO_H + +#include <linux/compiler.h> +#include <asm/page.h> +#include <asm/types.h> + +/* + * low level task data that entry.S needs immediate access to + * - this struct should fit entirely inside of one cache line + * - this struct shares the supervisor stack pages + */ +#ifndef __ASSEMBLY__ +struct task_struct; +struct exec_domain; +#include <asm/processor.h> + +struct thread_info { + struct task_struct *task; /* main task structure */ + struct exec_domain *exec_domain; /* execution domain */ + unsigned long flags; /* low level flags */ + __u32 status; /* thread synchronous flags */ + __u32 cpu; /* current CPU */ + int preempt_count; /* 0 => preemptable, + <0 => BUG */ + mm_segment_t addr_limit; + struct restart_block restart_block; + void __user *sysenter_return; +#ifdef CONFIG_X86_32 + unsigned long previous_esp; /* ESP of the previous stack in + case of nested (IRQ) stacks + */ + __u8 supervisor_stack[0]; +#endif +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = 1, \ + .addr_limit = KERNEL_DS, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + +#else /* !__ASSEMBLY__ */ + +#include <asm/asm-offsets.h> + +#endif + +/* + * thread information flags + * - these are process state flags that various assembly files + * may need to access + * - pending work-to-be-done flags are in LSW + * - other flags in MSW + * Warning: layout of LSW is hardcoded in entry.S + */ +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ +#define TIF_IRET 5 /* force IRET */ +#define TIF_SYSCALL_EMU 6 /* syscall emulation active */ +#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ +#define TIF_SECCOMP 8 /* secure computing */ +#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ +#define TIF_NOTSC 16 /* TSC is not accessible in userland */ +#define TIF_IA32 17 /* 32bit process */ +#define TIF_FORK 18 /* ret_from_fork */ +#define TIF_ABI_PENDING 19 +#define TIF_MEMDIE 20 +#define TIF_DEBUG 21 /* uses debug registers */ +#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ +#define TIF_FREEZE 23 /* is freezing for suspend */ +#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ +#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ +#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ +#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ + +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_IRET (1 << TIF_IRET) +#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) +#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) +#define _TIF_NOTSC (1 << TIF_NOTSC) +#define _TIF_IA32 (1 << TIF_IA32) +#define _TIF_FORK (1 << TIF_FORK) +#define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING) +#define _TIF_DEBUG (1 << TIF_DEBUG) +#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) +#define _TIF_FREEZE (1 << TIF_FREEZE) +#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) +#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) +#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) +#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) + +/* work to do in syscall_trace_enter() */ +#define _TIF_WORK_SYSCALL_ENTRY \ + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | \ + _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP) + +/* work to do in syscall_trace_leave() */ +#define _TIF_WORK_SYSCALL_EXIT \ + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP) + +/* work to do on interrupt/exception return */ +#define _TIF_WORK_MASK \ + (0x0000FFFF & \ + ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT| \ + _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU)) + +/* work to do on any return to user space */ +#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) + +/* Only used for 64 bit */ +#define _TIF_DO_NOTIFY_MASK \ + (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) + +/* flags to check in __switch_to() */ +#define _TIF_WORK_CTXSW \ + (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \ + _TIF_NOTSC) + +#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) + +#define PREEMPT_ACTIVE 0x10000000 + +/* thread information allocation */ +#ifdef CONFIG_DEBUG_STACK_USAGE +#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO) +#else +#define THREAD_FLAGS GFP_KERNEL +#endif + +#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR + +#define alloc_thread_info(tsk) \ + ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) + +#ifdef CONFIG_X86_32 + +#define STACK_WARN (THREAD_SIZE/8) +/* + * macros/functions for gaining access to the thread information structure + * + * preempt_count needs to be 1 initially, until the scheduler is functional. + */ +#ifndef __ASSEMBLY__ + + +/* how to get the current stack pointer from C */ +register unsigned long current_stack_pointer asm("esp") __used; + +/* how to get the thread information struct from C */ +static inline struct thread_info *current_thread_info(void) +{ + return (struct thread_info *) + (current_stack_pointer & ~(THREAD_SIZE - 1)); +} + +#else /* !__ASSEMBLY__ */ + +/* how to get the thread information struct from ASM */ +#define GET_THREAD_INFO(reg) \ + movl $-THREAD_SIZE, reg; \ + andl %esp, reg + +/* use this one if reg already contains %esp */ +#define GET_THREAD_INFO_WITH_ESP(reg) \ + andl $-THREAD_SIZE, reg + +#endif + +#else /* X86_32 */ + +#include <asm/pda.h> + +/* + * macros/functions for gaining access to the thread information structure + * preempt_count needs to be 1 initially, until the scheduler is functional. + */ +#ifndef __ASSEMBLY__ +static inline struct thread_info *current_thread_info(void) +{ + struct thread_info *ti; + ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE); + return ti; +} + +/* do not use in interrupt context */ +static inline struct thread_info *stack_thread_info(void) +{ + struct thread_info *ti; + asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); + return ti; +} + +#else /* !__ASSEMBLY__ */ + +/* how to get the thread information struct from ASM */ +#define GET_THREAD_INFO(reg) \ + movq %gs:pda_kernelstack,reg ; \ + subq $(THREAD_SIZE-PDA_STACKOFFSET),reg + +#endif + +#endif /* !X86_32 */ + +/* + * Thread-synchronous status. + * + * This is different from the flags in that nobody else + * ever touches our thread-synchronous status, so we don't + * have to worry about atomic accesses. + */ +#define TS_USEDFPU 0x0001 /* FPU was used by this task + this quantum (SMP) */ +#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ +#define TS_POLLING 0x0004 /* true if in idle loop + and not sleeping */ +#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ +#define TS_XSAVE 0x0010 /* Use xsave/xrstor */ + +#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) + +#ifndef __ASSEMBLY__ +#define HAVE_SET_RESTORE_SIGMASK 1 +static inline void set_restore_sigmask(void) +{ + struct thread_info *ti = current_thread_info(); + ti->status |= TS_RESTORE_SIGMASK; + set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags); +} +#endif /* !__ASSEMBLY__ */ + +#ifndef __ASSEMBLY__ +extern void arch_task_cache_init(void); +extern void free_thread_info(struct thread_info *ti); +extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); +#define arch_task_cache_init arch_task_cache_init +#endif +#endif /* _ASM_X86_THREAD_INFO_H */ diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h new file mode 100644 index 00000000000..50c733aac42 --- /dev/null +++ b/arch/x86/include/asm/time.h @@ -0,0 +1,63 @@ +#ifndef _ASM_X86_TIME_H +#define _ASM_X86_TIME_H + +extern void hpet_time_init(void); + +#include <asm/mc146818rtc.h> +#ifdef CONFIG_X86_32 +#include <linux/efi.h> + +static inline unsigned long native_get_wallclock(void) +{ + unsigned long retval; + + if (efi_enabled) + retval = efi_get_time(); + else + retval = mach_get_cmos_time(); + + return retval; +} + +static inline int native_set_wallclock(unsigned long nowtime) +{ + int retval; + + if (efi_enabled) + retval = efi_set_rtc_mmss(nowtime); + else + retval = mach_set_rtc_mmss(nowtime); + + return retval; +} + +#else +extern void native_time_init_hook(void); + +static inline unsigned long native_get_wallclock(void) +{ + return mach_get_cmos_time(); +} + +static inline int native_set_wallclock(unsigned long nowtime) +{ + return mach_set_rtc_mmss(nowtime); +} + +#endif + +extern void time_init(void); + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else /* !CONFIG_PARAVIRT */ + +#define get_wallclock() native_get_wallclock() +#define set_wallclock(x) native_set_wallclock(x) +#define choose_time_init() hpet_time_init + +#endif /* CONFIG_PARAVIRT */ + +extern unsigned long __init calibrate_cpu(void); + +#endif /* _ASM_X86_TIME_H */ diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h new file mode 100644 index 00000000000..2bb6a835c45 --- /dev/null +++ b/arch/x86/include/asm/timer.h @@ -0,0 +1,66 @@ +#ifndef _ASM_X86_TIMER_H +#define _ASM_X86_TIMER_H +#include <linux/init.h> +#include <linux/pm.h> +#include <linux/percpu.h> + +#define TICK_SIZE (tick_nsec / 1000) + +unsigned long long native_sched_clock(void); +unsigned long native_calibrate_tsc(void); + +#ifdef CONFIG_X86_32 +extern int timer_ack; +extern int recalibrate_cpu_khz(void); +#endif /* CONFIG_X86_32 */ + +extern int no_timer_check; + +#ifndef CONFIG_PARAVIRT +#define calibrate_tsc() native_calibrate_tsc() +#endif + +/* Accelerators for sched_clock() + * convert from cycles(64bits) => nanoseconds (64bits) + * basic equation: + * ns = cycles / (freq / ns_per_sec) + * ns = cycles * (ns_per_sec / freq) + * ns = cycles * (10^9 / (cpu_khz * 10^3)) + * ns = cycles * (10^6 / cpu_khz) + * + * Then we use scaling math (suggested by george@mvista.com) to get: + * ns = cycles * (10^6 * SC / cpu_khz) / SC + * ns = cycles * cyc2ns_scale / SC + * + * And since SC is a constant power of two, we can convert the div + * into a shift. + * + * We can use khz divisor instead of mhz to keep a better precision, since + * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * (mathieu.desnoyers@polymtl.ca) + * + * -johnstul@us.ibm.com "math is hard, lets go shopping!" + */ + +DECLARE_PER_CPU(unsigned long, cyc2ns); + +#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ + +static inline unsigned long long __cycles_2_ns(unsigned long long cyc) +{ + return cyc * per_cpu(cyc2ns, smp_processor_id()) >> CYC2NS_SCALE_FACTOR; +} + +static inline unsigned long long cycles_2_ns(unsigned long long cyc) +{ + unsigned long long ns; + unsigned long flags; + + local_irq_save(flags); + ns = __cycles_2_ns(cyc); + local_irq_restore(flags); + + return ns; +} + +#endif /* _ASM_X86_TIMER_H */ diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h new file mode 100644 index 00000000000..1287dc1347d --- /dev/null +++ b/arch/x86/include/asm/timex.h @@ -0,0 +1,19 @@ +/* x86 architecture timex specifications */ +#ifndef _ASM_X86_TIMEX_H +#define _ASM_X86_TIMEX_H + +#include <asm/processor.h> +#include <asm/tsc.h> + +#ifdef CONFIG_X86_ELAN +# define PIT_TICK_RATE 1189200 /* AMD Elan has different frequency! */ +#elif defined(CONFIG_X86_RDC321X) +# define PIT_TICK_RATE 1041667 /* Underlying HZ for R8610 */ +#else +# define PIT_TICK_RATE 1193182 /* Underlying HZ */ +#endif +#define CLOCK_TICK_RATE PIT_TICK_RATE + +#define ARCH_HAS_READ_CURRENT_TIMER + +#endif /* _ASM_X86_TIMEX_H */ diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h new file mode 100644 index 00000000000..829215fef9e --- /dev/null +++ b/arch/x86/include/asm/tlb.h @@ -0,0 +1,11 @@ +#ifndef _ASM_X86_TLB_H +#define _ASM_X86_TLB_H + +#define tlb_start_vma(tlb, vma) do { } while (0) +#define tlb_end_vma(tlb, vma) do { } while (0) +#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) +#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) + +#include <asm-generic/tlb.h> + +#endif /* _ASM_X86_TLB_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h new file mode 100644 index 00000000000..0e7bbb54911 --- /dev/null +++ b/arch/x86/include/asm/tlbflush.h @@ -0,0 +1,178 @@ +#ifndef _ASM_X86_TLBFLUSH_H +#define _ASM_X86_TLBFLUSH_H + +#include <linux/mm.h> +#include <linux/sched.h> + +#include <asm/processor.h> +#include <asm/system.h> + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define __flush_tlb() __native_flush_tlb() +#define __flush_tlb_global() __native_flush_tlb_global() +#define __flush_tlb_single(addr) __native_flush_tlb_single(addr) +#endif + +static inline void __native_flush_tlb(void) +{ + write_cr3(read_cr3()); +} + +static inline void __native_flush_tlb_global(void) +{ + unsigned long flags; + unsigned long cr4; + + /* + * Read-modify-write to CR4 - protect it from preemption and + * from interrupts. (Use the raw variant because this code can + * be called from deep inside debugging code.) + */ + raw_local_irq_save(flags); + + cr4 = read_cr4(); + /* clear PGE */ + write_cr4(cr4 & ~X86_CR4_PGE); + /* write old PGE again and flush TLBs */ + write_cr4(cr4); + + raw_local_irq_restore(flags); +} + +static inline void __native_flush_tlb_single(unsigned long addr) +{ + asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); +} + +static inline void __flush_tlb_all(void) +{ + if (cpu_has_pge) + __flush_tlb_global(); + else + __flush_tlb(); +} + +static inline void __flush_tlb_one(unsigned long addr) +{ + if (cpu_has_invlpg) + __flush_tlb_single(addr); + else + __flush_tlb(); +} + +#ifdef CONFIG_X86_32 +# define TLB_FLUSH_ALL 0xffffffff +#else +# define TLB_FLUSH_ALL -1ULL +#endif + +/* + * TLB flushing: + * + * - flush_tlb() flushes the current mm struct TLBs + * - flush_tlb_all() flushes all processes TLBs + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages + * - flush_tlb_others(cpumask, mm, va) flushes TLBs on other cpus + * + * ..but the i386 has somewhat limited tlb flushing capabilities, + * and page-granular flushes are available only on i486 and up. + * + * x86-64 can only flush individual pages or full VMs. For a range flush + * we always do the full VM. Might be worth trying if for a small + * range a few INVLPGs in a row are a win. + */ + +#ifndef CONFIG_SMP + +#define flush_tlb() __flush_tlb() +#define flush_tlb_all() __flush_tlb_all() +#define local_flush_tlb() __flush_tlb() + +static inline void flush_tlb_mm(struct mm_struct *mm) +{ + if (mm == current->active_mm) + __flush_tlb(); +} + +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long addr) +{ + if (vma->vm_mm == current->active_mm) + __flush_tlb_one(addr); +} + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + if (vma->vm_mm == current->active_mm) + __flush_tlb(); +} + +static inline void native_flush_tlb_others(const cpumask_t *cpumask, + struct mm_struct *mm, + unsigned long va) +{ +} + +static inline void reset_lazy_tlbstate(void) +{ +} + +#else /* SMP */ + +#include <asm/smp.h> + +#define local_flush_tlb() __flush_tlb() + +extern void flush_tlb_all(void); +extern void flush_tlb_current_task(void); +extern void flush_tlb_mm(struct mm_struct *); +extern void flush_tlb_page(struct vm_area_struct *, unsigned long); + +#define flush_tlb() flush_tlb_current_task() + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + flush_tlb_mm(vma->vm_mm); +} + +void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, + unsigned long va); + +#define TLBSTATE_OK 1 +#define TLBSTATE_LAZY 2 + +#ifdef CONFIG_X86_32 +struct tlb_state { + struct mm_struct *active_mm; + int state; + char __cacheline_padding[L1_CACHE_BYTES-8]; +}; +DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); + +void reset_lazy_tlbstate(void); +#else +static inline void reset_lazy_tlbstate(void) +{ +} +#endif + +#endif /* SMP */ + +#ifndef CONFIG_PARAVIRT +#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(&mask, mm, va) +#endif + +static inline void flush_tlb_kernel_range(unsigned long start, + unsigned long end) +{ + flush_tlb_all(); +} + +#endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h new file mode 100644 index 00000000000..90ac7718469 --- /dev/null +++ b/arch/x86/include/asm/topology.h @@ -0,0 +1,258 @@ +/* + * Written by: Matthew Dobson, IBM Corporation + * + * Copyright (C) 2002, IBM Corp. + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Send feedback to <colpatch@us.ibm.com> + */ +#ifndef _ASM_X86_TOPOLOGY_H +#define _ASM_X86_TOPOLOGY_H + +#ifdef CONFIG_X86_32 +# ifdef CONFIG_X86_HT +# define ENABLE_TOPO_DEFINES +# endif +#else +# ifdef CONFIG_SMP +# define ENABLE_TOPO_DEFINES +# endif +#endif + +/* Node not present */ +#define NUMA_NO_NODE (-1) + +#ifdef CONFIG_NUMA +#include <linux/cpumask.h> +#include <asm/mpspec.h> + +#ifdef CONFIG_X86_32 + +/* Mappings between node number and cpus on that node. */ +extern cpumask_t node_to_cpumask_map[]; + +/* Mappings between logical cpu number and node number */ +extern int cpu_to_node_map[]; + +/* Returns the number of the node containing CPU 'cpu' */ +static inline int cpu_to_node(int cpu) +{ + return cpu_to_node_map[cpu]; +} +#define early_cpu_to_node(cpu) cpu_to_node(cpu) + +/* Returns a bitmask of CPUs on Node 'node'. + * + * Side note: this function creates the returned cpumask on the stack + * so with a high NR_CPUS count, excessive stack space is used. The + * node_to_cpumask_ptr function should be used whenever possible. + */ +static inline cpumask_t node_to_cpumask(int node) +{ + return node_to_cpumask_map[node]; +} + +#else /* CONFIG_X86_64 */ + +/* Mappings between node number and cpus on that node. */ +extern cpumask_t *node_to_cpumask_map; + +/* Mappings between logical cpu number and node number */ +DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); + +/* Returns the number of the current Node. */ +#define numa_node_id() read_pda(nodenumber) + +#ifdef CONFIG_DEBUG_PER_CPU_MAPS +extern int cpu_to_node(int cpu); +extern int early_cpu_to_node(int cpu); +extern const cpumask_t *_node_to_cpumask_ptr(int node); +extern cpumask_t node_to_cpumask(int node); + +#else /* !CONFIG_DEBUG_PER_CPU_MAPS */ + +/* Returns the number of the node containing CPU 'cpu' */ +static inline int cpu_to_node(int cpu) +{ + return per_cpu(x86_cpu_to_node_map, cpu); +} + +/* Same function but used if called before per_cpu areas are setup */ +static inline int early_cpu_to_node(int cpu) +{ + if (early_per_cpu_ptr(x86_cpu_to_node_map)) + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; + + return per_cpu(x86_cpu_to_node_map, cpu); +} + +/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ +static inline const cpumask_t *_node_to_cpumask_ptr(int node) +{ + return &node_to_cpumask_map[node]; +} + +/* Returns a bitmask of CPUs on Node 'node'. */ +static inline cpumask_t node_to_cpumask(int node) +{ + return node_to_cpumask_map[node]; +} + +#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ + +/* Replace default node_to_cpumask_ptr with optimized version */ +#define node_to_cpumask_ptr(v, node) \ + const cpumask_t *v = _node_to_cpumask_ptr(node) + +#define node_to_cpumask_ptr_next(v, node) \ + v = _node_to_cpumask_ptr(node) + +#endif /* CONFIG_X86_64 */ + +/* + * Returns the number of the node containing Node 'node'. This + * architecture is flat, so it is a pretty simple function! + */ +#define parent_node(node) (node) + +#define pcibus_to_node(bus) __pcibus_to_node(bus) +#define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus) + +#ifdef CONFIG_X86_32 +extern unsigned long node_start_pfn[]; +extern unsigned long node_end_pfn[]; +extern unsigned long node_remap_size[]; +#define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid]) + +# define SD_CACHE_NICE_TRIES 1 +# define SD_IDLE_IDX 1 +# define SD_NEWIDLE_IDX 2 +# define SD_FORKEXEC_IDX 0 + +#else + +# define SD_CACHE_NICE_TRIES 2 +# define SD_IDLE_IDX 2 +# define SD_NEWIDLE_IDX 2 +# define SD_FORKEXEC_IDX 1 + +#endif + +/* sched_domains SD_NODE_INIT for NUMAQ machines */ +#define SD_NODE_INIT (struct sched_domain) { \ + .min_interval = 8, \ + .max_interval = 32, \ + .busy_factor = 32, \ + .imbalance_pct = 125, \ + .cache_nice_tries = SD_CACHE_NICE_TRIES, \ + .busy_idx = 3, \ + .idle_idx = SD_IDLE_IDX, \ + .newidle_idx = SD_NEWIDLE_IDX, \ + .wake_idx = 1, \ + .forkexec_idx = SD_FORKEXEC_IDX, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_EXEC \ + | SD_BALANCE_FORK \ + | SD_SERIALIZE \ + | SD_WAKE_BALANCE, \ + .last_balance = jiffies, \ + .balance_interval = 1, \ +} + +#ifdef CONFIG_X86_64_ACPI_NUMA +extern int __node_distance(int, int); +#define node_distance(a, b) __node_distance(a, b) +#endif + +#else /* !CONFIG_NUMA */ + +#define numa_node_id() 0 +#define cpu_to_node(cpu) 0 +#define early_cpu_to_node(cpu) 0 + +static inline const cpumask_t *_node_to_cpumask_ptr(int node) +{ + return &cpu_online_map; +} +static inline cpumask_t node_to_cpumask(int node) +{ + return cpu_online_map; +} +static inline int node_to_first_cpu(int node) +{ + return first_cpu(cpu_online_map); +} + +/* Replace default node_to_cpumask_ptr with optimized version */ +#define node_to_cpumask_ptr(v, node) \ + const cpumask_t *v = _node_to_cpumask_ptr(node) + +#define node_to_cpumask_ptr_next(v, node) \ + v = _node_to_cpumask_ptr(node) +#endif + +#include <asm-generic/topology.h> + +#ifdef CONFIG_NUMA +/* Returns the number of the first CPU on Node 'node'. */ +static inline int node_to_first_cpu(int node) +{ + node_to_cpumask_ptr(mask, node); + return first_cpu(*mask); +} +#endif + +extern cpumask_t cpu_coregroup_map(int cpu); + +#ifdef ENABLE_TOPO_DEFINES +#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) +#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) +#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) +#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) + +/* indicates that pointers to the topology cpumask_t maps are valid */ +#define arch_provides_topology_pointers yes +#endif + +static inline void arch_fix_phys_package_id(int num, u32 slot) +{ +} + +struct pci_bus; +void set_pci_bus_resources_arch_default(struct pci_bus *b); + +#ifdef CONFIG_SMP +#define mc_capable() (boot_cpu_data.x86_max_cores > 1) +#define smt_capable() (smp_num_siblings > 1) +#endif + +#ifdef CONFIG_NUMA +extern int get_mp_bus_to_node(int busnum); +extern void set_mp_bus_to_node(int busnum, int node); +#else +static inline int get_mp_bus_to_node(int busnum) +{ + return 0; +} +static inline void set_mp_bus_to_node(int busnum, int node) +{ +} +#endif + +#endif /* _ASM_X86_TOPOLOGY_H */ diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h new file mode 100644 index 00000000000..fa0d79facdb --- /dev/null +++ b/arch/x86/include/asm/trampoline.h @@ -0,0 +1,21 @@ +#ifndef _ASM_X86_TRAMPOLINE_H +#define _ASM_X86_TRAMPOLINE_H + +#ifndef __ASSEMBLY__ + +/* + * Trampoline 80x86 program as an array. + */ +extern const unsigned char trampoline_data []; +extern const unsigned char trampoline_end []; +extern unsigned char *trampoline_base; + +extern unsigned long init_rsp; +extern unsigned long initial_code; + +#define TRAMPOLINE_BASE 0x6000 +extern unsigned long setup_trampoline(void); + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_TRAMPOLINE_H */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h new file mode 100644 index 00000000000..45dee286e45 --- /dev/null +++ b/arch/x86/include/asm/traps.h @@ -0,0 +1,81 @@ +#ifndef _ASM_X86_TRAPS_H +#define _ASM_X86_TRAPS_H + +#include <asm/debugreg.h> + +#ifdef CONFIG_X86_32 +#define dotraplinkage +#else +#define dotraplinkage asmlinkage +#endif + +asmlinkage void divide_error(void); +asmlinkage void debug(void); +asmlinkage void nmi(void); +asmlinkage void int3(void); +asmlinkage void overflow(void); +asmlinkage void bounds(void); +asmlinkage void invalid_op(void); +asmlinkage void device_not_available(void); +#ifdef CONFIG_X86_64 +asmlinkage void double_fault(void); +#endif +asmlinkage void coprocessor_segment_overrun(void); +asmlinkage void invalid_TSS(void); +asmlinkage void segment_not_present(void); +asmlinkage void stack_segment(void); +asmlinkage void general_protection(void); +asmlinkage void page_fault(void); +asmlinkage void spurious_interrupt_bug(void); +asmlinkage void coprocessor_error(void); +asmlinkage void alignment_check(void); +#ifdef CONFIG_X86_MCE +asmlinkage void machine_check(void); +#endif /* CONFIG_X86_MCE */ +asmlinkage void simd_coprocessor_error(void); + +dotraplinkage void do_divide_error(struct pt_regs *, long); +dotraplinkage void do_debug(struct pt_regs *, long); +dotraplinkage void do_nmi(struct pt_regs *, long); +dotraplinkage void do_int3(struct pt_regs *, long); +dotraplinkage void do_overflow(struct pt_regs *, long); +dotraplinkage void do_bounds(struct pt_regs *, long); +dotraplinkage void do_invalid_op(struct pt_regs *, long); +dotraplinkage void do_device_not_available(struct pt_regs *, long); +dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long); +dotraplinkage void do_invalid_TSS(struct pt_regs *, long); +dotraplinkage void do_segment_not_present(struct pt_regs *, long); +dotraplinkage void do_stack_segment(struct pt_regs *, long); +dotraplinkage void do_general_protection(struct pt_regs *, long); +dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); +dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long); +dotraplinkage void do_coprocessor_error(struct pt_regs *, long); +dotraplinkage void do_alignment_check(struct pt_regs *, long); +#ifdef CONFIG_X86_MCE +dotraplinkage void do_machine_check(struct pt_regs *, long); +#endif +dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); +#ifdef CONFIG_X86_32 +dotraplinkage void do_iret_error(struct pt_regs *, long); +#endif + +static inline int get_si_code(unsigned long condition) +{ + if (condition & DR_STEP) + return TRAP_TRACE; + else if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) + return TRAP_HWBKPT; + else + return TRAP_BRKPT; +} + +extern int panic_on_unrecovered_nmi; +extern int kstack_depth_to_print; + +#ifdef CONFIG_X86_32 +void math_error(void __user *); +unsigned long patch_espfix_desc(unsigned long, unsigned long); +asmlinkage void math_emulate(long); +#endif + +#endif /* _ASM_X86_TRAPS_H */ diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h new file mode 100644 index 00000000000..38ae163cc91 --- /dev/null +++ b/arch/x86/include/asm/tsc.h @@ -0,0 +1,62 @@ +/* + * x86 TSC related functions + */ +#ifndef _ASM_X86_TSC_H +#define _ASM_X86_TSC_H + +#include <asm/processor.h> + +#define NS_SCALE 10 /* 2^10, carefully chosen */ +#define US_SCALE 32 /* 2^32, arbitralrily chosen */ + +/* + * Standard way to access the cycle counter. + */ +typedef unsigned long long cycles_t; + +extern unsigned int cpu_khz; +extern unsigned int tsc_khz; + +extern void disable_TSC(void); + +static inline cycles_t get_cycles(void) +{ + unsigned long long ret = 0; + +#ifndef CONFIG_X86_TSC + if (!cpu_has_tsc) + return 0; +#endif + rdtscll(ret); + + return ret; +} + +static __always_inline cycles_t vget_cycles(void) +{ + /* + * We only do VDSOs on TSC capable CPUs, so this shouldnt + * access boot_cpu_data (which is not VDSO-safe): + */ +#ifndef CONFIG_X86_TSC + if (!cpu_has_tsc) + return 0; +#endif + return (cycles_t)__native_read_tsc(); +} + +extern void tsc_init(void); +extern void mark_tsc_unstable(char *reason); +extern int unsynchronized_tsc(void); +int check_tsc_unstable(void); + +/* + * Boot-time check whether the TSCs are synchronized across + * all CPUs/cores: + */ +extern void check_tsc_sync_source(int cpu); +extern void check_tsc_sync_target(void); + +extern int notsc_setup(char *); + +#endif /* _ASM_X86_TSC_H */ diff --git a/arch/x86/include/asm/types.h b/arch/x86/include/asm/types.h new file mode 100644 index 00000000000..e6f73632007 --- /dev/null +++ b/arch/x86/include/asm/types.h @@ -0,0 +1,36 @@ +#ifndef _ASM_X86_TYPES_H +#define _ASM_X86_TYPES_H + +#include <asm-generic/int-ll64.h> + +#ifndef __ASSEMBLY__ + +typedef unsigned short umode_t; + +#endif /* __ASSEMBLY__ */ + +/* + * These aren't exported outside the kernel to avoid name space clashes + */ +#ifdef __KERNEL__ + +#ifdef CONFIG_X86_32 +# define BITS_PER_LONG 32 +#else +# define BITS_PER_LONG 64 +#endif + +#ifndef __ASSEMBLY__ + +typedef u64 dma64_addr_t; +#if defined(CONFIG_X86_64) || defined(CONFIG_HIGHMEM64G) +/* DMA addresses come in 32-bit and 64-bit flavours. */ +typedef u64 dma_addr_t; +#else +typedef u32 dma_addr_t; +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_TYPES_H */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h new file mode 100644 index 00000000000..35c54921b2e --- /dev/null +++ b/arch/x86/include/asm/uaccess.h @@ -0,0 +1,454 @@ +#ifndef _ASM_X86_UACCESS_H +#define _ASM_X86_UACCESS_H +/* + * User space memory access functions + */ +#include <linux/errno.h> +#include <linux/compiler.h> +#include <linux/thread_info.h> +#include <linux/prefetch.h> +#include <linux/string.h> +#include <asm/asm.h> +#include <asm/page.h> + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +/* + * The fs value determines whether argument validity checking should be + * performed or not. If get_fs() == USER_DS, checking is performed, with + * get_fs() == KERNEL_DS, checking is bypassed. + * + * For historical reasons, these macros are grossly misnamed. + */ + +#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) + +#define KERNEL_DS MAKE_MM_SEG(-1UL) +#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) + +#define get_ds() (KERNEL_DS) +#define get_fs() (current_thread_info()->addr_limit) +#define set_fs(x) (current_thread_info()->addr_limit = (x)) + +#define segment_eq(a, b) ((a).seg == (b).seg) + +#define __addr_ok(addr) \ + ((unsigned long __force)(addr) < \ + (current_thread_info()->addr_limit.seg)) + +/* + * Test whether a block of memory is a valid user space address. + * Returns 0 if the range is valid, nonzero otherwise. + * + * This is equivalent to the following test: + * (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64) + * + * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... + */ + +#define __range_not_ok(addr, size) \ +({ \ + unsigned long flag, roksum; \ + __chk_user_ptr(addr); \ + asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \ + : "=&r" (flag), "=r" (roksum) \ + : "1" (addr), "g" ((long)(size)), \ + "rm" (current_thread_info()->addr_limit.seg)); \ + flag; \ +}) + +/** + * access_ok: - Checks if a user space pointer is valid + * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that + * %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe + * to write to a block, it is always safe to read from it. + * @addr: User space pointer to start of block to check + * @size: Size of block to check + * + * Context: User context only. This function may sleep. + * + * Checks if a pointer to a block of memory in user space is valid. + * + * Returns true (nonzero) if the memory block may be valid, false (zero) + * if it is definitely invalid. + * + * Note that, depending on architecture, this function probably just + * checks that the pointer is in the user space range - after calling + * this function, memory access functions may still return -EFAULT. + */ +#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0)) + +/* + * The exception table consists of pairs of addresses: the first is the + * address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. No registers are + * modified, so it is entirely up to the continuation code to figure out + * what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry { + unsigned long insn, fixup; +}; + +extern int fixup_exception(struct pt_regs *regs); + +/* + * These are the main single-value transfer routines. They automatically + * use the right size if we just have the right pointer type. + * + * This gets kind of ugly. We want to return _two_ values in "get_user()" + * and yet we don't want to do any pointers, because that is too much + * of a performance impact. Thus we have a few rather ugly macros here, + * and hide all the ugliness from the user. + * + * The "__xxx" versions of the user access functions are versions that + * do not verify the address space, that must have been done previously + * with a separate "access_ok()" call (this is used when we do multiple + * accesses to the same area of user memory). + */ + +extern int __get_user_1(void); +extern int __get_user_2(void); +extern int __get_user_4(void); +extern int __get_user_8(void); +extern int __get_user_bad(void); + +#define __get_user_x(size, ret, x, ptr) \ + asm volatile("call __get_user_" #size \ + : "=a" (ret),"=d" (x) \ + : "0" (ptr)) \ + +/* Careful: we have to cast the result to the type of the pointer + * for sign reasons */ + +/** + * get_user: - Get a simple variable from user space. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Returns zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ +#ifdef CONFIG_X86_32 +#define __get_user_8(__ret_gu, __val_gu, ptr) \ + __get_user_x(X, __ret_gu, __val_gu, ptr) +#else +#define __get_user_8(__ret_gu, __val_gu, ptr) \ + __get_user_x(8, __ret_gu, __val_gu, ptr) +#endif + +#define get_user(x, ptr) \ +({ \ + int __ret_gu; \ + unsigned long __val_gu; \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + __get_user_x(1, __ret_gu, __val_gu, ptr); \ + break; \ + case 2: \ + __get_user_x(2, __ret_gu, __val_gu, ptr); \ + break; \ + case 4: \ + __get_user_x(4, __ret_gu, __val_gu, ptr); \ + break; \ + case 8: \ + __get_user_8(__ret_gu, __val_gu, ptr); \ + break; \ + default: \ + __get_user_x(X, __ret_gu, __val_gu, ptr); \ + break; \ + } \ + (x) = (__typeof__(*(ptr)))__val_gu; \ + __ret_gu; \ +}) + +#define __put_user_x(size, x, ptr, __ret_pu) \ + asm volatile("call __put_user_" #size : "=a" (__ret_pu) \ + :"0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") + + + +#ifdef CONFIG_X86_32 +#define __put_user_u64(x, addr, err) \ + asm volatile("1: movl %%eax,0(%2)\n" \ + "2: movl %%edx,4(%2)\n" \ + "3:\n" \ + ".section .fixup,\"ax\"\n" \ + "4: movl %3,%0\n" \ + " jmp 3b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE(2b, 4b) \ + : "=r" (err) \ + : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err)) + +#define __put_user_x8(x, ptr, __ret_pu) \ + asm volatile("call __put_user_8" : "=a" (__ret_pu) \ + : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") +#else +#define __put_user_u64(x, ptr, retval) \ + __put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT) +#define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) +#endif + +extern void __put_user_bad(void); + +/* + * Strange magic calling convention: pointer in %ecx, + * value in %eax(:%edx), return value in %eax. clobbers %rbx + */ +extern void __put_user_1(void); +extern void __put_user_2(void); +extern void __put_user_4(void); +extern void __put_user_8(void); + +#ifdef CONFIG_X86_WP_WORKS_OK + +/** + * put_user: - Write a simple value into user space. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Returns zero on success, or -EFAULT on error. + */ +#define put_user(x, ptr) \ +({ \ + int __ret_pu; \ + __typeof__(*(ptr)) __pu_val; \ + __chk_user_ptr(ptr); \ + __pu_val = x; \ + switch (sizeof(*(ptr))) { \ + case 1: \ + __put_user_x(1, __pu_val, ptr, __ret_pu); \ + break; \ + case 2: \ + __put_user_x(2, __pu_val, ptr, __ret_pu); \ + break; \ + case 4: \ + __put_user_x(4, __pu_val, ptr, __ret_pu); \ + break; \ + case 8: \ + __put_user_x8(__pu_val, ptr, __ret_pu); \ + break; \ + default: \ + __put_user_x(X, __pu_val, ptr, __ret_pu); \ + break; \ + } \ + __ret_pu; \ +}) + +#define __put_user_size(x, ptr, size, retval, errret) \ +do { \ + retval = 0; \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: \ + __put_user_asm(x, ptr, retval, "b", "b", "iq", errret); \ + break; \ + case 2: \ + __put_user_asm(x, ptr, retval, "w", "w", "ir", errret); \ + break; \ + case 4: \ + __put_user_asm(x, ptr, retval, "l", "k", "ir", errret);\ + break; \ + case 8: \ + __put_user_u64((__typeof__(*ptr))(x), ptr, retval); \ + break; \ + default: \ + __put_user_bad(); \ + } \ +} while (0) + +#else + +#define __put_user_size(x, ptr, size, retval, errret) \ +do { \ + __typeof__(*(ptr))__pus_tmp = x; \ + retval = 0; \ + \ + if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0)) \ + retval = errret; \ +} while (0) + +#define put_user(x, ptr) \ +({ \ + int __ret_pu; \ + __typeof__(*(ptr))__pus_tmp = x; \ + __ret_pu = 0; \ + if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, \ + sizeof(*(ptr))) != 0)) \ + __ret_pu = -EFAULT; \ + __ret_pu; \ +}) +#endif + +#ifdef CONFIG_X86_32 +#define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad() +#else +#define __get_user_asm_u64(x, ptr, retval, errret) \ + __get_user_asm(x, ptr, retval, "q", "", "=r", errret) +#endif + +#define __get_user_size(x, ptr, size, retval, errret) \ +do { \ + retval = 0; \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: \ + __get_user_asm(x, ptr, retval, "b", "b", "=q", errret); \ + break; \ + case 2: \ + __get_user_asm(x, ptr, retval, "w", "w", "=r", errret); \ + break; \ + case 4: \ + __get_user_asm(x, ptr, retval, "l", "k", "=r", errret); \ + break; \ + case 8: \ + __get_user_asm_u64(x, ptr, retval, errret); \ + break; \ + default: \ + (x) = __get_user_bad(); \ + } \ +} while (0) + +#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ + asm volatile("1: mov"itype" %2,%"rtype"1\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: mov %3,%0\n" \ + " xor"itype" %"rtype"1,%"rtype"1\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : "=r" (err), ltype(x) \ + : "m" (__m(addr)), "i" (errret), "0" (err)) + +#define __put_user_nocheck(x, ptr, size) \ +({ \ + long __pu_err; \ + __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ + __pu_err; \ +}) + +#define __get_user_nocheck(x, ptr, size) \ +({ \ + long __gu_err; \ + unsigned long __gu_val; \ + __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ + __gu_err; \ +}) + +/* FIXME: this hack is definitely wrong -AK */ +struct __large_struct { unsigned long buf[100]; }; +#define __m(x) (*(struct __large_struct __user *)(x)) + +/* + * Tell gcc we read from memory instead of writing: this is because + * we do not write to any memory gcc knows about, so there are no + * aliasing issues. + */ +#define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ + asm volatile("1: mov"itype" %"rtype"1,%2\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: mov %3,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : "=r"(err) \ + : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err)) +/** + * __get_user: - Get a simple variable from user space, with less checking. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Returns zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ + +#define __get_user(x, ptr) \ + __get_user_nocheck((x), (ptr), sizeof(*(ptr))) +/** + * __put_user: - Write a simple value into user space, with less checking. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Returns zero on success, or -EFAULT on error. + */ + +#define __put_user(x, ptr) \ + __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) + +#define __get_user_unaligned __get_user +#define __put_user_unaligned __put_user + +/* + * movsl can be slow when source and dest are not both 8-byte aligned + */ +#ifdef CONFIG_X86_INTEL_USERCOPY +extern struct movsl_mask { + int mask; +} ____cacheline_aligned_in_smp movsl_mask; +#endif + +#define ARCH_HAS_NOCACHE_UACCESS 1 + +#ifdef CONFIG_X86_32 +# include "uaccess_32.h" +#else +# define ARCH_HAS_SEARCH_EXTABLE +# include "uaccess_64.h" +#endif + +#endif /* _ASM_X86_UACCESS_H */ + diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h new file mode 100644 index 00000000000..d095a3aeea1 --- /dev/null +++ b/arch/x86/include/asm/uaccess_32.h @@ -0,0 +1,218 @@ +#ifndef _ASM_X86_UACCESS_32_H +#define _ASM_X86_UACCESS_32_H + +/* + * User space memory access functions + */ +#include <linux/errno.h> +#include <linux/thread_info.h> +#include <linux/prefetch.h> +#include <linux/string.h> +#include <asm/asm.h> +#include <asm/page.h> + +unsigned long __must_check __copy_to_user_ll + (void __user *to, const void *from, unsigned long n); +unsigned long __must_check __copy_from_user_ll + (void *to, const void __user *from, unsigned long n); +unsigned long __must_check __copy_from_user_ll_nozero + (void *to, const void __user *from, unsigned long n); +unsigned long __must_check __copy_from_user_ll_nocache + (void *to, const void __user *from, unsigned long n); +unsigned long __must_check __copy_from_user_ll_nocache_nozero + (void *to, const void __user *from, unsigned long n); + +/** + * __copy_to_user_inatomic: - Copy a block of data into user space, with less checking. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. + * + * Copy data from kernel space to user space. Caller must check + * the specified block with access_ok() before calling this function. + * The caller should also make sure he pins the user space address + * so that the we don't result in page fault and sleep. + * + * Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault + * we return the initial request size (1, 2 or 4), as copy_*_user should do. + * If a store crosses a page boundary and gets a fault, the x86 will not write + * anything, so this is accurate. + */ + +static __always_inline unsigned long __must_check +__copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) +{ + if (__builtin_constant_p(n)) { + unsigned long ret; + + switch (n) { + case 1: + __put_user_size(*(u8 *)from, (u8 __user *)to, + 1, ret, 1); + return ret; + case 2: + __put_user_size(*(u16 *)from, (u16 __user *)to, + 2, ret, 2); + return ret; + case 4: + __put_user_size(*(u32 *)from, (u32 __user *)to, + 4, ret, 4); + return ret; + } + } + return __copy_to_user_ll(to, from, n); +} + +/** + * __copy_to_user: - Copy a block of data into user space, with less checking. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from kernel space to user space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ +static __always_inline unsigned long __must_check +__copy_to_user(void __user *to, const void *from, unsigned long n) +{ + might_sleep(); + return __copy_to_user_inatomic(to, from, n); +} + +static __always_inline unsigned long +__copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) +{ + /* Avoid zeroing the tail if the copy fails.. + * If 'n' is constant and 1, 2, or 4, we do still zero on a failure, + * but as the zeroing behaviour is only significant when n is not + * constant, that shouldn't be a problem. + */ + if (__builtin_constant_p(n)) { + unsigned long ret; + + switch (n) { + case 1: + __get_user_size(*(u8 *)to, from, 1, ret, 1); + return ret; + case 2: + __get_user_size(*(u16 *)to, from, 2, ret, 2); + return ret; + case 4: + __get_user_size(*(u32 *)to, from, 4, ret, 4); + return ret; + } + } + return __copy_from_user_ll_nozero(to, from, n); +} + +/** + * __copy_from_user: - Copy a block of data from user space, with less checking. + * @to: Destination address, in kernel space. + * @from: Source address, in user space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from user space to kernel space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + * + * If some data could not be copied, this function will pad the copied + * data to the requested size using zero bytes. + * + * An alternate version - __copy_from_user_inatomic() - may be called from + * atomic context and will fail rather than sleep. In this case the + * uncopied bytes will *NOT* be padded with zeros. See fs/filemap.h + * for explanation of why this is needed. + */ +static __always_inline unsigned long +__copy_from_user(void *to, const void __user *from, unsigned long n) +{ + might_sleep(); + if (__builtin_constant_p(n)) { + unsigned long ret; + + switch (n) { + case 1: + __get_user_size(*(u8 *)to, from, 1, ret, 1); + return ret; + case 2: + __get_user_size(*(u16 *)to, from, 2, ret, 2); + return ret; + case 4: + __get_user_size(*(u32 *)to, from, 4, ret, 4); + return ret; + } + } + return __copy_from_user_ll(to, from, n); +} + +static __always_inline unsigned long __copy_from_user_nocache(void *to, + const void __user *from, unsigned long n) +{ + might_sleep(); + if (__builtin_constant_p(n)) { + unsigned long ret; + + switch (n) { + case 1: + __get_user_size(*(u8 *)to, from, 1, ret, 1); + return ret; + case 2: + __get_user_size(*(u16 *)to, from, 2, ret, 2); + return ret; + case 4: + __get_user_size(*(u32 *)to, from, 4, ret, 4); + return ret; + } + } + return __copy_from_user_ll_nocache(to, from, n); +} + +static __always_inline unsigned long +__copy_from_user_inatomic_nocache(void *to, const void __user *from, + unsigned long n) +{ + return __copy_from_user_ll_nocache_nozero(to, from, n); +} + +unsigned long __must_check copy_to_user(void __user *to, + const void *from, unsigned long n); +unsigned long __must_check copy_from_user(void *to, + const void __user *from, + unsigned long n); +long __must_check strncpy_from_user(char *dst, const char __user *src, + long count); +long __must_check __strncpy_from_user(char *dst, + const char __user *src, long count); + +/** + * strlen_user: - Get the size of a string in user space. + * @str: The string to measure. + * + * Context: User context only. This function may sleep. + * + * Get the size of a NUL-terminated string in user space. + * + * Returns the size of the string INCLUDING the terminating NUL. + * On exception, returns 0. + * + * If there is a limit on the length of a valid string, you may wish to + * consider using strnlen_user() instead. + */ +#define strlen_user(str) strnlen_user(str, LONG_MAX) + +long strnlen_user(const char __user *str, long n); +unsigned long __must_check clear_user(void __user *mem, unsigned long len); +unsigned long __must_check __clear_user(void __user *mem, unsigned long len); + +#endif /* _ASM_X86_UACCESS_32_H */ diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h new file mode 100644 index 00000000000..664f15280f1 --- /dev/null +++ b/arch/x86/include/asm/uaccess_64.h @@ -0,0 +1,202 @@ +#ifndef _ASM_X86_UACCESS_64_H +#define _ASM_X86_UACCESS_64_H + +/* + * User space memory access functions + */ +#include <linux/compiler.h> +#include <linux/errno.h> +#include <linux/prefetch.h> +#include <linux/lockdep.h> +#include <asm/page.h> + +/* + * Copy To/From Userspace + */ + +/* Handles exceptions in both to and from, but doesn't do access_ok */ +__must_check unsigned long +copy_user_generic(void *to, const void *from, unsigned len); + +__must_check unsigned long +copy_to_user(void __user *to, const void *from, unsigned len); +__must_check unsigned long +copy_from_user(void *to, const void __user *from, unsigned len); +__must_check unsigned long +copy_in_user(void __user *to, const void __user *from, unsigned len); + +static __always_inline __must_check +int __copy_from_user(void *dst, const void __user *src, unsigned size) +{ + int ret = 0; + if (!__builtin_constant_p(size)) + return copy_user_generic(dst, (__force void *)src, size); + switch (size) { + case 1:__get_user_asm(*(u8 *)dst, (u8 __user *)src, + ret, "b", "b", "=q", 1); + return ret; + case 2:__get_user_asm(*(u16 *)dst, (u16 __user *)src, + ret, "w", "w", "=r", 2); + return ret; + case 4:__get_user_asm(*(u32 *)dst, (u32 __user *)src, + ret, "l", "k", "=r", 4); + return ret; + case 8:__get_user_asm(*(u64 *)dst, (u64 __user *)src, + ret, "q", "", "=r", 8); + return ret; + case 10: + __get_user_asm(*(u64 *)dst, (u64 __user *)src, + ret, "q", "", "=r", 16); + if (unlikely(ret)) + return ret; + __get_user_asm(*(u16 *)(8 + (char *)dst), + (u16 __user *)(8 + (char __user *)src), + ret, "w", "w", "=r", 2); + return ret; + case 16: + __get_user_asm(*(u64 *)dst, (u64 __user *)src, + ret, "q", "", "=r", 16); + if (unlikely(ret)) + return ret; + __get_user_asm(*(u64 *)(8 + (char *)dst), + (u64 __user *)(8 + (char __user *)src), + ret, "q", "", "=r", 8); + return ret; + default: + return copy_user_generic(dst, (__force void *)src, size); + } +} + +static __always_inline __must_check +int __copy_to_user(void __user *dst, const void *src, unsigned size) +{ + int ret = 0; + if (!__builtin_constant_p(size)) + return copy_user_generic((__force void *)dst, src, size); + switch (size) { + case 1:__put_user_asm(*(u8 *)src, (u8 __user *)dst, + ret, "b", "b", "iq", 1); + return ret; + case 2:__put_user_asm(*(u16 *)src, (u16 __user *)dst, + ret, "w", "w", "ir", 2); + return ret; + case 4:__put_user_asm(*(u32 *)src, (u32 __user *)dst, + ret, "l", "k", "ir", 4); + return ret; + case 8:__put_user_asm(*(u64 *)src, (u64 __user *)dst, + ret, "q", "", "ir", 8); + return ret; + case 10: + __put_user_asm(*(u64 *)src, (u64 __user *)dst, + ret, "q", "", "ir", 10); + if (unlikely(ret)) + return ret; + asm("":::"memory"); + __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst, + ret, "w", "w", "ir", 2); + return ret; + case 16: + __put_user_asm(*(u64 *)src, (u64 __user *)dst, + ret, "q", "", "ir", 16); + if (unlikely(ret)) + return ret; + asm("":::"memory"); + __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst, + ret, "q", "", "ir", 8); + return ret; + default: + return copy_user_generic((__force void *)dst, src, size); + } +} + +static __always_inline __must_check +int __copy_in_user(void __user *dst, const void __user *src, unsigned size) +{ + int ret = 0; + if (!__builtin_constant_p(size)) + return copy_user_generic((__force void *)dst, + (__force void *)src, size); + switch (size) { + case 1: { + u8 tmp; + __get_user_asm(tmp, (u8 __user *)src, + ret, "b", "b", "=q", 1); + if (likely(!ret)) + __put_user_asm(tmp, (u8 __user *)dst, + ret, "b", "b", "iq", 1); + return ret; + } + case 2: { + u16 tmp; + __get_user_asm(tmp, (u16 __user *)src, + ret, "w", "w", "=r", 2); + if (likely(!ret)) + __put_user_asm(tmp, (u16 __user *)dst, + ret, "w", "w", "ir", 2); + return ret; + } + + case 4: { + u32 tmp; + __get_user_asm(tmp, (u32 __user *)src, + ret, "l", "k", "=r", 4); + if (likely(!ret)) + __put_user_asm(tmp, (u32 __user *)dst, + ret, "l", "k", "ir", 4); + return ret; + } + case 8: { + u64 tmp; + __get_user_asm(tmp, (u64 __user *)src, + ret, "q", "", "=r", 8); + if (likely(!ret)) + __put_user_asm(tmp, (u64 __user *)dst, + ret, "q", "", "ir", 8); + return ret; + } + default: + return copy_user_generic((__force void *)dst, + (__force void *)src, size); + } +} + +__must_check long +strncpy_from_user(char *dst, const char __user *src, long count); +__must_check long +__strncpy_from_user(char *dst, const char __user *src, long count); +__must_check long strnlen_user(const char __user *str, long n); +__must_check long __strnlen_user(const char __user *str, long n); +__must_check long strlen_user(const char __user *str); +__must_check unsigned long clear_user(void __user *mem, unsigned long len); +__must_check unsigned long __clear_user(void __user *mem, unsigned long len); + +__must_check long __copy_from_user_inatomic(void *dst, const void __user *src, + unsigned size); + +static __must_check __always_inline int +__copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) +{ + return copy_user_generic((__force void *)dst, src, size); +} + +extern long __copy_user_nocache(void *dst, const void __user *src, + unsigned size, int zerorest); + +static inline int __copy_from_user_nocache(void *dst, const void __user *src, + unsigned size) +{ + might_sleep(); + return __copy_user_nocache(dst, src, size, 1); +} + +static inline int __copy_from_user_inatomic_nocache(void *dst, + const void __user *src, + unsigned size) +{ + return __copy_user_nocache(dst, src, size, 0); +} + +unsigned long +copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest); + +#endif /* _ASM_X86_UACCESS_64_H */ diff --git a/arch/x86/include/asm/ucontext.h b/arch/x86/include/asm/ucontext.h new file mode 100644 index 00000000000..87324cf439d --- /dev/null +++ b/arch/x86/include/asm/ucontext.h @@ -0,0 +1,18 @@ +#ifndef _ASM_X86_UCONTEXT_H +#define _ASM_X86_UCONTEXT_H + +#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state + * information in the memory layout pointed + * by the fpstate pointer in the ucontext's + * sigcontext struct (uc_mcontext). + */ + +struct ucontext { + unsigned long uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +#endif /* _ASM_X86_UCONTEXT_H */ diff --git a/arch/x86/include/asm/unaligned.h b/arch/x86/include/asm/unaligned.h new file mode 100644 index 00000000000..a7bd416b476 --- /dev/null +++ b/arch/x86/include/asm/unaligned.h @@ -0,0 +1,14 @@ +#ifndef _ASM_X86_UNALIGNED_H +#define _ASM_X86_UNALIGNED_H + +/* + * The x86 can do unaligned accesses itself. + */ + +#include <linux/unaligned/access_ok.h> +#include <linux/unaligned/generic.h> + +#define get_unaligned __get_unaligned_le +#define put_unaligned __put_unaligned_le + +#endif /* _ASM_X86_UNALIGNED_H */ diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h new file mode 100644 index 00000000000..2a58ed3e51d --- /dev/null +++ b/arch/x86/include/asm/unistd.h @@ -0,0 +1,13 @@ +#ifdef __KERNEL__ +# ifdef CONFIG_X86_32 +# include "unistd_32.h" +# else +# include "unistd_64.h" +# endif +#else +# ifdef __i386__ +# include "unistd_32.h" +# else +# include "unistd_64.h" +# endif +#endif diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h new file mode 100644 index 00000000000..f2bba78430a --- /dev/null +++ b/arch/x86/include/asm/unistd_32.h @@ -0,0 +1,379 @@ +#ifndef _ASM_X86_UNISTD_32_H +#define _ASM_X86_UNISTD_32_H + +/* + * This file contains the system call numbers. + */ + +#define __NR_restart_syscall 0 +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_waitpid 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_time 13 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_lchown 16 +#define __NR_break 17 +#define __NR_oldstat 18 +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_oldfstat 28 +#define __NR_pause 29 +#define __NR_utime 30 +#define __NR_stty 31 +#define __NR_gtty 32 +#define __NR_access 33 +#define __NR_nice 34 +#define __NR_ftime 35 +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_prof 44 +#define __NR_brk 45 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_signal 48 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_acct 51 +#define __NR_umount2 52 +#define __NR_lock 53 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_mpx 56 +#define __NR_setpgid 57 +#define __NR_ulimit 58 +#define __NR_oldolduname 59 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_sigaction 67 +#define __NR_sgetmask 68 +#define __NR_ssetmask 69 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_sigsuspend 72 +#define __NR_sigpending 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrlimit 76 /* Back compatible 2Gig limited rlimit */ +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_select 82 +#define __NR_symlink 83 +#define __NR_oldlstat 84 +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +#define __NR_readdir 89 +#define __NR_mmap 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_fchown 95 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_profil 98 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_ioperm 101 +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +#define __NR_olduname 109 +#define __NR_iopl 110 +#define __NR_vhangup 111 +#define __NR_idle 112 +#define __NR_vm86old 113 +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define __NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +#define __NR_sigreturn 119 +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +#define __NR_modify_ldt 123 +#define __NR_adjtimex 124 +#define __NR_mprotect 125 +#define __NR_sigprocmask 126 +#define __NR_create_module 127 +#define __NR_init_module 128 +#define __NR_delete_module 129 +#define __NR_get_kernel_syms 130 +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR_getdents 141 +#define __NR__newselect 142 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define __NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_vm86 166 +#define __NR_query_module 167 +#define __NR_poll 168 +#define __NR_nfsservctl 169 +#define __NR_setresgid 170 +#define __NR_getresgid 171 +#define __NR_prctl 172 +#define __NR_rt_sigreturn 173 +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#define __NR_rt_sigpending 176 +#define __NR_rt_sigtimedwait 177 +#define __NR_rt_sigqueueinfo 178 +#define __NR_rt_sigsuspend 179 +#define __NR_pread64 180 +#define __NR_pwrite64 181 +#define __NR_chown 182 +#define __NR_getcwd 183 +#define __NR_capget 184 +#define __NR_capset 185 +#define __NR_sigaltstack 186 +#define __NR_sendfile 187 +#define __NR_getpmsg 188 /* some people actually want streams */ +#define __NR_putpmsg 189 /* some people actually want streams */ +#define __NR_vfork 190 +#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 +#define __NR_lchown32 198 +#define __NR_getuid32 199 +#define __NR_getgid32 200 +#define __NR_geteuid32 201 +#define __NR_getegid32 202 +#define __NR_setreuid32 203 +#define __NR_setregid32 204 +#define __NR_getgroups32 205 +#define __NR_setgroups32 206 +#define __NR_fchown32 207 +#define __NR_setresuid32 208 +#define __NR_getresuid32 209 +#define __NR_setresgid32 210 +#define __NR_getresgid32 211 +#define __NR_chown32 212 +#define __NR_setuid32 213 +#define __NR_setgid32 214 +#define __NR_setfsuid32 215 +#define __NR_setfsgid32 216 +#define __NR_pivot_root 217 +#define __NR_mincore 218 +#define __NR_madvise 219 +#define __NR_madvise1 219 /* delete when C lib stub is removed */ +#define __NR_getdents64 220 +#define __NR_fcntl64 221 +/* 223 is unused */ +#define __NR_gettid 224 +#define __NR_readahead 225 +#define __NR_setxattr 226 +#define __NR_lsetxattr 227 +#define __NR_fsetxattr 228 +#define __NR_getxattr 229 +#define __NR_lgetxattr 230 +#define __NR_fgetxattr 231 +#define __NR_listxattr 232 +#define __NR_llistxattr 233 +#define __NR_flistxattr 234 +#define __NR_removexattr 235 +#define __NR_lremovexattr 236 +#define __NR_fremovexattr 237 +#define __NR_tkill 238 +#define __NR_sendfile64 239 +#define __NR_futex 240 +#define __NR_sched_setaffinity 241 +#define __NR_sched_getaffinity 242 +#define __NR_set_thread_area 243 +#define __NR_get_thread_area 244 +#define __NR_io_setup 245 +#define __NR_io_destroy 246 +#define __NR_io_getevents 247 +#define __NR_io_submit 248 +#define __NR_io_cancel 249 +#define __NR_fadvise64 250 +/* 251 is available for reuse (was briefly sys_set_zone_reclaim) */ +#define __NR_exit_group 252 +#define __NR_lookup_dcookie 253 +#define __NR_epoll_create 254 +#define __NR_epoll_ctl 255 +#define __NR_epoll_wait 256 +#define __NR_remap_file_pages 257 +#define __NR_set_tid_address 258 +#define __NR_timer_create 259 +#define __NR_timer_settime (__NR_timer_create+1) +#define __NR_timer_gettime (__NR_timer_create+2) +#define __NR_timer_getoverrun (__NR_timer_create+3) +#define __NR_timer_delete (__NR_timer_create+4) +#define __NR_clock_settime (__NR_timer_create+5) +#define __NR_clock_gettime (__NR_timer_create+6) +#define __NR_clock_getres (__NR_timer_create+7) +#define __NR_clock_nanosleep (__NR_timer_create+8) +#define __NR_statfs64 268 +#define __NR_fstatfs64 269 +#define __NR_tgkill 270 +#define __NR_utimes 271 +#define __NR_fadvise64_64 272 +#define __NR_vserver 273 +#define __NR_mbind 274 +#define __NR_get_mempolicy 275 +#define __NR_set_mempolicy 276 +#define __NR_mq_open 277 +#define __NR_mq_unlink (__NR_mq_open+1) +#define __NR_mq_timedsend (__NR_mq_open+2) +#define __NR_mq_timedreceive (__NR_mq_open+3) +#define __NR_mq_notify (__NR_mq_open+4) +#define __NR_mq_getsetattr (__NR_mq_open+5) +#define __NR_kexec_load 283 +#define __NR_waitid 284 +/* #define __NR_sys_setaltroot 285 */ +#define __NR_add_key 286 +#define __NR_request_key 287 +#define __NR_keyctl 288 +#define __NR_ioprio_set 289 +#define __NR_ioprio_get 290 +#define __NR_inotify_init 291 +#define __NR_inotify_add_watch 292 +#define __NR_inotify_rm_watch 293 +#define __NR_migrate_pages 294 +#define __NR_openat 295 +#define __NR_mkdirat 296 +#define __NR_mknodat 297 +#define __NR_fchownat 298 +#define __NR_futimesat 299 +#define __NR_fstatat64 300 +#define __NR_unlinkat 301 +#define __NR_renameat 302 +#define __NR_linkat 303 +#define __NR_symlinkat 304 +#define __NR_readlinkat 305 +#define __NR_fchmodat 306 +#define __NR_faccessat 307 +#define __NR_pselect6 308 +#define __NR_ppoll 309 +#define __NR_unshare 310 +#define __NR_set_robust_list 311 +#define __NR_get_robust_list 312 +#define __NR_splice 313 +#define __NR_sync_file_range 314 +#define __NR_tee 315 +#define __NR_vmsplice 316 +#define __NR_move_pages 317 +#define __NR_getcpu 318 +#define __NR_epoll_pwait 319 +#define __NR_utimensat 320 +#define __NR_signalfd 321 +#define __NR_timerfd_create 322 +#define __NR_eventfd 323 +#define __NR_fallocate 324 +#define __NR_timerfd_settime 325 +#define __NR_timerfd_gettime 326 +#define __NR_signalfd4 327 +#define __NR_eventfd2 328 +#define __NR_epoll_create1 329 +#define __NR_dup3 330 +#define __NR_pipe2 331 +#define __NR_inotify_init1 332 + +#ifdef __KERNEL__ + +#define __ARCH_WANT_IPC_PARSE_VERSION +#define __ARCH_WANT_OLD_READDIR +#define __ARCH_WANT_OLD_STAT +#define __ARCH_WANT_STAT64 +#define __ARCH_WANT_SYS_ALARM +#define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_PAUSE +#define __ARCH_WANT_SYS_SGETMASK +#define __ARCH_WANT_SYS_SIGNAL +#define __ARCH_WANT_SYS_TIME +#define __ARCH_WANT_SYS_UTIME +#define __ARCH_WANT_SYS_WAITPID +#define __ARCH_WANT_SYS_SOCKETCALL +#define __ARCH_WANT_SYS_FADVISE64 +#define __ARCH_WANT_SYS_GETPGRP +#define __ARCH_WANT_SYS_LLSEEK +#define __ARCH_WANT_SYS_NICE +#define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLDUMOUNT +#define __ARCH_WANT_SYS_SIGPENDING +#define __ARCH_WANT_SYS_SIGPROCMASK +#define __ARCH_WANT_SYS_RT_SIGACTION +#define __ARCH_WANT_SYS_RT_SIGSUSPEND + +/* + * "Conditional" syscalls + * + * What we want is __attribute__((weak,alias("sys_ni_syscall"))), + * but it doesn't work on all toolchains, so we just do it by hand + */ +#ifndef cond_syscall +#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") +#endif + +#endif /* __KERNEL__ */ +#endif /* _ASM_X86_UNISTD_32_H */ diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h new file mode 100644 index 00000000000..834b2c1d89f --- /dev/null +++ b/arch/x86/include/asm/unistd_64.h @@ -0,0 +1,693 @@ +#ifndef _ASM_X86_UNISTD_64_H +#define _ASM_X86_UNISTD_64_H + +#ifndef __SYSCALL +#define __SYSCALL(a, b) +#endif + +/* + * This file contains the system call numbers. + * + * Note: holes are not allowed. + */ + +/* at least 8 syscall per cacheline */ +#define __NR_read 0 +__SYSCALL(__NR_read, sys_read) +#define __NR_write 1 +__SYSCALL(__NR_write, sys_write) +#define __NR_open 2 +__SYSCALL(__NR_open, sys_open) +#define __NR_close 3 +__SYSCALL(__NR_close, sys_close) +#define __NR_stat 4 +__SYSCALL(__NR_stat, sys_newstat) +#define __NR_fstat 5 +__SYSCALL(__NR_fstat, sys_newfstat) +#define __NR_lstat 6 +__SYSCALL(__NR_lstat, sys_newlstat) +#define __NR_poll 7 +__SYSCALL(__NR_poll, sys_poll) + +#define __NR_lseek 8 +__SYSCALL(__NR_lseek, sys_lseek) +#define __NR_mmap 9 +__SYSCALL(__NR_mmap, sys_mmap) +#define __NR_mprotect 10 +__SYSCALL(__NR_mprotect, sys_mprotect) +#define __NR_munmap 11 +__SYSCALL(__NR_munmap, sys_munmap) +#define __NR_brk 12 +__SYSCALL(__NR_brk, sys_brk) +#define __NR_rt_sigaction 13 +__SYSCALL(__NR_rt_sigaction, sys_rt_sigaction) +#define __NR_rt_sigprocmask 14 +__SYSCALL(__NR_rt_sigprocmask, sys_rt_sigprocmask) +#define __NR_rt_sigreturn 15 +__SYSCALL(__NR_rt_sigreturn, stub_rt_sigreturn) + +#define __NR_ioctl 16 +__SYSCALL(__NR_ioctl, sys_ioctl) +#define __NR_pread64 17 +__SYSCALL(__NR_pread64, sys_pread64) +#define __NR_pwrite64 18 +__SYSCALL(__NR_pwrite64, sys_pwrite64) +#define __NR_readv 19 +__SYSCALL(__NR_readv, sys_readv) +#define __NR_writev 20 +__SYSCALL(__NR_writev, sys_writev) +#define __NR_access 21 +__SYSCALL(__NR_access, sys_access) +#define __NR_pipe 22 +__SYSCALL(__NR_pipe, sys_pipe) +#define __NR_select 23 +__SYSCALL(__NR_select, sys_select) + +#define __NR_sched_yield 24 +__SYSCALL(__NR_sched_yield, sys_sched_yield) +#define __NR_mremap 25 +__SYSCALL(__NR_mremap, sys_mremap) +#define __NR_msync 26 +__SYSCALL(__NR_msync, sys_msync) +#define __NR_mincore 27 +__SYSCALL(__NR_mincore, sys_mincore) +#define __NR_madvise 28 +__SYSCALL(__NR_madvise, sys_madvise) +#define __NR_shmget 29 +__SYSCALL(__NR_shmget, sys_shmget) +#define __NR_shmat 30 +__SYSCALL(__NR_shmat, sys_shmat) +#define __NR_shmctl 31 +__SYSCALL(__NR_shmctl, sys_shmctl) + +#define __NR_dup 32 +__SYSCALL(__NR_dup, sys_dup) +#define __NR_dup2 33 +__SYSCALL(__NR_dup2, sys_dup2) +#define __NR_pause 34 +__SYSCALL(__NR_pause, sys_pause) +#define __NR_nanosleep 35 +__SYSCALL(__NR_nanosleep, sys_nanosleep) +#define __NR_getitimer 36 +__SYSCALL(__NR_getitimer, sys_getitimer) +#define __NR_alarm 37 +__SYSCALL(__NR_alarm, sys_alarm) +#define __NR_setitimer 38 +__SYSCALL(__NR_setitimer, sys_setitimer) +#define __NR_getpid 39 +__SYSCALL(__NR_getpid, sys_getpid) + +#define __NR_sendfile 40 +__SYSCALL(__NR_sendfile, sys_sendfile64) +#define __NR_socket 41 +__SYSCALL(__NR_socket, sys_socket) +#define __NR_connect 42 +__SYSCALL(__NR_connect, sys_connect) +#define __NR_accept 43 +__SYSCALL(__NR_accept, sys_accept) +#define __NR_sendto 44 +__SYSCALL(__NR_sendto, sys_sendto) +#define __NR_recvfrom 45 +__SYSCALL(__NR_recvfrom, sys_recvfrom) +#define __NR_sendmsg 46 +__SYSCALL(__NR_sendmsg, sys_sendmsg) +#define __NR_recvmsg 47 +__SYSCALL(__NR_recvmsg, sys_recvmsg) + +#define __NR_shutdown 48 +__SYSCALL(__NR_shutdown, sys_shutdown) +#define __NR_bind 49 +__SYSCALL(__NR_bind, sys_bind) +#define __NR_listen 50 +__SYSCALL(__NR_listen, sys_listen) +#define __NR_getsockname 51 +__SYSCALL(__NR_getsockname, sys_getsockname) +#define __NR_getpeername 52 +__SYSCALL(__NR_getpeername, sys_getpeername) +#define __NR_socketpair 53 +__SYSCALL(__NR_socketpair, sys_socketpair) +#define __NR_setsockopt 54 +__SYSCALL(__NR_setsockopt, sys_setsockopt) +#define __NR_getsockopt 55 +__SYSCALL(__NR_getsockopt, sys_getsockopt) + +#define __NR_clone 56 +__SYSCALL(__NR_clone, stub_clone) +#define __NR_fork 57 +__SYSCALL(__NR_fork, stub_fork) +#define __NR_vfork 58 +__SYSCALL(__NR_vfork, stub_vfork) +#define __NR_execve 59 +__SYSCALL(__NR_execve, stub_execve) +#define __NR_exit 60 +__SYSCALL(__NR_exit, sys_exit) +#define __NR_wait4 61 +__SYSCALL(__NR_wait4, sys_wait4) +#define __NR_kill 62 +__SYSCALL(__NR_kill, sys_kill) +#define __NR_uname 63 +__SYSCALL(__NR_uname, sys_uname) + +#define __NR_semget 64 +__SYSCALL(__NR_semget, sys_semget) +#define __NR_semop 65 +__SYSCALL(__NR_semop, sys_semop) +#define __NR_semctl 66 +__SYSCALL(__NR_semctl, sys_semctl) +#define __NR_shmdt 67 +__SYSCALL(__NR_shmdt, sys_shmdt) +#define __NR_msgget 68 +__SYSCALL(__NR_msgget, sys_msgget) +#define __NR_msgsnd 69 +__SYSCALL(__NR_msgsnd, sys_msgsnd) +#define __NR_msgrcv 70 +__SYSCALL(__NR_msgrcv, sys_msgrcv) +#define __NR_msgctl 71 +__SYSCALL(__NR_msgctl, sys_msgctl) + +#define __NR_fcntl 72 +__SYSCALL(__NR_fcntl, sys_fcntl) +#define __NR_flock 73 +__SYSCALL(__NR_flock, sys_flock) +#define __NR_fsync 74 +__SYSCALL(__NR_fsync, sys_fsync) +#define __NR_fdatasync 75 +__SYSCALL(__NR_fdatasync, sys_fdatasync) +#define __NR_truncate 76 +__SYSCALL(__NR_truncate, sys_truncate) +#define __NR_ftruncate 77 +__SYSCALL(__NR_ftruncate, sys_ftruncate) +#define __NR_getdents 78 +__SYSCALL(__NR_getdents, sys_getdents) +#define __NR_getcwd 79 +__SYSCALL(__NR_getcwd, sys_getcwd) + +#define __NR_chdir 80 +__SYSCALL(__NR_chdir, sys_chdir) +#define __NR_fchdir 81 +__SYSCALL(__NR_fchdir, sys_fchdir) +#define __NR_rename 82 +__SYSCALL(__NR_rename, sys_rename) +#define __NR_mkdir 83 +__SYSCALL(__NR_mkdir, sys_mkdir) +#define __NR_rmdir 84 +__SYSCALL(__NR_rmdir, sys_rmdir) +#define __NR_creat 85 +__SYSCALL(__NR_creat, sys_creat) +#define __NR_link 86 +__SYSCALL(__NR_link, sys_link) +#define __NR_unlink 87 +__SYSCALL(__NR_unlink, sys_unlink) + +#define __NR_symlink 88 +__SYSCALL(__NR_symlink, sys_symlink) +#define __NR_readlink 89 +__SYSCALL(__NR_readlink, sys_readlink) +#define __NR_chmod 90 +__SYSCALL(__NR_chmod, sys_chmod) +#define __NR_fchmod 91 +__SYSCALL(__NR_fchmod, sys_fchmod) +#define __NR_chown 92 +__SYSCALL(__NR_chown, sys_chown) +#define __NR_fchown 93 +__SYSCALL(__NR_fchown, sys_fchown) +#define __NR_lchown 94 +__SYSCALL(__NR_lchown, sys_lchown) +#define __NR_umask 95 +__SYSCALL(__NR_umask, sys_umask) + +#define __NR_gettimeofday 96 +__SYSCALL(__NR_gettimeofday, sys_gettimeofday) +#define __NR_getrlimit 97 +__SYSCALL(__NR_getrlimit, sys_getrlimit) +#define __NR_getrusage 98 +__SYSCALL(__NR_getrusage, sys_getrusage) +#define __NR_sysinfo 99 +__SYSCALL(__NR_sysinfo, sys_sysinfo) +#define __NR_times 100 +__SYSCALL(__NR_times, sys_times) +#define __NR_ptrace 101 +__SYSCALL(__NR_ptrace, sys_ptrace) +#define __NR_getuid 102 +__SYSCALL(__NR_getuid, sys_getuid) +#define __NR_syslog 103 +__SYSCALL(__NR_syslog, sys_syslog) + +/* at the very end the stuff that never runs during the benchmarks */ +#define __NR_getgid 104 +__SYSCALL(__NR_getgid, sys_getgid) +#define __NR_setuid 105 +__SYSCALL(__NR_setuid, sys_setuid) +#define __NR_setgid 106 +__SYSCALL(__NR_setgid, sys_setgid) +#define __NR_geteuid 107 +__SYSCALL(__NR_geteuid, sys_geteuid) +#define __NR_getegid 108 +__SYSCALL(__NR_getegid, sys_getegid) +#define __NR_setpgid 109 +__SYSCALL(__NR_setpgid, sys_setpgid) +#define __NR_getppid 110 +__SYSCALL(__NR_getppid, sys_getppid) +#define __NR_getpgrp 111 +__SYSCALL(__NR_getpgrp, sys_getpgrp) + +#define __NR_setsid 112 +__SYSCALL(__NR_setsid, sys_setsid) +#define __NR_setreuid 113 +__SYSCALL(__NR_setreuid, sys_setreuid) +#define __NR_setregid 114 +__SYSCALL(__NR_setregid, sys_setregid) +#define __NR_getgroups 115 +__SYSCALL(__NR_getgroups, sys_getgroups) +#define __NR_setgroups 116 +__SYSCALL(__NR_setgroups, sys_setgroups) +#define __NR_setresuid 117 +__SYSCALL(__NR_setresuid, sys_setresuid) +#define __NR_getresuid 118 +__SYSCALL(__NR_getresuid, sys_getresuid) +#define __NR_setresgid 119 +__SYSCALL(__NR_setresgid, sys_setresgid) + +#define __NR_getresgid 120 +__SYSCALL(__NR_getresgid, sys_getresgid) +#define __NR_getpgid 121 +__SYSCALL(__NR_getpgid, sys_getpgid) +#define __NR_setfsuid 122 +__SYSCALL(__NR_setfsuid, sys_setfsuid) +#define __NR_setfsgid 123 +__SYSCALL(__NR_setfsgid, sys_setfsgid) +#define __NR_getsid 124 +__SYSCALL(__NR_getsid, sys_getsid) +#define __NR_capget 125 +__SYSCALL(__NR_capget, sys_capget) +#define __NR_capset 126 +__SYSCALL(__NR_capset, sys_capset) + +#define __NR_rt_sigpending 127 +__SYSCALL(__NR_rt_sigpending, sys_rt_sigpending) +#define __NR_rt_sigtimedwait 128 +__SYSCALL(__NR_rt_sigtimedwait, sys_rt_sigtimedwait) +#define __NR_rt_sigqueueinfo 129 +__SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo) +#define __NR_rt_sigsuspend 130 +__SYSCALL(__NR_rt_sigsuspend, sys_rt_sigsuspend) +#define __NR_sigaltstack 131 +__SYSCALL(__NR_sigaltstack, stub_sigaltstack) +#define __NR_utime 132 +__SYSCALL(__NR_utime, sys_utime) +#define __NR_mknod 133 +__SYSCALL(__NR_mknod, sys_mknod) + +/* Only needed for a.out */ +#define __NR_uselib 134 +__SYSCALL(__NR_uselib, sys_ni_syscall) +#define __NR_personality 135 +__SYSCALL(__NR_personality, sys_personality) + +#define __NR_ustat 136 +__SYSCALL(__NR_ustat, sys_ustat) +#define __NR_statfs 137 +__SYSCALL(__NR_statfs, sys_statfs) +#define __NR_fstatfs 138 +__SYSCALL(__NR_fstatfs, sys_fstatfs) +#define __NR_sysfs 139 +__SYSCALL(__NR_sysfs, sys_sysfs) + +#define __NR_getpriority 140 +__SYSCALL(__NR_getpriority, sys_getpriority) +#define __NR_setpriority 141 +__SYSCALL(__NR_setpriority, sys_setpriority) +#define __NR_sched_setparam 142 +__SYSCALL(__NR_sched_setparam, sys_sched_setparam) +#define __NR_sched_getparam 143 +__SYSCALL(__NR_sched_getparam, sys_sched_getparam) +#define __NR_sched_setscheduler 144 +__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler) +#define __NR_sched_getscheduler 145 +__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler) +#define __NR_sched_get_priority_max 146 +__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) +#define __NR_sched_get_priority_min 147 +__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) +#define __NR_sched_rr_get_interval 148 +__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval) + +#define __NR_mlock 149 +__SYSCALL(__NR_mlock, sys_mlock) +#define __NR_munlock 150 +__SYSCALL(__NR_munlock, sys_munlock) +#define __NR_mlockall 151 +__SYSCALL(__NR_mlockall, sys_mlockall) +#define __NR_munlockall 152 +__SYSCALL(__NR_munlockall, sys_munlockall) + +#define __NR_vhangup 153 +__SYSCALL(__NR_vhangup, sys_vhangup) + +#define __NR_modify_ldt 154 +__SYSCALL(__NR_modify_ldt, sys_modify_ldt) + +#define __NR_pivot_root 155 +__SYSCALL(__NR_pivot_root, sys_pivot_root) + +#define __NR__sysctl 156 +__SYSCALL(__NR__sysctl, sys_sysctl) + +#define __NR_prctl 157 +__SYSCALL(__NR_prctl, sys_prctl) +#define __NR_arch_prctl 158 +__SYSCALL(__NR_arch_prctl, sys_arch_prctl) + +#define __NR_adjtimex 159 +__SYSCALL(__NR_adjtimex, sys_adjtimex) + +#define __NR_setrlimit 160 +__SYSCALL(__NR_setrlimit, sys_setrlimit) + +#define __NR_chroot 161 +__SYSCALL(__NR_chroot, sys_chroot) + +#define __NR_sync 162 +__SYSCALL(__NR_sync, sys_sync) + +#define __NR_acct 163 +__SYSCALL(__NR_acct, sys_acct) + +#define __NR_settimeofday 164 +__SYSCALL(__NR_settimeofday, sys_settimeofday) + +#define __NR_mount 165 +__SYSCALL(__NR_mount, sys_mount) +#define __NR_umount2 166 +__SYSCALL(__NR_umount2, sys_umount) + +#define __NR_swapon 167 +__SYSCALL(__NR_swapon, sys_swapon) +#define __NR_swapoff 168 +__SYSCALL(__NR_swapoff, sys_swapoff) + +#define __NR_reboot 169 +__SYSCALL(__NR_reboot, sys_reboot) + +#define __NR_sethostname 170 +__SYSCALL(__NR_sethostname, sys_sethostname) +#define __NR_setdomainname 171 +__SYSCALL(__NR_setdomainname, sys_setdomainname) + +#define __NR_iopl 172 +__SYSCALL(__NR_iopl, stub_iopl) +#define __NR_ioperm 173 +__SYSCALL(__NR_ioperm, sys_ioperm) + +#define __NR_create_module 174 +__SYSCALL(__NR_create_module, sys_ni_syscall) +#define __NR_init_module 175 +__SYSCALL(__NR_init_module, sys_init_module) +#define __NR_delete_module 176 +__SYSCALL(__NR_delete_module, sys_delete_module) +#define __NR_get_kernel_syms 177 +__SYSCALL(__NR_get_kernel_syms, sys_ni_syscall) +#define __NR_query_module 178 +__SYSCALL(__NR_query_module, sys_ni_syscall) + +#define __NR_quotactl 179 +__SYSCALL(__NR_quotactl, sys_quotactl) + +#define __NR_nfsservctl 180 +__SYSCALL(__NR_nfsservctl, sys_nfsservctl) + +/* reserved for LiS/STREAMS */ +#define __NR_getpmsg 181 +__SYSCALL(__NR_getpmsg, sys_ni_syscall) +#define __NR_putpmsg 182 +__SYSCALL(__NR_putpmsg, sys_ni_syscall) + +/* reserved for AFS */ +#define __NR_afs_syscall 183 +__SYSCALL(__NR_afs_syscall, sys_ni_syscall) + +/* reserved for tux */ +#define __NR_tuxcall 184 +__SYSCALL(__NR_tuxcall, sys_ni_syscall) + +#define __NR_security 185 +__SYSCALL(__NR_security, sys_ni_syscall) + +#define __NR_gettid 186 +__SYSCALL(__NR_gettid, sys_gettid) + +#define __NR_readahead 187 +__SYSCALL(__NR_readahead, sys_readahead) +#define __NR_setxattr 188 +__SYSCALL(__NR_setxattr, sys_setxattr) +#define __NR_lsetxattr 189 +__SYSCALL(__NR_lsetxattr, sys_lsetxattr) +#define __NR_fsetxattr 190 +__SYSCALL(__NR_fsetxattr, sys_fsetxattr) +#define __NR_getxattr 191 +__SYSCALL(__NR_getxattr, sys_getxattr) +#define __NR_lgetxattr 192 +__SYSCALL(__NR_lgetxattr, sys_lgetxattr) +#define __NR_fgetxattr 193 +__SYSCALL(__NR_fgetxattr, sys_fgetxattr) +#define __NR_listxattr 194 +__SYSCALL(__NR_listxattr, sys_listxattr) +#define __NR_llistxattr 195 +__SYSCALL(__NR_llistxattr, sys_llistxattr) +#define __NR_flistxattr 196 +__SYSCALL(__NR_flistxattr, sys_flistxattr) +#define __NR_removexattr 197 +__SYSCALL(__NR_removexattr, sys_removexattr) +#define __NR_lremovexattr 198 +__SYSCALL(__NR_lremovexattr, sys_lremovexattr) +#define __NR_fremovexattr 199 +__SYSCALL(__NR_fremovexattr, sys_fremovexattr) +#define __NR_tkill 200 +__SYSCALL(__NR_tkill, sys_tkill) +#define __NR_time 201 +__SYSCALL(__NR_time, sys_time) +#define __NR_futex 202 +__SYSCALL(__NR_futex, sys_futex) +#define __NR_sched_setaffinity 203 +__SYSCALL(__NR_sched_setaffinity, sys_sched_setaffinity) +#define __NR_sched_getaffinity 204 +__SYSCALL(__NR_sched_getaffinity, sys_sched_getaffinity) +#define __NR_set_thread_area 205 +__SYSCALL(__NR_set_thread_area, sys_ni_syscall) /* use arch_prctl */ +#define __NR_io_setup 206 +__SYSCALL(__NR_io_setup, sys_io_setup) +#define __NR_io_destroy 207 +__SYSCALL(__NR_io_destroy, sys_io_destroy) +#define __NR_io_getevents 208 +__SYSCALL(__NR_io_getevents, sys_io_getevents) +#define __NR_io_submit 209 +__SYSCALL(__NR_io_submit, sys_io_submit) +#define __NR_io_cancel 210 +__SYSCALL(__NR_io_cancel, sys_io_cancel) +#define __NR_get_thread_area 211 +__SYSCALL(__NR_get_thread_area, sys_ni_syscall) /* use arch_prctl */ +#define __NR_lookup_dcookie 212 +__SYSCALL(__NR_lookup_dcookie, sys_lookup_dcookie) +#define __NR_epoll_create 213 +__SYSCALL(__NR_epoll_create, sys_epoll_create) +#define __NR_epoll_ctl_old 214 +__SYSCALL(__NR_epoll_ctl_old, sys_ni_syscall) +#define __NR_epoll_wait_old 215 +__SYSCALL(__NR_epoll_wait_old, sys_ni_syscall) +#define __NR_remap_file_pages 216 +__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) +#define __NR_getdents64 217 +__SYSCALL(__NR_getdents64, sys_getdents64) +#define __NR_set_tid_address 218 +__SYSCALL(__NR_set_tid_address, sys_set_tid_address) +#define __NR_restart_syscall 219 +__SYSCALL(__NR_restart_syscall, sys_restart_syscall) +#define __NR_semtimedop 220 +__SYSCALL(__NR_semtimedop, sys_semtimedop) +#define __NR_fadvise64 221 +__SYSCALL(__NR_fadvise64, sys_fadvise64) +#define __NR_timer_create 222 +__SYSCALL(__NR_timer_create, sys_timer_create) +#define __NR_timer_settime 223 +__SYSCALL(__NR_timer_settime, sys_timer_settime) +#define __NR_timer_gettime 224 +__SYSCALL(__NR_timer_gettime, sys_timer_gettime) +#define __NR_timer_getoverrun 225 +__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) +#define __NR_timer_delete 226 +__SYSCALL(__NR_timer_delete, sys_timer_delete) +#define __NR_clock_settime 227 +__SYSCALL(__NR_clock_settime, sys_clock_settime) +#define __NR_clock_gettime 228 +__SYSCALL(__NR_clock_gettime, sys_clock_gettime) +#define __NR_clock_getres 229 +__SYSCALL(__NR_clock_getres, sys_clock_getres) +#define __NR_clock_nanosleep 230 +__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep) +#define __NR_exit_group 231 +__SYSCALL(__NR_exit_group, sys_exit_group) +#define __NR_epoll_wait 232 +__SYSCALL(__NR_epoll_wait, sys_epoll_wait) +#define __NR_epoll_ctl 233 +__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl) +#define __NR_tgkill 234 +__SYSCALL(__NR_tgkill, sys_tgkill) +#define __NR_utimes 235 +__SYSCALL(__NR_utimes, sys_utimes) +#define __NR_vserver 236 +__SYSCALL(__NR_vserver, sys_ni_syscall) +#define __NR_mbind 237 +__SYSCALL(__NR_mbind, sys_mbind) +#define __NR_set_mempolicy 238 +__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) +#define __NR_get_mempolicy 239 +__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) +#define __NR_mq_open 240 +__SYSCALL(__NR_mq_open, sys_mq_open) +#define __NR_mq_unlink 241 +__SYSCALL(__NR_mq_unlink, sys_mq_unlink) +#define __NR_mq_timedsend 242 +__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend) +#define __NR_mq_timedreceive 243 +__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive) +#define __NR_mq_notify 244 +__SYSCALL(__NR_mq_notify, sys_mq_notify) +#define __NR_mq_getsetattr 245 +__SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr) +#define __NR_kexec_load 246 +__SYSCALL(__NR_kexec_load, sys_kexec_load) +#define __NR_waitid 247 +__SYSCALL(__NR_waitid, sys_waitid) +#define __NR_add_key 248 +__SYSCALL(__NR_add_key, sys_add_key) +#define __NR_request_key 249 +__SYSCALL(__NR_request_key, sys_request_key) +#define __NR_keyctl 250 +__SYSCALL(__NR_keyctl, sys_keyctl) +#define __NR_ioprio_set 251 +__SYSCALL(__NR_ioprio_set, sys_ioprio_set) +#define __NR_ioprio_get 252 +__SYSCALL(__NR_ioprio_get, sys_ioprio_get) +#define __NR_inotify_init 253 +__SYSCALL(__NR_inotify_init, sys_inotify_init) +#define __NR_inotify_add_watch 254 +__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) +#define __NR_inotify_rm_watch 255 +__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) +#define __NR_migrate_pages 256 +__SYSCALL(__NR_migrate_pages, sys_migrate_pages) +#define __NR_openat 257 +__SYSCALL(__NR_openat, sys_openat) +#define __NR_mkdirat 258 +__SYSCALL(__NR_mkdirat, sys_mkdirat) +#define __NR_mknodat 259 +__SYSCALL(__NR_mknodat, sys_mknodat) +#define __NR_fchownat 260 +__SYSCALL(__NR_fchownat, sys_fchownat) +#define __NR_futimesat 261 +__SYSCALL(__NR_futimesat, sys_futimesat) +#define __NR_newfstatat 262 +__SYSCALL(__NR_newfstatat, sys_newfstatat) +#define __NR_unlinkat 263 +__SYSCALL(__NR_unlinkat, sys_unlinkat) +#define __NR_renameat 264 +__SYSCALL(__NR_renameat, sys_renameat) +#define __NR_linkat 265 +__SYSCALL(__NR_linkat, sys_linkat) +#define __NR_symlinkat 266 +__SYSCALL(__NR_symlinkat, sys_symlinkat) +#define __NR_readlinkat 267 +__SYSCALL(__NR_readlinkat, sys_readlinkat) +#define __NR_fchmodat 268 +__SYSCALL(__NR_fchmodat, sys_fchmodat) +#define __NR_faccessat 269 +__SYSCALL(__NR_faccessat, sys_faccessat) +#define __NR_pselect6 270 +__SYSCALL(__NR_pselect6, sys_pselect6) +#define __NR_ppoll 271 +__SYSCALL(__NR_ppoll, sys_ppoll) +#define __NR_unshare 272 +__SYSCALL(__NR_unshare, sys_unshare) +#define __NR_set_robust_list 273 +__SYSCALL(__NR_set_robust_list, sys_set_robust_list) +#define __NR_get_robust_list 274 +__SYSCALL(__NR_get_robust_list, sys_get_robust_list) +#define __NR_splice 275 +__SYSCALL(__NR_splice, sys_splice) +#define __NR_tee 276 +__SYSCALL(__NR_tee, sys_tee) +#define __NR_sync_file_range 277 +__SYSCALL(__NR_sync_file_range, sys_sync_file_range) +#define __NR_vmsplice 278 +__SYSCALL(__NR_vmsplice, sys_vmsplice) +#define __NR_move_pages 279 +__SYSCALL(__NR_move_pages, sys_move_pages) +#define __NR_utimensat 280 +__SYSCALL(__NR_utimensat, sys_utimensat) +#define __IGNORE_getcpu /* implemented as a vsyscall */ +#define __NR_epoll_pwait 281 +__SYSCALL(__NR_epoll_pwait, sys_epoll_pwait) +#define __NR_signalfd 282 +__SYSCALL(__NR_signalfd, sys_signalfd) +#define __NR_timerfd_create 283 +__SYSCALL(__NR_timerfd_create, sys_timerfd_create) +#define __NR_eventfd 284 +__SYSCALL(__NR_eventfd, sys_eventfd) +#define __NR_fallocate 285 +__SYSCALL(__NR_fallocate, sys_fallocate) +#define __NR_timerfd_settime 286 +__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) +#define __NR_timerfd_gettime 287 +__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) +#define __NR_paccept 288 +__SYSCALL(__NR_paccept, sys_paccept) +#define __NR_signalfd4 289 +__SYSCALL(__NR_signalfd4, sys_signalfd4) +#define __NR_eventfd2 290 +__SYSCALL(__NR_eventfd2, sys_eventfd2) +#define __NR_epoll_create1 291 +__SYSCALL(__NR_epoll_create1, sys_epoll_create1) +#define __NR_dup3 292 +__SYSCALL(__NR_dup3, sys_dup3) +#define __NR_pipe2 293 +__SYSCALL(__NR_pipe2, sys_pipe2) +#define __NR_inotify_init1 294 +__SYSCALL(__NR_inotify_init1, sys_inotify_init1) + + +#ifndef __NO_STUBS +#define __ARCH_WANT_OLD_READDIR +#define __ARCH_WANT_OLD_STAT +#define __ARCH_WANT_SYS_ALARM +#define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_PAUSE +#define __ARCH_WANT_SYS_SGETMASK +#define __ARCH_WANT_SYS_SIGNAL +#define __ARCH_WANT_SYS_UTIME +#define __ARCH_WANT_SYS_WAITPID +#define __ARCH_WANT_SYS_SOCKETCALL +#define __ARCH_WANT_SYS_FADVISE64 +#define __ARCH_WANT_SYS_GETPGRP +#define __ARCH_WANT_SYS_LLSEEK +#define __ARCH_WANT_SYS_NICE +#define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLDUMOUNT +#define __ARCH_WANT_SYS_SIGPENDING +#define __ARCH_WANT_SYS_SIGPROCMASK +#define __ARCH_WANT_SYS_RT_SIGACTION +#define __ARCH_WANT_SYS_RT_SIGSUSPEND +#define __ARCH_WANT_SYS_TIME +#define __ARCH_WANT_COMPAT_SYS_TIME +#endif /* __NO_STUBS */ + +#ifdef __KERNEL__ +/* + * "Conditional" syscalls + * + * What we want is __attribute__((weak,alias("sys_ni_syscall"))), + * but it doesn't work on all toolchains, so we just do it by hand + */ +#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_UNISTD_64_H */ diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h new file mode 100644 index 00000000000..8b064bd9c55 --- /dev/null +++ b/arch/x86/include/asm/unwind.h @@ -0,0 +1,13 @@ +#ifndef _ASM_X86_UNWIND_H +#define _ASM_X86_UNWIND_H + +#define UNW_PC(frame) ((void)(frame), 0UL) +#define UNW_SP(frame) ((void)(frame), 0UL) +#define UNW_FP(frame) ((void)(frame), 0UL) + +static inline int arch_unw_user_mode(const void *info) +{ + return 0; +} + +#endif /* _ASM_X86_UNWIND_H */ diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h new file mode 100644 index 00000000000..999873b22e7 --- /dev/null +++ b/arch/x86/include/asm/user.h @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "user_32.h" +#else +# include "user_64.h" +#endif diff --git a/arch/x86/include/asm/user32.h b/arch/x86/include/asm/user32.h new file mode 100644 index 00000000000..14cbb73ebcb --- /dev/null +++ b/arch/x86/include/asm/user32.h @@ -0,0 +1,70 @@ +#ifndef _ASM_X86_USER32_H +#define _ASM_X86_USER32_H + +/* IA32 compatible user structures for ptrace. + * These should be used for 32bit coredumps too. */ + +struct user_i387_ia32_struct { + u32 cwd; + u32 swd; + u32 twd; + u32 fip; + u32 fcs; + u32 foo; + u32 fos; + u32 st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ +}; + +/* FSAVE frame with extensions */ +struct user32_fxsr_struct { + unsigned short cwd; + unsigned short swd; + unsigned short twd; /* not compatible to 64bit twd */ + unsigned short fop; + int fip; + int fcs; + int foo; + int fos; + int mxcsr; + int reserved; + int st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + int xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ + int padding[56]; +}; + +struct user_regs_struct32 { + __u32 ebx, ecx, edx, esi, edi, ebp, eax; + unsigned short ds, __ds, es, __es; + unsigned short fs, __fs, gs, __gs; + __u32 orig_eax, eip; + unsigned short cs, __cs; + __u32 eflags, esp; + unsigned short ss, __ss; +}; + +struct user32 { + struct user_regs_struct32 regs; /* Where the registers are actually stored */ + int u_fpvalid; /* True if math co-processor being used. */ + /* for this mess. Not yet used. */ + struct user_i387_ia32_struct i387; /* Math Co-processor registers. */ +/* The rest of this junk is to help gdb figure out what goes where */ + __u32 u_tsize; /* Text segment size (pages). */ + __u32 u_dsize; /* Data segment size (pages). */ + __u32 u_ssize; /* Stack segment size (pages). */ + __u32 start_code; /* Starting virtual address of text. */ + __u32 start_stack; /* Starting virtual address of stack area. + This is actually the bottom of the stack, + the top of the stack is always found in the + esp register. */ + __u32 signal; /* Signal that caused the core dump. */ + int reserved; /* No __u32er used */ + __u32 u_ar0; /* Used by gdb to help find the values for */ + /* the registers. */ + __u32 u_fpstate; /* Math Co-processor pointer. */ + __u32 magic; /* To uniquely identify a core file */ + char u_comm[32]; /* User command that was responsible */ + int u_debugreg[8]; +}; + + +#endif /* _ASM_X86_USER32_H */ diff --git a/arch/x86/include/asm/user_32.h b/arch/x86/include/asm/user_32.h new file mode 100644 index 00000000000..bebfd864401 --- /dev/null +++ b/arch/x86/include/asm/user_32.h @@ -0,0 +1,131 @@ +#ifndef _ASM_X86_USER_32_H +#define _ASM_X86_USER_32_H + +#include <asm/page.h> +/* Core file format: The core file is written in such a way that gdb + can understand it and provide useful information to the user (under + linux we use the 'trad-core' bfd). There are quite a number of + obstacles to being able to view the contents of the floating point + registers, and until these are solved you will not be able to view the + contents of them. Actually, you can read in the core file and look at + the contents of the user struct to find out what the floating point + registers contain. + The actual file contents are as follows: + UPAGE: 1 page consisting of a user struct that tells gdb what is present + in the file. Directly after this is a copy of the task_struct, which + is currently not used by gdb, but it may come in useful at some point. + All of the registers are stored as part of the upage. The upage should + always be only one page. + DATA: The data area is stored. We use current->end_text to + current->brk to pick up all of the user variables, plus any memory + that may have been malloced. No attempt is made to determine if a page + is demand-zero or if a page is totally unused, we just cover the entire + range. All of the addresses are rounded in such a way that an integral + number of pages is written. + STACK: We need the stack information in order to get a meaningful + backtrace. We need to write the data from (esp) to + current->start_stack, so we round each of these off in order to be able + to write an integer number of pages. + The minimum core file size is 3 pages, or 12288 bytes. +*/ + +/* + * Pentium III FXSR, SSE support + * Gareth Hughes <gareth@valinux.com>, May 2000 + * + * Provide support for the GDB 5.0+ PTRACE_{GET|SET}FPXREGS requests for + * interacting with the FXSR-format floating point environment. Floating + * point data can be accessed in the regular format in the usual manner, + * and both the standard and SIMD floating point data can be accessed via + * the new ptrace requests. In either case, changes to the FPU environment + * will be reflected in the task's state as expected. + */ + +struct user_i387_struct { + long cwd; + long swd; + long twd; + long fip; + long fcs; + long foo; + long fos; + long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ +}; + +struct user_fxsr_struct { + unsigned short cwd; + unsigned short swd; + unsigned short twd; + unsigned short fop; + long fip; + long fcs; + long foo; + long fos; + long mxcsr; + long reserved; + long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ + long padding[56]; +}; + +/* + * This is the old layout of "struct pt_regs", and + * is still the layout used by user mode (the new + * pt_regs doesn't have all registers as the kernel + * doesn't use the extra segment registers) + */ +struct user_regs_struct { + unsigned long bx; + unsigned long cx; + unsigned long dx; + unsigned long si; + unsigned long di; + unsigned long bp; + unsigned long ax; + unsigned long ds; + unsigned long es; + unsigned long fs; + unsigned long gs; + unsigned long orig_ax; + unsigned long ip; + unsigned long cs; + unsigned long flags; + unsigned long sp; + unsigned long ss; +}; + +/* When the kernel dumps core, it starts by dumping the user struct - + this will be used by gdb to figure out where the data and stack segments + are within the file, and what virtual addresses to use. */ +struct user{ +/* We start with the registers, to mimic the way that "memory" is returned + from the ptrace(3,...) function. */ + struct user_regs_struct regs; /* Where the registers are actually stored */ +/* ptrace does not yet supply these. Someday.... */ + int u_fpvalid; /* True if math co-processor being used. */ + /* for this mess. Not yet used. */ + struct user_i387_struct i387; /* Math Co-processor registers. */ +/* The rest of this junk is to help gdb figure out what goes where */ + unsigned long int u_tsize; /* Text segment size (pages). */ + unsigned long int u_dsize; /* Data segment size (pages). */ + unsigned long int u_ssize; /* Stack segment size (pages). */ + unsigned long start_code; /* Starting virtual address of text. */ + unsigned long start_stack; /* Starting virtual address of stack area. + This is actually the bottom of the stack, + the top of the stack is always found in the + esp register. */ + long int signal; /* Signal that caused the core dump. */ + int reserved; /* No longer used */ + unsigned long u_ar0; /* Used by gdb to help find the values for */ + /* the registers. */ + struct user_i387_struct *u_fpstate; /* Math Co-processor pointer. */ + unsigned long magic; /* To uniquely identify a core file */ + char u_comm[32]; /* User command that was responsible */ + int u_debugreg[8]; +}; +#define NBPG PAGE_SIZE +#define UPAGES 1 +#define HOST_TEXT_START_ADDR (u.start_code) +#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) + +#endif /* _ASM_X86_USER_32_H */ diff --git a/arch/x86/include/asm/user_64.h b/arch/x86/include/asm/user_64.h new file mode 100644 index 00000000000..faf2cd3e0d7 --- /dev/null +++ b/arch/x86/include/asm/user_64.h @@ -0,0 +1,137 @@ +#ifndef _ASM_X86_USER_64_H +#define _ASM_X86_USER_64_H + +#include <asm/types.h> +#include <asm/page.h> +/* Core file format: The core file is written in such a way that gdb + can understand it and provide useful information to the user. + There are quite a number of obstacles to being able to view the + contents of the floating point registers, and until these are + solved you will not be able to view the contents of them. + Actually, you can read in the core file and look at the contents of + the user struct to find out what the floating point registers + contain. + + The actual file contents are as follows: + UPAGE: 1 page consisting of a user struct that tells gdb what is present + in the file. Directly after this is a copy of the task_struct, which + is currently not used by gdb, but it may come in useful at some point. + All of the registers are stored as part of the upage. The upage should + always be only one page. + DATA: The data area is stored. We use current->end_text to + current->brk to pick up all of the user variables, plus any memory + that may have been malloced. No attempt is made to determine if a page + is demand-zero or if a page is totally unused, we just cover the entire + range. All of the addresses are rounded in such a way that an integral + number of pages is written. + STACK: We need the stack information in order to get a meaningful + backtrace. We need to write the data from (esp) to + current->start_stack, so we round each of these off in order to be able + to write an integer number of pages. + The minimum core file size is 3 pages, or 12288 bytes. */ + +/* + * Pentium III FXSR, SSE support + * Gareth Hughes <gareth@valinux.com>, May 2000 + * + * Provide support for the GDB 5.0+ PTRACE_{GET|SET}FPXREGS requests for + * interacting with the FXSR-format floating point environment. Floating + * point data can be accessed in the regular format in the usual manner, + * and both the standard and SIMD floating point data can be accessed via + * the new ptrace requests. In either case, changes to the FPU environment + * will be reflected in the task's state as expected. + * + * x86-64 support by Andi Kleen. + */ + +/* This matches the 64bit FXSAVE format as defined by AMD. It is the same + as the 32bit format defined by Intel, except that the selector:offset pairs + for data and eip are replaced with flat 64bit pointers. */ +struct user_i387_struct { + unsigned short cwd; + unsigned short swd; + unsigned short twd; /* Note this is not the same as + the 32bit/x87/FSAVE twd */ + unsigned short fop; + __u64 rip; + __u64 rdp; + __u32 mxcsr; + __u32 mxcsr_mask; + __u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + __u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ + __u32 padding[24]; +}; + +/* + * Segment register layout in coredumps. + */ +struct user_regs_struct { + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; + unsigned long bp; + unsigned long bx; + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long ax; + unsigned long cx; + unsigned long dx; + unsigned long si; + unsigned long di; + unsigned long orig_ax; + unsigned long ip; + unsigned long cs; + unsigned long flags; + unsigned long sp; + unsigned long ss; + unsigned long fs_base; + unsigned long gs_base; + unsigned long ds; + unsigned long es; + unsigned long fs; + unsigned long gs; +}; + +/* When the kernel dumps core, it starts by dumping the user struct - + this will be used by gdb to figure out where the data and stack segments + are within the file, and what virtual addresses to use. */ + +struct user { +/* We start with the registers, to mimic the way that "memory" is returned + from the ptrace(3,...) function. */ + struct user_regs_struct regs; /* Where the registers are actually stored */ +/* ptrace does not yet supply these. Someday.... */ + int u_fpvalid; /* True if math co-processor being used. */ + /* for this mess. Not yet used. */ + int pad0; + struct user_i387_struct i387; /* Math Co-processor registers. */ +/* The rest of this junk is to help gdb figure out what goes where */ + unsigned long int u_tsize; /* Text segment size (pages). */ + unsigned long int u_dsize; /* Data segment size (pages). */ + unsigned long int u_ssize; /* Stack segment size (pages). */ + unsigned long start_code; /* Starting virtual address of text. */ + unsigned long start_stack; /* Starting virtual address of stack area. + This is actually the bottom of the stack, + the top of the stack is always found in the + esp register. */ + long int signal; /* Signal that caused the core dump. */ + int reserved; /* No longer used */ + int pad1; + unsigned long u_ar0; /* Used by gdb to help find the values for */ + /* the registers. */ + struct user_i387_struct *u_fpstate; /* Math Co-processor pointer. */ + unsigned long magic; /* To uniquely identify a core file */ + char u_comm[32]; /* User command that was responsible */ + unsigned long u_debugreg[8]; + unsigned long error_code; /* CPU error code or 0 */ + unsigned long fault_address; /* CR3 or 0 */ +}; +#define NBPG PAGE_SIZE +#define UPAGES 1 +#define HOST_TEXT_START_ADDR (u.start_code) +#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) + +#endif /* _ASM_X86_USER_64_H */ diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h new file mode 100644 index 00000000000..d931d3b7e6f --- /dev/null +++ b/arch/x86/include/asm/uv/bios.h @@ -0,0 +1,94 @@ +#ifndef _ASM_X86_UV_BIOS_H +#define _ASM_X86_UV_BIOS_H + +/* + * UV BIOS layer definitions. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) Russ Anderson + */ + +#include <linux/rtc.h> + +/* + * Values for the BIOS calls. It is passed as the first * argument in the + * BIOS call. Passing any other value in the first argument will result + * in a BIOS_STATUS_UNIMPLEMENTED return status. + */ +enum uv_bios_cmd { + UV_BIOS_COMMON, + UV_BIOS_GET_SN_INFO, + UV_BIOS_FREQ_BASE +}; + +/* + * Status values returned from a BIOS call. + */ +enum { + BIOS_STATUS_SUCCESS = 0, + BIOS_STATUS_UNIMPLEMENTED = -ENOSYS, + BIOS_STATUS_EINVAL = -EINVAL, + BIOS_STATUS_UNAVAIL = -EBUSY +}; + +/* + * The UV system table describes specific firmware + * capabilities available to the Linux kernel at runtime. + */ +struct uv_systab { + char signature[4]; /* must be "UVST" */ + u32 revision; /* distinguish different firmware revs */ + u64 function; /* BIOS runtime callback function ptr */ +}; + +enum { + BIOS_FREQ_BASE_PLATFORM = 0, + BIOS_FREQ_BASE_INTERVAL_TIMER = 1, + BIOS_FREQ_BASE_REALTIME_CLOCK = 2 +}; + +union partition_info_u { + u64 val; + struct { + u64 hub_version : 8, + partition_id : 16, + coherence_id : 16, + region_size : 24; + }; +}; + +/* + * bios calls have 6 parameters + */ +extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64); +extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64); +extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64); + +extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *); +extern s64 uv_bios_freq_base(u64, u64 *); + +extern void uv_bios_init(void); + +extern int uv_type; +extern long sn_partition_id; +extern long uv_coherency_id; +extern long uv_region_size; +#define partition_coherence_id() (uv_coherency_id) + +extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ + +#endif /* _ASM_X86_UV_BIOS_H */ diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h new file mode 100644 index 00000000000..e2363253bbb --- /dev/null +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -0,0 +1,332 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI UV Broadcast Assist Unit definitions + * + * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_X86_UV_UV_BAU_H +#define _ASM_X86_UV_UV_BAU_H + +#include <linux/bitmap.h> +#define BITSPERBYTE 8 + +/* + * Broadcast Assist Unit messaging structures + * + * Selective Broadcast activations are induced by software action + * specifying a particular 8-descriptor "set" via a 6-bit index written + * to an MMR. + * Thus there are 64 unique 512-byte sets of SB descriptors - one set for + * each 6-bit index value. These descriptor sets are mapped in sequence + * starting with set 0 located at the address specified in the + * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512, + * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. + * + * We will use 31 sets, one for sending BAU messages from each of the 32 + * cpu's on the node. + * + * TLB shootdown will use the first of the 8 descriptors of each set. + * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). + */ + +#define UV_ITEMS_PER_DESCRIPTOR 8 +#define UV_CPUS_PER_ACT_STATUS 32 +#define UV_ACT_STATUS_MASK 0x3 +#define UV_ACT_STATUS_SIZE 2 +#define UV_ACTIVATION_DESCRIPTOR_SIZE 32 +#define UV_DISTRIBUTION_SIZE 256 +#define UV_SW_ACK_NPENDING 8 +#define UV_NET_ENDPOINT_INTD 0x38 +#define UV_DESC_BASE_PNODE_SHIFT 49 +#define UV_PAYLOADQ_PNODE_SHIFT 49 +#define UV_PTC_BASENAME "sgi_uv/ptc_statistics" +#define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) + +/* + * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 + */ +#define DESC_STATUS_IDLE 0 +#define DESC_STATUS_ACTIVE 1 +#define DESC_STATUS_DESTINATION_TIMEOUT 2 +#define DESC_STATUS_SOURCE_TIMEOUT 3 + +/* + * source side threshholds at which message retries print a warning + */ +#define SOURCE_TIMEOUT_LIMIT 20 +#define DESTINATION_TIMEOUT_LIMIT 20 + +/* + * number of entries in the destination side payload queue + */ +#define DEST_Q_SIZE 17 +/* + * number of destination side software ack resources + */ +#define DEST_NUM_RESOURCES 8 +#define MAX_CPUS_PER_NODE 32 +/* + * completion statuses for sending a TLB flush message + */ +#define FLUSH_RETRY 1 +#define FLUSH_GIVEUP 2 +#define FLUSH_COMPLETE 3 + +/* + * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) + * If the 'multilevel' flag in the header portion of the descriptor + * has been set to 0, then endpoint multi-unicast mode is selected. + * The distribution specification (32 bytes) is interpreted as a 256-bit + * distribution vector. Adjacent bits correspond to consecutive even numbered + * nodeIDs. The result of adding the index of a given bit to the 15-bit + * 'base_dest_nodeid' field of the header corresponds to the + * destination nodeID associated with that specified bit. + */ +struct bau_target_nodemask { + unsigned long bits[BITS_TO_LONGS(256)]; +}; + +/* + * mask of cpu's on a node + * (during initialization we need to check that unsigned long has + * enough bits for max. cpu's per node) + */ +struct bau_local_cpumask { + unsigned long bits; +}; + +/* + * Payload: 16 bytes (128 bits) (bytes 0x20-0x2f of descriptor) + * only 12 bytes (96 bits) of the payload area are usable. + * An additional 3 bytes (bits 27:4) of the header address are carried + * to the next bytes of the destination payload queue. + * And an additional 2 bytes of the header Suppl_A field are also + * carried to the destination payload queue. + * But the first byte of the Suppl_A becomes bits 127:120 (the 16th byte) + * of the destination payload queue, which is written by the hardware + * with the s/w ack resource bit vector. + * [ effective message contents (16 bytes (128 bits) maximum), not counting + * the s/w ack bit vector ] + */ + +/* + * The payload is software-defined for INTD transactions + */ +struct bau_msg_payload { + unsigned long address; /* signifies a page or all TLB's + of the cpu */ + /* 64 bits */ + unsigned short sending_cpu; /* filled in by sender */ + /* 16 bits */ + unsigned short acknowledge_count;/* filled in by destination */ + /* 16 bits */ + unsigned int reserved1:32; /* not usable */ +}; + + +/* + * Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor) + * see table 4.2.3.0.1 in broacast_assist spec. + */ +struct bau_msg_header { + int dest_subnodeid:6; /* must be zero */ + /* bits 5:0 */ + int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */ + /* bits 20:6 */ + int command:8; /* message type */ + /* bits 28:21 */ + /* 0x38: SN3net EndPoint Message */ + int rsvd_1:3; /* must be zero */ + /* bits 31:29 */ + /* int will align on 32 bits */ + int rsvd_2:9; /* must be zero */ + /* bits 40:32 */ + /* Suppl_A is 56-41 */ + int payload_2a:8; /* becomes byte 16 of msg */ + /* bits 48:41 */ /* not currently using */ + int payload_2b:8; /* becomes byte 17 of msg */ + /* bits 56:49 */ /* not currently using */ + /* Address field (96:57) is never used as an + address (these are address bits 42:3) */ + int rsvd_3:1; /* must be zero */ + /* bit 57 */ + /* address bits 27:4 are payload */ + /* these 24 bits become bytes 12-14 of msg */ + int replied_to:1; /* sent as 0 by the source to byte 12 */ + /* bit 58 */ + + int payload_1a:5; /* not currently used */ + /* bits 63:59 */ + int payload_1b:8; /* not currently used */ + /* bits 71:64 */ + int payload_1c:8; /* not currently used */ + /* bits 79:72 */ + int payload_1d:2; /* not currently used */ + /* bits 81:80 */ + + int rsvd_4:7; /* must be zero */ + /* bits 88:82 */ + int sw_ack_flag:1; /* software acknowledge flag */ + /* bit 89 */ + /* INTD trasactions at destination are to + wait for software acknowledge */ + int rsvd_5:6; /* must be zero */ + /* bits 95:90 */ + int rsvd_6:5; /* must be zero */ + /* bits 100:96 */ + int int_both:1; /* if 1, interrupt both sockets on the blade */ + /* bit 101*/ + int fairness:3; /* usually zero */ + /* bits 104:102 */ + int multilevel:1; /* multi-level multicast format */ + /* bit 105 */ + /* 0 for TLB: endpoint multi-unicast messages */ + int chaining:1; /* next descriptor is part of this activation*/ + /* bit 106 */ + int rsvd_7:21; /* must be zero */ + /* bits 127:107 */ +}; + +/* + * The activation descriptor: + * The format of the message to send, plus all accompanying control + * Should be 64 bytes + */ +struct bau_desc { + struct bau_target_nodemask distribution; + /* + * message template, consisting of header and payload: + */ + struct bau_msg_header header; + struct bau_msg_payload payload; +}; +/* + * -payload-- ---------header------ + * bytes 0-11 bits 41-56 bits 58-81 + * A B (2) C (3) + * + * A/B/C are moved to: + * A C B + * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector) + * ------------payload queue----------- + */ + +/* + * The payload queue on the destination side is an array of these. + * With BAU_MISC_CONTROL set for software acknowledge mode, the messages + * are 32 bytes (2 micropackets) (256 bits) in length, but contain only 17 + * bytes of usable data, including the sw ack vector in byte 15 (bits 127:120) + * (12 bytes come from bau_msg_payload, 3 from payload_1, 2 from + * sw_ack_vector and payload_2) + * "Enabling Software Acknowledgment mode (see Section 4.3.3 Software + * Acknowledge Processing) also selects 32 byte (17 bytes usable) payload + * operation." + */ +struct bau_payload_queue_entry { + unsigned long address; /* signifies a page or all TLB's + of the cpu */ + /* 64 bits, bytes 0-7 */ + + unsigned short sending_cpu; /* cpu that sent the message */ + /* 16 bits, bytes 8-9 */ + + unsigned short acknowledge_count; /* filled in by destination */ + /* 16 bits, bytes 10-11 */ + + unsigned short replied_to:1; /* sent as 0 by the source */ + /* 1 bit */ + unsigned short unused1:7; /* not currently using */ + /* 7 bits: byte 12) */ + + unsigned char unused2[2]; /* not currently using */ + /* bytes 13-14 */ + + unsigned char sw_ack_vector; /* filled in by the hardware */ + /* byte 15 (bits 127:120) */ + + unsigned char unused4[3]; /* not currently using bytes 17-19 */ + /* bytes 17-19 */ + + int number_of_cpus; /* filled in at destination */ + /* 32 bits, bytes 20-23 (aligned) */ + + unsigned char unused5[8]; /* not using */ + /* bytes 24-31 */ +}; + +/* + * one for every slot in the destination payload queue + */ +struct bau_msg_status { + struct bau_local_cpumask seen_by; /* map of cpu's */ +}; + +/* + * one for every slot in the destination software ack resources + */ +struct bau_sw_ack_status { + struct bau_payload_queue_entry *msg; /* associated message */ + int watcher; /* cpu monitoring, or -1 */ +}; + +/* + * one on every node and per-cpu; to locate the software tables + */ +struct bau_control { + struct bau_desc *descriptor_base; + struct bau_payload_queue_entry *bau_msg_head; + struct bau_payload_queue_entry *va_queue_first; + struct bau_payload_queue_entry *va_queue_last; + struct bau_msg_status *msg_statuses; + int *watching; /* pointer to array */ +}; + +/* + * This structure is allocated per_cpu for UV TLB shootdown statistics. + */ +struct ptc_stats { + unsigned long ptc_i; /* number of IPI-style flushes */ + unsigned long requestor; /* number of nodes this cpu sent to */ + unsigned long requestee; /* times cpu was remotely requested */ + unsigned long alltlb; /* times all tlb's on this cpu were flushed */ + unsigned long onetlb; /* times just one tlb on this cpu was flushed */ + unsigned long s_retry; /* retries on source side timeouts */ + unsigned long d_retry; /* retries on destination side timeouts */ + unsigned long sflush; /* cycles spent in uv_flush_tlb_others */ + unsigned long dflush; /* cycles spent on destination side */ + unsigned long retriesok; /* successes on retries */ + unsigned long nomsg; /* interrupts with no message */ + unsigned long multmsg; /* interrupts with multiple messages */ + unsigned long ntargeted;/* nodes targeted */ +}; + +static inline int bau_node_isset(int node, struct bau_target_nodemask *dstp) +{ + return constant_test_bit(node, &dstp->bits[0]); +} +static inline void bau_node_set(int node, struct bau_target_nodemask *dstp) +{ + __set_bit(node, &dstp->bits[0]); +} +static inline void bau_nodes_clear(struct bau_target_nodemask *dstp, int nbits) +{ + bitmap_zero(&dstp->bits[0], nbits); +} + +static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) +{ + bitmap_zero(&dstp->bits, nbits); +} + +#define cpubit_isset(cpu, bau_local_cpumask) \ + test_bit((cpu), (bau_local_cpumask).bits) + +extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long); +extern void uv_bau_message_intr1(void); +extern void uv_bau_timeout_intr1(void); + +#endif /* _ASM_X86_UV_UV_BAU_H */ diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h new file mode 100644 index 00000000000..c6ad93e315c --- /dev/null +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -0,0 +1,354 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI UV architectural definitions + * + * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_X86_UV_UV_HUB_H +#define _ASM_X86_UV_UV_HUB_H + +#include <linux/numa.h> +#include <linux/percpu.h> +#include <asm/types.h> +#include <asm/percpu.h> + + +/* + * Addressing Terminology + * + * M - The low M bits of a physical address represent the offset + * into the blade local memory. RAM memory on a blade is physically + * contiguous (although various IO spaces may punch holes in + * it).. + * + * N - Number of bits in the node portion of a socket physical + * address. + * + * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of + * routers always have low bit of 1, C/MBricks have low bit + * equal to 0. Most addressing macros that target UV hub chips + * right shift the NASID by 1 to exclude the always-zero bit. + * NASIDs contain up to 15 bits. + * + * GNODE - NASID right shifted by 1 bit. Most mmrs contain gnodes instead + * of nasids. + * + * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant + * of the nasid for socket usage. + * + * + * NumaLink Global Physical Address Format: + * +--------------------------------+---------------------+ + * |00..000| GNODE | NodeOffset | + * +--------------------------------+---------------------+ + * |<-------53 - M bits --->|<--------M bits -----> + * + * M - number of node offset bits (35 .. 40) + * + * + * Memory/UV-HUB Processor Socket Address Format: + * +----------------+---------------+---------------------+ + * |00..000000000000| PNODE | NodeOffset | + * +----------------+---------------+---------------------+ + * <--- N bits --->|<--------M bits -----> + * + * M - number of node offset bits (35 .. 40) + * N - number of PNODE bits (0 .. 10) + * + * Note: M + N cannot currently exceed 44 (x86_64) or 46 (IA64). + * The actual values are configuration dependent and are set at + * boot time. M & N values are set by the hardware/BIOS at boot. + * + * + * APICID format + * NOTE!!!!!! This is the current format of the APICID. However, code + * should assume that this will change in the future. Use functions + * in this file for all APICID bit manipulations and conversion. + * + * 1111110000000000 + * 5432109876543210 + * pppppppppplc0cch + * sssssssssss + * + * p = pnode bits + * l = socket number on board + * c = core + * h = hyperthread + * s = bits that are in the SOCKET_ID CSR + * + * Note: Processor only supports 12 bits in the APICID register. The ACPI + * tables hold all 16 bits. Software needs to be aware of this. + * + * Unless otherwise specified, all references to APICID refer to + * the FULL value contained in ACPI tables, not the subset in the + * processor APICID register. + */ + + +/* + * Maximum number of bricks in all partitions and in all coherency domains. + * This is the total number of bricks accessible in the numalink fabric. It + * includes all C & M bricks. Routers are NOT included. + * + * This value is also the value of the maximum number of non-router NASIDs + * in the numalink fabric. + * + * NOTE: a brick may contain 1 or 2 OS nodes. Don't get these confused. + */ +#define UV_MAX_NUMALINK_BLADES 16384 + +/* + * Maximum number of C/Mbricks within a software SSI (hardware may support + * more). + */ +#define UV_MAX_SSI_BLADES 256 + +/* + * The largest possible NASID of a C or M brick (+ 2) + */ +#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2) + +/* + * The following defines attributes of the HUB chip. These attributes are + * frequently referenced and are kept in the per-cpu data areas of each cpu. + * They are kept together in a struct to minimize cache misses. + */ +struct uv_hub_info_s { + unsigned long global_mmr_base; + unsigned long gpa_mask; + unsigned long gnode_upper; + unsigned long lowmem_remap_top; + unsigned long lowmem_remap_base; + unsigned short pnode; + unsigned short pnode_mask; + unsigned short coherency_domain_number; + unsigned short numa_blade_id; + unsigned char blade_processor_id; + unsigned char m_val; + unsigned char n_val; +}; +DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); +#define uv_hub_info (&__get_cpu_var(__uv_hub_info)) +#define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) + +/* + * Local & Global MMR space macros. + * Note: macros are intended to be used ONLY by inline functions + * in this file - not by other kernel code. + * n - NASID (full 15-bit global nasid) + * g - GNODE (full 15-bit global nasid, right shifted 1) + * p - PNODE (local part of nsids, right shifted 1) + */ +#define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask) +#define UV_PNODE_TO_NASID(p) (((p) << 1) | uv_hub_info->gnode_upper) + +#define UV_LOCAL_MMR_BASE 0xf4000000UL +#define UV_GLOBAL_MMR32_BASE 0xf8000000UL +#define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base) +#define UV_LOCAL_MMR_SIZE (64UL * 1024 * 1024) +#define UV_GLOBAL_MMR32_SIZE (64UL * 1024 * 1024) + +#define UV_GLOBAL_MMR32_PNODE_SHIFT 15 +#define UV_GLOBAL_MMR64_PNODE_SHIFT 26 + +#define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT)) + +#define UV_GLOBAL_MMR64_PNODE_BITS(p) \ + ((unsigned long)(p) << UV_GLOBAL_MMR64_PNODE_SHIFT) + +#define UV_APIC_PNODE_SHIFT 6 + +/* + * Macros for converting between kernel virtual addresses, socket local physical + * addresses, and UV global physical addresses. + * Note: use the standard __pa() & __va() macros for converting + * between socket virtual and socket physical addresses. + */ + +/* socket phys RAM --> UV global physical address */ +static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) +{ + if (paddr < uv_hub_info->lowmem_remap_top) + paddr += uv_hub_info->lowmem_remap_base; + return paddr | uv_hub_info->gnode_upper; +} + + +/* socket virtual --> UV global physical address */ +static inline unsigned long uv_gpa(void *v) +{ + return __pa(v) | uv_hub_info->gnode_upper; +} + +/* socket virtual --> UV global physical address */ +static inline void *uv_vgpa(void *v) +{ + return (void *)uv_gpa(v); +} + +/* UV global physical address --> socket virtual */ +static inline void *uv_va(unsigned long gpa) +{ + return __va(gpa & uv_hub_info->gpa_mask); +} + +/* pnode, offset --> socket virtual */ +static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) +{ + return __va(((unsigned long)pnode << uv_hub_info->m_val) | offset); +} + + +/* + * Extract a PNODE from an APICID (full apicid, not processor subset) + */ +static inline int uv_apicid_to_pnode(int apicid) +{ + return (apicid >> UV_APIC_PNODE_SHIFT); +} + +/* + * Access global MMRs using the low memory MMR32 space. This region supports + * faster MMR access but not all MMRs are accessible in this space. + */ +static inline unsigned long *uv_global_mmr32_address(int pnode, + unsigned long offset) +{ + return __va(UV_GLOBAL_MMR32_BASE | + UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset); +} + +static inline void uv_write_global_mmr32(int pnode, unsigned long offset, + unsigned long val) +{ + *uv_global_mmr32_address(pnode, offset) = val; +} + +static inline unsigned long uv_read_global_mmr32(int pnode, + unsigned long offset) +{ + return *uv_global_mmr32_address(pnode, offset); +} + +/* + * Access Global MMR space using the MMR space located at the top of physical + * memory. + */ +static inline unsigned long *uv_global_mmr64_address(int pnode, + unsigned long offset) +{ + return __va(UV_GLOBAL_MMR64_BASE | + UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); +} + +static inline void uv_write_global_mmr64(int pnode, unsigned long offset, + unsigned long val) +{ + *uv_global_mmr64_address(pnode, offset) = val; +} + +static inline unsigned long uv_read_global_mmr64(int pnode, + unsigned long offset) +{ + return *uv_global_mmr64_address(pnode, offset); +} + +/* + * Access hub local MMRs. Faster than using global space but only local MMRs + * are accessible. + */ +static inline unsigned long *uv_local_mmr_address(unsigned long offset) +{ + return __va(UV_LOCAL_MMR_BASE | offset); +} + +static inline unsigned long uv_read_local_mmr(unsigned long offset) +{ + return *uv_local_mmr_address(offset); +} + +static inline void uv_write_local_mmr(unsigned long offset, unsigned long val) +{ + *uv_local_mmr_address(offset) = val; +} + +/* + * Structures and definitions for converting between cpu, node, pnode, and blade + * numbers. + */ +struct uv_blade_info { + unsigned short nr_possible_cpus; + unsigned short nr_online_cpus; + unsigned short pnode; +}; +extern struct uv_blade_info *uv_blade_info; +extern short *uv_node_to_blade; +extern short *uv_cpu_to_blade; +extern short uv_possible_blades; + +/* Blade-local cpu number of current cpu. Numbered 0 .. <# cpus on the blade> */ +static inline int uv_blade_processor_id(void) +{ + return uv_hub_info->blade_processor_id; +} + +/* Blade number of current cpu. Numnbered 0 .. <#blades -1> */ +static inline int uv_numa_blade_id(void) +{ + return uv_hub_info->numa_blade_id; +} + +/* Convert a cpu number to the the UV blade number */ +static inline int uv_cpu_to_blade_id(int cpu) +{ + return uv_cpu_to_blade[cpu]; +} + +/* Convert linux node number to the UV blade number */ +static inline int uv_node_to_blade_id(int nid) +{ + return uv_node_to_blade[nid]; +} + +/* Convert a blade id to the PNODE of the blade */ +static inline int uv_blade_to_pnode(int bid) +{ + return uv_blade_info[bid].pnode; +} + +/* Determine the number of possible cpus on a blade */ +static inline int uv_blade_nr_possible_cpus(int bid) +{ + return uv_blade_info[bid].nr_possible_cpus; +} + +/* Determine the number of online cpus on a blade */ +static inline int uv_blade_nr_online_cpus(int bid) +{ + return uv_blade_info[bid].nr_online_cpus; +} + +/* Convert a cpu id to the PNODE of the blade containing the cpu */ +static inline int uv_cpu_to_pnode(int cpu) +{ + return uv_blade_info[uv_cpu_to_blade_id(cpu)].pnode; +} + +/* Convert a linux node number to the PNODE of the blade */ +static inline int uv_node_to_pnode(int nid) +{ + return uv_blade_info[uv_node_to_blade_id(nid)].pnode; +} + +/* Maximum possible number of blades */ +static inline int uv_num_possible_blades(void) +{ + return uv_possible_blades; +} + +#endif /* _ASM_X86_UV_UV_HUB_H */ + diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h new file mode 100644 index 00000000000..9613c8c0b64 --- /dev/null +++ b/arch/x86/include/asm/uv/uv_irq.h @@ -0,0 +1,36 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI UV IRQ definitions + * + * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_X86_UV_UV_IRQ_H +#define _ASM_X86_UV_UV_IRQ_H + +/* If a generic version of this structure gets defined, eliminate this one. */ +struct uv_IO_APIC_route_entry { + __u64 vector : 8, + delivery_mode : 3, + dest_mode : 1, + delivery_status : 1, + polarity : 1, + __reserved_1 : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15, + dest : 32; +}; + +extern struct irq_chip uv_irq_chip; + +extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long); +extern void arch_disable_uv_irq(int, unsigned long); + +extern int uv_setup_irq(char *, int, int, unsigned long); +extern void uv_teardown_irq(unsigned int, int, unsigned long); + +#endif /* _ASM_X86_UV_UV_IRQ_H */ diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h new file mode 100644 index 00000000000..dd627793a23 --- /dev/null +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -0,0 +1,1295 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI UV MMR definitions + * + * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_X86_UV_UV_MMRS_H +#define _ASM_X86_UV_UV_MMRS_H + +#define UV_MMR_ENABLE (1UL << 63) + +/* ========================================================================= */ +/* UVH_BAU_DATA_CONFIG */ +/* ========================================================================= */ +#define UVH_BAU_DATA_CONFIG 0x61680UL +#define UVH_BAU_DATA_CONFIG_32 0x0438 + +#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0 +#define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_BAU_DATA_CONFIG_DM_SHFT 8 +#define UVH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_BAU_DATA_CONFIG_DESTMODE_SHFT 11 +#define UVH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_BAU_DATA_CONFIG_STATUS_SHFT 12 +#define UVH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_BAU_DATA_CONFIG_P_SHFT 13 +#define UVH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_BAU_DATA_CONFIG_T_SHFT 15 +#define UVH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_BAU_DATA_CONFIG_M_SHFT 16 +#define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_BAU_DATA_CONFIG_APIC_ID_SHFT 32 +#define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_bau_data_config_u { + unsigned long v; + struct uvh_bau_data_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_EVENT_OCCURRED0 */ +/* ========================================================================= */ +#define UVH_EVENT_OCCURRED0 0x70000UL +#define UVH_EVENT_OCCURRED0_32 0x005e8 + +#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0 +#define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL +#define UVH_EVENT_OCCURRED0_GR0_HCERR_SHFT 1 +#define UVH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL +#define UVH_EVENT_OCCURRED0_GR1_HCERR_SHFT 2 +#define UVH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL +#define UVH_EVENT_OCCURRED0_LH_HCERR_SHFT 3 +#define UVH_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL +#define UVH_EVENT_OCCURRED0_RH_HCERR_SHFT 4 +#define UVH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL +#define UVH_EVENT_OCCURRED0_XN_HCERR_SHFT 5 +#define UVH_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL +#define UVH_EVENT_OCCURRED0_SI_HCERR_SHFT 6 +#define UVH_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL +#define UVH_EVENT_OCCURRED0_LB_AOERR0_SHFT 7 +#define UVH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL +#define UVH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8 +#define UVH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL +#define UVH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9 +#define UVH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL +#define UVH_EVENT_OCCURRED0_LH_AOERR0_SHFT 10 +#define UVH_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL +#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 +#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL +#define UVH_EVENT_OCCURRED0_XN_AOERR0_SHFT 12 +#define UVH_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL +#define UVH_EVENT_OCCURRED0_SI_AOERR0_SHFT 13 +#define UVH_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL +#define UVH_EVENT_OCCURRED0_LB_AOERR1_SHFT 14 +#define UVH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL +#define UVH_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15 +#define UVH_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL +#define UVH_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16 +#define UVH_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL +#define UVH_EVENT_OCCURRED0_LH_AOERR1_SHFT 17 +#define UVH_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL +#define UVH_EVENT_OCCURRED0_RH_AOERR1_SHFT 18 +#define UVH_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL +#define UVH_EVENT_OCCURRED0_XN_AOERR1_SHFT 19 +#define UVH_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL +#define UVH_EVENT_OCCURRED0_SI_AOERR1_SHFT 20 +#define UVH_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL +#define UVH_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21 +#define UVH_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL +#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22 +#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL +#define UVH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39 +#define UVH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL +#define UVH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40 +#define UVH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL +#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41 +#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL +#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42 +#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL +#define UVH_EVENT_OCCURRED0_LTC_INT_SHFT 43 +#define UVH_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL +#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44 +#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL +#define UVH_EVENT_OCCURRED0_IPI_INT_SHFT 45 +#define UVH_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL +#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46 +#define UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL +#define UVH_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47 +#define UVH_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL +#define UVH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48 +#define UVH_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL +#define UVH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49 +#define UVH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL +#define UVH_EVENT_OCCURRED0_PROFILE_INT_SHFT 50 +#define UVH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL +#define UVH_EVENT_OCCURRED0_RTC0_SHFT 51 +#define UVH_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL +#define UVH_EVENT_OCCURRED0_RTC1_SHFT 52 +#define UVH_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL +#define UVH_EVENT_OCCURRED0_RTC2_SHFT 53 +#define UVH_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL +#define UVH_EVENT_OCCURRED0_RTC3_SHFT 54 +#define UVH_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL +#define UVH_EVENT_OCCURRED0_BAU_DATA_SHFT 55 +#define UVH_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL +#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56 +#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL +union uvh_event_occurred0_u { + unsigned long v; + struct uvh_event_occurred0_s { + unsigned long lb_hcerr : 1; /* RW, W1C */ + unsigned long gr0_hcerr : 1; /* RW, W1C */ + unsigned long gr1_hcerr : 1; /* RW, W1C */ + unsigned long lh_hcerr : 1; /* RW, W1C */ + unsigned long rh_hcerr : 1; /* RW, W1C */ + unsigned long xn_hcerr : 1; /* RW, W1C */ + unsigned long si_hcerr : 1; /* RW, W1C */ + unsigned long lb_aoerr0 : 1; /* RW, W1C */ + unsigned long gr0_aoerr0 : 1; /* RW, W1C */ + unsigned long gr1_aoerr0 : 1; /* RW, W1C */ + unsigned long lh_aoerr0 : 1; /* RW, W1C */ + unsigned long rh_aoerr0 : 1; /* RW, W1C */ + unsigned long xn_aoerr0 : 1; /* RW, W1C */ + unsigned long si_aoerr0 : 1; /* RW, W1C */ + unsigned long lb_aoerr1 : 1; /* RW, W1C */ + unsigned long gr0_aoerr1 : 1; /* RW, W1C */ + unsigned long gr1_aoerr1 : 1; /* RW, W1C */ + unsigned long lh_aoerr1 : 1; /* RW, W1C */ + unsigned long rh_aoerr1 : 1; /* RW, W1C */ + unsigned long xn_aoerr1 : 1; /* RW, W1C */ + unsigned long si_aoerr1 : 1; /* RW, W1C */ + unsigned long rh_vpi_int : 1; /* RW, W1C */ + unsigned long system_shutdown_int : 1; /* RW, W1C */ + unsigned long lb_irq_int_0 : 1; /* RW, W1C */ + unsigned long lb_irq_int_1 : 1; /* RW, W1C */ + unsigned long lb_irq_int_2 : 1; /* RW, W1C */ + unsigned long lb_irq_int_3 : 1; /* RW, W1C */ + unsigned long lb_irq_int_4 : 1; /* RW, W1C */ + unsigned long lb_irq_int_5 : 1; /* RW, W1C */ + unsigned long lb_irq_int_6 : 1; /* RW, W1C */ + unsigned long lb_irq_int_7 : 1; /* RW, W1C */ + unsigned long lb_irq_int_8 : 1; /* RW, W1C */ + unsigned long lb_irq_int_9 : 1; /* RW, W1C */ + unsigned long lb_irq_int_10 : 1; /* RW, W1C */ + unsigned long lb_irq_int_11 : 1; /* RW, W1C */ + unsigned long lb_irq_int_12 : 1; /* RW, W1C */ + unsigned long lb_irq_int_13 : 1; /* RW, W1C */ + unsigned long lb_irq_int_14 : 1; /* RW, W1C */ + unsigned long lb_irq_int_15 : 1; /* RW, W1C */ + unsigned long l1_nmi_int : 1; /* RW, W1C */ + unsigned long stop_clock : 1; /* RW, W1C */ + unsigned long asic_to_l1 : 1; /* RW, W1C */ + unsigned long l1_to_asic : 1; /* RW, W1C */ + unsigned long ltc_int : 1; /* RW, W1C */ + unsigned long la_seq_trigger : 1; /* RW, W1C */ + unsigned long ipi_int : 1; /* RW, W1C */ + unsigned long extio_int0 : 1; /* RW, W1C */ + unsigned long extio_int1 : 1; /* RW, W1C */ + unsigned long extio_int2 : 1; /* RW, W1C */ + unsigned long extio_int3 : 1; /* RW, W1C */ + unsigned long profile_int : 1; /* RW, W1C */ + unsigned long rtc0 : 1; /* RW, W1C */ + unsigned long rtc1 : 1; /* RW, W1C */ + unsigned long rtc2 : 1; /* RW, W1C */ + unsigned long rtc3 : 1; /* RW, W1C */ + unsigned long bau_data : 1; /* RW, W1C */ + unsigned long power_management_req : 1; /* RW, W1C */ + unsigned long rsvd_57_63 : 7; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_EVENT_OCCURRED0_ALIAS */ +/* ========================================================================= */ +#define UVH_EVENT_OCCURRED0_ALIAS 0x0000000000070008UL +#define UVH_EVENT_OCCURRED0_ALIAS_32 0x005f0 + +/* ========================================================================= */ +/* UVH_INT_CMPB */ +/* ========================================================================= */ +#define UVH_INT_CMPB 0x22080UL + +#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0 +#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL + +union uvh_int_cmpb_u { + unsigned long v; + struct uvh_int_cmpb_s { + unsigned long real_time_cmpb : 56; /* RW */ + unsigned long rsvd_56_63 : 8; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_INT_CMPC */ +/* ========================================================================= */ +#define UVH_INT_CMPC 0x22100UL + +#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT 0 +#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL + +union uvh_int_cmpc_u { + unsigned long v; + struct uvh_int_cmpc_s { + unsigned long real_time_cmpc : 56; /* RW */ + unsigned long rsvd_56_63 : 8; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_INT_CMPD */ +/* ========================================================================= */ +#define UVH_INT_CMPD 0x22180UL + +#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT 0 +#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL + +union uvh_int_cmpd_u { + unsigned long v; + struct uvh_int_cmpd_s { + unsigned long real_time_cmpd : 56; /* RW */ + unsigned long rsvd_56_63 : 8; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_IPI_INT */ +/* ========================================================================= */ +#define UVH_IPI_INT 0x60500UL +#define UVH_IPI_INT_32 0x0348 + +#define UVH_IPI_INT_VECTOR_SHFT 0 +#define UVH_IPI_INT_VECTOR_MASK 0x00000000000000ffUL +#define UVH_IPI_INT_DELIVERY_MODE_SHFT 8 +#define UVH_IPI_INT_DELIVERY_MODE_MASK 0x0000000000000700UL +#define UVH_IPI_INT_DESTMODE_SHFT 11 +#define UVH_IPI_INT_DESTMODE_MASK 0x0000000000000800UL +#define UVH_IPI_INT_APIC_ID_SHFT 16 +#define UVH_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL +#define UVH_IPI_INT_SEND_SHFT 63 +#define UVH_IPI_INT_SEND_MASK 0x8000000000000000UL + +union uvh_ipi_int_u { + unsigned long v; + struct uvh_ipi_int_s { + unsigned long vector_ : 8; /* RW */ + unsigned long delivery_mode : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long rsvd_12_15 : 4; /* */ + unsigned long apic_id : 32; /* RW */ + unsigned long rsvd_48_62 : 15; /* */ + unsigned long send : 1; /* WP */ + } s; +}; + +/* ========================================================================= */ +/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */ +/* ========================================================================= */ +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x009c0 + +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL + +union uvh_lb_bau_intd_payload_queue_first_u { + unsigned long v; + struct uvh_lb_bau_intd_payload_queue_first_s { + unsigned long rsvd_0_3: 4; /* */ + unsigned long address : 39; /* RW */ + unsigned long rsvd_43_48: 6; /* */ + unsigned long node_id : 14; /* RW */ + unsigned long rsvd_63 : 1; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */ +/* ========================================================================= */ +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x009c8 + +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL + +union uvh_lb_bau_intd_payload_queue_last_u { + unsigned long v; + struct uvh_lb_bau_intd_payload_queue_last_s { + unsigned long rsvd_0_3: 4; /* */ + unsigned long address : 39; /* RW */ + unsigned long rsvd_43_63: 21; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */ +/* ========================================================================= */ +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x009d0 + +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL + +union uvh_lb_bau_intd_payload_queue_tail_u { + unsigned long v; + struct uvh_lb_bau_intd_payload_queue_tail_s { + unsigned long rsvd_0_3: 4; /* */ + unsigned long address : 39; /* RW */ + unsigned long rsvd_43_63: 21; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */ +/* ========================================================================= */ +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0x0a68 + +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL +union uvh_lb_bau_intd_software_acknowledge_u { + unsigned long v; + struct uvh_lb_bau_intd_software_acknowledge_s { + unsigned long pending_0 : 1; /* RW, W1C */ + unsigned long pending_1 : 1; /* RW, W1C */ + unsigned long pending_2 : 1; /* RW, W1C */ + unsigned long pending_3 : 1; /* RW, W1C */ + unsigned long pending_4 : 1; /* RW, W1C */ + unsigned long pending_5 : 1; /* RW, W1C */ + unsigned long pending_6 : 1; /* RW, W1C */ + unsigned long pending_7 : 1; /* RW, W1C */ + unsigned long timeout_0 : 1; /* RW, W1C */ + unsigned long timeout_1 : 1; /* RW, W1C */ + unsigned long timeout_2 : 1; /* RW, W1C */ + unsigned long timeout_3 : 1; /* RW, W1C */ + unsigned long timeout_4 : 1; /* RW, W1C */ + unsigned long timeout_5 : 1; /* RW, W1C */ + unsigned long timeout_6 : 1; /* RW, W1C */ + unsigned long timeout_7 : 1; /* RW, W1C */ + unsigned long rsvd_16_63: 48; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */ +/* ========================================================================= */ +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x0000000000320088UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x0a70 + +/* ========================================================================= */ +/* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ +/* ========================================================================= */ +#define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL +#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x009a8 + +#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0 +#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL +#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62 +#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL +#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_SHFT 63 +#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL + +union uvh_lb_bau_sb_activation_control_u { + unsigned long v; + struct uvh_lb_bau_sb_activation_control_s { + unsigned long index : 6; /* RW */ + unsigned long rsvd_6_61: 56; /* */ + unsigned long push : 1; /* WP */ + unsigned long init : 1; /* WP */ + } s; +}; + +/* ========================================================================= */ +/* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */ +/* ========================================================================= */ +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x009b0 + +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL + +union uvh_lb_bau_sb_activation_status_0_u { + unsigned long v; + struct uvh_lb_bau_sb_activation_status_0_s { + unsigned long status : 64; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */ +/* ========================================================================= */ +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x009b8 + +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL + +union uvh_lb_bau_sb_activation_status_1_u { + unsigned long v; + struct uvh_lb_bau_sb_activation_status_1_s { + unsigned long status : 64; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_LB_BAU_SB_DESCRIPTOR_BASE */ +/* ========================================================================= */ +#define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL +#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x009a0 + +#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 +#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL +#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 +#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL + +union uvh_lb_bau_sb_descriptor_base_u { + unsigned long v; + struct uvh_lb_bau_sb_descriptor_base_s { + unsigned long rsvd_0_11 : 12; /* */ + unsigned long page_address : 31; /* RW */ + unsigned long rsvd_43_48 : 6; /* */ + unsigned long node_id : 14; /* RW */ + unsigned long rsvd_63 : 1; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_LB_MCAST_AOERR0_RPT_ENABLE */ +/* ========================================================================= */ +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE 0x50b20UL + +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_SHFT 0 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_MASK 0x0000000000000001UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_SHFT 1 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_MASK 0x0000000000000002UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_SHFT 2 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_MASK 0x0000000000000004UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_SHFT 3 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_MASK 0x0000000000000008UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_SHFT 4 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_MASK 0x0000000000000010UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_SHFT 5 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_MASK 0x0000000000000020UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_SHFT 6 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_MASK 0x0000000000000040UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_SHFT 7 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_MASK 0x0000000000000080UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_SHFT 8 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_MASK 0x0000000000000100UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_SHFT 9 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_MASK 0x0000000000000200UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_SHFT 10 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_MASK 0x0000000000000400UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_SHFT 11 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_MASK 0x0000000000000800UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_SHFT 12 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_MASK 0x0000000000001000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_SHFT 13 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_MASK 0x0000000000002000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_SHFT 14 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_MASK 0x0000000000004000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_SHFT 15 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_MASK 0x0000000000008000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_SHFT 16 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_MASK 0x0000000000010000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_SHFT 17 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_MASK 0x0000000000020000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_SHFT 18 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_MASK 0x0000000000040000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_SHFT 19 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_MASK 0x0000000000080000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_SHFT 20 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_MASK 0x0000000000100000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_SHFT 21 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_MASK 0x0000000000200000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_AMO_TIMEOUT_SHFT 22 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_AMO_TIMEOUT_MASK 0x0000000000400000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_PUT_TIMEOUT_SHFT 23 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_PUT_TIMEOUT_MASK 0x0000000000800000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_SHFT 24 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_MASK 0x0000000001000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_SHFT 25 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_MASK 0x0000000002000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_SHFT 26 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_MASK 0x0000000004000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_SHFT 27 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_MASK 0x0000000008000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_SHFT 28 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_MASK 0x0000000010000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_SHFT 29 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_MASK 0x0000000020000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_SHFT 30 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_MASK 0x0000000040000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_SHFT 31 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_MASK 0x0000000080000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_SHFT 32 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_MASK 0x0000000100000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_SHFT 33 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_MASK 0x0000000200000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_SHFT 34 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_MASK 0x0000000400000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_SHFT 35 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_MASK 0x0000000800000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_SHFT 36 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000001000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_SHFT 37 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_MASK 0x0000002000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_SHFT 38 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_MASK 0x0000004000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_SHFT 39 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_MASK 0x0000008000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_SHFT 40 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_MASK 0x0000010000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_SHFT 41 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_MASK 0x0000020000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_SHFT 42 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_MASK 0x0000040000000000UL + +union uvh_lb_mcast_aoerr0_rpt_enable_u { + unsigned long v; + struct uvh_lb_mcast_aoerr0_rpt_enable_s { + unsigned long mcast_obese_msg : 1; /* RW */ + unsigned long mcast_data_sb_err : 1; /* RW */ + unsigned long mcast_nack_buff_parity : 1; /* RW */ + unsigned long mcast_timeout : 1; /* RW */ + unsigned long mcast_inactive_reply : 1; /* RW */ + unsigned long mcast_upgrade_error : 1; /* RW */ + unsigned long mcast_reg_count_underflow : 1; /* RW */ + unsigned long mcast_rep_obese_msg : 1; /* RW */ + unsigned long ucache_req_runt_msg : 1; /* RW */ + unsigned long ucache_req_obese_msg : 1; /* RW */ + unsigned long ucache_req_data_sb_err : 1; /* RW */ + unsigned long ucache_rep_runt_msg : 1; /* RW */ + unsigned long ucache_rep_obese_msg : 1; /* RW */ + unsigned long ucache_rep_data_sb_err : 1; /* RW */ + unsigned long ucache_rep_command_err : 1; /* RW */ + unsigned long ucache_pend_timeout : 1; /* RW */ + unsigned long macc_req_runt_msg : 1; /* RW */ + unsigned long macc_req_obese_msg : 1; /* RW */ + unsigned long macc_req_data_sb_err : 1; /* RW */ + unsigned long macc_rep_runt_msg : 1; /* RW */ + unsigned long macc_rep_obese_msg : 1; /* RW */ + unsigned long macc_rep_data_sb_err : 1; /* RW */ + unsigned long macc_amo_timeout : 1; /* RW */ + unsigned long macc_put_timeout : 1; /* RW */ + unsigned long macc_spurious_event : 1; /* RW */ + unsigned long ioh_destination_table_parity : 1; /* RW */ + unsigned long get_had_error_reply : 1; /* RW */ + unsigned long get_timeout : 1; /* RW */ + unsigned long lock_manager_had_error_reply : 1; /* RW */ + unsigned long put_had_error_reply : 1; /* RW */ + unsigned long put_timeout : 1; /* RW */ + unsigned long sb_activation_overrun : 1; /* RW */ + unsigned long completed_gb_activation_had_error_reply : 1; /* RW */ + unsigned long completed_gb_activation_timeout : 1; /* RW */ + unsigned long descriptor_buffer_0_parity : 1; /* RW */ + unsigned long descriptor_buffer_1_parity : 1; /* RW */ + unsigned long socket_destination_table_parity : 1; /* RW */ + unsigned long bau_reply_payload_corruption : 1; /* RW */ + unsigned long io_port_destination_table_parity : 1; /* RW */ + unsigned long intd_soft_ack_timeout : 1; /* RW */ + unsigned long int_rep_obese_msg : 1; /* RW */ + unsigned long int_rep_command_err : 1; /* RW */ + unsigned long int_timeout : 1; /* RW */ + unsigned long rsvd_43_63 : 21; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_LOCAL_INT0_CONFIG */ +/* ========================================================================= */ +#define UVH_LOCAL_INT0_CONFIG 0x61000UL + +#define UVH_LOCAL_INT0_CONFIG_VECTOR_SHFT 0 +#define UVH_LOCAL_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_LOCAL_INT0_CONFIG_DM_SHFT 8 +#define UVH_LOCAL_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_LOCAL_INT0_CONFIG_DESTMODE_SHFT 11 +#define UVH_LOCAL_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_LOCAL_INT0_CONFIG_STATUS_SHFT 12 +#define UVH_LOCAL_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_LOCAL_INT0_CONFIG_P_SHFT 13 +#define UVH_LOCAL_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_LOCAL_INT0_CONFIG_T_SHFT 15 +#define UVH_LOCAL_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_LOCAL_INT0_CONFIG_M_SHFT 16 +#define UVH_LOCAL_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_LOCAL_INT0_CONFIG_APIC_ID_SHFT 32 +#define UVH_LOCAL_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_local_int0_config_u { + unsigned long v; + struct uvh_local_int0_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_LOCAL_INT0_ENABLE */ +/* ========================================================================= */ +#define UVH_LOCAL_INT0_ENABLE 0x65000UL + +#define UVH_LOCAL_INT0_ENABLE_LB_HCERR_SHFT 0 +#define UVH_LOCAL_INT0_ENABLE_LB_HCERR_MASK 0x0000000000000001UL +#define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_SHFT 1 +#define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_MASK 0x0000000000000002UL +#define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_SHFT 2 +#define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_MASK 0x0000000000000004UL +#define UVH_LOCAL_INT0_ENABLE_LH_HCERR_SHFT 3 +#define UVH_LOCAL_INT0_ENABLE_LH_HCERR_MASK 0x0000000000000008UL +#define UVH_LOCAL_INT0_ENABLE_RH_HCERR_SHFT 4 +#define UVH_LOCAL_INT0_ENABLE_RH_HCERR_MASK 0x0000000000000010UL +#define UVH_LOCAL_INT0_ENABLE_XN_HCERR_SHFT 5 +#define UVH_LOCAL_INT0_ENABLE_XN_HCERR_MASK 0x0000000000000020UL +#define UVH_LOCAL_INT0_ENABLE_SI_HCERR_SHFT 6 +#define UVH_LOCAL_INT0_ENABLE_SI_HCERR_MASK 0x0000000000000040UL +#define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_SHFT 7 +#define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_MASK 0x0000000000000080UL +#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_SHFT 8 +#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_MASK 0x0000000000000100UL +#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_SHFT 9 +#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_MASK 0x0000000000000200UL +#define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_SHFT 10 +#define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_MASK 0x0000000000000400UL +#define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_SHFT 11 +#define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_MASK 0x0000000000000800UL +#define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_SHFT 12 +#define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_MASK 0x0000000000001000UL +#define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_SHFT 13 +#define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_MASK 0x0000000000002000UL +#define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_SHFT 14 +#define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_MASK 0x0000000000004000UL +#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_SHFT 15 +#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_MASK 0x0000000000008000UL +#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_SHFT 16 +#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_MASK 0x0000000000010000UL +#define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_SHFT 17 +#define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_MASK 0x0000000000020000UL +#define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_SHFT 18 +#define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_MASK 0x0000000000040000UL +#define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_SHFT 19 +#define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_MASK 0x0000000000080000UL +#define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_SHFT 20 +#define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_MASK 0x0000000000100000UL +#define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_SHFT 21 +#define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_MASK 0x0000000000200000UL +#define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_SHFT 22 +#define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_SHFT 23 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_MASK 0x0000000000800000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_SHFT 24 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_MASK 0x0000000001000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_SHFT 25 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_MASK 0x0000000002000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_SHFT 26 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_MASK 0x0000000004000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_SHFT 27 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_MASK 0x0000000008000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_SHFT 28 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_MASK 0x0000000010000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_SHFT 29 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_MASK 0x0000000020000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_SHFT 30 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_MASK 0x0000000040000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_SHFT 31 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_MASK 0x0000000080000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_SHFT 32 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_MASK 0x0000000100000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_SHFT 33 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_MASK 0x0000000200000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_SHFT 34 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_MASK 0x0000000400000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_SHFT 35 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_MASK 0x0000000800000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_SHFT 36 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_MASK 0x0000001000000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_SHFT 37 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_MASK 0x0000002000000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_SHFT 38 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_MASK 0x0000004000000000UL +#define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_SHFT 39 +#define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_MASK 0x0000008000000000UL +#define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_SHFT 40 +#define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_MASK 0x0000010000000000UL +#define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_SHFT 41 +#define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_MASK 0x0000020000000000UL +#define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_SHFT 42 +#define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_MASK 0x0000040000000000UL +#define UVH_LOCAL_INT0_ENABLE_LTC_INT_SHFT 43 +#define UVH_LOCAL_INT0_ENABLE_LTC_INT_MASK 0x0000080000000000UL +#define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_SHFT 44 +#define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL + +union uvh_local_int0_enable_u { + unsigned long v; + struct uvh_local_int0_enable_s { + unsigned long lb_hcerr : 1; /* RW */ + unsigned long gr0_hcerr : 1; /* RW */ + unsigned long gr1_hcerr : 1; /* RW */ + unsigned long lh_hcerr : 1; /* RW */ + unsigned long rh_hcerr : 1; /* RW */ + unsigned long xn_hcerr : 1; /* RW */ + unsigned long si_hcerr : 1; /* RW */ + unsigned long lb_aoerr0 : 1; /* RW */ + unsigned long gr0_aoerr0 : 1; /* RW */ + unsigned long gr1_aoerr0 : 1; /* RW */ + unsigned long lh_aoerr0 : 1; /* RW */ + unsigned long rh_aoerr0 : 1; /* RW */ + unsigned long xn_aoerr0 : 1; /* RW */ + unsigned long si_aoerr0 : 1; /* RW */ + unsigned long lb_aoerr1 : 1; /* RW */ + unsigned long gr0_aoerr1 : 1; /* RW */ + unsigned long gr1_aoerr1 : 1; /* RW */ + unsigned long lh_aoerr1 : 1; /* RW */ + unsigned long rh_aoerr1 : 1; /* RW */ + unsigned long xn_aoerr1 : 1; /* RW */ + unsigned long si_aoerr1 : 1; /* RW */ + unsigned long rh_vpi_int : 1; /* RW */ + unsigned long system_shutdown_int : 1; /* RW */ + unsigned long lb_irq_int_0 : 1; /* RW */ + unsigned long lb_irq_int_1 : 1; /* RW */ + unsigned long lb_irq_int_2 : 1; /* RW */ + unsigned long lb_irq_int_3 : 1; /* RW */ + unsigned long lb_irq_int_4 : 1; /* RW */ + unsigned long lb_irq_int_5 : 1; /* RW */ + unsigned long lb_irq_int_6 : 1; /* RW */ + unsigned long lb_irq_int_7 : 1; /* RW */ + unsigned long lb_irq_int_8 : 1; /* RW */ + unsigned long lb_irq_int_9 : 1; /* RW */ + unsigned long lb_irq_int_10 : 1; /* RW */ + unsigned long lb_irq_int_11 : 1; /* RW */ + unsigned long lb_irq_int_12 : 1; /* RW */ + unsigned long lb_irq_int_13 : 1; /* RW */ + unsigned long lb_irq_int_14 : 1; /* RW */ + unsigned long lb_irq_int_15 : 1; /* RW */ + unsigned long l1_nmi_int : 1; /* RW */ + unsigned long stop_clock : 1; /* RW */ + unsigned long asic_to_l1 : 1; /* RW */ + unsigned long l1_to_asic : 1; /* RW */ + unsigned long ltc_int : 1; /* RW */ + unsigned long la_seq_trigger : 1; /* RW */ + unsigned long rsvd_45_63 : 19; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_NODE_ID */ +/* ========================================================================= */ +#define UVH_NODE_ID 0x0UL + +#define UVH_NODE_ID_FORCE1_SHFT 0 +#define UVH_NODE_ID_FORCE1_MASK 0x0000000000000001UL +#define UVH_NODE_ID_MANUFACTURER_SHFT 1 +#define UVH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL +#define UVH_NODE_ID_PART_NUMBER_SHFT 12 +#define UVH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL +#define UVH_NODE_ID_REVISION_SHFT 28 +#define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL +#define UVH_NODE_ID_NODE_ID_SHFT 32 +#define UVH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL +#define UVH_NODE_ID_NODES_PER_BIT_SHFT 48 +#define UVH_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL +#define UVH_NODE_ID_NI_PORT_SHFT 56 +#define UVH_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL + +union uvh_node_id_u { + unsigned long v; + struct uvh_node_id_s { + unsigned long force1 : 1; /* RO */ + unsigned long manufacturer : 11; /* RO */ + unsigned long part_number : 16; /* RO */ + unsigned long revision : 4; /* RO */ + unsigned long node_id : 15; /* RW */ + unsigned long rsvd_47 : 1; /* */ + unsigned long nodes_per_bit : 7; /* RW */ + unsigned long rsvd_55 : 1; /* */ + unsigned long ni_port : 4; /* RO */ + unsigned long rsvd_60_63 : 4; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_NODE_PRESENT_TABLE */ +/* ========================================================================= */ +#define UVH_NODE_PRESENT_TABLE 0x1400UL +#define UVH_NODE_PRESENT_TABLE_DEPTH 16 + +#define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0 +#define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL + +union uvh_node_present_table_u { + unsigned long v; + struct uvh_node_present_table_s { + unsigned long nodes : 64; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL + +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +union uvh_rh_gam_alias210_redirect_config_0_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23 : 24; /* */ + unsigned long dest_base : 22; /* RW */ + unsigned long rsvd_46_63: 18; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL + +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +union uvh_rh_gam_alias210_redirect_config_1_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23 : 24; /* */ + unsigned long dest_base : 22; /* RW */ + unsigned long rsvd_46_63: 18; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL + +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +union uvh_rh_gam_alias210_redirect_config_2_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23 : 24; /* */ + unsigned long dest_base : 22; /* RW */ + unsigned long rsvd_46_63: 18; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR 0x1600020UL + +#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT 26 +#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL +#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +union uvh_rh_gam_cfg_overlay_config_mmr_u { + unsigned long v; + struct uvh_rh_gam_cfg_overlay_config_mmr_s { + unsigned long rsvd_0_25: 26; /* */ + unsigned long base : 20; /* RW */ + unsigned long rsvd_46_62: 17; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL + +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48 +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +union uvh_rh_gam_gru_overlay_config_mmr_u { + unsigned long v; + struct uvh_rh_gam_gru_overlay_config_mmr_s { + unsigned long rsvd_0_27: 28; /* */ + unsigned long base : 18; /* RW */ + unsigned long rsvd_46_47: 2; /* */ + unsigned long gr4 : 1; /* RW */ + unsigned long rsvd_49_51: 3; /* */ + unsigned long n_gru : 4; /* RW */ + unsigned long rsvd_56_62: 7; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30 +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003fffc0000000UL +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52 +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +union uvh_rh_gam_mmioh_overlay_config_mmr_u { + unsigned long v; + struct uvh_rh_gam_mmioh_overlay_config_mmr_s { + unsigned long rsvd_0_29: 30; /* */ + unsigned long base : 16; /* RW */ + unsigned long m_io : 6; /* RW */ + unsigned long n_io : 4; /* RW */ + unsigned long rsvd_56_62: 7; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL + +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46 +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +union uvh_rh_gam_mmr_overlay_config_mmr_u { + unsigned long v; + struct uvh_rh_gam_mmr_overlay_config_mmr_s { + unsigned long rsvd_0_25: 26; /* */ + unsigned long base : 20; /* RW */ + unsigned long dual_hub : 1; /* RW */ + unsigned long rsvd_47_62: 16; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RTC */ +/* ========================================================================= */ +#define UVH_RTC 0x340000UL + +#define UVH_RTC_REAL_TIME_CLOCK_SHFT 0 +#define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL + +union uvh_rtc_u { + unsigned long v; + struct uvh_rtc_s { + unsigned long real_time_clock : 56; /* RW */ + unsigned long rsvd_56_63 : 8; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RTC1_INT_CONFIG */ +/* ========================================================================= */ +#define UVH_RTC1_INT_CONFIG 0x615c0UL + +#define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0 +#define UVH_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_RTC1_INT_CONFIG_DM_SHFT 8 +#define UVH_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11 +#define UVH_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_RTC1_INT_CONFIG_STATUS_SHFT 12 +#define UVH_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_RTC1_INT_CONFIG_P_SHFT 13 +#define UVH_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_RTC1_INT_CONFIG_T_SHFT 15 +#define UVH_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_RTC1_INT_CONFIG_M_SHFT 16 +#define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32 +#define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_rtc1_int_config_u { + unsigned long v; + struct uvh_rtc1_int_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RTC2_INT_CONFIG */ +/* ========================================================================= */ +#define UVH_RTC2_INT_CONFIG 0x61600UL + +#define UVH_RTC2_INT_CONFIG_VECTOR_SHFT 0 +#define UVH_RTC2_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_RTC2_INT_CONFIG_DM_SHFT 8 +#define UVH_RTC2_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_RTC2_INT_CONFIG_DESTMODE_SHFT 11 +#define UVH_RTC2_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_RTC2_INT_CONFIG_STATUS_SHFT 12 +#define UVH_RTC2_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_RTC2_INT_CONFIG_P_SHFT 13 +#define UVH_RTC2_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_RTC2_INT_CONFIG_T_SHFT 15 +#define UVH_RTC2_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_RTC2_INT_CONFIG_M_SHFT 16 +#define UVH_RTC2_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_RTC2_INT_CONFIG_APIC_ID_SHFT 32 +#define UVH_RTC2_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_rtc2_int_config_u { + unsigned long v; + struct uvh_rtc2_int_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RTC3_INT_CONFIG */ +/* ========================================================================= */ +#define UVH_RTC3_INT_CONFIG 0x61640UL + +#define UVH_RTC3_INT_CONFIG_VECTOR_SHFT 0 +#define UVH_RTC3_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_RTC3_INT_CONFIG_DM_SHFT 8 +#define UVH_RTC3_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_RTC3_INT_CONFIG_DESTMODE_SHFT 11 +#define UVH_RTC3_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_RTC3_INT_CONFIG_STATUS_SHFT 12 +#define UVH_RTC3_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_RTC3_INT_CONFIG_P_SHFT 13 +#define UVH_RTC3_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_RTC3_INT_CONFIG_T_SHFT 15 +#define UVH_RTC3_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_RTC3_INT_CONFIG_M_SHFT 16 +#define UVH_RTC3_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_RTC3_INT_CONFIG_APIC_ID_SHFT 32 +#define UVH_RTC3_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_rtc3_int_config_u { + unsigned long v; + struct uvh_rtc3_int_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RTC_INC_RATIO */ +/* ========================================================================= */ +#define UVH_RTC_INC_RATIO 0x350000UL + +#define UVH_RTC_INC_RATIO_FRACTION_SHFT 0 +#define UVH_RTC_INC_RATIO_FRACTION_MASK 0x00000000000fffffUL +#define UVH_RTC_INC_RATIO_RATIO_SHFT 20 +#define UVH_RTC_INC_RATIO_RATIO_MASK 0x0000000000700000UL + +union uvh_rtc_inc_ratio_u { + unsigned long v; + struct uvh_rtc_inc_ratio_s { + unsigned long fraction : 20; /* RW */ + unsigned long ratio : 3; /* RW */ + unsigned long rsvd_23_63: 41; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_SI_ADDR_MAP_CONFIG */ +/* ========================================================================= */ +#define UVH_SI_ADDR_MAP_CONFIG 0xc80000UL + +#define UVH_SI_ADDR_MAP_CONFIG_M_SKT_SHFT 0 +#define UVH_SI_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL +#define UVH_SI_ADDR_MAP_CONFIG_N_SKT_SHFT 8 +#define UVH_SI_ADDR_MAP_CONFIG_N_SKT_MASK 0x0000000000000f00UL + +union uvh_si_addr_map_config_u { + unsigned long v; + struct uvh_si_addr_map_config_s { + unsigned long m_skt : 6; /* RW */ + unsigned long rsvd_6_7: 2; /* */ + unsigned long n_skt : 4; /* RW */ + unsigned long rsvd_12_63: 52; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_SI_ALIAS0_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_SI_ALIAS0_OVERLAY_CONFIG 0xc80008UL + +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +union uvh_si_alias0_overlay_config_u { + unsigned long v; + struct uvh_si_alias0_overlay_config_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_SI_ALIAS1_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_SI_ALIAS1_OVERLAY_CONFIG 0xc80010UL + +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +union uvh_si_alias1_overlay_config_u { + unsigned long v; + struct uvh_si_alias1_overlay_config_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_SI_ALIAS2_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_SI_ALIAS2_OVERLAY_CONFIG 0xc80018UL + +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +union uvh_si_alias2_overlay_config_u { + unsigned long v; + struct uvh_si_alias2_overlay_config_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + + +#endif /* _ASM_X86_UV_UV_MMRS_H */ diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h new file mode 100644 index 00000000000..9064052b73d --- /dev/null +++ b/arch/x86/include/asm/vdso.h @@ -0,0 +1,47 @@ +#ifndef _ASM_X86_VDSO_H +#define _ASM_X86_VDSO_H + +#ifdef CONFIG_X86_64 +extern const char VDSO64_PRELINK[]; + +/* + * Given a pointer to the vDSO image, find the pointer to VDSO64_name + * as that symbol is defined in the vDSO sources or linker script. + */ +#define VDSO64_SYMBOL(base, name) \ +({ \ + extern const char VDSO64_##name[]; \ + (void *)(VDSO64_##name - VDSO64_PRELINK + (unsigned long)(base)); \ +}) +#endif + +#if defined CONFIG_X86_32 || defined CONFIG_COMPAT +extern const char VDSO32_PRELINK[]; + +/* + * Given a pointer to the vDSO image, find the pointer to VDSO32_name + * as that symbol is defined in the vDSO sources or linker script. + */ +#define VDSO32_SYMBOL(base, name) \ +({ \ + extern const char VDSO32_##name[]; \ + (void *)(VDSO32_##name - VDSO32_PRELINK + (unsigned long)(base)); \ +}) +#endif + +/* + * These symbols are defined with the addresses in the vsyscall page. + * See vsyscall-sigreturn.S. + */ +extern void __user __kernel_sigreturn; +extern void __user __kernel_rt_sigreturn; + +/* + * These symbols are defined by vdso32.S to mark the bounds + * of the ELF DSO images included therein. + */ +extern const char vdso32_int80_start, vdso32_int80_end; +extern const char vdso32_syscall_start, vdso32_syscall_end; +extern const char vdso32_sysenter_start, vdso32_sysenter_end; + +#endif /* _ASM_X86_VDSO_H */ diff --git a/arch/x86/include/asm/vga.h b/arch/x86/include/asm/vga.h new file mode 100644 index 00000000000..c4b9dc2f67c --- /dev/null +++ b/arch/x86/include/asm/vga.h @@ -0,0 +1,20 @@ +/* + * Access to VGA videoram + * + * (c) 1998 Martin Mares <mj@ucw.cz> + */ + +#ifndef _ASM_X86_VGA_H +#define _ASM_X86_VGA_H + +/* + * On the PC, we can just recalculate addresses and then + * access the videoram directly without any black magic. + */ + +#define VGA_MAP_MEM(x, s) (unsigned long)phys_to_virt(x) + +#define vga_readb(x) (*(x)) +#define vga_writeb(x, y) (*(y) = (x)) + +#endif /* _ASM_X86_VGA_H */ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h new file mode 100644 index 00000000000..dc27a69e5d2 --- /dev/null +++ b/arch/x86/include/asm/vgtod.h @@ -0,0 +1,29 @@ +#ifndef _ASM_X86_VGTOD_H +#define _ASM_X86_VGTOD_H + +#include <asm/vsyscall.h> +#include <linux/clocksource.h> + +struct vsyscall_gtod_data { + seqlock_t lock; + + /* open coded 'struct timespec' */ + time_t wall_time_sec; + u32 wall_time_nsec; + + int sysctl_enabled; + struct timezone sys_tz; + struct { /* extract of a clocksource struct */ + cycle_t (*vread)(void); + cycle_t cycle_last; + cycle_t mask; + u32 mult; + u32 shift; + } clock; + struct timespec wall_to_monotonic; +}; +extern struct vsyscall_gtod_data __vsyscall_gtod_data +__section_vsyscall_gtod_data; +extern struct vsyscall_gtod_data vsyscall_gtod_data; + +#endif /* _ASM_X86_VGTOD_H */ diff --git a/arch/x86/include/asm/vic.h b/arch/x86/include/asm/vic.h new file mode 100644 index 00000000000..53100f35361 --- /dev/null +++ b/arch/x86/include/asm/vic.h @@ -0,0 +1,61 @@ +/* Copyright (C) 1999,2001 + * + * Author: J.E.J.Bottomley@HansenPartnership.com + * + * Standard include definitions for the NCR Voyager Interrupt Controller */ + +/* The eight CPI vectors. To activate a CPI, you write a bit mask + * corresponding to the processor set to be interrupted into the + * relevant register. That set of CPUs will then be interrupted with + * the CPI */ +static const int VIC_CPI_Registers[] = + {0xFC00, 0xFC01, 0xFC08, 0xFC09, + 0xFC10, 0xFC11, 0xFC18, 0xFC19 }; + +#define VIC_PROC_WHO_AM_I 0xfc29 +# define QUAD_IDENTIFIER 0xC0 +# define EIGHT_SLOT_IDENTIFIER 0xE0 +#define QIC_EXTENDED_PROCESSOR_SELECT 0xFC72 +#define VIC_CPI_BASE_REGISTER 0xFC41 +#define VIC_PROCESSOR_ID 0xFC21 +# define VIC_CPU_MASQUERADE_ENABLE 0x8 + +#define VIC_CLAIM_REGISTER_0 0xFC38 +#define VIC_CLAIM_REGISTER_1 0xFC39 +#define VIC_REDIRECT_REGISTER_0 0xFC60 +#define VIC_REDIRECT_REGISTER_1 0xFC61 +#define VIC_PRIORITY_REGISTER 0xFC20 + +#define VIC_PRIMARY_MC_BASE 0xFC48 +#define VIC_SECONDARY_MC_BASE 0xFC49 + +#define QIC_PROCESSOR_ID 0xFC71 +# define QIC_CPUID_ENABLE 0x08 + +#define QIC_VIC_CPI_BASE_REGISTER 0xFC79 +#define QIC_CPI_BASE_REGISTER 0xFC7A + +#define QIC_MASK_REGISTER0 0xFC80 +/* NOTE: these are masked high, enabled low */ +# define QIC_PERF_TIMER 0x01 +# define QIC_LPE 0x02 +# define QIC_SYS_INT 0x04 +# define QIC_CMN_INT 0x08 +/* at the moment, just enable CMN_INT, disable SYS_INT */ +# define QIC_DEFAULT_MASK0 (~(QIC_CMN_INT /* | VIC_SYS_INT */)) +#define QIC_MASK_REGISTER1 0xFC81 +# define QIC_BOOT_CPI_MASK 0xFE +/* Enable CPI's 1-6 inclusive */ +# define QIC_CPI_ENABLE 0x81 + +#define QIC_INTERRUPT_CLEAR0 0xFC8A +#define QIC_INTERRUPT_CLEAR1 0xFC8B + +/* this is where we place the CPI vectors */ +#define VIC_DEFAULT_CPI_BASE 0xC0 +/* this is where we place the QIC CPI vectors */ +#define QIC_DEFAULT_CPI_BASE 0xD0 + +#define VIC_BOOT_INTERRUPT_MASK 0xfe + +extern void smp_vic_timer_interrupt(void); diff --git a/arch/x86/include/asm/visws/cobalt.h b/arch/x86/include/asm/visws/cobalt.h new file mode 100644 index 00000000000..166adf61e77 --- /dev/null +++ b/arch/x86/include/asm/visws/cobalt.h @@ -0,0 +1,125 @@ +#ifndef _ASM_X86_VISWS_COBALT_H +#define _ASM_X86_VISWS_COBALT_H + +#include <asm/fixmap.h> + +/* + * Cobalt SGI Visual Workstation system ASIC + */ + +#define CO_CPU_NUM_PHYS 0x1e00 +#define CO_CPU_TAB_PHYS (CO_CPU_NUM_PHYS + 2) + +#define CO_CPU_MAX 4 + +#define CO_CPU_PHYS 0xc2000000 +#define CO_APIC_PHYS 0xc4000000 + +/* see set_fixmap() and asm/fixmap.h */ +#define CO_CPU_VADDR (fix_to_virt(FIX_CO_CPU)) +#define CO_APIC_VADDR (fix_to_virt(FIX_CO_APIC)) + +/* Cobalt CPU registers -- relative to CO_CPU_VADDR, use co_cpu_*() */ +#define CO_CPU_REV 0x08 +#define CO_CPU_CTRL 0x10 +#define CO_CPU_STAT 0x20 +#define CO_CPU_TIMEVAL 0x30 + +/* CO_CPU_CTRL bits */ +#define CO_CTRL_TIMERUN 0x04 /* 0 == disabled */ +#define CO_CTRL_TIMEMASK 0x08 /* 0 == unmasked */ + +/* CO_CPU_STATUS bits */ +#define CO_STAT_TIMEINTR 0x02 /* (r) 1 == int pend, (w) 0 == clear */ + +/* CO_CPU_TIMEVAL value */ +#define CO_TIME_HZ 100000000 /* Cobalt core rate */ + +/* Cobalt APIC registers -- relative to CO_APIC_VADDR, use co_apic_*() */ +#define CO_APIC_HI(n) (((n) * 0x10) + 4) +#define CO_APIC_LO(n) ((n) * 0x10) +#define CO_APIC_ID 0x0ffc + +/* CO_APIC_ID bits */ +#define CO_APIC_ENABLE 0x00000100 + +/* CO_APIC_LO bits */ +#define CO_APIC_MASK 0x00010000 /* 0 = enabled */ +#define CO_APIC_LEVEL 0x00008000 /* 0 = edge */ + +/* + * Where things are physically wired to Cobalt + * #defines with no board _<type>_<rev>_ are common to all (thus far) + */ +#define CO_APIC_IDE0 4 +#define CO_APIC_IDE1 2 /* Only on 320 */ + +#define CO_APIC_8259 12 /* serial, floppy, par-l-l */ + +/* Lithium PCI Bridge A -- "the one with 82557 Ethernet" */ +#define CO_APIC_PCIA_BASE0 0 /* and 1 */ /* slot 0, line 0 */ +#define CO_APIC_PCIA_BASE123 5 /* and 6 */ /* slot 0, line 1 */ + +#define CO_APIC_PIIX4_USB 7 /* this one is weird */ + +/* Lithium PCI Bridge B -- "the one with PIIX4" */ +#define CO_APIC_PCIB_BASE0 8 /* and 9-12 *//* slot 0, line 0 */ +#define CO_APIC_PCIB_BASE123 13 /* 14.15 */ /* slot 0, line 1 */ + +#define CO_APIC_VIDOUT0 16 +#define CO_APIC_VIDOUT1 17 +#define CO_APIC_VIDIN0 18 +#define CO_APIC_VIDIN1 19 + +#define CO_APIC_LI_AUDIO 22 + +#define CO_APIC_AS 24 +#define CO_APIC_RE 25 + +#define CO_APIC_CPU 28 /* Timer and Cache interrupt */ +#define CO_APIC_NMI 29 +#define CO_APIC_LAST CO_APIC_NMI + +/* + * This is how irqs are assigned on the Visual Workstation. + * Legacy devices get irq's 1-15 (system clock is 0 and is CO_APIC_CPU). + * All other devices (including PCI) go to Cobalt and are irq's 16 on up. + */ +#define CO_IRQ_APIC0 16 /* irq of apic entry 0 */ +#define IS_CO_APIC(irq) ((irq) >= CO_IRQ_APIC0) +#define CO_IRQ(apic) (CO_IRQ_APIC0 + (apic)) /* apic ent to irq */ +#define CO_APIC(irq) ((irq) - CO_IRQ_APIC0) /* irq to apic ent */ +#define CO_IRQ_IDE0 14 /* knowledge of... */ +#define CO_IRQ_IDE1 15 /* ... ide driver defaults! */ +#define CO_IRQ_8259 CO_IRQ(CO_APIC_8259) + +#ifdef CONFIG_X86_VISWS_APIC +static inline void co_cpu_write(unsigned long reg, unsigned long v) +{ + *((volatile unsigned long *)(CO_CPU_VADDR+reg))=v; +} + +static inline unsigned long co_cpu_read(unsigned long reg) +{ + return *((volatile unsigned long *)(CO_CPU_VADDR+reg)); +} + +static inline void co_apic_write(unsigned long reg, unsigned long v) +{ + *((volatile unsigned long *)(CO_APIC_VADDR+reg))=v; +} + +static inline unsigned long co_apic_read(unsigned long reg) +{ + return *((volatile unsigned long *)(CO_APIC_VADDR+reg)); +} +#endif + +extern char visws_board_type; + +#define VISWS_320 0 +#define VISWS_540 1 + +extern char visws_board_rev; + +#endif /* _ASM_X86_VISWS_COBALT_H */ diff --git a/arch/x86/include/asm/visws/lithium.h b/arch/x86/include/asm/visws/lithium.h new file mode 100644 index 00000000000..a10d89bc127 --- /dev/null +++ b/arch/x86/include/asm/visws/lithium.h @@ -0,0 +1,53 @@ +#ifndef _ASM_X86_VISWS_LITHIUM_H +#define _ASM_X86_VISWS_LITHIUM_H + +#include <asm/fixmap.h> + +/* + * Lithium is the SGI Visual Workstation I/O ASIC + */ + +#define LI_PCI_A_PHYS 0xfc000000 /* Enet is dev 3 */ +#define LI_PCI_B_PHYS 0xfd000000 /* PIIX4 is here */ + +/* see set_fixmap() and asm/fixmap.h */ +#define LI_PCIA_VADDR (fix_to_virt(FIX_LI_PCIA)) +#define LI_PCIB_VADDR (fix_to_virt(FIX_LI_PCIB)) + +/* Not a standard PCI? (not in linux/pci.h) */ +#define LI_PCI_BUSNUM 0x44 /* lo8: primary, hi8: sub */ +#define LI_PCI_INTEN 0x46 + +/* LI_PCI_INTENT bits */ +#define LI_INTA_0 0x0001 +#define LI_INTA_1 0x0002 +#define LI_INTA_2 0x0004 +#define LI_INTA_3 0x0008 +#define LI_INTA_4 0x0010 +#define LI_INTB 0x0020 +#define LI_INTC 0x0040 +#define LI_INTD 0x0080 + +/* More special purpose macros... */ +static inline void li_pcia_write16(unsigned long reg, unsigned short v) +{ + *((volatile unsigned short *)(LI_PCIA_VADDR+reg))=v; +} + +static inline unsigned short li_pcia_read16(unsigned long reg) +{ + return *((volatile unsigned short *)(LI_PCIA_VADDR+reg)); +} + +static inline void li_pcib_write16(unsigned long reg, unsigned short v) +{ + *((volatile unsigned short *)(LI_PCIB_VADDR+reg))=v; +} + +static inline unsigned short li_pcib_read16(unsigned long reg) +{ + return *((volatile unsigned short *)(LI_PCIB_VADDR+reg)); +} + +#endif /* _ASM_X86_VISWS_LITHIUM_H */ + diff --git a/arch/x86/include/asm/visws/piix4.h b/arch/x86/include/asm/visws/piix4.h new file mode 100644 index 00000000000..d0af4d338e7 --- /dev/null +++ b/arch/x86/include/asm/visws/piix4.h @@ -0,0 +1,107 @@ +#ifndef _ASM_X86_VISWS_PIIX4_H +#define _ASM_X86_VISWS_PIIX4_H + +/* + * PIIX4 as used on SGI Visual Workstations + */ + +#define PIIX_PM_START 0x0F80 + +#define SIO_GPIO_START 0x0FC0 + +#define SIO_PM_START 0x0FC8 + +#define PMBASE PIIX_PM_START +#define GPIREG0 (PMBASE+0x30) +#define GPIREG(x) (GPIREG0+((x)/8)) +#define GPIBIT(x) (1 << ((x)%8)) + +#define PIIX_GPI_BD_ID1 18 +#define PIIX_GPI_BD_ID2 19 +#define PIIX_GPI_BD_ID3 20 +#define PIIX_GPI_BD_ID4 21 +#define PIIX_GPI_BD_REG GPIREG(PIIX_GPI_BD_ID1) +#define PIIX_GPI_BD_MASK (GPIBIT(PIIX_GPI_BD_ID1) | \ + GPIBIT(PIIX_GPI_BD_ID2) | \ + GPIBIT(PIIX_GPI_BD_ID3) | \ + GPIBIT(PIIX_GPI_BD_ID4) ) + +#define PIIX_GPI_BD_SHIFT (PIIX_GPI_BD_ID1 % 8) + +#define SIO_INDEX 0x2e +#define SIO_DATA 0x2f + +#define SIO_DEV_SEL 0x7 +#define SIO_DEV_ENB 0x30 +#define SIO_DEV_MSB 0x60 +#define SIO_DEV_LSB 0x61 + +#define SIO_GP_DEV 0x7 + +#define SIO_GP_BASE SIO_GPIO_START +#define SIO_GP_MSB (SIO_GP_BASE>>8) +#define SIO_GP_LSB (SIO_GP_BASE&0xff) + +#define SIO_GP_DATA1 (SIO_GP_BASE+0) + +#define SIO_PM_DEV 0x8 + +#define SIO_PM_BASE SIO_PM_START +#define SIO_PM_MSB (SIO_PM_BASE>>8) +#define SIO_PM_LSB (SIO_PM_BASE&0xff) +#define SIO_PM_INDEX (SIO_PM_BASE+0) +#define SIO_PM_DATA (SIO_PM_BASE+1) + +#define SIO_PM_FER2 0x1 + +#define SIO_PM_GP_EN 0x80 + + + +/* + * This is the dev/reg where generating a config cycle will + * result in a PCI special cycle. + */ +#define SPECIAL_DEV 0xff +#define SPECIAL_REG 0x00 + +/* + * PIIX4 needs to see a special cycle with the following data + * to be convinced the processor has gone into the stop grant + * state. PIIX4 insists on seeing this before it will power + * down a system. + */ +#define PIIX_SPECIAL_STOP 0x00120002 + +#define PIIX4_RESET_PORT 0xcf9 +#define PIIX4_RESET_VAL 0x6 + +#define PMSTS_PORT 0xf80 // 2 bytes PM Status +#define PMEN_PORT 0xf82 // 2 bytes PM Enable +#define PMCNTRL_PORT 0xf84 // 2 bytes PM Control + +#define PM_SUSPEND_ENABLE 0x2000 // start sequence to suspend state + +/* + * PMSTS and PMEN I/O bit definitions. + * (Bits are the same in both registers) + */ +#define PM_STS_RSM (1<<15) // Resume Status +#define PM_STS_PWRBTNOR (1<<11) // Power Button Override +#define PM_STS_RTC (1<<10) // RTC status +#define PM_STS_PWRBTN (1<<8) // Power Button Pressed? +#define PM_STS_GBL (1<<5) // Global Status +#define PM_STS_BM (1<<4) // Bus Master Status +#define PM_STS_TMROF (1<<0) // Timer Overflow Status. + +/* + * Stop clock GPI register + */ +#define PIIX_GPIREG0 (0xf80 + 0x30) + +/* + * Stop clock GPI bit in GPIREG0 + */ +#define PIIX_GPI_STPCLK 0x4 // STPCLK signal routed back in + +#endif /* _ASM_X86_VISWS_PIIX4_H */ diff --git a/arch/x86/include/asm/visws/sgivw.h b/arch/x86/include/asm/visws/sgivw.h new file mode 100644 index 00000000000..5fbf63e1003 --- /dev/null +++ b/arch/x86/include/asm/visws/sgivw.h @@ -0,0 +1,5 @@ +/* + * Frame buffer position and size: + */ +extern unsigned long sgivwfb_mem_phys; +extern unsigned long sgivwfb_mem_size; diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h new file mode 100644 index 00000000000..f9303602fbc --- /dev/null +++ b/arch/x86/include/asm/vm86.h @@ -0,0 +1,208 @@ +#ifndef _ASM_X86_VM86_H +#define _ASM_X86_VM86_H + +/* + * I'm guessing at the VIF/VIP flag usage, but hope that this is how + * the Pentium uses them. Linux will return from vm86 mode when both + * VIF and VIP is set. + * + * On a Pentium, we could probably optimize the virtual flags directly + * in the eflags register instead of doing it "by hand" in vflags... + * + * Linus + */ + +#include <asm/processor-flags.h> + +#define BIOSSEG 0x0f000 + +#define CPU_086 0 +#define CPU_186 1 +#define CPU_286 2 +#define CPU_386 3 +#define CPU_486 4 +#define CPU_586 5 + +/* + * Return values for the 'vm86()' system call + */ +#define VM86_TYPE(retval) ((retval) & 0xff) +#define VM86_ARG(retval) ((retval) >> 8) + +#define VM86_SIGNAL 0 /* return due to signal */ +#define VM86_UNKNOWN 1 /* unhandled GP fault + - IO-instruction or similar */ +#define VM86_INTx 2 /* int3/int x instruction (ARG = x) */ +#define VM86_STI 3 /* sti/popf/iret instruction enabled + virtual interrupts */ + +/* + * Additional return values when invoking new vm86() + */ +#define VM86_PICRETURN 4 /* return due to pending PIC request */ +#define VM86_TRAP 6 /* return due to DOS-debugger request */ + +/* + * function codes when invoking new vm86() + */ +#define VM86_PLUS_INSTALL_CHECK 0 +#define VM86_ENTER 1 +#define VM86_ENTER_NO_BYPASS 2 +#define VM86_REQUEST_IRQ 3 +#define VM86_FREE_IRQ 4 +#define VM86_GET_IRQ_BITS 5 +#define VM86_GET_AND_RESET_IRQ 6 + +/* + * This is the stack-layout seen by the user space program when we have + * done a translation of "SAVE_ALL" from vm86 mode. The real kernel layout + * is 'kernel_vm86_regs' (see below). + */ + +struct vm86_regs { +/* + * normal regs, with special meaning for the segment descriptors.. + */ + long ebx; + long ecx; + long edx; + long esi; + long edi; + long ebp; + long eax; + long __null_ds; + long __null_es; + long __null_fs; + long __null_gs; + long orig_eax; + long eip; + unsigned short cs, __csh; + long eflags; + long esp; + unsigned short ss, __ssh; +/* + * these are specific to v86 mode: + */ + unsigned short es, __esh; + unsigned short ds, __dsh; + unsigned short fs, __fsh; + unsigned short gs, __gsh; +}; + +struct revectored_struct { + unsigned long __map[8]; /* 256 bits */ +}; + +struct vm86_struct { + struct vm86_regs regs; + unsigned long flags; + unsigned long screen_bitmap; + unsigned long cpu_type; + struct revectored_struct int_revectored; + struct revectored_struct int21_revectored; +}; + +/* + * flags masks + */ +#define VM86_SCREEN_BITMAP 0x0001 + +struct vm86plus_info_struct { + unsigned long force_return_for_pic:1; + unsigned long vm86dbg_active:1; /* for debugger */ + unsigned long vm86dbg_TFpendig:1; /* for debugger */ + unsigned long unused:28; + unsigned long is_vm86pus:1; /* for vm86 internal use */ + unsigned char vm86dbg_intxxtab[32]; /* for debugger */ +}; +struct vm86plus_struct { + struct vm86_regs regs; + unsigned long flags; + unsigned long screen_bitmap; + unsigned long cpu_type; + struct revectored_struct int_revectored; + struct revectored_struct int21_revectored; + struct vm86plus_info_struct vm86plus; +}; + +#ifdef __KERNEL__ + +#include <asm/ptrace.h> + +/* + * This is the (kernel) stack-layout when we have done a "SAVE_ALL" from vm86 + * mode - the main change is that the old segment descriptors aren't + * useful any more and are forced to be zero by the kernel (and the + * hardware when a trap occurs), and the real segment descriptors are + * at the end of the structure. Look at ptrace.h to see the "normal" + * setup. For user space layout see 'struct vm86_regs' above. + */ + +struct kernel_vm86_regs { +/* + * normal regs, with special meaning for the segment descriptors.. + */ + struct pt_regs pt; +/* + * these are specific to v86 mode: + */ + unsigned short es, __esh; + unsigned short ds, __dsh; + unsigned short fs, __fsh; + unsigned short gs, __gsh; +}; + +struct kernel_vm86_struct { + struct kernel_vm86_regs regs; +/* + * the below part remains on the kernel stack while we are in VM86 mode. + * 'tss.esp0' then contains the address of VM86_TSS_ESP0 below, and when we + * get forced back from VM86, the CPU and "SAVE_ALL" will restore the above + * 'struct kernel_vm86_regs' with the then actual values. + * Therefore, pt_regs in fact points to a complete 'kernel_vm86_struct' + * in kernelspace, hence we need not reget the data from userspace. + */ +#define VM86_TSS_ESP0 flags + unsigned long flags; + unsigned long screen_bitmap; + unsigned long cpu_type; + struct revectored_struct int_revectored; + struct revectored_struct int21_revectored; + struct vm86plus_info_struct vm86plus; + struct pt_regs *regs32; /* here we save the pointer to the old regs */ +/* + * The below is not part of the structure, but the stack layout continues + * this way. In front of 'return-eip' may be some data, depending on + * compilation, so we don't rely on this and save the pointer to 'oldregs' + * in 'regs32' above. + * However, with GCC-2.7.2 and the current CFLAGS you see exactly this: + + long return-eip; from call to vm86() + struct pt_regs oldregs; user space registers as saved by syscall + */ +}; + +#ifdef CONFIG_VM86 + +void handle_vm86_fault(struct kernel_vm86_regs *, long); +int handle_vm86_trap(struct kernel_vm86_regs *, long, int); +struct pt_regs *save_v86_state(struct kernel_vm86_regs *); + +struct task_struct; +void release_vm86_irqs(struct task_struct *); + +#else + +#define handle_vm86_fault(a, b) +#define release_vm86_irqs(a) + +static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c) +{ + return 0; +} + +#endif /* CONFIG_VM86 */ + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_VM86_H */ diff --git a/arch/x86/include/asm/vmi.h b/arch/x86/include/asm/vmi.h new file mode 100644 index 00000000000..b7c0dea119f --- /dev/null +++ b/arch/x86/include/asm/vmi.h @@ -0,0 +1,263 @@ +/* + * VMI interface definition + * + * Copyright (C) 2005, VMware, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Maintained by: Zachary Amsden zach@vmware.com + * + */ +#include <linux/types.h> + +/* + *--------------------------------------------------------------------- + * + * VMI Option ROM API + * + *--------------------------------------------------------------------- + */ +#define VMI_SIGNATURE 0x696d5663 /* "cVmi" */ + +#define PCI_VENDOR_ID_VMWARE 0x15AD +#define PCI_DEVICE_ID_VMWARE_VMI 0x0801 + +/* + * We use two version numbers for compatibility, with the major + * number signifying interface breakages, and the minor number + * interface extensions. + */ +#define VMI_API_REV_MAJOR 3 +#define VMI_API_REV_MINOR 0 + +#define VMI_CALL_CPUID 0 +#define VMI_CALL_WRMSR 1 +#define VMI_CALL_RDMSR 2 +#define VMI_CALL_SetGDT 3 +#define VMI_CALL_SetLDT 4 +#define VMI_CALL_SetIDT 5 +#define VMI_CALL_SetTR 6 +#define VMI_CALL_GetGDT 7 +#define VMI_CALL_GetLDT 8 +#define VMI_CALL_GetIDT 9 +#define VMI_CALL_GetTR 10 +#define VMI_CALL_WriteGDTEntry 11 +#define VMI_CALL_WriteLDTEntry 12 +#define VMI_CALL_WriteIDTEntry 13 +#define VMI_CALL_UpdateKernelStack 14 +#define VMI_CALL_SetCR0 15 +#define VMI_CALL_SetCR2 16 +#define VMI_CALL_SetCR3 17 +#define VMI_CALL_SetCR4 18 +#define VMI_CALL_GetCR0 19 +#define VMI_CALL_GetCR2 20 +#define VMI_CALL_GetCR3 21 +#define VMI_CALL_GetCR4 22 +#define VMI_CALL_WBINVD 23 +#define VMI_CALL_SetDR 24 +#define VMI_CALL_GetDR 25 +#define VMI_CALL_RDPMC 26 +#define VMI_CALL_RDTSC 27 +#define VMI_CALL_CLTS 28 +#define VMI_CALL_EnableInterrupts 29 +#define VMI_CALL_DisableInterrupts 30 +#define VMI_CALL_GetInterruptMask 31 +#define VMI_CALL_SetInterruptMask 32 +#define VMI_CALL_IRET 33 +#define VMI_CALL_SYSEXIT 34 +#define VMI_CALL_Halt 35 +#define VMI_CALL_Reboot 36 +#define VMI_CALL_Shutdown 37 +#define VMI_CALL_SetPxE 38 +#define VMI_CALL_SetPxELong 39 +#define VMI_CALL_UpdatePxE 40 +#define VMI_CALL_UpdatePxELong 41 +#define VMI_CALL_MachineToPhysical 42 +#define VMI_CALL_PhysicalToMachine 43 +#define VMI_CALL_AllocatePage 44 +#define VMI_CALL_ReleasePage 45 +#define VMI_CALL_InvalPage 46 +#define VMI_CALL_FlushTLB 47 +#define VMI_CALL_SetLinearMapping 48 + +#define VMI_CALL_SetIOPLMask 61 +#define VMI_CALL_SetInitialAPState 62 +#define VMI_CALL_APICWrite 63 +#define VMI_CALL_APICRead 64 +#define VMI_CALL_IODelay 65 +#define VMI_CALL_SetLazyMode 73 + +/* + *--------------------------------------------------------------------- + * + * MMU operation flags + * + *--------------------------------------------------------------------- + */ + +/* Flags used by VMI_{Allocate|Release}Page call */ +#define VMI_PAGE_PAE 0x10 /* Allocate PAE shadow */ +#define VMI_PAGE_CLONE 0x20 /* Clone from another shadow */ +#define VMI_PAGE_ZEROED 0x40 /* Page is pre-zeroed */ + + +/* Flags shared by Allocate|Release Page and PTE updates */ +#define VMI_PAGE_PT 0x01 +#define VMI_PAGE_PD 0x02 +#define VMI_PAGE_PDP 0x04 +#define VMI_PAGE_PML4 0x08 + +#define VMI_PAGE_NORMAL 0x00 /* for debugging */ + +/* Flags used by PTE updates */ +#define VMI_PAGE_CURRENT_AS 0x10 /* implies VMI_PAGE_VA_MASK is valid */ +#define VMI_PAGE_DEFER 0x20 /* may queue update until TLB inval */ +#define VMI_PAGE_VA_MASK 0xfffff000 + +#ifdef CONFIG_X86_PAE +#define VMI_PAGE_L1 (VMI_PAGE_PT | VMI_PAGE_PAE | VMI_PAGE_ZEROED) +#define VMI_PAGE_L2 (VMI_PAGE_PD | VMI_PAGE_PAE | VMI_PAGE_ZEROED) +#else +#define VMI_PAGE_L1 (VMI_PAGE_PT | VMI_PAGE_ZEROED) +#define VMI_PAGE_L2 (VMI_PAGE_PD | VMI_PAGE_ZEROED) +#endif + +/* Flags used by VMI_FlushTLB call */ +#define VMI_FLUSH_TLB 0x01 +#define VMI_FLUSH_GLOBAL 0x02 + +/* + *--------------------------------------------------------------------- + * + * VMI relocation definitions for ROM call get_reloc + * + *--------------------------------------------------------------------- + */ + +/* VMI Relocation types */ +#define VMI_RELOCATION_NONE 0 +#define VMI_RELOCATION_CALL_REL 1 +#define VMI_RELOCATION_JUMP_REL 2 +#define VMI_RELOCATION_NOP 3 + +#ifndef __ASSEMBLY__ +struct vmi_relocation_info { + unsigned char *eip; + unsigned char type; + unsigned char reserved[3]; +}; +#endif + + +/* + *--------------------------------------------------------------------- + * + * Generic ROM structures and definitions + * + *--------------------------------------------------------------------- + */ + +#ifndef __ASSEMBLY__ + +struct vrom_header { + u16 rom_signature; /* option ROM signature */ + u8 rom_length; /* ROM length in 512 byte chunks */ + u8 rom_entry[4]; /* 16-bit code entry point */ + u8 rom_pad0; /* 4-byte align pad */ + u32 vrom_signature; /* VROM identification signature */ + u8 api_version_min;/* Minor version of API */ + u8 api_version_maj;/* Major version of API */ + u8 jump_slots; /* Number of jump slots */ + u8 reserved1; /* Reserved for expansion */ + u32 virtual_top; /* Hypervisor virtual address start */ + u16 reserved2; /* Reserved for expansion */ + u16 license_offs; /* Offset to License string */ + u16 pci_header_offs;/* Offset to PCI OPROM header */ + u16 pnp_header_offs;/* Offset to PnP OPROM header */ + u32 rom_pad3; /* PnP reserverd / VMI reserved */ + u8 reserved[96]; /* Reserved for headers */ + char vmi_init[8]; /* VMI_Init jump point */ + char get_reloc[8]; /* VMI_GetRelocationInfo jump point */ +} __attribute__((packed)); + +struct pnp_header { + char sig[4]; + char rev; + char size; + short next; + short res; + long devID; + unsigned short manufacturer_offset; + unsigned short product_offset; +} __attribute__((packed)); + +struct pci_header { + char sig[4]; + short vendorID; + short deviceID; + short vpdData; + short size; + char rev; + char class; + char subclass; + char interface; + short chunks; + char rom_version_min; + char rom_version_maj; + char codetype; + char lastRom; + short reserved; +} __attribute__((packed)); + +/* Function prototypes for bootstrapping */ +extern void vmi_init(void); +extern void vmi_bringup(void); +extern void vmi_apply_boot_page_allocations(void); + +/* State needed to start an application processor in an SMP system. */ +struct vmi_ap_state { + u32 cr0; + u32 cr2; + u32 cr3; + u32 cr4; + + u64 efer; + + u32 eip; + u32 eflags; + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; + u32 esp; + u32 ebp; + u32 esi; + u32 edi; + u16 cs; + u16 ss; + u16 ds; + u16 es; + u16 fs; + u16 gs; + u16 ldtr; + + u16 gdtr_limit; + u32 gdtr_base; + u32 idtr_base; + u16 idtr_limit; +}; + +#endif diff --git a/arch/x86/include/asm/vmi_time.h b/arch/x86/include/asm/vmi_time.h new file mode 100644 index 00000000000..c6e0bee93e3 --- /dev/null +++ b/arch/x86/include/asm/vmi_time.h @@ -0,0 +1,98 @@ +/* + * VMI Time wrappers + * + * Copyright (C) 2006, VMware, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Send feedback to dhecht@vmware.com + * + */ + +#ifndef _ASM_X86_VMI_TIME_H +#define _ASM_X86_VMI_TIME_H + +/* + * Raw VMI call indices for timer functions + */ +#define VMI_CALL_GetCycleFrequency 66 +#define VMI_CALL_GetCycleCounter 67 +#define VMI_CALL_SetAlarm 68 +#define VMI_CALL_CancelAlarm 69 +#define VMI_CALL_GetWallclockTime 70 +#define VMI_CALL_WallclockUpdated 71 + +/* Cached VMI timer operations */ +extern struct vmi_timer_ops { + u64 (*get_cycle_frequency)(void); + u64 (*get_cycle_counter)(int); + u64 (*get_wallclock)(void); + int (*wallclock_updated)(void); + void (*set_alarm)(u32 flags, u64 expiry, u64 period); + void (*cancel_alarm)(u32 flags); +} vmi_timer_ops; + +/* Prototypes */ +extern void __init vmi_time_init(void); +extern unsigned long vmi_get_wallclock(void); +extern int vmi_set_wallclock(unsigned long now); +extern unsigned long long vmi_sched_clock(void); +extern unsigned long vmi_tsc_khz(void); + +#ifdef CONFIG_X86_LOCAL_APIC +extern void __devinit vmi_time_bsp_init(void); +extern void __devinit vmi_time_ap_init(void); +#endif + +/* + * When run under a hypervisor, a vcpu is always in one of three states: + * running, halted, or ready. The vcpu is in the 'running' state if it + * is executing. When the vcpu executes the halt interface, the vcpu + * enters the 'halted' state and remains halted until there is some work + * pending for the vcpu (e.g. an alarm expires, host I/O completes on + * behalf of virtual I/O). At this point, the vcpu enters the 'ready' + * state (waiting for the hypervisor to reschedule it). Finally, at any + * time when the vcpu is not in the 'running' state nor the 'halted' + * state, it is in the 'ready' state. + * + * Real time is advances while the vcpu is 'running', 'ready', or + * 'halted'. Stolen time is the time in which the vcpu is in the + * 'ready' state. Available time is the remaining time -- the vcpu is + * either 'running' or 'halted'. + * + * All three views of time are accessible through the VMI cycle + * counters. + */ + +/* The cycle counters. */ +#define VMI_CYCLES_REAL 0 +#define VMI_CYCLES_AVAILABLE 1 +#define VMI_CYCLES_STOLEN 2 + +/* The alarm interface 'flags' bits */ +#define VMI_ALARM_COUNTERS 2 + +#define VMI_ALARM_COUNTER_MASK 0x000000ff + +#define VMI_ALARM_WIRED_IRQ0 0x00000000 +#define VMI_ALARM_WIRED_LVTT 0x00010000 + +#define VMI_ALARM_IS_ONESHOT 0x00000000 +#define VMI_ALARM_IS_PERIODIC 0x00000100 + +#define CONFIG_VMI_ALARM_HZ 100 + +#endif /* _ASM_X86_VMI_TIME_H */ diff --git a/arch/x86/include/asm/voyager.h b/arch/x86/include/asm/voyager.h new file mode 100644 index 00000000000..9c811d2e6f9 --- /dev/null +++ b/arch/x86/include/asm/voyager.h @@ -0,0 +1,528 @@ +/* Copyright (C) 1999,2001 + * + * Author: J.E.J.Bottomley@HansenPartnership.com + * + * Standard include definitions for the NCR Voyager system */ + +#undef VOYAGER_DEBUG +#undef VOYAGER_CAT_DEBUG + +#ifdef VOYAGER_DEBUG +#define VDEBUG(x) printk x +#else +#define VDEBUG(x) +#endif + +/* There are three levels of voyager machine: 3,4 and 5. The rule is + * if it's less than 3435 it's a Level 3 except for a 3360 which is + * a level 4. A 3435 or above is a Level 5 */ +#define VOYAGER_LEVEL5_AND_ABOVE 0x3435 +#define VOYAGER_LEVEL4 0x3360 + +/* The L4 DINO ASIC */ +#define VOYAGER_DINO 0x43 + +/* voyager ports in standard I/O space */ +#define VOYAGER_MC_SETUP 0x96 + + +#define VOYAGER_CAT_CONFIG_PORT 0x97 +# define VOYAGER_CAT_DESELECT 0xff +#define VOYAGER_SSPB_RELOCATION_PORT 0x98 + +/* Valid CAT controller commands */ +/* start instruction register cycle */ +#define VOYAGER_CAT_IRCYC 0x01 +/* start data register cycle */ +#define VOYAGER_CAT_DRCYC 0x02 +/* move to execute state */ +#define VOYAGER_CAT_RUN 0x0F +/* end operation */ +#define VOYAGER_CAT_END 0x80 +/* hold in idle state */ +#define VOYAGER_CAT_HOLD 0x90 +/* single step an "intest" vector */ +#define VOYAGER_CAT_STEP 0xE0 +/* return cat controller to CLEMSON mode */ +#define VOYAGER_CAT_CLEMSON 0xFF + +/* the default cat command header */ +#define VOYAGER_CAT_HEADER 0x7F + +/* the range of possible CAT module ids in the system */ +#define VOYAGER_MIN_MODULE 0x10 +#define VOYAGER_MAX_MODULE 0x1f + +/* The voyager registers per asic */ +#define VOYAGER_ASIC_ID_REG 0x00 +#define VOYAGER_ASIC_TYPE_REG 0x01 +/* the sub address registers can be made auto incrementing on reads */ +#define VOYAGER_AUTO_INC_REG 0x02 +# define VOYAGER_AUTO_INC 0x04 +# define VOYAGER_NO_AUTO_INC 0xfb +#define VOYAGER_SUBADDRDATA 0x03 +#define VOYAGER_SCANPATH 0x05 +# define VOYAGER_CONNECT_ASIC 0x01 +# define VOYAGER_DISCONNECT_ASIC 0xfe +#define VOYAGER_SUBADDRLO 0x06 +#define VOYAGER_SUBADDRHI 0x07 +#define VOYAGER_SUBMODSELECT 0x08 +#define VOYAGER_SUBMODPRESENT 0x09 + +#define VOYAGER_SUBADDR_LO 0xff +#define VOYAGER_SUBADDR_HI 0xffff + +/* the maximum size of a scan path -- used to form instructions */ +#define VOYAGER_MAX_SCAN_PATH 0x100 +/* the biggest possible register size (in bytes) */ +#define VOYAGER_MAX_REG_SIZE 4 + +/* Total number of possible modules (including submodules) */ +#define VOYAGER_MAX_MODULES 16 +/* Largest number of asics per module */ +#define VOYAGER_MAX_ASICS_PER_MODULE 7 + +/* the CAT asic of each module is always the first one */ +#define VOYAGER_CAT_ID 0 +#define VOYAGER_PSI 0x1a + +/* voyager instruction operations and registers */ +#define VOYAGER_READ_CONFIG 0x1 +#define VOYAGER_WRITE_CONFIG 0x2 +#define VOYAGER_BYPASS 0xff + +typedef struct voyager_asic { + __u8 asic_addr; /* ASIC address; Level 4 */ + __u8 asic_type; /* ASIC type */ + __u8 asic_id; /* ASIC id */ + __u8 jtag_id[4]; /* JTAG id */ + __u8 asic_location; /* Location within scan path; start w/ 0 */ + __u8 bit_location; /* Location within bit stream; start w/ 0 */ + __u8 ireg_length; /* Instruction register length */ + __u16 subaddr; /* Amount of sub address space */ + struct voyager_asic *next; /* Next asic in linked list */ +} voyager_asic_t; + +typedef struct voyager_module { + __u8 module_addr; /* Module address */ + __u8 scan_path_connected; /* Scan path connected */ + __u16 ee_size; /* Size of the EEPROM */ + __u16 num_asics; /* Number of Asics */ + __u16 inst_bits; /* Instruction bits in the scan path */ + __u16 largest_reg; /* Largest register in the scan path */ + __u16 smallest_reg; /* Smallest register in the scan path */ + voyager_asic_t *asic; /* First ASIC in scan path (CAT_I) */ + struct voyager_module *submodule; /* Submodule pointer */ + struct voyager_module *next; /* Next module in linked list */ +} voyager_module_t; + +typedef struct voyager_eeprom_hdr { + __u8 module_id[4]; + __u8 version_id; + __u8 config_id; + __u16 boundry_id; /* boundary scan id */ + __u16 ee_size; /* size of EEPROM */ + __u8 assembly[11]; /* assembly # */ + __u8 assembly_rev; /* assembly rev */ + __u8 tracer[4]; /* tracer number */ + __u16 assembly_cksum; /* asm checksum */ + __u16 power_consump; /* pwr requirements */ + __u16 num_asics; /* number of asics */ + __u16 bist_time; /* min. bist time */ + __u16 err_log_offset; /* error log offset */ + __u16 scan_path_offset;/* scan path offset */ + __u16 cct_offset; + __u16 log_length; /* length of err log */ + __u16 xsum_end; /* offset to end of + checksum */ + __u8 reserved[4]; + __u8 sflag; /* starting sentinal */ + __u8 part_number[13]; /* prom part number */ + __u8 version[10]; /* version number */ + __u8 signature[8]; + __u16 eeprom_chksum; + __u32 data_stamp_offset; + __u8 eflag ; /* ending sentinal */ +} __attribute__((packed)) voyager_eprom_hdr_t; + + + +#define VOYAGER_EPROM_SIZE_OFFSET \ + ((__u16)(&(((voyager_eprom_hdr_t *)0)->ee_size))) +#define VOYAGER_XSUM_END_OFFSET 0x2a + +/* the following three definitions are for internal table layouts + * in the module EPROMs. We really only care about the IDs and + * offsets */ +typedef struct voyager_sp_table { + __u8 asic_id; + __u8 bypass_flag; + __u16 asic_data_offset; + __u16 config_data_offset; +} __attribute__((packed)) voyager_sp_table_t; + +typedef struct voyager_jtag_table { + __u8 icode[4]; + __u8 runbist[4]; + __u8 intest[4]; + __u8 samp_preld[4]; + __u8 ireg_len; +} __attribute__((packed)) voyager_jtt_t; + +typedef struct voyager_asic_data_table { + __u8 jtag_id[4]; + __u16 length_bsr; + __u16 length_bist_reg; + __u32 bist_clk; + __u16 subaddr_bits; + __u16 seed_bits; + __u16 sig_bits; + __u16 jtag_offset; +} __attribute__((packed)) voyager_at_t; + +/* Voyager Interrupt Controller (VIC) registers */ + +/* Base to add to Cross Processor Interrupts (CPIs) when triggering + * the CPU IRQ line */ +/* register defines for the WCBICs (one per processor) */ +#define VOYAGER_WCBIC0 0x41 /* bus A node P1 processor 0 */ +#define VOYAGER_WCBIC1 0x49 /* bus A node P1 processor 1 */ +#define VOYAGER_WCBIC2 0x51 /* bus A node P2 processor 0 */ +#define VOYAGER_WCBIC3 0x59 /* bus A node P2 processor 1 */ +#define VOYAGER_WCBIC4 0x61 /* bus B node P1 processor 0 */ +#define VOYAGER_WCBIC5 0x69 /* bus B node P1 processor 1 */ +#define VOYAGER_WCBIC6 0x71 /* bus B node P2 processor 0 */ +#define VOYAGER_WCBIC7 0x79 /* bus B node P2 processor 1 */ + + +/* top of memory registers */ +#define VOYAGER_WCBIC_TOM_L 0x4 +#define VOYAGER_WCBIC_TOM_H 0x5 + +/* register defines for Voyager Memory Contol (VMC) + * these are present on L4 machines only */ +#define VOYAGER_VMC1 0x81 +#define VOYAGER_VMC2 0x91 +#define VOYAGER_VMC3 0xa1 +#define VOYAGER_VMC4 0xb1 + +/* VMC Ports */ +#define VOYAGER_VMC_MEMORY_SETUP 0x9 +# define VMC_Interleaving 0x01 +# define VMC_4Way 0x02 +# define VMC_EvenCacheLines 0x04 +# define VMC_HighLine 0x08 +# define VMC_Start0_Enable 0x20 +# define VMC_Start1_Enable 0x40 +# define VMC_Vremap 0x80 +#define VOYAGER_VMC_BANK_DENSITY 0xa +# define VMC_BANK_EMPTY 0 +# define VMC_BANK_4MB 1 +# define VMC_BANK_16MB 2 +# define VMC_BANK_64MB 3 +# define VMC_BANK0_MASK 0x03 +# define VMC_BANK1_MASK 0x0C +# define VMC_BANK2_MASK 0x30 +# define VMC_BANK3_MASK 0xC0 + +/* Magellan Memory Controller (MMC) defines - present on L5 */ +#define VOYAGER_MMC_ASIC_ID 1 +/* the two memory modules corresponding to memory cards in the system */ +#define VOYAGER_MMC_MEMORY0_MODULE 0x14 +#define VOYAGER_MMC_MEMORY1_MODULE 0x15 +/* the Magellan Memory Address (MMA) defines */ +#define VOYAGER_MMA_ASIC_ID 2 + +/* Submodule number for the Quad Baseboard */ +#define VOYAGER_QUAD_BASEBOARD 1 + +/* ASIC defines for the Quad Baseboard */ +#define VOYAGER_QUAD_QDATA0 1 +#define VOYAGER_QUAD_QDATA1 2 +#define VOYAGER_QUAD_QABC 3 + +/* Useful areas in extended CMOS */ +#define VOYAGER_PROCESSOR_PRESENT_MASK 0x88a +#define VOYAGER_MEMORY_CLICKMAP 0xa23 +#define VOYAGER_DUMP_LOCATION 0xb1a + +/* SUS In Control bit - used to tell SUS that we don't need to be + * babysat anymore */ +#define VOYAGER_SUS_IN_CONTROL_PORT 0x3ff +# define VOYAGER_IN_CONTROL_FLAG 0x80 + +/* Voyager PSI defines */ +#define VOYAGER_PSI_STATUS_REG 0x08 +# define PSI_DC_FAIL 0x01 +# define PSI_MON 0x02 +# define PSI_FAULT 0x04 +# define PSI_ALARM 0x08 +# define PSI_CURRENT 0x10 +# define PSI_DVM 0x20 +# define PSI_PSCFAULT 0x40 +# define PSI_STAT_CHG 0x80 + +#define VOYAGER_PSI_SUPPLY_REG 0x8000 + /* read */ +# define PSI_FAIL_DC 0x01 +# define PSI_FAIL_AC 0x02 +# define PSI_MON_INT 0x04 +# define PSI_SWITCH_OFF 0x08 +# define PSI_HX_OFF 0x10 +# define PSI_SECURITY 0x20 +# define PSI_CMOS_BATT_LOW 0x40 +# define PSI_CMOS_BATT_FAIL 0x80 + /* write */ +# define PSI_CLR_SWITCH_OFF 0x13 +# define PSI_CLR_HX_OFF 0x14 +# define PSI_CLR_CMOS_BATT_FAIL 0x17 + +#define VOYAGER_PSI_MASK 0x8001 +# define PSI_MASK_MASK 0x10 + +#define VOYAGER_PSI_AC_FAIL_REG 0x8004 +#define AC_FAIL_STAT_CHANGE 0x80 + +#define VOYAGER_PSI_GENERAL_REG 0x8007 + /* read */ +# define PSI_SWITCH_ON 0x01 +# define PSI_SWITCH_ENABLED 0x02 +# define PSI_ALARM_ENABLED 0x08 +# define PSI_SECURE_ENABLED 0x10 +# define PSI_COLD_RESET 0x20 +# define PSI_COLD_START 0x80 + /* write */ +# define PSI_POWER_DOWN 0x10 +# define PSI_SWITCH_DISABLE 0x01 +# define PSI_SWITCH_ENABLE 0x11 +# define PSI_CLEAR 0x12 +# define PSI_ALARM_DISABLE 0x03 +# define PSI_ALARM_ENABLE 0x13 +# define PSI_CLEAR_COLD_RESET 0x05 +# define PSI_SET_COLD_RESET 0x15 +# define PSI_CLEAR_COLD_START 0x07 +# define PSI_SET_COLD_START 0x17 + + + +struct voyager_bios_info { + __u8 len; + __u8 major; + __u8 minor; + __u8 debug; + __u8 num_classes; + __u8 class_1; + __u8 class_2; +}; + +/* The following structures and definitions are for the Kernel/SUS + * interface these are needed to find out how SUS initialised any Quad + * boards in the system */ + +#define NUMBER_OF_MC_BUSSES 2 +#define SLOTS_PER_MC_BUS 8 +#define MAX_CPUS 16 /* 16 way CPU system */ +#define MAX_PROCESSOR_BOARDS 4 /* 4 processor slot system */ +#define MAX_CACHE_LEVELS 4 /* # of cache levels supported */ +#define MAX_SHARED_CPUS 4 /* # of CPUs that can share a LARC */ +#define NUMBER_OF_POS_REGS 8 + +typedef struct { + __u8 MC_Slot; + __u8 POS_Values[NUMBER_OF_POS_REGS]; +} __attribute__((packed)) MC_SlotInformation_t; + +struct QuadDescription { + __u8 Type; /* for type 0 (DYADIC or MONADIC) all fields + * will be zero except for slot */ + __u8 StructureVersion; + __u32 CPI_BaseAddress; + __u32 LARC_BankSize; + __u32 LocalMemoryStateBits; + __u8 Slot; /* Processor slots 1 - 4 */ +} __attribute__((packed)); + +struct ProcBoardInfo { + __u8 Type; + __u8 StructureVersion; + __u8 NumberOfBoards; + struct QuadDescription QuadData[MAX_PROCESSOR_BOARDS]; +} __attribute__((packed)); + +struct CacheDescription { + __u8 Level; + __u32 TotalSize; + __u16 LineSize; + __u8 Associativity; + __u8 CacheType; + __u8 WriteType; + __u8 Number_CPUs_SharedBy; + __u8 Shared_CPUs_Hardware_IDs[MAX_SHARED_CPUS]; + +} __attribute__((packed)); + +struct CPU_Description { + __u8 CPU_HardwareId; + char *FRU_String; + __u8 NumberOfCacheLevels; + struct CacheDescription CacheLevelData[MAX_CACHE_LEVELS]; +} __attribute__((packed)); + +struct CPU_Info { + __u8 Type; + __u8 StructureVersion; + __u8 NumberOf_CPUs; + struct CPU_Description CPU_Data[MAX_CPUS]; +} __attribute__((packed)); + + +/* + * This structure will be used by SUS and the OS. + * The assumption about this structure is that no blank space is + * packed in it by our friend the compiler. + */ +typedef struct { + __u8 Mailbox_SUS; /* Written to by SUS to give + commands/response to the OS */ + __u8 Mailbox_OS; /* Written to by the OS to give + commands/response to SUS */ + __u8 SUS_MailboxVersion; /* Tells the OS which iteration of the + interface SUS supports */ + __u8 OS_MailboxVersion; /* Tells SUS which iteration of the + interface the OS supports */ + __u32 OS_Flags; /* Flags set by the OS as info for + SUS */ + __u32 SUS_Flags; /* Flags set by SUS as info + for the OS */ + __u32 WatchDogPeriod; /* Watchdog period (in seconds) which + the DP uses to see if the OS + is dead */ + __u32 WatchDogCount; /* Updated by the OS on every tic. */ + __u32 MemoryFor_SUS_ErrorLog; /* Flat 32 bit address which tells SUS + where to stuff the SUS error log + on a dump */ + MC_SlotInformation_t MC_SlotInfo[NUMBER_OF_MC_BUSSES*SLOTS_PER_MC_BUS]; + /* Storage for MCA POS data */ + /* All new SECOND_PASS_INTERFACE fields added from this point */ + struct ProcBoardInfo *BoardData; + struct CPU_Info *CPU_Data; + /* All new fields must be added from this point */ +} Voyager_KernelSUS_Mbox_t; + +/* structure for finding the right memory address to send a QIC CPI to */ +struct voyager_qic_cpi { + /* Each cache line (32 bytes) can trigger a cpi. The cpi + * read/write may occur anywhere in the cache line---pick the + * middle to be safe */ + struct { + __u32 pad1[3]; + __u32 cpi; + __u32 pad2[4]; + } qic_cpi[8]; +}; + +struct voyager_status { + __u32 power_fail:1; + __u32 switch_off:1; + __u32 request_from_kernel:1; +}; + +struct voyager_psi_regs { + __u8 cat_id; + __u8 cat_dev; + __u8 cat_control; + __u8 subaddr; + __u8 dummy4; + __u8 checkbit; + __u8 subaddr_low; + __u8 subaddr_high; + __u8 intstatus; + __u8 stat1; + __u8 stat3; + __u8 fault; + __u8 tms; + __u8 gen; + __u8 sysconf; + __u8 dummy15; +}; + +struct voyager_psi_subregs { + __u8 supply; + __u8 mask; + __u8 present; + __u8 DCfail; + __u8 ACfail; + __u8 fail; + __u8 UPSfail; + __u8 genstatus; +}; + +struct voyager_psi { + struct voyager_psi_regs regs; + struct voyager_psi_subregs subregs; +}; + +struct voyager_SUS { +#define VOYAGER_DUMP_BUTTON_NMI 0x1 +#define VOYAGER_SUS_VALID 0x2 +#define VOYAGER_SYSINT_COMPLETE 0x3 + __u8 SUS_mbox; +#define VOYAGER_NO_COMMAND 0x0 +#define VOYAGER_IGNORE_DUMP 0x1 +#define VOYAGER_DO_DUMP 0x2 +#define VOYAGER_SYSINT_HANDSHAKE 0x3 +#define VOYAGER_DO_MEM_DUMP 0x4 +#define VOYAGER_SYSINT_WAS_RECOVERED 0x5 + __u8 kernel_mbox; +#define VOYAGER_MAILBOX_VERSION 0x10 + __u8 SUS_version; + __u8 kernel_version; +#define VOYAGER_OS_HAS_SYSINT 0x1 +#define VOYAGER_OS_IN_PROGRESS 0x2 +#define VOYAGER_UPDATING_WDPERIOD 0x4 + __u32 kernel_flags; +#define VOYAGER_SUS_BOOTING 0x1 +#define VOYAGER_SUS_IN_PROGRESS 0x2 + __u32 SUS_flags; + __u32 watchdog_period; + __u32 watchdog_count; + __u32 SUS_errorlog; + /* lots of system configuration stuff under here */ +}; + +/* Variables exported by voyager_smp */ +extern __u32 voyager_extended_vic_processors; +extern __u32 voyager_allowed_boot_processors; +extern __u32 voyager_quad_processors; +extern struct voyager_qic_cpi *voyager_quad_cpi_addr[NR_CPUS]; +extern struct voyager_SUS *voyager_SUS; + +/* variables exported always */ +extern struct task_struct *voyager_thread; +extern int voyager_level; +extern struct voyager_status voyager_status; + +/* functions exported by the voyager and voyager_smp modules */ +extern int voyager_cat_readb(__u8 module, __u8 asic, int reg); +extern void voyager_cat_init(void); +extern void voyager_detect(struct voyager_bios_info *); +extern void voyager_trap_init(void); +extern void voyager_setup_irqs(void); +extern int voyager_memory_detect(int region, __u32 *addr, __u32 *length); +extern void voyager_smp_intr_init(void); +extern __u8 voyager_extended_cmos_read(__u16 cmos_address); +extern void voyager_smp_dump(void); +extern void voyager_timer_interrupt(void); +extern void smp_local_timer_interrupt(void); +extern void voyager_power_off(void); +extern void smp_voyager_power_off(void *dummy); +extern void voyager_restart(void); +extern void voyager_cat_power_off(void); +extern void voyager_cat_do_common_interrupt(void); +extern void voyager_handle_nmi(void); +/* Commands for the following are */ +#define VOYAGER_PSI_READ 0 +#define VOYAGER_PSI_WRITE 1 +#define VOYAGER_PSI_SUBREAD 2 +#define VOYAGER_PSI_SUBWRITE 3 +extern void voyager_cat_psi(__u8, __u16, __u8 *); diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h new file mode 100644 index 00000000000..d0983d255fb --- /dev/null +++ b/arch/x86/include/asm/vsyscall.h @@ -0,0 +1,44 @@ +#ifndef _ASM_X86_VSYSCALL_H +#define _ASM_X86_VSYSCALL_H + +enum vsyscall_num { + __NR_vgettimeofday, + __NR_vtime, + __NR_vgetcpu, +}; + +#define VSYSCALL_START (-10UL << 20) +#define VSYSCALL_SIZE 1024 +#define VSYSCALL_END (-2UL << 20) +#define VSYSCALL_MAPPED_PAGES 1 +#define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr)) + +#ifdef __KERNEL__ +#include <linux/seqlock.h> + +#define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16))) +#define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16))) + +/* Definitions for CONFIG_GENERIC_TIME definitions */ +#define __section_vsyscall_gtod_data __attribute__ \ + ((unused, __section__ (".vsyscall_gtod_data"),aligned(16))) +#define __section_vsyscall_clock __attribute__ \ + ((unused, __section__ (".vsyscall_clock"),aligned(16))) +#define __vsyscall_fn \ + __attribute__ ((unused, __section__(".vsyscall_fn"))) notrace + +#define VGETCPU_RDTSCP 1 +#define VGETCPU_LSL 2 + +extern int __vgetcpu_mode; +extern volatile unsigned long __jiffies; + +/* kernel space (writeable) */ +extern int vgetcpu_mode; +extern struct timezone sys_tz; + +extern void map_vsyscall(void); + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_VSYSCALL_H */ diff --git a/arch/x86/include/asm/xcr.h b/arch/x86/include/asm/xcr.h new file mode 100644 index 00000000000..f2cba4e79a2 --- /dev/null +++ b/arch/x86/include/asm/xcr.h @@ -0,0 +1,49 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2008 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2 or (at your + * option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * asm-x86/xcr.h + * + * Definitions for the eXtended Control Register instructions + */ + +#ifndef _ASM_X86_XCR_H +#define _ASM_X86_XCR_H + +#define XCR_XFEATURE_ENABLED_MASK 0x00000000 + +#ifdef __KERNEL__ +# ifndef __ASSEMBLY__ + +#include <linux/types.h> + +static inline u64 xgetbv(u32 index) +{ + u32 eax, edx; + + asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ + : "=a" (eax), "=d" (edx) + : "c" (index)); + return eax + ((u64)edx << 32); +} + +static inline void xsetbv(u32 index, u64 value) +{ + u32 eax = value; + u32 edx = value >> 32; + + asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */ + : : "a" (eax), "d" (edx), "c" (index)); +} + +# endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_XCR_H */ diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h new file mode 100644 index 00000000000..19144184983 --- /dev/null +++ b/arch/x86/include/asm/xen/events.h @@ -0,0 +1,24 @@ +#ifndef _ASM_X86_XEN_EVENTS_H +#define _ASM_X86_XEN_EVENTS_H + +enum ipi_vector { + XEN_RESCHEDULE_VECTOR, + XEN_CALL_FUNCTION_VECTOR, + XEN_CALL_FUNCTION_SINGLE_VECTOR, + XEN_SPIN_UNLOCK_VECTOR, + + XEN_NR_IPIS, +}; + +static inline int xen_irqs_disabled(struct pt_regs *regs) +{ + return raw_irqs_disabled_flags(regs->flags); +} + +static inline void xen_do_IRQ(int irq, struct pt_regs *regs) +{ + regs->orig_ax = ~irq; + do_IRQ(regs); +} + +#endif /* _ASM_X86_XEN_EVENTS_H */ diff --git a/arch/x86/include/asm/xen/grant_table.h b/arch/x86/include/asm/xen/grant_table.h new file mode 100644 index 00000000000..fdbbb45767a --- /dev/null +++ b/arch/x86/include/asm/xen/grant_table.h @@ -0,0 +1,7 @@ +#ifndef _ASM_X86_XEN_GRANT_TABLE_H +#define _ASM_X86_XEN_GRANT_TABLE_H + +#define xen_alloc_vm_area(size) alloc_vm_area(size) +#define xen_free_vm_area(area) free_vm_area(area) + +#endif /* _ASM_X86_XEN_GRANT_TABLE_H */ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h new file mode 100644 index 00000000000..3f6000d95fe --- /dev/null +++ b/arch/x86/include/asm/xen/hypercall.h @@ -0,0 +1,527 @@ +/****************************************************************************** + * hypercall.h + * + * Linux-specific hypervisor handling. + * + * Copyright (c) 2002-2004, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _ASM_X86_XEN_HYPERCALL_H +#define _ASM_X86_XEN_HYPERCALL_H + +#include <linux/errno.h> +#include <linux/string.h> + +#include <xen/interface/xen.h> +#include <xen/interface/sched.h> +#include <xen/interface/physdev.h> + +/* + * The hypercall asms have to meet several constraints: + * - Work on 32- and 64-bit. + * The two architectures put their arguments in different sets of + * registers. + * + * - Work around asm syntax quirks + * It isn't possible to specify one of the rNN registers in a + * constraint, so we use explicit register variables to get the + * args into the right place. + * + * - Mark all registers as potentially clobbered + * Even unused parameters can be clobbered by the hypervisor, so we + * need to make sure gcc knows it. + * + * - Avoid compiler bugs. + * This is the tricky part. Because x86_32 has such a constrained + * register set, gcc versions below 4.3 have trouble generating + * code when all the arg registers and memory are trashed by the + * asm. There are syntactically simpler ways of achieving the + * semantics below, but they cause the compiler to crash. + * + * The only combination I found which works is: + * - assign the __argX variables first + * - list all actually used parameters as "+r" (__argX) + * - clobber the rest + * + * The result certainly isn't pretty, and it really shows up cpp's + * weakness as as macro language. Sorry. (But let's just give thanks + * there aren't more than 5 arguments...) + */ + +extern struct { char _entry[32]; } hypercall_page[]; + +#define __HYPERCALL "call hypercall_page+%c[offset]" +#define __HYPERCALL_ENTRY(x) \ + [offset] "i" (__HYPERVISOR_##x * sizeof(hypercall_page[0])) + +#ifdef CONFIG_X86_32 +#define __HYPERCALL_RETREG "eax" +#define __HYPERCALL_ARG1REG "ebx" +#define __HYPERCALL_ARG2REG "ecx" +#define __HYPERCALL_ARG3REG "edx" +#define __HYPERCALL_ARG4REG "esi" +#define __HYPERCALL_ARG5REG "edi" +#else +#define __HYPERCALL_RETREG "rax" +#define __HYPERCALL_ARG1REG "rdi" +#define __HYPERCALL_ARG2REG "rsi" +#define __HYPERCALL_ARG3REG "rdx" +#define __HYPERCALL_ARG4REG "r10" +#define __HYPERCALL_ARG5REG "r8" +#endif + +#define __HYPERCALL_DECLS \ + register unsigned long __res asm(__HYPERCALL_RETREG); \ + register unsigned long __arg1 asm(__HYPERCALL_ARG1REG) = __arg1; \ + register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \ + register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \ + register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \ + register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; + +#define __HYPERCALL_0PARAM "=r" (__res) +#define __HYPERCALL_1PARAM __HYPERCALL_0PARAM, "+r" (__arg1) +#define __HYPERCALL_2PARAM __HYPERCALL_1PARAM, "+r" (__arg2) +#define __HYPERCALL_3PARAM __HYPERCALL_2PARAM, "+r" (__arg3) +#define __HYPERCALL_4PARAM __HYPERCALL_3PARAM, "+r" (__arg4) +#define __HYPERCALL_5PARAM __HYPERCALL_4PARAM, "+r" (__arg5) + +#define __HYPERCALL_0ARG() +#define __HYPERCALL_1ARG(a1) \ + __HYPERCALL_0ARG() __arg1 = (unsigned long)(a1); +#define __HYPERCALL_2ARG(a1,a2) \ + __HYPERCALL_1ARG(a1) __arg2 = (unsigned long)(a2); +#define __HYPERCALL_3ARG(a1,a2,a3) \ + __HYPERCALL_2ARG(a1,a2) __arg3 = (unsigned long)(a3); +#define __HYPERCALL_4ARG(a1,a2,a3,a4) \ + __HYPERCALL_3ARG(a1,a2,a3) __arg4 = (unsigned long)(a4); +#define __HYPERCALL_5ARG(a1,a2,a3,a4,a5) \ + __HYPERCALL_4ARG(a1,a2,a3,a4) __arg5 = (unsigned long)(a5); + +#define __HYPERCALL_CLOBBER5 "memory" +#define __HYPERCALL_CLOBBER4 __HYPERCALL_CLOBBER5, __HYPERCALL_ARG5REG +#define __HYPERCALL_CLOBBER3 __HYPERCALL_CLOBBER4, __HYPERCALL_ARG4REG +#define __HYPERCALL_CLOBBER2 __HYPERCALL_CLOBBER3, __HYPERCALL_ARG3REG +#define __HYPERCALL_CLOBBER1 __HYPERCALL_CLOBBER2, __HYPERCALL_ARG2REG +#define __HYPERCALL_CLOBBER0 __HYPERCALL_CLOBBER1, __HYPERCALL_ARG1REG + +#define _hypercall0(type, name) \ +({ \ + __HYPERCALL_DECLS; \ + __HYPERCALL_0ARG(); \ + asm volatile (__HYPERCALL \ + : __HYPERCALL_0PARAM \ + : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_CLOBBER0); \ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + __HYPERCALL_DECLS; \ + __HYPERCALL_1ARG(a1); \ + asm volatile (__HYPERCALL \ + : __HYPERCALL_1PARAM \ + : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_CLOBBER1); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + __HYPERCALL_DECLS; \ + __HYPERCALL_2ARG(a1, a2); \ + asm volatile (__HYPERCALL \ + : __HYPERCALL_2PARAM \ + : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_CLOBBER2); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + __HYPERCALL_DECLS; \ + __HYPERCALL_3ARG(a1, a2, a3); \ + asm volatile (__HYPERCALL \ + : __HYPERCALL_3PARAM \ + : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_CLOBBER3); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + __HYPERCALL_DECLS; \ + __HYPERCALL_4ARG(a1, a2, a3, a4); \ + asm volatile (__HYPERCALL \ + : __HYPERCALL_4PARAM \ + : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_CLOBBER4); \ + (type)__res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + __HYPERCALL_DECLS; \ + __HYPERCALL_5ARG(a1, a2, a3, a4, a5); \ + asm volatile (__HYPERCALL \ + : __HYPERCALL_5PARAM \ + : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_CLOBBER5); \ + (type)__res; \ +}) + +static inline int +HYPERVISOR_set_trap_table(struct trap_info *table) +{ + return _hypercall1(int, set_trap_table, table); +} + +static inline int +HYPERVISOR_mmu_update(struct mmu_update *req, int count, + int *success_count, domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); +} + +static inline int +HYPERVISOR_mmuext_op(struct mmuext_op *op, int count, + int *success_count, domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); +} + +static inline int +HYPERVISOR_set_gdt(unsigned long *frame_list, int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + +static inline int +HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); +} + +#ifdef CONFIG_X86_32 +static inline int +HYPERVISOR_set_callbacks(unsigned long event_selector, + unsigned long event_address, + unsigned long failsafe_selector, + unsigned long failsafe_address) +{ + return _hypercall4(int, set_callbacks, + event_selector, event_address, + failsafe_selector, failsafe_address); +} +#else /* CONFIG_X86_64 */ +static inline int +HYPERVISOR_set_callbacks(unsigned long event_address, + unsigned long failsafe_address, + unsigned long syscall_address) +{ + return _hypercall3(int, set_callbacks, + event_address, failsafe_address, + syscall_address); +} +#endif /* CONFIG_X86_{32,64} */ + +static inline int +HYPERVISOR_callback_op(int cmd, void *arg) +{ + return _hypercall2(int, callback_op, cmd, arg); +} + +static inline int +HYPERVISOR_fpu_taskswitch(int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +static inline int +HYPERVISOR_sched_op(int cmd, void *arg) +{ + return _hypercall2(int, sched_op_new, cmd, arg); +} + +static inline long +HYPERVISOR_set_timer_op(u64 timeout) +{ + unsigned long timeout_hi = (unsigned long)(timeout>>32); + unsigned long timeout_lo = (unsigned long)timeout; + return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); +} + +static inline int +HYPERVISOR_set_debugreg(int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static inline unsigned long +HYPERVISOR_get_debugreg(int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static inline int +HYPERVISOR_update_descriptor(u64 ma, u64 desc) +{ + return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); +} + +static inline int +HYPERVISOR_memory_op(unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int +HYPERVISOR_multicall(void *call_list, int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int +HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val, + unsigned long flags) +{ + if (sizeof(new_val) == sizeof(long)) + return _hypercall3(int, update_va_mapping, va, + new_val.pte, flags); + else + return _hypercall4(int, update_va_mapping, va, + new_val.pte, new_val.pte >> 32, flags); +} + +static inline int +HYPERVISOR_event_channel_op(int cmd, void *arg) +{ + int rc = _hypercall2(int, event_channel_op, cmd, arg); + if (unlikely(rc == -ENOSYS)) { + struct evtchn_op op; + op.cmd = cmd; + memcpy(&op.u, arg, sizeof(op.u)); + rc = _hypercall1(int, event_channel_op_compat, &op); + memcpy(arg, &op.u, sizeof(op.u)); + } + return rc; +} + +static inline int +HYPERVISOR_xen_version(int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int +HYPERVISOR_console_io(int cmd, int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int +HYPERVISOR_physdev_op(int cmd, void *arg) +{ + int rc = _hypercall2(int, physdev_op, cmd, arg); + if (unlikely(rc == -ENOSYS)) { + struct physdev_op op; + op.cmd = cmd; + memcpy(&op.u, arg, sizeof(op.u)); + rc = _hypercall1(int, physdev_op_compat, &op); + memcpy(arg, &op.u, sizeof(op.u)); + } + return rc; +} + +static inline int +HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +static inline int +HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, pte_t new_val, + unsigned long flags, domid_t domid) +{ + if (sizeof(new_val) == sizeof(long)) + return _hypercall4(int, update_va_mapping_otherdomain, va, + new_val.pte, flags, domid); + else + return _hypercall5(int, update_va_mapping_otherdomain, va, + new_val.pte, new_val.pte >> 32, + flags, domid); +} + +static inline int +HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int +HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +#ifdef CONFIG_X86_64 +static inline int +HYPERVISOR_set_segment_base(int reg, unsigned long value) +{ + return _hypercall2(int, set_segment_base, reg, value); +} +#endif + +static inline int +HYPERVISOR_suspend(unsigned long srec) +{ + return _hypercall3(int, sched_op, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); +} + +static inline int +HYPERVISOR_nmi_op(unsigned long op, unsigned long arg) +{ + return _hypercall2(int, nmi_op, op, arg); +} + +static inline void +MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) +{ + mcl->op = __HYPERVISOR_fpu_taskswitch; + mcl->args[0] = set; +} + +static inline void +MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, + pte_t new_val, unsigned long flags) +{ + mcl->op = __HYPERVISOR_update_va_mapping; + mcl->args[0] = va; + if (sizeof(new_val) == sizeof(long)) { + mcl->args[1] = new_val.pte; + mcl->args[2] = flags; + } else { + mcl->args[1] = new_val.pte; + mcl->args[2] = new_val.pte >> 32; + mcl->args[3] = flags; + } +} + +static inline void +MULTI_grant_table_op(struct multicall_entry *mcl, unsigned int cmd, + void *uop, unsigned int count) +{ + mcl->op = __HYPERVISOR_grant_table_op; + mcl->args[0] = cmd; + mcl->args[1] = (unsigned long)uop; + mcl->args[2] = count; +} + +static inline void +MULTI_update_va_mapping_otherdomain(struct multicall_entry *mcl, unsigned long va, + pte_t new_val, unsigned long flags, + domid_t domid) +{ + mcl->op = __HYPERVISOR_update_va_mapping_otherdomain; + mcl->args[0] = va; + if (sizeof(new_val) == sizeof(long)) { + mcl->args[1] = new_val.pte; + mcl->args[2] = flags; + mcl->args[3] = domid; + } else { + mcl->args[1] = new_val.pte; + mcl->args[2] = new_val.pte >> 32; + mcl->args[3] = flags; + mcl->args[4] = domid; + } +} + +static inline void +MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, + struct desc_struct desc) +{ + mcl->op = __HYPERVISOR_update_descriptor; + if (sizeof(maddr) == sizeof(long)) { + mcl->args[0] = maddr; + mcl->args[1] = *(unsigned long *)&desc; + } else { + mcl->args[0] = maddr; + mcl->args[1] = maddr >> 32; + mcl->args[2] = desc.a; + mcl->args[3] = desc.b; + } +} + +static inline void +MULTI_memory_op(struct multicall_entry *mcl, unsigned int cmd, void *arg) +{ + mcl->op = __HYPERVISOR_memory_op; + mcl->args[0] = cmd; + mcl->args[1] = (unsigned long)arg; +} + +static inline void +MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req, + int count, int *success_count, domid_t domid) +{ + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (unsigned long)req; + mcl->args[1] = count; + mcl->args[2] = (unsigned long)success_count; + mcl->args[3] = domid; +} + +static inline void +MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count, + int *success_count, domid_t domid) +{ + mcl->op = __HYPERVISOR_mmuext_op; + mcl->args[0] = (unsigned long)op; + mcl->args[1] = count; + mcl->args[2] = (unsigned long)success_count; + mcl->args[3] = domid; +} + +static inline void +MULTI_set_gdt(struct multicall_entry *mcl, unsigned long *frames, int entries) +{ + mcl->op = __HYPERVISOR_set_gdt; + mcl->args[0] = (unsigned long)frames; + mcl->args[1] = entries; +} + +static inline void +MULTI_stack_switch(struct multicall_entry *mcl, + unsigned long ss, unsigned long esp) +{ + mcl->op = __HYPERVISOR_stack_switch; + mcl->args[0] = ss; + mcl->args[1] = esp; +} + +#endif /* _ASM_X86_XEN_HYPERCALL_H */ diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h new file mode 100644 index 00000000000..a38d25ac87d --- /dev/null +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -0,0 +1,82 @@ +/****************************************************************************** + * hypervisor.h + * + * Linux-specific hypervisor handling. + * + * Copyright (c) 2002-2004, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _ASM_X86_XEN_HYPERVISOR_H +#define _ASM_X86_XEN_HYPERVISOR_H + +#include <linux/types.h> +#include <linux/kernel.h> + +#include <xen/interface/xen.h> +#include <xen/interface/version.h> + +#include <asm/ptrace.h> +#include <asm/page.h> +#include <asm/desc.h> +#if defined(__i386__) +# ifdef CONFIG_X86_PAE +# include <asm-generic/pgtable-nopud.h> +# else +# include <asm-generic/pgtable-nopmd.h> +# endif +#endif +#include <asm/xen/hypercall.h> + +/* arch/i386/kernel/setup.c */ +extern struct shared_info *HYPERVISOR_shared_info; +extern struct start_info *xen_start_info; + +/* arch/i386/mach-xen/evtchn.c */ +/* Force a proper event-channel callback from Xen. */ +extern void force_evtchn_callback(void); + +/* Turn jiffies into Xen system time. */ +u64 jiffies_to_st(unsigned long jiffies); + + +#define MULTI_UVMFLAGS_INDEX 3 +#define MULTI_UVMDOMID_INDEX 4 + +enum xen_domain_type { + XEN_NATIVE, + XEN_PV_DOMAIN, + XEN_HVM_DOMAIN, +}; + +extern enum xen_domain_type xen_domain_type; + +#define xen_domain() (xen_domain_type != XEN_NATIVE) +#define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN) +#define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN) +#define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN) + +#endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h new file mode 100644 index 00000000000..e8506c1f0c5 --- /dev/null +++ b/arch/x86/include/asm/xen/interface.h @@ -0,0 +1,175 @@ +/****************************************************************************** + * arch-x86_32.h + * + * Guest OS interface to x86 Xen. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef _ASM_X86_XEN_INTERFACE_H +#define _ASM_X86_XEN_INTERFACE_H + +#ifdef __XEN__ +#define __DEFINE_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } __guest_handle_ ## name +#else +#define __DEFINE_GUEST_HANDLE(name, type) \ + typedef type * __guest_handle_ ## name +#endif + +#define DEFINE_GUEST_HANDLE_STRUCT(name) \ + __DEFINE_GUEST_HANDLE(name, struct name) +#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name) +#define GUEST_HANDLE(name) __guest_handle_ ## name + +#ifdef __XEN__ +#if defined(__i386__) +#define set_xen_guest_handle(hnd, val) \ + do { \ + if (sizeof(hnd) == 8) \ + *(uint64_t *)&(hnd) = 0; \ + (hnd).p = val; \ + } while (0) +#elif defined(__x86_64__) +#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) +#endif +#else +#if defined(__i386__) +#define set_xen_guest_handle(hnd, val) \ + do { \ + if (sizeof(hnd) == 8) \ + *(uint64_t *)&(hnd) = 0; \ + (hnd) = val; \ + } while (0) +#elif defined(__x86_64__) +#define set_xen_guest_handle(hnd, val) do { (hnd) = val; } while (0) +#endif +#endif + +#ifndef __ASSEMBLY__ +/* Guest handles for primitive C types. */ +__DEFINE_GUEST_HANDLE(uchar, unsigned char); +__DEFINE_GUEST_HANDLE(uint, unsigned int); +__DEFINE_GUEST_HANDLE(ulong, unsigned long); +DEFINE_GUEST_HANDLE(char); +DEFINE_GUEST_HANDLE(int); +DEFINE_GUEST_HANDLE(long); +DEFINE_GUEST_HANDLE(void); +#endif + +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#endif + +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) +#endif + +/* Maximum number of virtual CPUs in multi-processor guests. */ +#define MAX_VIRT_CPUS 32 + +/* + * SEGMENT DESCRIPTOR TABLES + */ +/* + * A number of GDT entries are reserved by Xen. These are not situated at the + * start of the GDT because some stupid OSes export hard-coded selector values + * in their ABI. These hard-coded values are always near the start of the GDT, + * so Xen places itself out of the way, at the far end of the GDT. + */ +#define FIRST_RESERVED_GDT_PAGE 14 +#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) +#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) + +/* + * Send an array of these to HYPERVISOR_set_trap_table() + * The privilege level specifies which modes may enter a trap via a software + * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate + * privilege levels as follows: + * Level == 0: Noone may enter + * Level == 1: Kernel may enter + * Level == 2: Kernel may enter + * Level == 3: Everyone may enter + */ +#define TI_GET_DPL(_ti) ((_ti)->flags & 3) +#define TI_GET_IF(_ti) ((_ti)->flags & 4) +#define TI_SET_DPL(_ti, _dpl) ((_ti)->flags |= (_dpl)) +#define TI_SET_IF(_ti, _if) ((_ti)->flags |= ((!!(_if))<<2)) + +#ifndef __ASSEMBLY__ +struct trap_info { + uint8_t vector; /* exception vector */ + uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ + uint16_t cs; /* code selector */ + unsigned long address; /* code offset */ +}; +DEFINE_GUEST_HANDLE_STRUCT(trap_info); + +struct arch_shared_info { + unsigned long max_pfn; /* max pfn that appears in table */ + /* Frame containing list of mfns containing list of mfns containing p2m. */ + unsigned long pfn_to_mfn_frame_list_list; + unsigned long nmi_reason; +}; +#endif /* !__ASSEMBLY__ */ + +#ifdef CONFIG_X86_32 +#include "interface_32.h" +#else +#include "interface_64.h" +#endif + +#ifndef __ASSEMBLY__ +/* + * The following is all CPU context. Note that the fpu_ctxt block is filled + * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. + */ +struct vcpu_guest_context { + /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ + struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ +#define VGCF_I387_VALID (1<<0) +#define VGCF_HVM_GUEST (1<<1) +#define VGCF_IN_KERNEL (1<<2) + unsigned long flags; /* VGCF_* flags */ + struct cpu_user_regs user_regs; /* User-level CPU registers */ + struct trap_info trap_ctxt[256]; /* Virtual IDT */ + unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ + unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ + unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */ + /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */ + unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */ + unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ +#ifdef __i386__ + unsigned long event_callback_cs; /* CS:EIP of event callback */ + unsigned long event_callback_eip; + unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ + unsigned long failsafe_callback_eip; +#else + unsigned long event_callback_eip; + unsigned long failsafe_callback_eip; + unsigned long syscall_callback_eip; +#endif + unsigned long vm_assist; /* VMASST_TYPE_* bitmap */ +#ifdef __x86_64__ + /* Segment base addresses. */ + uint64_t fs_base; + uint64_t gs_base_kernel; + uint64_t gs_base_user; +#endif +}; +DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context); +#endif /* !__ASSEMBLY__ */ + +/* + * Prefix forces emulation of some non-trapping instructions. + * Currently only CPUID. + */ +#ifdef __ASSEMBLY__ +#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; +#define XEN_CPUID XEN_EMULATE_PREFIX cpuid +#else +#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " +#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" +#endif + +#endif /* _ASM_X86_XEN_INTERFACE_H */ diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h new file mode 100644 index 00000000000..42a7e004ae5 --- /dev/null +++ b/arch/x86/include/asm/xen/interface_32.h @@ -0,0 +1,97 @@ +/****************************************************************************** + * arch-x86_32.h + * + * Guest OS interface to x86 32-bit Xen. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef _ASM_X86_XEN_INTERFACE_32_H +#define _ASM_X86_XEN_INTERFACE_32_H + + +/* + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. + */ +#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ +#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ +#define FLAT_RING1_SS 0xe021 /* GDT index 260 */ +#define FLAT_RING3_CS 0xe02b /* GDT index 261 */ +#define FLAT_RING3_DS 0xe033 /* GDT index 262 */ +#define FLAT_RING3_SS 0xe033 /* GDT index 262 */ + +#define FLAT_KERNEL_CS FLAT_RING1_CS +#define FLAT_KERNEL_DS FLAT_RING1_DS +#define FLAT_KERNEL_SS FLAT_RING1_SS +#define FLAT_USER_CS FLAT_RING3_CS +#define FLAT_USER_DS FLAT_RING3_DS +#define FLAT_USER_SS FLAT_RING3_SS + +/* And the trap vector is... */ +#define TRAP_INSTR "int $0x82" + +/* + * Virtual addresses beyond this are not modifiable by guest OSes. The + * machine->physical mapping table starts at this address, read-only. + */ +#define __HYPERVISOR_VIRT_START 0xF5800000 + +#ifndef __ASSEMBLY__ + +struct cpu_user_regs { + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t esi; + uint32_t edi; + uint32_t ebp; + uint32_t eax; + uint16_t error_code; /* private */ + uint16_t entry_vector; /* private */ + uint32_t eip; + uint16_t cs; + uint8_t saved_upcall_mask; + uint8_t _pad0; + uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ + uint32_t esp; + uint16_t ss, _pad1; + uint16_t es, _pad2; + uint16_t ds, _pad3; + uint16_t fs, _pad4; + uint16_t gs, _pad5; +}; +DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs); + +typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */ +}; + +struct xen_callback { + unsigned long cs; + unsigned long eip; +}; +typedef struct xen_callback xen_callback_t; + +#define XEN_CALLBACK(__cs, __eip) \ + ((struct xen_callback){ .cs = (__cs), .eip = (unsigned long)(__eip) }) +#endif /* !__ASSEMBLY__ */ + + +/* + * Page-directory addresses above 4GB do not fit into architectural %cr3. + * When accessing %cr3, or equivalent field in vcpu_guest_context, guests + * must use the following accessor macros to pack/unpack valid MFNs. + * + * Note that Xen is using the fact that the pagetable base is always + * page-aligned, and putting the 12 MSB of the address into the 12 LSB + * of cr3. + */ +#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) +#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) + +#endif /* _ASM_X86_XEN_INTERFACE_32_H */ diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h new file mode 100644 index 00000000000..100d2662b97 --- /dev/null +++ b/arch/x86/include/asm/xen/interface_64.h @@ -0,0 +1,159 @@ +#ifndef _ASM_X86_XEN_INTERFACE_64_H +#define _ASM_X86_XEN_INTERFACE_64_H + +/* + * 64-bit segment selectors + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. + */ + +#define FLAT_RING3_CS32 0xe023 /* GDT index 260 */ +#define FLAT_RING3_CS64 0xe033 /* GDT index 261 */ +#define FLAT_RING3_DS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_DS64 0x0000 /* NULL selector */ +#define FLAT_RING3_SS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_SS64 0xe02b /* GDT index 262 */ + +#define FLAT_KERNEL_DS64 FLAT_RING3_DS64 +#define FLAT_KERNEL_DS32 FLAT_RING3_DS32 +#define FLAT_KERNEL_DS FLAT_KERNEL_DS64 +#define FLAT_KERNEL_CS64 FLAT_RING3_CS64 +#define FLAT_KERNEL_CS32 FLAT_RING3_CS32 +#define FLAT_KERNEL_CS FLAT_KERNEL_CS64 +#define FLAT_KERNEL_SS64 FLAT_RING3_SS64 +#define FLAT_KERNEL_SS32 FLAT_RING3_SS32 +#define FLAT_KERNEL_SS FLAT_KERNEL_SS64 + +#define FLAT_USER_DS64 FLAT_RING3_DS64 +#define FLAT_USER_DS32 FLAT_RING3_DS32 +#define FLAT_USER_DS FLAT_USER_DS64 +#define FLAT_USER_CS64 FLAT_RING3_CS64 +#define FLAT_USER_CS32 FLAT_RING3_CS32 +#define FLAT_USER_CS FLAT_USER_CS64 +#define FLAT_USER_SS64 FLAT_RING3_SS64 +#define FLAT_USER_SS32 FLAT_RING3_SS32 +#define FLAT_USER_SS FLAT_USER_SS64 + +#define __HYPERVISOR_VIRT_START 0xFFFF800000000000 +#define __HYPERVISOR_VIRT_END 0xFFFF880000000000 +#define __MACH2PHYS_VIRT_START 0xFFFF800000000000 +#define __MACH2PHYS_VIRT_END 0xFFFF804000000000 + +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) +#endif + +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) +#endif + +/* + * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) + * @which == SEGBASE_* ; @base == 64-bit base address + * Returns 0 on success. + */ +#define SEGBASE_FS 0 +#define SEGBASE_GS_USER 1 +#define SEGBASE_GS_KERNEL 2 +#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */ + +/* + * int HYPERVISOR_iret(void) + * All arguments are on the kernel stack, in the following format. + * Never returns if successful. Current kernel context is lost. + * The saved CS is mapped as follows: + * RING0 -> RING3 kernel mode. + * RING1 -> RING3 kernel mode. + * RING2 -> RING3 kernel mode. + * RING3 -> RING3 user mode. + * However RING0 indicates that the guest kernel should return to iteself + * directly with + * orb $3,1*8(%rsp) + * iretq + * If flags contains VGCF_in_syscall: + * Restore RAX, RIP, RFLAGS, RSP. + * Discard R11, RCX, CS, SS. + * Otherwise: + * Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP. + * All other registers are saved on hypercall entry and restored to user. + */ +/* Guest exited in SYSCALL context? Return to guest with SYSRET? */ +#define _VGCF_in_syscall 8 +#define VGCF_in_syscall (1<<_VGCF_in_syscall) +#define VGCF_IN_SYSCALL VGCF_in_syscall + +#ifndef __ASSEMBLY__ + +struct iret_context { + /* Top of stack (%rsp at point of hypercall). */ + uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss; + /* Bottom of iret stack frame. */ +}; + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */ +#define __DECL_REG(name) union { \ + uint64_t r ## name, e ## name; \ + uint32_t _e ## name; \ +} +#else +/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */ +#define __DECL_REG(name) uint64_t r ## name +#endif + +struct cpu_user_regs { + uint64_t r15; + uint64_t r14; + uint64_t r13; + uint64_t r12; + __DECL_REG(bp); + __DECL_REG(bx); + uint64_t r11; + uint64_t r10; + uint64_t r9; + uint64_t r8; + __DECL_REG(ax); + __DECL_REG(cx); + __DECL_REG(dx); + __DECL_REG(si); + __DECL_REG(di); + uint32_t error_code; /* private */ + uint32_t entry_vector; /* private */ + __DECL_REG(ip); + uint16_t cs, _pad0[1]; + uint8_t saved_upcall_mask; + uint8_t _pad1[3]; + __DECL_REG(flags); /* rflags.IF == !saved_upcall_mask */ + __DECL_REG(sp); + uint16_t ss, _pad2[3]; + uint16_t es, _pad3[3]; + uint16_t ds, _pad4[3]; + uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */ + uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */ +}; +DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs); + +#undef __DECL_REG + +#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12) +#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12) + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ +}; + +typedef unsigned long xen_callback_t; + +#define XEN_CALLBACK(__cs, __rip) \ + ((unsigned long)(__rip)) + +#endif /* !__ASSEMBLY__ */ + + +#endif /* _ASM_X86_XEN_INTERFACE_64_H */ diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h new file mode 100644 index 00000000000..bc628998a1b --- /dev/null +++ b/arch/x86/include/asm/xen/page.h @@ -0,0 +1,165 @@ +#ifndef _ASM_X86_XEN_PAGE_H +#define _ASM_X86_XEN_PAGE_H + +#include <linux/pfn.h> + +#include <asm/uaccess.h> +#include <asm/pgtable.h> + +#include <xen/features.h> + +/* Xen machine address */ +typedef struct xmaddr { + phys_addr_t maddr; +} xmaddr_t; + +/* Xen pseudo-physical address */ +typedef struct xpaddr { + phys_addr_t paddr; +} xpaddr_t; + +#define XMADDR(x) ((xmaddr_t) { .maddr = (x) }) +#define XPADDR(x) ((xpaddr_t) { .paddr = (x) }) + +/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ +#define INVALID_P2M_ENTRY (~0UL) +#define FOREIGN_FRAME_BIT (1UL<<31) +#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) + +/* Maximum amount of memory we can handle in a domain in pages */ +#define MAX_DOMAIN_PAGES \ + ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) + + +extern unsigned long get_phys_to_machine(unsigned long pfn); +extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn); + +static inline unsigned long pfn_to_mfn(unsigned long pfn) +{ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return pfn; + + return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT; +} + +static inline int phys_to_machine_mapping_valid(unsigned long pfn) +{ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return 1; + + return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY; +} + +static inline unsigned long mfn_to_pfn(unsigned long mfn) +{ + unsigned long pfn; + + if (xen_feature(XENFEAT_auto_translated_physmap)) + return mfn; + +#if 0 + if (unlikely((mfn >> machine_to_phys_order) != 0)) + return max_mapnr; +#endif + + pfn = 0; + /* + * The array access can fail (e.g., device space beyond end of RAM). + * In such cases it doesn't matter what we return (we return garbage), + * but we must handle the fault without crashing! + */ + __get_user(pfn, &machine_to_phys_mapping[mfn]); + + return pfn; +} + +static inline xmaddr_t phys_to_machine(xpaddr_t phys) +{ + unsigned offset = phys.paddr & ~PAGE_MASK; + return XMADDR(PFN_PHYS(pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset); +} + +static inline xpaddr_t machine_to_phys(xmaddr_t machine) +{ + unsigned offset = machine.maddr & ~PAGE_MASK; + return XPADDR(PFN_PHYS(mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset); +} + +/* + * We detect special mappings in one of two ways: + * 1. If the MFN is an I/O page then Xen will set the m2p entry + * to be outside our maximum possible pseudophys range. + * 2. If the MFN belongs to a different domain then we will certainly + * not have MFN in our p2m table. Conversely, if the page is ours, + * then we'll have p2m(m2p(MFN))==MFN. + * If we detect a special mapping then it doesn't have a 'struct page'. + * We force !pfn_valid() by returning an out-of-range pointer. + * + * NB. These checks require that, for any MFN that is not in our reservation, + * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if + * we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN. + * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety. + * + * NB2. When deliberately mapping foreign pages into the p2m table, you *must* + * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we + * require. In all the cases we care about, the FOREIGN_FRAME bit is + * masked (e.g., pfn_to_mfn()) so behaviour there is correct. + */ +static inline unsigned long mfn_to_local_pfn(unsigned long mfn) +{ + extern unsigned long max_mapnr; + unsigned long pfn = mfn_to_pfn(mfn); + if ((pfn < max_mapnr) + && !xen_feature(XENFEAT_auto_translated_physmap) + && (get_phys_to_machine(pfn) != mfn)) + return max_mapnr; /* force !pfn_valid() */ + /* XXX fixme; not true with sparsemem */ + return pfn; +} + +/* VIRT <-> MACHINE conversion */ +#define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) +#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) +#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) + +static inline unsigned long pte_mfn(pte_t pte) +{ + return (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT; +} + +static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot) +{ + pte_t pte; + + pte.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) | + (pgprot_val(pgprot) & __supported_pte_mask); + + return pte; +} + +static inline pteval_t pte_val_ma(pte_t pte) +{ + return pte.pte; +} + +static inline pte_t __pte_ma(pteval_t x) +{ + return (pte_t) { .pte = x }; +} + +#define pmd_val_ma(v) ((v).pmd) +#ifdef __PAGETABLE_PUD_FOLDED +#define pud_val_ma(v) ((v).pgd.pgd) +#else +#define pud_val_ma(v) ((v).pud) +#endif +#define __pmd_ma(x) ((pmd_t) { (x) } ) + +#define pgd_val_ma(x) ((x).pgd) + + +xmaddr_t arbitrary_virt_to_machine(void *address); +void make_lowmem_page_readonly(void *vaddr); +void make_lowmem_page_readwrite(void *vaddr); + +#endif /* _ASM_X86_XEN_PAGE_H */ diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h new file mode 100644 index 00000000000..11b3bb86e17 --- /dev/null +++ b/arch/x86/include/asm/xor.h @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "xor_32.h" +#else +# include "xor_64.h" +#endif diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h new file mode 100644 index 00000000000..133b40a0f49 --- /dev/null +++ b/arch/x86/include/asm/xor_32.h @@ -0,0 +1,888 @@ +#ifndef _ASM_X86_XOR_32_H +#define _ASM_X86_XOR_32_H + +/* + * Optimized RAID-5 checksumming functions for MMX and SSE. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * You should have received a copy of the GNU General Public License + * (for example /usr/src/linux/COPYING); if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * High-speed RAID5 checksumming functions utilizing MMX instructions. + * Copyright (C) 1998 Ingo Molnar. + */ + +#define LD(x, y) " movq 8*("#x")(%1), %%mm"#y" ;\n" +#define ST(x, y) " movq %%mm"#y", 8*("#x")(%1) ;\n" +#define XO1(x, y) " pxor 8*("#x")(%2), %%mm"#y" ;\n" +#define XO2(x, y) " pxor 8*("#x")(%3), %%mm"#y" ;\n" +#define XO3(x, y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" +#define XO4(x, y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" + +#include <asm/i387.h> + +static void +xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + unsigned long lines = bytes >> 7; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + XO1(i, 0) \ + ST(i, 0) \ + XO1(i+1, 1) \ + ST(i+1, 1) \ + XO1(i + 2, 2) \ + ST(i + 2, 2) \ + XO1(i + 3, 3) \ + ST(i + 3, 3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2) + : + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + unsigned long lines = bytes >> 7; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + XO2(i, 0) \ + ST(i, 0) \ + XO2(i + 1, 1) \ + ST(i + 1, 1) \ + XO2(i + 2, 2) \ + ST(i + 2, 2) \ + XO2(i + 3, 3) \ + ST(i + 3, 3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " addl $128, %3 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + unsigned long lines = bytes >> 7; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + XO3(i, 0) \ + ST(i, 0) \ + XO3(i + 1, 1) \ + ST(i + 1, 1) \ + XO3(i + 2, 2) \ + ST(i + 2, 2) \ + XO3(i + 3, 3) \ + ST(i + 3, 3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " addl $128, %3 ;\n" + " addl $128, %4 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) + : + : "memory"); + + kernel_fpu_end(); +} + + +static void +xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + unsigned long lines = bytes >> 7; + + kernel_fpu_begin(); + + /* Make sure GCC forgets anything it knows about p4 or p5, + such that it won't pass to the asm volatile below a + register that is shared with any other variable. That's + because we modify p4 and p5 there, but we can't mark them + as read/write, otherwise we'd overflow the 10-asm-operands + limit of GCC < 3.1. */ + asm("" : "+r" (p4), "+r" (p5)); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + XO3(i, 0) \ + XO3(i + 1, 1) \ + XO3(i + 2, 2) \ + XO3(i + 3, 3) \ + XO4(i, 0) \ + ST(i, 0) \ + XO4(i + 1, 1) \ + ST(i + 1, 1) \ + XO4(i + 2, 2) \ + ST(i + 2, 2) \ + XO4(i + 3, 3) \ + ST(i + 3, 3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " addl $128, %3 ;\n" + " addl $128, %4 ;\n" + " addl $128, %5 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : "r" (p4), "r" (p5) + : "memory"); + + /* p4 and p5 were modified, and now the variables are dead. + Clobber them just to be sure nobody does something stupid + like assuming they have some legal value. */ + asm("" : "=r" (p4), "=r" (p5)); + + kernel_fpu_end(); +} + +#undef LD +#undef XO1 +#undef XO2 +#undef XO3 +#undef XO4 +#undef ST +#undef BLOCK + +static void +xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + unsigned long lines = bytes >> 6; + + kernel_fpu_begin(); + + asm volatile( + " .align 32 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " movq 16(%1), %%mm2 ;\n" + " movq %%mm0, (%1) ;\n" + " pxor 8(%2), %%mm1 ;\n" + " movq 24(%1), %%mm3 ;\n" + " movq %%mm1, 8(%1) ;\n" + " pxor 16(%2), %%mm2 ;\n" + " movq 32(%1), %%mm4 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 24(%2), %%mm3 ;\n" + " movq 40(%1), %%mm5 ;\n" + " movq %%mm3, 24(%1) ;\n" + " pxor 32(%2), %%mm4 ;\n" + " movq 48(%1), %%mm6 ;\n" + " movq %%mm4, 32(%1) ;\n" + " pxor 40(%2), %%mm5 ;\n" + " movq 56(%1), %%mm7 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 48(%2), %%mm6 ;\n" + " pxor 56(%2), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2) + : + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + unsigned long lines = bytes >> 6; + + kernel_fpu_begin(); + + asm volatile( + " .align 32,0x90 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " movq 16(%1), %%mm2 ;\n" + " pxor 8(%2), %%mm1 ;\n" + " pxor (%3), %%mm0 ;\n" + " pxor 16(%2), %%mm2 ;\n" + " movq %%mm0, (%1) ;\n" + " pxor 8(%3), %%mm1 ;\n" + " pxor 16(%3), %%mm2 ;\n" + " movq 24(%1), %%mm3 ;\n" + " movq %%mm1, 8(%1) ;\n" + " movq 32(%1), %%mm4 ;\n" + " movq 40(%1), %%mm5 ;\n" + " pxor 24(%2), %%mm3 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 32(%2), %%mm4 ;\n" + " pxor 24(%3), %%mm3 ;\n" + " pxor 40(%2), %%mm5 ;\n" + " movq %%mm3, 24(%1) ;\n" + " pxor 32(%3), %%mm4 ;\n" + " pxor 40(%3), %%mm5 ;\n" + " movq 48(%1), %%mm6 ;\n" + " movq %%mm4, 32(%1) ;\n" + " movq 56(%1), %%mm7 ;\n" + " pxor 48(%2), %%mm6 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 56(%2), %%mm7 ;\n" + " pxor 48(%3), %%mm6 ;\n" + " pxor 56(%3), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " addl $64, %3 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : + : "memory" ); + + kernel_fpu_end(); +} + +static void +xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + unsigned long lines = bytes >> 6; + + kernel_fpu_begin(); + + asm volatile( + " .align 32,0x90 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " movq 16(%1), %%mm2 ;\n" + " pxor 8(%2), %%mm1 ;\n" + " pxor (%3), %%mm0 ;\n" + " pxor 16(%2), %%mm2 ;\n" + " pxor 8(%3), %%mm1 ;\n" + " pxor (%4), %%mm0 ;\n" + " movq 24(%1), %%mm3 ;\n" + " pxor 16(%3), %%mm2 ;\n" + " pxor 8(%4), %%mm1 ;\n" + " movq %%mm0, (%1) ;\n" + " movq 32(%1), %%mm4 ;\n" + " pxor 24(%2), %%mm3 ;\n" + " pxor 16(%4), %%mm2 ;\n" + " movq %%mm1, 8(%1) ;\n" + " movq 40(%1), %%mm5 ;\n" + " pxor 32(%2), %%mm4 ;\n" + " pxor 24(%3), %%mm3 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 40(%2), %%mm5 ;\n" + " pxor 32(%3), %%mm4 ;\n" + " pxor 24(%4), %%mm3 ;\n" + " movq %%mm3, 24(%1) ;\n" + " movq 56(%1), %%mm7 ;\n" + " movq 48(%1), %%mm6 ;\n" + " pxor 40(%3), %%mm5 ;\n" + " pxor 32(%4), %%mm4 ;\n" + " pxor 48(%2), %%mm6 ;\n" + " movq %%mm4, 32(%1) ;\n" + " pxor 56(%2), %%mm7 ;\n" + " pxor 40(%4), %%mm5 ;\n" + " pxor 48(%3), %%mm6 ;\n" + " pxor 56(%3), %%mm7 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 48(%4), %%mm6 ;\n" + " pxor 56(%4), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " addl $64, %3 ;\n" + " addl $64, %4 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) + : + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + unsigned long lines = bytes >> 6; + + kernel_fpu_begin(); + + /* Make sure GCC forgets anything it knows about p4 or p5, + such that it won't pass to the asm volatile below a + register that is shared with any other variable. That's + because we modify p4 and p5 there, but we can't mark them + as read/write, otherwise we'd overflow the 10-asm-operands + limit of GCC < 3.1. */ + asm("" : "+r" (p4), "+r" (p5)); + + asm volatile( + " .align 32,0x90 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " pxor 8(%2), %%mm1 ;\n" + " movq 16(%1), %%mm2 ;\n" + " pxor (%3), %%mm0 ;\n" + " pxor 8(%3), %%mm1 ;\n" + " pxor 16(%2), %%mm2 ;\n" + " pxor (%4), %%mm0 ;\n" + " pxor 8(%4), %%mm1 ;\n" + " pxor 16(%3), %%mm2 ;\n" + " movq 24(%1), %%mm3 ;\n" + " pxor (%5), %%mm0 ;\n" + " pxor 8(%5), %%mm1 ;\n" + " movq %%mm0, (%1) ;\n" + " pxor 16(%4), %%mm2 ;\n" + " pxor 24(%2), %%mm3 ;\n" + " movq %%mm1, 8(%1) ;\n" + " pxor 16(%5), %%mm2 ;\n" + " pxor 24(%3), %%mm3 ;\n" + " movq 32(%1), %%mm4 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 24(%4), %%mm3 ;\n" + " pxor 32(%2), %%mm4 ;\n" + " movq 40(%1), %%mm5 ;\n" + " pxor 24(%5), %%mm3 ;\n" + " pxor 32(%3), %%mm4 ;\n" + " pxor 40(%2), %%mm5 ;\n" + " movq %%mm3, 24(%1) ;\n" + " pxor 32(%4), %%mm4 ;\n" + " pxor 40(%3), %%mm5 ;\n" + " movq 48(%1), %%mm6 ;\n" + " movq 56(%1), %%mm7 ;\n" + " pxor 32(%5), %%mm4 ;\n" + " pxor 40(%4), %%mm5 ;\n" + " pxor 48(%2), %%mm6 ;\n" + " pxor 56(%2), %%mm7 ;\n" + " movq %%mm4, 32(%1) ;\n" + " pxor 48(%3), %%mm6 ;\n" + " pxor 56(%3), %%mm7 ;\n" + " pxor 40(%5), %%mm5 ;\n" + " pxor 48(%4), %%mm6 ;\n" + " pxor 56(%4), %%mm7 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 48(%5), %%mm6 ;\n" + " pxor 56(%5), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " addl $64, %3 ;\n" + " addl $64, %4 ;\n" + " addl $64, %5 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : "r" (p4), "r" (p5) + : "memory"); + + /* p4 and p5 were modified, and now the variables are dead. + Clobber them just to be sure nobody does something stupid + like assuming they have some legal value. */ + asm("" : "=r" (p4), "=r" (p5)); + + kernel_fpu_end(); +} + +static struct xor_block_template xor_block_pII_mmx = { + .name = "pII_mmx", + .do_2 = xor_pII_mmx_2, + .do_3 = xor_pII_mmx_3, + .do_4 = xor_pII_mmx_4, + .do_5 = xor_pII_mmx_5, +}; + +static struct xor_block_template xor_block_p5_mmx = { + .name = "p5_mmx", + .do_2 = xor_p5_mmx_2, + .do_3 = xor_p5_mmx_3, + .do_4 = xor_p5_mmx_4, + .do_5 = xor_p5_mmx_5, +}; + +/* + * Cache avoiding checksumming functions utilizing KNI instructions + * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) + */ + +#define XMMS_SAVE \ +do { \ + preempt_disable(); \ + cr0 = read_cr0(); \ + clts(); \ + asm volatile( \ + "movups %%xmm0,(%0) ;\n\t" \ + "movups %%xmm1,0x10(%0) ;\n\t" \ + "movups %%xmm2,0x20(%0) ;\n\t" \ + "movups %%xmm3,0x30(%0) ;\n\t" \ + : \ + : "r" (xmm_save) \ + : "memory"); \ +} while (0) + +#define XMMS_RESTORE \ +do { \ + asm volatile( \ + "sfence ;\n\t" \ + "movups (%0),%%xmm0 ;\n\t" \ + "movups 0x10(%0),%%xmm1 ;\n\t" \ + "movups 0x20(%0),%%xmm2 ;\n\t" \ + "movups 0x30(%0),%%xmm3 ;\n\t" \ + : \ + : "r" (xmm_save) \ + : "memory"); \ + write_cr0(cr0); \ + preempt_enable(); \ +} while (0) + +#define ALIGN16 __attribute__((aligned(16))) + +#define OFFS(x) "16*("#x")" +#define PF_OFFS(x) "256+16*("#x")" +#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" +#define LD(x, y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n" +#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n" +#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n" +#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n" +#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n" +#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n" +#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n" +#define XO1(x, y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n" +#define XO2(x, y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n" +#define XO3(x, y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n" +#define XO4(x, y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n" +#define XO5(x, y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n" + + +static void +xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + unsigned long lines = bytes >> 8; + char xmm_save[16*4] ALIGN16; + int cr0; + + XMMS_SAVE; + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + LD(i, 0) \ + LD(i + 1, 1) \ + PF1(i) \ + PF1(i + 2) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $256, %1 ;\n" + " addl $256, %2 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2) + : + : "memory"); + + XMMS_RESTORE; +} + +static void +xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + unsigned long lines = bytes >> 8; + char xmm_save[16*4] ALIGN16; + int cr0; + + XMMS_SAVE; + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i + 2) \ + LD(i,0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF2(i) \ + PF2(i + 2) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO1(i,0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + XO2(i,0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + ST(i,0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $256, %1 ;\n" + " addl $256, %2 ;\n" + " addl $256, %3 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r"(p2), "+r"(p3) + : + : "memory" ); + + XMMS_RESTORE; +} + +static void +xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + unsigned long lines = bytes >> 8; + char xmm_save[16*4] ALIGN16; + int cr0; + + XMMS_SAVE; + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i + 2) \ + LD(i,0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF2(i) \ + PF2(i + 2) \ + XO1(i,0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + PF3(i) \ + PF3(i + 2) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO2(i,0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + XO3(i,0) \ + XO3(i + 1, 1) \ + XO3(i + 2, 2) \ + XO3(i + 3, 3) \ + ST(i,0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $256, %1 ;\n" + " addl $256, %2 ;\n" + " addl $256, %3 ;\n" + " addl $256, %4 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) + : + : "memory" ); + + XMMS_RESTORE; +} + +static void +xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + unsigned long lines = bytes >> 8; + char xmm_save[16*4] ALIGN16; + int cr0; + + XMMS_SAVE; + + /* Make sure GCC forgets anything it knows about p4 or p5, + such that it won't pass to the asm volatile below a + register that is shared with any other variable. That's + because we modify p4 and p5 there, but we can't mark them + as read/write, otherwise we'd overflow the 10-asm-operands + limit of GCC < 3.1. */ + asm("" : "+r" (p4), "+r" (p5)); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i + 2) \ + LD(i,0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF2(i) \ + PF2(i + 2) \ + XO1(i,0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + PF3(i) \ + PF3(i + 2) \ + XO2(i,0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + PF4(i) \ + PF4(i + 2) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO3(i,0) \ + XO3(i + 1, 1) \ + XO3(i + 2, 2) \ + XO3(i + 3, 3) \ + XO4(i,0) \ + XO4(i + 1, 1) \ + XO4(i + 2, 2) \ + XO4(i + 3, 3) \ + ST(i,0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $256, %1 ;\n" + " addl $256, %2 ;\n" + " addl $256, %3 ;\n" + " addl $256, %4 ;\n" + " addl $256, %5 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : "r" (p4), "r" (p5) + : "memory"); + + /* p4 and p5 were modified, and now the variables are dead. + Clobber them just to be sure nobody does something stupid + like assuming they have some legal value. */ + asm("" : "=r" (p4), "=r" (p5)); + + XMMS_RESTORE; +} + +static struct xor_block_template xor_block_pIII_sse = { + .name = "pIII_sse", + .do_2 = xor_sse_2, + .do_3 = xor_sse_3, + .do_4 = xor_sse_4, + .do_5 = xor_sse_5, +}; + +/* Also try the generic routines. */ +#include <asm-generic/xor.h> + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ +do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_8regs_p); \ + xor_speed(&xor_block_32regs); \ + xor_speed(&xor_block_32regs_p); \ + if (cpu_has_xmm) \ + xor_speed(&xor_block_pIII_sse); \ + if (cpu_has_mmx) { \ + xor_speed(&xor_block_pII_mmx); \ + xor_speed(&xor_block_p5_mmx); \ + } \ +} while (0) + +/* We force the use of the SSE xor block because it can write around L2. + We may also be able to load into the L1 only depending on how the cpu + deals with a load to a line that is being prefetched. */ +#define XOR_SELECT_TEMPLATE(FASTEST) \ + (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST) + +#endif /* _ASM_X86_XOR_32_H */ diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h new file mode 100644 index 00000000000..1549b5e261f --- /dev/null +++ b/arch/x86/include/asm/xor_64.h @@ -0,0 +1,361 @@ +#ifndef _ASM_X86_XOR_64_H +#define _ASM_X86_XOR_64_H + +/* + * Optimized RAID-5 checksumming functions for MMX and SSE. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * You should have received a copy of the GNU General Public License + * (for example /usr/src/linux/COPYING); if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + +/* + * Cache avoiding checksumming functions utilizing KNI instructions + * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) + */ + +/* + * Based on + * High-speed RAID5 checksumming functions utilizing SSE instructions. + * Copyright (C) 1998 Ingo Molnar. + */ + +/* + * x86-64 changes / gcc fixes from Andi Kleen. + * Copyright 2002 Andi Kleen, SuSE Labs. + * + * This hasn't been optimized for the hammer yet, but there are likely + * no advantages to be gotten from x86-64 here anyways. + */ + +typedef struct { + unsigned long a, b; +} __attribute__((aligned(16))) xmm_store_t; + +/* Doesn't use gcc to save the XMM registers, because there is no easy way to + tell it to do a clts before the register saving. */ +#define XMMS_SAVE \ +do { \ + preempt_disable(); \ + asm volatile( \ + "movq %%cr0,%0 ;\n\t" \ + "clts ;\n\t" \ + "movups %%xmm0,(%1) ;\n\t" \ + "movups %%xmm1,0x10(%1) ;\n\t" \ + "movups %%xmm2,0x20(%1) ;\n\t" \ + "movups %%xmm3,0x30(%1) ;\n\t" \ + : "=&r" (cr0) \ + : "r" (xmm_save) \ + : "memory"); \ +} while (0) + +#define XMMS_RESTORE \ +do { \ + asm volatile( \ + "sfence ;\n\t" \ + "movups (%1),%%xmm0 ;\n\t" \ + "movups 0x10(%1),%%xmm1 ;\n\t" \ + "movups 0x20(%1),%%xmm2 ;\n\t" \ + "movups 0x30(%1),%%xmm3 ;\n\t" \ + "movq %0,%%cr0 ;\n\t" \ + : \ + : "r" (cr0), "r" (xmm_save) \ + : "memory"); \ + preempt_enable(); \ +} while (0) + +#define OFFS(x) "16*("#x")" +#define PF_OFFS(x) "256+16*("#x")" +#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n" +#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n" +#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n" +#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n" +#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n" +#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n" +#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n" +#define PF5(x) " prefetchnta "PF_OFFS(x)"(%[p6]) ;\n" +#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n" +#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n" +#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n" +#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n" +#define XO5(x, y) " xorps "OFFS(x)"(%[p6]), %%xmm"#y" ;\n" + + +static void +xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + unsigned int lines = bytes >> 8; + unsigned long cr0; + xmm_store_t xmm_save[4]; + + XMMS_SAVE; + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + LD(i, 0) \ + LD(i + 1, 1) \ + PF1(i) \ + PF1(i + 2) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addq %[inc], %[p1] ;\n" + " addq %[inc], %[p2] ;\n" + " decl %[cnt] ; jnz 1b" + : [p1] "+r" (p1), [p2] "+r" (p2), [cnt] "+r" (lines) + : [inc] "r" (256UL) + : "memory"); + + XMMS_RESTORE; +} + +static void +xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + unsigned int lines = bytes >> 8; + xmm_store_t xmm_save[4]; + unsigned long cr0; + + XMMS_SAVE; + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i + 2) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF2(i) \ + PF2(i + 2) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addq %[inc], %[p1] ;\n" + " addq %[inc], %[p2] ;\n" + " addq %[inc], %[p3] ;\n" + " decl %[cnt] ; jnz 1b" + : [cnt] "+r" (lines), + [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) + : [inc] "r" (256UL) + : "memory"); + XMMS_RESTORE; +} + +static void +xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + unsigned int lines = bytes >> 8; + xmm_store_t xmm_save[4]; + unsigned long cr0; + + XMMS_SAVE; + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i + 2) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF2(i) \ + PF2(i + 2) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + PF3(i) \ + PF3(i + 2) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + XO3(i, 0) \ + XO3(i + 1, 1) \ + XO3(i + 2, 2) \ + XO3(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addq %[inc], %[p1] ;\n" + " addq %[inc], %[p2] ;\n" + " addq %[inc], %[p3] ;\n" + " addq %[inc], %[p4] ;\n" + " decl %[cnt] ; jnz 1b" + : [cnt] "+c" (lines), + [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) + : [inc] "r" (256UL) + : "memory" ); + + XMMS_RESTORE; +} + +static void +xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + unsigned int lines = bytes >> 8; + xmm_store_t xmm_save[4]; + unsigned long cr0; + + XMMS_SAVE; + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i + 2) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF2(i) \ + PF2(i + 2) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + PF3(i) \ + PF3(i + 2) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + PF4(i) \ + PF4(i + 2) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO3(i, 0) \ + XO3(i + 1, 1) \ + XO3(i + 2, 2) \ + XO3(i + 3, 3) \ + XO4(i, 0) \ + XO4(i + 1, 1) \ + XO4(i + 2, 2) \ + XO4(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addq %[inc], %[p1] ;\n" + " addq %[inc], %[p2] ;\n" + " addq %[inc], %[p3] ;\n" + " addq %[inc], %[p4] ;\n" + " addq %[inc], %[p5] ;\n" + " decl %[cnt] ; jnz 1b" + : [cnt] "+c" (lines), + [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4), + [p5] "+r" (p5) + : [inc] "r" (256UL) + : "memory"); + + XMMS_RESTORE; +} + +static struct xor_block_template xor_block_sse = { + .name = "generic_sse", + .do_2 = xor_sse_2, + .do_3 = xor_sse_3, + .do_4 = xor_sse_4, + .do_5 = xor_sse_5, +}; + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ +do { \ + xor_speed(&xor_block_sse); \ +} while (0) + +/* We force the use of the SSE xor block because it can write around L2. + We may also be able to load into the L1 only depending on how the cpu + deals with a load to a line that is being prefetched. */ +#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sse) + +#endif /* _ASM_X86_XOR_64_H */ diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h new file mode 100644 index 00000000000..08e9a1ac07a --- /dev/null +++ b/arch/x86/include/asm/xsave.h @@ -0,0 +1,118 @@ +#ifndef __ASM_X86_XSAVE_H +#define __ASM_X86_XSAVE_H + +#include <linux/types.h> +#include <asm/processor.h> +#include <asm/i387.h> + +#define XSTATE_FP 0x1 +#define XSTATE_SSE 0x2 + +#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) + +#define FXSAVE_SIZE 512 + +/* + * These are the features that the OS can handle currently. + */ +#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE) + +#ifdef CONFIG_X86_64 +#define REX_PREFIX "0x48, " +#else +#define REX_PREFIX +#endif + +extern unsigned int xstate_size; +extern u64 pcntxt_mask; +extern struct xsave_struct *init_xstate_buf; + +extern void xsave_cntxt_init(void); +extern void xsave_init(void); +extern int init_fpu(struct task_struct *child); +extern int check_for_xstate(struct i387_fxsave_struct __user *buf, + void __user *fpstate, + struct _fpx_sw_bytes *sw); + +static inline int xrstor_checking(struct xsave_struct *fx) +{ + int err; + + asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [err] "=r" (err) + : "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0) + : "memory"); + + return err; +} + +static inline int xsave_user(struct xsave_struct __user *buf) +{ + int err; + __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + _ASM_ALIGN "\n" + _ASM_PTR "1b,3b\n" + ".previous" + : [err] "=r" (err) + : "D" (buf), "a" (-1), "d" (-1), "0" (0) + : "memory"); + if (unlikely(err) && __clear_user(buf, xstate_size)) + err = -EFAULT; + /* No need to clear here because the caller clears USED_MATH */ + return err; +} + +static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) +{ + int err; + struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); + u32 lmask = mask; + u32 hmask = mask >> 32; + + __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + _ASM_ALIGN "\n" + _ASM_PTR "1b,3b\n" + ".previous" + : [err] "=r" (err) + : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) + : "memory"); /* memory required? */ + return err; +} + +static inline void xrstor_state(struct xsave_struct *fx, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + + asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); +} + +static inline void xsave(struct task_struct *tsk) +{ + /* This, however, we can work around by forcing the compiler to select + an addressing mode that doesn't require extended registers. */ + __asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27" + : : "D" (&(tsk->thread.xstate->xsave)), + "a" (-1), "d"(-1) : "memory"); +} +#endif diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0d41f0343dc..d7e5a58ee22 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -23,7 +23,7 @@ CFLAGS_hpet.o := $(nostackp) CFLAGS_tsc.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o -obj-y += traps.o irq_$(BITS).o dumpstack_$(BITS).o +obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o @@ -60,8 +60,8 @@ obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o -obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o -obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o +obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o @@ -108,7 +108,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o # 64 bit specific files ifeq ($(CONFIG_X86_64),y) obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o - obj-y += bios_uv.o + obj-y += bios_uv.o uv_irq.o uv_sysfs.o obj-y += genx2apic_cluster.o obj-y += genx2apic_phys.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index eb875cdc736..8c1f76abae9 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -153,12 +153,13 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) } #ifdef CONFIG_PCI_MMCONFIG + +static int acpi_mcfg_64bit_base_addr __initdata = FALSE; + /* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ struct acpi_mcfg_allocation *pci_mmcfg_config; int pci_mmcfg_config_num; -static int acpi_mcfg_64bit_base_addr __initdata = FALSE; - static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) { if (!strcmp(mcfg->header.oem_id, "SGI")) @@ -1136,7 +1137,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) return gsi; } if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { - pr_debug(KERN_DEBUG "Pin %d-%d already programmed\n", + pr_debug("Pin %d-%d already programmed\n", mp_ioapic_routing[ioapic].apic_id, ioapic_pin); #ifdef CONFIG_X86_32 return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); @@ -1256,7 +1257,7 @@ static int __init acpi_parse_madt_ioapic_entries(void) count = acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, - NR_IRQ_VECTORS); + nr_irqs); if (count < 0) { printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n"); @@ -1276,7 +1277,7 @@ static int __init acpi_parse_madt_ioapic_entries(void) count = acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, - NR_IRQ_VECTORS); + nr_irqs); if (count < 0) { printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); /* TBD: Cleanup to allow fallback to MPS */ @@ -1598,6 +1599,11 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), }, }, + {} +}; + +/* second table for DMI checks that should run after early-quirks */ +static struct dmi_system_id __initdata acpi_dmi_table_late[] = { /* * HP laptops which use a DSDT reporting as HP/SB400/10000, * which includes some code which overrides all temperature @@ -1726,6 +1732,9 @@ int __init early_acpi_boot_init(void) int __init acpi_boot_init(void) { + /* those are executed after early-quirks are executed */ + dmi_check_system(acpi_dmi_table_late); + /* * If acpi_disabled, bail out * One exception: acpi=ht continues far enough to enumerate LAPICs diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 426e5d91b63..806b4e9051b 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -10,6 +10,7 @@ #include <linux/dmi.h> #include <linux/cpumask.h> #include <asm/segment.h> +#include <asm/desc.h> #include "realmode/wakeup.h" #include "sleep.h" @@ -21,7 +22,7 @@ unsigned long acpi_realmode_flags; static unsigned long acpi_realmode; #if defined(CONFIG_SMP) && defined(CONFIG_64BIT) -static char temp_stack[10240]; +static char temp_stack[4096]; #endif /** @@ -97,7 +98,9 @@ int acpi_save_state_mem(void) #else /* CONFIG_64BIT */ header->trampoline_segment = setup_trampoline() >> 4; #ifdef CONFIG_SMP - stack_start.sp = temp_stack + 4096; + stack_start.sp = temp_stack + sizeof(temp_stack); + early_gdt_descr.address = + (unsigned long)get_cpu_gdt_table(smp_processor_id()); #endif initial_code = (unsigned long)wakeup_long64; saved_magic = 0x123456789abcdef0; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 4cd8083c58b..0cdcda35a05 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -212,7 +212,7 @@ static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) /* Programs the physical address of the device table into the IOMMU hardware */ static void __init iommu_set_device_table(struct amd_iommu *iommu) { - u32 entry; + u64 entry; BUG_ON(iommu->mmio_base == NULL); diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic.c index 21c831d96af..04a7f960bbc 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic.c @@ -23,11 +23,13 @@ #include <linux/mc146818rtc.h> #include <linux/kernel_stat.h> #include <linux/sysdev.h> +#include <linux/ioport.h> #include <linux/cpu.h> #include <linux/clockchips.h> #include <linux/acpi_pmtmr.h> #include <linux/module.h> #include <linux/dmi.h> +#include <linux/dmar.h> #include <asm/atomic.h> #include <asm/smp.h> @@ -36,8 +38,14 @@ #include <asm/desc.h> #include <asm/arch_hooks.h> #include <asm/hpet.h> +#include <asm/pgalloc.h> #include <asm/i8253.h> #include <asm/nmi.h> +#include <asm/idle.h> +#include <asm/proto.h> +#include <asm/timex.h> +#include <asm/apic.h> +#include <asm/i8259.h> #include <mach_apic.h> #include <mach_apicdef.h> @@ -50,16 +58,58 @@ # error SPURIOUS_APIC_VECTOR definition error #endif -unsigned long mp_lapic_addr; - +#ifdef CONFIG_X86_32 /* * Knob to control our willingness to enable the local APIC. * * +1=force-enable */ static int force_enable_local_apic; -int disable_apic; +/* + * APIC command line parameters + */ +static int __init parse_lapic(char *arg) +{ + force_enable_local_apic = 1; + return 0; +} +early_param("lapic", parse_lapic); +/* Local APIC was disabled by the BIOS and enabled by the kernel */ +static int enabled_via_apicbase; + +#endif + +#ifdef CONFIG_X86_64 +static int apic_calibrate_pmtmr __initdata; +static __init int setup_apicpmtimer(char *s) +{ + apic_calibrate_pmtmr = 1; + notsc_setup(NULL); + return 0; +} +__setup("apicpmtimer", setup_apicpmtimer); +#endif + +#ifdef CONFIG_X86_64 +#define HAVE_X2APIC +#endif + +#ifdef HAVE_X2APIC +int x2apic; +/* x2apic enabled before OS handover */ +int x2apic_preenabled; +int disable_x2apic; +static __init int setup_nox2apic(char *str) +{ + disable_x2apic = 1; + setup_clear_cpu_cap(X86_FEATURE_X2APIC); + return 0; +} +early_param("nox2apic", setup_nox2apic); +#endif +unsigned long mp_lapic_addr; +int disable_apic; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __cpuinitdata; /* Local APIC timer works in C2 */ @@ -110,9 +160,6 @@ static struct clock_event_device lapic_clockevent = { }; static DEFINE_PER_CPU(struct clock_event_device, lapic_events); -/* Local APIC was disabled by the BIOS and enabled by the kernel */ -static int enabled_via_apicbase; - static unsigned long apic_phys; /* @@ -202,6 +249,42 @@ static struct apic_ops xapic_ops = { struct apic_ops __read_mostly *apic_ops = &xapic_ops; EXPORT_SYMBOL_GPL(apic_ops); +#ifdef HAVE_X2APIC +static void x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return; +} + +static u32 safe_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return 0; +} + +void x2apic_icr_write(u32 low, u32 id) +{ + wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); +} + +u64 x2apic_icr_read(void) +{ + unsigned long val; + + rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); + return val; +} + +static struct apic_ops x2apic_ops = { + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .icr_read = x2apic_icr_read, + .icr_write = x2apic_icr_write, + .wait_icr_idle = x2apic_wait_icr_idle, + .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, +}; +#endif + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ @@ -219,6 +302,7 @@ void __cpuinit enable_NMI_through_LVT0(void) apic_write(APIC_LVT0, v); } +#ifdef CONFIG_X86_32 /** * get_physical_broadcast - Get number of physical broadcast IDs */ @@ -226,6 +310,7 @@ int get_physical_broadcast(void) { return modern_apic() ? 0xff : 0xf; } +#endif /** * lapic_get_maxlvt - get the maximum number of local vector table entries @@ -247,11 +332,7 @@ int lapic_get_maxlvt(void) */ /* Clock divisor */ -#ifdef CONFG_X86_64 -#define APIC_DIVISOR 1 -#else #define APIC_DIVISOR 16 -#endif /* * This function sets up the local APIC timer, with a timeout of @@ -383,7 +464,7 @@ static void lapic_timer_broadcast(cpumask_t mask) * Setup the local APIC timer for this CPU. Copy the initilized values * of the boot CPU and register the clock event in the framework. */ -static void __devinit setup_APIC_timer(void) +static void __cpuinit setup_APIC_timer(void) { struct clock_event_device *levt = &__get_cpu_var(lapic_events); @@ -453,14 +534,51 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) } } +static int __init calibrate_by_pmtimer(long deltapm, long *delta) +{ + const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; + const long pm_thresh = pm_100ms / 100; + unsigned long mult; + u64 res; + +#ifndef CONFIG_X86_PM_TIMER + return -1; +#endif + + apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); + + /* Check, if the PM timer is available */ + if (!deltapm) + return -1; + + mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); + + if (deltapm > (pm_100ms - pm_thresh) && + deltapm < (pm_100ms + pm_thresh)) { + apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); + } else { + res = (((u64)deltapm) * mult) >> 22; + do_div(res, 1000000); + printk(KERN_WARNING "APIC calibration not consistent " + "with PM Timer: %ldms instead of 100ms\n", + (long)res); + /* Correct the lapic counter value */ + res = (((u64)(*delta)) * pm_100ms); + do_div(res, deltapm); + printk(KERN_INFO "APIC delta adjusted to PM-Timer: " + "%lu (%ld)\n", (unsigned long)res, *delta); + *delta = (long)res; + } + + return 0; +} + static int __init calibrate_APIC_clock(void) { struct clock_event_device *levt = &__get_cpu_var(lapic_events); - const long pm_100ms = PMTMR_TICKS_PER_SEC/10; - const long pm_thresh = pm_100ms/100; void (*real_handler)(struct clock_event_device *dev); unsigned long deltaj; - long delta, deltapm; + long delta; int pm_referenced = 0; local_irq_disable(); @@ -470,10 +588,10 @@ static int __init calibrate_APIC_clock(void) global_clock_event->event_handler = lapic_cal_handler; /* - * Setup the APIC counter to 1e9. There is no way the lapic + * Setup the APIC counter to maximum. There is no way the lapic * can underflow in the 100ms detection time frame */ - __setup_APIC_LVTT(1000000000, 0, 0); + __setup_APIC_LVTT(0xffffffff, 0, 0); /* Let the interrupts run */ local_irq_enable(); @@ -490,34 +608,9 @@ static int __init calibrate_APIC_clock(void) delta = lapic_cal_t1 - lapic_cal_t2; apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); - /* Check, if the PM timer is available */ - deltapm = lapic_cal_pm2 - lapic_cal_pm1; - apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); - - if (deltapm) { - unsigned long mult; - u64 res; - - mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); - - if (deltapm > (pm_100ms - pm_thresh) && - deltapm < (pm_100ms + pm_thresh)) { - apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); - } else { - res = (((u64) deltapm) * mult) >> 22; - do_div(res, 1000000); - printk(KERN_WARNING "APIC calibration not consistent " - "with PM Timer: %ldms instead of 100ms\n", - (long)res); - /* Correct the lapic counter value */ - res = (((u64) delta) * pm_100ms); - do_div(res, deltapm); - printk(KERN_INFO "APIC delta adjusted to PM-Timer: " - "%lu (%ld)\n", (unsigned long) res, delta); - delta = (long) res; - } - pm_referenced = 1; - } + /* we trust the PM based calibration if possible */ + pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, + &delta); /* Calculate the scaled math multiplication factor */ lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, @@ -559,7 +652,10 @@ static int __init calibrate_APIC_clock(void) levt->features &= ~CLOCK_EVT_FEAT_DUMMY; - /* We trust the pm timer based calibration */ + /* + * PM timer calibration failed or not turned on + * so lets try APIC timer based calibration + */ if (!pm_referenced) { apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); @@ -652,7 +748,7 @@ void __init setup_boot_APIC_clock(void) setup_APIC_timer(); } -void __devinit setup_secondary_APIC_clock(void) +void __cpuinit setup_secondary_APIC_clock(void) { setup_APIC_timer(); } @@ -718,6 +814,9 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) * Besides, if we don't timer interrupts ignore the global * interrupt lock, which is the WrongThing (tm) to do. */ +#ifdef CONFIG_X86_64 + exit_idle(); +#endif irq_enter(); local_apic_timer_interrupt(); irq_exit(); @@ -991,40 +1090,43 @@ void __init init_bsp_APIC(void) static void __cpuinit lapic_setup_esr(void) { - unsigned long oldvalue, value, maxlvt; - if (lapic_is_integrated() && !esr_disable) { - if (esr_disable) { - /* - * Something untraceable is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk(KERN_INFO "Leaving ESR disabled.\n"); - return; - } - /* !82489DX */ - maxlvt = lapic_get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - oldvalue = apic_read(APIC_ESR); + unsigned int oldvalue, value, maxlvt; + + if (!lapic_is_integrated()) { + printk(KERN_INFO "No ESR for 82489DX.\n"); + return; + } - /* enables sending errors */ - value = ERROR_APIC_VECTOR; - apic_write(APIC_LVTERR, value); + if (esr_disable) { /* - * spec says clear errors after enabling vector. + * Something untraceable is creating bad interrupts on + * secondary quads ... for the moment, just leave the + * ESR disabled - we can't do anything useful with the + * errors anyway - mbligh */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - if (value != oldvalue) - apic_printk(APIC_VERBOSE, "ESR value before enabling " - "vector: 0x%08lx after: 0x%08lx\n", - oldvalue, value); - } else { - printk(KERN_INFO "No ESR for 82489DX.\n"); + printk(KERN_INFO "Leaving ESR disabled.\n"); + return; } + + maxlvt = lapic_get_maxlvt(); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + oldvalue = apic_read(APIC_ESR); + + /* enables sending errors */ + value = ERROR_APIC_VECTOR; + apic_write(APIC_LVTERR, value); + + /* + * spec says clear errors after enabling vector. + */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); + value = apic_read(APIC_ESR); + if (value != oldvalue) + apic_printk(APIC_VERBOSE, "ESR value before enabling " + "vector: 0x%08x after: 0x%08x\n", + oldvalue, value); } @@ -1033,24 +1135,27 @@ static void __cpuinit lapic_setup_esr(void) */ void __cpuinit setup_local_APIC(void) { - unsigned long value, integrated; + unsigned int value; int i, j; +#ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (esr_disable) { + if (lapic_is_integrated() && esr_disable) { apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); } +#endif - integrated = lapic_is_integrated(); + preempt_disable(); /* * Double-check whether this APIC is really registered. + * This is meaningless in clustered apic mode, so we skip it. */ if (!apic_id_registered()) - WARN_ON_ONCE(1); + BUG(); /* * Intel recommends to set DFR, LDR and TPR before enabling @@ -1096,6 +1201,7 @@ void __cpuinit setup_local_APIC(void) */ value |= APIC_SPIV_APIC_ENABLED; +#ifdef CONFIG_X86_32 /* * Some unknown Intel IO/APIC (or APIC) errata is biting us with * certain networking cards. If high frequency interrupts are @@ -1116,8 +1222,13 @@ void __cpuinit setup_local_APIC(void) * See also the comment in end_level_ioapic_irq(). --macro */ - /* Enable focus processor (bit==0) */ + /* + * - enable focus processor (bit==0) + * - 64bit mode always use processor focus + * so no need to set it + */ value &= ~APIC_SPIV_FOCUS_DISABLED; +#endif /* * Set spurious IRQ vector @@ -1154,9 +1265,11 @@ void __cpuinit setup_local_APIC(void) value = APIC_DM_NMI; else value = APIC_DM_NMI | APIC_LVT_MASKED; - if (!integrated) /* 82489DX */ + if (!lapic_is_integrated()) /* 82489DX */ value |= APIC_LVT_LEVEL_TRIGGER; apic_write(APIC_LVT1, value); + + preempt_enable(); } void __cpuinit end_local_APIC_setup(void) @@ -1177,6 +1290,153 @@ void __cpuinit end_local_APIC_setup(void) apic_pm_activate(); } +#ifdef HAVE_X2APIC +void check_x2apic(void) +{ + int msr, msr2; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + + if (msr & X2APIC_ENABLE) { + printk("x2apic enabled by BIOS, switching to x2apic ops\n"); + x2apic_preenabled = x2apic = 1; + apic_ops = &x2apic_ops; + } +} + +void enable_x2apic(void) +{ + int msr, msr2; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + if (!(msr & X2APIC_ENABLE)) { + printk("Enabling x2apic\n"); + wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); + } +} + +void enable_IR_x2apic(void) +{ +#ifdef CONFIG_INTR_REMAP + int ret; + unsigned long flags; + + if (!cpu_has_x2apic) + return; + + if (!x2apic_preenabled && disable_x2apic) { + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of nox2apic\n"); + return; + } + + if (x2apic_preenabled && disable_x2apic) + panic("Bios already enabled x2apic, can't enforce nox2apic"); + + if (!x2apic_preenabled && skip_ioapic_setup) { + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of skipping io-apic setup\n"); + return; + } + + ret = dmar_table_init(); + if (ret) { + printk(KERN_INFO + "dmar_table_init() failed with %d:\n", ret); + + if (x2apic_preenabled) + panic("x2apic enabled by bios. But IR enabling failed"); + else + printk(KERN_INFO + "Not enabling x2apic,Intr-remapping\n"); + return; + } + + local_irq_save(flags); + mask_8259A(); + + ret = save_mask_IO_APIC_setup(); + if (ret) { + printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret); + goto end; + } + + ret = enable_intr_remapping(1); + + if (ret && x2apic_preenabled) { + local_irq_restore(flags); + panic("x2apic enabled by bios. But IR enabling failed"); + } + + if (ret) + goto end_restore; + + if (!x2apic) { + x2apic = 1; + apic_ops = &x2apic_ops; + enable_x2apic(); + } + +end_restore: + if (ret) + /* + * IR enabling failed + */ + restore_IO_APIC_setup(); + else + reinit_intr_remapped_IO_APIC(x2apic_preenabled); + +end: + unmask_8259A(); + local_irq_restore(flags); + + if (!ret) { + if (!x2apic_preenabled) + printk(KERN_INFO + "Enabled x2apic and interrupt-remapping\n"); + else + printk(KERN_INFO + "Enabled Interrupt-remapping\n"); + } else + printk(KERN_ERR + "Failed to enable Interrupt-remapping and x2apic\n"); +#else + if (!cpu_has_x2apic) + return; + + if (x2apic_preenabled) + panic("x2apic enabled prior OS handover," + " enable CONFIG_INTR_REMAP"); + + printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " + " and x2apic\n"); +#endif + + return; +} +#endif /* HAVE_X2APIC */ + +#ifdef CONFIG_X86_64 +/* + * Detect and enable local APICs on non-SMP boards. + * Original code written by Keir Fraser. + * On AMD64 we trust the BIOS - if it says no APIC it is likely + * not correctly set up (usually the APIC timer won't work etc.) + */ +static int __init detect_init_APIC(void) +{ + if (!cpu_has_apic) { + printk(KERN_INFO "No local APIC present\n"); + return -1; + } + + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + boot_cpu_physical_apicid = 0; + return 0; +} +#else /* * Detect and initialize APIC */ @@ -1255,12 +1515,46 @@ no_apic: printk(KERN_INFO "No local APIC present or hardware disabled\n"); return -1; } +#endif + +#ifdef CONFIG_X86_64 +void __init early_init_lapic_mapping(void) +{ + unsigned long phys_addr; + + /* + * If no local APIC can be found then go out + * : it means there is no mpatable and MADT + */ + if (!smp_found_config) + return; + + phys_addr = mp_lapic_addr; + + set_fixmap_nocache(FIX_APIC_BASE, phys_addr); + apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", + APIC_BASE, phys_addr); + + /* + * Fetch the APIC ID of the BSP in case we have a + * default configuration (or the MP table is broken). + */ + boot_cpu_physical_apicid = read_apic_id(); +} +#endif /** * init_apic_mappings - initialize APIC mappings */ void __init init_apic_mappings(void) { +#ifdef HAVE_X2APIC + if (x2apic) { + boot_cpu_physical_apicid = read_apic_id(); + return; + } +#endif + /* * If no local APIC can be found then set up a fake all * zeroes page to simulate the local APIC and another @@ -1273,8 +1567,8 @@ void __init init_apic_mappings(void) apic_phys = mp_lapic_addr; set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE, - apic_phys); + apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n", + APIC_BASE, apic_phys); /* * Fetch the APIC ID of the BSP in case we have a @@ -1282,18 +1576,27 @@ void __init init_apic_mappings(void) */ if (boot_cpu_physical_apicid == -1U) boot_cpu_physical_apicid = read_apic_id(); - } /* * This initializes the IO-APIC and APIC hardware if this is * a UP kernel. */ - int apic_version[MAX_APICS]; int __init APIC_init_uniprocessor(void) { +#ifdef CONFIG_X86_64 + if (disable_apic) { + printk(KERN_INFO "Apic disabled\n"); + return -1; + } + if (!cpu_has_apic) { + disable_apic = 1; + printk(KERN_INFO "Apic disabled by BIOS\n"); + return -1; + } +#else if (!smp_found_config && !cpu_has_apic) return -1; @@ -1302,39 +1605,68 @@ int __init APIC_init_uniprocessor(void) */ if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { - printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", + printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n", boot_cpu_physical_apicid); clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); return -1; } +#endif - verify_local_APIC(); +#ifdef HAVE_X2APIC + enable_IR_x2apic(); +#endif +#ifdef CONFIG_X86_64 + setup_apic_routing(); +#endif + verify_local_APIC(); connect_bsp_APIC(); +#ifdef CONFIG_X86_64 + apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); +#else /* * Hack: In case of kdump, after a crash, kernel might be booting * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid * might be zero if read from MP tables. Get it from LAPIC. */ -#ifdef CONFIG_CRASH_DUMP +# ifdef CONFIG_CRASH_DUMP boot_cpu_physical_apicid = read_apic_id(); +# endif #endif physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); - setup_local_APIC(); +#ifdef CONFIG_X86_64 + /* + * Now enable IO-APICs, actually call clear_IO_APIC + * We need clear_IO_APIC before enabling vector on BP + */ + if (!skip_ioapic_setup && nr_ioapics) + enable_IO_APIC(); +#endif + #ifdef CONFIG_X86_IO_APIC if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) #endif localise_nmi_watchdog(); end_local_APIC_setup(); + #ifdef CONFIG_X86_IO_APIC - if (smp_found_config) - if (!skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); + if (smp_found_config && !skip_ioapic_setup && nr_ioapics) + setup_IO_APIC(); +# ifdef CONFIG_X86_64 + else + nr_ioapics = 0; +# endif #endif + +#ifdef CONFIG_X86_64 + setup_boot_APIC_clock(); + check_nmi_watchdog(); +#else setup_boot_clock(); +#endif return 0; } @@ -1348,8 +1680,11 @@ int __init APIC_init_uniprocessor(void) */ void smp_spurious_interrupt(struct pt_regs *regs) { - unsigned long v; + u32 v; +#ifdef CONFIG_X86_64 + exit_idle(); +#endif irq_enter(); /* * Check if this really is a spurious interrupt and ACK it @@ -1360,10 +1695,14 @@ void smp_spurious_interrupt(struct pt_regs *regs) if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) ack_APIC_irq(); +#ifdef CONFIG_X86_64 + add_pda(irq_spurious_count, 1); +#else /* see sw-dev-man vol 3, chapter 7.4.13.5 */ printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " "should never happen.\n", smp_processor_id()); __get_cpu_var(irq_stat).irq_spurious_count++; +#endif irq_exit(); } @@ -1372,8 +1711,11 @@ void smp_spurious_interrupt(struct pt_regs *regs) */ void smp_error_interrupt(struct pt_regs *regs) { - unsigned long v, v1; + u32 v, v1; +#ifdef CONFIG_X86_64 + exit_idle(); +#endif irq_enter(); /* First tickle the hardware, only then report what went on. -- REW */ v = apic_read(APIC_ESR); @@ -1392,7 +1734,7 @@ void smp_error_interrupt(struct pt_regs *regs) 6: Received illegal vector 7: Illegal register address */ - printk(KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", + printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", smp_processor_id(), v , v1); irq_exit(); } @@ -1565,6 +1907,13 @@ void __cpuinit generic_processor_info(int apicid, int version) cpu_set(cpu, cpu_present_map); } +#ifdef CONFIG_X86_64 +int hard_smp_processor_id(void) +{ + return read_apic_id(); +} +#endif + /* * Power management */ @@ -1640,7 +1989,7 @@ static int lapic_resume(struct sys_device *dev) local_irq_save(flags); -#ifdef CONFIG_X86_64 +#ifdef HAVE_X2APIC if (x2apic) enable_x2apic(); else @@ -1702,7 +2051,7 @@ static struct sys_device device_lapic = { .cls = &lapic_sysclass, }; -static void __devinit apic_pm_activate(void) +static void __cpuinit apic_pm_activate(void) { apic_pm_state.active = 1; } @@ -1728,16 +2077,87 @@ static void apic_pm_activate(void) { } #endif /* CONFIG_PM */ +#ifdef CONFIG_X86_64 /* - * APIC command line parameters + * apic_is_clustered_box() -- Check if we can expect good TSC + * + * Thus far, the major user of this is IBM's Summit2 series: + * + * Clustered boxes may have unsynced TSC problems if they are + * multi-chassis. Use available data to take a good guess. + * If in doubt, go HPET. */ -static int __init parse_lapic(char *arg) +__cpuinit int apic_is_clustered_box(void) { - force_enable_local_apic = 1; - return 0; + int i, clusters, zeros; + unsigned id; + u16 *bios_cpu_apicid; + DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); + + /* + * there is not this kind of box with AMD CPU yet. + * Some AMD box with quadcore cpu and 8 sockets apicid + * will be [4, 0x23] or [8, 0x27] could be thought to + * vsmp box still need checking... + */ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) + return 0; + + bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); + bitmap_zero(clustermap, NUM_APIC_CLUSTERS); + + for (i = 0; i < NR_CPUS; i++) { + /* are we being called early in kernel startup? */ + if (bios_cpu_apicid) { + id = bios_cpu_apicid[i]; + } + else if (i < nr_cpu_ids) { + if (cpu_present(i)) + id = per_cpu(x86_bios_cpu_apicid, i); + else + continue; + } + else + break; + + if (id != BAD_APICID) + __set_bit(APIC_CLUSTERID(id), clustermap); + } + + /* Problem: Partially populated chassis may not have CPUs in some of + * the APIC clusters they have been allocated. Only present CPUs have + * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap. + * Since clusters are allocated sequentially, count zeros only if + * they are bounded by ones. + */ + clusters = 0; + zeros = 0; + for (i = 0; i < NUM_APIC_CLUSTERS; i++) { + if (test_bit(i, clustermap)) { + clusters += 1 + zeros; + zeros = 0; + } else + ++zeros; + } + + /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are + * not guaranteed to be synced between boards + */ + if (is_vsmp_box() && clusters > 1) + return 1; + + /* + * If clusters > 2, then should be multi-chassis. + * May have to revisit this when multi-core + hyperthreaded CPUs come + * out, but AFAIK this will work even for them. + */ + return (clusters > 2); } -early_param("lapic", parse_lapic); +#endif +/* + * APIC command line parameters + */ static int __init setup_disableapic(char *arg) { disable_apic = 1; @@ -1779,7 +2199,6 @@ static int __init apic_set_verbosity(char *arg) if (!arg) { #ifdef CONFIG_X86_64 skip_ioapic_setup = 0; - ioapic_force = 1; return 0; #endif return -EINVAL; diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c deleted file mode 100644 index 94ddb69ae15..00000000000 --- a/arch/x86/kernel/apic_64.c +++ /dev/null @@ -1,1848 +0,0 @@ -/* - * Local APIC handling, local APIC timers - * - * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com> - * - * Fixes - * Maciej W. Rozycki : Bits for genuine 82489DX APICs; - * thanks to Eric Gilmore - * and Rolf G. Tews - * for testing these extensively. - * Maciej W. Rozycki : Various updates and fixes. - * Mikael Pettersson : Power Management for UP-APIC. - * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. - */ - -#include <linux/init.h> - -#include <linux/mm.h> -#include <linux/delay.h> -#include <linux/bootmem.h> -#include <linux/interrupt.h> -#include <linux/mc146818rtc.h> -#include <linux/kernel_stat.h> -#include <linux/sysdev.h> -#include <linux/ioport.h> -#include <linux/clockchips.h> -#include <linux/acpi_pmtmr.h> -#include <linux/module.h> -#include <linux/dmar.h> - -#include <asm/atomic.h> -#include <asm/smp.h> -#include <asm/mtrr.h> -#include <asm/mpspec.h> -#include <asm/hpet.h> -#include <asm/pgalloc.h> -#include <asm/nmi.h> -#include <asm/idle.h> -#include <asm/proto.h> -#include <asm/timex.h> -#include <asm/apic.h> -#include <asm/i8259.h> - -#include <mach_ipi.h> -#include <mach_apic.h> - -/* Disable local APIC timer from the kernel commandline or via dmi quirk */ -static int disable_apic_timer __cpuinitdata; -static int apic_calibrate_pmtmr __initdata; -int disable_apic; -int disable_x2apic; -int x2apic; - -/* x2apic enabled before OS handover */ -int x2apic_preenabled; - -/* Local APIC timer works in C2 */ -int local_apic_timer_c2_ok; -EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); - -/* - * Debug level, exported for io_apic.c - */ -unsigned int apic_verbosity; - -/* Have we found an MP table */ -int smp_found_config; - -static struct resource lapic_resource = { - .name = "Local APIC", - .flags = IORESOURCE_MEM | IORESOURCE_BUSY, -}; - -static unsigned int calibration_result; - -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt); -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt); -static void lapic_timer_broadcast(cpumask_t mask); -static void apic_pm_activate(void); - -/* - * The local apic timer can be used for any function which is CPU local. - */ -static struct clock_event_device lapic_clockevent = { - .name = "lapic", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT - | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, - .shift = 32, - .set_mode = lapic_timer_setup, - .set_next_event = lapic_next_event, - .broadcast = lapic_timer_broadcast, - .rating = 100, - .irq = -1, -}; -static DEFINE_PER_CPU(struct clock_event_device, lapic_events); - -static unsigned long apic_phys; - -unsigned long mp_lapic_addr; - -/* - * Get the LAPIC version - */ -static inline int lapic_get_version(void) -{ - return GET_APIC_VERSION(apic_read(APIC_LVR)); -} - -/* - * Check, if the APIC is integrated or a separate chip - */ -static inline int lapic_is_integrated(void) -{ -#ifdef CONFIG_X86_64 - return 1; -#else - return APIC_INTEGRATED(lapic_get_version()); -#endif -} - -/* - * Check, whether this is a modern or a first generation APIC - */ -static int modern_apic(void) -{ - /* AMD systems use old APIC versions, so check the CPU */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 >= 0xf) - return 1; - return lapic_get_version() >= 0x14; -} - -/* - * Paravirt kernels also might be using these below ops. So we still - * use generic apic_read()/apic_write(), which might be pointing to different - * ops in PARAVIRT case. - */ -void xapic_wait_icr_idle(void) -{ - while (apic_read(APIC_ICR) & APIC_ICR_BUSY) - cpu_relax(); -} - -u32 safe_xapic_wait_icr_idle(void) -{ - u32 send_status; - int timeout; - - timeout = 0; - do { - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - if (!send_status) - break; - udelay(100); - } while (timeout++ < 1000); - - return send_status; -} - -void xapic_icr_write(u32 low, u32 id) -{ - apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); - apic_write(APIC_ICR, low); -} - -u64 xapic_icr_read(void) -{ - u32 icr1, icr2; - - icr2 = apic_read(APIC_ICR2); - icr1 = apic_read(APIC_ICR); - - return icr1 | ((u64)icr2 << 32); -} - -static struct apic_ops xapic_ops = { - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = xapic_icr_read, - .icr_write = xapic_icr_write, - .wait_icr_idle = xapic_wait_icr_idle, - .safe_wait_icr_idle = safe_xapic_wait_icr_idle, -}; - -struct apic_ops __read_mostly *apic_ops = &xapic_ops; -EXPORT_SYMBOL_GPL(apic_ops); - -static void x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return; -} - -static u32 safe_x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return 0; -} - -void x2apic_icr_write(u32 low, u32 id) -{ - wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); -} - -u64 x2apic_icr_read(void) -{ - unsigned long val; - - rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); - return val; -} - -static struct apic_ops x2apic_ops = { - .read = native_apic_msr_read, - .write = native_apic_msr_write, - .icr_read = x2apic_icr_read, - .icr_write = x2apic_icr_write, - .wait_icr_idle = x2apic_wait_icr_idle, - .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, -}; - -/** - * enable_NMI_through_LVT0 - enable NMI through local vector table 0 - */ -void __cpuinit enable_NMI_through_LVT0(void) -{ - unsigned int v; - - /* unmask and set to NMI */ - v = APIC_DM_NMI; - - /* Level triggered for 82489DX (32bit mode) */ - if (!lapic_is_integrated()) - v |= APIC_LVT_LEVEL_TRIGGER; - - apic_write(APIC_LVT0, v); -} - -/** - * lapic_get_maxlvt - get the maximum number of local vector table entries - */ -int lapic_get_maxlvt(void) -{ - unsigned int v; - - v = apic_read(APIC_LVR); - /* - * - we always have APIC integrated on 64bit mode - * - 82489DXs do not report # of LVT entries - */ - return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; -} - -/* - * Local APIC timer - */ - -/* Clock divisor */ -#ifdef CONFG_X86_64 -#define APIC_DIVISOR 1 -#else -#define APIC_DIVISOR 16 -#endif - -/* - * This function sets up the local APIC timer, with a timeout of - * 'clocks' APIC bus clock. During calibration we actually call - * this function twice on the boot CPU, once with a bogus timeout - * value, second time for real. The other (noncalibrating) CPUs - * call this function only once, with the real, calibrated value. - * - * We do reads before writes even if unnecessary, to get around the - * P5 APIC double write bug. - */ -static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) -{ - unsigned int lvtt_value, tmp_value; - - lvtt_value = LOCAL_TIMER_VECTOR; - if (!oneshot) - lvtt_value |= APIC_LVT_TIMER_PERIODIC; - if (!lapic_is_integrated()) - lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); - - if (!irqen) - lvtt_value |= APIC_LVT_MASKED; - - apic_write(APIC_LVTT, lvtt_value); - - /* - * Divide PICLK by 16 - */ - tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR, - (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | - APIC_TDR_DIV_16); - - if (!oneshot) - apic_write(APIC_TMICT, clocks / APIC_DIVISOR); -} - -/* - * Setup extended LVT, AMD specific (K8, family 10h) - * - * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and - * MCE interrupts are supported. Thus MCE offset must be set to 0. - * - * If mask=1, the LVT entry does not generate interrupts while mask=0 - * enables the vector. See also the BKDGs. - */ - -#define APIC_EILVT_LVTOFF_MCE 0 -#define APIC_EILVT_LVTOFF_IBS 1 - -static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) -{ - unsigned long reg = (lvt_off << 4) + APIC_EILVT0; - unsigned int v = (mask << 16) | (msg_type << 8) | vector; - - apic_write(reg, v); -} - -u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_MCE; -} - -u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_IBS; -} -EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); - -/* - * Program the next event, relative to now - */ -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - apic_write(APIC_TMICT, delta); - return 0; -} - -/* - * Setup the lapic timer in periodic or oneshot mode - */ -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - unsigned long flags; - unsigned int v; - - /* Lapic used as dummy for broadcast ? */ - if (evt->features & CLOCK_EVT_FEAT_DUMMY) - return; - - local_irq_save(flags); - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - case CLOCK_EVT_MODE_ONESHOT: - __setup_APIC_LVTT(calibration_result, - mode != CLOCK_EVT_MODE_PERIODIC, 1); - break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - v = apic_read(APIC_LVTT); - v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, v); - break; - case CLOCK_EVT_MODE_RESUME: - /* Nothing to do here */ - break; - } - - local_irq_restore(flags); -} - -/* - * Local APIC timer broadcast function - */ -static void lapic_timer_broadcast(cpumask_t mask) -{ -#ifdef CONFIG_SMP - send_IPI_mask(mask, LOCAL_TIMER_VECTOR); -#endif -} - -/* - * Setup the local APIC timer for this CPU. Copy the initilized values - * of the boot CPU and register the clock event in the framework. - */ -static void setup_APIC_timer(void) -{ - struct clock_event_device *levt = &__get_cpu_var(lapic_events); - - memcpy(levt, &lapic_clockevent, sizeof(*levt)); - levt->cpumask = cpumask_of_cpu(smp_processor_id()); - - clockevents_register_device(levt); -} - -/* - * In this function we calibrate APIC bus clocks to the external - * timer. Unfortunately we cannot use jiffies and the timer irq - * to calibrate, since some later bootup code depends on getting - * the first irq? Ugh. - * - * We want to do the calibration only once since we - * want to have local timer irqs syncron. CPUs connected - * by the same APIC bus have the very same bus frequency. - * And we want to have irqs off anyways, no accidental - * APIC irq that way. - */ - -#define TICK_COUNT 100000000 - -static int __init calibrate_APIC_clock(void) -{ - unsigned apic, apic_start; - unsigned long tsc, tsc_start; - int result; - - local_irq_disable(); - - /* - * Put whatever arbitrary (but long enough) timeout - * value into the APIC clock, we just want to get the - * counter running for calibration. - * - * No interrupt enable ! - */ - __setup_APIC_LVTT(250000000, 0, 0); - - apic_start = apic_read(APIC_TMCCT); -#ifdef CONFIG_X86_PM_TIMER - if (apic_calibrate_pmtmr && pmtmr_ioport) { - pmtimer_wait(5000); /* 5ms wait */ - apic = apic_read(APIC_TMCCT); - result = (apic_start - apic) * 1000L / 5; - } else -#endif - { - rdtscll(tsc_start); - - do { - apic = apic_read(APIC_TMCCT); - rdtscll(tsc); - } while ((tsc - tsc_start) < TICK_COUNT && - (apic_start - apic) < TICK_COUNT); - - result = (apic_start - apic) * 1000L * tsc_khz / - (tsc - tsc_start); - } - - local_irq_enable(); - - printk(KERN_DEBUG "APIC timer calibration result %d\n", result); - - printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", - result / 1000 / 1000, result / 1000 % 1000); - - /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, - lapic_clockevent.shift); - lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); - lapic_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &lapic_clockevent); - - calibration_result = (result * APIC_DIVISOR) / HZ; - - /* - * Do a sanity check on the APIC calibration result - */ - if (calibration_result < (1000000 / HZ)) { - printk(KERN_WARNING - "APIC frequency too slow, disabling apic timer\n"); - return -1; - } - - return 0; -} - -/* - * Setup the boot APIC - * - * Calibrate and verify the result. - */ -void __init setup_boot_APIC_clock(void) -{ - /* - * The local apic timer can be disabled via the kernel - * commandline or from the CPU detection code. Register the lapic - * timer as a dummy clock event source on SMP systems, so the - * broadcast mechanism is used. On UP systems simply ignore it. - */ - if (disable_apic_timer) { - printk(KERN_INFO "Disabling APIC timer\n"); - /* No broadcast on UP ! */ - if (num_possible_cpus() > 1) { - lapic_clockevent.mult = 1; - setup_APIC_timer(); - } - return; - } - - apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" - "calibrating APIC timer ...\n"); - - if (calibrate_APIC_clock()) { - /* No broadcast on UP ! */ - if (num_possible_cpus() > 1) - setup_APIC_timer(); - return; - } - - /* - * If nmi_watchdog is set to IO_APIC, we need the - * PIT/HPET going. Otherwise register lapic as a dummy - * device. - */ - if (nmi_watchdog != NMI_IO_APIC) - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; - else - printk(KERN_WARNING "APIC timer registered as dummy," - " due to nmi_watchdog=%d!\n", nmi_watchdog); - - /* Setup the lapic or request the broadcast */ - setup_APIC_timer(); -} - -void __cpuinit setup_secondary_APIC_clock(void) -{ - setup_APIC_timer(); -} - -/* - * The guts of the apic timer interrupt - */ -static void local_apic_timer_interrupt(void) -{ - int cpu = smp_processor_id(); - struct clock_event_device *evt = &per_cpu(lapic_events, cpu); - - /* - * Normally we should not be here till LAPIC has been initialized but - * in some cases like kdump, its possible that there is a pending LAPIC - * timer interrupt from previous kernel's context and is delivered in - * new kernel the moment interrupts are enabled. - * - * Interrupts are enabled early and LAPIC is setup much later, hence - * its possible that when we get here evt->event_handler is NULL. - * Check for event_handler being NULL and discard the interrupt as - * spurious. - */ - if (!evt->event_handler) { - printk(KERN_WARNING - "Spurious LAPIC timer interrupt on cpu %d\n", cpu); - /* Switch it off */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); - return; - } - - /* - * the NMI deadlock-detector uses this. - */ -#ifdef CONFIG_X86_64 - add_pda(apic_timer_irqs, 1); -#else - per_cpu(irq_stat, cpu).apic_timer_irqs++; -#endif - - evt->event_handler(evt); -} - -/* - * Local APIC timer interrupt. This is the most natural way for doing - * local interrupts, but local timer interrupts can be emulated by - * broadcast interrupts too. [in case the hw doesn't support APIC timers] - * - * [ if a single-CPU system runs an SMP kernel then we call the local - * interrupt as well. Thus we cannot inline the local irq ... ] - */ -void smp_apic_timer_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - /* - * NOTE! We'd better ACK the irq immediately, - * because timer handling can be slow. - */ - ack_APIC_irq(); - /* - * update_process_times() expects us to have done irq_enter(). - * Besides, if we don't timer interrupts ignore the global - * interrupt lock, which is the WrongThing (tm) to do. - */ - exit_idle(); - irq_enter(); - local_apic_timer_interrupt(); - irq_exit(); - - set_irq_regs(old_regs); -} - -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - - -/* - * Local APIC start and shutdown - */ - -/** - * clear_local_APIC - shutdown the local APIC - * - * This is called, when a CPU is disabled and before rebooting, so the state of - * the local APIC has no dangling leftovers. Also used to cleanout any BIOS - * leftovers during boot. - */ -void clear_local_APIC(void) -{ - int maxlvt; - u32 v; - - /* APIC hasn't been mapped yet */ - if (!apic_phys) - return; - - maxlvt = lapic_get_maxlvt(); - /* - * Masking an LVT entry can trigger a local APIC error - * if the vector is zero. Mask LVTERR first to prevent this. - */ - if (maxlvt >= 3) { - v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ - apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); - } - /* - * Careful: we have to set masks only first to deassert - * any level-triggered sources. - */ - v = apic_read(APIC_LVTT); - apic_write(APIC_LVTT, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT1); - apic_write(APIC_LVT1, v | APIC_LVT_MASKED); - if (maxlvt >= 4) { - v = apic_read(APIC_LVTPC); - apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); - } - - /* lets not touch this if we didn't frob it */ -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) - if (maxlvt >= 5) { - v = apic_read(APIC_LVTTHMR); - apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); - } -#endif - /* - * Clean APIC state for other OSs: - */ - apic_write(APIC_LVTT, APIC_LVT_MASKED); - apic_write(APIC_LVT0, APIC_LVT_MASKED); - apic_write(APIC_LVT1, APIC_LVT_MASKED); - if (maxlvt >= 3) - apic_write(APIC_LVTERR, APIC_LVT_MASKED); - if (maxlvt >= 4) - apic_write(APIC_LVTPC, APIC_LVT_MASKED); - - /* Integrated APIC (!82489DX) ? */ - if (lapic_is_integrated()) { - if (maxlvt > 3) - /* Clear ESR due to Pentium errata 3AP and 11AP */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } -} - -/** - * disable_local_APIC - clear and disable the local APIC - */ -void disable_local_APIC(void) -{ - unsigned int value; - - clear_local_APIC(); - - /* - * Disable APIC (implies clearing of registers - * for 82489DX!). - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_SPIV_APIC_ENABLED; - apic_write(APIC_SPIV, value); - -#ifdef CONFIG_X86_32 - /* - * When LAPIC was disabled by the BIOS and enabled by the kernel, - * restore the disabled state. - */ - if (enabled_via_apicbase) { - unsigned int l, h; - - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_ENABLE; - wrmsr(MSR_IA32_APICBASE, l, h); - } -#endif -} - -/* - * If Linux enabled the LAPIC against the BIOS default disable it down before - * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and - * not power-off. Additionally clear all LVT entries before disable_local_APIC - * for the case where Linux didn't enable the LAPIC. - */ -void lapic_shutdown(void) -{ - unsigned long flags; - - if (!cpu_has_apic) - return; - - local_irq_save(flags); - -#ifdef CONFIG_X86_32 - if (!enabled_via_apicbase) - clear_local_APIC(); - else -#endif - disable_local_APIC(); - - - local_irq_restore(flags); -} - -/* - * This is to verify that we're looking at a real local APIC. - * Check these against your board if the CPUs aren't getting - * started for no apparent reason. - */ -int __init verify_local_APIC(void) -{ - unsigned int reg0, reg1; - - /* - * The version register is read-only in a real APIC. - */ - reg0 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); - apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); - reg1 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); - - /* - * The two version reads above should print the same - * numbers. If the second one is different, then we - * poke at a non-APIC. - */ - if (reg1 != reg0) - return 0; - - /* - * Check if the version looks reasonably. - */ - reg1 = GET_APIC_VERSION(reg0); - if (reg1 == 0x00 || reg1 == 0xff) - return 0; - reg1 = lapic_get_maxlvt(); - if (reg1 < 0x02 || reg1 == 0xff) - return 0; - - /* - * The ID register is read/write in a real APIC. - */ - reg0 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); - apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); - reg1 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); - apic_write(APIC_ID, reg0); - if (reg1 != (reg0 ^ APIC_ID_MASK)) - return 0; - - /* - * The next two are just to see if we have sane values. - * They're only really relevant if we're in Virtual Wire - * compatibility mode, but most boxes are anymore. - */ - reg0 = apic_read(APIC_LVT0); - apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); - reg1 = apic_read(APIC_LVT1); - apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); - - return 1; -} - -/** - * sync_Arb_IDs - synchronize APIC bus arbitration IDs - */ -void __init sync_Arb_IDs(void) -{ - /* - * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not - * needed on AMD. - */ - if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - return; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | - APIC_INT_LEVELTRIG | APIC_DM_INIT); -} - -/* - * An initial setup of the virtual wire mode. - */ -void __init init_bsp_APIC(void) -{ - unsigned int value; - - /* - * Don't do the setup now if we have a SMP BIOS as the - * through-I/O-APIC virtual wire mode might be active. - */ - if (smp_found_config || !cpu_has_apic) - return; - - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - - /* - * Enable APIC. - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - -#ifdef CONFIG_X86_32 - /* This bit is reserved on P4/Xeon and should be cleared */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - (boot_cpu_data.x86 == 15)) - value &= ~APIC_SPIV_FOCUS_DISABLED; - else -#endif - value |= APIC_SPIV_FOCUS_DISABLED; - value |= SPURIOUS_APIC_VECTOR; - apic_write(APIC_SPIV, value); - - /* - * Set up the virtual wire mode. - */ - apic_write(APIC_LVT0, APIC_DM_EXTINT); - value = APIC_DM_NMI; - if (!lapic_is_integrated()) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; - apic_write(APIC_LVT1, value); -} - -static void __cpuinit lapic_setup_esr(void) -{ - unsigned long oldvalue, value, maxlvt; - if (lapic_is_integrated() && !esr_disable) { - if (esr_disable) { - /* - * Something untraceable is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk(KERN_INFO "Leaving ESR disabled.\n"); - return; - } - /* !82489DX */ - maxlvt = lapic_get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - oldvalue = apic_read(APIC_ESR); - - /* enables sending errors */ - value = ERROR_APIC_VECTOR; - apic_write(APIC_LVTERR, value); - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - if (value != oldvalue) - apic_printk(APIC_VERBOSE, "ESR value before enabling " - "vector: 0x%08lx after: 0x%08lx\n", - oldvalue, value); - } else { - printk(KERN_INFO "No ESR for 82489DX.\n"); - } -} - - -/** - * setup_local_APIC - setup the local APIC - */ -void __cpuinit setup_local_APIC(void) -{ - unsigned int value; - int i, j; - - preempt_disable(); - value = apic_read(APIC_LVR); - - BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f); - - /* - * Double-check whether this APIC is really registered. - * This is meaningless in clustered apic mode, so we skip it. - */ - if (!apic_id_registered()) - BUG(); - - /* - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ - init_apic_ldr(); - - /* - * Set Task Priority to 'accept all'. We never change this - * later on. - */ - value = apic_read(APIC_TASKPRI); - value &= ~APIC_TPRI_MASK; - apic_write(APIC_TASKPRI, value); - - /* - * After a crash, we no longer service the interrupts and a pending - * interrupt from previous kernel might still have ISR bit set. - * - * Most probably by now CPU has serviced that pending interrupt and - * it might not have done the ack_APIC_irq() because it thought, - * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it - * does not clear the ISR bit and cpu thinks it has already serivced - * the interrupt. Hence a vector might get locked. It was noticed - * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. - */ - for (i = APIC_ISR_NR - 1; i >= 0; i--) { - value = apic_read(APIC_ISR + i*0x10); - for (j = 31; j >= 0; j--) { - if (value & (1<<j)) - ack_APIC_irq(); - } - } - - /* - * Now that we are all set up, enable the APIC - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - /* - * Enable APIC - */ - value |= APIC_SPIV_APIC_ENABLED; - - /* We always use processor focus */ - - /* - * Set spurious IRQ vector - */ - value |= SPURIOUS_APIC_VECTOR; - apic_write(APIC_SPIV, value); - - /* - * Set up LVT0, LVT1: - * - * set up through-local-APIC on the BP's LINT0. This is not - * strictly necessary in pure symmetric-IO mode, but sometimes - * we delegate interrupts to the 8259A. - */ - /* - * TODO: set up through-local-APIC from through-I/O-APIC? --macro - */ - value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; - if (!smp_processor_id() && !value) { - value = APIC_DM_EXTINT; - apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", - smp_processor_id()); - } else { - value = APIC_DM_EXTINT | APIC_LVT_MASKED; - apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", - smp_processor_id()); - } - apic_write(APIC_LVT0, value); - - /* - * only the BP should see the LINT1 NMI signal, obviously. - */ - if (!smp_processor_id()) - value = APIC_DM_NMI; - else - value = APIC_DM_NMI | APIC_LVT_MASKED; - apic_write(APIC_LVT1, value); - preempt_enable(); -} - -void __cpuinit end_local_APIC_setup(void) -{ - lapic_setup_esr(); - -#ifdef CONFIG_X86_32 - { - unsigned int value; - /* Disable the local apic timer */ - value = apic_read(APIC_LVTT); - value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, value); - } -#endif - - setup_apic_nmi_watchdog(NULL); - apic_pm_activate(); -} - -void check_x2apic(void) -{ - int msr, msr2; - - rdmsr(MSR_IA32_APICBASE, msr, msr2); - - if (msr & X2APIC_ENABLE) { - printk("x2apic enabled by BIOS, switching to x2apic ops\n"); - x2apic_preenabled = x2apic = 1; - apic_ops = &x2apic_ops; - } -} - -void enable_x2apic(void) -{ - int msr, msr2; - - rdmsr(MSR_IA32_APICBASE, msr, msr2); - if (!(msr & X2APIC_ENABLE)) { - printk("Enabling x2apic\n"); - wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); - } -} - -void enable_IR_x2apic(void) -{ -#ifdef CONFIG_INTR_REMAP - int ret; - unsigned long flags; - - if (!cpu_has_x2apic) - return; - - if (!x2apic_preenabled && disable_x2apic) { - printk(KERN_INFO - "Skipped enabling x2apic and Interrupt-remapping " - "because of nox2apic\n"); - return; - } - - if (x2apic_preenabled && disable_x2apic) - panic("Bios already enabled x2apic, can't enforce nox2apic"); - - if (!x2apic_preenabled && skip_ioapic_setup) { - printk(KERN_INFO - "Skipped enabling x2apic and Interrupt-remapping " - "because of skipping io-apic setup\n"); - return; - } - - ret = dmar_table_init(); - if (ret) { - printk(KERN_INFO - "dmar_table_init() failed with %d:\n", ret); - - if (x2apic_preenabled) - panic("x2apic enabled by bios. But IR enabling failed"); - else - printk(KERN_INFO - "Not enabling x2apic,Intr-remapping\n"); - return; - } - - local_irq_save(flags); - mask_8259A(); - save_mask_IO_APIC_setup(); - - ret = enable_intr_remapping(1); - - if (ret && x2apic_preenabled) { - local_irq_restore(flags); - panic("x2apic enabled by bios. But IR enabling failed"); - } - - if (ret) - goto end; - - if (!x2apic) { - x2apic = 1; - apic_ops = &x2apic_ops; - enable_x2apic(); - } -end: - if (ret) - /* - * IR enabling failed - */ - restore_IO_APIC_setup(); - else - reinit_intr_remapped_IO_APIC(x2apic_preenabled); - - unmask_8259A(); - local_irq_restore(flags); - - if (!ret) { - if (!x2apic_preenabled) - printk(KERN_INFO - "Enabled x2apic and interrupt-remapping\n"); - else - printk(KERN_INFO - "Enabled Interrupt-remapping\n"); - } else - printk(KERN_ERR - "Failed to enable Interrupt-remapping and x2apic\n"); -#else - if (!cpu_has_x2apic) - return; - - if (x2apic_preenabled) - panic("x2apic enabled prior OS handover," - " enable CONFIG_INTR_REMAP"); - - printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " - " and x2apic\n"); -#endif - - return; -} - -/* - * Detect and enable local APICs on non-SMP boards. - * Original code written by Keir Fraser. - * On AMD64 we trust the BIOS - if it says no APIC it is likely - * not correctly set up (usually the APIC timer won't work etc.) - */ -static int __init detect_init_APIC(void) -{ - if (!cpu_has_apic) { - printk(KERN_INFO "No local APIC present\n"); - return -1; - } - - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - boot_cpu_physical_apicid = 0; - return 0; -} - -void __init early_init_lapic_mapping(void) -{ - unsigned long phys_addr; - - /* - * If no local APIC can be found then go out - * : it means there is no mpatable and MADT - */ - if (!smp_found_config) - return; - - phys_addr = mp_lapic_addr; - - set_fixmap_nocache(FIX_APIC_BASE, phys_addr); - apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", - APIC_BASE, phys_addr); - - /* - * Fetch the APIC ID of the BSP in case we have a - * default configuration (or the MP table is broken). - */ - boot_cpu_physical_apicid = read_apic_id(); -} - -/** - * init_apic_mappings - initialize APIC mappings - */ -void __init init_apic_mappings(void) -{ - if (x2apic) { - boot_cpu_physical_apicid = read_apic_id(); - return; - } - - /* - * If no local APIC can be found then set up a fake all - * zeroes page to simulate the local APIC and another - * one for the IO-APIC. - */ - if (!smp_found_config && detect_init_APIC()) { - apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); - apic_phys = __pa(apic_phys); - } else - apic_phys = mp_lapic_addr; - - set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", - APIC_BASE, apic_phys); - - /* - * Fetch the APIC ID of the BSP in case we have a - * default configuration (or the MP table is broken). - */ - boot_cpu_physical_apicid = read_apic_id(); -} - -/* - * This initializes the IO-APIC and APIC hardware if this is - * a UP kernel. - */ -int apic_version[MAX_APICS]; - -int __init APIC_init_uniprocessor(void) -{ - if (disable_apic) { - printk(KERN_INFO "Apic disabled\n"); - return -1; - } - if (!cpu_has_apic) { - disable_apic = 1; - printk(KERN_INFO "Apic disabled by BIOS\n"); - return -1; - } - - enable_IR_x2apic(); - setup_apic_routing(); - - verify_local_APIC(); - - connect_bsp_APIC(); - - physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); - apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); - - setup_local_APIC(); - - /* - * Now enable IO-APICs, actually call clear_IO_APIC - * We need clear_IO_APIC before enabling vector on BP - */ - if (!skip_ioapic_setup && nr_ioapics) - enable_IO_APIC(); - - if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) - localise_nmi_watchdog(); - end_local_APIC_setup(); - - if (smp_found_config && !skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); - else - nr_ioapics = 0; - setup_boot_APIC_clock(); - check_nmi_watchdog(); - return 0; -} - -/* - * Local APIC interrupts - */ - -/* - * This interrupt should _never_ happen with our APIC/SMP architecture - */ -asmlinkage void smp_spurious_interrupt(void) -{ - unsigned int v; - exit_idle(); - irq_enter(); - /* - * Check if this really is a spurious interrupt and ACK it - * if it is a vectored one. Just in case... - * Spurious interrupts should not be ACKed. - */ - v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); - if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) - ack_APIC_irq(); - - add_pda(irq_spurious_count, 1); - irq_exit(); -} - -/* - * This interrupt should never happen with our APIC/SMP architecture - */ -asmlinkage void smp_error_interrupt(void) -{ - unsigned int v, v1; - - exit_idle(); - irq_enter(); - /* First tickle the hardware, only then report what went on. -- REW */ - v = apic_read(APIC_ESR); - apic_write(APIC_ESR, 0); - v1 = apic_read(APIC_ESR); - ack_APIC_irq(); - atomic_inc(&irq_err_count); - - /* Here is what the APIC error bits mean: - 0: Send CS error - 1: Receive CS error - 2: Send accept error - 3: Receive accept error - 4: Reserved - 5: Send illegal vector - 6: Received illegal vector - 7: Illegal register address - */ - printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", - smp_processor_id(), v , v1); - irq_exit(); -} - -/** - * connect_bsp_APIC - attach the APIC to the interrupt system - */ -void __init connect_bsp_APIC(void) -{ -#ifdef CONFIG_X86_32 - if (pic_mode) { - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - /* - * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's - * local APIC to INT and NMI lines. - */ - apic_printk(APIC_VERBOSE, "leaving PIC mode, " - "enabling APIC mode.\n"); - outb(0x70, 0x22); - outb(0x01, 0x23); - } -#endif - enable_apic_mode(); -} - -/** - * disconnect_bsp_APIC - detach the APIC from the interrupt system - * @virt_wire_setup: indicates, whether virtual wire mode is selected - * - * Virtual wire mode is necessary to deliver legacy interrupts even when the - * APIC is disabled. - */ -void disconnect_bsp_APIC(int virt_wire_setup) -{ - unsigned int value; - -#ifdef CONFIG_X86_32 - if (pic_mode) { - /* - * Put the board back into PIC mode (has an effect only on - * certain older boards). Note that APIC interrupts, including - * IPIs, won't work beyond this point! The only exception are - * INIT IPIs. - */ - apic_printk(APIC_VERBOSE, "disabling APIC mode, " - "entering PIC mode.\n"); - outb(0x70, 0x22); - outb(0x00, 0x23); - return; - } -#endif - - /* Go back to Virtual Wire compatibility mode */ - - /* For the spurious interrupt use vector F, and enable it */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - value |= 0xf; - apic_write(APIC_SPIV, value); - - if (!virt_wire_setup) { - /* - * For LVT0 make it edge triggered, active high, - * external and enabled - */ - value = apic_read(APIC_LVT0); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); - apic_write(APIC_LVT0, value); - } else { - /* Disable LVT0 */ - apic_write(APIC_LVT0, APIC_LVT_MASKED); - } - - /* - * For LVT1 make it edge triggered, active high, - * nmi and enabled - */ - value = apic_read(APIC_LVT1); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); - apic_write(APIC_LVT1, value); -} - -void __cpuinit generic_processor_info(int apicid, int version) -{ - int cpu; - cpumask_t tmp_map; - - /* - * Validate version - */ - if (version == 0x0) { - printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - version); - version = 0x10; - } - apic_version[apicid] = version; - - if (num_processors >= NR_CPUS) { - printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); - return; - } - - num_processors++; - cpus_complement(tmp_map, cpu_present_map); - cpu = first_cpu(tmp_map); - - physid_set(apicid, phys_cpu_present_map); - if (apicid == boot_cpu_physical_apicid) { - /* - * x86_bios_cpu_apicid is required to have processors listed - * in same order as logical cpu numbers. Hence the first - * entry is BSP, and so on. - */ - cpu = 0; - } - if (apicid > max_physical_apicid) - max_physical_apicid = apicid; - -#ifdef CONFIG_X86_32 - /* - * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y - * but we need to work other dependencies like SMP_SUSPEND etc - * before this can be done without some confusion. - * if (CPU_HOTPLUG_ENABLED || num_processors > 8) - * - Ashok Raj <ashok.raj@intel.com> - */ - if (max_physical_apicid >= 8) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(version)) { - def_to_bigsmp = 0; - break; - } - /* If P4 and above fall through */ - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } - } -#endif - -#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) - /* are we being called early in kernel startup? */ - if (early_per_cpu_ptr(x86_cpu_to_apicid)) { - u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); - u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); - - cpu_to_apicid[cpu] = apicid; - bios_cpu_apicid[cpu] = apicid; - } else { - per_cpu(x86_cpu_to_apicid, cpu) = apicid; - per_cpu(x86_bios_cpu_apicid, cpu) = apicid; - } -#endif - - cpu_set(cpu, cpu_possible_map); - cpu_set(cpu, cpu_present_map); -} - -int hard_smp_processor_id(void) -{ - return read_apic_id(); -} - -/* - * Power management - */ -#ifdef CONFIG_PM - -static struct { - /* - * 'active' is true if the local APIC was enabled by us and - * not the BIOS; this signifies that we are also responsible - * for disabling it before entering apm/acpi suspend - */ - int active; - /* r/w apic fields */ - unsigned int apic_id; - unsigned int apic_taskpri; - unsigned int apic_ldr; - unsigned int apic_dfr; - unsigned int apic_spiv; - unsigned int apic_lvtt; - unsigned int apic_lvtpc; - unsigned int apic_lvt0; - unsigned int apic_lvt1; - unsigned int apic_lvterr; - unsigned int apic_tmict; - unsigned int apic_tdcr; - unsigned int apic_thmr; -} apic_pm_state; - -static int lapic_suspend(struct sys_device *dev, pm_message_t state) -{ - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = lapic_get_maxlvt(); - - apic_pm_state.apic_id = apic_read(APIC_ID); - apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); - apic_pm_state.apic_ldr = apic_read(APIC_LDR); - apic_pm_state.apic_dfr = apic_read(APIC_DFR); - apic_pm_state.apic_spiv = apic_read(APIC_SPIV); - apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); - if (maxlvt >= 4) - apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); - apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); - apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); - apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); - apic_pm_state.apic_tmict = apic_read(APIC_TMICT); - apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) - if (maxlvt >= 5) - apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); -#endif - - local_irq_save(flags); - disable_local_APIC(); - local_irq_restore(flags); - return 0; -} - -static int lapic_resume(struct sys_device *dev) -{ - unsigned int l, h; - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = lapic_get_maxlvt(); - - local_irq_save(flags); - -#ifdef CONFIG_X86_64 - if (x2apic) - enable_x2apic(); - else -#endif - { - /* - * Make sure the APICBASE points to the right address - * - * FIXME! This will be wrong if we ever support suspend on - * SMP! We'll need to do this as part of the CPU restore! - */ - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); - } - - apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); - apic_write(APIC_ID, apic_pm_state.apic_id); - apic_write(APIC_DFR, apic_pm_state.apic_dfr); - apic_write(APIC_LDR, apic_pm_state.apic_ldr); - apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); - apic_write(APIC_SPIV, apic_pm_state.apic_spiv); - apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); - apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) - if (maxlvt >= 5) - apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); -#endif - if (maxlvt >= 4) - apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); - apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); - apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); - apic_write(APIC_TMICT, apic_pm_state.apic_tmict); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - - local_irq_restore(flags); - - return 0; -} - -/* - * This device has no shutdown method - fully functioning local APICs - * are needed on every CPU up until machine_halt/restart/poweroff. - */ - -static struct sysdev_class lapic_sysclass = { - .name = "lapic", - .resume = lapic_resume, - .suspend = lapic_suspend, -}; - -static struct sys_device device_lapic = { - .id = 0, - .cls = &lapic_sysclass, -}; - -static void __cpuinit apic_pm_activate(void) -{ - apic_pm_state.active = 1; -} - -static int __init init_lapic_sysfs(void) -{ - int error; - - if (!cpu_has_apic) - return 0; - /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ - - error = sysdev_class_register(&lapic_sysclass); - if (!error) - error = sysdev_register(&device_lapic); - return error; -} -device_initcall(init_lapic_sysfs); - -#else /* CONFIG_PM */ - -static void apic_pm_activate(void) { } - -#endif /* CONFIG_PM */ - -/* - * apic_is_clustered_box() -- Check if we can expect good TSC - * - * Thus far, the major user of this is IBM's Summit2 series: - * - * Clustered boxes may have unsynced TSC problems if they are - * multi-chassis. Use available data to take a good guess. - * If in doubt, go HPET. - */ -__cpuinit int apic_is_clustered_box(void) -{ - int i, clusters, zeros; - unsigned id; - u16 *bios_cpu_apicid; - DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); - - /* - * there is not this kind of box with AMD CPU yet. - * Some AMD box with quadcore cpu and 8 sockets apicid - * will be [4, 0x23] or [8, 0x27] could be thought to - * vsmp box still need checking... - */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) - return 0; - - bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); - bitmap_zero(clustermap, NUM_APIC_CLUSTERS); - - for (i = 0; i < NR_CPUS; i++) { - /* are we being called early in kernel startup? */ - if (bios_cpu_apicid) { - id = bios_cpu_apicid[i]; - } - else if (i < nr_cpu_ids) { - if (cpu_present(i)) - id = per_cpu(x86_bios_cpu_apicid, i); - else - continue; - } - else - break; - - if (id != BAD_APICID) - __set_bit(APIC_CLUSTERID(id), clustermap); - } - - /* Problem: Partially populated chassis may not have CPUs in some of - * the APIC clusters they have been allocated. Only present CPUs have - * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap. - * Since clusters are allocated sequentially, count zeros only if - * they are bounded by ones. - */ - clusters = 0; - zeros = 0; - for (i = 0; i < NUM_APIC_CLUSTERS; i++) { - if (test_bit(i, clustermap)) { - clusters += 1 + zeros; - zeros = 0; - } else - ++zeros; - } - - /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are - * not guaranteed to be synced between boards - */ - if (is_vsmp_box() && clusters > 1) - return 1; - - /* - * If clusters > 2, then should be multi-chassis. - * May have to revisit this when multi-core + hyperthreaded CPUs come - * out, but AFAIK this will work even for them. - */ - return (clusters > 2); -} - -static __init int setup_nox2apic(char *str) -{ - disable_x2apic = 1; - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC); - return 0; -} -early_param("nox2apic", setup_nox2apic); - - -/* - * APIC command line parameters - */ -static int __init setup_disableapic(char *arg) -{ - disable_apic = 1; - setup_clear_cpu_cap(X86_FEATURE_APIC); - return 0; -} -early_param("disableapic", setup_disableapic); - -/* same as disableapic, for compatibility */ -static int __init setup_nolapic(char *arg) -{ - return setup_disableapic(arg); -} -early_param("nolapic", setup_nolapic); - -static int __init parse_lapic_timer_c2_ok(char *arg) -{ - local_apic_timer_c2_ok = 1; - return 0; -} -early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); - -static int __init parse_disable_apic_timer(char *arg) -{ - disable_apic_timer = 1; - return 0; -} -early_param("noapictimer", parse_disable_apic_timer); - -static int __init parse_nolapic_timer(char *arg) -{ - disable_apic_timer = 1; - return 0; -} -early_param("nolapic_timer", parse_nolapic_timer); - -static __init int setup_apicpmtimer(char *s) -{ - apic_calibrate_pmtmr = 1; - notsc_setup(NULL); - return 0; -} -__setup("apicpmtimer", setup_apicpmtimer); - -static int __init apic_set_verbosity(char *arg) -{ - if (!arg) { -#ifdef CONFIG_X86_64 - skip_ioapic_setup = 0; - ioapic_force = 1; - return 0; -#endif - return -EINVAL; - } - - if (strcmp("debug", arg) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", arg) == 0) - apic_verbosity = APIC_VERBOSE; - else { - printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug\n", arg); - return -EINVAL; - } - - return 0; -} -early_param("apic", apic_set_verbosity); - -static int __init lapic_insert_resource(void) -{ - if (!apic_phys) - return -1; - - /* Put local APIC into the resource map. */ - lapic_resource.start = apic_phys; - lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; - insert_resource(&iomem_resource, &lapic_resource); - - return 0; -} - -/* - * need call insert after e820_reserve_resources() - * that is using request_resource - */ -late_initcall(lapic_insert_resource); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 505543a75a5..7fcf63d22f8 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -22,7 +22,7 @@ #define __NO_STUBS 1 #undef __SYSCALL -#undef ASM_X86__UNISTD_64_H +#undef _ASM_X86_UNISTD_64_H #define __SYSCALL(nr, sym) [nr] = 1, static char syscalls[] = { #include <asm/unistd.h> diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index fdd585f9c53..f0dfe6f17e7 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c @@ -1,8 +1,6 @@ /* * BIOS run time interface routines. * - * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -16,33 +14,128 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) Russ Anderson */ +#include <linux/efi.h> +#include <asm/efi.h> +#include <linux/io.h> #include <asm/uv/bios.h> +#include <asm/uv/uv_hub.h> + +struct uv_systab uv_systab; -const char * -x86_bios_strerror(long status) +s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) { - const char *str; - switch (status) { - case 0: str = "Call completed without error"; break; - case -1: str = "Not implemented"; break; - case -2: str = "Invalid argument"; break; - case -3: str = "Call completed with error"; break; - default: str = "Unknown BIOS status code"; break; - } - return str; + struct uv_systab *tab = &uv_systab; + + if (!tab->function) + /* + * BIOS does not support UV systab + */ + return BIOS_STATUS_UNIMPLEMENTED; + + return efi_call6((void *)__va(tab->function), + (u64)which, a1, a2, a3, a4, a5); } -long -x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second, - unsigned long *drift_info) +s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, + u64 a4, u64 a5) { - struct uv_bios_retval isrv; + unsigned long bios_flags; + s64 ret; - BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0); - *ticks_per_second = isrv.v0; - *drift_info = isrv.v1; - return isrv.status; + local_irq_save(bios_flags); + ret = uv_bios_call(which, a1, a2, a3, a4, a5); + local_irq_restore(bios_flags); + + return ret; } -EXPORT_SYMBOL_GPL(x86_bios_freq_base); + +s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, + u64 a4, u64 a5) +{ + s64 ret; + + preempt_disable(); + ret = uv_bios_call(which, a1, a2, a3, a4, a5); + preempt_enable(); + + return ret; +} + + +long sn_partition_id; +EXPORT_SYMBOL_GPL(sn_partition_id); +long uv_coherency_id; +EXPORT_SYMBOL_GPL(uv_coherency_id); +long uv_region_size; +EXPORT_SYMBOL_GPL(uv_region_size); +int uv_type; + + +s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, + long *region) +{ + s64 ret; + u64 v0, v1; + union partition_info_u part; + + ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc, + (u64)(&v0), (u64)(&v1), 0, 0); + if (ret != BIOS_STATUS_SUCCESS) + return ret; + + part.val = v0; + if (uvtype) + *uvtype = part.hub_version; + if (partid) + *partid = part.partition_id; + if (coher) + *coher = part.coherence_id; + if (region) + *region = part.region_size; + return ret; +} + + +s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) +{ + return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type, + (u64)ticks_per_second, 0, 0, 0); +} +EXPORT_SYMBOL_GPL(uv_bios_freq_base); + + +#ifdef CONFIG_EFI +void uv_bios_init(void) +{ + struct uv_systab *tab; + + if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || + (efi.uv_systab == (unsigned long)NULL)) { + printk(KERN_CRIT "No EFI UV System Table.\n"); + uv_systab.function = (unsigned long)NULL; + return; + } + + tab = (struct uv_systab *)ioremap(efi.uv_systab, + sizeof(struct uv_systab)); + if (strncmp(tab->signature, "UVST", 4) != 0) + printk(KERN_ERR "bad signature in UV system table!"); + + /* + * Copy table to permanent spot for later use. + */ + memcpy(&uv_systab, tab, sizeof(struct uv_systab)); + iounmap(tab); + + printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision); +} +#else /* !CONFIG_EFI */ + +void uv_bios_init(void) { } +#endif + diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 7f0b45a5d78..82ec6075c05 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -25,7 +25,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ -cpufeature = $(src)/../../../../include/asm-x86/cpufeature.h +cpufeature = $(src)/../../include/asm/cpufeature.h targets += capflags.c $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.pl FORCE diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 32e73520adf..8f1e31db2ad 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -249,7 +249,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) } numa_set_node(cpu, node); - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); + printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); #endif } diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index c24c4a487b7..8e48c5d4467 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -780,6 +780,9 @@ static int __init acpi_cpufreq_init(void) { int ret; + if (acpi_disabled) + return 0; + dprintk("acpi_cpufreq_init\n"); ret = acpi_cpufreq_early_init(); diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 06fcce516d5..b0461856acf 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -1,5 +1,5 @@ /* - * (C) 2001-2004 Dave Jones. <davej@codemonkey.org.uk> + * (C) 2001-2004 Dave Jones. <davej@redhat.com> * (C) 2002 Padraig Brady. <padraig@antefacto.com> * * Licensed under the terms of the GNU GPL License version 2. @@ -1019,7 +1019,7 @@ MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); module_param(revid_errata, int, 0644); MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); -MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); +MODULE_AUTHOR ("Dave Jones <davej@redhat.com>"); MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); MODULE_LICENSE ("GPL"); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index b5ced806a31..c1ac5790c63 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -246,7 +246,7 @@ static void __exit powernow_k6_exit(void) } -MODULE_AUTHOR("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>"); +MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>"); MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); MODULE_LICENSE("GPL"); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 0a61159d7b7..7c7d56b4313 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -1,6 +1,6 @@ /* * AMD K7 Powernow driver. - * (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs. + * (C) 2003 Dave Jones on behalf of SuSE Labs. * (C) 2003-2004 Dave Jones <davej@redhat.com> * * Licensed under the terms of the GNU GPL License version 2. @@ -692,7 +692,7 @@ static void __exit powernow_exit (void) module_param(acpi_force, int, 0444); MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); -MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); +MODULE_AUTHOR ("Dave Jones <davej@redhat.com>"); MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors."); MODULE_LICENSE ("GPL"); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 84bb395038d..d3dcd58b87c 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -7,7 +7,7 @@ * Support : mark.langsdorf@amd.com * * Based on the powernow-k7.c module written by Dave Jones. - * (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs + * (C) 2003 Dave Jones on behalf of SuSE Labs * (C) 2004 Dominik Brodowski <linux@brodo.de> * (C) 2004 Pavel Machek <pavel@suse.cz> * Licensed under the terms of the GNU GPL License version 2. @@ -45,7 +45,6 @@ #endif #define PFX "powernow-k8: " -#define BFX PFX "BIOS error: " #define VERSION "version 2.20.00" #include "powernow-k8.h" @@ -536,35 +535,40 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 for (j = 0; j < data->numps; j++) { if (pst[j].vid > LEAST_VID) { - printk(KERN_ERR PFX "vid %d invalid : 0x%x\n", j, pst[j].vid); + printk(KERN_ERR FW_BUG PFX "vid %d invalid : 0x%x\n", + j, pst[j].vid); return -EINVAL; } if (pst[j].vid < data->rvo) { /* vid + rvo >= 0 */ - printk(KERN_ERR BFX "0 vid exceeded with pstate %d\n", j); + printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate" + " %d\n", j); return -ENODEV; } if (pst[j].vid < maxvid + data->rvo) { /* vid + rvo >= maxvid */ - printk(KERN_ERR BFX "maxvid exceeded with pstate %d\n", j); + printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate" + " %d\n", j); return -ENODEV; } if (pst[j].fid > MAX_FID) { - printk(KERN_ERR BFX "maxfid exceeded with pstate %d\n", j); + printk(KERN_ERR FW_BUG PFX "maxfid exceeded with pstate" + " %d\n", j); return -ENODEV; } if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) { /* Only first fid is allowed to be in "low" range */ - printk(KERN_ERR BFX "two low fids - %d : 0x%x\n", j, pst[j].fid); + printk(KERN_ERR FW_BUG PFX "two low fids - %d : " + "0x%x\n", j, pst[j].fid); return -EINVAL; } if (pst[j].fid < lastfid) lastfid = pst[j].fid; } if (lastfid & 1) { - printk(KERN_ERR BFX "lastfid invalid\n"); + printk(KERN_ERR FW_BUG PFX "lastfid invalid\n"); return -EINVAL; } if (lastfid > LO_FID_TABLE_TOP) - printk(KERN_INFO BFX "first fid not from lo freq table\n"); + printk(KERN_INFO FW_BUG PFX "first fid not from lo freq table\n"); return 0; } @@ -672,13 +676,13 @@ static int find_psb_table(struct powernow_k8_data *data) dprintk("table vers: 0x%x\n", psb->tableversion); if (psb->tableversion != PSB_VERSION_1_4) { - printk(KERN_ERR BFX "PSB table is not v1.4\n"); + printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n"); return -ENODEV; } dprintk("flags: 0x%x\n", psb->flags1); if (psb->flags1) { - printk(KERN_ERR BFX "unknown flags\n"); + printk(KERN_ERR FW_BUG PFX "unknown flags\n"); return -ENODEV; } @@ -705,7 +709,7 @@ static int find_psb_table(struct powernow_k8_data *data) } } if (cpst != 1) { - printk(KERN_ERR BFX "numpst must be 1\n"); + printk(KERN_ERR FW_BUG PFX "numpst must be 1\n"); return -ENODEV; } @@ -1130,17 +1134,19 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) "ACPI Processor module before starting this " "driver.\n"); #else - printk(KERN_ERR PFX "Your BIOS does not provide ACPI " - "_PSS objects in a way that Linux understands. " - "Please report this to the Linux ACPI maintainers" - " and complain to your BIOS vendor.\n"); + printk(KERN_ERR FW_BUG PFX "Your BIOS does not provide" + " ACPI _PSS objects in a way that Linux " + "understands. Please report this to the Linux " + "ACPI maintainers and complain to your BIOS " + "vendor.\n"); #endif kfree(data); return -ENODEV; } if (pol->cpu != 0) { - printk(KERN_ERR PFX "No ACPI _PSS objects for CPU other than " - "CPU0. Complain to your BIOS vendor.\n"); + printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " + "CPU other than CPU0. Complain to your BIOS " + "vendor.\n"); kfree(data); return -ENODEV; } @@ -1193,7 +1199,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) /* min/max the cpu is capable of */ if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) { - printk(KERN_ERR PFX "invalid powernow_table\n"); + printk(KERN_ERR FW_BUG PFX "invalid powernow_table\n"); powernow_k8_cpu_exit_acpi(data); kfree(data->powernow_table); kfree(data); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 191f7263c61..04d0376b64b 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -431,7 +431,7 @@ static void __exit speedstep_exit(void) } -MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>"); +MODULE_AUTHOR ("Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>"); MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges."); MODULE_LICENSE ("GPL"); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 99468dbd08d..cce0b6118d5 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -174,7 +174,7 @@ static void __cpuinit srat_detect_node(void) node = first_node(node_online_map); numa_set_node(cpu, node); - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); + printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); #endif } diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c index f390c9f6635..dd3af6e7b39 100644 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ b/arch/x86/kernel/cpu/mcheck/k7.c @@ -1,6 +1,6 @@ /* - * Athlon/Hammer specific Machine Check Exception Reporting - * (C) Copyright 2002 Dave Jones <davej@codemonkey.org.uk> + * Athlon specific Machine Check Exception Reporting + * (C) Copyright 2002 Dave Jones <davej@redhat.com> */ #include <linux/init.h> diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c index 774d87cfd8c..0ebf3fc6a61 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_32.c +++ b/arch/x86/kernel/cpu/mcheck/mce_32.c @@ -1,6 +1,6 @@ /* * mce.c - x86 Machine Check Exception Reporting - * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk> + * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@redhat.com> */ #include <linux/init.h> diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c index cc1fccdd31e..a74af128efc 100644 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c @@ -1,7 +1,7 @@ /* * Non Fatal Machine Check Exception Reporting * - * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk> + * (C) Copyright 2002 Dave Jones. <davej@redhat.com> * * This file contains routines to check for non-fatal MCEs every 15s * diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 6bff382094f..9abd48b2267 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -17,6 +17,8 @@ #include <linux/bitops.h> #include <linux/smp.h> #include <linux/nmi.h> +#include <linux/kprobes.h> + #include <asm/apic.h> #include <asm/intel_arch_perfmon.h> @@ -336,7 +338,8 @@ static void single_msr_unreserve(void) release_perfctr_nmi(wd_ops->perfctr); } -static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +static void __kprobes +single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) { /* start the cycle over again */ write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); @@ -401,7 +404,7 @@ static int setup_p6_watchdog(unsigned nmi_hz) return 1; } -static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) { /* * P6 based Pentium M need to re-unmask @@ -605,7 +608,7 @@ static void p4_unreserve(void) release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); } -static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) { unsigned dummy; /* @@ -784,7 +787,7 @@ unsigned lapic_adjust_nmi_hz(unsigned hz) return hz; } -int lapic_wd_event(unsigned nmi_hz) +int __kprobes lapic_wd_event(unsigned nmi_hz) { struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); u64 ctr; diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index a26c480b949..01b1244ef1c 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -160,14 +160,16 @@ static void *c_start(struct seq_file *m, loff_t *pos) { if (*pos == 0) /* just in case, cpu 0 is not the first */ *pos = first_cpu(cpu_online_map); - if ((*pos) < nr_cpu_ids && cpu_online(*pos)) + else + *pos = next_cpu_nr(*pos - 1, cpu_online_map); + if ((*pos) < nr_cpu_ids) return &cpu_data(*pos); return NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - *pos = next_cpu(*pos, cpu_online_map); + (*pos)++; return c_start(m, pos); } diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 1a78180f08d..b3614752197 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -405,7 +405,6 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic) panic("Non maskable interrupt"); console_silent(); spin_unlock(&nmi_print_lock); - bust_spinlocks(0); /* * If we are in kernel we are probably nested up pretty bad @@ -416,6 +415,7 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic) crash_kexec(regs); } + bust_spinlocks(0); do_exit(SIGSEGV); } diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 733c4f8d42e..3ce029ffaa5 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -95,7 +95,8 @@ static void __init nvidia_bugs(int num, int slot, int func) } -static u32 ati_ixp4x0_rev(int num, int slot, int func) +#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) +static u32 __init ati_ixp4x0_rev(int num, int slot, int func) { u32 d; u8 b; @@ -115,7 +116,6 @@ static u32 ati_ixp4x0_rev(int num, int slot, int func) static void __init ati_bugs(int num, int slot, int func) { -#if defined(CONFIG_ACPI) && defined (CONFIG_X86_IO_APIC) u32 d; u8 b; @@ -138,9 +138,56 @@ static void __init ati_bugs(int num, int slot, int func) printk(KERN_INFO "If you got timer trouble " "try acpi_use_timer_override\n"); } -#endif } +static u32 __init ati_sbx00_rev(int num, int slot, int func) +{ + u32 old, d; + + d = read_pci_config(num, slot, func, 0x70); + old = d; + d &= ~(1<<8); + write_pci_config(num, slot, func, 0x70, d); + d = read_pci_config(num, slot, func, 0x8); + d &= 0xff; + write_pci_config(num, slot, func, 0x70, old); + + return d; +} + +static void __init ati_bugs_contd(int num, int slot, int func) +{ + u32 d, rev; + + if (acpi_use_timer_override) + return; + + rev = ati_sbx00_rev(num, slot, func); + if (rev > 0x13) + return; + + /* check for IRQ0 interrupt swap */ + d = read_pci_config(num, slot, func, 0x64); + if (!(d & (1<<14))) + acpi_skip_timer_override = 1; + + if (acpi_skip_timer_override) { + printk(KERN_INFO "SB600 revision 0x%x\n", rev); + printk(KERN_INFO "Ignoring ACPI timer override.\n"); + printk(KERN_INFO "If you got timer trouble " + "try acpi_use_timer_override\n"); + } +} +#else +static void __init ati_bugs(int num, int slot, int func) +{ +} + +static void __init ati_bugs_contd(int num, int slot, int func) +{ +} +#endif + #ifdef CONFIG_DMAR static void __init intel_g33_dmar(int num, int slot, int func) { @@ -176,6 +223,8 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, + { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, + PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, #ifdef CONFIG_DMAR { PCI_VENDOR_ID_INTEL, 0x29c0, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar }, diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 945a31cdd81..1119d247fe1 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -367,6 +367,10 @@ void __init efi_init(void) efi.smbios = config_tables[i].table; printk(" SMBIOS=0x%lx ", config_tables[i].table); } else if (!efi_guidcmp(config_tables[i].guid, + UV_SYSTEM_TABLE_GUID)) { + efi.uv_systab = config_tables[i].table; + printk(" UVsystab=0x%lx ", config_tables[i].table); + } else if (!efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID)) { efi.hcdp = config_tables[i].table; printk(" HCDP=0x%lx ", config_tables[i].table); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index b21fbfaffe3..dd65143941a 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -629,7 +629,7 @@ ENTRY(interrupt) ENTRY(irq_entries_start) RING0_INT_FRAME vector=0 -.rept NR_IRQS +.rept NR_VECTORS ALIGN .if vector CFI_ADJUST_CFA_OFFSET -4 @@ -1024,7 +1024,7 @@ ENTRY(machine_check) RING0_INT_FRAME pushl $0 CFI_ADJUST_CFA_OFFSET 4 - pushl $do_machine_check + pushl machine_check_vector CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC @@ -1153,20 +1153,6 @@ ENDPROC(xen_failsafe_callback) #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) - pushl %eax - pushl %ecx - pushl %edx - movl 0xc(%esp), %eax - subl $MCOUNT_INSN_SIZE, %eax - -.globl mcount_call -mcount_call: - call ftrace_stub - - popl %edx - popl %ecx - popl %eax - ret END(mcount) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1db6ce4314e..09e7145484c 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -64,32 +64,6 @@ #ifdef CONFIG_FTRACE #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) - - subq $0x38, %rsp - movq %rax, (%rsp) - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 24(%rsp) - movq %rdi, 32(%rsp) - movq %r8, 40(%rsp) - movq %r9, 48(%rsp) - - movq 0x38(%rsp), %rdi - subq $MCOUNT_INSN_SIZE, %rdi - -.globl mcount_call -mcount_call: - call ftrace_stub - - movq 48(%rsp), %r9 - movq 40(%rsp), %r8 - movq 32(%rsp), %rdi - movq 24(%rsp), %rsi - movq 16(%rsp), %rdx - movq 8(%rsp), %rcx - movq (%rsp), %rax - addq $0x38, %rsp - retq END(mcount) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index ab115cd15fd..d073d981a73 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -11,17 +11,18 @@ #include <linux/spinlock.h> #include <linux/hardirq.h> +#include <linux/uaccess.h> #include <linux/ftrace.h> #include <linux/percpu.h> #include <linux/init.h> #include <linux/list.h> -#include <asm/alternative.h> #include <asm/ftrace.h> +#include <asm/nops.h> /* Long is fine, even if it is only 4 bytes ;-) */ -static long *ftrace_nop; +static unsigned long *ftrace_nop; union ftrace_code_union { char code[MCOUNT_INSN_SIZE]; @@ -60,11 +61,7 @@ notrace int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { - unsigned replaced; - unsigned old = *(unsigned *)old_code; /* 4 bytes */ - unsigned new = *(unsigned *)new_code; /* 4 bytes */ - unsigned char newch = new_code[4]; - int faulted = 0; + unsigned char replaced[MCOUNT_INSN_SIZE]; /* * Note: Due to modules and __init, code can @@ -72,29 +69,20 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, * as well as code changing. * * No real locking needed, this code is run through - * kstop_machine. + * kstop_machine, or before SMP starts. */ - asm volatile ( - "1: lock\n" - " cmpxchg %3, (%2)\n" - " jnz 2f\n" - " movb %b4, 4(%2)\n" - "2:\n" - ".section .fixup, \"ax\"\n" - "3: movl $1, %0\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : "=r"(faulted), "=a"(replaced) - : "r"(ip), "r"(new), "c"(newch), - "0"(faulted), "a"(old) - : "memory"); - sync_core(); + if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE)) + return 1; + + if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) + return 2; - if (replaced != old && replaced != new) - faulted = 2; + WARN_ON_ONCE(__copy_to_user_inatomic((char __user *)ip, new_code, + MCOUNT_INSN_SIZE)); - return faulted; + sync_core(); + + return 0; } notrace int ftrace_update_ftrace_func(ftrace_func_t func) @@ -112,30 +100,76 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func) notrace int ftrace_mcount_set(unsigned long *data) { - unsigned long ip = (long)(&mcount_call); - unsigned long *addr = data; - unsigned char old[MCOUNT_INSN_SIZE], *new; - - /* - * Replace the mcount stub with a pointer to the - * ip recorder function. - */ - memcpy(old, &mcount_call, MCOUNT_INSN_SIZE); - new = ftrace_call_replace(ip, *addr); - *addr = ftrace_modify_code(ip, old, new); - + /* mcount is initialized as a nop */ + *data = 0; return 0; } int __init ftrace_dyn_arch_init(void *data) { - const unsigned char *const *noptable = find_nop_table(); - - /* This is running in kstop_machine */ - - ftrace_mcount_set(data); + extern const unsigned char ftrace_test_p6nop[]; + extern const unsigned char ftrace_test_nop5[]; + extern const unsigned char ftrace_test_jmp[]; + int faulted = 0; - ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE]; + /* + * There is no good nop for all x86 archs. + * We will default to using the P6_NOP5, but first we + * will test to make sure that the nop will actually + * work on this CPU. If it faults, we will then + * go to a lesser efficient 5 byte nop. If that fails + * we then just use a jmp as our nop. This isn't the most + * efficient nop, but we can not use a multi part nop + * since we would then risk being preempted in the middle + * of that nop, and if we enabled tracing then, it might + * cause a system crash. + * + * TODO: check the cpuid to determine the best nop. + */ + asm volatile ( + "jmp ftrace_test_jmp\n" + /* This code needs to stay around */ + ".section .text, \"ax\"\n" + "ftrace_test_jmp:" + "jmp ftrace_test_p6nop\n" + "nop\n" + "nop\n" + "nop\n" /* 2 byte jmp + 3 bytes */ + "ftrace_test_p6nop:" + P6_NOP5 + "jmp 1f\n" + "ftrace_test_nop5:" + ".byte 0x66,0x66,0x66,0x66,0x90\n" + "jmp 1f\n" + ".previous\n" + "1:" + ".section .fixup, \"ax\"\n" + "2: movl $1, %0\n" + " jmp ftrace_test_nop5\n" + "3: movl $2, %0\n" + " jmp 1b\n" + ".previous\n" + _ASM_EXTABLE(ftrace_test_p6nop, 2b) + _ASM_EXTABLE(ftrace_test_nop5, 3b) + : "=r"(faulted) : "0" (faulted)); + + switch (faulted) { + case 0: + pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n"); + ftrace_nop = (unsigned long *)ftrace_test_p6nop; + break; + case 1: + pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n"); + ftrace_nop = (unsigned long *)ftrace_test_nop5; + break; + case 2: + pr_info("ftrace: converting mcount calls to jmp . + 5\n"); + ftrace_nop = (unsigned long *)ftrace_test_jmp; + break; + } + + /* The return code is retured via data */ + *(unsigned long *)data = 0; return 0; } diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 9eca5ba7a6b..c0262791bda 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -25,7 +25,7 @@ #include <acpi/acpi_bus.h> #endif -static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 1; } @@ -170,7 +170,7 @@ struct genapic apic_flat = { * We cannot use logical delivery in this case because the mask * overflows, so use physical mode. */ -static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { #ifdef CONFIG_ACPI /* @@ -179,8 +179,10 @@ static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) * is an example). */ if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && - (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { + printk(KERN_DEBUG "system APIC only can use physical flat"); return 1; + } #endif return 0; diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index e4bf2cc0d74..f6a2c8eb48a 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -12,7 +12,7 @@ DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); -static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { if (cpu_has_x2apic) return 1; diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 8f1343df262..d042211768b 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -19,7 +19,7 @@ static int set_x2apic_phys_mode(char *arg) } early_param("x2apic_phys", set_x2apic_phys_mode); -static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { if (cpu_has_x2apic && x2apic_phys) return 1; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 33581d94a90..680a06557c5 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -30,7 +30,7 @@ DEFINE_PER_CPU(int, x2apic_extra_bits); static enum uv_system_type uv_system_type; -static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { if (!strcmp(oem_id, "SGI")) { if (!strcmp(oem_table_id, "UVL")) @@ -341,12 +341,12 @@ static __init void map_mmioh_high(int max_pnode) static __init void uv_rtc_init(void) { - long status, ticks_per_sec, drift; + long status; + u64 ticks_per_sec; - status = - x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, - &drift); - if (status != 0 || ticks_per_sec < 100000) { + status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, + &ticks_per_sec); + if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) { printk(KERN_WARNING "unable to determine platform RTC clock frequency, " "guessing.\n"); @@ -356,7 +356,22 @@ static __init void uv_rtc_init(void) sn_rtc_cycles_per_second = ticks_per_sec; } -static bool uv_system_inited; +/* + * Called on each cpu to initialize the per_cpu UV data area. + * ZZZ hotplug not supported yet + */ +void __cpuinit uv_cpu_init(void) +{ + /* CPU 0 initilization will be done via uv_system_init. */ + if (!uv_blade_info) + return; + + uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; + + if (get_uv_system_type() == UV_NON_UNIQUE_APIC) + set_x2apic_extra_bits(uv_hub_info->pnode); +} + void __init uv_system_init(void) { @@ -412,6 +427,9 @@ void __init uv_system_init(void) gnode_upper = (((unsigned long)node_id.s.node_id) & ~((1 << n_val) - 1)) << m_val; + uv_bios_init(); + uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, + &uv_coherency_id, &uv_region_size); uv_rtc_init(); for_each_present_cpu(cpu) { @@ -433,7 +451,7 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; - uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ + uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id; uv_node_to_blade[nid] = blade; uv_cpu_to_blade[cpu] = blade; max_pnode = max(pnode, max_pnode); @@ -448,21 +466,6 @@ void __init uv_system_init(void) map_mmr_high(max_pnode); map_config_high(max_pnode); map_mmioh_high(max_pnode); - uv_system_inited = true; -} -/* - * Called on each cpu to initialize the per_cpu UV data area. - * ZZZ hotplug not supported yet - */ -void __cpuinit uv_cpu_init(void) -{ - BUG_ON(!uv_system_inited); - - uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; - - if (get_uv_system_type() == UV_NON_UNIQUE_APIC) - set_x2apic_extra_bits(uv_hub_info->pnode); + uv_cpu_init(); } - - diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index acf62fc233d..77017e834cf 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1,29 +1,49 @@ #include <linux/clocksource.h> #include <linux/clockchips.h> +#include <linux/interrupt.h> +#include <linux/sysdev.h> #include <linux/delay.h> #include <linux/errno.h> #include <linux/hpet.h> #include <linux/init.h> -#include <linux/sysdev.h> +#include <linux/cpu.h> #include <linux/pm.h> +#include <linux/io.h> #include <asm/fixmap.h> -#include <asm/hpet.h> #include <asm/i8253.h> -#include <asm/io.h> +#include <asm/hpet.h> -#define HPET_MASK CLOCKSOURCE_MASK(32) -#define HPET_SHIFT 22 +#define HPET_MASK CLOCKSOURCE_MASK(32) +#define HPET_SHIFT 22 /* FSEC = 10^-15 NSEC = 10^-9 */ -#define FSEC_PER_NSEC 1000000L +#define FSEC_PER_NSEC 1000000L + +#define HPET_DEV_USED_BIT 2 +#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT) +#define HPET_DEV_VALID 0x8 +#define HPET_DEV_FSB_CAP 0x1000 +#define HPET_DEV_PERI_CAP 0x2000 + +#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt) /* * HPET address is set in acpi/boot.c, when an ACPI entry exists */ -unsigned long hpet_address; -static void __iomem *hpet_virt_address; +unsigned long hpet_address; +unsigned long hpet_num_timers; +static void __iomem *hpet_virt_address; + +struct hpet_dev { + struct clock_event_device evt; + unsigned int num; + int cpu; + unsigned int irq; + unsigned int flags; + char name[10]; +}; unsigned long hpet_readl(unsigned long a) { @@ -59,7 +79,7 @@ static inline void hpet_clear_mapping(void) static int boot_hpet_disable; int hpet_force_user; -static int __init hpet_setup(char* str) +static int __init hpet_setup(char *str) { if (str) { if (!strncmp("disable", str, 7)) @@ -80,7 +100,7 @@ __setup("nohpet", disable_hpet); static inline int is_hpet_capable(void) { - return (!boot_hpet_disable && hpet_address); + return !boot_hpet_disable && hpet_address; } /* @@ -102,6 +122,9 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled); * timer 0 and timer 1 in case of RTC emulation. */ #ifdef CONFIG_HPET + +static void hpet_reserve_msi_timers(struct hpet_data *hd); + static void hpet_reserve_platform_timers(unsigned long id) { struct hpet __iomem *hpet = hpet_virt_address; @@ -111,10 +134,10 @@ static void hpet_reserve_platform_timers(unsigned long id) nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; - memset(&hd, 0, sizeof (hd)); - hd.hd_phys_address = hpet_address; - hd.hd_address = hpet; - hd.hd_nirqs = nrtimers; + memset(&hd, 0, sizeof(hd)); + hd.hd_phys_address = hpet_address; + hd.hd_address = hpet; + hd.hd_nirqs = nrtimers; hpet_reserve_timer(&hd, 0); #ifdef CONFIG_HPET_EMULATE_RTC @@ -130,10 +153,12 @@ static void hpet_reserve_platform_timers(unsigned long id) hd.hd_irq[1] = HPET_LEGACY_RTC; for (i = 2; i < nrtimers; timer++, i++) { - hd.hd_irq[i] = (readl(&timer->hpet_config) & Tn_INT_ROUTE_CNF_MASK) >> - Tn_INT_ROUTE_CNF_SHIFT; + hd.hd_irq[i] = (readl(&timer->hpet_config) & + Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; } + hpet_reserve_msi_timers(&hd); + hpet_alloc(&hd); } @@ -227,60 +252,70 @@ static void hpet_legacy_clockevent_register(void) printk(KERN_DEBUG "hpet clockevent registered\n"); } -static void hpet_legacy_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) +static int hpet_setup_msi_irq(unsigned int irq); + +static void hpet_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt, int timer) { unsigned long cfg, cmp, now; uint64_t delta; - switch(mode) { + switch (mode) { case CLOCK_EVT_MODE_PERIODIC: - delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult; - delta >>= hpet_clockevent.shift; + delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; + delta >>= evt->shift; now = hpet_readl(HPET_COUNTER); cmp = now + (unsigned long) delta; - cfg = hpet_readl(HPET_T0_CFG); + cfg = hpet_readl(HPET_Tn_CFG(timer)); cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T0_CFG); + hpet_writel(cfg, HPET_Tn_CFG(timer)); /* * The first write after writing TN_SETVAL to the * config register sets the counter value, the second * write sets the period. */ - hpet_writel(cmp, HPET_T0_CMP); + hpet_writel(cmp, HPET_Tn_CMP(timer)); udelay(1); - hpet_writel((unsigned long) delta, HPET_T0_CMP); + hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer)); break; case CLOCK_EVT_MODE_ONESHOT: - cfg = hpet_readl(HPET_T0_CFG); + cfg = hpet_readl(HPET_Tn_CFG(timer)); cfg &= ~HPET_TN_PERIODIC; cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T0_CFG); + hpet_writel(cfg, HPET_Tn_CFG(timer)); break; case CLOCK_EVT_MODE_UNUSED: case CLOCK_EVT_MODE_SHUTDOWN: - cfg = hpet_readl(HPET_T0_CFG); + cfg = hpet_readl(HPET_Tn_CFG(timer)); cfg &= ~HPET_TN_ENABLE; - hpet_writel(cfg, HPET_T0_CFG); + hpet_writel(cfg, HPET_Tn_CFG(timer)); break; case CLOCK_EVT_MODE_RESUME: - hpet_enable_legacy_int(); + if (timer == 0) { + hpet_enable_legacy_int(); + } else { + struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + hpet_setup_msi_irq(hdev->irq); + disable_irq(hdev->irq); + irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu)); + enable_irq(hdev->irq); + } break; } } -static int hpet_legacy_next_event(unsigned long delta, - struct clock_event_device *evt) +static int hpet_next_event(unsigned long delta, + struct clock_event_device *evt, int timer) { u32 cnt; cnt = hpet_readl(HPET_COUNTER); cnt += (u32) delta; - hpet_writel(cnt, HPET_T0_CMP); + hpet_writel(cnt, HPET_Tn_CMP(timer)); /* * We need to read back the CMP register to make sure that @@ -292,6 +327,347 @@ static int hpet_legacy_next_event(unsigned long delta, return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; } +static void hpet_legacy_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + hpet_set_mode(mode, evt, 0); +} + +static int hpet_legacy_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + return hpet_next_event(delta, evt, 0); +} + +/* + * HPET MSI Support + */ +#ifdef CONFIG_PCI_MSI + +static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); +static struct hpet_dev *hpet_devs; + +void hpet_msi_unmask(unsigned int irq) +{ + struct hpet_dev *hdev = get_irq_data(irq); + unsigned long cfg; + + /* unmask it */ + cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); + cfg |= HPET_TN_FSB; + hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); +} + +void hpet_msi_mask(unsigned int irq) +{ + unsigned long cfg; + struct hpet_dev *hdev = get_irq_data(irq); + + /* mask it */ + cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); + cfg &= ~HPET_TN_FSB; + hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); +} + +void hpet_msi_write(unsigned int irq, struct msi_msg *msg) +{ + struct hpet_dev *hdev = get_irq_data(irq); + + hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num)); + hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4); +} + +void hpet_msi_read(unsigned int irq, struct msi_msg *msg) +{ + struct hpet_dev *hdev = get_irq_data(irq); + + msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num)); + msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4); + msg->address_hi = 0; +} + +static void hpet_msi_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + hpet_set_mode(mode, evt, hdev->num); +} + +static int hpet_msi_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + return hpet_next_event(delta, evt, hdev->num); +} + +static int hpet_setup_msi_irq(unsigned int irq) +{ + if (arch_setup_hpet_msi(irq)) { + destroy_irq(irq); + return -EINVAL; + } + return 0; +} + +static int hpet_assign_irq(struct hpet_dev *dev) +{ + unsigned int irq; + + irq = create_irq(); + if (!irq) + return -EINVAL; + + set_irq_data(irq, dev); + + if (hpet_setup_msi_irq(irq)) + return -EINVAL; + + dev->irq = irq; + return 0; +} + +static irqreturn_t hpet_interrupt_handler(int irq, void *data) +{ + struct hpet_dev *dev = (struct hpet_dev *)data; + struct clock_event_device *hevt = &dev->evt; + + if (!hevt->event_handler) { + printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n", + dev->num); + return IRQ_HANDLED; + } + + hevt->event_handler(hevt); + return IRQ_HANDLED; +} + +static int hpet_setup_irq(struct hpet_dev *dev) +{ + + if (request_irq(dev->irq, hpet_interrupt_handler, + IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev)) + return -1; + + disable_irq(dev->irq); + irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); + enable_irq(dev->irq); + + printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", + dev->name, dev->irq); + + return 0; +} + +/* This should be called in specific @cpu */ +static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) +{ + struct clock_event_device *evt = &hdev->evt; + uint64_t hpet_freq; + + WARN_ON(cpu != smp_processor_id()); + if (!(hdev->flags & HPET_DEV_VALID)) + return; + + if (hpet_setup_msi_irq(hdev->irq)) + return; + + hdev->cpu = cpu; + per_cpu(cpu_hpet_dev, cpu) = hdev; + evt->name = hdev->name; + hpet_setup_irq(hdev); + evt->irq = hdev->irq; + + evt->rating = 110; + evt->features = CLOCK_EVT_FEAT_ONESHOT; + if (hdev->flags & HPET_DEV_PERI_CAP) + evt->features |= CLOCK_EVT_FEAT_PERIODIC; + + evt->set_mode = hpet_msi_set_mode; + evt->set_next_event = hpet_msi_next_event; + evt->shift = 32; + + /* + * The period is a femto seconds value. We need to calculate the + * scaled math multiplication factor for nanosecond to hpet tick + * conversion. + */ + hpet_freq = 1000000000000000ULL; + do_div(hpet_freq, hpet_period); + evt->mult = div_sc((unsigned long) hpet_freq, + NSEC_PER_SEC, evt->shift); + /* Calculate the max delta */ + evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt); + /* 5 usec minimum reprogramming delta. */ + evt->min_delta_ns = 5000; + + evt->cpumask = cpumask_of_cpu(hdev->cpu); + clockevents_register_device(evt); +} + +#ifdef CONFIG_HPET +/* Reserve at least one timer for userspace (/dev/hpet) */ +#define RESERVE_TIMERS 1 +#else +#define RESERVE_TIMERS 0 +#endif + +static void hpet_msi_capability_lookup(unsigned int start_timer) +{ + unsigned int id; + unsigned int num_timers; + unsigned int num_timers_used = 0; + int i; + + id = hpet_readl(HPET_ID); + + num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); + num_timers++; /* Value read out starts from 0 */ + + hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL); + if (!hpet_devs) + return; + + hpet_num_timers = num_timers; + + for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) { + struct hpet_dev *hdev = &hpet_devs[num_timers_used]; + unsigned long cfg = hpet_readl(HPET_Tn_CFG(i)); + + /* Only consider HPET timer with MSI support */ + if (!(cfg & HPET_TN_FSB_CAP)) + continue; + + hdev->flags = 0; + if (cfg & HPET_TN_PERIODIC_CAP) + hdev->flags |= HPET_DEV_PERI_CAP; + hdev->num = i; + + sprintf(hdev->name, "hpet%d", i); + if (hpet_assign_irq(hdev)) + continue; + + hdev->flags |= HPET_DEV_FSB_CAP; + hdev->flags |= HPET_DEV_VALID; + num_timers_used++; + if (num_timers_used == num_possible_cpus()) + break; + } + + printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n", + num_timers, num_timers_used); +} + +#ifdef CONFIG_HPET +static void hpet_reserve_msi_timers(struct hpet_data *hd) +{ + int i; + + if (!hpet_devs) + return; + + for (i = 0; i < hpet_num_timers; i++) { + struct hpet_dev *hdev = &hpet_devs[i]; + + if (!(hdev->flags & HPET_DEV_VALID)) + continue; + + hd->hd_irq[hdev->num] = hdev->irq; + hpet_reserve_timer(hd, hdev->num); + } +} +#endif + +static struct hpet_dev *hpet_get_unused_timer(void) +{ + int i; + + if (!hpet_devs) + return NULL; + + for (i = 0; i < hpet_num_timers; i++) { + struct hpet_dev *hdev = &hpet_devs[i]; + + if (!(hdev->flags & HPET_DEV_VALID)) + continue; + if (test_and_set_bit(HPET_DEV_USED_BIT, + (unsigned long *)&hdev->flags)) + continue; + return hdev; + } + return NULL; +} + +struct hpet_work_struct { + struct delayed_work work; + struct completion complete; +}; + +static void hpet_work(struct work_struct *w) +{ + struct hpet_dev *hdev; + int cpu = smp_processor_id(); + struct hpet_work_struct *hpet_work; + + hpet_work = container_of(w, struct hpet_work_struct, work.work); + + hdev = hpet_get_unused_timer(); + if (hdev) + init_one_hpet_msi_clockevent(hdev, cpu); + + complete(&hpet_work->complete); +} + +static int hpet_cpuhp_notify(struct notifier_block *n, + unsigned long action, void *hcpu) +{ + unsigned long cpu = (unsigned long)hcpu; + struct hpet_work_struct work; + struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu); + + switch (action & 0xf) { + case CPU_ONLINE: + INIT_DELAYED_WORK(&work.work, hpet_work); + init_completion(&work.complete); + /* FIXME: add schedule_work_on() */ + schedule_delayed_work_on(cpu, &work.work, 0); + wait_for_completion(&work.complete); + break; + case CPU_DEAD: + if (hdev) { + free_irq(hdev->irq, hdev); + hdev->flags &= ~HPET_DEV_USED; + per_cpu(cpu_hpet_dev, cpu) = NULL; + } + break; + } + return NOTIFY_OK; +} +#else + +static int hpet_setup_msi_irq(unsigned int irq) +{ + return 0; +} +static void hpet_msi_capability_lookup(unsigned int start_timer) +{ + return; +} + +#ifdef CONFIG_HPET +static void hpet_reserve_msi_timers(struct hpet_data *hd) +{ + return; +} +#endif + +static int hpet_cpuhp_notify(struct notifier_block *n, + unsigned long action, void *hcpu) +{ + return NOTIFY_OK; +} + +#endif + /* * Clock source related code */ @@ -427,8 +803,10 @@ int __init hpet_enable(void) if (id & HPET_ID_LEGSUP) { hpet_legacy_clockevent_register(); + hpet_msi_capability_lookup(2); return 1; } + hpet_msi_capability_lookup(0); return 0; out_nohpet: @@ -445,6 +823,8 @@ out_nohpet: */ static __init int hpet_late_init(void) { + int cpu; + if (boot_hpet_disable) return -ENODEV; @@ -460,6 +840,13 @@ static __init int hpet_late_init(void) hpet_reserve_platform_timers(hpet_readl(HPET_ID)); + for_each_online_cpu(cpu) { + hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); + } + + /* This notifier should be called after workqueue is ready */ + hotcpu_notifier(hpet_cpuhp_notify, -20); + return 0; } fs_initcall(hpet_late_init); diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic.c index 02063ae042f..b764d7429c6 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic.c @@ -27,17 +27,21 @@ #include <linux/sched.h> #include <linux/pci.h> #include <linux/mc146818rtc.h> +#include <linux/compiler.h> #include <linux/acpi.h> +#include <linux/module.h> #include <linux/sysdev.h> #include <linux/msi.h> #include <linux/htirq.h> -#include <linux/dmar.h> -#include <linux/jiffies.h> +#include <linux/freezer.h> +#include <linux/kthread.h> +#include <linux/jiffies.h> /* time_after() */ #ifdef CONFIG_ACPI #include <acpi/acpi_bus.h> #endif #include <linux/bootmem.h> #include <linux/dmar.h> +#include <linux/hpet.h> #include <asm/idle.h> #include <asm/io.h> @@ -46,61 +50,28 @@ #include <asm/proto.h> #include <asm/acpi.h> #include <asm/dma.h> +#include <asm/timer.h> #include <asm/i8259.h> #include <asm/nmi.h> #include <asm/msidef.h> #include <asm/hypertransport.h> +#include <asm/setup.h> #include <asm/irq_remapping.h> +#include <asm/hpet.h> +#include <asm/uv/uv_hub.h> +#include <asm/uv/uv_irq.h> #include <mach_ipi.h> #include <mach_apic.h> +#include <mach_apicdef.h> #define __apicdebuginit(type) static type __init -struct irq_cfg { - cpumask_t domain; - cpumask_t old_domain; - unsigned move_cleanup_count; - u8 vector; - u8 move_in_progress : 1; -}; - -/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ -static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { - [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, - [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, - [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, - [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, - [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, - [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, - [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, - [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, - [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, - [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, - [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, - [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, - [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, - [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, - [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, - [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, -}; - -static int assign_irq_vector(int irq, cpumask_t mask); - -int first_system_vector = 0xfe; - -char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; - -int sis_apic_bug; /* not actually supported, dummy for compile */ - -static int no_timer_check; - -static int disable_timer_pin_1 __initdata; - -int timer_through_8259 __initdata; - -/* Where if anywhere is the i8259 connect in external int mode */ -static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; +/* + * Is the SiS APIC rmw bug present ? + * -1 = don't know, 0 = no, 1 = yes + */ +int sis_apic_bug = -1; static DEFINE_SPINLOCK(ioapic_lock); static DEFINE_SPINLOCK(vector_lock); @@ -110,9 +81,6 @@ static DEFINE_SPINLOCK(vector_lock); */ int nr_ioapic_registers[MAX_IO_APICS]; -/* I/O APIC RTE contents at the OS boot up */ -struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; - /* I/O APIC entries */ struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; @@ -123,11 +91,69 @@ struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; +#if defined (CONFIG_MCA) || defined (CONFIG_EISA) +int mp_bus_id_to_type[MAX_MP_BUSSES]; +#endif + DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); +int skip_ioapic_setup; + +static int __init parse_noapic(char *str) +{ + /* disable IO-APIC */ + disable_ioapic_setup(); + return 0; +} +early_param("noapic", parse_noapic); + +struct irq_pin_list; +struct irq_cfg { + unsigned int irq; + struct irq_pin_list *irq_2_pin; + cpumask_t domain; + cpumask_t old_domain; + unsigned move_cleanup_count; + u8 vector; + u8 move_in_progress : 1; +}; + +/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ +static struct irq_cfg irq_cfgx[NR_IRQS] = { + [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, + [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, + [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, + [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, + [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, + [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, + [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, + [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, + [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, + [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, + [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, + [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, + [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, + [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, + [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, + [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, +}; + +#define for_each_irq_cfg(irq, cfg) \ + for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++) + +static struct irq_cfg *irq_cfg(unsigned int irq) +{ + return irq < nr_irqs ? irq_cfgx + irq : NULL; +} + +static struct irq_cfg *irq_cfg_alloc(unsigned int irq) +{ + return irq_cfg(irq); +} + /* - * Rough estimation of how many shared IRQs there are, can - * be changed anytime. + * Rough estimation of how many shared IRQs there are, can be changed + * anytime. */ #define MAX_PLUS_SHARED_IRQS NR_IRQS #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) @@ -139,9 +165,36 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); * between pins and IRQs. */ -static struct irq_pin_list { - short apic, pin, next; -} irq_2_pin[PIN_MAP_SIZE]; +struct irq_pin_list { + int apic, pin; + struct irq_pin_list *next; +}; + +static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; +static struct irq_pin_list *irq_2_pin_ptr; + +static void __init irq_2_pin_init(void) +{ + struct irq_pin_list *pin = irq_2_pin_head; + int i; + + for (i = 1; i < PIN_MAP_SIZE; i++) + pin[i-1].next = &pin[i]; + + irq_2_pin_ptr = &pin[0]; +} + +static struct irq_pin_list *get_one_free_irq_2_pin(void) +{ + struct irq_pin_list *pin = irq_2_pin_ptr; + + if (!pin) + panic("can not get more irq_2_pin\n"); + + irq_2_pin_ptr = pin->next; + pin->next = NULL; + return pin; +} struct io_apic { unsigned int index; @@ -172,10 +225,15 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i /* * Re-write a value: to be used for read-modify-write * cycles where the read already set up the index register. + * + * Older SiS APIC requires we rewrite the index register */ -static inline void io_apic_modify(unsigned int apic, unsigned int value) +static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) { struct io_apic __iomem *io_apic = io_apic_base(apic); + + if (sis_apic_bug) + writel(reg, &io_apic->index); writel(value, &io_apic->data); } @@ -183,16 +241,17 @@ static bool io_apic_level_ack_pending(unsigned int irq) { struct irq_pin_list *entry; unsigned long flags; + struct irq_cfg *cfg = irq_cfg(irq); spin_lock_irqsave(&ioapic_lock, flags); - entry = irq_2_pin + irq; + entry = cfg->irq_2_pin; for (;;) { unsigned int reg; int pin; - pin = entry->pin; - if (pin == -1) + if (!entry) break; + pin = entry->pin; reg = io_apic_read(entry->apic, 0x10 + pin*2); /* Is the remote IRR bit set? */ if (reg & IO_APIC_REDIR_REMOTE_IRR) { @@ -201,45 +260,13 @@ static bool io_apic_level_ack_pending(unsigned int irq) } if (!entry->next) break; - entry = irq_2_pin + entry->next; + entry = entry->next; } spin_unlock_irqrestore(&ioapic_lock, flags); return false; } -/* - * Synchronize the IO-APIC and the CPU by doing - * a dummy read from the IO-APIC - */ -static inline void io_apic_sync(unsigned int apic) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - readl(&io_apic->data); -} - -#define __DO_ACTION(R, ACTION, FINAL) \ - \ -{ \ - int pin; \ - struct irq_pin_list *entry = irq_2_pin + irq; \ - \ - BUG_ON(irq >= NR_IRQS); \ - for (;;) { \ - unsigned int reg; \ - pin = entry->pin; \ - if (pin == -1) \ - break; \ - reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ - reg ACTION; \ - io_apic_modify(entry->apic, reg); \ - FINAL; \ - if (!entry->next) \ - break; \ - entry = irq_2_pin + entry->next; \ - } \ -} - union entry_union { struct { u32 w1, w2; }; struct IO_APIC_route_entry entry; @@ -299,59 +326,71 @@ static void ioapic_mask_entry(int apic, int pin) static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) { int apic, pin; - struct irq_pin_list *entry = irq_2_pin + irq; + struct irq_cfg *cfg; + struct irq_pin_list *entry; - BUG_ON(irq >= NR_IRQS); + cfg = irq_cfg(irq); + entry = cfg->irq_2_pin; for (;;) { unsigned int reg; + + if (!entry) + break; + apic = entry->apic; pin = entry->pin; - if (pin == -1) - break; +#ifdef CONFIG_INTR_REMAP /* * With interrupt-remapping, destination information comes * from interrupt-remapping table entry. */ if (!irq_remapped(irq)) io_apic_write(apic, 0x11 + pin*2, dest); +#else + io_apic_write(apic, 0x11 + pin*2, dest); +#endif reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; - io_apic_modify(apic, reg); + io_apic_modify(apic, 0x10 + pin*2, reg); if (!entry->next) break; - entry = irq_2_pin + entry->next; + entry = entry->next; } } +static int assign_irq_vector(int irq, cpumask_t mask); + static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; unsigned long flags; unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; + cfg = irq_cfg(irq); if (assign_irq_vector(irq, mask)) return; cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); - /* * Only the high 8 bits are valid. */ dest = SET_APIC_LOGICAL_ID(dest); + desc = irq_to_desc(irq); spin_lock_irqsave(&ioapic_lock, flags); __target_IO_APIC_irq(irq, dest, cfg->vector); - irq_desc[irq].affinity = mask; + desc->affinity = mask; spin_unlock_irqrestore(&ioapic_lock, flags); } -#endif +#endif /* CONFIG_SMP */ /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are @@ -360,19 +399,30 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) */ static void add_pin_to_irq(unsigned int irq, int apic, int pin) { - static int first_free_entry = NR_IRQS; - struct irq_pin_list *entry = irq_2_pin + irq; + struct irq_cfg *cfg; + struct irq_pin_list *entry; + + /* first time to refer irq_cfg, so with new */ + cfg = irq_cfg_alloc(irq); + entry = cfg->irq_2_pin; + if (!entry) { + entry = get_one_free_irq_2_pin(); + cfg->irq_2_pin = entry; + entry->apic = apic; + entry->pin = pin; + return; + } - BUG_ON(irq >= NR_IRQS); - while (entry->next) - entry = irq_2_pin + entry->next; + while (entry->next) { + /* not again, please */ + if (entry->apic == apic && entry->pin == pin) + return; - if (entry->pin != -1) { - entry->next = first_free_entry; - entry = irq_2_pin + entry->next; - if (++first_free_entry >= PIN_MAP_SIZE) - panic("io_apic.c: ran out of irq_2_pin entries!"); + entry = entry->next; } + + entry->next = get_one_free_irq_2_pin(); + entry = entry->next; entry->apic = apic; entry->pin = pin; } @@ -384,30 +434,86 @@ static void __init replace_pin_at_irq(unsigned int irq, int oldapic, int oldpin, int newapic, int newpin) { - struct irq_pin_list *entry = irq_2_pin + irq; + struct irq_cfg *cfg = irq_cfg(irq); + struct irq_pin_list *entry = cfg->irq_2_pin; + int replaced = 0; - while (1) { + while (entry) { if (entry->apic == oldapic && entry->pin == oldpin) { entry->apic = newapic; entry->pin = newpin; - } - if (!entry->next) + replaced = 1; + /* every one is different, right? */ break; - entry = irq_2_pin + entry->next; + } + entry = entry->next; + } + + /* why? call replace before add? */ + if (!replaced) + add_pin_to_irq(irq, newapic, newpin); +} + +static inline void io_apic_modify_irq(unsigned int irq, + int mask_and, int mask_or, + void (*final)(struct irq_pin_list *entry)) +{ + int pin; + struct irq_cfg *cfg; + struct irq_pin_list *entry; + + cfg = irq_cfg(irq); + for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { + unsigned int reg; + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin * 2); + reg &= mask_and; + reg |= mask_or; + io_apic_modify(entry->apic, 0x10 + pin * 2, reg); + if (final) + final(entry); } } +static void __unmask_IO_APIC_irq(unsigned int irq) +{ + io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); +} -#define DO_ACTION(name,R,ACTION, FINAL) \ - \ - static void name##_IO_APIC_irq (unsigned int irq) \ - __DO_ACTION(R, ACTION, FINAL) +#ifdef CONFIG_X86_64 +void io_apic_sync(struct irq_pin_list *entry) +{ + /* + * Synchronize the IO-APIC and the CPU by doing + * a dummy read from the IO-APIC + */ + struct io_apic __iomem *io_apic; + io_apic = io_apic_base(entry->apic); + readl(&io_apic->data); +} -/* mask = 1 */ -DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) +static void __mask_IO_APIC_irq(unsigned int irq) +{ + io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); +} +#else /* CONFIG_X86_32 */ +static void __mask_IO_APIC_irq(unsigned int irq) +{ + io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); +} -/* mask = 0 */ -DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) +static void __mask_and_edge_IO_APIC_irq(unsigned int irq) +{ + io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, + IO_APIC_REDIR_MASKED, NULL); +} + +static void __unmask_and_level_IO_APIC_irq(unsigned int irq) +{ + io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, + IO_APIC_REDIR_LEVEL_TRIGGER, NULL); +} +#endif /* CONFIG_X86_32 */ static void mask_IO_APIC_irq (unsigned int irq) { @@ -450,6 +556,68 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } +#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) +void send_IPI_self(int vector) +{ + unsigned int cfg; + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + apic_write(APIC_ICR, cfg); +} +#endif /* !CONFIG_SMP && CONFIG_X86_32*/ + +#ifdef CONFIG_X86_32 +/* + * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to + * specific CPU-side IRQs. + */ + +#define MAX_PIRQS 8 +static int pirq_entries [MAX_PIRQS]; +static int pirqs_enabled; + +static int __init ioapic_pirq_setup(char *str) +{ + int i, max; + int ints[MAX_PIRQS+1]; + + get_options(str, ARRAY_SIZE(ints), ints); + + for (i = 0; i < MAX_PIRQS; i++) + pirq_entries[i] = -1; + + pirqs_enabled = 1; + apic_printk(APIC_VERBOSE, KERN_INFO + "PIRQ redirection, working around broken MP-BIOS.\n"); + max = MAX_PIRQS; + if (ints[0] < MAX_PIRQS) + max = ints[0]; + + for (i = 0; i < max; i++) { + apic_printk(APIC_VERBOSE, KERN_DEBUG + "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); + /* + * PIRQs are mapped upside down, usually. + */ + pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; + } + return 1; +} + +__setup("pirq=", ioapic_pirq_setup); +#endif /* CONFIG_X86_32 */ + +#ifdef CONFIG_INTR_REMAP +/* I/O APIC RTE contents at the OS boot up */ +static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; + /* * Saves and masks all the unmasked IO-APIC RTE's */ @@ -474,7 +642,7 @@ int save_mask_IO_APIC_setup(void) kzalloc(sizeof(struct IO_APIC_route_entry) * nr_ioapic_registers[apic], GFP_KERNEL); if (!early_ioapic_entries[apic]) - return -ENOMEM; + goto nomem; } for (apic = 0; apic < nr_ioapics; apic++) @@ -488,17 +656,31 @@ int save_mask_IO_APIC_setup(void) ioapic_write_entry(apic, pin, entry); } } + return 0; + +nomem: + while (apic >= 0) + kfree(early_ioapic_entries[apic--]); + memset(early_ioapic_entries, 0, + ARRAY_SIZE(early_ioapic_entries)); + + return -ENOMEM; } void restore_IO_APIC_setup(void) { int apic, pin; - for (apic = 0; apic < nr_ioapics; apic++) + for (apic = 0; apic < nr_ioapics; apic++) { + if (!early_ioapic_entries[apic]) + break; for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) ioapic_write_entry(apic, pin, early_ioapic_entries[apic][pin]); + kfree(early_ioapic_entries[apic]); + early_ioapic_entries[apic] = NULL; + } } void reinit_intr_remapped_IO_APIC(int intr_remapping) @@ -512,25 +694,7 @@ void reinit_intr_remapped_IO_APIC(int intr_remapping) */ restore_IO_APIC_setup(); } - -int skip_ioapic_setup; -int ioapic_force; - -static int __init parse_noapic(char *str) -{ - disable_ioapic_setup(); - return 0; -} -early_param("noapic", parse_noapic); - -/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ -static int __init disable_timer_pin_setup(char *arg) -{ - disable_timer_pin_1 = 1; - return 1; -} -__setup("disable_timer_pin_1", disable_timer_pin_setup); - +#endif /* * Find the IRQ entry number of a certain pin. @@ -634,22 +798,54 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) best_guess = irq; } } - BUG_ON(best_guess >= NR_IRQS); return best_guess; } +EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); + +#if defined(CONFIG_EISA) || defined(CONFIG_MCA) +/* + * EISA Edge/Level control register, ELCR + */ +static int EISA_ELCR(unsigned int irq) +{ + if (irq < 16) { + unsigned int port = 0x4d0 + (irq >> 3); + return (inb(port) >> (irq & 7)) & 1; + } + apic_printk(APIC_VERBOSE, KERN_INFO + "Broken MPtable reports ISA irq %d\n", irq); + return 0; +} + +#endif + /* ISA interrupts are always polarity zero edge triggered, * when listed as conforming in the MP table. */ #define default_ISA_trigger(idx) (0) #define default_ISA_polarity(idx) (0) +/* EISA interrupts are always polarity zero and can be edge or level + * trigger depending on the ELCR value. If an interrupt is listed as + * EISA conforming in the MP table, that means its trigger type must + * be read in from the ELCR */ + +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) +#define default_EISA_polarity(idx) default_ISA_polarity(idx) + /* PCI interrupts are always polarity one level triggered, * when listed as conforming in the MP table. */ #define default_PCI_trigger(idx) (1) #define default_PCI_polarity(idx) (1) +/* MCA interrupts are always polarity zero level triggered, + * when listed as conforming in the MP table. */ + +#define default_MCA_trigger(idx) (1) +#define default_MCA_polarity(idx) default_ISA_polarity(idx) + static int MPBIOS_polarity(int idx) { int bus = mp_irqs[idx].mp_srcbus; @@ -707,6 +903,36 @@ static int MPBIOS_trigger(int idx) trigger = default_ISA_trigger(idx); else trigger = default_PCI_trigger(idx); +#if defined(CONFIG_EISA) || defined(CONFIG_MCA) + switch (mp_bus_id_to_type[bus]) { + case MP_BUS_ISA: /* ISA pin */ + { + /* set before the switch */ + break; + } + case MP_BUS_EISA: /* EISA pin */ + { + trigger = default_EISA_trigger(idx); + break; + } + case MP_BUS_PCI: /* PCI pin */ + { + /* set before the switch */ + break; + } + case MP_BUS_MCA: /* MCA pin */ + { + trigger = default_MCA_trigger(idx); + break; + } + default: + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; + break; + } + } +#endif break; case 1: /* edge */ { @@ -744,6 +970,7 @@ static inline int irq_trigger(int idx) return MPBIOS_trigger(idx); } +int (*ioapic_renumber_irq)(int ioapic, int irq); static int pin_2_irq(int idx, int apic, int pin) { int irq, i; @@ -765,8 +992,32 @@ static int pin_2_irq(int idx, int apic, int pin) while (i < apic) irq += nr_ioapic_registers[i++]; irq += pin; + /* + * For MPS mode, so far only needed by ES7000 platform + */ + if (ioapic_renumber_irq) + irq = ioapic_renumber_irq(apic, irq); } - BUG_ON(irq >= NR_IRQS); + +#ifdef CONFIG_X86_32 + /* + * PCI IRQ command line redirection. Yes, limits are hardcoded. + */ + if ((pin >= 16) && (pin <= 23)) { + if (pirq_entries[pin-16] != -1) { + if (!pirq_entries[pin-16]) { + apic_printk(APIC_VERBOSE, KERN_DEBUG + "disabling PIRQ%d\n", pin-16); + } else { + irq = pirq_entries[pin-16]; + apic_printk(APIC_VERBOSE, KERN_DEBUG + "using PIRQ%d -> IRQ %d\n", + pin-16, irq); + } + } + } +#endif + return irq; } @@ -801,8 +1052,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask) int cpu; struct irq_cfg *cfg; - BUG_ON((unsigned)irq >= NR_IRQS); - cfg = &irq_cfg[irq]; + cfg = irq_cfg(irq); /* Only try and allocate irqs on cpus that are present */ cpus_and(mask, mask, cpu_online_map); @@ -837,8 +1087,13 @@ next: } if (unlikely(current_vector == vector)) continue; +#ifdef CONFIG_X86_64 if (vector == IA32_SYSCALL_VECTOR) goto next; +#else + if (vector == SYSCALL_VECTOR) + goto next; +#endif for_each_cpu_mask_nr(new_cpu, new_mask) if (per_cpu(vector_irq, new_cpu)[vector] != -1) goto next; @@ -875,8 +1130,7 @@ static void __clear_irq_vector(int irq) cpumask_t mask; int cpu, vector; - BUG_ON((unsigned)irq >= NR_IRQS); - cfg = &irq_cfg[irq]; + cfg = irq_cfg(irq); BUG_ON(!cfg->vector); vector = cfg->vector; @@ -893,12 +1147,13 @@ void __setup_vector_irq(int cpu) /* Initialize vector_irq on a new cpu */ /* This function must be called with vector_lock held */ int irq, vector; + struct irq_cfg *cfg; /* Mark the inuse vectors */ - for (irq = 0; irq < NR_IRQS; ++irq) { - if (!cpu_isset(cpu, irq_cfg[irq].domain)) + for_each_irq_cfg(irq, cfg) { + if (!cpu_isset(cpu, cfg->domain)) continue; - vector = irq_cfg[irq].vector; + vector = cfg->vector; per_cpu(vector_irq, cpu)[vector] = irq; } /* Mark the free vectors */ @@ -906,7 +1161,9 @@ void __setup_vector_irq(int cpu) irq = per_cpu(vector_irq, cpu)[vector]; if (irq < 0) continue; - if (!cpu_isset(cpu, irq_cfg[irq].domain)) + + cfg = irq_cfg(irq); + if (!cpu_isset(cpu, cfg->domain)) per_cpu(vector_irq, cpu)[vector] = -1; } } @@ -916,16 +1173,49 @@ static struct irq_chip ioapic_chip; static struct irq_chip ir_ioapic_chip; #endif +#define IOAPIC_AUTO -1 +#define IOAPIC_EDGE 0 +#define IOAPIC_LEVEL 1 + +#ifdef CONFIG_X86_32 +static inline int IO_APIC_irq_trigger(int irq) +{ + int apic, idx, pin; + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + idx = find_irq_entry(apic, pin, mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) + return irq_trigger(idx); + } + } + /* + * nonexistent IRQs are edge default + */ + return 0; +} +#else +static inline int IO_APIC_irq_trigger(int irq) +{ + return 1; +} +#endif + static void ioapic_register_intr(int irq, unsigned long trigger) { - if (trigger) - irq_desc[irq].status |= IRQ_LEVEL; + struct irq_desc *desc; + + desc = irq_to_desc(irq); + + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) + desc->status |= IRQ_LEVEL; else - irq_desc[irq].status &= ~IRQ_LEVEL; + desc->status &= ~IRQ_LEVEL; #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { - irq_desc[irq].status |= IRQ_MOVE_PCNTXT; + desc->status |= IRQ_MOVE_PCNTXT; if (trigger) set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, handle_fasteoi_irq, @@ -936,7 +1226,8 @@ static void ioapic_register_intr(int irq, unsigned long trigger) return; } #endif - if (trigger) + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_fasteoi_irq, "fasteoi"); @@ -1009,13 +1300,15 @@ static int setup_ioapic_entry(int apic, int irq, static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, int trigger, int polarity) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; struct IO_APIC_route_entry entry; cpumask_t mask; if (!IO_APIC_IRQ(irq)) return; + cfg = irq_cfg(irq); + mask = TARGET_CPUS; if (assign_irq_vector(irq, mask)) return; @@ -1047,37 +1340,49 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, static void __init setup_IO_APIC_irqs(void) { - int apic, pin, idx, irq, first_notcon = 1; + int apic, pin, idx, irq; + int notcon = 0; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - - idx = find_irq_entry(apic,pin,mp_INT); - if (idx == -1) { - if (first_notcon) { - apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); - first_notcon = 0; - } else - apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); - continue; - } - if (!first_notcon) { - apic_printk(APIC_VERBOSE, " not connected.\n"); - first_notcon = 1; - } + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - irq = pin_2_irq(idx, apic, pin); - add_pin_to_irq(irq, apic, pin); + idx = find_irq_entry(apic, pin, mp_INT); + if (idx == -1) { + if (!notcon) { + notcon = 1; + apic_printk(APIC_VERBOSE, + KERN_DEBUG " %d-%d", + mp_ioapics[apic].mp_apicid, + pin); + } else + apic_printk(APIC_VERBOSE, " %d-%d", + mp_ioapics[apic].mp_apicid, + pin); + continue; + } + if (notcon) { + apic_printk(APIC_VERBOSE, + " (apicid-pin) not connected\n"); + notcon = 0; + } - setup_IO_APIC_irq(apic, pin, irq, - irq_trigger(idx), irq_polarity(idx)); - } + irq = pin_2_irq(idx, apic, pin); +#ifdef CONFIG_X86_32 + if (multi_timer_check(apic, irq)) + continue; +#endif + add_pin_to_irq(irq, apic, pin); + + setup_IO_APIC_irq(apic, pin, irq, + irq_trigger(idx), irq_polarity(idx)); + } } - if (!first_notcon) - apic_printk(APIC_VERBOSE, " not connected.\n"); + if (notcon) + apic_printk(APIC_VERBOSE, + " (apicid-pin) not connected\n"); } /* @@ -1088,8 +1393,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, { struct IO_APIC_route_entry entry; +#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) return; +#endif memset(&entry, 0, sizeof(entry)); @@ -1124,7 +1431,10 @@ __apicdebuginit(void) print_IO_APIC(void) union IO_APIC_reg_00 reg_00; union IO_APIC_reg_01 reg_01; union IO_APIC_reg_02 reg_02; + union IO_APIC_reg_03 reg_03; unsigned long flags; + struct irq_cfg *cfg; + unsigned int irq; if (apic_verbosity == APIC_QUIET) return; @@ -1147,12 +1457,16 @@ __apicdebuginit(void) print_IO_APIC(void) reg_01.raw = io_apic_read(apic, 1); if (reg_01.bits.version >= 0x10) reg_02.raw = io_apic_read(apic, 2); + if (reg_01.bits.version >= 0x20) + reg_03.raw = io_apic_read(apic, 3); spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); + printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); + printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); @@ -1160,11 +1474,27 @@ __apicdebuginit(void) print_IO_APIC(void) printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); - if (reg_01.bits.version >= 0x10) { + /* + * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, + * but the value of reg_02 is read as the previous read register + * value, so ignore it if reg_02 == reg_01. + */ + if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); } + /* + * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 + * or reg_03, but the value of reg_0[23] is read as the previous read + * register value, so ignore it if reg_03 == reg_0[12]. + */ + if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && + reg_03.raw != reg_01.raw) { + printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); + printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); + } + printk(KERN_DEBUG ".... IRQ redirection table:\n"); printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" @@ -1193,16 +1523,16 @@ __apicdebuginit(void) print_IO_APIC(void) } } printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for (i = 0; i < NR_IRQS; i++) { - struct irq_pin_list *entry = irq_2_pin + i; - if (entry->pin < 0) + for_each_irq_cfg(irq, cfg) { + struct irq_pin_list *entry = cfg->irq_2_pin; + if (!entry) continue; - printk(KERN_DEBUG "IRQ%d ", i); + printk(KERN_DEBUG "IRQ%d ", irq); for (;;) { printk("-> %d:%d", entry->apic, entry->pin); if (!entry->next) break; - entry = irq_2_pin + entry->next; + entry = entry->next; } printk("\n"); } @@ -1236,7 +1566,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base) __apicdebuginit(void) print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; - unsigned long icr; + u64 icr; if (apic_verbosity == APIC_QUIET) return; @@ -1253,20 +1583,31 @@ __apicdebuginit(void) print_local_APIC(void *dummy) v = apic_read(APIC_TASKPRI); printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); - v = apic_read(APIC_ARBPRI); - printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, - v & APIC_ARBPRI_MASK); - v = apic_read(APIC_PROCPRI); - printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); + if (APIC_INTEGRATED(ver)) { /* !82489DX */ + if (!APIC_XAPIC(ver)) { + v = apic_read(APIC_ARBPRI); + printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, + v & APIC_ARBPRI_MASK); + } + v = apic_read(APIC_PROCPRI); + printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); + } + + /* + * Remote read supported only in the 82489DX and local APIC for + * Pentium processors. + */ + if (!APIC_INTEGRATED(ver) || maxlvt == 3) { + v = apic_read(APIC_RRR); + printk(KERN_DEBUG "... APIC RRR: %08x\n", v); + } - v = apic_read(APIC_EOI); - printk(KERN_DEBUG "... APIC EOI: %08x\n", v); - v = apic_read(APIC_RRR); - printk(KERN_DEBUG "... APIC RRR: %08x\n", v); v = apic_read(APIC_LDR); printk(KERN_DEBUG "... APIC LDR: %08x\n", v); - v = apic_read(APIC_DFR); - printk(KERN_DEBUG "... APIC DFR: %08x\n", v); + if (!x2apic_enabled()) { + v = apic_read(APIC_DFR); + printk(KERN_DEBUG "... APIC DFR: %08x\n", v); + } v = apic_read(APIC_SPIV); printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); @@ -1277,8 +1618,13 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC IRR field:\n"); print_APIC_bitfield(APIC_IRR); - v = apic_read(APIC_ESR); - printk(KERN_DEBUG "... APIC ESR: %08x\n", v); + if (APIC_INTEGRATED(ver)) { /* !82489DX */ + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + + v = apic_read(APIC_ESR); + printk(KERN_DEBUG "... APIC ESR: %08x\n", v); + } icr = apic_icr_read(); printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); @@ -1312,7 +1658,12 @@ __apicdebuginit(void) print_local_APIC(void *dummy) __apicdebuginit(void) print_all_local_APICs(void) { - on_each_cpu(print_local_APIC, NULL, 1); + int cpu; + + preempt_disable(); + for_each_online_cpu(cpu) + smp_call_function_single(cpu, print_local_APIC, NULL, 1); + preempt_enable(); } __apicdebuginit(void) print_PIC(void) @@ -1359,17 +1710,22 @@ __apicdebuginit(int) print_all_ICs(void) fs_initcall(print_all_ICs); +/* Where if anywhere is the i8259 connect in external int mode */ +static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; + void __init enable_IO_APIC(void) { union IO_APIC_reg_01 reg_01; int i8259_apic, i8259_pin; - int i, apic; + int apic; unsigned long flags; - for (i = 0; i < PIN_MAP_SIZE; i++) { - irq_2_pin[i].pin = -1; - irq_2_pin[i].next = 0; - } +#ifdef CONFIG_X86_32 + int i; + if (!pirqs_enabled) + for (i = 0; i < MAX_PIRQS; i++) + pirq_entries[i] = -1; +#endif /* * The number of IO-APIC IRQ registers (== #pins): @@ -1399,6 +1755,10 @@ void __init enable_IO_APIC(void) } found_i8259: /* Look to see what if the MP table has reported the ExtINT */ + /* If we could not find the appropriate pin by looking at the ioapic + * the i8259 probably is not connected the ioapic but give the + * mptable a chance anyway. + */ i8259_pin = find_isa_irq_pin(0, mp_ExtINT); i8259_apic = find_isa_irq_apic(0, mp_ExtINT); /* Trust the MP table if nothing is setup in the hardware */ @@ -1458,6 +1818,133 @@ void disable_IO_APIC(void) disconnect_bsp_APIC(ioapic_i8259.pin != -1); } +#ifdef CONFIG_X86_32 +/* + * function to set the IO-APIC physical IDs based on the + * values stored in the MPC table. + * + * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 + */ + +static void __init setup_ioapic_ids_from_mpc(void) +{ + union IO_APIC_reg_00 reg_00; + physid_mask_t phys_id_present_map; + int apic; + int i; + unsigned char old_id; + unsigned long flags; + + if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) + return; + + /* + * Don't check I/O APIC IDs for xAPIC systems. They have + * no meaning without the serial APIC bus. + */ + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return; + /* + * This is broken; anything with a real cpu count has to + * circumvent this idiocy regardless. + */ + phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); + + /* + * Set the IOAPIC ID to the value stored in the MPC table. + */ + for (apic = 0; apic < nr_ioapics; apic++) { + + /* Read the register 0 value */ + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(apic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + old_id = mp_ioapics[apic].mp_apicid; + + if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { + printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", + apic, mp_ioapics[apic].mp_apicid); + printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", + reg_00.bits.ID); + mp_ioapics[apic].mp_apicid = reg_00.bits.ID; + } + + /* + * Sanity check, is the ID really free? Every APIC in a + * system must have a unique ID or we get lots of nice + * 'stuck on smp_invalidate_needed IPI wait' messages. + */ + if (check_apicid_used(phys_id_present_map, + mp_ioapics[apic].mp_apicid)) { + printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", + apic, mp_ioapics[apic].mp_apicid); + for (i = 0; i < get_physical_broadcast(); i++) + if (!physid_isset(i, phys_id_present_map)) + break; + if (i >= get_physical_broadcast()) + panic("Max APIC ID exceeded!\n"); + printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", + i); + physid_set(i, phys_id_present_map); + mp_ioapics[apic].mp_apicid = i; + } else { + physid_mask_t tmp; + tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); + apic_printk(APIC_VERBOSE, "Setting %d in the " + "phys_id_present_map\n", + mp_ioapics[apic].mp_apicid); + physids_or(phys_id_present_map, phys_id_present_map, tmp); + } + + + /* + * We need to adjust the IRQ routing table + * if the ID changed. + */ + if (old_id != mp_ioapics[apic].mp_apicid) + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mp_dstapic == old_id) + mp_irqs[i].mp_dstapic + = mp_ioapics[apic].mp_apicid; + + /* + * Read the right value from the MPC table and + * write it into the ID register. + */ + apic_printk(APIC_VERBOSE, KERN_INFO + "...changing IO-APIC physical APIC ID to %d ...", + mp_ioapics[apic].mp_apicid); + + reg_00.bits.ID = mp_ioapics[apic].mp_apicid; + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(apic, 0, reg_00.raw); + spin_unlock_irqrestore(&ioapic_lock, flags); + + /* + * Sanity check + */ + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(apic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) + printk("could not set ID!\n"); + else + apic_printk(APIC_VERBOSE, " ok.\n"); + } +} +#endif + +int no_timer_check __initdata; + +static int __init notimercheck(char *s) +{ + no_timer_check = 1; + return 1; +} +__setup("no_timer_check", notimercheck); + /* * There is a nasty bug in some older SMP boards, their mptable lies * about the timer IRQ. We do the following to work around the situation: @@ -1471,6 +1958,9 @@ static int __init timer_irq_works(void) unsigned long t1 = jiffies; unsigned long flags; + if (no_timer_check) + return 1; + local_save_flags(flags); local_irq_enable(); /* Let ten ticks pass... */ @@ -1531,9 +2021,11 @@ static unsigned int startup_ioapic_irq(unsigned int irq) return was_pending; } +#ifdef CONFIG_X86_64 static int ioapic_retrigger_irq(unsigned int irq) { - struct irq_cfg *cfg = &irq_cfg[irq]; + + struct irq_cfg *cfg = irq_cfg(irq); unsigned long flags; spin_lock_irqsave(&vector_lock, flags); @@ -1542,6 +2034,14 @@ static int ioapic_retrigger_irq(unsigned int irq) return 1; } +#else +static int ioapic_retrigger_irq(unsigned int irq) +{ + send_IPI_self(irq_cfg(irq)->vector); + + return 1; +} +#endif /* * Level and edge triggered IO-APIC interrupts need different handling, @@ -1580,11 +2080,11 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); */ static void migrate_ioapic_irq(int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg + irq; - struct irq_desc *desc = irq_desc + irq; + struct irq_cfg *cfg; + struct irq_desc *desc; cpumask_t tmp, cleanup_mask; struct irte irte; - int modify_ioapic_rte = desc->status & IRQ_LEVEL; + int modify_ioapic_rte; unsigned int dest; unsigned long flags; @@ -1598,9 +2098,12 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask) if (assign_irq_vector(irq, mask)) return; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); + desc = irq_to_desc(irq); + modify_ioapic_rte = desc->status & IRQ_LEVEL; if (modify_ioapic_rte) { spin_lock_irqsave(&ioapic_lock, flags); __target_IO_APIC_irq(irq, dest, cfg->vector); @@ -1622,18 +2125,19 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask) cfg->move_in_progress = 0; } - irq_desc[irq].affinity = mask; + desc->affinity = mask; } static int migrate_irq_remapped_level(int irq) { int ret = -1; + struct irq_desc *desc = irq_to_desc(irq); mask_IO_APIC_irq(irq); if (io_apic_level_ack_pending(irq)) { /* - * Interrupt in progress. Migrating irq now will change the + * Interrupt in progress. Migrating irq now will change the * vector information in the IO-APIC RTE and that will confuse * the EOI broadcast performed by cpu. * So, delay the irq migration to the next instance. @@ -1643,11 +2147,11 @@ static int migrate_irq_remapped_level(int irq) } /* everthing is clear. we have right of way */ - migrate_ioapic_irq(irq, irq_desc[irq].pending_mask); + migrate_ioapic_irq(irq, desc->pending_mask); ret = 0; - irq_desc[irq].status &= ~IRQ_MOVE_PENDING; - cpus_clear(irq_desc[irq].pending_mask); + desc->status &= ~IRQ_MOVE_PENDING; + cpus_clear(desc->pending_mask); unmask: unmask_IO_APIC_irq(irq); @@ -1656,10 +2160,10 @@ unmask: static void ir_irq_migration(struct work_struct *work) { - int irq; + unsigned int irq; + struct irq_desc *desc; - for (irq = 0; irq < NR_IRQS; irq++) { - struct irq_desc *desc = irq_desc + irq; + for_each_irq_desc(irq, desc) { if (desc->status & IRQ_MOVE_PENDING) { unsigned long flags; @@ -1671,8 +2175,7 @@ static void ir_irq_migration(struct work_struct *work) continue; } - desc->chip->set_affinity(irq, - irq_desc[irq].pending_mask); + desc->chip->set_affinity(irq, desc->pending_mask); spin_unlock_irqrestore(&desc->lock, flags); } } @@ -1683,9 +2186,11 @@ static void ir_irq_migration(struct work_struct *work) */ static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { - if (irq_desc[irq].status & IRQ_LEVEL) { - irq_desc[irq].status |= IRQ_MOVE_PENDING; - irq_desc[irq].pending_mask = mask; + struct irq_desc *desc = irq_to_desc(irq); + + if (desc->status & IRQ_LEVEL) { + desc->status |= IRQ_MOVE_PENDING; + desc->pending_mask = mask; migrate_irq_remapped_level(irq); return; } @@ -1698,7 +2203,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; ack_APIC_irq(); +#ifdef CONFIG_X86_64 exit_idle(); +#endif irq_enter(); me = smp_processor_id(); @@ -1707,11 +2214,12 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) struct irq_desc *desc; struct irq_cfg *cfg; irq = __get_cpu_var(vector_irq)[vector]; - if (irq >= NR_IRQS) + + desc = irq_to_desc(irq); + if (!desc) continue; - desc = irq_desc + irq; - cfg = irq_cfg + irq; + cfg = irq_cfg(irq); spin_lock(&desc->lock); if (!cfg->move_cleanup_count) goto unlock; @@ -1730,7 +2238,7 @@ unlock: static void irq_complete_move(unsigned int irq) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg = irq_cfg(irq); unsigned vector, me; if (likely(!cfg->move_in_progress)) @@ -1769,19 +2277,50 @@ static void ack_apic_edge(unsigned int irq) ack_APIC_irq(); } +atomic_t irq_mis_count; + static void ack_apic_level(unsigned int irq) { +#ifdef CONFIG_X86_32 + unsigned long v; + int i; +#endif int do_unmask_irq = 0; irq_complete_move(irq); #ifdef CONFIG_GENERIC_PENDING_IRQ /* If we are moving the irq we need to mask it */ - if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { + if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { do_unmask_irq = 1; mask_IO_APIC_irq(irq); } #endif +#ifdef CONFIG_X86_32 + /* + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. --macro + */ + i = irq_cfg(irq)->vector; + + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); +#endif + /* * We must acknowledge the irq before we move it or the acknowledge will * not propagate properly. @@ -1820,31 +2359,41 @@ static void ack_apic_level(unsigned int irq) move_masked_irq(irq); unmask_IO_APIC_irq(irq); } + +#ifdef CONFIG_X86_32 + if (!(v & (1 << (i & 0x1f)))) { + atomic_inc(&irq_mis_count); + spin_lock(&ioapic_lock); + __mask_and_edge_IO_APIC_irq(irq); + __unmask_and_level_IO_APIC_irq(irq); + spin_unlock(&ioapic_lock); + } +#endif } static struct irq_chip ioapic_chip __read_mostly = { - .name = "IO-APIC", - .startup = startup_ioapic_irq, - .mask = mask_IO_APIC_irq, - .unmask = unmask_IO_APIC_irq, - .ack = ack_apic_edge, - .eoi = ack_apic_level, + .name = "IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_apic_edge, + .eoi = ack_apic_level, #ifdef CONFIG_SMP - .set_affinity = set_ioapic_affinity_irq, + .set_affinity = set_ioapic_affinity_irq, #endif .retrigger = ioapic_retrigger_irq, }; #ifdef CONFIG_INTR_REMAP static struct irq_chip ir_ioapic_chip __read_mostly = { - .name = "IR-IO-APIC", - .startup = startup_ioapic_irq, - .mask = mask_IO_APIC_irq, - .unmask = unmask_IO_APIC_irq, - .ack = ack_x2apic_edge, - .eoi = ack_x2apic_level, + .name = "IR-IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_x2apic_edge, + .eoi = ack_x2apic_level, #ifdef CONFIG_SMP - .set_affinity = set_ir_ioapic_affinity_irq, + .set_affinity = set_ir_ioapic_affinity_irq, #endif .retrigger = ioapic_retrigger_irq, }; @@ -1853,6 +2402,8 @@ static struct irq_chip ir_ioapic_chip __read_mostly = { static inline void init_IO_APIC_traps(void) { int irq; + struct irq_desc *desc; + struct irq_cfg *cfg; /* * NOTE! The local APIC isn't very good at handling @@ -1865,8 +2416,8 @@ static inline void init_IO_APIC_traps(void) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - for (irq = 0; irq < NR_IRQS ; irq++) { - if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) { + for_each_irq_cfg(irq, cfg) { + if (IO_APIC_IRQ(irq) && !cfg->vector) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 @@ -1874,27 +2425,33 @@ static inline void init_IO_APIC_traps(void) */ if (irq < 16) make_8259A_irq(irq); - else + else { + desc = irq_to_desc(irq); /* Strange. Oh, well.. */ - irq_desc[irq].chip = &no_irq_chip; + desc->chip = &no_irq_chip; + } } } } -static void unmask_lapic_irq(unsigned int irq) +/* + * The local APIC irq-chip implementation: + */ + +static void mask_lapic_irq(unsigned int irq) { unsigned long v; v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); + apic_write(APIC_LVT0, v | APIC_LVT_MASKED); } -static void mask_lapic_irq(unsigned int irq) +static void unmask_lapic_irq(unsigned int irq) { unsigned long v; v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v | APIC_LVT_MASKED); + apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); } static void ack_lapic_irq (unsigned int irq) @@ -1911,7 +2468,10 @@ static struct irq_chip lapic_chip __read_mostly = { static void lapic_register_intr(int irq) { - irq_desc[irq].status &= ~IRQ_LEVEL; + struct irq_desc *desc; + + desc = irq_to_desc(irq); + desc->status &= ~IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge"); } @@ -1919,19 +2479,19 @@ static void lapic_register_intr(int irq) static void __init setup_nmi(void) { /* - * Dirty trick to enable the NMI watchdog ... + * Dirty trick to enable the NMI watchdog ... * We put the 8259A master into AEOI mode and * unmask on all local APICs LVT0 as NMI. * * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') * is from Maciej W. Rozycki - so we do not have to EOI from * the NMI handler or the timer interrupt. - */ - printk(KERN_INFO "activating NMI Watchdog ..."); + */ + apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); enable_NMI_through_LVT0(); - printk(" done.\n"); + apic_printk(APIC_VERBOSE, " done.\n"); } /* @@ -1948,12 +2508,17 @@ static inline void __init unlock_ExtINT_logic(void) unsigned char save_control, save_freq_select; pin = find_isa_irq_pin(8, mp_INT); + if (pin == -1) { + WARN_ON_ONCE(1); + return; + } apic = find_isa_irq_apic(8, mp_INT); - if (pin == -1) + if (apic == -1) { + WARN_ON_ONCE(1); return; + } entry0 = ioapic_read_entry(apic, pin); - clear_IO_APIC_pin(apic, pin); memset(&entry1, 0, sizeof(entry1)); @@ -1988,23 +2553,38 @@ static inline void __init unlock_ExtINT_logic(void) ioapic_write_entry(apic, pin, entry0); } +static int disable_timer_pin_1 __initdata; +/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ +static int __init disable_timer_pin_setup(char *arg) +{ + disable_timer_pin_1 = 1; + return 0; +} +early_param("disable_timer_pin_1", disable_timer_pin_setup); + +int timer_through_8259 __initdata; + /* * This code may look a bit paranoid, but it's supposed to cooperate with * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast * fanatically on his truly buggy board. * - * FIXME: really need to revamp this for modern platforms only. + * FIXME: really need to revamp this for all platforms. */ static inline void __init check_timer(void) { - struct irq_cfg *cfg = irq_cfg + 0; + struct irq_cfg *cfg = irq_cfg(0); int apic1, pin1, apic2, pin2; unsigned long flags; + unsigned int ver; int no_pin1 = 0; local_irq_save(flags); + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); + /* * get/set the timer IRQ vector: */ @@ -2013,10 +2593,18 @@ static inline void __init check_timer(void) /* * As IRQ0 is to be enabled in the 8259A, the virtual - * wire has to be disabled in the local APIC. + * wire has to be disabled in the local APIC. Also + * timer interrupts need to be acknowledged manually in + * the 8259A for the i82489DX when using the NMI + * watchdog as that APIC treats NMIs as level-triggered. + * The AEOI mode will finish them in the 8259A + * automatically. */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); +#ifdef CONFIG_X86_32 + timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); +#endif pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -2035,8 +2623,10 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { +#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) panic("BIOS bug: timer not connected to IO-APIC"); +#endif pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -2054,7 +2644,7 @@ static inline void __init check_timer(void) setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); } unmask_IO_APIC_irq(0); - if (!no_timer_check && timer_irq_works()) { + if (timer_irq_works()) { if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); enable_8259A_irq(0); @@ -2063,8 +2653,10 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } +#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) panic("timer doesn't work through Interrupt-remapped IO-APIC"); +#endif clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " @@ -2104,6 +2696,9 @@ static inline void __init check_timer(void) "through the IO-APIC - disabling NMI Watchdog!\n"); nmi_watchdog = NMI_NONE; } +#ifdef CONFIG_X86_32 + timer_ack = 0; +#endif apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); @@ -2140,13 +2735,6 @@ out: local_irq_restore(flags); } -static int __init notimercheck(char *s) -{ - no_timer_check = 1; - return 1; -} -__setup("no_timer_check", notimercheck); - /* * Traditionally ISA IRQ2 is the cascade IRQ, and is not available * to devices. However there may be an I/O APIC pin available for @@ -2164,25 +2752,49 @@ __setup("no_timer_check", notimercheck); * the I/O APIC in all cases now. No actual device should request * it anyway. --macro */ -#define PIC_IRQS (1<<2) +#define PIC_IRQS (1 << PIC_CASCADE_IR) void __init setup_IO_APIC(void) { +#ifdef CONFIG_X86_32 + enable_IO_APIC(); +#else /* * calling enable_IO_APIC() is moved to setup_local_APIC for BP */ +#endif io_apic_irqs = ~PIC_IRQS; apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); - + /* + * Set up IO-APIC IRQ routing. + */ +#ifdef CONFIG_X86_32 + if (!acpi_ioapic) + setup_ioapic_ids_from_mpc(); +#endif sync_Arb_IDs(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); } +/* + * Called after all the initialization is done. If we didnt find any + * APIC bugs then we can allow the modify fast path + */ + +static int __init io_apic_bug_finalize(void) +{ + if (sis_apic_bug == -1) + sis_apic_bug = 0; + return 0; +} + +late_initcall(io_apic_bug_finalize); + struct sysfs_ioapic_data { struct sys_device dev; struct IO_APIC_route_entry entry[0]; @@ -2270,32 +2882,51 @@ device_initcall(ioapic_init_sysfs); /* * Dynamic irq allocate and deallocation */ -int create_irq(void) +unsigned int create_irq_nr(unsigned int irq_want) { /* Allocate an unused irq */ - int irq; - int new; + unsigned int irq; + unsigned int new; unsigned long flags; + struct irq_cfg *cfg_new; + + irq_want = nr_irqs - 1; - irq = -ENOSPC; + irq = 0; spin_lock_irqsave(&vector_lock, flags); - for (new = (NR_IRQS - 1); new >= 0; new--) { + for (new = irq_want; new > 0; new--) { if (platform_legacy_irq(new)) continue; - if (irq_cfg[new].vector != 0) + cfg_new = irq_cfg(new); + if (cfg_new && cfg_new->vector != 0) continue; + /* check if need to create one */ + if (!cfg_new) + cfg_new = irq_cfg_alloc(new); if (__assign_irq_vector(new, TARGET_CPUS) == 0) irq = new; break; } spin_unlock_irqrestore(&vector_lock, flags); - if (irq >= 0) { + if (irq > 0) { dynamic_irq_init(irq); } return irq; } +int create_irq(void) +{ + int irq; + + irq = create_irq_nr(nr_irqs - 1); + + if (irq == 0) + irq = -1; + + return irq; +} + void destroy_irq(unsigned int irq) { unsigned long flags; @@ -2316,7 +2947,7 @@ void destroy_irq(unsigned int irq) #ifdef CONFIG_PCI_MSI static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; int err; unsigned dest; cpumask_t tmp; @@ -2326,6 +2957,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms if (err) return err; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); @@ -2383,10 +3015,11 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms #ifdef CONFIG_SMP static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2395,6 +3028,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) if (assign_irq_vector(irq, mask)) return; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -2406,7 +3040,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg(irq, &msg); - irq_desc[irq].affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } #ifdef CONFIG_INTR_REMAP @@ -2416,10 +3051,11 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) */ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp, cleanup_mask; struct irte irte; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2431,6 +3067,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) if (assign_irq_vector(irq, mask)) return; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -2454,7 +3091,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) cfg->move_in_progress = 0; } - irq_desc[irq].affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } #endif #endif /* CONFIG_SMP */ @@ -2507,7 +3145,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) if (index < 0) { printk(KERN_ERR "Unable to allocate %d IRTE for PCI %s\n", nvec, - pci_name(dev)); + pci_name(dev)); return -ENOSPC; } return index; @@ -2528,7 +3166,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { - struct irq_desc *desc = irq_desc + irq; + struct irq_desc *desc = irq_to_desc(irq); /* * irq migration in process context */ @@ -2538,16 +3176,34 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) #endif set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); + return 0; } +static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) +{ + unsigned int irq; + + irq = dev->bus->number; + irq <<= 8; + irq |= dev->devfn; + irq <<= 12; + + return irq; +} + int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) { - int irq, ret; + unsigned int irq; + int ret; + unsigned int irq_want; - irq = create_irq(); - if (irq < 0) - return irq; + irq_want = build_irq_for_pci_dev(dev) + 0x100; + + irq = create_irq_nr(irq_want); + if (irq == 0) + return -1; #ifdef CONFIG_INTR_REMAP if (!intr_remapping_enabled) @@ -2574,18 +3230,22 @@ error: int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - int irq, ret, sub_handle; + unsigned int irq; + int ret, sub_handle; struct msi_desc *desc; + unsigned int irq_want; + #ifdef CONFIG_INTR_REMAP struct intel_iommu *iommu = 0; int index = 0; #endif + irq_want = build_irq_for_pci_dev(dev) + 0x100; sub_handle = 0; list_for_each_entry(desc, &dev->msi_list, list) { - irq = create_irq(); - if (irq < 0) - return irq; + irq = create_irq_nr(irq_want--); + if (irq == 0) + return -1; #ifdef CONFIG_INTR_REMAP if (!intr_remapping_enabled) goto no_ir; @@ -2636,10 +3296,11 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_SMP static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2648,6 +3309,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) if (assign_irq_vector(irq, mask)) return; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -2659,7 +3321,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); - irq_desc[irq].affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } #endif /* CONFIG_SMP */ @@ -2689,6 +3352,69 @@ int arch_setup_dmar_msi(unsigned int irq) } #endif +#ifdef CONFIG_HPET_TIMER + +#ifdef CONFIG_SMP +static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + struct irq_desc *desc; + struct msi_msg msg; + unsigned int dest; + cpumask_t tmp; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + hpet_msi_read(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + hpet_msi_write(irq, &msg); + desc = irq_to_desc(irq); + desc->affinity = mask; +} +#endif /* CONFIG_SMP */ + +struct irq_chip hpet_msi_type = { + .name = "HPET_MSI", + .unmask = hpet_msi_unmask, + .mask = hpet_msi_mask, + .ack = ack_apic_edge, +#ifdef CONFIG_SMP + .set_affinity = hpet_msi_set_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +int arch_setup_hpet_msi(unsigned int irq) +{ + int ret; + struct msi_msg msg; + + ret = msi_compose_msg(NULL, irq, &msg); + if (ret < 0) + return ret; + + hpet_msi_write(irq, &msg); + set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq, + "edge"); + + return 0; +} +#endif + #endif /* CONFIG_PCI_MSI */ /* * Hypertransport interrupt support @@ -2713,9 +3439,10 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2724,11 +3451,13 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) if (assign_irq_vector(irq, mask)) return; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); target_ht_irq(irq, dest, cfg->vector); - irq_desc[irq].affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } #endif @@ -2745,7 +3474,7 @@ static struct irq_chip ht_irq_chip = { int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; int err; cpumask_t tmp; @@ -2755,6 +3484,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) struct ht_irq_msg msg; unsigned dest; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); @@ -2777,20 +3507,196 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) set_irq_chip_and_handler_name(irq, &ht_irq_chip, handle_edge_irq, "edge"); + + dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); } return err; } #endif /* CONFIG_HT_IRQ */ +#ifdef CONFIG_X86_64 +/* + * Re-target the irq to the specified CPU and enable the specified MMR located + * on the specified blade to allow the sending of MSIs to the specified CPU. + */ +int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, + unsigned long mmr_offset) +{ + const cpumask_t *eligible_cpu = get_cpu_mask(cpu); + struct irq_cfg *cfg; + int mmr_pnode; + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + unsigned long flags; + int err; + + err = assign_irq_vector(irq, *eligible_cpu); + if (err != 0) + return err; + + spin_lock_irqsave(&vector_lock, flags); + set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, + irq_name); + spin_unlock_irqrestore(&vector_lock, flags); + + cfg = irq_cfg(irq); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); + + entry->vector = cfg->vector; + entry->delivery_mode = INT_DELIVERY_MODE; + entry->dest_mode = INT_DEST_MODE; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = cpu_mask_to_apicid(*eligible_cpu); + + mmr_pnode = uv_blade_to_pnode(mmr_blade); + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + return irq; +} + +/* + * Disable the specified MMR located on the specified blade so that MSIs are + * longer allowed to be sent. + */ +void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) +{ + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + int mmr_pnode; + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); + + entry->mask = 1; + + mmr_pnode = uv_blade_to_pnode(mmr_blade); + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); +} +#endif /* CONFIG_X86_64 */ + +int __init io_apic_get_redir_entries (int ioapic) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(ioapic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return reg_01.bits.entries; +} + +int __init probe_nr_irqs(void) +{ + int idx; + int nr = 0; +#ifndef CONFIG_XEN + int nr_min = 32; +#else + int nr_min = NR_IRQS; +#endif + + for (idx = 0; idx < nr_ioapics; idx++) + nr += io_apic_get_redir_entries(idx) + 1; + + /* double it for hotplug and msi and nmi */ + nr <<= 1; + + /* something wrong ? */ + if (nr < nr_min) + nr = nr_min; + + return nr; +} + /* -------------------------------------------------------------------------- ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ #ifdef CONFIG_ACPI -#define IO_APIC_MAX_ID 0xFE +#ifdef CONFIG_X86_32 +int __init io_apic_get_unique_id(int ioapic, int apic_id) +{ + union IO_APIC_reg_00 reg_00; + static physid_mask_t apic_id_map = PHYSID_MASK_NONE; + physid_mask_t tmp; + unsigned long flags; + int i = 0; -int __init io_apic_get_redir_entries (int ioapic) + /* + * The P4 platform supports up to 256 APIC IDs on two separate APIC + * buses (one for LAPICs, one for IOAPICs), where predecessors only + * supports up to 16 on one shared APIC bus. + * + * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full + * advantage of new APIC bus architecture. + */ + + if (physids_empty(apic_id_map)) + apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); + + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(ioapic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + if (apic_id >= get_physical_broadcast()) { + printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " + "%d\n", ioapic, apic_id, reg_00.bits.ID); + apic_id = reg_00.bits.ID; + } + + /* + * Every APIC in a system must have a unique ID or we get lots of nice + * 'stuck on smp_invalidate_needed IPI wait' messages. + */ + if (check_apicid_used(apic_id_map, apic_id)) { + + for (i = 0; i < get_physical_broadcast(); i++) { + if (!check_apicid_used(apic_id_map, i)) + break; + } + + if (i == get_physical_broadcast()) + panic("Max apic_id exceeded!\n"); + + printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " + "trying %d\n", ioapic, apic_id, i); + + apic_id = i; + } + + tmp = apicid_to_cpu_present(apic_id); + physids_or(apic_id_map, apic_id_map, tmp); + + if (reg_00.bits.ID != apic_id) { + reg_00.bits.ID = apic_id; + + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(ioapic, 0, reg_00.raw); + reg_00.raw = io_apic_read(ioapic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + /* Sanity check */ + if (reg_00.bits.ID != apic_id) { + printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); + return -1; + } + } + + apic_printk(APIC_VERBOSE, KERN_INFO + "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); + + return apic_id; +} + +int __init io_apic_get_version(int ioapic) { union IO_APIC_reg_01 reg_01; unsigned long flags; @@ -2799,9 +3705,9 @@ int __init io_apic_get_redir_entries (int ioapic) reg_01.raw = io_apic_read(ioapic, 1); spin_unlock_irqrestore(&ioapic_lock, flags); - return reg_01.bits.entries; + return reg_01.bits.version; } - +#endif int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) { @@ -2853,6 +3759,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; + struct irq_cfg *cfg; if (skip_ioapic_setup == 1) return; @@ -2868,7 +3775,8 @@ void __init setup_ioapic_dest(void) * when you have too many devices, because at that time only boot * cpu is online. */ - if (!irq_cfg[irq].vector) + cfg = irq_cfg(irq); + if (!cfg->vector) setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), irq_polarity(irq_entry)); @@ -2926,18 +3834,33 @@ void __init ioapic_init_mappings(void) struct resource *ioapic_res; int i; + irq_2_pin_init(); ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].mp_apicaddr; +#ifdef CONFIG_X86_32 + if (!ioapic_phys) { + printk(KERN_ERR + "WARNING: bogus zero IO-APIC " + "address found in MPTABLE, " + "disabling IO/APIC support!\n"); + smp_found_config = 0; + skip_ioapic_setup = 1; + goto fake_ioapic_page; + } +#endif } else { +#ifdef CONFIG_X86_32 +fake_ioapic_page: +#endif ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); apic_printk(APIC_VERBOSE, - "mapped IOAPIC to %016lx (%016lx)\n", + "mapped IOAPIC to %08lx (%08lx)\n", __fix_to_virt(idx), ioapic_phys); idx++; @@ -2971,4 +3894,3 @@ static int __init ioapic_insert_resources(void) /* Insert the IO APIC resources after PCI initialization has occured to handle * IO APICS that are mapped in on a BAR in PCI space. */ late_initcall(ioapic_insert_resources); - diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c deleted file mode 100644 index e710289f673..00000000000 --- a/arch/x86/kernel/io_apic_32.c +++ /dev/null @@ -1,2908 +0,0 @@ -/* - * Intel IO-APIC support for multi-Pentium hosts. - * - * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo - * - * Many thanks to Stig Venaas for trying out countless experimental - * patches and reporting/debugging problems patiently! - * - * (c) 1999, Multiple IO-APIC support, developed by - * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and - * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>, - * further tested and cleaned up by Zach Brown <zab@redhat.com> - * and Ingo Molnar <mingo@redhat.com> - * - * Fixes - * Maciej W. Rozycki : Bits for genuine 82489DX APICs; - * thanks to Eric Gilmore - * and Rolf G. Tews - * for testing these extensively - * Paul Diefenbaugh : Added full ACPI support - */ - -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/delay.h> -#include <linux/sched.h> -#include <linux/bootmem.h> -#include <linux/mc146818rtc.h> -#include <linux/compiler.h> -#include <linux/acpi.h> -#include <linux/module.h> -#include <linux/sysdev.h> -#include <linux/pci.h> -#include <linux/msi.h> -#include <linux/htirq.h> -#include <linux/freezer.h> -#include <linux/kthread.h> -#include <linux/jiffies.h> /* time_after() */ - -#include <asm/io.h> -#include <asm/smp.h> -#include <asm/desc.h> -#include <asm/timer.h> -#include <asm/i8259.h> -#include <asm/nmi.h> -#include <asm/msidef.h> -#include <asm/hypertransport.h> -#include <asm/setup.h> - -#include <mach_apic.h> -#include <mach_apicdef.h> - -#define __apicdebuginit(type) static type __init - -int (*ioapic_renumber_irq)(int ioapic, int irq); -atomic_t irq_mis_count; - -/* Where if anywhere is the i8259 connect in external int mode */ -static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; - -static DEFINE_SPINLOCK(ioapic_lock); -DEFINE_SPINLOCK(vector_lock); - -int timer_through_8259 __initdata; - -/* - * Is the SiS APIC rmw bug present ? - * -1 = don't know, 0 = no, 1 = yes - */ -int sis_apic_bug = -1; - -/* - * # of IRQ routing registers - */ -int nr_ioapic_registers[MAX_IO_APICS]; - -/* I/O APIC entries */ -struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; -int nr_ioapics; - -/* MP IRQ source entries */ -struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; - -/* # of MP IRQ source entries */ -int mp_irq_entries; - -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) -int mp_bus_id_to_type[MAX_MP_BUSSES]; -#endif - -DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); - -static int disable_timer_pin_1 __initdata; - -/* - * Rough estimation of how many shared IRQs there are, can - * be changed anytime. - */ -#define MAX_PLUS_SHARED_IRQS NR_IRQS -#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) - -/* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. - */ - -static struct irq_pin_list { - int apic, pin, next; -} irq_2_pin[PIN_MAP_SIZE]; - -struct io_apic { - unsigned int index; - unsigned int unused[3]; - unsigned int data; -}; - -static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) -{ - return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); -} - -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - return readl(&io_apic->data); -} - -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - writel(value, &io_apic->data); -} - -/* - * Re-write a value: to be used for read-modify-write - * cycles where the read already set up the index register. - * - * Older SiS APIC requires we rewrite the index register - */ -static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) -{ - volatile struct io_apic __iomem *io_apic = io_apic_base(apic); - if (sis_apic_bug) - writel(reg, &io_apic->index); - writel(value, &io_apic->data); -} - -union entry_union { - struct { u32 w1, w2; }; - struct IO_APIC_route_entry entry; -}; - -static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) -{ - union entry_union eu; - unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); - eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); - eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); - return eu.entry; -} - -/* - * When we write a new IO APIC routing entry, we need to write the high - * word first! If the mask bit in the low word is clear, we will enable - * the interrupt, and we need to make sure the entry is fully populated - * before that happens. - */ -static void -__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) -{ - union entry_union eu; - eu.entry = e; - io_apic_write(apic, 0x11 + 2*pin, eu.w2); - io_apic_write(apic, 0x10 + 2*pin, eu.w1); -} - -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) -{ - unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); - __ioapic_write_entry(apic, pin, e); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -/* - * When we mask an IO APIC routing entry, we need to write the low - * word first, in order to set the mask bit before we change the - * high bits! - */ -static void ioapic_mask_entry(int apic, int pin) -{ - unsigned long flags; - union entry_union eu = { .entry.mask = 1 }; - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x10 + 2*pin, eu.w1); - io_apic_write(apic, 0x11 + 2*pin, eu.w2); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -/* - * The common case is 1:1 IRQ<->pin mappings. Sometimes there are - * shared ISA-space IRQs, so we have to support them. We are super - * fast in the common case, and fast for shared ISA-space IRQs. - */ -static void add_pin_to_irq(unsigned int irq, int apic, int pin) -{ - static int first_free_entry = NR_IRQS; - struct irq_pin_list *entry = irq_2_pin + irq; - - while (entry->next) - entry = irq_2_pin + entry->next; - - if (entry->pin != -1) { - entry->next = first_free_entry; - entry = irq_2_pin + entry->next; - if (++first_free_entry >= PIN_MAP_SIZE) - panic("io_apic.c: whoops"); - } - entry->apic = apic; - entry->pin = pin; -} - -/* - * Reroute an IRQ to a different pin. - */ -static void __init replace_pin_at_irq(unsigned int irq, - int oldapic, int oldpin, - int newapic, int newpin) -{ - struct irq_pin_list *entry = irq_2_pin + irq; - - while (1) { - if (entry->apic == oldapic && entry->pin == oldpin) { - entry->apic = newapic; - entry->pin = newpin; - } - if (!entry->next) - break; - entry = irq_2_pin + entry->next; - } -} - -static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) -{ - struct irq_pin_list *entry = irq_2_pin + irq; - unsigned int pin, reg; - - for (;;) { - pin = entry->pin; - if (pin == -1) - break; - reg = io_apic_read(entry->apic, 0x10 + pin*2); - reg &= ~disable; - reg |= enable; - io_apic_modify(entry->apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = irq_2_pin + entry->next; - } -} - -/* mask = 1 */ -static void __mask_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0); -} - -/* mask = 0 */ -static void __unmask_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED); -} - -/* mask = 1, trigger = 0 */ -static void __mask_and_edge_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, - IO_APIC_REDIR_LEVEL_TRIGGER); -} - -/* mask = 0, trigger = 1 */ -static void __unmask_and_level_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER, - IO_APIC_REDIR_MASKED); -} - -static void mask_IO_APIC_irq(unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - __mask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void unmask_IO_APIC_irq(unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - __unmask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) -{ - struct IO_APIC_route_entry entry; - - /* Check delivery_mode to be sure we're not clearing an SMI pin */ - entry = ioapic_read_entry(apic, pin); - if (entry.delivery_mode == dest_SMI) - return; - - /* - * Disable it in the IO-APIC irq-routing table: - */ - ioapic_mask_entry(apic, pin); -} - -static void clear_IO_APIC(void) -{ - int apic, pin; - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - clear_IO_APIC_pin(apic, pin); -} - -#ifdef CONFIG_SMP -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) -{ - unsigned long flags; - int pin; - struct irq_pin_list *entry = irq_2_pin + irq; - unsigned int apicid_value; - cpumask_t tmp; - - cpus_and(tmp, cpumask, cpu_online_map); - if (cpus_empty(tmp)) - tmp = TARGET_CPUS; - - cpus_and(cpumask, tmp, CPU_MASK_ALL); - - apicid_value = cpu_mask_to_apicid(cpumask); - /* Prepare to do the io_apic_write */ - apicid_value = apicid_value << 24; - spin_lock_irqsave(&ioapic_lock, flags); - for (;;) { - pin = entry->pin; - if (pin == -1) - break; - io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value); - if (!entry->next) - break; - entry = irq_2_pin + entry->next; - } - irq_desc[irq].affinity = cpumask; - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -#if defined(CONFIG_IRQBALANCE) -# include <asm/processor.h> /* kernel_thread() */ -# include <linux/kernel_stat.h> /* kstat */ -# include <linux/slab.h> /* kmalloc() */ -# include <linux/timer.h> - -#define IRQBALANCE_CHECK_ARCH -999 -#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) -#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) -#define BALANCED_IRQ_MORE_DELTA (HZ/10) -#define BALANCED_IRQ_LESS_DELTA (HZ) - -static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH; -static int physical_balance __read_mostly; -static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; - -static struct irq_cpu_info { - unsigned long *last_irq; - unsigned long *irq_delta; - unsigned long irq; -} irq_cpu_data[NR_CPUS]; - -#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) -#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq]) -#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq]) - -#define IDLE_ENOUGH(cpu,now) \ - (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) - -#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) - -#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i))) - -static cpumask_t balance_irq_affinity[NR_IRQS] = { - [0 ... NR_IRQS-1] = CPU_MASK_ALL -}; - -void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) -{ - balance_irq_affinity[irq] = mask; -} - -static unsigned long move(int curr_cpu, cpumask_t allowed_mask, - unsigned long now, int direction) -{ - int search_idle = 1; - int cpu = curr_cpu; - - goto inside; - - do { - if (unlikely(cpu == curr_cpu)) - search_idle = 0; -inside: - if (direction == 1) { - cpu++; - if (cpu >= NR_CPUS) - cpu = 0; - } else { - cpu--; - if (cpu == -1) - cpu = NR_CPUS-1; - } - } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) || - (search_idle && !IDLE_ENOUGH(cpu, now))); - - return cpu; -} - -static inline void balance_irq(int cpu, int irq) -{ - unsigned long now = jiffies; - cpumask_t allowed_mask; - unsigned int new_cpu; - - if (irqbalance_disabled) - return; - - cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); - new_cpu = move(cpu, allowed_mask, now, 1); - if (cpu != new_cpu) - set_pending_irq(irq, cpumask_of_cpu(new_cpu)); -} - -static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) -{ - int i, j; - - for_each_online_cpu(i) { - for (j = 0; j < NR_IRQS; j++) { - if (!irq_desc[j].action) - continue; - /* Is it a significant load ? */ - if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) < - useful_load_threshold) - continue; - balance_irq(i, j); - } - } - balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, - balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); - return; -} - -static void do_irq_balance(void) -{ - int i, j; - unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); - unsigned long move_this_load = 0; - int max_loaded = 0, min_loaded = 0; - int load; - unsigned long useful_load_threshold = balanced_irq_interval + 10; - int selected_irq; - int tmp_loaded, first_attempt = 1; - unsigned long tmp_cpu_irq; - unsigned long imbalance = 0; - cpumask_t allowed_mask, target_cpu_mask, tmp; - - for_each_possible_cpu(i) { - int package_index; - CPU_IRQ(i) = 0; - if (!cpu_online(i)) - continue; - package_index = CPU_TO_PACKAGEINDEX(i); - for (j = 0; j < NR_IRQS; j++) { - unsigned long value_now, delta; - /* Is this an active IRQ or balancing disabled ? */ - if (!irq_desc[j].action || irq_balancing_disabled(j)) - continue; - if (package_index == i) - IRQ_DELTA(package_index, j) = 0; - /* Determine the total count per processor per IRQ */ - value_now = (unsigned long) kstat_cpu(i).irqs[j]; - - /* Determine the activity per processor per IRQ */ - delta = value_now - LAST_CPU_IRQ(i, j); - - /* Update last_cpu_irq[][] for the next time */ - LAST_CPU_IRQ(i, j) = value_now; - - /* Ignore IRQs whose rate is less than the clock */ - if (delta < useful_load_threshold) - continue; - /* update the load for the processor or package total */ - IRQ_DELTA(package_index, j) += delta; - - /* Keep track of the higher numbered sibling as well */ - if (i != package_index) - CPU_IRQ(i) += delta; - /* - * We have sibling A and sibling B in the package - * - * cpu_irq[A] = load for cpu A + load for cpu B - * cpu_irq[B] = load for cpu B - */ - CPU_IRQ(package_index) += delta; - } - } - /* Find the least loaded processor package */ - for_each_online_cpu(i) { - if (i != CPU_TO_PACKAGEINDEX(i)) - continue; - if (min_cpu_irq > CPU_IRQ(i)) { - min_cpu_irq = CPU_IRQ(i); - min_loaded = i; - } - } - max_cpu_irq = ULONG_MAX; - -tryanothercpu: - /* - * Look for heaviest loaded processor. - * We may come back to get the next heaviest loaded processor. - * Skip processors with trivial loads. - */ - tmp_cpu_irq = 0; - tmp_loaded = -1; - for_each_online_cpu(i) { - if (i != CPU_TO_PACKAGEINDEX(i)) - continue; - if (max_cpu_irq <= CPU_IRQ(i)) - continue; - if (tmp_cpu_irq < CPU_IRQ(i)) { - tmp_cpu_irq = CPU_IRQ(i); - tmp_loaded = i; - } - } - - if (tmp_loaded == -1) { - /* - * In the case of small number of heavy interrupt sources, - * loading some of the cpus too much. We use Ingo's original - * approach to rotate them around. - */ - if (!first_attempt && imbalance >= useful_load_threshold) { - rotate_irqs_among_cpus(useful_load_threshold); - return; - } - goto not_worth_the_effort; - } - - first_attempt = 0; /* heaviest search */ - max_cpu_irq = tmp_cpu_irq; /* load */ - max_loaded = tmp_loaded; /* processor */ - imbalance = (max_cpu_irq - min_cpu_irq) / 2; - - /* - * if imbalance is less than approx 10% of max load, then - * observe diminishing returns action. - quit - */ - if (imbalance < (max_cpu_irq >> 3)) - goto not_worth_the_effort; - -tryanotherirq: - /* if we select an IRQ to move that can't go where we want, then - * see if there is another one to try. - */ - move_this_load = 0; - selected_irq = -1; - for (j = 0; j < NR_IRQS; j++) { - /* Is this an active IRQ? */ - if (!irq_desc[j].action) - continue; - if (imbalance <= IRQ_DELTA(max_loaded, j)) - continue; - /* Try to find the IRQ that is closest to the imbalance - * without going over. - */ - if (move_this_load < IRQ_DELTA(max_loaded, j)) { - move_this_load = IRQ_DELTA(max_loaded, j); - selected_irq = j; - } - } - if (selected_irq == -1) - goto tryanothercpu; - - imbalance = move_this_load; - - /* For physical_balance case, we accumulated both load - * values in the one of the siblings cpu_irq[], - * to use the same code for physical and logical processors - * as much as possible. - * - * NOTE: the cpu_irq[] array holds the sum of the load for - * sibling A and sibling B in the slot for the lowest numbered - * sibling (A), _AND_ the load for sibling B in the slot for - * the higher numbered sibling. - * - * We seek the least loaded sibling by making the comparison - * (A+B)/2 vs B - */ - load = CPU_IRQ(min_loaded) >> 1; - for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) { - if (load > CPU_IRQ(j)) { - /* This won't change cpu_sibling_map[min_loaded] */ - load = CPU_IRQ(j); - min_loaded = j; - } - } - - cpus_and(allowed_mask, - cpu_online_map, - balance_irq_affinity[selected_irq]); - target_cpu_mask = cpumask_of_cpu(min_loaded); - cpus_and(tmp, target_cpu_mask, allowed_mask); - - if (!cpus_empty(tmp)) { - /* mark for change destination */ - set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); - - /* Since we made a change, come back sooner to - * check for more variation. - */ - balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, - balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); - return; - } - goto tryanotherirq; - -not_worth_the_effort: - /* - * if we did not find an IRQ to move, then adjust the time interval - * upward - */ - balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, - balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); - return; -} - -static int balanced_irq(void *unused) -{ - int i; - unsigned long prev_balance_time = jiffies; - long time_remaining = balanced_irq_interval; - - /* push everything to CPU 0 to give us a starting point. */ - for (i = 0 ; i < NR_IRQS ; i++) { - irq_desc[i].pending_mask = cpumask_of_cpu(0); - set_pending_irq(i, cpumask_of_cpu(0)); - } - - set_freezable(); - for ( ; ; ) { - time_remaining = schedule_timeout_interruptible(time_remaining); - try_to_freeze(); - if (time_after(jiffies, - prev_balance_time+balanced_irq_interval)) { - preempt_disable(); - do_irq_balance(); - prev_balance_time = jiffies; - time_remaining = balanced_irq_interval; - preempt_enable(); - } - } - return 0; -} - -static int __init balanced_irq_init(void) -{ - int i; - struct cpuinfo_x86 *c; - cpumask_t tmp; - - cpus_shift_right(tmp, cpu_online_map, 2); - c = &boot_cpu_data; - /* When not overwritten by the command line ask subarchitecture. */ - if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) - irqbalance_disabled = NO_BALANCE_IRQ; - if (irqbalance_disabled) - return 0; - - /* disable irqbalance completely if there is only one processor online */ - if (num_online_cpus() < 2) { - irqbalance_disabled = 1; - return 0; - } - /* - * Enable physical balance only if more than 1 physical processor - * is present - */ - if (smp_num_siblings > 1 && !cpus_empty(tmp)) - physical_balance = 1; - - for_each_online_cpu(i) { - irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); - irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); - if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { - printk(KERN_ERR "balanced_irq_init: out of memory"); - goto failed; - } - } - - printk(KERN_INFO "Starting balanced_irq\n"); - if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) - return 0; - printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); -failed: - for_each_possible_cpu(i) { - kfree(irq_cpu_data[i].irq_delta); - irq_cpu_data[i].irq_delta = NULL; - kfree(irq_cpu_data[i].last_irq); - irq_cpu_data[i].last_irq = NULL; - } - return 0; -} - -int __devinit irqbalance_disable(char *str) -{ - irqbalance_disabled = 1; - return 1; -} - -__setup("noirqbalance", irqbalance_disable); - -late_initcall(balanced_irq_init); -#endif /* CONFIG_IRQBALANCE */ -#endif /* CONFIG_SMP */ - -#ifndef CONFIG_SMP -void send_IPI_self(int vector) -{ - unsigned int cfg; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write(APIC_ICR, cfg); -} -#endif /* !CONFIG_SMP */ - - -/* - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to - * specific CPU-side IRQs. - */ - -#define MAX_PIRQS 8 -static int pirq_entries [MAX_PIRQS]; -static int pirqs_enabled; -int skip_ioapic_setup; - -static int __init ioapic_pirq_setup(char *str) -{ - int i, max; - int ints[MAX_PIRQS+1]; - - get_options(str, ARRAY_SIZE(ints), ints); - - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; - - pirqs_enabled = 1; - apic_printk(APIC_VERBOSE, KERN_INFO - "PIRQ redirection, working around broken MP-BIOS.\n"); - max = MAX_PIRQS; - if (ints[0] < MAX_PIRQS) - max = ints[0]; - - for (i = 0; i < max; i++) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); - /* - * PIRQs are mapped upside down, usually. - */ - pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; - } - return 1; -} - -__setup("pirq=", ioapic_pirq_setup); - -/* - * Find the IRQ entry number of a certain pin. - */ -static int find_irq_entry(int apic, int pin, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == type && - (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) && - mp_irqs[i].mp_dstirq == pin) - return i; - - return -1; -} - -/* - * Find the pin to which IRQ[irq] (ISA) is connected - */ -static int __init find_isa_irq_pin(int irq, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; - - if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) - - return mp_irqs[i].mp_dstirq; - } - return -1; -} - -static int __init find_isa_irq_apic(int irq, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; - - if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) - break; - } - if (i < mp_irq_entries) { - int apic; - for (apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) - return apic; - } - } - - return -1; -} - -/* - * Find a specific PCI IRQ entry. - * Not an __init, possibly needed by modules - */ -static int pin_2_irq(int idx, int apic, int pin); - -int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) -{ - int apic, i, best_guess = -1; - - apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " - "slot:%d, pin:%d.\n", bus, slot, pin); - if (test_bit(bus, mp_bus_not_pci)) { - printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); - return -1; - } - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; - - for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) - break; - - if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mp_irqtype && - (bus == lbus) && - (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq); - - if (!(apic || IO_APIC_IRQ(irq))) - continue; - - if (pin == (mp_irqs[i].mp_srcbusirq & 3)) - return irq; - /* - * Use the first all-but-pin matching entry as a - * best-guess fuzzy result for broken mptables. - */ - if (best_guess < 0) - best_guess = irq; - } - } - return best_guess; -} -EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); - -/* - * This function currently is only a helper for the i386 smp boot process where - * we need to reprogram the ioredtbls to cater for the cpus which have come online - * so mask in all cases should simply be TARGET_CPUS - */ -#ifdef CONFIG_SMP -void __init setup_ioapic_dest(void) -{ - int pin, ioapic, irq, irq_entry; - - if (skip_ioapic_setup == 1) - return; - - for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { - for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { - irq_entry = find_irq_entry(ioapic, pin, mp_INT); - if (irq_entry == -1) - continue; - irq = pin_2_irq(irq_entry, ioapic, pin); - set_ioapic_affinity_irq(irq, TARGET_CPUS); - } - - } -} -#endif - -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) -/* - * EISA Edge/Level control register, ELCR - */ -static int EISA_ELCR(unsigned int irq) -{ - if (irq < 16) { - unsigned int port = 0x4d0 + (irq >> 3); - return (inb(port) >> (irq & 7)) & 1; - } - apic_printk(APIC_VERBOSE, KERN_INFO - "Broken MPtable reports ISA irq %d\n", irq); - return 0; -} -#endif - -/* ISA interrupts are always polarity zero edge triggered, - * when listed as conforming in the MP table. */ - -#define default_ISA_trigger(idx) (0) -#define default_ISA_polarity(idx) (0) - -/* EISA interrupts are always polarity zero and can be edge or level - * trigger depending on the ELCR value. If an interrupt is listed as - * EISA conforming in the MP table, that means its trigger type must - * be read in from the ELCR */ - -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) -#define default_EISA_polarity(idx) default_ISA_polarity(idx) - -/* PCI interrupts are always polarity one level triggered, - * when listed as conforming in the MP table. */ - -#define default_PCI_trigger(idx) (1) -#define default_PCI_polarity(idx) (1) - -/* MCA interrupts are always polarity zero level triggered, - * when listed as conforming in the MP table. */ - -#define default_MCA_trigger(idx) (1) -#define default_MCA_polarity(idx) default_ISA_polarity(idx) - -static int MPBIOS_polarity(int idx) -{ - int bus = mp_irqs[idx].mp_srcbus; - int polarity; - - /* - * Determine IRQ line polarity (high active or low active): - */ - switch (mp_irqs[idx].mp_irqflag & 3) { - case 0: /* conforms, ie. bus-type dependent polarity */ - { - polarity = test_bit(bus, mp_bus_not_pci)? - default_ISA_polarity(idx): - default_PCI_polarity(idx); - break; - } - case 1: /* high active */ - { - polarity = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - case 3: /* low active */ - { - polarity = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - } - return polarity; -} - -static int MPBIOS_trigger(int idx) -{ - int bus = mp_irqs[idx].mp_srcbus; - int trigger; - - /* - * Determine IRQ trigger mode (edge or level sensitive): - */ - switch ((mp_irqs[idx].mp_irqflag>>2) & 3) { - case 0: /* conforms, ie. bus-type dependent */ - { - trigger = test_bit(bus, mp_bus_not_pci)? - default_ISA_trigger(idx): - default_PCI_trigger(idx); -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) - switch (mp_bus_id_to_type[bus]) { - case MP_BUS_ISA: /* ISA pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_EISA: /* EISA pin */ - { - trigger = default_EISA_trigger(idx); - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_MCA: /* MCA pin */ - { - trigger = default_MCA_trigger(idx); - break; - } - default: - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - } -#endif - break; - } - case 1: /* edge */ - { - trigger = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - case 3: /* level */ - { - trigger = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 0; - break; - } - } - return trigger; -} - -static inline int irq_polarity(int idx) -{ - return MPBIOS_polarity(idx); -} - -static inline int irq_trigger(int idx) -{ - return MPBIOS_trigger(idx); -} - -static int pin_2_irq(int idx, int apic, int pin) -{ - int irq, i; - int bus = mp_irqs[idx].mp_srcbus; - - /* - * Debugging check, we are in big trouble if this message pops up! - */ - if (mp_irqs[idx].mp_dstirq != pin) - printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); - - if (test_bit(bus, mp_bus_not_pci)) - irq = mp_irqs[idx].mp_srcbusirq; - else { - /* - * PCI IRQs are mapped in order - */ - i = irq = 0; - while (i < apic) - irq += nr_ioapic_registers[i++]; - irq += pin; - - /* - * For MPS mode, so far only needed by ES7000 platform - */ - if (ioapic_renumber_irq) - irq = ioapic_renumber_irq(apic, irq); - } - - /* - * PCI IRQ command line redirection. Yes, limits are hardcoded. - */ - if ((pin >= 16) && (pin <= 23)) { - if (pirq_entries[pin-16] != -1) { - if (!pirq_entries[pin-16]) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - "disabling PIRQ%d\n", pin-16); - } else { - irq = pirq_entries[pin-16]; - apic_printk(APIC_VERBOSE, KERN_DEBUG - "using PIRQ%d -> IRQ %d\n", - pin-16, irq); - } - } - } - return irq; -} - -static inline int IO_APIC_irq_trigger(int irq) -{ - int apic, idx, pin; - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - idx = find_irq_entry(apic, pin, mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) - return irq_trigger(idx); - } - } - /* - * nonexistent IRQs are edge default - */ - return 0; -} - -/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ -static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; - -static int __assign_irq_vector(int irq) -{ - static int current_vector = FIRST_DEVICE_VECTOR, current_offset; - int vector, offset; - - BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); - - if (irq_vector[irq] > 0) - return irq_vector[irq]; - - vector = current_vector; - offset = current_offset; -next: - vector += 8; - if (vector >= first_system_vector) { - offset = (offset + 1) % 8; - vector = FIRST_DEVICE_VECTOR + offset; - } - if (vector == current_vector) - return -ENOSPC; - if (test_and_set_bit(vector, used_vectors)) - goto next; - - current_vector = vector; - current_offset = offset; - irq_vector[irq] = vector; - - return vector; -} - -static int assign_irq_vector(int irq) -{ - unsigned long flags; - int vector; - - spin_lock_irqsave(&vector_lock, flags); - vector = __assign_irq_vector(irq); - spin_unlock_irqrestore(&vector_lock, flags); - - return vector; -} - -static struct irq_chip ioapic_chip; - -#define IOAPIC_AUTO -1 -#define IOAPIC_EDGE 0 -#define IOAPIC_LEVEL 1 - -static void ioapic_register_intr(int irq, int vector, unsigned long trigger) -{ - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) { - irq_desc[irq].status |= IRQ_LEVEL; - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, "fasteoi"); - } else { - irq_desc[irq].status &= ~IRQ_LEVEL; - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_edge_irq, "edge"); - } - set_intr_gate(vector, interrupt[irq]); -} - -static void __init setup_IO_APIC_irqs(void) -{ - struct IO_APIC_route_entry entry; - int apic, pin, idx, irq, first_notcon = 1, vector; - - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - - /* - * add it to the IO-APIC irq-routing table: - */ - memset(&entry, 0, sizeof(entry)); - - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.mask = 0; /* enable IRQ */ - entry.dest.logical.logical_dest = - cpu_mask_to_apicid(TARGET_CPUS); - - idx = find_irq_entry(apic, pin, mp_INT); - if (idx == -1) { - if (first_notcon) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - " IO-APIC (apicid-pin) %d-%d", - mp_ioapics[apic].mp_apicid, - pin); - first_notcon = 0; - } else - apic_printk(APIC_VERBOSE, ", %d-%d", - mp_ioapics[apic].mp_apicid, pin); - continue; - } - - if (!first_notcon) { - apic_printk(APIC_VERBOSE, " not connected.\n"); - first_notcon = 1; - } - - entry.trigger = irq_trigger(idx); - entry.polarity = irq_polarity(idx); - - if (irq_trigger(idx)) { - entry.trigger = 1; - entry.mask = 1; - } - - irq = pin_2_irq(idx, apic, pin); - /* - * skip adding the timer int on secondary nodes, which causes - * a small but painful rift in the time-space continuum - */ - if (multi_timer_check(apic, irq)) - continue; - else - add_pin_to_irq(irq, apic, pin); - - if (!apic && !IO_APIC_IRQ(irq)) - continue; - - if (IO_APIC_IRQ(irq)) { - vector = assign_irq_vector(irq); - entry.vector = vector; - ioapic_register_intr(irq, vector, IOAPIC_AUTO); - - if (!apic && (irq < 16)) - disable_8259A_irq(irq); - } - ioapic_write_entry(apic, pin, entry); - } - } - - if (!first_notcon) - apic_printk(APIC_VERBOSE, " not connected.\n"); -} - -/* - * Set up the timer pin, possibly with the 8259A-master behind. - */ -static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, - int vector) -{ - struct IO_APIC_route_entry entry; - - memset(&entry, 0, sizeof(entry)); - - /* - * We use logical delivery to get the timer IRQ - * to the first CPU. - */ - entry.dest_mode = INT_DEST_MODE; - entry.mask = 1; /* mask IRQ now */ - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.delivery_mode = INT_DELIVERY_MODE; - entry.polarity = 0; - entry.trigger = 0; - entry.vector = vector; - - /* - * The timer IRQ doesn't have to know that behind the - * scene we may have a 8259A-master in AEOI mode ... - */ - ioapic_register_intr(0, vector, IOAPIC_EDGE); - - /* - * Add it to the IO-APIC irq-routing table: - */ - ioapic_write_entry(apic, pin, entry); -} - - -__apicdebuginit(void) print_IO_APIC(void) -{ - int apic, i; - union IO_APIC_reg_00 reg_00; - union IO_APIC_reg_01 reg_01; - union IO_APIC_reg_02 reg_02; - union IO_APIC_reg_03 reg_03; - unsigned long flags; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); - for (i = 0; i < nr_ioapics; i++) - printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); - - /* - * We are a bit conservative about what we expect. We have to - * know about every hardware change ASAP. - */ - printk(KERN_INFO "testing the IO APIC.......................\n"); - - for (apic = 0; apic < nr_ioapics; apic++) { - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - reg_01.raw = io_apic_read(apic, 1); - if (reg_01.bits.version >= 0x10) - reg_02.raw = io_apic_read(apic, 2); - if (reg_01.bits.version >= 0x20) - reg_03.raw = io_apic_read(apic, 3); - spin_unlock_irqrestore(&ioapic_lock, flags); - - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); - printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); - printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); - printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); - printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); - - printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); - printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); - - printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); - printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); - - /* - * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, - * but the value of reg_02 is read as the previous read register - * value, so ignore it if reg_02 == reg_01. - */ - if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { - printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); - printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); - } - - /* - * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 - * or reg_03, but the value of reg_0[23] is read as the previous read - * register value, so ignore it if reg_03 == reg_0[12]. - */ - if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && - reg_03.raw != reg_01.raw) { - printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); - printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); - } - - printk(KERN_DEBUG ".... IRQ redirection table:\n"); - - printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" - " Stat Dest Deli Vect: \n"); - - for (i = 0; i <= reg_01.bits.entries; i++) { - struct IO_APIC_route_entry entry; - - entry = ioapic_read_entry(apic, i); - - printk(KERN_DEBUG " %02x %03X %02X ", - i, - entry.dest.logical.logical_dest, - entry.dest.physical.physical_dest - ); - - printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", - entry.mask, - entry.trigger, - entry.irr, - entry.polarity, - entry.delivery_status, - entry.dest_mode, - entry.delivery_mode, - entry.vector - ); - } - } - printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for (i = 0; i < NR_IRQS; i++) { - struct irq_pin_list *entry = irq_2_pin + i; - if (entry->pin < 0) - continue; - printk(KERN_DEBUG "IRQ%d ", i); - for (;;) { - printk("-> %d:%d", entry->apic, entry->pin); - if (!entry->next) - break; - entry = irq_2_pin + entry->next; - } - printk("\n"); - } - - printk(KERN_INFO ".................................... done.\n"); - - return; -} - -__apicdebuginit(void) print_APIC_bitfield(int base) -{ - unsigned int v; - int i, j; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); - for (i = 0; i < 8; i++) { - v = apic_read(base + i*0x10); - for (j = 0; j < 32; j++) { - if (v & (1<<j)) - printk("1"); - else - printk("0"); - } - printk("\n"); - } -} - -__apicdebuginit(void) print_local_APIC(void *dummy) -{ - unsigned int v, ver, maxlvt; - u64 icr; - - if (apic_verbosity == APIC_QUIET) - return; - - printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", - smp_processor_id(), hard_smp_processor_id()); - v = apic_read(APIC_ID); - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, - GET_APIC_ID(v)); - v = apic_read(APIC_LVR); - printk(KERN_INFO "... APIC VERSION: %08x\n", v); - ver = GET_APIC_VERSION(v); - maxlvt = lapic_get_maxlvt(); - - v = apic_read(APIC_TASKPRI); - printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); - - if (APIC_INTEGRATED(ver)) { /* !82489DX */ - v = apic_read(APIC_ARBPRI); - printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, - v & APIC_ARBPRI_MASK); - v = apic_read(APIC_PROCPRI); - printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); - } - - v = apic_read(APIC_EOI); - printk(KERN_DEBUG "... APIC EOI: %08x\n", v); - v = apic_read(APIC_RRR); - printk(KERN_DEBUG "... APIC RRR: %08x\n", v); - v = apic_read(APIC_LDR); - printk(KERN_DEBUG "... APIC LDR: %08x\n", v); - v = apic_read(APIC_DFR); - printk(KERN_DEBUG "... APIC DFR: %08x\n", v); - v = apic_read(APIC_SPIV); - printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); - - printk(KERN_DEBUG "... APIC ISR field:\n"); - print_APIC_bitfield(APIC_ISR); - printk(KERN_DEBUG "... APIC TMR field:\n"); - print_APIC_bitfield(APIC_TMR); - printk(KERN_DEBUG "... APIC IRR field:\n"); - print_APIC_bitfield(APIC_IRR); - - if (APIC_INTEGRATED(ver)) { /* !82489DX */ - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - v = apic_read(APIC_ESR); - printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - } - - icr = apic_icr_read(); - printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); - - v = apic_read(APIC_LVTT); - printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); - - if (maxlvt > 3) { /* PC is LVT#4. */ - v = apic_read(APIC_LVTPC); - printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); - } - v = apic_read(APIC_LVT0); - printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); - v = apic_read(APIC_LVT1); - printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); - - if (maxlvt > 2) { /* ERR is LVT#3. */ - v = apic_read(APIC_LVTERR); - printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); - } - - v = apic_read(APIC_TMICT); - printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); - v = apic_read(APIC_TMCCT); - printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); - v = apic_read(APIC_TDCR); - printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); - printk("\n"); -} - -__apicdebuginit(void) print_all_local_APICs(void) -{ - on_each_cpu(print_local_APIC, NULL, 1); -} - -__apicdebuginit(void) print_PIC(void) -{ - unsigned int v; - unsigned long flags; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "\nprinting PIC contents\n"); - - spin_lock_irqsave(&i8259A_lock, flags); - - v = inb(0xa1) << 8 | inb(0x21); - printk(KERN_DEBUG "... PIC IMR: %04x\n", v); - - v = inb(0xa0) << 8 | inb(0x20); - printk(KERN_DEBUG "... PIC IRR: %04x\n", v); - - outb(0x0b, 0xa0); - outb(0x0b, 0x20); - v = inb(0xa0) << 8 | inb(0x20); - outb(0x0a, 0xa0); - outb(0x0a, 0x20); - - spin_unlock_irqrestore(&i8259A_lock, flags); - - printk(KERN_DEBUG "... PIC ISR: %04x\n", v); - - v = inb(0x4d1) << 8 | inb(0x4d0); - printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); -} - -__apicdebuginit(int) print_all_ICs(void) -{ - print_PIC(); - print_all_local_APICs(); - print_IO_APIC(); - - return 0; -} - -fs_initcall(print_all_ICs); - - -static void __init enable_IO_APIC(void) -{ - union IO_APIC_reg_01 reg_01; - int i8259_apic, i8259_pin; - int i, apic; - unsigned long flags; - - for (i = 0; i < PIN_MAP_SIZE; i++) { - irq_2_pin[i].pin = -1; - irq_2_pin[i].next = 0; - } - if (!pirqs_enabled) - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; - - /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[apic] = reg_01.bits.entries+1; - } - for (apic = 0; apic < nr_ioapics; apic++) { - int pin; - /* See if any of the pins is in ExtINT mode */ - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - struct IO_APIC_route_entry entry; - entry = ioapic_read_entry(apic, pin); - - - /* If the interrupt line is enabled and in ExtInt mode - * I have found the pin where the i8259 is connected. - */ - if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { - ioapic_i8259.apic = apic; - ioapic_i8259.pin = pin; - goto found_i8259; - } - } - } - found_i8259: - /* Look to see what if the MP table has reported the ExtINT */ - /* If we could not find the appropriate pin by looking at the ioapic - * the i8259 probably is not connected the ioapic but give the - * mptable a chance anyway. - */ - i8259_pin = find_isa_irq_pin(0, mp_ExtINT); - i8259_apic = find_isa_irq_apic(0, mp_ExtINT); - /* Trust the MP table if nothing is setup in the hardware */ - if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { - printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); - ioapic_i8259.pin = i8259_pin; - ioapic_i8259.apic = i8259_apic; - } - /* Complain if the MP table and the hardware disagree */ - if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && - (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) - { - printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); - } - - /* - * Do not trust the IO-APIC being empty at bootup - */ - clear_IO_APIC(); -} - -/* - * Not an __init, needed by the reboot code - */ -void disable_IO_APIC(void) -{ - /* - * Clear the IO-APIC before rebooting: - */ - clear_IO_APIC(); - - /* - * If the i8259 is routed through an IOAPIC - * Put that IOAPIC in virtual wire mode - * so legacy interrupts can be delivered. - */ - if (ioapic_i8259.pin != -1) { - struct IO_APIC_route_entry entry; - - memset(&entry, 0, sizeof(entry)); - entry.mask = 0; /* Enabled */ - entry.trigger = 0; /* Edge */ - entry.irr = 0; - entry.polarity = 0; /* High */ - entry.delivery_status = 0; - entry.dest_mode = 0; /* Physical */ - entry.delivery_mode = dest_ExtINT; /* ExtInt */ - entry.vector = 0; - entry.dest.physical.physical_dest = read_apic_id(); - - /* - * Add it to the IO-APIC irq-routing table: - */ - ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); - } - disconnect_bsp_APIC(ioapic_i8259.pin != -1); -} - -/* - * function to set the IO-APIC physical IDs based on the - * values stored in the MPC table. - * - * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 - */ - -static void __init setup_ioapic_ids_from_mpc(void) -{ - union IO_APIC_reg_00 reg_00; - physid_mask_t phys_id_present_map; - int apic; - int i; - unsigned char old_id; - unsigned long flags; - - if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) - return; - - /* - * Don't check I/O APIC IDs for xAPIC systems. They have - * no meaning without the serial APIC bus. - */ - if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return; - /* - * This is broken; anything with a real cpu count has to - * circumvent this idiocy regardless. - */ - phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); - - /* - * Set the IOAPIC ID to the value stored in the MPC table. - */ - for (apic = 0; apic < nr_ioapics; apic++) { - - /* Read the register 0 value */ - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - old_id = mp_ioapics[apic].mp_apicid; - - if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].mp_apicid); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - reg_00.bits.ID); - mp_ioapics[apic].mp_apicid = reg_00.bits.ID; - } - - /* - * Sanity check, is the ID really free? Every APIC in a - * system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - */ - if (check_apicid_used(phys_id_present_map, - mp_ioapics[apic].mp_apicid)) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].mp_apicid); - for (i = 0; i < get_physical_broadcast(); i++) - if (!physid_isset(i, phys_id_present_map)) - break; - if (i >= get_physical_broadcast()) - panic("Max APIC ID exceeded!\n"); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - i); - physid_set(i, phys_id_present_map); - mp_ioapics[apic].mp_apicid = i; - } else { - physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); - apic_printk(APIC_VERBOSE, "Setting %d in the " - "phys_id_present_map\n", - mp_ioapics[apic].mp_apicid); - physids_or(phys_id_present_map, phys_id_present_map, tmp); - } - - - /* - * We need to adjust the IRQ routing table - * if the ID changed. - */ - if (old_id != mp_ioapics[apic].mp_apicid) - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_dstapic == old_id) - mp_irqs[i].mp_dstapic - = mp_ioapics[apic].mp_apicid; - - /* - * Read the right value from the MPC table and - * write it into the ID register. - */ - apic_printk(APIC_VERBOSE, KERN_INFO - "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].mp_apicid); - - reg_00.bits.ID = mp_ioapics[apic].mp_apicid; - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0, reg_00.raw); - spin_unlock_irqrestore(&ioapic_lock, flags); - - /* - * Sanity check - */ - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) - printk("could not set ID!\n"); - else - apic_printk(APIC_VERBOSE, " ok.\n"); - } -} - -int no_timer_check __initdata; - -static int __init notimercheck(char *s) -{ - no_timer_check = 1; - return 1; -} -__setup("no_timer_check", notimercheck); - -/* - * There is a nasty bug in some older SMP boards, their mptable lies - * about the timer IRQ. We do the following to work around the situation: - * - * - timer IRQ defaults to IO-APIC IRQ - * - if this function detects that timer IRQs are defunct, then we fall - * back to ISA timer IRQs - */ -static int __init timer_irq_works(void) -{ - unsigned long t1 = jiffies; - unsigned long flags; - - if (no_timer_check) - return 1; - - local_save_flags(flags); - local_irq_enable(); - /* Let ten ticks pass... */ - mdelay((10 * 1000) / HZ); - local_irq_restore(flags); - - /* - * Expect a few ticks at least, to be sure some possible - * glue logic does not lock up after one or two first - * ticks in a non-ExtINT mode. Also the local APIC - * might have cached one ExtINT interrupt. Finally, at - * least one tick may be lost due to delays. - */ - if (time_after(jiffies, t1 + 4)) - return 1; - - return 0; -} - -/* - * In the SMP+IOAPIC case it might happen that there are an unspecified - * number of pending IRQ events unhandled. These cases are very rare, - * so we 'resend' these IRQs via IPIs, to the same CPU. It's much - * better to do it this way as thus we do not have to be aware of - * 'pending' interrupts in the IRQ path, except at this point. - */ -/* - * Edge triggered needs to resend any interrupt - * that was delayed but this is now handled in the device - * independent code. - */ - -/* - * Startup quirk: - * - * Starting up a edge-triggered IO-APIC interrupt is - * nasty - we need to make sure that we get the edge. - * If it is already asserted for some reason, we need - * return 1 to indicate that is was pending. - * - * This is not complete - we should be able to fake - * an edge even if it isn't on the 8259A... - * - * (We do this for level-triggered IRQs too - it cannot hurt.) - */ -static unsigned int startup_ioapic_irq(unsigned int irq) -{ - int was_pending = 0; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - if (irq < 16) { - disable_8259A_irq(irq); - if (i8259A_irq_pending(irq)) - was_pending = 1; - } - __unmask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return was_pending; -} - -static void ack_ioapic_irq(unsigned int irq) -{ - move_native_irq(irq); - ack_APIC_irq(); -} - -static void ack_ioapic_quirk_irq(unsigned int irq) -{ - unsigned long v; - int i; - - move_native_irq(irq); -/* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. --macro - */ - i = irq_vector[irq]; - - v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); - - ack_APIC_irq(); - - if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); - spin_unlock(&ioapic_lock); - } -} - -static int ioapic_retrigger_irq(unsigned int irq) -{ - send_IPI_self(irq_vector[irq]); - - return 1; -} - -static struct irq_chip ioapic_chip __read_mostly = { - .name = "IO-APIC", - .startup = startup_ioapic_irq, - .mask = mask_IO_APIC_irq, - .unmask = unmask_IO_APIC_irq, - .ack = ack_ioapic_irq, - .eoi = ack_ioapic_quirk_irq, -#ifdef CONFIG_SMP - .set_affinity = set_ioapic_affinity_irq, -#endif - .retrigger = ioapic_retrigger_irq, -}; - - -static inline void init_IO_APIC_traps(void) -{ - int irq; - - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - for (irq = 0; irq < NR_IRQS ; irq++) { - if (IO_APIC_IRQ(irq) && !irq_vector[irq]) { - /* - * Hmm.. We don't have an entry for this, - * so default to an old-fashioned 8259 - * interrupt if we can.. - */ - if (irq < 16) - make_8259A_irq(irq); - else - /* Strange. Oh, well.. */ - irq_desc[irq].chip = &no_irq_chip; - } - } -} - -/* - * The local APIC irq-chip implementation: - */ - -static void ack_lapic_irq(unsigned int irq) -{ - ack_APIC_irq(); -} - -static void mask_lapic_irq(unsigned int irq) -{ - unsigned long v; - - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v | APIC_LVT_MASKED); -} - -static void unmask_lapic_irq(unsigned int irq) -{ - unsigned long v; - - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); -} - -static struct irq_chip lapic_chip __read_mostly = { - .name = "local-APIC", - .mask = mask_lapic_irq, - .unmask = unmask_lapic_irq, - .ack = ack_lapic_irq, -}; - -static void lapic_register_intr(int irq, int vector) -{ - irq_desc[irq].status &= ~IRQ_LEVEL; - set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, - "edge"); - set_intr_gate(vector, interrupt[irq]); -} - -static void __init setup_nmi(void) -{ - /* - * Dirty trick to enable the NMI watchdog ... - * We put the 8259A master into AEOI mode and - * unmask on all local APICs LVT0 as NMI. - * - * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') - * is from Maciej W. Rozycki - so we do not have to EOI from - * the NMI handler or the timer interrupt. - */ - apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); - - enable_NMI_through_LVT0(); - - apic_printk(APIC_VERBOSE, " done.\n"); -} - -/* - * This looks a bit hackish but it's about the only one way of sending - * a few INTA cycles to 8259As and any associated glue logic. ICR does - * not support the ExtINT mode, unfortunately. We need to send these - * cycles as some i82489DX-based boards have glue logic that keeps the - * 8259A interrupt line asserted until INTA. --macro - */ -static inline void __init unlock_ExtINT_logic(void) -{ - int apic, pin, i; - struct IO_APIC_route_entry entry0, entry1; - unsigned char save_control, save_freq_select; - - pin = find_isa_irq_pin(8, mp_INT); - if (pin == -1) { - WARN_ON_ONCE(1); - return; - } - apic = find_isa_irq_apic(8, mp_INT); - if (apic == -1) { - WARN_ON_ONCE(1); - return; - } - - entry0 = ioapic_read_entry(apic, pin); - clear_IO_APIC_pin(apic, pin); - - memset(&entry1, 0, sizeof(entry1)); - - entry1.dest_mode = 0; /* physical delivery */ - entry1.mask = 0; /* unmask IRQ now */ - entry1.dest.physical.physical_dest = hard_smp_processor_id(); - entry1.delivery_mode = dest_ExtINT; - entry1.polarity = entry0.polarity; - entry1.trigger = 0; - entry1.vector = 0; - - ioapic_write_entry(apic, pin, entry1); - - save_control = CMOS_READ(RTC_CONTROL); - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, - RTC_FREQ_SELECT); - CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); - - i = 100; - while (i-- > 0) { - mdelay(10); - if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) - i -= 10; - } - - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - clear_IO_APIC_pin(apic, pin); - - ioapic_write_entry(apic, pin, entry0); -} - -/* - * This code may look a bit paranoid, but it's supposed to cooperate with - * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ - * is so screwy. Thanks to Brian Perkins for testing/hacking this beast - * fanatically on his truly buggy board. - */ -static inline void __init check_timer(void) -{ - int apic1, pin1, apic2, pin2; - int no_pin1 = 0; - int vector; - unsigned int ver; - unsigned long flags; - - local_irq_save(flags); - - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); - - /* - * get/set the timer IRQ vector: - */ - disable_8259A_irq(0); - vector = assign_irq_vector(0); - set_intr_gate(vector, interrupt[0]); - - /* - * As IRQ0 is to be enabled in the 8259A, the virtual - * wire has to be disabled in the local APIC. Also - * timer interrupts need to be acknowledged manually in - * the 8259A for the i82489DX when using the NMI - * watchdog as that APIC treats NMIs as level-triggered. - * The AEOI mode will finish them in the 8259A - * automatically. - */ - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); - init_8259A(1); - timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); - - pin1 = find_isa_irq_pin(0, mp_INT); - apic1 = find_isa_irq_apic(0, mp_INT); - pin2 = ioapic_i8259.pin; - apic2 = ioapic_i8259.apic; - - apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " - "apic1=%d pin1=%d apic2=%d pin2=%d\n", - vector, apic1, pin1, apic2, pin2); - - /* - * Some BIOS writers are clueless and report the ExtINTA - * I/O APIC input from the cascaded 8259A as the timer - * interrupt input. So just in case, if only one pin - * was found above, try it both directly and through the - * 8259A. - */ - if (pin1 == -1) { - pin1 = pin2; - apic1 = apic2; - no_pin1 = 1; - } else if (pin2 == -1) { - pin2 = pin1; - apic2 = apic1; - } - - if (pin1 != -1) { - /* - * Ok, does IRQ0 through the IOAPIC work? - */ - if (no_pin1) { - add_pin_to_irq(0, apic1, pin1); - setup_timer_IRQ0_pin(apic1, pin1, vector); - } - unmask_IO_APIC_irq(0); - if (timer_irq_works()) { - if (nmi_watchdog == NMI_IO_APIC) { - setup_nmi(); - enable_8259A_irq(0); - } - if (disable_timer_pin_1 > 0) - clear_IO_APIC_pin(0, pin1); - goto out; - } - clear_IO_APIC_pin(apic1, pin1); - if (!no_pin1) - apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " - "8254 timer not connected to IO-APIC\n"); - - apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " - "(IRQ0) through the 8259A ...\n"); - apic_printk(APIC_QUIET, KERN_INFO - "..... (found apic %d pin %d) ...\n", apic2, pin2); - /* - * legacy devices should be connected to IO APIC #0 - */ - replace_pin_at_irq(0, apic1, pin1, apic2, pin2); - setup_timer_IRQ0_pin(apic2, pin2, vector); - unmask_IO_APIC_irq(0); - enable_8259A_irq(0); - if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); - timer_through_8259 = 1; - if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); - setup_nmi(); - enable_8259A_irq(0); - } - goto out; - } - /* - * Cleanup, just in case ... - */ - disable_8259A_irq(0); - clear_IO_APIC_pin(apic2, pin2); - apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); - } - - if (nmi_watchdog == NMI_IO_APIC) { - apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " - "through the IO-APIC - disabling NMI Watchdog!\n"); - nmi_watchdog = NMI_NONE; - } - timer_ack = 0; - - apic_printk(APIC_QUIET, KERN_INFO - "...trying to set up timer as Virtual Wire IRQ...\n"); - - lapic_register_intr(0, vector); - apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ - enable_8259A_irq(0); - - if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); - goto out; - } - disable_8259A_irq(0); - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); - apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); - - apic_printk(APIC_QUIET, KERN_INFO - "...trying to set up timer as ExtINT IRQ...\n"); - - init_8259A(0); - make_8259A_irq(0); - apic_write(APIC_LVT0, APIC_DM_EXTINT); - - unlock_ExtINT_logic(); - - if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); - goto out; - } - apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); - panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " - "report. Then try booting with the 'noapic' option.\n"); -out: - local_irq_restore(flags); -} - -/* - * Traditionally ISA IRQ2 is the cascade IRQ, and is not available - * to devices. However there may be an I/O APIC pin available for - * this interrupt regardless. The pin may be left unconnected, but - * typically it will be reused as an ExtINT cascade interrupt for - * the master 8259A. In the MPS case such a pin will normally be - * reported as an ExtINT interrupt in the MP table. With ACPI - * there is no provision for ExtINT interrupts, and in the absence - * of an override it would be treated as an ordinary ISA I/O APIC - * interrupt, that is edge-triggered and unmasked by default. We - * used to do this, but it caused problems on some systems because - * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using - * the same ExtINT cascade interrupt to drive the local APIC of the - * bootstrap processor. Therefore we refrain from routing IRQ2 to - * the I/O APIC in all cases now. No actual device should request - * it anyway. --macro - */ -#define PIC_IRQS (1 << PIC_CASCADE_IR) - -void __init setup_IO_APIC(void) -{ - int i; - - /* Reserve all the system vectors. */ - for (i = first_system_vector; i < NR_VECTORS; i++) - set_bit(i, used_vectors); - - enable_IO_APIC(); - - io_apic_irqs = ~PIC_IRQS; - - printk("ENABLING IO-APIC IRQs\n"); - - /* - * Set up IO-APIC IRQ routing. - */ - if (!acpi_ioapic) - setup_ioapic_ids_from_mpc(); - sync_Arb_IDs(); - setup_IO_APIC_irqs(); - init_IO_APIC_traps(); - check_timer(); -} - -/* - * Called after all the initialization is done. If we didnt find any - * APIC bugs then we can allow the modify fast path - */ - -static int __init io_apic_bug_finalize(void) -{ - if (sis_apic_bug == -1) - sis_apic_bug = 0; - return 0; -} - -late_initcall(io_apic_bug_finalize); - -struct sysfs_ioapic_data { - struct sys_device dev; - struct IO_APIC_route_entry entry[0]; -}; -static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS]; - -static int ioapic_suspend(struct sys_device *dev, pm_message_t state) -{ - struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; - int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); - entry = data->entry; - for (i = 0; i < nr_ioapic_registers[dev->id]; i++) - entry[i] = ioapic_read_entry(dev->id, i); - - return 0; -} - -static int ioapic_resume(struct sys_device *dev) -{ - struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; - unsigned long flags; - union IO_APIC_reg_00 reg_00; - int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); - entry = data->entry; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; - io_apic_write(dev->id, 0, reg_00.raw); - } - spin_unlock_irqrestore(&ioapic_lock, flags); - for (i = 0; i < nr_ioapic_registers[dev->id]; i++) - ioapic_write_entry(dev->id, i, entry[i]); - - return 0; -} - -static struct sysdev_class ioapic_sysdev_class = { - .name = "ioapic", - .suspend = ioapic_suspend, - .resume = ioapic_resume, -}; - -static int __init ioapic_init_sysfs(void) -{ - struct sys_device *dev; - int i, size, error = 0; - - error = sysdev_class_register(&ioapic_sysdev_class); - if (error) - return error; - - for (i = 0; i < nr_ioapics; i++) { - size = sizeof(struct sys_device) + nr_ioapic_registers[i] - * sizeof(struct IO_APIC_route_entry); - mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); - if (!mp_ioapic_data[i]) { - printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); - continue; - } - dev = &mp_ioapic_data[i]->dev; - dev->id = i; - dev->cls = &ioapic_sysdev_class; - error = sysdev_register(dev); - if (error) { - kfree(mp_ioapic_data[i]); - mp_ioapic_data[i] = NULL; - printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); - continue; - } - } - - return 0; -} - -device_initcall(ioapic_init_sysfs); - -/* - * Dynamic irq allocate and deallocation - */ -int create_irq(void) -{ - /* Allocate an unused irq */ - int irq, new, vector = 0; - unsigned long flags; - - irq = -ENOSPC; - spin_lock_irqsave(&vector_lock, flags); - for (new = (NR_IRQS - 1); new >= 0; new--) { - if (platform_legacy_irq(new)) - continue; - if (irq_vector[new] != 0) - continue; - vector = __assign_irq_vector(new); - if (likely(vector > 0)) - irq = new; - break; - } - spin_unlock_irqrestore(&vector_lock, flags); - - if (irq >= 0) { - set_intr_gate(vector, interrupt[irq]); - dynamic_irq_init(irq); - } - return irq; -} - -void destroy_irq(unsigned int irq) -{ - unsigned long flags; - - dynamic_irq_cleanup(irq); - - spin_lock_irqsave(&vector_lock, flags); - clear_bit(irq_vector[irq], used_vectors); - irq_vector[irq] = 0; - spin_unlock_irqrestore(&vector_lock, flags); -} - -/* - * MSI message composition - */ -#ifdef CONFIG_PCI_MSI -static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) -{ - int vector; - unsigned dest; - - vector = assign_irq_vector(irq); - if (vector >= 0) { - dest = cpu_mask_to_apicid(TARGET_CPUS); - - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = - MSI_ADDR_BASE_LO | - ((INT_DEST_MODE == 0) ? -MSI_ADDR_DEST_MODE_PHYSICAL: - MSI_ADDR_DEST_MODE_LOGICAL) | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); - - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? -MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(vector); - } - return vector; -} - -#ifdef CONFIG_SMP -static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) -{ - struct msi_msg msg; - unsigned int dest; - cpumask_t tmp; - int vector; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - tmp = TARGET_CPUS; - - vector = assign_irq_vector(irq); - if (vector < 0) - return; - - dest = cpu_mask_to_apicid(mask); - - read_msi_msg(irq, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - write_msi_msg(irq, &msg); - irq_desc[irq].affinity = mask; -} -#endif /* CONFIG_SMP */ - -/* - * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, - * which implement the MSI or MSI-X Capability Structure. - */ -static struct irq_chip msi_chip = { - .name = "PCI-MSI", - .unmask = unmask_msi_irq, - .mask = mask_msi_irq, - .ack = ack_ioapic_irq, -#ifdef CONFIG_SMP - .set_affinity = set_msi_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) -{ - struct msi_msg msg; - int irq, ret; - irq = create_irq(); - if (irq < 0) - return irq; - - ret = msi_compose_msg(dev, irq, &msg); - if (ret < 0) { - destroy_irq(irq); - return ret; - } - - set_irq_msi(irq, desc); - write_msi_msg(irq, &msg); - - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, - "edge"); - - return 0; -} - -void arch_teardown_msi_irq(unsigned int irq) -{ - destroy_irq(irq); -} - -#endif /* CONFIG_PCI_MSI */ - -/* - * Hypertransport interrupt support - */ -#ifdef CONFIG_HT_IRQ - -#ifdef CONFIG_SMP - -static void target_ht_irq(unsigned int irq, unsigned int dest) -{ - struct ht_irq_msg msg; - fetch_ht_irq_msg(irq, &msg); - - msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK); - msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - - msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest); - msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); - - write_ht_irq_msg(irq, &msg); -} - -static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) -{ - unsigned int dest; - cpumask_t tmp; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - tmp = TARGET_CPUS; - - cpus_and(mask, tmp, CPU_MASK_ALL); - - dest = cpu_mask_to_apicid(mask); - - target_ht_irq(irq, dest); - irq_desc[irq].affinity = mask; -} -#endif - -static struct irq_chip ht_irq_chip = { - .name = "PCI-HT", - .mask = mask_ht_irq, - .unmask = unmask_ht_irq, - .ack = ack_ioapic_irq, -#ifdef CONFIG_SMP - .set_affinity = set_ht_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) -{ - int vector; - - vector = assign_irq_vector(irq); - if (vector >= 0) { - struct ht_irq_msg msg; - unsigned dest; - cpumask_t tmp; - - cpus_clear(tmp); - cpu_set(vector >> 8, tmp); - dest = cpu_mask_to_apicid(tmp); - - msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); - - msg.address_lo = - HT_IRQ_LOW_BASE | - HT_IRQ_LOW_DEST_ID(dest) | - HT_IRQ_LOW_VECTOR(vector) | - ((INT_DEST_MODE == 0) ? - HT_IRQ_LOW_DM_PHYSICAL : - HT_IRQ_LOW_DM_LOGICAL) | - HT_IRQ_LOW_RQEOI_EDGE | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - HT_IRQ_LOW_MT_FIXED : - HT_IRQ_LOW_MT_ARBITRATED) | - HT_IRQ_LOW_IRQ_MASKED; - - write_ht_irq_msg(irq, &msg); - - set_irq_chip_and_handler_name(irq, &ht_irq_chip, - handle_edge_irq, "edge"); - } - return vector; -} -#endif /* CONFIG_HT_IRQ */ - -/* -------------------------------------------------------------------------- - ACPI-based IOAPIC Configuration - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI - -int __init io_apic_get_unique_id(int ioapic, int apic_id) -{ - union IO_APIC_reg_00 reg_00; - static physid_mask_t apic_id_map = PHYSID_MASK_NONE; - physid_mask_t tmp; - unsigned long flags; - int i = 0; - - /* - * The P4 platform supports up to 256 APIC IDs on two separate APIC - * buses (one for LAPICs, one for IOAPICs), where predecessors only - * supports up to 16 on one shared APIC bus. - * - * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full - * advantage of new APIC bus architecture. - */ - - if (physids_empty(apic_id_map)) - apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - if (apic_id >= get_physical_broadcast()) { - printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " - "%d\n", ioapic, apic_id, reg_00.bits.ID); - apic_id = reg_00.bits.ID; - } - - /* - * Every APIC in a system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - */ - if (check_apicid_used(apic_id_map, apic_id)) { - - for (i = 0; i < get_physical_broadcast(); i++) { - if (!check_apicid_used(apic_id_map, i)) - break; - } - - if (i == get_physical_broadcast()) - panic("Max apic_id exceeded!\n"); - - printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " - "trying %d\n", ioapic, apic_id, i); - - apic_id = i; - } - - tmp = apicid_to_cpu_present(apic_id); - physids_or(apic_id_map, apic_id_map, tmp); - - if (reg_00.bits.ID != apic_id) { - reg_00.bits.ID = apic_id; - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(ioapic, 0, reg_00.raw); - reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - /* Sanity check */ - if (reg_00.bits.ID != apic_id) { - printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); - return -1; - } - } - - apic_printk(APIC_VERBOSE, KERN_INFO - "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); - - return apic_id; -} - - -int __init io_apic_get_version(int ioapic) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.bits.version; -} - - -int __init io_apic_get_redir_entries(int ioapic) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.bits.entries; -} - - -int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low) -{ - struct IO_APIC_route_entry entry; - - if (!IO_APIC_IRQ(irq)) { - printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", - ioapic); - return -EINVAL; - } - - /* - * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. - * Note that we mask (disable) IRQs now -- these get enabled when the - * corresponding device driver registers for this IRQ. - */ - - memset(&entry, 0, sizeof(entry)); - - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.trigger = edge_level; - entry.polarity = active_high_low; - entry.mask = 1; - - /* - * IRQs < 16 are already in the irq_2_pin[] map - */ - if (irq >= 16) - add_pin_to_irq(irq, ioapic, pin); - - entry.vector = assign_irq_vector(irq); - - apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " - "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, - mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq, - edge_level, active_high_low); - - ioapic_register_intr(irq, entry.vector, edge_level); - - if (!ioapic && (irq < 16)) - disable_8259A_irq(irq); - - ioapic_write_entry(ioapic, pin, entry); - - return 0; -} - -int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) -{ - int i; - - if (skip_ioapic_setup) - return -1; - - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == mp_INT && - mp_irqs[i].mp_srcbusirq == bus_irq) - break; - if (i >= mp_irq_entries) - return -1; - - *trigger = irq_trigger(i); - *polarity = irq_polarity(i); - return 0; -} - -#endif /* CONFIG_ACPI */ - -static int __init parse_disable_timer_pin_1(char *arg) -{ - disable_timer_pin_1 = 1; - return 0; -} -early_param("disable_timer_pin_1", parse_disable_timer_pin_1); - -static int __init parse_enable_timer_pin_1(char *arg) -{ - disable_timer_pin_1 = -1; - return 0; -} -early_param("enable_timer_pin_1", parse_enable_timer_pin_1); - -static int __init parse_noapic(char *arg) -{ - /* disable IO-APIC */ - disable_ioapic_setup(); - return 0; -} -early_param("noapic", parse_noapic); - -void __init ioapic_init_mappings(void) -{ - unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; - int i; - - for (i = 0; i < nr_ioapics; i++) { - if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mp_apicaddr; - if (!ioapic_phys) { - printk(KERN_ERR - "WARNING: bogus zero IO-APIC " - "address found in MPTABLE, " - "disabling IO/APIC support!\n"); - smp_found_config = 0; - skip_ioapic_setup = 1; - goto fake_ioapic_page; - } - } else { -fake_ioapic_page: - ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); - ioapic_phys = __pa(ioapic_phys); - } - set_fixmap_nocache(idx, ioapic_phys); - printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); - idx++; - } -} - diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c new file mode 100644 index 00000000000..d1d4dc52f64 --- /dev/null +++ b/arch/x86/kernel/irq.c @@ -0,0 +1,189 @@ +/* + * Common interrupt code for 32 and 64 bit + */ +#include <linux/cpu.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/seq_file.h> + +#include <asm/apic.h> +#include <asm/io_apic.h> +#include <asm/smp.h> + +atomic_t irq_err_count; + +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. + */ +void ack_bad_irq(unsigned int irq) +{ + printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); + +#ifdef CONFIG_X86_LOCAL_APIC + /* + * Currently unexpected vectors happen only on SMP and APIC. + * We _must_ ack these because every local APIC has only N + * irq slots per priority level, and a 'hanging, unacked' IRQ + * holds up an irq slot - in excessive cases (when multiple + * unexpected vectors occur) that might lock up the APIC + * completely. + * But only ack when the APIC is enabled -AK + */ + if (cpu_has_apic) + ack_APIC_irq(); +#endif +} + +#ifdef CONFIG_X86_32 +# define irq_stats(x) (&per_cpu(irq_stat, x)) +#else +# define irq_stats(x) cpu_pda(x) +#endif +/* + * /proc/interrupts printing: + */ +static int show_other_interrupts(struct seq_file *p) +{ + int j; + + seq_printf(p, "NMI: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->__nmi_count); + seq_printf(p, " Non-maskable interrupts\n"); +#ifdef CONFIG_X86_LOCAL_APIC + seq_printf(p, "LOC: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); + seq_printf(p, " Local timer interrupts\n"); +#endif +#ifdef CONFIG_SMP + seq_printf(p, "RES: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count); + seq_printf(p, " Rescheduling interrupts\n"); + seq_printf(p, "CAL: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); + seq_printf(p, " Function call interrupts\n"); + seq_printf(p, "TLB: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); + seq_printf(p, " TLB shootdowns\n"); +#endif +#ifdef CONFIG_X86_MCE + seq_printf(p, "TRM: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); + seq_printf(p, " Thermal event interrupts\n"); +# ifdef CONFIG_X86_64 + seq_printf(p, "THR: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); + seq_printf(p, " Threshold APIC interrupts\n"); +# endif +#endif +#ifdef CONFIG_X86_LOCAL_APIC + seq_printf(p, "SPU: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); + seq_printf(p, " Spurious interrupts\n"); +#endif + seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); +#if defined(CONFIG_X86_IO_APIC) + seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); +#endif + return 0; +} + +int show_interrupts(struct seq_file *p, void *v) +{ + unsigned long flags, any_count = 0; + int i = *(loff_t *) v, j; + struct irqaction *action; + struct irq_desc *desc; + + if (i > nr_irqs) + return 0; + + if (i == nr_irqs) + return show_other_interrupts(p); + + /* print header */ + if (i == 0) { + seq_printf(p, " "); + for_each_online_cpu(j) + seq_printf(p, "CPU%-8d", j); + seq_putc(p, '\n'); + } + + desc = irq_to_desc(i); + spin_lock_irqsave(&desc->lock, flags); +#ifndef CONFIG_SMP + any_count = kstat_irqs(i); +#else + for_each_online_cpu(j) + any_count |= kstat_irqs_cpu(i, j); +#endif + action = desc->action; + if (!action && !any_count) + goto out; + + seq_printf(p, "%3d: ", i); +#ifndef CONFIG_SMP + seq_printf(p, "%10u ", kstat_irqs(i)); +#else + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); +#endif + seq_printf(p, " %8s", desc->chip->name); + seq_printf(p, "-%-8s", desc->name); + + if (action) { + seq_printf(p, " %s", action->name); + while ((action = action->next) != NULL) + seq_printf(p, ", %s", action->name); + } + + seq_putc(p, '\n'); +out: + spin_unlock_irqrestore(&desc->lock, flags); + return 0; +} + +/* + * /proc/stat helpers + */ +u64 arch_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = irq_stats(cpu)->__nmi_count; + +#ifdef CONFIG_X86_LOCAL_APIC + sum += irq_stats(cpu)->apic_timer_irqs; +#endif +#ifdef CONFIG_SMP + sum += irq_stats(cpu)->irq_resched_count; + sum += irq_stats(cpu)->irq_call_count; + sum += irq_stats(cpu)->irq_tlb_count; +#endif +#ifdef CONFIG_X86_MCE + sum += irq_stats(cpu)->irq_thermal_count; +# ifdef CONFIG_X86_64 + sum += irq_stats(cpu)->irq_threshold_count; +#endif +#endif +#ifdef CONFIG_X86_LOCAL_APIC + sum += irq_stats(cpu)->irq_spurious_count; +#endif + return sum; +} + +u64 arch_irq_stat(void) +{ + u64 sum = atomic_read(&irq_err_count); + +#ifdef CONFIG_X86_IO_APIC + sum += atomic_read(&irq_mis_count); +#endif + return sum; +} diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index b71e02d42f4..a51382672de 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -25,29 +25,6 @@ EXPORT_PER_CPU_SYMBOL(irq_stat); DEFINE_PER_CPU(struct pt_regs *, irq_regs); EXPORT_PER_CPU_SYMBOL(irq_regs); -/* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves. - */ -void ack_bad_irq(unsigned int irq) -{ - printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); - -#ifdef CONFIG_X86_LOCAL_APIC - /* - * Currently unexpected vectors happen only on SMP and APIC. - * We _must_ ack these because every local APIC has only N - * irq slots per priority level, and a 'hanging, unacked' IRQ - * holds up an irq slot - in excessive cases (when multiple - * unexpected vectors occur) that might lock up the APIC - * completely. - * But only ack when the APIC is enabled -AK - */ - if (cpu_has_apic) - ack_APIC_irq(); -#endif -} - #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 1KB free? */ static int check_stack_overflow(void) @@ -223,20 +200,25 @@ unsigned int do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs; /* high bit used in ret_from_ code */ - int overflow, irq = ~regs->orig_ax; - struct irq_desc *desc = irq_desc + irq; + int overflow; + unsigned vector = ~regs->orig_ax; + struct irq_desc *desc; + unsigned irq; - if (unlikely((unsigned)irq >= NR_IRQS)) { - printk(KERN_EMERG "%s: cannot handle IRQ %d\n", - __func__, irq); - BUG(); - } old_regs = set_irq_regs(regs); irq_enter(); + irq = __get_cpu_var(vector_irq)[vector]; overflow = check_stack_overflow(); + desc = irq_to_desc(irq); + if (unlikely(!desc)) { + printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n", + __func__, irq, vector, smp_processor_id()); + BUG(); + } + if (!execute_on_irq_stack(overflow, desc, irq)) { if (unlikely(overflow)) print_stack_overflow(); @@ -248,146 +230,6 @@ unsigned int do_IRQ(struct pt_regs *regs) return 1; } -/* - * Interrupt statistics: - */ - -atomic_t irq_err_count; - -/* - * /proc/interrupts printing: - */ - -int show_interrupts(struct seq_file *p, void *v) -{ - int i = *(loff_t *) v, j; - struct irqaction * action; - unsigned long flags; - - if (i == 0) { - seq_printf(p, " "); - for_each_online_cpu(j) - seq_printf(p, "CPU%-8d",j); - seq_putc(p, '\n'); - } - - if (i < NR_IRQS) { - unsigned any_count = 0; - - spin_lock_irqsave(&irq_desc[i].lock, flags); -#ifndef CONFIG_SMP - any_count = kstat_irqs(i); -#else - for_each_online_cpu(j) - any_count |= kstat_cpu(j).irqs[i]; -#endif - action = irq_desc[i].action; - if (!action && !any_count) - goto skip; - seq_printf(p, "%3d: ",i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); -#endif - seq_printf(p, " %8s", irq_desc[i].chip->name); - seq_printf(p, "-%-8s", irq_desc[i].name); - - if (action) { - seq_printf(p, " %s", action->name); - while ((action = action->next) != NULL) - seq_printf(p, ", %s", action->name); - } - - seq_putc(p, '\n'); -skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); - } else if (i == NR_IRQS) { - seq_printf(p, "NMI: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", nmi_count(j)); - seq_printf(p, " Non-maskable interrupts\n"); -#ifdef CONFIG_X86_LOCAL_APIC - seq_printf(p, "LOC: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).apic_timer_irqs); - seq_printf(p, " Local timer interrupts\n"); -#endif -#ifdef CONFIG_SMP - seq_printf(p, "RES: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).irq_resched_count); - seq_printf(p, " Rescheduling interrupts\n"); - seq_printf(p, "CAL: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).irq_call_count); - seq_printf(p, " Function call interrupts\n"); - seq_printf(p, "TLB: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).irq_tlb_count); - seq_printf(p, " TLB shootdowns\n"); -#endif -#ifdef CONFIG_X86_MCE - seq_printf(p, "TRM: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).irq_thermal_count); - seq_printf(p, " Thermal event interrupts\n"); -#endif -#ifdef CONFIG_X86_LOCAL_APIC - seq_printf(p, "SPU: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).irq_spurious_count); - seq_printf(p, " Spurious interrupts\n"); -#endif - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); -#if defined(CONFIG_X86_IO_APIC) - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); -#endif - } - return 0; -} - -/* - * /proc/stat helpers - */ -u64 arch_irq_stat_cpu(unsigned int cpu) -{ - u64 sum = nmi_count(cpu); - -#ifdef CONFIG_X86_LOCAL_APIC - sum += per_cpu(irq_stat, cpu).apic_timer_irqs; -#endif -#ifdef CONFIG_SMP - sum += per_cpu(irq_stat, cpu).irq_resched_count; - sum += per_cpu(irq_stat, cpu).irq_call_count; - sum += per_cpu(irq_stat, cpu).irq_tlb_count; -#endif -#ifdef CONFIG_X86_MCE - sum += per_cpu(irq_stat, cpu).irq_thermal_count; -#endif -#ifdef CONFIG_X86_LOCAL_APIC - sum += per_cpu(irq_stat, cpu).irq_spurious_count; -#endif - return sum; -} - -u64 arch_irq_stat(void) -{ - u64 sum = atomic_read(&irq_err_count); - -#ifdef CONFIG_X86_IO_APIC - sum += atomic_read(&irq_mis_count); -#endif - return sum; -} - #ifdef CONFIG_HOTPLUG_CPU #include <mach_apic.h> @@ -395,20 +237,22 @@ void fixup_irqs(cpumask_t map) { unsigned int irq; static int warned; + struct irq_desc *desc; - for (irq = 0; irq < NR_IRQS; irq++) { + for_each_irq_desc(irq, desc) { cpumask_t mask; + if (irq == 2) continue; - cpus_and(mask, irq_desc[irq].affinity, map); + cpus_and(mask, desc->affinity, map); if (any_online_cpu(mask) == NR_CPUS) { printk("Breaking affinity for irq %i\n", irq); mask = map; } - if (irq_desc[irq].chip->set_affinity) - irq_desc[irq].chip->set_affinity(irq, mask); - else if (irq_desc[irq].action && !(warned++)) + if (desc->chip->set_affinity) + desc->chip->set_affinity(irq, mask); + else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index f065fe9071b..60eb84eb77a 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -18,28 +18,6 @@ #include <asm/idle.h> #include <asm/smp.h> -atomic_t irq_err_count; - -/* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves. - */ -void ack_bad_irq(unsigned int irq) -{ - printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq); - /* - * Currently unexpected vectors happen only on SMP and APIC. - * We _must_ ack these because every local APIC has only N - * irq slots per priority level, and a 'hanging, unacked' IRQ - * holds up an irq slot - in excessive cases (when multiple - * unexpected vectors occur) that might lock up the APIC - * completely. - * But don't ack when the APIC is disabled. -AK - */ - if (!disable_apic) - ack_APIC_irq(); -} - #ifdef CONFIG_DEBUG_STACKOVERFLOW /* * Probabilistic stack overflow check: @@ -65,122 +43,6 @@ static inline void stack_overflow_check(struct pt_regs *regs) #endif /* - * Generic, controller-independent functions: - */ - -int show_interrupts(struct seq_file *p, void *v) -{ - int i = *(loff_t *) v, j; - struct irqaction * action; - unsigned long flags; - - if (i == 0) { - seq_printf(p, " "); - for_each_online_cpu(j) - seq_printf(p, "CPU%-8d",j); - seq_putc(p, '\n'); - } - - if (i < NR_IRQS) { - unsigned any_count = 0; - - spin_lock_irqsave(&irq_desc[i].lock, flags); -#ifndef CONFIG_SMP - any_count = kstat_irqs(i); -#else - for_each_online_cpu(j) - any_count |= kstat_cpu(j).irqs[i]; -#endif - action = irq_desc[i].action; - if (!action && !any_count) - goto skip; - seq_printf(p, "%3d: ",i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); -#endif - seq_printf(p, " %8s", irq_desc[i].chip->name); - seq_printf(p, "-%-8s", irq_desc[i].name); - - if (action) { - seq_printf(p, " %s", action->name); - while ((action = action->next) != NULL) - seq_printf(p, ", %s", action->name); - } - seq_putc(p, '\n'); -skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); - } else if (i == NR_IRQS) { - seq_printf(p, "NMI: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); - seq_printf(p, " Non-maskable interrupts\n"); - seq_printf(p, "LOC: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); - seq_printf(p, " Local timer interrupts\n"); -#ifdef CONFIG_SMP - seq_printf(p, "RES: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count); - seq_printf(p, " Rescheduling interrupts\n"); - seq_printf(p, "CAL: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); - seq_printf(p, " Function call interrupts\n"); - seq_printf(p, "TLB: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); - seq_printf(p, " TLB shootdowns\n"); -#endif -#ifdef CONFIG_X86_MCE - seq_printf(p, "TRM: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); - seq_printf(p, " Thermal event interrupts\n"); - seq_printf(p, "THR: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); - seq_printf(p, " Threshold APIC interrupts\n"); -#endif - seq_printf(p, "SPU: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); - seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); - } - return 0; -} - -/* - * /proc/stat helpers - */ -u64 arch_irq_stat_cpu(unsigned int cpu) -{ - u64 sum = cpu_pda(cpu)->__nmi_count; - - sum += cpu_pda(cpu)->apic_timer_irqs; -#ifdef CONFIG_SMP - sum += cpu_pda(cpu)->irq_resched_count; - sum += cpu_pda(cpu)->irq_call_count; - sum += cpu_pda(cpu)->irq_tlb_count; -#endif -#ifdef CONFIG_X86_MCE - sum += cpu_pda(cpu)->irq_thermal_count; - sum += cpu_pda(cpu)->irq_threshold_count; -#endif - sum += cpu_pda(cpu)->irq_spurious_count; - return sum; -} - -u64 arch_irq_stat(void) -{ - return atomic_read(&irq_err_count); -} - -/* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific * handlers). @@ -188,6 +50,7 @@ u64 arch_irq_stat(void) asmlinkage unsigned int do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); + struct irq_desc *desc; /* high bit used in ret_from_ code */ unsigned vector = ~regs->orig_ax; @@ -201,8 +64,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) stack_overflow_check(regs); #endif - if (likely(irq < NR_IRQS)) - generic_handle_irq(irq); + desc = irq_to_desc(irq); + if (likely(desc)) + generic_handle_irq_desc(irq, desc); else { if (!disable_apic) ack_APIC_irq(); @@ -223,8 +87,9 @@ void fixup_irqs(cpumask_t map) { unsigned int irq; static int warned; + struct irq_desc *desc; - for (irq = 0; irq < NR_IRQS; irq++) { + for_each_irq_desc(irq, desc) { cpumask_t mask; int break_affinity = 0; int set_affinity = 1; @@ -233,32 +98,32 @@ void fixup_irqs(cpumask_t map) continue; /* interrupt's are disabled at this point */ - spin_lock(&irq_desc[irq].lock); + spin_lock(&desc->lock); if (!irq_has_action(irq) || - cpus_equal(irq_desc[irq].affinity, map)) { - spin_unlock(&irq_desc[irq].lock); + cpus_equal(desc->affinity, map)) { + spin_unlock(&desc->lock); continue; } - cpus_and(mask, irq_desc[irq].affinity, map); + cpus_and(mask, desc->affinity, map); if (cpus_empty(mask)) { break_affinity = 1; mask = map; } - if (irq_desc[irq].chip->mask) - irq_desc[irq].chip->mask(irq); + if (desc->chip->mask) + desc->chip->mask(irq); - if (irq_desc[irq].chip->set_affinity) - irq_desc[irq].chip->set_affinity(irq, mask); + if (desc->chip->set_affinity) + desc->chip->set_affinity(irq, mask); else if (!(warned++)) set_affinity = 0; - if (irq_desc[irq].chip->unmask) - irq_desc[irq].chip->unmask(irq); + if (desc->chip->unmask) + desc->chip->unmask(irq); - spin_unlock(&irq_desc[irq].lock); + spin_unlock(&desc->lock); if (break_affinity && set_affinity) printk("Broke affinity for irq %i\n", irq); diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 9200a1e2752..845aa9803e8 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -69,6 +69,13 @@ void __init init_ISA_irqs (void) * 16 old-style INTA-cycle interrupts: */ for (i = 0; i < 16; i++) { + /* first time call this irq_desc */ + struct irq_desc *desc = irq_to_desc(i); + + desc->status = IRQ_DISABLED; + desc->action = NULL; + desc->depth = 1; + set_irq_chip_and_handler_name(i, &i8259A_chip, handle_level_irq, "XT"); } @@ -83,6 +90,27 @@ static struct irqaction irq2 = { .name = "cascade", }; +DEFINE_PER_CPU(vector_irq_t, vector_irq) = { + [0 ... IRQ0_VECTOR - 1] = -1, + [IRQ0_VECTOR] = 0, + [IRQ1_VECTOR] = 1, + [IRQ2_VECTOR] = 2, + [IRQ3_VECTOR] = 3, + [IRQ4_VECTOR] = 4, + [IRQ5_VECTOR] = 5, + [IRQ6_VECTOR] = 6, + [IRQ7_VECTOR] = 7, + [IRQ8_VECTOR] = 8, + [IRQ9_VECTOR] = 9, + [IRQ10_VECTOR] = 10, + [IRQ11_VECTOR] = 11, + [IRQ12_VECTOR] = 12, + [IRQ13_VECTOR] = 13, + [IRQ14_VECTOR] = 14, + [IRQ15_VECTOR] = 15, + [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 +}; + /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); @@ -98,22 +126,14 @@ void __init native_init_IRQ(void) * us. (some of these will be overridden and become * 'special' SMP interrupts) */ - for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; - if (i >= NR_IRQS) - break; + for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { /* SYSCALL_VECTOR was reserved in trap_init. */ - if (!test_bit(vector, used_vectors)) - set_intr_gate(vector, interrupt[i]); + if (i != SYSCALL_VECTOR) + set_intr_gate(i, interrupt[i]); } -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) - /* - * IRQ0 must be given a fixed assignment and initialized, - * because it's used before the IO-APIC is set up. - */ - set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) /* * The reschedule interrupt is a CPU-to-CPU reschedule-helper * IPI, driven by wakeup. @@ -128,6 +148,9 @@ void __init native_init_IRQ(void) /* IPI for single call function */ set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); + + /* Low priority IPI to cleanup after moving an irq */ + set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); #endif #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 5b5be9d43c2..ff023539128 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -142,23 +142,19 @@ void __init init_ISA_irqs(void) init_bsp_APIC(); init_8259A(0); - for (i = 0; i < NR_IRQS; i++) { - irq_desc[i].status = IRQ_DISABLED; - irq_desc[i].action = NULL; - irq_desc[i].depth = 1; - - if (i < 16) { - /* - * 16 old-style INTA-cycle interrupts: - */ - set_irq_chip_and_handler_name(i, &i8259A_chip, + for (i = 0; i < 16; i++) { + /* first time call this irq_desc */ + struct irq_desc *desc = irq_to_desc(i); + + desc->status = IRQ_DISABLED; + desc->action = NULL; + desc->depth = 1; + + /* + * 16 old-style INTA-cycle interrupts: + */ + set_irq_chip_and_handler_name(i, &i8259A_chip, handle_level_irq, "XT"); - } else { - /* - * 'high' PCI IRQs filled in on demand - */ - irq_desc[i].chip = &no_irq_chip; - } } } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 19262482021..1972266e8ba 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -9,8 +9,6 @@ #include <asm/calgary.h> #include <asm/amd_iommu.h> -static int forbid_dac __read_mostly; - struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); @@ -293,17 +291,3 @@ void pci_iommu_shutdown(void) } /* Must execute after PCI subsystem */ fs_initcall(pci_iommu_init); - -#ifdef CONFIG_PCI -/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ - -static __devinit void via_no_dac(struct pci_dev *dev) -{ - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { - printk(KERN_INFO "PCI: VIA PCI bridge detected." - "Disabling DAC.\n"); - forbid_dac = 1; - } -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); -#endif diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index cd8c0ed02b7..c958120fb1b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -63,6 +63,13 @@ void idle_notifier_register(struct notifier_block *n) { atomic_notifier_chain_register(&idle_notifier, n); } +EXPORT_SYMBOL_GPL(idle_notifier_register); + +void idle_notifier_unregister(struct notifier_block *n) +{ + atomic_notifier_chain_unregister(&idle_notifier, n); +} +EXPORT_SYMBOL_GPL(idle_notifier_unregister); void enter_idle(void) { diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index f6a11b9b1f9..67465ed8931 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -35,9 +35,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) if (!(word & (1 << 13))) { dev_info(&dev->dev, "Intel E7520/7320/7525 detected; " "disabling irq balancing and affinity\n"); -#ifdef CONFIG_IRQBALANCE - irqbalance_disable(""); -#endif noirqdebug_setup(""); #ifdef CONFIG_PROC_FS no_irq_affinity = 1; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b2c97874ec0..0fa6790c1dd 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1073,6 +1073,7 @@ void __init setup_arch(char **cmdline_p) #endif prefill_possible_map(); + #ifdef CONFIG_X86_64 init_cpu_to_node(); #endif @@ -1080,6 +1081,9 @@ void __init setup_arch(char **cmdline_p) init_apic_mappings(); ioapic_init_mappings(); + /* need to wait for io_apic is mapped */ + nr_irqs = probe_nr_irqs(); + kvm_guest_init(); e820_reserve_resources(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 0e67f72d931..ae0c0d3bb77 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -140,25 +140,30 @@ static void __init setup_cpu_pda_map(void) */ void __init setup_per_cpu_areas(void) { - ssize_t size = PERCPU_ENOUGH_ROOM; + ssize_t size, old_size; char *ptr; int cpu; + unsigned long align = 1; /* Setup cpu_pda map */ setup_cpu_pda_map(); /* Copy section for each CPU (we discard the original) */ - size = PERCPU_ENOUGH_ROOM; + old_size = PERCPU_ENOUGH_ROOM; + align = max_t(unsigned long, PAGE_SIZE, align); + size = roundup(old_size, align); printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", size); for_each_possible_cpu(cpu) { #ifndef CONFIG_NEED_MULTIPLE_NODES - ptr = alloc_bootmem_pages(size); + ptr = __alloc_bootmem(size, align, + __pa(MAX_DMA_ADDRESS)); #else int node = early_cpu_to_node(cpu); if (!node_online(node) || !NODE_DATA(node)) { - ptr = alloc_bootmem_pages(size); + ptr = __alloc_bootmem(size, align, + __pa(MAX_DMA_ADDRESS)); printk(KERN_INFO "cpu %d has no node %d or node-local memory\n", cpu, node); @@ -167,7 +172,8 @@ void __init setup_per_cpu_areas(void) cpu, __pa(ptr)); } else { - ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); + ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, + __pa(MAX_DMA_ADDRESS)); if (ptr) printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", cpu, node, __pa(ptr)); @@ -175,7 +181,6 @@ void __init setup_per_cpu_areas(void) #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - } printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", @@ -213,7 +218,7 @@ static void __init setup_node_to_cpumask_map(void) /* allocate the map */ map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); - pr_debug(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n", + pr_debug("Node to cpumask map at %p for %d nodes\n", map, nr_node_ids); /* node_to_cpumask() will now work */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7ed9e070a6e..7b109339731 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -543,10 +543,10 @@ static inline void __inquire_remote_apic(int apicid) int timeout; u32 status; - printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); + printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid); for (i = 0; i < ARRAY_SIZE(regs); i++) { - printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]); + printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]); /* * Wait for idle. @@ -874,7 +874,7 @@ do_rest: start_ip = setup_trampoline(); /* So we see what's up */ - printk(KERN_INFO "Booting processor %d/%d ip %lx\n", + printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n", cpu, apicid, start_ip); /* @@ -893,9 +893,11 @@ do_rest: smpboot_setup_warm_reset_vector(start_ip); /* * Be paranoid about clearing APIC errors. - */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); + */ + if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } } /* diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c index 3d1be4f0fac..de87d600829 100644 --- a/arch/x86/kernel/syscall_64.c +++ b/arch/x86/kernel/syscall_64.c @@ -8,12 +8,12 @@ #define __NO_STUBS #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; -#undef ASM_X86__UNISTD_64_H +#undef _ASM_X86_UNISTD_64_H #include <asm/unistd_64.h> #undef __SYSCALL #define __SYSCALL(nr, sym) [nr] = sym, -#undef ASM_X86__UNISTD_64_H +#undef _ASM_X86_UNISTD_64_H typedef void (*sys_call_ptr_t)(void); diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 8b8c0d6640f..04431f34fd1 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -6,7 +6,7 @@ * This code is released under the GNU General Public License version 2 or * later. */ -#include <linux/mc146818rtc.h> +#include <linux/seq_file.h> #include <linux/proc_fs.h> #include <linux/kernel.h> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index e062974cce3..04d242ab016 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -931,14 +931,6 @@ do_device_not_available(struct pt_regs *regs, long error) } #ifdef CONFIG_X86_32 -#ifdef CONFIG_X86_MCE -dotraplinkage void __kprobes do_machine_check(struct pt_regs *regs, long error) -{ - conditional_sti(regs); - machine_check_vector(regs, error); -} -#endif - dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) { siginfo_t info; diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c new file mode 100644 index 00000000000..aeef529917e --- /dev/null +++ b/arch/x86/kernel/uv_irq.c @@ -0,0 +1,79 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI UV IRQ functions + * + * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/module.h> +#include <linux/irq.h> + +#include <asm/apic.h> +#include <asm/uv/uv_irq.h> + +static void uv_noop(unsigned int irq) +{ +} + +static unsigned int uv_noop_ret(unsigned int irq) +{ + return 0; +} + +static void uv_ack_apic(unsigned int irq) +{ + ack_APIC_irq(); +} + +struct irq_chip uv_irq_chip = { + .name = "UV-CORE", + .startup = uv_noop_ret, + .shutdown = uv_noop, + .enable = uv_noop, + .disable = uv_noop, + .ack = uv_noop, + .mask = uv_noop, + .unmask = uv_noop, + .eoi = uv_ack_apic, + .end = uv_noop, +}; + +/* + * Set up a mapping of an available irq and vector, and enable the specified + * MMR that defines the MSI that is to be sent to the specified CPU when an + * interrupt is raised. + */ +int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, + unsigned long mmr_offset) +{ + int irq; + int ret; + + irq = create_irq(); + if (irq <= 0) + return -EBUSY; + + ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset); + if (ret != irq) + destroy_irq(irq); + + return ret; +} +EXPORT_SYMBOL_GPL(uv_setup_irq); + +/* + * Tear down a mapping of an irq and vector, and disable the specified MMR that + * defined the MSI that was to be sent to the specified CPU when an interrupt + * was raised. + * + * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq(). + */ +void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset) +{ + arch_disable_uv_irq(mmr_blade, mmr_offset); + destroy_irq(irq); +} +EXPORT_SYMBOL_GPL(uv_teardown_irq); diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c new file mode 100644 index 00000000000..67f9b9dbf80 --- /dev/null +++ b/arch/x86/kernel/uv_sysfs.c @@ -0,0 +1,72 @@ +/* + * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) Russ Anderson + */ + +#include <linux/sysdev.h> +#include <asm/uv/bios.h> + +struct kobject *sgi_uv_kobj; + +static ssize_t partition_id_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id); +} + +static ssize_t coherence_id_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id()); +} + +static struct kobj_attribute partition_id_attr = + __ATTR(partition_id, S_IRUGO, partition_id_show, NULL); + +static struct kobj_attribute coherence_id_attr = + __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL); + + +static int __init sgi_uv_sysfs_init(void) +{ + unsigned long ret; + + if (!sgi_uv_kobj) + sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj); + if (!sgi_uv_kobj) { + printk(KERN_WARNING "kobject_create_and_add sgi_uv failed \n"); + return -EINVAL; + } + + ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr); + if (ret) { + printk(KERN_WARNING "sysfs_create_file partition_id failed \n"); + return ret; + } + + ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr); + if (ret) { + printk(KERN_WARNING "sysfs_create_file coherence_id failed \n"); + return ret; + } + + return 0; +} + +device_initcall(sgi_uv_sysfs_init); diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 61a97e616f7..0c9667f0752 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -484,10 +484,11 @@ static void disable_cobalt_irq(unsigned int irq) static unsigned int startup_cobalt_irq(unsigned int irq) { unsigned long flags; + struct irq_desc *desc = irq_to_desc(irq); spin_lock_irqsave(&cobalt_lock, flags); - if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) - irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING); + if ((desc->status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) + desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING); enable_cobalt_irq(irq); spin_unlock_irqrestore(&cobalt_lock, flags); return 0; @@ -506,9 +507,10 @@ static void ack_cobalt_irq(unsigned int irq) static void end_cobalt_irq(unsigned int irq) { unsigned long flags; + struct irq_desc *desc = irq_to_desc(irq); spin_lock_irqsave(&cobalt_lock, flags); - if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS))) + if (!(desc->status & (IRQ_DISABLED | IRQ_INPROGRESS))) enable_cobalt_irq(irq); spin_unlock_irqrestore(&cobalt_lock, flags); } @@ -626,12 +628,12 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) spin_unlock_irqrestore(&i8259A_lock, flags); - desc = irq_desc + realirq; + desc = irq_to_desc(realirq); /* * handle this 'virtual interrupt' as a Cobalt one now. */ - kstat_cpu(smp_processor_id()).irqs[realirq]++; + kstat_incr_irqs_this_cpu(realirq, desc); if (likely(desc->action != NULL)) handle_IRQ_event(realirq, desc->action); @@ -662,27 +664,29 @@ void init_VISWS_APIC_irqs(void) int i; for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { - irq_desc[i].status = IRQ_DISABLED; - irq_desc[i].action = 0; - irq_desc[i].depth = 1; + struct irq_desc *desc = irq_to_desc(i); + + desc->status = IRQ_DISABLED; + desc->action = 0; + desc->depth = 1; if (i == 0) { - irq_desc[i].chip = &cobalt_irq_type; + desc->chip = &cobalt_irq_type; } else if (i == CO_IRQ_IDE0) { - irq_desc[i].chip = &cobalt_irq_type; + desc->chip = &cobalt_irq_type; } else if (i == CO_IRQ_IDE1) { - irq_desc[i].chip = &cobalt_irq_type; + desc->chip = &cobalt_irq_type; } else if (i == CO_IRQ_8259) { - irq_desc[i].chip = &piix4_master_irq_type; + desc->chip = &piix4_master_irq_type; } else if (i < CO_IRQ_APIC0) { - irq_desc[i].chip = &piix4_virtual_irq_type; + desc->chip = &piix4_virtual_irq_type; } else if (IS_CO_APIC(i)) { - irq_desc[i].chip = &cobalt_irq_type; + desc->chip = &cobalt_irq_type; } } diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 6953859fe28..254ee07f863 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -235,11 +235,14 @@ static void __devinit vmi_time_init_clockevent(void) void __init vmi_time_init(void) { + unsigned int cpu; /* Disable PIT: BIOSes start PIT CH0 with 18.2hz peridic. */ outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ vmi_time_init_clockevent(); setup_irq(0, &vmi_clock_action); + for_each_possible_cpu(cpu) + per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0; } #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 9abac8a9d82..b13acb75e82 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -248,7 +248,7 @@ clear: * This will be saved when ever the FP and extended state context is * saved on the user stack during the signal handler delivery to the user. */ -void prepare_fx_sw_frame(void) +static void prepare_fx_sw_frame(void) { int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) + FP_XSTATE_MAGIC2_SIZE; diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 634132a9a51..11c6725fb79 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -204,10 +204,10 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps) if (vcpu0 && waitqueue_active(&vcpu0->wq)) wake_up_interruptible(&vcpu0->wq); - pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); - pt->scheduled = ktime_to_ns(pt->timer.expires); + hrtimer_add_expires_ns(&pt->timer, pt->period); + pt->scheduled = hrtimer_get_expires_ns(&pt->timer); if (pt->period) - ps->channels[0].count_load_time = pt->timer.expires; + ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer); return (pt->period == 0 ? 0 : 1); } @@ -257,7 +257,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) timer = &pit->pit_state.pit_timer.timer; if (hrtimer_cancel(timer)) - hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS); + hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } static void destroy_pit_timer(struct kvm_kpit_timer *pt) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6571926bfd3..0fc3cab4894 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -946,9 +946,7 @@ static int __apic_timer_fn(struct kvm_lapic *apic) if (apic_lvtt_period(apic)) { result = 1; - apic->timer.dev.expires = ktime_add_ns( - apic->timer.dev.expires, - apic->timer.period); + hrtimer_add_expires_ns(&apic->timer.dev, apic->timer.period); } return result; } @@ -1117,7 +1115,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) timer = &apic->timer.dev; if (hrtimer_cancel(timer)) - hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS); + hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 65f0b8a47be..48ee4f9435f 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -582,7 +582,7 @@ static void __init lguest_init_IRQ(void) for (i = 0; i < LGUEST_IRQS; i++) { int vector = FIRST_EXTERNAL_VECTOR + i; if (vector != SYSCALL_VECTOR) { - set_intr_gate(vector, interrupt[i]); + set_intr_gate(vector, interrupt[vector]); set_irq_chip_and_handler_name(i, &lguest_irq_controller, handle_level_irq, "level"); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index df37fc9d6a2..3c3b471ea49 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -41,6 +41,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { { } }; +static cpumask_t vector_allocation_domain(int cpu) +{ + return cpumask_of_cpu(cpu); +} static int probe_bigsmp(void) { diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 6513d41ea21..28459cab3dd 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -75,4 +75,18 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) } #endif +static cpumask_t vector_allocation_domain(int cpu) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; +} + struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 8cf58394975..71a309b122e 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -38,4 +38,18 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } +static cpumask_t vector_allocation_domain(int cpu) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; +} + struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 6ad6b67a723..6272b5e69da 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -23,4 +23,18 @@ static int probe_summit(void) return 0; } +static cpumask_t vector_allocation_domain(int cpu) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; +} + struct genapic apic_summit = APIC_INIT("summit", probe_summit); diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 199a5f4a873..0f6e8a6523a 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -1483,7 +1483,7 @@ static void disable_local_vic_irq(unsigned int irq) * the interrupt off to another CPU */ static void before_handle_vic_irq(unsigned int irq) { - irq_desc_t *desc = irq_desc + irq; + irq_desc_t *desc = irq_to_desc(irq); __u8 cpu = smp_processor_id(); _raw_spin_lock(&vic_irq_lock); @@ -1518,7 +1518,7 @@ static void before_handle_vic_irq(unsigned int irq) /* Finish the VIC interrupt: basically mask */ static void after_handle_vic_irq(unsigned int irq) { - irq_desc_t *desc = irq_desc + irq; + irq_desc_t *desc = irq_to_desc(irq); _raw_spin_lock(&vic_irq_lock); { diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 672e17f8262..9cab18b0b85 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -61,9 +61,9 @@ static void __init memtest(unsigned long start_phys, unsigned long size, last_bad += incr; } else { if (start_bad) { - printk(KERN_CONT "\n %010lx bad mem addr %010lx - %010lx reserved", + printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved", val, start_bad, last_bad + incr); - reserve_early(start_bad, last_bad - start_bad, "BAD RAM"); + reserve_early(start_bad, last_bad + incr, "BAD RAM"); } start_bad = last_bad = start_phys_aligned; } @@ -72,9 +72,8 @@ static void __init memtest(unsigned long start_phys, unsigned long size, if (start_bad) { printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved", val, start_bad, last_bad + incr); - reserve_early(start_bad, last_bad - start_bad, "BAD RAM"); + reserve_early(start_bad, last_bad + incr, "BAD RAM"); } - } /* default is disabled */ diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 635b50e8558..2c4baa88f2c 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -56,13 +56,6 @@ struct remap_trace { static DEFINE_PER_CPU(struct trap_reason, pf_reason); static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace); -#if 0 /* XXX: no way gather this info anymore */ -/* Access to this is not per-cpu. */ -static DEFINE_PER_CPU(atomic_t, dropped); -#endif - -static struct dentry *marker_file; - static DEFINE_MUTEX(mmiotrace_mutex); static DEFINE_SPINLOCK(trace_lock); static atomic_t mmiotrace_enabled; @@ -75,7 +68,7 @@ static LIST_HEAD(trace_list); /* struct remap_trace */ * and trace_lock. * - Routines depending on is_enabled() must take trace_lock. * - trace_list users must hold trace_lock. - * - is_enabled() guarantees that mmio_trace_record is allowed. + * - is_enabled() guarantees that mmio_trace_{rw,mapping} are allowed. * - pre/post callbacks assume the effect of is_enabled() being true. */ @@ -97,44 +90,6 @@ static bool is_enabled(void) return atomic_read(&mmiotrace_enabled); } -#if 0 /* XXX: needs rewrite */ -/* - * Write callback for the debugfs entry: - * Read a marker and write it to the mmio trace log - */ -static ssize_t write_marker(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos) -{ - char *event = NULL; - struct mm_io_header *headp; - ssize_t len = (count > 65535) ? 65535 : count; - - event = kzalloc(sizeof(*headp) + len, GFP_KERNEL); - if (!event) - return -ENOMEM; - - headp = (struct mm_io_header *)event; - headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT); - headp->data_len = len; - - if (copy_from_user(event + sizeof(*headp), buffer, len)) { - kfree(event); - return -EFAULT; - } - - spin_lock_irq(&trace_lock); -#if 0 /* XXX: convert this to use tracing */ - if (is_enabled()) - relay_write(chan, event, sizeof(*headp) + len); - else -#endif - len = -EINVAL; - spin_unlock_irq(&trace_lock); - kfree(event); - return len; -} -#endif - static void print_pte(unsigned long address) { unsigned int level; @@ -307,8 +262,10 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size, map.map_id = trace->id; spin_lock_irq(&trace_lock); - if (!is_enabled()) + if (!is_enabled()) { + kfree(trace); goto not_enabled; + } mmio_trace_mapping(&map); list_add_tail(&trace->list, &trace_list); @@ -377,6 +334,23 @@ void mmiotrace_iounmap(volatile void __iomem *addr) iounmap_trace_core(addr); } +int mmiotrace_printk(const char *fmt, ...) +{ + int ret = 0; + va_list args; + unsigned long flags; + va_start(args, fmt); + + spin_lock_irqsave(&trace_lock, flags); + if (is_enabled()) + ret = mmio_trace_printk(fmt, args); + spin_unlock_irqrestore(&trace_lock, flags); + + va_end(args); + return ret; +} +EXPORT_SYMBOL(mmiotrace_printk); + static void clear_trace_list(void) { struct remap_trace *trace; @@ -462,26 +436,12 @@ static void leave_uniprocessor(void) } #endif -#if 0 /* XXX: out of order */ -static struct file_operations fops_marker = { - .owner = THIS_MODULE, - .write = write_marker -}; -#endif - void enable_mmiotrace(void) { mutex_lock(&mmiotrace_mutex); if (is_enabled()) goto out; -#if 0 /* XXX: tracing does not support text entries */ - marker_file = debugfs_create_file("marker", 0660, dir, NULL, - &fops_marker); - if (!marker_file) - pr_err(NAME "marker file creation failed.\n"); -#endif - if (nommiotrace) pr_info(NAME "MMIO tracing disabled.\n"); enter_uniprocessor(); @@ -506,11 +466,6 @@ void disable_mmiotrace(void) clear_trace_list(); /* guarantees: no more kmmio callbacks */ leave_uniprocessor(); - if (marker_file) { - debugfs_remove(marker_file); - marker_file = NULL; - } - pr_info(NAME "disabled.\n"); out: mutex_unlock(&mmiotrace_mutex); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 407d8784f66..f1dc1b75d16 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -65,23 +65,22 @@ static void split_page_count(int level) direct_pages_count[level - 1] += PTRS_PER_PTE; } -int arch_report_meminfo(char *page) +void arch_report_meminfo(struct seq_file *m) { - int n = sprintf(page, "DirectMap4k: %8lu kB\n", + seq_printf(m, "DirectMap4k: %8lu kB\n", direct_pages_count[PG_LEVEL_4K] << 2); #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) - n += sprintf(page + n, "DirectMap2M: %8lu kB\n", + seq_printf(m, "DirectMap2M: %8lu kB\n", direct_pages_count[PG_LEVEL_2M] << 11); #else - n += sprintf(page + n, "DirectMap4M: %8lu kB\n", + seq_printf(m, "DirectMap4M: %8lu kB\n", direct_pages_count[PG_LEVEL_2M] << 12); #endif #ifdef CONFIG_X86_64 if (direct_gbpages) - n += sprintf(page + n, "DirectMap1G: %8lu kB\n", + seq_printf(m, "DirectMap1G: %8lu kB\n", direct_pages_count[PG_LEVEL_1G] << 20); #endif - return n; } #else static inline void split_page_count(int level) { } diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c index efa1911e20c..df3d5c861cd 100644 --- a/arch/x86/mm/pf_in.c +++ b/arch/x86/mm/pf_in.c @@ -79,25 +79,34 @@ static unsigned int mw32[] = { 0xC7 }; static unsigned int mw64[] = { 0x89, 0x8B }; #endif /* not __i386__ */ -static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged, - int *rexr) +struct prefix_bits { + unsigned shorted:1; + unsigned enlarged:1; + unsigned rexr:1; + unsigned rex:1; +}; + +static int skip_prefix(unsigned char *addr, struct prefix_bits *prf) { int i; unsigned char *p = addr; - *shorted = 0; - *enlarged = 0; - *rexr = 0; + prf->shorted = 0; + prf->enlarged = 0; + prf->rexr = 0; + prf->rex = 0; restart: for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) { if (*p == prefix_codes[i]) { if (*p == 0x66) - *shorted = 1; + prf->shorted = 1; #ifdef __amd64__ if ((*p & 0xf8) == 0x48) - *enlarged = 1; + prf->enlarged = 1; if ((*p & 0xf4) == 0x44) - *rexr = 1; + prf->rexr = 1; + if ((*p & 0xf0) == 0x40) + prf->rex = 1; #endif p++; goto restart; @@ -135,12 +144,12 @@ enum reason_type get_ins_type(unsigned long ins_addr) { unsigned int opcode; unsigned char *p; - int shorted, enlarged, rexr; + struct prefix_bits prf; int i; enum reason_type rv = OTHERS; p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); CHECK_OP_TYPE(opcode, reg_rop, REG_READ); @@ -156,10 +165,11 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr) { unsigned int opcode; unsigned char *p; - int i, shorted, enlarged, rexr; + struct prefix_bits prf; + int i; p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); for (i = 0; i < ARRAY_SIZE(rw8); i++) @@ -168,7 +178,7 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr) for (i = 0; i < ARRAY_SIZE(rw32); i++) if (rw32[i] == opcode) - return (shorted ? 2 : (enlarged ? 8 : 4)); + return prf.shorted ? 2 : (prf.enlarged ? 8 : 4); printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); return 0; @@ -178,10 +188,11 @@ unsigned int get_ins_mem_width(unsigned long ins_addr) { unsigned int opcode; unsigned char *p; - int i, shorted, enlarged, rexr; + struct prefix_bits prf; + int i; p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); for (i = 0; i < ARRAY_SIZE(mw8); i++) @@ -194,11 +205,11 @@ unsigned int get_ins_mem_width(unsigned long ins_addr) for (i = 0; i < ARRAY_SIZE(mw32); i++) if (mw32[i] == opcode) - return shorted ? 2 : 4; + return prf.shorted ? 2 : 4; for (i = 0; i < ARRAY_SIZE(mw64); i++) if (mw64[i] == opcode) - return shorted ? 2 : (enlarged ? 8 : 4); + return prf.shorted ? 2 : (prf.enlarged ? 8 : 4); printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); return 0; @@ -238,7 +249,7 @@ enum { #endif }; -static unsigned char *get_reg_w8(int no, struct pt_regs *regs) +static unsigned char *get_reg_w8(int no, int rex, struct pt_regs *regs) { unsigned char *rv = NULL; @@ -255,18 +266,6 @@ static unsigned char *get_reg_w8(int no, struct pt_regs *regs) case arg_DL: rv = (unsigned char *)®s->dx; break; - case arg_AH: - rv = 1 + (unsigned char *)®s->ax; - break; - case arg_BH: - rv = 1 + (unsigned char *)®s->bx; - break; - case arg_CH: - rv = 1 + (unsigned char *)®s->cx; - break; - case arg_DH: - rv = 1 + (unsigned char *)®s->dx; - break; #ifdef __amd64__ case arg_R8: rv = (unsigned char *)®s->r8; @@ -294,9 +293,55 @@ static unsigned char *get_reg_w8(int no, struct pt_regs *regs) break; #endif default: - printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); break; } + + if (rv) + return rv; + + if (rex) { + /* + * If REX prefix exists, access low bytes of SI etc. + * instead of AH etc. + */ + switch (no) { + case arg_SI: + rv = (unsigned char *)®s->si; + break; + case arg_DI: + rv = (unsigned char *)®s->di; + break; + case arg_BP: + rv = (unsigned char *)®s->bp; + break; + case arg_SP: + rv = (unsigned char *)®s->sp; + break; + default: + break; + } + } else { + switch (no) { + case arg_AH: + rv = 1 + (unsigned char *)®s->ax; + break; + case arg_BH: + rv = 1 + (unsigned char *)®s->bx; + break; + case arg_CH: + rv = 1 + (unsigned char *)®s->cx; + break; + case arg_DH: + rv = 1 + (unsigned char *)®s->dx; + break; + default: + break; + } + } + + if (!rv) + printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); + return rv; } @@ -368,11 +413,12 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) unsigned char mod_rm; int reg; unsigned char *p; - int i, shorted, enlarged, rexr; + struct prefix_bits prf; + int i; unsigned long rv; p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); for (i = 0; i < ARRAY_SIZE(reg_rop); i++) if (reg_rop[i] == opcode) { @@ -392,10 +438,10 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) do_work: mod_rm = *p; - reg = ((mod_rm >> 3) & 0x7) | (rexr << 3); + reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3); switch (get_ins_reg_width(ins_addr)) { case 1: - return *get_reg_w8(reg, regs); + return *get_reg_w8(reg, prf.rex, regs); case 2: return *(unsigned short *)get_reg_w32(reg, regs); @@ -422,11 +468,12 @@ unsigned long get_ins_imm_val(unsigned long ins_addr) unsigned char mod_rm; unsigned char mod; unsigned char *p; - int i, shorted, enlarged, rexr; + struct prefix_bits prf; + int i; unsigned long rv; p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); for (i = 0; i < ARRAY_SIZE(imm_wop); i++) if (imm_wop[i] == opcode) { diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index d877c5b423e..ab50a8d7402 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -3,6 +3,7 @@ */ #include <linux/module.h> #include <linux/io.h> +#include <linux/mmiotrace.h> #define MODULE_NAME "testmmiotrace" @@ -13,6 +14,7 @@ MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); static void do_write_test(void __iomem *p) { unsigned int i; + mmiotrace_printk("Write test.\n"); for (i = 0; i < 256; i++) iowrite8(i, p + i); for (i = 1024; i < (5 * 1024); i += 2) @@ -24,6 +26,7 @@ static void do_write_test(void __iomem *p) static void do_read_test(void __iomem *p) { unsigned int i; + mmiotrace_printk("Read test.\n"); for (i = 0; i < 256; i++) ioread8(p + i); for (i = 1024; i < (5 * 1024); i += 2) @@ -39,6 +42,7 @@ static void do_test(void) pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); return; } + mmiotrace_printk("ioremap returned %p.\n", p); do_write_test(p); do_read_test(p); iounmap(p); diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index e2095cba409..04df67f8a7b 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -52,8 +52,7 @@ struct frame_head { unsigned long ret; } __attribute__((packed)); -static struct frame_head * -dump_user_backtrace(struct frame_head * head) +static struct frame_head *dump_user_backtrace(struct frame_head *head) { struct frame_head bufhead[2]; diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 57f6c908808..022cd41ea9b 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -28,85 +28,9 @@ static struct op_x86_model_spec const *model; static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); static DEFINE_PER_CPU(unsigned long, saved_lvtpc); -static int nmi_start(void); -static void nmi_stop(void); -static void nmi_cpu_start(void *dummy); -static void nmi_cpu_stop(void *dummy); - /* 0 == registered but off, 1 == registered and on */ static int nmi_enabled = 0; -#ifdef CONFIG_SMP -static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, - void *data) -{ - int cpu = (unsigned long)data; - switch (action) { - case CPU_DOWN_FAILED: - case CPU_ONLINE: - smp_call_function_single(cpu, nmi_cpu_start, NULL, 0); - break; - case CPU_DOWN_PREPARE: - smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1); - break; - } - return NOTIFY_DONE; -} - -static struct notifier_block oprofile_cpu_nb = { - .notifier_call = oprofile_cpu_notifier -}; -#endif - -#ifdef CONFIG_PM - -static int nmi_suspend(struct sys_device *dev, pm_message_t state) -{ - /* Only one CPU left, just stop that one */ - if (nmi_enabled == 1) - nmi_cpu_stop(NULL); - return 0; -} - -static int nmi_resume(struct sys_device *dev) -{ - if (nmi_enabled == 1) - nmi_cpu_start(NULL); - return 0; -} - -static struct sysdev_class oprofile_sysclass = { - .name = "oprofile", - .resume = nmi_resume, - .suspend = nmi_suspend, -}; - -static struct sys_device device_oprofile = { - .id = 0, - .cls = &oprofile_sysclass, -}; - -static int __init init_sysfs(void) -{ - int error; - - error = sysdev_class_register(&oprofile_sysclass); - if (!error) - error = sysdev_register(&device_oprofile); - return error; -} - -static void exit_sysfs(void) -{ - sysdev_unregister(&device_oprofile); - sysdev_class_unregister(&oprofile_sysclass); -} - -#else -#define init_sysfs() do { } while (0) -#define exit_sysfs() do { } while (0) -#endif /* CONFIG_PM */ - static int profile_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { @@ -361,6 +285,77 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) return 0; } +#ifdef CONFIG_SMP +static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, + void *data) +{ + int cpu = (unsigned long)data; + switch (action) { + case CPU_DOWN_FAILED: + case CPU_ONLINE: + smp_call_function_single(cpu, nmi_cpu_start, NULL, 0); + break; + case CPU_DOWN_PREPARE: + smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block oprofile_cpu_nb = { + .notifier_call = oprofile_cpu_notifier +}; +#endif + +#ifdef CONFIG_PM + +static int nmi_suspend(struct sys_device *dev, pm_message_t state) +{ + /* Only one CPU left, just stop that one */ + if (nmi_enabled == 1) + nmi_cpu_stop(NULL); + return 0; +} + +static int nmi_resume(struct sys_device *dev) +{ + if (nmi_enabled == 1) + nmi_cpu_start(NULL); + return 0; +} + +static struct sysdev_class oprofile_sysclass = { + .name = "oprofile", + .resume = nmi_resume, + .suspend = nmi_suspend, +}; + +static struct sys_device device_oprofile = { + .id = 0, + .cls = &oprofile_sysclass, +}; + +static int __init init_sysfs(void) +{ + int error; + + error = sysdev_class_register(&oprofile_sysclass); + if (!error) + error = sysdev_register(&device_oprofile); + return error; +} + +static void exit_sysfs(void) +{ + sysdev_unregister(&device_oprofile); + sysdev_class_unregister(&oprofile_sysclass); +} + +#else +#define init_sysfs() do { } while (0) +#define exit_sysfs() do { } while (0) +#endif /* CONFIG_PM */ + static int p4force; module_param(p4force, int, 0); @@ -420,9 +415,6 @@ static int __init ppro_init(char **cpu_type) case 15: case 23: *cpu_type = "i386/core_2"; break; - case 26: - *cpu_type = "i386/core_2"; - break; default: /* Unknown */ return 0; @@ -432,6 +424,16 @@ static int __init ppro_init(char **cpu_type) return 1; } +static int __init arch_perfmon_init(char **cpu_type) +{ + if (!cpu_has_arch_perfmon) + return 0; + *cpu_type = "i386/arch_perfmon"; + model = &op_arch_perfmon_spec; + arch_perfmon_setup_counters(); + return 1; +} + /* in order to get sysfs right */ static int using_nmi; @@ -439,7 +441,7 @@ int __init op_nmi_init(struct oprofile_operations *ops) { __u8 vendor = boot_cpu_data.x86_vendor; __u8 family = boot_cpu_data.x86; - char *cpu_type; + char *cpu_type = NULL; int ret = 0; if (!cpu_has_apic) @@ -477,19 +479,20 @@ int __init op_nmi_init(struct oprofile_operations *ops) switch (family) { /* Pentium IV */ case 0xf: - if (!p4_init(&cpu_type)) - return -ENODEV; + p4_init(&cpu_type); break; /* A P6-class processor */ case 6: - if (!ppro_init(&cpu_type)) - return -ENODEV; + ppro_init(&cpu_type); break; default: - return -ENODEV; + break; } + + if (!cpu_type && !arch_perfmon_init(&cpu_type)) + return -ENODEV; break; default: diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h index 2880b15c467..91b6a116165 100644 --- a/arch/x86/oprofile/op_counter.h +++ b/arch/x86/oprofile/op_counter.h @@ -6,22 +6,22 @@ * * @author John Levon */ - + #ifndef OP_COUNTER_H #define OP_COUNTER_H - + #define OP_MAX_COUNTER 8 - + /* Per-perfctr configuration as set via * oprofilefs. */ struct op_counter_config { - unsigned long count; - unsigned long enabled; - unsigned long event; - unsigned long kernel; - unsigned long user; - unsigned long unit_mask; + unsigned long count; + unsigned long enabled; + unsigned long event; + unsigned long kernel; + unsigned long user; + unsigned long unit_mask; }; extern struct op_counter_config counter_config[]; diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index d9faf607b3a..509513760a6 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -67,8 +67,9 @@ static unsigned long reset_value[NUM_COUNTERS]; /* The function interface needs to be fixed, something like add data. Should then be added to linux/oprofile.h. */ -extern void oprofile_add_ibs_sample(struct pt_regs *const regs, - unsigned int * const ibs_sample, u8 code); +extern void +oprofile_add_ibs_sample(struct pt_regs *const regs, + unsigned int *const ibs_sample, int ibs_code); struct ibs_fetch_sample { /* MSRC001_1031 IBS Fetch Linear Address Register */ @@ -309,12 +310,15 @@ static void op_amd_start(struct op_msrs const * const msrs) #ifdef CONFIG_OPROFILE_IBS if (ibs_allowed && ibs_config.fetch_enabled) { low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; - high = IBS_FETCH_HIGH_ENABLE; + high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */ + + IBS_FETCH_HIGH_ENABLE; wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); } if (ibs_allowed && ibs_config.op_enabled) { - low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) + IBS_OP_LOW_ENABLE; + low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) + + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */ + + IBS_OP_LOW_ENABLE; high = 0; wrmsr(MSR_AMD64_IBSOPCTL, low, high); } @@ -468,11 +472,10 @@ static void clear_ibs_nmi(void) on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); } -static int (*create_arch_files)(struct super_block * sb, struct dentry * root); +static int (*create_arch_files)(struct super_block *sb, struct dentry *root); -static int setup_ibs_files(struct super_block * sb, struct dentry * root) +static int setup_ibs_files(struct super_block *sb, struct dentry *root) { - char buf[12]; struct dentry *dir; int ret = 0; @@ -494,22 +497,22 @@ static int setup_ibs_files(struct super_block * sb, struct dentry * root) ibs_config.max_cnt_op = 250000; ibs_config.op_enabled = 0; ibs_config.dispatched_ops = 1; - snprintf(buf, sizeof(buf), "ibs_fetch"); - dir = oprofilefs_mkdir(sb, root, buf); - oprofilefs_create_ulong(sb, dir, "rand_enable", - &ibs_config.rand_en); + + dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); oprofilefs_create_ulong(sb, dir, "enable", - &ibs_config.fetch_enabled); + &ibs_config.fetch_enabled); oprofilefs_create_ulong(sb, dir, "max_count", - &ibs_config.max_cnt_fetch); - snprintf(buf, sizeof(buf), "ibs_uops"); - dir = oprofilefs_mkdir(sb, root, buf); + &ibs_config.max_cnt_fetch); + oprofilefs_create_ulong(sb, dir, "rand_enable", + &ibs_config.rand_en); + + dir = oprofilefs_mkdir(sb, root, "ibs_op"); oprofilefs_create_ulong(sb, dir, "enable", - &ibs_config.op_enabled); + &ibs_config.op_enabled); oprofilefs_create_ulong(sb, dir, "max_count", - &ibs_config.max_cnt_op); + &ibs_config.max_cnt_op); oprofilefs_create_ulong(sb, dir, "dispatched_ops", - &ibs_config.dispatched_ops); + &ibs_config.dispatched_ops); return 0; } @@ -530,14 +533,14 @@ static void op_amd_exit(void) #endif struct op_x86_model_spec const op_amd_spec = { - .init = op_amd_init, - .exit = op_amd_exit, - .num_counters = NUM_COUNTERS, - .num_controls = NUM_CONTROLS, - .fill_in_addresses = &op_amd_fill_in_addresses, - .setup_ctrs = &op_amd_setup_ctrs, - .check_ctrs = &op_amd_check_ctrs, - .start = &op_amd_start, - .stop = &op_amd_stop, - .shutdown = &op_amd_shutdown + .init = op_amd_init, + .exit = op_amd_exit, + .num_counters = NUM_COUNTERS, + .num_controls = NUM_CONTROLS, + .fill_in_addresses = &op_amd_fill_in_addresses, + .setup_ctrs = &op_amd_setup_ctrs, + .check_ctrs = &op_amd_check_ctrs, + .start = &op_amd_start, + .stop = &op_amd_stop, + .shutdown = &op_amd_shutdown }; diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 43ac5af338d..4c4a51c90bc 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -698,24 +698,24 @@ static void p4_shutdown(struct op_msrs const * const msrs) #ifdef CONFIG_SMP struct op_x86_model_spec const op_p4_ht2_spec = { - .num_counters = NUM_COUNTERS_HT2, - .num_controls = NUM_CONTROLS_HT2, - .fill_in_addresses = &p4_fill_in_addresses, - .setup_ctrs = &p4_setup_ctrs, - .check_ctrs = &p4_check_ctrs, - .start = &p4_start, - .stop = &p4_stop, - .shutdown = &p4_shutdown + .num_counters = NUM_COUNTERS_HT2, + .num_controls = NUM_CONTROLS_HT2, + .fill_in_addresses = &p4_fill_in_addresses, + .setup_ctrs = &p4_setup_ctrs, + .check_ctrs = &p4_check_ctrs, + .start = &p4_start, + .stop = &p4_stop, + .shutdown = &p4_shutdown }; #endif struct op_x86_model_spec const op_p4_spec = { - .num_counters = NUM_COUNTERS_NON_HT, - .num_controls = NUM_CONTROLS_NON_HT, - .fill_in_addresses = &p4_fill_in_addresses, - .setup_ctrs = &p4_setup_ctrs, - .check_ctrs = &p4_check_ctrs, - .start = &p4_start, - .stop = &p4_stop, - .shutdown = &p4_shutdown + .num_counters = NUM_COUNTERS_NON_HT, + .num_controls = NUM_CONTROLS_NON_HT, + .fill_in_addresses = &p4_fill_in_addresses, + .setup_ctrs = &p4_setup_ctrs, + .check_ctrs = &p4_check_ctrs, + .start = &p4_start, + .stop = &p4_stop, + .shutdown = &p4_shutdown }; diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index eff431f6c57..0620d6d45f7 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -1,32 +1,34 @@ /* * @file op_model_ppro.h - * pentium pro / P6 model-specific MSR operations + * Family 6 perfmon and architectural perfmon MSR operations * * @remark Copyright 2002 OProfile authors + * @remark Copyright 2008 Intel Corporation * @remark Read the file COPYING * * @author John Levon * @author Philippe Elie * @author Graydon Hoare + * @author Andi Kleen */ #include <linux/oprofile.h> +#include <linux/slab.h> #include <asm/ptrace.h> #include <asm/msr.h> #include <asm/apic.h> #include <asm/nmi.h> +#include <asm/intel_arch_perfmon.h> #include "op_x86_model.h" #include "op_counter.h" -#define NUM_COUNTERS 2 -#define NUM_CONTROLS 2 +static int num_counters = 2; +static int counter_width = 32; #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) -#define CTR_32BIT_WRITE(l, msrs, c) \ - do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0); } while (0) -#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) +#define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1)))) #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) @@ -40,20 +42,20 @@ #define CTRL_SET_UM(val, m) (val |= (m << 8)) #define CTRL_SET_EVENT(val, e) (val |= e) -static unsigned long reset_value[NUM_COUNTERS]; +static u64 *reset_value; static void ppro_fill_in_addresses(struct op_msrs * const msrs) { int i; - for (i = 0; i < NUM_COUNTERS; i++) { + for (i = 0; i < num_counters; i++) { if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; else msrs->counters[i].addr = 0; } - for (i = 0; i < NUM_CONTROLS; i++) { + for (i = 0; i < num_counters; i++) { if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; else @@ -67,8 +69,22 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) unsigned int low, high; int i; + if (!reset_value) { + reset_value = kmalloc(sizeof(unsigned) * num_counters, + GFP_ATOMIC); + if (!reset_value) + return; + } + + if (cpu_has_arch_perfmon) { + union cpuid10_eax eax; + eax.full = cpuid_eax(0xa); + if (counter_width < eax.split.bit_width) + counter_width = eax.split.bit_width; + } + /* clear all counters */ - for (i = 0 ; i < NUM_CONTROLS; ++i) { + for (i = 0 ; i < num_counters; ++i) { if (unlikely(!CTRL_IS_RESERVED(msrs, i))) continue; CTRL_READ(low, high, msrs, i); @@ -77,18 +93,18 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) } /* avoid a false detection of ctr overflows in NMI handler */ - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < num_counters; ++i) { if (unlikely(!CTR_IS_RESERVED(msrs, i))) continue; - CTR_32BIT_WRITE(1, msrs, i); + wrmsrl(msrs->counters[i].addr, -1LL); } /* enable active counters */ - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < num_counters; ++i) { if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { reset_value[i] = counter_config[i].count; - CTR_32BIT_WRITE(counter_config[i].count, msrs, i); + wrmsrl(msrs->counters[i].addr, -reset_value[i]); CTRL_READ(low, high, msrs, i); CTRL_CLEAR(low); @@ -111,13 +127,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs, unsigned int low, high; int i; - for (i = 0 ; i < NUM_COUNTERS; ++i) { + for (i = 0 ; i < num_counters; ++i) { if (!reset_value[i]) continue; CTR_READ(low, high, msrs, i); if (CTR_OVERFLOWED(low)) { oprofile_add_sample(regs, i); - CTR_32BIT_WRITE(reset_value[i], msrs, i); + wrmsrl(msrs->counters[i].addr, -reset_value[i]); } } @@ -141,7 +157,7 @@ static void ppro_start(struct op_msrs const * const msrs) unsigned int low, high; int i; - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < num_counters; ++i) { if (reset_value[i]) { CTRL_READ(low, high, msrs, i); CTRL_SET_ACTIVE(low); @@ -156,7 +172,7 @@ static void ppro_stop(struct op_msrs const * const msrs) unsigned int low, high; int i; - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; CTRL_READ(low, high, msrs, i); @@ -169,24 +185,70 @@ static void ppro_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { + for (i = 0 ; i < num_counters ; ++i) { if (CTR_IS_RESERVED(msrs, i)) release_perfctr_nmi(MSR_P6_PERFCTR0 + i); } - for (i = 0 ; i < NUM_CONTROLS ; ++i) { + for (i = 0 ; i < num_counters ; ++i) { if (CTRL_IS_RESERVED(msrs, i)) release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); } + if (reset_value) { + kfree(reset_value); + reset_value = NULL; + } } -struct op_x86_model_spec const op_ppro_spec = { - .num_counters = NUM_COUNTERS, - .num_controls = NUM_CONTROLS, - .fill_in_addresses = &ppro_fill_in_addresses, - .setup_ctrs = &ppro_setup_ctrs, - .check_ctrs = &ppro_check_ctrs, - .start = &ppro_start, - .stop = &ppro_stop, - .shutdown = &ppro_shutdown +struct op_x86_model_spec op_ppro_spec = { + .num_counters = 2, /* can be overriden */ + .num_controls = 2, /* dito */ + .fill_in_addresses = &ppro_fill_in_addresses, + .setup_ctrs = &ppro_setup_ctrs, + .check_ctrs = &ppro_check_ctrs, + .start = &ppro_start, + .stop = &ppro_stop, + .shutdown = &ppro_shutdown +}; + +/* + * Architectural performance monitoring. + * + * Newer Intel CPUs (Core1+) have support for architectural + * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details. + * The advantage of this is that it can be done without knowing about + * the specific CPU. + */ + +void arch_perfmon_setup_counters(void) +{ + union cpuid10_eax eax; + + eax.full = cpuid_eax(0xa); + + /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */ + if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && + current_cpu_data.x86_model == 15) { + eax.split.version_id = 2; + eax.split.num_counters = 2; + eax.split.bit_width = 40; + } + + num_counters = eax.split.num_counters; + + op_arch_perfmon_spec.num_counters = num_counters; + op_arch_perfmon_spec.num_controls = num_counters; + op_ppro_spec.num_counters = num_counters; + op_ppro_spec.num_controls = num_counters; +} + +struct op_x86_model_spec op_arch_perfmon_spec = { + /* num_counters/num_controls filled in at runtime */ + .fill_in_addresses = &ppro_fill_in_addresses, + /* user space does the cpuid check for available events */ + .setup_ctrs = &ppro_setup_ctrs, + .check_ctrs = &ppro_check_ctrs, + .start = &ppro_start, + .stop = &ppro_stop, + .shutdown = &ppro_shutdown }; diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 05a0261ba0c..825e79064d6 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -22,8 +22,8 @@ struct op_msr { }; struct op_msrs { - struct op_msr * counters; - struct op_msr * controls; + struct op_msr *counters; + struct op_msr *controls; }; struct pt_regs; @@ -34,8 +34,8 @@ struct pt_regs; struct op_x86_model_spec { int (*init)(struct oprofile_operations *ops); void (*exit)(void); - unsigned int const num_counters; - unsigned int const num_controls; + unsigned int num_counters; + unsigned int num_controls; void (*fill_in_addresses)(struct op_msrs * const msrs); void (*setup_ctrs)(struct op_msrs const * const msrs); int (*check_ctrs)(struct pt_regs * const regs, @@ -45,9 +45,12 @@ struct op_x86_model_spec { void (*shutdown)(struct op_msrs const * const msrs); }; -extern struct op_x86_model_spec const op_ppro_spec; +extern struct op_x86_model_spec op_ppro_spec; extern struct op_x86_model_spec const op_p4_spec; extern struct op_x86_model_spec const op_p4_ht2_spec; extern struct op_x86_model_spec const op_amd_spec; +extern struct op_x86_model_spec op_arch_perfmon_spec; + +extern void arch_perfmon_setup_counters(void); #endif /* OP_X86_MODEL_H */ diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 006599db0dc..bf69dbe08bf 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -493,7 +493,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq if (pirq <= 4) irq = read_config_nybble(router, 0x56, pirq - 1); dev_info(&dev->dev, - "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n", + "AMD756: dev [%04x:%04x], router PIRQ %d get IRQ %d\n", dev->vendor, dev->device, pirq, irq); return irq; } @@ -501,7 +501,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { dev_info(&dev->dev, - "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n", + "AMD756: dev [%04x:%04x], router PIRQ %d set IRQ %d\n", dev->vendor, dev->device, pirq, irq); if (pirq <= 4) write_config_nybble(router, 0x56, pirq - 1, irq); @@ -590,13 +590,20 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route case PCI_DEVICE_ID_INTEL_ICH10_1: case PCI_DEVICE_ID_INTEL_ICH10_2: case PCI_DEVICE_ID_INTEL_ICH10_3: - case PCI_DEVICE_ID_INTEL_PCH_0: - case PCI_DEVICE_ID_INTEL_PCH_1: r->name = "PIIX/ICH"; r->get = pirq_piix_get; r->set = pirq_piix_set; return 1; } + + if ((device >= PCI_DEVICE_ID_INTEL_PCH_LPC_MIN) && + (device <= PCI_DEVICE_ID_INTEL_PCH_LPC_MAX)) { + r->name = "PIIX/ICH"; + r->get = pirq_piix_get; + r->set = pirq_piix_set; + return 1; + } + return 0; } @@ -823,7 +830,7 @@ static void __init pirq_find_router(struct irq_router *r) r->get = NULL; r->set = NULL; - DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", + DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for [%04x:%04x]\n", rt->rtr_vendor, rt->rtr_device); pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn); @@ -843,7 +850,7 @@ static void __init pirq_find_router(struct irq_router *r) h->probe(r, pirq_router_dev, pirq_router_dev->device)) break; } - dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n", + dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x:%04x]\n", pirq_router.name, pirq_router_dev->vendor, pirq_router_dev->device); diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 28b85ab8422..bb042608c60 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -21,7 +21,6 @@ void xen_force_evtchn_callback(void) static void __init __xen_init_IRQ(void) { -#ifdef CONFIG_X86_64 int i; /* Create identity vector->irq map */ @@ -31,7 +30,6 @@ static void __init __xen_init_IRQ(void) for_each_possible_cpu(cpu) per_cpu(vector_irq, cpu)[i] = i; } -#endif /* CONFIG_X86_64 */ xen_init_IRQ(); } diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index dd71e3a021c..5601506f2dd 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -241,7 +241,7 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ - kstat_this_cpu.irqs[irq]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); out: raw_local_irq_restore(flags); |