diff options
Diffstat (limited to 'arch/x86')
62 files changed, 1949 insertions, 648 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 31758378bcd..34bc3a89228 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -169,6 +169,9 @@ config GENERIC_HARDIRQS bool default y +config GENERIC_HARDIRQS_NO__DO_IRQ + def_bool y + config GENERIC_IRQ_PROBE bool default y @@ -931,6 +934,12 @@ config X86_CPUID with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to /dev/cpu/31/cpuid. +config X86_CPU_DEBUG + tristate "/sys/kernel/debug/x86/cpu/* - CPU Debug support" + ---help--- + If you select this option, this will provide various x86 CPUs + information through debugfs. + choice prompt "High Memory Support" default HIGHMEM4G if !X86_NUMAQ @@ -1123,7 +1132,7 @@ config NUMA_EMU config NODES_SHIFT int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP - range 1 9 if X86_64 + range 1 9 default "9" if MAXSMP default "6" if X86_64 default "4" if X86_NUMAQ @@ -1431,7 +1440,7 @@ config CRASH_DUMP config KEXEC_JUMP bool "kexec jump (EXPERIMENTAL)" depends on EXPERIMENTAL - depends on KEXEC && HIBERNATION && X86_32 + depends on KEXEC && HIBERNATION ---help--- Jump between original kernel and kexeced kernel and invoke code in physical address mode via KEXEC diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1836191839e..f05d8c91d9e 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -153,34 +153,23 @@ endif boot := arch/x86/boot -PHONY += zImage bzImage compressed zlilo bzlilo \ - zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install +BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage install + +PHONY += bzImage $(BOOT_TARGETS) # Default kernel to build all: bzImage # KBUILD_IMAGE specify target image being built - KBUILD_IMAGE := $(boot)/bzImage -zImage zlilo zdisk: KBUILD_IMAGE := $(boot)/zImage +KBUILD_IMAGE := $(boot)/bzImage -zImage bzImage: vmlinux +bzImage: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ -compressed: zImage - -zlilo bzlilo: vmlinux - $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zlilo - -zdisk bzdisk: vmlinux - $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zdisk - -fdimage fdimage144 fdimage288 isoimage: vmlinux - $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@ - -install: - $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install +$(BOOT_TARGETS): vmlinux + $(Q)$(MAKE) $(build)=$(boot) $@ PHONY += vdso_install vdso_install: @@ -205,7 +194,3 @@ define archhelp echo ' FDARGS="..." arguments for the booted kernel' echo ' FDINITRD=file initrd for the booted kernel' endef - -CLEAN_FILES += arch/x86/boot/fdimage \ - arch/x86/boot/image.iso \ - arch/x86/boot/mtools.conf diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index c70eff69a1f..fb737ce5888 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -6,26 +6,24 @@ # for more details. # # Copyright (C) 1994 by Linus Torvalds +# Changed by many, many contributors over the years. # # ROOT_DEV specifies the default root-device when making the image. # This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case # the default of FLOPPY is used by 'build'. -ROOT_DEV := CURRENT +ROOT_DEV := CURRENT # If you want to preset the SVGA mode, uncomment the next line and # set SVGA_MODE to whatever number you want. # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. # The number is the same as you would ordinarily press at bootup. -SVGA_MODE := -DSVGA_MODE=NORMAL_VGA +SVGA_MODE := -DSVGA_MODE=NORMAL_VGA -# If you want the RAM disk device, define this to be the size in blocks. - -#RAMDISK := -DRAMDISK=512 - -targets := vmlinux.bin setup.bin setup.elf zImage bzImage +targets := vmlinux.bin setup.bin setup.elf bzImage +targets += fdimage fdimage144 fdimage288 image.iso mtools.conf subdir- := compressed setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o @@ -71,17 +69,13 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \ KBUILD_CFLAGS += $(call cc-option,-m32) KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ -$(obj)/zImage: asflags-y := $(SVGA_MODE) $(RAMDISK) -$(obj)/bzImage: ccflags-y := -D__BIG_KERNEL__ -$(obj)/bzImage: asflags-y := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__ -$(obj)/bzImage: BUILDFLAGS := -b +$(obj)/bzImage: asflags-y := $(SVGA_MODE) quiet_cmd_image = BUILD $@ -cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/setup.bin \ - $(obj)/vmlinux.bin $(ROOT_DEV) > $@ +cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \ + $(ROOT_DEV) > $@ -$(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \ - $(obj)/vmlinux.bin $(obj)/tools/build FORCE +$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE $(call if_changed,image) @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' @@ -116,9 +110,11 @@ $(obj)/setup.bin: $(obj)/setup.elf FORCE $(obj)/compressed/vmlinux: FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed $@ -# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel +# Set this if you want to pass append arguments to the +# bzdisk/fdimage/isoimage kernel FDARGS = -# Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel +# Set this if you want an initrd included with the +# bzdisk/fdimage/isoimage kernel FDINITRD = image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,) @@ -127,7 +123,7 @@ $(obj)/mtools.conf: $(src)/mtools.conf.in sed -e 's|@OBJ@|$(obj)|g' < $< > $@ # This requires write access to /dev/fd0 -zdisk: $(BOOTIMAGE) $(obj)/mtools.conf +bzdisk: $(obj)/bzImage $(obj)/mtools.conf MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync syslinux /dev/fd0 ; sync echo '$(image_cmdline)' | \ @@ -135,10 +131,10 @@ zdisk: $(BOOTIMAGE) $(obj)/mtools.conf if [ -f '$(FDINITRD)' ] ; then \ MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \ fi - MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) a:linux ; sync + MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage a:linux ; sync # These require being root or having syslinux 2.02 or higher installed -fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf +fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440 MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync syslinux $(obj)/fdimage ; sync @@ -147,9 +143,9 @@ fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf if [ -f '$(FDINITRD)' ] ; then \ MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \ fi - MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) v:linux ; sync + MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage v:linux ; sync -fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf +fdimage288: $(obj)/bzImage $(obj)/mtools.conf dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880 MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync syslinux $(obj)/fdimage ; sync @@ -158,9 +154,9 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf if [ -f '$(FDINITRD)' ] ; then \ MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \ fi - MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) w:linux ; sync + MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage w:linux ; sync -isoimage: $(BOOTIMAGE) +isoimage: $(obj)/bzImage -rm -rf $(obj)/isoimage mkdir $(obj)/isoimage for i in lib lib64 share end ; do \ @@ -170,7 +166,7 @@ isoimage: $(BOOTIMAGE) fi ; \ if [ $$i = end ] ; then exit 1 ; fi ; \ done - cp $(BOOTIMAGE) $(obj)/isoimage/linux + cp $(obj)/bzImage $(obj)/isoimage/linux echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg if [ -f '$(FDINITRD)' ] ; then \ cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \ @@ -181,12 +177,13 @@ isoimage: $(BOOTIMAGE) isohybrid $(obj)/image.iso 2>/dev/null || true rm -rf $(obj)/isoimage -zlilo: $(BOOTIMAGE) +bzlilo: $(obj)/bzImage if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi - cat $(BOOTIMAGE) > $(INSTALL_PATH)/vmlinuz + cat $(obj)/bzImage > $(INSTALL_PATH)/vmlinuz cp System.map $(INSTALL_PATH)/ if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi install: - sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)" + sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ + System.map "$(INSTALL_PATH)" diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 7ccff4884a2..5d84d1c74e4 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -24,12 +24,8 @@ #include "boot.h" #include "offsets.h" -SETUPSECTS = 4 /* default nr of setup-sectors */ BOOTSEG = 0x07C0 /* original address of boot-sector */ -SYSSEG = DEF_SYSSEG /* system loaded at 0x10000 (65536) */ -SYSSIZE = DEF_SYSSIZE /* system size: # of 16-byte clicks */ - /* to be loaded */ -ROOT_DEV = 0 /* ROOT_DEV is now written by "build" */ +SYSSEG = 0x1000 /* historical load address >> 4 */ #ifndef SVGA_MODE #define SVGA_MODE ASK_VGA @@ -97,12 +93,12 @@ bugger_off_msg: .section ".header", "a" .globl hdr hdr: -setup_sects: .byte SETUPSECTS +setup_sects: .byte 0 /* Filled in by build.c */ root_flags: .word ROOT_RDONLY -syssize: .long SYSSIZE -ram_size: .word RAMDISK +syssize: .long 0 /* Filled in by build.c */ +ram_size: .word 0 /* Obsolete */ vid_mode: .word SVGA_MODE -root_dev: .word ROOT_DEV +root_dev: .word 0 /* Filled in by build.c */ boot_flag: .word 0xAA55 # offset 512, entry point @@ -123,14 +119,15 @@ _start: # or else old loadlin-1.5 will fail) .globl realmode_swtch realmode_swtch: .word 0, 0 # default_switch, SETUPSEG -start_sys_seg: .word SYSSEG +start_sys_seg: .word SYSSEG # obsolete and meaningless, but just + # in case something decided to "use" it .word kernel_version-512 # pointing to kernel version string # above section of header is compatible # with loadlin-1.5 (header v1.5). Don't # change it. -type_of_loader: .byte 0 # = 0, old one (LILO, Loadlin, - # Bootlin, SYSLX, bootsect...) +type_of_loader: .byte 0 # 0 means ancient bootloader, newer + # bootloaders know to change this. # See Documentation/i386/boot.txt for # assigned ids @@ -142,11 +139,7 @@ CAN_USE_HEAP = 0x80 # If set, the loader also has set # space behind setup.S can be used for # heap purposes. # Only the loader knows what is free -#ifndef __BIG_KERNEL__ - .byte 0 -#else .byte LOADED_HIGH -#endif setup_move_size: .word 0x8000 # size to move, when setup is not # loaded at 0x90000. We will move setup @@ -157,11 +150,7 @@ setup_move_size: .word 0x8000 # size to move, when setup is not code32_start: # here loaders can put a different # start address for 32-bit code. -#ifndef __BIG_KERNEL__ - .long 0x1000 # 0x1000 = default for zImage -#else .long 0x100000 # 0x100000 = default for big kernel -#endif ramdisk_image: .long 0 # address of loaded ramdisk image # Here the loader puts the 32-bit diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c index 85a1cd8a8ff..8062f891525 100644 --- a/arch/x86/boot/pm.c +++ b/arch/x86/boot/pm.c @@ -33,47 +33,6 @@ static void realmode_switch_hook(void) } /* - * A zImage kernel is loaded at 0x10000 but wants to run at 0x1000. - * A bzImage kernel is loaded and runs at 0x100000. - */ -static void move_kernel_around(void) -{ - /* Note: rely on the compile-time option here rather than - the LOADED_HIGH flag. The Qemu kernel loader unconditionally - sets the loadflags to zero. */ -#ifndef __BIG_KERNEL__ - u16 dst_seg, src_seg; - u32 syssize; - - dst_seg = 0x1000 >> 4; - src_seg = 0x10000 >> 4; - syssize = boot_params.hdr.syssize; /* Size in 16-byte paragraphs */ - - while (syssize) { - int paras = (syssize >= 0x1000) ? 0x1000 : syssize; - int dwords = paras << 2; - - asm volatile("pushw %%es ; " - "pushw %%ds ; " - "movw %1,%%es ; " - "movw %2,%%ds ; " - "xorw %%di,%%di ; " - "xorw %%si,%%si ; " - "rep;movsl ; " - "popw %%ds ; " - "popw %%es" - : "+c" (dwords) - : "r" (dst_seg), "r" (src_seg) - : "esi", "edi"); - - syssize -= paras; - dst_seg += paras; - src_seg += paras; - } -#endif -} - -/* * Disable all interrupts at the legacy PIC. */ static void mask_all_interrupts(void) @@ -147,9 +106,6 @@ void go_to_protected_mode(void) /* Hook before leaving real mode, also disables interrupts */ realmode_switch_hook(); - /* Move the kernel/setup to their final resting places */ - move_kernel_around(); - /* Enable the A20 gate */ if (enable_a20()) { puts("A20 gate not responding, unable to boot...\n"); diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 44dc1923c0e..ee3a4ea923a 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -130,7 +130,7 @@ static void die(const char * str, ...) static void usage(void) { - die("Usage: build [-b] setup system [rootdev] [> image]"); + die("Usage: build setup system [rootdev] [> image]"); } int main(int argc, char ** argv) @@ -145,11 +145,6 @@ int main(int argc, char ** argv) void *kernel; u32 crc = 0xffffffffUL; - if (argc > 2 && !strcmp(argv[1], "-b")) - { - is_big_kernel = 1; - argc--, argv++; - } if ((argc < 3) || (argc > 4)) usage(); if (argc > 3) { @@ -216,8 +211,6 @@ int main(int argc, char ** argv) die("Unable to mmap '%s': %m", argv[2]); /* Number of 16-byte paragraphs, including space for a 4-byte CRC */ sys_size = (sz + 15 + 4) / 16; - if (!is_big_kernel && sys_size > DEF_SYSSIZE) - die("System is too big. Try using bzImage or modules."); /* Patch the setup code with the appropriate size parameters */ buf[0x1f1] = setup_sectors-1; diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 6526cf08b0e..6ba23dd9fc9 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -1,10 +1,6 @@ #ifndef _ASM_X86_BOOT_H #define _ASM_X86_BOOT_H -/* Don't touch these, unless you really know what you're doing. */ -#define DEF_SYSSEG 0x1000 -#define DEF_SYSSIZE 0x7F00 - /* Internal svga startup constants */ #define NORMAL_VGA 0xffff /* 80x25 mode */ #define EXTENDED_VGA 0xfffe /* 80x50 mode */ diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h new file mode 100755 index 00000000000..56f1635e461 --- /dev/null +++ b/arch/x86/include/asm/cpu_debug.h @@ -0,0 +1,199 @@ +#ifndef _ASM_X86_CPU_DEBUG_H +#define _ASM_X86_CPU_DEBUG_H + +/* + * CPU x86 architecture debug + * + * Copyright(C) 2009 Jaswinder Singh Rajput + */ + +/* Register flags */ +enum cpu_debug_bit { +/* Model Specific Registers (MSRs) */ + CPU_MC_BIT, /* Machine Check */ + CPU_MONITOR_BIT, /* Monitor */ + CPU_TIME_BIT, /* Time */ + CPU_PMC_BIT, /* Performance Monitor */ + CPU_PLATFORM_BIT, /* Platform */ + CPU_APIC_BIT, /* APIC */ + CPU_POWERON_BIT, /* Power-on */ + CPU_CONTROL_BIT, /* Control */ + CPU_FEATURES_BIT, /* Features control */ + CPU_LBRANCH_BIT, /* Last Branch */ + CPU_BIOS_BIT, /* BIOS */ + CPU_FREQ_BIT, /* Frequency */ + CPU_MTTR_BIT, /* MTRR */ + CPU_PERF_BIT, /* Performance */ + CPU_CACHE_BIT, /* Cache */ + CPU_SYSENTER_BIT, /* Sysenter */ + CPU_THERM_BIT, /* Thermal */ + CPU_MISC_BIT, /* Miscellaneous */ + CPU_DEBUG_BIT, /* Debug */ + CPU_PAT_BIT, /* PAT */ + CPU_VMX_BIT, /* VMX */ + CPU_CALL_BIT, /* System Call */ + CPU_BASE_BIT, /* BASE Address */ + CPU_SMM_BIT, /* System mgmt mode */ + CPU_SVM_BIT, /*Secure Virtual Machine*/ + CPU_OSVM_BIT, /* OS-Visible Workaround*/ +/* Standard Registers */ + CPU_TSS_BIT, /* Task Stack Segment */ + CPU_CR_BIT, /* Control Registers */ + CPU_DT_BIT, /* Descriptor Table */ +/* End of Registers flags */ + CPU_REG_ALL_BIT, /* Select all Registers */ +}; + +#define CPU_REG_ALL (~0) /* Select all Registers */ + +#define CPU_MC (1 << CPU_MC_BIT) +#define CPU_MONITOR (1 << CPU_MONITOR_BIT) +#define CPU_TIME (1 << CPU_TIME_BIT) +#define CPU_PMC (1 << CPU_PMC_BIT) +#define CPU_PLATFORM (1 << CPU_PLATFORM_BIT) +#define CPU_APIC (1 << CPU_APIC_BIT) +#define CPU_POWERON (1 << CPU_POWERON_BIT) +#define CPU_CONTROL (1 << CPU_CONTROL_BIT) +#define CPU_FEATURES (1 << CPU_FEATURES_BIT) +#define CPU_LBRANCH (1 << CPU_LBRANCH_BIT) +#define CPU_BIOS (1 << CPU_BIOS_BIT) +#define CPU_FREQ (1 << CPU_FREQ_BIT) +#define CPU_MTRR (1 << CPU_MTTR_BIT) +#define CPU_PERF (1 << CPU_PERF_BIT) +#define CPU_CACHE (1 << CPU_CACHE_BIT) +#define CPU_SYSENTER (1 << CPU_SYSENTER_BIT) +#define CPU_THERM (1 << CPU_THERM_BIT) +#define CPU_MISC (1 << CPU_MISC_BIT) +#define CPU_DEBUG (1 << CPU_DEBUG_BIT) +#define CPU_PAT (1 << CPU_PAT_BIT) +#define CPU_VMX (1 << CPU_VMX_BIT) +#define CPU_CALL (1 << CPU_CALL_BIT) +#define CPU_BASE (1 << CPU_BASE_BIT) +#define CPU_SMM (1 << CPU_SMM_BIT) +#define CPU_SVM (1 << CPU_SVM_BIT) +#define CPU_OSVM (1 << CPU_OSVM_BIT) +#define CPU_TSS (1 << CPU_TSS_BIT) +#define CPU_CR (1 << CPU_CR_BIT) +#define CPU_DT (1 << CPU_DT_BIT) + +/* Register file flags */ +enum cpu_file_bit { + CPU_INDEX_BIT, /* index */ + CPU_VALUE_BIT, /* value */ +}; + +#define CPU_FILE_VALUE (1 << CPU_VALUE_BIT) + +/* + * DisplayFamily_DisplayModel Processor Families/Processor Number Series + * -------------------------- ------------------------------------------ + * 05_01, 05_02, 05_04 Pentium, Pentium with MMX + * + * 06_01 Pentium Pro + * 06_03, 06_05 Pentium II Xeon, Pentium II + * 06_07, 06_08, 06_0A, 06_0B Pentium III Xeon, Pentum III + * + * 06_09, 060D Pentium M + * + * 06_0E Core Duo, Core Solo + * + * 06_0F Xeon 3000, 3200, 5100, 5300, 7300 series, + * Core 2 Quad, Core 2 Extreme, Core 2 Duo, + * Pentium dual-core + * 06_17 Xeon 5200, 5400 series, Core 2 Quad Q9650 + * + * 06_1C Atom + * + * 0F_00, 0F_01, 0F_02 Xeon, Xeon MP, Pentium 4 + * 0F_03, 0F_04 Xeon, Xeon MP, Pentium 4, Pentium D + * + * 0F_06 Xeon 7100, 5000 Series, Xeon MP, + * Pentium 4, Pentium D + */ + +/* Register processors bits */ +enum cpu_processor_bit { + CPU_NONE, +/* Intel */ + CPU_INTEL_PENTIUM_BIT, + CPU_INTEL_P6_BIT, + CPU_INTEL_PENTIUM_M_BIT, + CPU_INTEL_CORE_BIT, + CPU_INTEL_CORE2_BIT, + CPU_INTEL_ATOM_BIT, + CPU_INTEL_XEON_P4_BIT, + CPU_INTEL_XEON_MP_BIT, +}; + +#define CPU_ALL (~0) /* Select all CPUs */ + +#define CPU_INTEL_PENTIUM (1 << CPU_INTEL_PENTIUM_BIT) +#define CPU_INTEL_P6 (1 << CPU_INTEL_P6_BIT) +#define CPU_INTEL_PENTIUM_M (1 << CPU_INTEL_PENTIUM_M_BIT) +#define CPU_INTEL_CORE (1 << CPU_INTEL_CORE_BIT) +#define CPU_INTEL_CORE2 (1 << CPU_INTEL_CORE2_BIT) +#define CPU_INTEL_ATOM (1 << CPU_INTEL_ATOM_BIT) +#define CPU_INTEL_XEON_P4 (1 << CPU_INTEL_XEON_P4_BIT) +#define CPU_INTEL_XEON_MP (1 << CPU_INTEL_XEON_MP_BIT) + +#define CPU_INTEL_PX (CPU_INTEL_P6 | CPU_INTEL_PENTIUM_M) +#define CPU_INTEL_COREX (CPU_INTEL_CORE | CPU_INTEL_CORE2) +#define CPU_INTEL_XEON (CPU_INTEL_XEON_P4 | CPU_INTEL_XEON_MP) +#define CPU_CO_AT (CPU_INTEL_CORE | CPU_INTEL_ATOM) +#define CPU_C2_AT (CPU_INTEL_CORE2 | CPU_INTEL_ATOM) +#define CPU_CX_AT (CPU_INTEL_COREX | CPU_INTEL_ATOM) +#define CPU_CX_XE (CPU_INTEL_COREX | CPU_INTEL_XEON) +#define CPU_P6_XE (CPU_INTEL_P6 | CPU_INTEL_XEON) +#define CPU_PM_CO_AT (CPU_INTEL_PENTIUM_M | CPU_CO_AT) +#define CPU_C2_AT_XE (CPU_C2_AT | CPU_INTEL_XEON) +#define CPU_CX_AT_XE (CPU_CX_AT | CPU_INTEL_XEON) +#define CPU_P6_CX_AT (CPU_INTEL_P6 | CPU_CX_AT) +#define CPU_P6_CX_XE (CPU_P6_XE | CPU_INTEL_COREX) +#define CPU_P6_CX_AT_XE (CPU_INTEL_P6 | CPU_CX_AT_XE) +#define CPU_PM_CX_AT_XE (CPU_INTEL_PENTIUM_M | CPU_CX_AT_XE) +#define CPU_PM_CX_AT (CPU_INTEL_PENTIUM_M | CPU_CX_AT) +#define CPU_PM_CX_XE (CPU_INTEL_PENTIUM_M | CPU_CX_XE) +#define CPU_PX_CX_AT (CPU_INTEL_PX | CPU_CX_AT) +#define CPU_PX_CX_AT_XE (CPU_INTEL_PX | CPU_CX_AT_XE) + +/* Select all Intel CPUs*/ +#define CPU_INTEL_ALL (CPU_INTEL_PENTIUM | CPU_PX_CX_AT_XE) + +#define MAX_CPU_FILES 512 + +struct cpu_private { + unsigned cpu; + unsigned type; + unsigned reg; + unsigned file; +}; + +struct cpu_debug_base { + char *name; /* Register name */ + unsigned flag; /* Register flag */ + unsigned write; /* Register write flag */ +}; + +/* + * Currently it looks similar to cpu_debug_base but once we add more files + * cpu_file_base will go in different direction + */ +struct cpu_file_base { + char *name; /* Register file name */ + unsigned flag; /* Register file flag */ + unsigned write; /* Register write flag */ +}; + +struct cpu_cpuX_base { + struct dentry *dentry; /* Register dentry */ + int init; /* Register index file */ +}; + +struct cpu_debug_range { + unsigned min; /* Register range min */ + unsigned max; /* Register range max */ + unsigned flag; /* Supported flags */ + unsigned model; /* Supported models */ +}; + +#endif /* _ASM_X86_CPU_DEBUG_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index dc27705f544..5623c50d67b 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -91,7 +91,6 @@ static inline int desc_empty(const void *ptr) #define store_gdt(dtr) native_store_gdt(dtr) #define store_idt(dtr) native_store_idt(dtr) #define store_tr(tr) (tr = native_store_tr()) -#define store_ldt(ldt) asm("sldt %0":"=m" (ldt)) #define load_TLS(t, cpu) native_load_tls(t, cpu) #define set_ldt native_set_ldt @@ -112,6 +111,8 @@ static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) } #endif /* CONFIG_PARAVIRT */ +#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt)) + static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate) { diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index bf9276bea66..014c2b85ae4 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -63,6 +63,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); void *kmap_atomic(struct page *page, enum km_type type); void kunmap_atomic(void *kvaddr, enum km_type type); void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); +void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); struct page *kmap_atomic_to_page(void *ptr); #ifndef CONFIG_PARAVIRT diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 0ceb6d19ed3..317ff1703d0 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -9,13 +9,13 @@ # define PAGES_NR 4 #else # define PA_CONTROL_PAGE 0 -# define PA_TABLE_PAGE 1 -# define PAGES_NR 2 +# define VA_CONTROL_PAGE 1 +# define PA_TABLE_PAGE 2 +# define PA_SWAP_PAGE 3 +# define PAGES_NR 4 #endif -#ifdef CONFIG_X86_32 # define KEXEC_CONTROL_CODE_MAX_SIZE 2048 -#endif #ifndef __ASSEMBLY__ @@ -136,10 +136,11 @@ relocate_kernel(unsigned long indirection_page, unsigned int has_pae, unsigned int preserve_context); #else -NORET_TYPE void +unsigned long relocate_kernel(unsigned long indirection_page, unsigned long page_list, - unsigned long start_address) ATTRIB_NORET; + unsigned long start_address, + unsigned int preserve_context); #endif #define ARCH_HAS_KIMAGE_ARCH diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 9320e2a8a26..12d55e773eb 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -1,14 +1,11 @@ #ifndef _ASM_X86_LINKAGE_H #define _ASM_X86_LINKAGE_H +#include <linux/stringify.h> + #undef notrace #define notrace __attribute__((no_instrument_function)) -#ifdef CONFIG_X86_64 -#define __ALIGN .p2align 4,,15 -#define __ALIGN_STR ".p2align 4,,15" -#endif - #ifdef CONFIG_X86_32 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) /* @@ -50,16 +47,20 @@ __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ "g" (arg4), "g" (arg5), "g" (arg6)) -#endif +#endif /* CONFIG_X86_32 */ + +#ifdef __ASSEMBLY__ #define GLOBAL(name) \ .globl name; \ name: -#ifdef CONFIG_X86_ALIGNMENT_16 -#define __ALIGN .align 16,0x90 -#define __ALIGN_STR ".align 16,0x90" +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16) +#define __ALIGN .p2align 4, 0x90 +#define __ALIGN_STR __stringify(__ALIGN) #endif +#endif /* __ASSEMBLY__ */ + #endif /* _ASM_X86_LINKAGE_H */ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 8f1d2fbec1d..aee103b26d0 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -43,14 +43,6 @@ #else /* ...!ASSEMBLY */ #include <linux/stringify.h> -#include <asm/sections.h> - -#define __addr_to_pcpu_ptr(addr) \ - (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \ - + (unsigned long)__per_cpu_start) -#define __pcpu_ptr_to_addr(ptr) \ - (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \ - - (unsigned long)__per_cpu_start) #ifdef CONFIG_SMP #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 76139506c3e..ae85a8d66a3 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -75,9 +75,9 @@ struct cpuinfo_x86 { #else /* Number of 4K pages in DTLB/ITLB combined(in pages): */ int x86_tlbsize; +#endif __u8 x86_virt_bits; __u8 x86_phys_bits; -#endif /* CPUID returned core id bits: */ __u8 x86_coreid_bits; /* Max extended CPUID function supported: */ @@ -391,6 +391,9 @@ DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); DECLARE_INIT_PER_CPU(irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); +DECLARE_PER_CPU(unsigned int, irq_count); +extern unsigned long kernel_eflags; +extern asmlinkage void ignore_sysret(void); #else /* X86_64 */ #ifdef CONFIG_CC_STACKPROTECTOR DECLARE_PER_CPU(unsigned long, stack_canary); diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 5e79ca69432..9c371e4a9fa 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -296,6 +296,8 @@ HYPERVISOR_get_debugreg(int reg) static inline int HYPERVISOR_update_descriptor(u64 ma, u64 desc) { + if (sizeof(u64) == sizeof(long)) + return _hypercall2(int, update_descriptor, ma, desc); return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); } diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index 2ac0ab71412..b617b1164f1 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c @@ -83,7 +83,7 @@ void __init setup_bios_corruption_check(void) u64 size; addr = find_e820_area_size(addr, &size, PAGE_SIZE); - if (addr == 0) + if (!(addr + 1)) break; if ((addr + size) > corruption_check_size) diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 82db7f45e2d..d4356f8b752 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -14,6 +14,8 @@ obj-y += vmware.o hypervisor.o obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += bugs_64.o +obj-$(CONFIG_X86_CPU_DEBUG) += cpu_debug.o + obj-$(CONFIG_CPU_SUP_INTEL) += intel.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 6882a735d9c..8220ae69849 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -29,7 +29,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) u32 regs[4]; const struct cpuid_bit *cb; - static const struct cpuid_bit cpuid_bits[] = { + static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, { 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f47df59016c..7e4a459daa6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -502,7 +502,7 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int } #endif -static struct cpu_dev amd_cpu_dev __cpuinitdata = { +static const struct cpu_dev __cpuinitconst amd_cpu_dev = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 89bfdd9cacc..983e0830f0d 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -468,7 +468,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) return size; } -static struct cpu_dev centaur_cpu_dev __cpuinitdata = { +static const struct cpu_dev __cpuinitconst centaur_cpu_dev = { .c_vendor = "Centaur", .c_ident = { "CentaurHauls" }, .c_early_init = early_init_centaur, diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index a1625f5a1e7..51b09c48c9c 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -25,7 +25,7 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); } -static struct cpu_dev centaur_cpu_dev __cpuinitdata = { +static const struct cpu_dev centaur_cpu_dev __cpuinitconst = { .c_vendor = "Centaur", .c_ident = { "CentaurHauls" }, .c_early_init = early_init_centaur, diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 826d5c87627..e2962cc1e27 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1,52 +1,52 @@ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/string.h> #include <linux/bootmem.h> +#include <linux/linkage.h> #include <linux/bitops.h> +#include <linux/kernel.h> #include <linux/module.h> -#include <linux/kgdb.h> -#include <linux/topology.h> +#include <linux/percpu.h> +#include <linux/string.h> #include <linux/delay.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/kgdb.h> #include <linux/smp.h> -#include <linux/percpu.h> -#include <asm/i387.h> -#include <asm/msr.h> -#include <asm/io.h> -#include <asm/linkage.h> +#include <linux/io.h> + +#include <asm/stackprotector.h> #include <asm/mmu_context.h> +#include <asm/hypervisor.h> +#include <asm/processor.h> +#include <asm/sections.h> +#include <asm/topology.h> +#include <asm/cpumask.h> +#include <asm/pgtable.h> +#include <asm/atomic.h> +#include <asm/proto.h> +#include <asm/setup.h> +#include <asm/apic.h> +#include <asm/desc.h> +#include <asm/i387.h> #include <asm/mtrr.h> +#include <asm/numa.h> +#include <asm/asm.h> +#include <asm/cpu.h> #include <asm/mce.h> +#include <asm/msr.h> #include <asm/pat.h> -#include <asm/asm.h> -#include <asm/numa.h> #include <asm/smp.h> -#include <asm/cpu.h> -#include <asm/cpumask.h> -#include <asm/apic.h> #ifdef CONFIG_X86_LOCAL_APIC #include <asm/uv/uv.h> #endif -#include <asm/pgtable.h> -#include <asm/processor.h> -#include <asm/desc.h> -#include <asm/atomic.h> -#include <asm/proto.h> -#include <asm/sections.h> -#include <asm/setup.h> -#include <asm/hypervisor.h> -#include <asm/stackprotector.h> - #include "cpu.h" #ifdef CONFIG_X86_64 /* all of these masks are initialized in setup_cpu_local_masks() */ -cpumask_var_t cpu_callin_mask; -cpumask_var_t cpu_callout_mask; cpumask_var_t cpu_initialized_mask; +cpumask_var_t cpu_callout_mask; +cpumask_var_t cpu_callin_mask; /* representing cpus for which sibling maps can be computed */ cpumask_var_t cpu_sibling_setup_mask; @@ -62,15 +62,15 @@ void __init setup_cpu_local_masks(void) #else /* CONFIG_X86_32 */ -cpumask_t cpu_callin_map; +cpumask_t cpu_sibling_setup_map; cpumask_t cpu_callout_map; cpumask_t cpu_initialized; -cpumask_t cpu_sibling_setup_map; +cpumask_t cpu_callin_map; #endif /* CONFIG_X86_32 */ -static struct cpu_dev *this_cpu __cpuinitdata; +static const struct cpu_dev *this_cpu __cpuinitdata; DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #ifdef CONFIG_X86_64 @@ -79,48 +79,48 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { * IRET will check the segment types kkeil 2000/10/28 * Also sysret mandates a special GDT layout * - * The TLS descriptors are currently at a different place compared to i386. + * TLS descriptors are currently at a different place compared to i386. * Hopefully nobody expects them at a fixed place (Wine?) */ - [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, - [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, - [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, - [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, - [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, - [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, + [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, + [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, #else - [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, - [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, - [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, - [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, /* * Segments used for calling PnP BIOS have byte granularity. * They code segments and data segments have fixed 64k limits, * the transfer segment sizes are set at run time. */ /* 32-bit code */ - [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, + [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, /* 16-bit code */ - [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, + [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, + [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, + [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, + [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, /* * The APM segments have byte granularity and their bases * are set at run time. All have 64k limits. */ /* 32-bit code */ - [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, + [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, /* 16-bit code */ - [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, + [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, /* data */ - [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, + [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, - [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, - [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, + [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, + [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, GDT_STACK_CANARY_INIT #endif } }; @@ -164,16 +164,17 @@ static inline int flag_is_changeable_p(u32 flag) * the CPUID. Add "volatile" to not allow gcc to * optimize the subsequent calls to this function. */ - asm volatile ("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" + asm volatile ("pushfl \n\t" + "pushfl \n\t" + "popl %0 \n\t" + "movl %0, %1 \n\t" + "xorl %2, %0 \n\t" + "pushl %0 \n\t" + "popfl \n\t" + "pushfl \n\t" + "popl %0 \n\t" + "popfl \n\t" + : "=&r" (f1), "=&r" (f2) : "ir" (flag)); @@ -188,18 +189,22 @@ static int __cpuinit have_cpuid_p(void) static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) { - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { - /* Disable processor serial number */ - unsigned long lo, hi; - rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - lo |= 0x200000; - wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); - clear_cpu_cap(c, X86_FEATURE_PN); - - /* Disabling the serial number may affect the cpuid level */ - c->cpuid_level = cpuid_eax(0); - } + unsigned long lo, hi; + + if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr) + return; + + /* Disable processor serial number: */ + + rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + lo |= 0x200000; + wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + + printk(KERN_NOTICE "CPU serial number disabled.\n"); + clear_cpu_cap(c, X86_FEATURE_PN); + + /* Disabling the serial number may affect the cpuid level */ + c->cpuid_level = cpuid_eax(0); } static int __init x86_serial_nr_setup(char *s) @@ -232,6 +237,7 @@ struct cpuid_dependent_feature { u32 feature; u32 level; }; + static const struct cpuid_dependent_feature __cpuinitconst cpuid_dependent_features[] = { { X86_FEATURE_MWAIT, 0x00000005 }, @@ -243,7 +249,11 @@ cpuid_dependent_features[] = { static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) { const struct cpuid_dependent_feature *df; + for (df = cpuid_dependent_features; df->feature; df++) { + + if (!cpu_has(c, df->feature)) + continue; /* * Note: cpuid_level is set to -1 if unavailable, but * extended_extended_level is set to 0 if unavailable @@ -251,32 +261,32 @@ static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) * when signed; hence the weird messing around with * signs here... */ - if (cpu_has(c, df->feature) && - ((s32)df->level < 0 ? + if (!((s32)df->level < 0 ? (u32)df->level > (u32)c->extended_cpuid_level : - (s32)df->level > (s32)c->cpuid_level)) { - clear_cpu_cap(c, df->feature); - if (warn) - printk(KERN_WARNING - "CPU: CPU feature %s disabled " - "due to lack of CPUID level 0x%x\n", - x86_cap_flags[df->feature], - df->level); - } + (s32)df->level > (s32)c->cpuid_level)) + continue; + + clear_cpu_cap(c, df->feature); + if (!warn) + continue; + + printk(KERN_WARNING + "CPU: CPU feature %s disabled, no CPUID level 0x%x\n", + x86_cap_flags[df->feature], df->level); } } /* * Naming convention should be: <Name> [(<Codename>)] * This table only is used unless init_<vendor>() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * + * in particular, if CPUID levels 0x80000002..4 are supported, this + * isn't used */ /* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c) { - struct cpu_model_info *info; + const struct cpu_model_info *info; if (c->x86_model >= 16) return NULL; /* Range check */ @@ -307,8 +317,10 @@ void load_percpu_segment(int cpu) load_stack_canary_segment(); } -/* Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. */ +/* + * Current gdt points %fs at the "master" per-cpu area: after this, + * it's on the real one. + */ void switch_to_new_gdt(int cpu) { struct desc_ptr gdt_descr; @@ -321,7 +333,7 @@ void switch_to_new_gdt(int cpu) load_percpu_segment(cpu); } -static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; +static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { @@ -340,7 +352,7 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c) #endif } -static struct cpu_dev __cpuinitdata default_cpu = { +static const struct cpu_dev __cpuinitconst default_cpu = { .c_init = default_init, .c_vendor = "Unknown", .c_x86_vendor = X86_VENDOR_UNKNOWN, @@ -354,22 +366,24 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c) if (c->extended_cpuid_level < 0x80000004) return; - v = (unsigned int *) c->x86_model_id; + v = (unsigned int *)c->x86_model_id; cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); c->x86_model_id[48] = 0; - /* Intel chips right-justify this string for some dumb reason; - undo that brain damage */ + /* + * Intel chips right-justify this string for some dumb reason; + * undo that brain damage: + */ p = q = &c->x86_model_id[0]; while (*p == ' ') - p++; + p++; if (p != q) { - while (*p) - *q++ = *p++; - while (q <= &c->x86_model_id[48]) - *q++ = '\0'; /* Zero-pad the rest */ + while (*p) + *q++ = *p++; + while (q <= &c->x86_model_id[48]) + *q++ = '\0'; /* Zero-pad the rest */ } } @@ -438,27 +452,30 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) if (smp_num_siblings == 1) { printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1) { + goto out; + } - if (smp_num_siblings > nr_cpu_ids) { - printk(KERN_WARNING "CPU: Unsupported number of siblings %d", - smp_num_siblings); - smp_num_siblings = 1; - return; - } + if (smp_num_siblings <= 1) + goto out; + + if (smp_num_siblings > nr_cpu_ids) { + pr_warning("CPU: Unsupported number of siblings %d", + smp_num_siblings); + smp_num_siblings = 1; + return; + } - index_msb = get_count_order(smp_num_siblings); - c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); + index_msb = get_count_order(smp_num_siblings); + c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); - smp_num_siblings = smp_num_siblings / c->x86_max_cores; + smp_num_siblings = smp_num_siblings / c->x86_max_cores; - index_msb = get_count_order(smp_num_siblings); + index_msb = get_count_order(smp_num_siblings); - core_bits = get_count_order(c->x86_max_cores); + core_bits = get_count_order(c->x86_max_cores); - c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & - ((1 << core_bits) - 1); - } + c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & + ((1 << core_bits) - 1); out: if ((c->x86_max_cores * smp_num_siblings) > 1) { @@ -473,8 +490,8 @@ out: static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; - int i; static int printed; + int i; for (i = 0; i < X86_VENDOR_NUM; i++) { if (!cpu_devs[i]) @@ -483,6 +500,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) if (!strcmp(v, cpu_devs[i]->c_ident[0]) || (cpu_devs[i]->c_ident[1] && !strcmp(v, cpu_devs[i]->c_ident[1]))) { + this_cpu = cpu_devs[i]; c->x86_vendor = this_cpu->c_x86_vendor; return; @@ -491,7 +509,9 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) if (!printed) { printed++; - printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v); + printk(KERN_ERR + "CPU: vendor_id '%s' unknown, using generic init.\n", v); + printk(KERN_ERR "CPU: Your system may be unstable.\n"); } @@ -511,14 +531,17 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 junk, tfms, cap0, misc; + cpuid(0x00000001, &tfms, &misc, &junk, &cap0); c->x86 = (tfms >> 8) & 0xf; c->x86_model = (tfms >> 4) & 0xf; c->x86_mask = tfms & 0xf; + if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xf) << 4; + if (cap0 & (1<<19)) { c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; c->x86_cache_alignment = c->x86_clflush_size; @@ -534,6 +557,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 capability, excap; + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); c->x86_capability[0] = capability; c->x86_capability[4] = excap; @@ -542,6 +566,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) /* AMD-defined flags: level 0x80000001 */ xlvl = cpuid_eax(0x80000000); c->extended_cpuid_level = xlvl; + if ((xlvl & 0xffff0000) == 0x80000000) { if (xlvl >= 0x80000001) { c->x86_capability[1] = cpuid_edx(0x80000001); @@ -549,13 +574,15 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) } } -#ifdef CONFIG_X86_64 if (c->extended_cpuid_level >= 0x80000008) { u32 eax = cpuid_eax(0x80000008); c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; } +#ifdef CONFIG_X86_32 + else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) + c->x86_phys_bits = 36; #endif if (c->extended_cpuid_level >= 0x80000007) @@ -602,8 +629,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; + c->x86_phys_bits = 36; + c->x86_virt_bits = 48; #else c->x86_clflush_size = 32; + c->x86_phys_bits = 32; + c->x86_virt_bits = 32; #endif c->x86_cache_alignment = c->x86_clflush_size; @@ -634,12 +665,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) void __init early_cpu_init(void) { - struct cpu_dev **cdev; + const struct cpu_dev *const *cdev; int count = 0; - printk("KERNEL supported cpus:\n"); + printk(KERN_INFO "KERNEL supported cpus:\n"); for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { - struct cpu_dev *cpudev = *cdev; + const struct cpu_dev *cpudev = *cdev; unsigned int j; if (count >= X86_VENDOR_NUM) @@ -650,7 +681,7 @@ void __init early_cpu_init(void) for (j = 0; j < 2; j++) { if (!cpudev->c_ident[j]) continue; - printk(" %s %s\n", cpudev->c_vendor, + printk(KERN_INFO " %s %s\n", cpudev->c_vendor, cpudev->c_ident[j]); } } @@ -726,9 +757,13 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_coreid_bits = 0; #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; + c->x86_phys_bits = 36; + c->x86_virt_bits = 48; #else c->cpuid_level = -1; /* CPUID not detected */ c->x86_clflush_size = 32; + c->x86_phys_bits = 32; + c->x86_virt_bits = 32; #endif c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); @@ -759,8 +794,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) squash_the_stupid_serial_number(c); /* - * The vendor-specific functions might have changed features. Now - * we do "generic changes." + * The vendor-specific functions might have changed features. + * Now we do "generic changes." */ /* Filter out anything that depends on CPUID levels we don't have */ @@ -768,7 +803,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) /* If the model name is still unset, do table lookup. */ if (!c->x86_model_id[0]) { - char *p; + const char *p; p = table_lookup_model(c); if (p) strcpy(c->x86_model_id, p); @@ -843,11 +878,11 @@ void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) } struct msr_range { - unsigned min; - unsigned max; + unsigned min; + unsigned max; }; -static struct msr_range msr_range_array[] __cpuinitdata = { +static const struct msr_range msr_range_array[] __cpuinitconst = { { 0x00000000, 0x00000418}, { 0xc0000000, 0xc000040b}, { 0xc0010000, 0xc0010142}, @@ -856,14 +891,15 @@ static struct msr_range msr_range_array[] __cpuinitdata = { static void __cpuinit print_cpu_msr(void) { + unsigned index_min, index_max; unsigned index; u64 val; int i; - unsigned index_min, index_max; for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { index_min = msr_range_array[i].min; index_max = msr_range_array[i].max; + for (index = index_min; index < index_max; index++) { if (rdmsrl_amd_safe(index, &val)) continue; @@ -873,6 +909,7 @@ static void __cpuinit print_cpu_msr(void) } static int show_msr __cpuinitdata; + static __init int setup_show_msr(char *arg) { int num; @@ -894,12 +931,14 @@ __setup("noclflush", setup_noclflush); void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { - char *vendor = NULL; + const char *vendor = NULL; - if (c->x86_vendor < X86_VENDOR_NUM) + if (c->x86_vendor < X86_VENDOR_NUM) { vendor = this_cpu->c_vendor; - else if (c->cpuid_level >= 0) - vendor = c->x86_vendor_id; + } else { + if (c->cpuid_level >= 0) + vendor = c->x86_vendor_id; + } if (vendor && !strstr(c->x86_model_id, vendor)) printk(KERN_CONT "%s ", vendor); @@ -926,10 +965,12 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) static __init int setup_disablecpuid(char *arg) { int bit; + if (get_option(&arg, &bit) && bit < NCAPINTS*32) setup_clear_cpu_cap(bit); else return 0; + return 1; } __setup("clearcpuid=", setup_disablecpuid); @@ -939,6 +980,7 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE); + DEFINE_PER_CPU(char *, irq_stack_ptr) = init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; @@ -948,12 +990,21 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack); DEFINE_PER_CPU(unsigned int, irq_count) = -1; +/* + * Special IST stacks which the CPU switches to when it calls + * an IST-marked descriptor entry. Up to 7 stacks (hardware + * limit), all of them are 4K, except the debug stack which + * is 8K. + */ +static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, + [DEBUG_STACK - 1] = DEBUG_STKSZ +}; + static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) __aligned(PAGE_SIZE); -extern asmlinkage void ignore_sysret(void); - /* May not be marked __init: used by software suspend */ void syscall_init(void) { @@ -983,7 +1034,7 @@ unsigned long kernel_eflags; */ DEFINE_PER_CPU(struct orig_ist, orig_ist); -#else /* x86_64 */ +#else /* CONFIG_X86_64 */ #ifdef CONFIG_CC_STACKPROTECTOR DEFINE_PER_CPU(unsigned long, stack_canary); @@ -995,9 +1046,26 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) memset(regs, 0, sizeof(struct pt_regs)); regs->fs = __KERNEL_PERCPU; regs->gs = __KERNEL_STACK_CANARY; + return regs; } -#endif /* x86_64 */ +#endif /* CONFIG_X86_64 */ + +/* + * Clear all 6 debug registers: + */ +static void clear_all_debug_regs(void) +{ + int i; + + for (i = 0; i < 8; i++) { + /* Ignore db4, db5 */ + if ((i == 4) || (i == 5)) + continue; + + set_debugreg(0, i); + } +} /* * cpu_init() initializes state that is per-CPU. Some data is already @@ -1007,15 +1075,20 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) * A lot of state is already set up in PDA init for 64 bit */ #ifdef CONFIG_X86_64 + void __cpuinit cpu_init(void) { - int cpu = stack_smp_processor_id(); - struct tss_struct *t = &per_cpu(init_tss, cpu); - struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); - unsigned long v; + struct orig_ist *orig_ist; struct task_struct *me; + struct tss_struct *t; + unsigned long v; + int cpu; int i; + cpu = stack_smp_processor_id(); + t = &per_cpu(init_tss, cpu); + orig_ist = &per_cpu(orig_ist, cpu); + #ifdef CONFIG_NUMA if (cpu != 0 && percpu_read(node_number) == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) @@ -1056,19 +1129,17 @@ void __cpuinit cpu_init(void) * set up and load the per-CPU TSS */ if (!orig_ist->ist[0]) { - static const unsigned int sizes[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, - [DEBUG_STACK - 1] = DEBUG_STKSZ - }; char *estacks = per_cpu(exception_stacks, cpu); + for (v = 0; v < N_EXCEPTION_STACKS; v++) { - estacks += sizes[v]; + estacks += exception_stack_sizes[v]; orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; } } t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + /* * <= is required because the CPU will access up to * 8 bits beyond the end of the IO permission bitmap. @@ -1078,8 +1149,7 @@ void __cpuinit cpu_init(void) atomic_inc(&init_mm.mm_count); me->active_mm = &init_mm; - if (me->mm) - BUG(); + BUG_ON(me->mm); enter_lazy_tlb(&init_mm, me); load_sp0(t, ¤t->thread); @@ -1098,17 +1168,7 @@ void __cpuinit cpu_init(void) arch_kgdb_ops.correct_hw_break(); else #endif - { - /* - * Clear all 6 debug registers: - */ - set_debugreg(0UL, 0); - set_debugreg(0UL, 1); - set_debugreg(0UL, 2); - set_debugreg(0UL, 3); - set_debugreg(0UL, 6); - set_debugreg(0UL, 7); - } + clear_all_debug_regs(); fpu_init(); @@ -1129,7 +1189,8 @@ void __cpuinit cpu_init(void) if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); - for (;;) local_irq_enable(); + for (;;) + local_irq_enable(); } printk(KERN_INFO "Initializing CPU#%d\n", cpu); @@ -1145,8 +1206,7 @@ void __cpuinit cpu_init(void) */ atomic_inc(&init_mm.mm_count); curr->active_mm = &init_mm; - if (curr->mm) - BUG(); + BUG_ON(curr->mm); enter_lazy_tlb(&init_mm, curr); load_sp0(t, thread); @@ -1159,13 +1219,7 @@ void __cpuinit cpu_init(void) __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); #endif - /* Clear all 6 debug registers: */ - set_debugreg(0, 0); - set_debugreg(0, 1); - set_debugreg(0, 2); - set_debugreg(0, 3); - set_debugreg(0, 6); - set_debugreg(0, 7); + clear_all_debug_regs(); /* * Force FPU initialization: @@ -1185,6 +1239,4 @@ void __cpuinit cpu_init(void) xsave_init(); } - - #endif diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index de4094a3921..9469ecb5aeb 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -5,15 +5,15 @@ struct cpu_model_info { int vendor; int family; - char *model_names[16]; + const char *model_names[16]; }; /* attempt to consolidate cpu attributes */ struct cpu_dev { - char * c_vendor; + const char * c_vendor; /* some have two possibilities for cpuid string */ - char * c_ident[2]; + const char * c_ident[2]; struct cpu_model_info c_models[4]; @@ -25,11 +25,12 @@ struct cpu_dev { }; #define cpu_dev_register(cpu_devX) \ - static struct cpu_dev *__cpu_dev_##cpu_devX __used \ + static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \ __attribute__((__section__(".x86_cpu_dev.init"))) = \ &cpu_devX; -extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; +extern const struct cpu_dev *const __x86_cpu_dev_start[], + *const __x86_cpu_dev_end[]; extern void display_cacheinfo(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c new file mode 100755 index 00000000000..21c0cf8ced1 --- /dev/null +++ b/arch/x86/kernel/cpu/cpu_debug.c @@ -0,0 +1,839 @@ +/* + * CPU x86 architecture debug code + * + * Copyright(C) 2009 Jaswinder Singh Rajput + * + * For licencing details see kernel-base/COPYING + */ + +#include <linux/interrupt.h> +#include <linux/compiler.h> +#include <linux/seq_file.h> +#include <linux/debugfs.h> +#include <linux/kprobes.h> +#include <linux/uaccess.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/percpu.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/smp.h> + +#include <asm/cpu_debug.h> +#include <asm/paravirt.h> +#include <asm/system.h> +#include <asm/traps.h> +#include <asm/apic.h> +#include <asm/desc.h> + +static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]); +static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]); +static DEFINE_PER_CPU(unsigned, cpu_modelflag); +static DEFINE_PER_CPU(int, cpu_priv_count); +static DEFINE_PER_CPU(unsigned, cpu_model); + +static DEFINE_MUTEX(cpu_debug_lock); + +static struct dentry *cpu_debugfs_dir; + +static struct cpu_debug_base cpu_base[] = { + { "mc", CPU_MC, 0 }, + { "monitor", CPU_MONITOR, 0 }, + { "time", CPU_TIME, 0 }, + { "pmc", CPU_PMC, 1 }, + { "platform", CPU_PLATFORM, 0 }, + { "apic", CPU_APIC, 0 }, + { "poweron", CPU_POWERON, 0 }, + { "control", CPU_CONTROL, 0 }, + { "features", CPU_FEATURES, 0 }, + { "lastbranch", CPU_LBRANCH, 0 }, + { "bios", CPU_BIOS, 0 }, + { "freq", CPU_FREQ, 0 }, + { "mtrr", CPU_MTRR, 0 }, + { "perf", CPU_PERF, 0 }, + { "cache", CPU_CACHE, 0 }, + { "sysenter", CPU_SYSENTER, 0 }, + { "therm", CPU_THERM, 0 }, + { "misc", CPU_MISC, 0 }, + { "debug", CPU_DEBUG, 0 }, + { "pat", CPU_PAT, 0 }, + { "vmx", CPU_VMX, 0 }, + { "call", CPU_CALL, 0 }, + { "base", CPU_BASE, 0 }, + { "smm", CPU_SMM, 0 }, + { "svm", CPU_SVM, 0 }, + { "osvm", CPU_OSVM, 0 }, + { "tss", CPU_TSS, 0 }, + { "cr", CPU_CR, 0 }, + { "dt", CPU_DT, 0 }, + { "registers", CPU_REG_ALL, 0 }, +}; + +static struct cpu_file_base cpu_file[] = { + { "index", CPU_REG_ALL, 0 }, + { "value", CPU_REG_ALL, 1 }, +}; + +/* Intel Registers Range */ +static struct cpu_debug_range cpu_intel_range[] = { + { 0x00000000, 0x00000001, CPU_MC, CPU_INTEL_ALL }, + { 0x00000006, 0x00000007, CPU_MONITOR, CPU_CX_AT_XE }, + { 0x00000010, 0x00000010, CPU_TIME, CPU_INTEL_ALL }, + { 0x00000011, 0x00000013, CPU_PMC, CPU_INTEL_PENTIUM }, + { 0x00000017, 0x00000017, CPU_PLATFORM, CPU_PX_CX_AT_XE }, + { 0x0000001B, 0x0000001B, CPU_APIC, CPU_P6_CX_AT_XE }, + + { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_PX_CX_AT_XE }, + { 0x0000002B, 0x0000002B, CPU_POWERON, CPU_INTEL_XEON }, + { 0x0000002C, 0x0000002C, CPU_FREQ, CPU_INTEL_XEON }, + { 0x0000003A, 0x0000003A, CPU_CONTROL, CPU_CX_AT_XE }, + + { 0x00000040, 0x00000043, CPU_LBRANCH, CPU_PM_CX_AT_XE }, + { 0x00000044, 0x00000047, CPU_LBRANCH, CPU_PM_CO_AT }, + { 0x00000060, 0x00000063, CPU_LBRANCH, CPU_C2_AT }, + { 0x00000064, 0x00000067, CPU_LBRANCH, CPU_INTEL_ATOM }, + + { 0x00000079, 0x00000079, CPU_BIOS, CPU_P6_CX_AT_XE }, + { 0x00000088, 0x0000008A, CPU_CACHE, CPU_INTEL_P6 }, + { 0x0000008B, 0x0000008B, CPU_BIOS, CPU_P6_CX_AT_XE }, + { 0x0000009B, 0x0000009B, CPU_MONITOR, CPU_INTEL_XEON }, + + { 0x000000C1, 0x000000C2, CPU_PMC, CPU_P6_CX_AT }, + { 0x000000CD, 0x000000CD, CPU_FREQ, CPU_CX_AT }, + { 0x000000E7, 0x000000E8, CPU_PERF, CPU_CX_AT }, + { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_P6_CX_XE }, + + { 0x00000116, 0x00000116, CPU_CACHE, CPU_INTEL_P6 }, + { 0x00000118, 0x00000118, CPU_CACHE, CPU_INTEL_P6 }, + { 0x00000119, 0x00000119, CPU_CACHE, CPU_INTEL_PX }, + { 0x0000011A, 0x0000011B, CPU_CACHE, CPU_INTEL_P6 }, + { 0x0000011E, 0x0000011E, CPU_CACHE, CPU_PX_CX_AT }, + + { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_P6_CX_AT_XE }, + { 0x00000179, 0x0000017A, CPU_MC, CPU_PX_CX_AT_XE }, + { 0x0000017B, 0x0000017B, CPU_MC, CPU_P6_XE }, + { 0x00000186, 0x00000187, CPU_PMC, CPU_P6_CX_AT }, + { 0x00000198, 0x00000199, CPU_PERF, CPU_PM_CX_AT_XE }, + { 0x0000019A, 0x0000019A, CPU_TIME, CPU_PM_CX_AT_XE }, + { 0x0000019B, 0x0000019D, CPU_THERM, CPU_PM_CX_AT_XE }, + { 0x000001A0, 0x000001A0, CPU_MISC, CPU_PM_CX_AT_XE }, + + { 0x000001C9, 0x000001C9, CPU_LBRANCH, CPU_PM_CX_AT }, + { 0x000001D7, 0x000001D8, CPU_LBRANCH, CPU_INTEL_XEON }, + { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_CX_AT_XE }, + { 0x000001DA, 0x000001DA, CPU_LBRANCH, CPU_INTEL_XEON }, + { 0x000001DB, 0x000001DB, CPU_LBRANCH, CPU_P6_XE }, + { 0x000001DC, 0x000001DC, CPU_LBRANCH, CPU_INTEL_P6 }, + { 0x000001DD, 0x000001DE, CPU_LBRANCH, CPU_PX_CX_AT_XE }, + { 0x000001E0, 0x000001E0, CPU_LBRANCH, CPU_INTEL_P6 }, + + { 0x00000200, 0x0000020F, CPU_MTRR, CPU_P6_CX_XE }, + { 0x00000250, 0x00000250, CPU_MTRR, CPU_P6_CX_XE }, + { 0x00000258, 0x00000259, CPU_MTRR, CPU_P6_CX_XE }, + { 0x00000268, 0x0000026F, CPU_MTRR, CPU_P6_CX_XE }, + { 0x00000277, 0x00000277, CPU_PAT, CPU_C2_AT_XE }, + { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_P6_CX_XE }, + + { 0x00000300, 0x00000308, CPU_PMC, CPU_INTEL_XEON }, + { 0x00000309, 0x0000030B, CPU_PMC, CPU_C2_AT_XE }, + { 0x0000030C, 0x00000311, CPU_PMC, CPU_INTEL_XEON }, + { 0x00000345, 0x00000345, CPU_PMC, CPU_C2_AT }, + { 0x00000360, 0x00000371, CPU_PMC, CPU_INTEL_XEON }, + { 0x0000038D, 0x00000390, CPU_PMC, CPU_C2_AT }, + { 0x000003A0, 0x000003BE, CPU_PMC, CPU_INTEL_XEON }, + { 0x000003C0, 0x000003CD, CPU_PMC, CPU_INTEL_XEON }, + { 0x000003E0, 0x000003E1, CPU_PMC, CPU_INTEL_XEON }, + { 0x000003F0, 0x000003F0, CPU_PMC, CPU_INTEL_XEON }, + { 0x000003F1, 0x000003F1, CPU_PMC, CPU_C2_AT_XE }, + { 0x000003F2, 0x000003F2, CPU_PMC, CPU_INTEL_XEON }, + + { 0x00000400, 0x00000402, CPU_MC, CPU_PM_CX_AT_XE }, + { 0x00000403, 0x00000403, CPU_MC, CPU_INTEL_XEON }, + { 0x00000404, 0x00000406, CPU_MC, CPU_PM_CX_AT_XE }, + { 0x00000407, 0x00000407, CPU_MC, CPU_INTEL_XEON }, + { 0x00000408, 0x0000040A, CPU_MC, CPU_PM_CX_AT_XE }, + { 0x0000040B, 0x0000040B, CPU_MC, CPU_INTEL_XEON }, + { 0x0000040C, 0x0000040E, CPU_MC, CPU_PM_CX_XE }, + { 0x0000040F, 0x0000040F, CPU_MC, CPU_INTEL_XEON }, + { 0x00000410, 0x00000412, CPU_MC, CPU_PM_CX_AT_XE }, + { 0x00000413, 0x00000417, CPU_MC, CPU_CX_AT_XE }, + { 0x00000480, 0x0000048B, CPU_VMX, CPU_CX_AT_XE }, + + { 0x00000600, 0x00000600, CPU_DEBUG, CPU_PM_CX_AT_XE }, + { 0x00000680, 0x0000068F, CPU_LBRANCH, CPU_INTEL_XEON }, + { 0x000006C0, 0x000006CF, CPU_LBRANCH, CPU_INTEL_XEON }, + + { 0x000107CC, 0x000107D3, CPU_PMC, CPU_INTEL_XEON_MP }, + + { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_INTEL_XEON }, + { 0xC0000081, 0xC0000082, CPU_CALL, CPU_INTEL_XEON }, + { 0xC0000084, 0xC0000084, CPU_CALL, CPU_INTEL_XEON }, + { 0xC0000100, 0xC0000102, CPU_BASE, CPU_INTEL_XEON }, +}; + +/* AMD Registers Range */ +static struct cpu_debug_range cpu_amd_range[] = { + { 0x00000010, 0x00000010, CPU_TIME, CPU_ALL, }, + { 0x0000001B, 0x0000001B, CPU_APIC, CPU_ALL, }, + { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_ALL, }, + + { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_ALL, }, + { 0x00000179, 0x0000017A, CPU_MC, CPU_ALL, }, + { 0x0000017B, 0x0000017B, CPU_MC, CPU_ALL, }, + { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_ALL, }, + { 0x000001DB, 0x000001DE, CPU_LBRANCH, CPU_ALL, }, + + { 0x00000200, 0x0000020F, CPU_MTRR, CPU_ALL, }, + { 0x00000250, 0x00000250, CPU_MTRR, CPU_ALL, }, + { 0x00000258, 0x00000259, CPU_MTRR, CPU_ALL, }, + { 0x00000268, 0x0000026F, CPU_MTRR, CPU_ALL, }, + { 0x00000277, 0x00000277, CPU_PAT, CPU_ALL, }, + { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_ALL, }, + + { 0x00000400, 0x00000417, CPU_MC, CPU_ALL, }, + + { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_ALL, }, + { 0xC0000081, 0xC0000084, CPU_CALL, CPU_ALL, }, + { 0xC0000100, 0xC0000102, CPU_BASE, CPU_ALL, }, + { 0xC0000103, 0xC0000103, CPU_TIME, CPU_ALL, }, + + { 0xC0000408, 0xC000040A, CPU_MC, CPU_ALL, }, + + { 0xc0010000, 0xc0010007, CPU_PMC, CPU_ALL, }, + { 0xc0010010, 0xc0010010, CPU_MTRR, CPU_ALL, }, + { 0xc0010016, 0xc001001A, CPU_MTRR, CPU_ALL, }, + { 0xc001001D, 0xc001001D, CPU_MTRR, CPU_ALL, }, + { 0xc0010030, 0xc0010035, CPU_BIOS, CPU_ALL, }, + { 0xc0010056, 0xc0010056, CPU_SMM, CPU_ALL, }, + { 0xc0010061, 0xc0010063, CPU_SMM, CPU_ALL, }, + { 0xc0010074, 0xc0010074, CPU_MC, CPU_ALL, }, + { 0xc0010111, 0xc0010113, CPU_SMM, CPU_ALL, }, + { 0xc0010114, 0xc0010118, CPU_SVM, CPU_ALL, }, + { 0xc0010119, 0xc001011A, CPU_SMM, CPU_ALL, }, + { 0xc0010140, 0xc0010141, CPU_OSVM, CPU_ALL, }, + { 0xc0010156, 0xc0010156, CPU_SMM, CPU_ALL, }, +}; + + +static int get_cpu_modelflag(unsigned cpu) +{ + int flag; + + switch (per_cpu(cpu_model, cpu)) { + /* Intel */ + case 0x0501: + case 0x0502: + case 0x0504: + flag = CPU_INTEL_PENTIUM; + break; + case 0x0601: + case 0x0603: + case 0x0605: + case 0x0607: + case 0x0608: + case 0x060A: + case 0x060B: + flag = CPU_INTEL_P6; + break; + case 0x0609: + case 0x060D: + flag = CPU_INTEL_PENTIUM_M; + break; + case 0x060E: + flag = CPU_INTEL_CORE; + break; + case 0x060F: + case 0x0617: + flag = CPU_INTEL_CORE2; + break; + case 0x061C: + flag = CPU_INTEL_ATOM; + break; + case 0x0F00: + case 0x0F01: + case 0x0F02: + case 0x0F03: + case 0x0F04: + flag = CPU_INTEL_XEON_P4; + break; + case 0x0F06: + flag = CPU_INTEL_XEON_MP; + break; + default: + flag = CPU_NONE; + break; + } + + return flag; +} + +static int get_cpu_range_count(unsigned cpu) +{ + int index; + + switch (per_cpu(cpu_model, cpu) >> 16) { + case X86_VENDOR_INTEL: + index = ARRAY_SIZE(cpu_intel_range); + break; + case X86_VENDOR_AMD: + index = ARRAY_SIZE(cpu_amd_range); + break; + default: + index = 0; + break; + } + + return index; +} + +static int is_typeflag_valid(unsigned cpu, unsigned flag) +{ + unsigned vendor, modelflag; + int i, index; + + /* Standard Registers should be always valid */ + if (flag >= CPU_TSS) + return 1; + + modelflag = per_cpu(cpu_modelflag, cpu); + vendor = per_cpu(cpu_model, cpu) >> 16; + index = get_cpu_range_count(cpu); + + for (i = 0; i < index; i++) { + switch (vendor) { + case X86_VENDOR_INTEL: + if ((cpu_intel_range[i].model & modelflag) && + (cpu_intel_range[i].flag & flag)) + return 1; + break; + case X86_VENDOR_AMD: + if (cpu_amd_range[i].flag & flag) + return 1; + break; + } + } + + /* Invalid */ + return 0; +} + +static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max, + int index, unsigned flag) +{ + unsigned modelflag; + + modelflag = per_cpu(cpu_modelflag, cpu); + *max = 0; + switch (per_cpu(cpu_model, cpu) >> 16) { + case X86_VENDOR_INTEL: + if ((cpu_intel_range[index].model & modelflag) && + (cpu_intel_range[index].flag & flag)) { + *min = cpu_intel_range[index].min; + *max = cpu_intel_range[index].max; + } + break; + case X86_VENDOR_AMD: + if (cpu_amd_range[index].flag & flag) { + *min = cpu_amd_range[index].min; + *max = cpu_amd_range[index].max; + } + break; + } + + return *max; +} + +/* This function can also be called with seq = NULL for printk */ +static void print_cpu_data(struct seq_file *seq, unsigned type, + u32 low, u32 high) +{ + struct cpu_private *priv; + u64 val = high; + + if (seq) { + priv = seq->private; + if (priv->file) { + val = (val << 32) | low; + seq_printf(seq, "0x%llx\n", val); + } else + seq_printf(seq, " %08x: %08x_%08x\n", + type, high, low); + } else + printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low); +} + +/* This function can also be called with seq = NULL for printk */ +static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag) +{ + unsigned msr, msr_min, msr_max; + struct cpu_private *priv; + u32 low, high; + int i, range; + + if (seq) { + priv = seq->private; + if (priv->file) { + if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg, + &low, &high)) + print_cpu_data(seq, priv->reg, low, high); + return; + } + } + + range = get_cpu_range_count(cpu); + + for (i = 0; i < range; i++) { + if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag)) + continue; + + for (msr = msr_min; msr <= msr_max; msr++) { + if (rdmsr_safe_on_cpu(cpu, msr, &low, &high)) + continue; + print_cpu_data(seq, msr, low, high); + } + } +} + +static void print_tss(void *arg) +{ + struct pt_regs *regs = task_pt_regs(current); + struct seq_file *seq = arg; + unsigned int seg; + + seq_printf(seq, " RAX\t: %016lx\n", regs->ax); + seq_printf(seq, " RBX\t: %016lx\n", regs->bx); + seq_printf(seq, " RCX\t: %016lx\n", regs->cx); + seq_printf(seq, " RDX\t: %016lx\n", regs->dx); + + seq_printf(seq, " RSI\t: %016lx\n", regs->si); + seq_printf(seq, " RDI\t: %016lx\n", regs->di); + seq_printf(seq, " RBP\t: %016lx\n", regs->bp); + seq_printf(seq, " ESP\t: %016lx\n", regs->sp); + +#ifdef CONFIG_X86_64 + seq_printf(seq, " R08\t: %016lx\n", regs->r8); + seq_printf(seq, " R09\t: %016lx\n", regs->r9); + seq_printf(seq, " R10\t: %016lx\n", regs->r10); + seq_printf(seq, " R11\t: %016lx\n", regs->r11); + seq_printf(seq, " R12\t: %016lx\n", regs->r12); + seq_printf(seq, " R13\t: %016lx\n", regs->r13); + seq_printf(seq, " R14\t: %016lx\n", regs->r14); + seq_printf(seq, " R15\t: %016lx\n", regs->r15); +#endif + + asm("movl %%cs,%0" : "=r" (seg)); + seq_printf(seq, " CS\t: %04x\n", seg); + asm("movl %%ds,%0" : "=r" (seg)); + seq_printf(seq, " DS\t: %04x\n", seg); + seq_printf(seq, " SS\t: %04lx\n", regs->ss & 0xffff); + asm("movl %%es,%0" : "=r" (seg)); + seq_printf(seq, " ES\t: %04x\n", seg); + asm("movl %%fs,%0" : "=r" (seg)); + seq_printf(seq, " FS\t: %04x\n", seg); + asm("movl %%gs,%0" : "=r" (seg)); + seq_printf(seq, " GS\t: %04x\n", seg); + + seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags); + + seq_printf(seq, " EIP\t: %016lx\n", regs->ip); +} + +static void print_cr(void *arg) +{ + struct seq_file *seq = arg; + + seq_printf(seq, " cr0\t: %016lx\n", read_cr0()); + seq_printf(seq, " cr2\t: %016lx\n", read_cr2()); + seq_printf(seq, " cr3\t: %016lx\n", read_cr3()); + seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe()); +#ifdef CONFIG_X86_64 + seq_printf(seq, " cr8\t: %016lx\n", read_cr8()); +#endif +} + +static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt) +{ + seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size)); +} + +static void print_dt(void *seq) +{ + struct desc_ptr dt; + unsigned long ldt; + + /* IDT */ + store_idt((struct desc_ptr *)&dt); + print_desc_ptr("IDT", seq, dt); + + /* GDT */ + store_gdt((struct desc_ptr *)&dt); + print_desc_ptr("GDT", seq, dt); + + /* LDT */ + store_ldt(ldt); + seq_printf(seq, " LDT\t: %016lx\n", ldt); + + /* TR */ + store_tr(ldt); + seq_printf(seq, " TR\t: %016lx\n", ldt); +} + +static void print_dr(void *arg) +{ + struct seq_file *seq = arg; + unsigned long dr; + int i; + + for (i = 0; i < 8; i++) { + /* Ignore db4, db5 */ + if ((i == 4) || (i == 5)) + continue; + get_debugreg(dr, i); + seq_printf(seq, " dr%d\t: %016lx\n", i, dr); + } + + seq_printf(seq, "\n MSR\t:\n"); +} + +static void print_apic(void *arg) +{ + struct seq_file *seq = arg; + +#ifdef CONFIG_X86_LOCAL_APIC + seq_printf(seq, " LAPIC\t:\n"); + seq_printf(seq, " ID\t\t: %08x\n", apic_read(APIC_ID) >> 24); + seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR)); + seq_printf(seq, " TASKPRI\t: %08x\n", apic_read(APIC_TASKPRI)); + seq_printf(seq, " ARBPRI\t\t: %08x\n", apic_read(APIC_ARBPRI)); + seq_printf(seq, " PROCPRI\t: %08x\n", apic_read(APIC_PROCPRI)); + seq_printf(seq, " LDR\t\t: %08x\n", apic_read(APIC_LDR)); + seq_printf(seq, " DFR\t\t: %08x\n", apic_read(APIC_DFR)); + seq_printf(seq, " SPIV\t\t: %08x\n", apic_read(APIC_SPIV)); + seq_printf(seq, " ISR\t\t: %08x\n", apic_read(APIC_ISR)); + seq_printf(seq, " ESR\t\t: %08x\n", apic_read(APIC_ESR)); + seq_printf(seq, " ICR\t\t: %08x\n", apic_read(APIC_ICR)); + seq_printf(seq, " ICR2\t\t: %08x\n", apic_read(APIC_ICR2)); + seq_printf(seq, " LVTT\t\t: %08x\n", apic_read(APIC_LVTT)); + seq_printf(seq, " LVTTHMR\t: %08x\n", apic_read(APIC_LVTTHMR)); + seq_printf(seq, " LVTPC\t\t: %08x\n", apic_read(APIC_LVTPC)); + seq_printf(seq, " LVT0\t\t: %08x\n", apic_read(APIC_LVT0)); + seq_printf(seq, " LVT1\t\t: %08x\n", apic_read(APIC_LVT1)); + seq_printf(seq, " LVTERR\t\t: %08x\n", apic_read(APIC_LVTERR)); + seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT)); + seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT)); + seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR)); +#endif /* CONFIG_X86_LOCAL_APIC */ + + seq_printf(seq, "\n MSR\t:\n"); +} + +static int cpu_seq_show(struct seq_file *seq, void *v) +{ + struct cpu_private *priv = seq->private; + + if (priv == NULL) + return -EINVAL; + + switch (cpu_base[priv->type].flag) { + case CPU_TSS: + smp_call_function_single(priv->cpu, print_tss, seq, 1); + break; + case CPU_CR: + smp_call_function_single(priv->cpu, print_cr, seq, 1); + break; + case CPU_DT: + smp_call_function_single(priv->cpu, print_dt, seq, 1); + break; + case CPU_DEBUG: + if (priv->file == CPU_INDEX_BIT) + smp_call_function_single(priv->cpu, print_dr, seq, 1); + print_msr(seq, priv->cpu, cpu_base[priv->type].flag); + break; + case CPU_APIC: + if (priv->file == CPU_INDEX_BIT) + smp_call_function_single(priv->cpu, print_apic, seq, 1); + print_msr(seq, priv->cpu, cpu_base[priv->type].flag); + break; + + default: + print_msr(seq, priv->cpu, cpu_base[priv->type].flag); + break; + } + seq_printf(seq, "\n"); + + return 0; +} + +static void *cpu_seq_start(struct seq_file *seq, loff_t *pos) +{ + if (*pos == 0) /* One time is enough ;-) */ + return seq; + + return NULL; +} + +static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + (*pos)++; + + return cpu_seq_start(seq, pos); +} + +static void cpu_seq_stop(struct seq_file *seq, void *v) +{ +} + +static const struct seq_operations cpu_seq_ops = { + .start = cpu_seq_start, + .next = cpu_seq_next, + .stop = cpu_seq_stop, + .show = cpu_seq_show, +}; + +static int cpu_seq_open(struct inode *inode, struct file *file) +{ + struct cpu_private *priv = inode->i_private; + struct seq_file *seq; + int err; + + err = seq_open(file, &cpu_seq_ops); + if (!err) { + seq = file->private_data; + seq->private = priv; + } + + return err; +} + +static int write_msr(struct cpu_private *priv, u64 val) +{ + u32 low, high; + + high = (val >> 32) & 0xffffffff; + low = val & 0xffffffff; + + if (!wrmsr_safe_on_cpu(priv->cpu, priv->reg, low, high)) + return 0; + + return -EPERM; +} + +static int write_cpu_register(struct cpu_private *priv, const char *buf) +{ + int ret = -EPERM; + u64 val; + + ret = strict_strtoull(buf, 0, &val); + if (ret < 0) + return ret; + + /* Supporting only MSRs */ + if (priv->type < CPU_TSS_BIT) + return write_msr(priv, val); + + return ret; +} + +static ssize_t cpu_write(struct file *file, const char __user *ubuf, + size_t count, loff_t *off) +{ + struct seq_file *seq = file->private_data; + struct cpu_private *priv = seq->private; + char buf[19]; + + if ((priv == NULL) || (count >= sizeof(buf))) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, count)) + return -EFAULT; + + buf[count] = 0; + + if ((cpu_base[priv->type].write) && (cpu_file[priv->file].write)) + if (!write_cpu_register(priv, buf)) + return count; + + return -EACCES; +} + +static const struct file_operations cpu_fops = { + .owner = THIS_MODULE, + .open = cpu_seq_open, + .read = seq_read, + .write = cpu_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg, + unsigned file, struct dentry *dentry) +{ + struct cpu_private *priv = NULL; + + /* Already intialized */ + if (file == CPU_INDEX_BIT) + if (per_cpu(cpu_arr[type].init, cpu)) + return 0; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (priv == NULL) + return -ENOMEM; + + priv->cpu = cpu; + priv->type = type; + priv->reg = reg; + priv->file = file; + mutex_lock(&cpu_debug_lock); + per_cpu(priv_arr[type], cpu) = priv; + per_cpu(cpu_priv_count, cpu)++; + mutex_unlock(&cpu_debug_lock); + + if (file) + debugfs_create_file(cpu_file[file].name, S_IRUGO, + dentry, (void *)priv, &cpu_fops); + else { + debugfs_create_file(cpu_base[type].name, S_IRUGO, + per_cpu(cpu_arr[type].dentry, cpu), + (void *)priv, &cpu_fops); + mutex_lock(&cpu_debug_lock); + per_cpu(cpu_arr[type].init, cpu) = 1; + mutex_unlock(&cpu_debug_lock); + } + + return 0; +} + +static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg, + struct dentry *dentry) +{ + unsigned file; + int err = 0; + + for (file = 0; file < ARRAY_SIZE(cpu_file); file++) { + err = cpu_create_file(cpu, type, reg, file, dentry); + if (err) + return err; + } + + return err; +} + +static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry) +{ + struct dentry *cpu_dentry = NULL; + unsigned reg, reg_min, reg_max; + int i, range, err = 0; + char reg_dir[12]; + u32 low, high; + + range = get_cpu_range_count(cpu); + + for (i = 0; i < range; i++) { + if (!get_cpu_range(cpu, ®_min, ®_max, i, + cpu_base[type].flag)) + continue; + + for (reg = reg_min; reg <= reg_max; reg++) { + if (rdmsr_safe_on_cpu(cpu, reg, &low, &high)) + continue; + + sprintf(reg_dir, "0x%x", reg); + cpu_dentry = debugfs_create_dir(reg_dir, dentry); + err = cpu_init_regfiles(cpu, type, reg, cpu_dentry); + if (err) + return err; + } + } + + return err; +} + +static int cpu_init_allreg(unsigned cpu, struct dentry *dentry) +{ + struct dentry *cpu_dentry = NULL; + unsigned type; + int err = 0; + + for (type = 0; type < ARRAY_SIZE(cpu_base) - 1; type++) { + if (!is_typeflag_valid(cpu, cpu_base[type].flag)) + continue; + cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry); + per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry; + + if (type < CPU_TSS_BIT) + err = cpu_init_msr(cpu, type, cpu_dentry); + else + err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT, + cpu_dentry); + if (err) + return err; + } + + return err; +} + +static int cpu_init_cpu(void) +{ + struct dentry *cpu_dentry = NULL; + struct cpuinfo_x86 *cpui; + char cpu_dir[12]; + unsigned cpu; + int err = 0; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) { + cpui = &cpu_data(cpu); + if (!cpu_has(cpui, X86_FEATURE_MSR)) + continue; + per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) | + (cpui->x86 << 8) | + (cpui->x86_model)); + per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu); + + sprintf(cpu_dir, "cpu%d", cpu); + cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir); + err = cpu_init_allreg(cpu, cpu_dentry); + + pr_info("cpu%d(%d) debug files %d\n", + cpu, nr_cpu_ids, per_cpu(cpu_priv_count, cpu)); + if (per_cpu(cpu_priv_count, cpu) > MAX_CPU_FILES) { + pr_err("Register files count %d exceeds limit %d\n", + per_cpu(cpu_priv_count, cpu), MAX_CPU_FILES); + per_cpu(cpu_priv_count, cpu) = MAX_CPU_FILES; + err = -ENFILE; + } + if (err) + return err; + } + + return err; +} + +static int __init cpu_debug_init(void) +{ + cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir); + + return cpu_init_cpu(); +} + +static void __exit cpu_debug_exit(void) +{ + int i, cpu; + + if (cpu_debugfs_dir) + debugfs_remove_recursive(cpu_debugfs_dir); + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + for (i = 0; i < per_cpu(cpu_priv_count, cpu); i++) + kfree(per_cpu(priv_arr[i], cpu)); +} + +module_init(cpu_debug_init); +module_exit(cpu_debug_exit); + +MODULE_AUTHOR("Jaswinder Singh Rajput"); +MODULE_DESCRIPTION("CPU Debug module"); +MODULE_LICENSE("GPL"); diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index b585e04cbc9..3178c3acd97 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -277,7 +277,6 @@ static struct cpufreq_driver p4clockmod_driver = { .name = "p4-clockmod", .owner = THIS_MODULE, .attr = p4clockmod_attr, - .hide_interface = 1, }; diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index ffd0f5ed071..593171e967e 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -61,23 +61,23 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) */ static unsigned char Cx86_dir0_msb __cpuinitdata = 0; -static char Cx86_model[][9] __cpuinitdata = { +static const char __cpuinitconst Cx86_model[][9] = { "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", "M II ", "Unknown" }; -static char Cx486_name[][5] __cpuinitdata = { +static const char __cpuinitconst Cx486_name[][5] = { "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", "SRx2", "DRx2" }; -static char Cx486S_name[][4] __cpuinitdata = { +static const char __cpuinitconst Cx486S_name[][4] = { "S", "S2", "Se", "S2e" }; -static char Cx486D_name[][4] __cpuinitdata = { +static const char __cpuinitconst Cx486D_name[][4] = { "DX", "DX2", "?", "?", "?", "DX4" }; static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock"; -static char cyrix_model_mult1[] __cpuinitdata = "12??43"; -static char cyrix_model_mult2[] __cpuinitdata = "12233445"; +static const char __cpuinitconst cyrix_model_mult1[] = "12??43"; +static const char __cpuinitconst cyrix_model_mult2[] = "12233445"; /* * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old @@ -435,7 +435,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) } } -static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { +static const struct cpu_dev __cpuinitconst cyrix_cpu_dev = { .c_vendor = "Cyrix", .c_ident = { "CyrixInstead" }, .c_early_init = early_init_cyrix, @@ -446,7 +446,7 @@ static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { cpu_dev_register(cyrix_cpu_dev); -static struct cpu_dev nsc_cpu_dev __cpuinitdata = { +static const struct cpu_dev __cpuinitconst nsc_cpu_dev = { .c_vendor = "NSC", .c_ident = { "Geode by NSC" }, .c_init = init_nsc, diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 191117f1ad5..b09d4eb52bb 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -54,6 +54,11 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) c->x86_cache_alignment = 128; #endif + /* CPUID workaround for 0F33/0F34 CPU */ + if (c->x86 == 0xF && c->x86_model == 0x3 + && (c->x86_mask == 0x3 || c->x86_mask == 0x4)) + c->x86_phys_bits = 36; + /* * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate * with P/T states and does not stop in deep C-states @@ -410,7 +415,7 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i } #endif -static struct cpu_dev intel_cpu_dev __cpuinitdata = { +static const struct cpu_dev __cpuinitconst intel_cpu_dev = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 7293508d8f5..c471eb1a389 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -32,7 +32,7 @@ struct _cache_table }; /* all the cache descriptor types we care about (no TLB or trace cache entries) */ -static struct _cache_table cache_table[] __cpuinitdata = +static const struct _cache_table __cpuinitconst cache_table[] = { { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ @@ -206,15 +206,15 @@ union l3_cache { unsigned val; }; -static unsigned short assocs[] __cpuinitdata = { +static const unsigned short __cpuinitconst assocs[] = { [1] = 1, [2] = 2, [4] = 4, [6] = 8, [8] = 16, [0xa] = 32, [0xb] = 48, [0xc] = 64, [0xf] = 0xffff // ?? }; -static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; -static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; +static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 }; +static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 }; static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index bfbd5323a63..ca14604611e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -639,7 +639,7 @@ static void mce_init_timer(void) if (!next_interval) return; setup_timer(t, mcheck_timer, smp_processor_id()); - t->expires = round_jiffies_relative(jiffies + next_interval); + t->expires = round_jiffies(jiffies + next_interval); add_timer(t); } @@ -1110,7 +1110,7 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, break; case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: - t->expires = round_jiffies_relative(jiffies + next_interval); + t->expires = round_jiffies(jiffies + next_interval); add_timer_on(t, cpu); smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); break; diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 52b3fefbd5a..bb62b3e5caa 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -98,7 +98,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) #endif } -static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { +static const struct cpu_dev __cpuinitconst transmeta_cpu_dev = { .c_vendor = "Transmeta", .c_ident = { "GenuineTMx86", "TransmetaCPU" }, .c_early_init = early_init_transmeta, diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c index e777f79e096..fd2c37bf7ac 100644 --- a/arch/x86/kernel/cpu/umc.c +++ b/arch/x86/kernel/cpu/umc.c @@ -8,7 +8,7 @@ * so no special init takes place. */ -static struct cpu_dev umc_cpu_dev __cpuinitdata = { +static const struct cpu_dev __cpuinitconst umc_cpu_dev = { .c_vendor = "UMC", .c_ident = { "UMC UMC UMC" }, .c_models = { diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 169a120587b..87b67e3a765 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -729,7 +729,7 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task, spin_unlock_irqrestore(&ds_lock, irq); - ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); + ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); ds_resume_pebs(tracer); return tracer; @@ -1029,5 +1029,4 @@ void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) void ds_exit_thread(struct task_struct *tsk) { - WARN_ON(tsk->thread.ds_ctx); } diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 508bec1cee2..95b81c18b6b 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -110,19 +110,25 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type) /* * Add a memory region to the kernel e820 map. */ -void __init e820_add_region(u64 start, u64 size, int type) +static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, + int type) { - int x = e820.nr_map; + int x = e820x->nr_map; - if (x == ARRAY_SIZE(e820.map)) { + if (x == ARRAY_SIZE(e820x->map)) { printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); return; } - e820.map[x].addr = start; - e820.map[x].size = size; - e820.map[x].type = type; - e820.nr_map++; + e820x->map[x].addr = start; + e820x->map[x].size = size; + e820x->map[x].type = type; + e820x->nr_map++; +} + +void __init e820_add_region(u64 start, u64 size, int type) +{ + __e820_add_region(&e820, start, size, type); } void __init e820_print_map(char *who) @@ -417,11 +423,11 @@ static int __init append_e820_map(struct e820entry *biosmap, int nr_map) return __append_e820_map(biosmap, nr_map); } -static u64 __init e820_update_range_map(struct e820map *e820x, u64 start, +static u64 __init __e820_update_range(struct e820map *e820x, u64 start, u64 size, unsigned old_type, unsigned new_type) { - int i; + unsigned int i; u64 real_updated_size = 0; BUG_ON(old_type == new_type); @@ -429,7 +435,7 @@ static u64 __init e820_update_range_map(struct e820map *e820x, u64 start, if (size > (ULLONG_MAX - start)) size = ULLONG_MAX - start; - for (i = 0; i < e820.nr_map; i++) { + for (i = 0; i < e820x->nr_map; i++) { struct e820entry *ei = &e820x->map[i]; u64 final_start, final_end; if (ei->type != old_type) @@ -446,10 +452,16 @@ static u64 __init e820_update_range_map(struct e820map *e820x, u64 start, final_end = min(start + size, ei->addr + ei->size); if (final_start >= final_end) continue; - e820_add_region(final_start, final_end - final_start, - new_type); + + __e820_add_region(e820x, final_start, final_end - final_start, + new_type); + real_updated_size += final_end - final_start; + /* + * left range could be head or tail, so need to update + * size at first. + */ ei->size -= final_end - final_start; if (ei->addr < final_start) continue; @@ -461,13 +473,13 @@ static u64 __init e820_update_range_map(struct e820map *e820x, u64 start, u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, unsigned new_type) { - return e820_update_range_map(&e820, start, size, old_type, new_type); + return __e820_update_range(&e820, start, size, old_type, new_type); } static u64 __init e820_update_range_saved(u64 start, u64 size, unsigned old_type, unsigned new_type) { - return e820_update_range_map(&e820_saved, start, size, old_type, + return __e820_update_range(&e820_saved, start, size, old_type, new_type); } @@ -1020,8 +1032,8 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) continue; return addr; } - return -1UL; + return -1ULL; } /* @@ -1034,13 +1046,22 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) u64 start; start = startt; - while (size < sizet) + while (size < sizet && (start + 1)) start = find_e820_area_size(start, &size, align); if (size < sizet) return 0; +#ifdef CONFIG_X86_32 + if (start >= MAXMEM) + return 0; + if (start + size > MAXMEM) + size = MAXMEM - start; +#endif + addr = round_down(start + size - sizet, align); + if (addr < start) + return 0; e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); printk(KERN_INFO "update e820 for early_reserve_e820\n"); diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 639ad98238a..335f049d110 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -250,7 +250,7 @@ static int dbgp_wait_until_complete(void) return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl); } -static void dbgp_mdelay(int ms) +static void __init dbgp_mdelay(int ms) { int i; @@ -311,7 +311,7 @@ static void dbgp_set_data(const void *buf, int size) writel(hi, &ehci_debug->data47); } -static void dbgp_get_data(void *buf, int size) +static void __init dbgp_get_data(void *buf, int size) { unsigned char *bytes = buf; u32 lo, hi; @@ -355,7 +355,7 @@ static int dbgp_bulk_write(unsigned devnum, unsigned endpoint, return ret; } -static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, +static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, int size) { u32 pids, addr, ctrl; @@ -386,8 +386,8 @@ static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, return ret; } -static int dbgp_control_msg(unsigned devnum, int requesttype, int request, - int value, int index, void *data, int size) +static int __init dbgp_control_msg(unsigned devnum, int requesttype, + int request, int value, int index, void *data, int size) { u32 pids, addr, ctrl; struct usb_ctrlrequest req; @@ -489,7 +489,7 @@ static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc) return 0; } -static int ehci_reset_port(int port) +static int __init ehci_reset_port(int port) { u32 portsc; u32 delay_time, delay; @@ -532,7 +532,7 @@ static int ehci_reset_port(int port) return -EBUSY; } -static int ehci_wait_for_port(int port) +static int __init ehci_wait_for_port(int port) { u32 status; int ret, reps; @@ -557,13 +557,13 @@ static inline void dbgp_printk(const char *fmt, ...) { } typedef void (*set_debug_port_t)(int port); -static void default_set_debug_port(int port) +static void __init default_set_debug_port(int port) { } -static set_debug_port_t set_debug_port = default_set_debug_port; +static set_debug_port_t __initdata set_debug_port = default_set_debug_port; -static void nvidia_set_debug_port(int port) +static void __init nvidia_set_debug_port(int port) { u32 dword; dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 899e8938e79..c929add475c 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -442,8 +442,7 @@ sysenter_past_esp: GET_THREAD_INFO(%ebp) - /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) jnz sysenter_audit sysenter_do_call: cmpl $(nr_syscalls), %eax @@ -454,7 +453,7 @@ sysenter_do_call: DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx - testw $_TIF_ALLWORK_MASK, %cx + testl $_TIF_ALLWORK_MASK, %ecx jne sysexit_audit sysenter_exit: /* if something modifies registers it must also disable sysexit */ @@ -468,7 +467,7 @@ sysenter_exit: #ifdef CONFIG_AUDITSYSCALL sysenter_audit: - testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) jnz syscall_trace_entry addl $4,%esp CFI_ADJUST_CFA_OFFSET -4 @@ -485,7 +484,7 @@ sysenter_audit: jmp sysenter_do_call sysexit_audit: - testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx + testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx jne syscall_exit_work TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_ANY) @@ -498,7 +497,7 @@ sysexit_audit: DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx - testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx + testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx jne syscall_exit_work movl PT_EAX(%esp),%eax /* reload syscall return value */ jmp sysenter_exit @@ -523,8 +522,7 @@ ENTRY(system_call) SAVE_ALL GET_THREAD_INFO(%ebp) # system call tracing in operation / emulation - /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys @@ -538,7 +536,7 @@ syscall_exit: # between sampling and the iret TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx - testw $_TIF_ALLWORK_MASK, %cx # current->work + testl $_TIF_ALLWORK_MASK, %ecx # current->work jne syscall_exit_work restore_all: @@ -673,7 +671,7 @@ END(syscall_trace_entry) # perform syscall exit tracing ALIGN syscall_exit_work: - testb $_TIF_WORK_SYSCALL_EXIT, %cl + testl $_TIF_WORK_SYSCALL_EXIT, %ecx jz work_pending TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7ba4621c0df..a331ec38af9 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -368,6 +368,7 @@ ENTRY(save_rest) END(save_rest) /* save complete stack frame */ + .pushsection .kprobes.text, "ax" ENTRY(save_paranoid) XCPT_FRAME 1 RDI+8 cld @@ -396,6 +397,7 @@ ENTRY(save_paranoid) 1: ret CFI_ENDPROC END(save_paranoid) + .popsection /* * A newly forked process directly context switches into this address. @@ -416,7 +418,6 @@ ENTRY(ret_from_fork) GET_THREAD_INFO(%rcx) - CFI_REMEMBER_STATE RESTORE_REST testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? @@ -428,7 +429,6 @@ ENTRY(ret_from_fork) RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET jmp ret_from_sys_call # go to the SYSRET fastpath - CFI_RESTORE_STATE CFI_ENDPROC END(ret_from_fork) diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index f5fc8c781a6..e7368c1da01 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -14,12 +14,12 @@ #include <linux/ftrace.h> #include <linux/suspend.h> #include <linux/gfp.h> +#include <linux/io.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> -#include <asm/io.h> #include <asm/apic.h> #include <asm/cpufeature.h> #include <asm/desc.h> @@ -63,7 +63,7 @@ static void load_segments(void) "\tmovl %%eax,%%fs\n" "\tmovl %%eax,%%gs\n" "\tmovl %%eax,%%ss\n" - ::: "eax", "memory"); + : : : "eax", "memory"); #undef STR #undef __STR } @@ -205,7 +205,8 @@ void machine_kexec(struct kimage *image) if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC - /* We need to put APICs in legacy mode so that we can + /* + * We need to put APICs in legacy mode so that we can * get timer interrupts in second kernel. kexec/kdump * paths already have calls to disable_IO_APIC() in * one form or other. kexec jump path also need @@ -227,7 +228,8 @@ void machine_kexec(struct kimage *image) page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT); - /* The segment registers are funny things, they have both a + /* + * The segment registers are funny things, they have both a * visible and an invisible part. Whenever the visible part is * set to a specific selector, the invisible part is loaded * with from a table in memory. At no other time is the @@ -237,11 +239,12 @@ void machine_kexec(struct kimage *image) * segments, before I zap the gdt with an invalid value. */ load_segments(); - /* The gdt & idt are now invalid. + /* + * The gdt & idt are now invalid. * If you want to load them you must set up your own idt & gdt. */ - set_gdt(phys_to_virt(0),0); - set_idt(phys_to_virt(0),0); + set_gdt(phys_to_virt(0), 0); + set_idt(phys_to_virt(0), 0); /* now call it */ image->start = relocate_kernel_ptr((unsigned long)image->head, diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 6993d51b7fd..89cea4d4467 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -12,11 +12,47 @@ #include <linux/reboot.h> #include <linux/numa.h> #include <linux/ftrace.h> +#include <linux/io.h> +#include <linux/suspend.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> -#include <asm/io.h> + +static int init_one_level2_page(struct kimage *image, pgd_t *pgd, + unsigned long addr) +{ + pud_t *pud; + pmd_t *pmd; + struct page *page; + int result = -ENOMEM; + + addr &= PMD_MASK; + pgd += pgd_index(addr); + if (!pgd_present(*pgd)) { + page = kimage_alloc_control_pages(image, 0); + if (!page) + goto out; + pud = (pud_t *)page_address(page); + memset(pud, 0, PAGE_SIZE); + set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); + } + pud = pud_offset(pgd, addr); + if (!pud_present(*pud)) { + page = kimage_alloc_control_pages(image, 0); + if (!page) + goto out; + pmd = (pmd_t *)page_address(page); + memset(pmd, 0, PAGE_SIZE); + set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); + } + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) + set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); + result = 0; +out: + return result; +} static void init_level2_page(pmd_t *level2p, unsigned long addr) { @@ -83,9 +119,8 @@ static int init_level4_page(struct kimage *image, pgd_t *level4p, } level3p = (pud_t *)page_address(page); result = init_level3_page(image, level3p, addr, last_addr); - if (result) { + if (result) goto out; - } set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); addr += PGDIR_SIZE; } @@ -156,6 +191,13 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable) result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); if (result) return result; + /* + * image->start may be outside 0 ~ max_pfn, for example when + * jump back to original kernel from kexeced kernel + */ + result = init_one_level2_page(image, level4p, image->start); + if (result) + return result; return init_transition_pgtable(image, level4p); } @@ -229,20 +271,45 @@ void machine_kexec(struct kimage *image) { unsigned long page_list[PAGES_NR]; void *control_page; + int save_ftrace_enabled; - tracer_disable(); +#ifdef CONFIG_KEXEC_JUMP + if (kexec_image->preserve_context) + save_processor_state(); +#endif + + save_ftrace_enabled = __ftrace_enabled_save(); /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + if (image->preserve_context) { +#ifdef CONFIG_X86_IO_APIC + /* + * We need to put APICs in legacy mode so that we can + * get timer interrupts in second kernel. kexec/kdump + * paths already have calls to disable_IO_APIC() in + * one form or other. kexec jump path also need + * one. + */ + disable_IO_APIC(); +#endif + } + control_page = page_address(image->control_code_page) + PAGE_SIZE; - memcpy(control_page, relocate_kernel, PAGE_SIZE); + memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); + page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; page_list[PA_TABLE_PAGE] = (unsigned long)__pa(page_address(image->control_code_page)); - /* The segment registers are funny things, they have both a + if (image->type == KEXEC_TYPE_DEFAULT) + page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) + << PAGE_SHIFT); + + /* + * The segment registers are funny things, they have both a * visible and an invisible part. Whenever the visible part is * set to a specific selector, the invisible part is loaded * with from a table in memory. At no other time is the @@ -252,15 +319,25 @@ void machine_kexec(struct kimage *image) * segments, before I zap the gdt with an invalid value. */ load_segments(); - /* The gdt & idt are now invalid. + /* + * The gdt & idt are now invalid. * If you want to load them you must set up your own idt & gdt. */ - set_gdt(phys_to_virt(0),0); - set_idt(phys_to_virt(0),0); + set_gdt(phys_to_virt(0), 0); + set_idt(phys_to_virt(0), 0); /* now call it */ - relocate_kernel((unsigned long)image->head, (unsigned long)page_list, - image->start); + image->start = relocate_kernel((unsigned long)image->head, + (unsigned long)page_list, + image->start, + image->preserve_context); + +#ifdef CONFIG_KEXEC_JUMP + if (kexec_image->preserve_context) + restore_processor_state(); +#endif + + __ftrace_enabled_restore(save_ftrace_enabled); } void arch_crash_save_vmcoreinfo(void) diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index 666e43df51f..712d15fdc41 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -226,7 +226,7 @@ static int __devinit set_check_enable_amd_mmconf(const struct dmi_system_id *d) return 0; } -static struct dmi_system_id __devinitdata mmconf_dmi_table[] = { +static const struct dmi_system_id __cpuinitconst mmconf_dmi_table[] = { { .callback = set_check_enable_amd_mmconf, .ident = "Sun Microsystems Machine", diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index e8192401da4..47673e02ae5 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -890,12 +890,12 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, #ifdef CONFIG_X86_IO_APIC struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; - printk(KERN_INFO "OLD "); + apic_printk(APIC_VERBOSE, "OLD "); print_MP_intsrc_info(m); i = get_MP_intsrc_index(m); if (i > 0) { assign_to_mpc_intsrc(&mp_irqs[i], m); - printk(KERN_INFO "NEW "); + apic_printk(APIC_VERBOSE, "NEW "); print_mp_irq_info(&mp_irqs[i]); } else if (!i) { /* legacy, do nothing */ @@ -943,7 +943,7 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, continue; if (nr_m_spare > 0) { - printk(KERN_INFO "*NEW* found "); + apic_printk(APIC_VERBOSE, "*NEW* found\n"); nr_m_spare--; assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]); m_spare[nr_m_spare] = NULL; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 3d9672e59c1..19378715f41 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -685,9 +685,8 @@ static int ptrace_bts_config(struct task_struct *child, if (!cfg.signal) return -EINVAL; - return -EOPNOTSUPP; - child->thread.bts_ovfl_signal = cfg.signal; + return -EOPNOTSUPP; } if ((cfg.flags & PTRACE_BTS_O_ALLOC) && diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 309949e9e1c..6a5a2970f4c 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -74,8 +74,7 @@ static void ich_force_hpet_resume(void) if (!force_hpet_address) return; - if (rcba_base == NULL) - BUG(); + BUG_ON(rcba_base == NULL); /* read the Function Disable register, dword mode only */ val = readl(rcba_base + 0x3404); diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index 2064d0aa8d2..41235531b11 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -17,7 +17,8 @@ #define PTR(x) (x << 2) -/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE +/* + * control_page + KEXEC_CONTROL_CODE_MAX_SIZE * ~ control_page + PAGE_SIZE are used as data storage and stack for * jumping back */ @@ -76,8 +77,10 @@ relocate_kernel: movl %eax, CP_PA_SWAP_PAGE(%edi) movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi) - /* get physical address of control page now */ - /* this is impossible after page table switch */ + /* + * get physical address of control page now + * this is impossible after page table switch + */ movl PTR(PA_CONTROL_PAGE)(%ebp), %edi /* switch to new set of page tables */ @@ -97,7 +100,8 @@ identity_mapped: /* store the start address on the stack */ pushl %edx - /* Set cr0 to a known state: + /* + * Set cr0 to a known state: * - Paging disabled * - Alignment check disabled * - Write protect disabled @@ -113,7 +117,8 @@ identity_mapped: /* clear cr4 if applicable */ testl %ecx, %ecx jz 1f - /* Set cr4 to a known state: + /* + * Set cr4 to a known state: * Setting everything to zero seems safe. */ xorl %eax, %eax @@ -132,15 +137,18 @@ identity_mapped: call swap_pages addl $8, %esp - /* To be certain of avoiding problems with self-modifying code + /* + * To be certain of avoiding problems with self-modifying code * I need to execute a serializing instruction here. * So I flush the TLB, it's handy, and not processor dependent. */ xorl %eax, %eax movl %eax, %cr3 - /* set all of the registers to known values */ - /* leave %esp alone */ + /* + * set all of the registers to known values + * leave %esp alone + */ testl %esi, %esi jnz 1f diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index d32cfb27a47..4de8f5b3d47 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -19,29 +19,77 @@ #define PTR(x) (x << 3) #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +/* + * control_page + KEXEC_CONTROL_CODE_MAX_SIZE + * ~ control_page + PAGE_SIZE are used as data storage and stack for + * jumping back + */ +#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset)) + +/* Minimal CPU state */ +#define RSP DATA(0x0) +#define CR0 DATA(0x8) +#define CR3 DATA(0x10) +#define CR4 DATA(0x18) + +/* other data */ +#define CP_PA_TABLE_PAGE DATA(0x20) +#define CP_PA_SWAP_PAGE DATA(0x28) +#define CP_PA_BACKUP_PAGES_MAP DATA(0x30) + .text .align PAGE_SIZE .code64 .globl relocate_kernel relocate_kernel: - /* %rdi indirection_page + /* + * %rdi indirection_page * %rsi page_list * %rdx start address + * %rcx preserve_context */ + /* Save the CPU context, used for jumping back */ + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushf + + movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 + movq %rsp, RSP(%r11) + movq %cr0, %rax + movq %rax, CR0(%r11) + movq %cr3, %rax + movq %rax, CR3(%r11) + movq %cr4, %rax + movq %rax, CR4(%r11) + /* zero out flags, and disable interrupts */ pushq $0 popfq - /* get physical address of control page now */ - /* this is impossible after page table switch */ + /* + * get physical address of control page now + * this is impossible after page table switch + */ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 /* get physical address of page table now too */ - movq PTR(PA_TABLE_PAGE)(%rsi), %rcx + movq PTR(PA_TABLE_PAGE)(%rsi), %r9 + + /* get physical address of swap page now */ + movq PTR(PA_SWAP_PAGE)(%rsi), %r10 + + /* save some information for jumping back */ + movq %r9, CP_PA_TABLE_PAGE(%r11) + movq %r10, CP_PA_SWAP_PAGE(%r11) + movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11) /* Switch to the identity mapped page tables */ - movq %rcx, %cr3 + movq %r9, %cr3 /* setup a new stack at the end of the physical control page */ lea PAGE_SIZE(%r8), %rsp @@ -55,7 +103,8 @@ identity_mapped: /* store the start address on the stack */ pushq %rdx - /* Set cr0 to a known state: + /* + * Set cr0 to a known state: * - Paging enabled * - Alignment check disabled * - Write protect disabled @@ -68,7 +117,8 @@ identity_mapped: orl $(X86_CR0_PG | X86_CR0_PE), %eax movq %rax, %cr0 - /* Set cr4 to a known state: + /* + * Set cr4 to a known state: * - physical address extension enabled */ movq $X86_CR4_PAE, %rax @@ -78,9 +128,87 @@ identity_mapped: 1: /* Flush the TLB (needed?) */ - movq %rcx, %cr3 + movq %r9, %cr3 + + movq %rcx, %r11 + call swap_pages + + /* + * To be certain of avoiding problems with self-modifying code + * I need to execute a serializing instruction here. + * So I flush the TLB by reloading %cr3 here, it's handy, + * and not processor dependent. + */ + movq %cr3, %rax + movq %rax, %cr3 + + /* + * set all of the registers to known values + * leave %rsp alone + */ + + testq %r11, %r11 + jnz 1f + xorq %rax, %rax + xorq %rbx, %rbx + xorq %rcx, %rcx + xorq %rdx, %rdx + xorq %rsi, %rsi + xorq %rdi, %rdi + xorq %rbp, %rbp + xorq %r8, %r8 + xorq %r9, %r9 + xorq %r10, %r9 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + xorq %r14, %r14 + xorq %r15, %r15 + + ret + +1: + popq %rdx + leaq PAGE_SIZE(%r10), %rsp + call *%rdx + + /* get the re-entry point of the peer system */ + movq 0(%rsp), %rbp + call 1f +1: + popq %r8 + subq $(1b - relocate_kernel), %r8 + movq CP_PA_SWAP_PAGE(%r8), %r10 + movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi + movq CP_PA_TABLE_PAGE(%r8), %rax + movq %rax, %cr3 + lea PAGE_SIZE(%r8), %rsp + call swap_pages + movq $virtual_mapped, %rax + pushq %rax + ret + +virtual_mapped: + movq RSP(%r8), %rsp + movq CR4(%r8), %rax + movq %rax, %cr4 + movq CR3(%r8), %rax + movq CR0(%r8), %r8 + movq %rax, %cr3 + movq %r8, %cr0 + movq %rbp, %rax + + popf + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + ret /* Do the copies */ +swap_pages: movq %rdi, %rcx /* Put the page_list in %rcx */ xorq %rdi, %rdi xorq %rsi, %rsi @@ -112,36 +240,27 @@ identity_mapped: movq %rcx, %rsi /* For ever source page do a copy */ andq $0xfffffffffffff000, %rsi + movq %rdi, %rdx + movq %rsi, %rax + + movq %r10, %rdi movq $512, %rcx rep ; movsq - jmp 0b -3: - - /* To be certain of avoiding problems with self-modifying code - * I need to execute a serializing instruction here. - * So I flush the TLB by reloading %cr3 here, it's handy, - * and not processor dependent. - */ - movq %cr3, %rax - movq %rax, %cr3 - /* set all of the registers to known values */ - /* leave %rsp alone */ + movq %rax, %rdi + movq %rdx, %rsi + movq $512, %rcx + rep ; movsq - xorq %rax, %rax - xorq %rbx, %rbx - xorq %rcx, %rcx - xorq %rdx, %rdx - xorq %rsi, %rsi - xorq %rdi, %rdi - xorq %rbp, %rbp - xorq %r8, %r8 - xorq %r9, %r9 - xorq %r10, %r9 - xorq %r11, %r11 - xorq %r12, %r12 - xorq %r13, %r13 - xorq %r14, %r14 - xorq %r15, %r15 + movq %rdx, %rdi + movq %r10, %rsi + movq $512, %rcx + rep ; movsq + lea PAGE_SIZE(%rax), %rsi + jmp 0b +3: ret + + .globl kexec_control_code_size +.set kexec_control_code_size, . - relocate_kernel diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index c29f301d388..400331b50a5 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -42,6 +42,19 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { }; EXPORT_SYMBOL(__per_cpu_offset); +/* + * On x86_64 symbols referenced from code should be reachable using + * 32bit relocations. Reserve space for static percpu variables in + * modules so that they are always served from the first chunk which + * is located at the percpu segment base. On x86_32, anything can + * address anywhere. No need to reserve space in the first chunk. + */ +#ifdef CONFIG_X86_64 +#define PERCPU_FIRST_CHUNK_RESERVE PERCPU_MODULE_RESERVE +#else +#define PERCPU_FIRST_CHUNK_RESERVE 0 +#endif + /** * pcpu_need_numa - determine percpu allocation needs to consider NUMA * @@ -141,7 +154,7 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) { static struct vm_struct vm; pg_data_t *last; - size_t ptrs_size; + size_t ptrs_size, dyn_size; unsigned int cpu; ssize_t ret; @@ -169,12 +182,14 @@ proceed: * Currently supports only single page. Supporting multiple * pages won't be too difficult if it ever becomes necessary. */ - pcpur_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); + pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + + PERCPU_DYNAMIC_RESERVE); if (pcpur_size > PMD_SIZE) { pr_warning("PERCPU: static data is larger than large page, " "can't use large page\n"); return -EINVAL; } + dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; /* allocate pointer array and alloc large pages */ ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); @@ -217,8 +232,9 @@ proceed: pr_info("PERCPU: Remapped at %p with large pages, static data " "%zu bytes\n", vm.addr, static_size); - ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, PMD_SIZE, - pcpur_size - static_size, vm.addr, NULL); + ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, + PERCPU_FIRST_CHUNK_RESERVE, dyn_size, + PMD_SIZE, vm.addr, NULL); goto out_free_ar; enomem: @@ -241,24 +257,13 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) * Embedding allocator * * The first chunk is sized to just contain the static area plus - * PERCPU_DYNAMIC_RESERVE and allocated as a contiguous area using - * bootmem allocator and used as-is without being mapped into vmalloc - * area. This enables the first chunk to piggy back on the linear - * physical PMD mapping and doesn't add any additional pressure to - * TLB. + * module and dynamic reserves and embedded into linear physical + * mapping so that it can use PMD mapping without additional TLB + * pressure. */ -static void *pcpue_ptr __initdata; -static size_t pcpue_unit_size __initdata; - -static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) -{ - return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size - + ((size_t)pageno << PAGE_SHIFT)); -} - static ssize_t __init setup_pcpu_embed(size_t static_size) { - unsigned int cpu; + size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; /* * If large page isn't supported, there's no benefit in doing @@ -268,26 +273,8 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) if (!cpu_has_pse || pcpu_need_numa()) return -EINVAL; - /* allocate and copy */ - pcpue_unit_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); - pcpue_unit_size = max_t(size_t, pcpue_unit_size, PCPU_MIN_UNIT_SIZE); - pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size, - PAGE_SIZE); - if (!pcpue_ptr) - return -ENOMEM; - - for_each_possible_cpu(cpu) - memcpy(pcpue_ptr + cpu * pcpue_unit_size, __per_cpu_load, - static_size); - - /* we're ready, commit */ - pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", - pcpue_unit_size >> PAGE_SHIFT, pcpue_ptr, static_size); - - return pcpu_setup_first_chunk(pcpue_get_page, static_size, - pcpue_unit_size, - pcpue_unit_size - static_size, pcpue_ptr, - NULL); + return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + reserve - PERCPU_FIRST_CHUNK_RESERVE, -1); } /* @@ -344,8 +331,9 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", pcpu4k_nr_static_pages, static_size); - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, 0, 0, NULL, - pcpu4k_populate_pte); + ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, + PERCPU_FIRST_CHUNK_RESERVE, -1, + -1, NULL, pcpu4k_populate_pte); goto out_free_ar; enomem: diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index f04549afcfe..d038b9c45cf 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -314,8 +314,6 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, int locals = 0; struct bau_desc *bau_desc; - WARN_ON(!in_atomic()); - cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); uv_cpu = uv_blade_processor_id(); diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 191a876e9e8..31ffc24eec4 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -578,7 +578,7 @@ static struct irq_chip piix4_virtual_irq_type = { static irqreturn_t piix4_master_intr(int irq, void *dev_id) { int realirq; - irq_desc_t *desc; + struct irq_desc *desc; unsigned long flags; spin_lock_irqsave(&i8259A_lock, flags); diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index fbfced6f680..5bf54e40c6e 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -275,3 +275,10 @@ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), ASSERT((per_cpu__irq_stack_union == 0), "irq_stack_union is not at start of per-cpu area"); #endif + +#ifdef CONFIG_KEXEC +#include <asm/kexec.h> + +ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, + "kexec control code size is too big") +#endif diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index f3a5305b8ad..9fe4ddaa8f6 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -348,6 +348,11 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, * flush_tlb_user() for both user and kernel mappings unless * the Page Global Enable (PGE) feature bit is set. */ *dx |= 0x00002000; + /* We also lie, and say we're family id 5. 6 or greater + * leads to a rdmsr in early_init_intel which we can't handle. + * Family ID is returned as bits 8-12 in ax. */ + *ax &= 0xFFFFF0FF; + *ax |= 0x00000500; break; case 0x80000000: /* Futureproof this a little: if they ask how much extended @@ -594,19 +599,21 @@ static void __init lguest_init_IRQ(void) /* Some systems map "vectors" to interrupts weirdly. Lguest has * a straightforward 1 to 1 mapping, so force that here. */ __get_cpu_var(vector_irq)[vector] = i; - if (vector != SYSCALL_VECTOR) { - set_intr_gate(vector, - interrupt[vector-FIRST_EXTERNAL_VECTOR]); - set_irq_chip_and_handler_name(i, &lguest_irq_controller, - handle_level_irq, - "level"); - } + if (vector != SYSCALL_VECTOR) + set_intr_gate(vector, interrupt[i]); } /* This call is required to set up for 4k stacks, where we have * separate stacks for hard and soft interrupts. */ irq_ctx_init(smp_processor_id()); } +void lguest_setup_irq(unsigned int irq) +{ + irq_to_desc_alloc_cpu(irq, 0); + set_irq_chip_and_handler_name(irq, &lguest_irq_controller, + handle_level_irq, "level"); +} + /* * Time. * diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index c22981fa2f3..ad5441ed1b5 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -1,30 +1,38 @@ /* Copyright 2002 Andi Kleen */ #include <linux/linkage.h> -#include <asm/dwarf2.h> + #include <asm/cpufeature.h> +#include <asm/dwarf2.h> /* * memcpy - Copy a memory block. * - * Input: - * rdi destination - * rsi source - * rdx count - * + * Input: + * rdi destination + * rsi source + * rdx count + * * Output: * rax original destination - */ + */ +/* + * memcpy_c() - fast string ops (REP MOVSQ) based variant. + * + * Calls to this get patched into the kernel image via the + * alternative instructions framework: + */ ALIGN memcpy_c: CFI_STARTPROC - movq %rdi,%rax - movl %edx,%ecx - shrl $3,%ecx - andl $7,%edx + movq %rdi, %rax + + movl %edx, %ecx + shrl $3, %ecx + andl $7, %edx rep movsq - movl %edx,%ecx + movl %edx, %ecx rep movsb ret CFI_ENDPROC @@ -33,99 +41,110 @@ ENDPROC(memcpy_c) ENTRY(__memcpy) ENTRY(memcpy) CFI_STARTPROC - pushq %rbx - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rbx, 0 - movq %rdi,%rax - movl %edx,%ecx - shrl $6,%ecx + /* + * Put the number of full 64-byte blocks into %ecx. + * Tail portion is handled at the end: + */ + movq %rdi, %rax + movl %edx, %ecx + shrl $6, %ecx jz .Lhandle_tail .p2align 4 .Lloop_64: + /* + * We decrement the loop index here - and the zero-flag is + * checked at the end of the loop (instructions inbetween do + * not change the zero flag): + */ decl %ecx - movq (%rsi),%r11 - movq 8(%rsi),%r8 + /* + * Move in blocks of 4x16 bytes: + */ + movq 0*8(%rsi), %r11 + movq 1*8(%rsi), %r8 + movq %r11, 0*8(%rdi) + movq %r8, 1*8(%rdi) - movq %r11,(%rdi) - movq %r8,1*8(%rdi) + movq 2*8(%rsi), %r9 + movq 3*8(%rsi), %r10 + movq %r9, 2*8(%rdi) + movq %r10, 3*8(%rdi) - movq 2*8(%rsi),%r9 - movq 3*8(%rsi),%r10 + movq 4*8(%rsi), %r11 + movq 5*8(%rsi), %r8 + movq %r11, 4*8(%rdi) + movq %r8, 5*8(%rdi) - movq %r9,2*8(%rdi) - movq %r10,3*8(%rdi) + movq 6*8(%rsi), %r9 + movq 7*8(%rsi), %r10 + movq %r9, 6*8(%rdi) + movq %r10, 7*8(%rdi) - movq 4*8(%rsi),%r11 - movq 5*8(%rsi),%r8 + leaq 64(%rsi), %rsi + leaq 64(%rdi), %rdi - movq %r11,4*8(%rdi) - movq %r8,5*8(%rdi) - - movq 6*8(%rsi),%r9 - movq 7*8(%rsi),%r10 - - movq %r9,6*8(%rdi) - movq %r10,7*8(%rdi) - - leaq 64(%rsi),%rsi - leaq 64(%rdi),%rdi jnz .Lloop_64 .Lhandle_tail: - movl %edx,%ecx - andl $63,%ecx - shrl $3,%ecx + movl %edx, %ecx + andl $63, %ecx + shrl $3, %ecx jz .Lhandle_7 + .p2align 4 .Lloop_8: decl %ecx - movq (%rsi),%r8 - movq %r8,(%rdi) - leaq 8(%rdi),%rdi - leaq 8(%rsi),%rsi + movq (%rsi), %r8 + movq %r8, (%rdi) + leaq 8(%rdi), %rdi + leaq 8(%rsi), %rsi jnz .Lloop_8 .Lhandle_7: - movl %edx,%ecx - andl $7,%ecx - jz .Lende + movl %edx, %ecx + andl $7, %ecx + jz .Lend + .p2align 4 .Lloop_1: - movb (%rsi),%r8b - movb %r8b,(%rdi) + movb (%rsi), %r8b + movb %r8b, (%rdi) incq %rdi incq %rsi decl %ecx jnz .Lloop_1 -.Lende: - popq %rbx - CFI_ADJUST_CFA_OFFSET -8 - CFI_RESTORE rbx +.Lend: ret -.Lfinal: CFI_ENDPROC ENDPROC(memcpy) ENDPROC(__memcpy) - /* Some CPUs run faster using the string copy instructions. - It is also a lot simpler. Use this when possible */ + /* + * Some CPUs run faster using the string copy instructions. + * It is also a lot simpler. Use this when possible: + */ - .section .altinstr_replacement,"ax" + .section .altinstr_replacement, "ax" 1: .byte 0xeb /* jmp <disp8> */ .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */ 2: .previous - .section .altinstructions,"a" + + .section .altinstructions, "a" .align 8 .quad memcpy .quad 1b .byte X86_FEATURE_REP_GOOD - /* Replace only beginning, memcpy is used to apply alternatives, so it - * is silly to overwrite itself with nops - reboot is only outcome... */ + + /* + * Replace only beginning, memcpy is used to apply alternatives, + * so it is silly to overwrite itself with nops - reboot is the + * only outcome... + */ .byte 2b - 1b .byte 2b - 1b .previous diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index d11745334a6..522db5e3d0b 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -121,22 +121,13 @@ void kunmap_atomic(void *kvaddr, enum km_type type) pagefault_enable(); } -/* This is the same as kmap_atomic() but can map memory that doesn't +/* + * This is the same as kmap_atomic() but can map memory that doesn't * have a struct page associated with it. */ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) { - enum fixed_addresses idx; - unsigned long vaddr; - - pagefault_disable(); - - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); - arch_flush_lazy_mmu_mode(); - - return (void*) vaddr; + return kmap_atomic_prot_pfn(pfn, type, kmap_prot); } EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 6d63e3d1253..fd3da1dda1c 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -94,9 +94,9 @@ struct map_range { #define NR_RANGE_MR 5 #endif -static int save_mr(struct map_range *mr, int nr_range, - unsigned long start_pfn, unsigned long end_pfn, - unsigned long page_size_mask) +static int __meminit save_mr(struct map_range *mr, int nr_range, + unsigned long start_pfn, unsigned long end_pfn, + unsigned long page_size_mask) { if (start_pfn < end_pfn) { if (nr_range >= NR_RANGE_MR) @@ -134,8 +134,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, { unsigned long page_size_mask = 0; unsigned long start_pfn, end_pfn; + unsigned long ret = 0; unsigned long pos; - unsigned long ret; struct map_range mr[NR_RANGE_MR]; int nr_range, i; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 2966c6b8d30..db81e9a8556 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -806,11 +806,6 @@ static unsigned long __init setup_node_bootmem(int nodeid, { unsigned long bootmap_size; - if (start_pfn > max_low_pfn) - return bootmap; - if (end_pfn > max_low_pfn) - end_pfn = max_low_pfn; - /* don't touch min_low_pfn */ bootmap_size = init_bootmem_node(NODE_DATA(nodeid), bootmap >> PAGE_SHIFT, @@ -843,13 +838,23 @@ void __init setup_bootmem_allocator(void) max_pfn_mapped<<PAGE_SHIFT); printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); + for_each_online_node(nodeid) { + unsigned long start_pfn, end_pfn; + #ifdef CONFIG_NEED_MULTIPLE_NODES - for_each_online_node(nodeid) - bootmap = setup_node_bootmem(nodeid, node_start_pfn[nodeid], - node_end_pfn[nodeid], bootmap); + start_pfn = node_start_pfn[nodeid]; + end_pfn = node_end_pfn[nodeid]; + if (start_pfn > max_low_pfn) + continue; + if (end_pfn > max_low_pfn) + end_pfn = max_low_pfn; #else - bootmap = setup_node_bootmem(0, 0, max_low_pfn, bootmap); + start_pfn = 0; + end_pfn = max_low_pfn; #endif + bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, + bootmap); + } after_bootmem = 1; } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 8a853bc3b28..54efa57d1c0 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -85,7 +85,7 @@ early_param("gbpages", parse_direct_gbpages_on); pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; EXPORT_SYMBOL_GPL(__supported_pte_mask); -static int do_not_nx __cpuinitdata; +static int disable_nx __cpuinitdata; /* * noexec=on|off @@ -100,9 +100,9 @@ static int __init nonx_setup(char *str) return -EINVAL; if (!strncmp(str, "on", 2)) { __supported_pte_mask |= _PAGE_NX; - do_not_nx = 0; + disable_nx = 0; } else if (!strncmp(str, "off", 3)) { - do_not_nx = 1; + disable_nx = 1; __supported_pte_mask &= ~_PAGE_NX; } return 0; @@ -114,7 +114,7 @@ void __cpuinit check_efer(void) unsigned long efer; rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || do_not_nx) + if (!(efer & EFER_NX) || disable_nx) __supported_pte_mask &= ~_PAGE_NX; } diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 04102d42ff4..6e60ba698ce 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -18,6 +18,7 @@ #include <asm/iomap.h> #include <asm/pat.h> +#include <asm/highmem.h> #include <linux/module.h> int is_io_mapping_possible(resource_size_t base, unsigned long size) @@ -31,16 +32,27 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size) } EXPORT_SYMBOL_GPL(is_io_mapping_possible); -/* Map 'pfn' using fixed map 'type' and protections 'prot' - */ -void * -iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) +void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) { enum fixed_addresses idx; unsigned long vaddr; pagefault_disable(); + idx = type + KM_TYPE_NR * smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); + arch_flush_lazy_mmu_mode(); + + return (void *)vaddr; +} + +/* + * Map 'pfn' using fixed map 'type' and protections 'prot' + */ +void * +iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) +{ /* * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the @@ -50,12 +62,7 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) prot = PAGE_KERNEL_UC_MINUS; - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte-idx, pfn_pte(pfn, prot)); - arch_flush_lazy_mmu_mode(); - - return (void*) vaddr; + return kmap_atomic_prot_pfn(pfn, type, prot); } EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 62773abdf08..55e127f71ed 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -22,13 +22,17 @@ #include <asm/pgalloc.h> #include <asm/pat.h> -#ifdef CONFIG_X86_64 - -static inline int phys_addr_valid(unsigned long addr) +static inline int phys_addr_valid(resource_size_t addr) { - return addr < (1UL << boot_cpu_data.x86_phys_bits); +#ifdef CONFIG_PHYS_ADDR_T_64BIT + return !(addr >> boot_cpu_data.x86_phys_bits); +#else + return 1; +#endif } +#ifdef CONFIG_X86_64 + unsigned long __phys_addr(unsigned long x) { if (x >= __START_KERNEL_map) { @@ -65,11 +69,6 @@ EXPORT_SYMBOL(__virt_addr_valid); #else -static inline int phys_addr_valid(unsigned long addr) -{ - return 1; -} - #ifdef CONFIG_DEBUG_VIRTUAL unsigned long __phys_addr(unsigned long x) { @@ -87,6 +86,8 @@ bool __virt_addr_valid(unsigned long x) return false; if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) return false; + if (x >= FIXADDR_START) + return false; return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); } EXPORT_SYMBOL(__virt_addr_valid); @@ -486,7 +487,12 @@ static int __init early_ioremap_debug_setup(char *str) early_param("early_ioremap_debug", early_ioremap_debug_setup); static __initdata int after_paging_init; -static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; +#define __FIXADDR_TOP (-PAGE_SIZE) +static pte_t bm_pte[(__fix_to_virt(FIX_DBGP_BASE) + ^ __fix_to_virt(FIX_BTMAP_BEGIN)) >> PMD_SHIFT + ? PAGE_SIZE / sizeof(pte_t) : 0] __page_aligned_bss; +#undef __FIXADDR_TOP +static __initdata pte_t *bm_ptep; static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) { @@ -501,19 +507,33 @@ static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) static inline pte_t * __init early_ioremap_pte(unsigned long addr) { + if (!sizeof(bm_pte)) + return &bm_ptep[pte_index(addr)]; return &bm_pte[pte_index(addr)]; } +static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; + void __init early_ioremap_init(void) { pmd_t *pmd; + int i; if (early_ioremap_debug) printk(KERN_INFO "early_ioremap_init()\n"); + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) + slot_virt[i] = fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); + pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); - memset(bm_pte, 0, sizeof(bm_pte)); - pmd_populate_kernel(&init_mm, pmd, bm_pte); + if (sizeof(bm_pte)) { + memset(bm_pte, 0, sizeof(bm_pte)); + pmd_populate_kernel(&init_mm, pmd, bm_pte); + } else { + bm_ptep = pte_offset_kernel(pmd, 0); + if (early_ioremap_debug) + printk(KERN_INFO "bm_ptep=%p\n", bm_ptep); + } /* * The boot-ioremap range spans multiple pmds, for which @@ -577,6 +597,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx) static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; + static int __init check_early_ioremap_leak(void) { int count = 0; @@ -598,7 +619,8 @@ static int __init check_early_ioremap_leak(void) } late_initcall(check_early_ioremap_leak); -static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) +static void __init __iomem * +__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) { unsigned long offset, last_addr; unsigned int nrpages; @@ -664,9 +686,9 @@ static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned lo --nrpages; } if (early_ioremap_debug) - printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); + printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]); - prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0)); + prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); return prev_map[slot]; } @@ -734,8 +756,3 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) } prev_map[slot] = NULL; } - -void __this_fixmap_does_not_exist(void) -{ - WARN_ON(1); -} diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 9f205030d9a..4f115e00486 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -310,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) { - pr_warning("kmmio: spurious debug trap on CPU %d.\n", + pr_debug("kmmio: spurious debug trap on CPU %d.\n", smp_processor_id()); goto out; } @@ -451,23 +451,24 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head) static void remove_kmmio_fault_pages(struct rcu_head *head) { - struct kmmio_delayed_release *dr = container_of( - head, - struct kmmio_delayed_release, - rcu); + struct kmmio_delayed_release *dr = + container_of(head, struct kmmio_delayed_release, rcu); struct kmmio_fault_page *p = dr->release_list; struct kmmio_fault_page **prevp = &dr->release_list; unsigned long flags; + spin_lock_irqsave(&kmmio_lock, flags); while (p) { - if (!p->count) + if (!p->count) { list_del_rcu(&p->list); - else + prevp = &p->release_next; + } else { *prevp = p->release_next; - prevp = &p->release_next; + } p = p->release_next; } spin_unlock_irqrestore(&kmmio_lock, flags); + /* This is the real RCU destroy call. */ call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); } diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 0bcd7883d03..605c8be0621 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -100,6 +100,9 @@ static int __init parse_memtest(char *arg) { if (arg) memtest_pattern = simple_strtoul(arg, NULL, 0); + else + memtest_pattern = ARRAY_SIZE(patterns); + return 0; } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 8253bc97587..9c4294986af 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -522,6 +522,17 @@ static int split_large_page(pte_t *kpte, unsigned long address) * primary protection behavior: */ __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE))); + + /* + * Intel Atom errata AAH41 workaround. + * + * The real fix should be in hw or in a microcode update, but + * we also probabilistically try to reduce the window of having + * a large TLB mixed with 4K TLBs while instruction fetches are + * going on. + */ + __flush_tlb_all(); + base = NULL; out_unlock: diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 2ed37158012..640339ee4fb 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -677,10 +677,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, is_ram = pat_pagerange_is_ram(paddr, paddr + size); /* - * reserve_pfn_range() doesn't support RAM pages. + * reserve_pfn_range() doesn't support RAM pages. Maintain the current + * behavior with RAM pages by returning success. */ if (is_ram != 0) - return -EINVAL; + return 0; ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); if (ret) diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 82d22fc601a..8c362b96b64 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -90,7 +90,7 @@ static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d) return 0; } -static struct dmi_system_id can_skip_pciprobe_dmi_table[] __devinitdata = { +static const struct dmi_system_id can_skip_pciprobe_dmi_table[] __devinitconst = { /* * Systems where PCI IO resource ISA alignment can be skipped * when the ISA enable bit in the bridge control is not set @@ -183,7 +183,7 @@ static int __devinit assign_all_busses(const struct dmi_system_id *d) } #endif -static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { +static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = { #ifdef __i386__ /* * Laptops which need pci=assign-busses to see Cardbus cards diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 7d388d5cf54..9c49919e4d1 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -356,7 +356,7 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev) DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); -static struct dmi_system_id __devinitdata msi_k8t_dmi_table[] = { +static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = { { .ident = "MSI-K8T-Neo2Fir", .matches = { @@ -413,7 +413,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, */ static u16 toshiba_line_size; -static struct dmi_system_id __devinitdata toshiba_ohci1394_dmi_table[] = { +static const struct dmi_system_id __devinitconst toshiba_ohci1394_dmi_table[] = { { .ident = "Toshiba PS5 based laptop", .matches = { |