diff options
Diffstat (limited to 'arch/x86')
149 files changed, 11259 insertions, 6492 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bf07b6f50fa..112afd368c7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -121,7 +121,7 @@ config ARCH_HAS_CACHE_LINE_SIZE def_bool y config HAVE_SETUP_PER_CPU_AREA - def_bool X86_64 || (X86_SMP && !X86_VOYAGER) + def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER) config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP @@ -230,6 +230,27 @@ config SMP If you don't know what to do here, say N. +config X86_FIND_SMP_CONFIG + def_bool y + depends on X86_MPPARSE || X86_VOYAGER || X86_VISWS + depends on X86_32 + +if ACPI +config X86_MPPARSE + def_bool y + bool "Enable MPS table" + depends on X86_LOCAL_APIC && !X86_VISWS + help + For old smp systems that do not have proper acpi support. Newer systems + (esp with 64bit cpus) with acpi support, MADT and DSDT will override it +endif + +if !ACPI +config X86_MPPARSE + def_bool y + depends on X86_LOCAL_APIC && !X86_VISWS +endif + choice prompt "Subarchitecture Type" default X86_PC @@ -251,7 +272,7 @@ config X86_ELAN config X86_VOYAGER bool "Voyager (NCR)" - depends on X86_32 && (SMP || BROKEN) + depends on X86_32 && (SMP || BROKEN) && !PCI help Voyager is an MCA-based 32-way capable SMP architecture proprietary to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. @@ -261,39 +282,9 @@ config X86_VOYAGER If you do not specifically know you have a Voyager based machine, say N here, otherwise the kernel you build will not be bootable. -config X86_NUMAQ - bool "NUMAQ (IBM/Sequent)" - depends on SMP && X86_32 - select NUMA - help - This option is used for getting Linux to run on a (IBM/Sequent) NUMA - multiquad box. This changes the way that processors are bootstrapped, - and uses Clustered Logical APIC addressing mode instead of Flat Logical. - You will need a new lynxer.elf file to flash your firmware with - send - email to <Martin.Bligh@us.ibm.com>. - -config X86_SUMMIT - bool "Summit/EXA (IBM x440)" - depends on X86_32 && SMP - help - This option is needed for IBM systems that use the Summit/EXA chipset. - In particular, it is needed for the x440. - - If you don't have one of these computers, you should say N here. - If you want to build a NUMA kernel, you must select ACPI. - -config X86_BIGSMP - bool "Support for other sub-arch SMP systems with more than 8 CPUs" - depends on X86_32 && SMP - help - This option is needed for the systems that have more than 8 CPUs - and if the system is not of any sub-arch type above. - - If you don't have such a system, you should say N here. - config X86_VISWS bool "SGI 320/540 (Visual Workstation)" - depends on X86_32 + depends on X86_32 && !PCI help The SGI Visual Workstation series is an IA32-based workstation based on SGI systems chips with some legacy PC hardware attached. @@ -304,12 +295,33 @@ config X86_VISWS and vice versa. See <file:Documentation/sgi-visws.txt> for details. config X86_GENERICARCH - bool "Generic architecture (Summit, bigsmp, ES7000, default)" + bool "Generic architecture" depends on X86_32 help - This option compiles in the Summit, bigsmp, ES7000, default subarchitectures. - It is intended for a generic binary kernel. - If you want a NUMA kernel, select ACPI. We need SRAT for NUMA. + This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default + subarchitectures. It is intended for a generic binary kernel. + if you select them all, kernel will probe it one by one. and will + fallback to default. + +if X86_GENERICARCH + +config X86_NUMAQ + bool "NUMAQ (IBM/Sequent)" + depends on SMP && X86_32 && PCI && X86_MPPARSE + select NUMA + help + This option is used for getting Linux to run on a NUMAQ (IBM/Sequent) + NUMA multiquad box. This changes the way that processors are + bootstrapped, and uses Clustered Logical APIC addressing mode instead + of Flat Logical. You will need a new lynxer.elf file to flash your + firmware with - send email to <Martin.Bligh@us.ibm.com>. + +config X86_SUMMIT + bool "Summit/EXA (IBM x440)" + depends on X86_32 && SMP + help + This option is needed for IBM systems that use the Summit/EXA chipset. + In particular, it is needed for the x440. config X86_ES7000 bool "Support for Unisys ES7000 IA32 series" @@ -317,8 +329,15 @@ config X86_ES7000 help Support for Unisys ES7000 systems. Say 'Y' here if this kernel is supposed to run on an IA32-based Unisys ES7000 system. - Only choose this option if you have such a system, otherwise you - should say N here. + +config X86_BIGSMP + bool "Support for big SMP systems with more than 8 CPUs" + depends on X86_32 && SMP + help + This option is needed for the systems that have more than 8 CPUs + and if the system is not of any sub-arch type above. + +endif config X86_RDC321X bool "RDC R-321x SoC" @@ -337,7 +356,7 @@ config X86_RDC321X config X86_VSMP bool "Support for ScaleMP vSMP" select PARAVIRT - depends on X86_64 + depends on X86_64 && !PCI help Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is supposed to run on these EM64T-based machines. Only choose this option @@ -417,38 +436,22 @@ config PARAVIRT_CLOCK endif -config MEMTEST_BOOTPARAM - bool "Memtest boot parameter" +config MEMTEST + bool "Memtest" depends on X86_64 default y help This option adds a kernel parameter 'memtest', which allows memtest - to be disabled at boot. If this option is selected, memtest - functionality can be disabled with memtest=0 on the kernel - command line. The purpose of this option is to allow a single - kernel image to be distributed with memtest built in, but not - necessarily enabled. - + to be set. + memtest=0, mean disabled; -- default + memtest=1, mean do 1 test pattern; + ... + memtest=4, mean do 4 test patterns. If you are unsure how to answer this question, answer Y. -config MEMTEST_BOOTPARAM_VALUE - int "Memtest boot parameter default value (0-4)" - depends on MEMTEST_BOOTPARAM - range 0 4 - default 0 - help - This option sets the default value for the kernel parameter - 'memtest', which allows memtest to be disabled at boot. If this - option is set to 0 (zero), the memtest kernel parameter will - default to 0, disabling memtest at bootup. If this option is - set to 4, the memtest kernel parameter will default to 4, - enabling memtest at bootup, and use that as pattern number. - - If you are unsure how to answer this question, answer 0. - config ACPI_SRAT def_bool y - depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH) + depends on X86_32 && ACPI && NUMA && X86_GENERICARCH select ACPI_NUMA config HAVE_ARCH_PARSE_SRAT @@ -457,11 +460,11 @@ config HAVE_ARCH_PARSE_SRAT config X86_SUMMIT_NUMA def_bool y - depends on X86_32 && NUMA && (X86_SUMMIT || X86_GENERICARCH) + depends on X86_32 && NUMA && X86_GENERICARCH config X86_CYCLONE_TIMER def_bool y - depends on X86_32 && X86_SUMMIT || X86_GENERICARCH + depends on X86_GENERICARCH config ES7000_CLUSTERED_APIC def_bool y @@ -549,6 +552,21 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT Calgary anyway, pass 'iommu=calgary' on the kernel command line. If unsure, say Y. +config AMD_IOMMU + bool "AMD IOMMU support" + select SWIOTLB + depends on X86_64 && PCI && ACPI + help + With this option you can enable support for AMD IOMMU hardware in + your system. An IOMMU is a hardware component which provides + remapping of DMA memory accesses from devices. With an AMD IOMMU you + can isolate the the DMA memory of different devices and protect the + system from misbehaving device drivers or hardware. + + You can find out if your system has an AMD IOMMU if you look into + your BIOS for an option to enable it or if you have an IVRS ACPI + table. + # need this always selected by IOMMU for the VIA workaround config SWIOTLB bool @@ -561,20 +579,35 @@ config SWIOTLB config IOMMU_HELPER def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB) +config MAXSMP + bool "Configure Maximum number of SMP Processors and NUMA Nodes" + depends on X86_64 && SMP + default n + help + Configure maximum number of CPUS and NUMA Nodes for this architecture. + If unsure, say N. + +if MAXSMP +config NR_CPUS + int + default "4096" +endif +if !MAXSMP config NR_CPUS - int "Maximum number of CPUs (2-255)" - range 2 255 + int "Maximum number of CPUs (2-4096)" + range 2 4096 depends on SMP default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 default "8" help This allows you to specify the maximum number of CPUs which this - kernel will support. The maximum supported value is 255 and the + kernel will support. The maximum supported value is 4096 and the minimum value which makes sense is 2. This is purely to save memory - each supported CPU adds approximately eight kilobytes to the kernel image. +endif config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" @@ -911,9 +944,9 @@ config X86_PAE config NUMA bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" depends on SMP - depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL) + depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) default n if X86_PC - default y if (X86_NUMAQ || X86_SUMMIT) + default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) help Enable NUMA (Non Uniform Memory Access) support. The kernel will try to allocate memory used by a CPU on the @@ -965,13 +998,25 @@ config NUMA_EMU into virtual nodes when booted with "numa=fake=N", where N is the number of nodes. This is only useful for debugging. +if MAXSMP + +config NODES_SHIFT + int + default "9" +endif + +if !MAXSMP config NODES_SHIFT - int "Max num nodes shift(1-9)" - range 1 9 if X86_64 + int "Maximum NUMA Nodes (as a power of 2)" + range 1 9 if X86_64 default "6" if X86_64 default "4" if X86_NUMAQ default "3" depends on NEED_MULTIPLE_NODES + help + Specify the maximum number of NUMA Nodes available on the target + system. Increases memory reserved to accomodate various tables. +endif config HAVE_ARCH_BOOTMEM_NODE def_bool y @@ -1090,6 +1135,40 @@ config MTRR See <file:Documentation/mtrr.txt> for more information. +config MTRR_SANITIZER + def_bool y + prompt "MTRR cleanup support" + depends on MTRR + help + Convert MTRR layout from continuous to discrete, so some X driver + could add WB entries. + + Say N here if you see bootup problems (boot crash, boot hang, + spontaneous reboots). + + Could be disabled with disable_mtrr_cleanup. Also mtrr_chunk_size + could be used to send largest mtrr entry size for continuous block + to hold holes (aka. UC entries) + + If unsure, say Y. + +config MTRR_SANITIZER_ENABLE_DEFAULT + int "MTRR cleanup enable value (0-1)" + range 0 1 + default "0" + depends on MTRR_SANITIZER + help + Enable mtrr cleanup default value + +config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT + int "MTRR cleanup spare reg num (0-7)" + range 0 7 + default "1" + depends on MTRR_SANITIZER + help + mtrr cleanup spare entries default, it can be changed via + mtrr_spare_reg_nr= + config X86_PAT bool prompt "x86 PAT support" @@ -1475,8 +1554,7 @@ endmenu menu "Bus options (PCI etc.)" config PCI - bool "PCI support" if !X86_VISWS && !X86_VSMP - depends on !X86_VOYAGER + bool "PCI support" default y select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC) help diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 2ad6301849a..3d22bb8175b 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -399,6 +399,10 @@ config X86_TSC def_bool y depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 +config X86_CMPXCHG64 + def_bool y + depends on X86_PAE || X86_64 + # this should be set for all -march=.. options where the compiler # generates cmov. config X86_CMOV diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 18363374d51..acc0271920f 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -20,6 +20,14 @@ config NONPROMISC_DEVMEM If in doubt, say Y. +config X86_VERBOSE_BOOTUP + bool "Enable verbose x86 bootup info messages" + default y + help + Enables the informational output from the decompression stage + (e.g. bzImage) of the boot. If you disable this you will still + see errors. Disable this if you want silent bootup. + config EARLY_PRINTK bool "Early printk" if EMBEDDED default y @@ -60,7 +68,7 @@ config DEBUG_PAGEALLOC config DEBUG_PER_CPU_MAPS bool "Debug access to per_cpu maps" depends on DEBUG_KERNEL - depends on X86_64_SMP + depends on X86_SMP default n help Say Y to verify that the per_cpu map being accessed has @@ -129,15 +137,6 @@ config 4KSTACKS on the VM subsystem for higher order allocations. This option will also use IRQ stacks to compensate for the reduced stackspace. -config X86_FIND_SMP_CONFIG - def_bool y - depends on X86_LOCAL_APIC || X86_VOYAGER - depends on X86_32 - -config X86_MPPARSE - def_bool y - depends on (X86_32 && (X86_LOCAL_APIC && !X86_VISWS)) || X86_64 - config DOUBLEFAULT default y bool "Enable doublefault exception handler" if EMBEDDED diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 3cff3c894cf..b03d24b44bf 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -117,29 +117,11 @@ mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/ mflags-$(CONFIG_X86_VISWS) := -Iinclude/asm-x86/mach-visws mcore-$(CONFIG_X86_VISWS) := arch/x86/mach-visws/ -# NUMAQ subarch support -mflags-$(CONFIG_X86_NUMAQ) := -Iinclude/asm-x86/mach-numaq -mcore-$(CONFIG_X86_NUMAQ) := arch/x86/mach-default/ - -# BIGSMP subarch support -mflags-$(CONFIG_X86_BIGSMP) := -Iinclude/asm-x86/mach-bigsmp -mcore-$(CONFIG_X86_BIGSMP) := arch/x86/mach-default/ - -#Summit subarch support -mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-x86/mach-summit -mcore-$(CONFIG_X86_SUMMIT) := arch/x86/mach-default/ - # generic subarchitecture mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/ - -# ES7000 subarch support -mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-x86/mach-es7000 -fcore-$(CONFIG_X86_ES7000) := arch/x86/mach-es7000/ -mcore-$(CONFIG_X86_ES7000) := arch/x86/mach-default/ - # RDC R-321x subarch support mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default/ @@ -160,6 +142,7 @@ KBUILD_AFLAGS += $(mflags-y) head-y := arch/x86/kernel/head_$(BITS).o head-y += arch/x86/kernel/head$(BITS).o +head-y += arch/x86/kernel/head.o head-y += arch/x86/kernel/init_task.o libs-y += arch/x86/lib/ @@ -210,12 +193,12 @@ all: bzImage # KBUILD_IMAGE specify target image being built KBUILD_IMAGE := $(boot)/bzImage -zImage zlilo zdisk: KBUILD_IMAGE := arch/x86/boot/zImage +zImage zlilo zdisk: KBUILD_IMAGE := $(boot)/zImage zImage bzImage: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot - $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/bzImage + $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ compressed: zImage diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c index e01aafd03bd..4063d630def 100644 --- a/arch/x86/boot/a20.c +++ b/arch/x86/boot/a20.c @@ -1,7 +1,7 @@ /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2007-2008 rPath, Inc. - All Rights Reserved * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -95,6 +95,9 @@ static void enable_a20_kbc(void) outb(0xdf, 0x60); /* A20 on */ empty_8042(); + + outb(0xff, 0x64); /* Null command, but UHCI wants it */ + empty_8042(); } static void enable_a20_fast(void) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index d8819efac81..1d5dff4123e 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -30,6 +30,7 @@ #include <asm/page.h> #include <asm/boot.h> #include <asm/msr.h> +#include <asm/processor-flags.h> #include <asm/asm-offsets.h> .section ".text.head" @@ -109,7 +110,7 @@ startup_32: /* Enable PAE mode */ xorl %eax, %eax - orl $(1 << 5), %eax + orl $(X86_CR4_PAE), %eax movl %eax, %cr4 /* @@ -170,7 +171,7 @@ startup_32: pushl %eax /* Enter paged protected Mode, activating Long Mode */ - movl $0x80000001, %eax /* Enable Paging and Protected mode */ + movl $(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */ movl %eax, %cr0 /* Jump from 32bit compatibility mode into 64bit mode. */ diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 90456cee47c..bc5553b496f 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -30,6 +30,7 @@ #include <asm/io.h> #include <asm/page.h> #include <asm/boot.h> +#include <asm/bootparam.h> /* WARNING!! * This code is compiled with -fPIC and it is relocated dynamically @@ -187,13 +188,8 @@ static void gzip_release(void **); /* * This is set up by the setup-routine at boot-time */ -static unsigned char *real_mode; /* Pointer to real-mode data */ - -#define RM_EXT_MEM_K (*(unsigned short *)(real_mode + 0x2)) -#ifndef STANDARD_MEMORY_BIOS_CALL -#define RM_ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0)) -#endif -#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0)) +static struct boot_params *real_mode; /* Pointer to real-mode data */ +static int quiet; extern unsigned char input_data[]; extern int input_len; @@ -206,7 +202,8 @@ static void free(void *where); static void *memset(void *s, int c, unsigned n); static void *memcpy(void *dest, const void *src, unsigned n); -static void putstr(const char *); +static void __putstr(int, const char *); +#define putstr(__x) __putstr(0, __x) #ifdef CONFIG_X86_64 #define memptr long @@ -221,10 +218,6 @@ static char *vidmem; static int vidport; static int lines, cols; -#ifdef CONFIG_X86_NUMAQ -void *xquad_portio; -#endif - #include "../../../../lib/inflate.c" static void *malloc(int size) @@ -270,18 +263,24 @@ static void scroll(void) vidmem[i] = ' '; } -static void putstr(const char *s) +static void __putstr(int error, const char *s) { int x, y, pos; char c; +#ifndef CONFIG_X86_VERBOSE_BOOTUP + if (!error) + return; +#endif + #ifdef CONFIG_X86_32 - if (RM_SCREEN_INFO.orig_video_mode == 0 && lines == 0 && cols == 0) + if (real_mode->screen_info.orig_video_mode == 0 && + lines == 0 && cols == 0) return; #endif - x = RM_SCREEN_INFO.orig_x; - y = RM_SCREEN_INFO.orig_y; + x = real_mode->screen_info.orig_x; + y = real_mode->screen_info.orig_y; while ((c = *s++) != '\0') { if (c == '\n') { @@ -302,8 +301,8 @@ static void putstr(const char *s) } } - RM_SCREEN_INFO.orig_x = x; - RM_SCREEN_INFO.orig_y = y; + real_mode->screen_info.orig_x = x; + real_mode->screen_info.orig_y = y; pos = (x + cols * y) * 2; /* Update cursor position */ outb(14, vidport); @@ -366,9 +365,9 @@ static void flush_window(void) static void error(char *x) { - putstr("\n\n"); - putstr(x); - putstr("\n\n -- System halted"); + __putstr(1, "\n\n"); + __putstr(1, x); + __putstr(1, "\n\n -- System halted"); while (1) asm("hlt"); @@ -395,7 +394,8 @@ static void parse_elf(void *output) return; } - putstr("Parsing ELF... "); + if (!quiet) + putstr("Parsing ELF... "); phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum); if (!phdrs) @@ -430,7 +430,10 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, { real_mode = rmode; - if (RM_SCREEN_INFO.orig_video_mode == 7) { + if (real_mode->hdr.loadflags & QUIET_FLAG) + quiet = 1; + + if (real_mode->screen_info.orig_video_mode == 7) { vidmem = (char *) 0xb0000; vidport = 0x3b4; } else { @@ -438,8 +441,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, vidport = 0x3d4; } - lines = RM_SCREEN_INFO.orig_video_lines; - cols = RM_SCREEN_INFO.orig_video_cols; + lines = real_mode->screen_info.orig_video_lines; + cols = real_mode->screen_info.orig_video_cols; window = output; /* Output buffer (Normally at 1M) */ free_mem_ptr = heap; /* Heap */ @@ -465,9 +468,11 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, #endif makecrc(); - putstr("\nDecompressing Linux... "); + if (!quiet) + putstr("\nDecompressing Linux... "); gunzip(); parse_elf(output); - putstr("done.\nBooting the kernel.\n"); + if (!quiet) + putstr("done.\nBooting the kernel.\n"); return; } diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c index edaadea90aa..a1310c52fc0 100644 --- a/arch/x86/boot/compressed/relocs.c +++ b/arch/x86/boot/compressed/relocs.c @@ -10,16 +10,20 @@ #define USE_BSD #include <endian.h> -#define MAX_SHDRS 100 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) static Elf32_Ehdr ehdr; -static Elf32_Shdr shdr[MAX_SHDRS]; -static Elf32_Sym *symtab[MAX_SHDRS]; -static Elf32_Rel *reltab[MAX_SHDRS]; -static char *strtab[MAX_SHDRS]; static unsigned long reloc_count, reloc_idx; static unsigned long *relocs; +struct section { + Elf32_Shdr shdr; + struct section *link; + Elf32_Sym *symtab; + Elf32_Rel *reltab; + char *strtab; +}; +static struct section *secs; + /* * Following symbols have been audited. There values are constant and do * not change if bzImage is loaded at a different physical address than @@ -35,7 +39,7 @@ static int is_safe_abs_reloc(const char* sym_name) { int i; - for(i = 0; i < ARRAY_SIZE(safe_abs_relocs); i++) { + for (i = 0; i < ARRAY_SIZE(safe_abs_relocs); i++) { if (!strcmp(sym_name, safe_abs_relocs[i])) /* Match found */ return 1; @@ -137,10 +141,10 @@ static const char *sec_name(unsigned shndx) { const char *sec_strtab; const char *name; - sec_strtab = strtab[ehdr.e_shstrndx]; + sec_strtab = secs[ehdr.e_shstrndx].strtab; name = "<noname>"; if (shndx < ehdr.e_shnum) { - name = sec_strtab + shdr[shndx].sh_name; + name = sec_strtab + secs[shndx].shdr.sh_name; } else if (shndx == SHN_ABS) { name = "ABSOLUTE"; @@ -159,7 +163,7 @@ static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym) name = sym_strtab + sym->st_name; } else { - name = sec_name(shdr[sym->st_shndx].sh_name); + name = sec_name(secs[sym->st_shndx].shdr.sh_name); } return name; } @@ -244,29 +248,34 @@ static void read_ehdr(FILE *fp) static void read_shdrs(FILE *fp) { int i; - if (ehdr.e_shnum > MAX_SHDRS) { - die("%d section headers supported: %d\n", - ehdr.e_shnum, MAX_SHDRS); + Elf32_Shdr shdr; + + secs = calloc(ehdr.e_shnum, sizeof(struct section)); + if (!secs) { + die("Unable to allocate %d section headers\n", + ehdr.e_shnum); } if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) { die("Seek to %d failed: %s\n", ehdr.e_shoff, strerror(errno)); } - if (fread(&shdr, sizeof(shdr[0]), ehdr.e_shnum, fp) != ehdr.e_shnum) { - die("Cannot read ELF section headers: %s\n", - strerror(errno)); - } - for(i = 0; i < ehdr.e_shnum; i++) { - shdr[i].sh_name = elf32_to_cpu(shdr[i].sh_name); - shdr[i].sh_type = elf32_to_cpu(shdr[i].sh_type); - shdr[i].sh_flags = elf32_to_cpu(shdr[i].sh_flags); - shdr[i].sh_addr = elf32_to_cpu(shdr[i].sh_addr); - shdr[i].sh_offset = elf32_to_cpu(shdr[i].sh_offset); - shdr[i].sh_size = elf32_to_cpu(shdr[i].sh_size); - shdr[i].sh_link = elf32_to_cpu(shdr[i].sh_link); - shdr[i].sh_info = elf32_to_cpu(shdr[i].sh_info); - shdr[i].sh_addralign = elf32_to_cpu(shdr[i].sh_addralign); - shdr[i].sh_entsize = elf32_to_cpu(shdr[i].sh_entsize); + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (fread(&shdr, sizeof shdr, 1, fp) != 1) + die("Cannot read ELF section headers %d/%d: %s\n", + i, ehdr.e_shnum, strerror(errno)); + sec->shdr.sh_name = elf32_to_cpu(shdr.sh_name); + sec->shdr.sh_type = elf32_to_cpu(shdr.sh_type); + sec->shdr.sh_flags = elf32_to_cpu(shdr.sh_flags); + sec->shdr.sh_addr = elf32_to_cpu(shdr.sh_addr); + sec->shdr.sh_offset = elf32_to_cpu(shdr.sh_offset); + sec->shdr.sh_size = elf32_to_cpu(shdr.sh_size); + sec->shdr.sh_link = elf32_to_cpu(shdr.sh_link); + sec->shdr.sh_info = elf32_to_cpu(shdr.sh_info); + sec->shdr.sh_addralign = elf32_to_cpu(shdr.sh_addralign); + sec->shdr.sh_entsize = elf32_to_cpu(shdr.sh_entsize); + if (sec->shdr.sh_link < ehdr.e_shnum) + sec->link = &secs[sec->shdr.sh_link]; } } @@ -274,20 +283,22 @@ static void read_shdrs(FILE *fp) static void read_strtabs(FILE *fp) { int i; - for(i = 0; i < ehdr.e_shnum; i++) { - if (shdr[i].sh_type != SHT_STRTAB) { + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (sec->shdr.sh_type != SHT_STRTAB) { continue; } - strtab[i] = malloc(shdr[i].sh_size); - if (!strtab[i]) { + sec->strtab = malloc(sec->shdr.sh_size); + if (!sec->strtab) { die("malloc of %d bytes for strtab failed\n", - shdr[i].sh_size); + sec->shdr.sh_size); } - if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { die("Seek to %d failed: %s\n", - shdr[i].sh_offset, strerror(errno)); + sec->shdr.sh_offset, strerror(errno)); } - if (fread(strtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { + if (fread(sec->strtab, 1, sec->shdr.sh_size, fp) + != sec->shdr.sh_size) { die("Cannot read symbol table: %s\n", strerror(errno)); } @@ -297,28 +308,31 @@ static void read_strtabs(FILE *fp) static void read_symtabs(FILE *fp) { int i,j; - for(i = 0; i < ehdr.e_shnum; i++) { - if (shdr[i].sh_type != SHT_SYMTAB) { + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (sec->shdr.sh_type != SHT_SYMTAB) { continue; } - symtab[i] = malloc(shdr[i].sh_size); - if (!symtab[i]) { + sec->symtab = malloc(sec->shdr.sh_size); + if (!sec->symtab) { die("malloc of %d bytes for symtab failed\n", - shdr[i].sh_size); + sec->shdr.sh_size); } - if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { die("Seek to %d failed: %s\n", - shdr[i].sh_offset, strerror(errno)); + sec->shdr.sh_offset, strerror(errno)); } - if (fread(symtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { + if (fread(sec->symtab, 1, sec->shdr.sh_size, fp) + != sec->shdr.sh_size) { die("Cannot read symbol table: %s\n", strerror(errno)); } - for(j = 0; j < shdr[i].sh_size/sizeof(symtab[i][0]); j++) { - symtab[i][j].st_name = elf32_to_cpu(symtab[i][j].st_name); - symtab[i][j].st_value = elf32_to_cpu(symtab[i][j].st_value); - symtab[i][j].st_size = elf32_to_cpu(symtab[i][j].st_size); - symtab[i][j].st_shndx = elf16_to_cpu(symtab[i][j].st_shndx); + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) { + Elf32_Sym *sym = &sec->symtab[j]; + sym->st_name = elf32_to_cpu(sym->st_name); + sym->st_value = elf32_to_cpu(sym->st_value); + sym->st_size = elf32_to_cpu(sym->st_size); + sym->st_shndx = elf16_to_cpu(sym->st_shndx); } } } @@ -327,26 +341,29 @@ static void read_symtabs(FILE *fp) static void read_relocs(FILE *fp) { int i,j; - for(i = 0; i < ehdr.e_shnum; i++) { - if (shdr[i].sh_type != SHT_REL) { + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (sec->shdr.sh_type != SHT_REL) { continue; } - reltab[i] = malloc(shdr[i].sh_size); - if (!reltab[i]) { + sec->reltab = malloc(sec->shdr.sh_size); + if (!sec->reltab) { die("malloc of %d bytes for relocs failed\n", - shdr[i].sh_size); + sec->shdr.sh_size); } - if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { die("Seek to %d failed: %s\n", - shdr[i].sh_offset, strerror(errno)); + sec->shdr.sh_offset, strerror(errno)); } - if (fread(reltab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { + if (fread(sec->reltab, 1, sec->shdr.sh_size, fp) + != sec->shdr.sh_size) { die("Cannot read symbol table: %s\n", strerror(errno)); } - for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { - reltab[i][j].r_offset = elf32_to_cpu(reltab[i][j].r_offset); - reltab[i][j].r_info = elf32_to_cpu(reltab[i][j].r_info); + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { + Elf32_Rel *rel = &sec->reltab[j]; + rel->r_offset = elf32_to_cpu(rel->r_offset); + rel->r_info = elf32_to_cpu(rel->r_info); } } } @@ -357,19 +374,21 @@ static void print_absolute_symbols(void) int i; printf("Absolute symbols\n"); printf(" Num: Value Size Type Bind Visibility Name\n"); - for(i = 0; i < ehdr.e_shnum; i++) { + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; char *sym_strtab; Elf32_Sym *sh_symtab; int j; - if (shdr[i].sh_type != SHT_SYMTAB) { + + if (sec->shdr.sh_type != SHT_SYMTAB) { continue; } - sh_symtab = symtab[i]; - sym_strtab = strtab[shdr[i].sh_link]; - for(j = 0; j < shdr[i].sh_size/sizeof(symtab[0][0]); j++) { + sh_symtab = sec->symtab; + sym_strtab = sec->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) { Elf32_Sym *sym; const char *name; - sym = &symtab[i][j]; + sym = &sec->symtab[j]; name = sym_name(sym_strtab, sym); if (sym->st_shndx != SHN_ABS) { continue; @@ -389,26 +408,27 @@ static void print_absolute_relocs(void) { int i, printed = 0; - for(i = 0; i < ehdr.e_shnum; i++) { + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + struct section *sec_applies, *sec_symtab; char *sym_strtab; Elf32_Sym *sh_symtab; - unsigned sec_applies, sec_symtab; int j; - if (shdr[i].sh_type != SHT_REL) { + if (sec->shdr.sh_type != SHT_REL) { continue; } - sec_symtab = shdr[i].sh_link; - sec_applies = shdr[i].sh_info; - if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) { + sec_symtab = sec->link; + sec_applies = &secs[sec->shdr.sh_info]; + if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { continue; } - sh_symtab = symtab[sec_symtab]; - sym_strtab = strtab[shdr[sec_symtab].sh_link]; - for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { + sh_symtab = sec_symtab->symtab; + sym_strtab = sec_symtab->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { Elf32_Rel *rel; Elf32_Sym *sym; const char *name; - rel = &reltab[i][j]; + rel = &sec->reltab[j]; sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; name = sym_name(sym_strtab, sym); if (sym->st_shndx != SHN_ABS) { @@ -456,26 +476,28 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) { int i; /* Walk through the relocations */ - for(i = 0; i < ehdr.e_shnum; i++) { + for (i = 0; i < ehdr.e_shnum; i++) { char *sym_strtab; Elf32_Sym *sh_symtab; - unsigned sec_applies, sec_symtab; + struct section *sec_applies, *sec_symtab; int j; - if (shdr[i].sh_type != SHT_REL) { + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_REL) { continue; } - sec_symtab = shdr[i].sh_link; - sec_applies = shdr[i].sh_info; - if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) { + sec_symtab = sec->link; + sec_applies = &secs[sec->shdr.sh_info]; + if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { continue; } - sh_symtab = symtab[sec_symtab]; - sym_strtab = strtab[shdr[sec_symtab].sh_link]; - for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { + sh_symtab = sec_symtab->symtab; + sym_strtab = sec->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { Elf32_Rel *rel; Elf32_Sym *sym; unsigned r_type; - rel = &reltab[i][j]; + rel = &sec->reltab[j]; sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; r_type = ELF32_R_TYPE(rel->r_info); /* Don't visit relocations to absolute symbols */ @@ -539,7 +561,7 @@ static void emit_relocs(int as_text) */ printf(".section \".data.reloc\",\"a\"\n"); printf(".balign 4\n"); - for(i = 0; i < reloc_count; i++) { + for (i = 0; i < reloc_count; i++) { printf("\t .long 0x%08lx\n", relocs[i]); } printf("\n"); @@ -550,7 +572,7 @@ static void emit_relocs(int as_text) /* Print a stop */ printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); /* Now print each relocation */ - for(i = 0; i < reloc_count; i++) { + for (i = 0; i < reloc_count; i++) { buf[0] = (relocs[i] >> 0) & 0xff; buf[1] = (relocs[i] >> 8) & 0xff; buf[2] = (relocs[i] >> 16) & 0xff; @@ -577,7 +599,7 @@ int main(int argc, char **argv) show_absolute_relocs = 0; as_text = 0; fname = NULL; - for(i = 1; i < argc; i++) { + for (i = 1; i < argc; i++) { char *arg = argv[i]; if (*arg == '-') { if (strcmp(argv[1], "--abs-syms") == 0) { diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c index 00e19edd852..92d6fd73dc7 100644 --- a/arch/x86/boot/cpu.c +++ b/arch/x86/boot/cpu.c @@ -28,6 +28,8 @@ static char *cpu_name(int level) if (level == 64) { return "x86-64"; } else { + if (level == 15) + level = 6; sprintf(buf, "i%d86", level); return buf; } diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 77569a4a3be..2296164b54d 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -165,6 +165,10 @@ void main(void) /* Set the video mode */ set_video(); + /* Parse command line for 'quiet' and pass it to decompressor. */ + if (cmdline_find_option_bool("quiet")) + boot_params.hdr.loadflags |= QUIET_FLAG; + /* Do the last things and invoke protected mode */ go_to_protected_mode(); } diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index acad32eb429..53165c97336 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c @@ -13,6 +13,7 @@ */ #include "boot.h" +#include <linux/kernel.h> #define SMAP 0x534d4150 /* ASCII "SMAP" */ @@ -53,7 +54,7 @@ static int detect_memory_e820(void) count++; desc++; - } while (next && count < E820MAX); + } while (next && count < ARRAY_SIZE(boot_params.e820_map)); return boot_params.e820_entries = count; } diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S index ab049d40a88..141b6e20ed3 100644 --- a/arch/x86/boot/pmjump.S +++ b/arch/x86/boot/pmjump.S @@ -33,6 +33,8 @@ protected_mode_jump: movw %cs, %bx shll $4, %ebx addl %ebx, 2f + jmp 1f # Short jump to serialize on 386/486 +1: movw $__BOOT_DS, %cx movw $__BOOT_TSS, %di @@ -40,8 +42,6 @@ protected_mode_jump: movl %cr0, %edx orb $X86_CR0_PE, %dl # Protected mode movl %edx, %cr0 - jmp 1f # Short jump to serialize on 386/486 -1: # Transition to 32-bit mode .byte 0x66, 0xea # ljmpl opcode diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index 40ecb8d7688..b939cb476de 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c @@ -259,8 +259,7 @@ static int vga_probe(void) return mode_count[adapter]; } -__videocard video_vga = -{ +__videocard video_vga = { .card_name = "VGA", .probe = vga_probe, .set_mode = vga_set_mode, diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index ad7ddaaff58..9bc34e2033e 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -1,54 +1,103 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.22-git14 -# Fri Jul 20 09:53:15 2007 +# Linux kernel version: 2.6.26-rc1 +# Sun May 4 19:59:02 2008 # +# CONFIG_64BIT is not set CONFIG_X86_32=y +# CONFIG_X86_64 is not set +CONFIG_X86=y +CONFIG_DEFCONFIG_LIST="arch/x86/configs/i386_defconfig" +# CONFIG_GENERIC_LOCKBREAK is not set CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CMOS_UPDATE=y CONFIG_CLOCKSOURCE_WATCHDOG=y CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y CONFIG_LOCKDEP_SUPPORT=y CONFIG_STACKTRACE_SUPPORT=y -CONFIG_SEMAPHORE_SLEEPERS=y -CONFIG_X86=y +CONFIG_HAVE_LATENCYTOP_SUPPORT=y +CONFIG_FAST_CMPXCHG_LOCAL=y CONFIG_MMU=y CONFIG_ZONE_DMA=y -CONFIG_QUICKLIST=y CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y CONFIG_GENERIC_BUG=y CONFIG_GENERIC_HWEIGHT=y +# CONFIG_GENERIC_GPIO is not set CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_DMI=y -CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +# CONFIG_RWSEM_GENERIC_SPINLOCK is not set +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +# CONFIG_GENERIC_TIME_VSYSCALL is not set +CONFIG_ARCH_HAS_CPU_RELAX=y +CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y +CONFIG_HAVE_SETUP_PER_CPU_AREA=y +# CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set +CONFIG_ARCH_HIBERNATION_POSSIBLE=y +CONFIG_ARCH_SUSPEND_POSSIBLE=y +# CONFIG_ZONE_DMA32 is not set +CONFIG_ARCH_POPULATES_NODE_MAP=y +# CONFIG_AUDIT_ARCH is not set +CONFIG_ARCH_SUPPORTS_AOUT=y +CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_X86_SMP=y +CONFIG_X86_32_SMP=y +CONFIG_X86_HT=y +CONFIG_X86_BIOS_REBOOT=y +CONFIG_X86_TRAMPOLINE=y +CONFIG_KTIME_SCALAR=y # -# Code maturity level options +# General setup # CONFIG_EXPERIMENTAL=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 - -# -# General setup -# CONFIG_LOCALVERSION="" -CONFIG_LOCALVERSION_AUTO=y +# CONFIG_LOCALVERSION_AUTO is not set CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y -# CONFIG_BSD_PROCESS_ACCT is not set -# CONFIG_TASKSTATS is not set -# CONFIG_USER_NS is not set -# CONFIG_AUDIT is not set -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=18 -# CONFIG_CPUSETS is not set -CONFIG_SYSFS_DEPRECATED=y +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_AUDIT=y +CONFIG_AUDITSYSCALL=y +CONFIG_AUDIT_TREE=y +# CONFIG_IKCONFIG is not set +CONFIG_LOG_BUF_SHIFT=17 +CONFIG_CGROUPS=y +# CONFIG_CGROUP_DEBUG is not set +CONFIG_CGROUP_NS=y +# CONFIG_CGROUP_DEVICE is not set +CONFIG_CPUSETS=y +CONFIG_GROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +# CONFIG_RT_GROUP_SCHED is not set +# CONFIG_USER_SCHED is not set +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +# CONFIG_CGROUP_MEM_RES_CTLR is not set +# CONFIG_SYSFS_DEPRECATED_V2 is not set +CONFIG_PROC_PID_CPUSET=y CONFIG_RELAY=y +CONFIG_NAMESPACES=y +CONFIG_UTS_NS=y +CONFIG_IPC_NS=y +CONFIG_USER_NS=y +CONFIG_PID_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" CONFIG_CC_OPTIMIZE_FOR_SIZE=y @@ -56,13 +105,15 @@ CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y +CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y -# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y +# CONFIG_COMPAT_BRK is not set CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_ANON_INODES=y @@ -76,6 +127,17 @@ CONFIG_SLUB_DEBUG=y # CONFIG_SLAB is not set CONFIG_SLUB=y # CONFIG_SLOB is not set +CONFIG_PROFILING=y +CONFIG_MARKERS=y +# CONFIG_OPROFILE is not set +CONFIG_HAVE_OPROFILE=y +CONFIG_KPROBES=y +CONFIG_KRETPROBES=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +# CONFIG_HAVE_DMA_ATTRS is not set +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 @@ -87,10 +149,10 @@ CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_KMOD is not set CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y -CONFIG_LBD=y -# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_LBD is not set +CONFIG_BLK_DEV_IO_TRACE=y # CONFIG_LSF is not set -# CONFIG_BLK_DEV_BSG is not set +CONFIG_BLK_DEV_BSG=y # # IO Schedulers @@ -103,7 +165,8 @@ CONFIG_IOSCHED_CFQ=y # CONFIG_DEFAULT_DEADLINE is not set CONFIG_DEFAULT_CFQ=y # CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="anticipatory" +CONFIG_DEFAULT_IOSCHED="cfq" +CONFIG_CLASSIC_RCU=y # # Processor type and features @@ -111,18 +174,21 @@ CONFIG_DEFAULT_IOSCHED="anticipatory" CONFIG_TICK_ONESHOT=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_SMP=y -# CONFIG_X86_PC is not set +CONFIG_X86_PC=y # CONFIG_X86_ELAN is not set # CONFIG_X86_VOYAGER is not set # CONFIG_X86_NUMAQ is not set # CONFIG_X86_SUMMIT is not set # CONFIG_X86_BIGSMP is not set # CONFIG_X86_VISWS is not set -CONFIG_X86_GENERICARCH=y +# CONFIG_X86_GENERICARCH is not set # CONFIG_X86_ES7000 is not set -# CONFIG_PARAVIRT is not set -CONFIG_X86_CYCLONE_TIMER=y +# CONFIG_X86_RDC321X is not set +# CONFIG_X86_VSMP is not set +CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +# CONFIG_PARAVIRT_GUEST is not set # CONFIG_M386 is not set # CONFIG_M486 is not set # CONFIG_M586 is not set @@ -130,9 +196,8 @@ CONFIG_X86_CYCLONE_TIMER=y # CONFIG_M586MMX is not set # CONFIG_M686 is not set # CONFIG_MPENTIUMII is not set -CONFIG_MPENTIUMIII=y +# CONFIG_MPENTIUMIII is not set # CONFIG_MPENTIUMM is not set -# CONFIG_MCORE2 is not set # CONFIG_MPENTIUM4 is not set # CONFIG_MK6 is not set # CONFIG_MK7 is not set @@ -147,14 +212,14 @@ CONFIG_MPENTIUMIII=y # CONFIG_MCYRIXIII is not set # CONFIG_MVIAC3_2 is not set # CONFIG_MVIAC7 is not set -CONFIG_X86_GENERIC=y +# CONFIG_MPSC is not set +CONFIG_MCORE2=y +# CONFIG_GENERIC_CPU is not set +# CONFIG_X86_GENERIC is not set +CONFIG_X86_CPU=y CONFIG_X86_CMPXCHG=y -CONFIG_X86_L1_CACHE_SHIFT=7 +CONFIG_X86_L1_CACHE_SHIFT=6 CONFIG_X86_XADD=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -# CONFIG_ARCH_HAS_ILOG2_U32 is not set -# CONFIG_ARCH_HAS_ILOG2_U64 is not set -CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_X86_WP_WORKS_OK=y CONFIG_X86_INVLPG=y CONFIG_X86_BSWAP=y @@ -162,106 +227,120 @@ CONFIG_X86_POPAD_OK=y CONFIG_X86_GOOD_APIC=y CONFIG_X86_INTEL_USERCOPY=y CONFIG_X86_USE_PPRO_CHECKSUM=y +CONFIG_X86_P6_NOP=y CONFIG_X86_TSC=y -CONFIG_X86_CMOV=y -CONFIG_X86_MINIMUM_CPU_FAMILY=4 +CONFIG_X86_MINIMUM_CPU_FAMILY=6 +CONFIG_X86_DEBUGCTLMSR=y CONFIG_HPET_TIMER=y CONFIG_HPET_EMULATE_RTC=y -CONFIG_NR_CPUS=32 -CONFIG_SCHED_SMT=y +CONFIG_DMI=y +# CONFIG_IOMMU_HELPER is not set +CONFIG_NR_CPUS=4 +# CONFIG_SCHED_SMT is not set CONFIG_SCHED_MC=y # CONFIG_PREEMPT_NONE is not set CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_PREEMPT is not set -CONFIG_PREEMPT_BKL=y CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y -CONFIG_X86_MCE=y -CONFIG_X86_MCE_NONFATAL=y -CONFIG_X86_MCE_P4THERMAL=y +# CONFIG_X86_MCE is not set CONFIG_VM86=y # CONFIG_TOSHIBA is not set # CONFIG_I8K is not set # CONFIG_X86_REBOOTFIXUPS is not set -CONFIG_MICROCODE=y -CONFIG_MICROCODE_OLD_INTERFACE=y +# CONFIG_MICROCODE is not set CONFIG_X86_MSR=y CONFIG_X86_CPUID=y - -# -# Firmware Drivers -# -# CONFIG_EDD is not set -# CONFIG_DELL_RBU is not set -# CONFIG_DCDBAS is not set -CONFIG_DMIID=y # CONFIG_NOHIGHMEM is not set CONFIG_HIGHMEM4G=y # CONFIG_HIGHMEM64G is not set CONFIG_PAGE_OFFSET=0xC0000000 CONFIG_HIGHMEM=y -CONFIG_ARCH_POPULATES_NODE_MAP=y +CONFIG_NEED_NODE_MEMMAP_SIZE=y +CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_FLATMEM_MANUAL=y +# CONFIG_FLATMEM_MANUAL is not set # CONFIG_DISCONTIGMEM_MANUAL is not set -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_FLATMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y -# CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y +CONFIG_HAVE_MEMORY_PRESENT=y +CONFIG_SPARSEMEM_STATIC=y +# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set + +# +# Memory hotplug is currently incompatible with Software Suspend +# +CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_RESOURCES_64BIT=y CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y -CONFIG_NR_QUICK=1 CONFIG_VIRT_TO_BUS=y # CONFIG_HIGHPTE is not set # CONFIG_MATH_EMULATION is not set CONFIG_MTRR=y -# CONFIG_EFI is not set +# CONFIG_X86_PAT is not set +CONFIG_EFI=y # CONFIG_IRQBALANCE is not set CONFIG_SECCOMP=y # CONFIG_HZ_100 is not set -CONFIG_HZ_250=y +# CONFIG_HZ_250 is not set # CONFIG_HZ_300 is not set -# CONFIG_HZ_1000 is not set -CONFIG_HZ=250 -# CONFIG_KEXEC is not set -# CONFIG_CRASH_DUMP is not set -CONFIG_PHYSICAL_START=0x100000 -# CONFIG_RELOCATABLE is not set -CONFIG_PHYSICAL_ALIGN=0x100000 -# CONFIG_HOTPLUG_CPU is not set -CONFIG_COMPAT_VDSO=y +CONFIG_HZ_1000=y +CONFIG_HZ=1000 +CONFIG_SCHED_HRTICK=y +CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y +CONFIG_PHYSICAL_START=0x1000000 +CONFIG_RELOCATABLE=y +CONFIG_PHYSICAL_ALIGN=0x200000 +CONFIG_HOTPLUG_CPU=y +# CONFIG_COMPAT_VDSO is not set CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y # -# Power management options (ACPI, APM) +# Power management options # CONFIG_PM=y -CONFIG_PM_LEGACY=y -# CONFIG_PM_DEBUG is not set - -# -# ACPI (Advanced Configuration and Power Interface) Support -# +CONFIG_PM_DEBUG=y +# CONFIG_PM_VERBOSE is not set +CONFIG_CAN_PM_TRACE=y +CONFIG_PM_TRACE=y +CONFIG_PM_TRACE_RTC=y +CONFIG_PM_SLEEP_SMP=y +CONFIG_PM_SLEEP=y +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y +CONFIG_HIBERNATION=y +CONFIG_PM_STD_PARTITION="" CONFIG_ACPI=y +CONFIG_ACPI_SLEEP=y CONFIG_ACPI_PROCFS=y +CONFIG_ACPI_PROCFS_POWER=y +CONFIG_ACPI_SYSFS_POWER=y +CONFIG_ACPI_PROC_EVENT=y CONFIG_ACPI_AC=y CONFIG_ACPI_BATTERY=y CONFIG_ACPI_BUTTON=y CONFIG_ACPI_FAN=y -# CONFIG_ACPI_DOCK is not set +CONFIG_ACPI_DOCK=y +# CONFIG_ACPI_BAY is not set CONFIG_ACPI_PROCESSOR=y +CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_THERMAL=y +# CONFIG_ACPI_WMI is not set # CONFIG_ACPI_ASUS is not set # CONFIG_ACPI_TOSHIBA is not set -CONFIG_ACPI_BLACKLIST_YEAR=2001 -CONFIG_ACPI_DEBUG=y +# CONFIG_ACPI_CUSTOM_DSDT is not set +CONFIG_ACPI_BLACKLIST_YEAR=0 +# CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_EC=y CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y CONFIG_X86_PM_TIMER=y -# CONFIG_ACPI_CONTAINER is not set +CONFIG_ACPI_CONTAINER=y # CONFIG_ACPI_SBS is not set # CONFIG_APM is not set @@ -271,15 +350,17 @@ CONFIG_X86_PM_TIMER=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_TABLE=y CONFIG_CPU_FREQ_DEBUG=y -CONFIG_CPU_FREQ_STAT=y -# CONFIG_CPU_FREQ_STAT_DETAILS is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +# CONFIG_CPU_FREQ_STAT is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y -CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set # # CPUFreq processor drivers @@ -287,8 +368,7 @@ CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y CONFIG_X86_ACPI_CPUFREQ=y # CONFIG_X86_POWERNOW_K6 is not set # CONFIG_X86_POWERNOW_K7 is not set -CONFIG_X86_POWERNOW_K8=y -CONFIG_X86_POWERNOW_K8_ACPI=y +# CONFIG_X86_POWERNOW_K8 is not set # CONFIG_X86_GX_SUSPMOD is not set # CONFIG_X86_SPEEDSTEP_CENTRINO is not set # CONFIG_X86_SPEEDSTEP_ICH is not set @@ -302,43 +382,72 @@ CONFIG_X86_POWERNOW_K8_ACPI=y # # shared options # -CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y +# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set # CONFIG_X86_SPEEDSTEP_LIB is not set +CONFIG_CPU_IDLE=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPU_IDLE_GOV_MENU=y # -# Bus options (PCI, PCMCIA, EISA, MCA, ISA) +# Bus options (PCI etc.) # CONFIG_PCI=y # CONFIG_PCI_GOBIOS is not set # CONFIG_PCI_GOMMCONFIG is not set # CONFIG_PCI_GODIRECT is not set CONFIG_PCI_GOANY=y +# CONFIG_PCI_GOOLPC is not set CONFIG_PCI_BIOS=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y -# CONFIG_PCIEPORTBUS is not set +CONFIG_PCI_DOMAINS=y +CONFIG_PCIEPORTBUS=y +# CONFIG_HOTPLUG_PCI_PCIE is not set +CONFIG_PCIEAER=y +# CONFIG_PCIEASPM is not set CONFIG_ARCH_SUPPORTS_MSI=y CONFIG_PCI_MSI=y +# CONFIG_PCI_LEGACY is not set # CONFIG_PCI_DEBUG is not set -# CONFIG_HT_IRQ is not set +CONFIG_HT_IRQ=y CONFIG_ISA_DMA_API=y # CONFIG_ISA is not set # CONFIG_MCA is not set # CONFIG_SCx200 is not set +# CONFIG_OLPC is not set CONFIG_K8_NB=y - -# -# PCCARD (PCMCIA/CardBus) support -# -# CONFIG_PCCARD is not set -# CONFIG_HOTPLUG_PCI is not set - -# -# Executable file formats +CONFIG_PCCARD=y +# CONFIG_PCMCIA_DEBUG is not set +CONFIG_PCMCIA=y +CONFIG_PCMCIA_LOAD_CIS=y +CONFIG_PCMCIA_IOCTL=y +CONFIG_CARDBUS=y + +# +# PC-card bridges +# +CONFIG_YENTA=y +CONFIG_YENTA_O2=y +CONFIG_YENTA_RICOH=y +CONFIG_YENTA_TI=y +CONFIG_YENTA_ENE_TUNE=y +CONFIG_YENTA_TOSHIBA=y +# CONFIG_PD6729 is not set +# CONFIG_I82092 is not set +CONFIG_PCCARD_NONSTATIC=y +CONFIG_HOTPLUG_PCI=y +# CONFIG_HOTPLUG_PCI_FAKE is not set +# CONFIG_HOTPLUG_PCI_IBM is not set +# CONFIG_HOTPLUG_PCI_ACPI is not set +# CONFIG_HOTPLUG_PCI_CPCI is not set +# CONFIG_HOTPLUG_PCI_SHPC is not set + +# +# Executable file formats / Emulations # CONFIG_BINFMT_ELF=y # CONFIG_BINFMT_AOUT is not set -# CONFIG_BINFMT_MISC is not set +CONFIG_BINFMT_MISC=y # # Networking @@ -349,59 +458,142 @@ CONFIG_NET=y # Networking options # CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set +CONFIG_PACKET_MMAP=y CONFIG_UNIX=y CONFIG_XFRM=y -# CONFIG_XFRM_USER is not set +CONFIG_XFRM_USER=y # CONFIG_XFRM_SUB_POLICY is not set # CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set # CONFIG_NET_KEY is not set CONFIG_INET=y CONFIG_IP_MULTICAST=y -# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_ASK_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set CONFIG_IP_FIB_HASH=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IP_PNP_BOOTP is not set -# CONFIG_IP_PNP_RARP is not set +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +# CONFIG_IP_PNP is not set # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set -# CONFIG_IP_MROUTE is not set +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y # CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set +CONFIG_SYN_COOKIES=y # CONFIG_INET_AH is not set # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set # CONFIG_INET_XFRM_TUNNEL is not set CONFIG_INET_TUNNEL=y -CONFIG_INET_XFRM_MODE_TRANSPORT=y -CONFIG_INET_XFRM_MODE_TUNNEL=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -CONFIG_INET_DIAG=y -CONFIG_INET_TCP_DIAG=y -# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_INET_LRO=y +# CONFIG_INET_DIAG is not set +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set CONFIG_TCP_CONG_CUBIC=y +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set +# CONFIG_TCP_CONG_LP is not set +# CONFIG_TCP_CONG_VENO is not set +# CONFIG_TCP_CONG_YEAH is not set +# CONFIG_TCP_CONG_ILLINOIS is not set +# CONFIG_DEFAULT_BIC is not set +CONFIG_DEFAULT_CUBIC=y +# CONFIG_DEFAULT_HTCP is not set +# CONFIG_DEFAULT_VEGAS is not set +# CONFIG_DEFAULT_WESTWOOD is not set +# CONFIG_DEFAULT_RENO is not set CONFIG_DEFAULT_TCP_CONG="cubic" -# CONFIG_TCP_MD5SIG is not set +CONFIG_TCP_MD5SIG=y +# CONFIG_IP_VS is not set CONFIG_IPV6=y # CONFIG_IPV6_PRIVACY is not set # CONFIG_IPV6_ROUTER_PREF is not set # CONFIG_IPV6_OPTIMISTIC_DAD is not set -# CONFIG_INET6_AH is not set -# CONFIG_INET6_ESP is not set +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y # CONFIG_INET6_IPCOMP is not set # CONFIG_IPV6_MIP6 is not set # CONFIG_INET6_XFRM_TUNNEL is not set # CONFIG_INET6_TUNNEL is not set CONFIG_INET6_XFRM_MODE_TRANSPORT=y CONFIG_INET6_XFRM_MODE_TUNNEL=y -# CONFIG_INET6_XFRM_MODE_BEET is not set +CONFIG_INET6_XFRM_MODE_BEET=y # CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set CONFIG_IPV6_SIT=y +CONFIG_IPV6_NDISC_NODETYPE=y # CONFIG_IPV6_TUNNEL is not set # CONFIG_IPV6_MULTIPLE_TABLES is not set -# CONFIG_NETWORK_SECMARK is not set -# CONFIG_NETFILTER is not set +# CONFIG_IPV6_MROUTE is not set +CONFIG_NETLABEL=y +CONFIG_NETWORK_SECMARK=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +# CONFIG_NETFILTER_ADVANCED is not set + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=y +CONFIG_NETFILTER_NETLINK_LOG=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_SIP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_STATE=y + +# +# IP: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_CONNTRACK_PROC_COMPAT=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_LOG=y +CONFIG_IP_NF_TARGET_ULOG=y +CONFIG_NF_NAT=y +CONFIG_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_NF_NAT_FTP=y +CONFIG_NF_NAT_IRC=y +# CONFIG_NF_NAT_TFTP is not set +# CONFIG_NF_NAT_AMANDA is not set +# CONFIG_NF_NAT_PPTP is not set +# CONFIG_NF_NAT_H323 is not set +CONFIG_NF_NAT_SIP=y +CONFIG_IP_NF_MANGLE=y + +# +# IPv6: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_IPV6HEADER=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set # CONFIG_TIPC is not set @@ -409,6 +601,7 @@ CONFIG_IPV6_SIT=y # CONFIG_BRIDGE is not set # CONFIG_VLAN_8021Q is not set # CONFIG_DECNET is not set +CONFIG_LLC=y # CONFIG_LLC2 is not set # CONFIG_IPX is not set # CONFIG_ATALK is not set @@ -416,28 +609,99 @@ CONFIG_IPV6_SIT=y # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set +CONFIG_NET_SCHED=y + +# +# Queueing/Scheduling +# +# CONFIG_NET_SCH_CBQ is not set +# CONFIG_NET_SCH_HTB is not set +# CONFIG_NET_SCH_HFSC is not set +# CONFIG_NET_SCH_PRIO is not set +# CONFIG_NET_SCH_RR is not set +# CONFIG_NET_SCH_RED is not set +# CONFIG_NET_SCH_SFQ is not set +# CONFIG_NET_SCH_TEQL is not set +# CONFIG_NET_SCH_TBF is not set +# CONFIG_NET_SCH_GRED is not set +# CONFIG_NET_SCH_DSMARK is not set +# CONFIG_NET_SCH_NETEM is not set +# CONFIG_NET_SCH_INGRESS is not set + +# +# Classification +# +CONFIG_NET_CLS=y +# CONFIG_NET_CLS_BASIC is not set +# CONFIG_NET_CLS_TCINDEX is not set +# CONFIG_NET_CLS_ROUTE4 is not set +# CONFIG_NET_CLS_FW is not set +# CONFIG_NET_CLS_U32 is not set +# CONFIG_NET_CLS_RSVP is not set +# CONFIG_NET_CLS_RSVP6 is not set +# CONFIG_NET_CLS_FLOW is not set +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +# CONFIG_NET_EMATCH_CMP is not set +# CONFIG_NET_EMATCH_NBYTE is not set +# CONFIG_NET_EMATCH_U32 is not set +# CONFIG_NET_EMATCH_META is not set +# CONFIG_NET_EMATCH_TEXT is not set +CONFIG_NET_CLS_ACT=y +# CONFIG_NET_ACT_POLICE is not set +# CONFIG_NET_ACT_GACT is not set +# CONFIG_NET_ACT_MIRRED is not set +# CONFIG_NET_ACT_IPT is not set +# CONFIG_NET_ACT_NAT is not set +# CONFIG_NET_ACT_PEDIT is not set +# CONFIG_NET_ACT_SIMP is not set +CONFIG_NET_SCH_FIFO=y # # Network testing # # CONFIG_NET_PKTGEN is not set # CONFIG_NET_TCPPROBE is not set -# CONFIG_HAMRADIO is not set +CONFIG_HAMRADIO=y + +# +# Packet Radio protocols +# +# CONFIG_AX25 is not set +# CONFIG_CAN is not set # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set +CONFIG_FIB_RULES=y # # Wireless # -# CONFIG_CFG80211 is not set -# CONFIG_WIRELESS_EXT is not set -# CONFIG_MAC80211 is not set +CONFIG_CFG80211=y +CONFIG_NL80211=y +CONFIG_WIRELESS_EXT=y +CONFIG_MAC80211=y + +# +# Rate control algorithm selection +# +CONFIG_MAC80211_RC_DEFAULT_PID=y +# CONFIG_MAC80211_RC_DEFAULT_NONE is not set + +# +# Selecting 'y' for an algorithm will +# + +# +# build the algorithm into mac80211. +# +CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_RC_PID=y +# CONFIG_MAC80211_MESH is not set +CONFIG_MAC80211_LEDS=y +# CONFIG_MAC80211_DEBUGFS is not set +# CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set +# CONFIG_MAC80211_DEBUG is not set # CONFIG_IEEE80211 is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -449,13 +713,15 @@ CONFIG_IPV6_SIT=y # # Generic Driver Options # +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=y # CONFIG_DEBUG_DRIVER is not set -# CONFIG_DEBUG_DEVRES is not set +CONFIG_DEBUG_DEVRES=y # CONFIG_SYS_HYPERVISOR is not set -# CONFIG_CONNECTOR is not set +CONFIG_CONNECTOR=y +CONFIG_PROC_EVENTS=y # CONFIG_MTD is not set # CONFIG_PARPORT is not set CONFIG_PNP=y @@ -466,7 +732,7 @@ CONFIG_PNP=y # CONFIG_PNPACPI=y CONFIG_BLK_DEV=y -CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -479,8 +745,8 @@ CONFIG_BLK_DEV_LOOP=y # CONFIG_BLK_DEV_UB is not set CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 +CONFIG_BLK_DEV_RAM_SIZE=16384 +# CONFIG_BLK_DEV_XIP is not set # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set CONFIG_MISC_DEVICES=y @@ -489,73 +755,17 @@ CONFIG_MISC_DEVICES=y # CONFIG_EEPROM_93CX6 is not set # CONFIG_SGI_IOC4 is not set # CONFIG_TIFM_CORE is not set +# CONFIG_ACER_WMI is not set +# CONFIG_ASUS_LAPTOP is not set +# CONFIG_FUJITSU_LAPTOP is not set +# CONFIG_TC1100_WMI is not set +# CONFIG_MSI_LAPTOP is not set # CONFIG_SONY_LAPTOP is not set # CONFIG_THINKPAD_ACPI is not set -CONFIG_IDE=y -CONFIG_BLK_DEV_IDE=y - -# -# Please see Documentation/ide.txt for help/info on IDE drives -# -# CONFIG_BLK_DEV_IDE_SATA is not set -# CONFIG_BLK_DEV_HD_IDE is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -CONFIG_BLK_DEV_IDECD=y -# CONFIG_BLK_DEV_IDETAPE is not set -# CONFIG_BLK_DEV_IDEFLOPPY is not set -# CONFIG_BLK_DEV_IDESCSI is not set -CONFIG_BLK_DEV_IDEACPI=y -# CONFIG_IDE_TASK_IOCTL is not set -CONFIG_IDE_PROC_FS=y - -# -# IDE chipset support/bugfixes -# -CONFIG_IDE_GENERIC=y -# CONFIG_BLK_DEV_CMD640 is not set -# CONFIG_BLK_DEV_IDEPNP is not set -CONFIG_BLK_DEV_IDEPCI=y -# CONFIG_IDEPCI_SHARE_IRQ is not set -CONFIG_IDEPCI_PCIBUS_ORDER=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_GENERIC is not set -# CONFIG_BLK_DEV_OPTI621 is not set -# CONFIG_BLK_DEV_RZ1000 is not set -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -# CONFIG_IDEDMA_ONLYDISK is not set -# CONFIG_BLK_DEV_AEC62XX is not set -# CONFIG_BLK_DEV_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -# CONFIG_BLK_DEV_ATIIXP is not set -# CONFIG_BLK_DEV_CMD64X is not set -# CONFIG_BLK_DEV_TRIFLEX is not set -# CONFIG_BLK_DEV_CY82C693 is not set -# CONFIG_BLK_DEV_CS5520 is not set -# CONFIG_BLK_DEV_CS5530 is not set -# CONFIG_BLK_DEV_CS5535 is not set -# CONFIG_BLK_DEV_HPT34X is not set -# CONFIG_BLK_DEV_HPT366 is not set -# CONFIG_BLK_DEV_JMICRON is not set -# CONFIG_BLK_DEV_SC1200 is not set -CONFIG_BLK_DEV_PIIX=y -# CONFIG_BLK_DEV_IT8213 is not set -# CONFIG_BLK_DEV_IT821X is not set -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_PDC202XX_OLD is not set -# CONFIG_BLK_DEV_PDC202XX_NEW is not set -# CONFIG_BLK_DEV_SVWKS is not set -# CONFIG_BLK_DEV_SIIMAGE is not set -# CONFIG_BLK_DEV_SIS5513 is not set -# CONFIG_BLK_DEV_SLC90E66 is not set -# CONFIG_BLK_DEV_TRM290 is not set -# CONFIG_BLK_DEV_VIA82CXXX is not set -# CONFIG_BLK_DEV_TC86C001 is not set -# CONFIG_IDE_ARM is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_BLK_DEV_HD is not set +# CONFIG_INTEL_MENLOW is not set +# CONFIG_ENCLOSURE_SERVICES is not set +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set # # SCSI device support @@ -564,8 +774,8 @@ CONFIG_BLK_DEV_IDEDMA=y CONFIG_SCSI=y CONFIG_SCSI_DMA=y # CONFIG_SCSI_TGT is not set -CONFIG_SCSI_NETLINK=y -# CONFIG_SCSI_PROC_FS is not set +# CONFIG_SCSI_NETLINK is not set +CONFIG_SCSI_PROC_FS=y # # SCSI support type (disk, tape, CD-ROM) @@ -574,7 +784,7 @@ CONFIG_BLK_DEV_SD=y # CONFIG_CHR_DEV_ST is not set # CONFIG_CHR_DEV_OSST is not set CONFIG_BLK_DEV_SR=y -# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y # CONFIG_CHR_DEV_SCH is not set @@ -582,7 +792,7 @@ CONFIG_CHR_DEV_SG=y # Some SCSI devices (e.g. CD jukebox) support multiple LUNs # # CONFIG_SCSI_MULTI_LUN is not set -# CONFIG_SCSI_CONSTANTS is not set +CONFIG_SCSI_CONSTANTS=y # CONFIG_SCSI_LOGGING is not set # CONFIG_SCSI_SCAN_ASYNC is not set CONFIG_SCSI_WAIT_SCAN=m @@ -591,81 +801,37 @@ CONFIG_SCSI_WAIT_SCAN=m # SCSI Transports # CONFIG_SCSI_SPI_ATTRS=y -CONFIG_SCSI_FC_ATTRS=y +# CONFIG_SCSI_FC_ATTRS is not set # CONFIG_SCSI_ISCSI_ATTRS is not set # CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set - -# -# SCSI low-level drivers -# -# CONFIG_ISCSI_TCP is not set -CONFIG_BLK_DEV_3W_XXXX_RAID=y -# CONFIG_SCSI_3W_9XXX is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AACRAID is not set -CONFIG_SCSI_AIC7XXX=y -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=5000 -CONFIG_AIC7XXX_DEBUG_ENABLE=y -CONFIG_AIC7XXX_DEBUG_MASK=0 -CONFIG_AIC7XXX_REG_PRETTY_PRINT=y -# CONFIG_SCSI_AIC7XXX_OLD is not set -CONFIG_SCSI_AIC79XX=y -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=4000 -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set -# CONFIG_SCSI_AIC94XX is not set -# CONFIG_SCSI_DPT_I2O is not set -# CONFIG_SCSI_ADVANSYS is not set -# CONFIG_SCSI_ARCMSR is not set -# CONFIG_MEGARAID_NEWGEN is not set -# CONFIG_MEGARAID_LEGACY is not set -# CONFIG_MEGARAID_SAS is not set -# CONFIG_SCSI_HPTIOP is not set -# CONFIG_SCSI_BUSLOGIC is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_FUTURE_DOMAIN is not set -# CONFIG_SCSI_GDTH is not set -# CONFIG_SCSI_IPS is not set -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_STEX is not set -# CONFIG_SCSI_SYM53C8XX_2 is not set -# CONFIG_SCSI_IPR is not set -# CONFIG_SCSI_QLOGIC_1280 is not set -# CONFIG_SCSI_QLA_FC is not set -# CONFIG_SCSI_QLA_ISCSI is not set -# CONFIG_SCSI_LPFC is not set -# CONFIG_SCSI_DC395x is not set -# CONFIG_SCSI_DC390T is not set -# CONFIG_SCSI_NSP32 is not set -# CONFIG_SCSI_DEBUG is not set -# CONFIG_SCSI_SRP is not set +# CONFIG_SCSI_SRP_ATTRS is not set +# CONFIG_SCSI_LOWLEVEL is not set +# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set CONFIG_ATA=y # CONFIG_ATA_NONSTANDARD is not set CONFIG_ATA_ACPI=y +CONFIG_SATA_PMP=y CONFIG_SATA_AHCI=y -CONFIG_SATA_SVW=y +# CONFIG_SATA_SIL24 is not set +CONFIG_ATA_SFF=y +# CONFIG_SATA_SVW is not set CONFIG_ATA_PIIX=y # CONFIG_SATA_MV is not set -CONFIG_SATA_NV=y +# CONFIG_SATA_NV is not set # CONFIG_PDC_ADMA is not set # CONFIG_SATA_QSTOR is not set # CONFIG_SATA_PROMISE is not set # CONFIG_SATA_SX4 is not set -CONFIG_SATA_SIL=y -# CONFIG_SATA_SIL24 is not set +# CONFIG_SATA_SIL is not set # CONFIG_SATA_SIS is not set # CONFIG_SATA_ULI is not set -CONFIG_SATA_VIA=y +# CONFIG_SATA_VIA is not set # CONFIG_SATA_VITESSE is not set # CONFIG_SATA_INIC162X is not set +# CONFIG_PATA_ACPI is not set # CONFIG_PATA_ALI is not set -# CONFIG_PATA_AMD is not set +CONFIG_PATA_AMD=y # CONFIG_PATA_ARTOP is not set # CONFIG_PATA_ATIIXP is not set # CONFIG_PATA_CMD640_PCI is not set @@ -673,6 +839,7 @@ CONFIG_SATA_VIA=y # CONFIG_PATA_CS5520 is not set # CONFIG_PATA_CS5530 is not set # CONFIG_PATA_CS5535 is not set +# CONFIG_PATA_CS5536 is not set # CONFIG_PATA_CYPRESS is not set # CONFIG_PATA_EFAR is not set # CONFIG_ATA_GENERIC is not set @@ -686,11 +853,14 @@ CONFIG_SATA_VIA=y # CONFIG_PATA_TRIFLEX is not set # CONFIG_PATA_MARVELL is not set # CONFIG_PATA_MPIIX is not set -# CONFIG_PATA_OLDPIIX is not set +CONFIG_PATA_OLDPIIX=y # CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NINJA32 is not set # CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_NS87415 is not set # CONFIG_PATA_OPTI is not set # CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PCMCIA is not set # CONFIG_PATA_PDC_OLD is not set # CONFIG_PATA_RADISYS is not set # CONFIG_PATA_RZ1000 is not set @@ -702,65 +872,42 @@ CONFIG_SATA_VIA=y # CONFIG_PATA_VIA is not set # CONFIG_PATA_WINBOND is not set CONFIG_MD=y -# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_MD=y +# CONFIG_MD_LINEAR is not set +# CONFIG_MD_RAID0 is not set +# CONFIG_MD_RAID1 is not set +# CONFIG_MD_RAID10 is not set +# CONFIG_MD_RAID456 is not set +# CONFIG_MD_MULTIPATH is not set +# CONFIG_MD_FAULTY is not set CONFIG_BLK_DEV_DM=y # CONFIG_DM_DEBUG is not set # CONFIG_DM_CRYPT is not set # CONFIG_DM_SNAPSHOT is not set -# CONFIG_DM_MIRROR is not set -# CONFIG_DM_ZERO is not set +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y # CONFIG_DM_MULTIPATH is not set # CONFIG_DM_DELAY is not set - -# -# Fusion MPT device support -# -CONFIG_FUSION=y -CONFIG_FUSION_SPI=y -# CONFIG_FUSION_FC is not set -# CONFIG_FUSION_SAS is not set -CONFIG_FUSION_MAX_SGE=128 -# CONFIG_FUSION_CTL is not set +# CONFIG_DM_UEVENT is not set +# CONFIG_FUSION is not set # # IEEE 1394 (FireWire) support # # CONFIG_FIREWIRE is not set -CONFIG_IEEE1394=y - -# -# Subsystem Options -# -# CONFIG_IEEE1394_VERBOSEDEBUG is not set - -# -# Controllers -# - -# -# Texas Instruments PCILynx requires I2C -# -CONFIG_IEEE1394_OHCI1394=y - -# -# Protocols -# -# CONFIG_IEEE1394_VIDEO1394 is not set -# CONFIG_IEEE1394_SBP2 is not set -# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set -# CONFIG_IEEE1394_ETH1394 is not set -# CONFIG_IEEE1394_DV1394 is not set -CONFIG_IEEE1394_RAWIO=y +# CONFIG_IEEE1394 is not set # CONFIG_I2O is not set CONFIG_MACINTOSH_DRIVERS=y -# CONFIG_MAC_EMUMOUSEBTN is not set +CONFIG_MAC_EMUMOUSEBTN=y CONFIG_NETDEVICES=y -CONFIG_NETDEVICES_MULTIQUEUE=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set +# CONFIG_IFB is not set # CONFIG_DUMMY is not set # CONFIG_BONDING is not set # CONFIG_MACVLAN is not set # CONFIG_EQUALIZER is not set # CONFIG_TUN is not set +# CONFIG_VETH is not set # CONFIG_NET_SB1000 is not set # CONFIG_ARCNET is not set # CONFIG_PHYLIB is not set @@ -770,38 +917,40 @@ CONFIG_MII=y # CONFIG_SUNGEM is not set # CONFIG_CASSINI is not set CONFIG_NET_VENDOR_3COM=y -CONFIG_VORTEX=y +# CONFIG_VORTEX is not set # CONFIG_TYPHOON is not set CONFIG_NET_TULIP=y # CONFIG_DE2104X is not set -CONFIG_TULIP=y -# CONFIG_TULIP_MWI is not set -# CONFIG_TULIP_MMIO is not set -# CONFIG_TULIP_NAPI is not set +# CONFIG_TULIP is not set # CONFIG_DE4X5 is not set # CONFIG_WINBOND_840 is not set # CONFIG_DM9102 is not set # CONFIG_ULI526X is not set +# CONFIG_PCMCIA_XIRCOM is not set # CONFIG_HP100 is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not set CONFIG_NET_PCI=y # CONFIG_PCNET32 is not set # CONFIG_AMD8111_ETH is not set # CONFIG_ADAPTEC_STARFIRE is not set -CONFIG_B44=y +# CONFIG_B44 is not set CONFIG_FORCEDETH=y # CONFIG_FORCEDETH_NAPI is not set -# CONFIG_DGRS is not set # CONFIG_EEPRO100 is not set CONFIG_E100=y # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set # CONFIG_NE2K_PCI is not set -CONFIG_8139CP=y +# CONFIG_8139CP is not set CONFIG_8139TOO=y -# CONFIG_8139TOO_PIO is not set +CONFIG_8139TOO_PIO=y # CONFIG_8139TOO_TUNE_TWISTER is not set # CONFIG_8139TOO_8129 is not set # CONFIG_8139_OLD_RX_RESET is not set +# CONFIG_R6040 is not set # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set # CONFIG_SUNDANCE is not set @@ -814,34 +963,75 @@ CONFIG_NETDEV_1000=y CONFIG_E1000=y # CONFIG_E1000_NAPI is not set # CONFIG_E1000_DISABLE_PACKET_SPLIT is not set +# CONFIG_E1000E is not set +# CONFIG_E1000E_ENABLED is not set +# CONFIG_IP1000 is not set +# CONFIG_IGB is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set -CONFIG_R8169=y -# CONFIG_R8169_NAPI is not set +# CONFIG_R8169 is not set # CONFIG_SIS190 is not set # CONFIG_SKGE is not set CONFIG_SKY2=y +# CONFIG_SKY2_DEBUG is not set # CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y -CONFIG_BNX2=y +# CONFIG_BNX2 is not set # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set # CONFIG_CHELSIO_T3 is not set +# CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set +# CONFIG_NIU is not set # CONFIG_MLX4_CORE is not set -# CONFIG_TR is not set +# CONFIG_TEHUTI is not set +# CONFIG_BNX2X is not set +# CONFIG_SFC is not set +CONFIG_TR=y +# CONFIG_IBMOL is not set +# CONFIG_IBMLS is not set +# CONFIG_3C359 is not set +# CONFIG_TMS380TR is not set # # Wireless LAN # # CONFIG_WLAN_PRE80211 is not set -# CONFIG_WLAN_80211 is not set +CONFIG_WLAN_80211=y +# CONFIG_PCMCIA_RAYCS is not set +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set +# CONFIG_LIBERTAS is not set +# CONFIG_AIRO is not set +# CONFIG_HERMES is not set +# CONFIG_ATMEL is not set +# CONFIG_AIRO_CS is not set +# CONFIG_PCMCIA_WL3501 is not set +# CONFIG_PRISM54 is not set +# CONFIG_USB_ZD1201 is not set +# CONFIG_USB_NET_RNDIS_WLAN is not set +# CONFIG_RTL8180 is not set +# CONFIG_RTL8187 is not set +# CONFIG_ADM8211 is not set +# CONFIG_P54_COMMON is not set +CONFIG_ATH5K=y +# CONFIG_ATH5K_DEBUG is not set +# CONFIG_IWLWIFI is not set +# CONFIG_IWLCORE is not set +# CONFIG_IWLWIFI_LEDS is not set +# CONFIG_IWL4965 is not set +# CONFIG_IWL3945 is not set +# CONFIG_HOSTAP is not set +# CONFIG_B43 is not set +# CONFIG_B43LEGACY is not set +# CONFIG_ZD1211RW is not set +# CONFIG_RT2X00 is not set # # USB Network Adapters @@ -850,16 +1040,27 @@ CONFIG_NETDEV_10000=y # CONFIG_USB_KAWETH is not set # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set -# CONFIG_USB_USBNET_MII is not set # CONFIG_USB_USBNET is not set +CONFIG_NET_PCMCIA=y +# CONFIG_PCMCIA_3C589 is not set +# CONFIG_PCMCIA_3C574 is not set +# CONFIG_PCMCIA_FMVJ18X is not set +# CONFIG_PCMCIA_PCNET is not set +# CONFIG_PCMCIA_NMCLAN is not set +# CONFIG_PCMCIA_SMC91C92 is not set +# CONFIG_PCMCIA_XIRC2PS is not set +# CONFIG_PCMCIA_AXNET is not set +# CONFIG_PCMCIA_IBMTR is not set # CONFIG_WAN is not set -# CONFIG_FDDI is not set +CONFIG_FDDI=y +# CONFIG_DEFXX is not set +# CONFIG_SKFP is not set # CONFIG_HIPPI is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set # CONFIG_NET_FC is not set -# CONFIG_SHAPER is not set CONFIG_NETCONSOLE=y +# CONFIG_NETCONSOLE_DYNAMIC is not set CONFIG_NETPOLL=y # CONFIG_NETPOLL_TRAP is not set CONFIG_NET_POLL_CONTROLLER=y @@ -870,18 +1071,17 @@ CONFIG_NET_POLL_CONTROLLER=y # Input device support # CONFIG_INPUT=y -# CONFIG_INPUT_FF_MEMLESS is not set -# CONFIG_INPUT_POLLDEV is not set +CONFIG_INPUT_FF_MEMLESS=y +CONFIG_INPUT_POLLDEV=y # # Userland interfaces # CONFIG_INPUT_MOUSEDEV=y -CONFIG_INPUT_MOUSEDEV_PSAUX=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_TSDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_EVBUG is not set @@ -906,17 +1106,63 @@ CONFIG_MOUSE_PS2_TRACKPOINT=y # CONFIG_MOUSE_SERIAL is not set # CONFIG_MOUSE_APPLETOUCH is not set # CONFIG_MOUSE_VSXXXAA is not set -# CONFIG_INPUT_JOYSTICK is not set -# CONFIG_INPUT_TABLET is not set -# CONFIG_INPUT_TOUCHSCREEN is not set -# CONFIG_INPUT_MISC is not set +CONFIG_INPUT_JOYSTICK=y +# CONFIG_JOYSTICK_ANALOG is not set +# CONFIG_JOYSTICK_A3D is not set +# CONFIG_JOYSTICK_ADI is not set +# CONFIG_JOYSTICK_COBRA is not set +# CONFIG_JOYSTICK_GF2K is not set +# CONFIG_JOYSTICK_GRIP is not set +# CONFIG_JOYSTICK_GRIP_MP is not set +# CONFIG_JOYSTICK_GUILLEMOT is not set +# CONFIG_JOYSTICK_INTERACT is not set +# CONFIG_JOYSTICK_SIDEWINDER is not set +# CONFIG_JOYSTICK_TMDC is not set +# CONFIG_JOYSTICK_IFORCE is not set +# CONFIG_JOYSTICK_WARRIOR is not set +# CONFIG_JOYSTICK_MAGELLAN is not set +# CONFIG_JOYSTICK_SPACEORB is not set +# CONFIG_JOYSTICK_SPACEBALL is not set +# CONFIG_JOYSTICK_STINGER is not set +# CONFIG_JOYSTICK_TWIDJOY is not set +# CONFIG_JOYSTICK_ZHENHUA is not set +# CONFIG_JOYSTICK_JOYDUMP is not set +# CONFIG_JOYSTICK_XPAD is not set +CONFIG_INPUT_TABLET=y +# CONFIG_TABLET_USB_ACECAD is not set +# CONFIG_TABLET_USB_AIPTEK is not set +# CONFIG_TABLET_USB_GTCO is not set +# CONFIG_TABLET_USB_KBTAB is not set +# CONFIG_TABLET_USB_WACOM is not set +CONFIG_INPUT_TOUCHSCREEN=y +# CONFIG_TOUCHSCREEN_FUJITSU is not set +# CONFIG_TOUCHSCREEN_GUNZE is not set +# CONFIG_TOUCHSCREEN_ELO is not set +# CONFIG_TOUCHSCREEN_MTOUCH is not set +# CONFIG_TOUCHSCREEN_MK712 is not set +# CONFIG_TOUCHSCREEN_PENMOUNT is not set +# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set +# CONFIG_TOUCHSCREEN_TOUCHWIN is not set +# CONFIG_TOUCHSCREEN_UCB1400 is not set +# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set +CONFIG_INPUT_MISC=y +# CONFIG_INPUT_PCSPKR is not set +# CONFIG_INPUT_APANEL is not set +# CONFIG_INPUT_WISTRON_BTNS is not set +# CONFIG_INPUT_ATLAS_BTNS is not set +# CONFIG_INPUT_ATI_REMOTE is not set +# CONFIG_INPUT_ATI_REMOTE2 is not set +# CONFIG_INPUT_KEYSPAN_REMOTE is not set +# CONFIG_INPUT_POWERMATE is not set +# CONFIG_INPUT_YEALINK is not set +# CONFIG_INPUT_UINPUT is not set # # Hardware I/O ports # CONFIG_SERIO=y CONFIG_SERIO_I8042=y -# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_SERPORT=y # CONFIG_SERIO_CT82C710 is not set # CONFIG_SERIO_PCIPS2 is not set CONFIG_SERIO_LIBPS2=y @@ -929,8 +1175,26 @@ CONFIG_SERIO_LIBPS2=y CONFIG_VT=y CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y -# CONFIG_VT_HW_CONSOLE_BINDING is not set -# CONFIG_SERIAL_NONSTANDARD is not set +CONFIG_VT_HW_CONSOLE_BINDING=y +CONFIG_DEVKMEM=y +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_COMPUTONE is not set +# CONFIG_ROCKETPORT is not set +# CONFIG_CYCLADES is not set +# CONFIG_DIGIEPCA is not set +# CONFIG_MOXA_INTELLIO is not set +# CONFIG_MOXA_SMARTIO is not set +# CONFIG_ISI is not set +# CONFIG_SYNCLINK is not set +# CONFIG_SYNCLINKMP is not set +# CONFIG_SYNCLINK_GT is not set +# CONFIG_N_HDLC is not set +# CONFIG_RISCOM8 is not set +# CONFIG_SPECIALIX is not set +# CONFIG_SX is not set +# CONFIG_RIO is not set +# CONFIG_STALDRV is not set +# CONFIG_NOZOMI is not set # # Serial drivers @@ -940,9 +1204,14 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_FIX_EARLYCON_MEM=y CONFIG_SERIAL_8250_PCI=y CONFIG_SERIAL_8250_PNP=y -CONFIG_SERIAL_8250_NR_UARTS=4 +# CONFIG_SERIAL_8250_CS is not set +CONFIG_SERIAL_8250_NR_UARTS=32 CONFIG_SERIAL_8250_RUNTIME_UARTS=4 -# CONFIG_SERIAL_8250_EXTENDED is not set +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_RSA=y # # Non-8250 serial port support @@ -951,89 +1220,275 @@ CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y -CONFIG_LEGACY_PTYS=y -CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_LEGACY_PTYS is not set # CONFIG_IPMI_HANDLER is not set -# CONFIG_WATCHDOG is not set CONFIG_HW_RANDOM=y -CONFIG_HW_RANDOM_INTEL=y -CONFIG_HW_RANDOM_AMD=y +# CONFIG_HW_RANDOM_INTEL is not set +# CONFIG_HW_RANDOM_AMD is not set CONFIG_HW_RANDOM_GEODE=y CONFIG_HW_RANDOM_VIA=y -# CONFIG_NVRAM is not set -CONFIG_RTC=y +CONFIG_NVRAM=y # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set # CONFIG_SONYPI is not set -CONFIG_AGP=y -# CONFIG_AGP_ALI is not set -# CONFIG_AGP_ATI is not set -# CONFIG_AGP_AMD is not set -CONFIG_AGP_AMD64=y -CONFIG_AGP_INTEL=y -# CONFIG_AGP_NVIDIA is not set -# CONFIG_AGP_SIS is not set -# CONFIG_AGP_SWORKS is not set -# CONFIG_AGP_VIA is not set -# CONFIG_AGP_EFFICEON is not set -# CONFIG_DRM is not set + +# +# PCMCIA character devices +# +# CONFIG_SYNCLINK_CS is not set +# CONFIG_CARDMAN_4000 is not set +# CONFIG_CARDMAN_4040 is not set +# CONFIG_IPWIRELESS is not set # CONFIG_MWAVE is not set # CONFIG_PC8736x_GPIO is not set # CONFIG_NSC_GPIO is not set # CONFIG_CS5535_GPIO is not set -CONFIG_RAW_DRIVER=y -CONFIG_MAX_RAW_DEVS=256 +# CONFIG_RAW_DRIVER is not set CONFIG_HPET=y # CONFIG_HPET_RTC_IRQ is not set -CONFIG_HPET_MMAP=y +# CONFIG_HPET_MMAP is not set # CONFIG_HANGCHECK_TIMER is not set # CONFIG_TCG_TPM is not set # CONFIG_TELCLOCK is not set CONFIG_DEVPORT=y -# CONFIG_I2C is not set - -# -# SPI support -# +CONFIG_I2C=y +CONFIG_I2C_BOARDINFO=y +# CONFIG_I2C_CHARDEV is not set + +# +# I2C Hardware Bus support +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +CONFIG_I2C_I801=y +# CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_PROSAVAGE is not set +# CONFIG_I2C_SAVAGE4 is not set +# CONFIG_I2C_SIMTEC is not set +# CONFIG_SCx200_ACB is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_TAOS_EVM is not set +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_TINY_USB is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set +# CONFIG_I2C_VOODOO3 is not set +# CONFIG_I2C_PCA_PLATFORM is not set + +# +# Miscellaneous I2C Chip support +# +# CONFIG_DS1682 is not set +# CONFIG_SENSORS_EEPROM is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_PCF8575 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_SENSORS_MAX6875 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set # CONFIG_SPI is not set -# CONFIG_SPI_MASTER is not set # CONFIG_W1 is not set -# CONFIG_POWER_SUPPLY is not set +CONFIG_POWER_SUPPLY=y +# CONFIG_POWER_SUPPLY_DEBUG is not set +# CONFIG_PDA_POWER is not set +# CONFIG_BATTERY_DS2760 is not set # CONFIG_HWMON is not set +CONFIG_THERMAL=y +CONFIG_WATCHDOG=y +# CONFIG_WATCHDOG_NOWAYOUT is not set + +# +# Watchdog Device Drivers +# +# CONFIG_SOFT_WATCHDOG is not set +# CONFIG_ACQUIRE_WDT is not set +# CONFIG_ADVANTECH_WDT is not set +# CONFIG_ALIM1535_WDT is not set +# CONFIG_ALIM7101_WDT is not set +# CONFIG_SC520_WDT is not set +# CONFIG_EUROTECH_WDT is not set +# CONFIG_IB700_WDT is not set +# CONFIG_IBMASR is not set +# CONFIG_WAFER_WDT is not set +# CONFIG_I6300ESB_WDT is not set +# CONFIG_ITCO_WDT is not set +# CONFIG_IT8712F_WDT is not set +# CONFIG_HP_WATCHDOG is not set +# CONFIG_SC1200_WDT is not set +# CONFIG_PC87413_WDT is not set +# CONFIG_60XX_WDT is not set +# CONFIG_SBC8360_WDT is not set +# CONFIG_SBC7240_WDT is not set +# CONFIG_CPU5_WDT is not set +# CONFIG_SMSC37B787_WDT is not set +# CONFIG_W83627HF_WDT is not set +# CONFIG_W83697HF_WDT is not set +# CONFIG_W83877F_WDT is not set +# CONFIG_W83977F_WDT is not set +# CONFIG_MACHZ_WDT is not set +# CONFIG_SBC_EPX_C3_WATCHDOG is not set + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set + +# +# USB-based Watchdog Cards +# +# CONFIG_USBPCWATCHDOG is not set + +# +# Sonics Silicon Backplane +# +CONFIG_SSB_POSSIBLE=y +# CONFIG_SSB is not set # # Multifunction device drivers # # CONFIG_MFD_SM501 is not set +# CONFIG_HTC_PASIC3 is not set # # Multimedia devices # + +# +# Multimedia core support +# # CONFIG_VIDEO_DEV is not set # CONFIG_DVB_CORE is not set + +# +# Multimedia drivers +# CONFIG_DAB=y # CONFIG_USB_DABUSB is not set # # Graphics support # -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set +CONFIG_AGP=y +# CONFIG_AGP_ALI is not set +# CONFIG_AGP_ATI is not set +# CONFIG_AGP_AMD is not set +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +# CONFIG_AGP_NVIDIA is not set +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_SWORKS is not set +# CONFIG_AGP_VIA is not set +# CONFIG_AGP_EFFICEON is not set +CONFIG_DRM=y +# CONFIG_DRM_TDFX is not set +# CONFIG_DRM_R128 is not set +# CONFIG_DRM_RADEON is not set +# CONFIG_DRM_I810 is not set +# CONFIG_DRM_I830 is not set +CONFIG_DRM_I915=y +# CONFIG_DRM_MGA is not set +# CONFIG_DRM_SIS is not set +# CONFIG_DRM_VIA is not set +# CONFIG_DRM_SAVAGE is not set +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +CONFIG_FB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_IMAGEBLIT=y +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +CONFIG_FB_DEFERRED_IO=y +# CONFIG_FB_SVGALIB is not set +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y + +# +# Frame buffer hardware drivers +# +# CONFIG_FB_CIRRUS is not set +# CONFIG_FB_PM2 is not set +# CONFIG_FB_CYBER2000 is not set +# CONFIG_FB_ARC is not set +# CONFIG_FB_ASILIANT is not set +# CONFIG_FB_IMSTT is not set +# CONFIG_FB_VGA16 is not set +# CONFIG_FB_UVESA is not set +# CONFIG_FB_VESA is not set +CONFIG_FB_EFI=y +# CONFIG_FB_IMAC is not set +# CONFIG_FB_N411 is not set +# CONFIG_FB_HGA is not set +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_NVIDIA is not set +# CONFIG_FB_RIVA is not set +# CONFIG_FB_I810 is not set +# CONFIG_FB_LE80578 is not set +# CONFIG_FB_INTEL is not set +# CONFIG_FB_MATROX is not set +# CONFIG_FB_RADEON is not set +# CONFIG_FB_ATY128 is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_S3 is not set +# CONFIG_FB_SAVAGE is not set +# CONFIG_FB_SIS is not set +# CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +# CONFIG_FB_3DFX is not set +# CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_VT8623 is not set +# CONFIG_FB_CYBLA is not set +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_ARK is not set +# CONFIG_FB_PM3 is not set +# CONFIG_FB_GEODE is not set +# CONFIG_FB_VIRTUAL is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_BACKLIGHT_CLASS_DEVICE=y +# CONFIG_BACKLIGHT_CORGI is not set +# CONFIG_BACKLIGHT_PROGEAR is not set # # Display device support # # CONFIG_DISPLAY_SUPPORT is not set -# CONFIG_VGASTATE is not set -# CONFIG_FB is not set # # Console display driver support # CONFIG_VGA_CONSOLE=y CONFIG_VGACON_SOFT_SCROLLBACK=y -CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=128 +CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64 CONFIG_VIDEO_SELECT=y CONFIG_DUMMY_CONSOLE=y +# CONFIG_FRAMEBUFFER_CONSOLE is not set +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_LOGO_LINUX_CLUT224=y # # Sound @@ -1043,33 +1498,167 @@ CONFIG_SOUND=y # # Advanced Linux Sound Architecture # -# CONFIG_SND is not set +CONFIG_SND=y +CONFIG_SND_TIMER=y +CONFIG_SND_PCM=y +CONFIG_SND_HWDEP=y +CONFIG_SND_SEQUENCER=y +CONFIG_SND_SEQ_DUMMY=y +CONFIG_SND_OSSEMUL=y +CONFIG_SND_MIXER_OSS=y +CONFIG_SND_PCM_OSS=y +CONFIG_SND_PCM_OSS_PLUGINS=y +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_DYNAMIC_MINORS=y +CONFIG_SND_SUPPORT_OLD_API=y +CONFIG_SND_VERBOSE_PROCFS=y +# CONFIG_SND_VERBOSE_PRINTK is not set +# CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y + +# +# Generic devices +# +# CONFIG_SND_PCSP is not set +# CONFIG_SND_DUMMY is not set +# CONFIG_SND_VIRMIDI is not set +# CONFIG_SND_MTPAV is not set +# CONFIG_SND_SERIAL_U16550 is not set +# CONFIG_SND_MPU401 is not set + +# +# PCI devices +# +# CONFIG_SND_AD1889 is not set +# CONFIG_SND_ALS300 is not set +# CONFIG_SND_ALS4000 is not set +# CONFIG_SND_ALI5451 is not set +# CONFIG_SND_ATIIXP is not set +# CONFIG_SND_ATIIXP_MODEM is not set +# CONFIG_SND_AU8810 is not set +# CONFIG_SND_AU8820 is not set +# CONFIG_SND_AU8830 is not set +# CONFIG_SND_AW2 is not set +# CONFIG_SND_AZT3328 is not set +# CONFIG_SND_BT87X is not set +# CONFIG_SND_CA0106 is not set +# CONFIG_SND_CMIPCI is not set +# CONFIG_SND_OXYGEN is not set +# CONFIG_SND_CS4281 is not set +# CONFIG_SND_CS46XX is not set +# CONFIG_SND_CS5530 is not set +# CONFIG_SND_CS5535AUDIO is not set +# CONFIG_SND_DARLA20 is not set +# CONFIG_SND_GINA20 is not set +# CONFIG_SND_LAYLA20 is not set +# CONFIG_SND_DARLA24 is not set +# CONFIG_SND_GINA24 is not set +# CONFIG_SND_LAYLA24 is not set +# CONFIG_SND_MONA is not set +# CONFIG_SND_MIA is not set +# CONFIG_SND_ECHO3G is not set +# CONFIG_SND_INDIGO is not set +# CONFIG_SND_INDIGOIO is not set +# CONFIG_SND_INDIGODJ is not set +# CONFIG_SND_EMU10K1 is not set +# CONFIG_SND_EMU10K1X is not set +# CONFIG_SND_ENS1370 is not set +# CONFIG_SND_ENS1371 is not set +# CONFIG_SND_ES1938 is not set +# CONFIG_SND_ES1968 is not set +# CONFIG_SND_FM801 is not set +CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_HWDEP=y +CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_CODEC_ANALOG=y +CONFIG_SND_HDA_CODEC_SIGMATEL=y +CONFIG_SND_HDA_CODEC_VIA=y +CONFIG_SND_HDA_CODEC_ATIHDMI=y +CONFIG_SND_HDA_CODEC_CONEXANT=y +CONFIG_SND_HDA_CODEC_CMEDIA=y +CONFIG_SND_HDA_CODEC_SI3054=y +CONFIG_SND_HDA_GENERIC=y +# CONFIG_SND_HDA_POWER_SAVE is not set +# CONFIG_SND_HDSP is not set +# CONFIG_SND_HDSPM is not set +# CONFIG_SND_HIFIER is not set +# CONFIG_SND_ICE1712 is not set +# CONFIG_SND_ICE1724 is not set +# CONFIG_SND_INTEL8X0 is not set +# CONFIG_SND_INTEL8X0M is not set +# CONFIG_SND_KORG1212 is not set +# CONFIG_SND_MAESTRO3 is not set +# CONFIG_SND_MIXART is not set +# CONFIG_SND_NM256 is not set +# CONFIG_SND_PCXHR is not set +# CONFIG_SND_RIPTIDE is not set +# CONFIG_SND_RME32 is not set +# CONFIG_SND_RME96 is not set +# CONFIG_SND_RME9652 is not set +# CONFIG_SND_SIS7019 is not set +# CONFIG_SND_SONICVIBES is not set +# CONFIG_SND_TRIDENT is not set +# CONFIG_SND_VIA82XX is not set +# CONFIG_SND_VIA82XX_MODEM is not set +# CONFIG_SND_VIRTUOSO is not set +# CONFIG_SND_VX222 is not set +# CONFIG_SND_YMFPCI is not set + +# +# USB devices +# +# CONFIG_SND_USB_AUDIO is not set +# CONFIG_SND_USB_USX2Y is not set +# CONFIG_SND_USB_CAIAQ is not set + +# +# PCMCIA devices +# +# CONFIG_SND_VXPOCKET is not set +# CONFIG_SND_PDAUDIOCF is not set + +# +# System on Chip audio support +# +# CONFIG_SND_SOC is not set + +# +# ALSA SoC audio for Freescale SOCs +# + +# +# SoC Audio for the Texas Instruments OMAP +# # # Open Sound System # -CONFIG_SOUND_PRIME=y -# CONFIG_SOUND_TRIDENT is not set -# CONFIG_SOUND_MSNDCLAS is not set -# CONFIG_SOUND_MSNDPIN is not set -# CONFIG_SOUND_OSS is not set +# CONFIG_SOUND_PRIME is not set CONFIG_HID_SUPPORT=y CONFIG_HID=y -# CONFIG_HID_DEBUG is not set +CONFIG_HID_DEBUG=y +CONFIG_HIDRAW=y # # USB Input Devices # CONFIG_USB_HID=y -# CONFIG_USB_HIDINPUT_POWERBOOK is not set -# CONFIG_HID_FF is not set -# CONFIG_USB_HIDDEV is not set +CONFIG_USB_HIDINPUT_POWERBOOK=y +CONFIG_HID_FF=y +CONFIG_HID_PID=y +CONFIG_LOGITECH_FF=y +# CONFIG_LOGIRUMBLEPAD2_FF is not set +CONFIG_PANTHERLORD_FF=y +CONFIG_THRUSTMASTER_FF=y +CONFIG_ZEROPLUS_FF=y +CONFIG_USB_HIDDEV=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y CONFIG_USB_ARCH_HAS_EHCI=y CONFIG_USB=y -# CONFIG_USB_DEBUG is not set +CONFIG_USB_DEBUG=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y # # Miscellaneous USB options @@ -1077,18 +1666,18 @@ CONFIG_USB=y CONFIG_USB_DEVICEFS=y # CONFIG_USB_DEVICE_CLASS is not set # CONFIG_USB_DYNAMIC_MINORS is not set -# CONFIG_USB_SUSPEND is not set -# CONFIG_USB_PERSIST is not set +CONFIG_USB_SUSPEND=y # CONFIG_USB_OTG is not set # # USB Host Controller Drivers # +# CONFIG_USB_C67X00_HCD is not set CONFIG_USB_EHCI_HCD=y -# CONFIG_USB_EHCI_SPLIT_ISO is not set # CONFIG_USB_EHCI_ROOT_HUB_TT is not set # CONFIG_USB_EHCI_TT_NEWSCHED is not set # CONFIG_USB_ISP116X_HCD is not set +# CONFIG_USB_ISP1760_HCD is not set CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set # CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set @@ -1121,8 +1710,10 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_SDDR55 is not set # CONFIG_USB_STORAGE_JUMPSHOT is not set # CONFIG_USB_STORAGE_ALAUDA is not set +# CONFIG_USB_STORAGE_ONETOUCH is not set # CONFIG_USB_STORAGE_KARMA is not set -# CONFIG_USB_LIBUSUAL is not set +# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set +CONFIG_USB_LIBUSUAL=y # # USB Imaging devices @@ -1134,10 +1725,6 @@ CONFIG_USB_MON=y # # USB port drivers # - -# -# USB Serial Converter support -# # CONFIG_USB_SERIAL is not set # @@ -1163,90 +1750,125 @@ CONFIG_USB_MON=y # CONFIG_USB_TRANCEVIBRATOR is not set # CONFIG_USB_IOWARRIOR is not set # CONFIG_USB_TEST is not set +# CONFIG_USB_GADGET is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y # -# USB DSL modem support +# LED drivers # +# CONFIG_LEDS_CLEVO_MAIL is not set # -# USB Gadget Support +# LED Triggers # -# CONFIG_USB_GADGET is not set -# CONFIG_MMC is not set +CONFIG_LEDS_TRIGGERS=y +# CONFIG_LEDS_TRIGGER_TIMER is not set +# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set +# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set +# CONFIG_ACCESSIBILITY is not set +# CONFIG_INFINIBAND is not set +CONFIG_EDAC=y # -# LED devices +# Reporting subsystems # -# CONFIG_NEW_LEDS is not set +# CONFIG_EDAC_DEBUG is not set +# CONFIG_EDAC_MM_EDAC is not set +CONFIG_RTC_LIB=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +# CONFIG_RTC_DEBUG is not set # -# LED drivers +# RTC interfaces # +CONFIG_RTC_INTF_SYSFS=y +CONFIG_RTC_INTF_PROC=y +CONFIG_RTC_INTF_DEV=y +# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set +# CONFIG_RTC_DRV_TEST is not set # -# LED Triggers +# I2C RTC drivers # -# CONFIG_INFINIBAND is not set -# CONFIG_EDAC is not set +# CONFIG_RTC_DRV_DS1307 is not set +# CONFIG_RTC_DRV_DS1374 is not set +# CONFIG_RTC_DRV_DS1672 is not set +# CONFIG_RTC_DRV_MAX6900 is not set +# CONFIG_RTC_DRV_RS5C372 is not set +# CONFIG_RTC_DRV_ISL1208 is not set +# CONFIG_RTC_DRV_X1205 is not set +# CONFIG_RTC_DRV_PCF8563 is not set +# CONFIG_RTC_DRV_PCF8583 is not set +# CONFIG_RTC_DRV_M41T80 is not set +# CONFIG_RTC_DRV_S35390A is not set # -# Real Time Clock +# SPI RTC drivers # -# CONFIG_RTC_CLASS is not set # -# DMA Engine support +# Platform RTC drivers # -# CONFIG_DMA_ENGINE is not set +CONFIG_RTC_DRV_CMOS=y +# CONFIG_RTC_DRV_DS1511 is not set +# CONFIG_RTC_DRV_DS1553 is not set +# CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_STK17TA8 is not set +# CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_V3020 is not set # -# DMA Clients +# on-CPU RTC drivers # +CONFIG_DMADEVICES=y # # DMA Devices # -CONFIG_VIRTUALIZATION=y -# CONFIG_KVM is not set +# CONFIG_INTEL_IOATDMA is not set +# CONFIG_UIO is not set # -# Userspace I/O +# Firmware Drivers # -# CONFIG_UIO is not set +# CONFIG_EDD is not set +CONFIG_EFI_VARS=y +# CONFIG_DELL_RBU is not set +# CONFIG_DCDBAS is not set +CONFIG_DMIID=y +# CONFIG_ISCSI_IBFT_FIND is not set # # File systems # -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -# CONFIG_EXT2_FS_SECURITY is not set -# CONFIG_EXT2_FS_XIP is not set +# CONFIG_EXT2_FS is not set CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y -# CONFIG_EXT3_FS_SECURITY is not set +CONFIG_EXT3_FS_SECURITY=y # CONFIG_EXT4DEV_FS is not set CONFIG_JBD=y # CONFIG_JBD_DEBUG is not set CONFIG_FS_MBCACHE=y -CONFIG_REISERFS_FS=y -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -# CONFIG_REISERFS_FS_SECURITY is not set +# CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y # CONFIG_XFS_FS is not set -# CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set +CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y -# CONFIG_QUOTA is not set -CONFIG_DNOTIFY=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +# CONFIG_QFMT_V1 is not set +CONFIG_QFMT_V2=y +CONFIG_QUOTACTL=y # CONFIG_AUTOFS_FS is not set CONFIG_AUTOFS4_FS=y # CONFIG_FUSE_FS is not set @@ -1256,8 +1878,8 @@ CONFIG_GENERIC_ACL=y # CD-ROM/DVD Filesystems # CONFIG_ISO9660_FS=y -# CONFIG_JOLIET is not set -# CONFIG_ZISOFS is not set +CONFIG_JOLIET=y +CONFIG_ZISOFS=y # CONFIG_UDF_FS is not set # @@ -1275,13 +1897,13 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y +CONFIG_PROC_VMCORE=y CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y -CONFIG_RAMFS=y # CONFIG_CONFIGFS_FS is not set # @@ -1289,6 +1911,7 @@ CONFIG_RAMFS=y # # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set # CONFIG_HFS_FS is not set # CONFIG_HFSPLUS_FS is not set # CONFIG_BEFS_FS is not set @@ -1296,33 +1919,15 @@ CONFIG_RAMFS=y # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set # CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set - -# -# Network File Systems -# -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -# CONFIG_NFS_V3_ACL is not set -# CONFIG_NFS_V4 is not set -# CONFIG_NFS_DIRECTIO is not set -CONFIG_NFSD=y -CONFIG_NFSD_V3=y -# CONFIG_NFSD_V3_ACL is not set -# CONFIG_NFSD_V4 is not set -CONFIG_NFSD_TCP=y -CONFIG_ROOT_NFS=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_EXPORTFS=y -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=y -# CONFIG_SUNRPC_BIND34 is not set -# CONFIG_RPCSEC_GSS_KRB5 is not set -# CONFIG_RPCSEC_GSS_SPKM3 is not set +CONFIG_NETWORK_FILESYSTEMS=y +# CONFIG_NFS_FS is not set +# CONFIG_NFSD is not set # CONFIG_SMB_FS is not set # CONFIG_CIFS is not set # CONFIG_NCP_FS is not set @@ -1332,14 +1937,26 @@ CONFIG_SUNRPC=y # # Partition Types # -# CONFIG_PARTITION_ADVANCED is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +# CONFIG_ATARI_PARTITION is not set +CONFIG_MAC_PARTITION=y CONFIG_MSDOS_PARTITION=y - -# -# Native Language Support -# +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +# CONFIG_LDM_PARTITION is not set +CONFIG_SGI_PARTITION=y +# CONFIG_ULTRIX_PARTITION is not set +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_EFI_PARTITION=y +# CONFIG_SYSV68_PARTITION is not set CONFIG_NLS=y -CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y # CONFIG_NLS_CODEPAGE_737 is not set # CONFIG_NLS_CODEPAGE_775 is not set @@ -1374,37 +1991,33 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_ISO8859_9 is not set # CONFIG_NLS_ISO8859_13 is not set # CONFIG_NLS_ISO8859_14 is not set -CONFIG_NLS_ISO8859_15=y +# CONFIG_NLS_ISO8859_15 is not set # CONFIG_NLS_KOI8_R is not set # CONFIG_NLS_KOI8_U is not set CONFIG_NLS_UTF8=y - -# -# Distributed Lock Manager -# # CONFIG_DLM is not set -CONFIG_INSTRUMENTATION=y -CONFIG_PROFILING=y -CONFIG_OPROFILE=y -CONFIG_KPROBES=y # # Kernel hacking # CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set +# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y -CONFIG_UNUSED_SYMBOLS=y -# CONFIG_DEBUG_FS is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_FS=y # CONFIG_HEADERS_CHECK is not set CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set -CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_DETECT_SOFTLOCKUP is not set # CONFIG_SCHED_DEBUG is not set -# CONFIG_SCHEDSTATS is not set +CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y +# CONFIG_DEBUG_OBJECTS is not set # CONFIG_SLUB_DEBUG_ON is not set +# CONFIG_SLUB_STATS is not set # CONFIG_DEBUG_RT_MUTEXES is not set # CONFIG_RT_MUTEX_TESTER is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -1419,48 +2032,174 @@ CONFIG_TIMER_STATS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_LIST is not set -# CONFIG_FRAME_POINTER is not set -CONFIG_OPTIMIZE_INLINING=y +# CONFIG_DEBUG_SG is not set +CONFIG_FRAME_POINTER=y +# CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_KPROBES_SANITY_TEST is not set +# CONFIG_BACKTRACE_SELF_TEST is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set +# CONFIG_LATENCYTOP is not set +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +# CONFIG_SAMPLES is not set +# CONFIG_KGDB is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_NONPROMISC_DEVMEM is not set CONFIG_EARLY_PRINTK=y CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_DEBUG_STACK_USAGE is not set -# CONFIG_DEBUG_RODATA is not set +CONFIG_DEBUG_STACK_USAGE=y +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_X86_PTDUMP is not set +CONFIG_DEBUG_RODATA=y +# CONFIG_DEBUG_RODATA_TEST is not set +CONFIG_DEBUG_NX_TEST=m # CONFIG_4KSTACKS is not set CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y CONFIG_DOUBLEFAULT=y +CONFIG_IO_DELAY_TYPE_0X80=0 +CONFIG_IO_DELAY_TYPE_0XED=1 +CONFIG_IO_DELAY_TYPE_UDELAY=2 +CONFIG_IO_DELAY_TYPE_NONE=3 +CONFIG_IO_DELAY_0X80=y +# CONFIG_IO_DELAY_0XED is not set +# CONFIG_IO_DELAY_UDELAY is not set +# CONFIG_IO_DELAY_NONE is not set +CONFIG_DEFAULT_IO_DELAY_TYPE=0 +CONFIG_DEBUG_BOOT_PARAMS=y +# CONFIG_CPA_DEBUG is not set # # Security options # -# CONFIG_KEYS is not set -# CONFIG_SECURITY is not set -# CONFIG_CRYPTO is not set +CONFIG_KEYS=y +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +# CONFIG_SECURITY_NETWORK_XFRM is not set +CONFIG_SECURITY_CAPABILITIES=y +CONFIG_SECURITY_FILE_CAPABILITIES=y +# CONFIG_SECURITY_ROOTPLUG is not set +CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1 +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_SECURITY_SELINUX_DEVELOP=y +CONFIG_SECURITY_SELINUX_AVC_STATS=y +CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +# CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT is not set +# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set +# CONFIG_SECURITY_SMACK is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_MANAGER=y +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +CONFIG_CRYPTO_AUTHENC=y +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +CONFIG_CRYPTO_ECB=y +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +CONFIG_CRYPTO_HMAC=y +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_MICHAEL_MIC is not set +CONFIG_CRYPTO_SHA1=y +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +CONFIG_CRYPTO_AES=y +# CONFIG_CRYPTO_AES_586 is not set +# CONFIG_CRYPTO_ANUBIS is not set +CONFIG_CRYPTO_ARC4=y +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SALSA20_586 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_TWOFISH_586 is not set + +# +# Compression +# +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_LZO is not set +CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_DEV_PADLOCK is not set +# CONFIG_CRYPTO_DEV_GEODE is not set +# CONFIG_CRYPTO_DEV_HIFN_795X is not set +CONFIG_HAVE_KVM=y +CONFIG_VIRTUALIZATION=y +# CONFIG_KVM is not set +# CONFIG_LGUEST is not set +# CONFIG_VIRTIO_PCI is not set +# CONFIG_VIRTIO_BALLOON is not set # # Library routines # CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_FIRST_BIT=y +CONFIG_GENERIC_FIND_NEXT_BIT=y # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set # CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y # CONFIG_CRC7 is not set # CONFIG_LIBCRC32C is not set +CONFIG_AUDIT_GENERIC=y CONFIG_ZLIB_INFLATE=y CONFIG_PLIST=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y CONFIG_HAS_DMA=y -CONFIG_GENERIC_HARDIRQS=y -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_GENERIC_PENDING_IRQ=y -CONFIG_X86_SMP=y -CONFIG_X86_HT=y -CONFIG_X86_BIOS_REBOOT=y -CONFIG_X86_TRAMPOLINE=y -CONFIG_KTIME_SCALAR=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 2d6f5b2809d..ae5124e064d 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1,64 +1,103 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.22-git14 -# Fri Jul 20 09:53:15 2007 +# Linux kernel version: 2.6.26-rc1 +# Sun May 4 19:59:57 2008 # -CONFIG_X86_64=y CONFIG_64BIT=y +# CONFIG_X86_32 is not set +CONFIG_X86_64=y CONFIG_X86=y +CONFIG_DEFCONFIG_LIST="arch/x86/configs/x86_64_defconfig" +# CONFIG_GENERIC_LOCKBREAK is not set CONFIG_GENERIC_TIME=y -CONFIG_GENERIC_TIME_VSYSCALL=y CONFIG_GENERIC_CMOS_UPDATE=y -CONFIG_ZONE_DMA32=y +CONFIG_CLOCKSOURCE_WATCHDOG=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y CONFIG_LOCKDEP_SUPPORT=y CONFIG_STACKTRACE_SUPPORT=y -CONFIG_SEMAPHORE_SLEEPERS=y +CONFIG_HAVE_LATENCYTOP_SUPPORT=y +CONFIG_FAST_CMPXCHG_LOCAL=y CONFIG_MMU=y CONFIG_ZONE_DMA=y -CONFIG_QUICKLIST=y -CONFIG_NR_QUICK=2 -CONFIG_RWSEM_GENERIC_SPINLOCK=y -CONFIG_GENERIC_HWEIGHT=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_X86_CMPXCHG=y -CONFIG_EARLY_PRINTK=y CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y -CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_ARCH_POPULATES_NODE_MAP=y -CONFIG_DMI=y -CONFIG_AUDIT_ARCH=y CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_HWEIGHT=y +# CONFIG_GENERIC_GPIO is not set +CONFIG_ARCH_MAY_HAVE_PC_FDC=y +CONFIG_RWSEM_GENERIC_SPINLOCK=y +# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set # CONFIG_ARCH_HAS_ILOG2_U32 is not set # CONFIG_ARCH_HAS_ILOG2_U64 is not set -CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_TIME_VSYSCALL=y +CONFIG_ARCH_HAS_CPU_RELAX=y +CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y +CONFIG_HAVE_SETUP_PER_CPU_AREA=y +CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y +CONFIG_ARCH_HIBERNATION_POSSIBLE=y +CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_ZONE_DMA32=y +CONFIG_ARCH_POPULATES_NODE_MAP=y +CONFIG_AUDIT_ARCH=y +CONFIG_ARCH_SUPPORTS_AOUT=y +CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_X86_SMP=y +CONFIG_X86_64_SMP=y +CONFIG_X86_HT=y +CONFIG_X86_BIOS_REBOOT=y +CONFIG_X86_TRAMPOLINE=y +# CONFIG_KTIME_SCALAR is not set # -# Code maturity level options +# General setup # CONFIG_EXPERIMENTAL=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 - -# -# General setup -# CONFIG_LOCALVERSION="" -CONFIG_LOCALVERSION_AUTO=y +# CONFIG_LOCALVERSION_AUTO is not set CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y -# CONFIG_BSD_PROCESS_ACCT is not set -# CONFIG_TASKSTATS is not set -# CONFIG_USER_NS is not set -# CONFIG_AUDIT is not set -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=18 -# CONFIG_CPUSETS is not set -CONFIG_SYSFS_DEPRECATED=y +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_AUDIT=y +CONFIG_AUDITSYSCALL=y +CONFIG_AUDIT_TREE=y +# CONFIG_IKCONFIG is not set +CONFIG_LOG_BUF_SHIFT=17 +CONFIG_CGROUPS=y +# CONFIG_CGROUP_DEBUG is not set +CONFIG_CGROUP_NS=y +# CONFIG_CGROUP_DEVICE is not set +CONFIG_CPUSETS=y +CONFIG_GROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +# CONFIG_RT_GROUP_SCHED is not set +# CONFIG_USER_SCHED is not set +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +# CONFIG_CGROUP_MEM_RES_CTLR is not set +# CONFIG_SYSFS_DEPRECATED_V2 is not set +CONFIG_PROC_PID_CPUSET=y CONFIG_RELAY=y +CONFIG_NAMESPACES=y +CONFIG_UTS_NS=y +CONFIG_IPC_NS=y +CONFIG_USER_NS=y +CONFIG_PID_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" CONFIG_CC_OPTIMIZE_FOR_SIZE=y @@ -66,13 +105,15 @@ CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y +CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y -# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y +# CONFIG_COMPAT_BRK is not set CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_ANON_INODES=y @@ -82,9 +123,21 @@ CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y CONFIG_VM_EVENT_COUNTERS=y -CONFIG_SLAB=y -# CONFIG_SLUB is not set +CONFIG_SLUB_DEBUG=y +# CONFIG_SLAB is not set +CONFIG_SLUB=y # CONFIG_SLOB is not set +CONFIG_PROFILING=y +CONFIG_MARKERS=y +# CONFIG_OPROFILE is not set +CONFIG_HAVE_OPROFILE=y +CONFIG_KPROBES=y +CONFIG_KRETPROBES=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +# CONFIG_HAVE_DMA_ATTRS is not set +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 @@ -96,14 +149,15 @@ CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_KMOD is not set CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y -# CONFIG_BLK_DEV_IO_TRACE is not set -# CONFIG_BLK_DEV_BSG is not set +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BLK_DEV_BSG=y +CONFIG_BLOCK_COMPAT=y # # IO Schedulers # CONFIG_IOSCHED_NOOP=y -# CONFIG_IOSCHED_AS is not set +CONFIG_IOSCHED_AS=y CONFIG_IOSCHED_DEADLINE=y CONFIG_IOSCHED_CFQ=y # CONFIG_DEFAULT_AS is not set @@ -111,107 +165,177 @@ CONFIG_IOSCHED_CFQ=y CONFIG_DEFAULT_CFQ=y # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="cfq" +CONFIG_CLASSIC_RCU=y # # Processor type and features # +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_SMP=y CONFIG_X86_PC=y +# CONFIG_X86_ELAN is not set +# CONFIG_X86_VOYAGER is not set +# CONFIG_X86_NUMAQ is not set +# CONFIG_X86_SUMMIT is not set +# CONFIG_X86_BIGSMP is not set +# CONFIG_X86_VISWS is not set +# CONFIG_X86_GENERICARCH is not set +# CONFIG_X86_ES7000 is not set +# CONFIG_X86_RDC321X is not set # CONFIG_X86_VSMP is not set +# CONFIG_PARAVIRT_GUEST is not set +CONFIG_MEMTEST_BOOTPARAM=y +CONFIG_MEMTEST_BOOTPARAM_VALUE=0 +# CONFIG_M386 is not set +# CONFIG_M486 is not set +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +# CONFIG_M686 is not set +# CONFIG_MPENTIUMII is not set +# CONFIG_MPENTIUMIII is not set +# CONFIG_MPENTIUMM is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set # CONFIG_MK8 is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MEFFICEON is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MGEODEGX1 is not set +# CONFIG_MGEODE_LX is not set +# CONFIG_MCYRIXIII is not set +# CONFIG_MVIAC3_2 is not set +# CONFIG_MVIAC7 is not set # CONFIG_MPSC is not set -# CONFIG_MCORE2 is not set -CONFIG_GENERIC_CPU=y -CONFIG_X86_L1_CACHE_BYTES=128 -CONFIG_X86_L1_CACHE_SHIFT=7 -CONFIG_X86_INTERNODE_CACHE_BYTES=128 -CONFIG_X86_TSC=y +CONFIG_MCORE2=y +# CONFIG_GENERIC_CPU is not set +CONFIG_X86_CPU=y +CONFIG_X86_L1_CACHE_BYTES=64 +CONFIG_X86_INTERNODE_CACHE_BYTES=64 +CONFIG_X86_CMPXCHG=y +CONFIG_X86_L1_CACHE_SHIFT=6 CONFIG_X86_GOOD_APIC=y -# CONFIG_MICROCODE is not set -CONFIG_X86_MSR=y -CONFIG_X86_CPUID=y -CONFIG_X86_HT=y -CONFIG_X86_IO_APIC=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_MTRR=y -CONFIG_SMP=y -CONFIG_SCHED_SMT=y +CONFIG_X86_INTEL_USERCOPY=y +CONFIG_X86_USE_PPRO_CHECKSUM=y +CONFIG_X86_P6_NOP=y +CONFIG_X86_TSC=y +CONFIG_X86_CMOV=y +CONFIG_X86_MINIMUM_CPU_FAMILY=64 +CONFIG_X86_DEBUGCTLMSR=y +CONFIG_HPET_TIMER=y +CONFIG_HPET_EMULATE_RTC=y +CONFIG_DMI=y +CONFIG_GART_IOMMU=y +CONFIG_CALGARY_IOMMU=y +CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y +CONFIG_SWIOTLB=y +CONFIG_IOMMU_HELPER=y +CONFIG_NR_CPUS=4 +# CONFIG_SCHED_SMT is not set CONFIG_SCHED_MC=y # CONFIG_PREEMPT_NONE is not set CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_PREEMPT is not set -CONFIG_PREEMPT_BKL=y +CONFIG_X86_LOCAL_APIC=y +CONFIG_X86_IO_APIC=y +# CONFIG_X86_MCE is not set +# CONFIG_I8K is not set +# CONFIG_MICROCODE is not set +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y CONFIG_NUMA=y CONFIG_K8_NUMA=y -CONFIG_NODES_SHIFT=6 CONFIG_X86_64_ACPI_NUMA=y -CONFIG_NUMA_EMU=y +CONFIG_NODES_SPAN_OTHER_NODES=y +# CONFIG_NUMA_EMU is not set +CONFIG_NODES_SHIFT=6 +CONFIG_ARCH_SPARSEMEM_DEFAULT=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SELECT_MEMORY_MODEL=y +CONFIG_SELECT_MEMORY_MODEL=y +# CONFIG_FLATMEM_MANUAL is not set +# CONFIG_DISCONTIGMEM_MANUAL is not set +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y CONFIG_NEED_MULTIPLE_NODES=y +CONFIG_HAVE_MEMORY_PRESENT=y # CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPARSEMEM_EXTREME=y +CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y +CONFIG_SPARSEMEM_VMEMMAP=y + +# +# Memory hotplug is currently incompatible with Software Suspend +# +CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_MIGRATION=y CONFIG_RESOURCES_64BIT=y CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y -CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y -CONFIG_OUT_OF_LINE_PFN_TO_PAGE=y -CONFIG_NR_CPUS=32 -CONFIG_PHYSICAL_ALIGN=0x200000 -CONFIG_HOTPLUG_CPU=y -CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y -CONFIG_HPET_TIMER=y -CONFIG_HPET_EMULATE_RTC=y -CONFIG_GART_IOMMU=y -# CONFIG_CALGARY_IOMMU is not set -CONFIG_SWIOTLB=y -CONFIG_X86_MCE=y -CONFIG_X86_MCE_INTEL=y -CONFIG_X86_MCE_AMD=y -# CONFIG_KEXEC is not set -# CONFIG_CRASH_DUMP is not set -# CONFIG_RELOCATABLE is not set -CONFIG_PHYSICAL_START=0x200000 +CONFIG_MTRR=y +# CONFIG_X86_PAT is not set +CONFIG_EFI=y CONFIG_SECCOMP=y -# CONFIG_CC_STACKPROTECTOR is not set # CONFIG_HZ_100 is not set -CONFIG_HZ_250=y +# CONFIG_HZ_250 is not set # CONFIG_HZ_300 is not set -# CONFIG_HZ_1000 is not set -CONFIG_HZ=250 -CONFIG_K8_NB=y -CONFIG_GENERIC_HARDIRQS=y -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_ISA_DMA_API=y -CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_HZ_1000=y +CONFIG_HZ=1000 +CONFIG_SCHED_HRTICK=y +CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y +CONFIG_PHYSICAL_START=0x1000000 +CONFIG_RELOCATABLE=y +CONFIG_PHYSICAL_ALIGN=0x200000 +CONFIG_HOTPLUG_CPU=y +# CONFIG_COMPAT_VDSO is not set +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y +CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y # # Power management options # +CONFIG_ARCH_HIBERNATION_HEADER=y CONFIG_PM=y -# CONFIG_PM_LEGACY is not set -# CONFIG_PM_DEBUG is not set +CONFIG_PM_DEBUG=y +# CONFIG_PM_VERBOSE is not set +CONFIG_CAN_PM_TRACE=y +CONFIG_PM_TRACE=y +CONFIG_PM_TRACE_RTC=y +CONFIG_PM_SLEEP_SMP=y +CONFIG_PM_SLEEP=y +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y CONFIG_HIBERNATION=y CONFIG_PM_STD_PARTITION="" - -# -# ACPI (Advanced Configuration and Power Interface) Support -# CONFIG_ACPI=y CONFIG_ACPI_SLEEP=y -CONFIG_ACPI_SLEEP_PROC_FS=y -CONFIG_ACPI_SLEEP_PROC_SLEEP=y CONFIG_ACPI_PROCFS=y +CONFIG_ACPI_PROCFS_POWER=y +CONFIG_ACPI_SYSFS_POWER=y +CONFIG_ACPI_PROC_EVENT=y CONFIG_ACPI_AC=y CONFIG_ACPI_BATTERY=y CONFIG_ACPI_BUTTON=y CONFIG_ACPI_FAN=y -# CONFIG_ACPI_DOCK is not set +CONFIG_ACPI_DOCK=y +# CONFIG_ACPI_BAY is not set CONFIG_ACPI_PROCESSOR=y CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_THERMAL=y CONFIG_ACPI_NUMA=y +# CONFIG_ACPI_WMI is not set # CONFIG_ACPI_ASUS is not set # CONFIG_ACPI_TOSHIBA is not set +# CONFIG_ACPI_CUSTOM_DSDT is not set CONFIG_ACPI_BLACKLIST_YEAR=0 # CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_EC=y @@ -227,29 +351,34 @@ CONFIG_ACPI_CONTAINER=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_TABLE=y CONFIG_CPU_FREQ_DEBUG=y -CONFIG_CPU_FREQ_STAT=y -# CONFIG_CPU_FREQ_STAT_DETAILS is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +# CONFIG_CPU_FREQ_STAT is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y -CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set # # CPUFreq processor drivers # -CONFIG_X86_POWERNOW_K8=y -CONFIG_X86_POWERNOW_K8_ACPI=y -# CONFIG_X86_SPEEDSTEP_CENTRINO is not set CONFIG_X86_ACPI_CPUFREQ=y +# CONFIG_X86_POWERNOW_K8 is not set +# CONFIG_X86_SPEEDSTEP_CENTRINO is not set +# CONFIG_X86_P4_CLOCKMOD is not set # # shared options # -CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y +# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set # CONFIG_X86_SPEEDSTEP_LIB is not set +CONFIG_CPU_IDLE=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPU_IDLE_GOV_MENU=y # # Bus options (PCI etc.) @@ -257,27 +386,56 @@ CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y CONFIG_PCI=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y +CONFIG_PCI_DOMAINS=y +CONFIG_DMAR=y +CONFIG_DMAR_GFX_WA=y +CONFIG_DMAR_FLOPPY_WA=y CONFIG_PCIEPORTBUS=y +# CONFIG_HOTPLUG_PCI_PCIE is not set CONFIG_PCIEAER=y +# CONFIG_PCIEASPM is not set CONFIG_ARCH_SUPPORTS_MSI=y CONFIG_PCI_MSI=y +# CONFIG_PCI_LEGACY is not set # CONFIG_PCI_DEBUG is not set -# CONFIG_HT_IRQ is not set - -# -# PCCARD (PCMCIA/CardBus) support -# -# CONFIG_PCCARD is not set -# CONFIG_HOTPLUG_PCI is not set +CONFIG_HT_IRQ=y +CONFIG_ISA_DMA_API=y +CONFIG_K8_NB=y +CONFIG_PCCARD=y +# CONFIG_PCMCIA_DEBUG is not set +CONFIG_PCMCIA=y +CONFIG_PCMCIA_LOAD_CIS=y +CONFIG_PCMCIA_IOCTL=y +CONFIG_CARDBUS=y + +# +# PC-card bridges +# +CONFIG_YENTA=y +CONFIG_YENTA_O2=y +CONFIG_YENTA_RICOH=y +CONFIG_YENTA_TI=y +CONFIG_YENTA_ENE_TUNE=y +CONFIG_YENTA_TOSHIBA=y +# CONFIG_PD6729 is not set +# CONFIG_I82092 is not set +CONFIG_PCCARD_NONSTATIC=y +CONFIG_HOTPLUG_PCI=y +# CONFIG_HOTPLUG_PCI_FAKE is not set +# CONFIG_HOTPLUG_PCI_ACPI is not set +# CONFIG_HOTPLUG_PCI_CPCI is not set +# CONFIG_HOTPLUG_PCI_SHPC is not set # # Executable file formats / Emulations # CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set +CONFIG_COMPAT_BINFMT_ELF=y +CONFIG_BINFMT_MISC=y CONFIG_IA32_EMULATION=y -CONFIG_IA32_AOUT=y +# CONFIG_IA32_AOUT is not set CONFIG_COMPAT=y +CONFIG_COMPAT_FOR_U64_ALIGNMENT=y CONFIG_SYSVIPC_COMPAT=y # @@ -289,22 +447,31 @@ CONFIG_NET=y # Networking options # CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set +CONFIG_PACKET_MMAP=y CONFIG_UNIX=y +CONFIG_XFRM=y +CONFIG_XFRM_USER=y +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set # CONFIG_NET_KEY is not set CONFIG_INET=y CONFIG_IP_MULTICAST=y -# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_ASK_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set CONFIG_IP_FIB_HASH=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IP_PNP_BOOTP is not set -# CONFIG_IP_PNP_RARP is not set +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +# CONFIG_IP_PNP is not set # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set -# CONFIG_IP_MROUTE is not set +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y # CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set +CONFIG_SYN_COOKIES=y # CONFIG_INET_AH is not set # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set @@ -313,31 +480,109 @@ CONFIG_INET_TUNNEL=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -CONFIG_INET_DIAG=y -CONFIG_INET_TCP_DIAG=y -# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_INET_LRO=y +# CONFIG_INET_DIAG is not set +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set CONFIG_TCP_CONG_CUBIC=y +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set +# CONFIG_TCP_CONG_LP is not set +# CONFIG_TCP_CONG_VENO is not set +# CONFIG_TCP_CONG_YEAH is not set +# CONFIG_TCP_CONG_ILLINOIS is not set +# CONFIG_DEFAULT_BIC is not set +CONFIG_DEFAULT_CUBIC=y +# CONFIG_DEFAULT_HTCP is not set +# CONFIG_DEFAULT_VEGAS is not set +# CONFIG_DEFAULT_WESTWOOD is not set +# CONFIG_DEFAULT_RENO is not set CONFIG_DEFAULT_TCP_CONG="cubic" -# CONFIG_TCP_MD5SIG is not set +CONFIG_TCP_MD5SIG=y +# CONFIG_IP_VS is not set CONFIG_IPV6=y # CONFIG_IPV6_PRIVACY is not set # CONFIG_IPV6_ROUTER_PREF is not set # CONFIG_IPV6_OPTIMISTIC_DAD is not set -# CONFIG_INET6_AH is not set -# CONFIG_INET6_ESP is not set +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y # CONFIG_INET6_IPCOMP is not set # CONFIG_IPV6_MIP6 is not set # CONFIG_INET6_XFRM_TUNNEL is not set # CONFIG_INET6_TUNNEL is not set -# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET6_XFRM_MODE_TUNNEL is not set -# CONFIG_INET6_XFRM_MODE_BEET is not set +CONFIG_INET6_XFRM_MODE_TRANSPORT=y +CONFIG_INET6_XFRM_MODE_TUNNEL=y +CONFIG_INET6_XFRM_MODE_BEET=y # CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set CONFIG_IPV6_SIT=y +CONFIG_IPV6_NDISC_NODETYPE=y # CONFIG_IPV6_TUNNEL is not set # CONFIG_IPV6_MULTIPLE_TABLES is not set -# CONFIG_NETWORK_SECMARK is not set -# CONFIG_NETFILTER is not set +# CONFIG_IPV6_MROUTE is not set +CONFIG_NETLABEL=y +CONFIG_NETWORK_SECMARK=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +# CONFIG_NETFILTER_ADVANCED is not set + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=y +CONFIG_NETFILTER_NETLINK_LOG=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_SIP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_STATE=y + +# +# IP: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_CONNTRACK_PROC_COMPAT=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_LOG=y +CONFIG_IP_NF_TARGET_ULOG=y +CONFIG_NF_NAT=y +CONFIG_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_NF_NAT_FTP=y +CONFIG_NF_NAT_IRC=y +# CONFIG_NF_NAT_TFTP is not set +# CONFIG_NF_NAT_AMANDA is not set +# CONFIG_NF_NAT_PPTP is not set +# CONFIG_NF_NAT_H323 is not set +CONFIG_NF_NAT_SIP=y +CONFIG_IP_NF_MANGLE=y + +# +# IPv6: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_IPV6HEADER=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set # CONFIG_TIPC is not set @@ -345,6 +590,7 @@ CONFIG_IPV6_SIT=y # CONFIG_BRIDGE is not set # CONFIG_VLAN_8021Q is not set # CONFIG_DECNET is not set +CONFIG_LLC=y # CONFIG_LLC2 is not set # CONFIG_IPX is not set # CONFIG_ATALK is not set @@ -352,28 +598,99 @@ CONFIG_IPV6_SIT=y # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set +CONFIG_NET_SCHED=y + +# +# Queueing/Scheduling +# +# CONFIG_NET_SCH_CBQ is not set +# CONFIG_NET_SCH_HTB is not set +# CONFIG_NET_SCH_HFSC is not set +# CONFIG_NET_SCH_PRIO is not set +# CONFIG_NET_SCH_RR is not set +# CONFIG_NET_SCH_RED is not set +# CONFIG_NET_SCH_SFQ is not set +# CONFIG_NET_SCH_TEQL is not set +# CONFIG_NET_SCH_TBF is not set +# CONFIG_NET_SCH_GRED is not set +# CONFIG_NET_SCH_DSMARK is not set +# CONFIG_NET_SCH_NETEM is not set +# CONFIG_NET_SCH_INGRESS is not set + +# +# Classification +# +CONFIG_NET_CLS=y +# CONFIG_NET_CLS_BASIC is not set +# CONFIG_NET_CLS_TCINDEX is not set +# CONFIG_NET_CLS_ROUTE4 is not set +# CONFIG_NET_CLS_FW is not set +# CONFIG_NET_CLS_U32 is not set +# CONFIG_NET_CLS_RSVP is not set +# CONFIG_NET_CLS_RSVP6 is not set +# CONFIG_NET_CLS_FLOW is not set +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +# CONFIG_NET_EMATCH_CMP is not set +# CONFIG_NET_EMATCH_NBYTE is not set +# CONFIG_NET_EMATCH_U32 is not set +# CONFIG_NET_EMATCH_META is not set +# CONFIG_NET_EMATCH_TEXT is not set +CONFIG_NET_CLS_ACT=y +# CONFIG_NET_ACT_POLICE is not set +# CONFIG_NET_ACT_GACT is not set +# CONFIG_NET_ACT_MIRRED is not set +# CONFIG_NET_ACT_IPT is not set +# CONFIG_NET_ACT_NAT is not set +# CONFIG_NET_ACT_PEDIT is not set +# CONFIG_NET_ACT_SIMP is not set +CONFIG_NET_SCH_FIFO=y # # Network testing # # CONFIG_NET_PKTGEN is not set # CONFIG_NET_TCPPROBE is not set -# CONFIG_HAMRADIO is not set +CONFIG_HAMRADIO=y + +# +# Packet Radio protocols +# +# CONFIG_AX25 is not set +# CONFIG_CAN is not set # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set +CONFIG_FIB_RULES=y # # Wireless # -# CONFIG_CFG80211 is not set -# CONFIG_WIRELESS_EXT is not set -# CONFIG_MAC80211 is not set +CONFIG_CFG80211=y +CONFIG_NL80211=y +CONFIG_WIRELESS_EXT=y +CONFIG_MAC80211=y + +# +# Rate control algorithm selection +# +CONFIG_MAC80211_RC_DEFAULT_PID=y +# CONFIG_MAC80211_RC_DEFAULT_NONE is not set + +# +# Selecting 'y' for an algorithm will +# + +# +# build the algorithm into mac80211. +# +CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_RC_PID=y +# CONFIG_MAC80211_MESH is not set +CONFIG_MAC80211_LEDS=y +# CONFIG_MAC80211_DEBUGFS is not set +# CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set +# CONFIG_MAC80211_DEBUG is not set # CONFIG_IEEE80211 is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -385,13 +702,15 @@ CONFIG_IPV6_SIT=y # # Generic Driver Options # +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=y # CONFIG_DEBUG_DRIVER is not set -# CONFIG_DEBUG_DEVRES is not set +CONFIG_DEBUG_DEVRES=y # CONFIG_SYS_HYPERVISOR is not set -# CONFIG_CONNECTOR is not set +CONFIG_CONNECTOR=y +CONFIG_PROC_EVENTS=y # CONFIG_MTD is not set # CONFIG_PARPORT is not set CONFIG_PNP=y @@ -402,7 +721,7 @@ CONFIG_PNP=y # CONFIG_PNPACPI=y CONFIG_BLK_DEV=y -CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -415,8 +734,8 @@ CONFIG_BLK_DEV_LOOP=y # CONFIG_BLK_DEV_UB is not set CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 +CONFIG_BLK_DEV_RAM_SIZE=16384 +# CONFIG_BLK_DEV_XIP is not set # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set CONFIG_MISC_DEVICES=y @@ -425,72 +744,16 @@ CONFIG_MISC_DEVICES=y # CONFIG_EEPROM_93CX6 is not set # CONFIG_SGI_IOC4 is not set # CONFIG_TIFM_CORE is not set +# CONFIG_ACER_WMI is not set +# CONFIG_ASUS_LAPTOP is not set +# CONFIG_FUJITSU_LAPTOP is not set +# CONFIG_MSI_LAPTOP is not set # CONFIG_SONY_LAPTOP is not set # CONFIG_THINKPAD_ACPI is not set -CONFIG_IDE=y -CONFIG_BLK_DEV_IDE=y - -# -# Please see Documentation/ide.txt for help/info on IDE drives -# -# CONFIG_BLK_DEV_IDE_SATA is not set -# CONFIG_BLK_DEV_HD_IDE is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -CONFIG_BLK_DEV_IDECD=y -# CONFIG_BLK_DEV_IDETAPE is not set -# CONFIG_BLK_DEV_IDEFLOPPY is not set -# CONFIG_BLK_DEV_IDESCSI is not set -CONFIG_BLK_DEV_IDEACPI=y -# CONFIG_IDE_TASK_IOCTL is not set -CONFIG_IDE_PROC_FS=y - -# -# IDE chipset support/bugfixes -# -CONFIG_IDE_GENERIC=y -# CONFIG_BLK_DEV_CMD640 is not set -# CONFIG_BLK_DEV_IDEPNP is not set -CONFIG_BLK_DEV_IDEPCI=y -# CONFIG_IDEPCI_SHARE_IRQ is not set -CONFIG_IDEPCI_PCIBUS_ORDER=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_GENERIC is not set -# CONFIG_BLK_DEV_OPTI621 is not set -# CONFIG_BLK_DEV_RZ1000 is not set -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -# CONFIG_IDEDMA_ONLYDISK is not set -# CONFIG_BLK_DEV_AEC62XX is not set -# CONFIG_BLK_DEV_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -CONFIG_BLK_DEV_ATIIXP=y -# CONFIG_BLK_DEV_CMD64X is not set -# CONFIG_BLK_DEV_TRIFLEX is not set -# CONFIG_BLK_DEV_CY82C693 is not set -# CONFIG_BLK_DEV_CS5520 is not set -# CONFIG_BLK_DEV_CS5530 is not set -# CONFIG_BLK_DEV_HPT34X is not set -# CONFIG_BLK_DEV_HPT366 is not set -# CONFIG_BLK_DEV_JMICRON is not set -# CONFIG_BLK_DEV_SC1200 is not set -CONFIG_BLK_DEV_PIIX=y -# CONFIG_BLK_DEV_IT8213 is not set -# CONFIG_BLK_DEV_IT821X is not set -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_PDC202XX_OLD is not set -CONFIG_BLK_DEV_PDC202XX_NEW=y -# CONFIG_BLK_DEV_SVWKS is not set -# CONFIG_BLK_DEV_SIIMAGE is not set -# CONFIG_BLK_DEV_SIS5513 is not set -# CONFIG_BLK_DEV_SLC90E66 is not set -# CONFIG_BLK_DEV_TRM290 is not set -# CONFIG_BLK_DEV_VIA82CXXX is not set -# CONFIG_BLK_DEV_TC86C001 is not set -# CONFIG_IDE_ARM is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_BLK_DEV_HD is not set +# CONFIG_INTEL_MENLOW is not set +# CONFIG_ENCLOSURE_SERVICES is not set +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set # # SCSI device support @@ -499,8 +762,8 @@ CONFIG_BLK_DEV_IDEDMA=y CONFIG_SCSI=y CONFIG_SCSI_DMA=y # CONFIG_SCSI_TGT is not set -CONFIG_SCSI_NETLINK=y -# CONFIG_SCSI_PROC_FS is not set +# CONFIG_SCSI_NETLINK is not set +CONFIG_SCSI_PROC_FS=y # # SCSI support type (disk, tape, CD-ROM) @@ -509,7 +772,7 @@ CONFIG_BLK_DEV_SD=y # CONFIG_CHR_DEV_ST is not set # CONFIG_CHR_DEV_OSST is not set CONFIG_BLK_DEV_SR=y -# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y # CONFIG_CHR_DEV_SCH is not set @@ -526,73 +789,37 @@ CONFIG_SCSI_WAIT_SCAN=m # SCSI Transports # CONFIG_SCSI_SPI_ATTRS=y -CONFIG_SCSI_FC_ATTRS=y +# CONFIG_SCSI_FC_ATTRS is not set # CONFIG_SCSI_ISCSI_ATTRS is not set -CONFIG_SCSI_SAS_ATTRS=y +# CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set - -# -# SCSI low-level drivers -# -# CONFIG_ISCSI_TCP is not set -# CONFIG_BLK_DEV_3W_XXXX_RAID is not set -# CONFIG_SCSI_3W_9XXX is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AACRAID is not set -# CONFIG_SCSI_AIC7XXX is not set -# CONFIG_SCSI_AIC7XXX_OLD is not set -CONFIG_SCSI_AIC79XX=y -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=4000 -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set -# CONFIG_SCSI_AIC94XX is not set -# CONFIG_SCSI_ARCMSR is not set -# CONFIG_MEGARAID_NEWGEN is not set -# CONFIG_MEGARAID_LEGACY is not set -# CONFIG_MEGARAID_SAS is not set -# CONFIG_SCSI_HPTIOP is not set -# CONFIG_SCSI_BUSLOGIC is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_FUTURE_DOMAIN is not set -# CONFIG_SCSI_GDTH is not set -# CONFIG_SCSI_IPS is not set -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_STEX is not set -# CONFIG_SCSI_SYM53C8XX_2 is not set -# CONFIG_SCSI_IPR is not set -# CONFIG_SCSI_QLOGIC_1280 is not set -# CONFIG_SCSI_QLA_FC is not set -# CONFIG_SCSI_QLA_ISCSI is not set -# CONFIG_SCSI_LPFC is not set -# CONFIG_SCSI_DC395x is not set -# CONFIG_SCSI_DC390T is not set -# CONFIG_SCSI_DEBUG is not set -# CONFIG_SCSI_SRP is not set +# CONFIG_SCSI_SRP_ATTRS is not set +# CONFIG_SCSI_LOWLEVEL is not set +# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set CONFIG_ATA=y # CONFIG_ATA_NONSTANDARD is not set CONFIG_ATA_ACPI=y +CONFIG_SATA_PMP=y CONFIG_SATA_AHCI=y -CONFIG_SATA_SVW=y +# CONFIG_SATA_SIL24 is not set +CONFIG_ATA_SFF=y +# CONFIG_SATA_SVW is not set CONFIG_ATA_PIIX=y # CONFIG_SATA_MV is not set -CONFIG_SATA_NV=y +# CONFIG_SATA_NV is not set # CONFIG_PDC_ADMA is not set # CONFIG_SATA_QSTOR is not set # CONFIG_SATA_PROMISE is not set # CONFIG_SATA_SX4 is not set -CONFIG_SATA_SIL=y -# CONFIG_SATA_SIL24 is not set +# CONFIG_SATA_SIL is not set # CONFIG_SATA_SIS is not set # CONFIG_SATA_ULI is not set -CONFIG_SATA_VIA=y +# CONFIG_SATA_VIA is not set # CONFIG_SATA_VITESSE is not set # CONFIG_SATA_INIC162X is not set +# CONFIG_PATA_ACPI is not set # CONFIG_PATA_ALI is not set -# CONFIG_PATA_AMD is not set +CONFIG_PATA_AMD=y # CONFIG_PATA_ARTOP is not set # CONFIG_PATA_ATIIXP is not set # CONFIG_PATA_CMD640_PCI is not set @@ -612,11 +839,14 @@ CONFIG_SATA_VIA=y # CONFIG_PATA_TRIFLEX is not set # CONFIG_PATA_MARVELL is not set # CONFIG_PATA_MPIIX is not set -# CONFIG_PATA_OLDPIIX is not set +CONFIG_PATA_OLDPIIX=y # CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NINJA32 is not set # CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_NS87415 is not set # CONFIG_PATA_OPTI is not set # CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PCMCIA is not set # CONFIG_PATA_PDC_OLD is not set # CONFIG_PATA_RADISYS is not set # CONFIG_PATA_RZ1000 is not set @@ -628,65 +858,42 @@ CONFIG_SATA_VIA=y # CONFIG_PATA_VIA is not set # CONFIG_PATA_WINBOND is not set CONFIG_MD=y -# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_MD=y +# CONFIG_MD_LINEAR is not set +# CONFIG_MD_RAID0 is not set +# CONFIG_MD_RAID1 is not set +# CONFIG_MD_RAID10 is not set +# CONFIG_MD_RAID456 is not set +# CONFIG_MD_MULTIPATH is not set +# CONFIG_MD_FAULTY is not set CONFIG_BLK_DEV_DM=y # CONFIG_DM_DEBUG is not set # CONFIG_DM_CRYPT is not set # CONFIG_DM_SNAPSHOT is not set -# CONFIG_DM_MIRROR is not set -# CONFIG_DM_ZERO is not set +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y # CONFIG_DM_MULTIPATH is not set # CONFIG_DM_DELAY is not set - -# -# Fusion MPT device support -# -CONFIG_FUSION=y -CONFIG_FUSION_SPI=y -# CONFIG_FUSION_FC is not set -# CONFIG_FUSION_SAS is not set -CONFIG_FUSION_MAX_SGE=128 -# CONFIG_FUSION_CTL is not set +# CONFIG_DM_UEVENT is not set +# CONFIG_FUSION is not set # # IEEE 1394 (FireWire) support # # CONFIG_FIREWIRE is not set -CONFIG_IEEE1394=y - -# -# Subsystem Options -# -# CONFIG_IEEE1394_VERBOSEDEBUG is not set - -# -# Controllers -# - -# -# Texas Instruments PCILynx requires I2C -# -CONFIG_IEEE1394_OHCI1394=y - -# -# Protocols -# -# CONFIG_IEEE1394_VIDEO1394 is not set -# CONFIG_IEEE1394_SBP2 is not set -# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set -# CONFIG_IEEE1394_ETH1394 is not set -# CONFIG_IEEE1394_DV1394 is not set -CONFIG_IEEE1394_RAWIO=y +# CONFIG_IEEE1394 is not set # CONFIG_I2O is not set CONFIG_MACINTOSH_DRIVERS=y -# CONFIG_MAC_EMUMOUSEBTN is not set +CONFIG_MAC_EMUMOUSEBTN=y CONFIG_NETDEVICES=y -CONFIG_NETDEVICES_MULTIQUEUE=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set +# CONFIG_IFB is not set # CONFIG_DUMMY is not set # CONFIG_BONDING is not set # CONFIG_MACVLAN is not set # CONFIG_EQUALIZER is not set -CONFIG_TUN=y +# CONFIG_TUN is not set +# CONFIG_VETH is not set # CONFIG_NET_SB1000 is not set # CONFIG_ARCNET is not set # CONFIG_PHYLIB is not set @@ -696,39 +903,40 @@ CONFIG_MII=y # CONFIG_SUNGEM is not set # CONFIG_CASSINI is not set CONFIG_NET_VENDOR_3COM=y -CONFIG_VORTEX=y +# CONFIG_VORTEX is not set # CONFIG_TYPHOON is not set CONFIG_NET_TULIP=y # CONFIG_DE2104X is not set -CONFIG_TULIP=y -# CONFIG_TULIP_MWI is not set -# CONFIG_TULIP_MMIO is not set -# CONFIG_TULIP_NAPI is not set +# CONFIG_TULIP is not set # CONFIG_DE4X5 is not set # CONFIG_WINBOND_840 is not set # CONFIG_DM9102 is not set # CONFIG_ULI526X is not set +# CONFIG_PCMCIA_XIRCOM is not set # CONFIG_HP100 is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not set CONFIG_NET_PCI=y # CONFIG_PCNET32 is not set -CONFIG_AMD8111_ETH=y -# CONFIG_AMD8111E_NAPI is not set +# CONFIG_AMD8111_ETH is not set # CONFIG_ADAPTEC_STARFIRE is not set -CONFIG_B44=y +# CONFIG_B44 is not set CONFIG_FORCEDETH=y # CONFIG_FORCEDETH_NAPI is not set -# CONFIG_DGRS is not set # CONFIG_EEPRO100 is not set CONFIG_E100=y # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set # CONFIG_NE2K_PCI is not set -CONFIG_8139CP=y +# CONFIG_8139CP is not set CONFIG_8139TOO=y -# CONFIG_8139TOO_PIO is not set +CONFIG_8139TOO_PIO=y # CONFIG_8139TOO_TUNE_TWISTER is not set # CONFIG_8139TOO_8129 is not set # CONFIG_8139_OLD_RX_RESET is not set +# CONFIG_R6040 is not set # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set # CONFIG_SUNDANCE is not set @@ -740,34 +948,74 @@ CONFIG_NETDEV_1000=y CONFIG_E1000=y # CONFIG_E1000_NAPI is not set # CONFIG_E1000_DISABLE_PACKET_SPLIT is not set +# CONFIG_E1000E is not set +# CONFIG_E1000E_ENABLED is not set +# CONFIG_IP1000 is not set +# CONFIG_IGB is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set # CONFIG_SIS190 is not set # CONFIG_SKGE is not set -# CONFIG_SKY2 is not set +CONFIG_SKY2=y +# CONFIG_SKY2_DEBUG is not set # CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y -CONFIG_BNX2=y +# CONFIG_BNX2 is not set # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set # CONFIG_CHELSIO_T3 is not set +# CONFIG_IXGBE is not set # CONFIG_IXGB is not set -CONFIG_S2IO=m -# CONFIG_S2IO_NAPI is not set +# CONFIG_S2IO is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set +# CONFIG_NIU is not set # CONFIG_MLX4_CORE is not set -# CONFIG_TR is not set +# CONFIG_TEHUTI is not set +# CONFIG_BNX2X is not set +# CONFIG_SFC is not set +CONFIG_TR=y +# CONFIG_IBMOL is not set +# CONFIG_3C359 is not set +# CONFIG_TMS380TR is not set # # Wireless LAN # # CONFIG_WLAN_PRE80211 is not set -# CONFIG_WLAN_80211 is not set +CONFIG_WLAN_80211=y +# CONFIG_PCMCIA_RAYCS is not set +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set +# CONFIG_LIBERTAS is not set +# CONFIG_AIRO is not set +# CONFIG_HERMES is not set +# CONFIG_ATMEL is not set +# CONFIG_AIRO_CS is not set +# CONFIG_PCMCIA_WL3501 is not set +# CONFIG_PRISM54 is not set +# CONFIG_USB_ZD1201 is not set +# CONFIG_USB_NET_RNDIS_WLAN is not set +# CONFIG_RTL8180 is not set +# CONFIG_RTL8187 is not set +# CONFIG_ADM8211 is not set +# CONFIG_P54_COMMON is not set +CONFIG_ATH5K=y +# CONFIG_ATH5K_DEBUG is not set +# CONFIG_IWLWIFI is not set +# CONFIG_IWLCORE is not set +# CONFIG_IWLWIFI_LEDS is not set +# CONFIG_IWL4965 is not set +# CONFIG_IWL3945 is not set +# CONFIG_HOSTAP is not set +# CONFIG_B43 is not set +# CONFIG_B43LEGACY is not set +# CONFIG_ZD1211RW is not set +# CONFIG_RT2X00 is not set # # USB Network Adapters @@ -776,16 +1024,26 @@ CONFIG_S2IO=m # CONFIG_USB_KAWETH is not set # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set -# CONFIG_USB_USBNET_MII is not set # CONFIG_USB_USBNET is not set +CONFIG_NET_PCMCIA=y +# CONFIG_PCMCIA_3C589 is not set +# CONFIG_PCMCIA_3C574 is not set +# CONFIG_PCMCIA_FMVJ18X is not set +# CONFIG_PCMCIA_PCNET is not set +# CONFIG_PCMCIA_NMCLAN is not set +# CONFIG_PCMCIA_SMC91C92 is not set +# CONFIG_PCMCIA_XIRC2PS is not set +# CONFIG_PCMCIA_AXNET is not set # CONFIG_WAN is not set -# CONFIG_FDDI is not set +CONFIG_FDDI=y +# CONFIG_DEFXX is not set +# CONFIG_SKFP is not set # CONFIG_HIPPI is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set # CONFIG_NET_FC is not set -# CONFIG_SHAPER is not set CONFIG_NETCONSOLE=y +# CONFIG_NETCONSOLE_DYNAMIC is not set CONFIG_NETPOLL=y # CONFIG_NETPOLL_TRAP is not set CONFIG_NET_POLL_CONTROLLER=y @@ -796,18 +1054,17 @@ CONFIG_NET_POLL_CONTROLLER=y # Input device support # CONFIG_INPUT=y -# CONFIG_INPUT_FF_MEMLESS is not set -# CONFIG_INPUT_POLLDEV is not set +CONFIG_INPUT_FF_MEMLESS=y +CONFIG_INPUT_POLLDEV=y # # Userland interfaces # CONFIG_INPUT_MOUSEDEV=y -CONFIG_INPUT_MOUSEDEV_PSAUX=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_TSDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_EVBUG is not set @@ -832,17 +1089,62 @@ CONFIG_MOUSE_PS2_TRACKPOINT=y # CONFIG_MOUSE_SERIAL is not set # CONFIG_MOUSE_APPLETOUCH is not set # CONFIG_MOUSE_VSXXXAA is not set -# CONFIG_INPUT_JOYSTICK is not set -# CONFIG_INPUT_TABLET is not set -# CONFIG_INPUT_TOUCHSCREEN is not set -# CONFIG_INPUT_MISC is not set +CONFIG_INPUT_JOYSTICK=y +# CONFIG_JOYSTICK_ANALOG is not set +# CONFIG_JOYSTICK_A3D is not set +# CONFIG_JOYSTICK_ADI is not set +# CONFIG_JOYSTICK_COBRA is not set +# CONFIG_JOYSTICK_GF2K is not set +# CONFIG_JOYSTICK_GRIP is not set +# CONFIG_JOYSTICK_GRIP_MP is not set +# CONFIG_JOYSTICK_GUILLEMOT is not set +# CONFIG_JOYSTICK_INTERACT is not set +# CONFIG_JOYSTICK_SIDEWINDER is not set +# CONFIG_JOYSTICK_TMDC is not set +# CONFIG_JOYSTICK_IFORCE is not set +# CONFIG_JOYSTICK_WARRIOR is not set +# CONFIG_JOYSTICK_MAGELLAN is not set +# CONFIG_JOYSTICK_SPACEORB is not set +# CONFIG_JOYSTICK_SPACEBALL is not set +# CONFIG_JOYSTICK_STINGER is not set +# CONFIG_JOYSTICK_TWIDJOY is not set +# CONFIG_JOYSTICK_ZHENHUA is not set +# CONFIG_JOYSTICK_JOYDUMP is not set +# CONFIG_JOYSTICK_XPAD is not set +CONFIG_INPUT_TABLET=y +# CONFIG_TABLET_USB_ACECAD is not set +# CONFIG_TABLET_USB_AIPTEK is not set +# CONFIG_TABLET_USB_GTCO is not set +# CONFIG_TABLET_USB_KBTAB is not set +# CONFIG_TABLET_USB_WACOM is not set +CONFIG_INPUT_TOUCHSCREEN=y +# CONFIG_TOUCHSCREEN_FUJITSU is not set +# CONFIG_TOUCHSCREEN_GUNZE is not set +# CONFIG_TOUCHSCREEN_ELO is not set +# CONFIG_TOUCHSCREEN_MTOUCH is not set +# CONFIG_TOUCHSCREEN_MK712 is not set +# CONFIG_TOUCHSCREEN_PENMOUNT is not set +# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set +# CONFIG_TOUCHSCREEN_TOUCHWIN is not set +# CONFIG_TOUCHSCREEN_UCB1400 is not set +# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set +CONFIG_INPUT_MISC=y +# CONFIG_INPUT_PCSPKR is not set +# CONFIG_INPUT_APANEL is not set +# CONFIG_INPUT_ATLAS_BTNS is not set +# CONFIG_INPUT_ATI_REMOTE is not set +# CONFIG_INPUT_ATI_REMOTE2 is not set +# CONFIG_INPUT_KEYSPAN_REMOTE is not set +# CONFIG_INPUT_POWERMATE is not set +# CONFIG_INPUT_YEALINK is not set +# CONFIG_INPUT_UINPUT is not set # # Hardware I/O ports # CONFIG_SERIO=y CONFIG_SERIO_I8042=y -# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_SERPORT=y # CONFIG_SERIO_CT82C710 is not set # CONFIG_SERIO_PCIPS2 is not set CONFIG_SERIO_LIBPS2=y @@ -855,8 +1157,26 @@ CONFIG_SERIO_LIBPS2=y CONFIG_VT=y CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y -# CONFIG_VT_HW_CONSOLE_BINDING is not set -# CONFIG_SERIAL_NONSTANDARD is not set +CONFIG_VT_HW_CONSOLE_BINDING=y +CONFIG_DEVKMEM=y +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_COMPUTONE is not set +# CONFIG_ROCKETPORT is not set +# CONFIG_CYCLADES is not set +# CONFIG_DIGIEPCA is not set +# CONFIG_MOXA_INTELLIO is not set +# CONFIG_MOXA_SMARTIO is not set +# CONFIG_ISI is not set +# CONFIG_SYNCLINK is not set +# CONFIG_SYNCLINKMP is not set +# CONFIG_SYNCLINK_GT is not set +# CONFIG_N_HDLC is not set +# CONFIG_RISCOM8 is not set +# CONFIG_SPECIALIX is not set +# CONFIG_SX is not set +# CONFIG_RIO is not set +# CONFIG_STALDRV is not set +# CONFIG_NOZOMI is not set # # Serial drivers @@ -866,9 +1186,14 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_FIX_EARLYCON_MEM=y CONFIG_SERIAL_8250_PCI=y CONFIG_SERIAL_8250_PNP=y -CONFIG_SERIAL_8250_NR_UARTS=4 +# CONFIG_SERIAL_8250_CS is not set +CONFIG_SERIAL_8250_NR_UARTS=32 CONFIG_SERIAL_8250_RUNTIME_UARTS=4 -# CONFIG_SERIAL_8250_EXTENDED is not set +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_RSA=y # # Non-8250 serial port support @@ -877,78 +1202,260 @@ CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y -CONFIG_LEGACY_PTYS=y -CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_LEGACY_PTYS is not set # CONFIG_IPMI_HANDLER is not set -# CONFIG_WATCHDOG is not set CONFIG_HW_RANDOM=y -CONFIG_HW_RANDOM_INTEL=y -CONFIG_HW_RANDOM_AMD=y -# CONFIG_NVRAM is not set -CONFIG_RTC=y +# CONFIG_HW_RANDOM_INTEL is not set +# CONFIG_HW_RANDOM_AMD is not set +CONFIG_NVRAM=y # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set -CONFIG_AGP=y -CONFIG_AGP_AMD64=y -CONFIG_AGP_INTEL=y -# CONFIG_AGP_SIS is not set -# CONFIG_AGP_VIA is not set -# CONFIG_DRM is not set + +# +# PCMCIA character devices +# +# CONFIG_SYNCLINK_CS is not set +# CONFIG_CARDMAN_4000 is not set +# CONFIG_CARDMAN_4040 is not set +# CONFIG_IPWIRELESS is not set # CONFIG_MWAVE is not set # CONFIG_PC8736x_GPIO is not set -CONFIG_RAW_DRIVER=y -CONFIG_MAX_RAW_DEVS=256 +# CONFIG_RAW_DRIVER is not set CONFIG_HPET=y # CONFIG_HPET_RTC_IRQ is not set -CONFIG_HPET_MMAP=y +# CONFIG_HPET_MMAP is not set # CONFIG_HANGCHECK_TIMER is not set # CONFIG_TCG_TPM is not set # CONFIG_TELCLOCK is not set CONFIG_DEVPORT=y -# CONFIG_I2C is not set - -# -# SPI support -# +CONFIG_I2C=y +CONFIG_I2C_BOARDINFO=y +# CONFIG_I2C_CHARDEV is not set + +# +# I2C Hardware Bus support +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +CONFIG_I2C_I801=y +# CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_PROSAVAGE is not set +# CONFIG_I2C_SAVAGE4 is not set +# CONFIG_I2C_SIMTEC is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_TAOS_EVM is not set +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_TINY_USB is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set +# CONFIG_I2C_VOODOO3 is not set +# CONFIG_I2C_PCA_PLATFORM is not set + +# +# Miscellaneous I2C Chip support +# +# CONFIG_DS1682 is not set +# CONFIG_SENSORS_EEPROM is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_PCF8575 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_SENSORS_MAX6875 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set # CONFIG_SPI is not set -# CONFIG_SPI_MASTER is not set # CONFIG_W1 is not set -# CONFIG_POWER_SUPPLY is not set +CONFIG_POWER_SUPPLY=y +# CONFIG_POWER_SUPPLY_DEBUG is not set +# CONFIG_PDA_POWER is not set +# CONFIG_BATTERY_DS2760 is not set # CONFIG_HWMON is not set +CONFIG_THERMAL=y +CONFIG_WATCHDOG=y +# CONFIG_WATCHDOG_NOWAYOUT is not set + +# +# Watchdog Device Drivers +# +# CONFIG_SOFT_WATCHDOG is not set +# CONFIG_ACQUIRE_WDT is not set +# CONFIG_ADVANTECH_WDT is not set +# CONFIG_ALIM1535_WDT is not set +# CONFIG_ALIM7101_WDT is not set +# CONFIG_SC520_WDT is not set +# CONFIG_EUROTECH_WDT is not set +# CONFIG_IB700_WDT is not set +# CONFIG_IBMASR is not set +# CONFIG_WAFER_WDT is not set +# CONFIG_I6300ESB_WDT is not set +# CONFIG_ITCO_WDT is not set +# CONFIG_IT8712F_WDT is not set +# CONFIG_HP_WATCHDOG is not set +# CONFIG_SC1200_WDT is not set +# CONFIG_PC87413_WDT is not set +# CONFIG_60XX_WDT is not set +# CONFIG_SBC8360_WDT is not set +# CONFIG_CPU5_WDT is not set +# CONFIG_SMSC37B787_WDT is not set +# CONFIG_W83627HF_WDT is not set +# CONFIG_W83697HF_WDT is not set +# CONFIG_W83877F_WDT is not set +# CONFIG_W83977F_WDT is not set +# CONFIG_MACHZ_WDT is not set +# CONFIG_SBC_EPX_C3_WATCHDOG is not set + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set + +# +# USB-based Watchdog Cards +# +# CONFIG_USBPCWATCHDOG is not set + +# +# Sonics Silicon Backplane +# +CONFIG_SSB_POSSIBLE=y +# CONFIG_SSB is not set # # Multifunction device drivers # # CONFIG_MFD_SM501 is not set +# CONFIG_HTC_PASIC3 is not set # # Multimedia devices # + +# +# Multimedia core support +# # CONFIG_VIDEO_DEV is not set # CONFIG_DVB_CORE is not set + +# +# Multimedia drivers +# CONFIG_DAB=y # CONFIG_USB_DABUSB is not set # # Graphics support # -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_VIA is not set +CONFIG_DRM=y +# CONFIG_DRM_TDFX is not set +# CONFIG_DRM_R128 is not set +# CONFIG_DRM_RADEON is not set +# CONFIG_DRM_I810 is not set +# CONFIG_DRM_I830 is not set +CONFIG_DRM_I915=y +# CONFIG_DRM_MGA is not set +# CONFIG_DRM_SIS is not set +# CONFIG_DRM_VIA is not set +# CONFIG_DRM_SAVAGE is not set +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +CONFIG_FB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_IMAGEBLIT=y +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +CONFIG_FB_DEFERRED_IO=y +# CONFIG_FB_SVGALIB is not set +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y + +# +# Frame buffer hardware drivers +# +# CONFIG_FB_CIRRUS is not set +# CONFIG_FB_PM2 is not set +# CONFIG_FB_CYBER2000 is not set +# CONFIG_FB_ARC is not set +# CONFIG_FB_ASILIANT is not set +# CONFIG_FB_IMSTT is not set +# CONFIG_FB_VGA16 is not set +# CONFIG_FB_UVESA is not set +# CONFIG_FB_VESA is not set +CONFIG_FB_EFI=y +# CONFIG_FB_IMAC is not set +# CONFIG_FB_N411 is not set +# CONFIG_FB_HGA is not set +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_NVIDIA is not set +# CONFIG_FB_RIVA is not set +# CONFIG_FB_LE80578 is not set +# CONFIG_FB_INTEL is not set +# CONFIG_FB_MATROX is not set +# CONFIG_FB_RADEON is not set +# CONFIG_FB_ATY128 is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_S3 is not set +# CONFIG_FB_SAVAGE is not set +# CONFIG_FB_SIS is not set +# CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +# CONFIG_FB_3DFX is not set +# CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_VT8623 is not set +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_ARK is not set +# CONFIG_FB_PM3 is not set +# CONFIG_FB_GEODE is not set +# CONFIG_FB_VIRTUAL is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_BACKLIGHT_CLASS_DEVICE=y +# CONFIG_BACKLIGHT_CORGI is not set +# CONFIG_BACKLIGHT_PROGEAR is not set # # Display device support # # CONFIG_DISPLAY_SUPPORT is not set -# CONFIG_VGASTATE is not set -# CONFIG_FB is not set # # Console display driver support # CONFIG_VGA_CONSOLE=y CONFIG_VGACON_SOFT_SCROLLBACK=y -CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=256 +CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64 CONFIG_VIDEO_SELECT=y CONFIG_DUMMY_CONSOLE=y +# CONFIG_FRAMEBUFFER_CONSOLE is not set +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_LOGO_LINUX_CLUT224=y # # Sound @@ -958,33 +1465,165 @@ CONFIG_SOUND=y # # Advanced Linux Sound Architecture # -# CONFIG_SND is not set +CONFIG_SND=y +CONFIG_SND_TIMER=y +CONFIG_SND_PCM=y +CONFIG_SND_HWDEP=y +CONFIG_SND_SEQUENCER=y +CONFIG_SND_SEQ_DUMMY=y +CONFIG_SND_OSSEMUL=y +CONFIG_SND_MIXER_OSS=y +CONFIG_SND_PCM_OSS=y +CONFIG_SND_PCM_OSS_PLUGINS=y +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_DYNAMIC_MINORS=y +CONFIG_SND_SUPPORT_OLD_API=y +CONFIG_SND_VERBOSE_PROCFS=y +# CONFIG_SND_VERBOSE_PRINTK is not set +# CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y + +# +# Generic devices +# +# CONFIG_SND_PCSP is not set +# CONFIG_SND_DUMMY is not set +# CONFIG_SND_VIRMIDI is not set +# CONFIG_SND_MTPAV is not set +# CONFIG_SND_SERIAL_U16550 is not set +# CONFIG_SND_MPU401 is not set + +# +# PCI devices +# +# CONFIG_SND_AD1889 is not set +# CONFIG_SND_ALS300 is not set +# CONFIG_SND_ALS4000 is not set +# CONFIG_SND_ALI5451 is not set +# CONFIG_SND_ATIIXP is not set +# CONFIG_SND_ATIIXP_MODEM is not set +# CONFIG_SND_AU8810 is not set +# CONFIG_SND_AU8820 is not set +# CONFIG_SND_AU8830 is not set +# CONFIG_SND_AW2 is not set +# CONFIG_SND_AZT3328 is not set +# CONFIG_SND_BT87X is not set +# CONFIG_SND_CA0106 is not set +# CONFIG_SND_CMIPCI is not set +# CONFIG_SND_OXYGEN is not set +# CONFIG_SND_CS4281 is not set +# CONFIG_SND_CS46XX is not set +# CONFIG_SND_CS5530 is not set +# CONFIG_SND_DARLA20 is not set +# CONFIG_SND_GINA20 is not set +# CONFIG_SND_LAYLA20 is not set +# CONFIG_SND_DARLA24 is not set +# CONFIG_SND_GINA24 is not set +# CONFIG_SND_LAYLA24 is not set +# CONFIG_SND_MONA is not set +# CONFIG_SND_MIA is not set +# CONFIG_SND_ECHO3G is not set +# CONFIG_SND_INDIGO is not set +# CONFIG_SND_INDIGOIO is not set +# CONFIG_SND_INDIGODJ is not set +# CONFIG_SND_EMU10K1 is not set +# CONFIG_SND_EMU10K1X is not set +# CONFIG_SND_ENS1370 is not set +# CONFIG_SND_ENS1371 is not set +# CONFIG_SND_ES1938 is not set +# CONFIG_SND_ES1968 is not set +# CONFIG_SND_FM801 is not set +CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_HWDEP=y +CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_CODEC_ANALOG=y +CONFIG_SND_HDA_CODEC_SIGMATEL=y +CONFIG_SND_HDA_CODEC_VIA=y +CONFIG_SND_HDA_CODEC_ATIHDMI=y +CONFIG_SND_HDA_CODEC_CONEXANT=y +CONFIG_SND_HDA_CODEC_CMEDIA=y +CONFIG_SND_HDA_CODEC_SI3054=y +CONFIG_SND_HDA_GENERIC=y +# CONFIG_SND_HDA_POWER_SAVE is not set +# CONFIG_SND_HDSP is not set +# CONFIG_SND_HDSPM is not set +# CONFIG_SND_HIFIER is not set +# CONFIG_SND_ICE1712 is not set +# CONFIG_SND_ICE1724 is not set +# CONFIG_SND_INTEL8X0 is not set +# CONFIG_SND_INTEL8X0M is not set +# CONFIG_SND_KORG1212 is not set +# CONFIG_SND_MAESTRO3 is not set +# CONFIG_SND_MIXART is not set +# CONFIG_SND_NM256 is not set +# CONFIG_SND_PCXHR is not set +# CONFIG_SND_RIPTIDE is not set +# CONFIG_SND_RME32 is not set +# CONFIG_SND_RME96 is not set +# CONFIG_SND_RME9652 is not set +# CONFIG_SND_SONICVIBES is not set +# CONFIG_SND_TRIDENT is not set +# CONFIG_SND_VIA82XX is not set +# CONFIG_SND_VIA82XX_MODEM is not set +# CONFIG_SND_VIRTUOSO is not set +# CONFIG_SND_VX222 is not set +# CONFIG_SND_YMFPCI is not set + +# +# USB devices +# +# CONFIG_SND_USB_AUDIO is not set +# CONFIG_SND_USB_USX2Y is not set +# CONFIG_SND_USB_CAIAQ is not set + +# +# PCMCIA devices +# +# CONFIG_SND_VXPOCKET is not set +# CONFIG_SND_PDAUDIOCF is not set + +# +# System on Chip audio support +# +# CONFIG_SND_SOC is not set + +# +# ALSA SoC audio for Freescale SOCs +# + +# +# SoC Audio for the Texas Instruments OMAP +# # # Open Sound System # -CONFIG_SOUND_PRIME=y -# CONFIG_SOUND_TRIDENT is not set -# CONFIG_SOUND_MSNDCLAS is not set -# CONFIG_SOUND_MSNDPIN is not set -# CONFIG_SOUND_OSS is not set +# CONFIG_SOUND_PRIME is not set CONFIG_HID_SUPPORT=y CONFIG_HID=y -# CONFIG_HID_DEBUG is not set +CONFIG_HID_DEBUG=y +CONFIG_HIDRAW=y # # USB Input Devices # CONFIG_USB_HID=y -# CONFIG_USB_HIDINPUT_POWERBOOK is not set -# CONFIG_HID_FF is not set -# CONFIG_USB_HIDDEV is not set +CONFIG_USB_HIDINPUT_POWERBOOK=y +CONFIG_HID_FF=y +CONFIG_HID_PID=y +CONFIG_LOGITECH_FF=y +# CONFIG_LOGIRUMBLEPAD2_FF is not set +CONFIG_PANTHERLORD_FF=y +CONFIG_THRUSTMASTER_FF=y +CONFIG_ZEROPLUS_FF=y +CONFIG_USB_HIDDEV=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y CONFIG_USB_ARCH_HAS_EHCI=y CONFIG_USB=y -# CONFIG_USB_DEBUG is not set +CONFIG_USB_DEBUG=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y # # Miscellaneous USB options @@ -992,18 +1631,18 @@ CONFIG_USB=y CONFIG_USB_DEVICEFS=y # CONFIG_USB_DEVICE_CLASS is not set # CONFIG_USB_DYNAMIC_MINORS is not set -# CONFIG_USB_SUSPEND is not set -# CONFIG_USB_PERSIST is not set +CONFIG_USB_SUSPEND=y # CONFIG_USB_OTG is not set # # USB Host Controller Drivers # +# CONFIG_USB_C67X00_HCD is not set CONFIG_USB_EHCI_HCD=y -# CONFIG_USB_EHCI_SPLIT_ISO is not set # CONFIG_USB_EHCI_ROOT_HUB_TT is not set # CONFIG_USB_EHCI_TT_NEWSCHED is not set # CONFIG_USB_ISP116X_HCD is not set +# CONFIG_USB_ISP1760_HCD is not set CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set # CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set @@ -1036,8 +1675,10 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_SDDR55 is not set # CONFIG_USB_STORAGE_JUMPSHOT is not set # CONFIG_USB_STORAGE_ALAUDA is not set +# CONFIG_USB_STORAGE_ONETOUCH is not set # CONFIG_USB_STORAGE_KARMA is not set -# CONFIG_USB_LIBUSUAL is not set +# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set +CONFIG_USB_LIBUSUAL=y # # USB Imaging devices @@ -1049,10 +1690,6 @@ CONFIG_USB_MON=y # # USB port drivers # - -# -# USB Serial Converter support -# # CONFIG_USB_SERIAL is not set # @@ -1078,98 +1715,126 @@ CONFIG_USB_MON=y # CONFIG_USB_TRANCEVIBRATOR is not set # CONFIG_USB_IOWARRIOR is not set # CONFIG_USB_TEST is not set +# CONFIG_USB_GADGET is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y # -# USB DSL modem support +# LED drivers # +# CONFIG_LEDS_CLEVO_MAIL is not set # -# USB Gadget Support +# LED Triggers # -# CONFIG_USB_GADGET is not set -# CONFIG_MMC is not set +CONFIG_LEDS_TRIGGERS=y +# CONFIG_LEDS_TRIGGER_TIMER is not set +# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set +# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set +# CONFIG_ACCESSIBILITY is not set +# CONFIG_INFINIBAND is not set +CONFIG_EDAC=y # -# LED devices +# Reporting subsystems # -# CONFIG_NEW_LEDS is not set +# CONFIG_EDAC_DEBUG is not set +# CONFIG_EDAC_MM_EDAC is not set +CONFIG_RTC_LIB=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +# CONFIG_RTC_DEBUG is not set # -# LED drivers +# RTC interfaces # +CONFIG_RTC_INTF_SYSFS=y +CONFIG_RTC_INTF_PROC=y +CONFIG_RTC_INTF_DEV=y +# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set +# CONFIG_RTC_DRV_TEST is not set # -# LED Triggers +# I2C RTC drivers # -# CONFIG_INFINIBAND is not set -# CONFIG_EDAC is not set +# CONFIG_RTC_DRV_DS1307 is not set +# CONFIG_RTC_DRV_DS1374 is not set +# CONFIG_RTC_DRV_DS1672 is not set +# CONFIG_RTC_DRV_MAX6900 is not set +# CONFIG_RTC_DRV_RS5C372 is not set +# CONFIG_RTC_DRV_ISL1208 is not set +# CONFIG_RTC_DRV_X1205 is not set +# CONFIG_RTC_DRV_PCF8563 is not set +# CONFIG_RTC_DRV_PCF8583 is not set +# CONFIG_RTC_DRV_M41T80 is not set +# CONFIG_RTC_DRV_S35390A is not set # -# Real Time Clock +# SPI RTC drivers # -# CONFIG_RTC_CLASS is not set # -# DMA Engine support +# Platform RTC drivers # -# CONFIG_DMA_ENGINE is not set +CONFIG_RTC_DRV_CMOS=y +# CONFIG_RTC_DRV_DS1511 is not set +# CONFIG_RTC_DRV_DS1553 is not set +# CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_STK17TA8 is not set +# CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_V3020 is not set # -# DMA Clients +# on-CPU RTC drivers # +CONFIG_DMADEVICES=y # # DMA Devices # -CONFIG_VIRTUALIZATION=y -# CONFIG_KVM is not set - -# -# Userspace I/O -# +# CONFIG_INTEL_IOATDMA is not set # CONFIG_UIO is not set # # Firmware Drivers # # CONFIG_EDD is not set +CONFIG_EFI_VARS=y # CONFIG_DELL_RBU is not set # CONFIG_DCDBAS is not set CONFIG_DMIID=y +# CONFIG_ISCSI_IBFT_FIND is not set # # File systems # -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -# CONFIG_EXT2_FS_SECURITY is not set -# CONFIG_EXT2_FS_XIP is not set +# CONFIG_EXT2_FS is not set CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y -# CONFIG_EXT3_FS_SECURITY is not set +CONFIG_EXT3_FS_SECURITY=y # CONFIG_EXT4DEV_FS is not set CONFIG_JBD=y # CONFIG_JBD_DEBUG is not set CONFIG_FS_MBCACHE=y -CONFIG_REISERFS_FS=y -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -# CONFIG_REISERFS_FS_SECURITY is not set +# CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y # CONFIG_XFS_FS is not set # CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set +CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y -# CONFIG_QUOTA is not set -CONFIG_DNOTIFY=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +# CONFIG_QFMT_V1 is not set +CONFIG_QFMT_V2=y +CONFIG_QUOTACTL=y # CONFIG_AUTOFS_FS is not set CONFIG_AUTOFS4_FS=y # CONFIG_FUSE_FS is not set @@ -1180,7 +1845,7 @@ CONFIG_GENERIC_ACL=y # CONFIG_ISO9660_FS=y CONFIG_JOLIET=y -# CONFIG_ZISOFS is not set +CONFIG_ZISOFS=y # CONFIG_UDF_FS is not set # @@ -1198,13 +1863,13 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y +CONFIG_PROC_VMCORE=y CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y -CONFIG_RAMFS=y # CONFIG_CONFIGFS_FS is not set # @@ -1212,6 +1877,7 @@ CONFIG_RAMFS=y # # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set # CONFIG_HFS_FS is not set # CONFIG_HFSPLUS_FS is not set # CONFIG_BEFS_FS is not set @@ -1219,33 +1885,15 @@ CONFIG_RAMFS=y # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set # CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set - -# -# Network File Systems -# -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -# CONFIG_NFS_V3_ACL is not set -# CONFIG_NFS_V4 is not set -# CONFIG_NFS_DIRECTIO is not set -CONFIG_NFSD=y -CONFIG_NFSD_V3=y -# CONFIG_NFSD_V3_ACL is not set -# CONFIG_NFSD_V4 is not set -CONFIG_NFSD_TCP=y -CONFIG_ROOT_NFS=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_EXPORTFS=y -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=y -# CONFIG_SUNRPC_BIND34 is not set -# CONFIG_RPCSEC_GSS_KRB5 is not set -# CONFIG_RPCSEC_GSS_SPKM3 is not set +CONFIG_NETWORK_FILESYSTEMS=y +# CONFIG_NFS_FS is not set +# CONFIG_NFSD is not set # CONFIG_SMB_FS is not set # CONFIG_CIFS is not set # CONFIG_NCP_FS is not set @@ -1255,14 +1903,26 @@ CONFIG_SUNRPC=y # # Partition Types # -# CONFIG_PARTITION_ADVANCED is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +# CONFIG_ATARI_PARTITION is not set +CONFIG_MAC_PARTITION=y CONFIG_MSDOS_PARTITION=y - -# -# Native Language Support -# +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +# CONFIG_LDM_PARTITION is not set +CONFIG_SGI_PARTITION=y +# CONFIG_ULTRIX_PARTITION is not set +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_EFI_PARTITION=y +# CONFIG_SYSV68_PARTITION is not set CONFIG_NLS=y -CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y # CONFIG_NLS_CODEPAGE_737 is not set # CONFIG_NLS_CODEPAGE_775 is not set @@ -1297,40 +1957,33 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_ISO8859_9 is not set # CONFIG_NLS_ISO8859_13 is not set # CONFIG_NLS_ISO8859_14 is not set -CONFIG_NLS_ISO8859_15=y +# CONFIG_NLS_ISO8859_15 is not set # CONFIG_NLS_KOI8_R is not set # CONFIG_NLS_KOI8_U is not set CONFIG_NLS_UTF8=y - -# -# Distributed Lock Manager -# # CONFIG_DLM is not set # -# Instrumentation Support -# -CONFIG_PROFILING=y -CONFIG_OPROFILE=y -CONFIG_KPROBES=y - -# # Kernel hacking # CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set +# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y -CONFIG_UNUSED_SYMBOLS=y +# CONFIG_UNUSED_SYMBOLS is not set CONFIG_DEBUG_FS=y # CONFIG_HEADERS_CHECK is not set CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set -CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_DETECT_SOFTLOCKUP is not set # CONFIG_SCHED_DEBUG is not set -# CONFIG_SCHEDSTATS is not set +CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y -# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_OBJECTS is not set +# CONFIG_SLUB_DEBUG_ON is not set +# CONFIG_SLUB_STATS is not set # CONFIG_DEBUG_RT_MUTEXES is not set # CONFIG_RT_MUTEX_TESTER is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -1344,28 +1997,162 @@ CONFIG_TIMER_STATS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_LIST is not set -# CONFIG_FRAME_POINTER is not set -CONFIG_OPTIMIZE_INLINING=y +# CONFIG_DEBUG_SG is not set +CONFIG_FRAME_POINTER=y +# CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_KPROBES_SANITY_TEST is not set +# CONFIG_BACKTRACE_SELF_TEST is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set -# CONFIG_DEBUG_RODATA is not set -# CONFIG_IOMMU_DEBUG is not set +# CONFIG_LATENCYTOP is not set +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +# CONFIG_SAMPLES is not set +# CONFIG_KGDB is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_NONPROMISC_DEVMEM is not set +CONFIG_EARLY_PRINTK=y CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_DEBUG_STACK_USAGE is not set +CONFIG_DEBUG_STACK_USAGE=y +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_DEBUG_PER_CPU_MAPS is not set +# CONFIG_X86_PTDUMP is not set +CONFIG_DEBUG_RODATA=y +# CONFIG_DIRECT_GBPAGES is not set +# CONFIG_DEBUG_RODATA_TEST is not set +CONFIG_DEBUG_NX_TEST=m +CONFIG_X86_MPPARSE=y +# CONFIG_IOMMU_DEBUG is not set +CONFIG_IO_DELAY_TYPE_0X80=0 +CONFIG_IO_DELAY_TYPE_0XED=1 +CONFIG_IO_DELAY_TYPE_UDELAY=2 +CONFIG_IO_DELAY_TYPE_NONE=3 +CONFIG_IO_DELAY_0X80=y +# CONFIG_IO_DELAY_0XED is not set +# CONFIG_IO_DELAY_UDELAY is not set +# CONFIG_IO_DELAY_NONE is not set +CONFIG_DEFAULT_IO_DELAY_TYPE=0 +CONFIG_DEBUG_BOOT_PARAMS=y +# CONFIG_CPA_DEBUG is not set # # Security options # -# CONFIG_KEYS is not set -# CONFIG_SECURITY is not set -# CONFIG_CRYPTO is not set +CONFIG_KEYS=y +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +# CONFIG_SECURITY_NETWORK_XFRM is not set +CONFIG_SECURITY_CAPABILITIES=y +CONFIG_SECURITY_FILE_CAPABILITIES=y +# CONFIG_SECURITY_ROOTPLUG is not set +CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1 +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_SECURITY_SELINUX_DEVELOP=y +CONFIG_SECURITY_SELINUX_AVC_STATS=y +CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +# CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT is not set +# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set +# CONFIG_SECURITY_SMACK is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_MANAGER=y +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +CONFIG_CRYPTO_AUTHENC=y +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +CONFIG_CRYPTO_ECB=y +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +CONFIG_CRYPTO_HMAC=y +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_MICHAEL_MIC is not set +CONFIG_CRYPTO_SHA1=y +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +CONFIG_CRYPTO_AES=y +# CONFIG_CRYPTO_AES_X86_64 is not set +# CONFIG_CRYPTO_ANUBIS is not set +CONFIG_CRYPTO_ARC4=y +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SALSA20_X86_64 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_TWOFISH_X86_64 is not set + +# +# Compression +# +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_LZO is not set +CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_DEV_HIFN_795X is not set +CONFIG_HAVE_KVM=y +CONFIG_VIRTUALIZATION=y +# CONFIG_KVM is not set +# CONFIG_VIRTIO_PCI is not set +# CONFIG_VIRTIO_BALLOON is not set # # Library routines # CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_FIRST_BIT=y +CONFIG_GENERIC_FIND_NEXT_BIT=y # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set # CONFIG_CRC_ITU_T is not set diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index b5e329da166..3aefbce2de4 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -370,13 +370,11 @@ quiet_ni_syscall: PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx - PTREGSCALL stub32_sigsuspend, sys32_sigsuspend, %rcx PTREGSCALL stub32_execve, sys32_execve, %rcx PTREGSCALL stub32_fork, sys_fork, %rdi PTREGSCALL stub32_clone, sys32_clone, %rdx PTREGSCALL stub32_vfork, sys_vfork, %rdi PTREGSCALL stub32_iopl, sys_iopl, %rsi - PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx ENTRY(ia32_ptregs_common) popq %r11 @@ -476,7 +474,7 @@ ia32_sys_call_table: .quad sys_ssetmask .quad sys_setreuid16 /* 70 */ .quad sys_setregid16 - .quad stub32_sigsuspend + .quad sys32_sigsuspend .quad compat_sys_sigpending .quad sys_sethostname .quad compat_sys_setrlimit /* 75 */ @@ -583,7 +581,7 @@ ia32_sys_call_table: .quad sys32_rt_sigpending .quad compat_sys_rt_sigtimedwait .quad sys32_rt_sigqueueinfo - .quad stub32_rt_sigsuspend + .quad sys_rt_sigsuspend .quad sys32_pread /* 180 */ .quad sys32_pwrite .quad sys_chown16 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4078e98f012..9a71be82792 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux kernel. # -extra-y := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds +extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinux.lds CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) @@ -22,10 +22,9 @@ obj-y += setup_$(BITS).o i8259.o irqinit_$(BITS).o setup.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o -obj-y += bootflag.o e820_$(BITS).o +obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o obj-y += alternative.o i8253.o pci-nommu.o -obj-$(CONFIG_X86_64) += bugs_64.o obj-y += tsc_$(BITS).o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o @@ -53,7 +52,7 @@ obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o -obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o +obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o @@ -100,6 +99,7 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o + obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ff1a7b49a46..6516359922b 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -83,6 +83,8 @@ int acpi_lapic; int acpi_ioapic; int acpi_strict; +static int disable_irq0_through_ioapic __initdata; + u8 acpi_sci_flags __initdata; int acpi_sci_override_gsi __initdata; int acpi_skip_timer_override __initdata; @@ -338,8 +340,6 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e #ifdef CONFIG_X86_IO_APIC -struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; - static int __init acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) { @@ -858,6 +858,372 @@ static int __init acpi_parse_madt_lapic_entries(void) #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_IO_APIC +#define MP_ISA_BUS 0 + +#ifdef CONFIG_X86_ES7000 +extern int es7000_plat; +#endif + +static struct { + int apic_id; + int gsi_base; + int gsi_end; + DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); +} mp_ioapic_routing[MAX_IO_APICS]; + +static int mp_find_ioapic(int gsi) +{ + int i = 0; + + /* Find the IOAPIC that manages this GSI. */ + for (i = 0; i < nr_ioapics; i++) { + if ((gsi >= mp_ioapic_routing[i].gsi_base) + && (gsi <= mp_ioapic_routing[i].gsi_end)) + return i; + } + + printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); + return -1; +} + +static u8 __init uniq_ioapic_id(u8 id) +{ +#ifdef CONFIG_X86_32 + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return io_apic_get_unique_id(nr_ioapics, id); + else + return id; +#else + int i; + DECLARE_BITMAP(used, 256); + bitmap_zero(used, 256); + for (i = 0; i < nr_ioapics; i++) { + struct mp_config_ioapic *ia = &mp_ioapics[i]; + __set_bit(ia->mp_apicid, used); + } + if (!test_bit(id, used)) + return id; + return find_first_zero_bit(used, 256); +#endif +} + +static int bad_ioapic(unsigned long address) +{ + if (nr_ioapics >= MAX_IO_APICS) { + printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " + "(found %d)\n", MAX_IO_APICS, nr_ioapics); + panic("Recompile kernel with bigger MAX_IO_APICS!\n"); + } + if (!address) { + printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" + " found in table, skipping!\n"); + return 1; + } + return 0; +} + +void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) +{ + int idx = 0; + + if (bad_ioapic(address)) + return; + + idx = nr_ioapics; + + mp_ioapics[idx].mp_type = MP_IOAPIC; + mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; + mp_ioapics[idx].mp_apicaddr = address; + + set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); + mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); +#ifdef CONFIG_X86_32 + mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); +#else + mp_ioapics[idx].mp_apicver = 0; +#endif + /* + * Build basic GSI lookup table to facilitate gsi->io_apic lookups + * and to prevent reprogramming of IOAPIC pins (PCI GSIs). + */ + mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid; + mp_ioapic_routing[idx].gsi_base = gsi_base; + mp_ioapic_routing[idx].gsi_end = gsi_base + + io_apic_get_redir_entries(idx); + + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " + "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid, + mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr, + mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); + + nr_ioapics++; +} + +static void assign_to_mp_irq(struct mp_config_intsrc *m, + struct mp_config_intsrc *mp_irq) +{ + memcpy(mp_irq, m, sizeof(struct mp_config_intsrc)); +} + +static int mp_irq_cmp(struct mp_config_intsrc *mp_irq, + struct mp_config_intsrc *m) +{ + return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc)); +} + +static void save_mp_irq(struct mp_config_intsrc *m) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) { + if (!mp_irq_cmp(&mp_irqs[i], m)) + return; + } + + assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]); + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!!\n"); +} + +void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) +{ + int ioapic; + int pin; + struct mp_config_intsrc mp_irq; + + /* Skip the 8254 timer interrupt (IRQ 0) if requested. */ + if (bus_irq == 0 && disable_irq0_through_ioapic) + return; + + /* + * Convert 'gsi' to 'ioapic.pin'. + */ + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) + return; + pin = gsi - mp_ioapic_routing[ioapic].gsi_base; + + /* + * TBD: This check is for faulty timer entries, where the override + * erroneously sets the trigger to level, resulting in a HUGE + * increase of timer interrupts! + */ + if ((bus_irq == 0) && (trigger == 3)) + trigger = 1; + + mp_irq.mp_type = MP_INTSRC; + mp_irq.mp_irqtype = mp_INT; + mp_irq.mp_irqflag = (trigger << 2) | polarity; + mp_irq.mp_srcbus = MP_ISA_BUS; + mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ + mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */ + mp_irq.mp_dstirq = pin; /* INTIN# */ + + save_mp_irq(&mp_irq); +} + +void __init mp_config_acpi_legacy_irqs(void) +{ + int i; + int ioapic; + unsigned int dstapic; + struct mp_config_intsrc mp_irq; + +#if defined (CONFIG_MCA) || defined (CONFIG_EISA) + /* + * Fabricate the legacy ISA bus (bus #31). + */ + mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; +#endif + set_bit(MP_ISA_BUS, mp_bus_not_pci); + Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); + +#ifdef CONFIG_X86_ES7000 + /* + * Older generations of ES7000 have no legacy identity mappings + */ + if (es7000_plat == 1) + return; +#endif + + /* + * Locate the IOAPIC that manages the ISA IRQs (0-15). + */ + ioapic = mp_find_ioapic(0); + if (ioapic < 0) + return; + dstapic = mp_ioapics[ioapic].mp_apicid; + + /* + * Use the default configuration for the IRQs 0-15. Unless + * overridden by (MADT) interrupt source override entries. + */ + for (i = 0; i < 16; i++) { + int idx; + + /* Skip the 8254 timer interrupt (IRQ 0) if requested. */ + if (i == 0 && disable_irq0_through_ioapic) + continue; + + for (idx = 0; idx < mp_irq_entries; idx++) { + struct mp_config_intsrc *irq = mp_irqs + idx; + + /* Do we already have a mapping for this ISA IRQ? */ + if (irq->mp_srcbus == MP_ISA_BUS + && irq->mp_srcbusirq == i) + break; + + /* Do we already have a mapping for this IOAPIC pin */ + if (irq->mp_dstapic == dstapic && + irq->mp_dstirq == i) + break; + } + + if (idx != mp_irq_entries) { + printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); + continue; /* IRQ already used */ + } + + mp_irq.mp_type = MP_INTSRC; + mp_irq.mp_irqflag = 0; /* Conforming */ + mp_irq.mp_srcbus = MP_ISA_BUS; + mp_irq.mp_dstapic = dstapic; + mp_irq.mp_irqtype = mp_INT; + mp_irq.mp_srcbusirq = i; /* Identity mapped */ + mp_irq.mp_dstirq = i; + + save_mp_irq(&mp_irq); + } +} + +int mp_register_gsi(u32 gsi, int triggering, int polarity) +{ + int ioapic; + int ioapic_pin; +#ifdef CONFIG_X86_32 +#define MAX_GSI_NUM 4096 +#define IRQ_COMPRESSION_START 64 + + static int pci_irq = IRQ_COMPRESSION_START; + /* + * Mapping between Global System Interrupts, which + * represent all possible interrupts, and IRQs + * assigned to actual devices. + */ + static int gsi_to_irq[MAX_GSI_NUM]; +#else + + if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) + return gsi; +#endif + + /* Don't set up the ACPI SCI because it's already set up */ + if (acpi_gbl_FADT.sci_interrupt == gsi) + return gsi; + + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) { + printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); + return gsi; + } + + ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; + +#ifdef CONFIG_X86_32 + if (ioapic_renumber_irq) + gsi = ioapic_renumber_irq(ioapic, gsi); +#endif + + /* + * Avoid pin reprogramming. PRTs typically include entries + * with redundant pin->gsi mappings (but unique PCI devices); + * we only program the IOAPIC on the first. + */ + if (ioapic_pin > MP_MAX_IOAPIC_PIN) { + printk(KERN_ERR "Invalid reference to IOAPIC pin " + "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, + ioapic_pin); + return gsi; + } + if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { + Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", + mp_ioapic_routing[ioapic].apic_id, ioapic_pin); +#ifdef CONFIG_X86_32 + return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); +#else + return gsi; +#endif + } + + set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed); +#ifdef CONFIG_X86_32 + /* + * For GSI >= 64, use IRQ compression + */ + if ((gsi >= IRQ_COMPRESSION_START) + && (triggering == ACPI_LEVEL_SENSITIVE)) { + /* + * For PCI devices assign IRQs in order, avoiding gaps + * due to unused I/O APIC pins. + */ + int irq = gsi; + if (gsi < MAX_GSI_NUM) { + /* + * Retain the VIA chipset work-around (gsi > 15), but + * avoid a problem where the 8254 timer (IRQ0) is setup + * via an override (so it's not on pin 0 of the ioapic), + * and at the same time, the pin 0 interrupt is a PCI + * type. The gsi > 15 test could cause these two pins + * to be shared as IRQ0, and they are not shareable. + * So test for this condition, and if necessary, avoid + * the pin collision. + */ + gsi = pci_irq++; + /* + * Don't assign IRQ used by ACPI SCI + */ + if (gsi == acpi_gbl_FADT.sci_interrupt) + gsi = pci_irq++; + gsi_to_irq[irq] = gsi; + } else { + printk(KERN_ERR "GSI %u is too high\n", gsi); + return gsi; + } + } +#endif + io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, + triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, + polarity == ACPI_ACTIVE_HIGH ? 0 : 1); + return gsi; +} + +int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, + u32 gsi, int triggering, int polarity) +{ +#ifdef CONFIG_X86_MPPARSE + struct mp_config_intsrc mp_irq; + int ioapic; + + if (!acpi_ioapic) + return 0; + + /* print the entry should happen on mptable identically */ + mp_irq.mp_type = MP_INTSRC; + mp_irq.mp_irqtype = mp_INT; + mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | + (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); + mp_irq.mp_srcbus = number; + mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); + ioapic = mp_find_ioapic(gsi); + mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id; + mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; + + save_mp_irq(&mp_irq); +#endif + return 0; +} + /* * Parse IOAPIC related entries in MADT * returns 0 on success, < 0 on error @@ -1059,6 +1425,17 @@ static int __init force_acpi_ht(const struct dmi_system_id *d) } /* + * Don't register any I/O APIC entries for the 8254 timer IRQ. + */ +static int __init +dmi_disable_irq0_through_ioapic(const struct dmi_system_id *d) +{ + pr_notice("%s detected: disabling IRQ 0 through I/O APIC\n", d->ident); + disable_irq0_through_ioapic = 1; + return 0; +} + +/* * If your system is blacklisted here, but you find that acpi=force * works for you, please contact acpi-devel@sourceforge.net */ @@ -1225,6 +1602,32 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), }, }, + /* + * HP laptops which use a DSDT reporting as HP/SB400/10000, + * which includes some code which overrides all temperature + * trip points to 16C if the INTIN2 input of the I/O APIC + * is enabled. This input is incorrectly designated the + * ISA IRQ 0 via an interrupt source override even though + * it is wired to the output of the master 8259A and INTIN0 + * is not connected at all. Abandon any attempts to route + * IRQ 0 through the I/O APIC therefore. + */ + { + .callback = dmi_disable_irq0_through_ioapic, + .ident = "HP NX6125 laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6125"), + }, + }, + { + .callback = dmi_disable_irq0_through_ioapic, + .ident = "HP NX6325 laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"), + }, + }, {} }; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c new file mode 100644 index 00000000000..f2766d84c7a --- /dev/null +++ b/arch/x86/kernel/amd_iommu.c @@ -0,0 +1,962 @@ +/* + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <joerg.roedel@amd.com> + * Leo Duran <leo.duran@amd.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/pci.h> +#include <linux/gfp.h> +#include <linux/bitops.h> +#include <linux/scatterlist.h> +#include <linux/iommu-helper.h> +#include <asm/proto.h> +#include <asm/gart.h> +#include <asm/amd_iommu_types.h> +#include <asm/amd_iommu.h> + +#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) + +#define to_pages(addr, size) \ + (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) + +static DEFINE_RWLOCK(amd_iommu_devtable_lock); + +struct command { + u32 data[4]; +}; + +static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, + struct unity_map_entry *e); + +static int iommu_has_npcache(struct amd_iommu *iommu) +{ + return iommu->cap & IOMMU_CAP_NPCACHE; +} + +static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) +{ + u32 tail, head; + u8 *target; + + tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + target = (iommu->cmd_buf + tail); + memcpy_toio(target, cmd, sizeof(*cmd)); + tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; + head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); + if (tail == head) + return -ENOMEM; + writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + + return 0; +} + +static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&iommu->lock, flags); + ret = __iommu_queue_command(iommu, cmd); + spin_unlock_irqrestore(&iommu->lock, flags); + + return ret; +} + +static int iommu_completion_wait(struct amd_iommu *iommu) +{ + int ret; + struct command cmd; + volatile u64 ready = 0; + unsigned long ready_phys = virt_to_phys(&ready); + + memset(&cmd, 0, sizeof(cmd)); + cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK; + cmd.data[1] = HIGH_U32(ready_phys); + cmd.data[2] = 1; /* value written to 'ready' */ + CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); + + iommu->need_sync = 0; + + ret = iommu_queue_command(iommu, &cmd); + + if (ret) + return ret; + + while (!ready) + cpu_relax(); + + return 0; +} + +static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) +{ + struct command cmd; + + BUG_ON(iommu == NULL); + + memset(&cmd, 0, sizeof(cmd)); + CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); + cmd.data[0] = devid; + + iommu->need_sync = 1; + + return iommu_queue_command(iommu, &cmd); +} + +static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, + u64 address, u16 domid, int pde, int s) +{ + struct command cmd; + + memset(&cmd, 0, sizeof(cmd)); + address &= PAGE_MASK; + CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); + cmd.data[1] |= domid; + cmd.data[2] = LOW_U32(address); + cmd.data[3] = HIGH_U32(address); + if (s) + cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; + if (pde) + cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; + + iommu->need_sync = 1; + + return iommu_queue_command(iommu, &cmd); +} + +static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, + u64 address, size_t size) +{ + int s = 0; + unsigned pages = to_pages(address, size); + + address &= PAGE_MASK; + + if (pages > 1) { + /* + * If we have to flush more than one page, flush all + * TLB entries for this domain + */ + address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + s = 1; + } + + iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s); + + return 0; +} + +static int iommu_map(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long phys_addr, + int prot) +{ + u64 __pte, *pte, *page; + + bus_addr = PAGE_ALIGN(bus_addr); + phys_addr = PAGE_ALIGN(bus_addr); + + /* only support 512GB address spaces for now */ + if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) + return -EINVAL; + + pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; + + if (!IOMMU_PTE_PRESENT(*pte)) { + page = (u64 *)get_zeroed_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + *pte = IOMMU_L2_PDE(virt_to_phys(page)); + } + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; + + if (!IOMMU_PTE_PRESENT(*pte)) { + page = (u64 *)get_zeroed_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + *pte = IOMMU_L1_PDE(virt_to_phys(page)); + } + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)]; + + if (IOMMU_PTE_PRESENT(*pte)) + return -EBUSY; + + __pte = phys_addr | IOMMU_PTE_P; + if (prot & IOMMU_PROT_IR) + __pte |= IOMMU_PTE_IR; + if (prot & IOMMU_PROT_IW) + __pte |= IOMMU_PTE_IW; + + *pte = __pte; + + return 0; +} + +static int iommu_for_unity_map(struct amd_iommu *iommu, + struct unity_map_entry *entry) +{ + u16 bdf, i; + + for (i = entry->devid_start; i <= entry->devid_end; ++i) { + bdf = amd_iommu_alias_table[i]; + if (amd_iommu_rlookup_table[bdf] == iommu) + return 1; + } + + return 0; +} + +static int iommu_init_unity_mappings(struct amd_iommu *iommu) +{ + struct unity_map_entry *entry; + int ret; + + list_for_each_entry(entry, &amd_iommu_unity_map, list) { + if (!iommu_for_unity_map(iommu, entry)) + continue; + ret = dma_ops_unity_map(iommu->default_dom, entry); + if (ret) + return ret; + } + + return 0; +} + +static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, + struct unity_map_entry *e) +{ + u64 addr; + int ret; + + for (addr = e->address_start; addr < e->address_end; + addr += PAGE_SIZE) { + ret = iommu_map(&dma_dom->domain, addr, addr, e->prot); + if (ret) + return ret; + /* + * if unity mapping is in aperture range mark the page + * as allocated in the aperture + */ + if (addr < dma_dom->aperture_size) + __set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap); + } + + return 0; +} + +static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, + u16 devid) +{ + struct unity_map_entry *e; + int ret; + + list_for_each_entry(e, &amd_iommu_unity_map, list) { + if (!(devid >= e->devid_start && devid <= e->devid_end)) + continue; + ret = dma_ops_unity_map(dma_dom, e); + if (ret) + return ret; + } + + return 0; +} + +static unsigned long dma_mask_to_pages(unsigned long mask) +{ + return (mask >> PAGE_SHIFT) + + (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT); +} + +static unsigned long dma_ops_alloc_addresses(struct device *dev, + struct dma_ops_domain *dom, + unsigned int pages) +{ + unsigned long limit = dma_mask_to_pages(*dev->dma_mask); + unsigned long address; + unsigned long size = dom->aperture_size >> PAGE_SHIFT; + unsigned long boundary_size; + + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + PAGE_SIZE) >> PAGE_SHIFT; + limit = limit < size ? limit : size; + + if (dom->next_bit >= limit) + dom->next_bit = 0; + + address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, + 0 , boundary_size, 0); + if (address == -1) + address = iommu_area_alloc(dom->bitmap, limit, 0, pages, + 0, boundary_size, 0); + + if (likely(address != -1)) { + dom->next_bit = address + pages; + address <<= PAGE_SHIFT; + } else + address = bad_dma_address; + + WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); + + return address; +} + +static void dma_ops_free_addresses(struct dma_ops_domain *dom, + unsigned long address, + unsigned int pages) +{ + address >>= PAGE_SHIFT; + iommu_area_free(dom->bitmap, address, pages); +} + +static u16 domain_id_alloc(void) +{ + unsigned long flags; + int id; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID); + BUG_ON(id == 0); + if (id > 0 && id < MAX_DOMAIN_ID) + __set_bit(id, amd_iommu_pd_alloc_bitmap); + else + id = 0; + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return id; +} + +static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, + unsigned long start_page, + unsigned int pages) +{ + unsigned int last_page = dom->aperture_size >> PAGE_SHIFT; + + if (start_page + pages > last_page) + pages = last_page - start_page; + + set_bit_string(dom->bitmap, start_page, pages); +} + +static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) +{ + int i, j; + u64 *p1, *p2, *p3; + + p1 = dma_dom->domain.pt_root; + + if (!p1) + return; + + for (i = 0; i < 512; ++i) { + if (!IOMMU_PTE_PRESENT(p1[i])) + continue; + + p2 = IOMMU_PTE_PAGE(p1[i]); + for (j = 0; j < 512; ++i) { + if (!IOMMU_PTE_PRESENT(p2[j])) + continue; + p3 = IOMMU_PTE_PAGE(p2[j]); + free_page((unsigned long)p3); + } + + free_page((unsigned long)p2); + } + + free_page((unsigned long)p1); +} + +static void dma_ops_domain_free(struct dma_ops_domain *dom) +{ + if (!dom) + return; + + dma_ops_free_pagetable(dom); + + kfree(dom->pte_pages); + + kfree(dom->bitmap); + + kfree(dom); +} + +static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, + unsigned order) +{ + struct dma_ops_domain *dma_dom; + unsigned i, num_pte_pages; + u64 *l2_pde; + u64 address; + + /* + * Currently the DMA aperture must be between 32 MB and 1GB in size + */ + if ((order < 25) || (order > 30)) + return NULL; + + dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); + if (!dma_dom) + return NULL; + + spin_lock_init(&dma_dom->domain.lock); + + dma_dom->domain.id = domain_id_alloc(); + if (dma_dom->domain.id == 0) + goto free_dma_dom; + dma_dom->domain.mode = PAGE_MODE_3_LEVEL; + dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); + dma_dom->domain.priv = dma_dom; + if (!dma_dom->domain.pt_root) + goto free_dma_dom; + dma_dom->aperture_size = (1ULL << order); + dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8), + GFP_KERNEL); + if (!dma_dom->bitmap) + goto free_dma_dom; + /* + * mark the first page as allocated so we never return 0 as + * a valid dma-address. So we can use 0 as error value + */ + dma_dom->bitmap[0] = 1; + dma_dom->next_bit = 0; + + if (iommu->exclusion_start && + iommu->exclusion_start < dma_dom->aperture_size) { + unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; + int pages = to_pages(iommu->exclusion_start, + iommu->exclusion_length); + dma_ops_reserve_addresses(dma_dom, startpage, pages); + } + + num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); + dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *), + GFP_KERNEL); + if (!dma_dom->pte_pages) + goto free_dma_dom; + + l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL); + if (l2_pde == NULL) + goto free_dma_dom; + + dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde)); + + for (i = 0; i < num_pte_pages; ++i) { + dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL); + if (!dma_dom->pte_pages[i]) + goto free_dma_dom; + address = virt_to_phys(dma_dom->pte_pages[i]); + l2_pde[i] = IOMMU_L1_PDE(address); + } + + return dma_dom; + +free_dma_dom: + dma_ops_domain_free(dma_dom); + + return NULL; +} + +static struct protection_domain *domain_for_device(u16 devid) +{ + struct protection_domain *dom; + unsigned long flags; + + read_lock_irqsave(&amd_iommu_devtable_lock, flags); + dom = amd_iommu_pd_table[devid]; + read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return dom; +} + +static void set_device_domain(struct amd_iommu *iommu, + struct protection_domain *domain, + u16 devid) +{ + unsigned long flags; + + u64 pte_root = virt_to_phys(domain->pt_root); + + pte_root |= (domain->mode & 0x07) << 9; + pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + amd_iommu_dev_table[devid].data[0] = pte_root; + amd_iommu_dev_table[devid].data[1] = pte_root >> 32; + amd_iommu_dev_table[devid].data[2] = domain->id; + + amd_iommu_pd_table[devid] = domain; + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + iommu_queue_inv_dev_entry(iommu, devid); + + iommu->need_sync = 1; +} + +static int get_device_resources(struct device *dev, + struct amd_iommu **iommu, + struct protection_domain **domain, + u16 *bdf) +{ + struct dma_ops_domain *dma_dom; + struct pci_dev *pcidev; + u16 _bdf; + + BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask); + + pcidev = to_pci_dev(dev); + _bdf = (pcidev->bus->number << 8) | pcidev->devfn; + + if (_bdf >= amd_iommu_last_bdf) { + *iommu = NULL; + *domain = NULL; + *bdf = 0xffff; + return 0; + } + + *bdf = amd_iommu_alias_table[_bdf]; + + *iommu = amd_iommu_rlookup_table[*bdf]; + if (*iommu == NULL) + return 0; + dma_dom = (*iommu)->default_dom; + *domain = domain_for_device(*bdf); + if (*domain == NULL) { + *domain = &dma_dom->domain; + set_device_domain(*iommu, *domain, *bdf); + printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " + "device ", (*domain)->id); + print_devid(_bdf, 1); + } + + return 1; +} + +static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, + struct dma_ops_domain *dom, + unsigned long address, + phys_addr_t paddr, + int direction) +{ + u64 *pte, __pte; + + WARN_ON(address > dom->aperture_size); + + paddr &= PAGE_MASK; + + pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; + pte += IOMMU_PTE_L0_INDEX(address); + + __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; + + if (direction == DMA_TO_DEVICE) + __pte |= IOMMU_PTE_IR; + else if (direction == DMA_FROM_DEVICE) + __pte |= IOMMU_PTE_IW; + else if (direction == DMA_BIDIRECTIONAL) + __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW; + + WARN_ON(*pte); + + *pte = __pte; + + return (dma_addr_t)address; +} + +static void dma_ops_domain_unmap(struct amd_iommu *iommu, + struct dma_ops_domain *dom, + unsigned long address) +{ + u64 *pte; + + if (address >= dom->aperture_size) + return; + + WARN_ON(address & 0xfffULL || address > dom->aperture_size); + + pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; + pte += IOMMU_PTE_L0_INDEX(address); + + WARN_ON(!*pte); + + *pte = 0ULL; +} + +static dma_addr_t __map_single(struct device *dev, + struct amd_iommu *iommu, + struct dma_ops_domain *dma_dom, + phys_addr_t paddr, + size_t size, + int dir) +{ + dma_addr_t offset = paddr & ~PAGE_MASK; + dma_addr_t address, start; + unsigned int pages; + int i; + + pages = to_pages(paddr, size); + paddr &= PAGE_MASK; + + address = dma_ops_alloc_addresses(dev, dma_dom, pages); + if (unlikely(address == bad_dma_address)) + goto out; + + start = address; + for (i = 0; i < pages; ++i) { + dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); + paddr += PAGE_SIZE; + start += PAGE_SIZE; + } + address += offset; + +out: + return address; +} + +static void __unmap_single(struct amd_iommu *iommu, + struct dma_ops_domain *dma_dom, + dma_addr_t dma_addr, + size_t size, + int dir) +{ + dma_addr_t i, start; + unsigned int pages; + + if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) + return; + + pages = to_pages(dma_addr, size); + dma_addr &= PAGE_MASK; + start = dma_addr; + + for (i = 0; i < pages; ++i) { + dma_ops_domain_unmap(iommu, dma_dom, start); + start += PAGE_SIZE; + } + + dma_ops_free_addresses(dma_dom, dma_addr, pages); +} + +static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, + size_t size, int dir) +{ + unsigned long flags; + struct amd_iommu *iommu; + struct protection_domain *domain; + u16 devid; + dma_addr_t addr; + + get_device_resources(dev, &iommu, &domain, &devid); + + if (iommu == NULL || domain == NULL) + return (dma_addr_t)paddr; + + spin_lock_irqsave(&domain->lock, flags); + addr = __map_single(dev, iommu, domain->priv, paddr, size, dir); + if (addr == bad_dma_address) + goto out; + + if (iommu_has_npcache(iommu)) + iommu_flush_pages(iommu, domain->id, addr, size); + + if (iommu->need_sync) + iommu_completion_wait(iommu); + +out: + spin_unlock_irqrestore(&domain->lock, flags); + + return addr; +} + +static void unmap_single(struct device *dev, dma_addr_t dma_addr, + size_t size, int dir) +{ + unsigned long flags; + struct amd_iommu *iommu; + struct protection_domain *domain; + u16 devid; + + if (!get_device_resources(dev, &iommu, &domain, &devid)) + return; + + spin_lock_irqsave(&domain->lock, flags); + + __unmap_single(iommu, domain->priv, dma_addr, size, dir); + + iommu_flush_pages(iommu, domain->id, dma_addr, size); + + if (iommu->need_sync) + iommu_completion_wait(iommu); + + spin_unlock_irqrestore(&domain->lock, flags); +} + +static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, + int nelems, int dir) +{ + struct scatterlist *s; + int i; + + for_each_sg(sglist, s, nelems, i) { + s->dma_address = (dma_addr_t)sg_phys(s); + s->dma_length = s->length; + } + + return nelems; +} + +static int map_sg(struct device *dev, struct scatterlist *sglist, + int nelems, int dir) +{ + unsigned long flags; + struct amd_iommu *iommu; + struct protection_domain *domain; + u16 devid; + int i; + struct scatterlist *s; + phys_addr_t paddr; + int mapped_elems = 0; + + get_device_resources(dev, &iommu, &domain, &devid); + + if (!iommu || !domain) + return map_sg_no_iommu(dev, sglist, nelems, dir); + + spin_lock_irqsave(&domain->lock, flags); + + for_each_sg(sglist, s, nelems, i) { + paddr = sg_phys(s); + + s->dma_address = __map_single(dev, iommu, domain->priv, + paddr, s->length, dir); + + if (s->dma_address) { + s->dma_length = s->length; + mapped_elems++; + } else + goto unmap; + if (iommu_has_npcache(iommu)) + iommu_flush_pages(iommu, domain->id, s->dma_address, + s->dma_length); + } + + if (iommu->need_sync) + iommu_completion_wait(iommu); + +out: + spin_unlock_irqrestore(&domain->lock, flags); + + return mapped_elems; +unmap: + for_each_sg(sglist, s, mapped_elems, i) { + if (s->dma_address) + __unmap_single(iommu, domain->priv, s->dma_address, + s->dma_length, dir); + s->dma_address = s->dma_length = 0; + } + + mapped_elems = 0; + + goto out; +} + +static void unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems, int dir) +{ + unsigned long flags; + struct amd_iommu *iommu; + struct protection_domain *domain; + struct scatterlist *s; + u16 devid; + int i; + + if (!get_device_resources(dev, &iommu, &domain, &devid)) + return; + + spin_lock_irqsave(&domain->lock, flags); + + for_each_sg(sglist, s, nelems, i) { + __unmap_single(iommu, domain->priv, s->dma_address, + s->dma_length, dir); + iommu_flush_pages(iommu, domain->id, s->dma_address, + s->dma_length); + s->dma_address = s->dma_length = 0; + } + + if (iommu->need_sync) + iommu_completion_wait(iommu); + + spin_unlock_irqrestore(&domain->lock, flags); +} + +static void *alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t flag) +{ + unsigned long flags; + void *virt_addr; + struct amd_iommu *iommu; + struct protection_domain *domain; + u16 devid; + phys_addr_t paddr; + + virt_addr = (void *)__get_free_pages(flag, get_order(size)); + if (!virt_addr) + return 0; + + memset(virt_addr, 0, size); + paddr = virt_to_phys(virt_addr); + + get_device_resources(dev, &iommu, &domain, &devid); + + if (!iommu || !domain) { + *dma_addr = (dma_addr_t)paddr; + return virt_addr; + } + + spin_lock_irqsave(&domain->lock, flags); + + *dma_addr = __map_single(dev, iommu, domain->priv, paddr, + size, DMA_BIDIRECTIONAL); + + if (*dma_addr == bad_dma_address) { + free_pages((unsigned long)virt_addr, get_order(size)); + virt_addr = NULL; + goto out; + } + + if (iommu_has_npcache(iommu)) + iommu_flush_pages(iommu, domain->id, *dma_addr, size); + + if (iommu->need_sync) + iommu_completion_wait(iommu); + +out: + spin_unlock_irqrestore(&domain->lock, flags); + + return virt_addr; +} + +static void free_coherent(struct device *dev, size_t size, + void *virt_addr, dma_addr_t dma_addr) +{ + unsigned long flags; + struct amd_iommu *iommu; + struct protection_domain *domain; + u16 devid; + + get_device_resources(dev, &iommu, &domain, &devid); + + if (!iommu || !domain) + goto free_mem; + + spin_lock_irqsave(&domain->lock, flags); + + __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); + iommu_flush_pages(iommu, domain->id, dma_addr, size); + + if (iommu->need_sync) + iommu_completion_wait(iommu); + + spin_unlock_irqrestore(&domain->lock, flags); + +free_mem: + free_pages((unsigned long)virt_addr, get_order(size)); +} + +/* + * If the driver core informs the DMA layer if a driver grabs a device + * we don't need to preallocate the protection domains anymore. + * For now we have to. + */ +void prealloc_protection_domains(void) +{ + struct pci_dev *dev = NULL; + struct dma_ops_domain *dma_dom; + struct amd_iommu *iommu; + int order = amd_iommu_aperture_order; + u16 devid; + + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + devid = (dev->bus->number << 8) | dev->devfn; + if (devid >= amd_iommu_last_bdf) + continue; + devid = amd_iommu_alias_table[devid]; + if (domain_for_device(devid)) + continue; + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + continue; + dma_dom = dma_ops_domain_alloc(iommu, order); + if (!dma_dom) + continue; + init_unity_mappings_for_device(dma_dom, devid); + set_device_domain(iommu, &dma_dom->domain, devid); + printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ", + dma_dom->domain.id); + print_devid(devid, 1); + } +} + +static struct dma_mapping_ops amd_iommu_dma_ops = { + .alloc_coherent = alloc_coherent, + .free_coherent = free_coherent, + .map_single = map_single, + .unmap_single = unmap_single, + .map_sg = map_sg, + .unmap_sg = unmap_sg, +}; + +int __init amd_iommu_init_dma_ops(void) +{ + struct amd_iommu *iommu; + int order = amd_iommu_aperture_order; + int ret; + + list_for_each_entry(iommu, &amd_iommu_list, list) { + iommu->default_dom = dma_ops_domain_alloc(iommu, order); + if (iommu->default_dom == NULL) + return -ENOMEM; + ret = iommu_init_unity_mappings(iommu); + if (ret) + goto free_domains; + } + + if (amd_iommu_isolate) + prealloc_protection_domains(); + + iommu_detected = 1; + force_iommu = 1; + bad_dma_address = 0; +#ifdef CONFIG_GART_IOMMU + gart_iommu_aperture_disabled = 1; + gart_iommu_aperture = 0; +#endif + + dma_ops = &amd_iommu_dma_ops; + + return 0; + +free_domains: + + list_for_each_entry(iommu, &amd_iommu_list, list) { + if (iommu->default_dom) + dma_ops_domain_free(iommu->default_dom); + } + + return ret; +} diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c new file mode 100644 index 00000000000..2a13e430437 --- /dev/null +++ b/arch/x86/kernel/amd_iommu_init.c @@ -0,0 +1,875 @@ +/* + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <joerg.roedel@amd.com> + * Leo Duran <leo.duran@amd.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/pci.h> +#include <linux/acpi.h> +#include <linux/gfp.h> +#include <linux/list.h> +#include <linux/sysdev.h> +#include <asm/pci-direct.h> +#include <asm/amd_iommu_types.h> +#include <asm/amd_iommu.h> +#include <asm/gart.h> + +/* + * definitions for the ACPI scanning code + */ +#define UPDATE_LAST_BDF(x) do {\ + if ((x) > amd_iommu_last_bdf) \ + amd_iommu_last_bdf = (x); \ + } while (0); + +#define DEVID(bus, devfn) (((bus) << 8) | (devfn)) +#define PCI_BUS(x) (((x) >> 8) & 0xff) +#define IVRS_HEADER_LENGTH 48 +#define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x)))) + +#define ACPI_IVHD_TYPE 0x10 +#define ACPI_IVMD_TYPE_ALL 0x20 +#define ACPI_IVMD_TYPE 0x21 +#define ACPI_IVMD_TYPE_RANGE 0x22 + +#define IVHD_DEV_ALL 0x01 +#define IVHD_DEV_SELECT 0x02 +#define IVHD_DEV_SELECT_RANGE_START 0x03 +#define IVHD_DEV_RANGE_END 0x04 +#define IVHD_DEV_ALIAS 0x42 +#define IVHD_DEV_ALIAS_RANGE 0x43 +#define IVHD_DEV_EXT_SELECT 0x46 +#define IVHD_DEV_EXT_SELECT_RANGE 0x47 + +#define IVHD_FLAG_HT_TUN_EN 0x00 +#define IVHD_FLAG_PASSPW_EN 0x01 +#define IVHD_FLAG_RESPASSPW_EN 0x02 +#define IVHD_FLAG_ISOC_EN 0x03 + +#define IVMD_FLAG_EXCL_RANGE 0x08 +#define IVMD_FLAG_UNITY_MAP 0x01 + +#define ACPI_DEVFLAG_INITPASS 0x01 +#define ACPI_DEVFLAG_EXTINT 0x02 +#define ACPI_DEVFLAG_NMI 0x04 +#define ACPI_DEVFLAG_SYSMGT1 0x10 +#define ACPI_DEVFLAG_SYSMGT2 0x20 +#define ACPI_DEVFLAG_LINT0 0x40 +#define ACPI_DEVFLAG_LINT1 0x80 +#define ACPI_DEVFLAG_ATSDIS 0x10000000 + +struct ivhd_header { + u8 type; + u8 flags; + u16 length; + u16 devid; + u16 cap_ptr; + u64 mmio_phys; + u16 pci_seg; + u16 info; + u32 reserved; +} __attribute__((packed)); + +struct ivhd_entry { + u8 type; + u16 devid; + u8 flags; + u32 ext; +} __attribute__((packed)); + +struct ivmd_header { + u8 type; + u8 flags; + u16 length; + u16 devid; + u16 aux; + u64 resv; + u64 range_start; + u64 range_length; +} __attribute__((packed)); + +static int __initdata amd_iommu_detected; + +u16 amd_iommu_last_bdf; +struct list_head amd_iommu_unity_map; +unsigned amd_iommu_aperture_order = 26; +int amd_iommu_isolate; + +struct list_head amd_iommu_list; +struct dev_table_entry *amd_iommu_dev_table; +u16 *amd_iommu_alias_table; +struct amd_iommu **amd_iommu_rlookup_table; +struct protection_domain **amd_iommu_pd_table; +unsigned long *amd_iommu_pd_alloc_bitmap; + +static u32 dev_table_size; +static u32 alias_table_size; +static u32 rlookup_table_size; + +static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) +{ + u64 start = iommu->exclusion_start & PAGE_MASK; + u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; + u64 entry; + + if (!iommu->exclusion_start) + return; + + entry = start | MMIO_EXCL_ENABLE_MASK; + memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, + &entry, sizeof(entry)); + + entry = limit; + memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, + &entry, sizeof(entry)); +} + +static void __init iommu_set_device_table(struct amd_iommu *iommu) +{ + u32 entry; + + BUG_ON(iommu->mmio_base == NULL); + + entry = virt_to_phys(amd_iommu_dev_table); + entry |= (dev_table_size >> 12) - 1; + memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, + &entry, sizeof(entry)); +} + +static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) +{ + u32 ctrl; + + ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl |= (1 << bit); + writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); +} + +static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) +{ + u32 ctrl; + + ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl &= ~(1 << bit); + writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); +} + +void __init iommu_enable(struct amd_iommu *iommu) +{ + printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); + print_devid(iommu->devid, 0); + printk(" cap 0x%hx\n", iommu->cap_ptr); + + iommu_feature_enable(iommu, CONTROL_IOMMU_EN); +} + +static u8 * __init iommu_map_mmio_space(u64 address) +{ + u8 *ret; + + if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) + return NULL; + + ret = ioremap_nocache(address, MMIO_REGION_LENGTH); + if (ret != NULL) + return ret; + + release_mem_region(address, MMIO_REGION_LENGTH); + + return NULL; +} + +static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) +{ + if (iommu->mmio_base) + iounmap(iommu->mmio_base); + release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); +} + +static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) +{ + u32 cap; + + cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); + UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); + + return 0; +} + +static int __init find_last_devid_from_ivhd(struct ivhd_header *h) +{ + u8 *p = (void *)h, *end = (void *)h; + struct ivhd_entry *dev; + + p += sizeof(*h); + end += h->length; + + find_last_devid_on_pci(PCI_BUS(h->devid), + PCI_SLOT(h->devid), + PCI_FUNC(h->devid), + h->cap_ptr); + + while (p < end) { + dev = (struct ivhd_entry *)p; + switch (dev->type) { + case IVHD_DEV_SELECT: + case IVHD_DEV_RANGE_END: + case IVHD_DEV_ALIAS: + case IVHD_DEV_EXT_SELECT: + UPDATE_LAST_BDF(dev->devid); + break; + default: + break; + } + p += 0x04 << (*p >> 6); + } + + WARN_ON(p != end); + + return 0; +} + +static int __init find_last_devid_acpi(struct acpi_table_header *table) +{ + int i; + u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table; + struct ivhd_header *h; + + /* + * Validate checksum here so we don't need to do it when + * we actually parse the table + */ + for (i = 0; i < table->length; ++i) + checksum += p[i]; + if (checksum != 0) + /* ACPI table corrupt */ + return -ENODEV; + + p += IVRS_HEADER_LENGTH; + + end += table->length; + while (p < end) { + h = (struct ivhd_header *)p; + switch (h->type) { + case ACPI_IVHD_TYPE: + find_last_devid_from_ivhd(h); + break; + default: + break; + } + p += h->length; + } + WARN_ON(p != end); + + return 0; +} + +static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) +{ + u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL, + get_order(CMD_BUFFER_SIZE)); + u64 entry = 0; + + if (cmd_buf == NULL) + return NULL; + + iommu->cmd_buf_size = CMD_BUFFER_SIZE; + + memset(cmd_buf, 0, CMD_BUFFER_SIZE); + + entry = (u64)virt_to_phys(cmd_buf); + entry |= MMIO_CMD_SIZE_512; + memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, + &entry, sizeof(entry)); + + iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); + + return cmd_buf; +} + +static void __init free_command_buffer(struct amd_iommu *iommu) +{ + if (iommu->cmd_buf) + free_pages((unsigned long)iommu->cmd_buf, + get_order(CMD_BUFFER_SIZE)); +} + +static void set_dev_entry_bit(u16 devid, u8 bit) +{ + int i = (bit >> 5) & 0x07; + int _bit = bit & 0x1f; + + amd_iommu_dev_table[devid].data[i] |= (1 << _bit); +} + +static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags) +{ + if (flags & ACPI_DEVFLAG_INITPASS) + set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); + if (flags & ACPI_DEVFLAG_EXTINT) + set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS); + if (flags & ACPI_DEVFLAG_NMI) + set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS); + if (flags & ACPI_DEVFLAG_SYSMGT1) + set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1); + if (flags & ACPI_DEVFLAG_SYSMGT2) + set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2); + if (flags & ACPI_DEVFLAG_LINT0) + set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); + if (flags & ACPI_DEVFLAG_LINT1) + set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); +} + +static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) +{ + amd_iommu_rlookup_table[devid] = iommu; +} + +static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) +{ + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + if (!(m->flags & IVMD_FLAG_EXCL_RANGE)) + return; + + if (iommu) { + set_dev_entry_bit(m->devid, DEV_ENTRY_EX); + iommu->exclusion_start = m->range_start; + iommu->exclusion_length = m->range_length; + } +} + +static void __init init_iommu_from_pci(struct amd_iommu *iommu) +{ + int bus = PCI_BUS(iommu->devid); + int dev = PCI_SLOT(iommu->devid); + int fn = PCI_FUNC(iommu->devid); + int cap_ptr = iommu->cap_ptr; + u32 range; + + iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); + + range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); + iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range)); + iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range)); +} + +static void __init init_iommu_from_acpi(struct amd_iommu *iommu, + struct ivhd_header *h) +{ + u8 *p = (u8 *)h; + u8 *end = p, flags = 0; + u16 dev_i, devid = 0, devid_start = 0, devid_to = 0; + u32 ext_flags = 0; + bool alias = 0; + struct ivhd_entry *e; + + /* + * First set the recommended feature enable bits from ACPI + * into the IOMMU control registers + */ + h->flags & IVHD_FLAG_HT_TUN_EN ? + iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : + iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); + + h->flags & IVHD_FLAG_PASSPW_EN ? + iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_PASSPW_EN); + + h->flags & IVHD_FLAG_RESPASSPW_EN ? + iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); + + h->flags & IVHD_FLAG_ISOC_EN ? + iommu_feature_enable(iommu, CONTROL_ISOC_EN) : + iommu_feature_disable(iommu, CONTROL_ISOC_EN); + + /* + * make IOMMU memory accesses cache coherent + */ + iommu_feature_enable(iommu, CONTROL_COHERENT_EN); + + /* + * Done. Now parse the device entries + */ + p += sizeof(struct ivhd_header); + end += h->length; + + while (p < end) { + e = (struct ivhd_entry *)p; + switch (e->type) { + case IVHD_DEV_ALL: + for (dev_i = iommu->first_device; + dev_i <= iommu->last_device; ++dev_i) + set_dev_entry_from_acpi(dev_i, e->flags, 0); + break; + case IVHD_DEV_SELECT: + devid = e->devid; + set_dev_entry_from_acpi(devid, e->flags, 0); + break; + case IVHD_DEV_SELECT_RANGE_START: + devid_start = e->devid; + flags = e->flags; + ext_flags = 0; + alias = 0; + break; + case IVHD_DEV_ALIAS: + devid = e->devid; + devid_to = e->ext >> 8; + set_dev_entry_from_acpi(devid, e->flags, 0); + amd_iommu_alias_table[devid] = devid_to; + break; + case IVHD_DEV_ALIAS_RANGE: + devid_start = e->devid; + flags = e->flags; + devid_to = e->ext >> 8; + ext_flags = 0; + alias = 1; + break; + case IVHD_DEV_EXT_SELECT: + devid = e->devid; + set_dev_entry_from_acpi(devid, e->flags, e->ext); + break; + case IVHD_DEV_EXT_SELECT_RANGE: + devid_start = e->devid; + flags = e->flags; + ext_flags = e->ext; + alias = 0; + break; + case IVHD_DEV_RANGE_END: + devid = e->devid; + for (dev_i = devid_start; dev_i <= devid; ++dev_i) { + if (alias) + amd_iommu_alias_table[dev_i] = devid_to; + set_dev_entry_from_acpi( + amd_iommu_alias_table[dev_i], + flags, ext_flags); + } + break; + default: + break; + } + + p += 0x04 << (e->type >> 6); + } +} + +static int __init init_iommu_devices(struct amd_iommu *iommu) +{ + u16 i; + + for (i = iommu->first_device; i <= iommu->last_device; ++i) + set_iommu_for_device(iommu, i); + + return 0; +} + +static void __init free_iommu_one(struct amd_iommu *iommu) +{ + free_command_buffer(iommu); + iommu_unmap_mmio_space(iommu); +} + +static void __init free_iommu_all(void) +{ + struct amd_iommu *iommu, *next; + + list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) { + list_del(&iommu->list); + free_iommu_one(iommu); + kfree(iommu); + } +} + +static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) +{ + spin_lock_init(&iommu->lock); + list_add_tail(&iommu->list, &amd_iommu_list); + + /* + * Copy data from ACPI table entry to the iommu struct + */ + iommu->devid = h->devid; + iommu->cap_ptr = h->cap_ptr; + iommu->mmio_phys = h->mmio_phys; + iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); + if (!iommu->mmio_base) + return -ENOMEM; + + iommu_set_device_table(iommu); + iommu->cmd_buf = alloc_command_buffer(iommu); + if (!iommu->cmd_buf) + return -ENOMEM; + + init_iommu_from_pci(iommu); + init_iommu_from_acpi(iommu, h); + init_iommu_devices(iommu); + + return 0; +} + +static int __init init_iommu_all(struct acpi_table_header *table) +{ + u8 *p = (u8 *)table, *end = (u8 *)table; + struct ivhd_header *h; + struct amd_iommu *iommu; + int ret; + + INIT_LIST_HEAD(&amd_iommu_list); + + end += table->length; + p += IVRS_HEADER_LENGTH; + + while (p < end) { + h = (struct ivhd_header *)p; + switch (*p) { + case ACPI_IVHD_TYPE: + iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); + if (iommu == NULL) + return -ENOMEM; + ret = init_iommu_one(iommu, h); + if (ret) + return ret; + break; + default: + break; + } + p += h->length; + + } + WARN_ON(p != end); + + return 0; +} + +static void __init free_unity_maps(void) +{ + struct unity_map_entry *entry, *next; + + list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) { + list_del(&entry->list); + kfree(entry); + } +} + +static int __init init_exclusion_range(struct ivmd_header *m) +{ + int i; + + switch (m->type) { + case ACPI_IVMD_TYPE: + set_device_exclusion_range(m->devid, m); + break; + case ACPI_IVMD_TYPE_ALL: + for (i = 0; i < amd_iommu_last_bdf; ++i) + set_device_exclusion_range(i, m); + break; + case ACPI_IVMD_TYPE_RANGE: + for (i = m->devid; i <= m->aux; ++i) + set_device_exclusion_range(i, m); + break; + default: + break; + } + + return 0; +} + +static int __init init_unity_map_range(struct ivmd_header *m) +{ + struct unity_map_entry *e = 0; + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (e == NULL) + return -ENOMEM; + + switch (m->type) { + default: + case ACPI_IVMD_TYPE: + e->devid_start = e->devid_end = m->devid; + break; + case ACPI_IVMD_TYPE_ALL: + e->devid_start = 0; + e->devid_end = amd_iommu_last_bdf; + break; + case ACPI_IVMD_TYPE_RANGE: + e->devid_start = m->devid; + e->devid_end = m->aux; + break; + } + e->address_start = PAGE_ALIGN(m->range_start); + e->address_end = e->address_start + PAGE_ALIGN(m->range_length); + e->prot = m->flags >> 1; + + list_add_tail(&e->list, &amd_iommu_unity_map); + + return 0; +} + +static int __init init_memory_definitions(struct acpi_table_header *table) +{ + u8 *p = (u8 *)table, *end = (u8 *)table; + struct ivmd_header *m; + + INIT_LIST_HEAD(&amd_iommu_unity_map); + + end += table->length; + p += IVRS_HEADER_LENGTH; + + while (p < end) { + m = (struct ivmd_header *)p; + if (m->flags & IVMD_FLAG_EXCL_RANGE) + init_exclusion_range(m); + else if (m->flags & IVMD_FLAG_UNITY_MAP) + init_unity_map_range(m); + + p += m->length; + } + + return 0; +} + +static void __init enable_iommus(void) +{ + struct amd_iommu *iommu; + + list_for_each_entry(iommu, &amd_iommu_list, list) { + iommu_set_exclusion_range(iommu); + iommu_enable(iommu); + } +} + +/* + * Suspend/Resume support + * disable suspend until real resume implemented + */ + +static int amd_iommu_resume(struct sys_device *dev) +{ + return 0; +} + +static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state) +{ + return -EINVAL; +} + +static struct sysdev_class amd_iommu_sysdev_class = { + .name = "amd_iommu", + .suspend = amd_iommu_suspend, + .resume = amd_iommu_resume, +}; + +static struct sys_device device_amd_iommu = { + .id = 0, + .cls = &amd_iommu_sysdev_class, +}; + +int __init amd_iommu_init(void) +{ + int i, ret = 0; + + + if (no_iommu) { + printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n"); + return 0; + } + + if (!amd_iommu_detected) + return -ENODEV; + + /* + * First parse ACPI tables to find the largest Bus/Dev/Func + * we need to handle. Upon this information the shared data + * structures for the IOMMUs in the system will be allocated + */ + if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) + return -ENODEV; + + dev_table_size = TBL_SIZE(DEV_TABLE_ENTRY_SIZE); + alias_table_size = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE); + rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE); + + ret = -ENOMEM; + + /* Device table - directly used by all IOMMUs */ + amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(dev_table_size)); + if (amd_iommu_dev_table == NULL) + goto out; + + /* + * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the + * IOMMU see for that device + */ + amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(alias_table_size)); + if (amd_iommu_alias_table == NULL) + goto free; + + /* IOMMU rlookup table - find the IOMMU for a specific device */ + amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(rlookup_table_size)); + if (amd_iommu_rlookup_table == NULL) + goto free; + + /* + * Protection Domain table - maps devices to protection domains + * This table has the same size as the rlookup_table + */ + amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(rlookup_table_size)); + if (amd_iommu_pd_table == NULL) + goto free; + + amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL, + get_order(MAX_DOMAIN_ID/8)); + if (amd_iommu_pd_alloc_bitmap == NULL) + goto free; + + /* + * memory is allocated now; initialize the device table with all zeroes + * and let all alias entries point to itself + */ + memset(amd_iommu_dev_table, 0, dev_table_size); + for (i = 0; i < amd_iommu_last_bdf; ++i) + amd_iommu_alias_table[i] = i; + + memset(amd_iommu_pd_table, 0, rlookup_table_size); + memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8); + + /* + * never allocate domain 0 because its used as the non-allocated and + * error value placeholder + */ + amd_iommu_pd_alloc_bitmap[0] = 1; + + /* + * now the data structures are allocated and basically initialized + * start the real acpi table scan + */ + ret = -ENODEV; + if (acpi_table_parse("IVRS", init_iommu_all) != 0) + goto free; + + if (acpi_table_parse("IVRS", init_memory_definitions) != 0) + goto free; + + ret = amd_iommu_init_dma_ops(); + if (ret) + goto free; + + ret = sysdev_class_register(&amd_iommu_sysdev_class); + if (ret) + goto free; + + ret = sysdev_register(&device_amd_iommu); + if (ret) + goto free; + + enable_iommus(); + + printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n", + (1 << (amd_iommu_aperture_order-20))); + + printk(KERN_INFO "AMD IOMMU: device isolation "); + if (amd_iommu_isolate) + printk("enabled\n"); + else + printk("disabled\n"); + +out: + return ret; + +free: + if (amd_iommu_pd_alloc_bitmap) + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); + + if (amd_iommu_pd_table) + free_pages((unsigned long)amd_iommu_pd_table, + get_order(rlookup_table_size)); + + if (amd_iommu_rlookup_table) + free_pages((unsigned long)amd_iommu_rlookup_table, + get_order(rlookup_table_size)); + + if (amd_iommu_alias_table) + free_pages((unsigned long)amd_iommu_alias_table, + get_order(alias_table_size)); + + if (amd_iommu_dev_table) + free_pages((unsigned long)amd_iommu_dev_table, + get_order(dev_table_size)); + + free_iommu_all(); + + free_unity_maps(); + + goto out; +} + +static int __init early_amd_iommu_detect(struct acpi_table_header *table) +{ + return 0; +} + +void __init amd_iommu_detect(void) +{ + if (swiotlb || no_iommu || iommu_detected) + return; + + if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { + iommu_detected = 1; + amd_iommu_detected = 1; +#ifdef CONFIG_GART_IOMMU + gart_iommu_aperture_disabled = 1; + gart_iommu_aperture = 0; +#endif + } +} + +static int __init parse_amd_iommu_options(char *str) +{ + for (; *str; ++str) { + if (strcmp(str, "isolate") == 0) + amd_iommu_isolate = 1; + } + + return 1; +} + +static int __init parse_amd_iommu_size_options(char *str) +{ + for (; *str; ++str) { + if (strcmp(str, "32M") == 0) + amd_iommu_aperture_order = 25; + if (strcmp(str, "64M") == 0) + amd_iommu_aperture_order = 26; + if (strcmp(str, "128M") == 0) + amd_iommu_aperture_order = 27; + if (strcmp(str, "256M") == 0) + amd_iommu_aperture_order = 28; + if (strcmp(str, "512M") == 0) + amd_iommu_aperture_order = 29; + if (strcmp(str, "1G") == 0) + amd_iommu_aperture_order = 30; + } + + return 1; +} + +__setup("amd_iommu=", parse_amd_iommu_options); +__setup("amd_iommu_size=", parse_amd_iommu_size_options); diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 479926d9e00..600470d464f 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -35,6 +35,18 @@ int fallback_aper_force __initdata; int fix_aperture __initdata = 1; +struct bus_dev_range { + int bus; + int dev_base; + int dev_limit; +}; + +static struct bus_dev_range bus_dev_ranges[] __initdata = { + { 0x00, 0x18, 0x20}, + { 0xff, 0x00, 0x20}, + { 0xfe, 0x00, 0x20} +}; + static struct resource gart_resource = { .name = "GART", .flags = IORESOURCE_MEM, @@ -55,8 +67,9 @@ static u32 __init allocate_aperture(void) u32 aper_size; void *p; - if (fallback_aper_order > 7) - fallback_aper_order = 7; + /* aper_size should <= 1G */ + if (fallback_aper_order > 5) + fallback_aper_order = 5; aper_size = (32 * 1024 * 1024) << fallback_aper_order; /* @@ -65,7 +78,20 @@ static u32 __init allocate_aperture(void) * memory. Unfortunately we cannot move it up because that would * make the IOMMU useless. */ - p = __alloc_bootmem_nopanic(aper_size, aper_size, 0); + /* + * using 512M as goal, in case kexec will load kernel_big + * that will do the on position decompress, and could overlap with + * that positon with gart that is used. + * sequende: + * kernel_small + * ==> kexec (with kdump trigger path or previous doesn't shutdown gart) + * ==> kernel_small(gart area become e820_reserved) + * ==> kexec (with kdump trigger path or previous doesn't shutdown gart) + * ==> kerne_big (uncompressed size will be big than 64M or 128M) + * so don't use 512M below as gart iommu, leave the space for kernel + * code for safe + */ + p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20); if (!p || __pa(p)+aper_size > 0xffffffff) { printk(KERN_ERR "Cannot allocate aperture memory hole (%p,%uK)\n", @@ -83,69 +109,53 @@ static u32 __init allocate_aperture(void) return (u32)__pa(p); } -static int __init aperture_valid(u64 aper_base, u32 aper_size) -{ - if (!aper_base) - return 0; - - if (aper_base + aper_size > 0x100000000UL) { - printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n"); - return 0; - } - if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { - printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n"); - return 0; - } - if (aper_size < 64*1024*1024) { - printk(KERN_ERR "Aperture too small (%d MB)\n", aper_size>>20); - return 0; - } - - return 1; -} /* Find a PCI capability */ -static __u32 __init find_cap(int num, int slot, int func, int cap) +static u32 __init find_cap(int bus, int slot, int func, int cap) { int bytes; u8 pos; - if (!(read_pci_config_16(num, slot, func, PCI_STATUS) & + if (!(read_pci_config_16(bus, slot, func, PCI_STATUS) & PCI_STATUS_CAP_LIST)) return 0; - pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST); + pos = read_pci_config_byte(bus, slot, func, PCI_CAPABILITY_LIST); for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { u8 id; pos &= ~3; - id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID); + id = read_pci_config_byte(bus, slot, func, pos+PCI_CAP_LIST_ID); if (id == 0xff) break; if (id == cap) return pos; - pos = read_pci_config_byte(num, slot, func, + pos = read_pci_config_byte(bus, slot, func, pos+PCI_CAP_LIST_NEXT); } return 0; } /* Read a standard AGPv3 bridge header */ -static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) +static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order) { u32 apsize; u32 apsizereg; int nbits; u32 aper_low, aper_hi; u64 aper; + u32 old_order; - printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", num, slot, func); - apsizereg = read_pci_config_16(num, slot, func, cap + 0x14); + printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", bus, slot, func); + apsizereg = read_pci_config_16(bus, slot, func, cap + 0x14); if (apsizereg == 0xffffffff) { printk(KERN_ERR "APSIZE in AGP bridge unreadable\n"); return 0; } + /* old_order could be the value from NB gart setting */ + old_order = *order; + apsize = apsizereg & 0xfff; /* Some BIOS use weird encodings not in the AGPv3 table. */ if (apsize & 0xff) @@ -155,14 +165,26 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) if ((int)*order < 0) /* < 32MB */ *order = 0; - aper_low = read_pci_config(num, slot, func, 0x10); - aper_hi = read_pci_config(num, slot, func, 0x14); + aper_low = read_pci_config(bus, slot, func, 0x10); + aper_hi = read_pci_config(bus, slot, func, 0x14); aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); + /* + * On some sick chips, APSIZE is 0. It means it wants 4G + * so let double check that order, and lets trust AMD NB settings: + */ + printk(KERN_INFO "Aperture from AGP @ %Lx old size %u MB\n", + aper, 32 << old_order); + if (aper + (32ULL<<(20 + *order)) > 0x100000000ULL) { + printk(KERN_INFO "Aperture size %u MB (APSIZE %x) is not right, using settings from NB\n", + 32 << *order, apsizereg); + *order = old_order; + } + printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", aper, 32 << *order, apsizereg); - if (!aperture_valid(aper, (32*1024*1024) << *order)) + if (!aperture_valid(aper, (32*1024*1024) << *order, 32<<20)) return 0; return (u32)aper; } @@ -180,17 +202,17 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) * the AGP bridges should be always an own bus on the HT hierarchy, * but do it here for future safety. */ -static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) +static u32 __init search_agp_bridge(u32 *order, int *valid_agp) { - int num, slot, func; + int bus, slot, func; /* Poor man's PCI discovery */ - for (num = 0; num < 256; num++) { + for (bus = 0; bus < 256; bus++) { for (slot = 0; slot < 32; slot++) { for (func = 0; func < 8; func++) { u32 class, cap; u8 type; - class = read_pci_config(num, slot, func, + class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION); if (class == 0xffffffff) break; @@ -199,17 +221,17 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) case PCI_CLASS_BRIDGE_HOST: case PCI_CLASS_BRIDGE_OTHER: /* needed? */ /* AGP bridge? */ - cap = find_cap(num, slot, func, + cap = find_cap(bus, slot, func, PCI_CAP_ID_AGP); if (!cap) break; *valid_agp = 1; - return read_agp(num, slot, func, cap, + return read_agp(bus, slot, func, cap, order); } /* No multi-function device? */ - type = read_pci_config_byte(num, slot, func, + type = read_pci_config_byte(bus, slot, func, PCI_HEADER_TYPE); if (!(type & 0x80)) break; @@ -249,36 +271,50 @@ void __init early_gart_iommu_check(void) * or BIOS forget to put that in reserved. * try to update e820 to make that region as reserved. */ - int fix, num; + int i, fix, slot; u32 ctl; u32 aper_size = 0, aper_order = 0, last_aper_order = 0; u64 aper_base = 0, last_aper_base = 0; - int aper_enabled = 0, last_aper_enabled = 0; + int aper_enabled = 0, last_aper_enabled = 0, last_valid = 0; if (!early_pci_allowed()) return; + /* This is mostly duplicate of iommu_hole_init */ fix = 0; - for (num = 24; num < 32; num++) { - if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) - continue; - - ctl = read_pci_config(0, num, 3, 0x90); - aper_enabled = ctl & 1; - aper_order = (ctl >> 1) & 7; - aper_size = (32 * 1024 * 1024) << aper_order; - aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff; - aper_base <<= 25; - - if ((last_aper_order && aper_order != last_aper_order) || - (last_aper_base && aper_base != last_aper_base) || - (last_aper_enabled && aper_enabled != last_aper_enabled)) { - fix = 1; - break; + for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { + int bus; + int dev_base, dev_limit; + + bus = bus_dev_ranges[i].bus; + dev_base = bus_dev_ranges[i].dev_base; + dev_limit = bus_dev_ranges[i].dev_limit; + + for (slot = dev_base; slot < dev_limit; slot++) { + if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) + continue; + + ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); + aper_enabled = ctl & AMD64_GARTEN; + aper_order = (ctl >> 1) & 7; + aper_size = (32 * 1024 * 1024) << aper_order; + aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; + aper_base <<= 25; + + if (last_valid) { + if ((aper_order != last_aper_order) || + (aper_base != last_aper_base) || + (aper_enabled != last_aper_enabled)) { + fix = 1; + break; + } + } + + last_aper_order = aper_order; + last_aper_base = aper_base; + last_aper_enabled = aper_enabled; + last_valid = 1; } - last_aper_order = aper_order; - last_aper_base = aper_base; - last_aper_enabled = aper_enabled; } if (!fix && !aper_enabled) @@ -290,32 +326,46 @@ void __init early_gart_iommu_check(void) if (gart_fix_e820 && !fix && aper_enabled) { if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { - /* reserved it, so we can resuse it in second kernel */ + /* reserve it, so we can reuse it in second kernel */ printk(KERN_INFO "update e820 for GART\n"); - add_memory_region(aper_base, aper_size, E820_RESERVED); + e820_add_region(aper_base, aper_size, E820_RESERVED); update_e820(); } - return; } + if (!fix) + return; + /* different nodes have different setting, disable them all at first*/ - for (num = 24; num < 32; num++) { - if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) - continue; + for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { + int bus; + int dev_base, dev_limit; + + bus = bus_dev_ranges[i].bus; + dev_base = bus_dev_ranges[i].dev_base; + dev_limit = bus_dev_ranges[i].dev_limit; + + for (slot = dev_base; slot < dev_limit; slot++) { + if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) + continue; - ctl = read_pci_config(0, num, 3, 0x90); - ctl &= ~1; - write_pci_config(0, num, 3, 0x90, ctl); + ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); + ctl &= ~AMD64_GARTEN; + write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); + } } } +static int __initdata printed_gart_size_msg; + void __init gart_iommu_hole_init(void) { + u32 agp_aper_base = 0, agp_aper_order = 0; u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; u64 aper_base, last_aper_base = 0; - int fix, num, valid_agp = 0; - int node; + int fix, slot, valid_agp = 0; + int i, node; if (gart_iommu_aperture_disabled || !fix_aperture || !early_pci_allowed()) @@ -323,38 +373,63 @@ void __init gart_iommu_hole_init(void) printk(KERN_INFO "Checking aperture...\n"); + if (!fallback_aper_force) + agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp); + fix = 0; node = 0; - for (num = 24; num < 32; num++) { - if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) - continue; - - iommu_detected = 1; - gart_iommu_aperture = 1; - - aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; - aper_size = (32 * 1024 * 1024) << aper_order; - aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff; - aper_base <<= 25; - - printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n", - node, aper_base, aper_size >> 20); - node++; - - if (!aperture_valid(aper_base, aper_size)) { - fix = 1; - break; - } + for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { + int bus; + int dev_base, dev_limit; + + bus = bus_dev_ranges[i].bus; + dev_base = bus_dev_ranges[i].dev_base; + dev_limit = bus_dev_ranges[i].dev_limit; + + for (slot = dev_base; slot < dev_limit; slot++) { + if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) + continue; + + iommu_detected = 1; + gart_iommu_aperture = 1; + + aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7; + aper_size = (32 * 1024 * 1024) << aper_order; + aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; + aper_base <<= 25; + + printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n", + node, aper_base, aper_size >> 20); + node++; + + if (!aperture_valid(aper_base, aper_size, 64<<20)) { + if (valid_agp && agp_aper_base && + agp_aper_base == aper_base && + agp_aper_order == aper_order) { + /* the same between two setting from NB and agp */ + if (!no_iommu && end_pfn > MAX_DMA32_PFN && !printed_gart_size_msg) { + printk(KERN_ERR "you are using iommu with agp, but GART size is less than 64M\n"); + printk(KERN_ERR "please increase GART size in your BIOS setup\n"); + printk(KERN_ERR "if BIOS doesn't have that option, contact your HW vendor!\n"); + printed_gart_size_msg = 1; + } + } else { + fix = 1; + goto out; + } + } - if ((last_aper_order && aper_order != last_aper_order) || - (last_aper_base && aper_base != last_aper_base)) { - fix = 1; - break; + if ((last_aper_order && aper_order != last_aper_order) || + (last_aper_base && aper_base != last_aper_base)) { + fix = 1; + goto out; + } + last_aper_order = aper_order; + last_aper_base = aper_base; } - last_aper_order = aper_order; - last_aper_base = aper_base; } +out: if (!fix && !fallback_aper_force) { if (last_aper_base) { unsigned long n = (32 * 1024 * 1024) << last_aper_order; @@ -364,8 +439,10 @@ void __init gart_iommu_hole_init(void) return; } - if (!fallback_aper_force) - aper_alloc = search_agp_bridge(&aper_order, &valid_agp); + if (!fallback_aper_force) { + aper_alloc = agp_aper_base; + aper_order = agp_aper_order; + } if (aper_alloc) { /* Got the aperture from the AGP bridge */ @@ -401,16 +478,24 @@ void __init gart_iommu_hole_init(void) } /* Fix up the north bridges */ - for (num = 24; num < 32; num++) { - if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) - continue; - - /* - * Don't enable translation yet. That is done later. - * Assume this BIOS didn't initialise the GART so - * just overwrite all previous bits - */ - write_pci_config(0, num, 3, 0x90, aper_order<<1); - write_pci_config(0, num, 3, 0x94, aper_alloc>>25); + for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { + int bus; + int dev_base, dev_limit; + + bus = bus_dev_ranges[i].bus; + dev_base = bus_dev_ranges[i].dev_base; + dev_limit = bus_dev_ranges[i].dev_limit; + for (slot = dev_base; slot < dev_limit; slot++) { + if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) + continue; + + /* Don't enable translation yet. That is done later. + Assume this BIOS didn't initialise the GART so + just overwrite all previous bits */ + write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, aper_order << 1); + write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25); + } } + + set_up_gart_resume(aper_order, aper_alloc); } diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3f13a1aa07c..dd8de26b278 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -52,9 +52,6 @@ unsigned long mp_lapic_addr; -DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; -EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); - /* * Knob to control our willingness to enable the local APIC. * @@ -64,9 +61,8 @@ static int enable_local_apic __initdata; /* Local APIC timer verification ok */ static int local_apic_timer_verify_ok; -/* Disable local APIC timer from the kernel commandline or via dmi quirk - or using CPU MSR check */ -int local_apic_timer_disabled; +/* Disable local APIC timer from the kernel commandline or via dmi quirk */ +static int local_apic_timer_disabled; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); @@ -80,6 +76,11 @@ char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; */ int apic_verbosity; +int pic_mode; + +/* Have we found an MP table */ +int smp_found_config; + static unsigned int calibration_result; static int lapic_next_event(unsigned long delta, @@ -1158,9 +1159,6 @@ static int __init detect_init_APIC(void) if (l & MSR_IA32_APICBASE_ENABLE) mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; - if (nmi_watchdog != NMI_NONE && nmi_watchdog != NMI_DISABLED) - nmi_watchdog = NMI_LOCAL_APIC; - printk(KERN_INFO "Found and enabled local APIC!\n"); apic_pm_activate(); @@ -1206,7 +1204,7 @@ void __init init_apic_mappings(void) for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mpc_apicaddr; + ioapic_phys = mp_ioapics[i].mp_apicaddr; if (!ioapic_phys) { printk(KERN_ERR "WARNING: bogus zero IO-APIC " @@ -1273,6 +1271,10 @@ int __init APIC_init_uniprocessor(void) setup_local_APIC(); +#ifdef CONFIG_X86_IO_APIC + if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) +#endif + localise_nmi_watchdog(); end_local_APIC_setup(); #ifdef CONFIG_X86_IO_APIC if (smp_found_config) @@ -1517,6 +1519,9 @@ void __cpuinit generic_processor_info(int apicid, int version) */ cpu = 0; + if (apicid > max_physical_apicid) + max_physical_apicid = apicid; + /* * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y * but we need to work other dependencies like SMP_SUSPEND etc @@ -1524,7 +1529,7 @@ void __cpuinit generic_processor_info(int apicid, int version) * if (CPU_HOTPLUG_ENABLED || num_processors > 8) * - Ashok Raj <ashok.raj@intel.com> */ - if (num_processors > 8) { + if (max_physical_apicid >= 8) { switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: if (!APIC_XAPIC(version)) { @@ -1538,9 +1543,9 @@ void __cpuinit generic_processor_info(int apicid, int version) } #ifdef CONFIG_SMP /* are we being called early in kernel startup? */ - if (x86_cpu_to_apicid_early_ptr) { - u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; - u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; + if (early_per_cpu_ptr(x86_cpu_to_apicid)) { + u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); + u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); cpu_to_apicid[cpu] = apicid; bios_cpu_apicid[cpu] = apicid; diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 5e22f8d2d50..d9d663ffa64 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -43,7 +43,7 @@ #include <mach_ipi.h> #include <mach_apic.h> -int disable_apic_timer __cpuinitdata; +static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; @@ -56,6 +56,9 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); */ int apic_verbosity; +/* Have we found an MP table */ +int smp_found_config; + static struct resource lapic_resource = { .name = "Local APIC", .flags = IORESOURCE_MEM | IORESOURCE_BUSY, @@ -87,9 +90,6 @@ static unsigned long apic_phys; unsigned long mp_lapic_addr; -DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; -EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); - unsigned int __cpuinitdata maxcpus = NR_CPUS; /* * Get the LAPIC version @@ -422,32 +422,8 @@ void __init setup_boot_APIC_clock(void) setup_APIC_timer(); } -/* - * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the - * C1E flag only in the secondary CPU, so when we detect the wreckage - * we already have enabled the boot CPU local apic timer. Check, if - * disable_apic_timer is set and the DUMMY flag is cleared. If yes, - * set the DUMMY flag again and force the broadcast mode in the - * clockevents layer. - */ -static void __cpuinit check_boot_apic_timer_broadcast(void) -{ - if (!disable_apic_timer || - (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) - return; - - printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n"); - lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY; - - local_irq_enable(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, - &boot_cpu_physical_apicid); - local_irq_disable(); -} - void __cpuinit setup_secondary_APIC_clock(void) { - check_boot_apic_timer_broadcast(); setup_APIC_timer(); } @@ -875,7 +851,7 @@ static int __init detect_init_APIC(void) void __init early_init_lapic_mapping(void) { - unsigned long apic_phys; + unsigned long phys_addr; /* * If no local APIC can be found then go out @@ -884,11 +860,11 @@ void __init early_init_lapic_mapping(void) if (!smp_found_config) return; - apic_phys = mp_lapic_addr; + phys_addr = mp_lapic_addr; - set_fixmap_nocache(FIX_APIC_BASE, apic_phys); + set_fixmap_nocache(FIX_APIC_BASE, phys_addr); apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", - APIC_BASE, apic_phys); + APIC_BASE, phys_addr); /* * Fetch the APIC ID of the BSP in case we have a @@ -954,6 +930,8 @@ int __init APIC_init_uniprocessor(void) if (!skip_ioapic_setup && nr_ioapics) enable_IO_APIC(); + if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) + localise_nmi_watchdog(); end_local_APIC_setup(); if (smp_found_config && !skip_ioapic_setup && nr_ioapics) @@ -1090,10 +1068,13 @@ void __cpuinit generic_processor_info(int apicid, int version) */ cpu = 0; } + if (apicid > max_physical_apicid) + max_physical_apicid = apicid; + /* are we being called early in kernel startup? */ - if (x86_cpu_to_apicid_early_ptr) { - u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; - u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; + if (early_per_cpu_ptr(x86_cpu_to_apicid)) { + u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); + u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); cpu_to_apicid[cpu] = apicid; bios_cpu_apicid[cpu] = apicid; @@ -1269,7 +1250,7 @@ __cpuinit int apic_is_clustered_box(void) if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) return 0; - bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; + bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); bitmap_zero(clustermap, NUM_APIC_CLUSTERS); for (i = 0; i < NR_CPUS; i++) { diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index bf9290e2901..00e6d137095 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -228,6 +228,7 @@ #include <linux/suspend.h> #include <linux/kthread.h> #include <linux/jiffies.h> +#include <linux/smp_lock.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -1149,7 +1150,7 @@ static void queue_event(apm_event_t event, struct apm_user *sender) as->event_tail = 0; } as->events[as->event_head] = event; - if ((!as->suser) || (!as->writer)) + if (!as->suser || !as->writer) continue; switch (event) { case APM_SYS_SUSPEND: @@ -1396,7 +1397,7 @@ static void apm_mainloop(void) static int check_apm_user(struct apm_user *as, const char *func) { - if ((as == NULL) || (as->magic != APM_BIOS_MAGIC)) { + if (as == NULL || as->magic != APM_BIOS_MAGIC) { printk(KERN_ERR "apm: %s passed bad filp\n", func); return 1; } @@ -1459,18 +1460,19 @@ static unsigned int do_poll(struct file *fp, poll_table *wait) return 0; } -static int do_ioctl(struct inode *inode, struct file *filp, - u_int cmd, u_long arg) +static long do_ioctl(struct file *filp, u_int cmd, u_long arg) { struct apm_user *as; + int ret; as = filp->private_data; if (check_apm_user(as, "ioctl")) return -EIO; - if ((!as->suser) || (!as->writer)) + if (!as->suser || !as->writer) return -EPERM; switch (cmd) { case APM_IOC_STANDBY: + lock_kernel(); if (as->standbys_read > 0) { as->standbys_read--; as->standbys_pending--; @@ -1479,8 +1481,10 @@ static int do_ioctl(struct inode *inode, struct file *filp, queue_event(APM_USER_STANDBY, as); if (standbys_pending <= 0) standby(); + unlock_kernel(); break; case APM_IOC_SUSPEND: + lock_kernel(); if (as->suspends_read > 0) { as->suspends_read--; as->suspends_pending--; @@ -1488,16 +1492,17 @@ static int do_ioctl(struct inode *inode, struct file *filp, } else queue_event(APM_USER_SUSPEND, as); if (suspends_pending <= 0) { - return suspend(1); + ret = suspend(1); } else { as->suspend_wait = 1; wait_event_interruptible(apm_suspend_waitqueue, as->suspend_wait == 0); - return as->suspend_result; + ret = as->suspend_result; } - break; + unlock_kernel(); + return ret; default: - return -EINVAL; + return -ENOTTY; } return 0; } @@ -1860,7 +1865,7 @@ static const struct file_operations apm_bios_fops = { .owner = THIS_MODULE, .read = do_read, .poll = do_poll, - .ioctl = do_ioctl, + .unlocked_ioctl = do_ioctl, .open = do_open, .release = do_release, }; diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index a0c6f819088..65b1be5fe9c 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -6,11 +6,15 @@ obj-y := intel_cacheinfo.o addon_cpuid_features.o obj-y += proc.o feature_names.o obj-$(CONFIG_X86_32) += common.o bugs.o +obj-$(CONFIG_X86_64) += bugs_64.o obj-$(CONFIG_X86_32) += amd.o +obj-$(CONFIG_X86_64) += amd_64.o obj-$(CONFIG_X86_32) += cyrix.o obj-$(CONFIG_X86_32) += centaur.o +obj-$(CONFIG_X86_64) += centaur_64.o obj-$(CONFIG_X86_32) += transmeta.o obj-$(CONFIG_X86_32) += intel.o +obj-$(CONFIG_X86_64) += intel_64.o obj-$(CONFIG_X86_32) += umc.o obj-$(CONFIG_X86_MCE) += mcheck/ diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index c2e1ce33c7c..84a8220a607 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -1,9 +1,7 @@ - /* * Routines to indentify additional cpu features that are scattered in * cpuid space. */ - #include <linux/cpu.h> #include <asm/pat.h> @@ -53,19 +51,20 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_PAT void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) { + if (!cpu_has_pat) + pat_disable("PAT not supported by CPU."); + switch (c->x86_vendor) { - case X86_VENDOR_AMD: - if (c->x86 >= 0xf && c->x86 <= 0x11) - return; - break; case X86_VENDOR_INTEL: if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) return; break; + case X86_VENDOR_AMD: + case X86_VENDOR_CENTAUR: + case X86_VENDOR_TRANSMETA: + return; } - pat_disable(cpu_has_pat ? - "PAT disabled. Not yet verified on this CPU type." : - "PAT not supported by CPU."); + pat_disable("PAT disabled. Not yet verified on this CPU type."); } #endif diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 24586682829..81a07ca65d4 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -24,43 +24,6 @@ extern void vide(void); __asm__(".align 4\nvide: ret"); -#ifdef CONFIG_X86_LOCAL_APIC -#define ENABLE_C1E_MASK 0x18000000 -#define CPUID_PROCESSOR_SIGNATURE 1 -#define CPUID_XFAM 0x0ff00000 -#define CPUID_XFAM_K8 0x00000000 -#define CPUID_XFAM_10H 0x00100000 -#define CPUID_XFAM_11H 0x00200000 -#define CPUID_XMOD 0x000f0000 -#define CPUID_XMOD_REV_F 0x00040000 - -/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ -static __cpuinit int amd_apic_timer_broken(void) -{ - u32 lo, hi; - u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); - switch (eax & CPUID_XFAM) { - case CPUID_XFAM_K8: - if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) - break; - case CPUID_XFAM_10H: - case CPUID_XFAM_11H: - rdmsr(MSR_K8_ENABLE_C1E, lo, hi); - if (lo & ENABLE_C1E_MASK) { - if (smp_processor_id() != boot_cpu_physical_apicid) - printk(KERN_INFO "AMD C1E detected late. " - " Force timer broadcast.\n"); - return 1; - } - break; - default: - /* err on the side of caution */ - return 1; - } - return 0; -} -#endif - int force_mwait __cpuinitdata; static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) @@ -297,11 +260,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) num_cache_leaves = 3; } -#ifdef CONFIG_X86_LOCAL_APIC - if (amd_apic_timer_broken()) - local_apic_timer_disabled = 1; -#endif - /* K6s reports MCEs but don't actually have all the MSRs */ if (c->x86 < 6) clear_cpu_cap(c, X86_FEATURE_MCE); diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c new file mode 100644 index 00000000000..30b7557c964 --- /dev/null +++ b/arch/x86/kernel/cpu/amd_64.c @@ -0,0 +1,211 @@ +#include <linux/init.h> +#include <linux/mm.h> + +#include <asm/numa_64.h> +#include <asm/mmconfig.h> +#include <asm/cacheflush.h> + +#include <mach_apic.h> + +#include "cpu.h" + +int force_mwait __cpuinitdata; + +#ifdef CONFIG_NUMA +static int __cpuinit nearby_node(int apicid) +{ + int i, node; + + for (i = apicid - 1; i >= 0; i--) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + return first_node(node_online_map); /* Shouldn't happen */ +} +#endif + +/* + * On a AMD dual core setup the lower bits of the APIC id distingush the cores. + * Assumes number of cores is a power of two. + */ +static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned bits; +#ifdef CONFIG_NUMA + int cpu = smp_processor_id(); + int node = 0; + unsigned apicid = hard_smp_processor_id(); +#endif + bits = c->x86_coreid_bits; + + /* Low order bits define the core id (index of core in socket) */ + c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); + /* Convert the initial APIC ID into the socket ID */ + c->phys_proc_id = c->initial_apicid >> bits; + +#ifdef CONFIG_NUMA + node = c->phys_proc_id; + if (apicid_to_node[apicid] != NUMA_NO_NODE) + node = apicid_to_node[apicid]; + if (!node_online(node)) { + /* Two possibilities here: + - The CPU is missing memory and no node was created. + In that case try picking one from a nearby CPU + - The APIC IDs differ from the HyperTransport node IDs + which the K8 northbridge parsing fills in. + Assume they are all increased by a constant offset, + but in the same order as the HT nodeids. + If that doesn't result in a usable node fall back to the + path for the previous case. */ + + int ht_nodeid = c->initial_apicid; + + if (ht_nodeid >= 0 && + apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + node = apicid_to_node[ht_nodeid]; + /* Pick a nearby node */ + if (!node_online(node)) + node = nearby_node(apicid); + } + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +#endif +} + +static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned bits, ecx; + + /* Multi core CPU? */ + if (c->extended_cpuid_level < 0x80000008) + return; + + ecx = cpuid_ecx(0x80000008); + + c->x86_max_cores = (ecx & 0xff) + 1; + + /* CPU telling us the core id bits shift? */ + bits = (ecx >> 12) & 0xF; + + /* Otherwise recompute */ + if (bits == 0) { + while ((1 << bits) < c->x86_max_cores) + bits++; + } + + c->x86_coreid_bits = bits; + +#endif +} + +static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) +{ + early_init_amd_mc(c); + + /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ + if (c->x86_power & (1<<8)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +} + +static void __cpuinit init_amd(struct cpuinfo_x86 *c) +{ + unsigned level; + +#ifdef CONFIG_SMP + unsigned long value; + + /* + * Disable TLB flush filter by setting HWCR.FFDIS on K8 + * bit 6 of msr C001_0015 + * + * Errata 63 for SH-B3 steppings + * Errata 122 for all steppings (F+ have it disabled by default) + */ + if (c->x86 == 15) { + rdmsrl(MSR_K8_HWCR, value); + value |= 1 << 6; + wrmsrl(MSR_K8_HWCR, value); + } +#endif + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_cpu_cap(c, 0*32+31); + + /* On C+ stepping K8 rep microcode works well for copy/memset */ + level = cpuid_eax(1); + if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || + level >= 0x0f58)) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + if (c->x86 == 0x10 || c->x86 == 0x11) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + + /* Enable workaround for FXSAVE leak */ + if (c->x86 >= 6) + set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); + + level = get_model_name(c); + if (!level) { + switch (c->x86) { + case 15: + /* Should distinguish Models here, but this is only + a fallback anyways. */ + strcpy(c->x86_model_id, "Hammer"); + break; + } + } + display_cacheinfo(c); + + /* Multi core CPU? */ + if (c->extended_cpuid_level >= 0x80000008) + amd_detect_cmp(c); + + if (c->extended_cpuid_level >= 0x80000006 && + (cpuid_edx(0x80000006) & 0xf000)) + num_cache_leaves = 4; + else + num_cache_leaves = 3; + + if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11) + set_cpu_cap(c, X86_FEATURE_K8); + + /* MFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + + if (c->x86 == 0x10) + fam10h_check_enable_mmcfg(); + + if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { + unsigned long long tseg; + + /* + * Split up direct mapping around the TSEG SMM area. + * Don't do it for gbpages because there seems very little + * benefit in doing so. + */ + if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) && + (tseg >> PMD_SHIFT) < + (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT))) + set_memory_4k((unsigned long)__va(tseg), 1); + } +} + +static struct cpu_dev amd_cpu_dev __cpuinitdata = { + .c_vendor = "AMD", + .c_ident = { "AuthenticAMD" }, + .c_early_init = early_init_amd, + .c_init = init_amd, +}; + +cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); + diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 170d2f5523b..1b1c56bb338 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -59,8 +59,12 @@ static void __init check_fpu(void) return; } -/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */ - /* Test for the divl bug.. */ + /* + * trap_init() enabled FXSR and company _before_ testing for FP + * problems here. + * + * Test for the divl bug.. + */ __asm__("fninit\n\t" "fldl %1\n\t" "fdivl %2\n\t" @@ -108,10 +112,15 @@ static void __init check_popad(void) "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " : "=&a" (res) : "d" (inp) - : "ecx", "edi" ); - /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ - if (res != 12345678) printk( "Buggy.\n" ); - else printk( "OK.\n" ); + : "ecx", "edi"); + /* + * If this fails, it means that any user program may lock the + * CPU hard. Too bad. + */ + if (res != 12345678) + printk("Buggy.\n"); + else + printk("OK.\n"); #endif } @@ -137,7 +146,8 @@ static void __init check_config(void) * i486+ only features! (WP works in supervisor mode and the * new "invlpg" and "bswap" instructions) */ -#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP) +#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \ + defined(CONFIG_X86_BSWAP) if (boot_cpu_data.x86 == 3) panic("Kernel requires i486+ for 'invlpg' and other features"); #endif @@ -170,6 +180,7 @@ void __init check_bugs(void) check_fpu(); check_hlt(); check_popad(); - init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); + init_utsname()->machine[1] = + '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); alternative_instructions(); } diff --git a/arch/x86/kernel/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c index 9a3ed0649d4..9a3ed0649d4 100644 --- a/arch/x86/kernel/bugs_64.c +++ b/arch/x86/kernel/cpu/bugs_64.c diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c new file mode 100644 index 00000000000..13526fd5cce --- /dev/null +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -0,0 +1,43 @@ +#include <linux/init.h> +#include <linux/smp.h> + +#include <asm/cpufeature.h> +#include <asm/processor.h> + +#include "cpu.h" + +static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) +{ + if (c->x86 == 0x6 && c->x86_model >= 0xf) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +} + +static void __cpuinit init_centaur(struct cpuinfo_x86 *c) +{ + /* Cache sizes */ + unsigned n; + + n = c->extended_cpuid_level; + if (n >= 0x80000008) { + unsigned eax = cpuid_eax(0x80000008); + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + } + + if (c->x86 == 0x6 && c->x86_model >= 0xf) { + c->x86_cache_alignment = c->x86_clflush_size * 2; + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + } + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); +} + +static struct cpu_dev centaur_cpu_dev __cpuinitdata = { + .c_vendor = "Centaur", + .c_ident = { "CentaurHauls" }, + .c_early_init = early_init_centaur, + .c_init = init_centaur, +}; + +cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); + diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 783691b2a73..4d894e8565f 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -1,3 +1,6 @@ +#ifndef ARCH_X86_CPU_H + +#define ARCH_X86_CPU_H struct cpu_model_info { int vendor; @@ -36,3 +39,5 @@ extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[]; extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); + +#endif diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c index f03e9153618..965ea52767a 100644 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -26,9 +26,10 @@ #define NFORCE2_SAFE_DISTANCE 50 /* Delay in ms between FSB changes */ -//#define NFORCE2_DELAY 10 +/* #define NFORCE2_DELAY 10 */ -/* nforce2_chipset: +/* + * nforce2_chipset: * FSB is changed using the chipset */ static struct pci_dev *nforce2_chipset_dev; @@ -36,13 +37,13 @@ static struct pci_dev *nforce2_chipset_dev; /* fid: * multiplier * 10 */ -static int fid = 0; +static int fid; /* min_fsb, max_fsb: * minimum and maximum FSB (= FSB at boot time) */ -static int min_fsb = 0; -static int max_fsb = 0; +static int min_fsb; +static int max_fsb; MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>"); MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver"); @@ -53,7 +54,7 @@ module_param(min_fsb, int, 0444); MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); MODULE_PARM_DESC(min_fsb, - "Minimum FSB to use, if not defined: current FSB - 50"); + "Minimum FSB to use, if not defined: current FSB - 50"); #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) @@ -139,7 +140,7 @@ static unsigned int nforce2_fsb_read(int bootfsb) /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, - 0x01EF,PCI_ANY_ID,PCI_ANY_ID,NULL); + 0x01EF, PCI_ANY_ID, PCI_ANY_ID, NULL); if (!nforce2_sub5) return 0; @@ -147,13 +148,13 @@ static unsigned int nforce2_fsb_read(int bootfsb) fsb /= 1000000; /* Check if PLL register is already set */ - pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); + pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp); - if(bootfsb || !temp) + if (bootfsb || !temp) return fsb; - + /* Use PLL register FSB value */ - pci_read_config_dword(nforce2_chipset_dev,NFORCE2_PLLREG, &temp); + pci_read_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, &temp); fsb = nforce2_calc_fsb(temp); return fsb; @@ -184,7 +185,7 @@ static int nforce2_set_fsb(unsigned int fsb) } /* First write? Then set actual value */ - pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); + pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp); if (!temp) { pll = nforce2_calc_pll(tfsb); @@ -210,7 +211,8 @@ static int nforce2_set_fsb(unsigned int fsb) tfsb--; /* Calculate the PLL reg. value */ - if ((pll = nforce2_calc_pll(tfsb)) == -1) + pll = nforce2_calc_pll(tfsb); + if (pll == -1) return -EINVAL; nforce2_write_pll(pll); @@ -249,7 +251,7 @@ static unsigned int nforce2_get(unsigned int cpu) static int nforce2_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { -// unsigned long flags; +/* unsigned long flags; */ struct cpufreq_freqs freqs; unsigned int target_fsb; @@ -271,17 +273,17 @@ static int nforce2_target(struct cpufreq_policy *policy, cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); /* Disable IRQs */ - //local_irq_save(flags); + /* local_irq_save(flags); */ if (nforce2_set_fsb(target_fsb) < 0) printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n", - target_fsb); + target_fsb); else dprintk("Changed FSB successfully to %d\n", - target_fsb); + target_fsb); /* Enable IRQs */ - //local_irq_restore(flags); + /* local_irq_restore(flags); */ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); @@ -302,8 +304,8 @@ static int nforce2_verify(struct cpufreq_policy *policy) policy->max = (fsb_pol_max + 1) * fid * 100; cpufreq_verify_within_limits(policy, - policy->cpuinfo.min_freq, - policy->cpuinfo.max_freq); + policy->cpuinfo.min_freq, + policy->cpuinfo.max_freq); return 0; } @@ -347,7 +349,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy) /* Set maximum FSB to FSB at boot time */ max_fsb = nforce2_fsb_read(1); - if(!max_fsb) + if (!max_fsb) return -EIO; if (!min_fsb) diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c new file mode 100644 index 00000000000..fcb1cc9d75c --- /dev/null +++ b/arch/x86/kernel/cpu/intel_64.c @@ -0,0 +1,103 @@ +#include <linux/init.h> +#include <linux/smp.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/topology.h> +#include <asm/numa_64.h> + +#include "cpu.h" + +static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) +{ + if ((c->x86 == 0xf && c->x86_model >= 0x03) || + (c->x86 == 0x6 && c->x86_model >= 0x0e)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +} + +/* + * find out the number of processor cores on the die + */ +static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) +{ + unsigned int eax, t; + + if (c->cpuid_level < 4) + return 1; + + cpuid_count(4, 0, &eax, &t, &t, &t); + + if (eax & 0x1f) + return ((eax >> 26) + 1); + else + return 1; +} + +static void __cpuinit srat_detect_node(void) +{ +#ifdef CONFIG_NUMA + unsigned node; + int cpu = smp_processor_id(); + int apicid = hard_smp_processor_id(); + + /* Don't do the funky fallback heuristics the AMD version employs + for now. */ + node = apicid_to_node[apicid]; + if (node == NUMA_NO_NODE || !node_online(node)) + node = first_node(node_online_map); + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +} + +static void __cpuinit init_intel(struct cpuinfo_x86 *c) +{ + /* Cache sizes */ + unsigned n; + + init_intel_cacheinfo(c); + if (c->cpuid_level > 9) { + unsigned eax = cpuid_eax(10); + /* Check for version and the number of counters */ + if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) + set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); + } + + if (cpu_has_ds) { + unsigned int l1, l2; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_cpu_cap(c, X86_FEATURE_BTS); + if (!(l1 & (1<<12))) + set_cpu_cap(c, X86_FEATURE_PEBS); + } + + + if (cpu_has_bts) + ds_init_intel(c); + + n = c->extended_cpuid_level; + if (n >= 0x80000008) { + unsigned eax = cpuid_eax(0x80000008); + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + } + + if (c->x86 == 15) + c->x86_cache_alignment = c->x86_clflush_size * 2; + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + c->x86_max_cores = intel_num_cpu_cores(c); + + srat_detect_node(); +} + +static struct cpu_dev intel_cpu_dev __cpuinitdata = { + .c_vendor = "Intel", + .c_ident = { "GenuineIntel" }, + .c_early_init = early_init_intel, + .c_init = init_intel, +}; +cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); + diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 26d615dcb14..2c8afafa18e 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -62,6 +62,7 @@ static struct _cache_table cache_table[] __cpuinitdata = { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ + { 0x4e, LVL_2, 6144 }, /* 24-way set assoc, 64 byte line size */ { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c index e633c9c2b76..f390c9f6635 100644 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ b/arch/x86/kernel/cpu/mcheck/k7.c @@ -9,23 +9,23 @@ #include <linux/interrupt.h> #include <linux/smp.h> -#include <asm/processor.h> +#include <asm/processor.h> #include <asm/system.h> #include <asm/msr.h> #include "mce.h" /* Machine Check Handler For AMD Athlon/Duron */ -static void k7_machine_check(struct pt_regs * regs, long error_code) +static void k7_machine_check(struct pt_regs *regs, long error_code) { - int recover=1; + int recover = 1; u32 alow, ahigh, high, low; u32 mcgstl, mcgsth; int i; - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); if (mcgstl & (1<<0)) /* Recoverable ? */ - recover=0; + recover = 0; printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl); @@ -60,12 +60,12 @@ static void k7_machine_check(struct pt_regs * regs, long error_code) } if (recover&2) - panic ("CPU context corrupt"); + panic("CPU context corrupt"); if (recover&1) - panic ("Unable to continue"); - printk (KERN_EMERG "Attempting to continue.\n"); + panic("Unable to continue"); + printk(KERN_EMERG "Attempting to continue.\n"); mcgstl &= ~(1<<2); - wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); + wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); } @@ -81,25 +81,25 @@ void amd_mcheck_init(struct cpuinfo_x86 *c) machine_check_vector = k7_machine_check; wmb(); - printk (KERN_INFO "Intel machine check architecture supported.\n"); - rdmsr (MSR_IA32_MCG_CAP, l, h); + printk(KERN_INFO "Intel machine check architecture supported.\n"); + rdmsr(MSR_IA32_MCG_CAP, l, h); if (l & (1<<8)) /* Control register present ? */ - wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); + wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); nr_mce_banks = l & 0xff; /* Clear status for MC index 0 separately, we don't touch CTL, * as some K7 Athlons cause spurious MCEs when its enabled. */ if (boot_cpu_data.x86 == 6) { - wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); + wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0); i = 1; } else i = 0; - for (; i<nr_mce_banks; i++) { - wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); - wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); + for (; i < nr_mce_banks; i++) { + wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); + wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); } - set_in_cr4 (X86_CR4_MCE); - printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", + set_in_cr4(X86_CR4_MCE); + printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", smp_processor_id()); } diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index e07e8c068ae..501ca1cea27 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -31,7 +31,7 @@ #include <asm/idle.h> #define MISC_MCELOG_MINOR 227 -#define NR_BANKS 6 +#define NR_SYSFS_BANKS 6 atomic_t mce_entry; @@ -46,7 +46,7 @@ static int mce_dont_init; */ static int tolerant = 1; static int banks; -static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; +static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL }; static unsigned long notify_user; static int rip_msr; static int mce_bootlog = -1; @@ -209,7 +209,7 @@ void do_machine_check(struct pt_regs * regs, long error_code) barrier(); for (i = 0; i < banks; i++) { - if (!bank[i]) + if (i < NR_SYSFS_BANKS && !bank[i]) continue; m.misc = 0; @@ -444,9 +444,10 @@ static void mce_init(void *dummy) rdmsrl(MSR_IA32_MCG_CAP, cap); banks = cap & 0xff; - if (banks > NR_BANKS) { - printk(KERN_INFO "MCE: warning: using only %d banks\n", banks); - banks = NR_BANKS; + if (banks > MCE_EXTENDED_BANK) { + banks = MCE_EXTENDED_BANK; + printk(KERN_INFO "MCE: warning: using only %d banks\n", + MCE_EXTENDED_BANK); } /* Use accurate RIP reporting if available. */ if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) @@ -462,7 +463,11 @@ static void mce_init(void *dummy) wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); for (i = 0; i < banks; i++) { - wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); + if (i < NR_SYSFS_BANKS) + wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); + else + wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL); + wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); } } @@ -766,7 +771,10 @@ DEFINE_PER_CPU(struct sys_device, device_mce); } \ static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); -/* TBD should generate these dynamically based on number of available banks */ +/* + * TBD should generate these dynamically based on number of available banks. + * Have only 6 contol banks in /sysfs until then. + */ ACCESSOR(bank0ctl,bank[0],mce_restart()) ACCESSOR(bank1ctl,bank[1],mce_restart()) ACCESSOR(bank2ctl,bank[2],mce_restart()) diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index cb03345554a..eef001ad3bd 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -8,7 +8,7 @@ #include <linux/interrupt.h> #include <linux/smp.h> -#include <asm/processor.h> +#include <asm/processor.h> #include <asm/system.h> #include <asm/msr.h> #include <asm/apic.h> @@ -32,12 +32,12 @@ struct intel_mce_extended_msrs { /* u32 *reserved[]; */ }; -static int mce_num_extended_msrs = 0; +static int mce_num_extended_msrs; #ifdef CONFIG_X86_MCE_P4THERMAL static void unexpected_thermal_interrupt(struct pt_regs *regs) -{ +{ printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", smp_processor_id()); add_taint(TAINT_MACHINE_CHECK); @@ -83,7 +83,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) * be some SMM goo which handles it, so we can't even put a handler * since it might be delivered via SMI already -zwanem. */ - rdmsr (MSR_IA32_MISC_ENABLE, l, h); + rdmsr(MSR_IA32_MISC_ENABLE, l, h); h = apic_read(APIC_LVTTHMR); if ((l & (1<<3)) && (h & APIC_DM_SMI)) { printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", @@ -91,7 +91,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) return; /* -EBUSY */ } - /* check whether a vector already exists, temporarily masked? */ + /* check whether a vector already exists, temporarily masked? */ if (h & APIC_VECTOR_MASK) { printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " "installed\n", @@ -104,18 +104,18 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ apic_write_around(APIC_LVTTHMR, h); - rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); - wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); + rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); + wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); /* ok we're good to go... */ vendor_thermal_interrupt = intel_thermal_interrupt; - - rdmsr (MSR_IA32_MISC_ENABLE, l, h); - wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); - l = apic_read (APIC_LVTTHMR); - apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); + + l = apic_read(APIC_LVTTHMR); + apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); /* enable thermal throttle processing */ atomic_set(&therm_throt_en, 1); @@ -129,28 +129,28 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) { u32 h; - rdmsr (MSR_IA32_MCG_EAX, r->eax, h); - rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); - rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); - rdmsr (MSR_IA32_MCG_EDX, r->edx, h); - rdmsr (MSR_IA32_MCG_ESI, r->esi, h); - rdmsr (MSR_IA32_MCG_EDI, r->edi, h); - rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); - rdmsr (MSR_IA32_MCG_ESP, r->esp, h); - rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); - rdmsr (MSR_IA32_MCG_EIP, r->eip, h); + rdmsr(MSR_IA32_MCG_EAX, r->eax, h); + rdmsr(MSR_IA32_MCG_EBX, r->ebx, h); + rdmsr(MSR_IA32_MCG_ECX, r->ecx, h); + rdmsr(MSR_IA32_MCG_EDX, r->edx, h); + rdmsr(MSR_IA32_MCG_ESI, r->esi, h); + rdmsr(MSR_IA32_MCG_EDI, r->edi, h); + rdmsr(MSR_IA32_MCG_EBP, r->ebp, h); + rdmsr(MSR_IA32_MCG_ESP, r->esp, h); + rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h); + rdmsr(MSR_IA32_MCG_EIP, r->eip, h); } -static void intel_machine_check(struct pt_regs * regs, long error_code) +static void intel_machine_check(struct pt_regs *regs, long error_code) { - int recover=1; + int recover = 1; u32 alow, ahigh, high, low; u32 mcgstl, mcgsth; int i; - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); if (mcgstl & (1<<0)) /* Recoverable ? */ - recover=0; + recover = 0; printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl); @@ -191,20 +191,20 @@ static void intel_machine_check(struct pt_regs * regs, long error_code) } if (recover & 2) - panic ("CPU context corrupt"); + panic("CPU context corrupt"); if (recover & 1) - panic ("Unable to continue"); + panic("Unable to continue"); printk(KERN_EMERG "Attempting to continue.\n"); - /* - * Do not clear the MSR_IA32_MCi_STATUS if the error is not + /* + * Do not clear the MSR_IA32_MCi_STATUS if the error is not * recoverable/continuable.This will allow BIOS to look at the MSRs * for errors if the OS could not log the error. */ - for (i=0; i<nr_mce_banks; i++) { + for (i = 0; i < nr_mce_banks; i++) { u32 msr; msr = MSR_IA32_MC0_STATUS+i*4; - rdmsr (msr, low, high); + rdmsr(msr, low, high); if (high&(1<<31)) { /* Clear it */ wrmsr(msr, 0UL, 0UL); @@ -214,7 +214,7 @@ static void intel_machine_check(struct pt_regs * regs, long error_code) } } mcgstl &= ~(1<<2); - wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); + wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); } @@ -222,30 +222,30 @@ void intel_p4_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; - + machine_check_vector = intel_machine_check; wmb(); - printk (KERN_INFO "Intel machine check architecture supported.\n"); - rdmsr (MSR_IA32_MCG_CAP, l, h); + printk(KERN_INFO "Intel machine check architecture supported.\n"); + rdmsr(MSR_IA32_MCG_CAP, l, h); if (l & (1<<8)) /* Control register present ? */ - wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); + wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); nr_mce_banks = l & 0xff; - for (i=0; i<nr_mce_banks; i++) { - wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); - wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); + for (i = 0; i < nr_mce_banks; i++) { + wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); + wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); } - set_in_cr4 (X86_CR4_MCE); - printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", + set_in_cr4(X86_CR4_MCE); + printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", smp_processor_id()); /* Check for P4/Xeon extended MCE MSRs */ - rdmsr (MSR_IA32_MCG_CAP, l, h); + rdmsr(MSR_IA32_MCG_CAP, l, h); if (l & (1<<9)) {/* MCG_EXT_P */ mce_num_extended_msrs = (l >> 16) & 0xff; - printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" + printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" " available\n", smp_processor_id(), mce_num_extended_msrs); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 5d241ce94a4..509bd3d9eac 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -37,7 +37,7 @@ static struct fixed_range_block fixed_range_blocks[] = { static unsigned long smp_changes_mask; static struct mtrr_state mtrr_state = {}; static int mtrr_state_set; -static u64 tom2; +u64 mtrr_tom2; #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "mtrr." @@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) } } - if (tom2) { - if (start >= (1ULL<<32) && (end < tom2)) + if (mtrr_tom2) { + if (start >= (1ULL<<32) && (end < mtrr_tom2)) return MTRR_TYPE_WRBACK; } @@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); } +/* fill the MSR pair relating to a var range */ +void fill_mtrr_var_range(unsigned int index, + u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi) +{ + struct mtrr_var_range *vr; + + vr = mtrr_state.var_ranges; + + vr[index].base_lo = base_lo; + vr[index].base_hi = base_hi; + vr[index].mask_lo = mask_lo; + vr[index].mask_hi = mask_hi; +} + static void get_fixed_ranges(mtrr_type * frs) { @@ -213,13 +227,13 @@ void __init get_mtrr_state(void) mtrr_state.enabled = (lo & 0xc00) >> 10; if (amd_special_default_mtrr()) { - unsigned lo, hi; + unsigned low, high; /* TOP_MEM2 */ - rdmsr(MSR_K8_TOP_MEM2, lo, hi); - tom2 = hi; - tom2 <<= 32; - tom2 |= lo; - tom2 &= 0xffffff8000000ULL; + rdmsr(MSR_K8_TOP_MEM2, low, high); + mtrr_tom2 = high; + mtrr_tom2 <<= 32; + mtrr_tom2 |= low; + mtrr_tom2 &= 0xffffff800000ULL; } if (mtrr_show) { int high_width; @@ -251,9 +265,9 @@ void __init get_mtrr_state(void) else printk(KERN_INFO "MTRR %u disabled\n", i); } - if (tom2) { + if (mtrr_tom2) { printk(KERN_INFO "TOM2: %016llx aka %lldM\n", - tom2, tom2>>20); + mtrr_tom2, mtrr_tom2>>20); } } mtrr_state_set = 1; @@ -328,7 +342,7 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) if (lo != msrwords[0] || hi != msrwords[1]) { if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 == 15 && + (boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) && ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) k8_enable_fixed_iorrs(); mtrr_wrmsr(msr, msrwords[0], msrwords[1]); diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6a1e278d932..105afe12beb 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -37,6 +37,7 @@ #include <linux/smp.h> #include <linux/cpu.h> #include <linux/mutex.h> +#include <linux/sort.h> #include <asm/e820.h> #include <asm/mtrr.h> @@ -609,6 +610,787 @@ static struct sysdev_driver mtrr_sysdev_driver = { .resume = mtrr_restore, }; +/* should be related to MTRR_VAR_RANGES nums */ +#define RANGE_NUM 256 + +struct res_range { + unsigned long start; + unsigned long end; +}; + +static int __init +add_range(struct res_range *range, int nr_range, unsigned long start, + unsigned long end) +{ + /* out of slots */ + if (nr_range >= RANGE_NUM) + return nr_range; + + range[nr_range].start = start; + range[nr_range].end = end; + + nr_range++; + + return nr_range; +} + +static int __init +add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, + unsigned long end) +{ + int i; + + /* try to merge it with old one */ + for (i = 0; i < nr_range; i++) { + unsigned long final_start, final_end; + unsigned long common_start, common_end; + + if (!range[i].end) + continue; + + common_start = max(range[i].start, start); + common_end = min(range[i].end, end); + if (common_start > common_end + 1) + continue; + + final_start = min(range[i].start, start); + final_end = max(range[i].end, end); + + range[i].start = final_start; + range[i].end = final_end; + return nr_range; + } + + /* need to add that */ + return add_range(range, nr_range, start, end); +} + +static void __init +subtract_range(struct res_range *range, unsigned long start, unsigned long end) +{ + int i, j; + + for (j = 0; j < RANGE_NUM; j++) { + if (!range[j].end) + continue; + + if (start <= range[j].start && end >= range[j].end) { + range[j].start = 0; + range[j].end = 0; + continue; + } + + if (start <= range[j].start && end < range[j].end && + range[j].start < end + 1) { + range[j].start = end + 1; + continue; + } + + + if (start > range[j].start && end >= range[j].end && + range[j].end > start - 1) { + range[j].end = start - 1; + continue; + } + + if (start > range[j].start && end < range[j].end) { + /* find the new spare */ + for (i = 0; i < RANGE_NUM; i++) { + if (range[i].end == 0) + break; + } + if (i < RANGE_NUM) { + range[i].end = range[j].end; + range[i].start = end + 1; + } else { + printk(KERN_ERR "run of slot in ranges\n"); + } + range[j].end = start - 1; + continue; + } + } +} + +static int __init cmp_range(const void *x1, const void *x2) +{ + const struct res_range *r1 = x1; + const struct res_range *r2 = x2; + long start1, start2; + + start1 = r1->start; + start2 = r2->start; + + return start1 - start2; +} + +struct var_mtrr_range_state { + unsigned long base_pfn; + unsigned long size_pfn; + mtrr_type type; +}; + +struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; +static int __initdata debug_print; + +static int __init +x86_get_mtrr_mem_range(struct res_range *range, int nr_range, + unsigned long extra_remove_base, + unsigned long extra_remove_size) +{ + unsigned long i, base, size; + mtrr_type type; + + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + if (type != MTRR_TYPE_WRBACK) + continue; + base = range_state[i].base_pfn; + size = range_state[i].size_pfn; + nr_range = add_range_with_merge(range, nr_range, base, + base + size - 1); + } + if (debug_print) { + printk(KERN_DEBUG "After WB checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + } + + /* take out UC ranges */ + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + if (type != MTRR_TYPE_UNCACHABLE) + continue; + size = range_state[i].size_pfn; + if (!size) + continue; + base = range_state[i].base_pfn; + subtract_range(range, base, base + size - 1); + } + if (extra_remove_size) + subtract_range(range, extra_remove_base, + extra_remove_base + extra_remove_size - 1); + + /* get new range num */ + nr_range = 0; + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + nr_range++; + } + if (debug_print) { + printk(KERN_DEBUG "After UC checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + } + + /* sort the ranges */ + sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + if (debug_print) { + printk(KERN_DEBUG "After sorting\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + } + + /* clear those is not used */ + for (i = nr_range; i < RANGE_NUM; i++) + memset(&range[i], 0, sizeof(range[i])); + + return nr_range; +} + +static struct res_range __initdata range[RANGE_NUM]; + +#ifdef CONFIG_MTRR_SANITIZER + +static unsigned long __init sum_ranges(struct res_range *range, int nr_range) +{ + unsigned long sum; + int i; + + sum = 0; + for (i = 0; i < nr_range; i++) + sum += range[i].end + 1 - range[i].start; + + return sum; +} + +static int enable_mtrr_cleanup __initdata = + CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; + +static int __init disable_mtrr_cleanup_setup(char *str) +{ + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 0; + return 0; +} +early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); + +static int __init enable_mtrr_cleanup_setup(char *str) +{ + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 1; + return 0; +} +early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); + +struct var_mtrr_state { + unsigned long range_startk; + unsigned long range_sizek; + unsigned long chunk_sizek; + unsigned long gran_sizek; + unsigned int reg; +}; + +static void __init +set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, + unsigned char type, unsigned int address_bits) +{ + u32 base_lo, base_hi, mask_lo, mask_hi; + u64 base, mask; + + if (!sizek) { + fill_mtrr_var_range(reg, 0, 0, 0, 0); + return; + } + + mask = (1ULL << address_bits) - 1; + mask &= ~((((u64)sizek) << 10) - 1); + + base = ((u64)basek) << 10; + + base |= type; + mask |= 0x800; + + base_lo = base & ((1ULL<<32) - 1); + base_hi = base >> 32; + + mask_lo = mask & ((1ULL<<32) - 1); + mask_hi = mask >> 32; + + fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); +} + +static void __init +save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, + unsigned char type) +{ + range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); + range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); + range_state[reg].type = type; +} + +static void __init +set_var_mtrr_all(unsigned int address_bits) +{ + unsigned long basek, sizek; + unsigned char type; + unsigned int reg; + + for (reg = 0; reg < num_var_ranges; reg++) { + basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10); + sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10); + type = range_state[reg].type; + + set_var_mtrr(reg, basek, sizek, type, address_bits); + } +} + +static unsigned int __init +range_to_mtrr(unsigned int reg, unsigned long range_startk, + unsigned long range_sizek, unsigned char type) +{ + if (!range_sizek || (reg >= num_var_ranges)) + return reg; + + while (range_sizek) { + unsigned long max_align, align; + unsigned long sizek; + + /* Compute the maximum size I can make a range */ + if (range_startk) + max_align = ffs(range_startk) - 1; + else + max_align = 32; + align = fls(range_sizek) - 1; + if (align > max_align) + align = max_align; + + sizek = 1 << align; + if (debug_print) + printk(KERN_DEBUG "Setting variable MTRR %d, " + "base: %ldMB, range: %ldMB, type %s\n", + reg, range_startk >> 10, sizek >> 10, + (type == MTRR_TYPE_UNCACHABLE)?"UC": + ((type == MTRR_TYPE_WRBACK)?"WB":"Other") + ); + save_var_mtrr(reg++, range_startk, sizek, type); + range_startk += sizek; + range_sizek -= sizek; + if (reg >= num_var_ranges) + break; + } + return reg; +} + +static unsigned __init +range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, + unsigned long sizek) +{ + unsigned long hole_basek, hole_sizek; + unsigned long second_basek, second_sizek; + unsigned long range0_basek, range0_sizek; + unsigned long range_basek, range_sizek; + unsigned long chunk_sizek; + unsigned long gran_sizek; + + hole_basek = 0; + hole_sizek = 0; + second_basek = 0; + second_sizek = 0; + chunk_sizek = state->chunk_sizek; + gran_sizek = state->gran_sizek; + + /* align with gran size, prevent small block used up MTRRs */ + range_basek = ALIGN(state->range_startk, gran_sizek); + if ((range_basek > basek) && basek) + return second_sizek; + state->range_sizek -= (range_basek - state->range_startk); + range_sizek = ALIGN(state->range_sizek, gran_sizek); + + while (range_sizek > state->range_sizek) { + range_sizek -= gran_sizek; + if (!range_sizek) + return 0; + } + state->range_sizek = range_sizek; + + /* try to append some small hole */ + range0_basek = state->range_startk; + range0_sizek = ALIGN(state->range_sizek, chunk_sizek); + if (range0_sizek == state->range_sizek) { + if (debug_print) + printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", + range0_basek<<10, + (range0_basek + state->range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + state->range_sizek, MTRR_TYPE_WRBACK); + return 0; + } + + range0_sizek -= chunk_sizek; + if (range0_sizek && sizek) { + while (range0_basek + range0_sizek > (basek + sizek)) { + range0_sizek -= chunk_sizek; + if (!range0_sizek) + break; + } + } + + if (range0_sizek) { + if (debug_print) + printk(KERN_DEBUG "range0: %016lx - %016lx\n", + range0_basek<<10, + (range0_basek + range0_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + range0_sizek, MTRR_TYPE_WRBACK); + + } + + range_basek = range0_basek + range0_sizek; + range_sizek = chunk_sizek; + + if (range_basek + range_sizek > basek && + range_basek + range_sizek <= (basek + sizek)) { + /* one hole */ + second_basek = basek; + second_sizek = range_basek + range_sizek - basek; + } + + /* if last piece, only could one hole near end */ + if ((second_basek || !basek) && + range_sizek - (state->range_sizek - range0_sizek) - second_sizek < + (chunk_sizek >> 1)) { + /* + * one hole in middle (second_sizek is 0) or at end + * (second_sizek is 0 ) + */ + hole_sizek = range_sizek - (state->range_sizek - range0_sizek) + - second_sizek; + hole_basek = range_basek + range_sizek - hole_sizek + - second_sizek; + } else { + /* fallback for big hole, or several holes */ + range_sizek = state->range_sizek - range0_sizek; + second_basek = 0; + second_sizek = 0; + } + + if (debug_print) + printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10, + (range_basek + range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range_basek, range_sizek, + MTRR_TYPE_WRBACK); + if (hole_sizek) { + if (debug_print) + printk(KERN_DEBUG "hole: %016lx - %016lx\n", + hole_basek<<10, (hole_basek + hole_sizek)<<10); + state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, + MTRR_TYPE_UNCACHABLE); + + } + + return second_sizek; +} + +static void __init +set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, + unsigned long size_pfn) +{ + unsigned long basek, sizek; + unsigned long second_sizek = 0; + + if (state->reg >= num_var_ranges) + return; + + basek = base_pfn << (PAGE_SHIFT - 10); + sizek = size_pfn << (PAGE_SHIFT - 10); + + /* See if I can merge with the last range */ + if ((basek <= 1024) || + (state->range_startk + state->range_sizek == basek)) { + unsigned long endk = basek + sizek; + state->range_sizek = endk - state->range_startk; + return; + } + /* Write the range mtrrs */ + if (state->range_sizek != 0) + second_sizek = range_to_mtrr_with_hole(state, basek, sizek); + + /* Allocate an msr */ + state->range_startk = basek + second_sizek; + state->range_sizek = sizek - second_sizek; +} + +/* mininum size of mtrr block that can take hole */ +static u64 mtrr_chunk_size __initdata = (256ULL<<20); + +static int __init parse_mtrr_chunk_size_opt(char *p) +{ + if (!p) + return -EINVAL; + mtrr_chunk_size = memparse(p, &p); + return 0; +} +early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); + +/* granity of mtrr of block */ +static u64 mtrr_gran_size __initdata; + +static int __init parse_mtrr_gran_size_opt(char *p) +{ + if (!p) + return -EINVAL; + mtrr_gran_size = memparse(p, &p); + return 0; +} +early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); + +static int nr_mtrr_spare_reg __initdata = + CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; + +static int __init parse_mtrr_spare_reg(char *arg) +{ + if (arg) + nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); + return 0; +} + +early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); + +static int __init +x86_setup_var_mtrrs(struct res_range *range, int nr_range, + u64 chunk_size, u64 gran_size) +{ + struct var_mtrr_state var_state; + int i; + int num_reg; + + var_state.range_startk = 0; + var_state.range_sizek = 0; + var_state.reg = 0; + var_state.chunk_sizek = chunk_size >> 10; + var_state.gran_sizek = gran_size >> 10; + + memset(range_state, 0, sizeof(range_state)); + + /* Write the range etc */ + for (i = 0; i < nr_range; i++) + set_var_mtrr_range(&var_state, range[i].start, + range[i].end - range[i].start + 1); + + /* Write the last range */ + if (var_state.range_sizek != 0) + range_to_mtrr_with_hole(&var_state, 0, 0); + + num_reg = var_state.reg; + /* Clear out the extra MTRR's */ + while (var_state.reg < num_var_ranges) { + save_var_mtrr(var_state.reg, 0, 0, 0); + var_state.reg++; + } + + return num_reg; +} + +struct mtrr_cleanup_result { + unsigned long gran_sizek; + unsigned long chunk_sizek; + unsigned long lose_cover_sizek; + unsigned int num_reg; + int bad; +}; + +/* + * gran_size: 1M, 2M, ..., 2G + * chunk size: gran_size, ..., 4G + * so we need (2+13)*6 + */ +#define NUM_RESULT 90 +#define PSHIFT (PAGE_SHIFT - 10) + +static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; +static struct res_range __initdata range_new[RANGE_NUM]; +static unsigned long __initdata min_loss_pfn[RANGE_NUM]; + +static int __init mtrr_cleanup(unsigned address_bits) +{ + unsigned long extra_remove_base, extra_remove_size; + unsigned long i, base, size, def, dummy; + mtrr_type type; + int nr_range, nr_range_new; + u64 chunk_size, gran_size; + unsigned long range_sums, range_sums_new; + int index_good; + int num_reg_good; + + /* extra one for all 0 */ + int num[MTRR_NUM_TYPES + 1]; + + if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) + return 0; + rdmsr(MTRRdefType_MSR, def, dummy); + def &= 0xff; + if (def != MTRR_TYPE_UNCACHABLE) + return 0; + + /* get it and store it aside */ + memset(range_state, 0, sizeof(range_state)); + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + range_state[i].base_pfn = base; + range_state[i].size_pfn = size; + range_state[i].type = type; + } + + /* check entries number */ + memset(num, 0, sizeof(num)); + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + size = range_state[i].size_pfn; + if (type >= MTRR_NUM_TYPES) + continue; + if (!size) + type = MTRR_NUM_TYPES; + num[type]++; + } + + /* check if we got UC entries */ + if (!num[MTRR_TYPE_UNCACHABLE]) + return 0; + + /* check if we only had WB and UC */ + if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != + num_var_ranges - num[MTRR_NUM_TYPES]) + return 0; + + memset(range, 0, sizeof(range)); + extra_remove_size = 0; + if (mtrr_tom2) { + extra_remove_base = 1 << (32 - PAGE_SHIFT); + extra_remove_size = + (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; + } + nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, + extra_remove_size); + range_sums = sum_ranges(range, nr_range); + printk(KERN_INFO "total RAM coverred: %ldM\n", + range_sums >> (20 - PAGE_SHIFT)); + + if (mtrr_chunk_size && mtrr_gran_size) { + int num_reg; + + debug_print = 1; + /* convert ranges to var ranges state */ + num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, + mtrr_gran_size); + + /* we got new setting in range_state, check it */ + memset(range_new, 0, sizeof(range_new)); + nr_range_new = x86_get_mtrr_mem_range(range_new, 0, + extra_remove_base, + extra_remove_size); + range_sums_new = sum_ranges(range_new, nr_range_new); + + i = 0; + result[i].chunk_sizek = mtrr_chunk_size >> 10; + result[i].gran_sizek = mtrr_gran_size >> 10; + result[i].num_reg = num_reg; + if (range_sums < range_sums_new) { + result[i].lose_cover_sizek = + (range_sums_new - range_sums) << PSHIFT; + result[i].bad = 1; + } else + result[i].lose_cover_sizek = + (range_sums - range_sums_new) << PSHIFT; + + printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", + result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10, + result[i].chunk_sizek >> 10); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", + result[i].num_reg, result[i].bad?"-":"", + result[i].lose_cover_sizek >> 10); + if (!result[i].bad) { + set_var_mtrr_all(address_bits); + return 1; + } + printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " + "will find optimal one\n"); + debug_print = 0; + memset(result, 0, sizeof(result[0])); + } + + i = 0; + memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); + memset(result, 0, sizeof(result)); + for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { + for (chunk_size = gran_size; chunk_size < (1ULL<<33); + chunk_size <<= 1) { + int num_reg; + + if (debug_print) + printk(KERN_INFO + "\ngran_size: %lldM chunk_size_size: %lldM\n", + gran_size >> 20, chunk_size >> 20); + if (i >= NUM_RESULT) + continue; + + /* convert ranges to var ranges state */ + num_reg = x86_setup_var_mtrrs(range, nr_range, + chunk_size, gran_size); + + /* we got new setting in range_state, check it */ + memset(range_new, 0, sizeof(range_new)); + nr_range_new = x86_get_mtrr_mem_range(range_new, 0, + extra_remove_base, extra_remove_size); + range_sums_new = sum_ranges(range_new, nr_range_new); + + result[i].chunk_sizek = chunk_size >> 10; + result[i].gran_sizek = gran_size >> 10; + result[i].num_reg = num_reg; + if (range_sums < range_sums_new) { + result[i].lose_cover_sizek = + (range_sums_new - range_sums) << PSHIFT; + result[i].bad = 1; + } else + result[i].lose_cover_sizek = + (range_sums - range_sums_new) << PSHIFT; + + /* double check it */ + if (!result[i].bad && !result[i].lose_cover_sizek) { + if (nr_range_new != nr_range || + memcmp(range, range_new, sizeof(range))) + result[i].bad = 1; + } + + if (!result[i].bad && (range_sums - range_sums_new < + min_loss_pfn[num_reg])) { + min_loss_pfn[num_reg] = + range_sums - range_sums_new; + } + i++; + } + } + + /* print out all */ + for (i = 0; i < NUM_RESULT; i++) { + printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", + result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10, + result[i].chunk_sizek >> 10); + printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n", + result[i].num_reg, result[i].bad?"-":"", + result[i].lose_cover_sizek >> 10); + } + + /* try to find the optimal index */ + if (nr_mtrr_spare_reg >= num_var_ranges) + nr_mtrr_spare_reg = num_var_ranges - 1; + num_reg_good = -1; + for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { + if (!min_loss_pfn[i]) { + num_reg_good = i; + break; + } + } + + index_good = -1; + if (num_reg_good != -1) { + for (i = 0; i < NUM_RESULT; i++) { + if (!result[i].bad && + result[i].num_reg == num_reg_good && + !result[i].lose_cover_sizek) { + index_good = i; + break; + } + } + } + + if (index_good != -1) { + printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); + i = index_good; + printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", + result[i].gran_sizek >> 10, + result[i].chunk_sizek >> 10); + printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n", + result[i].num_reg, + result[i].lose_cover_sizek >> 10); + /* convert ranges to var ranges state */ + chunk_size = result[i].chunk_sizek; + chunk_size <<= 10; + gran_size = result[i].gran_sizek; + gran_size <<= 10; + debug_print = 1; + x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); + set_var_mtrr_all(address_bits); + return 1; + } + + printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); + printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n"); + + return 0; +} +#else +static int __init mtrr_cleanup(unsigned address_bits) +{ + return 0; +} +#endif + +static int __initdata changed_by_mtrr_cleanup; + static int disable_mtrr_trim; static int __init disable_mtrr_trim_setup(char *str) @@ -648,6 +1430,19 @@ int __init amd_special_default_mtrr(void) return 0; } +static u64 __init real_trim_memory(unsigned long start_pfn, + unsigned long limit_pfn) +{ + u64 trim_start, trim_size; + trim_start = start_pfn; + trim_start <<= PAGE_SHIFT; + trim_size = limit_pfn; + trim_size <<= PAGE_SHIFT; + trim_size -= trim_start; + + return e820_update_range(trim_start, trim_size, E820_RAM, + E820_RESERVED); +} /** * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs * @end_pfn: ending page frame number @@ -663,8 +1458,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) { unsigned long i, base, size, highest_pfn = 0, def, dummy; mtrr_type type; - u64 trim_start, trim_size; + int nr_range; + u64 total_trim_size; + /* extra one for all 0 */ + int num[MTRR_NUM_TYPES + 1]; /* * Make sure we only trim uncachable memory on machines that * support the Intel MTRR architecture: @@ -676,14 +1474,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) if (def != MTRR_TYPE_UNCACHABLE) return 0; - if (amd_special_default_mtrr()) - return 0; + /* get it and store it aside */ + memset(range_state, 0, sizeof(range_state)); + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + range_state[i].base_pfn = base; + range_state[i].size_pfn = size; + range_state[i].type = type; + } /* Find highest cached pfn */ for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &base, &size, &type); + type = range_state[i].type; if (type != MTRR_TYPE_WRBACK) continue; + base = range_state[i].base_pfn; + size = range_state[i].size_pfn; if (highest_pfn < base + size) highest_pfn = base + size; } @@ -698,22 +1504,65 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) return 0; } - if (highest_pfn < end_pfn) { + /* check entries number */ + memset(num, 0, sizeof(num)); + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + if (type >= MTRR_NUM_TYPES) + continue; + size = range_state[i].size_pfn; + if (!size) + type = MTRR_NUM_TYPES; + num[type]++; + } + + /* no entry for WB? */ + if (!num[MTRR_TYPE_WRBACK]) + return 0; + + /* check if we only had WB and UC */ + if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != + num_var_ranges - num[MTRR_NUM_TYPES]) + return 0; + + memset(range, 0, sizeof(range)); + nr_range = 0; + if (mtrr_tom2) { + range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); + range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; + if (highest_pfn < range[nr_range].end + 1) + highest_pfn = range[nr_range].end + 1; + nr_range++; + } + nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); + + total_trim_size = 0; + /* check the head */ + if (range[0].start) + total_trim_size += real_trim_memory(0, range[0].start); + /* check the holes */ + for (i = 0; i < nr_range - 1; i++) { + if (range[i].end + 1 < range[i+1].start) + total_trim_size += real_trim_memory(range[i].end + 1, + range[i+1].start); + } + /* check the top */ + i = nr_range - 1; + if (range[i].end + 1 < end_pfn) + total_trim_size += real_trim_memory(range[i].end + 1, + end_pfn); + + if (total_trim_size) { printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" - " all of memory, losing %luMB of RAM.\n", - (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT)); + " all of memory, losing %lluMB of RAM.\n", + total_trim_size >> 20); - WARN_ON(1); + if (!changed_by_mtrr_cleanup) + WARN_ON(1); printk(KERN_INFO "update e820 for mtrr\n"); - trim_start = highest_pfn; - trim_start <<= PAGE_SHIFT; - trim_size = end_pfn; - trim_size <<= PAGE_SHIFT; - trim_size -= trim_start; - update_memory_range(trim_start, trim_size, E820_RAM, - E820_RESERVED); update_e820(); + return 1; } @@ -729,18 +1578,21 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) */ void __init mtrr_bp_init(void) { + u32 phys_addr; init_ifs(); + phys_addr = 32; + if (cpu_has_mtrr) { mtrr_if = &generic_mtrr_ops; size_or_mask = 0xff000000; /* 36 bits */ size_and_mask = 0x00f00000; + phys_addr = 36; /* This is an AMD specific MSR, but we assume(hope?) that Intel will implement it to when they extend the address bus of the Xeon. */ if (cpuid_eax(0x80000000) >= 0x80000008) { - u32 phys_addr; phys_addr = cpuid_eax(0x80000008) & 0xff; /* CPUID workaround for Intel 0F33/0F34 CPU */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && @@ -758,6 +1610,7 @@ void __init mtrr_bp_init(void) don't support PAE */ size_or_mask = 0xfff00000; /* 32 bits */ size_and_mask = 0; + phys_addr = 32; } } else { switch (boot_cpu_data.x86_vendor) { @@ -791,8 +1644,15 @@ void __init mtrr_bp_init(void) if (mtrr_if) { set_num_var_ranges(); init_table(); - if (use_intel()) + if (use_intel()) { get_mtrr_state(); + + if (mtrr_cleanup(phys_addr)) { + changed_by_mtrr_cleanup = 1; + mtrr_if->set_all(); + } + + } } } @@ -829,9 +1689,10 @@ static int __init mtrr_init_finialize(void) { if (!mtrr_if) return 0; - if (use_intel()) - mtrr_state_warn(); - else { + if (use_intel()) { + if (!changed_by_mtrr_cleanup) + mtrr_state_warn(); + } else { /* The CPUs haven't MTRR and seem to not support SMP. They have * specific drivers, we use a tricky method to support * suspend/resume for them. diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 2cc77eb6fea..2dc4ec656b2 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_context *ctxt); void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); +void fill_mtrr_var_range(unsigned int index, + u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); void get_mtrr_state(void); extern void set_mtrr_ops(struct mtrr_ops * ops); @@ -92,6 +94,7 @@ extern struct mtrr_ops * mtrr_if; #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) extern unsigned int num_var_ranges; +extern u64 mtrr_tom2; void mtrr_state_warn(void); const char *mtrr_attrib_to_str(int x); diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index f9ae93adffe..ddda4b64f54 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -181,7 +181,9 @@ void disable_lapic_nmi_watchdog(void) return; on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); - wd_ops->unreserve(); + + if (wd_ops) + wd_ops->unreserve(); BUG_ON(atomic_read(&nmi_active) != 0); } diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820.c index 124480c0008..7b613d2efb0 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820.c @@ -17,171 +17,30 @@ #include <linux/kexec.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/suspend.h> #include <linux/pfn.h> +#include <linux/suspend.h> #include <asm/pgtable.h> #include <asm/page.h> #include <asm/e820.h> #include <asm/proto.h> #include <asm/setup.h> -#include <asm/sections.h> -#include <asm/kdebug.h> #include <asm/trampoline.h> struct e820map e820; -/* - * PFN of last memory page. - */ -unsigned long end_pfn; - -/* - * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. - * The direct mapping extends to max_pfn_mapped, so that we can directly access - * apertures, ACPI and other tables without having to play with fixmaps. - */ -unsigned long max_pfn_mapped; - -/* - * Last pfn which the user wants to use. - */ -static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; - -/* - * Early reserved memory areas. - */ -#define MAX_EARLY_RES 20 - -struct early_res { - unsigned long start, end; - char name[16]; -}; -static struct early_res early_res[MAX_EARLY_RES] __initdata = { - { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ -#ifdef CONFIG_X86_TRAMPOLINE - { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, +/* For PCI or other memory-mapped resources */ +unsigned long pci_mem_start = 0xaeedbabe; +#ifdef CONFIG_PCI +EXPORT_SYMBOL(pci_mem_start); #endif - {} -}; - -void __init reserve_early(unsigned long start, unsigned long end, char *name) -{ - int i; - struct early_res *r; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - r = &early_res[i]; - if (end > r->start && start < r->end) - panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n", - start, end - 1, name?name:"", r->start, r->end - 1, r->name); - } - if (i >= MAX_EARLY_RES) - panic("Too many early reservations"); - r = &early_res[i]; - r->start = start; - r->end = end; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); -} - -void __init free_early(unsigned long start, unsigned long end) -{ - struct early_res *r; - int i, j; - - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - r = &early_res[i]; - if (start == r->start && end == r->end) - break; - } - if (i >= MAX_EARLY_RES || !early_res[i].end) - panic("free_early on not reserved area: %lx-%lx!", start, end); - for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) - ; - - memmove(&early_res[i], &early_res[i + 1], - (j - 1 - i) * sizeof(struct early_res)); - - early_res[j - 1].end = 0; -} - -void __init early_res_to_bootmem(unsigned long start, unsigned long end) -{ - int i; - unsigned long final_start, final_end; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - final_start = max(start, r->start); - final_end = min(end, r->end); - if (final_start >= final_end) - continue; - printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i, - final_start, final_end - 1, r->name); - reserve_bootmem_generic(final_start, final_end - final_start); - } -} - -/* Check for already reserved areas */ -static inline int __init -bad_addr(unsigned long *addrp, unsigned long size, unsigned long align) -{ - int i; - unsigned long addr = *addrp, last; - int changed = 0; -again: - last = addr + size; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - if (last >= r->start && addr < r->end) { - *addrp = addr = round_up(r->end, align); - changed = 1; - goto again; - } - } - return changed; -} - -/* Check for already reserved areas */ -static inline int __init -bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align) -{ - int i; - unsigned long addr = *addrp, last; - unsigned long size = *sizep; - int changed = 0; -again: - last = addr + size; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - if (last > r->start && addr < r->start) { - size = r->start - addr; - changed = 1; - goto again; - } - if (last > r->end && addr < r->end) { - addr = round_up(r->end, align); - size = last - addr; - changed = 1; - goto again; - } - if (last <= r->end && addr >= r->start) { - (*sizep)++; - return 0; - } - } - if (changed) { - *addrp = addr; - *sizep = size; - } - return changed; -} /* * This function checks if any part of the range <start,end> is mapped * with type. */ int -e820_any_mapped(unsigned long start, unsigned long end, unsigned type) +e820_any_mapped(u64 start, u64 end, unsigned type) { int i; @@ -204,8 +63,7 @@ EXPORT_SYMBOL_GPL(e820_any_mapped); * Note: this function only works correct if the e820 table is sorted and * not-overlapping, which is the case */ -int __init e820_all_mapped(unsigned long start, unsigned long end, - unsigned type) +int __init e820_all_mapped(u64 start, u64 end, unsigned type) { int i; @@ -234,214 +92,13 @@ int __init e820_all_mapped(unsigned long start, unsigned long end, } /* - * Find a free area with specified alignment in a specific range. - */ -unsigned long __init find_e820_area(unsigned long start, unsigned long end, - unsigned long size, unsigned long align) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long addr, last; - unsigned long ei_last; - - if (ei->type != E820_RAM) - continue; - addr = round_up(ei->addr, align); - ei_last = ei->addr + ei->size; - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - continue; - while (bad_addr(&addr, size, align) && addr+size <= ei_last) - ; - last = addr + size; - if (last > ei_last) - continue; - if (last > end) - continue; - return addr; - } - return -1UL; -} - -/* - * Find next free range after *start - */ -unsigned long __init find_e820_area_size(unsigned long start, - unsigned long *sizep, - unsigned long align) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long addr, last; - unsigned long ei_last; - - if (ei->type != E820_RAM) - continue; - addr = round_up(ei->addr, align); - ei_last = ei->addr + ei->size; - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - continue; - *sizep = ei_last - addr; - while (bad_addr_size(&addr, sizep, align) && - addr + *sizep <= ei_last) - ; - last = addr + *sizep; - if (last > ei_last) - continue; - return addr; - } - return -1UL; - -} -/* - * Find the highest page frame number we have available - */ -unsigned long __init e820_end_of_ram(void) -{ - unsigned long end_pfn; - - end_pfn = find_max_pfn_with_active_regions(); - - if (end_pfn > max_pfn_mapped) - max_pfn_mapped = end_pfn; - if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT) - max_pfn_mapped = MAXMEM>>PAGE_SHIFT; - if (end_pfn > end_user_pfn) - end_pfn = end_user_pfn; - if (end_pfn > max_pfn_mapped) - end_pfn = max_pfn_mapped; - - printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped); - return end_pfn; -} - -/* - * Mark e820 reserved areas as busy for the resource manager. - */ -void __init e820_reserve_resources(void) -{ - int i; - struct resource *res; - - res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); - for (i = 0; i < e820.nr_map; i++) { - switch (e820.map[i].type) { - case E820_RAM: res->name = "System RAM"; break; - case E820_ACPI: res->name = "ACPI Tables"; break; - case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; - default: res->name = "reserved"; - } - res->start = e820.map[i].addr; - res->end = res->start + e820.map[i].size - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - insert_resource(&iomem_resource, res); - res++; - } -} - -/* - * Find the ranges of physical addresses that do not correspond to - * e820 RAM areas and mark the corresponding pages as nosave for software - * suspend and suspend to RAM. - * - * This function requires the e820 map to be sorted and without any - * overlapping entries and assumes the first e820 area to be RAM. - */ -void __init e820_mark_nosave_regions(void) -{ - int i; - unsigned long paddr; - - paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE); - for (i = 1; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (paddr < ei->addr) - register_nosave_region(PFN_DOWN(paddr), - PFN_UP(ei->addr)); - - paddr = round_down(ei->addr + ei->size, PAGE_SIZE); - if (ei->type != E820_RAM) - register_nosave_region(PFN_UP(ei->addr), - PFN_DOWN(paddr)); - - if (paddr >= (end_pfn << PAGE_SHIFT)) - break; - } -} - -/* - * Finds an active region in the address range from start_pfn to end_pfn and - * returns its range in ei_startpfn and ei_endpfn for the e820 entry. - */ -static int __init e820_find_active_region(const struct e820entry *ei, - unsigned long start_pfn, - unsigned long end_pfn, - unsigned long *ei_startpfn, - unsigned long *ei_endpfn) -{ - *ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT; - *ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT; - - /* Skip map entries smaller than a page */ - if (*ei_startpfn >= *ei_endpfn) - return 0; - - /* Check if max_pfn_mapped should be updated */ - if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped) - max_pfn_mapped = *ei_endpfn; - - /* Skip if map is outside the node */ - if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || - *ei_startpfn >= end_pfn) - return 0; - - /* Check for overlaps */ - if (*ei_startpfn < start_pfn) - *ei_startpfn = start_pfn; - if (*ei_endpfn > end_pfn) - *ei_endpfn = end_pfn; - - /* Obey end_user_pfn to save on memmap */ - if (*ei_startpfn >= end_user_pfn) - return 0; - if (*ei_endpfn > end_user_pfn) - *ei_endpfn = end_user_pfn; - - return 1; -} - -/* Walk the e820 map and register active regions within a node */ -void __init -e820_register_active_regions(int nid, unsigned long start_pfn, - unsigned long end_pfn) -{ - unsigned long ei_startpfn; - unsigned long ei_endpfn; - int i; - - for (i = 0; i < e820.nr_map; i++) - if (e820_find_active_region(&e820.map[i], - start_pfn, end_pfn, - &ei_startpfn, &ei_endpfn)) - add_active_range(nid, ei_startpfn, ei_endpfn); -} - -/* * Add a memory region to the kernel e820 map. */ -void __init add_memory_region(unsigned long start, unsigned long size, int type) +void __init e820_add_region(u64 start, u64 size, int type) { int x = e820.nr_map; - if (x == E820MAX) { + if (x == ARRAY_SIZE(e820.map)) { printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); return; } @@ -452,28 +109,7 @@ void __init add_memory_region(unsigned long start, unsigned long size, int type) e820.nr_map++; } -/* - * Find the hole size (in bytes) in the memory range. - * @start: starting address of the memory range to scan - * @end: ending address of the memory range to scan - */ -unsigned long __init e820_hole_size(unsigned long start, unsigned long end) -{ - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long end_pfn = end >> PAGE_SHIFT; - unsigned long ei_startpfn, ei_endpfn, ram = 0; - int i; - - for (i = 0; i < e820.nr_map; i++) { - if (e820_find_active_region(&e820.map[i], - start_pfn, end_pfn, - &ei_startpfn, &ei_endpfn)) - ram += ei_endpfn - ei_startpfn; - } - return end - start - (ram << PAGE_SHIFT); -} - -static void __init e820_print_map(char *who) +void __init e820_print_map(char *who) { int i; @@ -506,19 +142,75 @@ static void __init e820_print_map(char *who) * Sanitize the BIOS e820 map. * * Some e820 responses include overlapping entries. The following - * replaces the original e820 map with a new one, removing overlaps. + * replaces the original e820 map with a new one, removing overlaps, + * and resolving conflicting memory types in favor of highest + * numbered type. * + * The input parameter biosmap points to an array of 'struct + * e820entry' which on entry has elements in the range [0, *pnr_map) + * valid, and which has space for up to max_nr_map entries. + * On return, the resulting sanitized e820 map entries will be in + * overwritten in the same location, starting at biosmap. + * + * The integer pointed to by pnr_map must be valid on entry (the + * current number of valid entries located at biosmap) and will + * be updated on return, with the new number of valid entries + * (something no more than max_nr_map.) + * + * The return value from sanitize_e820_map() is zero if it + * successfully 'sanitized' the map entries passed in, and is -1 + * if it did nothing, which can happen if either of (1) it was + * only passed one map entry, or (2) any of the input map entries + * were invalid (start + size < start, meaning that the size was + * so big the described memory range wrapped around through zero.) + * + * Visually we're performing the following + * (1,2,3,4 = memory types)... + * + * Sample memory map (w/overlaps): + * ____22__________________ + * ______________________4_ + * ____1111________________ + * _44_____________________ + * 11111111________________ + * ____________________33__ + * ___________44___________ + * __________33333_________ + * ______________22________ + * ___________________2222_ + * _________111111111______ + * _____________________11_ + * _________________4______ + * + * Sanitized equivalent (no overlap): + * 1_______________________ + * _44_____________________ + * ___1____________________ + * ____22__________________ + * ______11________________ + * _________1______________ + * __________3_____________ + * ___________44___________ + * _____________33_________ + * _______________2________ + * ________________1_______ + * _________________4______ + * ___________________2____ + * ____________________33__ + * ______________________4_ */ -static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) + +int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, + int *pnr_map) { struct change_member { struct e820entry *pbios; /* pointer to original bios entry */ unsigned long long addr; /* address for this change point */ }; - static struct change_member change_point_list[2*E820MAX] __initdata; - static struct change_member *change_point[2*E820MAX] __initdata; - static struct e820entry *overlap_list[E820MAX] __initdata; - static struct e820entry new_bios[E820MAX] __initdata; +static struct change_member change_point_list[2*E820_X_MAX] __initdata; +static struct change_member *change_point[2*E820_X_MAX] __initdata; +static struct e820entry *overlap_list[E820_X_MAX] __initdata; +static struct e820entry new_bios[E820_X_MAX] __initdata; struct change_member *change_tmp; unsigned long current_type, last_type; unsigned long long last_addr; @@ -528,48 +220,12 @@ static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) int old_nr, new_nr, chg_nr; int i; - /* - Visually we're performing the following - (1,2,3,4 = memory types)... - - Sample memory map (w/overlaps): - ____22__________________ - ______________________4_ - ____1111________________ - _44_____________________ - 11111111________________ - ____________________33__ - ___________44___________ - __________33333_________ - ______________22________ - ___________________2222_ - _________111111111______ - _____________________11_ - _________________4______ - - Sanitized equivalent (no overlap): - 1_______________________ - _44_____________________ - ___1____________________ - ____22__________________ - ______11________________ - _________1______________ - __________3_____________ - ___________44___________ - _____________33_________ - _______________2________ - ________________1_______ - _________________4______ - ___________________2____ - ____________________33__ - ______________________4_ - */ - /* if there's only one memory region, don't bother */ if (*pnr_map < 2) return -1; old_nr = *pnr_map; + BUG_ON(old_nr > max_nr_map); /* bail out if we find any unreasonable addresses in bios map */ for (i = 0; i < old_nr; i++) @@ -681,7 +337,7 @@ static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) * no more space left for new * bios entries ? */ - if (++new_bios_entry >= E820MAX) + if (++new_bios_entry >= max_nr_map) break; } if (current_type != 0) { @@ -703,22 +359,9 @@ static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) return 0; } -/* - * Copy the BIOS e820 map into a safe place. - * - * Sanity-check it while we're at it.. - * - * If we're lucky and live on a modern system, the setup code - * will have given us a memory map that we can use to properly - * set up memory. If we aren't, we'll fake a memory map. - */ -static int __init copy_e820_map(struct e820entry *biosmap, int nr_map) +static int __init __copy_e820_map(struct e820entry *biosmap, int nr_map) { - /* Only one memory region (or negative)? Ignore it */ - if (nr_map < 2) - return -1; - - do { + while (nr_map) { u64 start = biosmap->addr; u64 size = biosmap->size; u64 end = start + size; @@ -728,111 +371,37 @@ static int __init copy_e820_map(struct e820entry *biosmap, int nr_map) if (start > end) return -1; - add_memory_region(start, size, type); - } while (biosmap++, --nr_map); - return 0; -} - -static void early_panic(char *msg) -{ - early_printk(msg); - panic(msg); -} - -/* We're not void only for x86 32-bit compat */ -char * __init machine_specific_memory_setup(void) -{ - char *who = "BIOS-e820"; - /* - * Try to copy the BIOS-supplied E820-map. - * - * Otherwise fake a memory map; one section from 0k->640k, - * the next section from 1mb->appropriate_mem_k - */ - sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); - if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) - early_panic("Cannot find a valid memory map"); - printk(KERN_INFO "BIOS-provided physical RAM map:\n"); - e820_print_map(who); - - /* In case someone cares... */ - return who; -} - -static int __init parse_memopt(char *p) -{ - if (!p) - return -EINVAL; - end_user_pfn = memparse(p, &p); - end_user_pfn >>= PAGE_SHIFT; - return 0; -} -early_param("mem", parse_memopt); - -static int userdef __initdata; - -static int __init parse_memmap_opt(char *p) -{ - char *oldp; - unsigned long long start_at, mem_size; - - if (!strcmp(p, "exactmap")) { -#ifdef CONFIG_CRASH_DUMP - /* - * If we are doing a crash dump, we still need to know - * the real mem size before original memory map is - * reset. - */ - e820_register_active_regions(0, 0, -1UL); - saved_max_pfn = e820_end_of_ram(); - remove_all_active_ranges(); -#endif - max_pfn_mapped = 0; - e820.nr_map = 0; - userdef = 1; - return 0; - } - - oldp = p; - mem_size = memparse(p, &p); - if (p == oldp) - return -EINVAL; + e820_add_region(start, size, type); - userdef = 1; - if (*p == '@') { - start_at = memparse(p+1, &p); - add_memory_region(start_at, mem_size, E820_RAM); - } else if (*p == '#') { - start_at = memparse(p+1, &p); - add_memory_region(start_at, mem_size, E820_ACPI); - } else if (*p == '$') { - start_at = memparse(p+1, &p); - add_memory_region(start_at, mem_size, E820_RESERVED); - } else { - end_user_pfn = (mem_size >> PAGE_SHIFT); + biosmap++; + nr_map--; } - return *p == '\0' ? 0 : -EINVAL; + return 0; } -early_param("memmap", parse_memmap_opt); -void __init finish_e820_parsing(void) +/* + * Copy the BIOS e820 map into a safe place. + * + * Sanity-check it while we're at it.. + * + * If we're lucky and live on a modern system, the setup code + * will have given us a memory map that we can use to properly + * set up memory. If we aren't, we'll fake a memory map. + */ +int __init copy_e820_map(struct e820entry *biosmap, int nr_map) { - if (userdef) { - char nr = e820.nr_map; - - if (sanitize_e820_map(e820.map, &nr) < 0) - early_panic("Invalid user supplied memory map"); - e820.nr_map = nr; + /* Only one memory region (or negative)? Ignore it */ + if (nr_map < 2) + return -1; - printk(KERN_INFO "user-defined physical RAM map:\n"); - e820_print_map("user"); - } + return __copy_e820_map(biosmap, nr_map); } -void __init update_memory_range(u64 start, u64 size, unsigned old_type, +u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, unsigned new_type) { int i; + u64 real_updated_size = 0; BUG_ON(old_type == new_type); @@ -842,8 +411,10 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type, if (ei->type != old_type) continue; /* totally covered? */ - if (ei->addr >= start && ei->size <= size) { + if (ei->addr >= start && + (ei->addr + ei->size) <= (start + size)) { ei->type = new_type; + real_updated_size += ei->size; continue; } /* partially covered */ @@ -851,26 +422,25 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type, final_end = min(start + size, ei->addr + ei->size); if (final_start >= final_end) continue; - add_memory_region(final_start, final_end - final_start, + e820_add_region(final_start, final_end - final_start, new_type); + real_updated_size += final_end - final_start; } + return real_updated_size; } void __init update_e820(void) { - u8 nr_map; + int nr_map; nr_map = e820.nr_map; - if (sanitize_e820_map(e820.map, &nr_map)) + if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) return; e820.nr_map = nr_map; printk(KERN_INFO "modified physical RAM map:\n"); e820_print_map("modified"); } -unsigned long pci_mem_start = 0xaeedbabe; -EXPORT_SYMBOL(pci_mem_start); - /* * Search for the biggest gap in the low 32 bits of the e820 * memory space. We pass this space to PCI to assign MMIO resources @@ -880,7 +450,7 @@ EXPORT_SYMBOL(pci_mem_start); __init void e820_setup_gap(void) { unsigned long gapstart, gapsize, round; - unsigned long last; + unsigned long long last; int i; int found = 0; @@ -909,6 +479,7 @@ __init void e820_setup_gap(void) last = start; } +#ifdef CONFIG_X86_64 if (!found) { gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " @@ -916,6 +487,7 @@ __init void e820_setup_gap(void) KERN_ERR "PCI: Unassigned devices with 32bit resource " "registers may break!\n"); } +#endif /* * See how much we want to round up: start off with @@ -932,6 +504,586 @@ __init void e820_setup_gap(void) pci_mem_start, gapstart, gapsize); } +/** + * Because of the size limitation of struct boot_params, only first + * 128 E820 memory entries are passed to kernel via + * boot_params.e820_map, others are passed via SETUP_E820_EXT node of + * linked list of struct setup_data, which is parsed here. + */ +void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data) +{ + u32 map_len; + int entries; + struct e820entry *extmap; + + entries = sdata->len / sizeof(struct e820entry); + map_len = sdata->len + sizeof(struct setup_data); + if (map_len > PAGE_SIZE) + sdata = early_ioremap(pa_data, map_len); + extmap = (struct e820entry *)(sdata->data); + __copy_e820_map(extmap, entries); + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + if (map_len > PAGE_SIZE) + early_iounmap(sdata, map_len); + printk(KERN_INFO "extended physical RAM map:\n"); + e820_print_map("extended"); +} + +#if defined(CONFIG_X86_64) || \ + (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) +/** + * Find the ranges of physical addresses that do not correspond to + * e820 RAM areas and mark the corresponding pages as nosave for + * hibernation (32 bit) or software suspend and suspend to RAM (64 bit). + * + * This function requires the e820 map to be sorted and without any + * overlapping entries and assumes the first e820 area to be RAM. + */ +void __init e820_mark_nosave_regions(unsigned long limit_pfn) +{ + int i; + unsigned long pfn; + + pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); + for (i = 1; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (pfn < PFN_UP(ei->addr)) + register_nosave_region(pfn, PFN_UP(ei->addr)); + + pfn = PFN_DOWN(ei->addr + ei->size); + if (ei->type != E820_RAM) + register_nosave_region(PFN_UP(ei->addr), pfn); + + if (pfn >= limit_pfn) + break; + } +} +#endif + +/* + * Early reserved memory areas. + */ +#define MAX_EARLY_RES 20 + +struct early_res { + u64 start, end; + char name[16]; +}; +static struct early_res early_res[MAX_EARLY_RES] __initdata = { + { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE) + { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, +#endif +#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) + /* + * But first pinch a few for the stack/trampoline stuff + * FIXME: Don't need the extra page at 4K, but need to fix + * trampoline before removing it. (see the GDT stuff) + */ + { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" }, + /* + * Has to be in very low memory so we can execute + * real-mode AP code. + */ + { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" }, +#endif + {} +}; + +static int __init find_overlapped_early(u64 start, u64 end) +{ + int i; + struct early_res *r; + + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + r = &early_res[i]; + if (end > r->start && start < r->end) + break; + } + + return i; +} + +void __init reserve_early(u64 start, u64 end, char *name) +{ + int i; + struct early_res *r; + + i = find_overlapped_early(start, end); + if (i >= MAX_EARLY_RES) + panic("Too many early reservations"); + r = &early_res[i]; + if (r->end) + panic("Overlapping early reservations " + "%llx-%llx %s to %llx-%llx %s\n", + start, end - 1, name?name:"", r->start, + r->end - 1, r->name); + r->start = start; + r->end = end; + if (name) + strncpy(r->name, name, sizeof(r->name) - 1); +} + +void __init free_early(u64 start, u64 end) +{ + struct early_res *r; + int i, j; + + i = find_overlapped_early(start, end); + r = &early_res[i]; + if (i >= MAX_EARLY_RES || r->end != end || r->start != start) + panic("free_early on not reserved area: %llx-%llx!", + start, end - 1); + + for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) + ; + + memmove(&early_res[i], &early_res[i + 1], + (j - 1 - i) * sizeof(struct early_res)); + + early_res[j - 1].end = 0; +} + +void __init early_res_to_bootmem(u64 start, u64 end) +{ + int i; + u64 final_start, final_end; + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + struct early_res *r = &early_res[i]; + final_start = max(start, r->start); + final_end = min(end, r->end); + if (final_start >= final_end) + continue; + printk(KERN_INFO " early res: %d [%llx-%llx] %s\n", i, + final_start, final_end - 1, r->name); + reserve_bootmem_generic(final_start, final_end - final_start, + BOOTMEM_DEFAULT); + } +} + +/* Check for already reserved areas */ +static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) +{ + int i; + u64 addr = *addrp; + int changed = 0; + struct early_res *r; +again: + i = find_overlapped_early(addr, addr + size); + r = &early_res[i]; + if (i < MAX_EARLY_RES && r->end) { + *addrp = addr = round_up(r->end, align); + changed = 1; + goto again; + } + return changed; +} + +/* Check for already reserved areas */ +static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) +{ + int i; + u64 addr = *addrp, last; + u64 size = *sizep; + int changed = 0; +again: + last = addr + size; + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + struct early_res *r = &early_res[i]; + if (last > r->start && addr < r->start) { + size = r->start - addr; + changed = 1; + goto again; + } + if (last > r->end && addr < r->end) { + addr = round_up(r->end, align); + size = last - addr; + changed = 1; + goto again; + } + if (last <= r->end && addr >= r->start) { + (*sizep)++; + return 0; + } + } + if (changed) { + *addrp = addr; + *sizep = size; + } + return changed; +} + +/* + * Find a free area with specified alignment in a specific range. + */ +u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 addr, last; + u64 ei_last; + + if (ei->type != E820_RAM) + continue; + addr = round_up(ei->addr, align); + ei_last = ei->addr + ei->size; + if (addr < start) + addr = round_up(start, align); + if (addr >= ei_last) + continue; + while (bad_addr(&addr, size, align) && addr+size <= ei_last) + ; + last = addr + size; + if (last > ei_last) + continue; + if (last > end) + continue; + return addr; + } + return -1ULL; +} + +/* + * Find next free range after *start + */ +u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 addr, last; + u64 ei_last; + + if (ei->type != E820_RAM) + continue; + addr = round_up(ei->addr, align); + ei_last = ei->addr + ei->size; + if (addr < start) + addr = round_up(start, align); + if (addr >= ei_last) + continue; + *sizep = ei_last - addr; + while (bad_addr_size(&addr, sizep, align) && + addr + *sizep <= ei_last) + ; + last = addr + *sizep; + if (last > ei_last) + continue; + return addr; + } + return -1UL; + +} + +/* + * pre allocated 4k and reserved it in e820 + */ +u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) +{ + u64 size = 0; + u64 addr; + u64 start; + + start = startt; + while (size < sizet) + start = find_e820_area_size(start, &size, align); + + if (size < sizet) + return 0; + + addr = round_down(start + size - sizet, align); + e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); + printk(KERN_INFO "update e820 for early_reserve_e820\n"); + update_e820(); + + return addr; +} + +#ifdef CONFIG_X86_32 +# ifdef CONFIG_X86_PAE +# define MAX_ARCH_PFN (1ULL<<(36-PAGE_SHIFT)) +# else +# define MAX_ARCH_PFN (1ULL<<(32-PAGE_SHIFT)) +# endif +#else /* CONFIG_X86_32 */ +# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT +#endif + +/* + * Last pfn which the user wants to use. + */ +unsigned long __initdata end_user_pfn = MAX_ARCH_PFN; + +/* + * Find the highest page frame number we have available + */ +unsigned long __init e820_end_of_ram(void) +{ + unsigned long last_pfn; + unsigned long max_arch_pfn = MAX_ARCH_PFN; + + last_pfn = find_max_pfn_with_active_regions(); + + if (last_pfn > max_arch_pfn) + last_pfn = max_arch_pfn; + if (last_pfn > end_user_pfn) + last_pfn = end_user_pfn; + + printk(KERN_INFO "last_pfn = %lu max_arch_pfn = %lu\n", + last_pfn, max_arch_pfn); + return last_pfn; +} + +/* + * Finds an active region in the address range from start_pfn to last_pfn and + * returns its range in ei_startpfn and ei_endpfn for the e820 entry. + */ +int __init e820_find_active_region(const struct e820entry *ei, + unsigned long start_pfn, + unsigned long last_pfn, + unsigned long *ei_startpfn, + unsigned long *ei_endpfn) +{ + u64 align = PAGE_SIZE; + + *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT; + *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT; + + /* Skip map entries smaller than a page */ + if (*ei_startpfn >= *ei_endpfn) + return 0; + + /* Skip if map is outside the node */ + if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || + *ei_startpfn >= last_pfn) + return 0; + + /* Check for overlaps */ + if (*ei_startpfn < start_pfn) + *ei_startpfn = start_pfn; + if (*ei_endpfn > last_pfn) + *ei_endpfn = last_pfn; + + /* Obey end_user_pfn to save on memmap */ + if (*ei_startpfn >= end_user_pfn) + return 0; + if (*ei_endpfn > end_user_pfn) + *ei_endpfn = end_user_pfn; + + return 1; +} + +/* Walk the e820 map and register active regions within a node */ +void __init e820_register_active_regions(int nid, unsigned long start_pfn, + unsigned long last_pfn) +{ + unsigned long ei_startpfn; + unsigned long ei_endpfn; + int i; + + for (i = 0; i < e820.nr_map; i++) + if (e820_find_active_region(&e820.map[i], + start_pfn, last_pfn, + &ei_startpfn, &ei_endpfn)) + add_active_range(nid, ei_startpfn, ei_endpfn); +} + +/* + * Find the hole size (in bytes) in the memory range. + * @start: starting address of the memory range to scan + * @end: ending address of the memory range to scan + */ +u64 __init e820_hole_size(u64 start, u64 end) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long last_pfn = end >> PAGE_SHIFT; + unsigned long ei_startpfn, ei_endpfn, ram = 0; + int i; + + for (i = 0; i < e820.nr_map; i++) { + if (e820_find_active_region(&e820.map[i], + start_pfn, last_pfn, + &ei_startpfn, &ei_endpfn)) + ram += ei_endpfn - ei_startpfn; + } + return end - start - ((u64)ram << PAGE_SHIFT); +} + +static void early_panic(char *msg) +{ + early_printk(msg); + panic(msg); +} + +/* "mem=nopentium" disables the 4MB page tables. */ +static int __init parse_memopt(char *p) +{ + u64 mem_size; + + if (!p) + return -EINVAL; + +#ifdef CONFIG_X86_32 + if (!strcmp(p, "nopentium")) { + setup_clear_cpu_cap(X86_FEATURE_PSE); + return 0; + } +#endif + + mem_size = memparse(p, &p); + end_user_pfn = mem_size>>PAGE_SHIFT; + return 0; +} +early_param("mem", parse_memopt); + +static int userdef __initdata; + +static int __init parse_memmap_opt(char *p) +{ + char *oldp; + u64 start_at, mem_size; + + if (!strcmp(p, "exactmap")) { +#ifdef CONFIG_CRASH_DUMP + /* + * If we are doing a crash dump, we still need to know + * the real mem size before original memory map is + * reset. + */ + e820_register_active_regions(0, 0, -1UL); + saved_max_pfn = e820_end_of_ram(); + remove_all_active_ranges(); +#endif + e820.nr_map = 0; + userdef = 1; + return 0; + } + + oldp = p; + mem_size = memparse(p, &p); + if (p == oldp) + return -EINVAL; + + userdef = 1; + if (*p == '@') { + start_at = memparse(p+1, &p); + e820_add_region(start_at, mem_size, E820_RAM); + } else if (*p == '#') { + start_at = memparse(p+1, &p); + e820_add_region(start_at, mem_size, E820_ACPI); + } else if (*p == '$') { + start_at = memparse(p+1, &p); + e820_add_region(start_at, mem_size, E820_RESERVED); + } else { + end_user_pfn = (mem_size >> PAGE_SHIFT); + } + return *p == '\0' ? 0 : -EINVAL; +} +early_param("memmap", parse_memmap_opt); + +void __init finish_e820_parsing(void) +{ + if (userdef) { + int nr = e820.nr_map; + + if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) + early_panic("Invalid user supplied memory map"); + e820.nr_map = nr; + + printk(KERN_INFO "user-defined physical RAM map:\n"); + e820_print_map("user"); + } +} + +/* + * Mark e820 reserved areas as busy for the resource manager. + */ +void __init e820_reserve_resources(void) +{ + int i; + struct resource *res; + + res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); + for (i = 0; i < e820.nr_map; i++) { + switch (e820.map[i].type) { + case E820_RAM: res->name = "System RAM"; break; + case E820_ACPI: res->name = "ACPI Tables"; break; + case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; + default: res->name = "reserved"; + } + res->start = e820.map[i].addr; + res->end = res->start + e820.map[i].size - 1; +#ifndef CONFIG_RESOURCES_64BIT + if (res->end > 0x100000000ULL) { + res++; + continue; + } +#endif + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + insert_resource(&iomem_resource, res); + res++; + } +} + +char *__init default_machine_specific_memory_setup(void) +{ + char *who = "BIOS-e820"; + int new_nr; + /* + * Try to copy the BIOS-supplied E820-map. + * + * Otherwise fake a memory map; one section from 0k->640k, + * the next section from 1mb->appropriate_mem_k + */ + new_nr = boot_params.e820_entries; + sanitize_e820_map(boot_params.e820_map, + ARRAY_SIZE(boot_params.e820_map), + &new_nr); + boot_params.e820_entries = new_nr; + if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) { + u64 mem_size; + + /* compare results from other methods and take the greater */ + if (boot_params.alt_mem_k + < boot_params.screen_info.ext_mem_k) { + mem_size = boot_params.screen_info.ext_mem_k; + who = "BIOS-88"; + } else { + mem_size = boot_params.alt_mem_k; + who = "BIOS-e801"; + } + + e820.nr_map = 0; + e820_add_region(0, LOWMEMSIZE(), E820_RAM); + e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM); + } + + /* In case someone cares... */ + return who; +} + +char *__init __attribute__((weak)) machine_specific_memory_setup(void) +{ + return default_machine_specific_memory_setup(); +} + +/* Overridden in paravirt.c if CONFIG_PARAVIRT */ +char * __init __attribute__((weak)) memory_setup(void) +{ + return machine_specific_memory_setup(); +} + +void __init setup_memory_map(void) +{ + printk(KERN_INFO "BIOS-provided physical RAM map:\n"); + e820_print_map(memory_setup()); +} + +#ifdef CONFIG_X86_64 int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) { int i; @@ -950,3 +1102,4 @@ int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) max_pfn << PAGE_SHIFT) - *addr; return i + 1; } +#endif diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c deleted file mode 100644 index ed733e7cf4e..00000000000 --- a/arch/x86/kernel/e820_32.c +++ /dev/null @@ -1,775 +0,0 @@ -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/ioport.h> -#include <linux/string.h> -#include <linux/kexec.h> -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/pfn.h> -#include <linux/uaccess.h> -#include <linux/suspend.h> - -#include <asm/pgtable.h> -#include <asm/page.h> -#include <asm/e820.h> -#include <asm/setup.h> - -struct e820map e820; -struct change_member { - struct e820entry *pbios; /* pointer to original bios entry */ - unsigned long long addr; /* address for this change point */ -}; -static struct change_member change_point_list[2*E820MAX] __initdata; -static struct change_member *change_point[2*E820MAX] __initdata; -static struct e820entry *overlap_list[E820MAX] __initdata; -static struct e820entry new_bios[E820MAX] __initdata; -/* For PCI or other memory-mapped resources */ -unsigned long pci_mem_start = 0x10000000; -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pci_mem_start); -#endif -extern int user_defined_memmap; - -static struct resource system_rom_resource = { - .name = "System ROM", - .start = 0xf0000, - .end = 0xfffff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource extension_rom_resource = { - .name = "Extension ROM", - .start = 0xe0000, - .end = 0xeffff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource adapter_rom_resources[] = { { - .name = "Adapter ROM", - .start = 0xc8000, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -} }; - -static struct resource video_rom_resource = { - .name = "Video ROM", - .start = 0xc0000, - .end = 0xc7fff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -#define ROMSIGNATURE 0xaa55 - -static int __init romsignature(const unsigned char *rom) -{ - const unsigned short * const ptr = (const unsigned short *)rom; - unsigned short sig; - - return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; -} - -static int __init romchecksum(const unsigned char *rom, unsigned long length) -{ - unsigned char sum, c; - - for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) - sum += c; - return !length && !sum; -} - -static void __init probe_roms(void) -{ - const unsigned char *rom; - unsigned long start, length, upper; - unsigned char c; - int i; - - /* video rom */ - upper = adapter_rom_resources[0].start; - for (start = video_rom_resource.start; start < upper; start += 2048) { - rom = isa_bus_to_virt(start); - if (!romsignature(rom)) - continue; - - video_rom_resource.start = start; - - if (probe_kernel_address(rom + 2, c) != 0) - continue; - - /* 0 < length <= 0x7f * 512, historically */ - length = c * 512; - - /* if checksum okay, trust length byte */ - if (length && romchecksum(rom, length)) - video_rom_resource.end = start + length - 1; - - request_resource(&iomem_resource, &video_rom_resource); - break; - } - - start = (video_rom_resource.end + 1 + 2047) & ~2047UL; - if (start < upper) - start = upper; - - /* system rom */ - request_resource(&iomem_resource, &system_rom_resource); - upper = system_rom_resource.start; - - /* check for extension rom (ignore length byte!) */ - rom = isa_bus_to_virt(extension_rom_resource.start); - if (romsignature(rom)) { - length = extension_rom_resource.end - extension_rom_resource.start + 1; - if (romchecksum(rom, length)) { - request_resource(&iomem_resource, &extension_rom_resource); - upper = extension_rom_resource.start; - } - } - - /* check for adapter roms on 2k boundaries */ - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { - rom = isa_bus_to_virt(start); - if (!romsignature(rom)) - continue; - - if (probe_kernel_address(rom + 2, c) != 0) - continue; - - /* 0 < length <= 0x7f * 512, historically */ - length = c * 512; - - /* but accept any length that fits if checksum okay */ - if (!length || start + length > upper || !romchecksum(rom, length)) - continue; - - adapter_rom_resources[i].start = start; - adapter_rom_resources[i].end = start + length - 1; - request_resource(&iomem_resource, &adapter_rom_resources[i]); - - start = adapter_rom_resources[i++].end & ~2047UL; - } -} - -/* - * Request address space for all standard RAM and ROM resources - * and also for regions reported as reserved by the e820. - */ -void __init init_iomem_resources(struct resource *code_resource, - struct resource *data_resource, - struct resource *bss_resource) -{ - int i; - - probe_roms(); - for (i = 0; i < e820.nr_map; i++) { - struct resource *res; -#ifndef CONFIG_RESOURCES_64BIT - if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) - continue; -#endif - res = kzalloc(sizeof(struct resource), GFP_ATOMIC); - switch (e820.map[i].type) { - case E820_RAM: res->name = "System RAM"; break; - case E820_ACPI: res->name = "ACPI Tables"; break; - case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; - default: res->name = "reserved"; - } - res->start = e820.map[i].addr; - res->end = res->start + e820.map[i].size - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - if (request_resource(&iomem_resource, res)) { - kfree(res); - continue; - } - if (e820.map[i].type == E820_RAM) { - /* - * We don't know which RAM region contains kernel data, - * so we try it repeatedly and let the resource manager - * test it. - */ - request_resource(res, code_resource); - request_resource(res, data_resource); - request_resource(res, bss_resource); -#ifdef CONFIG_KEXEC - if (crashk_res.start != crashk_res.end) - request_resource(res, &crashk_res); -#endif - } - } -} - -#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) -/** - * e820_mark_nosave_regions - Find the ranges of physical addresses that do not - * correspond to e820 RAM areas and mark the corresponding pages as nosave for - * hibernation. - * - * This function requires the e820 map to be sorted and without any - * overlapping entries and assumes the first e820 area to be RAM. - */ -void __init e820_mark_nosave_regions(void) -{ - int i; - unsigned long pfn; - - pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); - for (i = 1; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (pfn < PFN_UP(ei->addr)) - register_nosave_region(pfn, PFN_UP(ei->addr)); - - pfn = PFN_DOWN(ei->addr + ei->size); - if (ei->type != E820_RAM) - register_nosave_region(PFN_UP(ei->addr), pfn); - - if (pfn >= max_low_pfn) - break; - } -} -#endif - -void __init add_memory_region(unsigned long long start, - unsigned long long size, int type) -{ - int x; - - x = e820.nr_map; - - if (x == E820MAX) { - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); - return; - } - - e820.map[x].addr = start; - e820.map[x].size = size; - e820.map[x].type = type; - e820.nr_map++; -} /* add_memory_region */ - -/* - * Sanitize the BIOS e820 map. - * - * Some e820 responses include overlapping entries. The following - * replaces the original e820 map with a new one, removing overlaps. - * - */ -int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) -{ - struct change_member *change_tmp; - unsigned long current_type, last_type; - unsigned long long last_addr; - int chgidx, still_changing; - int overlap_entries; - int new_bios_entry; - int old_nr, new_nr, chg_nr; - int i; - - /* - Visually we're performing the following (1,2,3,4 = memory types)... - - Sample memory map (w/overlaps): - ____22__________________ - ______________________4_ - ____1111________________ - _44_____________________ - 11111111________________ - ____________________33__ - ___________44___________ - __________33333_________ - ______________22________ - ___________________2222_ - _________111111111______ - _____________________11_ - _________________4______ - - Sanitized equivalent (no overlap): - 1_______________________ - _44_____________________ - ___1____________________ - ____22__________________ - ______11________________ - _________1______________ - __________3_____________ - ___________44___________ - _____________33_________ - _______________2________ - ________________1_______ - _________________4______ - ___________________2____ - ____________________33__ - ______________________4_ - */ - /* if there's only one memory region, don't bother */ - if (*pnr_map < 2) { - return -1; - } - - old_nr = *pnr_map; - - /* bail out if we find any unreasonable addresses in bios map */ - for (i=0; i<old_nr; i++) - if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) { - return -1; - } - - /* create pointers for initial change-point information (for sorting) */ - for (i=0; i < 2*old_nr; i++) - change_point[i] = &change_point_list[i]; - - /* record all known change-points (starting and ending addresses), - omitting those that are for empty memory regions */ - chgidx = 0; - for (i=0; i < old_nr; i++) { - if (biosmap[i].size != 0) { - change_point[chgidx]->addr = biosmap[i].addr; - change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; - change_point[chgidx++]->pbios = &biosmap[i]; - } - } - chg_nr = chgidx; /* true number of change-points */ - - /* sort change-point list by memory addresses (low -> high) */ - still_changing = 1; - while (still_changing) { - still_changing = 0; - for (i=1; i < chg_nr; i++) { - /* if <current_addr> > <last_addr>, swap */ - /* or, if current=<start_addr> & last=<end_addr>, swap */ - if ((change_point[i]->addr < change_point[i-1]->addr) || - ((change_point[i]->addr == change_point[i-1]->addr) && - (change_point[i]->addr == change_point[i]->pbios->addr) && - (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) - ) - { - change_tmp = change_point[i]; - change_point[i] = change_point[i-1]; - change_point[i-1] = change_tmp; - still_changing=1; - } - } - } - - /* create a new bios memory map, removing overlaps */ - overlap_entries=0; /* number of entries in the overlap table */ - new_bios_entry=0; /* index for creating new bios map entries */ - last_type = 0; /* start with undefined memory type */ - last_addr = 0; /* start with 0 as last starting address */ - /* loop through change-points, determining affect on the new bios map */ - for (chgidx=0; chgidx < chg_nr; chgidx++) - { - /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) - { - /* add map entry to overlap list (> 1 entry implies an overlap) */ - overlap_list[overlap_entries++]=change_point[chgidx]->pbios; - } - else - { - /* remove entry from list (order independent, so swap with last) */ - for (i=0; i<overlap_entries; i++) - { - if (overlap_list[i] == change_point[chgidx]->pbios) - overlap_list[i] = overlap_list[overlap_entries-1]; - } - overlap_entries--; - } - /* if there are overlapping entries, decide which "type" to use */ - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ - current_type = 0; - for (i=0; i<overlap_entries; i++) - if (overlap_list[i]->type > current_type) - current_type = overlap_list[i]->type; - /* continue building up new bios map based on this information */ - if (current_type != last_type) { - if (last_type != 0) { - new_bios[new_bios_entry].size = - change_point[chgidx]->addr - last_addr; - /* move forward only if the new size was non-zero */ - if (new_bios[new_bios_entry].size != 0) - if (++new_bios_entry >= E820MAX) - break; /* no more space left for new bios entries */ - } - if (current_type != 0) { - new_bios[new_bios_entry].addr = change_point[chgidx]->addr; - new_bios[new_bios_entry].type = current_type; - last_addr=change_point[chgidx]->addr; - } - last_type = current_type; - } - } - new_nr = new_bios_entry; /* retain count for new bios entries */ - - /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); - *pnr_map = new_nr; - - return 0; -} - -/* - * Copy the BIOS e820 map into a safe place. - * - * Sanity-check it while we're at it.. - * - * If we're lucky and live on a modern system, the setup code - * will have given us a memory map that we can use to properly - * set up memory. If we aren't, we'll fake a memory map. - * - * We check to see that the memory map contains at least 2 elements - * before we'll use it, because the detection code in setup.S may - * not be perfect and most every PC known to man has two memory - * regions: one from 0 to 640k, and one from 1mb up. (The IBM - * thinkpad 560x, for example, does not cooperate with the memory - * detection code.) - */ -int __init copy_e820_map(struct e820entry *biosmap, int nr_map) -{ - /* Only one memory region (or negative)? Ignore it */ - if (nr_map < 2) - return -1; - - do { - u64 start = biosmap->addr; - u64 size = biosmap->size; - u64 end = start + size; - u32 type = biosmap->type; - - /* Overflow in 64 bits? Ignore the memory map. */ - if (start > end) - return -1; - - add_memory_region(start, size, type); - } while (biosmap++, --nr_map); - - return 0; -} - -/* - * Find the highest page frame number we have available - */ -void __init propagate_e820_map(void) -{ - int i; - - max_pfn = 0; - - for (i = 0; i < e820.nr_map; i++) { - unsigned long start, end; - /* RAM? */ - if (e820.map[i].type != E820_RAM) - continue; - start = PFN_UP(e820.map[i].addr); - end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); - if (start >= end) - continue; - if (end > max_pfn) - max_pfn = end; - memory_present(0, start, end); - } -} - -/* - * Register fully available low RAM pages with the bootmem allocator. - */ -void __init register_bootmem_low_pages(unsigned long max_low_pfn) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - unsigned long curr_pfn, last_pfn, size; - /* - * Reserve usable low memory - */ - if (e820.map[i].type != E820_RAM) - continue; - /* - * We are rounding up the start address of usable memory: - */ - curr_pfn = PFN_UP(e820.map[i].addr); - if (curr_pfn >= max_low_pfn) - continue; - /* - * ... and at the end of the usable range downwards: - */ - last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); - - if (last_pfn > max_low_pfn) - last_pfn = max_low_pfn; - - /* - * .. finally, did all the rounding and playing - * around just make the area go away? - */ - if (last_pfn <= curr_pfn) - continue; - - size = last_pfn - curr_pfn; - free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); - } -} - -void __init e820_register_memory(void) -{ - unsigned long gapstart, gapsize, round; - unsigned long long last; - int i; - - /* - * Search for the biggest gap in the low 32 bits of the e820 - * memory space. - */ - last = 0x100000000ull; - gapstart = 0x10000000; - gapsize = 0x400000; - i = e820.nr_map; - while (--i >= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; - - /* - * Since "last" is at most 4GB, we know we'll - * fit in 32 bits if this condition is true - */ - if (last > end) { - unsigned long gap = last - end; - - if (gap > gapsize) { - gapsize = gap; - gapstart = end; - } - } - if (start < last) - last = start; - } - - /* - * See how much we want to round up: start off with - * rounding to the next 1MB area. - */ - round = 0x100000; - while ((gapsize >> 4) > round) - round += round; - /* Fun with two's complement */ - pci_mem_start = (gapstart + round) & -round; - - printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", - pci_mem_start, gapstart, gapsize); -} - -void __init print_memory_map(char *who) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - printk(" %s: %016Lx - %016Lx ", who, - e820.map[i].addr, - e820.map[i].addr + e820.map[i].size); - switch (e820.map[i].type) { - case E820_RAM: printk("(usable)\n"); - break; - case E820_RESERVED: - printk("(reserved)\n"); - break; - case E820_ACPI: - printk("(ACPI data)\n"); - break; - case E820_NVS: - printk("(ACPI NVS)\n"); - break; - default: printk("type %u\n", e820.map[i].type); - break; - } - } -} - -void __init limit_regions(unsigned long long size) -{ - unsigned long long current_addr; - int i; - - print_memory_map("limit_regions start"); - for (i = 0; i < e820.nr_map; i++) { - current_addr = e820.map[i].addr + e820.map[i].size; - if (current_addr < size) - continue; - - if (e820.map[i].type != E820_RAM) - continue; - - if (e820.map[i].addr >= size) { - /* - * This region starts past the end of the - * requested size, skip it completely. - */ - e820.nr_map = i; - } else { - e820.nr_map = i + 1; - e820.map[i].size -= current_addr - size; - } - print_memory_map("limit_regions endfor"); - return; - } - print_memory_map("limit_regions endfunc"); -} - -/* - * This function checks if any part of the range <start,end> is mapped - * with type. - */ -int -e820_any_mapped(u64 start, u64 end, unsigned type) -{ - int i; - for (i = 0; i < e820.nr_map; i++) { - const struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) - continue; - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - return 1; - } - return 0; -} -EXPORT_SYMBOL_GPL(e820_any_mapped); - - /* - * This function checks if the entire range <start,end> is mapped with type. - * - * Note: this function only works correct if the e820 table is sorted and - * not-overlapping, which is the case - */ -int __init -e820_all_mapped(unsigned long s, unsigned long e, unsigned type) -{ - u64 start = s; - u64 end = e; - int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) - continue; - /* is the region (part) in overlap with the current region ?*/ - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - /* if the region is at the beginning of <start,end> we move - * start to the end of the region since it's ok until there - */ - if (ei->addr <= start) - start = ei->addr + ei->size; - /* if start is now at or beyond end, we're done, full - * coverage */ - if (start >= end) - return 1; /* we're done */ - } - return 0; -} - -static int __init parse_memmap(char *arg) -{ - if (!arg) - return -EINVAL; - - if (strcmp(arg, "exactmap") == 0) { -#ifdef CONFIG_CRASH_DUMP - /* If we are doing a crash dump, we - * still need to know the real mem - * size before original memory map is - * reset. - */ - propagate_e820_map(); - saved_max_pfn = max_pfn; -#endif - e820.nr_map = 0; - user_defined_memmap = 1; - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. - */ - unsigned long long start_at, mem_size; - - mem_size = memparse(arg, &arg); - if (*arg == '@') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_RAM); - } else if (*arg == '#') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_ACPI); - } else if (*arg == '$') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_RESERVED); - } else { - limit_regions(mem_size); - user_defined_memmap = 1; - } - } - return 0; -} -early_param("memmap", parse_memmap); -void __init update_memory_range(u64 start, u64 size, unsigned old_type, - unsigned new_type) -{ - int i; - - BUG_ON(old_type == new_type); - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - u64 final_start, final_end; - if (ei->type != old_type) - continue; - /* totally covered? */ - if (ei->addr >= start && ei->size <= size) { - ei->type = new_type; - continue; - } - /* partially covered */ - final_start = max(start, ei->addr); - final_end = min(start + size, ei->addr + ei->size); - if (final_start >= final_end) - continue; - add_memory_region(final_start, final_end - final_start, - new_type); - } -} -void __init update_e820(void) -{ - u8 nr_map; - - nr_map = e820.nr_map; - if (sanitize_e820_map(e820.map, &nr_map)) - return; - e820.nr_map = nr_map; - printk(KERN_INFO "modified physical RAM map:\n"); - print_memory_map("modified"); -} diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 9f51e1ea9e8..84fd9f2a28f 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -98,17 +98,6 @@ static void __init nvidia_bugs(int num, int slot, int func) } -static void __init ati_bugs(int num, int slot, int func) -{ -#ifdef CONFIG_X86_IO_APIC - if (timer_over_8254 == 1) { - timer_over_8254 = 0; - printk(KERN_INFO - "ATI board detected. Disabling timer routing over 8254.\n"); - } -#endif -} - #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -126,8 +115,6 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs }, { PCI_VENDOR_ID_VIA, PCI_ANY_ID, PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, - { PCI_VENDOR_ID_ATI, PCI_ANY_ID, - PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, ati_bugs }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, {} diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 77d424cf68b..473c89fe507 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -213,6 +213,48 @@ unsigned long efi_get_time(void) eft.minute, eft.second); } +/* + * Tell the kernel about the EFI memory map. This might include + * more than the max 128 entries that can fit in the e820 legacy + * (zeropage) memory map. + */ + +static void __init add_efi_memmap(void) +{ + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + efi_memory_desc_t *md = p; + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + int e820_type; + + if (md->attribute & EFI_MEMORY_WB) + e820_type = E820_RAM; + else + e820_type = E820_RESERVED; + e820_add_region(start, size, e820_type); + } + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); +} + +void __init efi_reserve_early(void) +{ + unsigned long pmap; + + pmap = boot_params.efi_info.efi_memmap; +#ifdef CONFIG_X86_64 + pmap += (__u64)boot_params.efi_info.efi_memmap_hi << 32; +#endif + memmap.phys_map = (void *)pmap; + memmap.nr_map = boot_params.efi_info.efi_memmap_size / + boot_params.efi_info.efi_memdesc_size; + memmap.desc_version = boot_params.efi_info.efi_memdesc_version; + memmap.desc_size = boot_params.efi_info.efi_memdesc_size; + reserve_early(pmap, pmap + memmap.nr_map * memmap.desc_size, + "EFI memmap"); +} + #if EFI_DEBUG static void __init print_efi_memmap(void) { @@ -242,21 +284,11 @@ void __init efi_init(void) int i = 0; void *tmp; -#ifdef CONFIG_X86_32 efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; - memmap.phys_map = (void *)boot_params.efi_info.efi_memmap; -#else - efi_phys.systab = (efi_system_table_t *) - (boot_params.efi_info.efi_systab | - ((__u64)boot_params.efi_info.efi_systab_hi<<32)); - memmap.phys_map = (void *) - (boot_params.efi_info.efi_memmap | - ((__u64)boot_params.efi_info.efi_memmap_hi<<32)); +#ifdef CONFIG_X86_64 + efi_phys.systab = (void *)efi_phys.systab + + ((__u64)boot_params.efi_info.efi_systab_hi<<32); #endif - memmap.nr_map = boot_params.efi_info.efi_memmap_size / - boot_params.efi_info.efi_memdesc_size; - memmap.desc_version = boot_params.efi_info.efi_memdesc_version; - memmap.desc_size = boot_params.efi_info.efi_memdesc_size; efi.systab = early_ioremap((unsigned long)efi_phys.systab, sizeof(efi_system_table_t)); @@ -370,6 +402,7 @@ void __init efi_init(void) if (memmap.desc_size != sizeof(efi_memory_desc_t)) printk(KERN_WARNING "Kernel-defined memdesc" "doesn't match the one from EFI!\n"); + add_efi_memmap(); /* Setup for EFI runtime service */ reboot_type = BOOT_EFI; diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index d0060fdccca..652c5287215 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -97,13 +97,7 @@ void __init efi_call_phys_epilog(void) early_runtime_code_mapping_set_exec(0); } -void __init efi_reserve_bootmem(void) -{ - reserve_bootmem_generic((unsigned long)memmap.phys_map, - memmap.nr_map * memmap.desc_size); -} - -void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size) +void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size) { static unsigned pages_mapped __initdata; unsigned i, pages; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 6fd1987466e..ff15ab55228 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -420,7 +420,6 @@ END(\label) PTREGSCALL stub_clone, sys_clone, %r8 PTREGSCALL stub_fork, sys_fork, %rdi PTREGSCALL stub_vfork, sys_vfork, %rdi - PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx PTREGSCALL stub_iopl, sys_iopl, %rsi @@ -930,11 +929,11 @@ error_kernelspace: iret run with kernel gs again, so don't set the user space flag. B stepping K8s sometimes report an truncated RIP for IRET exceptions returning to compat mode. Check for these here too. */ - leaq irq_return(%rip),%rbp - cmpq %rbp,RIP(%rsp) + leaq irq_return(%rip),%rcx + cmpq %rcx,RIP(%rsp) je error_swapgs - movl %ebp,%ebp /* zero extend */ - cmpq %rbp,RIP(%rsp) + movl %ecx,%ecx /* zero extend */ + cmpq %rcx,RIP(%rsp) je error_swapgs cmpq $gs_change,RIP(%rsp) je error_swapgs @@ -1124,10 +1123,6 @@ ENTRY(coprocessor_segment_overrun) zeroentry do_coprocessor_segment_overrun END(coprocessor_segment_overrun) -ENTRY(reserved) - zeroentry do_reserved -END(reserved) - /* runs on exception stack */ ENTRY(double_fault) XCPT_FRAME diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index cbaaf69bedb..1fa8be5bd21 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -51,7 +51,7 @@ void __init setup_apic_routing(void) else #endif - if (num_possible_cpus() <= 8) + if (max_physical_apicid < 8) genapic = &apic_flat; else genapic = &apic_physflat; diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c new file mode 100644 index 00000000000..a727c0b9819 --- /dev/null +++ b/arch/x86/kernel/head.c @@ -0,0 +1,73 @@ +#include <linux/kernel.h> +#include <linux/init.h> + +#include <asm/setup.h> +#include <asm/bios_ebda.h> + +#define BIOS_LOWMEM_KILOBYTES 0x413 + +/* + * The BIOS places the EBDA/XBDA at the top of conventional + * memory, and usually decreases the reported amount of + * conventional memory (int 0x12) too. This also contains a + * workaround for Dell systems that neglect to reserve EBDA. + * The same workaround also avoids a problem with the AMD768MPX + * chipset: reserve a page before VGA to prevent PCI prefetch + * into it (errata #56). Usually the page is reserved anyways, + * unless you have no PS/2 mouse plugged in. + */ +void __init reserve_ebda_region(void) +{ + unsigned int lowmem, ebda_addr; + + /* To determine the position of the EBDA and the */ + /* end of conventional memory, we need to look at */ + /* the BIOS data area. In a paravirtual environment */ + /* that area is absent. We'll just have to assume */ + /* that the paravirt case can handle memory setup */ + /* correctly, without our help. */ + if (paravirt_enabled()) + return; + + /* end of low (conventional) memory */ + lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); + lowmem <<= 10; + + /* start of EBDA area */ + ebda_addr = get_bios_ebda(); + + /* Fixup: bios puts an EBDA in the top 64K segment */ + /* of conventional memory, but does not adjust lowmem. */ + if ((lowmem - ebda_addr) <= 0x10000) + lowmem = ebda_addr; + + /* Fixup: bios does not report an EBDA at all. */ + /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ + if ((ebda_addr == 0) && (lowmem >= 0x9f000)) + lowmem = 0x9f000; + + /* Paranoia: should never happen, but... */ + if ((lowmem == 0) || (lowmem >= 0x100000)) + lowmem = 0x9f000; + + /* reserve all memory between lowmem and the 1MB mark */ + reserve_early(lowmem, 0x100000, "BIOS reserved"); +} + +void __init reserve_setup_data(void) +{ + struct setup_data *data; + u64 pa_data; + char buf[32]; + + if (boot_params.hdr.version < 0x0209) + return; + pa_data = boot_params.hdr.setup_data; + while (pa_data) { + data = early_ioremap(pa_data, sizeof(*data)); + sprintf(buf, "setup data %x", data->type); + reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); + pa_data = data->next; + early_iounmap(data, sizeof(*data)); + } +} diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 3db05905892..fa1d25dd83e 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -8,7 +8,34 @@ #include <linux/init.h> #include <linux/start_kernel.h> +#include <asm/setup.h> +#include <asm/sections.h> +#include <asm/e820.h> +#include <asm/bios_ebda.h> + void __init i386_start_kernel(void) { + reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); + +#ifdef CONFIG_BLK_DEV_INITRD + /* Reserve INITRD */ + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + u64 ramdisk_image = boot_params.hdr.ramdisk_image; + u64 ramdisk_size = boot_params.hdr.ramdisk_size; + u64 ramdisk_end = ramdisk_image + ramdisk_size; + reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); + } +#endif + reserve_early(init_pg_tables_start, init_pg_tables_end, + "INIT_PG_TABLE"); + + reserve_ebda_region(); + + /* + * At this point everything still needed from the boot loader + * or BIOS or kernel text should be early reserved or marked not + * RAM in e820. All other memory is free game. + */ + start_kernel(); } diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index e25c57b8aa8..c970929bb15 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -25,6 +25,20 @@ #include <asm/e820.h> #include <asm/bios_ebda.h> +/* boot cpu pda */ +static struct x8664_pda _boot_cpu_pda __read_mostly; + +#ifdef CONFIG_SMP +/* + * We install an empty cpu_pda pointer table to indicate to early users + * (numa_set_node) that the cpu_pda pointer table for cpus other than + * the boot cpu is not yet setup. + */ +static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; +#else +static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; +#endif + static void __init zap_identity_mappings(void) { pgd_t *pgd = pgd_offset_k(0UL); @@ -51,74 +65,6 @@ static void __init copy_bootdata(char *real_mode_data) } } -#define BIOS_LOWMEM_KILOBYTES 0x413 - -/* - * The BIOS places the EBDA/XBDA at the top of conventional - * memory, and usually decreases the reported amount of - * conventional memory (int 0x12) too. This also contains a - * workaround for Dell systems that neglect to reserve EBDA. - * The same workaround also avoids a problem with the AMD768MPX - * chipset: reserve a page before VGA to prevent PCI prefetch - * into it (errata #56). Usually the page is reserved anyways, - * unless you have no PS/2 mouse plugged in. - */ -static void __init reserve_ebda_region(void) -{ - unsigned int lowmem, ebda_addr; - - /* To determine the position of the EBDA and the */ - /* end of conventional memory, we need to look at */ - /* the BIOS data area. In a paravirtual environment */ - /* that area is absent. We'll just have to assume */ - /* that the paravirt case can handle memory setup */ - /* correctly, without our help. */ - if (paravirt_enabled()) - return; - - /* end of low (conventional) memory */ - lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); - lowmem <<= 10; - - /* start of EBDA area */ - ebda_addr = get_bios_ebda(); - - /* Fixup: bios puts an EBDA in the top 64K segment */ - /* of conventional memory, but does not adjust lowmem. */ - if ((lowmem - ebda_addr) <= 0x10000) - lowmem = ebda_addr; - - /* Fixup: bios does not report an EBDA at all. */ - /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ - if ((ebda_addr == 0) && (lowmem >= 0x9f000)) - lowmem = 0x9f000; - - /* Paranoia: should never happen, but... */ - if ((lowmem == 0) || (lowmem >= 0x100000)) - lowmem = 0x9f000; - - /* reserve all memory between lowmem and the 1MB mark */ - reserve_early(lowmem, 0x100000, "BIOS reserved"); -} - -static void __init reserve_setup_data(void) -{ - struct setup_data *data; - unsigned long pa_data; - char buf[32]; - - if (boot_params.hdr.version < 0x0209) - return; - pa_data = boot_params.hdr.setup_data; - while (pa_data) { - data = early_ioremap(pa_data, sizeof(*data)); - sprintf(buf, "setup data %x", data->type); - reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); - pa_data = data->next; - early_iounmap(data, sizeof(*data)); - } -} - void __init x86_64_start_kernel(char * real_mode_data) { int i; @@ -156,10 +102,12 @@ void __init x86_64_start_kernel(char * real_mode_data) early_printk("Kernel alive\n"); - for (i = 0; i < NR_CPUS; i++) - cpu_pda(i) = &boot_cpu_pda[i]; - + _cpu_pda = __cpu_pda; + cpu_pda(0) = &_boot_cpu_pda; pda_init(0); + + early_printk("Kernel really alive\n"); + copy_bootdata(__va(real_mode_data)); reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index f7357cc0162..b98b338aae1 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -194,6 +194,7 @@ default_entry: xorl %ebx,%ebx /* %ebx is kept at zero */ movl $pa(pg0), %edi + movl %edi, pa(init_pg_tables_start) movl $pa(swapper_pg_pmd), %edx movl $PTE_ATTR, %eax 10: @@ -219,6 +220,8 @@ default_entry: jb 10b 1: movl %edi,pa(init_pg_tables_end) + shrl $12, %eax + movl %eax, pa(max_pfn_mapped) /* Do early initialization of the fixmap area */ movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax @@ -228,6 +231,7 @@ default_entry: page_pde_offset = (__PAGE_OFFSET >> 20); movl $pa(pg0), %edi + movl %edi, pa(init_pg_tables_start) movl $pa(swapper_pg_dir), %edx movl $PTE_ATTR, %eax 10: @@ -249,6 +253,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20); cmpl %ebp,%eax jb 10b movl %edi,pa(init_pg_tables_end) + shrl $12, %eax + movl %eax, pa(max_pfn_mapped) /* Do early initialization of the fixmap area */ movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index b817974ef94..263b9d14753 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -18,6 +18,7 @@ #include <asm/page.h> #include <asm/msr.h> #include <asm/cache.h> +#include <asm/processor-flags.h> #ifdef CONFIG_PARAVIRT #include <asm/asm-offsets.h> @@ -154,9 +155,7 @@ ENTRY(secondary_startup_64) */ /* Enable PAE mode and PGE */ - xorq %rax, %rax - btsq $5, %rax - btsq $7, %rax + movl $(X86_CR4_PAE | X86_CR4_PGE), %eax movq %rax, %cr4 /* Setup early boot stage 4 level pagetables. */ @@ -184,14 +183,10 @@ ENTRY(secondary_startup_64) 1: wrmsr /* Make changes effective */ /* Setup cr0 */ -#define CR0_PM 1 /* protected mode */ -#define CR0_MP (1<<1) -#define CR0_ET (1<<4) -#define CR0_NE (1<<5) -#define CR0_WP (1<<16) -#define CR0_AM (1<<18) -#define CR0_PAGING (1<<31) - movl $CR0_PM|CR0_MP|CR0_ET|CR0_NE|CR0_WP|CR0_AM|CR0_PAGING,%eax +#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ + X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ + X86_CR0_PG) + movl $CR0_STATE, %eax /* Make changes effective */ movq %rax, %cr0 @@ -327,11 +322,11 @@ early_idt_ripmsg: ENTRY(name) /* Automate the creation of 1 to 1 mapping pmd entries */ -#define PMDS(START, PERM, COUNT) \ - i = 0 ; \ - .rept (COUNT) ; \ - .quad (START) + (i << 21) + (PERM) ; \ - i = i + 1 ; \ +#define PMDS(START, PERM, COUNT) \ + i = 0 ; \ + .rept (COUNT) ; \ + .quad (START) + (i << PMD_SHIFT) + (PERM) ; \ + i = i + 1 ; \ .endr /* diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 9b5cfcdfc42..ea230ec6905 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -17,7 +17,7 @@ /* FSEC = 10^-15 NSEC = 10^-9 */ -#define FSEC_PER_NSEC 1000000 +#define FSEC_PER_NSEC 1000000L /* * HPET address is set in acpi/boot.c, when an ACPI entry exists @@ -206,20 +206,19 @@ static void hpet_enable_legacy_int(void) static void hpet_legacy_clockevent_register(void) { - uint64_t hpet_freq; - /* Start HPET legacy interrupts */ hpet_enable_legacy_int(); /* - * The period is a femto seconds value. We need to calculate the - * scaled math multiplication factor for nanosecond to hpet tick - * conversion. + * The mult factor is defined as (include/linux/clockchips.h) + * mult/2^shift = cyc/ns (in contrast to ns/cyc in clocksource.h) + * hpet_period is in units of femtoseconds (per cycle), so + * mult/2^shift = cyc/ns = 10^6/hpet_period + * mult = (10^6 * 2^shift)/hpet_period + * mult = (FSEC_PER_NSEC << hpet_clockevent.shift)/hpet_period */ - hpet_freq = 1000000000000000ULL; - do_div(hpet_freq, hpet_period); - hpet_clockevent.mult = div_sc((unsigned long) hpet_freq, - NSEC_PER_SEC, hpet_clockevent.shift); + hpet_clockevent.mult = div_sc((unsigned long) FSEC_PER_NSEC, + hpet_period, hpet_clockevent.shift); /* Calculate the min / max delta */ hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, &hpet_clockevent); @@ -324,7 +323,7 @@ static struct clocksource clocksource_hpet = { static int hpet_clocksource_register(void) { - u64 tmp, start, now; + u64 start, now; cycle_t t1; /* Start the counter */ @@ -351,21 +350,15 @@ static int hpet_clocksource_register(void) return -ENODEV; } - /* Initialize and register HPET clocksource - * - * hpet period is in femto seconds per cycle - * so we need to convert this to ns/cyc units - * approximated by mult/2^shift - * - * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift - * fsec/cyc * 1ns/1000000fsec * 2^shift = mult - * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult - * (fsec/cyc << shift)/1000000 = mult - * (hpet_period << shift)/FSEC_PER_NSEC = mult + /* + * The definition of mult is (include/linux/clocksource.h) + * mult/2^shift = ns/cyc and hpet_period is in units of fsec/cyc + * so we first need to convert hpet_period to ns/cyc units: + * mult/2^shift = ns/cyc = hpet_period/10^6 + * mult = (hpet_period * 2^shift)/10^6 + * mult = (hpet_period << shift)/FSEC_PER_NSEC */ - tmp = (u64)hpet_period << HPET_SHIFT; - do_div(tmp, FSEC_PER_NSEC); - clocksource_hpet.mult = (u32)tmp; + clocksource_hpet.mult = div_sc(hpet_period, FSEC_PER_NSEC, HPET_SHIFT); clocksource_register(&clocksource_hpet); diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 95e80e5033c..eb9ddd8efb8 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -162,7 +162,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, int ret; if (!cpu_has_fxsr) - return -EIO; + return -ENODEV; ret = init_fpu(target); if (ret) @@ -179,7 +179,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, int ret; if (!cpu_has_fxsr) - return -EIO; + return -ENODEV; ret = init_fpu(target); if (ret) diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 0774b231a28..fedb3b113ac 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -58,7 +58,7 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); static DEFINE_SPINLOCK(vector_lock); -int timer_over_8254 __initdata = 1; +int timer_through_8259 __initdata; /* * Is the SiS APIC rmw bug present ? @@ -72,15 +72,21 @@ int sis_apic_bug = -1; int nr_ioapic_registers[MAX_IO_APICS]; /* I/O APIC entries */ -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; +struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; +#if defined (CONFIG_MCA) || defined (CONFIG_EISA) +int mp_bus_id_to_type[MAX_MP_BUSSES]; +#endif + +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); + static int disable_timer_pin_1 __initdata; /* @@ -110,7 +116,7 @@ struct io_apic { static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) { return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); + + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); } static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) @@ -239,7 +245,7 @@ static void __init replace_pin_at_irq(unsigned int irq, } } -static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) +static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) { struct irq_pin_list *entry = irq_2_pin + irq; unsigned int pin, reg; @@ -259,30 +265,32 @@ static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsign } /* mask = 1 */ -static void __mask_IO_APIC_irq (unsigned int irq) +static void __mask_IO_APIC_irq(unsigned int irq) { - __modify_IO_APIC_irq(irq, 0x00010000, 0); + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0); } /* mask = 0 */ -static void __unmask_IO_APIC_irq (unsigned int irq) +static void __unmask_IO_APIC_irq(unsigned int irq) { - __modify_IO_APIC_irq(irq, 0, 0x00010000); + __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED); } /* mask = 1, trigger = 0 */ -static void __mask_and_edge_IO_APIC_irq (unsigned int irq) +static void __mask_and_edge_IO_APIC_irq(unsigned int irq) { - __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, + IO_APIC_REDIR_LEVEL_TRIGGER); } /* mask = 0, trigger = 1 */ -static void __unmask_and_level_IO_APIC_irq (unsigned int irq) +static void __unmask_and_level_IO_APIC_irq(unsigned int irq) { - __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER, + IO_APIC_REDIR_MASKED); } -static void mask_IO_APIC_irq (unsigned int irq) +static void mask_IO_APIC_irq(unsigned int irq) { unsigned long flags; @@ -291,7 +299,7 @@ static void mask_IO_APIC_irq (unsigned int irq) spin_unlock_irqrestore(&ioapic_lock, flags); } -static void unmask_IO_APIC_irq (unsigned int irq) +static void unmask_IO_APIC_irq(unsigned int irq) { unsigned long flags; @@ -303,7 +311,7 @@ static void unmask_IO_APIC_irq (unsigned int irq) static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) { struct IO_APIC_route_entry entry; - + /* Check delivery_mode to be sure we're not clearing an SMI pin */ entry = ioapic_read_entry(apic, pin); if (entry.delivery_mode == dest_SMI) @@ -315,7 +323,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) ioapic_mask_entry(apic, pin); } -static void clear_IO_APIC (void) +static void clear_IO_APIC(void) { int apic, pin; @@ -332,7 +340,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) struct irq_pin_list *entry = irq_2_pin + irq; unsigned int apicid_value; cpumask_t tmp; - + cpus_and(tmp, cpumask, cpu_online_map); if (cpus_empty(tmp)) tmp = TARGET_CPUS; @@ -361,7 +369,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) # include <linux/kernel_stat.h> /* kstat */ # include <linux/slab.h> /* kmalloc() */ # include <linux/timer.h> - + #define IRQBALANCE_CHECK_ARCH -999 #define MAX_BALANCED_IRQ_INTERVAL (5*HZ) #define MIN_BALANCED_IRQ_INTERVAL (HZ/2) @@ -373,14 +381,14 @@ static int physical_balance __read_mostly; static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; static struct irq_cpu_info { - unsigned long * last_irq; - unsigned long * irq_delta; + unsigned long *last_irq; + unsigned long *irq_delta; unsigned long irq; } irq_cpu_data[NR_CPUS]; #define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) -#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq]) -#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq]) +#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq]) +#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq]) #define IDLE_ENOUGH(cpu,now) \ (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) @@ -419,8 +427,8 @@ inside: if (cpu == -1) cpu = NR_CPUS-1; } - } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) || - (search_idle && !IDLE_ENOUGH(cpu,now))); + } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) || + (search_idle && !IDLE_ENOUGH(cpu, now))); return cpu; } @@ -430,15 +438,14 @@ static inline void balance_irq(int cpu, int irq) unsigned long now = jiffies; cpumask_t allowed_mask; unsigned int new_cpu; - + if (irqbalance_disabled) - return; + return; cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); new_cpu = move(cpu, allowed_mask, now, 1); - if (cpu != new_cpu) { + if (cpu != new_cpu) set_pending_irq(irq, cpumask_of_cpu(new_cpu)); - } } static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) @@ -450,14 +457,14 @@ static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) if (!irq_desc[j].action) continue; /* Is it a significant load ? */ - if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) < + if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) < useful_load_threshold) continue; balance_irq(i, j); } } balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, - balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); + balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); return; } @@ -486,22 +493,22 @@ static void do_irq_balance(void) /* Is this an active IRQ or balancing disabled ? */ if (!irq_desc[j].action || irq_balancing_disabled(j)) continue; - if ( package_index == i ) - IRQ_DELTA(package_index,j) = 0; + if (package_index == i) + IRQ_DELTA(package_index, j) = 0; /* Determine the total count per processor per IRQ */ value_now = (unsigned long) kstat_cpu(i).irqs[j]; /* Determine the activity per processor per IRQ */ - delta = value_now - LAST_CPU_IRQ(i,j); + delta = value_now - LAST_CPU_IRQ(i, j); /* Update last_cpu_irq[][] for the next time */ - LAST_CPU_IRQ(i,j) = value_now; + LAST_CPU_IRQ(i, j) = value_now; /* Ignore IRQs whose rate is less than the clock */ if (delta < useful_load_threshold) continue; /* update the load for the processor or package total */ - IRQ_DELTA(package_index,j) += delta; + IRQ_DELTA(package_index, j) += delta; /* Keep track of the higher numbered sibling as well */ if (i != package_index) @@ -527,7 +534,8 @@ static void do_irq_balance(void) max_cpu_irq = ULONG_MAX; tryanothercpu: - /* Look for heaviest loaded processor. + /* + * Look for heaviest loaded processor. * We may come back to get the next heaviest loaded processor. * Skip processors with trivial loads. */ @@ -536,7 +544,7 @@ tryanothercpu: for_each_online_cpu(i) { if (i != CPU_TO_PACKAGEINDEX(i)) continue; - if (max_cpu_irq <= CPU_IRQ(i)) + if (max_cpu_irq <= CPU_IRQ(i)) continue; if (tmp_cpu_irq < CPU_IRQ(i)) { tmp_cpu_irq = CPU_IRQ(i); @@ -545,8 +553,9 @@ tryanothercpu: } if (tmp_loaded == -1) { - /* In the case of small number of heavy interrupt sources, - * loading some of the cpus too much. We use Ingo's original + /* + * In the case of small number of heavy interrupt sources, + * loading some of the cpus too much. We use Ingo's original * approach to rotate them around. */ if (!first_attempt && imbalance >= useful_load_threshold) { @@ -555,13 +564,14 @@ tryanothercpu: } goto not_worth_the_effort; } - + first_attempt = 0; /* heaviest search */ max_cpu_irq = tmp_cpu_irq; /* load */ max_loaded = tmp_loaded; /* processor */ imbalance = (max_cpu_irq - min_cpu_irq) / 2; - - /* if imbalance is less than approx 10% of max load, then + + /* + * if imbalance is less than approx 10% of max load, then * observe diminishing returns action. - quit */ if (imbalance < (max_cpu_irq >> 3)) @@ -577,26 +587,25 @@ tryanotherirq: /* Is this an active IRQ? */ if (!irq_desc[j].action) continue; - if (imbalance <= IRQ_DELTA(max_loaded,j)) + if (imbalance <= IRQ_DELTA(max_loaded, j)) continue; /* Try to find the IRQ that is closest to the imbalance * without going over. */ - if (move_this_load < IRQ_DELTA(max_loaded,j)) { - move_this_load = IRQ_DELTA(max_loaded,j); + if (move_this_load < IRQ_DELTA(max_loaded, j)) { + move_this_load = IRQ_DELTA(max_loaded, j); selected_irq = j; } } - if (selected_irq == -1) { + if (selected_irq == -1) goto tryanothercpu; - } imbalance = move_this_load; - + /* For physical_balance case, we accumulated both load * values in the one of the siblings cpu_irq[], * to use the same code for physical and logical processors - * as much as possible. + * as much as possible. * * NOTE: the cpu_irq[] array holds the sum of the load for * sibling A and sibling B in the slot for the lowest numbered @@ -625,11 +634,11 @@ tryanotherirq: /* mark for change destination */ set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); - /* Since we made a change, come back sooner to + /* Since we made a change, come back sooner to * check for more variation. */ balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, - balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); + balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); return; } goto tryanotherirq; @@ -640,7 +649,7 @@ not_worth_the_effort: * upward */ balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, - balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); + balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); return; } @@ -679,13 +688,13 @@ static int __init balanced_irq_init(void) cpumask_t tmp; cpus_shift_right(tmp, cpu_online_map, 2); - c = &boot_cpu_data; + c = &boot_cpu_data; /* When not overwritten by the command line ask subarchitecture. */ if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) irqbalance_disabled = NO_BALANCE_IRQ; if (irqbalance_disabled) return 0; - + /* disable irqbalance completely if there is only one processor online */ if (num_online_cpus() < 2) { irqbalance_disabled = 1; @@ -699,16 +708,14 @@ static int __init balanced_irq_init(void) physical_balance = 1; for_each_online_cpu(i) { - irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); - irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); + irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); + irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { printk(KERN_ERR "balanced_irq_init: out of memory"); goto failed; } - memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS); - memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS); } - + printk(KERN_INFO "Starting balanced_irq\n"); if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) return 0; @@ -801,10 +808,10 @@ static int find_irq_entry(int apic, int pin, int type) int i; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == type && - (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && - mp_irqs[i].mpc_dstirq == pin) + if (mp_irqs[i].mp_irqtype == type && + (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) && + mp_irqs[i].mp_dstirq == pin) return i; return -1; @@ -818,13 +825,13 @@ static int __init find_isa_irq_pin(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) - return mp_irqs[i].mpc_dstirq; + return mp_irqs[i].mp_dstirq; } return -1; } @@ -834,17 +841,17 @@ static int __init find_isa_irq_apic(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) break; } if (i < mp_irq_entries) { int apic; - for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) + for (apic = 0; apic < nr_ioapics; apic++) { + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) return apic; } } @@ -864,28 +871,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " "slot:%d, pin:%d.\n", bus, slot, pin); - if (mp_bus_id_to_pci_bus[bus] == -1) { + if (test_bit(bus, mp_bus_not_pci)) { printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); return -1; } for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) break; if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mpc_irqtype && + !mp_irqs[i].mp_irqtype && (bus == lbus) && - (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); + (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq); if (!(apic || IO_APIC_IRQ(irq))) continue; - if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) + if (pin == (mp_irqs[i].mp_srcbusirq & 3)) return irq; /* * Use the first all-but-pin matching entry as a @@ -900,7 +907,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); /* - * This function currently is only a helper for the i386 smp boot process where + * This function currently is only a helper for the i386 smp boot process where * we need to reprogram the ioredtbls to cater for the cpus which have come online * so mask in all cases should simply be TARGET_CPUS */ @@ -952,7 +959,7 @@ static int EISA_ELCR(unsigned int irq) * EISA conforming in the MP table, that means its trigger type must * be read in from the ELCR */ -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) #define default_EISA_polarity(idx) default_ISA_polarity(idx) /* PCI interrupts are always polarity one level triggered, @@ -969,118 +976,115 @@ static int EISA_ELCR(unsigned int irq) static int MPBIOS_polarity(int idx) { - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; int polarity; /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].mpc_irqflag & 3) + switch (mp_irqs[idx].mp_irqflag & 3) { + case 0: /* conforms, ie. bus-type dependent polarity */ { - case 0: /* conforms, ie. bus-type dependent polarity */ - { - polarity = test_bit(bus, mp_bus_not_pci)? - default_ISA_polarity(idx): - default_PCI_polarity(idx); - break; - } - case 1: /* high active */ - { - polarity = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - case 3: /* low active */ - { - polarity = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } + polarity = test_bit(bus, mp_bus_not_pci)? + default_ISA_polarity(idx): + default_PCI_polarity(idx); + break; + } + case 1: /* high active */ + { + polarity = 0; + break; + } + case 2: /* reserved */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } + case 3: /* low active */ + { + polarity = 1; + break; + } + default: /* invalid */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } } return polarity; } static int MPBIOS_trigger(int idx) { - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; int trigger; /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) + switch ((mp_irqs[idx].mp_irqflag>>2) & 3) { + case 0: /* conforms, ie. bus-type dependent */ { - case 0: /* conforms, ie. bus-type dependent */ - { - trigger = test_bit(bus, mp_bus_not_pci)? - default_ISA_trigger(idx): - default_PCI_trigger(idx); + trigger = test_bit(bus, mp_bus_not_pci)? + default_ISA_trigger(idx): + default_PCI_trigger(idx); #if defined(CONFIG_EISA) || defined(CONFIG_MCA) - switch (mp_bus_id_to_type[bus]) - { - case MP_BUS_ISA: /* ISA pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_EISA: /* EISA pin */ - { - trigger = default_EISA_trigger(idx); - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_MCA: /* MCA pin */ - { - trigger = default_MCA_trigger(idx); - break; - } - default: - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - } -#endif + switch (mp_bus_id_to_type[bus]) { + case MP_BUS_ISA: /* ISA pin */ + { + /* set before the switch */ break; } - case 1: /* edge */ + case MP_BUS_EISA: /* EISA pin */ { - trigger = 0; + trigger = default_EISA_trigger(idx); break; } - case 2: /* reserved */ + case MP_BUS_PCI: /* PCI pin */ { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; + /* set before the switch */ break; } - case 3: /* level */ + case MP_BUS_MCA: /* MCA pin */ { - trigger = 1; + trigger = default_MCA_trigger(idx); break; } - default: /* invalid */ + default: { printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 0; + trigger = 1; break; } } +#endif + break; + } + case 1: /* edge */ + { + trigger = 0; + break; + } + case 2: /* reserved */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; + break; + } + case 3: /* level */ + { + trigger = 1; + break; + } + default: /* invalid */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 0; + break; + } + } return trigger; } @@ -1097,16 +1101,16 @@ static inline int irq_trigger(int idx) static int pin_2_irq(int idx, int apic, int pin) { int irq, i; - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; /* * Debugging check, we are in big trouble if this message pops up! */ - if (mp_irqs[idx].mpc_dstirq != pin) + if (mp_irqs[idx].mp_dstirq != pin) printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); if (test_bit(bus, mp_bus_not_pci)) - irq = mp_irqs[idx].mpc_srcbusirq; + irq = mp_irqs[idx].mp_srcbusirq; else { /* * PCI IRQs are mapped in order @@ -1148,8 +1152,8 @@ static inline int IO_APIC_irq_trigger(int irq) for (apic = 0; apic < nr_ioapics; apic++) { for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - idx = find_irq_entry(apic,pin,mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) + idx = find_irq_entry(apic, pin, mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) return irq_trigger(idx); } } @@ -1164,7 +1168,7 @@ static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 } static int __assign_irq_vector(int irq) { - static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; + static int current_vector = FIRST_DEVICE_VECTOR, current_offset; int vector, offset; BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); @@ -1237,25 +1241,25 @@ static void __init setup_IO_APIC_irqs(void) /* * add it to the IO-APIC irq-routing table: */ - memset(&entry,0,sizeof(entry)); + memset(&entry, 0, sizeof(entry)); entry.delivery_mode = INT_DELIVERY_MODE; entry.dest_mode = INT_DEST_MODE; entry.mask = 0; /* enable IRQ */ - entry.dest.logical.logical_dest = + entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); - idx = find_irq_entry(apic,pin,mp_INT); + idx = find_irq_entry(apic, pin, mp_INT); if (idx == -1) { if (first_notcon) { apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", - mp_ioapics[apic].mpc_apicid, + mp_ioapics[apic].mp_apicid, pin); first_notcon = 0; } else apic_printk(APIC_VERBOSE, ", %d-%d", - mp_ioapics[apic].mpc_apicid, pin); + mp_ioapics[apic].mp_apicid, pin); continue; } @@ -1289,7 +1293,7 @@ static void __init setup_IO_APIC_irqs(void) vector = assign_irq_vector(irq); entry.vector = vector; ioapic_register_intr(irq, vector, IOAPIC_AUTO); - + if (!apic && (irq < 16)) disable_8259A_irq(irq); } @@ -1302,25 +1306,21 @@ static void __init setup_IO_APIC_irqs(void) } /* - * Set up the 8259A-master output pin: + * Set up the timer pin, possibly with the 8259A-master behind. */ -static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) +static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, + int vector) { struct IO_APIC_route_entry entry; - memset(&entry,0,sizeof(entry)); - - disable_8259A_irq(0); - - /* mask LVT0 */ - apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); + memset(&entry, 0, sizeof(entry)); /* * We use logical delivery to get the timer IRQ * to the first CPU. */ entry.dest_mode = INT_DEST_MODE; - entry.mask = 0; /* unmask IRQ now */ + entry.mask = 1; /* mask IRQ now */ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); entry.delivery_mode = INT_DELIVERY_MODE; entry.polarity = 0; @@ -1329,17 +1329,14 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in /* * The timer IRQ doesn't have to know that behind the - * scene we have a 8259A-master in AEOI mode ... + * scene we may have a 8259A-master in AEOI mode ... */ - irq_desc[0].chip = &ioapic_chip; - set_irq_handler(0, handle_edge_irq); + ioapic_register_intr(0, vector, IOAPIC_EDGE); /* * Add it to the IO-APIC irq-routing table: */ ioapic_write_entry(apic, pin, entry); - - enable_8259A_irq(0); } void __init print_IO_APIC(void) @@ -1354,10 +1351,10 @@ void __init print_IO_APIC(void) if (apic_verbosity == APIC_QUIET) return; - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); + printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); + mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); /* * We are a bit conservative about what we expect. We have to @@ -1376,7 +1373,7 @@ void __init print_IO_APIC(void) reg_03.raw = io_apic_read(apic, 3); spin_unlock_irqrestore(&ioapic_lock, flags); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); @@ -1459,7 +1456,7 @@ void __init print_IO_APIC(void) #if 0 -static void print_APIC_bitfield (int base) +static void print_APIC_bitfield(int base) { unsigned int v; int i, j; @@ -1480,7 +1477,7 @@ static void print_APIC_bitfield (int base) } } -void /*__init*/ print_local_APIC(void * dummy) +void /*__init*/ print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; @@ -1489,6 +1486,7 @@ void /*__init*/ print_local_APIC(void * dummy) printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); + v = apic_read(APIC_ID); printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); v = apic_read(APIC_LVR); @@ -1563,7 +1561,7 @@ void /*__init*/ print_local_APIC(void * dummy) printk("\n"); } -void print_all_local_APICs (void) +void print_all_local_APICs(void) { on_each_cpu(print_local_APIC, NULL, 1, 1); } @@ -1586,11 +1584,11 @@ void /*__init*/ print_PIC(void) v = inb(0xa0) << 8 | inb(0x20); printk(KERN_DEBUG "... PIC IRR: %04x\n", v); - outb(0x0b,0xa0); - outb(0x0b,0x20); + outb(0x0b, 0xa0); + outb(0x0b, 0x20); v = inb(0xa0) << 8 | inb(0x20); - outb(0x0a,0xa0); - outb(0x0a,0x20); + outb(0x0a, 0xa0); + outb(0x0a, 0x20); spin_unlock_irqrestore(&i8259A_lock, flags); @@ -1626,7 +1624,7 @@ static void __init enable_IO_APIC(void) spin_unlock_irqrestore(&ioapic_lock, flags); nr_ioapic_registers[apic] = reg_01.bits.entries+1; } - for(apic = 0; apic < nr_ioapics; apic++) { + for (apic = 0; apic < nr_ioapics; apic++) { int pin; /* See if any of the pins is in ExtINT mode */ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { @@ -1716,7 +1714,6 @@ void disable_IO_APIC(void) * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 */ -#ifndef CONFIG_X86_NUMAQ static void __init setup_ioapic_ids_from_mpc(void) { union IO_APIC_reg_00 reg_00; @@ -1726,6 +1723,11 @@ static void __init setup_ioapic_ids_from_mpc(void) unsigned char old_id; unsigned long flags; +#ifdef CONFIG_X86_NUMAQ + if (found_numaq) + return; +#endif + /* * Don't check I/O APIC IDs for xAPIC systems. They have * no meaning without the serial APIC bus. @@ -1748,15 +1750,15 @@ static void __init setup_ioapic_ids_from_mpc(void) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - - old_id = mp_ioapics[apic].mpc_apicid; - if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { + old_id = mp_ioapics[apic].mp_apicid; + + if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].mpc_apicid); + apic, mp_ioapics[apic].mp_apicid); printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID); - mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; + mp_ioapics[apic].mp_apicid = reg_00.bits.ID; } /* @@ -1765,9 +1767,9 @@ static void __init setup_ioapic_ids_from_mpc(void) * 'stuck on smp_invalidate_needed IPI wait' messages. */ if (check_apicid_used(phys_id_present_map, - mp_ioapics[apic].mpc_apicid)) { + mp_ioapics[apic].mp_apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].mpc_apicid); + apic, mp_ioapics[apic].mp_apicid); for (i = 0; i < get_physical_broadcast(); i++) if (!physid_isset(i, phys_id_present_map)) break; @@ -1776,13 +1778,13 @@ static void __init setup_ioapic_ids_from_mpc(void) printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", i); physid_set(i, phys_id_present_map); - mp_ioapics[apic].mpc_apicid = i; + mp_ioapics[apic].mp_apicid = i; } else { physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid); + tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", - mp_ioapics[apic].mpc_apicid); + mp_ioapics[apic].mp_apicid); physids_or(phys_id_present_map, phys_id_present_map, tmp); } @@ -1791,21 +1793,21 @@ static void __init setup_ioapic_ids_from_mpc(void) * We need to adjust the IRQ routing table * if the ID changed. */ - if (old_id != mp_ioapics[apic].mpc_apicid) + if (old_id != mp_ioapics[apic].mp_apicid) for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_dstapic == old_id) - mp_irqs[i].mpc_dstapic - = mp_ioapics[apic].mpc_apicid; + if (mp_irqs[i].mp_dstapic == old_id) + mp_irqs[i].mp_dstapic + = mp_ioapics[apic].mp_apicid; /* * Read the right value from the MPC table and * write it into the ID register. - */ + */ apic_printk(APIC_VERBOSE, KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].mpc_apicid); + mp_ioapics[apic].mp_apicid); - reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; + reg_00.bits.ID = mp_ioapics[apic].mp_apicid; spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0, reg_00.raw); spin_unlock_irqrestore(&ioapic_lock, flags); @@ -1816,15 +1818,12 @@ static void __init setup_ioapic_ids_from_mpc(void) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) + if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) printk("could not set ID!\n"); else apic_printk(APIC_VERBOSE, " ok.\n"); } } -#else -static void __init setup_ioapic_ids_from_mpc(void) { } -#endif int no_timer_check __initdata; @@ -2020,7 +2019,7 @@ static void ack_apic(unsigned int irq) ack_APIC_irq(); } -static void mask_lapic_irq (unsigned int irq) +static void mask_lapic_irq(unsigned int irq) { unsigned long v; @@ -2028,7 +2027,7 @@ static void mask_lapic_irq (unsigned int irq) apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); } -static void unmask_lapic_irq (unsigned int irq) +static void unmask_lapic_irq(unsigned int irq) { unsigned long v; @@ -2037,7 +2036,7 @@ static void unmask_lapic_irq (unsigned int irq) } static struct irq_chip lapic_chip __read_mostly = { - .name = "local-APIC-edge", + .name = "local-APIC", .mask = mask_lapic_irq, .unmask = unmask_lapic_irq, .eoi = ack_apic, @@ -2046,14 +2045,14 @@ static struct irq_chip lapic_chip __read_mostly = { static void __init setup_nmi(void) { /* - * Dirty trick to enable the NMI watchdog ... + * Dirty trick to enable the NMI watchdog ... * We put the 8259A master into AEOI mode and * unmask on all local APICs LVT0 as NMI. * * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') * is from Maciej W. Rozycki - so we do not have to EOI from * the NMI handler or the timer interrupt. - */ + */ apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); enable_NMI_through_LVT0(); @@ -2129,11 +2128,16 @@ static inline void __init unlock_ExtINT_logic(void) static inline void __init check_timer(void) { int apic1, pin1, apic2, pin2; + int no_pin1 = 0; int vector; + unsigned int ver; unsigned long flags; local_irq_save(flags); + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); + /* * get/set the timer IRQ vector: */ @@ -2142,17 +2146,17 @@ static inline void __init check_timer(void) set_intr_gate(vector, interrupt[0]); /* - * Subtle, code in do_timer_interrupt() expects an AEOI - * mode for the 8259A whenever interrupts are routed - * through I/O APICs. Also IRQ0 has to be enabled in - * the 8259A which implies the virtual wire has to be - * disabled in the local APIC. + * As IRQ0 is to be enabled in the 8259A, the virtual + * wire has to be disabled in the local APIC. Also + * timer interrupts need to be acknowledged manually in + * the 8259A for the i82489DX when using the NMI + * watchdog as that APIC treats NMIs as level-triggered. + * The AEOI mode will finish them in the 8259A + * automatically. */ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - timer_ack = 1; - if (timer_over_8254 > 0) - enable_8259A_irq(0); + timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -2162,14 +2166,33 @@ static inline void __init check_timer(void) printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", vector, apic1, pin1, apic2, pin2); + /* + * Some BIOS writers are clueless and report the ExtINTA + * I/O APIC input from the cascaded 8259A as the timer + * interrupt input. So just in case, if only one pin + * was found above, try it both directly and through the + * 8259A. + */ + if (pin1 == -1) { + pin1 = pin2; + apic1 = apic2; + no_pin1 = 1; + } else if (pin2 == -1) { + pin2 = pin1; + apic2 = apic1; + } + if (pin1 != -1) { /* * Ok, does IRQ0 through the IOAPIC work? */ + if (no_pin1) { + add_pin_to_irq(0, apic1, pin1); + setup_timer_IRQ0_pin(apic1, pin1, vector); + } unmask_IO_APIC_irq(0); if (timer_irq_works()) { if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); setup_nmi(); enable_8259A_irq(0); } @@ -2178,43 +2201,46 @@ static inline void __init check_timer(void) goto out; } clear_IO_APIC_pin(apic1, pin1); - printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to " - "IO-APIC\n"); - } + if (!no_pin1) + printk(KERN_ERR "..MP-BIOS bug: " + "8254 timer not connected to IO-APIC\n"); - printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); - if (pin2 != -1) { + printk(KERN_INFO "...trying to set up timer (IRQ0) " + "through the 8259A ... "); printk("\n..... (found pin %d) ...", pin2); /* * legacy devices should be connected to IO APIC #0 */ - setup_ExtINT_IRQ0_pin(apic2, pin2, vector); + replace_pin_at_irq(0, apic1, pin1, apic2, pin2); + setup_timer_IRQ0_pin(apic2, pin2, vector); + unmask_IO_APIC_irq(0); + enable_8259A_irq(0); if (timer_irq_works()) { printk("works.\n"); - if (pin1 != -1) - replace_pin_at_irq(0, apic1, pin1, apic2, pin2); - else - add_pin_to_irq(0, apic2, pin2); + timer_through_8259 = 1; if (nmi_watchdog == NMI_IO_APIC) { + disable_8259A_irq(0); setup_nmi(); + enable_8259A_irq(0); } goto out; } /* * Cleanup, just in case ... */ + disable_8259A_irq(0); clear_IO_APIC_pin(apic2, pin2); + printk(" failed.\n"); } - printk(" failed.\n"); if (nmi_watchdog == NMI_IO_APIC) { printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); - nmi_watchdog = 0; + nmi_watchdog = NMI_NONE; } + timer_ack = 0; printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); - disable_8259A_irq(0); set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, "fasteoi"); apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ @@ -2224,12 +2250,12 @@ static inline void __init check_timer(void) printk(" works.\n"); goto out; } + disable_8259A_irq(0); apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); printk(" failed.\n"); printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); - timer_ack = 0; init_8259A(0); make_8259A_irq(0); apic_write_around(APIC_LVT0, APIC_DM_EXTINT); @@ -2286,28 +2312,14 @@ void __init setup_IO_APIC(void) print_IO_APIC(); } -static int __init setup_disable_8254_timer(char *s) -{ - timer_over_8254 = -1; - return 1; -} -static int __init setup_enable_8254_timer(char *s) -{ - timer_over_8254 = 2; - return 1; -} - -__setup("disable_8254_timer", setup_disable_8254_timer); -__setup("enable_8254_timer", setup_enable_8254_timer); - /* * Called after all the initialization is done. If we didnt find any * APIC bugs then we can allow the modify fast path */ - + static int __init io_apic_bug_finalize(void) { - if(sis_apic_bug == -1) + if (sis_apic_bug == -1) sis_apic_bug = 0; return 0; } @@ -2318,17 +2330,17 @@ struct sysfs_ioapic_data { struct sys_device dev; struct IO_APIC_route_entry entry[0]; }; -static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; +static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS]; static int ioapic_suspend(struct sys_device *dev, pm_message_t state) { struct IO_APIC_route_entry *entry; struct sysfs_ioapic_data *data; int i; - + data = container_of(dev, struct sysfs_ioapic_data, dev); entry = data->entry; - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) + for (i = 0; i < nr_ioapic_registers[dev->id]; i++) entry[i] = ioapic_read_entry(dev->id, i); return 0; @@ -2341,18 +2353,18 @@ static int ioapic_resume(struct sys_device *dev) unsigned long flags; union IO_APIC_reg_00 reg_00; int i; - + data = container_of(dev, struct sysfs_ioapic_data, dev); entry = data->entry; spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; + if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { + reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; io_apic_write(dev->id, 0, reg_00.raw); } spin_unlock_irqrestore(&ioapic_lock, flags); - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) + for (i = 0; i < nr_ioapic_registers[dev->id]; i++) ioapic_write_entry(dev->id, i, entry[i]); return 0; @@ -2366,24 +2378,23 @@ static struct sysdev_class ioapic_sysdev_class = { static int __init ioapic_init_sysfs(void) { - struct sys_device * dev; + struct sys_device *dev; int i, size, error = 0; error = sysdev_class_register(&ioapic_sysdev_class); if (error) return error; - for (i = 0; i < nr_ioapics; i++ ) { - size = sizeof(struct sys_device) + nr_ioapic_registers[i] + for (i = 0; i < nr_ioapics; i++) { + size = sizeof(struct sys_device) + nr_ioapic_registers[i] * sizeof(struct IO_APIC_route_entry); - mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); + mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); if (!mp_ioapic_data[i]) { printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); continue; } - memset(mp_ioapic_data[i], 0, size); dev = &mp_ioapic_data[i]->dev; - dev->id = i; + dev->id = i; dev->cls = &ioapic_sysdev_class; error = sysdev_register(dev); if (error) { @@ -2458,7 +2469,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms msg->address_lo = MSI_ADDR_BASE_LO | ((INT_DEST_MODE == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL: +MSI_ADDR_DEST_MODE_PHYSICAL: MSI_ADDR_DEST_MODE_LOGICAL) | ((INT_DELIVERY_MODE != dest_LowestPrio) ? MSI_ADDR_REDIRECTION_CPU: @@ -2469,7 +2480,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT | ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED: +MSI_DATA_DELIVERY_FIXED: MSI_DATA_DELIVERY_LOWPRI) | MSI_DATA_VECTOR(vector); } @@ -2640,12 +2651,12 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) #endif /* CONFIG_HT_IRQ */ /* -------------------------------------------------------------------------- - ACPI-based IOAPIC Configuration + ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ #ifdef CONFIG_ACPI -int __init io_apic_get_unique_id (int ioapic, int apic_id) +int __init io_apic_get_unique_id(int ioapic, int apic_id) { union IO_APIC_reg_00 reg_00; static physid_mask_t apic_id_map = PHYSID_MASK_NONE; @@ -2654,10 +2665,10 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id) int i = 0; /* - * The P4 platform supports up to 256 APIC IDs on two separate APIC - * buses (one for LAPICs, one for IOAPICs), where predecessors only + * The P4 platform supports up to 256 APIC IDs on two separate APIC + * buses (one for LAPICs, one for IOAPICs), where predecessors only * supports up to 16 on one shared APIC bus. - * + * * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full * advantage of new APIC bus architecture. */ @@ -2676,7 +2687,7 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id) } /* - * Every APIC in a system must have a unique ID or we get lots of nice + * Every APIC in a system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ if (check_apicid_used(apic_id_map, apic_id)) { @@ -2693,7 +2704,7 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id) "trying %d\n", ioapic, apic_id, i); apic_id = i; - } + } tmp = apicid_to_cpu_present(apic_id); physids_or(apic_id_map, apic_id_map, tmp); @@ -2720,7 +2731,7 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id) } -int __init io_apic_get_version (int ioapic) +int __init io_apic_get_version(int ioapic) { union IO_APIC_reg_01 reg_01; unsigned long flags; @@ -2733,7 +2744,7 @@ int __init io_apic_get_version (int ioapic) } -int __init io_apic_get_redir_entries (int ioapic) +int __init io_apic_get_redir_entries(int ioapic) { union IO_APIC_reg_01 reg_01; unsigned long flags; @@ -2746,7 +2757,7 @@ int __init io_apic_get_redir_entries (int ioapic) } -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) +int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low) { struct IO_APIC_route_entry entry; @@ -2762,7 +2773,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a * corresponding device driver registers for this IRQ. */ - memset(&entry,0,sizeof(entry)); + memset(&entry, 0, sizeof(entry)); entry.delivery_mode = INT_DELIVERY_MODE; entry.dest_mode = INT_DEST_MODE; @@ -2781,7 +2792,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, - mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, + mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq, edge_level, active_high_low); ioapic_register_intr(irq, entry.vector, edge_level); @@ -2802,8 +2813,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) return -1; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == mp_INT && - mp_irqs[i].mpc_srcbusirq == bus_irq) + if (mp_irqs[i].mp_irqtype == mp_INT && + mp_irqs[i].mp_srcbusirq == bus_irq) break; if (i >= mp_irq_entries) return -1; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index f1e1ae3e5c7..2eba4f4c14b 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -61,7 +61,7 @@ struct irq_cfg { }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ -struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { +static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, @@ -94,7 +94,7 @@ static int no_timer_check; static int disable_timer_pin_1 __initdata; -int timer_over_8254 __initdata = 1; +int timer_through_8259 __initdata; /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; @@ -108,15 +108,17 @@ DEFINE_SPINLOCK(vector_lock); int nr_ioapic_registers[MAX_IO_APICS]; /* I/O APIC entries */ -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; +struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); + /* * Rough estimation of how many shared IRQs there are, can * be changed anytime. @@ -144,7 +146,7 @@ struct io_apic { static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) { return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); + + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); } static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) @@ -187,7 +189,7 @@ static bool io_apic_level_ack_pending(unsigned int irq) break; reg = io_apic_read(entry->apic, 0x10 + pin*2); /* Is the remote IRR bit set? */ - if ((reg >> 14) & 1) { + if (reg & IO_APIC_REDIR_REMOTE_IRR) { spin_unlock_irqrestore(&ioapic_lock, flags); return true; } @@ -302,7 +304,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) break; io_apic_write(apic, 0x11 + pin*2, dest); reg = io_apic_read(apic, 0x10 + pin*2); - reg &= ~0x000000ff; + reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; io_apic_modify(apic, reg); if (!entry->next) @@ -364,16 +366,37 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) entry->pin = pin; } +/* + * Reroute an IRQ to a different pin. + */ +static void __init replace_pin_at_irq(unsigned int irq, + int oldapic, int oldpin, + int newapic, int newpin) +{ + struct irq_pin_list *entry = irq_2_pin + irq; + + while (1) { + if (entry->apic == oldapic && entry->pin == oldpin) { + entry->apic = newapic; + entry->pin = newpin; + } + if (!entry->next) + break; + entry = irq_2_pin + entry->next; + } +} + #define DO_ACTION(name,R,ACTION, FINAL) \ \ static void name##_IO_APIC_irq (unsigned int irq) \ __DO_ACTION(R, ACTION, FINAL) -DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) ) - /* mask = 1 */ -DO_ACTION( __unmask, 0, &= 0xfffeffff, ) - /* mask = 0 */ +/* mask = 1 */ +DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) + +/* mask = 0 */ +DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) static void mask_IO_APIC_irq (unsigned int irq) { @@ -434,20 +457,6 @@ static int __init disable_timer_pin_setup(char *arg) } __setup("disable_timer_pin_1", disable_timer_pin_setup); -static int __init setup_disable_8254_timer(char *s) -{ - timer_over_8254 = -1; - return 1; -} -static int __init setup_enable_8254_timer(char *s) -{ - timer_over_8254 = 2; - return 1; -} - -__setup("disable_8254_timer", setup_disable_8254_timer); -__setup("enable_8254_timer", setup_enable_8254_timer); - /* * Find the IRQ entry number of a certain pin. @@ -457,10 +466,10 @@ static int find_irq_entry(int apic, int pin, int type) int i; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == type && - (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && - mp_irqs[i].mpc_dstirq == pin) + if (mp_irqs[i].mp_irqtype == type && + (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) && + mp_irqs[i].mp_dstirq == pin) return i; return -1; @@ -474,13 +483,13 @@ static int __init find_isa_irq_pin(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) - return mp_irqs[i].mpc_dstirq; + return mp_irqs[i].mp_dstirq; } return -1; } @@ -490,17 +499,17 @@ static int __init find_isa_irq_apic(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) break; } if (i < mp_irq_entries) { int apic; for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) return apic; } } @@ -520,28 +529,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", bus, slot, pin); - if (mp_bus_id_to_pci_bus[bus] == -1) { + if (test_bit(bus, mp_bus_not_pci)) { apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); return -1; } for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) break; if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mpc_irqtype && + !mp_irqs[i].mp_irqtype && (bus == lbus) && - (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); + (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); if (!(apic || IO_APIC_IRQ(irq))) continue; - if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) + if (pin == (mp_irqs[i].mp_srcbusirq & 3)) return irq; /* * Use the first all-but-pin matching entry as a @@ -569,13 +578,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) static int MPBIOS_polarity(int idx) { - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; int polarity; /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].mpc_irqflag & 3) + switch (mp_irqs[idx].mp_irqflag & 3) { case 0: /* conforms, ie. bus-type dependent polarity */ if (test_bit(bus, mp_bus_not_pci)) @@ -611,13 +620,13 @@ static int MPBIOS_polarity(int idx) static int MPBIOS_trigger(int idx) { - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; int trigger; /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) + switch ((mp_irqs[idx].mp_irqflag>>2) & 3) { case 0: /* conforms, ie. bus-type dependent */ if (test_bit(bus, mp_bus_not_pci)) @@ -664,16 +673,16 @@ static inline int irq_trigger(int idx) static int pin_2_irq(int idx, int apic, int pin) { int irq, i; - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; /* * Debugging check, we are in big trouble if this message pops up! */ - if (mp_irqs[idx].mpc_dstirq != pin) + if (mp_irqs[idx].mp_dstirq != pin) printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); if (test_bit(bus, mp_bus_not_pci)) { - irq = mp_irqs[idx].mpc_srcbusirq; + irq = mp_irqs[idx].mp_srcbusirq; } else { /* * PCI IRQs are mapped in order @@ -850,7 +859,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " "IRQ %d Mode:%i Active:%i)\n", - apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector, + apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, irq, trigger, polarity); /* @@ -891,10 +900,10 @@ static void __init setup_IO_APIC_irqs(void) idx = find_irq_entry(apic,pin,mp_INT); if (idx == -1) { if (first_notcon) { - apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin); + apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); first_notcon = 0; } else - apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin); + apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); continue; } if (!first_notcon) { @@ -915,26 +924,21 @@ static void __init setup_IO_APIC_irqs(void) } /* - * Set up the 8259A-master output pin as broadcast to all - * CPUs. + * Set up the timer pin, possibly with the 8259A-master behind. */ -static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) +static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, + int vector) { struct IO_APIC_route_entry entry; memset(&entry, 0, sizeof(entry)); - disable_8259A_irq(0); - - /* mask LVT0 */ - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); - /* * We use logical delivery to get the timer IRQ * to the first CPU. */ entry.dest_mode = INT_DEST_MODE; - entry.mask = 0; /* unmask IRQ now */ + entry.mask = 1; /* mask IRQ now */ entry.dest = cpu_mask_to_apicid(TARGET_CPUS); entry.delivery_mode = INT_DELIVERY_MODE; entry.polarity = 0; @@ -943,7 +947,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in /* * The timer IRQ doesn't have to know that behind the - * scene we have a 8259A-master in AEOI mode ... + * scene we may have a 8259A-master in AEOI mode ... */ set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); @@ -951,8 +955,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in * Add it to the IO-APIC irq-routing table: */ ioapic_write_entry(apic, pin, entry); - - enable_8259A_irq(0); } void __apicdebuginit print_IO_APIC(void) @@ -969,7 +971,7 @@ void __apicdebuginit print_IO_APIC(void) printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); + mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); /* * We are a bit conservative about what we expect. We have to @@ -987,7 +989,7 @@ void __apicdebuginit print_IO_APIC(void) spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); @@ -1081,6 +1083,7 @@ void __apicdebuginit print_local_APIC(void * dummy) printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); + v = apic_read(APIC_ID); printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); @@ -1663,6 +1666,7 @@ static inline void __init check_timer(void) struct irq_cfg *cfg = irq_cfg + 0; int apic1, pin1, apic2, pin2; unsigned long flags; + int no_pin1 = 0; local_irq_save(flags); @@ -1673,16 +1677,11 @@ static inline void __init check_timer(void) assign_irq_vector(0, TARGET_CPUS); /* - * Subtle, code in do_timer_interrupt() expects an AEOI - * mode for the 8259A whenever interrupts are routed - * through I/O APICs. Also IRQ0 has to be enabled in - * the 8259A which implies the virtual wire has to be - * disabled in the local APIC. + * As IRQ0 is to be enabled in the 8259A, the virtual + * wire has to be disabled in the local APIC. */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - if (timer_over_8254 > 0) - enable_8259A_irq(0); pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -1692,15 +1691,39 @@ static inline void __init check_timer(void) apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", cfg->vector, apic1, pin1, apic2, pin2); + /* + * Some BIOS writers are clueless and report the ExtINTA + * I/O APIC input from the cascaded 8259A as the timer + * interrupt input. So just in case, if only one pin + * was found above, try it both directly and through the + * 8259A. + */ + if (pin1 == -1) { + pin1 = pin2; + apic1 = apic2; + no_pin1 = 1; + } else if (pin2 == -1) { + pin2 = pin1; + apic2 = apic1; + } + + replace_pin_at_irq(0, 0, 0, apic1, pin1); + apic1 = 0; + pin1 = 0; + setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); + if (pin1 != -1) { /* * Ok, does IRQ0 through the IOAPIC work? */ + if (no_pin1) { + add_pin_to_irq(0, apic1, pin1); + setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); + } unmask_IO_APIC_irq(0); if (!no_timer_check && timer_irq_works()) { nmi_watchdog_default(); if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); setup_nmi(); enable_8259A_irq(0); } @@ -1709,42 +1732,48 @@ static inline void __init check_timer(void) goto out; } clear_IO_APIC_pin(apic1, pin1); - apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not " - "connected to IO-APIC\n"); - } + if (!no_pin1) + apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: " + "8254 timer not connected to IO-APIC\n"); - apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) " - "through the 8259A ... "); - if (pin2 != -1) { + apic_printk(APIC_VERBOSE,KERN_INFO + "...trying to set up timer (IRQ0) " + "through the 8259A ... "); apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...", apic2, pin2); /* * legacy devices should be connected to IO APIC #0 */ - setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector); + replace_pin_at_irq(0, apic1, pin1, apic2, pin2); + setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); + unmask_IO_APIC_irq(0); + enable_8259A_irq(0); if (timer_irq_works()) { apic_printk(APIC_VERBOSE," works.\n"); + timer_through_8259 = 1; nmi_watchdog_default(); if (nmi_watchdog == NMI_IO_APIC) { + disable_8259A_irq(0); setup_nmi(); + enable_8259A_irq(0); } goto out; } /* * Cleanup, just in case ... */ + disable_8259A_irq(0); clear_IO_APIC_pin(apic2, pin2); + apic_printk(APIC_VERBOSE," failed.\n"); } - apic_printk(APIC_VERBOSE," failed.\n"); if (nmi_watchdog == NMI_IO_APIC) { printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); - nmi_watchdog = 0; + nmi_watchdog = NMI_NONE; } apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); - disable_8259A_irq(0); irq_desc[0].chip = &lapic_irq_type; apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ enable_8259A_irq(0); @@ -1753,6 +1782,7 @@ static inline void __init check_timer(void) apic_printk(APIC_VERBOSE," works.\n"); goto out; } + disable_8259A_irq(0); apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); apic_printk(APIC_VERBOSE," failed.\n"); @@ -1845,8 +1875,8 @@ static int ioapic_resume(struct sys_device *dev) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; + if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { + reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; io_apic_write(dev->id, 0, reg_00.raw); } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2246,8 +2276,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) return -1; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == mp_INT && - mp_irqs[i].mpc_srcbusirq == bus_irq) + if (mp_irqs[i].mp_irqtype == mp_INT && + mp_irqs[i].mp_srcbusirq == bus_irq) break; if (i >= mp_irq_entries) return -1; @@ -2340,7 +2370,7 @@ void __init ioapic_init_mappings(void) ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mpc_apicaddr; + ioapic_phys = mp_ioapics[i].mp_apicaddr; } else { ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index c0df7b89ca2..9d98cda39ad 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -8,7 +8,6 @@ #include <linux/kernel_stat.h> #include <linux/mc146818rtc.h> #include <linux/cache.h> -#include <linux/interrupt.h> #include <linux/cpu.h> #include <linux/module.h> diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 4e3e8ec6027..47a6f6f1247 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -335,16 +335,20 @@ skip: per_cpu(irq_stat,j).irq_tlb_count); seq_printf(p, " TLB shootdowns\n"); #endif +#ifdef CONFIG_X86_MCE seq_printf(p, "TRM: "); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_thermal_count); seq_printf(p, " Thermal event interrupts\n"); +#endif +#ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "SPU: "); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_spurious_count); seq_printf(p, " Spurious interrupts\n"); +#endif seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); @@ -353,6 +357,40 @@ skip: return 0; } +/* + * /proc/stat helpers + */ +u64 arch_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = nmi_count(cpu); + +#ifdef CONFIG_X86_LOCAL_APIC + sum += per_cpu(irq_stat, cpu).apic_timer_irqs; +#endif +#ifdef CONFIG_SMP + sum += per_cpu(irq_stat, cpu).irq_resched_count; + sum += per_cpu(irq_stat, cpu).irq_call_count; + sum += per_cpu(irq_stat, cpu).irq_tlb_count; +#endif +#ifdef CONFIG_X86_MCE + sum += per_cpu(irq_stat, cpu).irq_thermal_count; +#endif +#ifdef CONFIG_X86_LOCAL_APIC + sum += per_cpu(irq_stat, cpu).irq_spurious_count; +#endif + return sum; +} + +u64 arch_irq_stat(void) +{ + u64 sum = atomic_read(&irq_err_count); + +#ifdef CONFIG_X86_IO_APIC + sum += atomic_read(&irq_mis_count); +#endif + return sum; +} + #ifdef CONFIG_HOTPLUG_CPU #include <mach_apic.h> diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 3aac15466a9..1f78b238d8d 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -135,6 +135,7 @@ skip: seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); seq_printf(p, " TLB shootdowns\n"); #endif +#ifdef CONFIG_X86_MCE seq_printf(p, "TRM: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); @@ -143,6 +144,7 @@ skip: for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); seq_printf(p, " Threshold APIC interrupts\n"); +#endif seq_printf(p, "SPU: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); @@ -153,6 +155,32 @@ skip: } /* + * /proc/stat helpers + */ +u64 arch_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = cpu_pda(cpu)->__nmi_count; + + sum += cpu_pda(cpu)->apic_timer_irqs; +#ifdef CONFIG_SMP + sum += cpu_pda(cpu)->irq_resched_count; + sum += cpu_pda(cpu)->irq_call_count; + sum += cpu_pda(cpu)->irq_tlb_count; +#endif +#ifdef CONFIG_X86_MCE + sum += cpu_pda(cpu)->irq_thermal_count; + sum += cpu_pda(cpu)->irq_threshold_count; +#endif + sum += cpu_pda(cpu)->irq_spurious_count; + return sum; +} + +u64 arch_irq_stat(void) +{ + return atomic_read(&irq_err_count); +} + +/* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific * handlers). diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 0224c3637c7..21f2bae98c1 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -20,9 +20,9 @@ #include <asm/mmu_context.h> #ifdef CONFIG_SMP -static void flush_ldt(void *null) +static void flush_ldt(void *current_mm) { - if (current->active_mm) + if (current->active_mm == current_mm) load_LDT(¤t->active_mm->context); } #endif @@ -68,7 +68,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) load_LDT(pc); mask = cpumask_of_cpu(smp_processor_id()); if (!cpus_equal(current->mm->cpu_vm_mask, mask)) - smp_call_function(flush_ldt, NULL, 1, 1); + smp_call_function(flush_ldt, current->mm, 1, 1); preempt_enable(); #else load_LDT(pc); diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index d0b234c9fc3..f4960171bc6 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -39,7 +39,7 @@ static void set_idt(void *newidt, __u16 limit) curidt.address = (unsigned long)newidt; load_idt(&curidt); -}; +} static void set_gdt(void *newgdt, __u16 limit) @@ -51,7 +51,7 @@ static void set_gdt(void *newgdt, __u16 limit) curgdt.address = (unsigned long)newgdt; load_gdt(&curgdt); -}; +} static void load_segments(void) { diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 69729e38b78..9758fea87c5 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -5,13 +5,14 @@ * 2006 Shaohua Li <shaohua.li@intel.com> * * This driver allows to upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, + * belonging to IA-32 family - PentiumPro, Pentium II, * Pentium III, Xeon, Pentium 4, etc. * - * Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual, - * Order Number 245472 or free download from: - * - * http://developer.intel.com/design/pentium4/manuals/245472.htm + * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture + * Software Developer's Manual + * Order Number 253668 or free download from: + * + * http://developer.intel.com/design/pentium4/manuals/253668.htm * * For more information, go to http://www.urbanmyth.org/microcode * @@ -58,12 +59,12 @@ * nature of implementation. * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com> * Fix the panic when writing zero-length microcode chunk. - * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>, + * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>, * Jun Nakajima <jun.nakajima@intel.com> * Support for the microcode updates in the new format. * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com> * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl - * because we no longer hold a copy of applied microcode + * because we no longer hold a copy of applied microcode * in kernel memory. * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com> * Fix sigmatch() macro to handle old CPUs with pf == 0. @@ -320,11 +321,11 @@ static void apply_microcode(int cpu) return; /* serialize access to the physical write to MSR 0x79 */ - spin_lock_irqsave(µcode_update_lock, flags); + spin_lock_irqsave(µcode_update_lock, flags); /* write microcode via MSR 0x79 */ wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) uci->mc->bits, + (unsigned long) uci->mc->bits, (unsigned long) uci->mc->bits >> 16 >> 16); wrmsr(MSR_IA32_UCODE_REV, 0, 0); @@ -341,7 +342,7 @@ static void apply_microcode(int cpu) return; } printk(KERN_INFO "microcode: CPU%d updated from revision " - "0x%x to 0x%x, date = %08x \n", + "0x%x to 0x%x, date = %08x \n", cpu_num, uci->rev, val[1], uci->mc->hdr.date); uci->rev = val[1]; } @@ -534,7 +535,7 @@ static int cpu_request_microcode(int cpu) c->x86, c->x86_model, c->x86_mask); error = request_firmware(&firmware, name, µcode_pdev->dev); if (error) { - pr_debug("microcode: ucode data file %s load failed\n", name); + pr_debug("microcode: data file %s load failed\n", name); return error; } buf = firmware->data; @@ -805,6 +806,9 @@ static int __init microcode_init (void) { int error; + printk(KERN_INFO + "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n"); + error = microcode_dev_init(); if (error) return error; @@ -825,9 +829,6 @@ static int __init microcode_init (void) } register_hotcpu_notifier(&mc_cpu_notifier); - - printk(KERN_INFO - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n"); return 0; } diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index edc5fbfe85c..fdfdc550b36 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -12,6 +12,7 @@ #include <asm/io.h> #include <asm/msr.h> #include <asm/acpi.h> +#include <asm/mmconfig.h> #include "../pci/pci.h" diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 404683b94e7..8b6b1e05c30 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -25,6 +25,8 @@ #include <asm/proto.h> #include <asm/acpi.h> #include <asm/bios_ebda.h> +#include <asm/e820.h> +#include <asm/trampoline.h> #include <mach_apic.h> #ifdef CONFIG_X86_32 @@ -32,28 +34,6 @@ #include <mach_mpparse.h> #endif -/* Have we found an MP table */ -int smp_found_config; - -/* - * Various Linux-internal data structures created from the - * MP-table. - */ -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) -int mp_bus_id_to_type[MAX_MP_BUSSES]; -#endif - -DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 }; - -static int mp_current_pci_id; - -int pic_mode; - -/* - * Intel MP BIOS table parsing routines: - */ - /* * Checksum an MP configuration block. */ @@ -69,15 +49,73 @@ static int __init mpf_checksum(unsigned char *mp, int len) } #ifdef CONFIG_X86_NUMAQ +int found_numaq; /* * Have to match translation table entries to main table entries by counter * hence the mpc_record variable .... can't see a less disgusting way of * doing this .... */ +struct mpc_config_translation { + unsigned char mpc_type; + unsigned char trans_len; + unsigned char trans_type; + unsigned char trans_quad; + unsigned char trans_global; + unsigned char trans_local; + unsigned short trans_reserved; +}; + static int mpc_record; static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata; + +static inline int generate_logical_apicid(int quad, int phys_apicid) +{ + return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); +} + + +static inline int mpc_apic_id(struct mpc_config_processor *m, + struct mpc_config_translation *translation_record) +{ + int quad = translation_record->trans_quad; + int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); + + printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", + m->mpc_apicid, + (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, + (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, + m->mpc_apicver, quad, logical_apicid); + return logical_apicid; +} + +int mp_bus_id_to_node[MAX_MP_BUSSES]; + +int mp_bus_id_to_local[MAX_MP_BUSSES]; + +static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name, + struct mpc_config_translation *translation) +{ + int quad = translation->trans_quad; + int local = translation->trans_local; + + mp_bus_id_to_node[m->mpc_busid] = quad; + mp_bus_id_to_local[m->mpc_busid] = local; + printk(KERN_INFO "Bus #%d is %s (node %d)\n", + m->mpc_busid, name, quad); +} + +int quad_local_to_mp_bus_id [NR_CPUS/4][4]; +static void mpc_oem_pci_bus(struct mpc_config_bus *m, + struct mpc_config_translation *translation) +{ + int quad = translation->trans_quad; + int local = translation->trans_local; + + quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; +} + #endif static void __cpuinit MP_processor_info(struct mpc_config_processor *m) @@ -90,7 +128,10 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) return; } #ifdef CONFIG_X86_NUMAQ - apicid = mpc_apic_id(m, translation_table[mpc_record]); + if (found_numaq) + apicid = mpc_apic_id(m, translation_table[mpc_record]); + else + apicid = m->mpc_apicid; #else apicid = m->mpc_apicid; #endif @@ -103,17 +144,18 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) generic_processor_info(apicid, m->mpc_apicver); } +#ifdef CONFIG_X86_IO_APIC static void __init MP_bus_info(struct mpc_config_bus *m) { char str[7]; - memcpy(str, m->mpc_bustype, 6); str[6] = 0; #ifdef CONFIG_X86_NUMAQ - mpc_oem_bus_info(m, str, translation_table[mpc_record]); + if (found_numaq) + mpc_oem_bus_info(m, str, translation_table[mpc_record]); #else - Dprintk("Bus #%d is %s\n", m->mpc_busid, str); + printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); #endif #if MAX_MP_BUSSES < 256 @@ -132,11 +174,10 @@ static void __init MP_bus_info(struct mpc_config_bus *m) #endif } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { #ifdef CONFIG_X86_NUMAQ - mpc_oem_pci_bus(m, translation_table[mpc_record]); + if (found_numaq) + mpc_oem_pci_bus(m, translation_table[mpc_record]); #endif clear_bit(m->mpc_busid, mp_bus_not_pci); - mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; - mp_current_pci_id++; #if defined(CONFIG_EISA) || defined (CONFIG_MCA) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { @@ -147,6 +188,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m) } else printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); } +#endif #ifdef CONFIG_X86_IO_APIC @@ -176,18 +218,89 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m) if (bad_ioapic(m->mpc_apicaddr)) return; - mp_ioapics[nr_ioapics] = *m; + mp_ioapics[nr_ioapics].mp_apicaddr = m->mpc_apicaddr; + mp_ioapics[nr_ioapics].mp_apicid = m->mpc_apicid; + mp_ioapics[nr_ioapics].mp_type = m->mpc_type; + mp_ioapics[nr_ioapics].mp_apicver = m->mpc_apicver; + mp_ioapics[nr_ioapics].mp_flags = m->mpc_flags; nr_ioapics++; } -static void __init MP_intsrc_info(struct mpc_config_intsrc *m) +static void print_MP_intsrc_info(struct mpc_config_intsrc *m) { - mp_irqs[mp_irq_entries] = *m; - Dprintk("Int: type %d, pol %d, trig %d, bus %d," + printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); +} + +static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) +{ + printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," + " IRQ %02x, APIC ID %x, APIC INT %02x\n", + mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, + (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, + mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); +} + +static void __init assign_to_mp_irq(struct mpc_config_intsrc *m, + struct mp_config_intsrc *mp_irq) +{ + mp_irq->mp_dstapic = m->mpc_dstapic; + mp_irq->mp_type = m->mpc_type; + mp_irq->mp_irqtype = m->mpc_irqtype; + mp_irq->mp_irqflag = m->mpc_irqflag; + mp_irq->mp_srcbus = m->mpc_srcbus; + mp_irq->mp_srcbusirq = m->mpc_srcbusirq; + mp_irq->mp_dstirq = m->mpc_dstirq; +} + +static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, + struct mpc_config_intsrc *m) +{ + m->mpc_dstapic = mp_irq->mp_dstapic; + m->mpc_type = mp_irq->mp_type; + m->mpc_irqtype = mp_irq->mp_irqtype; + m->mpc_irqflag = mp_irq->mp_irqflag; + m->mpc_srcbus = mp_irq->mp_srcbus; + m->mpc_srcbusirq = mp_irq->mp_srcbusirq; + m->mpc_dstirq = mp_irq->mp_dstirq; +} + +static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, + struct mpc_config_intsrc *m) +{ + if (mp_irq->mp_dstapic != m->mpc_dstapic) + return 1; + if (mp_irq->mp_type != m->mpc_type) + return 2; + if (mp_irq->mp_irqtype != m->mpc_irqtype) + return 3; + if (mp_irq->mp_irqflag != m->mpc_irqflag) + return 4; + if (mp_irq->mp_srcbus != m->mpc_srcbus) + return 5; + if (mp_irq->mp_srcbusirq != m->mpc_srcbusirq) + return 6; + if (mp_irq->mp_dstirq != m->mpc_dstirq) + return 7; + + return 0; +} + +static void __init MP_intsrc_info(struct mpc_config_intsrc *m) +{ + int i; + + print_MP_intsrc_info(m); + + for (i = 0; i < mp_irq_entries; i++) { + if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m)) + return; + } + + assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]); if (++mp_irq_entries == MAX_IRQ_SOURCES) panic("Max # of irq sources exceeded!!\n"); } @@ -196,7 +309,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m) static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) { - Dprintk("Lint: type %d, pol %d, trig %d, bus %d," + printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC LINT %02x\n", m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, @@ -266,11 +379,14 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, } } -static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, +void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid) { if (strncmp(oem, "IBM NUMA", 8)) - printk("Warning! May not be a NUMA-Q system!\n"); + printk("Warning! Not a NUMA-Q system!\n"); + else + found_numaq = 1; + if (mpc->mpc_oemptr) smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr, mpc->mpc_oemsize); @@ -281,12 +397,9 @@ static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, * Read/parse the MPC */ -static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) +static int __init smp_check_mpc(struct mp_config_table *mpc, char *oem, + char *str) { - char str[16]; - char oem[10]; - int count = sizeof(*mpc); - unsigned char *mpt = ((unsigned char *)mpc) + count; if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", @@ -309,19 +422,42 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) } memcpy(oem, mpc->mpc_oem, 8); oem[8] = 0; - printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem); + printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem); memcpy(str, mpc->mpc_productid, 12); str[12] = 0; - printk("Product ID: %s ", str); -#ifdef CONFIG_X86_32 - mps_oem_check(mpc, oem, str); -#endif - printk(KERN_INFO "MPTABLE: Product ID: %s ", str); + printk(KERN_INFO "MPTABLE: Product ID: %s\n", str); printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); + return 1; +} + +static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) +{ + char str[16]; + char oem[10]; + + int count = sizeof(*mpc); + unsigned char *mpt = ((unsigned char *)mpc) + count; + + if (!smp_check_mpc(mpc, oem, str)) + return 0; + +#ifdef CONFIG_X86_32 + /* + * need to make sure summit and es7000's mps_oem_check is safe to be + * called early via genericarch 's mps_oem_check + */ + if (early) { +#ifdef CONFIG_X86_NUMAQ + numaq_mps_oem_check(mpc, oem, str); +#endif + } else + mps_oem_check(mpc, oem, str); +#endif + /* save the local APIC address, it might be non-default */ if (!acpi_lapic) mp_lapic_addr = mpc->mpc_lapic; @@ -352,7 +488,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) { struct mpc_config_bus *m = (struct mpc_config_bus *)mpt; +#ifdef CONFIG_X86_IO_APIC MP_bus_info(m); +#endif mpt += sizeof(*m); count += sizeof(*m); break; @@ -402,6 +540,11 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) ++mpc_record; #endif } + +#ifdef CONFIG_X86_GENERICARCH + generic_bigsmp_probe(); +#endif + setup_apic_routing(); if (!num_processors) printk(KERN_ERR "MPTABLE: no processors registered!\n"); @@ -427,7 +570,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) intsrc.mpc_type = MP_INTSRC; intsrc.mpc_irqflag = 0; /* conforming */ intsrc.mpc_srcbus = 0; - intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; + intsrc.mpc_dstapic = mp_ioapics[0].mp_apicid; intsrc.mpc_irqtype = mp_INT; @@ -488,40 +631,11 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) MP_intsrc_info(&intsrc); } -#endif -static inline void __init construct_default_ISA_mptable(int mpc_default_type) +static void construct_ioapic_table(int mpc_default_type) { - struct mpc_config_processor processor; - struct mpc_config_bus bus; -#ifdef CONFIG_X86_IO_APIC struct mpc_config_ioapic ioapic; -#endif - struct mpc_config_lintsrc lintsrc; - int linttypes[2] = { mp_ExtINT, mp_NMI }; - int i; - - /* - * local APIC has default address - */ - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* - * 2 CPUs, numbered 0 & 1. - */ - processor.mpc_type = MP_PROCESSOR; - /* Either an integrated APIC or a discrete 82489DX. */ - processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; - processor.mpc_cpuflag = CPU_ENABLED; - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; - processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; - processor.mpc_reserved[0] = 0; - processor.mpc_reserved[1] = 0; - for (i = 0; i < 2; i++) { - processor.mpc_apicid = i; - MP_processor_info(&processor); - } + struct mpc_config_bus bus; bus.mpc_type = MP_BUS; bus.mpc_busid = 0; @@ -550,7 +664,6 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) MP_bus_info(&bus); } -#ifdef CONFIG_X86_IO_APIC ioapic.mpc_type = MP_IOAPIC; ioapic.mpc_apicid = 2; ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; @@ -562,7 +675,42 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) * We set up most of the low 16 IO-APIC pins according to MPS rules. */ construct_default_ioirq_mptable(mpc_default_type); +} +#else +static inline void construct_ioapic_table(int mpc_default_type) { } #endif + +static inline void __init construct_default_ISA_mptable(int mpc_default_type) +{ + struct mpc_config_processor processor; + struct mpc_config_lintsrc lintsrc; + int linttypes[2] = { mp_ExtINT, mp_NMI }; + int i; + + /* + * local APIC has default address + */ + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + + /* + * 2 CPUs, numbered 0 & 1. + */ + processor.mpc_type = MP_PROCESSOR; + /* Either an integrated APIC or a discrete 82489DX. */ + processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; + processor.mpc_cpuflag = CPU_ENABLED; + processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | + (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; + processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; + processor.mpc_reserved[0] = 0; + processor.mpc_reserved[1] = 0; + for (i = 0; i < 2; i++) { + processor.mpc_apicid = i; + MP_processor_info(&processor); + } + + construct_ioapic_table(mpc_default_type); + lintsrc.mpc_type = MP_LINTSRC; lintsrc.mpc_irqflag = 0; /* conforming */ lintsrc.mpc_srcbusid = 0; @@ -600,7 +748,7 @@ static void __init __get_smp_config(unsigned early) printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); -#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) if (mpf->mpf_feature2 & (1 << 7)) { printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); pic_mode = 1; @@ -632,7 +780,9 @@ static void __init __get_smp_config(unsigned early) * override the defaults. */ if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { +#ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 0; +#endif printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); printk(KERN_ERR "... disabling SMP support. " @@ -689,7 +839,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, unsigned int *bp = phys_to_virt(base); struct intel_mp_floating *mpf; - Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length); + printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length); BUILD_BUG_ON(sizeof(*mpf) != 16); while (length > 0) { @@ -699,15 +849,21 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, !mpf_checksum((unsigned char *)bp, 16) && ((mpf->mpf_specification == 1) || (mpf->mpf_specification == 4))) { - +#ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 1; +#endif mpf_found = mpf; -#ifdef CONFIG_X86_32 + printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", mpf, virt_to_phys(mpf)); - reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE, + + if (!reserve) + return 1; + reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, BOOTMEM_DEFAULT); if (mpf->mpf_physptr) { + unsigned long size = PAGE_SIZE; +#ifdef CONFIG_X86_32 /* * We cannot access to MPC table to compute * table size yet, as only few megabytes from @@ -717,24 +873,15 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, * PAGE_SIZE from mpg->mpf_physptr yields BUG() * in reserve_bootmem. */ - unsigned long size = PAGE_SIZE; unsigned long end = max_low_pfn * PAGE_SIZE; if (mpf->mpf_physptr + size > end) size = end - mpf->mpf_physptr; - reserve_bootmem(mpf->mpf_physptr, size, +#endif + reserve_bootmem_generic(mpf->mpf_physptr, size, BOOTMEM_DEFAULT); } -#else - if (!reserve) - return 1; - - reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE); - if (mpf->mpf_physptr) - reserve_bootmem_generic(mpf->mpf_physptr, - PAGE_SIZE); -#endif - return 1; + return 1; } bp += 4; length -= 16; @@ -790,298 +937,294 @@ void __init find_smp_config(void) __find_smp_config(1); } -/* -------------------------------------------------------------------------- - ACPI-based MP Configuration - -------------------------------------------------------------------------- */ +#ifdef CONFIG_X86_IO_APIC +static u8 __initdata irq_used[MAX_IRQ_SOURCES]; -/* - * Keep this outside and initialized to 0, for !CONFIG_ACPI builds: - */ -int es7000_plat; +static int __init get_MP_intsrc_index(struct mpc_config_intsrc *m) +{ + int i; -#ifdef CONFIG_ACPI + if (m->mpc_irqtype != mp_INT) + return 0; -#ifdef CONFIG_X86_IO_APIC + if (m->mpc_irqflag != 0x0f) + return 0; -#define MP_ISA_BUS 0 + /* not legacy */ -extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; + for (i = 0; i < mp_irq_entries; i++) { + if (mp_irqs[i].mp_irqtype != mp_INT) + continue; -static int mp_find_ioapic(int gsi) -{ - int i = 0; + if (mp_irqs[i].mp_irqflag != 0x0f) + continue; - /* Find the IOAPIC that manages this GSI. */ - for (i = 0; i < nr_ioapics; i++) { - if ((gsi >= mp_ioapic_routing[i].gsi_base) - && (gsi <= mp_ioapic_routing[i].gsi_end)) - return i; + if (mp_irqs[i].mp_srcbus != m->mpc_srcbus) + continue; + if (mp_irqs[i].mp_srcbusirq != m->mpc_srcbusirq) + continue; + if (irq_used[i]) { + /* already claimed */ + return -2; + } + irq_used[i] = 1; + return i; } - printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); + /* not found */ return -1; } -static u8 __init uniq_ioapic_id(u8 id) -{ -#ifdef CONFIG_X86_32 - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return io_apic_get_unique_id(nr_ioapics, id); - else - return id; -#else - int i; - DECLARE_BITMAP(used, 256); - bitmap_zero(used, 256); - for (i = 0; i < nr_ioapics; i++) { - struct mpc_config_ioapic *ia = &mp_ioapics[i]; - __set_bit(ia->mpc_apicid, used); - } - if (!test_bit(id, used)) - return id; - return find_first_zero_bit(used, 256); +#define SPARE_SLOT_NUM 20 + +static struct mpc_config_intsrc __initdata *m_spare[SPARE_SLOT_NUM]; #endif -} -void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) +static int __init replace_intsrc_all(struct mp_config_table *mpc, + unsigned long mpc_new_phys, + unsigned long mpc_new_length) { - int idx = 0; - - if (bad_ioapic(address)) - return; +#ifdef CONFIG_X86_IO_APIC + int i; + int nr_m_spare = 0; +#endif - idx = nr_ioapics; + int count = sizeof(*mpc); + unsigned char *mpt = ((unsigned char *)mpc) + count; - mp_ioapics[idx].mpc_type = MP_IOAPIC; - mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; - mp_ioapics[idx].mpc_apicaddr = address; + printk(KERN_INFO "mpc_length %x\n", mpc->mpc_length); + while (count < mpc->mpc_length) { + switch (*mpt) { + case MP_PROCESSOR: + { + struct mpc_config_processor *m = + (struct mpc_config_processor *)mpt; + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_BUS: + { + struct mpc_config_bus *m = + (struct mpc_config_bus *)mpt; + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_IOAPIC: + { + mpt += sizeof(struct mpc_config_ioapic); + count += sizeof(struct mpc_config_ioapic); + break; + } + case MP_INTSRC: + { +#ifdef CONFIG_X86_IO_APIC + struct mpc_config_intsrc *m = + (struct mpc_config_intsrc *)mpt; - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); -#ifdef CONFIG_X86_32 - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); -#else - mp_ioapics[idx].mpc_apicver = 0; + printk(KERN_INFO "OLD "); + print_MP_intsrc_info(m); + i = get_MP_intsrc_index(m); + if (i > 0) { + assign_to_mpc_intsrc(&mp_irqs[i], m); + printk(KERN_INFO "NEW "); + print_mp_irq_info(&mp_irqs[i]); + } else if (!i) { + /* legacy, do nothing */ + } else if (nr_m_spare < SPARE_SLOT_NUM) { + /* + * not found (-1), or duplicated (-2) + * are invalid entries, + * we need to use the slot later + */ + m_spare[nr_m_spare] = m; + nr_m_spare++; + } #endif - /* - * Build basic GSI lookup table to facilitate gsi->io_apic lookups - * and to prevent reprogramming of IOAPIC pins (PCI GSIs). - */ - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; - mp_ioapic_routing[idx].gsi_base = gsi_base; - mp_ioapic_routing[idx].gsi_end = gsi_base + - io_apic_get_redir_entries(idx); - - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, - mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); - - nr_ioapics++; -} + mpt += sizeof(struct mpc_config_intsrc); + count += sizeof(struct mpc_config_intsrc); + break; + } + case MP_LINTSRC: + { + struct mpc_config_lintsrc *m = + (struct mpc_config_lintsrc *)mpt; + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + default: + /* wrong mptable */ + printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); + printk(KERN_ERR "type %x\n", *mpt); + print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, + 1, mpc, mpc->mpc_length, 1); + goto out; + } + } -void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) -{ - struct mpc_config_intsrc intsrc; - int ioapic = -1; - int pin = -1; +#ifdef CONFIG_X86_IO_APIC + for (i = 0; i < mp_irq_entries; i++) { + if (irq_used[i]) + continue; - /* - * Convert 'gsi' to 'ioapic.pin'. - */ - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return; - pin = gsi - mp_ioapic_routing[ioapic].gsi_base; + if (mp_irqs[i].mp_irqtype != mp_INT) + continue; - /* - * TBD: This check is for faulty timer entries, where the override - * erroneously sets the trigger to level, resulting in a HUGE - * increase of timer interrupts! - */ - if ((bus_irq == 0) && (trigger == 3)) - trigger = 1; + if (mp_irqs[i].mp_irqflag != 0x0f) + continue; - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqtype = mp_INT; - intsrc.mpc_irqflag = (trigger << 2) | polarity; - intsrc.mpc_srcbus = MP_ISA_BUS; - intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ - intsrc.mpc_dstirq = pin; /* INTIN# */ + if (nr_m_spare > 0) { + printk(KERN_INFO "*NEW* found "); + nr_m_spare--; + assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]); + m_spare[nr_m_spare] = NULL; + } else { + struct mpc_config_intsrc *m = + (struct mpc_config_intsrc *)mpt; + count += sizeof(struct mpc_config_intsrc); + if (!mpc_new_phys) { + printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count); + } else { + if (count <= mpc_new_length) + printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count); + else { + printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length); + goto out; + } + } + assign_to_mpc_intsrc(&mp_irqs[i], m); + mpc->mpc_length = count; + mpt += sizeof(struct mpc_config_intsrc); + } + print_mp_irq_info(&mp_irqs[i]); + } +#endif +out: + /* update checksum */ + mpc->mpc_checksum = 0; + mpc->mpc_checksum -= mpf_checksum((unsigned char *)mpc, + mpc->mpc_length); - MP_intsrc_info(&intsrc); + return 0; } -void __init mp_config_acpi_legacy_irqs(void) -{ - struct mpc_config_intsrc intsrc; - int i = 0; - int ioapic = -1; +static int __initdata enable_update_mptable; -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) - /* - * Fabricate the legacy ISA bus (bus #31). - */ - mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; -#endif - set_bit(MP_ISA_BUS, mp_bus_not_pci); - Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); +static int __init update_mptable_setup(char *str) +{ + enable_update_mptable = 1; + return 0; +} +early_param("update_mptable", update_mptable_setup); - /* - * Older generations of ES7000 have no legacy identity mappings - */ - if (es7000_plat == 1) - return; +static unsigned long __initdata mpc_new_phys; +static unsigned long mpc_new_length __initdata = 4096; - /* - * Locate the IOAPIC that manages the ISA IRQs (0-15). - */ - ioapic = mp_find_ioapic(0); - if (ioapic < 0) - return; +/* alloc_mptable or alloc_mptable=4k */ +static int __initdata alloc_mptable; +static int __init parse_alloc_mptable_opt(char *p) +{ + enable_update_mptable = 1; + alloc_mptable = 1; + if (!p) + return 0; + mpc_new_length = memparse(p, &p); + return 0; +} +early_param("alloc_mptable", parse_alloc_mptable_opt); - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqflag = 0; /* Conforming */ - intsrc.mpc_srcbus = MP_ISA_BUS; -#ifdef CONFIG_X86_IO_APIC - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; +void __init early_reserve_e820_mpc_new(void) +{ + if (enable_update_mptable && alloc_mptable) { + u64 startt = 0; +#ifdef CONFIG_X86_TRAMPOLINE + startt = TRAMPOLINE_BASE; #endif - /* - * Use the default configuration for the IRQs 0-15. Unless - * overridden by (MADT) interrupt source override entries. - */ - for (i = 0; i < 16; i++) { - int idx; - - for (idx = 0; idx < mp_irq_entries; idx++) { - struct mpc_config_intsrc *irq = mp_irqs + idx; - - /* Do we already have a mapping for this ISA IRQ? */ - if (irq->mpc_srcbus == MP_ISA_BUS - && irq->mpc_srcbusirq == i) - break; - - /* Do we already have a mapping for this IOAPIC pin */ - if ((irq->mpc_dstapic == intsrc.mpc_dstapic) && - (irq->mpc_dstirq == i)) - break; - } - - if (idx != mp_irq_entries) { - printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); - continue; /* IRQ already used */ - } - - intsrc.mpc_irqtype = mp_INT; - intsrc.mpc_srcbusirq = i; /* Identity mapped */ - intsrc.mpc_dstirq = i; - - MP_intsrc_info(&intsrc); + mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4); } } -int mp_register_gsi(u32 gsi, int triggering, int polarity) +static int __init update_mp_table(void) { - int ioapic; - int ioapic_pin; -#ifdef CONFIG_X86_32 -#define MAX_GSI_NUM 4096 -#define IRQ_COMPRESSION_START 64 + char str[16]; + char oem[10]; + struct intel_mp_floating *mpf; + struct mp_config_table *mpc; + struct mp_config_table *mpc_new; + + if (!enable_update_mptable) + return 0; + + mpf = mpf_found; + if (!mpf) + return 0; - static int pci_irq = IRQ_COMPRESSION_START; /* - * Mapping between Global System Interrupts, which - * represent all possible interrupts, and IRQs - * assigned to actual devices. + * Now see if we need to go further. */ - static int gsi_to_irq[MAX_GSI_NUM]; -#else - - if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) - return gsi; -#endif + if (mpf->mpf_feature1 != 0) + return 0; - /* Don't set up the ACPI SCI because it's already set up */ - if (acpi_gbl_FADT.sci_interrupt == gsi) - return gsi; + if (!mpf->mpf_physptr) + return 0; - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) { - printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); - return gsi; - } + mpc = phys_to_virt(mpf->mpf_physptr); - ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; + if (!smp_check_mpc(mpc, oem, str)) + return 0; -#ifdef CONFIG_X86_32 - if (ioapic_renumber_irq) - gsi = ioapic_renumber_irq(ioapic, gsi); -#endif + printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); + printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); - /* - * Avoid pin reprogramming. PRTs typically include entries - * with redundant pin->gsi mappings (but unique PCI devices); - * we only program the IOAPIC on the first. - */ - if (ioapic_pin > MP_MAX_IOAPIC_PIN) { - printk(KERN_ERR "Invalid reference to IOAPIC pin " - "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, - ioapic_pin); - return gsi; + if (mpc_new_phys && mpc->mpc_length > mpc_new_length) { + mpc_new_phys = 0; + printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n", + mpc_new_length); } - if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { - Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", - mp_ioapic_routing[ioapic].apic_id, ioapic_pin); -#ifdef CONFIG_X86_32 - return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); -#else - return gsi; -#endif + + if (!mpc_new_phys) { + unsigned char old, new; + /* check if we can change the postion */ + mpc->mpc_checksum = 0; + old = mpf_checksum((unsigned char *)mpc, mpc->mpc_length); + mpc->mpc_checksum = 0xff; + new = mpf_checksum((unsigned char *)mpc, mpc->mpc_length); + if (old == new) { + printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n"); + return 0; + } + printk(KERN_INFO "use in-positon replacing\n"); + } else { + mpf->mpf_physptr = mpc_new_phys; + mpc_new = phys_to_virt(mpc_new_phys); + memcpy(mpc_new, mpc, mpc->mpc_length); + mpc = mpc_new; + /* check if we can modify that */ + if (mpc_new_phys - mpf->mpf_physptr) { + struct intel_mp_floating *mpf_new; + /* steal 16 bytes from [0, 1k) */ + printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); + mpf_new = phys_to_virt(0x400 - 16); + memcpy(mpf_new, mpf, 16); + mpf = mpf_new; + mpf->mpf_physptr = mpc_new_phys; + } + mpf->mpf_checksum = 0; + mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); + printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); } - set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed); -#ifdef CONFIG_X86_32 /* - * For GSI >= 64, use IRQ compression + * only replace the one with mp_INT and + * MP_IRQ_TRIGGER_LEVEL|MP_IRQ_POLARITY_LOW, + * already in mp_irqs , stored by ... and mp_config_acpi_gsi, + * may need pci=routeirq for all coverage */ - if ((gsi >= IRQ_COMPRESSION_START) - && (triggering == ACPI_LEVEL_SENSITIVE)) { - /* - * For PCI devices assign IRQs in order, avoiding gaps - * due to unused I/O APIC pins. - */ - int irq = gsi; - if (gsi < MAX_GSI_NUM) { - /* - * Retain the VIA chipset work-around (gsi > 15), but - * avoid a problem where the 8254 timer (IRQ0) is setup - * via an override (so it's not on pin 0 of the ioapic), - * and at the same time, the pin 0 interrupt is a PCI - * type. The gsi > 15 test could cause these two pins - * to be shared as IRQ0, and they are not shareable. - * So test for this condition, and if necessary, avoid - * the pin collision. - */ - gsi = pci_irq++; - /* - * Don't assign IRQ used by ACPI SCI - */ - if (gsi == acpi_gbl_FADT.sci_interrupt) - gsi = pci_irq++; - gsi_to_irq[irq] = gsi; - } else { - printk(KERN_ERR "GSI %u is too high\n", gsi); - return gsi; - } - } -#endif - io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, - triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, - polarity == ACPI_ACTIVE_HIGH ? 0 : 1); - return gsi; + replace_intsrc_all(mpc, mpc_new_phys, mpc_new_length); + + return 0; } -#endif /* CONFIG_X86_IO_APIC */ -#endif /* CONFIG_ACPI */ +late_initcall(update_mp_table); diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi.c index 5a29ded994f..9ebf71323c9 100644 --- a/arch/x86/kernel/nmi_64.c +++ b/arch/x86/kernel/nmi.c @@ -6,10 +6,13 @@ * Fixes: * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. * Mikael Pettersson : Power Management for local APIC NMI watchdog. + * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. * Pavel Machek and * Mikael Pettersson : PM converted to driver model. Disable/enable API. */ +#include <asm/apic.h> + #include <linux/nmi.h> #include <linux/mm.h> #include <linux/delay.h> @@ -17,20 +20,26 @@ #include <linux/module.h> #include <linux/sysdev.h> #include <linux/sysctl.h> +#include <linux/percpu.h> #include <linux/kprobes.h> #include <linux/cpumask.h> +#include <linux/kernel_stat.h> #include <linux/kdebug.h> +#include <linux/smp.h> +#include <asm/i8259.h> +#include <asm/io_apic.h> #include <asm/smp.h> #include <asm/nmi.h> #include <asm/proto.h> +#include <asm/timer.h> + #include <asm/mce.h> #include <mach_traps.h> int unknown_nmi_panic; int nmi_watchdog_enabled; -int panic_on_unrecovered_nmi; static cpumask_t backtrace_mask = CPU_MASK_NONE; @@ -41,12 +50,47 @@ static cpumask_t backtrace_mask = CPU_MASK_NONE; * 0: the lapic NMI watchdog is disabled, but can be enabled */ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ -static int panic_on_timeout; +EXPORT_SYMBOL(nmi_active); unsigned int nmi_watchdog = NMI_DEFAULT; -static unsigned int nmi_hz = HZ; +EXPORT_SYMBOL(nmi_watchdog); + +static int panic_on_timeout; +static unsigned int nmi_hz = HZ; static DEFINE_PER_CPU(short, wd_enabled); +static int endflag __initdata; + +static inline unsigned int get_nmi_count(int cpu) +{ +#ifdef CONFIG_X86_64 + return cpu_pda(cpu)->__nmi_count; +#else + return nmi_count(cpu); +#endif +} + +static inline int mce_in_progress(void) +{ +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) + return atomic_read(&mce_entry) > 0; +#endif + return 0; +} + +/* + * Take the local apic timer and PIT/HPET into account. We don't + * know which one is active, when we have highres/dyntick on + */ +static inline unsigned int get_timer_irqs(int cpu) +{ +#ifdef CONFIG_X86_64 + return read_pda(apic_timer_irqs) + read_pda(irq0_irqs); +#else + return per_cpu(irq_stat, cpu).apic_timer_irqs + + per_cpu(irq_stat, cpu).irq0_irqs; +#endif +} /* Run after command line and cpu_init init, but before all other checks */ void nmi_watchdog_default(void) @@ -56,22 +100,23 @@ void nmi_watchdog_default(void) nmi_watchdog = NMI_NONE; } -static int endflag __initdata = 0; - #ifdef CONFIG_SMP -/* The performance counters used by NMI_LOCAL_APIC don't trigger when +/* + * The performance counters used by NMI_LOCAL_APIC don't trigger when * the CPU is idle. To make sure the NMI watchdog really ticks on all * CPUs during the test make them busy. */ static __init void nmi_cpu_busy(void *data) { local_irq_enable_in_hardirq(); - /* Intentionally don't use cpu_relax here. This is - to make sure that the performance counter really ticks, - even if there is a simulator or similar that catches the - pause instruction. On a real HT machine this is fine because - all other CPUs are busy with "useless" delay loops and don't - care if they get somewhat less cycles. */ + /* + * Intentionally don't use cpu_relax here. This is + * to make sure that the performance counter really ticks, + * even if there is a simulator or similar that catches the + * pause instruction. On a real HT machine this is fine because + * all other CPUs are busy with "useless" delay loops and don't + * care if they get somewhat less cycles. + */ while (endflag == 0) mb(); } @@ -79,18 +124,18 @@ static __init void nmi_cpu_busy(void *data) int __init check_nmi_watchdog(void) { - int *prev_nmi_count; + unsigned int *prev_nmi_count; int cpu; - if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) + if (nmi_watchdog == NMI_NONE || nmi_watchdog == NMI_DISABLED) return 0; if (!atomic_read(&nmi_active)) return 0; - prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); + prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); if (!prev_nmi_count) - return -1; + goto error; printk(KERN_INFO "Testing NMI watchdog ... "); @@ -99,20 +144,20 @@ int __init check_nmi_watchdog(void) smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); #endif - for (cpu = 0; cpu < NR_CPUS; cpu++) - prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count; + for_each_possible_cpu(cpu) + prev_nmi_count[cpu] = get_nmi_count(cpu); local_irq_enable(); - mdelay((20*1000)/nmi_hz); // wait 20 ticks + mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ for_each_online_cpu(cpu) { if (!per_cpu(wd_enabled, cpu)) continue; - if (cpu_pda(cpu)->__nmi_count - prev_nmi_count[cpu] <= 5) { + if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { printk(KERN_WARNING "WARNING: CPU#%d: NMI " - "appears to be stuck (%d->%d)!\n", + "appears to be stuck (%d->%d)!\n", cpu, prev_nmi_count[cpu], - cpu_pda(cpu)->__nmi_count); + get_nmi_count(cpu)); per_cpu(wd_enabled, cpu) = 0; atomic_dec(&nmi_active); } @@ -121,24 +166,30 @@ int __init check_nmi_watchdog(void) if (!atomic_read(&nmi_active)) { kfree(prev_nmi_count); atomic_set(&nmi_active, -1); - return -1; + goto error; } printk("OK.\n"); - /* now that we know it works we can reduce NMI frequency to - something more reasonable; makes a difference in some configs */ + /* + * now that we know it works we can reduce NMI frequency to + * something more reasonable; makes a difference in some configs + */ if (nmi_watchdog == NMI_LOCAL_APIC) nmi_hz = lapic_adjust_nmi_hz(1); kfree(prev_nmi_count); return 0; +error: + if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259) + disable_8259A_irq(0); + return -1; } static int __init setup_nmi_watchdog(char *str) { int nmi; - if (!strncmp(str,"panic",5)) { + if (!strncmp(str, "panic", 5)) { panic_on_timeout = 1; str = strchr(str, ','); if (!str) @@ -148,15 +199,17 @@ static int __init setup_nmi_watchdog(char *str) get_option(&str, &nmi); - if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) + if (nmi >= NMI_INVALID || nmi < NMI_NONE) return 0; nmi_watchdog = nmi; return 1; } - __setup("nmi_watchdog=", setup_nmi_watchdog); +/* + * Suspend/resume support + */ #ifdef CONFIG_PM static int nmi_pm_active; /* nmi_active before suspend */ @@ -195,7 +248,8 @@ static int __init init_lapic_nmi_sysfs(void) { int error; - /* should really be a BUG_ON but b/c this is an + /* + * should really be a BUG_ON but b/c this is an * init call, it just doesn't work. -dcz */ if (nmi_watchdog != NMI_LOCAL_APIC) @@ -209,6 +263,7 @@ static int __init init_lapic_nmi_sysfs(void) error = sysdev_register(&device_lapic_nmi); return error; } + /* must come after the local APIC's device_initcall() */ late_initcall(init_lapic_nmi_sysfs); @@ -216,7 +271,7 @@ late_initcall(init_lapic_nmi_sysfs); static void __acpi_nmi_enable(void *__unused) { - apic_write(APIC_LVT0, APIC_DM_NMI); + apic_write_around(APIC_LVT0, APIC_DM_NMI); } /* @@ -230,7 +285,7 @@ void acpi_nmi_enable(void) static void __acpi_nmi_disable(void *__unused) { - apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); + apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); } /* @@ -249,11 +304,12 @@ void setup_apic_nmi_watchdog(void *unused) /* cheap hack to support suspend/resume */ /* if cpu0 is not active neither should the other cpus */ - if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) + if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) return; switch (nmi_watchdog) { case NMI_LOCAL_APIC: + /* enable it before to avoid race with handler */ __get_cpu_var(wd_enabled) = 1; if (lapic_watchdog_init(nmi_hz) < 0) { __get_cpu_var(wd_enabled) = 0; @@ -269,9 +325,9 @@ void setup_apic_nmi_watchdog(void *unused) void stop_apic_nmi_watchdog(void *unused) { /* only support LOCAL and IO APICs for now */ - if ((nmi_watchdog != NMI_LOCAL_APIC) && - (nmi_watchdog != NMI_IO_APIC)) - return; + if (nmi_watchdog != NMI_LOCAL_APIC && + nmi_watchdog != NMI_IO_APIC) + return; if (__get_cpu_var(wd_enabled) == 0) return; if (nmi_watchdog == NMI_LOCAL_APIC) @@ -287,6 +343,11 @@ void stop_apic_nmi_watchdog(void *unused) * * as these watchdog NMI IRQs are generated on every CPU, we only * have to check the current processor. + * + * since NMIs don't listen to _any_ locks, we have to be extremely + * careful not to rely on unsafe variables. The printk might lock + * up though, so we have to break up any console locks first ... + * [when there will be more tty-related locks, break them up here too!] */ static DEFINE_PER_CPU(unsigned, last_irq_sum); @@ -295,11 +356,12 @@ static DEFINE_PER_CPU(int, nmi_touch); void touch_nmi_watchdog(void) { - if (nmi_watchdog > 0) { + if (nmi_watchdog == NMI_LOCAL_APIC || + nmi_watchdog == NMI_IO_APIC) { unsigned cpu; /* - * Tell other CPUs to reset their alert counters. We cannot + * Tell other CPUs to reset their alert counters. We cannot * do it ourselves because the alert count increase is not * atomic. */ @@ -309,6 +371,9 @@ void touch_nmi_watchdog(void) } } + /* + * Tickle the softlockup detector too: + */ touch_softlockup_watchdog(); } EXPORT_SYMBOL(touch_nmi_watchdog); @@ -316,7 +381,12 @@ EXPORT_SYMBOL(touch_nmi_watchdog); notrace __kprobes int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { - int sum; + /* + * Since current_thread_info()-> is always on the stack, and we + * always switch the stack NMI-atomically, it's safe to use + * smp_processor_id(). + */ + unsigned int sum; int touched = 0; int cpu = smp_processor_id(); int rc = 0; @@ -328,7 +398,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) touched = 1; } - sum = read_pda(apic_timer_irqs) + read_pda(irq0_irqs); + sum = get_timer_irqs(cpu); + if (__get_cpu_var(nmi_touch)) { __get_cpu_var(nmi_touch) = 0; touched = 1; @@ -338,28 +409,29 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) static DEFINE_SPINLOCK(lock); /* Serialise the printks */ spin_lock(&lock); - printk("NMI backtrace for cpu %d\n", cpu); + printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); dump_stack(); spin_unlock(&lock); cpu_clear(cpu, backtrace_mask); } -#ifdef CONFIG_X86_MCE - /* Could check oops_in_progress here too, but it's safer - not too */ - if (atomic_read(&mce_entry) > 0) + /* Could check oops_in_progress here too, but it's safer not to */ + if (mce_in_progress()) touched = 1; -#endif - /* if the apic timer isn't firing, this cpu isn't doing much */ + + /* if the none of the timers isn't firing, this cpu isn't doing much */ if (!touched && __get_cpu_var(last_irq_sum) == sum) { /* * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... */ local_inc(&__get_cpu_var(alert_counter)); - if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) - die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs, - panic_on_timeout); + if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz) + /* + * die_nmi will return ONLY if NOTIFY_STOP happens.. + */ + die_nmi("BUG: NMI Watchdog detected LOCKUP", + regs, panic_on_timeout); } else { __get_cpu_var(last_irq_sum) = sum; local_set(&__get_cpu_var(alert_counter), 0); @@ -373,7 +445,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) rc |= lapic_wd_event(nmi_hz); break; case NMI_IO_APIC: - /* don't know how to accurately check for this. + /* + * don't know how to accurately check for this. * just assume it was a watchdog timer interrupt * This matches the old behaviour. */ @@ -383,30 +456,6 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) return rc; } -static unsigned ignore_nmis; - -asmlinkage notrace __kprobes void -do_nmi(struct pt_regs *regs, long error_code) -{ - nmi_enter(); - add_pda(__nmi_count,1); - if (!ignore_nmis) - default_do_nmi(regs); - nmi_exit(); -} - -void stop_nmi(void) -{ - acpi_nmi_disable(); - ignore_nmis++; -} - -void restart_nmi(void) -{ - ignore_nmis--; - acpi_nmi_enable(); -} - #ifdef CONFIG_SYSCTL static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) @@ -415,7 +464,7 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) char buf[64]; sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(buf, regs, 1); /* Always panic here */ + die_nmi(buf, regs, 1); /* Always panic here */ return 0; } @@ -434,27 +483,37 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, return 0; if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) { - printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); + printk(KERN_WARNING + "NMI watchdog is permanently disabled\n"); return -EIO; } /* if nmi_watchdog is not set yet, then set it */ nmi_watchdog_default(); +#ifdef CONFIG_X86_32 + if (nmi_watchdog == NMI_NONE) { + if (lapic_watchdog_ok()) + nmi_watchdog = NMI_LOCAL_APIC; + else + nmi_watchdog = NMI_IO_APIC; + } +#endif + if (nmi_watchdog == NMI_LOCAL_APIC) { if (nmi_watchdog_enabled) enable_lapic_nmi_watchdog(); else disable_lapic_nmi_watchdog(); } else { - printk( KERN_WARNING + printk(KERN_WARNING "NMI watchdog doesn't know what hardware to touch\n"); return -EIO; } return 0; } -#endif +#endif /* CONFIG_SYSCTL */ int do_nmi_callback(struct pt_regs *regs, int cpu) { @@ -477,6 +536,3 @@ void __trigger_all_cpu_backtrace(void) mdelay(1); } } - -EXPORT_SYMBOL(nmi_active); -EXPORT_SYMBOL(nmi_watchdog); diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c deleted file mode 100644 index 84160f74eeb..00000000000 --- a/arch/x86/kernel/nmi_32.c +++ /dev/null @@ -1,467 +0,0 @@ -/* - * NMI watchdog support on APIC systems - * - * Started by Ingo Molnar <mingo@redhat.com> - * - * Fixes: - * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. - * Mikael Pettersson : Power Management for local APIC NMI watchdog. - * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. - * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. Disable/enable API. - */ - -#include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/nmi.h> -#include <linux/sysdev.h> -#include <linux/sysctl.h> -#include <linux/percpu.h> -#include <linux/kprobes.h> -#include <linux/cpumask.h> -#include <linux/kernel_stat.h> -#include <linux/kdebug.h> -#include <linux/slab.h> - -#include <asm/smp.h> -#include <asm/nmi.h> - -#include "mach_traps.h" - -int unknown_nmi_panic; -int nmi_watchdog_enabled; - -static cpumask_t backtrace_mask = CPU_MASK_NONE; - -/* nmi_active: - * >0: the lapic NMI watchdog is active, but can be disabled - * <0: the lapic NMI watchdog has not been set up, and cannot - * be enabled - * 0: the lapic NMI watchdog is disabled, but can be enabled - */ -atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ - -unsigned int nmi_watchdog = NMI_DEFAULT; -static unsigned int nmi_hz = HZ; - -static DEFINE_PER_CPU(short, wd_enabled); - -static int endflag __initdata = 0; - -#ifdef CONFIG_SMP -/* The performance counters used by NMI_LOCAL_APIC don't trigger when - * the CPU is idle. To make sure the NMI watchdog really ticks on all - * CPUs during the test make them busy. - */ -static __init void nmi_cpu_busy(void *data) -{ - local_irq_enable_in_hardirq(); - /* Intentionally don't use cpu_relax here. This is - to make sure that the performance counter really ticks, - even if there is a simulator or similar that catches the - pause instruction. On a real HT machine this is fine because - all other CPUs are busy with "useless" delay loops and don't - care if they get somewhat less cycles. */ - while (endflag == 0) - mb(); -} -#endif - -int __init check_nmi_watchdog(void) -{ - unsigned int *prev_nmi_count; - int cpu; - - if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) - return 0; - - if (!atomic_read(&nmi_active)) - return 0; - - prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); - if (!prev_nmi_count) - return -1; - - printk(KERN_INFO "Testing NMI watchdog ... "); - -#ifdef CONFIG_SMP - if (nmi_watchdog == NMI_LOCAL_APIC) - smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); -#endif - - for_each_possible_cpu(cpu) - prev_nmi_count[cpu] = nmi_count(cpu); - local_irq_enable(); - mdelay((20*1000)/nmi_hz); // wait 20 ticks - - for_each_possible_cpu(cpu) { -#ifdef CONFIG_SMP - /* Check cpu_callin_map here because that is set - after the timer is started. */ - if (!cpu_isset(cpu, cpu_callin_map)) - continue; -#endif - if (!per_cpu(wd_enabled, cpu)) - continue; - if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { - printk(KERN_WARNING "WARNING: CPU#%d: NMI " - "appears to be stuck (%d->%d)!\n", - cpu, - prev_nmi_count[cpu], - nmi_count(cpu)); - per_cpu(wd_enabled, cpu) = 0; - atomic_dec(&nmi_active); - } - } - endflag = 1; - if (!atomic_read(&nmi_active)) { - kfree(prev_nmi_count); - atomic_set(&nmi_active, -1); - return -1; - } - printk("OK.\n"); - - /* now that we know it works we can reduce NMI frequency to - something more reasonable; makes a difference in some configs */ - if (nmi_watchdog == NMI_LOCAL_APIC) - nmi_hz = lapic_adjust_nmi_hz(1); - - kfree(prev_nmi_count); - return 0; -} - -static int __init setup_nmi_watchdog(char *str) -{ - int nmi; - - get_option(&str, &nmi); - - if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) - return 0; - - nmi_watchdog = nmi; - return 1; -} - -__setup("nmi_watchdog=", setup_nmi_watchdog); - - -/* Suspend/resume support */ - -#ifdef CONFIG_PM - -static int nmi_pm_active; /* nmi_active before suspend */ - -static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) -{ - /* only CPU0 goes here, other CPUs should be offline */ - nmi_pm_active = atomic_read(&nmi_active); - stop_apic_nmi_watchdog(NULL); - BUG_ON(atomic_read(&nmi_active) != 0); - return 0; -} - -static int lapic_nmi_resume(struct sys_device *dev) -{ - /* only CPU0 goes here, other CPUs should be offline */ - if (nmi_pm_active > 0) { - setup_apic_nmi_watchdog(NULL); - touch_nmi_watchdog(); - } - return 0; -} - - -static struct sysdev_class nmi_sysclass = { - .name = "lapic_nmi", - .resume = lapic_nmi_resume, - .suspend = lapic_nmi_suspend, -}; - -static struct sys_device device_lapic_nmi = { - .id = 0, - .cls = &nmi_sysclass, -}; - -static int __init init_lapic_nmi_sysfs(void) -{ - int error; - - /* should really be a BUG_ON but b/c this is an - * init call, it just doesn't work. -dcz - */ - if (nmi_watchdog != NMI_LOCAL_APIC) - return 0; - - if (atomic_read(&nmi_active) < 0) - return 0; - - error = sysdev_class_register(&nmi_sysclass); - if (!error) - error = sysdev_register(&device_lapic_nmi); - return error; -} -/* must come after the local APIC's device_initcall() */ -late_initcall(init_lapic_nmi_sysfs); - -#endif /* CONFIG_PM */ - -static void __acpi_nmi_enable(void *__unused) -{ - apic_write_around(APIC_LVT0, APIC_DM_NMI); -} - -/* - * Enable timer based NMIs on all CPUs: - */ -void acpi_nmi_enable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); -} - -static void __acpi_nmi_disable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); -} - -/* - * Disable timer based NMIs on all CPUs: - */ -void acpi_nmi_disable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); -} - -void setup_apic_nmi_watchdog(void *unused) -{ - if (__get_cpu_var(wd_enabled)) - return; - - /* cheap hack to support suspend/resume */ - /* if cpu0 is not active neither should the other cpus */ - if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) - return; - - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - __get_cpu_var(wd_enabled) = 1; /* enable it before to avoid race with handler */ - if (lapic_watchdog_init(nmi_hz) < 0) { - __get_cpu_var(wd_enabled) = 0; - return; - } - /* FALL THROUGH */ - case NMI_IO_APIC: - __get_cpu_var(wd_enabled) = 1; - atomic_inc(&nmi_active); - } -} - -void stop_apic_nmi_watchdog(void *unused) -{ - /* only support LOCAL and IO APICs for now */ - if ((nmi_watchdog != NMI_LOCAL_APIC) && - (nmi_watchdog != NMI_IO_APIC)) - return; - if (__get_cpu_var(wd_enabled) == 0) - return; - if (nmi_watchdog == NMI_LOCAL_APIC) - lapic_watchdog_stop(); - __get_cpu_var(wd_enabled) = 0; - atomic_dec(&nmi_active); -} - -/* - * the best way to detect whether a CPU has a 'hard lockup' problem - * is to check it's local APIC timer IRQ counts. If they are not - * changing then that CPU has some problem. - * - * as these watchdog NMI IRQs are generated on every CPU, we only - * have to check the current processor. - * - * since NMIs don't listen to _any_ locks, we have to be extremely - * careful not to rely on unsafe variables. The printk might lock - * up though, so we have to break up any console locks first ... - * [when there will be more tty-related locks, break them up - * here too!] - */ - -static unsigned int - last_irq_sums [NR_CPUS], - alert_counter [NR_CPUS]; - -void touch_nmi_watchdog(void) -{ - if (nmi_watchdog > 0) { - unsigned cpu; - - /* - * Just reset the alert counters, (other CPUs might be - * spinning on locks we hold): - */ - for_each_present_cpu(cpu) { - if (alert_counter[cpu]) - alert_counter[cpu] = 0; - } - } - - /* - * Tickle the softlockup detector too: - */ - touch_softlockup_watchdog(); -} -EXPORT_SYMBOL(touch_nmi_watchdog); - -extern void die_nmi(struct pt_regs *, const char *msg); - -notrace __kprobes int -nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) -{ - - /* - * Since current_thread_info()-> is always on the stack, and we - * always switch the stack NMI-atomically, it's safe to use - * smp_processor_id(). - */ - unsigned int sum; - int touched = 0; - int cpu = smp_processor_id(); - int rc = 0; - - /* check for other users first */ - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) { - rc = 1; - touched = 1; - } - - if (cpu_isset(cpu, backtrace_mask)) { - static DEFINE_SPINLOCK(lock); /* Serialise the printks */ - - spin_lock(&lock); - printk("NMI backtrace for cpu %d\n", cpu); - dump_stack(); - spin_unlock(&lock); - cpu_clear(cpu, backtrace_mask); - } - - /* - * Take the local apic timer and PIT/HPET into account. We don't - * know which one is active, when we have highres/dyntick on - */ - sum = per_cpu(irq_stat, cpu).apic_timer_irqs + - per_cpu(irq_stat, cpu).irq0_irqs; - - /* if the none of the timers isn't firing, this cpu isn't doing much */ - if (!touched && last_irq_sums[cpu] == sum) { - /* - * Ayiee, looks like this CPU is stuck ... - * wait a few IRQs (5 seconds) before doing the oops ... - */ - alert_counter[cpu]++; - if (alert_counter[cpu] == 5*nmi_hz) - /* - * die_nmi will return ONLY if NOTIFY_STOP happens.. - */ - die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP"); - } else { - last_irq_sums[cpu] = sum; - alert_counter[cpu] = 0; - } - /* see if the nmi watchdog went off */ - if (!__get_cpu_var(wd_enabled)) - return rc; - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - rc |= lapic_wd_event(nmi_hz); - break; - case NMI_IO_APIC: - /* don't know how to accurately check for this. - * just assume it was a watchdog timer interrupt - * This matches the old behaviour. - */ - rc = 1; - break; - } - return rc; -} - -#ifdef CONFIG_SYSCTL - -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) -{ - unsigned char reason = get_nmi_reason(); - char buf[64]; - - sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(regs, buf); - return 0; -} - -/* - * proc handler for /proc/sys/kernel/nmi - */ -int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, - void __user *buffer, size_t *length, loff_t *ppos) -{ - int old_state; - - nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; - old_state = nmi_watchdog_enabled; - proc_dointvec(table, write, file, buffer, length, ppos); - if (!!old_state == !!nmi_watchdog_enabled) - return 0; - - if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) { - printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); - return -EIO; - } - - if (nmi_watchdog == NMI_DEFAULT) { - if (lapic_watchdog_ok()) - nmi_watchdog = NMI_LOCAL_APIC; - else - nmi_watchdog = NMI_IO_APIC; - } - - if (nmi_watchdog == NMI_LOCAL_APIC) { - if (nmi_watchdog_enabled) - enable_lapic_nmi_watchdog(); - else - disable_lapic_nmi_watchdog(); - } else { - printk( KERN_WARNING - "NMI watchdog doesn't know what hardware to touch\n"); - return -EIO; - } - return 0; -} - -#endif - -int do_nmi_callback(struct pt_regs *regs, int cpu) -{ -#ifdef CONFIG_SYSCTL - if (unknown_nmi_panic) - return unknown_nmi_panic_callback(regs, cpu); -#endif - return 0; -} - -void __trigger_all_cpu_backtrace(void) -{ - int i; - - backtrace_mask = cpu_online_map; - /* Wait for up to 10 seconds for all CPUs to do the backtrace */ - for (i = 0; i < 10 * 1000; i++) { - if (cpus_empty(backtrace_mask)) - break; - mdelay(1); - } -} - -EXPORT_SYMBOL(nmi_active); -EXPORT_SYMBOL(nmi_watchdog); diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index e65281b1634..f0f1de1c4a1 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -31,6 +31,8 @@ #include <asm/numaq.h> #include <asm/topology.h> #include <asm/processor.h> +#include <asm/mpspec.h> +#include <asm/e820.h> #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) @@ -58,6 +60,8 @@ static void __init smp_dump_qct(void) node_end_pfn[node] = MB_TO_PAGES( eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); + e820_register_active_regions(node, node_start_pfn[node], + node_end_pfn[node]); memory_present(node, node_start_pfn[node], node_end_pfn[node]); node_remap_size[node] = node_memmap_size_bytes(node, @@ -67,13 +71,24 @@ static void __init smp_dump_qct(void) } } -/* - * Unlike Summit, we don't really care to let the NUMA-Q - * fall back to flat mode. Don't compile for NUMA-Q - * unless you really need it! - */ +static __init void early_check_numaq(void) +{ + /* + * Find possible boot-time SMP configuration: + */ + early_find_smp_config(); + /* + * get boot-time SMP configuration: + */ + if (smp_found_config) + early_get_smp_config(); +} + int __init get_memcfg_numaq(void) { + early_check_numaq(); + if (!found_numaq) + return 0; smp_dump_qct(); return 1; } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 74f0c5ea2a0..f1ab0f72700 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -380,6 +380,9 @@ struct pv_mmu_ops pv_mmu_ops = { .pte_update = paravirt_nop, .pte_update_defer = paravirt_nop, + .ptep_modify_prot_start = __ptep_modify_prot_start, + .ptep_modify_prot_commit = __ptep_modify_prot_commit, + #ifdef CONFIG_HIGHPTE .kmap_atomic_pte = kmap_atomic, #endif @@ -403,6 +406,7 @@ struct pv_mmu_ops pv_mmu_ops = { #endif /* PAGETABLE_LEVELS >= 3 */ .pte_val = native_pte_val, + .pte_flags = native_pte_val, .pgd_val = native_pgd_val, .make_pte = native_make_pte, diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index dc00a1331ac..cb0bdf44071 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -7,6 +7,7 @@ #include <asm/dma.h> #include <asm/gart.h> #include <asm/calgary.h> +#include <asm/amd_iommu.h> int forbid_dac __read_mostly; EXPORT_SYMBOL(forbid_dac); @@ -77,10 +78,14 @@ void __init dma32_reserve_bootmem(void) if (end_pfn <= MAX_DMA32_PFN) return; + /* + * check aperture_64.c allocate_aperture() for reason about + * using 512M as goal + */ align = 64ULL<<20; size = round_up(dma32_bootmem_size, align); dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, - __pa(MAX_DMA_ADDRESS)); + 512ULL<<20); if (dma32_bootmem_ptr) dma32_bootmem_size = size; else @@ -88,7 +93,6 @@ void __init dma32_reserve_bootmem(void) } static void __init dma32_free_bootmem(void) { - int node; if (end_pfn <= MAX_DMA32_PFN) return; @@ -96,9 +100,7 @@ static void __init dma32_free_bootmem(void) if (!dma32_bootmem_ptr) return; - for_each_online_node(node) - free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr), - dma32_bootmem_size); + free_bootmem(__pa(dma32_bootmem_ptr), dma32_bootmem_size); dma32_bootmem_ptr = NULL; dma32_bootmem_size = 0; @@ -122,6 +124,8 @@ void __init pci_iommu_alloc(void) detect_intel_iommu(); + amd_iommu_detect(); + #ifdef CONFIG_SWIOTLB pci_swiotlb_init(); #endif @@ -357,7 +361,7 @@ int dma_supported(struct device *dev, u64 mask) EXPORT_SYMBOL(dma_supported); /* Allocate DMA memory on node near device */ -noinline struct page * +static noinline struct page * dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) { int node; @@ -502,6 +506,8 @@ static int __init pci_iommu_init(void) intel_iommu_init(); + amd_iommu_init(); + #ifdef CONFIG_GART_IOMMU gart_iommu_init(); #endif diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index aa8ec928caa..021f3c684a6 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -104,7 +104,6 @@ static unsigned long alloc_iommu(struct device *dev, int size) size, base_index, boundary_size, 0); } if (offset != -1) { - set_bit_string(iommu_gart_bitmap, offset, size); next_bit = offset+size; if (next_bit >= iommu_pages) { next_bit = 0; @@ -534,8 +533,8 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) unsigned aper_size = 0, aper_base_32, aper_order; u64 aper_base; - pci_read_config_dword(dev, 0x94, &aper_base_32); - pci_read_config_dword(dev, 0x90, &aper_order); + pci_read_config_dword(dev, AMD64_GARTAPERTUREBASE, &aper_base_32); + pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &aper_order); aper_order = (aper_order >> 1) & 7; aper_base = aper_base_32 & 0x7fff; @@ -549,14 +548,63 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) return aper_base; } +static void enable_gart_translations(void) +{ + int i; + + for (i = 0; i < num_k8_northbridges; i++) { + struct pci_dev *dev = k8_northbridges[i]; + + enable_gart_translation(dev, __pa(agp_gatt_table)); + } +} + +/* + * If fix_up_north_bridges is set, the north bridges have to be fixed up on + * resume in the same way as they are handled in gart_iommu_hole_init(). + */ +static bool fix_up_north_bridges; +static u32 aperture_order; +static u32 aperture_alloc; + +void set_up_gart_resume(u32 aper_order, u32 aper_alloc) +{ + fix_up_north_bridges = true; + aperture_order = aper_order; + aperture_alloc = aper_alloc; +} + static int gart_resume(struct sys_device *dev) { + printk(KERN_INFO "PCI-DMA: Resuming GART IOMMU\n"); + + if (fix_up_north_bridges) { + int i; + + printk(KERN_INFO "PCI-DMA: Restoring GART aperture settings\n"); + + for (i = 0; i < num_k8_northbridges; i++) { + struct pci_dev *dev = k8_northbridges[i]; + + /* + * Don't enable translations just yet. That is the next + * step. Restore the pre-suspend aperture settings. + */ + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, + aperture_order << 1); + pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, + aperture_alloc >> 25); + } + } + + enable_gart_translations(); + return 0; } static int gart_suspend(struct sys_device *dev, pm_message_t state) { - return -EINVAL; + return 0; } static struct sysdev_class gart_sysdev_class = { @@ -614,27 +662,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info) memset(gatt, 0, gatt_size); agp_gatt_table = gatt; - for (i = 0; i < num_k8_northbridges; i++) { - u32 gatt_reg; - u32 ctl; - - dev = k8_northbridges[i]; - gatt_reg = __pa(gatt) >> 12; - gatt_reg <<= 4; - pci_write_config_dword(dev, 0x98, gatt_reg); - pci_read_config_dword(dev, 0x90, &ctl); - - ctl |= 1; - ctl &= ~((1<<4) | (1<<5)); - - pci_write_config_dword(dev, 0x90, ctl); - } + enable_gart_translations(); error = sysdev_class_register(&gart_sysdev_class); if (!error) error = sysdev_register(&device_gart); if (error) panic("Could not register gart_sysdev -- would corrupt data on next suspend"); + flush_gart(); printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", @@ -677,11 +712,11 @@ void gart_iommu_shutdown(void) u32 ctl; dev = k8_northbridges[i]; - pci_read_config_dword(dev, 0x90, &ctl); + pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); - ctl &= ~1; + ctl &= ~GARTEN; - pci_write_config_dword(dev, 0x90, ctl); + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); } } @@ -788,10 +823,10 @@ void __init gart_iommu_init(void) wbinvd(); /* - * Try to workaround a bug (thanks to BenH) + * Try to workaround a bug (thanks to BenH): * Set unmapped entries to a scratch page instead of 0. * Any prefetches that hit unmapped entries won't get an bus abort - * then. + * then. (P2P bridge may be prefetching on DMA reads). */ scratch = get_zeroed_page(GFP_KERNEL); if (!scratch) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ba370dc8685..4061d63aabe 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -6,6 +6,7 @@ #include <linux/sched.h> #include <linux/module.h> #include <linux/pm.h> +#include <linux/clockchips.h> struct kmem_cache *task_xstate_cachep; @@ -45,6 +46,76 @@ void arch_task_cache_init(void) SLAB_PANIC, NULL); } +/* + * Idle related variables and functions + */ +unsigned long boot_option_idle_override = 0; +EXPORT_SYMBOL(boot_option_idle_override); + +/* + * Powermanagement idle function, if any.. + */ +void (*pm_idle)(void); +EXPORT_SYMBOL(pm_idle); + +#ifdef CONFIG_X86_32 +/* + * This halt magic was a workaround for ancient floppy DMA + * wreckage. It should be safe to remove. + */ +static int hlt_counter; +void disable_hlt(void) +{ + hlt_counter++; +} +EXPORT_SYMBOL(disable_hlt); + +void enable_hlt(void) +{ + hlt_counter--; +} +EXPORT_SYMBOL(enable_hlt); + +static inline int hlt_use_halt(void) +{ + return (!hlt_counter && boot_cpu_data.hlt_works_ok); +} +#else +static inline int hlt_use_halt(void) +{ + return 1; +} +#endif + +/* + * We use this if we don't have any better + * idle routine.. + */ +void default_idle(void) +{ + if (hlt_use_halt()) { + current_thread_info()->status &= ~TS_POLLING; + /* + * TS_POLLING-cleared state must be visible before we + * test NEED_RESCHED: + */ + smp_mb(); + + if (!need_resched()) + safe_halt(); /* enables interrupts racelessly */ + else + local_irq_enable(); + current_thread_info()->status |= TS_POLLING; + } else { + local_irq_enable(); + /* loop is done by the caller */ + cpu_relax(); + } +} +#ifdef CONFIG_APM_MODULE +EXPORT_SYMBOL(default_idle); +#endif + static void do_nothing(void *unused) { } @@ -122,44 +193,129 @@ static void poll_idle(void) * * idle=mwait overrides this decision and forces the usage of mwait. */ + +#define MWAIT_INFO 0x05 +#define MWAIT_ECX_EXTENDED_INFO 0x01 +#define MWAIT_EDX_C1 0xf0 + static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) { + u32 eax, ebx, ecx, edx; + if (force_mwait) return 1; - if (c->x86_vendor == X86_VENDOR_AMD) { - switch(c->x86) { - case 0x10: - case 0x11: - return 0; - } - } + if (c->cpuid_level < MWAIT_INFO) + return 0; + + cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx); + /* Check, whether EDX has extended info about MWAIT */ + if (!(ecx & MWAIT_ECX_EXTENDED_INFO)) + return 1; + + /* + * edx enumeratios MONITOR/MWAIT extensions. Check, whether + * C1 supports MWAIT + */ + return (edx & MWAIT_EDX_C1); +} + +/* + * Check for AMD CPUs, which have potentially C1E support + */ +static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) +{ + if (c->x86_vendor != X86_VENDOR_AMD) + return 0; + + if (c->x86 < 0x0F) + return 0; + + /* Family 0x0f models < rev F do not have C1E */ + if (c->x86 == 0x0f && c->x86_model < 0x40) + return 0; + return 1; } -void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) +/* + * C1E aware idle routine. We check for C1E active in the interrupt + * pending message MSR. If we detect C1E, then we handle it the same + * way as C3 power states (local apic timer and TSC stop) + */ +static void c1e_idle(void) { - static int selected; + static cpumask_t c1e_mask = CPU_MASK_NONE; + static int c1e_detected; - if (selected) + if (need_resched()) return; + + if (!c1e_detected) { + u32 lo, hi; + + rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); + if (lo & K8_INTP_C1E_ACTIVE_MASK) { + c1e_detected = 1; + mark_tsc_unstable("TSC halt in C1E"); + printk(KERN_INFO "System has C1E enabled\n"); + } + } + + if (c1e_detected) { + int cpu = smp_processor_id(); + + if (!cpu_isset(cpu, c1e_mask)) { + cpu_set(cpu, c1e_mask); + /* + * Force broadcast so ACPI can not interfere. Needs + * to run with interrupts enabled as it uses + * smp_function_call. + */ + local_irq_enable(); + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, + &cpu); + printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", + cpu); + local_irq_disable(); + } + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); + + default_idle(); + + /* + * The switch back from broadcast mode needs to be + * called with interrupts disabled. + */ + local_irq_disable(); + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); + local_irq_enable(); + } else + default_idle(); +} + +void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) +{ #ifdef CONFIG_X86_SMP if (pm_idle == poll_idle && smp_num_siblings > 1) { printk(KERN_WARNING "WARNING: polling idle and HT enabled," " performance may degrade.\n"); } #endif + if (pm_idle) + return; + if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { /* - * Skip, if setup has overridden idle. * One CPU supports mwait => All CPUs supports mwait */ - if (!pm_idle) { - printk(KERN_INFO "using mwait in idle threads.\n"); - pm_idle = mwait_idle; - } - } - selected = 1; + printk(KERN_INFO "using mwait in idle threads.\n"); + pm_idle = mwait_idle; + } else if (check_c1e_idle(c)) { + printk(KERN_INFO "using C1E aware idle routine\n"); + pm_idle = c1e_idle; + } else + pm_idle = default_idle; } static int __init idle_setup(char *str) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index e2db9ac5c61..c2a11d77b1b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -58,11 +58,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); -static int hlt_counter; - -unsigned long boot_option_idle_override = 0; -EXPORT_SYMBOL(boot_option_idle_override); - DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); @@ -77,55 +72,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return ((unsigned long *)tsk->thread.sp)[3]; } -/* - * Powermanagement idle function, if any.. - */ -void (*pm_idle)(void); -EXPORT_SYMBOL(pm_idle); - -void disable_hlt(void) -{ - hlt_counter++; -} - -EXPORT_SYMBOL(disable_hlt); - -void enable_hlt(void) -{ - hlt_counter--; -} - -EXPORT_SYMBOL(enable_hlt); - -/* - * We use this if we don't have any better - * idle routine.. - */ -void default_idle(void) -{ - if (!hlt_counter && boot_cpu_data.hlt_works_ok) { - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we - * test NEED_RESCHED: - */ - smp_mb(); - - if (!need_resched()) - safe_halt(); /* enables interrupts racelessly */ - else - local_irq_enable(); - current_thread_info()->status |= TS_POLLING; - } else { - local_irq_enable(); - /* loop is done by the caller */ - cpu_relax(); - } -} -#ifdef CONFIG_APM_MODULE -EXPORT_SYMBOL(default_idle); -#endif - #ifdef CONFIG_HOTPLUG_CPU #include <asm/nmi.h> /* We don't actually take CPU down, just spin without interrupts. */ @@ -168,24 +114,19 @@ void cpu_idle(void) while (1) { tick_nohz_stop_sched_tick(); while (!need_resched()) { - void (*idle)(void); check_pgt_cache(); rmb(); - idle = pm_idle; if (rcu_pending(cpu)) rcu_check_callbacks(cpu, 0); - if (!idle) - idle = default_idle; - if (cpu_is_offline(cpu)) play_dead(); local_irq_disable(); __get_cpu_var(irq_stat).idle_timestamp = jiffies; - idle(); + pm_idle(); } tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c6eb5c91e5f..290183e9731 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -56,15 +56,6 @@ asmlinkage extern void ret_from_fork(void); unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; -unsigned long boot_option_idle_override = 0; -EXPORT_SYMBOL(boot_option_idle_override); - -/* - * Powermanagement idle function, if any.. - */ -void (*pm_idle)(void); -EXPORT_SYMBOL(pm_idle); - static ATOMIC_NOTIFIER_HEAD(idle_notifier); void idle_notifier_register(struct notifier_block *n) @@ -94,25 +85,6 @@ void exit_idle(void) __exit_idle(); } -/* - * We use this if we don't have any better - * idle routine.. - */ -void default_idle(void) -{ - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we - * test NEED_RESCHED: - */ - smp_mb(); - if (!need_resched()) - safe_halt(); /* enables interrupts racelessly */ - else - local_irq_enable(); - current_thread_info()->status |= TS_POLLING; -} - #ifdef CONFIG_HOTPLUG_CPU DECLARE_PER_CPU(int, cpu_state); @@ -150,12 +122,9 @@ void cpu_idle(void) while (1) { tick_nohz_stop_sched_tick(); while (!need_resched()) { - void (*idle)(void); rmb(); - idle = pm_idle; - if (!idle) - idle = default_idle; + if (cpu_is_offline(smp_processor_id())) play_dead(); /* @@ -165,7 +134,7 @@ void cpu_idle(void) */ local_irq_disable(); enter_idle(); - idle(); + pm_idle(); /* In many cases the interrupt that ended idle has already called exit_idle. But some idle loops can be woken up without interrupt. */ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a7835f28293..77040b6070e 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -943,13 +943,13 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) return copy_regset_to_user(child, &user_x86_32_view, REGSET_XFP, 0, sizeof(struct user_fxsr_struct), - datap); + datap) ? -EIO : 0; case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ return copy_regset_from_user(child, &user_x86_32_view, REGSET_XFP, 0, sizeof(struct user_fxsr_struct), - datap); + datap) ? -EIO : 0; #endif #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index d89a648fe71..79bdcd11c66 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -65,6 +65,7 @@ static enum { ICH_FORCE_HPET_RESUME, VT8237_FORCE_HPET_RESUME, NVIDIA_FORCE_HPET_RESUME, + ATI_FORCE_HPET_RESUME, } force_hpet_resume_type; static void __iomem *rcba_base; @@ -158,6 +159,8 @@ static void ich_force_enable_hpet(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_0, + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0, @@ -174,6 +177,12 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, static struct pci_dev *cached_dev; +static void hpet_print_force_info(void) +{ + printk(KERN_INFO "HPET not enabled in BIOS. " + "You might try hpet=force boot option\n"); +} + static void old_ich_force_hpet_resume(void) { u32 val; @@ -253,6 +262,8 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev) { if (hpet_force_user) old_ich_force_enable_hpet(dev); + else + hpet_print_force_info(); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, @@ -290,8 +301,13 @@ static void vt8237_force_enable_hpet(struct pci_dev *dev) { u32 uninitialized_var(val); - if (!hpet_force_user || hpet_address || force_hpet_address) + if (hpet_address || force_hpet_address) + return; + + if (!hpet_force_user) { + hpet_print_force_info(); return; + } pci_read_config_dword(dev, 0x68, &val); /* @@ -330,6 +346,36 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, vt8237_force_enable_hpet); +static void ati_force_hpet_resume(void) +{ + pci_write_config_dword(cached_dev, 0x14, 0xfed00000); + printk(KERN_DEBUG "Force enabled HPET at resume\n"); +} + +static void ati_force_enable_hpet(struct pci_dev *dev) +{ + u32 uninitialized_var(val); + + if (hpet_address || force_hpet_address) + return; + + if (!hpet_force_user) { + hpet_print_force_info(); + return; + } + + pci_write_config_dword(dev, 0x14, 0xfed00000); + pci_read_config_dword(dev, 0x14, &val); + force_hpet_address = val; + force_hpet_resume_type = ATI_FORCE_HPET_RESUME; + dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", + force_hpet_address); + cached_dev = dev; + return; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, + ati_force_enable_hpet); + /* * Undocumented chipset feature taken from LinuxBIOS. */ @@ -343,8 +389,13 @@ static void nvidia_force_enable_hpet(struct pci_dev *dev) { u32 uninitialized_var(val); - if (!hpet_force_user || hpet_address || force_hpet_address) + if (hpet_address || force_hpet_address) + return; + + if (!hpet_force_user) { + hpet_print_force_info(); return; + } pci_write_config_dword(dev, 0x44, 0xfed00001); pci_read_config_dword(dev, 0x44, &val); @@ -397,6 +448,9 @@ void force_hpet_resume(void) case NVIDIA_FORCE_HPET_RESUME: nvidia_force_hpet_resume(); return; + case ATI_FORCE_HPET_RESUME: + ati_force_hpet_resume(); + return; default: break; } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f6be7d5f82f..f8a62160e15 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -27,7 +27,7 @@ void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); -static long no_idt[3]; +static const struct desc_ptr no_idt = {}; static int reboot_mode; enum reboot_type reboot_type = BOOT_KBD; int reboot_force; @@ -201,15 +201,15 @@ core_initcall(reboot_init); controller to pulse the CPU reset line, which is more thorough, but doesn't work with at least one type of 486 motherboard. It is easy to stop this code working; hence the copious comments. */ -static unsigned long long +static const unsigned long long real_mode_gdt_entries [3] = { 0x0000000000000000ULL, /* Null descriptor */ - 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ - 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ + 0x00009b000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ + 0x000093000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ }; -static struct desc_ptr +static const struct desc_ptr real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, real_mode_idt = { 0x3ff, 0 }; @@ -231,7 +231,7 @@ real_mode_idt = { 0x3ff, 0 }; More could be done here to set up the registers as if a CPU reset had occurred; hopefully real BIOSs don't assume much. */ -static unsigned char real_mode_switch [] = +static const unsigned char real_mode_switch [] = { 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ @@ -245,7 +245,7 @@ static unsigned char real_mode_switch [] = 0x24, 0x10, /* f: andb $0x10,al */ 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ }; -static unsigned char jump_to_bios [] = +static const unsigned char jump_to_bios [] = { 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */ }; @@ -255,7 +255,7 @@ static unsigned char jump_to_bios [] = * specified by the code and length parameters. * We assume that length will aways be less that 100! */ -void machine_real_restart(unsigned char *code, int length) +void machine_real_restart(const unsigned char *code, int length) { local_irq_disable(); @@ -368,7 +368,7 @@ static void native_machine_emergency_restart(void) } case BOOT_TRIPLE: - load_idt((const struct desc_ptr *)&no_idt); + load_idt(&no_idt); __asm__ __volatile__("int3"); reboot_type = BOOT_KBD; diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index dec0b5ec25c..61a837743fe 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c @@ -49,7 +49,7 @@ struct device_fixup { void (*reboot_fixup)(struct pci_dev *); }; -static struct device_fixup fixups_table[] = { +static const struct device_fixup fixups_table[] = { { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, @@ -64,7 +64,7 @@ static struct device_fixup fixups_table[] = { */ void mach_reboot_fixups(void) { - struct device_fixup *cur; + const struct device_fixup *cur; struct pci_dev *dev; int i; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6f80b852a19..ebb0a2bcdc0 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -17,15 +17,37 @@ unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; +unsigned int max_physical_apicid; EXPORT_SYMBOL(boot_cpu_physical_apicid); -DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; -EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); - /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; #endif +/* map cpu index to physical APIC ID */ +DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); + +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) +#define X86_64_NUMA 1 + +/* map cpu index to node index */ +DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); + +/* which logical CPUs are on which nodes */ +cpumask_t *node_to_cpumask_map; +EXPORT_SYMBOL(node_to_cpumask_map); + +/* setup node_to_cpumask_map */ +static void __init setup_node_to_cpumask_map(void); + +#else +static inline void setup_node_to_cpumask_map(void) { } +#endif + #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) /* * Copy data used in early init routines from the initial arrays to the @@ -37,20 +59,21 @@ static void __init setup_per_cpu_maps(void) int cpu; for_each_possible_cpu(cpu) { - per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu]; + per_cpu(x86_cpu_to_apicid, cpu) = + early_per_cpu_map(x86_cpu_to_apicid, cpu); per_cpu(x86_bios_cpu_apicid, cpu) = - x86_bios_cpu_apicid_init[cpu]; -#ifdef CONFIG_NUMA + early_per_cpu_map(x86_bios_cpu_apicid, cpu); +#ifdef X86_64_NUMA per_cpu(x86_cpu_to_node_map, cpu) = - x86_cpu_to_node_map_init[cpu]; + early_per_cpu_map(x86_cpu_to_node_map, cpu); #endif } /* indicate the early static arrays will soon be gone */ - x86_cpu_to_apicid_early_ptr = NULL; - x86_bios_cpu_apicid_early_ptr = NULL; -#ifdef CONFIG_NUMA - x86_cpu_to_node_map_early_ptr = NULL; + early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; + early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; +#ifdef X86_64_NUMA + early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; #endif } @@ -79,6 +102,50 @@ static inline void setup_cpumask_of_cpu(void) { } */ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); +static inline void setup_cpu_pda_map(void) { } + +#elif !defined(CONFIG_SMP) +static inline void setup_cpu_pda_map(void) { } + +#else /* CONFIG_SMP && CONFIG_X86_64 */ + +/* + * Allocate cpu_pda pointer table and array via alloc_bootmem. + */ +static void __init setup_cpu_pda_map(void) +{ + char *pda; + struct x8664_pda **new_cpu_pda; + unsigned long size; + int cpu; + + size = roundup(sizeof(struct x8664_pda), cache_line_size()); + + /* allocate cpu_pda array and pointer table */ + { + unsigned long tsize = nr_cpu_ids * sizeof(void *); + unsigned long asize = size * (nr_cpu_ids - 1); + + tsize = roundup(tsize, cache_line_size()); + new_cpu_pda = alloc_bootmem(tsize + asize); + pda = (char *)new_cpu_pda + tsize; + } + + /* initialize pointer table to static pda's */ + for_each_possible_cpu(cpu) { + if (cpu == 0) { + /* leave boot cpu pda in place */ + new_cpu_pda[0] = cpu_pda(0); + continue; + } + new_cpu_pda[cpu] = (struct x8664_pda *)pda; + new_cpu_pda[cpu]->in_bootmem = 1; + pda += size; + } + + /* point to new pointer table */ + _cpu_pda = new_cpu_pda; +} #endif /* @@ -88,52 +155,252 @@ EXPORT_SYMBOL(__per_cpu_offset); */ void __init setup_per_cpu_areas(void) { - int i, highest_cpu = 0; - unsigned long size; + ssize_t size = PERCPU_ENOUGH_ROOM; + char *ptr; + int cpu; #ifdef CONFIG_HOTPLUG_CPU prefill_possible_map(); +#else + nr_cpu_ids = num_processors; #endif + /* Setup cpu_pda map */ + setup_cpu_pda_map(); + /* Copy section for each CPU (we discard the original) */ size = PERCPU_ENOUGH_ROOM; - printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", + printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", size); - for_each_possible_cpu(i) { - char *ptr; + for_each_possible_cpu(cpu) { #ifndef CONFIG_NEED_MULTIPLE_NODES ptr = alloc_bootmem_pages(size); #else - int node = early_cpu_to_node(i); + int node = early_cpu_to_node(cpu); if (!node_online(node) || !NODE_DATA(node)) { ptr = alloc_bootmem_pages(size); printk(KERN_INFO - "cpu %d has no node or node-local memory\n", i); + "cpu %d has no node %d or node-local memory\n", + cpu, node); } else ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); #endif - if (!ptr) - panic("Cannot allocate cpu data for CPU %d\n", i); -#ifdef CONFIG_X86_64 - cpu_pda(i)->data_offset = ptr - __per_cpu_start; -#else - __per_cpu_offset[i] = ptr - __per_cpu_start; -#endif + per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - highest_cpu = i; } - nr_cpu_ids = highest_cpu + 1; - printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids); + printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", + NR_CPUS, nr_cpu_ids, nr_node_ids); /* Setup percpu data maps */ setup_per_cpu_maps(); + /* Setup node to cpumask map */ + setup_node_to_cpumask_map(); + /* Setup cpumask_of_cpu map */ setup_cpumask_of_cpu(); } #endif + +void __init parse_setup_data(void) +{ + struct setup_data *data; + u64 pa_data; + + if (boot_params.hdr.version < 0x0209) + return; + pa_data = boot_params.hdr.setup_data; + while (pa_data) { + data = early_ioremap(pa_data, PAGE_SIZE); + switch (data->type) { + case SETUP_E820_EXT: + parse_e820_ext(data, pa_data); + break; + default: + break; + } +#ifndef CONFIG_DEBUG_BOOT_PARAMS + free_early(pa_data, pa_data+sizeof(*data)+data->len); +#endif + pa_data = data->next; + early_iounmap(data, PAGE_SIZE); + } +} + +#ifdef X86_64_NUMA + +/* + * Allocate node_to_cpumask_map based on number of available nodes + * Requires node_possible_map to be valid. + * + * Note: node_to_cpumask() is not valid until after this is done. + */ +static void __init setup_node_to_cpumask_map(void) +{ + unsigned int node, num = 0; + cpumask_t *map; + + /* setup nr_node_ids if not done yet */ + if (nr_node_ids == MAX_NUMNODES) { + for_each_node_mask(node, node_possible_map) + num = node; + nr_node_ids = num + 1; + } + + /* allocate the map */ + map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); + + Dprintk(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n", + map, nr_node_ids); + + /* node_to_cpumask() will now work */ + node_to_cpumask_map = map; +} + +void __cpuinit numa_set_node(int cpu, int node) +{ + int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); + + if (cpu_pda(cpu) && node != NUMA_NO_NODE) + cpu_pda(cpu)->nodenumber = node; + + if (cpu_to_node_map) + cpu_to_node_map[cpu] = node; + + else if (per_cpu_offset(cpu)) + per_cpu(x86_cpu_to_node_map, cpu) = node; + + else + Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu); +} + +void __cpuinit numa_clear_node(int cpu) +{ + numa_set_node(cpu, NUMA_NO_NODE); +} + +#ifndef CONFIG_DEBUG_PER_CPU_MAPS + +void __cpuinit numa_add_cpu(int cpu) +{ + cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); +} + +void __cpuinit numa_remove_cpu(int cpu) +{ + cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); +} + +#else /* CONFIG_DEBUG_PER_CPU_MAPS */ + +/* + * --------- debug versions of the numa functions --------- + */ +static void __cpuinit numa_set_cpumask(int cpu, int enable) +{ + int node = cpu_to_node(cpu); + cpumask_t *mask; + char buf[64]; + + if (node_to_cpumask_map == NULL) { + printk(KERN_ERR "node_to_cpumask_map NULL\n"); + dump_stack(); + return; + } + + mask = &node_to_cpumask_map[node]; + if (enable) + cpu_set(cpu, *mask); + else + cpu_clear(cpu, *mask); + + cpulist_scnprintf(buf, sizeof(buf), *mask); + printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", + enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); + } + +void __cpuinit numa_add_cpu(int cpu) +{ + numa_set_cpumask(cpu, 1); +} + +void __cpuinit numa_remove_cpu(int cpu) +{ + numa_set_cpumask(cpu, 0); +} + +int cpu_to_node(int cpu) +{ + if (early_per_cpu_ptr(x86_cpu_to_node_map)) { + printk(KERN_WARNING + "cpu_to_node(%d): usage too early!\n", cpu); + dump_stack(); + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; + } + return per_cpu(x86_cpu_to_node_map, cpu); +} +EXPORT_SYMBOL(cpu_to_node); + +/* + * Same function as cpu_to_node() but used if called before the + * per_cpu areas are setup. + */ +int early_cpu_to_node(int cpu) +{ + if (early_per_cpu_ptr(x86_cpu_to_node_map)) + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; + + if (!per_cpu_offset(cpu)) { + printk(KERN_WARNING + "early_cpu_to_node(%d): no per_cpu area!\n", cpu); + dump_stack(); + return NUMA_NO_NODE; + } + return per_cpu(x86_cpu_to_node_map, cpu); +} + +/* + * Returns a pointer to the bitmask of CPUs on Node 'node'. + */ +cpumask_t *_node_to_cpumask_ptr(int node) +{ + if (node_to_cpumask_map == NULL) { + printk(KERN_WARNING + "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n", + node); + dump_stack(); + return &cpu_online_map; + } + BUG_ON(node >= nr_node_ids); + return &node_to_cpumask_map[node]; +} +EXPORT_SYMBOL(_node_to_cpumask_ptr); + +/* + * Returns a bitmask of CPUs on Node 'node'. + */ +cpumask_t node_to_cpumask(int node) +{ + if (node_to_cpumask_map == NULL) { + printk(KERN_WARNING + "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); + dump_stack(); + return cpu_online_map; + } + BUG_ON(node >= nr_node_ids); + return node_to_cpumask_map[node]; +} +EXPORT_SYMBOL(node_to_cpumask); + +/* + * --------- end of debug versions of the numa functions --------- + */ + +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ + +#endif /* X86_64_NUMA */ diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index aee0e820077..631ea6cc01d 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -12,6 +12,7 @@ #include <linux/bitops.h> #include <linux/module.h> #include <linux/kgdb.h> +#include <linux/topology.h> #include <asm/pda.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -34,9 +35,8 @@ struct boot_params boot_params; cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; -struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; +struct x8664_pda **_cpu_pda __read_mostly; EXPORT_SYMBOL(_cpu_pda); -struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned; struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; @@ -114,8 +114,10 @@ void pda_init(int cpu) __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); if (!pda->irqstackptr) panic("cannot allocate irqstack for cpu %d", cpu); - } + if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) + pda->nodenumber = cpu_to_node(cpu); + } pda->irqstackptr += IRQSTACKSIZE-64; } diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 5a2f8e06388..a9b19ad24ed 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -59,6 +59,7 @@ #include <asm/setup.h> #include <asm/arch_hooks.h> #include <asm/sections.h> +#include <asm/dmi.h> #include <asm/io_apic.h> #include <asm/ist.h> #include <asm/io.h> @@ -67,10 +68,13 @@ #include <asm/bios_ebda.h> #include <asm/cacheflush.h> #include <asm/processor.h> +#include <asm/efi.h> +#include <asm/bugs.h> /* This value is set up by the early boot code to point to the value immediately after the boot time page tables. It contains a *physical* address, and must not be in the .bss segment! */ +unsigned long init_pg_tables_start __initdata = ~0UL; unsigned long init_pg_tables_end __initdata = ~0UL; /* @@ -182,6 +186,12 @@ int bootloader_type; static unsigned int highmem_pages = -1; /* + * Early DMI memory + */ +int dmi_alloc_index; +char dmi_alloc_data[DMI_MAX_DATA]; + +/* * Setup options */ struct screen_info screen_info; @@ -237,42 +247,6 @@ static inline void copy_edd(void) } #endif -int __initdata user_defined_memmap; - -/* - * "mem=nopentium" disables the 4MB page tables. - * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM - * to <mem>, overriding the bios size. - * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from - * <start> to <start>+<mem>, overriding the bios size. - * - * HPA tells me bootloaders need to parse mem=, so no new - * option should be mem= [also see Documentation/i386/boot.txt] - */ -static int __init parse_mem(char *arg) -{ - if (!arg) - return -EINVAL; - - if (strcmp(arg, "nopentium") == 0) { - setup_clear_cpu_cap(X86_FEATURE_PSE); - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. - */ - unsigned long long mem_size; - - mem_size = memparse(arg, &arg); - limit_regions(mem_size); - user_defined_memmap = 1; - } - return 0; -} -early_param("mem", parse_mem); - #ifdef CONFIG_PROC_VMCORE /* elfcorehdr= specifies the location of elf core header * stored by the crashed kernel. @@ -395,56 +369,6 @@ unsigned long __init find_max_low_pfn(void) return max_low_pfn; } -#define BIOS_LOWMEM_KILOBYTES 0x413 - -/* - * The BIOS places the EBDA/XBDA at the top of conventional - * memory, and usually decreases the reported amount of - * conventional memory (int 0x12) too. This also contains a - * workaround for Dell systems that neglect to reserve EBDA. - * The same workaround also avoids a problem with the AMD768MPX - * chipset: reserve a page before VGA to prevent PCI prefetch - * into it (errata #56). Usually the page is reserved anyways, - * unless you have no PS/2 mouse plugged in. - */ -static void __init reserve_ebda_region(void) -{ - unsigned int lowmem, ebda_addr; - - /* To determine the position of the EBDA and the */ - /* end of conventional memory, we need to look at */ - /* the BIOS data area. In a paravirtual environment */ - /* that area is absent. We'll just have to assume */ - /* that the paravirt case can handle memory setup */ - /* correctly, without our help. */ - if (paravirt_enabled()) - return; - - /* end of low (conventional) memory */ - lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); - lowmem <<= 10; - - /* start of EBDA area */ - ebda_addr = get_bios_ebda(); - - /* Fixup: bios puts an EBDA in the top 64K segment */ - /* of conventional memory, but does not adjust lowmem. */ - if ((lowmem - ebda_addr) <= 0x10000) - lowmem = ebda_addr; - - /* Fixup: bios does not report an EBDA at all. */ - /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ - if ((ebda_addr == 0) && (lowmem >= 0x9f000)) - lowmem = 0x9f000; - - /* Paranoia: should never happen, but... */ - if ((lowmem == 0) || (lowmem >= 0x100000)) - lowmem = 0x9f000; - - /* reserve all memory between lowmem and the 1MB mark */ - reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT); -} - #ifndef CONFIG_NEED_MULTIPLE_NODES static void __init setup_bootmem_allocator(void); static unsigned long __init setup_memory(void) @@ -462,11 +386,13 @@ static unsigned long __init setup_memory(void) if (max_pfn > max_low_pfn) { highstart_pfn = max_low_pfn; } + memory_present(0, 0, highend_pfn); printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); num_physpages = highend_pfn; high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; #else + memory_present(0, 0, max_low_pfn); num_physpages = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; #endif @@ -488,11 +414,12 @@ static void __init zone_sizes_init(void) max_zone_pfns[ZONE_DMA] = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; max_zone_pfns[ZONE_NORMAL] = max_low_pfn; + remove_all_active_ranges(); #ifdef CONFIG_HIGHMEM max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; - add_active_range(0, 0, highend_pfn); + e820_register_active_regions(0, 0, highend_pfn); #else - add_active_range(0, 0, max_low_pfn); + e820_register_active_regions(0, 0, max_low_pfn); #endif free_area_init_nodes(max_zone_pfns); @@ -526,25 +453,28 @@ static void __init reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base); if (ret == 0 && crash_size > 0) { - if (crash_base > 0) { - printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " - "for crashkernel (System RAM: %ldMB)\n", - (unsigned long)(crash_size >> 20), - (unsigned long)(crash_base >> 20), - (unsigned long)(total_mem >> 20)); - - if (reserve_bootmem(crash_base, crash_size, - BOOTMEM_EXCLUSIVE) < 0) { - printk(KERN_INFO "crashkernel reservation " - "failed - memory is in use\n"); - return; - } - - crashk_res.start = crash_base; - crashk_res.end = crash_base + crash_size - 1; - } else + if (crash_base <= 0) { printk(KERN_INFO "crashkernel reservation failed - " "you have to specify a base address\n"); + return; + } + + if (reserve_bootmem_generic(crash_base, crash_size, + BOOTMEM_EXCLUSIVE) < 0) { + printk(KERN_INFO "crashkernel reservation failed - " + "memory is in use\n"); + return; + } + + printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " + "for crashkernel (System RAM: %ldMB)\n", + (unsigned long)(crash_size >> 20), + (unsigned long)(crash_base >> 20), + (unsigned long)(total_mem >> 20)); + + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + insert_resource(&iomem_resource, &crashk_res); } } #else @@ -558,44 +488,57 @@ static bool do_relocate_initrd = false; static void __init reserve_initrd(void) { - unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; - unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; - unsigned long ramdisk_end = ramdisk_image + ramdisk_size; - unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; - unsigned long ramdisk_here; - - initrd_start = 0; + u64 ramdisk_image = boot_params.hdr.ramdisk_image; + u64 ramdisk_size = boot_params.hdr.ramdisk_size; + u64 ramdisk_end = ramdisk_image + ramdisk_size; + u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; + u64 ramdisk_here; if (!boot_params.hdr.type_of_loader || !ramdisk_image || !ramdisk_size) return; /* No initrd provided by bootloader */ - if (ramdisk_end < ramdisk_image) { - printk(KERN_ERR "initrd wraps around end of memory, " - "disabling initrd\n"); - return; - } + initrd_start = 0; + if (ramdisk_size >= end_of_lowmem/2) { + free_early(ramdisk_image, ramdisk_end); printk(KERN_ERR "initrd too large to handle, " "disabling initrd\n"); return; } + + printk(KERN_INFO "old RAMDISK: %08llx - %08llx\n", ramdisk_image, + ramdisk_end); + + if (ramdisk_end <= end_of_lowmem) { /* All in lowmem, easy case */ - reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT); + /* + * don't need to reserve again, already reserved early + * in i386_start_kernel + */ initrd_start = ramdisk_image + PAGE_OFFSET; initrd_end = initrd_start+ramdisk_size; return; } /* We need to move the initrd down into lowmem */ - ramdisk_here = (end_of_lowmem - ramdisk_size) & PAGE_MASK; + ramdisk_here = find_e820_area(min_low_pfn<<PAGE_SHIFT, + end_of_lowmem, ramdisk_size, + PAGE_SIZE); + + if (ramdisk_here == -1ULL) + panic("Cannot find place for new RAMDISK of size %lld\n", + ramdisk_size); /* Note: this includes all the lowmem currently occupied by the initrd, we rely on that fact to keep the data intact. */ - reserve_bootmem(ramdisk_here, ramdisk_size, BOOTMEM_DEFAULT); + reserve_early(ramdisk_here, ramdisk_here + ramdisk_size, + "NEW RAMDISK"); initrd_start = ramdisk_here + PAGE_OFFSET; initrd_end = initrd_start + ramdisk_size; + printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", + ramdisk_here, ramdisk_here + ramdisk_size); do_relocate_initrd = true; } @@ -604,10 +547,10 @@ static void __init reserve_initrd(void) static void __init relocate_initrd(void) { - unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; - unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; - unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; - unsigned long ramdisk_here; + u64 ramdisk_image = boot_params.hdr.ramdisk_image; + u64 ramdisk_size = boot_params.hdr.ramdisk_size; + u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; + u64 ramdisk_here; unsigned long slop, clen, mapaddr; char *p, *q; @@ -624,6 +567,10 @@ static void __init relocate_initrd(void) p = (char *)__va(ramdisk_image); memcpy(q, p, clen); q += clen; + /* need to free these low pages...*/ + printk(KERN_INFO "Freeing old partial RAMDISK %08llx-%08llx\n", + ramdisk_image, ramdisk_image + clen - 1); + free_bootmem(ramdisk_image, clen); ramdisk_image += clen; ramdisk_size -= clen; } @@ -642,66 +589,47 @@ static void __init relocate_initrd(void) ramdisk_image += clen; ramdisk_size -= clen; } + /* high pages is not converted by early_res_to_bootmem */ + ramdisk_image = boot_params.hdr.ramdisk_image; + ramdisk_size = boot_params.hdr.ramdisk_size; + printk(KERN_INFO "Copied RAMDISK from %016llx - %016llx to %08llx - %08llx\n", + ramdisk_image, ramdisk_image + ramdisk_size - 1, + ramdisk_here, ramdisk_here + ramdisk_size - 1); + + /* need to free that, otherwise init highmem will reserve it again */ + free_early(ramdisk_image, ramdisk_image+ramdisk_size); } #endif /* CONFIG_BLK_DEV_INITRD */ void __init setup_bootmem_allocator(void) { - unsigned long bootmap_size; + int i; + unsigned long bootmap_size, bootmap; /* * Initialize the boot-time allocator (with low memory only): */ - bootmap_size = init_bootmem(min_low_pfn, max_low_pfn); - - register_bootmem_low_pages(max_low_pfn); - - /* - * Reserve the bootmem bitmap itself as well. We do this in two - * steps (first step was init_bootmem()) because this catches - * the (very unlikely) case of us accidentally initializing the - * bootmem allocator with an invalid RAM area. - */ - reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) + - bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text), - BOOTMEM_DEFAULT); - - /* - * reserve physical page 0 - it's a special BIOS page on many boxes, - * enabling clean reboots, SMP operation, laptop functions. - */ - reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT); - - /* reserve EBDA region */ - reserve_ebda_region(); - -#ifdef CONFIG_SMP - /* - * But first pinch a few for the stack/trampoline stuff - * FIXME: Don't need the extra page at 4K, but need to fix - * trampoline before removing it. (see the GDT stuff) - */ - reserve_bootmem(PAGE_SIZE, PAGE_SIZE, BOOTMEM_DEFAULT); -#endif -#ifdef CONFIG_ACPI_SLEEP - /* - * Reserve low memory region for sleep support. - */ - acpi_reserve_bootmem(); -#endif -#ifdef CONFIG_X86_FIND_SMP_CONFIG - /* - * Find and reserve possible boot-time SMP configuration: - */ - find_smp_config(); -#endif + bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; + bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT, + max_pfn_mapped<<PAGE_SHIFT, bootmap_size, + PAGE_SIZE); + if (bootmap == -1L) + panic("Cannot find bootmem map of size %ld\n", bootmap_size); + reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); #ifdef CONFIG_BLK_DEV_INITRD reserve_initrd(); #endif - numa_kva_reserve(); - reserve_crashkernel(); + bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn); + printk(KERN_INFO " mapped low ram: 0 - %08lx\n", + max_pfn_mapped<<PAGE_SHIFT); + printk(KERN_INFO " low ram: %08lx - %08lx\n", + min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT); + printk(KERN_INFO " bootmap %08lx - %08lx\n", + bootmap, bootmap + bootmap_size); + for_each_online_node(i) + free_bootmem_with_active_regions(i, max_low_pfn); + early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); - reserve_ibft_region(); } /* @@ -731,23 +659,7 @@ static void set_mca_bus(int x) static void set_mca_bus(int x) { } #endif -/* Overridden in paravirt.c if CONFIG_PARAVIRT */ -char * __init __attribute__((weak)) memory_setup(void) -{ - return machine_specific_memory_setup(); -} - -#ifdef CONFIG_NUMA -/* - * In the golden day, when everything among i386 and x86_64 will be - * integrated, this will not live here - */ -void *x86_cpu_to_node_map_early_ptr; -int x86_cpu_to_node_map_init[NR_CPUS] = { - [0 ... NR_CPUS-1] = NUMA_NO_NODE -}; -DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE; -#endif +static void probe_roms(void); /* * Determine if we were loaded by an EFI loader. If so, then we have also been @@ -758,17 +670,21 @@ DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE; */ void __init setup_arch(char **cmdline_p) { + int i; unsigned long max_low_pfn; memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); pre_setup_arch_hook(); early_cpu_init(); early_ioremap_init(); + reserve_setup_data(); #ifdef CONFIG_EFI if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, - "EL32", 4)) + "EL32", 4)) { efi_enabled = 1; + efi_reserve_early(); + } #endif ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); @@ -792,8 +708,7 @@ void __init setup_arch(char **cmdline_p) #endif ARCH_SETUP - printk(KERN_INFO "BIOS-provided physical RAM map:\n"); - print_memory_map(memory_setup()); + setup_memory_map(); copy_edd(); @@ -811,12 +726,18 @@ void __init setup_arch(char **cmdline_p) bss_resource.start = virt_to_phys(&__bss_start); bss_resource.end = virt_to_phys(&__bss_stop)-1; + parse_setup_data(); + parse_early_param(); - if (user_defined_memmap) { - printk(KERN_INFO "user-defined physical RAM map:\n"); - print_memory_map("user"); - } + finish_e820_parsing(); + + probe_roms(); + + /* after parse_early_param, so could debug it */ + insert_resource(&iomem_resource, &code_resource); + insert_resource(&iomem_resource, &data_resource); + insert_resource(&iomem_resource, &bss_resource); strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; @@ -824,14 +745,67 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled) efi_init(); + if (ppro_with_ram_bug()) { + e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM, + E820_RESERVED); + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + printk(KERN_INFO "fixed physical RAM map:\n"); + e820_print_map("bad_ppro"); + } + + e820_register_active_regions(0, 0, -1UL); + /* + * partially used pages are not usable - thus + * we are rounding upwards: + */ + max_pfn = e820_end_of_ram(); + + /* preallocate 4k for mptable mpc */ + early_reserve_e820_mpc_new(); /* update e820 for memory not covered by WB MTRRs */ - propagate_e820_map(); mtrr_bp_init(); - if (mtrr_trim_uncached_memory(max_pfn)) - propagate_e820_map(); + if (mtrr_trim_uncached_memory(max_pfn)) { + remove_all_active_ranges(); + e820_register_active_regions(0, 0, -1UL); + max_pfn = e820_end_of_ram(); + } + + dmi_scan_machine(); + + io_delay_init(); + +#ifdef CONFIG_ACPI + /* + * Parse the ACPI tables for possible boot-time SMP configuration. + */ + acpi_boot_table_init(); +#endif + +#ifdef CONFIG_ACPI_NUMA + /* + * Parse SRAT to discover nodes. + */ + acpi_numa_init(); +#endif max_low_pfn = setup_memory(); +#ifdef CONFIG_ACPI_SLEEP + /* + * Reserve low memory region for sleep support. + */ + acpi_reserve_bootmem(); +#endif +#ifdef CONFIG_X86_FIND_SMP_CONFIG + /* + * Find and reserve possible boot-time SMP configuration: + */ + find_smp_config(); +#endif + reserve_crashkernel(); + + reserve_ibft_region(); + #ifdef CONFIG_KVM_CLOCK kvmclock_init(); #endif @@ -855,9 +829,6 @@ void __init setup_arch(char **cmdline_p) * not to exceed the 8Mb limit. */ -#ifdef CONFIG_SMP - smp_alloc_memory(); /* AP processor realmode stacks in low memory*/ -#endif paging_init(); /* @@ -869,10 +840,6 @@ void __init setup_arch(char **cmdline_p) init_ohci1394_dma_on_all_controllers(); #endif - remapped_pgdat_init(); - sparse_init(); - zone_sizes_init(); - /* * NOTE: at this point the bootmem allocator is fully available. */ @@ -881,54 +848,41 @@ void __init setup_arch(char **cmdline_p) relocate_initrd(); #endif - paravirt_post_allocator_init(); - - dmi_scan_machine(); - - io_delay_init(); + remapped_pgdat_init(); + sparse_init(); + zone_sizes_init(); -#ifdef CONFIG_X86_SMP - /* - * setup to use the early static init tables during kernel startup - * X86_SMP will exclude sub-arches that don't deal well with it. - */ - x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init; - x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init; -#ifdef CONFIG_NUMA - x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init; -#endif -#endif + paravirt_post_allocator_init(); #ifdef CONFIG_X86_GENERICARCH generic_apic_probe(); #endif -#ifdef CONFIG_ACPI - /* - * Parse the ACPI tables for possible boot-time SMP configuration. - */ - acpi_boot_table_init(); -#endif - early_quirks(); #ifdef CONFIG_ACPI acpi_boot_init(); - +#endif +#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS) + if (smp_found_config) + get_smp_config(); +#endif #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) if (def_to_bigsmp) printk(KERN_WARNING "More than 8 CPUs detected and " "CONFIG_X86_PC cannot handle it.\nUse " "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); #endif -#endif -#ifdef CONFIG_X86_LOCAL_APIC - if (smp_found_config) - get_smp_config(); -#endif - e820_register_memory(); - e820_mark_nosave_regions(); + e820_reserve_resources(); + e820_mark_nosave_regions(max_low_pfn); + + request_resource(&iomem_resource, &video_ram_resource); + /* request I/O space for devices used on all i[345]86 PCs */ + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) + request_resource(&ioport_resource, &standard_io_resources[i]); + + e820_setup_gap(); #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) @@ -940,25 +894,147 @@ void __init setup_arch(char **cmdline_p) #endif } -/* - * Request address space for all standard resources - * - * This is called just before pcibios_init(), which is also a - * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). - */ -static int __init request_standard_resources(void) +static struct resource system_rom_resource = { + .name = "System ROM", + .start = 0xf0000, + .end = 0xfffff, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}; + +static struct resource extension_rom_resource = { + .name = "Extension ROM", + .start = 0xe0000, + .end = 0xeffff, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}; + +static struct resource adapter_rom_resources[] = { { + .name = "Adapter ROM", + .start = 0xc8000, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +} }; + +static struct resource video_rom_resource = { + .name = "Video ROM", + .start = 0xc0000, + .end = 0xc7fff, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}; + +#define ROMSIGNATURE 0xaa55 + +static int __init romsignature(const unsigned char *rom) +{ + const unsigned short * const ptr = (const unsigned short *)rom; + unsigned short sig; + + return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; +} + +static int __init romchecksum(const unsigned char *rom, unsigned long length) +{ + unsigned char sum, c; + + for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) + sum += c; + return !length && !sum; +} + +static void __init probe_roms(void) { + const unsigned char *rom; + unsigned long start, length, upper; + unsigned char c; int i; - printk(KERN_INFO "Setting up standard PCI resources\n"); - init_iomem_resources(&code_resource, &data_resource, &bss_resource); + /* video rom */ + upper = adapter_rom_resources[0].start; + for (start = video_rom_resource.start; start < upper; start += 2048) { + rom = isa_bus_to_virt(start); + if (!romsignature(rom)) + continue; - request_resource(&iomem_resource, &video_ram_resource); + video_rom_resource.start = start; - /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) - request_resource(&ioport_resource, &standard_io_resources[i]); - return 0; + if (probe_kernel_address(rom + 2, c) != 0) + continue; + + /* 0 < length <= 0x7f * 512, historically */ + length = c * 512; + + /* if checksum okay, trust length byte */ + if (length && romchecksum(rom, length)) + video_rom_resource.end = start + length - 1; + + request_resource(&iomem_resource, &video_rom_resource); + break; + } + + start = (video_rom_resource.end + 1 + 2047) & ~2047UL; + if (start < upper) + start = upper; + + /* system rom */ + request_resource(&iomem_resource, &system_rom_resource); + upper = system_rom_resource.start; + + /* check for extension rom (ignore length byte!) */ + rom = isa_bus_to_virt(extension_rom_resource.start); + if (romsignature(rom)) { + length = extension_rom_resource.end - extension_rom_resource.start + 1; + if (romchecksum(rom, length)) { + request_resource(&iomem_resource, &extension_rom_resource); + upper = extension_rom_resource.start; + } + } + + /* check for adapter roms on 2k boundaries */ + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { + rom = isa_bus_to_virt(start); + if (!romsignature(rom)) + continue; + + if (probe_kernel_address(rom + 2, c) != 0) + continue; + + /* 0 < length <= 0x7f * 512, historically */ + length = c * 512; + + /* but accept any length that fits if checksum okay */ + if (!length || start + length > upper || !romchecksum(rom, length)) + continue; + + adapter_rom_resources[i].start = start; + adapter_rom_resources[i].end = start + length - 1; + request_resource(&iomem_resource, &adapter_rom_resources[i]); + + start = adapter_rom_resources[i++].end & ~2047UL; + } } -subsys_initcall(request_standard_resources); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 6dff1286ad8..16ef53ab538 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -56,6 +56,7 @@ #include <asm/desc.h> #include <video/edid.h> #include <asm/e820.h> +#include <asm/mpspec.h> #include <asm/dma.h> #include <asm/gart.h> #include <asm/mpspec.h> @@ -71,6 +72,7 @@ #include <asm/topology.h> #include <asm/trampoline.h> #include <asm/pat.h> +#include <asm/mmconfig.h> #include <mach_apic.h> #ifdef CONFIG_PARAVIRT @@ -79,6 +81,8 @@ #define ARCH_SETUP #endif +#include "cpu/cpu.h" + /* * Machine setup.. */ @@ -95,8 +99,6 @@ int bootloader_type; unsigned long saved_video_mode; -int force_mwait __cpuinitdata; - /* * Early DMI memory */ @@ -118,7 +120,7 @@ EXPORT_SYMBOL_GPL(edid_info); extern int root_mountflags; -char __initdata command_line[COMMAND_LINE_SIZE]; +static char __initdata command_line[COMMAND_LINE_SIZE]; static struct resource standard_io_resources[] = { { .name = "dma1", .start = 0x00, .end = 0x1f, @@ -164,6 +166,7 @@ static struct resource bss_resource = { .flags = IORESOURCE_RAM, }; +static void __init early_cpu_init(void); static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); #ifdef CONFIG_PROC_VMCORE @@ -243,7 +246,7 @@ static void __init reserve_crashkernel(void) return; } - if (reserve_bootmem(crash_base, crash_size, + if (reserve_bootmem_generic(crash_base, crash_size, BOOTMEM_EXCLUSIVE) < 0) { printk(KERN_INFO "crashkernel reservation failed - " "memory is in use\n"); @@ -265,46 +268,6 @@ static inline void __init reserve_crashkernel(void) {} #endif -/* Overridden in paravirt.c if CONFIG_PARAVIRT */ -void __attribute__((weak)) __init memory_setup(void) -{ - machine_specific_memory_setup(); -} - -static void __init parse_setup_data(void) -{ - struct setup_data *data; - unsigned long pa_data; - - if (boot_params.hdr.version < 0x0209) - return; - pa_data = boot_params.hdr.setup_data; - while (pa_data) { - data = early_ioremap(pa_data, PAGE_SIZE); - switch (data->type) { - default: - break; - } -#ifndef CONFIG_DEBUG_BOOT_PARAMS - free_early(pa_data, pa_data+sizeof(*data)+data->len); -#endif - pa_data = data->next; - early_iounmap(data, PAGE_SIZE); - } -} - -#ifdef CONFIG_PCI_MMCONFIG -extern void __cpuinit fam10h_check_enable_mmcfg(void); -extern void __init check_enable_amd_mmconf_dmi(void); -#else -void __cpuinit fam10h_check_enable_mmcfg(void) -{ -} -void __init check_enable_amd_mmconf_dmi(void) -{ -} -#endif - /* * setup_arch - architecture-specific boot-time initializations * @@ -329,13 +292,15 @@ void __init setup_arch(char **cmdline_p) #endif #ifdef CONFIG_EFI if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, - "EL64", 4)) + "EL64", 4)) { efi_enabled = 1; + efi_reserve_early(); + } #endif ARCH_SETUP - memory_setup(); + setup_memory_map(); copy_edd(); if (!boot_params.hdr.root_flags) @@ -352,6 +317,7 @@ void __init setup_arch(char **cmdline_p) bss_resource.start = virt_to_phys(&__bss_start); bss_resource.end = virt_to_phys(&__bss_stop)-1; + early_cpu_init(); early_identify_cpu(&boot_cpu_data); strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); @@ -381,9 +347,13 @@ void __init setup_arch(char **cmdline_p) * we are rounding upwards: */ end_pfn = e820_end_of_ram(); + + /* pre allocte 4k for mptable mpc */ + early_reserve_e820_mpc_new(); /* update e820 for memory not covered by WB MTRRs */ mtrr_bp_init(); if (mtrr_trim_uncached_memory(end_pfn)) { + remove_all_active_ranges(); e820_register_active_regions(0, 0, -1UL); end_pfn = e820_end_of_ram(); } @@ -392,7 +362,7 @@ void __init setup_arch(char **cmdline_p) check_efer(); - max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT)); + max_pfn_mapped = init_memory_mapping(0, (end_pfn << PAGE_SHIFT)); if (efi_enabled) efi_init(); @@ -406,15 +376,6 @@ void __init setup_arch(char **cmdline_p) kvmclock_init(); #endif -#ifdef CONFIG_SMP - /* setup to use the early static init tables during kernel startup */ - x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init; - x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init; -#ifdef CONFIG_NUMA - x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init; -#endif -#endif - #ifdef CONFIG_ACPI /* * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). @@ -453,13 +414,12 @@ void __init setup_arch(char **cmdline_p) acpi_reserve_bootmem(); #endif - if (efi_enabled) - efi_reserve_bootmem(); - +#ifdef CONFIG_X86_MPPARSE /* * Find and reserve possible boot-time SMP configuration: */ find_smp_config(); +#endif #ifdef CONFIG_BLK_DEV_INITRD if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; @@ -502,11 +462,13 @@ void __init setup_arch(char **cmdline_p) init_cpu_to_node(); +#ifdef CONFIG_X86_MPPARSE /* * get boot-time SMP configuration: */ if (smp_found_config) get_smp_config(); +#endif init_apic_mappings(); ioapic_init_mappings(); @@ -516,7 +478,7 @@ void __init setup_arch(char **cmdline_p) * We trust e820 completely. No explicit ROM probing in memory. */ e820_reserve_resources(); - e820_mark_nosave_regions(); + e820_mark_nosave_regions(end_pfn); /* request I/O space for devices used on all i[345]86 PCs */ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) @@ -537,7 +499,20 @@ void __init setup_arch(char **cmdline_p) check_enable_amd_mmconf_dmi(); } -static int __cpuinit get_model_name(struct cpuinfo_x86 *c) +struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; + +static void __cpuinit default_init(struct cpuinfo_x86 *c) +{ + display_cacheinfo(c); +} + +static struct cpu_dev __cpuinitdata default_cpu = { + .c_init = default_init, + .c_vendor = "Unknown", +}; +static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; + +int __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; @@ -553,7 +528,7 @@ static int __cpuinit get_model_name(struct cpuinfo_x86 *c) } -static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) +void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, eax, ebx, ecx, edx; @@ -585,228 +560,6 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) } } -#ifdef CONFIG_NUMA -static int __cpuinit nearby_node(int apicid) -{ - int i, node; - - for (i = apicid - 1; i >= 0; i--) { - node = apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { - node = apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - return first_node(node_online_map); /* Shouldn't happen */ -} -#endif - -/* - * On a AMD dual core setup the lower bits of the APIC id distingush the cores. - * Assumes number of cores is a power of two. - */ -static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned bits; -#ifdef CONFIG_NUMA - int cpu = smp_processor_id(); - int node = 0; - unsigned apicid = hard_smp_processor_id(); -#endif - bits = c->x86_coreid_bits; - - /* Low order bits define the core id (index of core in socket) */ - c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); - /* Convert the initial APIC ID into the socket ID */ - c->phys_proc_id = c->initial_apicid >> bits; - -#ifdef CONFIG_NUMA - node = c->phys_proc_id; - if (apicid_to_node[apicid] != NUMA_NO_NODE) - node = apicid_to_node[apicid]; - if (!node_online(node)) { - /* Two possibilities here: - - The CPU is missing memory and no node was created. - In that case try picking one from a nearby CPU - - The APIC IDs differ from the HyperTransport node IDs - which the K8 northbridge parsing fills in. - Assume they are all increased by a constant offset, - but in the same order as the HT nodeids. - If that doesn't result in a usable node fall back to the - path for the previous case. */ - - int ht_nodeid = c->initial_apicid; - - if (ht_nodeid >= 0 && - apicid_to_node[ht_nodeid] != NUMA_NO_NODE) - node = apicid_to_node[ht_nodeid]; - /* Pick a nearby node */ - if (!node_online(node)) - node = nearby_node(apicid); - } - numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); -#endif -#endif -} - -static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned bits, ecx; - - /* Multi core CPU? */ - if (c->extended_cpuid_level < 0x80000008) - return; - - ecx = cpuid_ecx(0x80000008); - - c->x86_max_cores = (ecx & 0xff) + 1; - - /* CPU telling us the core id bits shift? */ - bits = (ecx >> 12) & 0xF; - - /* Otherwise recompute */ - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - - c->x86_coreid_bits = bits; - -#endif -} - -#define ENABLE_C1E_MASK 0x18000000 -#define CPUID_PROCESSOR_SIGNATURE 1 -#define CPUID_XFAM 0x0ff00000 -#define CPUID_XFAM_K8 0x00000000 -#define CPUID_XFAM_10H 0x00100000 -#define CPUID_XFAM_11H 0x00200000 -#define CPUID_XMOD 0x000f0000 -#define CPUID_XMOD_REV_F 0x00040000 - -/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ -static __cpuinit int amd_apic_timer_broken(void) -{ - u32 lo, hi, eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); - - switch (eax & CPUID_XFAM) { - case CPUID_XFAM_K8: - if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) - break; - case CPUID_XFAM_10H: - case CPUID_XFAM_11H: - rdmsr(MSR_K8_ENABLE_C1E, lo, hi); - if (lo & ENABLE_C1E_MASK) - return 1; - break; - default: - /* err on the side of caution */ - return 1; - } - return 0; -} - -static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) -{ - early_init_amd_mc(c); - - /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ - if (c->x86_power & (1<<8)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); -} - -static void __cpuinit init_amd(struct cpuinfo_x86 *c) -{ - unsigned level; - -#ifdef CONFIG_SMP - unsigned long value; - - /* - * Disable TLB flush filter by setting HWCR.FFDIS on K8 - * bit 6 of msr C001_0015 - * - * Errata 63 for SH-B3 steppings - * Errata 122 for all steppings (F+ have it disabled by default) - */ - if (c->x86 == 15) { - rdmsrl(MSR_K8_HWCR, value); - value |= 1 << 6; - wrmsrl(MSR_K8_HWCR, value); - } -#endif - - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_cpu_cap(c, 0*32+31); - - /* On C+ stepping K8 rep microcode works well for copy/memset */ - level = cpuid_eax(1); - if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || - level >= 0x0f58)) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - if (c->x86 == 0x10 || c->x86 == 0x11) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - - /* Enable workaround for FXSAVE leak */ - if (c->x86 >= 6) - set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - - level = get_model_name(c); - if (!level) { - switch (c->x86) { - case 15: - /* Should distinguish Models here, but this is only - a fallback anyways. */ - strcpy(c->x86_model_id, "Hammer"); - break; - } - } - display_cacheinfo(c); - - /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) - amd_detect_cmp(c); - - if (c->extended_cpuid_level >= 0x80000006 && - (cpuid_edx(0x80000006) & 0xf000)) - num_cache_leaves = 4; - else - num_cache_leaves = 3; - - if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11) - set_cpu_cap(c, X86_FEATURE_K8); - - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); - - if (c->x86 == 0x10) - fam10h_check_enable_mmcfg(); - - if (amd_apic_timer_broken()) - disable_apic_timer = 1; - - if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { - unsigned long long tseg; - - /* - * Split up direct mapping around the TSEG SMM area. - * Don't do it for gbpages because there seems very little - * benefit in doing so. - */ - if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) && - (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT))) - set_memory_4k((unsigned long)__va(tseg), 1); - } -} - void __cpuinit detect_ht(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP @@ -857,135 +610,59 @@ out: #endif } -/* - * find out the number of processor cores on the die - */ -static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) -{ - unsigned int eax, t; - - if (c->cpuid_level < 4) - return 1; - - cpuid_count(4, 0, &eax, &t, &t, &t); - - if (eax & 0x1f) - return ((eax >> 26) + 1); - else - return 1; -} - -static void __cpuinit srat_detect_node(void) -{ -#ifdef CONFIG_NUMA - unsigned node; - int cpu = smp_processor_id(); - int apicid = hard_smp_processor_id(); - - /* Don't do the funky fallback heuristics the AMD version employs - for now. */ - node = apicid_to_node[apicid]; - if (node == NUMA_NO_NODE || !node_online(node)) - node = first_node(node_online_map); - numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); -#endif -} - -static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) -{ - if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); -} - -static void __cpuinit init_intel(struct cpuinfo_x86 *c) +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { - /* Cache sizes */ - unsigned n; - - init_intel_cacheinfo(c); - if (c->cpuid_level > 9) { - unsigned eax = cpuid_eax(10); - /* Check for version and the number of counters */ - if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) - set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); + char *v = c->x86_vendor_id; + int i; + static int printed; + + for (i = 0; i < X86_VENDOR_NUM; i++) { + if (cpu_devs[i]) { + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v, cpu_devs[i]->c_ident[1]))) { + c->x86_vendor = i; + this_cpu = cpu_devs[i]; + return; + } + } } - - if (cpu_has_ds) { - unsigned int l1, l2; - rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); - if (!(l1 & (1<<11))) - set_cpu_cap(c, X86_FEATURE_BTS); - if (!(l1 & (1<<12))) - set_cpu_cap(c, X86_FEATURE_PEBS); + if (!printed) { + printed++; + printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); + printk(KERN_ERR "CPU: Your system may be unstable.\n"); } - - - if (cpu_has_bts) - ds_init_intel(c); - - n = c->extended_cpuid_level; - if (n >= 0x80000008) { - unsigned eax = cpuid_eax(0x80000008); - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; - /* CPUID workaround for Intel 0F34 CPU */ - if (c->x86_vendor == X86_VENDOR_INTEL && - c->x86 == 0xF && c->x86_model == 0x3 && - c->x86_mask == 0x4) - c->x86_phys_bits = 36; - } - - if (c->x86 == 15) - c->x86_cache_alignment = c->x86_clflush_size * 2; - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - c->x86_max_cores = intel_num_cpu_cores(c); - - srat_detect_node(); -} - -static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) -{ - if (c->x86 == 0x6 && c->x86_model >= 0xf) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + c->x86_vendor = X86_VENDOR_UNKNOWN; } -static void __cpuinit init_centaur(struct cpuinfo_x86 *c) +static void __init early_cpu_support_print(void) { - /* Cache sizes */ - unsigned n; + int i,j; + struct cpu_dev *cpu_devx; - n = c->extended_cpuid_level; - if (n >= 0x80000008) { - unsigned eax = cpuid_eax(0x80000008); - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; - } - - if (c->x86 == 0x6 && c->x86_model >= 0xf) { - c->x86_cache_alignment = c->x86_clflush_size * 2; - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - set_cpu_cap(c, X86_FEATURE_REP_GOOD); + printk("KERNEL supported cpus:\n"); + for (i = 0; i < X86_VENDOR_NUM; i++) { + cpu_devx = cpu_devs[i]; + if (!cpu_devx) + continue; + for (j = 0; j < 2; j++) { + if (!cpu_devx->c_ident[j]) + continue; + printk(" %s %s\n", cpu_devx->c_vendor, + cpu_devx->c_ident[j]); + } } - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); } -static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) +static void __init early_cpu_init(void) { - char *v = c->x86_vendor_id; + struct cpu_vendor_dev *cvdev; - if (!strcmp(v, "AuthenticAMD")) - c->x86_vendor = X86_VENDOR_AMD; - else if (!strcmp(v, "GenuineIntel")) - c->x86_vendor = X86_VENDOR_INTEL; - else if (!strcmp(v, "CentaurHauls")) - c->x86_vendor = X86_VENDOR_CENTAUR; - else - c->x86_vendor = X86_VENDOR_UNKNOWN; + for (cvdev = __x86cpuvendor_start ; + cvdev < __x86cpuvendor_end ; + cvdev++) + cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + early_cpu_support_print(); } /* Do some early cpuid on the boot CPU to get some parameter that are @@ -1066,17 +743,9 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x80000007) c->x86_power = cpuid_edx(0x80000007); - switch (c->x86_vendor) { - case X86_VENDOR_AMD: - early_init_amd(c); - break; - case X86_VENDOR_INTEL: - early_init_intel(c); - break; - case X86_VENDOR_CENTAUR: - early_init_centaur(c); - break; - } + if (c->x86_vendor != X86_VENDOR_UNKNOWN && + cpu_devs[c->x86_vendor]->c_early_init) + cpu_devs[c->x86_vendor]->c_early_init(c); validate_pat_support(c); } @@ -1104,24 +773,8 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) * At the end of this section, c->x86_capability better * indicate the features this CPU genuinely supports! */ - switch (c->x86_vendor) { - case X86_VENDOR_AMD: - init_amd(c); - break; - - case X86_VENDOR_INTEL: - init_intel(c); - break; - - case X86_VENDOR_CENTAUR: - init_centaur(c); - break; - - case X86_VENDOR_UNKNOWN: - default: - display_cacheinfo(c); - break; - } + if (this_cpu->c_init) + this_cpu->c_init(c); detect_ht(c); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 664a5db36d4..d3ad4e09455 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -59,7 +59,6 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/mtrr.h> -#include <asm/nmi.h> #include <asm/vmi.h> #include <asm/genapic.h> #include <linux/mc146818rtc.h> @@ -68,22 +67,6 @@ #include <mach_wakecpu.h> #include <smpboot_hooks.h> -/* - * FIXME: For x86_64, those are defined in other files. But moving them here, - * would make the setup areas dependent on smp, which is a loss. When we - * integrate apic between arches, we can probably do a better job, but - * right now, they'll stay here -- glommer - */ - -/* which logical CPU number maps to which CPU (physical APIC ID) */ -u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata = - { [0 ... NR_CPUS-1] = BAD_APICID }; -void *x86_cpu_to_apicid_early_ptr; - -u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata - = { [0 ... NR_CPUS-1] = BAD_APICID }; -void *x86_bios_cpu_apicid_early_ptr; - #ifdef CONFIG_X86_32 u8 apicid_2_node[MAX_APICID]; static int low_mappings; @@ -555,23 +538,6 @@ cpumask_t cpu_coregroup_map(int cpu) return c->llc_shared_map; } -#ifdef CONFIG_X86_32 -/* - * We are called very early to get the low memory for the - * SMP bootup trampoline page. - */ -void __init smp_alloc_memory(void) -{ - trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE); - /* - * Has to be in very low memory so we can execute - * real-mode AP code. - */ - if (__pa(trampoline_base) >= 0x9F000) - BUG(); -} -#endif - static void impress_friends(void) { int cpu; @@ -832,6 +798,45 @@ static void __cpuinit do_fork_idle(struct work_struct *work) complete(&c_idle->done); } +#ifdef CONFIG_X86_64 +/* + * Allocate node local memory for the AP pda. + * + * Must be called after the _cpu_pda pointer table is initialized. + */ +static int __cpuinit get_local_pda(int cpu) +{ + struct x8664_pda *oldpda, *newpda; + unsigned long size = sizeof(struct x8664_pda); + int node = cpu_to_node(cpu); + + if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem) + return 0; + + oldpda = cpu_pda(cpu); + newpda = kmalloc_node(size, GFP_ATOMIC, node); + if (!newpda) { + printk(KERN_ERR "Could not allocate node local PDA " + "for CPU %d on node %d\n", cpu, node); + + if (oldpda) + return 0; /* have a usable pda */ + else + return -1; + } + + if (oldpda) { + memcpy(newpda, oldpda, size); + if (!after_bootmem) + free_bootmem((unsigned long)oldpda, size); + } + + newpda->in_bootmem = 0; + cpu_pda(cpu) = newpda; + return 0; +} +#endif /* CONFIG_X86_64 */ + static int __cpuinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad @@ -857,19 +862,11 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) } /* Allocate node local memory for AP pdas */ - if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) { - struct x8664_pda *newpda, *pda; - int node = cpu_to_node(cpu); - pda = cpu_pda(cpu); - newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC, - node); - if (newpda) { - memcpy(newpda, pda, sizeof(struct x8664_pda)); - cpu_pda(cpu) = newpda; - } else - printk(KERN_ERR - "Could not allocate node local PDA for CPU %d on node %d\n", - cpu, node); + if (cpu > 0) { + boot_error = get_local_pda(cpu); + if (boot_error) + goto restore_state; + /* if can't get pda memory, can't start cpu */ } #endif @@ -988,11 +985,13 @@ do_rest: } } +restore_state: + if (boot_error) { /* Try to put things back the way they were before ... */ unmap_cpu_to_logical_apicid(cpu); #ifdef CONFIG_X86_64 - clear_node_cpumask(cpu); /* was set by numa_add_cpu */ + numa_remove_cpu(cpu); /* was set by numa_add_cpu */ #endif cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ @@ -1156,9 +1155,11 @@ static int __init smp_sanity_check(unsigned max_cpus) * If SMP should be disabled, then really disable it! */ if (!max_cpus) { - printk(KERN_INFO "SMP mode deactivated," - "forcing use of dummy APIC emulation.\n"); + printk(KERN_INFO "SMP mode deactivated.\n"); smpboot_clear_io_apic(); + + localise_nmi_watchdog(); + #ifdef CONFIG_X86_32 connect_bsp_APIC(); #endif @@ -1362,6 +1363,8 @@ __init void prefill_possible_map(void) for (i = 0; i < possible; i++) cpu_set(i, cpu_possible_map); + + nr_cpu_ids = possible; } static void __ref remove_cpu_from_maps(int cpu) @@ -1372,7 +1375,7 @@ static void __ref remove_cpu_from_maps(int cpu) cpu_clear(cpu, cpu_callin_map); /* was set by cpu_init() */ clear_bit(cpu, (unsigned long *)&cpu_initialized); - clear_node_cpumask(cpu); + numa_remove_cpu(cpu); #endif } diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c index 70e4a374b4e..5978023b799 100644 --- a/arch/x86/kernel/srat_32.c +++ b/arch/x86/kernel/srat_32.c @@ -31,6 +31,7 @@ #include <asm/srat.h> #include <asm/topology.h> #include <asm/smp.h> +#include <asm/e820.h> /* * proximity macros and definitions @@ -41,7 +42,7 @@ #define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit))) /* bitmap length; _PXM is at most 255 */ #define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) -static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ +static u8 __initdata pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ #define MAX_CHUNKS_PER_NODE 3 #define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES) @@ -52,16 +53,37 @@ struct node_memory_chunk_s { u8 nid; // which cnode contains this chunk? u8 bank; // which mem bank on this node }; -static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS]; +static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS]; -static int num_memory_chunks; /* total number of memory chunks */ +static int __initdata num_memory_chunks; /* total number of memory chunks */ static u8 __initdata apicid_to_pxm[MAX_APICID]; +int numa_off __initdata; +int acpi_numa __initdata; + +static __init void bad_srat(void) +{ + printk(KERN_ERR "SRAT: SRAT not used.\n"); + acpi_numa = -1; + num_memory_chunks = 0; +} + +static __init inline int srat_disabled(void) +{ + return numa_off || acpi_numa < 0; +} + /* Identify CPU proximity domains */ -static void __init parse_cpu_affinity_structure(char *p) +void __init +acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity) { - struct acpi_srat_cpu_affinity *cpu_affinity = - (struct acpi_srat_cpu_affinity *) p; + if (srat_disabled()) + return; + if (cpu_affinity->header.length != + sizeof(struct acpi_srat_cpu_affinity)) { + bad_srat(); + return; + } if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0) return; /* empty entry */ @@ -79,14 +101,21 @@ static void __init parse_cpu_affinity_structure(char *p) * Identify memory proximity domains and hot-remove capabilities. * Fill node memory chunk list structure. */ -static void __init parse_memory_affinity_structure (char *sratp) +void __init +acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity) { unsigned long long paddr, size; unsigned long start_pfn, end_pfn; u8 pxm; struct node_memory_chunk_s *p, *q, *pend; - struct acpi_srat_mem_affinity *memory_affinity = - (struct acpi_srat_mem_affinity *) sratp; + + if (srat_disabled()) + return; + if (memory_affinity->header.length != + sizeof(struct acpi_srat_mem_affinity)) { + bad_srat(); + return; + } if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0) return; /* empty entry */ @@ -134,6 +163,14 @@ static void __init parse_memory_affinity_structure (char *sratp) "enabled and removable" : "enabled" ) ); } +/* Callback for SLIT parsing */ +void __init acpi_numa_slit_init(struct acpi_table_slit *slit) +{ +} + +void acpi_numa_arch_fixup(void) +{ +} /* * The SRAT table always lists ascending addresses, so can always * assume that the first "start" address that you see is the real @@ -166,39 +203,13 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c node_end_pfn[nid] = memory_chunk->end_pfn; } -/* Parse the ACPI Static Resource Affinity Table */ -static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) +int __init get_memcfg_from_srat(void) { - u8 *start, *end, *p; int i, j, nid; - start = (u8 *)(&(sratp->reserved) + 1); /* skip header */ - p = start; - end = (u8 *)sratp + sratp->header.length; - - memset(pxm_bitmap, 0, sizeof(pxm_bitmap)); /* init proximity domain bitmap */ - memset(node_memory_chunk, 0, sizeof(node_memory_chunk)); - num_memory_chunks = 0; - while (p < end) { - switch (*p) { - case ACPI_SRAT_TYPE_CPU_AFFINITY: - parse_cpu_affinity_structure(p); - break; - case ACPI_SRAT_TYPE_MEMORY_AFFINITY: - parse_memory_affinity_structure(p); - break; - default: - printk("ACPI 2.0 SRAT: unknown entry skipped: type=0x%02X, len=%d\n", p[0], p[1]); - break; - } - p += p[1]; - if (p[1] == 0) { - printk("acpi20_parse_srat: Entry length value is zero;" - " can't parse any further!\n"); - break; - } - } + if (srat_disabled()) + goto out_fail; if (num_memory_chunks == 0) { printk("could not finy any ACPI SRAT memory areas.\n"); @@ -244,115 +255,19 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", j, chunk->nid, chunk->start_pfn, chunk->end_pfn); node_read_chunk(chunk->nid, chunk); - add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn); + e820_register_active_regions(chunk->nid, chunk->start_pfn, + min(chunk->end_pfn, max_pfn)); } - + for_each_online_node(nid) { unsigned long start = node_start_pfn[nid]; - unsigned long end = node_end_pfn[nid]; + unsigned long end = min(node_end_pfn[nid], max_pfn); memory_present(nid, start, end); node_remap_size[nid] = node_memmap_size_bytes(nid, start, end); } return 1; out_fail: - return 0; -} - -struct acpi_static_rsdt { - struct acpi_table_rsdt table; - u32 padding[7]; /* Allow for 7 more table entries */ -}; - -int __init get_memcfg_from_srat(void) -{ - struct acpi_table_header *header = NULL; - struct acpi_table_rsdp *rsdp = NULL; - struct acpi_table_rsdt *rsdt = NULL; - acpi_native_uint rsdp_address = 0; - struct acpi_static_rsdt saved_rsdt; - int tables = 0; - int i = 0; - - rsdp_address = acpi_os_get_root_pointer(); - if (!rsdp_address) { - printk("%s: System description tables not found\n", - __func__); - goto out_err; - } - - printk("%s: assigning address to rsdp\n", __func__); - rsdp = (struct acpi_table_rsdp *)(u32)rsdp_address; - if (!rsdp) { - printk("%s: Didn't find ACPI root!\n", __func__); - goto out_err; - } - - printk(KERN_INFO "%.8s v%d [%.6s]\n", rsdp->signature, rsdp->revision, - rsdp->oem_id); - - if (strncmp(rsdp->signature, ACPI_SIG_RSDP,strlen(ACPI_SIG_RSDP))) { - printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __func__); - goto out_err; - } - - rsdt = (struct acpi_table_rsdt *) - early_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt)); - - if (!rsdt) { - printk(KERN_WARNING - "%s: ACPI: Invalid root system description tables (RSDT)\n", - __func__); - goto out_err; - } - - header = &rsdt->header; - - if (strncmp(header->signature, ACPI_SIG_RSDT, strlen(ACPI_SIG_RSDT))) { - printk(KERN_WARNING "ACPI: RSDT signature incorrect\n"); - goto out_err; - } - - /* - * The number of tables is computed by taking the - * size of all entries (header size minus total - * size of RSDT) divided by the size of each entry - * (4-byte table pointers). - */ - tables = (header->length - sizeof(struct acpi_table_header)) / 4; - - if (!tables) - goto out_err; - - memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt)); - - if (saved_rsdt.table.header.length > sizeof(saved_rsdt)) { - printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n", - saved_rsdt.table.header.length); - goto out_err; - } - - printk("Begin SRAT table scan....\n"); - - for (i = 0; i < tables; i++) { - /* Map in header, then map in full table length. */ - header = (struct acpi_table_header *) - early_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header)); - if (!header) - break; - header = (struct acpi_table_header *) - early_ioremap(saved_rsdt.table.table_offset_entry[i], header->length); - if (!header) - break; - - if (strncmp((char *) &header->signature, ACPI_SIG_SRAT, 4)) - continue; - - /* we've found the srat table. don't need to look at any more tables */ - return acpi20_parse_srat((struct acpi_table_srat *)header); - } -out_err: - remove_all_active_ranges(); printk("failed to get NUMA memory information from SRAT table\n"); return 0; } diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index ae751094eba..d67ce5f044b 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -36,7 +36,9 @@ static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; +#ifndef CONFIG_X86_NUMAQ static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata; +#endif static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) { diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index d2ab52cc1d6..7066cb855a6 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -19,8 +19,8 @@ #include <linux/utsname.h> #include <linux/ipc.h> -#include <asm/uaccess.h> -#include <asm/unistd.h> +#include <linux/uaccess.h> +#include <linux/unistd.h> asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -103,7 +103,7 @@ asmlinkage int old_select(struct sel_arg_struct __user *arg) * * This is really horribly ugly. */ -asmlinkage int sys_ipc (uint call, int first, int second, +asmlinkage int sys_ipc(uint call, int first, int second, int third, void __user *ptr, long fifth) { int version, ret; @@ -113,24 +113,24 @@ asmlinkage int sys_ipc (uint call, int first, int second, switch (call) { case SEMOP: - return sys_semtimedop (first, (struct sembuf __user *)ptr, second, NULL); + return sys_semtimedop(first, (struct sembuf __user *)ptr, second, NULL); case SEMTIMEDOP: return sys_semtimedop(first, (struct sembuf __user *)ptr, second, (const struct timespec __user *)fifth); case SEMGET: - return sys_semget (first, second, third); + return sys_semget(first, second, third); case SEMCTL: { union semun fourth; if (!ptr) return -EINVAL; if (get_user(fourth.__pad, (void __user * __user *) ptr)) return -EFAULT; - return sys_semctl (first, second, third, fourth); + return sys_semctl(first, second, third, fourth); } case MSGSND: - return sys_msgsnd (first, (struct msgbuf __user *) ptr, + return sys_msgsnd(first, (struct msgbuf __user *) ptr, second, third); case MSGRCV: switch (version) { @@ -138,45 +138,45 @@ asmlinkage int sys_ipc (uint call, int first, int second, struct ipc_kludge tmp; if (!ptr) return -EINVAL; - + if (copy_from_user(&tmp, - (struct ipc_kludge __user *) ptr, - sizeof (tmp))) + (struct ipc_kludge __user *) ptr, + sizeof(tmp))) return -EFAULT; - return sys_msgrcv (first, tmp.msgp, second, + return sys_msgrcv(first, tmp.msgp, second, tmp.msgtyp, third); } default: - return sys_msgrcv (first, + return sys_msgrcv(first, (struct msgbuf __user *) ptr, second, fifth, third); } case MSGGET: - return sys_msgget ((key_t) first, second); + return sys_msgget((key_t) first, second); case MSGCTL: - return sys_msgctl (first, second, (struct msqid_ds __user *) ptr); + return sys_msgctl(first, second, (struct msqid_ds __user *) ptr); case SHMAT: switch (version) { default: { ulong raddr; - ret = do_shmat (first, (char __user *) ptr, second, &raddr); + ret = do_shmat(first, (char __user *) ptr, second, &raddr); if (ret) return ret; - return put_user (raddr, (ulong __user *) third); + return put_user(raddr, (ulong __user *) third); } case 1: /* iBCS2 emulator entry point */ if (!segment_eq(get_fs(), get_ds())) return -EINVAL; /* The "(ulong *) third" is valid _only_ because of the kernel segment thing */ - return do_shmat (first, (char __user *) ptr, second, (ulong *) third); + return do_shmat(first, (char __user *) ptr, second, (ulong *) third); } - case SHMDT: - return sys_shmdt ((char __user *)ptr); + case SHMDT: + return sys_shmdt((char __user *)ptr); case SHMGET: - return sys_shmget (first, second, third); + return sys_shmget(first, second, third); case SHMCTL: - return sys_shmctl (first, second, + return sys_shmctl(first, second, (struct shmid_ds __user *) ptr); default: return -ENOSYS; @@ -186,28 +186,28 @@ asmlinkage int sys_ipc (uint call, int first, int second, /* * Old cruft */ -asmlinkage int sys_uname(struct old_utsname __user * name) +asmlinkage int sys_uname(struct old_utsname __user *name) { int err; if (!name) return -EFAULT; down_read(&uts_sem); - err = copy_to_user(name, utsname(), sizeof (*name)); + err = copy_to_user(name, utsname(), sizeof(*name)); up_read(&uts_sem); - return err?-EFAULT:0; + return err? -EFAULT:0; } -asmlinkage int sys_olduname(struct oldold_utsname __user * name) +asmlinkage int sys_olduname(struct oldold_utsname __user *name) { int error; if (!name) return -EFAULT; - if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) + if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) return -EFAULT; - - down_read(&uts_sem); - + + down_read(&uts_sem); + error = __copy_to_user(&name->sysname, &utsname()->sysname, __OLD_UTS_LEN); error |= __put_user(0, name->sysname + __OLD_UTS_LEN); @@ -223,9 +223,9 @@ asmlinkage int sys_olduname(struct oldold_utsname __user * name) error |= __copy_to_user(&name->machine, &utsname()->machine, __OLD_UTS_LEN); error |= __put_user(0, name->machine + __OLD_UTS_LEN); - + up_read(&uts_sem); - + error = error ? -EFAULT : 0; return error; @@ -241,6 +241,6 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]) long __res; asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" : "=a" (__res) - : "0" (__NR_execve),"ri" (filename),"c" (argv), "d" (envp) : "memory"); + : "0" (__NR_execve), "ri" (filename), "c" (argv), "d" (envp) : "memory"); return __res; } diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 2ff21f39893..5f29f12da50 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -84,8 +84,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) if (timer_ack) { /* * Subtle, when I/O APICs are used we have to ack timer IRQ - * manually to reset the IRR bit for do_slow_gettimeoffset(). - * This will also deassert NMI lines for the watchdog if run + * manually to deassert NMI lines for the watchdog if run * on an 82489DX-based system. */ spin_lock(&i8259A_lock); diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index c737849e2ef..39ae8511a13 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -123,6 +123,8 @@ void __init time_init(void) (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) cpu_khz = calculate_cpu_khz(); + lpj_fine = ((unsigned long)tsc_khz * 1000)/HZ; + if (unsynchronized_tsc()) mark_tsc_unstable("TSCs unsynchronized"); diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index abbf199adeb..1106fac6024 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -2,7 +2,7 @@ #include <asm/trampoline.h> -/* ready for x86_64, no harm for x86, since it will overwrite after alloc */ +/* ready for x86_64 and x86 */ unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); /* diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 08d752de4ee..cf37d20b1ba 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -756,9 +756,9 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) static DEFINE_SPINLOCK(nmi_print_lock); -void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg) +void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) { - if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) return; spin_lock(&nmi_print_lock); @@ -767,10 +767,12 @@ void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg) * to get a message out: */ bust_spinlocks(1); - printk(KERN_EMERG "%s", msg); + printk(KERN_EMERG "%s", str); printk(" on CPU%d, ip %08lx, registers:\n", smp_processor_id(), regs->ip); show_registers(regs); + if (do_panic) + panic("Non maskable interrupt"); console_silent(); spin_unlock(&nmi_print_lock); bust_spinlocks(0); diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index adff76ea97c..80ba6d37bfe 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -71,12 +71,13 @@ asmlinkage void general_protection(void); asmlinkage void page_fault(void); asmlinkage void coprocessor_error(void); asmlinkage void simd_coprocessor_error(void); -asmlinkage void reserved(void); asmlinkage void alignment_check(void); asmlinkage void machine_check(void); asmlinkage void spurious_interrupt_bug(void); +int panic_on_unrecovered_nmi; static unsigned int code_bytes = 64; +static unsigned ignore_nmis; static inline void conditional_sti(struct pt_regs *regs) { @@ -614,7 +615,9 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic) * We are in trouble anyway, lets at least try * to get a message out. */ - printk(str, smp_processor_id()); + printk(KERN_EMERG "%s", str); + printk(" on CPU%d, ip %08lx, registers:\n", + smp_processor_id(), regs->ip); show_registers(regs); if (kexec_should_crash(current)) crash_kexec(regs); @@ -702,12 +705,10 @@ DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) DO_ERROR( 4, SIGSEGV, "overflow", overflow) DO_ERROR( 5, SIGSEGV, "bounds", bounds) DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) -DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) -DO_ERROR(18, SIGSEGV, "reserved", reserved) /* Runs on IST stack */ asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) @@ -865,6 +866,28 @@ asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) io_check_error(reason, regs); } +asmlinkage notrace __kprobes void +do_nmi(struct pt_regs *regs, long error_code) +{ + nmi_enter(); + add_pda(__nmi_count, 1); + if (!ignore_nmis) + default_do_nmi(regs); + nmi_exit(); +} + +void stop_nmi(void) +{ + acpi_nmi_disable(); + ignore_nmis++; +} + +void restart_nmi(void) +{ + ignore_nmis--; + acpi_nmi_enable(); +} + /* runs on IST stack. */ asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) { diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 65b70637ad9..6240922e497 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -1,6 +1,7 @@ #include <linux/sched.h> #include <linux/clocksource.h> #include <linux/workqueue.h> +#include <linux/delay.h> #include <linux/cpufreq.h> #include <linux/jiffies.h> #include <linux/init.h> @@ -286,7 +287,6 @@ core_initcall(cpufreq_tsc); /* clock source code */ -static unsigned long current_tsc_khz; static struct clocksource clocksource_tsc; /* @@ -404,6 +404,7 @@ static inline void check_geode_tsc_reliable(void) { } void __init tsc_init(void) { int cpu; + u64 lpj; if (!cpu_has_tsc || tsc_disabled > 0) return; @@ -416,6 +417,10 @@ void __init tsc_init(void) return; } + lpj = ((u64)tsc_khz * 1000); + do_div(lpj, HZ); + lpj_fine = lpj; + /* now allow native_sched_clock() to use rdtsc */ tsc_disabled = 0; @@ -439,9 +444,8 @@ void __init tsc_init(void) unsynchronized_tsc(); check_geode_tsc_reliable(); - current_tsc_khz = tsc_khz; - clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, - clocksource_tsc.shift); + clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, + clocksource_tsc.shift); /* lower the rating if we already know its unstable: */ if (check_tsc_unstable()) { clocksource_tsc.rating = 0; diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index 1784b8077a1..9898fb01edf 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -242,7 +242,7 @@ void __init tsc_calibrate(void) if (hpet) { printk(KERN_INFO "TSC calibrated against HPET\n"); if (hpet2 < hpet1) - hpet2 += 0x100000000; + hpet2 += 0x100000000UL; hpet2 -= hpet1; tsc1 = (hpet2 * hpet_readl(HPET_PERIOD)) / 1000000; } else { diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index ce5ed083a1e..2674f579627 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -60,13 +60,6 @@ SECTIONS BUG_TABLE :text - . = ALIGN(4); - .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { - __tracedata_start = .; - *(.tracedata) - __tracedata_end = .; - } - RODATA /* writeable */ diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index fad3674b06a..fd246e22fe6 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -53,13 +53,6 @@ SECTIONS RODATA - . = ALIGN(4); - .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { - __tracedata_start = .; - *(.tracedata) - __tracedata_end = .; - } - . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { @@ -177,6 +170,7 @@ SECTIONS *(.con_initcall.init) } __con_initcall_end = .; + . = ALIGN(16); __x86cpuvendor_start = .; .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { *(.x86cpuvendor.init) diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index ba8c0b75ab0..0c029e8959c 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -15,9 +15,12 @@ #include <linux/init.h> #include <linux/pci_ids.h> #include <linux/pci_regs.h> + +#include <asm/apic.h> #include <asm/pci-direct.h> #include <asm/io.h> #include <asm/paravirt.h> +#include <asm/setup.h> #if defined CONFIG_PCI && defined CONFIG_PARAVIRT /* diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 5c7e2fd5207..e72cf0793fb 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -835,7 +835,7 @@ static __init char *lguest_memory_setup(void) /* The Linux bootloader header contains an "e820" memory map: the * Launcher populated the first entry with our memory limit. */ - add_memory_region(boot_params.e820_map[0].addr, + e820_add_region(boot_params.e820_map[0].addr, boot_params.e820_map[0].size, boot_params.e820_map[0].type); @@ -1012,6 +1012,7 @@ __init void lguest_init(void) * clobbered. The Launcher places our initial pagetables somewhere at * the top of our physical memory, so we don't need extra space: set * init_pg_tables_end to the end of the kernel. */ + init_pg_tables_start = __pa(pg0); init_pg_tables_end = __pa(pg0); /* Load the %fs segment register (the per-cpu segment register) with @@ -1065,9 +1066,9 @@ __init void lguest_init(void) pm_power_off = lguest_power_off; machine_ops.restart = lguest_restart; - /* Now we're set up, call start_kernel() in init/main.c and we proceed + /* Now we're set up, call i386_start_kernel() in head32.c and we proceed * to boot as normal. It never returns. */ - start_kernel(); + i386_start_kernel(); } /* * This marks the end of stage II of our journey, The Guest. diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c index d710f2d167b..ef691316f8b 100644 --- a/arch/x86/lib/delay_32.c +++ b/arch/x86/lib/delay_32.c @@ -3,6 +3,7 @@ * * Copyright (C) 1993 Linus Torvalds * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> + * Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com> * * The __delay function must _NOT_ be inlined as its execution time * depends wildly on alignment on many x86 processors. The additional @@ -28,16 +29,22 @@ /* simple loop based delay: */ static void delay_loop(unsigned long loops) { - int d0; - __asm__ __volatile__( - "\tjmp 1f\n" - ".align 16\n" - "1:\tjmp 2f\n" - ".align 16\n" - "2:\tdecl %0\n\tjns 2b" - :"=&a" (d0) - :"0" (loops)); + " test %0,%0 \n" + " jz 3f \n" + " jmp 1f \n" + + ".align 16 \n" + "1: jmp 2f \n" + + ".align 16 \n" + "2: decl %0 \n" + " jnz 2b \n" + "3: decl %0 \n" + + : /* we don't need output */ + :"a" (loops) + ); } /* TSC based delay: */ diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 0c28a071824..2f5e277686b 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -142,45 +142,3 @@ static int __init print_ipi_mode(void) late_initcall(print_ipi_mode); -/** - * machine_specific_memory_setup - Hook for machine specific memory setup. - * - * Description: - * This is included late in kernel/setup.c so that it can make - * use of all of the static functions. - **/ - -char * __init machine_specific_memory_setup(void) -{ - char *who; - - - who = "BIOS-e820"; - - /* - * Try to copy the BIOS-supplied E820-map. - * - * Otherwise fake a memory map; one section from 0k->640k, - * the next section from 1mb->appropriate_mem_k - */ - sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); - if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) - < 0) { - unsigned long mem_size; - - /* compare results from other methods and take the greater */ - if (boot_params.alt_mem_k - < boot_params.screen_info.ext_mem_k) { - mem_size = boot_params.screen_info.ext_mem_k; - who = "BIOS-88"; - } else { - mem_size = boot_params.alt_mem_k; - who = "BIOS-e801"; - } - - e820.nr_map = 0; - add_memory_region(0, LOWMEMSIZE(), E820_RAM); - add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM); - } - return who; -} diff --git a/arch/x86/mach-es7000/Makefile b/arch/x86/mach-es7000/Makefile index 69dd4da218d..3ef8b43b62f 100644 --- a/arch/x86/mach-es7000/Makefile +++ b/arch/x86/mach-es7000/Makefile @@ -3,4 +3,3 @@ # obj-$(CONFIG_X86_ES7000) := es7000plat.o -obj-$(CONFIG_X86_GENERICARCH) := es7000plat.o diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/mach-es7000/es7000plat.c index f5d6f7d8b86..4354ce80488 100644 --- a/arch/x86/mach-es7000/es7000plat.c +++ b/arch/x86/mach-es7000/es7000plat.c @@ -52,6 +52,8 @@ static struct mip_reg *host_reg; static int mip_port; static unsigned long mip_addr, host_addr; +int es7000_plat; + /* * GSI override for ES7000 platforms. */ @@ -175,53 +177,6 @@ find_unisys_acpi_oem_table(unsigned long *oem_addr) } #endif -/* - * This file also gets compiled if CONFIG_X86_GENERICARCH is set. Generic - * arch already has got following function definitions (asm-generic/es7000.c) - * hence no need to define these for that case. - */ -#ifndef CONFIG_X86_GENERICARCH -void es7000_sw_apic(void); -void __init enable_apic_mode(void) -{ - es7000_sw_apic(); - return; -} - -__init int mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid) -{ - if (mpc->mpc_oemptr) { - struct mp_config_oemtable *oem_table = - (struct mp_config_oemtable *)mpc->mpc_oemptr; - if (!strncmp(oem, "UNISYS", 6)) - return parse_unisys_oem((char *)oem_table); - } - return 0; -} -#ifdef CONFIG_ACPI -/* Hook from generic ACPI tables.c */ -int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - unsigned long oem_addr; - if (!find_unisys_acpi_oem_table(&oem_addr)) { - if (es7000_check_dsdt()) - return parse_unisys_oem((char *)oem_addr); - else { - setup_unisys(); - return 1; - } - } - return 0; -} -#else -int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 0; -} -#endif -#endif /* COFIG_X86_GENERICARCH */ - static void es7000_spin(int n) { diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile index 19d6d407737..0dbd7803a1d 100644 --- a/arch/x86/mach-generic/Makefile +++ b/arch/x86/mach-generic/Makefile @@ -2,7 +2,11 @@ # Makefile for the generic architecture # -EXTRA_CFLAGS := -Iarch/x86/kernel +EXTRA_CFLAGS := -Iarch/x86/kernel -obj-y := probe.o summit.o bigsmp.o es7000.o default.o -obj-y += ../../x86/mach-es7000/ +obj-y := probe.o default.o +obj-$(CONFIG_X86_NUMAQ) += numaq.o +obj-$(CONFIG_X86_SUMMIT) += summit.o +obj-$(CONFIG_X86_BIGSMP) += bigsmp.o +obj-$(CONFIG_X86_ES7000) += es7000.o +obj-$(CONFIG_X86_ES7000) += ../../x86/mach-es7000/ diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 95fc463056d..59d77171455 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -23,10 +23,8 @@ static int dmi_bigsmp; /* can be set by dmi scanners */ static int hp_ht_bigsmp(const struct dmi_system_id *d) { -#ifdef CONFIG_X86_GENERICARCH printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); dmi_bigsmp = 1; -#endif return 0; } @@ -48,7 +46,7 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { static int probe_bigsmp(void) { if (def_to_bigsmp) - dmi_bigsmp = 1; + dmi_bigsmp = 1; else dmi_check_system(bigsmp_dmi_table); return dmi_bigsmp; diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c new file mode 100644 index 00000000000..8091e68764c --- /dev/null +++ b/arch/x86/mach-generic/numaq.c @@ -0,0 +1,41 @@ +/* + * APIC driver for the IBM NUMAQ chipset. + */ +#define APIC_DEFINITION 1 +#include <linux/threads.h> +#include <linux/cpumask.h> +#include <linux/smp.h> +#include <asm/mpspec.h> +#include <asm/genapic.h> +#include <asm/fixmap.h> +#include <asm/apicdef.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/init.h> +#include <asm/mach-numaq/mach_apic.h> +#include <asm/mach-numaq/mach_apicdef.h> +#include <asm/mach-numaq/mach_ipi.h> +#include <asm/mach-numaq/mach_mpparse.h> +#include <asm/mach-numaq/mach_wakecpu.h> +#include <asm/numaq.h> + +static int mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid) +{ + numaq_mps_oem_check(mpc, oem, productid); + return found_numaq; +} + +static int probe_numaq(void) +{ + /* already know from get_memcfg_numaq() */ + return found_numaq; +} + +/* Hook from generic ACPI tables.c */ +static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return 0; +} + +struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index c5ae751b994..5a7e4619e1c 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c @@ -16,6 +16,7 @@ #include <asm/apicdef.h> #include <asm/genapic.h> +extern struct genapic apic_numaq; extern struct genapic apic_summit; extern struct genapic apic_bigsmp; extern struct genapic apic_es7000; @@ -24,9 +25,18 @@ extern struct genapic apic_default; struct genapic *genapic = &apic_default; static struct genapic *apic_probe[] __initdata = { +#ifdef CONFIG_X86_NUMAQ + &apic_numaq, +#endif +#ifdef CONFIG_X86_SUMMIT &apic_summit, +#endif +#ifdef CONFIG_X86_BIGSMP &apic_bigsmp, +#endif +#ifdef CONFIG_X86_ES7000 &apic_es7000, +#endif &apic_default, /* must be last */ NULL, }; @@ -54,6 +64,7 @@ early_param("apic", parse_apic); void __init generic_bigsmp_probe(void) { +#ifdef CONFIG_X86_BIGSMP /* * This routine is used to switch to bigsmp mode when * - There is no apic= option specified by the user @@ -67,6 +78,7 @@ void __init generic_bigsmp_probe(void) printk(KERN_INFO "Overriding APIC driver with %s\n", genapic->name); } +#endif } void __init generic_apic_probe(void) @@ -88,7 +100,8 @@ void __init generic_apic_probe(void) /* These functions can switch the APIC even after the initial ->probe() */ -int __init mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid) +int __init mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid) { int i; for (i = 0; apic_probe[i]; ++i) { diff --git a/arch/x86/mach-visws/mpparse.c b/arch/x86/mach-visws/mpparse.c index 57484e91ab9..a2fb78c0d15 100644 --- a/arch/x86/mach-visws/mpparse.c +++ b/arch/x86/mach-visws/mpparse.c @@ -8,11 +8,6 @@ #include "cobalt.h" #include "mach_apic.h" -/* Have we found an MP table */ -int smp_found_config; - -int pic_mode; - extern unsigned int __cpuinitdata maxcpus; /* @@ -76,7 +71,9 @@ void __init find_smp_config(void) if (ncpus > maxcpus) ncpus = maxcpus; +#ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 1; +#endif while (ncpus--) MP_processor_info(mp++); diff --git a/arch/x86/mach-visws/setup.c b/arch/x86/mach-visws/setup.c index de4c9dbd086..d67868ec9b7 100644 --- a/arch/x86/mach-visws/setup.c +++ b/arch/x86/mach-visws/setup.c @@ -175,9 +175,9 @@ char * __init machine_specific_memory_setup(void) sgivwfb_mem_size &= ~((1 << 20) - 1); sgivwfb_mem_phys = mem_size - gfx_mem_size; - add_memory_region(0, LOWMEMSIZE(), E820_RAM); - add_memory_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM); - add_memory_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED); + e820_add_region(0, LOWMEMSIZE(), E820_RAM); + e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM); + e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED); return "PROM"; } diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index 5ae5466b9eb..6bbdd633864 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c @@ -62,6 +62,7 @@ void __init time_init_hook(void) char *__init machine_specific_memory_setup(void) { char *who; + int new_nr; who = "NOT VOYAGER"; @@ -73,7 +74,7 @@ char *__init machine_specific_memory_setup(void) e820.nr_map = 0; for (i = 0; voyager_memory_detect(i, &addr, &length); i++) { - add_memory_region(addr, length, E820_RAM); + e820_add_region(addr, length, E820_RAM); } return who; } else if (voyager_level == 4) { @@ -91,43 +92,17 @@ char *__init machine_specific_memory_setup(void) tom = (boot_params.screen_info.ext_mem_k) << 10; } who = "Voyager-TOM"; - add_memory_region(0, 0x9f000, E820_RAM); + e820_add_region(0, 0x9f000, E820_RAM); /* map from 1M to top of memory */ - add_memory_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024, + e820_add_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024, E820_RAM); /* FIXME: Should check the ASICs to see if I need to * take out the 8M window. Just do it at the moment * */ - add_memory_region(8 * 1024 * 1024, 8 * 1024 * 1024, + e820_add_region(8 * 1024 * 1024, 8 * 1024 * 1024, E820_RESERVED); return who; } - who = "BIOS-e820"; - - /* - * Try to copy the BIOS-supplied E820-map. - * - * Otherwise fake a memory map; one section from 0k->640k, - * the next section from 1mb->appropriate_mem_k - */ - sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); - if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) - < 0) { - unsigned long mem_size; - - /* compare results from other methods and take the greater */ - if (boot_params.alt_mem_k < boot_params.screen_info.ext_mem_k) { - mem_size = boot_params.screen_info.ext_mem_k; - who = "BIOS-88"; - } else { - mem_size = boot_params.alt_mem_k; - who = "BIOS-e801"; - } - - e820.nr_map = 0; - add_memory_region(0, LOWMEMSIZE(), E820_RAM); - add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM); - } - return who; + return default_machine_specific_memory_setup(); } diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 8acbf0cdf1a..8dedd01e909 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -59,11 +59,6 @@ __u32 voyager_quad_processors = 0; * activity count. Finally exported by i386_ksyms.c */ static int voyager_extended_cpus = 1; -/* Have we found an SMP box - used by time.c to do the profiling - interrupt for timeslicing; do not set to 1 until the per CPU timer - interrupt is active */ -int smp_found_config = 0; - /* Used for the invalidate map that's also checked in the spinlock */ static volatile unsigned long smp_invalidate_needed; @@ -1137,15 +1132,6 @@ void flush_tlb_all(void) on_each_cpu(do_flush_tlb_all, 0, 1, 1); } -/* used to set up the trampoline for other CPUs when the memory manager - * is sorted out */ -void __init smp_alloc_memory(void) -{ - trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE); - if (__pa(trampoline_base) >= 0x93000) - BUG(); -} - /* send a reschedule CPI to one CPU by physical CPU number*/ static void voyager_smp_send_reschedule(int cpu) { diff --git a/arch/x86/math-emu/reg_constant.c b/arch/x86/math-emu/reg_constant.c index 04869e64b18..00548354912 100644 --- a/arch/x86/math-emu/reg_constant.c +++ b/arch/x86/math-emu/reg_constant.c @@ -16,8 +16,8 @@ #include "reg_constant.h" #include "control_w.h" -#define MAKE_REG(s,e,l,h) { l, h, \ - ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) } +#define MAKE_REG(s, e, l, h) { l, h, \ + ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) } FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000); #if 0 @@ -40,7 +40,7 @@ FPU_REG const CONST_PI2extra = MAKE_REG(NEG, -66, FPU_REG const CONST_Z = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0); /* Only the sign and significand (and tag) are used in internal NaNs */ -/* The 80486 never generates one of these +/* The 80486 never generates one of these FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000); */ /* This is the real indefinite QNaN */ @@ -49,7 +49,7 @@ FPU_REG const CONST_QNaN = MAKE_REG(NEG, EXP_OVER, 0x00000000, 0xC0000000); /* Only the sign (and tag) is used in internal infinities */ FPU_REG const CONST_INF = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000); -static void fld_const(FPU_REG const *c, int adj, u_char tag) +static void fld_const(FPU_REG const * c, int adj, u_char tag) { FPU_REG *st_new_ptr; diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 914ccf98368..a2f73ba42b8 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -38,6 +38,7 @@ #include <asm/setup.h> #include <asm/mmzone.h> #include <asm/bios_ebda.h> +#include <asm/proto.h> struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); @@ -59,14 +60,14 @@ unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly; /* * 4) physnode_map - the mapping between a pfn and owning node * physnode_map keeps track of the physical memory layout of a generic - * numa node on a 256Mb break (each element of the array will - * represent 256Mb of memory and will be marked by the node id. so, + * numa node on a 64Mb break (each element of the array will + * represent 64Mb of memory and will be marked by the node id. so, * if the first gig is on node 0, and the second gig is on node 1 * physnode_map will contain: * - * physnode_map[0-3] = 0; - * physnode_map[4-7] = 1; - * physnode_map[8- ] = -1; + * physnode_map[0-15] = 0; + * physnode_map[16-31] = 1; + * physnode_map[32- ] = -1; */ s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; EXPORT_SYMBOL(physnode_map); @@ -81,9 +82,9 @@ void memory_present(int nid, unsigned long start, unsigned long end) printk(KERN_DEBUG " "); for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { physnode_map[pfn / PAGES_PER_ELEMENT] = nid; - printk("%ld ", pfn); + printk(KERN_CONT "%ld ", pfn); } - printk("\n"); + printk(KERN_CONT "\n"); } unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, @@ -99,7 +100,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, #endif extern unsigned long find_max_low_pfn(void); -extern void add_one_highpage_init(struct page *, int, int); extern unsigned long highend_pfn, highstart_pfn; #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) @@ -119,11 +119,11 @@ int __init get_memcfg_numa_flat(void) { printk("NUMA - single node, flat memory mode\n"); - /* Run the memory configuration and find the top of memory. */ - propagate_e820_map(); node_start_pfn[0] = 0; node_end_pfn[0] = max_pfn; + e820_register_active_regions(0, 0, max_pfn); memory_present(0, 0, max_pfn); + node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn); /* Indicate there is one node available. */ nodes_clear(node_online_map); @@ -156,24 +156,29 @@ static void __init propagate_e820_map_node(int nid) */ static void __init allocate_pgdat(int nid) { - if (nid && node_has_online_mem(nid)) + if (nid && node_has_online_mem(nid) && node_remap_start_vaddr[nid]) NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; else { - NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn)); - min_low_pfn += PFN_UP(sizeof(pg_data_t)); + unsigned long pgdat_phys; + pgdat_phys = find_e820_area(min_low_pfn<<PAGE_SHIFT, + (nid ? max_low_pfn:max_pfn_mapped)<<PAGE_SHIFT, + sizeof(pg_data_t), + PAGE_SIZE); + NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT)); + reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t), + "NODE_DATA"); } + printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n", + nid, (unsigned long)NODE_DATA(nid)); } -#ifdef CONFIG_DISCONTIGMEM /* - * In the discontig memory model, a portion of the kernel virtual area (KVA) - * is reserved and portions of nodes are mapped using it. This is to allow - * node-local memory to be allocated for structures that would normally require - * ZONE_NORMAL. The memory is allocated with alloc_remap() and callers - * should be prepared to allocate from the bootmem allocator instead. This KVA - * mechanism is incompatible with SPARSEMEM as it makes assumptions about the - * layout of memory that are broken if alloc_remap() succeeds for some of the - * map and fails for others + * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel + * virtual address space (KVA) is reserved and portions of nodes are mapped + * using it. This is to allow node-local memory to be allocated for + * structures that would normally require ZONE_NORMAL. The memory is + * allocated with alloc_remap() and callers should be prepared to allocate + * from the bootmem allocator instead. */ static unsigned long node_remap_start_pfn[MAX_NUMNODES]; static void *node_remap_end_vaddr[MAX_NUMNODES]; @@ -202,8 +207,12 @@ void __init remap_numa_kva(void) int node; for_each_online_node(node) { + printk(KERN_DEBUG "remap_numa_kva: node %d\n", node); for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); + printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n", + (unsigned long)vaddr, + node_remap_start_pfn[node] + pfn); set_pmd_pfn((ulong) vaddr, node_remap_start_pfn[node] + pfn, PAGE_KERNEL_LARGE); @@ -215,17 +224,21 @@ static unsigned long calculate_numa_remap_pages(void) { int nid; unsigned long size, reserve_pages = 0; - unsigned long pfn; for_each_online_node(nid) { - unsigned old_end_pfn = node_end_pfn[nid]; + u64 node_kva_target; + u64 node_kva_final; /* * The acpi/srat node info can show hot-add memroy zones * where memory could be added but not currently present. */ + printk("node %d pfn: [%lx - %lx]\n", + nid, node_start_pfn[nid], node_end_pfn[nid]); if (node_start_pfn[nid] > max_pfn) continue; + if (!node_end_pfn[nid]) + continue; if (node_end_pfn[nid] > max_pfn) node_end_pfn[nid] = max_pfn; @@ -237,39 +250,45 @@ static unsigned long calculate_numa_remap_pages(void) /* now the roundup is correct, convert to PAGE_SIZE pages */ size = size * PTRS_PER_PTE; - /* - * Validate the region we are allocating only contains valid - * pages. - */ - for (pfn = node_end_pfn[nid] - size; - pfn < node_end_pfn[nid]; pfn++) - if (!page_is_ram(pfn)) - break; - - if (pfn != node_end_pfn[nid]) - size = 0; + node_kva_target = round_down(node_end_pfn[nid] - size, + PTRS_PER_PTE); + node_kva_target <<= PAGE_SHIFT; + do { + node_kva_final = find_e820_area(node_kva_target, + ((u64)node_end_pfn[nid])<<PAGE_SHIFT, + ((u64)size)<<PAGE_SHIFT, + LARGE_PAGE_BYTES); + node_kva_target -= LARGE_PAGE_BYTES; + } while (node_kva_final == -1ULL && + (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid])); + + if (node_kva_final == -1ULL) + panic("Can not get kva ram\n"); - printk("Reserving %ld pages of KVA for lmem_map of node %d\n", - size, nid); node_remap_size[nid] = size; node_remap_offset[nid] = reserve_pages; reserve_pages += size; - printk("Shrinking node %d from %ld pages to %ld pages\n", - nid, node_end_pfn[nid], node_end_pfn[nid] - size); - - if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) { - /* - * Align node_end_pfn[] and node_remap_start_pfn[] to - * pmd boundary. remap_numa_kva will barf otherwise. - */ - printk("Shrinking node %d further by %ld pages for proper alignment\n", - nid, node_end_pfn[nid] & (PTRS_PER_PTE-1)); - size += node_end_pfn[nid] & (PTRS_PER_PTE-1); - } + printk("Reserving %ld pages of KVA for lmem_map of node %d at %llx\n", + size, nid, node_kva_final>>PAGE_SHIFT); - node_end_pfn[nid] -= size; - node_remap_start_pfn[nid] = node_end_pfn[nid]; - shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]); + /* + * prevent kva address below max_low_pfn want it on system + * with less memory later. + * layout will be: KVA address , KVA RAM + * + * we are supposed to only record the one less then max_low_pfn + * but we could have some hole in high memory, and it will only + * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide + * to use it as free. + * So reserve_early here, hope we don't run out of that array + */ + reserve_early(node_kva_final, + node_kva_final+(((u64)size)<<PAGE_SHIFT), + "KVA RAM"); + + node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT; + remove_active_range(nid, node_remap_start_pfn[nid], + node_remap_start_pfn[nid] + size); } printk("Reserving total of %ld pages for numa KVA remap\n", reserve_pages); @@ -287,35 +306,15 @@ static void init_remap_allocator(int nid) printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, (ulong) node_remap_start_vaddr[nid], - (ulong) pfn_to_kaddr(highstart_pfn - + node_remap_offset[nid] + node_remap_size[nid])); -} -#else -void *alloc_remap(int nid, unsigned long size) -{ - return NULL; + (ulong) node_remap_end_vaddr[nid]); } -static unsigned long calculate_numa_remap_pages(void) -{ - return 0; -} - -static void init_remap_allocator(int nid) -{ -} - -void __init remap_numa_kva(void) -{ -} -#endif /* CONFIG_DISCONTIGMEM */ - extern void setup_bootmem_allocator(void); unsigned long __init setup_memory(void) { int nid; unsigned long system_start_pfn, system_max_low_pfn; - unsigned long wasted_pages; + long kva_target_pfn; /* * When mapping a NUMA machine we allocate the node_mem_map arrays @@ -324,34 +323,38 @@ unsigned long __init setup_memory(void) * this space and use it to adjust the boundary between ZONE_NORMAL * and ZONE_HIGHMEM. */ + + /* call find_max_low_pfn at first, it could update max_pfn */ + system_max_low_pfn = max_low_pfn = find_max_low_pfn(); + + remove_all_active_ranges(); get_memcfg_numa(); - kva_pages = calculate_numa_remap_pages(); + kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE); /* partially used pages are not usable - thus round upwards */ system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); - kva_start_pfn = find_max_low_pfn() - kva_pages; + kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE); + do { + kva_start_pfn = find_e820_area(kva_target_pfn<<PAGE_SHIFT, + max_low_pfn<<PAGE_SHIFT, + kva_pages<<PAGE_SHIFT, + PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT; + kva_target_pfn -= PTRS_PER_PTE; + } while (kva_start_pfn == -1UL && kva_target_pfn > min_low_pfn); -#ifdef CONFIG_BLK_DEV_INITRD - /* Numa kva area is below the initrd */ - if (initrd_start) - kva_start_pfn = PFN_DOWN(initrd_start - PAGE_OFFSET) - - kva_pages; -#endif - - /* - * We waste pages past at the end of the KVA for no good reason other - * than how it is located. This is bad. - */ - wasted_pages = kva_start_pfn & (PTRS_PER_PTE-1); - kva_start_pfn -= wasted_pages; - kva_pages += wasted_pages; + if (kva_start_pfn == -1UL) + panic("Can not get kva space\n"); - system_max_low_pfn = max_low_pfn = find_max_low_pfn(); printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n", kva_start_pfn, max_low_pfn); printk("max_pfn = %ld\n", max_pfn); + + /* avoid clash with initrd */ + reserve_early(kva_start_pfn<<PAGE_SHIFT, + (kva_start_pfn + kva_pages)<<PAGE_SHIFT, + "KVA PG"); #ifdef CONFIG_HIGHMEM highstart_pfn = highend_pfn = max_pfn; if (max_pfn > system_max_low_pfn) @@ -387,16 +390,8 @@ unsigned long __init setup_memory(void) return max_low_pfn; } -void __init numa_kva_reserve(void) -{ - if (kva_pages) - reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages), - BOOTMEM_DEFAULT); -} - void __init zone_sizes_init(void) { - int nid; unsigned long max_zone_pfns[MAX_NR_ZONES]; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = @@ -406,27 +401,18 @@ void __init zone_sizes_init(void) max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; #endif - /* If SRAT has not registered memory, register it now */ - if (find_max_pfn_with_active_regions() == 0) { - for_each_online_node(nid) { - if (node_has_online_mem(nid)) - add_active_range(nid, node_start_pfn[nid], - node_end_pfn[nid]); - } - } - free_area_init_nodes(max_zone_pfns); return; } -void __init set_highmem_pages_init(int bad_ppro) +void __init set_highmem_pages_init(void) { #ifdef CONFIG_HIGHMEM struct zone *zone; - struct page *page; + int nid; for_each_zone(zone) { - unsigned long node_pfn, zone_start_pfn, zone_end_pfn; + unsigned long zone_start_pfn, zone_end_pfn; if (!is_highmem(zone)) continue; @@ -434,16 +420,12 @@ void __init set_highmem_pages_init(int bad_ppro) zone_start_pfn = zone->zone_start_pfn; zone_end_pfn = zone_start_pfn + zone->spanned_pages; + nid = zone_to_nid(zone); printk("Initializing %s for node %d (%08lx:%08lx)\n", - zone->name, zone_to_nid(zone), - zone_start_pfn, zone_end_pfn); - - for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { - if (!pfn_valid(node_pfn)) - continue; - page = pfn_to_page(node_pfn); - add_one_highpage_init(page, node_pfn, bad_ppro); - } + zone->name, nid, zone_start_pfn, zone_end_pfn); + + add_highpages_with_active_regions(nid, zone_start_pfn, + zone_end_pfn); } totalram_pages += totalhigh_pages; #endif @@ -476,3 +458,21 @@ int memory_add_physaddr_to_nid(u64 addr) EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif + +#if defined(CONFIG_ACPI_NUMA) && !defined(CONFIG_HAVE_ARCH_PARSE_SRAT) +/* + * Dummy on 32-bit, for now: + */ +void __init acpi_numa_slit_init(struct acpi_table_slit *slit) +{ +} + +void __init +acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) +{ +} + +void __init acpi_numa_arch_fixup(void) +{ +} +#endif diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 2c24bea92c6..0bb0caed897 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -42,7 +42,7 @@ static struct addr_marker address_markers[] = { { 0, "User Space" }, #ifdef CONFIG_X86_64 { 0x8000000000000000UL, "Kernel Space" }, - { 0xffff810000000000UL, "Low Kernel Mapping" }, + { PAGE_OFFSET, "Low Kernel Mapping" }, { VMALLOC_START, "vmalloc() Area" }, { VMEMMAP_START, "Vmemmap" }, { __START_KERNEL_map, "High Kernel Mapping" }, diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 8bcb6f40ccb..578b7681955 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -55,11 +55,7 @@ static inline int notify_page_fault(struct pt_regs *regs) int ret = 0; /* kprobe_running() needs smp_processor_id() */ -#ifdef CONFIG_X86_32 if (!user_mode_vm(regs)) { -#else - if (!user_mode(regs)) { -#endif preempt_disable(); if (kprobe_running() && kprobe_fault_handler(regs, 14)) ret = 1; @@ -396,11 +392,7 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, printk(KERN_CONT "NULL pointer dereference"); else printk(KERN_CONT "paging request"); -#ifdef CONFIG_X86_32 - printk(KERN_CONT " at %08lx\n", address); -#else - printk(KERN_CONT " at %016lx\n", address); -#endif + printk(KERN_CONT " at %p\n", (void *) address); printk(KERN_ALERT "IP:"); printk_address(regs->ip, 1); dump_pagetable(address); @@ -800,14 +792,10 @@ bad_area_nosemaphore: if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && printk_ratelimit()) { printk( -#ifdef CONFIG_X86_32 - "%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx", -#else - "%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx", -#endif + "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, task_pid_nr(tsk), address, regs->ip, - regs->sp, error_code); + tsk->comm, task_pid_nr(tsk), address, + (void *) regs->ip, (void *) regs->sp, error_code); print_vma_addr(" in ", regs->ip); printk("\n"); } diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ec30d10154b..65d55056b6e 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -162,6 +162,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) pgd_t *pgd; pmd_t *pmd; pte_t *pte; + unsigned pages_2m = 0, pages_4k = 0; pgd_idx = pgd_index(PAGE_OFFSET); pgd = pgd_base + pgd_idx; @@ -197,6 +198,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) is_kernel_text(addr2)) prot = PAGE_KERNEL_LARGE_EXEC; + pages_2m++; set_pmd(pmd, pfn_pmd(pfn, prot)); pfn += PTRS_PER_PTE; @@ -213,18 +215,14 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) if (is_kernel_text(addr)) prot = PAGE_KERNEL_EXEC; + pages_4k++; set_pte(pte, pfn_pte(pfn, prot)); } max_pfn_mapped = pfn; } } -} - -static inline int page_kills_ppro(unsigned long pagenr) -{ - if (pagenr >= 0x70000 && pagenr <= 0x7003F) - return 1; - return 0; + update_page_count(PG_LEVEL_2M, pages_2m); + update_page_count(PG_LEVEL_4K, pages_4k); } /* @@ -287,29 +285,60 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) pkmap_page_table = pte; } -void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) +static void __init add_one_highpage_init(struct page *page, int pfn) { - if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalhigh_pages++; - } else - SetPageReserved(page); + ClearPageReserved(page); + init_page_count(page); + __free_page(page); + totalhigh_pages++; } -#ifndef CONFIG_NUMA -static void __init set_highmem_pages_init(int bad_ppro) +struct add_highpages_data { + unsigned long start_pfn; + unsigned long end_pfn; +}; + +static void __init add_highpages_work_fn(unsigned long start_pfn, + unsigned long end_pfn, void *datax) { - int pfn; + int node_pfn; + struct page *page; + unsigned long final_start_pfn, final_end_pfn; + struct add_highpages_data *data; - for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) { - /* - * Holes under sparsemem might not have no mem_map[]: - */ - if (pfn_valid(pfn)) - add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); + data = (struct add_highpages_data *)datax; + + final_start_pfn = max(start_pfn, data->start_pfn); + final_end_pfn = min(end_pfn, data->end_pfn); + if (final_start_pfn >= final_end_pfn) + return; + + for (node_pfn = final_start_pfn; node_pfn < final_end_pfn; + node_pfn++) { + if (!pfn_valid(node_pfn)) + continue; + page = pfn_to_page(node_pfn); + add_one_highpage_init(page, node_pfn); } + +} + +void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, + unsigned long end_pfn) +{ + struct add_highpages_data data; + + data.start_pfn = start_pfn; + data.end_pfn = end_pfn; + + work_with_active_regions(nid, add_highpages_work_fn, &data); +} + +#ifndef CONFIG_NUMA +static void __init set_highmem_pages_init(void) +{ + add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); + totalram_pages += totalhigh_pages; } #endif /* !CONFIG_NUMA */ @@ -317,7 +346,7 @@ static void __init set_highmem_pages_init(int bad_ppro) #else # define kmap_init() do { } while (0) # define permanent_kmaps_init(pgd_base) do { } while (0) -# define set_highmem_pages_init(bad_ppro) do { } while (0) +# define set_highmem_pages_init() do { } while (0) #endif /* CONFIG_HIGHMEM */ pteval_t __PAGE_KERNEL = _PAGE_KERNEL; @@ -564,24 +593,11 @@ static struct kcore_list kcore_mem, kcore_vmalloc; void __init mem_init(void) { int codesize, reservedpages, datasize, initsize; - int tmp, bad_ppro; + int tmp; #ifdef CONFIG_FLATMEM BUG_ON(!mem_map); #endif - bad_ppro = ppro_with_ram_bug(); - -#ifdef CONFIG_HIGHMEM - /* check that fixmap and pkmap do not overlap */ - if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { - printk(KERN_ERR - "fixmap and kmap areas overlap - this will crash\n"); - printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", - PKMAP_BASE, PKMAP_BASE + LAST_PKMAP*PAGE_SIZE, - FIXADDR_START); - BUG(); - } -#endif /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); @@ -593,7 +609,7 @@ void __init mem_init(void) if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) reservedpages++; - set_highmem_pages_init(bad_ppro); + set_highmem_pages_init(); codesize = (unsigned long) &_etext - (unsigned long) &_text; datasize = (unsigned long) &_edata - (unsigned long) &_etext; @@ -614,7 +630,6 @@ void __init mem_init(void) (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) ); -#if 1 /* double-sanity-check paranoia */ printk(KERN_INFO "virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" #ifdef CONFIG_HIGHMEM @@ -655,7 +670,6 @@ void __init mem_init(void) #endif BUG_ON(VMALLOC_START > VMALLOC_END); BUG_ON((unsigned long)high_memory > VMALLOC_START); -#endif /* double-sanity-check paranoia */ if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); @@ -784,3 +798,9 @@ void free_initrd_mem(unsigned long start, unsigned long end) free_init_pages("initrd memory", start, end); } #endif + +int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, + int flags) +{ + return reserve_bootmem(phys, len, flags); +} diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 819dad973b1..18c6a006e40 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -18,6 +18,7 @@ #include <linux/swap.h> #include <linux/smp.h> #include <linux/init.h> +#include <linux/initrd.h> #include <linux/pagemap.h> #include <linux/bootmem.h> #include <linux/proc_fs.h> @@ -47,6 +48,18 @@ #include <asm/numa.h> #include <asm/cacheflush.h> +/* + * PFN of last memory page. + */ +unsigned long end_pfn; + +/* + * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. + * The direct mapping extends to max_pfn_mapped, so that we can directly access + * apertures, ACPI and other tables without having to play with fixmaps. + */ +unsigned long max_pfn_mapped; + static unsigned long dma_reserve __initdata; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); @@ -312,6 +325,8 @@ __meminit void early_iounmap(void *addr, unsigned long size) static unsigned long __meminit phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) { + unsigned long pages = 0; + int i = pmd_index(address); for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) { @@ -328,9 +343,11 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) if (pmd_val(*pmd)) continue; + pages++; set_pte((pte_t *)pmd, pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); } + update_page_count(PG_LEVEL_2M, pages); return address; } @@ -350,6 +367,7 @@ phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) static unsigned long __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) { + unsigned long pages = 0; unsigned long last_map_addr = end; int i = pud_index(addr); @@ -374,6 +392,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) } if (direct_gbpages) { + pages++; set_pte((pte_t *)pud, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); last_map_addr = (addr & PUD_MASK) + PUD_SIZE; @@ -390,6 +409,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) unmap_low_page(pmd); } __flush_tlb_all(); + update_page_count(PG_LEVEL_1G, pages); return last_map_addr >> PAGE_SHIFT; } @@ -431,7 +451,7 @@ static void __init init_gbpages(void) direct_gbpages = 0; } -#ifdef CONFIG_MEMTEST_BOOTPARAM +#ifdef CONFIG_MEMTEST static void __init memtest(unsigned long start_phys, unsigned long size, unsigned pattern) @@ -493,7 +513,8 @@ static void __init memtest(unsigned long start_phys, unsigned long size, } -static int memtest_pattern __initdata = CONFIG_MEMTEST_BOOTPARAM_VALUE; +/* default is disabled */ +static int memtest_pattern __initdata; static int __init parse_memtest(char *arg) { @@ -799,12 +820,14 @@ void free_initrd_mem(unsigned long start, unsigned long end) } #endif -void __init reserve_bootmem_generic(unsigned long phys, unsigned len) +int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, + int flags) { #ifdef CONFIG_NUMA int nid, next_nid; #endif unsigned long pfn = phys >> PAGE_SHIFT; + int ret; if (pfn >= end_pfn) { /* @@ -812,11 +835,11 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) * firmware tables: */ if (pfn < max_pfn_mapped) - return; + return -EFAULT; printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n", phys, len); - return; + return -EFAULT; } /* Should check here against the e820 map to avoid double free */ @@ -824,9 +847,13 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) nid = phys_to_nid(phys); next_nid = phys_to_nid(phys + len - 1); if (nid == next_nid) - reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT); + ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags); else - reserve_bootmem(phys, len, BOOTMEM_DEFAULT); + ret = reserve_bootmem(phys, len, flags); + + if (ret != 0) + return ret; + #else reserve_bootmem(phys, len, BOOTMEM_DEFAULT); #endif @@ -835,6 +862,8 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) dma_reserve += len / PAGE_SIZE; set_dma_reserve(dma_reserve); } + + return 0; } int kern_addr_valid(unsigned long addr) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 2b2bb3f9b68..416ea415f5c 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -142,7 +142,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, /* * Don't remap the low PCI/ISA area, it's always mapped.. */ - if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) + if (is_ISA_range(phys_addr, last_addr)) return (__force void __iomem *)phys_to_virt(phys_addr); /* @@ -261,7 +261,7 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) { /* * Ideally, this should be: - * pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS; + * pat_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS; * * Till we fix all X drivers to use ioremap_wc(), we will use * UC MINUS. @@ -285,7 +285,7 @@ EXPORT_SYMBOL(ioremap_nocache); */ void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size) { - if (pat_wc_enabled) + if (pat_enabled) return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC, __builtin_return_address(0)); else @@ -318,8 +318,8 @@ void iounmap(volatile void __iomem *addr) * vm_area and by simply returning an address into the kernel mapping * of ISA space. So handle that here. */ - if (addr >= phys_to_virt(ISA_START_ADDRESS) && - addr < phys_to_virt(ISA_END_ADDRESS)) + if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) && + (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) return; addr = (volatile void __iomem *) @@ -332,7 +332,7 @@ void iounmap(volatile void __iomem *addr) cpa takes care of the direct mappings. */ read_lock(&vmlist_lock); for (p = vmlist; p; p = p->next) { - if (p->addr == addr) + if (p->addr == (void __force *)addr) break; } read_unlock(&vmlist_lock); @@ -346,7 +346,7 @@ void iounmap(volatile void __iomem *addr) free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p)); /* Finally remove it */ - o = remove_vm_area((void *)addr); + o = remove_vm_area((void __force *)addr); BUG_ON(p != o || o == NULL); kfree(p); } @@ -365,7 +365,7 @@ void *xlate_dev_mem_ptr(unsigned long phys) if (page_is_ram(start >> PAGE_SHIFT)) return __va(phys); - addr = (void *)ioremap(start, PAGE_SIZE); + addr = (void __force *)ioremap(start, PAGE_SIZE); if (addr) addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 1f476e47784..317573ec925 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c @@ -22,6 +22,7 @@ #include <asm/numa.h> #include <asm/mpspec.h> #include <asm/apic.h> +#include <asm/k8.h> static __init int find_northbridge(void) { @@ -56,34 +57,33 @@ static __init void early_get_boot_cpu_id(void) /* * Find possible boot-time SMP configuration: */ +#ifdef CONFIG_X86_MPPARSE early_find_smp_config(); +#endif #ifdef CONFIG_ACPI /* * Read APIC information from ACPI tables. */ early_acpi_boot_init(); #endif +#ifdef CONFIG_X86_MPPARSE /* * get boot-time SMP configuration: */ if (smp_found_config) early_get_smp_config(); +#endif early_init_lapic_mapping(); } int __init k8_scan_nodes(unsigned long start, unsigned long end) { + unsigned numnodes, cores, bits, apicid_base; unsigned long prevbase; struct bootnode nodes[8]; - int nodeid, i, nb; unsigned char nodeids[8]; - int found = 0; - u32 reg; - unsigned numnodes; - unsigned cores; - unsigned bits; - int j; - unsigned apicid_base; + int i, j, nb, found = 0; + u32 nodeid, reg; if (!early_pci_allowed()) return -1; @@ -105,7 +105,6 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) prevbase = 0; for (i = 0; i < 8; i++) { unsigned long base, limit; - u32 nodeid; base = read_pci_config(0, nb, 1, 0x40 + i*8); limit = read_pci_config(0, nb, 1, 0x44 + i*8); diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index c5066d519e5..c4557e25f60 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -27,30 +27,17 @@ struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); -bootmem_data_t plat_node_bdata[MAX_NUMNODES]; +static bootmem_data_t plat_node_bdata[MAX_NUMNODES]; struct memnode memnode; -#ifdef CONFIG_SMP -int x86_cpu_to_node_map_init[NR_CPUS] = { - [0 ... NR_CPUS-1] = NUMA_NO_NODE -}; -void *x86_cpu_to_node_map_early_ptr; -EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr); -#endif -DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE; -EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map); - s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE }; -cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly; -EXPORT_SYMBOL(node_to_cpumask_map); - int numa_off __initdata; -unsigned long __initdata nodemap_addr; -unsigned long __initdata nodemap_size; +static unsigned long __initdata nodemap_addr; +static unsigned long __initdata nodemap_size; /* * Given a shift value, try to populate memnodemap[] @@ -192,7 +179,7 @@ static void * __init early_node_mem(int nodeid, unsigned long start, void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) { - unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size; + unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; unsigned long bootmap_start, nodedata_phys; void *bootmap; const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); @@ -204,7 +191,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, start, end); start_pfn = start >> PAGE_SHIFT; - end_pfn = end >> PAGE_SHIFT; + last_pfn = end >> PAGE_SHIFT; node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size, SMP_CACHE_BYTES); @@ -217,7 +204,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; NODE_DATA(nodeid)->node_start_pfn = start_pfn; - NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; + NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; /* * Find a place for the bootmem map @@ -226,14 +213,14 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, * early_node_mem will get that with find_e820_area instead * of alloc_bootmem, that could clash with reserved range */ - bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); + bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); nid = phys_to_nid(nodedata_phys); if (nid == nodeid) bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); else bootmap_start = round_up(start, PAGE_SIZE); /* - * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like + * SMP_CACHE_BYTES could be enough, but init_bootmem_node like * to use that to align to PAGE_SIZE */ bootmap = early_node_mem(nodeid, bootmap_start, end, @@ -248,7 +235,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, bootmap_size = init_bootmem_node(NODE_DATA(nodeid), bootmap_start >> PAGE_SHIFT, - start_pfn, end_pfn); + start_pfn, last_pfn); printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", bootmap_start, bootmap_start + bootmap_size - 1, @@ -309,7 +296,7 @@ void __init numa_init_array(void) #ifdef CONFIG_NUMA_EMU /* Numa emulation */ -char *cmdline __initdata; +static char *cmdline __initdata; /* * Setups up nid to range from addr to addr + size. If the end @@ -413,15 +400,15 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, } /* - * Sets up the system RAM area from start_pfn to end_pfn according to the + * Sets up the system RAM area from start_pfn to last_pfn according to the * numa=fake command-line option. */ static struct bootnode nodes[MAX_NUMNODES] __initdata; -static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) +static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn) { u64 size, addr = start_pfn << PAGE_SHIFT; - u64 max_addr = end_pfn << PAGE_SHIFT; + u64 max_addr = last_pfn << PAGE_SHIFT; int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; memset(&nodes, 0, sizeof(nodes)); @@ -527,7 +514,7 @@ out: } #endif /* CONFIG_NUMA_EMU */ -void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) +void __init numa_initmem_init(unsigned long start_pfn, unsigned long last_pfn) { int i; @@ -535,7 +522,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) nodes_clear(node_online_map); #ifdef CONFIG_NUMA_EMU - if (cmdline && !numa_emulation(start_pfn, end_pfn)) + if (cmdline && !numa_emulation(start_pfn, last_pfn)) return; nodes_clear(node_possible_map); nodes_clear(node_online_map); @@ -543,7 +530,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) #ifdef CONFIG_ACPI_NUMA if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, - end_pfn << PAGE_SHIFT)) + last_pfn << PAGE_SHIFT)) return; nodes_clear(node_possible_map); nodes_clear(node_online_map); @@ -551,7 +538,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) #ifdef CONFIG_K8_NUMA if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, - end_pfn<<PAGE_SHIFT)) + last_pfn<<PAGE_SHIFT)) return; nodes_clear(node_possible_map); nodes_clear(node_online_map); @@ -561,7 +548,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) printk(KERN_INFO "Faking a node at %016lx-%016lx\n", start_pfn << PAGE_SHIFT, - end_pfn << PAGE_SHIFT); + last_pfn << PAGE_SHIFT); /* setup dummy node covering all memory */ memnode_shift = 63; memnodemap = memnode.embedded_map; @@ -570,29 +557,8 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) node_set(0, node_possible_map); for (i = 0; i < NR_CPUS; i++) numa_set_node(i, 0); - /* cpumask_of_cpu() may not be available during early startup */ - memset(&node_to_cpumask_map[0], 0, sizeof(node_to_cpumask_map[0])); - cpu_set(0, node_to_cpumask_map[0]); - e820_register_active_regions(0, start_pfn, end_pfn); - setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); -} - -__cpuinit void numa_add_cpu(int cpu) -{ - set_bit(cpu, - (unsigned long *)&node_to_cpumask_map[early_cpu_to_node(cpu)]); -} - -void __cpuinit numa_set_node(int cpu, int node) -{ - int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr; - - if(cpu_to_node_map) - cpu_to_node_map[cpu] = node; - else if(per_cpu_offset(cpu)) - per_cpu(x86_cpu_to_node_map, cpu) = node; - else - Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu); + e820_register_active_regions(0, start_pfn, last_pfn); + setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); } unsigned long __init numa_free_all_bootmem(void) @@ -641,6 +607,7 @@ static __init int numa_setup(char *opt) } early_param("numa", numa_setup); +#ifdef CONFIG_NUMA /* * Setup early cpu_to_node. * @@ -652,14 +619,19 @@ early_param("numa", numa_setup); * is already initialized in a round robin manner at numa_init_array, * prior to this call, and this initialization is good enough * for the fake NUMA cases. + * + * Called before the per_cpu areas are setup. */ void __init init_cpu_to_node(void) { - int i; + int cpu; + u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); - for (i = 0; i < NR_CPUS; i++) { + BUG_ON(cpu_to_apicid == NULL); + + for_each_possible_cpu(cpu) { int node; - u16 apicid = x86_cpu_to_apicid_init[i]; + u16 apicid = cpu_to_apicid[cpu]; if (apicid == BAD_APICID) continue; @@ -668,8 +640,9 @@ void __init init_cpu_to_node(void) continue; if (!node_online(node)) continue; - numa_set_node(i, node); + numa_set_node(cpu, node); } } +#endif diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 60bcb5b6a37..afd40054d15 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -34,6 +34,41 @@ struct cpa_data { unsigned force_split : 1; }; +#ifdef CONFIG_PROC_FS +static unsigned long direct_pages_count[PG_LEVEL_NUM]; + +void update_page_count(int level, unsigned long pages) +{ + unsigned long flags; + + /* Protect against CPA */ + spin_lock_irqsave(&pgd_lock, flags); + direct_pages_count[level] += pages; + spin_unlock_irqrestore(&pgd_lock, flags); +} + +static void split_page_count(int level) +{ + direct_pages_count[level]--; + direct_pages_count[level - 1] += PTRS_PER_PTE; +} + +int arch_report_meminfo(char *page) +{ + int n = sprintf(page, "DirectMap4k: %8lu\n" + "DirectMap2M: %8lu\n", + direct_pages_count[PG_LEVEL_4K], + direct_pages_count[PG_LEVEL_2M]); +#ifdef CONFIG_X86_64 + n += sprintf(page + n, "DirectMap1G: %8lu\n", + direct_pages_count[PG_LEVEL_1G]); +#endif + return n; +} +#else +static inline void split_page_count(int level) { } +#endif + #ifdef CONFIG_X86_64 static inline unsigned long highmap_start_pfn(void) @@ -500,6 +535,10 @@ static int split_large_page(pte_t *kpte, unsigned long address) for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); + if (address >= (unsigned long)__va(0) && + address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT)) + split_page_count(level); + /* * Install the new, split up pagetable. Important details here: * @@ -805,7 +844,7 @@ int _set_memory_wc(unsigned long addr, int numpages) int set_memory_wc(unsigned long addr, int numpages) { - if (!pat_wc_enabled) + if (!pat_enabled) return set_memory_uc(addr, numpages); if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 06b7a1c90fb..a885a1019b8 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -26,11 +26,11 @@ #include <asm/io.h> #ifdef CONFIG_X86_PAT -int __read_mostly pat_wc_enabled = 1; +int __read_mostly pat_enabled = 1; void __cpuinit pat_disable(char *reason) { - pat_wc_enabled = 0; + pat_enabled = 0; printk(KERN_INFO "%s\n", reason); } @@ -42,6 +42,19 @@ static int __init nopat(char *str) early_param("nopat", nopat); #endif + +static int debug_enable; +static int __init pat_debug_setup(char *str) +{ + debug_enable = 1; + return 0; +} +__setup("debugpat", pat_debug_setup); + +#define dprintk(fmt, arg...) \ + do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) + + static u64 __read_mostly boot_pat_state; enum { @@ -53,24 +66,25 @@ enum { PAT_UC_MINUS = 7, /* UC, but can be overriden by MTRR */ }; -#define PAT(x,y) ((u64)PAT_ ## y << ((x)*8)) +#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8)) void pat_init(void) { u64 pat; - if (!pat_wc_enabled) + if (!pat_enabled) return; /* Paranoia check. */ - if (!cpu_has_pat) { - printk(KERN_ERR "PAT enabled, but CPU feature cleared\n"); + if (!cpu_has_pat && boot_pat_state) { /* - * Panic if this happens on the secondary CPU, and we + * If this happens we are on a secondary CPU, but * switched to PAT on the boot CPU. We have no way to * undo PAT. - */ - BUG_ON(boot_pat_state); + */ + printk(KERN_ERR "PAT enabled, " + "but not supported by secondary CPU\n"); + BUG(); } /* Set PWT to Write-Combining. All other bits stay the same */ @@ -86,8 +100,8 @@ void pat_init(void) * 011 UC _PAGE_CACHE_UC * PAT bit unused */ - pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) | - PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC); + pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | + PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); /* Boot CPU check */ if (!boot_pat_state) @@ -103,11 +117,11 @@ void pat_init(void) static char *cattr_name(unsigned long flags) { switch (flags & _PAGE_CACHE_MASK) { - case _PAGE_CACHE_UC: return "uncached"; - case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; - case _PAGE_CACHE_WB: return "write-back"; - case _PAGE_CACHE_WC: return "write-combining"; - default: return "broken"; + case _PAGE_CACHE_UC: return "uncached"; + case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; + case _PAGE_CACHE_WB: return "write-back"; + case _PAGE_CACHE_WC: return "write-combining"; + default: return "broken"; } } @@ -145,47 +159,50 @@ static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ * The intersection is based on "Effective Memory Type" tables in IA-32 * SDM vol 3a */ -static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot, - unsigned long *ret_prot) +static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type) { - unsigned long pat_type; - u8 mtrr_type; - - pat_type = prot & _PAGE_CACHE_MASK; - prot &= (~_PAGE_CACHE_MASK); - - /* - * We return the PAT request directly for types where PAT takes - * precedence with respect to MTRR and for UC_MINUS. - * Consistency checks with other PAT requests is done later - * while going through memtype list. - */ - if (pat_type == _PAGE_CACHE_WC) { - *ret_prot = prot | _PAGE_CACHE_WC; - return 0; - } else if (pat_type == _PAGE_CACHE_UC_MINUS) { - *ret_prot = prot | _PAGE_CACHE_UC_MINUS; - return 0; - } else if (pat_type == _PAGE_CACHE_UC) { - *ret_prot = prot | _PAGE_CACHE_UC; - return 0; - } - /* * Look for MTRR hint to get the effective type in case where PAT * request is for WB. */ - mtrr_type = mtrr_type_lookup(start, end); + if (req_type == _PAGE_CACHE_WB) { + u8 mtrr_type; + + mtrr_type = mtrr_type_lookup(start, end); + if (mtrr_type == MTRR_TYPE_UNCACHABLE) + return _PAGE_CACHE_UC; + if (mtrr_type == MTRR_TYPE_WRCOMB) + return _PAGE_CACHE_WC; + } - if (mtrr_type == MTRR_TYPE_UNCACHABLE) { - *ret_prot = prot | _PAGE_CACHE_UC; - } else if (mtrr_type == MTRR_TYPE_WRCOMB) { - *ret_prot = prot | _PAGE_CACHE_WC; - } else { - *ret_prot = prot | _PAGE_CACHE_WB; + return req_type; +} + +static int chk_conflict(struct memtype *new, struct memtype *entry, + unsigned long *type) +{ + if (new->type != entry->type) { + if (type) { + new->type = entry->type; + *type = entry->type; + } else + goto conflict; } + /* check overlaps with more than one entry in the list */ + list_for_each_entry_continue(entry, &memtype_list, nd) { + if (new->end <= entry->start) + break; + else if (new->type != entry->type) + goto conflict; + } return 0; + + conflict: + printk(KERN_INFO "%s:%d conflicting memory types " + "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start, + new->end, cattr_name(new->type), cattr_name(entry->type)); + return -EBUSY; } /* @@ -198,37 +215,36 @@ static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot, * req_type will have a special case value '-1', when requester want to inherit * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS. * - * If ret_type is NULL, function will return an error if it cannot reserve the - * region with req_type. If ret_type is non-null, function will return - * available type in ret_type in case of no error. In case of any error + * If new_type is NULL, function will return an error if it cannot reserve the + * region with req_type. If new_type is non-NULL, function will return + * available type in new_type in case of no error. In case of any error * it will return a negative return value. */ int reserve_memtype(u64 start, u64 end, unsigned long req_type, - unsigned long *ret_type) + unsigned long *new_type) { - struct memtype *new_entry = NULL; - struct memtype *parse; + struct memtype *new, *entry; unsigned long actual_type; + struct list_head *where; int err = 0; - /* Only track when pat_wc_enabled */ - if (!pat_wc_enabled) { + BUG_ON(start >= end); /* end is exclusive */ + + if (!pat_enabled) { /* This is identical to page table setting without PAT */ - if (ret_type) { - if (req_type == -1) { - *ret_type = _PAGE_CACHE_WB; - } else { - *ret_type = req_type; - } + if (new_type) { + if (req_type == -1) + *new_type = _PAGE_CACHE_WB; + else + *new_type = req_type & _PAGE_CACHE_MASK; } return 0; } /* Low ISA region is always mapped WB in page table. No need to track */ - if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) { - if (ret_type) - *ret_type = _PAGE_CACHE_WB; - + if (is_ISA_range(start, end - 1)) { + if (new_type) + *new_type = _PAGE_CACHE_WB; return 0; } @@ -241,206 +257,92 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, */ u8 mtrr_type = mtrr_type_lookup(start, end); - if (mtrr_type == MTRR_TYPE_WRBACK) { - req_type = _PAGE_CACHE_WB; + if (mtrr_type == MTRR_TYPE_WRBACK) actual_type = _PAGE_CACHE_WB; - } else { - req_type = _PAGE_CACHE_UC_MINUS; + else actual_type = _PAGE_CACHE_UC_MINUS; - } - } else { - req_type &= _PAGE_CACHE_MASK; - err = pat_x_mtrr_type(start, end, req_type, &actual_type); - } - - if (err) { - if (ret_type) - *ret_type = actual_type; - - return -EINVAL; - } + } else + actual_type = pat_x_mtrr_type(start, end, + req_type & _PAGE_CACHE_MASK); - new_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL); - if (!new_entry) + new = kmalloc(sizeof(struct memtype), GFP_KERNEL); + if (!new) return -ENOMEM; - new_entry->start = start; - new_entry->end = end; - new_entry->type = actual_type; + new->start = start; + new->end = end; + new->type = actual_type; - if (ret_type) - *ret_type = actual_type; + if (new_type) + *new_type = actual_type; spin_lock(&memtype_lock); /* Search for existing mapping that overlaps the current range */ - list_for_each_entry(parse, &memtype_list, nd) { - struct memtype *saved_ptr; - - if (parse->start >= end) { - pr_debug("New Entry\n"); - list_add(&new_entry->nd, parse->nd.prev); - new_entry = NULL; + where = NULL; + list_for_each_entry(entry, &memtype_list, nd) { + if (end <= entry->start) { + where = entry->nd.prev; break; - } - - if (start <= parse->start && end >= parse->start) { - if (actual_type != parse->type && ret_type) { - actual_type = parse->type; - *ret_type = actual_type; - new_entry->type = actual_type; - } - - if (actual_type != parse->type) { - printk( - KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", - current->comm, current->pid, - start, end, - cattr_name(actual_type), - cattr_name(parse->type)); - err = -EBUSY; - break; + } else if (start <= entry->start) { /* end > entry->start */ + err = chk_conflict(new, entry, new_type); + if (!err) { + dprintk("Overlap at 0x%Lx-0x%Lx\n", + entry->start, entry->end); + where = entry->nd.prev; } - - saved_ptr = parse; - /* - * Check to see whether the request overlaps more - * than one entry in the list - */ - list_for_each_entry_continue(parse, &memtype_list, nd) { - if (end <= parse->start) { - break; - } - - if (actual_type != parse->type) { - printk( - KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", - current->comm, current->pid, - start, end, - cattr_name(actual_type), - cattr_name(parse->type)); - err = -EBUSY; - break; - } - } - - if (err) { - break; - } - - pr_debug("Overlap at 0x%Lx-0x%Lx\n", - saved_ptr->start, saved_ptr->end); - /* No conflict. Go ahead and add this new entry */ - list_add(&new_entry->nd, saved_ptr->nd.prev); - new_entry = NULL; break; - } - - if (start < parse->end) { - if (actual_type != parse->type && ret_type) { - actual_type = parse->type; - *ret_type = actual_type; - new_entry->type = actual_type; - } - - if (actual_type != parse->type) { - printk( - KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", - current->comm, current->pid, - start, end, - cattr_name(actual_type), - cattr_name(parse->type)); - err = -EBUSY; - break; - } - - saved_ptr = parse; - /* - * Check to see whether the request overlaps more - * than one entry in the list - */ - list_for_each_entry_continue(parse, &memtype_list, nd) { - if (end <= parse->start) { - break; - } - - if (actual_type != parse->type) { - printk( - KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", - current->comm, current->pid, - start, end, - cattr_name(actual_type), - cattr_name(parse->type)); - err = -EBUSY; - break; - } - } - - if (err) { - break; + } else if (start < entry->end) { /* start > entry->start */ + err = chk_conflict(new, entry, new_type); + if (!err) { + dprintk("Overlap at 0x%Lx-0x%Lx\n", + entry->start, entry->end); + where = &entry->nd; } - - pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n", - saved_ptr->start, saved_ptr->end); - /* No conflict. Go ahead and add this new entry */ - list_add(&new_entry->nd, &saved_ptr->nd); - new_entry = NULL; break; } } if (err) { - printk(KERN_INFO - "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n", - start, end, cattr_name(new_entry->type), - cattr_name(req_type)); - kfree(new_entry); + printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " + "track %s, req %s\n", + start, end, cattr_name(new->type), cattr_name(req_type)); + kfree(new); spin_unlock(&memtype_lock); return err; } - if (new_entry) { - /* No conflict. Not yet added to the list. Add to the tail */ - list_add_tail(&new_entry->nd, &memtype_list); - pr_debug("New Entry\n"); - } - - if (ret_type) { - pr_debug( - "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", - start, end, cattr_name(actual_type), - cattr_name(req_type), cattr_name(*ret_type)); - } else { - pr_debug( - "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n", - start, end, cattr_name(actual_type), - cattr_name(req_type)); - } + if (where) + list_add(&new->nd, where); + else + list_add_tail(&new->nd, &memtype_list); spin_unlock(&memtype_lock); + + dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", + start, end, cattr_name(new->type), cattr_name(req_type), + new_type ? cattr_name(*new_type) : "-"); + return err; } int free_memtype(u64 start, u64 end) { - struct memtype *ml; + struct memtype *entry; int err = -EINVAL; - /* Only track when pat_wc_enabled */ - if (!pat_wc_enabled) { + if (!pat_enabled) return 0; - } /* Low ISA region is always mapped WB. No need to track */ - if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) { + if (is_ISA_range(start, end - 1)) return 0; - } spin_lock(&memtype_lock); - list_for_each_entry(ml, &memtype_list, nd) { - if (ml->start == start && ml->end == end) { - list_del(&ml->nd); - kfree(ml); + list_for_each_entry(entry, &memtype_list, nd) { + if (entry->start == start && entry->end == end) { + list_del(&entry->nd); + kfree(entry); err = 0; break; } @@ -452,7 +354,7 @@ int free_memtype(u64 start, u64 end) current->comm, current->pid, start, end); } - pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end); + dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); return err; } @@ -521,12 +423,12 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, * caching for the high addresses through the KEN pin, but * we maintain the tradition of paranoia in this code. */ - if (!pat_wc_enabled && - ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) && - (pfn << PAGE_SHIFT) >= __pa(high_memory)) { + if (!pat_enabled && + !(boot_cpu_has(X86_FEATURE_MTRR) || + boot_cpu_has(X86_FEATURE_K6_MTRR) || + boot_cpu_has(X86_FEATURE_CYRIX_ARR) || + boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) && + (pfn << PAGE_SHIFT) >= __pa(high_memory)) { flags = _PAGE_CACHE_UC; } #endif @@ -548,7 +450,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, return 0; if (pfn <= max_pfn_mapped && - ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { + ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { free_memtype(offset, offset + size); printk(KERN_INFO "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n", @@ -586,4 +488,3 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) free_memtype(addr, addr + size); } - diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 50159764f69..ee1d6d39edd 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -255,7 +255,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, if (pte_young(*ptep)) ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, - &ptep->pte); + (unsigned long *) &ptep->pte); if (ret) pte_update(vma->vm_mm, addr, ptep); diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 99649dccad2..b67f5a16755 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -376,7 +376,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) if (node == NUMA_NO_NODE) continue; if (!node_isset(node, node_possible_map)) - numa_set_node(i, NUMA_NO_NODE); + numa_clear_node(i); } numa_init_array(); return 0; @@ -495,6 +495,7 @@ int __node_distance(int a, int b) EXPORT_SYMBOL(__node_distance); +#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY) int memory_add_physaddr_to_nid(u64 start) { int i, ret = 0; @@ -506,4 +507,4 @@ int memory_add_physaddr_to_nid(u64 start) return ret; } EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); - +#endif diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cc48d3fde54..2b6ad5b9f9d 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -269,12 +269,13 @@ static void nmi_cpu_shutdown(void *dummy) static void nmi_shutdown(void) { - struct op_msrs *msrs = &__get_cpu_var(cpu_msrs); + struct op_msrs *msrs = &get_cpu_var(cpu_msrs); nmi_enabled = 0; on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); unregister_die_notifier(&profile_exceptions_nb); model->shutdown(msrs); free_msrs(); + put_cpu_var(cpu_msrs); } static void nmi_cpu_start(void *dummy) diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32 index 89ec35d00ef..a34fbf55792 100644 --- a/arch/x86/pci/Makefile_32 +++ b/arch/x86/pci/Makefile_32 @@ -13,12 +13,14 @@ pci-y := fixup.o pci-$(CONFIG_ACPI) += acpi.o pci-y += legacy.o irq.o -# Careful: VISWS and NUMAQ overrule the pci-y above. The colons are +# Careful: VISWS overrule the pci-y above. The colons are # therefor correct. This needs a proper fix by distangling the code. pci-$(CONFIG_X86_VISWS) := visws.o fixup.o -pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o + +pci-$(CONFIG_X86_NUMAQ) += numa.o # Necessary for NUMAQ as well pci-$(CONFIG_NUMA) += mp_bus_to_node.o obj-y += $(pci-y) common.o early.o +obj-y += amd_bus.o diff --git a/arch/x86/pci/Makefile_64 b/arch/x86/pci/Makefile_64 index 8fbd19832cf..fd47068c95d 100644 --- a/arch/x86/pci/Makefile_64 +++ b/arch/x86/pci/Makefile_64 @@ -13,5 +13,5 @@ obj-y += legacy.o irq.o common.o early.o # mmconfig has a 64bit special obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_64.o direct.o mmconfig-shared.o -obj-y += k8-bus_64.o +obj-y += amd_bus.o diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index d95de2f199c..4fa52d3dc84 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -171,8 +171,11 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do if (node != -1) set_mp_bus_to_node(busnum, node); else - node = get_mp_bus_to_node(busnum); #endif + node = get_mp_bus_to_node(busnum); + + if (node != -1 && !node_online(node)) + node = -1; /* Allocate per-root-bus (not per bus) arch-specific data. * TODO: leak; this memory is never freed. @@ -204,21 +207,22 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do if (!bus) kfree(sd); + if (bus && node != -1) { #ifdef CONFIG_ACPI_NUMA - if (bus) { - if (pxm >= 0) { + if (pxm >= 0) printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n", - busnum, pxm, pxm_to_node(pxm)); - } - } + busnum, pxm, node); +#else + printk(KERN_DEBUG "bus %02x -> node %d\n", + busnum, node); #endif + } if (bus && (pci_probe & PCI_USE__CRS)) get_current_resources(device, busnum, domain, bus); return bus; } -extern int pci_routeirq; static int __init pci_acpi_init(void) { struct pci_dev *dev = NULL; diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/amd_bus.c index 5c2799c20e4..d02c598451e 100644 --- a/arch/x86/pci/k8-bus_64.c +++ b/arch/x86/pci/amd_bus.c @@ -1,5 +1,9 @@ #include <linux/init.h> #include <linux/pci.h> +#include "pci.h" + +#ifdef CONFIG_X86_64 + #include <asm/pci-direct.h> #include <asm/mpspec.h> #include <linux/cpumask.h> @@ -384,7 +388,7 @@ static int __init early_fill_mp_bus_info(void) /* need to take out [0, TOM) for RAM*/ address = MSR_K8_TOP_MEM1; rdmsrl(address, val); - end = (val & 0xffffff8000000ULL); + end = (val & 0xffffff800000ULL); printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); if (end < (1ULL<<32)) update_range(range, 0, end - 1); @@ -478,7 +482,7 @@ static int __init early_fill_mp_bus_info(void) /* TOP_MEM2 */ address = MSR_K8_TOP_MEM2; rdmsrl(address, val); - end = (val & 0xffffff8000000ULL); + end = (val & 0xffffff800000ULL); printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); update_range(range, 1ULL<<32, end - 1); } @@ -526,3 +530,31 @@ static int __init early_fill_mp_bus_info(void) } postcore_initcall(early_fill_mp_bus_info); + +#endif + +/* common 32/64 bit code */ + +#define ENABLE_CF8_EXT_CFG (1ULL << 46) + +static void enable_pci_io_ecs_per_cpu(void *unused) +{ + u64 reg; + rdmsrl(MSR_AMD64_NB_CFG, reg); + if (!(reg & ENABLE_CF8_EXT_CFG)) { + reg |= ENABLE_CF8_EXT_CFG; + wrmsrl(MSR_AMD64_NB_CFG, reg); + } +} + +static int __init enable_pci_io_ecs(void) +{ + /* assume all cpus from fam10h have IO ECS */ + if (boot_cpu_data.x86 < 0x10) + return 0; + on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1, 1); + pci_probe |= PCI_HAS_IO_ECS; + return 0; +} + +postcore_initcall(enable_pci_io_ecs); diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 21d1e0e0d53..9915293500f 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -8,18 +8,21 @@ #include "pci.h" /* - * Functions for accessing PCI configuration space with type 1 accesses + * Functions for accessing PCI base (first 256 bytes) and extended + * (4096 bytes per PCI function) configuration space with type 1 + * accesses. */ #define PCI_CONF1_ADDRESS(bus, devfn, reg) \ - (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3)) + (0x80000000 | ((reg & 0xF00) << 16) | (bus << 16) \ + | (devfn << 8) | (reg & 0xFC)) static int pci_conf1_read(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 *value) { unsigned long flags; - if ((bus > 255) || (devfn > 255) || (reg > 255)) { + if ((bus > 255) || (devfn > 255) || (reg > 4095)) { *value = -1; return -EINVAL; } @@ -50,7 +53,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus, { unsigned long flags; - if ((bus > 255) || (devfn > 255) || (reg > 255)) + if ((bus > 255) || (devfn > 255) || (reg > 4095)) return -EINVAL; spin_lock_irqsave(&pci_config_lock, flags); @@ -260,10 +263,18 @@ void __init pci_direct_init(int type) return; printk(KERN_INFO "PCI: Using configuration type %d for base access\n", type); - if (type == 1) + if (type == 1) { raw_pci_ops = &pci_direct_conf1; - else - raw_pci_ops = &pci_direct_conf2; + if (raw_pci_ext_ops) + return; + if (!(pci_probe & PCI_HAS_IO_ECS)) + return; + printk(KERN_INFO "PCI: Using configuration type 1 " + "for extended access\n"); + raw_pci_ext_ops = &pci_direct_conf1; + return; + } + raw_pci_ops = &pci_direct_conf2; } int __init pci_direct_probe(void) diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 10fb308fded..6ccd7a108cd 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -299,9 +299,9 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, return -EINVAL; prot = pgprot_val(vma->vm_page_prot); - if (pat_wc_enabled && write_combine) + if (pat_enabled && write_combine) prot |= _PAGE_CACHE_WC; - else if (pat_wc_enabled || boot_cpu_data.x86 > 3) + else if (pat_enabled || boot_cpu_data.x86 > 3) /* * ioremap() and ioremap_nocache() defaults to UC MINUS for now. * To avoid attribute conflicts, request UC MINUS here diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index ca8df9c260b..f0859de23e2 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -11,8 +11,8 @@ #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/dmi.h> -#include <asm/io.h> -#include <asm/smp.h> +#include <linux/io.h> +#include <linux/smp.h> #include <asm/io_apic.h> #include <linux/irq.h> #include <linux/acpi.h> @@ -61,7 +61,7 @@ void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL; * and perform checksum verification. */ -static inline struct irq_routing_table * pirq_check_routing_table(u8 *addr) +static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr) { struct irq_routing_table *rt; int i; @@ -74,7 +74,7 @@ static inline struct irq_routing_table * pirq_check_routing_table(u8 *addr) rt->size < sizeof(struct irq_routing_table)) return NULL; sum = 0; - for (i=0; i < rt->size; i++) + for (i = 0; i < rt->size; i++) sum += addr[i]; if (!sum) { DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt); @@ -100,7 +100,7 @@ static struct irq_routing_table * __init pirq_find_routing_table(void) return rt; printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n"); } - for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) { + for (addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) { rt = pirq_check_routing_table(addr); if (rt) return rt; @@ -122,20 +122,20 @@ static void __init pirq_peer_trick(void) struct irq_info *e; memset(busmap, 0, sizeof(busmap)); - for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) { + for (i = 0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) { e = &rt->slots[i]; #ifdef DEBUG { int j; DBG(KERN_DEBUG "%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot); - for(j=0; j<4; j++) + for (j = 0; j < 4; j++) DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap); DBG("\n"); } #endif busmap[e->bus] = 1; } - for(i = 1; i < 256; i++) { + for (i = 1; i < 256; i++) { int node; if (!busmap[i] || pci_find_bus(0, i)) continue; @@ -285,7 +285,7 @@ static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq) static const unsigned char pirqmap[4] = { 1, 0, 2, 3 }; WARN_ON_ONCE(pirq > 4); - return read_config_nybble(router,0x43, pirqmap[pirq-1]); + return read_config_nybble(router, 0x43, pirqmap[pirq-1]); } static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) @@ -314,7 +314,7 @@ static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, /* * Cyrix: nibble offset 0x5C - * 0x5C bits 7:4 is INTB bits 3:0 is INTA + * 0x5C bits 7:4 is INTB bits 3:0 is INTA * 0x5D bits 7:4 is INTD bits 3:0 is INTC */ static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) @@ -350,7 +350,7 @@ static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, * Apparently there are systems implementing PCI routing table using * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D. * We try our best to handle both link mappings. - * + * * Currently (2003-05-21) it appears most SiS chipsets follow the * definition of routing registers from the SiS-5595 southbridge. * According to the SiS 5595 datasheets the revision id's of the @@ -370,7 +370,7 @@ static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, * * 0x62: USBIRQ: * bit 6 OHCI function disabled (0), enabled (1) - * + * * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved * * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved @@ -487,9 +487,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq u8 irq; irq = 0; if (pirq <= 4) - { irq = read_config_nybble(router, 0x56, pirq - 1); - } printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", dev->vendor, dev->device, pirq, irq); return irq; @@ -497,12 +495,10 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { - printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", + printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", dev->vendor, dev->device, pirq, irq); if (pirq <= 4) - { write_config_nybble(router, 0x56, pirq - 1, irq); - } return 1; } @@ -549,50 +545,49 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route if (pci_dev_present(pirq_440gx)) return 0; - switch(device) - { - case PCI_DEVICE_ID_INTEL_82371FB_0: - case PCI_DEVICE_ID_INTEL_82371SB_0: - case PCI_DEVICE_ID_INTEL_82371AB_0: - case PCI_DEVICE_ID_INTEL_82371MX: - case PCI_DEVICE_ID_INTEL_82443MX_0: - case PCI_DEVICE_ID_INTEL_82801AA_0: - case PCI_DEVICE_ID_INTEL_82801AB_0: - case PCI_DEVICE_ID_INTEL_82801BA_0: - case PCI_DEVICE_ID_INTEL_82801BA_10: - case PCI_DEVICE_ID_INTEL_82801CA_0: - case PCI_DEVICE_ID_INTEL_82801CA_12: - case PCI_DEVICE_ID_INTEL_82801DB_0: - case PCI_DEVICE_ID_INTEL_82801E_0: - case PCI_DEVICE_ID_INTEL_82801EB_0: - case PCI_DEVICE_ID_INTEL_ESB_1: - case PCI_DEVICE_ID_INTEL_ICH6_0: - case PCI_DEVICE_ID_INTEL_ICH6_1: - case PCI_DEVICE_ID_INTEL_ICH7_0: - case PCI_DEVICE_ID_INTEL_ICH7_1: - case PCI_DEVICE_ID_INTEL_ICH7_30: - case PCI_DEVICE_ID_INTEL_ICH7_31: - case PCI_DEVICE_ID_INTEL_ESB2_0: - case PCI_DEVICE_ID_INTEL_ICH8_0: - case PCI_DEVICE_ID_INTEL_ICH8_1: - case PCI_DEVICE_ID_INTEL_ICH8_2: - case PCI_DEVICE_ID_INTEL_ICH8_3: - case PCI_DEVICE_ID_INTEL_ICH8_4: - case PCI_DEVICE_ID_INTEL_ICH9_0: - case PCI_DEVICE_ID_INTEL_ICH9_1: - case PCI_DEVICE_ID_INTEL_ICH9_2: - case PCI_DEVICE_ID_INTEL_ICH9_3: - case PCI_DEVICE_ID_INTEL_ICH9_4: - case PCI_DEVICE_ID_INTEL_ICH9_5: - case PCI_DEVICE_ID_INTEL_TOLAPAI_0: - case PCI_DEVICE_ID_INTEL_ICH10_0: - case PCI_DEVICE_ID_INTEL_ICH10_1: - case PCI_DEVICE_ID_INTEL_ICH10_2: - case PCI_DEVICE_ID_INTEL_ICH10_3: - r->name = "PIIX/ICH"; - r->get = pirq_piix_get; - r->set = pirq_piix_set; - return 1; + switch (device) { + case PCI_DEVICE_ID_INTEL_82371FB_0: + case PCI_DEVICE_ID_INTEL_82371SB_0: + case PCI_DEVICE_ID_INTEL_82371AB_0: + case PCI_DEVICE_ID_INTEL_82371MX: + case PCI_DEVICE_ID_INTEL_82443MX_0: + case PCI_DEVICE_ID_INTEL_82801AA_0: + case PCI_DEVICE_ID_INTEL_82801AB_0: + case PCI_DEVICE_ID_INTEL_82801BA_0: + case PCI_DEVICE_ID_INTEL_82801BA_10: + case PCI_DEVICE_ID_INTEL_82801CA_0: + case PCI_DEVICE_ID_INTEL_82801CA_12: + case PCI_DEVICE_ID_INTEL_82801DB_0: + case PCI_DEVICE_ID_INTEL_82801E_0: + case PCI_DEVICE_ID_INTEL_82801EB_0: + case PCI_DEVICE_ID_INTEL_ESB_1: + case PCI_DEVICE_ID_INTEL_ICH6_0: + case PCI_DEVICE_ID_INTEL_ICH6_1: + case PCI_DEVICE_ID_INTEL_ICH7_0: + case PCI_DEVICE_ID_INTEL_ICH7_1: + case PCI_DEVICE_ID_INTEL_ICH7_30: + case PCI_DEVICE_ID_INTEL_ICH7_31: + case PCI_DEVICE_ID_INTEL_ESB2_0: + case PCI_DEVICE_ID_INTEL_ICH8_0: + case PCI_DEVICE_ID_INTEL_ICH8_1: + case PCI_DEVICE_ID_INTEL_ICH8_2: + case PCI_DEVICE_ID_INTEL_ICH8_3: + case PCI_DEVICE_ID_INTEL_ICH8_4: + case PCI_DEVICE_ID_INTEL_ICH9_0: + case PCI_DEVICE_ID_INTEL_ICH9_1: + case PCI_DEVICE_ID_INTEL_ICH9_2: + case PCI_DEVICE_ID_INTEL_ICH9_3: + case PCI_DEVICE_ID_INTEL_ICH9_4: + case PCI_DEVICE_ID_INTEL_ICH9_5: + case PCI_DEVICE_ID_INTEL_TOLAPAI_0: + case PCI_DEVICE_ID_INTEL_ICH10_0: + case PCI_DEVICE_ID_INTEL_ICH10_1: + case PCI_DEVICE_ID_INTEL_ICH10_2: + case PCI_DEVICE_ID_INTEL_ICH10_3: + r->name = "PIIX/ICH"; + r->get = pirq_piix_get; + r->set = pirq_piix_set; + return 1; } return 0; } @@ -606,7 +601,7 @@ static __init int via_router_probe(struct irq_router *r, * workarounds for some buggy BIOSes */ if (device == PCI_DEVICE_ID_VIA_82C586_0) { - switch(router->device) { + switch (router->device) { case PCI_DEVICE_ID_VIA_82C686: /* * Asus k7m bios wrongly reports 82C686A @@ -631,7 +626,7 @@ static __init int via_router_probe(struct irq_router *r, } } - switch(device) { + switch (device) { case PCI_DEVICE_ID_VIA_82C586_0: r->name = "VIA"; r->get = pirq_via586_get; @@ -654,13 +649,12 @@ static __init int via_router_probe(struct irq_router *r, static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { - case PCI_DEVICE_ID_VLSI_82C534: - r->name = "VLSI 82C534"; - r->get = pirq_vlsi_get; - r->set = pirq_vlsi_set; - return 1; + switch (device) { + case PCI_DEVICE_ID_VLSI_82C534: + r->name = "VLSI 82C534"; + r->get = pirq_vlsi_get; + r->set = pirq_vlsi_set; + return 1; } return 0; } @@ -668,14 +662,13 @@ static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { - case PCI_DEVICE_ID_SERVERWORKS_OSB4: - case PCI_DEVICE_ID_SERVERWORKS_CSB5: - r->name = "ServerWorks"; - r->get = pirq_serverworks_get; - r->set = pirq_serverworks_set; - return 1; + switch (device) { + case PCI_DEVICE_ID_SERVERWORKS_OSB4: + case PCI_DEVICE_ID_SERVERWORKS_CSB5: + r->name = "ServerWorks"; + r->get = pirq_serverworks_get; + r->set = pirq_serverworks_set; + return 1; } return 0; } @@ -684,7 +677,7 @@ static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, { if (device != PCI_DEVICE_ID_SI_503) return 0; - + r->name = "SIS"; r->get = pirq_sis_get; r->set = pirq_sis_set; @@ -693,47 +686,43 @@ static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { - case PCI_DEVICE_ID_CYRIX_5520: - r->name = "NatSemi"; - r->get = pirq_cyrix_get; - r->set = pirq_cyrix_set; - return 1; + switch (device) { + case PCI_DEVICE_ID_CYRIX_5520: + r->name = "NatSemi"; + r->get = pirq_cyrix_get; + r->set = pirq_cyrix_set; + return 1; } return 0; } static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { - case PCI_DEVICE_ID_OPTI_82C700: - r->name = "OPTI"; - r->get = pirq_opti_get; - r->set = pirq_opti_set; - return 1; + switch (device) { + case PCI_DEVICE_ID_OPTI_82C700: + r->name = "OPTI"; + r->get = pirq_opti_get; + r->set = pirq_opti_set; + return 1; } return 0; } static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { - case PCI_DEVICE_ID_ITE_IT8330G_0: - r->name = "ITE"; - r->get = pirq_ite_get; - r->set = pirq_ite_set; - return 1; + switch (device) { + case PCI_DEVICE_ID_ITE_IT8330G_0: + r->name = "ITE"; + r->get = pirq_ite_get; + r->set = pirq_ite_set; + return 1; } return 0; } static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { + switch (device) { case PCI_DEVICE_ID_AL_M1533: case PCI_DEVICE_ID_AL_M1563: printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n"); @@ -747,25 +736,24 @@ static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { - case PCI_DEVICE_ID_AMD_VIPER_740B: - r->name = "AMD756"; - break; - case PCI_DEVICE_ID_AMD_VIPER_7413: - r->name = "AMD766"; - break; - case PCI_DEVICE_ID_AMD_VIPER_7443: - r->name = "AMD768"; - break; - default: - return 0; + switch (device) { + case PCI_DEVICE_ID_AMD_VIPER_740B: + r->name = "AMD756"; + break; + case PCI_DEVICE_ID_AMD_VIPER_7413: + r->name = "AMD766"; + break; + case PCI_DEVICE_ID_AMD_VIPER_7443: + r->name = "AMD768"; + break; + default: + return 0; } r->get = pirq_amd756_get; r->set = pirq_amd756_set; return 1; } - + static __init int pico_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { switch (device) { @@ -807,7 +795,7 @@ static struct pci_dev *pirq_router_dev; * FIXME: should we have an option to say "generic for * chipset" ? */ - + static void __init pirq_find_router(struct irq_router *r) { struct irq_routing_table *rt = pirq_table; @@ -826,7 +814,7 @@ static void __init pirq_find_router(struct irq_router *r) r->name = "default"; r->get = NULL; r->set = NULL; - + DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", rt->rtr_vendor, rt->rtr_device); @@ -837,7 +825,7 @@ static void __init pirq_find_router(struct irq_router *r) return; } - for( h = pirq_routers; h->vendor; h++) { + for (h = pirq_routers; h->vendor; h++) { /* First look for a router match */ if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device)) break; @@ -889,7 +877,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) if (!pirq_table) return 0; - + DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin); info = pirq_get_info(dev); if (!info) { @@ -928,8 +916,10 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) */ newirq = dev->irq; if (newirq && !((1 << newirq) & mask)) { - if ( pci_probe & PCI_USE_PIRQ_MASK) newirq = 0; - else printk("\n" KERN_WARNING + if (pci_probe & PCI_USE_PIRQ_MASK) + newirq = 0; + else + printk("\n" KERN_WARNING "PCI: IRQ %i for device %s doesn't match PIRQ mask " "- try pci=usepirqmask\n" KERN_DEBUG, newirq, pci_name(dev)); @@ -949,8 +939,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) irq = pirq & 0xf; DBG(" -> hardcoded IRQ %d\n", irq); msg = "Hardcoded"; - } else if ( r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ - ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask)) ) { + } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ + ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { DBG(" -> got IRQ %d\n", irq); msg = "Found"; eisa_set_level_irq(irq); @@ -985,15 +975,15 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) continue; if (info->irq[pin].link == pirq) { /* We refuse to override the dev->irq information. Give a warning! */ - if ( dev2->irq && dev2->irq != irq && \ + if (dev2->irq && dev2->irq != irq && \ (!(pci_probe & PCI_USE_PIRQ_MASK) || \ - ((1 << dev2->irq) & mask)) ) { + ((1 << dev2->irq) & mask))) { #ifndef CONFIG_PCI_MSI - printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", + printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", pci_name(dev2), dev2->irq, irq); #endif - continue; - } + continue; + } dev2->irq = irq; pirq_penalty[irq]++; if (dev != dev2) @@ -1031,8 +1021,7 @@ static void __init pcibios_fixup_irqs(void) /* * Recalculate IRQ numbers if we use the I/O APIC. */ - if (io_apic_assign_pci_irqs) - { + if (io_apic_assign_pci_irqs) { int irq; if (pin) { @@ -1045,10 +1034,10 @@ static void __init pcibios_fixup_irqs(void) * busses itself so we should get into this branch reliably. */ if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ - struct pci_dev * bridge = dev->bus->self; + struct pci_dev *bridge = dev->bus->self; pin = (pin + PCI_SLOT(dev->devfn)) % 4; - irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, + irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, PCI_SLOT(bridge->devfn), pin); if (irq >= 0) printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", @@ -1138,7 +1127,7 @@ static int __init pcibios_irq_init(void) pirq_find_router(&pirq_router); if (pirq_table->exclusive_irqs) { int i; - for (i=0; i<16; i++) + for (i = 0; i < 16; i++) if (!(pirq_table->exclusive_irqs & (1 << i))) pirq_penalty[i] += 100; } @@ -1203,10 +1192,10 @@ static int pirq_enable_irq(struct pci_dev *dev) */ temp_dev = dev; while (irq < 0 && dev->bus->parent) { /* go back to the bridge */ - struct pci_dev * bridge = dev->bus->self; + struct pci_dev *bridge = dev->bus->self; pin = (pin + PCI_SLOT(dev->devfn)) % 4; - irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, + irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, PCI_SLOT(bridge->devfn), pin); if (irq >= 0) printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 0cfebecf2a8..23faaa890ff 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -374,7 +374,7 @@ reject: static int __initdata known_bridge; -void __init __pci_mmcfg_init(int early) +static void __init __pci_mmcfg_init(int early) { /* MMCONFIG disabled */ if ((pci_probe & PCI_PROBE_MMCONF) == 0) diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c index d9afbae5092..99f1ecd485b 100644 --- a/arch/x86/pci/numa.c +++ b/arch/x86/pci/numa.c @@ -6,45 +6,21 @@ #include <linux/init.h> #include <linux/nodemask.h> #include <mach_apic.h> +#include <asm/mpspec.h> #include "pci.h" #define XQUAD_PORTIO_BASE 0xfe400000 #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ -int mp_bus_id_to_node[MAX_MP_BUSSES]; #define BUS2QUAD(global) (mp_bus_id_to_node[global]) -int mp_bus_id_to_local[MAX_MP_BUSSES]; #define BUS2LOCAL(global) (mp_bus_id_to_local[global]) -void mpc_oem_bus_info(struct mpc_config_bus *m, char *name, - struct mpc_config_translation *translation) -{ - int quad = translation->trans_quad; - int local = translation->trans_local; - - mp_bus_id_to_node[m->mpc_busid] = quad; - mp_bus_id_to_local[m->mpc_busid] = local; - printk(KERN_INFO "Bus #%d is %s (node %d)\n", - m->mpc_busid, name, quad); -} - -int quad_local_to_mp_bus_id [NR_CPUS/4][4]; #define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) -void mpc_oem_pci_bus(struct mpc_config_bus *m, - struct mpc_config_translation *translation) -{ - int quad = translation->trans_quad; - int local = translation->trans_local; - - quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; -} /* Where the IO area was mapped on multiquad, always 0 otherwise */ void *xquad_portio; -#ifdef CONFIG_X86_NUMAQ EXPORT_SYMBOL(xquad_portio); -#endif #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) @@ -179,6 +155,9 @@ static int __init pci_numa_init(void) { int quad; + if (!found_numaq) + return 0; + raw_pci_ops = &pci_direct_conf1_mq; if (pcibios_scanned++) diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index 720c4c55453..ba263e626a6 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h @@ -27,6 +27,7 @@ #define PCI_CAN_SKIP_ISA_ALIGN 0x8000 #define PCI_USE__CRS 0x10000 #define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 +#define PCI_HAS_IO_ECS 0x40000 extern unsigned int pci_probe; extern unsigned long pirq_table_addr; diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 3fdd51497a8..19a6cfaf5db 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -16,7 +16,7 @@ #include "vextern.h" /* Just for VMAGIC. */ #undef VEXTERN -int vdso_enabled = 1; +unsigned int __read_mostly vdso_enabled = 1; extern char vdso_start[], vdso_end[]; extern unsigned short vdso_sync_cpuid; diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 6c388e593bc..c2cc9958087 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -12,3 +12,13 @@ config XEN This is the Linux Xen port. Enabling this will allow the kernel to boot in a paravirtualized environment under the Xen hypervisor. + +config XEN_MAX_DOMAIN_MEMORY + int "Maximum allowed size of a domain in gigabytes" + default 8 + depends on XEN + help + The pseudo-physical to machine address array is sized + according to the maximum possible memory size of a Xen + domain. This array uses 1 page per gigabyte, so there's no + need to be too stingy here.
\ No newline at end of file diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 3d8df981d5f..2ba2d164913 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,4 +1,4 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o \ - time.o manage.o xen-asm.o grant-table.o + time.o xen-asm.o grant-table.o suspend.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f09c1c69c37..fe60aa9fed0 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -75,13 +75,13 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); -static /* __initdata */ struct shared_info dummy_shared_info; +struct shared_info xen_dummy_shared_info; /* * Point at some empty memory to start with. We map the real shared_info * page as soon as fixmap is up and running. */ -struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; +struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; /* * Flag to determine whether vcpu info placement is available on all @@ -98,13 +98,13 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; */ static int have_vcpu_info_placement = 1; -static void __init xen_vcpu_setup(int cpu) +static void xen_vcpu_setup(int cpu) { struct vcpu_register_vcpu_info info; int err; struct vcpu_info *vcpup; - BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info); + BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; if (!have_vcpu_info_placement) @@ -136,11 +136,41 @@ static void __init xen_vcpu_setup(int cpu) } } +/* + * On restore, set the vcpu placement up again. + * If it fails, then we're in a bad state, since + * we can't back out from using it... + */ +void xen_vcpu_restore(void) +{ + if (have_vcpu_info_placement) { + int cpu; + + for_each_online_cpu(cpu) { + bool other_cpu = (cpu != smp_processor_id()); + + if (other_cpu && + HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) + BUG(); + + xen_vcpu_setup(cpu); + + if (other_cpu && + HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) + BUG(); + } + + BUG_ON(!have_vcpu_info_placement); + } +} + static void __init xen_banner(void) { printk(KERN_INFO "Booting paravirtualized kernel on %s\n", pv_info.name); - printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic); + printk(KERN_INFO "Hypervisor signature: %s%s\n", + xen_start_info->magic, + xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } static void xen_cpuid(unsigned int *ax, unsigned int *bx, @@ -235,13 +265,13 @@ static void xen_irq_enable(void) { struct vcpu_info *vcpu; - /* There's a one instruction preempt window here. We need to - make sure we're don't switch CPUs between getting the vcpu - pointer and updating the mask. */ - preempt_disable(); + /* We don't need to worry about being preempted here, since + either a) interrupts are disabled, so no preemption, or b) + the caller is confused and is trying to re-enable interrupts + on an indeterminate processor. */ + vcpu = x86_read_percpu(xen_vcpu); vcpu->evtchn_upcall_mask = 0; - preempt_enable_no_resched(); /* Doesn't matter if we get preempted here, because any pending event will get dealt with anyway. */ @@ -254,7 +284,7 @@ static void xen_irq_enable(void) static void xen_safe_halt(void) { /* Blocking includes an implicit local_irq_enable(). */ - if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0) + if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) BUG(); } @@ -607,6 +637,30 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, xen_mc_issue(PARAVIRT_LAZY_MMU); } +static void xen_clts(void) +{ + struct multicall_space mcs; + + mcs = xen_mc_entry(0); + + MULTI_fpu_taskswitch(mcs.mc, 0); + + xen_mc_issue(PARAVIRT_LAZY_CPU); +} + +static void xen_write_cr0(unsigned long cr0) +{ + struct multicall_space mcs; + + /* Only pay attention to cr0.TS; everything else is + ignored. */ + mcs = xen_mc_entry(0); + + MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0); + + xen_mc_issue(PARAVIRT_LAZY_CPU); +} + static void xen_write_cr2(unsigned long cr2) { x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; @@ -624,8 +678,10 @@ static unsigned long xen_read_cr2_direct(void) static void xen_write_cr4(unsigned long cr4) { - /* Just ignore cr4 changes; Xen doesn't allow us to do - anything anyway. */ + cr4 &= ~X86_CR4_PGE; + cr4 &= ~X86_CR4_PSE; + + native_write_cr4(cr4); } static unsigned long xen_read_cr3(void) @@ -831,7 +887,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base) PFN_DOWN(__pa(xen_start_info->pt_base))); } -static __init void setup_shared_info(void) +void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); @@ -854,6 +910,8 @@ static __init void setup_shared_info(void) /* In UP this is as good a place as any to set up shared info */ xen_setup_vcpu_info_placement(); #endif + + xen_setup_mfn_list_list(); } static __init void xen_pagetable_setup_done(pgd_t *base) @@ -866,15 +924,23 @@ static __init void xen_pagetable_setup_done(pgd_t *base) pv_mmu_ops.release_pmd = xen_release_pmd; pv_mmu_ops.set_pte = xen_set_pte; - setup_shared_info(); + xen_setup_shared_info(); /* Actually pin the pagetable down, but we can't set PG_pinned yet because the page structures don't exist yet. */ pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); } +static __init void xen_post_allocator_init(void) +{ + pv_mmu_ops.set_pmd = xen_set_pmd; + pv_mmu_ops.set_pud = xen_set_pud; + + xen_mark_init_mm_pinned(); +} + /* This is called once we have the cpu_possible_map */ -void __init xen_setup_vcpu_info_placement(void) +void xen_setup_vcpu_info_placement(void) { int cpu; @@ -960,7 +1026,7 @@ static const struct pv_init_ops xen_init_ops __initdata = { .banner = xen_banner, .memory_setup = xen_memory_setup, .arch_setup = xen_arch_setup, - .post_allocator_init = xen_mark_init_mm_pinned, + .post_allocator_init = xen_post_allocator_init, }; static const struct pv_time_ops xen_time_ops __initdata = { @@ -978,10 +1044,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .set_debugreg = xen_set_debugreg, .get_debugreg = xen_get_debugreg, - .clts = native_clts, + .clts = xen_clts, .read_cr0 = native_read_cr0, - .write_cr0 = native_write_cr0, + .write_cr0 = xen_write_cr0, .read_cr4 = native_read_cr4, .read_cr4_safe = native_read_cr4_safe, @@ -1072,9 +1138,13 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .set_pte = NULL, /* see xen_pagetable_setup_* */ .set_pte_at = xen_set_pte_at, - .set_pmd = xen_set_pmd, + .set_pmd = xen_set_pmd_hyper, + + .ptep_modify_prot_start = __ptep_modify_prot_start, + .ptep_modify_prot_commit = __ptep_modify_prot_commit, .pte_val = xen_pte_val, + .pte_flags = native_pte_val, .pgd_val = xen_pgd_val, .make_pte = xen_make_pte, @@ -1082,7 +1152,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .set_pte_atomic = xen_set_pte_atomic, .set_pte_present = xen_set_pte_at, - .set_pud = xen_set_pud, + .set_pud = xen_set_pud_hyper, .pte_clear = xen_pte_clear, .pmd_clear = xen_pmd_clear, @@ -1114,11 +1184,13 @@ static const struct smp_ops xen_smp_ops __initdata = { static void xen_reboot(int reason) { + struct sched_shutdown r = { .reason = reason }; + #ifdef CONFIG_SMP smp_send_stop(); #endif - if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason)) + if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) BUG(); } @@ -1173,6 +1245,8 @@ asmlinkage void __init xen_start_kernel(void) BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); + xen_setup_features(); + /* Install Xen paravirt ops */ pv_info = xen_info; pv_init_ops = xen_init_ops; @@ -1182,20 +1256,24 @@ asmlinkage void __init xen_start_kernel(void) pv_apic_ops = xen_apic_ops; pv_mmu_ops = xen_mmu_ops; + if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { + pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; + pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; + } + machine_ops = xen_machine_ops; #ifdef CONFIG_SMP smp_ops = xen_smp_ops; #endif - xen_setup_features(); - /* Get mfn list */ if (!xen_feature(XENFEAT_auto_translated_physmap)) - phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list; + xen_build_dynamic_phys_to_machine(); pgd = (pgd_t *)xen_start_info->pt_base; + init_pg_tables_start = __pa(pgd); init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; init_mm.pgd = pgd; /* use the Xen pagetables to start */ @@ -1232,9 +1310,12 @@ asmlinkage void __init xen_start_kernel(void) ? __pa(xen_start_info->mod_start) : 0; boot_params.hdr.ramdisk_size = xen_start_info->mod_len; - if (!is_initial_xendomain()) + if (!is_initial_xendomain()) { + add_preferred_console("xenboot", 0, NULL); + add_preferred_console("tty", 0, NULL); add_preferred_console("hvc", 0, NULL); + } /* Start the world */ - start_kernel(); + i386_start_kernel(); } diff --git a/arch/x86/xen/manage.c b/arch/x86/xen/manage.c deleted file mode 100644 index aa7af9e6abc..00000000000 --- a/arch/x86/xen/manage.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Handle extern requests for shutdown, reboot and sysrq - */ -#include <linux/kernel.h> -#include <linux/err.h> -#include <linux/reboot.h> -#include <linux/sysrq.h> - -#include <xen/xenbus.h> - -#define SHUTDOWN_INVALID -1 -#define SHUTDOWN_POWEROFF 0 -#define SHUTDOWN_SUSPEND 2 -/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only - * report a crash, not be instructed to crash! - * HALT is the same as POWEROFF, as far as we're concerned. The tools use - * the distinction when we return the reason code to them. - */ -#define SHUTDOWN_HALT 4 - -/* Ignore multiple shutdown requests. */ -static int shutting_down = SHUTDOWN_INVALID; - -static void shutdown_handler(struct xenbus_watch *watch, - const char **vec, unsigned int len) -{ - char *str; - struct xenbus_transaction xbt; - int err; - - if (shutting_down != SHUTDOWN_INVALID) - return; - - again: - err = xenbus_transaction_start(&xbt); - if (err) - return; - - str = (char *)xenbus_read(xbt, "control", "shutdown", NULL); - /* Ignore read errors and empty reads. */ - if (XENBUS_IS_ERR_READ(str)) { - xenbus_transaction_end(xbt, 1); - return; - } - - xenbus_write(xbt, "control", "shutdown", ""); - - err = xenbus_transaction_end(xbt, 0); - if (err == -EAGAIN) { - kfree(str); - goto again; - } - - if (strcmp(str, "poweroff") == 0 || - strcmp(str, "halt") == 0) - orderly_poweroff(false); - else if (strcmp(str, "reboot") == 0) - ctrl_alt_del(); - else { - printk(KERN_INFO "Ignoring shutdown request: %s\n", str); - shutting_down = SHUTDOWN_INVALID; - } - - kfree(str); -} - -static void sysrq_handler(struct xenbus_watch *watch, const char **vec, - unsigned int len) -{ - char sysrq_key = '\0'; - struct xenbus_transaction xbt; - int err; - - again: - err = xenbus_transaction_start(&xbt); - if (err) - return; - if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) { - printk(KERN_ERR "Unable to read sysrq code in " - "control/sysrq\n"); - xenbus_transaction_end(xbt, 1); - return; - } - - if (sysrq_key != '\0') - xenbus_printf(xbt, "control", "sysrq", "%c", '\0'); - - err = xenbus_transaction_end(xbt, 0); - if (err == -EAGAIN) - goto again; - - if (sysrq_key != '\0') - handle_sysrq(sysrq_key, NULL); -} - -static struct xenbus_watch shutdown_watch = { - .node = "control/shutdown", - .callback = shutdown_handler -}; - -static struct xenbus_watch sysrq_watch = { - .node = "control/sysrq", - .callback = sysrq_handler -}; - -static int setup_shutdown_watcher(void) -{ - int err; - - err = register_xenbus_watch(&shutdown_watch); - if (err) { - printk(KERN_ERR "Failed to set shutdown watcher\n"); - return err; - } - - err = register_xenbus_watch(&sysrq_watch); - if (err) { - printk(KERN_ERR "Failed to set sysrq watcher\n"); - return err; - } - - return 0; -} - -static int shutdown_event(struct notifier_block *notifier, - unsigned long event, - void *data) -{ - setup_shutdown_watcher(); - return NOTIFY_DONE; -} - -static int __init setup_shutdown_event(void) -{ - static struct notifier_block xenstore_notifier = { - .notifier_call = shutdown_event - }; - register_xenstore_notifier(&xenstore_notifier); - - return 0; -} - -subsys_initcall(setup_shutdown_event); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 4e527e7893a..42b3b9ed641 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -56,6 +56,131 @@ #include "multicalls.h" #include "mmu.h" +#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) +#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE) + +/* Placeholder for holes in the address space */ +static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] + __attribute__((section(".data.page_aligned"))) = + { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL }; + + /* Array of pointers to pages containing p2m entries */ +static unsigned long *p2m_top[TOP_ENTRIES] + __attribute__((section(".data.page_aligned"))) = + { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] }; + +/* Arrays of p2m arrays expressed in mfns used for save/restore */ +static unsigned long p2m_top_mfn[TOP_ENTRIES] + __attribute__((section(".bss.page_aligned"))); + +static unsigned long p2m_top_mfn_list[ + PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)] + __attribute__((section(".bss.page_aligned"))); + +static inline unsigned p2m_top_index(unsigned long pfn) +{ + BUG_ON(pfn >= MAX_DOMAIN_PAGES); + return pfn / P2M_ENTRIES_PER_PAGE; +} + +static inline unsigned p2m_index(unsigned long pfn) +{ + return pfn % P2M_ENTRIES_PER_PAGE; +} + +/* Build the parallel p2m_top_mfn structures */ +void xen_setup_mfn_list_list(void) +{ + unsigned pfn, idx; + + for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) { + unsigned topidx = p2m_top_index(pfn); + + p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]); + } + + for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) { + unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; + p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); + } + + BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); + + HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = + virt_to_mfn(p2m_top_mfn_list); + HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages; +} + +/* Set up p2m_top to point to the domain-builder provided p2m pages */ +void __init xen_build_dynamic_phys_to_machine(void) +{ + unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; + unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); + unsigned pfn; + + for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { + unsigned topidx = p2m_top_index(pfn); + + p2m_top[topidx] = &mfn_list[pfn]; + } +} + +unsigned long get_phys_to_machine(unsigned long pfn) +{ + unsigned topidx, idx; + + if (unlikely(pfn >= MAX_DOMAIN_PAGES)) + return INVALID_P2M_ENTRY; + + topidx = p2m_top_index(pfn); + idx = p2m_index(pfn); + return p2m_top[topidx][idx]; +} +EXPORT_SYMBOL_GPL(get_phys_to_machine); + +static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) +{ + unsigned long *p; + unsigned i; + + p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); + BUG_ON(p == NULL); + + for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++) + p[i] = INVALID_P2M_ENTRY; + + if (cmpxchg(pp, p2m_missing, p) != p2m_missing) + free_page((unsigned long)p); + else + *mfnp = virt_to_mfn(p); +} + +void set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ + unsigned topidx, idx; + + if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { + BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + return; + } + + if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { + BUG_ON(mfn != INVALID_P2M_ENTRY); + return; + } + + topidx = p2m_top_index(pfn); + if (p2m_top[topidx] == p2m_missing) { + /* no need to allocate a page to store an invalid entry */ + if (mfn == INVALID_P2M_ENTRY) + return; + alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]); + } + + idx = p2m_index(pfn); + p2m_top[topidx][idx] = mfn; +} + xmaddr_t arbitrary_virt_to_machine(unsigned long address) { unsigned int level; @@ -98,24 +223,60 @@ void make_lowmem_page_readwrite(void *vaddr) } -void xen_set_pmd(pmd_t *ptr, pmd_t val) +static bool page_pinned(void *ptr) +{ + struct page *page = virt_to_page(ptr); + + return PagePinned(page); +} + +static void extend_mmu_update(const struct mmu_update *update) { struct multicall_space mcs; struct mmu_update *u; - preempt_disable(); + mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); + + if (mcs.mc != NULL) + mcs.mc->args[1]++; + else { + mcs = __xen_mc_entry(sizeof(*u)); + MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); + } - mcs = xen_mc_entry(sizeof(*u)); u = mcs.args; - u->ptr = virt_to_machine(ptr).maddr; - u->val = pmd_val_ma(val); - MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); + *u = *update; +} + +void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) +{ + struct mmu_update u; + + preempt_disable(); + + xen_mc_batch(); + + u.ptr = virt_to_machine(ptr).maddr; + u.val = pmd_val_ma(val); + extend_mmu_update(&u); xen_mc_issue(PARAVIRT_LAZY_MMU); preempt_enable(); } +void xen_set_pmd(pmd_t *ptr, pmd_t val) +{ + /* If page is not pinned, we can just update the entry + directly */ + if (!page_pinned(ptr)) { + *ptr = val; + return; + } + + xen_set_pmd_hyper(ptr, val); +} + /* * Associate a virtual page frame with a given physical page frame * and protection flags for that frame. @@ -179,6 +340,26 @@ out: preempt_enable(); } +pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + /* Just return the pte as-is. We preserve the bits on commit */ + return *ptep; +} + +void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + struct mmu_update u; + + xen_mc_batch(); + + u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; + u.val = pte_val_ma(pte); + extend_mmu_update(&u); + + xen_mc_issue(PARAVIRT_LAZY_MMU); +} + /* Assume pteval_t is equivalent to all the other *val_t types. */ static pteval_t pte_mfn_to_pfn(pteval_t val) { @@ -229,24 +410,35 @@ pmdval_t xen_pmd_val(pmd_t pmd) return pte_mfn_to_pfn(pmd.pmd); } -void xen_set_pud(pud_t *ptr, pud_t val) +void xen_set_pud_hyper(pud_t *ptr, pud_t val) { - struct multicall_space mcs; - struct mmu_update *u; + struct mmu_update u; preempt_disable(); - mcs = xen_mc_entry(sizeof(*u)); - u = mcs.args; - u->ptr = virt_to_machine(ptr).maddr; - u->val = pud_val_ma(val); - MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); + xen_mc_batch(); + + u.ptr = virt_to_machine(ptr).maddr; + u.val = pud_val_ma(val); + extend_mmu_update(&u); xen_mc_issue(PARAVIRT_LAZY_MMU); preempt_enable(); } +void xen_set_pud(pud_t *ptr, pud_t val) +{ + /* If page is not pinned, we can just update the entry + directly */ + if (!page_pinned(ptr)) { + *ptr = val; + return; + } + + xen_set_pud_hyper(ptr, val); +} + void xen_set_pte(pte_t *ptep, pte_t pte) { ptep->pte_high = pte.pte_high; @@ -268,7 +460,7 @@ void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) void xen_pmd_clear(pmd_t *pmdp) { - xen_set_pmd(pmdp, __pmd(0)); + set_pmd(pmdp, __pmd(0)); } pmd_t xen_make_pmd(pmdval_t pmd) @@ -441,6 +633,29 @@ void xen_pgd_pin(pgd_t *pgd) xen_mc_issue(0); } +/* + * On save, we need to pin all pagetables to make sure they get their + * mfns turned into pfns. Search the list for any unpinned pgds and pin + * them (unpinned pgds are not currently in use, probably because the + * process is under construction or destruction). + */ +void xen_mm_pin_all(void) +{ + unsigned long flags; + struct page *page; + + spin_lock_irqsave(&pgd_lock, flags); + + list_for_each_entry(page, &pgd_list, lru) { + if (!PagePinned(page)) { + xen_pgd_pin((pgd_t *)page_address(page)); + SetPageSavePinned(page); + } + } + + spin_unlock_irqrestore(&pgd_lock, flags); +} + /* The init_mm pagetable is really pinned as soon as its created, but that's before we have page structures to store the bits. So do all the book-keeping now. */ @@ -498,6 +713,29 @@ static void xen_pgd_unpin(pgd_t *pgd) xen_mc_issue(0); } +/* + * On resume, undo any pinning done at save, so that the rest of the + * kernel doesn't see any unexpected pinned pagetables. + */ +void xen_mm_unpin_all(void) +{ + unsigned long flags; + struct page *page; + + spin_lock_irqsave(&pgd_lock, flags); + + list_for_each_entry(page, &pgd_list, lru) { + if (PageSavePinned(page)) { + BUG_ON(!PagePinned(page)); + printk("unpinning pinned %p\n", page_address(page)); + xen_pgd_unpin((pgd_t *)page_address(page)); + ClearPageSavePinned(page); + } + } + + spin_unlock_irqrestore(&pgd_lock, flags); +} + void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) { spin_lock(&next->page_table_lock); @@ -591,7 +829,7 @@ void xen_exit_mmap(struct mm_struct *mm) spin_lock(&mm->page_table_lock); /* pgd may not be pinned in the error exit path of execve */ - if (PagePinned(virt_to_page(mm->pgd))) + if (page_pinned(mm->pgd)) xen_pgd_unpin(mm->pgd); spin_unlock(&mm->page_table_lock); diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 5fe961caffd..297bf9f5b8b 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -25,10 +25,6 @@ enum pt_level { void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); -void xen_set_pte(pte_t *ptep, pte_t pteval); -void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval); -void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next); void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); @@ -45,11 +41,19 @@ pte_t xen_make_pte(pteval_t); pmd_t xen_make_pmd(pmdval_t); pgd_t xen_make_pgd(pgdval_t); +void xen_set_pte(pte_t *ptep, pte_t pteval); void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); void xen_set_pte_atomic(pte_t *ptep, pte_t pte); +void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); void xen_set_pud(pud_t *ptr, pud_t val); +void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval); +void xen_set_pud_hyper(pud_t *ptr, pud_t val); void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void xen_pmd_clear(pmd_t *pmdp); +pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); + #endif /* _XEN_MMU_H */ diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 5791eb2e375..3c63c4da7ed 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -29,14 +29,14 @@ #define MC_DEBUG 1 #define MC_BATCH 32 -#define MC_ARGS (MC_BATCH * 16 / sizeof(u64)) +#define MC_ARGS (MC_BATCH * 16) struct mc_buffer { struct multicall_entry entries[MC_BATCH]; #if MC_DEBUG struct multicall_entry debug[MC_BATCH]; #endif - u64 args[MC_ARGS]; + unsigned char args[MC_ARGS]; struct callback { void (*fn)(void *); void *data; @@ -107,20 +107,48 @@ struct multicall_space __xen_mc_entry(size_t args) { struct mc_buffer *b = &__get_cpu_var(mc_buffer); struct multicall_space ret; - unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64); + unsigned argidx = roundup(b->argidx, sizeof(u64)); BUG_ON(preemptible()); - BUG_ON(argspace > MC_ARGS); + BUG_ON(b->argidx > MC_ARGS); if (b->mcidx == MC_BATCH || - (b->argidx + argspace) > MC_ARGS) + (argidx + args) > MC_ARGS) { xen_mc_flush(); + argidx = roundup(b->argidx, sizeof(u64)); + } ret.mc = &b->entries[b->mcidx]; b->mcidx++; + ret.args = &b->args[argidx]; + b->argidx = argidx + args; + + BUG_ON(b->argidx > MC_ARGS); + return ret; +} + +struct multicall_space xen_mc_extend_args(unsigned long op, size_t size) +{ + struct mc_buffer *b = &__get_cpu_var(mc_buffer); + struct multicall_space ret = { NULL, NULL }; + + BUG_ON(preemptible()); + BUG_ON(b->argidx > MC_ARGS); + + if (b->mcidx == 0) + return ret; + + if (b->entries[b->mcidx - 1].op != op) + return ret; + + if ((b->argidx + size) > MC_ARGS) + return ret; + + ret.mc = &b->entries[b->mcidx - 1]; ret.args = &b->args[b->argidx]; - b->argidx += argspace; + b->argidx += size; + BUG_ON(b->argidx > MC_ARGS); return ret; } diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 8bae996d99a..85893824161 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -45,4 +45,16 @@ static inline void xen_mc_issue(unsigned mode) /* Set up a callback to be called when the current batch is flushed */ void xen_mc_callback(void (*fn)(void *), void *data); +/* + * Try to extend the arguments of the previous multicall command. The + * previous command's op must match. If it does, then it attempts to + * extend the argument space allocated to the multicall entry by + * arg_size bytes. + * + * The returned multicall_space will return with mc pointing to the + * command on success, or NULL on failure, and args pointing to the + * newly allocated space. + */ +struct multicall_space xen_mc_extend_args(unsigned long op, size_t arg_size); + #endif /* _XEN_MULTICALLS_H */ diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 82517e4a752..a2957580320 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -16,6 +16,7 @@ #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> +#include <xen/page.h> #include <xen/interface/callback.h> #include <xen/interface/physdev.h> #include <xen/features.h> @@ -27,8 +28,6 @@ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; -unsigned long *phys_to_machine_mapping; -EXPORT_SYMBOL(phys_to_machine_mapping); /** * machine_specific_memory_setup - Hook for machine specific memory setup. @@ -38,9 +37,11 @@ char * __init xen_memory_setup(void) { unsigned long max_pfn = xen_start_info->nr_pages; + max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); + e820.nr_map = 0; - add_memory_region(0, LOWMEMSIZE(), E820_RAM); - add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); + e820_add_region(0, LOWMEMSIZE(), E820_RAM); + e820_add_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); return "Xen"; } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 94e69000f98..d2e3c20127d 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -35,7 +35,7 @@ #include "xen-ops.h" #include "mmu.h" -static cpumask_t xen_cpu_initialized_map; +cpumask_t xen_cpu_initialized_map; static DEFINE_PER_CPU(int, resched_irq) = -1; static DEFINE_PER_CPU(int, callfunc_irq) = -1; static DEFINE_PER_CPU(int, debug_irq) = -1; @@ -65,6 +65,12 @@ static struct call_data_struct *call_data; */ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) { +#ifdef CONFIG_X86_32 + __get_cpu_var(irq_stat).irq_resched_count++; +#else + add_pda(irq_resched_count, 1); +#endif + return IRQ_HANDLED; } diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c new file mode 100644 index 00000000000..251669a932d --- /dev/null +++ b/arch/x86/xen/suspend.c @@ -0,0 +1,45 @@ +#include <linux/types.h> + +#include <xen/interface/xen.h> +#include <xen/grant_table.h> +#include <xen/events.h> + +#include <asm/xen/hypercall.h> +#include <asm/xen/page.h> + +#include "xen-ops.h" +#include "mmu.h" + +void xen_pre_suspend(void) +{ + xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); + xen_start_info->console.domU.mfn = + mfn_to_pfn(xen_start_info->console.domU.mfn); + + BUG_ON(!irqs_disabled()); + + HYPERVISOR_shared_info = &xen_dummy_shared_info; + if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP), + __pte_ma(0), 0)) + BUG(); +} + +void xen_post_suspend(int suspend_cancelled) +{ + xen_setup_shared_info(); + + if (suspend_cancelled) { + xen_start_info->store_mfn = + pfn_to_mfn(xen_start_info->store_mfn); + xen_start_info->console.domU.mfn = + pfn_to_mfn(xen_start_info->console.domU.mfn); + } else { +#ifdef CONFIG_SMP + xen_cpu_initialized_map = cpu_online_map; +#endif + xen_vcpu_restore(); + xen_timer_resume(); + } + +} + diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 41e217503c9..64f0038b955 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -459,6 +459,19 @@ void xen_setup_cpu_clockevents(void) clockevents_register_device(&__get_cpu_var(xen_clock_events)); } +void xen_timer_resume(void) +{ + int cpu; + + if (xen_clockevent != &xen_vcpuop_clockevent) + return; + + for_each_online_cpu(cpu) { + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) + BUG(); + } +} + __init void xen_time_init(void) { int cpu = smp_processor_id(); diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 6ec3b4f7719..7c0cf6320a0 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -7,6 +7,7 @@ #include <linux/init.h> #include <asm/boot.h> #include <xen/interface/elfnote.h> +#include <asm/xen/interface.h> __INIT ENTRY(startup_xen) @@ -32,5 +33,9 @@ ENTRY(hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") + ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, + .quad _PAGE_PRESENT; .quad _PAGE_PRESENT) + ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) + ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long __HYPERVISOR_VIRT_START) #endif /*CONFIG_XEN */ diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f1063ae0803..9a055592a30 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -9,18 +9,26 @@ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; +struct trap_info; void xen_copy_trap_info(struct trap_info *traps); DECLARE_PER_CPU(unsigned long, xen_cr3); DECLARE_PER_CPU(unsigned long, xen_current_cr3); extern struct start_info *xen_start_info; +extern struct shared_info xen_dummy_shared_info; extern struct shared_info *HYPERVISOR_shared_info; +void xen_setup_mfn_list_list(void); +void xen_setup_shared_info(void); + char * __init xen_memory_setup(void); void __init xen_arch_setup(void); void __init xen_init_IRQ(void); void xen_enable_sysenter(void); +void xen_vcpu_restore(void); + +void __init xen_build_dynamic_phys_to_machine(void); void xen_setup_timer(int cpu); void xen_setup_cpu_clockevents(void); @@ -29,6 +37,7 @@ void __init xen_time_init(void); unsigned long xen_get_wallclock(void); int xen_set_wallclock(unsigned long time); unsigned long long xen_sched_clock(void); +void xen_timer_resume(void); irqreturn_t xen_debug_interrupt(int irq, void *dev_id); @@ -54,6 +63,8 @@ int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info, int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info, int wait); +extern cpumask_t xen_cpu_initialized_map; + /* Declare an asm function, along with symbols needed to make it inlineable */ |