aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig21
-rw-r--r--arch/x86/Kconfig.debug4
-rw-r--r--arch/x86/Makefile1
-rw-r--r--arch/x86/boot/Makefile18
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/boot/compressed/head_64.S8
-rw-r--r--arch/x86/boot/cpu.c26
-rw-r--r--arch/x86/boot/mkcpustr.c49
-rw-r--r--arch/x86/ia32/ia32_aout.c3
-rw-r--r--arch/x86/ia32/ia32entry.S4
-rw-r--r--arch/x86/kernel/Makefile6
-rw-r--r--arch/x86/kernel/acpi/boot.c47
-rw-r--r--arch/x86/kernel/acpi/processor.c6
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/common.c12
-rw-r--r--arch/x86/kernel/cpu/cpu.h9
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Kconfig4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/e_powersaver.c61
-rw-r--r--arch/x86/kernel/cpu/cpufreq/gx-suspmod.c4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k7.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c10
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.h2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-lib.c5
-rw-r--r--arch/x86/kernel/cpu/cyrix.c2
-rw-r--r--arch/x86/kernel/cpu/feature_names.c83
-rw-r--r--arch/x86/kernel/cpu/intel.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/cyrix.c107
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c35
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h4
-rw-r--r--arch/x86/kernel/cpu/proc.c74
-rw-r--r--arch/x86/kernel/cpuid.c52
-rw-r--r--arch/x86/kernel/efi.c57
-rw-r--r--arch/x86/kernel/efi_64.c22
-rw-r--r--arch/x86/kernel/entry_64.S24
-rw-r--r--arch/x86/kernel/head_64.S19
-rw-r--r--arch/x86/kernel/ldt.c3
-rw-r--r--arch/x86/kernel/machine_kexec_32.c2
-rw-r--r--arch/x86/kernel/machine_kexec_64.c5
-rw-r--r--arch/x86/kernel/mpparse_32.c6
-rw-r--r--arch/x86/kernel/msr.c14
-rw-r--r--arch/x86/kernel/pci-calgary_64.c34
-rw-r--r--arch/x86/kernel/pci-gart_64.c53
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/ptrace.c25
-rw-r--r--arch/x86/kernel/quirks.c26
-rw-r--r--arch/x86/kernel/setup_32.c18
-rw-r--r--arch/x86/kernel/setup_64.c113
-rw-r--r--arch/x86/kernel/smpboot_32.c2
-rw-r--r--arch/x86/kernel/srat_32.c2
-rw-r--r--arch/x86/kernel/syscall_table_32.S4
-rw-r--r--arch/x86/kernel/test_nx.c14
-rw-r--r--arch/x86/kernel/trampoline_32.S7
-rw-r--r--arch/x86/kernel/trampoline_64.S3
-rw-r--r--arch/x86/kernel/traps_32.c15
-rw-r--r--arch/x86/kernel/vmi_32.c6
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/x86.c8
-rw-r--r--arch/x86/lib/Makefile2
-rw-r--r--arch/x86/lib/bitops_32.c2
-rw-r--r--arch/x86/lib/bitops_64.c2
-rw-r--r--arch/x86/lib/bitstr_64.c28
-rw-r--r--arch/x86/lib/delay_32.c4
-rw-r--r--arch/x86/lib/delay_64.c4
-rw-r--r--arch/x86/lib/mmx_32.c31
-rw-r--r--arch/x86/lib/usercopy_32.c12
-rw-r--r--arch/x86/lib/usercopy_64.c12
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c2
-rw-r--r--arch/x86/mm/discontig_32.c3
-rw-r--r--arch/x86/mm/fault.c62
-rw-r--r--arch/x86/mm/init_32.c6
-rw-r--r--arch/x86/mm/init_64.c62
-rw-r--r--arch/x86/mm/ioremap.c41
-rw-r--r--arch/x86/mm/numa_64.c12
-rw-r--r--arch/x86/mm/pageattr-test.c68
-rw-r--r--arch/x86/mm/pageattr.c389
-rw-r--r--arch/x86/mm/pgtable_32.c73
-rw-r--r--arch/x86/mm/srat_64.c3
-rw-r--r--arch/x86/pci/i386.c2
-rw-r--r--arch/x86/pci/numa.c52
79 files changed, 1054 insertions, 965 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7109037bdf7..c95482b6b6d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -18,6 +18,8 @@ config X86_64
### Arch settings
config X86
def_bool y
+ select HAVE_OPROFILE
+ select HAVE_KPROBES
config GENERIC_LOCKBREAK
def_bool n
@@ -103,13 +105,12 @@ config GENERIC_TIME_VSYSCALL
bool
default X86_64
+config ARCH_HAS_CPU_RELAX
+ def_bool y
+
config HAVE_SETUP_PER_CPU_AREA
def_bool X86_64
-config ARCH_SUPPORTS_OPROFILE
- bool
- default y
-
select HAVE_KVM
config ARCH_HIBERNATION_POSSIBLE
@@ -204,8 +205,7 @@ config SMP
Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
Management" code will be disabled if you say Y here.
- See also the <file:Documentation/smp.txt>,
- <file:Documentation/i386/IO-APIC.txt>,
+ See also <file:Documentation/i386/IO-APIC.txt>,
<file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
<http://www.tldp.org/docs.html#howto>.
@@ -309,6 +309,7 @@ config X86_RDC321X
select M486
select X86_REBOOTFIXUPS
select GENERIC_GPIO
+ select LEDS_CLASS
select LEDS_GPIO
help
This option is needed for RDC R-321x system-on-chip, also known
@@ -417,7 +418,7 @@ config HPET_TIMER
config HPET_EMULATE_RTC
def_bool y
- depends on HPET_TIMER && (RTC=y || RTC=m)
+ depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
# Mark as embedded because too many people got it wrong.
# The code disables itself when not needed.
@@ -467,6 +468,9 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
Calgary anyway, pass 'iommu=calgary' on the kernel command line.
If unsure, say Y.
+config IOMMU_HELPER
+ def_bool (CALGARY_IOMMU || GART_IOMMU)
+
# need this always selected by IOMMU for the VIA workaround
config SWIOTLB
bool
@@ -630,7 +634,6 @@ config TOSHIBA
config I8K
tristate "Dell laptop support"
- depends on X86_32
---help---
This adds a driver to safely access the System Management Mode
of the CPU on the Dell Inspiron 8000. The System Management Mode
@@ -1597,8 +1600,6 @@ source "drivers/firmware/Kconfig"
source "fs/Kconfig"
-source "kernel/Kconfig.instrumentation"
-
source "arch/x86/Kconfig.debug"
source "security/Kconfig"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2e1e3af28c3..fa555148823 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -220,9 +220,9 @@ config DEBUG_BOOT_PARAMS
This option will cause struct boot_params to be exported via debugfs.
config CPA_DEBUG
- bool "CPA self test code"
+ bool "CPA self-test code"
depends on DEBUG_KERNEL
help
- Do change_page_attr self tests at boot.
+ Do change_page_attr() self-tests every 30 seconds.
endmenu
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 8978e98bed5..364865b1b08 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -92,7 +92,6 @@ KBUILD_AFLAGS += $(cfi) $(cfi-sigframe)
KBUILD_CFLAGS += $(cfi) $(cfi-sigframe)
LDFLAGS := -m elf_$(UTS_MACHINE)
-OBJCOPYFLAGS := -O binary -R .note -R .comment -S
# Speed up the build
KBUILD_CFLAGS += -pipe
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 349b81a39c4..f88458e83ef 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -26,7 +26,7 @@ SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
#RAMDISK := -DRAMDISK=512
targets := vmlinux.bin setup.bin setup.elf zImage bzImage
-subdir- := compressed
+subdir- := compressed
setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
setup-y += header.o main.o mca.o memory.o pm.o pmjump.o
@@ -43,9 +43,17 @@ setup-y += video-vesa.o
setup-y += video-bios.o
targets += $(setup-y)
-hostprogs-y := tools/build
+hostprogs-y := mkcpustr tools/build
-HOSTCFLAGS_build.o := $(LINUXINCLUDE)
+HOST_EXTRACFLAGS += $(LINUXINCLUDE)
+
+$(obj)/cpu.o: $(obj)/cpustr.h
+
+quiet_cmd_cpustr = CPUSTR $@
+ cmd_cpustr = $(obj)/mkcpustr > $@
+targets += cpustr.h
+$(obj)/cpustr.h: $(obj)/mkcpustr FORCE
+ $(call if_changed,cpustr)
# ---------------------------------------------------------------------------
@@ -80,6 +88,7 @@ $(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \
$(call if_changed,image)
@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
+OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
$(call if_changed,objcopy)
@@ -90,7 +99,6 @@ $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
$(call if_changed,ld)
OBJCOPYFLAGS_setup.bin := -O binary
-
$(obj)/setup.bin: $(obj)/setup.elf FORCE
$(call if_changed,objcopy)
@@ -98,7 +106,7 @@ $(obj)/compressed/vmlinux: FORCE
$(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@
# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel
-FDARGS =
+FDARGS =
# Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel
FDINITRD =
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index fe24ceabd90..d2b9f3bb87c 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -22,6 +22,7 @@ $(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $
$(call if_changed,ld)
@:
+OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 1ccb38a7f0d..e8657b98c90 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -80,8 +80,8 @@ startup_32:
#ifdef CONFIG_RELOCATABLE
movl %ebp, %ebx
- addl $(LARGE_PAGE_SIZE -1), %ebx
- andl $LARGE_PAGE_MASK, %ebx
+ addl $(PMD_PAGE_SIZE -1), %ebx
+ andl $PMD_PAGE_MASK, %ebx
#else
movl $CONFIG_PHYSICAL_START, %ebx
#endif
@@ -220,8 +220,8 @@ ENTRY(startup_64)
/* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
leaq startup_32(%rip) /* - $startup_32 */, %rbp
- addq $(LARGE_PAGE_SIZE - 1), %rbp
- andq $LARGE_PAGE_MASK, %rbp
+ addq $(PMD_PAGE_SIZE - 1), %rbp
+ andq $PMD_PAGE_MASK, %rbp
movq %rbp, %rbx
#else
movq $CONFIG_PHYSICAL_START, %rbp
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 2a5c32da585..00e19edd852 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -1,7 +1,7 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
- * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2007-2008 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
@@ -9,7 +9,7 @@
* ----------------------------------------------------------------------- */
/*
- * arch/i386/boot/cpu.c
+ * arch/x86/boot/cpu.c
*
* Check for obligatory CPU features and abort if the features are not
* present.
@@ -19,6 +19,8 @@
#include "bitops.h"
#include <asm/cpufeature.h>
+#include "cpustr.h"
+
static char *cpu_name(int level)
{
static char buf[6];
@@ -35,6 +37,7 @@ int validate_cpu(void)
{
u32 *err_flags;
int cpu_level, req_level;
+ const unsigned char *msg_strs;
check_cpu(&cpu_level, &req_level, &err_flags);
@@ -51,13 +54,26 @@ int validate_cpu(void)
puts("This kernel requires the following features "
"not present on the CPU:\n");
+ msg_strs = (const unsigned char *)x86_cap_strs;
+
for (i = 0; i < NCAPINTS; i++) {
u32 e = err_flags[i];
for (j = 0; j < 32; j++) {
- if (e & 1)
- printf("%d:%d ", i, j);
-
+ int n = (i << 5)+j;
+ if (*msg_strs < n) {
+ /* Skip to the next string */
+ do {
+ msg_strs++;
+ } while (*msg_strs);
+ msg_strs++;
+ }
+ if (e & 1) {
+ if (*msg_strs == n && msg_strs[1])
+ printf("%s ", msg_strs+1);
+ else
+ printf("%d:%d ", i, j);
+ }
e >>= 1;
}
}
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
new file mode 100644
index 00000000000..bbe76953bae
--- /dev/null
+++ b/arch/x86/boot/mkcpustr.c
@@ -0,0 +1,49 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 2008 rPath, Inc. - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2 or (at your
+ * option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * This is a host program to preprocess the CPU strings into a
+ * compact format suitable for the setup code.
+ */
+
+#include <stdio.h>
+
+#include "../kernel/cpu/feature_names.c"
+
+#if NCAPFLAGS > 8
+# error "Need to adjust the boot code handling of CPUID strings"
+#endif
+
+int main(void)
+{
+ int i;
+ const char *str;
+
+ printf("static const char x86_cap_strs[] = \n");
+
+ for (i = 0; i < NCAPINTS*32; i++) {
+ str = x86_cap_flags[i];
+
+ if (i == NCAPINTS*32-1) {
+ /* The last entry must be unconditional; this
+ also consumes the compiler-added null character */
+ if (!str)
+ str = "";
+ printf("\t\"\\x%02x\"\"%s\"\n", i, str);
+ } else if (str) {
+ printf("#if REQUIRED_MASK%d & (1 << %d)\n"
+ "\t\"\\x%02x\"\"%s\\0\"\n"
+ "#endif\n",
+ i >> 5, i & 31, i, str);
+ }
+ }
+ printf("\t;\n");
+ return 0;
+}
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index e4c12079171..58cccb6483b 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -172,8 +172,7 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
has_dumped = 1;
current->flags |= PF_DUMPCORE;
strncpy(dump.u_comm, current->comm, sizeof(current->comm));
- dump.u_ar0 = (u32)(((unsigned long)(&dump.regs)) -
- ((unsigned long)(&dump)));
+ dump.u_ar0 = offsetof(struct user32, regs);
dump.signal = signr;
dump_thread32(regs, &dump);
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 0db0a6291bb..8022d3c695c 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -722,7 +722,9 @@ ia32_sys_call_table:
.quad sys_epoll_pwait
.quad compat_sys_utimensat /* 320 */
.quad compat_sys_signalfd
- .quad compat_sys_timerfd
+ .quad sys_timerfd_create
.quad sys_eventfd
.quad sys32_fallocate
+ .quad compat_sys_timerfd_settime /* 325 */
+ .quad compat_sys_timerfd_gettime
ia32_syscall_end:
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 6f813009d44..21dc1a061bf 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -37,7 +37,8 @@ obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_PCI) += early-quirks.o
-obj-$(CONFIG_APM) += apm_32.o
+apm-y := apm_32.o
+obj-$(CONFIG_APM) += apm.o
obj-$(CONFIG_X86_SMP) += smp_$(BITS).o smpboot_$(BITS).o tsc_sync.o
obj-$(CONFIG_X86_32_SMP) += smpcommon_32.o
obj-$(CONFIG_X86_64_SMP) += smp_64.o smpboot_64.o tsc_sync.o
@@ -74,7 +75,8 @@ ifdef CONFIG_INPUT_PCSPKR
obj-y += pcspeaker.o
endif
-obj-$(CONFIG_SCx200) += scx200_32.o
+obj-$(CONFIG_SCx200) += scx200.o
+scx200-y += scx200_32.o
###
# 64 bit specific files
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d2a58431a07..680b7300a48 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -78,7 +78,6 @@ int acpi_ht __initdata = 1; /* enable HT */
int acpi_lapic;
int acpi_ioapic;
int acpi_strict;
-EXPORT_SYMBOL(acpi_strict);
u8 acpi_sci_flags __initdata;
int acpi_sci_override_gsi __initdata;
@@ -106,7 +105,7 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
#ifdef CONFIG_X86_64
/* rely on all ACPI tables being in the direct mapping */
-char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
+char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
{
if (!phys_addr || !size)
return NULL;
@@ -131,7 +130,7 @@ char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
* from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and
* count idx down while incrementing the phys address.
*/
-char *__acpi_map_table(unsigned long phys, unsigned long size)
+char *__init __acpi_map_table(unsigned long phys, unsigned long size)
{
unsigned long base, offset, mapped_size;
int idx;
@@ -490,8 +489,6 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity)
return irq;
}
-EXPORT_SYMBOL(acpi_register_gsi);
-
/*
* ACPI based hotplug support for CPU
*/
@@ -587,25 +584,6 @@ int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
EXPORT_SYMBOL(acpi_unregister_ioapic);
-static unsigned long __init
-acpi_scan_rsdp(unsigned long start, unsigned long length)
-{
- unsigned long offset = 0;
- unsigned long sig_len = sizeof("RSD PTR ") - 1;
-
- /*
- * Scan all 16-byte boundaries of the physical memory region for the
- * RSDP signature.
- */
- for (offset = 0; offset < length; offset += 16) {
- if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len))
- continue;
- return (start + offset);
- }
-
- return 0;
-}
-
static int __init acpi_parse_sbf(struct acpi_table_header *table)
{
struct acpi_table_boot *sb;
@@ -748,27 +726,6 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
return 0;
}
-unsigned long __init acpi_find_rsdp(void)
-{
- unsigned long rsdp_phys = 0;
-
- if (efi_enabled) {
- if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
- return efi.acpi20;
- else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
- return efi.acpi;
- }
- /*
- * Scan memory looking for the RSDP signature. First search EBDA (low
- * memory) paragraphs and then search upper memory (E0000-FFFFF).
- */
- rsdp_phys = acpi_scan_rsdp(0, 0x400);
- if (!rsdp_phys)
- rsdp_phys = acpi_scan_rsdp(0xE0000, 0x20000);
-
- return rsdp_phys;
-}
-
#ifdef CONFIG_X86_LOCAL_APIC
/*
* Parse LAPIC entries in MADT
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index a25db514c71..324eb0cab19 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -46,6 +46,12 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
buf[1] = 1;
buf[2] = ACPI_PDC_C_CAPABILITY_SMP;
+ /*
+ * The default of PDC_SMP_T_SWCOORD bit is set for intel x86 cpu so
+ * that OSPM is capable of native ACPI throttling software
+ * coordination using BIOS supplied _TSD info.
+ */
+ buf[2] |= ACPI_PDC_SMP_T_SWCOORD;
if (cpu_has(c, X86_FEATURE_EST))
buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP;
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index cfdb2f3bd76..a0c4d7c5dbd 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -3,6 +3,7 @@
#
obj-y := intel_cacheinfo.o addon_cpuid_features.o
+obj-y += feature_names.o
obj-$(CONFIG_X86_32) += common.o proc.o bugs.o
obj-$(CONFIG_X86_32) += amd.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b7b2142b58e..f86a3c4a266 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -623,16 +623,6 @@ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
* They will insert themselves into the cpu_devs structure.
* Then, when cpu_init() is called, we can just iterate over that array.
*/
-
-extern int intel_cpu_init(void);
-extern int cyrix_init_cpu(void);
-extern int nsc_init_cpu(void);
-extern int amd_init_cpu(void);
-extern int centaur_init_cpu(void);
-extern int transmeta_init_cpu(void);
-extern int nexgen_init_cpu(void);
-extern int umc_init_cpu(void);
-
void __init early_cpu_init(void)
{
intel_cpu_init();
@@ -647,7 +637,7 @@ void __init early_cpu_init(void)
}
/* Make sure %fs is initialized properly in idle threads */
-struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
+struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
{
memset(regs, 0, sizeof(struct pt_regs));
regs->fs = __KERNEL_PERCPU;
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index ad6527a5beb..e0b38c33d84 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -27,3 +27,12 @@ extern void display_cacheinfo(struct cpuinfo_x86 *c);
extern void early_init_intel(struct cpuinfo_x86 *c);
extern void early_init_amd(struct cpuinfo_x86 *c);
+/* Specific CPU type init functions */
+int intel_cpu_init(void);
+int amd_init_cpu(void);
+int cyrix_init_cpu(void);
+int nsc_init_cpu(void);
+int centaur_init_cpu(void);
+int transmeta_init_cpu(void);
+int nexgen_init_cpu(void);
+int umc_init_cpu(void);
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index 151eda0a23f..cb7a5715596 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -29,7 +29,7 @@ config X86_ACPI_CPUFREQ
config ELAN_CPUFREQ
tristate "AMD Elan SC400 and SC410"
select CPU_FREQ_TABLE
- depends on X86_32 && X86_ELAN
+ depends on X86_ELAN
---help---
This adds the CPUFreq driver for AMD Elan SC400 and SC410
processors.
@@ -45,7 +45,7 @@ config ELAN_CPUFREQ
config SC520_CPUFREQ
tristate "AMD Elan SC520"
select CPU_FREQ_TABLE
- depends on X86_32 && X86_ELAN
+ depends on X86_ELAN
---help---
This adds the CPUFreq driver for AMD Elan SC520 processor.
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
index 326a4c81f68..39f8cb18296 100644
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
@@ -23,6 +23,7 @@
#define EPS_BRAND_C7 1
#define EPS_BRAND_EDEN 2
#define EPS_BRAND_C3 3
+#define EPS_BRAND_C7D 4
struct eps_cpu_data {
u32 fsb;
@@ -54,6 +55,7 @@ static int eps_set_state(struct eps_cpu_data *centaur,
{
struct cpufreq_freqs freqs;
u32 lo, hi;
+ u8 current_multiplier, current_voltage;
int err = 0;
int i;
@@ -93,6 +95,15 @@ postchange:
rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
freqs.new = centaur->fsb * ((lo >> 8) & 0xff);
+ /* Print voltage and multiplier */
+ rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+ current_voltage = lo & 0xff;
+ printk(KERN_INFO "eps: Current voltage = %dmV\n",
+ current_voltage * 16 + 700);
+ current_multiplier = (lo >> 8) & 0xff;
+ printk(KERN_INFO "eps: Current multiplier = %d\n",
+ current_multiplier);
+
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
return err;
}
@@ -141,9 +152,10 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
u8 current_multiplier, current_voltage;
u8 max_multiplier, max_voltage;
u8 min_multiplier, min_voltage;
- u8 brand;
+ u8 brand = 0;
u32 fsb;
struct eps_cpu_data *centaur;
+ struct cpuinfo_x86 *c = &cpu_data(0);
struct cpufreq_frequency_table *f_table;
int k, step, voltage;
int ret;
@@ -153,21 +165,36 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
return -ENODEV;
/* Check brand */
- printk("eps: Detected VIA ");
- rdmsr(0x1153, lo, hi);
- brand = (((lo >> 2) ^ lo) >> 18) & 3;
+ printk(KERN_INFO "eps: Detected VIA ");
+
+ switch (c->x86_model) {
+ case 10:
+ rdmsr(0x1153, lo, hi);
+ brand = (((lo >> 2) ^ lo) >> 18) & 3;
+ printk(KERN_CONT "Model A ");
+ break;
+ case 13:
+ rdmsr(0x1154, lo, hi);
+ brand = (((lo >> 4) ^ (lo >> 2))) & 0x000000ff;
+ printk(KERN_CONT "Model D ");
+ break;
+ }
+
switch(brand) {
case EPS_BRAND_C7M:
- printk("C7-M\n");
+ printk(KERN_CONT "C7-M\n");
break;
case EPS_BRAND_C7:
- printk("C7\n");
+ printk(KERN_CONT "C7\n");
break;
case EPS_BRAND_EDEN:
- printk("Eden\n");
+ printk(KERN_CONT "Eden\n");
+ break;
+ case EPS_BRAND_C7D:
+ printk(KERN_CONT "C7-D\n");
break;
case EPS_BRAND_C3:
- printk("C3\n");
+ printk(KERN_CONT "C3\n");
return -ENODEV;
break;
}
@@ -179,7 +206,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
/* Can be locked at 0 */
rdmsrl(MSR_IA32_MISC_ENABLE, val);
if (!(val & 1 << 16)) {
- printk("eps: Can't enable Enhanced PowerSaver\n");
+ printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n");
return -ENODEV;
}
}
@@ -187,19 +214,19 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
/* Print voltage and multiplier */
rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
current_voltage = lo & 0xff;
- printk("eps: Current voltage = %dmV\n", current_voltage * 16 + 700);
+ printk(KERN_INFO "eps: Current voltage = %dmV\n", current_voltage * 16 + 700);
current_multiplier = (lo >> 8) & 0xff;
- printk("eps: Current multiplier = %d\n", current_multiplier);
+ printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier);
/* Print limits */
max_voltage = hi & 0xff;
- printk("eps: Highest voltage = %dmV\n", max_voltage * 16 + 700);
+ printk(KERN_INFO "eps: Highest voltage = %dmV\n", max_voltage * 16 + 700);
max_multiplier = (hi >> 8) & 0xff;
- printk("eps: Highest multiplier = %d\n", max_multiplier);
+ printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier);
min_voltage = (hi >> 16) & 0xff;
- printk("eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700);
+ printk(KERN_INFO "eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700);
min_multiplier = (hi >> 24) & 0xff;
- printk("eps: Lowest multiplier = %d\n", min_multiplier);
+ printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier);
/* Sanity checks */
if (current_multiplier == 0 || max_multiplier == 0
@@ -208,7 +235,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
if (current_multiplier > max_multiplier
|| max_multiplier <= min_multiplier)
return -EINVAL;
- if (current_voltage > 0x1c || max_voltage > 0x1c)
+ if (current_voltage > 0x1f || max_voltage > 0x1f)
return -EINVAL;
if (max_voltage < min_voltage)
return -EINVAL;
@@ -310,7 +337,7 @@ static int __init eps_init(void)
/* This driver will work only on Centaur C7 processors with
* Enhanced SpeedStep/PowerSaver registers */
if (c->x86_vendor != X86_VENDOR_CENTAUR
- || c->x86 != 6 || c->x86_model != 10)
+ || c->x86 != 6 || c->x86_model < 10)
return -ENODEV;
if (!cpu_has(c, X86_FEATURE_EST))
return -ENODEV;
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
index 2ed7db2fd25..9d9eae82e60 100644
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
@@ -181,8 +181,8 @@ static __init struct pci_dev *gx_detect_chipset(void)
struct pci_dev *gx_pci = NULL;
/* check if CPU is a MediaGX or a Geode. */
- if ((current_cpu_data.x86_vendor != X86_VENDOR_NSC) &&
- (current_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) {
+ if ((boot_cpu_data.x86_vendor != X86_VENDOR_NSC) &&
+ (boot_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) {
dprintk("error: no MediaGX/Geode processor found!\n");
return NULL;
}
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index b5a9863d6cd..0a61159d7b7 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -460,7 +460,7 @@ static int powernow_decode_bios (int maxfid, int startvid)
latency = psb->settlingtime;
if (latency < 100) {
- printk (KERN_INFO PFX "BIOS set settling time to %d microseconds."
+ printk(KERN_INFO PFX "BIOS set settling time to %d microseconds. "
"Should be at least 100. Correcting.\n", latency);
latency = 100;
}
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index a0522735dd9..c99d59d8ef2 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -578,10 +578,9 @@ static void print_basics(struct powernow_k8_data *data)
for (j = 0; j < data->numps; j++) {
if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) {
if (cpu_family == CPU_HW_PSTATE) {
- printk(KERN_INFO PFX " %d : fid 0x%x did 0x%x (%d MHz)\n",
+ printk(KERN_INFO PFX " %d : pstate %d (%d MHz)\n",
j,
- (data->powernow_table[j].index & 0xff00) >> 8,
- (data->powernow_table[j].index & 0xff0000) >> 16,
+ data->powernow_table[j].index,
data->powernow_table[j].frequency/1000);
} else {
printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n",
@@ -827,7 +826,6 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
for (i = 0; i < data->acpi_data.state_count; i++) {
u32 index;
- u32 hi = 0, lo = 0;
index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
if (index > data->max_hw_pstate) {
@@ -1236,8 +1234,10 @@ static unsigned int powernowk8_get (unsigned int cpu)
struct powernow_k8_data *data;
cpumask_t oldmask = current->cpus_allowed;
unsigned int khz = 0;
+ unsigned int first;
- data = per_cpu(powernow_data, first_cpu(per_cpu(cpu_core_map, cpu)));
+ first = first_cpu(per_cpu(cpu_core_map, cpu));
+ data = per_cpu(powernow_data, first);
if (!data)
return -EINVAL;
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index afd2b520d35..ab48cfed4d9 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -47,7 +47,7 @@ struct powernow_k8_data {
#define CPUID_XFAM 0x0ff00000 /* extended family */
#define CPUID_XFAM_K8 0
#define CPUID_XMOD 0x000f0000 /* extended model */
-#define CPUID_XMOD_REV_MASK 0x00080000
+#define CPUID_XMOD_REV_MASK 0x000c0000
#define CPUID_XFAM_10H 0x00100000 /* family 0x10 */
#define CPUID_USE_XFAM_XMOD 0x00000f00
#define CPUID_GET_MAX_CAPABILITIES 0x80000000
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index 76c3ab0da46..98d4fdb7dc0 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -189,10 +189,7 @@ static unsigned int pentium4_get_frequency(void)
printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to <linux@brodo.de>\n");
/* Multiplier. */
- if (c->x86_model < 2)
- mult = msr_lo >> 27;
- else
- mult = msr_lo >> 24;
+ mult = msr_lo >> 24;
dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult));
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 404a6a2d401..7139b026270 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -83,8 +83,6 @@ static char cyrix_model_mult2[] __cpuinitdata = "12233445";
* FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP
*/
-extern void calibrate_delay(void) __init;
-
static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c)
{
unsigned long flags;
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
new file mode 100644
index 00000000000..ee975ac6bbc
--- /dev/null
+++ b/arch/x86/kernel/cpu/feature_names.c
@@ -0,0 +1,83 @@
+/*
+ * Strings for the various x86 capability flags.
+ *
+ * This file must not contain any executable code.
+ */
+
+#include "asm/cpufeature.h"
+
+/*
+ * These flag bits must match the definitions in <asm/cpufeature.h>.
+ * NULL means this bit is undefined or reserved; either way it doesn't
+ * have meaning as far as Linux is concerned. Note that it's important
+ * to realize there is a difference between this table and CPUID -- if
+ * applications want to get the raw CPUID data, they should access
+ * /dev/cpu/<cpu_nr>/cpuid instead.
+ */
+const char * const x86_cap_flags[NCAPINTS*32] = {
+ /* Intel-defined */
+ "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
+ "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
+ "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
+ "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
+
+ /* AMD-defined */
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
+ NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
+ "3dnowext", "3dnow",
+
+ /* Transmeta-defined */
+ "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* Other (Linux-defined) */
+ "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
+ NULL, NULL, NULL, NULL,
+ "constant_tsc", "up", NULL, "arch_perfmon",
+ "pebs", "bts", NULL, NULL,
+ "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* Intel-defined (#2) */
+ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
+ "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
+ NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* VIA/Cyrix/Centaur-defined */
+ NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
+ "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* AMD-defined (#2) */
+ "lahf_lm", "cmp_legacy", "svm", "extapic",
+ "cr8_legacy", "abm", "sse4a", "misalignsse",
+ "3dnowprefetch", "osvw", "ibs", "sse5",
+ "skinit", "wdt", NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* Auxiliary (Linux-defined) */
+ "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+};
+
+const char *const x86_power_flags[32] = {
+ "ts", /* temperature sensor */
+ "fid", /* frequency id control */
+ "vid", /* voltage id control */
+ "ttp", /* thermal trip */
+ "tm",
+ "stc",
+ "100mhzsteps",
+ "hwpstate",
+ "", /* tsc invariant mapped to constant_tsc */
+ /* nothing */
+};
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d1c372b018d..fae31ce747b 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -13,6 +13,7 @@
#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/ds.h>
+#include <asm/bugs.h>
#include "cpu.h"
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 8e139c70f88..ff14c320040 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -7,8 +7,6 @@
#include <asm/processor-flags.h>
#include "mtrr.h"
-int arr3_protected;
-
static void
cyrix_get_arr(unsigned int reg, unsigned long *base,
unsigned long *size, mtrr_type * type)
@@ -99,8 +97,6 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
case 4:
return replace_reg;
case 3:
- if (arr3_protected)
- break;
case 2:
case 1:
case 0:
@@ -115,8 +111,6 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
} else {
for (i = 0; i < 7; i++) {
cyrix_get_arr(i, &lbase, &lsize, &ltype);
- if ((i == 3) && arr3_protected)
- continue;
if (lsize == 0)
return i;
}
@@ -260,107 +254,6 @@ static void cyrix_set_all(void)
post_set();
}
-#if 0
-/*
- * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection
- * with the SMM (System Management Mode) mode. So we need the following:
- * Check whether SMI_LOCK (CCR3 bit 0) is set
- * if it is set, write a warning message: ARR3 cannot be changed!
- * (it cannot be changed until the next processor reset)
- * if it is reset, then we can change it, set all the needed bits:
- * - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset)
- * - disable access to SMM memory (CCR1 bit 2 reset)
- * - disable SMM mode (CCR1 bit 1 reset)
- * - disable write protection of ARR3 (CCR6 bit 1 reset)
- * - (maybe) disable ARR3
- * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set)
- */
-static void __init
-cyrix_arr_init(void)
-{
- struct set_mtrr_context ctxt;
- unsigned char ccr[7];
- int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 };
-#ifdef CONFIG_SMP
- int i;
-#endif
-
- /* flush cache and enable MAPEN */
- set_mtrr_prepare_save(&ctxt);
- set_mtrr_cache_disable(&ctxt);
-
- /* Save all CCRs locally */
- ccr[0] = getCx86(CX86_CCR0);
- ccr[1] = getCx86(CX86_CCR1);
- ccr[2] = getCx86(CX86_CCR2);
- ccr[3] = ctxt.ccr3;
- ccr[4] = getCx86(CX86_CCR4);
- ccr[5] = getCx86(CX86_CCR5);
- ccr[6] = getCx86(CX86_CCR6);
-
- if (ccr[3] & 1) {
- ccrc[3] = 1;
- arr3_protected = 1;
- } else {
- /* Disable SMM mode (bit 1), access to SMM memory (bit 2) and
- * access to SMM memory through ARR3 (bit 7).
- */
- if (ccr[1] & 0x80) {
- ccr[1] &= 0x7f;
- ccrc[1] |= 0x80;
- }
- if (ccr[1] & 0x04) {
- ccr[1] &= 0xfb;
- ccrc[1] |= 0x04;
- }
- if (ccr[1] & 0x02) {
- ccr[1] &= 0xfd;
- ccrc[1] |= 0x02;
- }
- arr3_protected = 0;
- if (ccr[6] & 0x02) {
- ccr[6] &= 0xfd;
- ccrc[6] = 1; /* Disable write protection of ARR3 */
- setCx86(CX86_CCR6, ccr[6]);
- }
- /* Disable ARR3. This is safe now that we disabled SMM. */
- /* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */
- }
- /* If we changed CCR1 in memory, change it in the processor, too. */
- if (ccrc[1])
- setCx86(CX86_CCR1, ccr[1]);
-
- /* Enable ARR usage by the processor */
- if (!(ccr[5] & 0x20)) {
- ccr[5] |= 0x20;
- ccrc[5] = 1;
- setCx86(CX86_CCR5, ccr[5]);
- }
-#ifdef CONFIG_SMP
- for (i = 0; i < 7; i++)
- ccr_state[i] = ccr[i];
- for (i = 0; i < 8; i++)
- cyrix_get_arr(i,
- &arr_state[i].base, &arr_state[i].size,
- &arr_state[i].type);
-#endif
-
- set_mtrr_done(&ctxt); /* flush cache and disable MAPEN */
-
- if (ccrc[5])
- printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled manually\n");
- if (ccrc[3])
- printk(KERN_INFO "mtrr: ARR3 cannot be changed\n");
-/*
- if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n");
- if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n");
- if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n");
-*/
- if (ccrc[6])
- printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n");
-}
-#endif
-
static struct mtrr_ops cyrix_mtrr_ops = {
.vendor = X86_VENDOR_CYRIX,
// .init = cyrix_arr_init,
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 71591958265..b6e136f23d3 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -59,12 +59,6 @@ struct mtrr_ops * mtrr_if = NULL;
static void set_mtrr(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type);
-#ifndef CONFIG_X86_64
-extern int arr3_protected;
-#else
-#define arr3_protected 0
-#endif
-
void set_mtrr_ops(struct mtrr_ops * ops)
{
if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
@@ -513,12 +507,6 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
goto out;
}
- if (is_cpu(CYRIX) && !use_intel()) {
- if ((reg == 3) && arr3_protected) {
- printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n");
- goto out;
- }
- }
mtrr_if->get(reg, &lbase, &lsize, &ltype);
if (lsize < 1) {
printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
@@ -566,10 +554,6 @@ EXPORT_SYMBOL(mtrr_del);
* These should be called implicitly, but we can't yet until all the initcall
* stuff is done...
*/
-extern void amd_init_mtrr(void);
-extern void cyrix_init_mtrr(void);
-extern void centaur_init_mtrr(void);
-
static void __init init_ifs(void)
{
#ifndef CONFIG_X86_64
@@ -675,7 +659,7 @@ static __init int amd_special_default_mtrr(void)
*/
int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
{
- unsigned long i, base, size, highest_addr = 0, def, dummy;
+ unsigned long i, base, size, highest_pfn = 0, def, dummy;
mtrr_type type;
u64 trim_start, trim_size;
@@ -698,28 +682,27 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
mtrr_if->get(i, &base, &size, &type);
if (type != MTRR_TYPE_WRBACK)
continue;
- base <<= PAGE_SHIFT;
- size <<= PAGE_SHIFT;
- if (highest_addr < base + size)
- highest_addr = base + size;
+ if (highest_pfn < base + size)
+ highest_pfn = base + size;
}
/* kvm/qemu doesn't have mtrr set right, don't trim them all */
- if (!highest_addr) {
+ if (!highest_pfn) {
printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n");
WARN_ON(1);
return 0;
}
- if ((highest_addr >> PAGE_SHIFT) < end_pfn) {
+ if (highest_pfn < end_pfn) {
printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
- " all of memory, losing %LdMB of RAM.\n",
- (((u64)end_pfn << PAGE_SHIFT) - highest_addr) >> 20);
+ " all of memory, losing %luMB of RAM.\n",
+ (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT));
WARN_ON(1);
printk(KERN_INFO "update e820 for mtrr\n");
- trim_start = highest_addr;
+ trim_start = highest_pfn;
+ trim_start <<= PAGE_SHIFT;
trim_size = end_pfn;
trim_size <<= PAGE_SHIFT;
trim_size -= trim_start;
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index fb74a2c2081..2cc77eb6fea 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -97,3 +97,7 @@ void mtrr_state_warn(void);
const char *mtrr_attrib_to_str(int x);
void mtrr_wrmsr(unsigned, unsigned, unsigned);
+/* CPU specific mtrr init functions */
+int amd_init_mtrr(void);
+int cyrix_init_mtrr(void);
+int centaur_init_mtrr(void);
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 02821326014..af11d31dce0 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -10,80 +10,6 @@
*/
static int show_cpuinfo(struct seq_file *m, void *v)
{
- /*
- * These flag bits must match the definitions in <asm/cpufeature.h>.
- * NULL means this bit is undefined or reserved; either way it doesn't
- * have meaning as far as Linux is concerned. Note that it's important
- * to realize there is a difference between this table and CPUID -- if
- * applications want to get the raw CPUID data, they should access
- * /dev/cpu/<cpu_nr>/cpuid instead.
- */
- static const char * const x86_cap_flags[] = {
- /* Intel-defined */
- "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
- "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
- "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
- "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
-
- /* AMD-defined */
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
- NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
- "3dnowext", "3dnow",
-
- /* Transmeta-defined */
- "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Other (Linux-defined) */
- "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
- NULL, NULL, NULL, NULL,
- "constant_tsc", "up", NULL, "arch_perfmon",
- "pebs", "bts", NULL, "sync_rdtsc",
- "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Intel-defined (#2) */
- "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
- "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
- NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* VIA/Cyrix/Centaur-defined */
- NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
- "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* AMD-defined (#2) */
- "lahf_lm", "cmp_legacy", "svm", "extapic",
- "cr8_legacy", "abm", "sse4a", "misalignsse",
- "3dnowprefetch", "osvw", "ibs", "sse5",
- "skinit", "wdt", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Auxiliary (Linux-defined) */
- "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- };
- static const char * const x86_power_flags[] = {
- "ts", /* temperature sensor */
- "fid", /* frequency id control */
- "vid", /* voltage id control */
- "ttp", /* thermal trip */
- "tm",
- "stc",
- "100mhzsteps",
- "hwpstate",
- "", /* constant_tsc - moved to flags */
- /* nothing */
- };
struct cpuinfo_x86 *c = v;
int i, n = 0;
int fpu_exception;
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index a63432d800f..288e7a6598a 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
- *
- * Copyright 2000 H. Peter Anvin - All Rights Reserved
+ *
+ * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -17,6 +17,10 @@
* and then read in chunks of 16 bytes. A larger size means multiple
* reads of consecutive levels.
*
+ * The lower 32 bits of the file position is used as the incoming %eax,
+ * and the upper 32 bits of the file position as the incoming %ecx,
+ * the latter intended for "counting" eax levels like eax=4.
+ *
* This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on
* an SMP box will direct the access to CPU %d.
*/
@@ -43,35 +47,24 @@
static struct class *cpuid_class;
-struct cpuid_command {
- u32 reg;
- u32 *data;
+struct cpuid_regs {
+ u32 eax, ebx, ecx, edx;
};
static void cpuid_smp_cpuid(void *cmd_block)
{
- struct cpuid_command *cmd = cmd_block;
-
- cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2],
- &cmd->data[3]);
-}
-
-static inline void do_cpuid(int cpu, u32 reg, u32 * data)
-{
- struct cpuid_command cmd;
-
- cmd.reg = reg;
- cmd.data = data;
+ struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block;
- smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
+ cpuid_count(cmd->eax, cmd->ecx,
+ &cmd->eax, &cmd->ebx, &cmd->ecx, &cmd->edx);
}
static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
{
loff_t ret;
+ struct inode *inode = file->f_mapping->host;
- lock_kernel();
-
+ mutex_lock(&inode->i_mutex);
switch (orig) {
case 0:
file->f_pos = offset;
@@ -84,8 +77,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
default:
ret = -EINVAL;
}
-
- unlock_kernel();
+ mutex_unlock(&inode->i_mutex);
return ret;
}
@@ -93,19 +85,21 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
size_t count, loff_t * ppos)
{
char __user *tmp = buf;
- u32 data[4];
- u32 reg = *ppos;
+ struct cpuid_regs cmd;
int cpu = iminor(file->f_path.dentry->d_inode);
+ u64 pos = *ppos;
if (count % 16)
return -EINVAL; /* Invalid chunk size */
for (; count; count -= 16) {
- do_cpuid(cpu, reg, data);
- if (copy_to_user(tmp, &data, 16))
+ cmd.eax = pos;
+ cmd.ecx = pos >> 32;
+ smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
+ if (copy_to_user(tmp, &cmd, 16))
return -EFAULT;
tmp += 16;
- *ppos = reg++;
+ *ppos = ++pos;
}
return tmp - buf;
@@ -193,7 +187,7 @@ static int __init cpuid_init(void)
}
for_each_online_cpu(i) {
err = cpuid_device_create(i);
- if (err != 0)
+ if (err != 0)
goto out_class;
}
register_hotcpu_notifier(&cpuid_class_cpu_notifier);
@@ -208,7 +202,7 @@ out_class:
}
class_destroy(cpuid_class);
out_chrdev:
- unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
+ unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
out:
return err;
}
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1411324a625..32dd62b36ff 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -379,11 +379,9 @@ void __init efi_init(void)
#endif
}
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static void __init runtime_code_page_mkexec(void)
{
efi_memory_desc_t *md;
- unsigned long end;
void *p;
if (!(__supported_pte_mask & _PAGE_NX))
@@ -392,18 +390,13 @@ static void __init runtime_code_page_mkexec(void)
/* Make EFI runtime service code area executable */
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
- end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
- if (md->type == EFI_RUNTIME_SERVICES_CODE &&
- (end >> PAGE_SHIFT) <= max_pfn_mapped) {
- set_memory_x(md->virt_addr, md->num_pages);
- set_memory_uc(md->virt_addr, md->num_pages);
- }
+
+ if (md->type != EFI_RUNTIME_SERVICES_CODE)
+ continue;
+
+ set_memory_x(md->virt_addr, md->num_pages << EFI_PAGE_SHIFT);
}
- __flush_tlb_all();
}
-#else
-static inline void __init runtime_code_page_mkexec(void) { }
-#endif
/*
* This function will switch the EFI runtime services to virtual mode.
@@ -417,30 +410,40 @@ void __init efi_enter_virtual_mode(void)
{
efi_memory_desc_t *md;
efi_status_t status;
- unsigned long end;
- void *p;
+ unsigned long size;
+ u64 end, systab;
+ void *p, *va;
efi.systab = NULL;
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
if (!(md->attribute & EFI_MEMORY_RUNTIME))
continue;
- end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
- if ((md->attribute & EFI_MEMORY_WB) &&
- ((end >> PAGE_SHIFT) <= max_pfn_mapped))
- md->virt_addr = (unsigned long)__va(md->phys_addr);
+
+ size = md->num_pages << EFI_PAGE_SHIFT;
+ end = md->phys_addr + size;
+
+ if ((end >> PAGE_SHIFT) <= max_pfn_mapped)
+ va = __va(md->phys_addr);
else
- md->virt_addr = (unsigned long)
- efi_ioremap(md->phys_addr,
- md->num_pages << EFI_PAGE_SHIFT);
- if (!md->virt_addr)
+ va = efi_ioremap(md->phys_addr, size);
+
+ if (md->attribute & EFI_MEMORY_WB)
+ set_memory_uc(md->virt_addr, size);
+
+ md->virt_addr = (u64) (unsigned long) va;
+
+ if (!va) {
printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n",
(unsigned long long)md->phys_addr);
- if ((md->phys_addr <= (unsigned long)efi_phys.systab) &&
- ((unsigned long)efi_phys.systab < end))
- efi.systab = (efi_system_table_t *)(unsigned long)
- (md->virt_addr - md->phys_addr +
- (unsigned long)efi_phys.systab);
+ continue;
+ }
+
+ systab = (u64) (unsigned long) efi_phys.systab;
+ if (md->phys_addr <= systab && systab < end) {
+ systab += md->virt_addr - md->phys_addr;
+ efi.systab = (efi_system_table_t *) (unsigned long) systab;
+ }
}
BUG_ON(!efi.systab);
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 674f2379480..09d5c233093 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -54,10 +54,10 @@ static void __init early_mapping_set_exec(unsigned long start,
else
set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \
__supported_pte_mask));
- if (level == 4)
- start = (start + PMD_SIZE) & PMD_MASK;
- else
+ if (level == PG_LEVEL_4K)
start = (start + PAGE_SIZE) & PAGE_MASK;
+ else
+ start = (start + PMD_SIZE) & PMD_MASK;
}
}
@@ -109,23 +109,23 @@ void __init efi_reserve_bootmem(void)
memmap.nr_map * memmap.desc_size);
}
-void __iomem * __init efi_ioremap(unsigned long offset,
- unsigned long size)
+void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size)
{
static unsigned pages_mapped;
- unsigned long last_addr;
unsigned i, pages;
- last_addr = offset + size - 1;
- offset &= PAGE_MASK;
- pages = (PAGE_ALIGN(last_addr) - offset) >> PAGE_SHIFT;
+ /* phys_addr and size must be page aligned */
+ if ((phys_addr & ~PAGE_MASK) || (size & ~PAGE_MASK))
+ return NULL;
+
+ pages = size >> PAGE_SHIFT;
if (pages_mapped + pages > MAX_EFI_IO_PAGES)
return NULL;
for (i = 0; i < pages; i++) {
__set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped,
- offset, PAGE_KERNEL_EXEC_NOCACHE);
- offset += PAGE_SIZE;
+ phys_addr, PAGE_KERNEL);
+ phys_addr += PAGE_SIZE;
pages_mapped++;
}
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index bea8474744f..c7341e81941 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -582,7 +582,6 @@ retint_restore_args: /* return to kernel space */
TRACE_IRQS_IRETQ
restore_args:
RESTORE_ARGS 0,8,0
-iret_label:
#ifdef CONFIG_PARAVIRT
INTERRUPT_RETURN
#endif
@@ -593,13 +592,22 @@ ENTRY(native_iret)
.quad native_iret, bad_iret
.previous
.section .fixup,"ax"
- /* force a signal here? this matches i386 behaviour */
- /* running with kernel gs */
bad_iret:
- movq $11,%rdi /* SIGSEGV */
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
- jmp do_exit
+ /*
+ * The iret traps when the %cs or %ss being restored is bogus.
+ * We've lost the original trap vector and error code.
+ * #GPF is the most likely one to get for an invalid selector.
+ * So pretend we completed the iret and took the #GPF in user mode.
+ *
+ * We are now running with the kernel GS after exception recovery.
+ * But error_entry expects us to have user GS to match the user %cs,
+ * so swap back.
+ */
+ pushq $0
+
+ SWAPGS
+ jmp general_protection
+
.previous
/* edi: workmask, edx: work */
@@ -911,7 +919,7 @@ error_kernelspace:
iret run with kernel gs again, so don't set the user space flag.
B stepping K8s sometimes report an truncated RIP for IRET
exceptions returning to compat mode. Check for these here too. */
- leaq iret_label(%rip),%rbp
+ leaq native_iret(%rip),%rbp
cmpq %rbp,RIP(%rsp)
je error_swapgs
movl %ebp,%ebp /* zero extend */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 1d5a7a36120..09b38d539b0 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -63,7 +63,7 @@ startup_64:
/* Is the address not 2M aligned? */
movq %rbp, %rax
- andl $~LARGE_PAGE_MASK, %eax
+ andl $~PMD_PAGE_MASK, %eax
testl %eax, %eax
jnz bad_address
@@ -88,7 +88,7 @@ startup_64:
/* Add an Identity mapping if I am above 1G */
leaq _text(%rip), %rdi
- andq $LARGE_PAGE_MASK, %rdi
+ andq $PMD_PAGE_MASK, %rdi
movq %rdi, %rax
shrq $PUD_SHIFT, %rax
@@ -250,18 +250,13 @@ ENTRY(secondary_startup_64)
lretq
/* SMP bootup changes these two */
-#ifndef CONFIG_HOTPLUG_CPU
- .pushsection .init.data
-#endif
+ __CPUINITDATA
.align 8
- .globl initial_code
-initial_code:
+ ENTRY(initial_code)
.quad x86_64_start_kernel
-#ifndef CONFIG_HOTPLUG_CPU
- .popsection
-#endif
- .globl init_rsp
-init_rsp:
+ __FINITDATA
+
+ ENTRY(init_rsp)
.quad init_thread_union+THREAD_SIZE-8
bad_address:
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 8a7660c8394..0224c3637c7 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -35,7 +35,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
if (mincount <= pc->size)
return 0;
oldsize = pc->size;
- mincount = (mincount + 511) & (~511);
+ mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
+ (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
else
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index c1cfd60639d..d0b234c9fc3 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -151,7 +151,7 @@ NORET_TYPE void machine_kexec(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
-#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE
+#ifdef CONFIG_NUMA
VMCOREINFO_SYMBOL(node_data);
VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
#endif
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index a1fef42f8cd..236d2f8f7dd 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -234,5 +234,10 @@ NORET_TYPE void machine_kexec(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
VMCOREINFO_SYMBOL(init_level4_pgt);
+
+#ifdef CONFIG_NUMA
+ VMCOREINFO_SYMBOL(node_data);
+ VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#endif
}
diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c
index 67009cdd5ec..f349e68e45a 100644
--- a/arch/x86/kernel/mpparse_32.c
+++ b/arch/x86/kernel/mpparse_32.c
@@ -736,7 +736,8 @@ static int __init smp_scan_config (unsigned long base, unsigned long length)
smp_found_config = 1;
printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
mpf, virt_to_phys(mpf));
- reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
+ reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
+ BOOTMEM_DEFAULT);
if (mpf->mpf_physptr) {
/*
* We cannot access to MPC table to compute
@@ -751,7 +752,8 @@ static int __init smp_scan_config (unsigned long base, unsigned long length)
unsigned long end = max_low_pfn * PAGE_SIZE;
if (mpf->mpf_physptr + size > end)
size = end - mpf->mpf_physptr;
- reserve_bootmem(mpf->mpf_physptr, size);
+ reserve_bootmem(mpf->mpf_physptr, size,
+ BOOTMEM_DEFAULT);
}
mpf_found = mpf;
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index bd82850e651..af51ea8400b 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
- *
- * Copyright 2000 H. Peter Anvin - All Rights Reserved
+ *
+ * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -45,9 +45,10 @@ static struct class *msr_class;
static loff_t msr_seek(struct file *file, loff_t offset, int orig)
{
- loff_t ret = -EINVAL;
+ loff_t ret;
+ struct inode *inode = file->f_mapping->host;
- lock_kernel();
+ mutex_lock(&inode->i_mutex);
switch (orig) {
case 0:
file->f_pos = offset;
@@ -56,8 +57,11 @@ static loff_t msr_seek(struct file *file, loff_t offset, int orig)
case 1:
file->f_pos += offset;
ret = file->f_pos;
+ break;
+ default:
+ ret = -EINVAL;
}
- unlock_kernel();
+ mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 1fe7f043ebd..1b5464c2434 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -35,6 +35,7 @@
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/scatterlist.h>
+#include <linux/iommu-helper.h>
#include <asm/gart.h>
#include <asm/calgary.h>
#include <asm/tce.h>
@@ -260,22 +261,28 @@ static void iommu_range_reserve(struct iommu_table *tbl,
spin_unlock_irqrestore(&tbl->it_lock, flags);
}
-static unsigned long iommu_range_alloc(struct iommu_table *tbl,
- unsigned int npages)
+static unsigned long iommu_range_alloc(struct device *dev,
+ struct iommu_table *tbl,
+ unsigned int npages)
{
unsigned long flags;
unsigned long offset;
+ unsigned long boundary_size;
+
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+ PAGE_SIZE) >> PAGE_SHIFT;
BUG_ON(npages == 0);
spin_lock_irqsave(&tbl->it_lock, flags);
- offset = find_next_zero_string(tbl->it_map, tbl->it_hint,
- tbl->it_size, npages);
+ offset = iommu_area_alloc(tbl->it_map, tbl->it_size, tbl->it_hint,
+ npages, 0, boundary_size, 0);
if (offset == ~0UL) {
tbl->chip_ops->tce_cache_blast(tbl);
- offset = find_next_zero_string(tbl->it_map, 0,
- tbl->it_size, npages);
+
+ offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0,
+ npages, 0, boundary_size, 0);
if (offset == ~0UL) {
printk(KERN_WARNING "Calgary: IOMMU full.\n");
spin_unlock_irqrestore(&tbl->it_lock, flags);
@@ -286,7 +293,6 @@ static unsigned long iommu_range_alloc(struct iommu_table *tbl,
}
}
- set_bit_string(tbl->it_map, offset, npages);
tbl->it_hint = offset + npages;
BUG_ON(tbl->it_hint > tbl->it_size);
@@ -295,13 +301,13 @@ static unsigned long iommu_range_alloc(struct iommu_table *tbl,
return offset;
}
-static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *vaddr,
- unsigned int npages, int direction)
+static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
+ void *vaddr, unsigned int npages, int direction)
{
unsigned long entry;
dma_addr_t ret = bad_dma_address;
- entry = iommu_range_alloc(tbl, npages);
+ entry = iommu_range_alloc(dev, tbl, npages);
if (unlikely(entry == bad_dma_address))
goto error;
@@ -354,7 +360,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
badbit, tbl, dma_addr, entry, npages);
}
- __clear_bit_string(tbl->it_map, entry, npages);
+ iommu_area_free(tbl->it_map, entry, npages);
spin_unlock_irqrestore(&tbl->it_lock, flags);
}
@@ -438,7 +444,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
vaddr = (unsigned long) sg_virt(s);
npages = num_dma_pages(vaddr, s->length);
- entry = iommu_range_alloc(tbl, npages);
+ entry = iommu_range_alloc(dev, tbl, npages);
if (entry == bad_dma_address) {
/* makes sure unmap knows to stop */
s->dma_length = 0;
@@ -476,7 +482,7 @@ static dma_addr_t calgary_map_single(struct device *dev, void *vaddr,
npages = num_dma_pages(uaddr, size);
if (translation_enabled(tbl))
- dma_handle = iommu_alloc(tbl, vaddr, npages, direction);
+ dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction);
else
dma_handle = virt_to_bus(vaddr);
@@ -516,7 +522,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
if (translation_enabled(tbl)) {
/* set up tces to cover the allocated range */
- mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL);
+ mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
if (mapping == bad_dma_address)
goto free;
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 4d5cc718198..65f6acb025c 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -25,6 +25,7 @@
#include <linux/bitops.h>
#include <linux/kdebug.h>
#include <linux/scatterlist.h>
+#include <linux/iommu-helper.h>
#include <asm/atomic.h>
#include <asm/io.h>
#include <asm/mtrr.h>
@@ -82,17 +83,24 @@ AGPEXTERN __u32 *agp_gatt_table;
static unsigned long next_bit; /* protected by iommu_bitmap_lock */
static int need_flush; /* global flush state. set for each gart wrap */
-static unsigned long alloc_iommu(int size)
+static unsigned long alloc_iommu(struct device *dev, int size)
{
unsigned long offset, flags;
+ unsigned long boundary_size;
+ unsigned long base_index;
+
+ base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
+ PAGE_SIZE) >> PAGE_SHIFT;
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+ PAGE_SIZE) >> PAGE_SHIFT;
spin_lock_irqsave(&iommu_bitmap_lock, flags);
- offset = find_next_zero_string(iommu_gart_bitmap, next_bit,
- iommu_pages, size);
+ offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
+ size, base_index, boundary_size, 0);
if (offset == -1) {
need_flush = 1;
- offset = find_next_zero_string(iommu_gart_bitmap, 0,
- iommu_pages, size);
+ offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
+ size, base_index, boundary_size, 0);
}
if (offset != -1) {
set_bit_string(iommu_gart_bitmap, offset, size);
@@ -114,7 +122,7 @@ static void free_iommu(unsigned long offset, int size)
unsigned long flags;
spin_lock_irqsave(&iommu_bitmap_lock, flags);
- __clear_bit_string(iommu_gart_bitmap, offset, size);
+ iommu_area_free(iommu_gart_bitmap, offset, size);
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}
@@ -235,7 +243,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
size_t size, int dir)
{
unsigned long npages = to_pages(phys_mem, size);
- unsigned long iommu_page = alloc_iommu(npages);
+ unsigned long iommu_page = alloc_iommu(dev, npages);
int i;
if (iommu_page == -1) {
@@ -355,10 +363,11 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
}
/* Map multiple scatterlist entries continuous into the first. */
-static int __dma_map_cont(struct scatterlist *start, int nelems,
- struct scatterlist *sout, unsigned long pages)
+static int __dma_map_cont(struct device *dev, struct scatterlist *start,
+ int nelems, struct scatterlist *sout,
+ unsigned long pages)
{
- unsigned long iommu_start = alloc_iommu(pages);
+ unsigned long iommu_start = alloc_iommu(dev, pages);
unsigned long iommu_page = iommu_start;
struct scatterlist *s;
int i;
@@ -394,8 +403,8 @@ static int __dma_map_cont(struct scatterlist *start, int nelems,
}
static inline int
-dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout,
- unsigned long pages, int need)
+dma_map_cont(struct device *dev, struct scatterlist *start, int nelems,
+ struct scatterlist *sout, unsigned long pages, int need)
{
if (!need) {
BUG_ON(nelems != 1);
@@ -403,7 +412,7 @@ dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout,
sout->dma_length = start->length;
return 0;
}
- return __dma_map_cont(start, nelems, sout, pages);
+ return __dma_map_cont(dev, start, nelems, sout, pages);
}
/*
@@ -416,6 +425,8 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
struct scatterlist *s, *ps, *start_sg, *sgmap;
int need = 0, nextneed, i, out, start;
unsigned long pages = 0;
+ unsigned int seg_size;
+ unsigned int max_seg_size;
if (nents == 0)
return 0;
@@ -426,6 +437,8 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
out = 0;
start = 0;
start_sg = sgmap = sg;
+ seg_size = 0;
+ max_seg_size = dma_get_max_seg_size(dev);
ps = NULL; /* shut up gcc */
for_each_sg(sg, s, nents, i) {
dma_addr_t addr = sg_phys(s);
@@ -443,11 +456,13 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
* offset.
*/
if (!iommu_merge || !nextneed || !need || s->offset ||
+ (s->length + seg_size > max_seg_size) ||
(ps->offset + ps->length) % PAGE_SIZE) {
- if (dma_map_cont(start_sg, i - start, sgmap,
- pages, need) < 0)
+ if (dma_map_cont(dev, start_sg, i - start,
+ sgmap, pages, need) < 0)
goto error;
out++;
+ seg_size = 0;
sgmap = sg_next(sgmap);
pages = 0;
start = i;
@@ -455,11 +470,12 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
}
}
+ seg_size += s->length;
need = nextneed;
pages += to_pages(s->offset, s->length);
ps = s;
}
- if (dma_map_cont(start_sg, i - start, sgmap, pages, need) < 0)
+ if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0)
goto error;
out++;
flush_gart();
@@ -501,7 +517,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
}
a = aper + iommu_size;
- iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a;
+ iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;
if (iommu_size < 64*1024*1024) {
printk(KERN_WARNING
@@ -731,7 +747,8 @@ void __init gart_iommu_init(void)
* the backing memory. The GART address is only used by PCI
* devices.
*/
- clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);
+ set_memory_np((unsigned long)__va(iommu_bus_base),
+ iommu_size >> PAGE_SHIFT);
/*
* Try to workaround a bug (thanks to BenH)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 968371ab223..dabdbeff1f7 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -251,7 +251,7 @@ void cpu_idle_wait(void)
* because it has nothing to do.
* Give all the remaining CPUS a kick.
*/
- smp_call_function_mask(map, do_nothing, 0, 0);
+ smp_call_function_mask(map, do_nothing, NULL, 0);
} while (!cpus_empty(map));
set_cpus_allowed(current, tmp);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 96286df1bb8..702c33efea8 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -103,9 +103,26 @@ static int set_segment_reg(struct task_struct *task,
if (invalid_selector(value))
return -EIO;
- if (offset != offsetof(struct user_regs_struct, gs))
+ /*
+ * For %cs and %ss we cannot permit a null selector.
+ * We can permit a bogus selector as long as it has USER_RPL.
+ * Null selectors are fine for other segment registers, but
+ * we will never get back to user mode with invalid %cs or %ss
+ * and will take the trap in iret instead. Much code relies
+ * on user_mode() to distinguish a user trap frame (which can
+ * safely use invalid selectors) from a kernel trap frame.
+ */
+ switch (offset) {
+ case offsetof(struct user_regs_struct, cs):
+ case offsetof(struct user_regs_struct, ss):
+ if (unlikely(value == 0))
+ return -EIO;
+
+ default:
*pt_regs_access(task_pt_regs(task), offset) = value;
- else {
+ break;
+
+ case offsetof(struct user_regs_struct, gs):
task->thread.gs = value;
if (task == current)
/*
@@ -227,12 +244,16 @@ static int set_segment_reg(struct task_struct *task,
* Can't actually change these in 64-bit mode.
*/
case offsetof(struct user_regs_struct,cs):
+ if (unlikely(value == 0))
+ return -EIO;
#ifdef CONFIG_IA32_EMULATION
if (test_tsk_thread_flag(task, TIF_IA32))
task_pt_regs(task)->cs = value;
#endif
break;
case offsetof(struct user_regs_struct,ss):
+ if (unlikely(value == 0))
+ return -EIO;
#ifdef CONFIG_IA32_EMULATION
if (test_tsk_thread_flag(task, TIF_IA32))
task_pt_regs(task)->ss = value;
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 3cd7a2dcd4f..6ba33ca8715 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -380,19 +380,19 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0367,
void force_hpet_resume(void)
{
switch (force_hpet_resume_type) {
- case ICH_FORCE_HPET_RESUME:
- return ich_force_hpet_resume();
-
- case OLD_ICH_FORCE_HPET_RESUME:
- return old_ich_force_hpet_resume();
-
- case VT8237_FORCE_HPET_RESUME:
- return vt8237_force_hpet_resume();
-
- case NVIDIA_FORCE_HPET_RESUME:
- return nvidia_force_hpet_resume();
-
- default:
+ case ICH_FORCE_HPET_RESUME:
+ ich_force_hpet_resume();
+ return;
+ case OLD_ICH_FORCE_HPET_RESUME:
+ old_ich_force_hpet_resume();
+ return;
+ case VT8237_FORCE_HPET_RESUME:
+ vt8237_force_hpet_resume();
+ return;
+ case NVIDIA_FORCE_HPET_RESUME:
+ nvidia_force_hpet_resume();
+ return;
+ default:
break;
}
}
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 62adc5f20be..d1d8c347cc0 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -390,7 +390,7 @@ static void __init reserve_ebda_region(void)
unsigned int addr;
addr = get_bios_ebda();
if (addr)
- reserve_bootmem(addr, PAGE_SIZE);
+ reserve_bootmem(addr, PAGE_SIZE, BOOTMEM_DEFAULT);
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -484,7 +484,8 @@ static void __init reserve_crashkernel(void)
(unsigned long)(total_mem >> 20));
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
- reserve_bootmem(crash_base, crash_size);
+ reserve_bootmem(crash_base, crash_size,
+ BOOTMEM_DEFAULT);
} else
printk(KERN_INFO "crashkernel reservation failed - "
"you have to specify a base address\n");
@@ -525,7 +526,7 @@ static void __init reserve_initrd(void)
}
if (ramdisk_end <= end_of_lowmem) {
/* All in lowmem, easy case */
- reserve_bootmem(ramdisk_image, ramdisk_size);
+ reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT);
initrd_start = ramdisk_image + PAGE_OFFSET;
initrd_end = initrd_start+ramdisk_size;
return;
@@ -536,7 +537,7 @@ static void __init reserve_initrd(void)
/* Note: this includes all the lowmem currently occupied by
the initrd, we rely on that fact to keep the data intact. */
- reserve_bootmem(ramdisk_here, ramdisk_size);
+ reserve_bootmem(ramdisk_here, ramdisk_size, BOOTMEM_DEFAULT);
initrd_start = ramdisk_here + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
@@ -606,13 +607,14 @@ void __init setup_bootmem_allocator(void)
* bootmem allocator with an invalid RAM area.
*/
reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
- bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text));
+ bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text),
+ BOOTMEM_DEFAULT);
/*
* reserve physical page 0 - it's a special BIOS page on many boxes,
* enabling clean reboots, SMP operation, laptop functions.
*/
- reserve_bootmem(0, PAGE_SIZE);
+ reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
/* reserve EBDA region, it's a 4K region */
reserve_ebda_region();
@@ -622,7 +624,7 @@ void __init setup_bootmem_allocator(void)
unless you have no PS/2 mouse plugged in. */
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
boot_cpu_data.x86 == 6)
- reserve_bootmem(0xa0000 - 4096, 4096);
+ reserve_bootmem(0xa0000 - 4096, 4096, BOOTMEM_DEFAULT);
#ifdef CONFIG_SMP
/*
@@ -630,7 +632,7 @@ void __init setup_bootmem_allocator(void)
* FIXME: Don't need the extra page at 4K, but need to fix
* trampoline before removing it. (see the GDT stuff)
*/
- reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
+ reserve_bootmem(PAGE_SIZE, PAGE_SIZE, BOOTMEM_DEFAULT);
#endif
#ifdef CONFIG_ACPI_SLEEP
/*
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 18df70c534b..a49f5f734a5 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -189,7 +189,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
e820_register_active_regions(0, start_pfn, end_pfn);
free_bootmem_with_active_regions(0, end_pfn);
- reserve_bootmem(bootmap, bootmap_size);
+ reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
}
#endif
@@ -220,28 +220,35 @@ static inline void copy_edd(void)
#ifdef CONFIG_KEXEC
static void __init reserve_crashkernel(void)
{
- unsigned long long free_mem;
+ unsigned long long total_mem;
unsigned long long crash_size, crash_base;
int ret;
- free_mem =
- ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
+ total_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
- ret = parse_crashkernel(boot_command_line, free_mem,
+ ret = parse_crashkernel(boot_command_line, total_mem,
&crash_size, &crash_base);
if (ret == 0 && crash_size) {
- if (crash_base > 0) {
- printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
- "for crashkernel (System RAM: %ldMB)\n",
- (unsigned long)(crash_size >> 20),
- (unsigned long)(crash_base >> 20),
- (unsigned long)(free_mem >> 20));
- crashk_res.start = crash_base;
- crashk_res.end = crash_base + crash_size - 1;
- reserve_bootmem(crash_base, crash_size);
- } else
+ if (crash_base <= 0) {
printk(KERN_INFO "crashkernel reservation failed - "
"you have to specify a base address\n");
+ return;
+ }
+
+ if (reserve_bootmem(crash_base, crash_size,
+ BOOTMEM_EXCLUSIVE) < 0) {
+ printk(KERN_INFO "crashkernel reservation failed - "
+ "memory is in use\n");
+ return;
+ }
+
+ printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+ "for crashkernel (System RAM: %ldMB)\n",
+ (unsigned long)(crash_size >> 20),
+ (unsigned long)(crash_base >> 20),
+ (unsigned long)(total_mem >> 20));
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
}
}
#else
@@ -1068,82 +1075,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
struct cpuinfo_x86 *c = v;
int cpu = 0, i;
- /*
- * These flag bits must match the definitions in <asm/cpufeature.h>.
- * NULL means this bit is undefined or reserved; either way it doesn't
- * have meaning as far as Linux is concerned. Note that it's important
- * to realize there is a difference between this table and CPUID -- if
- * applications want to get the raw CPUID data, they should access
- * /dev/cpu/<cpu_nr>/cpuid instead.
- */
- static const char *const x86_cap_flags[] = {
- /* Intel-defined */
- "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
- "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
- "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
- "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
-
- /* AMD-defined */
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
- NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
- "3dnowext", "3dnow",
-
- /* Transmeta-defined */
- "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Other (Linux-defined) */
- "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
- NULL, NULL, NULL, NULL,
- "constant_tsc", "up", NULL, "arch_perfmon",
- "pebs", "bts", NULL, "sync_rdtsc",
- "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Intel-defined (#2) */
- "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
- "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
- NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* VIA/Cyrix/Centaur-defined */
- NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
- "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* AMD-defined (#2) */
- "lahf_lm", "cmp_legacy", "svm", "extapic",
- "cr8_legacy", "abm", "sse4a", "misalignsse",
- "3dnowprefetch", "osvw", "ibs", "sse5",
- "skinit", "wdt", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Auxiliary (Linux-defined) */
- "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- };
- static const char *const x86_power_flags[] = {
- "ts", /* temperature sensor */
- "fid", /* frequency id control */
- "vid", /* voltage id control */
- "ttp", /* thermal trip */
- "tm",
- "stc",
- "100mhzsteps",
- "hwpstate",
- "", /* tsc invariant mapped to constant_tsc */
- /* nothing */
- };
-
-
#ifdef CONFIG_SMP
cpu = c->cpu_index;
#endif
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index 5787a0c3e29..579b9b740c7 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -202,8 +202,6 @@ valid_k7:
;
}
-extern void calibrate_delay(void);
-
static atomic_t init_deasserted;
static void __cpuinit smp_callin(void)
diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c
index 2bf6903cb44..b72e61359c3 100644
--- a/arch/x86/kernel/srat_32.c
+++ b/arch/x86/kernel/srat_32.c
@@ -274,7 +274,7 @@ int __init get_memcfg_from_srat(void)
int tables = 0;
int i = 0;
- rsdp_address = acpi_find_rsdp();
+ rsdp_address = acpi_os_get_root_pointer();
if (!rsdp_address) {
printk("%s: System description tables not found\n",
__FUNCTION__);
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 8344c70adf6..adff5562f5f 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -321,6 +321,8 @@ ENTRY(sys_call_table)
.long sys_epoll_pwait
.long sys_utimensat /* 320 */
.long sys_signalfd
- .long sys_timerfd
+ .long sys_timerfd_create
.long sys_eventfd
.long sys_fallocate
+ .long sys_timerfd_settime /* 325 */
+ .long sys_timerfd_gettime
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index ae0ef2e304c..10b8a6f69f8 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/sort.h>
#include <asm/uaccess.h>
+#include <asm/asm.h>
extern int rodata_test_data;
@@ -89,16 +90,7 @@ static noinline int test_address(void *address)
"2: mov %[zero], %[rslt]\n"
" ret\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 8\n"
-#ifdef CONFIG_X86_32
- " .long 0b\n"
- " .long 2b\n"
-#else
- " .quad 0b\n"
- " .quad 2b\n"
-#endif
- ".previous\n"
+ _ASM_EXTABLE(0b,2b)
: [rslt] "=r" (result)
: [fake_code] "r" (address), [zero] "r" (0UL), "0" (result)
);
@@ -147,7 +139,6 @@ static int test_NX(void)
* Until then, don't run them to avoid too many people getting scared
* by the error message
*/
-#if 0
#ifdef CONFIG_DEBUG_RODATA
/* Test 3: Check if the .rodata section is executable */
@@ -160,6 +151,7 @@ static int test_NX(void)
}
#endif
+#if 0
/* Test 4: Check if the .data section of a module is executable */
if (test_address(&test_data)) {
printk(KERN_ERR "test_nx: .data section is executable\n");
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index 9bcc1c6aca3..64580679861 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -11,12 +11,7 @@
* trampoline page to make our stack and everything else
* is a mystery.
*
- * In fact we don't actually need a stack so we don't
- * set one up.
- *
- * We jump into the boot/compressed/head.S code. So you'd
- * better be running a compressed kernel image or you
- * won't get very far.
+ * We jump into arch/x86/kernel/head_32.S.
*
* On entry to trampoline_data, the processor is in real mode
* with 16-bit addressing and 16-bit data. CS has some value
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index e30b67c6a9f..4aedd0bcee4 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -10,9 +10,6 @@
* trampoline page to make our stack and everything else
* is a mystery.
*
- * In fact we don't actually need a stack so we don't
- * set one up.
- *
* On entry to trampoline_data, the processor is in real mode
* with 16-bit addressing and 16-bit data. CS has some value
* and IP is zero. Thus, data addresses need to be absolute
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 3cf72977d01..b22c01e05a1 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1176,17 +1176,12 @@ void __init trap_init(void)
#endif
set_trap_gate(19,&simd_coprocessor_error);
+ /*
+ * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
+ * Generate a build-time error if the alignment is wrong.
+ */
+ BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15);
if (cpu_has_fxsr) {
- /*
- * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
- * Generates a compile-time "error: zero width for bit-field" if
- * the alignment is wrong.
- */
- struct fxsrAlignAssert {
- int _:!(offsetof(struct task_struct,
- thread.i387.fxsave) & 15);
- };
-
printk(KERN_INFO "Enabling fast FPU save and restore... ");
set_in_cr4(X86_CR4_OSFXSR);
printk("done.\n");
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 4525bc2c2e1..12affe1f9bc 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -220,21 +220,21 @@ static void vmi_set_tr(void)
static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
{
u32 *idt_entry = (u32 *)g;
- vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[2]);
+ vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[1]);
}
static void vmi_write_gdt_entry(struct desc_struct *dt, int entry,
const void *desc, int type)
{
u32 *gdt_entry = (u32 *)desc;
- vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[2]);
+ vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[1]);
}
static void vmi_write_ldt_entry(struct desc_struct *dt, int entry,
const void *desc)
{
u32 *ldt_entry = (u32 *)desc;
- vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[2]);
+ vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
}
static void vmi_load_sp0(struct tss_struct *tss,
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index c83e1c9b512..41962e793c0 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -53,5 +53,6 @@ config KVM_AMD
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
source drivers/lguest/Kconfig
+source drivers/virtio/Kconfig
endif # VIRTUALIZATION
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f94a0b89df..cf530814868 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1739,7 +1739,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
if (bytes == 8) {
gpa_t gpa;
struct page *page;
- char *addr;
+ char *kaddr;
u64 val;
down_read(&current->mm->mmap_sem);
@@ -1754,9 +1754,9 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
val = *(u64 *)new;
page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
- addr = kmap_atomic(page, KM_USER0);
- set_64bit((u64 *)(addr + offset_in_page(gpa)), val);
- kunmap_atomic(addr, KM_USER0);
+ kaddr = kmap_atomic(page, KM_USER0);
+ set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
+ kunmap_atomic(kaddr, KM_USER0);
kvm_release_page_dirty(page);
emul_write:
up_read(&current->mm->mmap_sem);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 4876182daf8..25df1c1989f 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -21,7 +21,7 @@ else
lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
lib-y += thunk_64.o clear_page_64.o copy_page_64.o
- lib-y += bitstr_64.o bitops_64.o
+ lib-y += bitops_64.o
lib-y += memmove_64.o memset_64.o
lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
endif
diff --git a/arch/x86/lib/bitops_32.c b/arch/x86/lib/bitops_32.c
index afd0045595d..b6544045985 100644
--- a/arch/x86/lib/bitops_32.c
+++ b/arch/x86/lib/bitops_32.c
@@ -2,7 +2,7 @@
#include <linux/module.h>
/**
- * find_next_bit - find the first set bit in a memory region
+ * find_next_bit - find the next set bit in a memory region
* @addr: The address to base the search on
* @offset: The bitnumber to start searching at
* @size: The maximum size to search
diff --git a/arch/x86/lib/bitops_64.c b/arch/x86/lib/bitops_64.c
index 95b6d9639fb..0e8f491e6cc 100644
--- a/arch/x86/lib/bitops_64.c
+++ b/arch/x86/lib/bitops_64.c
@@ -58,7 +58,7 @@ long find_first_zero_bit(const unsigned long * addr, unsigned long size)
}
/**
- * find_next_zero_bit - find the first zero bit in a memory region
+ * find_next_zero_bit - find the next zero bit in a memory region
* @addr: The address to base the search on
* @offset: The bitnumber to start searching at
* @size: The maximum size to search
diff --git a/arch/x86/lib/bitstr_64.c b/arch/x86/lib/bitstr_64.c
deleted file mode 100644
index 7445caf1b5d..00000000000
--- a/arch/x86/lib/bitstr_64.c
+++ /dev/null
@@ -1,28 +0,0 @@
-#include <linux/module.h>
-#include <linux/bitops.h>
-
-/* Find string of zero bits in a bitmap */
-unsigned long
-find_next_zero_string(unsigned long *bitmap, long start, long nbits, int len)
-{
- unsigned long n, end, i;
-
- again:
- n = find_next_zero_bit(bitmap, nbits, start);
- if (n == -1)
- return -1;
-
- /* could test bitsliced, but it's hardly worth it */
- end = n+len;
- if (end > nbits)
- return -1;
- for (i = n+1; i < end; i++) {
- if (test_bit(i, bitmap)) {
- start = i+1;
- goto again;
- }
- }
- return n;
-}
-
-EXPORT_SYMBOL(find_next_zero_string);
diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c
index aad9d95469d..4535e6d147a 100644
--- a/arch/x86/lib/delay_32.c
+++ b/arch/x86/lib/delay_32.c
@@ -12,8 +12,10 @@
#include <linux/module.h>
#include <linux/sched.h>
+#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>
+#include <linux/init.h>
#include <asm/processor.h>
#include <asm/delay.h>
@@ -63,7 +65,7 @@ void use_tsc_delay(void)
delay_fn = delay_tsc;
}
-int read_current_timer(unsigned long *timer_val)
+int __devinit read_current_timer(unsigned long *timer_val)
{
if (delay_fn == delay_tsc) {
rdtscl(*timer_val);
diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c
index 45cdd3fbd91..bbc61051851 100644
--- a/arch/x86/lib/delay_64.c
+++ b/arch/x86/lib/delay_64.c
@@ -10,8 +10,10 @@
#include <linux/module.h>
#include <linux/sched.h>
+#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>
+#include <linux/init.h>
#include <asm/delay.h>
#include <asm/msr.h>
@@ -20,7 +22,7 @@
#include <asm/smp.h>
#endif
-int read_current_timer(unsigned long *timer_value)
+int __devinit read_current_timer(unsigned long *timer_value)
{
rdtscll(*timer_value);
return 0;
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
index 28084d2e8dd..cc9b4a4450f 100644
--- a/arch/x86/lib/mmx_32.c
+++ b/arch/x86/lib/mmx_32.c
@@ -4,6 +4,7 @@
#include <linux/hardirq.h>
#include <linux/module.h>
+#include <asm/asm.h>
#include <asm/i387.h>
@@ -50,10 +51,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b, 3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: : "r" (from) );
@@ -81,10 +79,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b, 3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: : "r" (from), "r" (to) : "memory");
from+=64;
to+=64;
@@ -181,10 +176,7 @@ static void fast_copy_page(void *to, void *from)
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b, 3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: : "r" (from) );
for(i=0; i<(4096-320)/64; i++)
@@ -211,10 +203,7 @@ static void fast_copy_page(void *to, void *from)
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b, 3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: : "r" (from), "r" (to) : "memory");
from+=64;
to+=64;
@@ -311,10 +300,7 @@ static void fast_copy_page(void *to, void *from)
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b, 3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: : "r" (from) );
for(i=0; i<4096/64; i++)
@@ -341,10 +327,7 @@ static void fast_copy_page(void *to, void *from)
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b, 3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: : "r" (from), "r" (to) : "memory");
from+=64;
to+=64;
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 9c4ffd5bedb..e849b9998b0 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -48,10 +48,7 @@ do { \
"3: movl %5,%0\n" \
" jmp 2b\n" \
".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,3b\n" \
- ".previous" \
+ _ASM_EXTABLE(0b,3b) \
: "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
"=&D" (__d2) \
: "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
@@ -132,11 +129,8 @@ do { \
"3: lea 0(%2,%0,4),%0\n" \
" jmp 2b\n" \
".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,3b\n" \
- " .long 1b,2b\n" \
- ".previous" \
+ _ASM_EXTABLE(0b,3b) \
+ _ASM_EXTABLE(1b,2b) \
: "=&c"(size), "=&D" (__d0) \
: "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
} while (0)
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 893d43f838c..0c89d1bb028 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -31,10 +31,7 @@ do { \
"3: movq %5,%0\n" \
" jmp 2b\n" \
".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 8\n" \
- " .quad 0b,3b\n" \
- ".previous" \
+ _ASM_EXTABLE(0b,3b) \
: "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
"=&D" (__d2) \
: "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
@@ -87,11 +84,8 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
"3: lea 0(%[size1],%[size8],8),%[size8]\n"
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 8\n"
- " .quad 0b,3b\n"
- " .quad 1b,2b\n"
- ".previous"
+ _ASM_EXTABLE(0b,3b)
+ _ASM_EXTABLE(1b,2b)
: [size8] "=c"(size), [dst] "=&D" (__d0)
: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
[zero] "r" (0UL), [eight] "r" (8UL));
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index dffa786f61f..3cc8eb2f36a 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -444,8 +444,6 @@ static __u32 __init setup_trampoline(void)
static void __init start_secondary(void *unused)
{
__u8 cpuid = hard_smp_processor_id();
- /* external functions not defined in the headers */
- extern void calibrate_delay(void);
cpu_init();
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 04b1d20e261..c394ca0720b 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -391,7 +391,8 @@ unsigned long __init setup_memory(void)
void __init numa_kva_reserve(void)
{
if (kva_pages)
- reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages));
+ reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages),
+ BOOTMEM_DEFAULT);
}
void __init zone_sizes_init(void)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e4440d0abf8..621afb6343d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -240,7 +240,8 @@ void dump_pagetable(unsigned long address)
pud = pud_offset(pgd, address);
if (bad_address(pud)) goto bad;
printk("PUD %lx ", pud_val(*pud));
- if (!pud_present(*pud)) goto ret;
+ if (!pud_present(*pud) || pud_large(*pud))
+ goto ret;
pmd = pmd_offset(pud, address);
if (bad_address(pmd)) goto bad;
@@ -427,6 +428,16 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
}
#endif
+static int spurious_fault_check(unsigned long error_code, pte_t *pte)
+{
+ if ((error_code & PF_WRITE) && !pte_write(*pte))
+ return 0;
+ if ((error_code & PF_INSTR) && !pte_exec(*pte))
+ return 0;
+
+ return 1;
+}
+
/*
* Handle a spurious fault caused by a stale TLB entry. This allows
* us to lazily refresh the TLB when increasing the permissions of a
@@ -456,20 +467,21 @@ static int spurious_fault(unsigned long address,
if (!pud_present(*pud))
return 0;
+ if (pud_large(*pud))
+ return spurious_fault_check(error_code, (pte_t *) pud);
+
pmd = pmd_offset(pud, address);
if (!pmd_present(*pmd))
return 0;
+ if (pmd_large(*pmd))
+ return spurious_fault_check(error_code, (pte_t *) pmd);
+
pte = pte_offset_kernel(pmd, address);
if (!pte_present(*pte))
return 0;
- if ((error_code & PF_WRITE) && !pte_write(*pte))
- return 0;
- if ((error_code & PF_INSTR) && !pte_exec(*pte))
- return 0;
-
- return 1;
+ return spurious_fault_check(error_code, pte);
}
/*
@@ -508,6 +520,10 @@ static int vmalloc_fault(unsigned long address)
pmd_t *pmd, *pmd_ref;
pte_t *pte, *pte_ref;
+ /* Make sure we are in vmalloc area */
+ if (!(address >= VMALLOC_START && address < VMALLOC_END))
+ return -1;
+
/* Copy kernel mappings over when needed. This can also
happen within a race in page table update. In the later
case just flush. */
@@ -603,6 +619,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
*/
#ifdef CONFIG_X86_32
if (unlikely(address >= TASK_SIZE)) {
+#else
+ if (unlikely(address >= TASK_SIZE64)) {
+#endif
if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
vmalloc_fault(address) >= 0)
return;
@@ -618,6 +637,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
goto bad_area_nosemaphore;
}
+
+#ifdef CONFIG_X86_32
/* It's safe to allow irq's after cr2 has been saved and the vmalloc
fault has been handled. */
if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
@@ -630,28 +651,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (in_atomic() || !mm)
goto bad_area_nosemaphore;
#else /* CONFIG_X86_64 */
- if (unlikely(address >= TASK_SIZE64)) {
- /*
- * Don't check for the module range here: its PML4
- * is always initialized because it's shared with the main
- * kernel text. Only vmalloc may need PML4 syncups.
- */
- if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
- ((address >= VMALLOC_START && address < VMALLOC_END))) {
- if (vmalloc_fault(address) >= 0)
- return;
- }
-
- /* Can handle a stale RO->RW TLB */
- if (spurious_fault(address, error_code))
- return;
-
- /*
- * Don't take the mm semaphore here. If we fixup a prefetch
- * fault we could otherwise deadlock.
- */
- goto bad_area_nosemaphore;
- }
if (likely(regs->flags & X86_EFLAGS_IF))
local_irq_enable();
@@ -959,11 +958,12 @@ void vmalloc_sync_all(void)
for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
if (!test_bit(pgd_index(address), insync)) {
const pgd_t *pgd_ref = pgd_offset_k(address);
+ unsigned long flags;
struct page *page;
if (pgd_none(*pgd_ref))
continue;
- spin_lock(&pgd_lock);
+ spin_lock_irqsave(&pgd_lock, flags);
list_for_each_entry(page, &pgd_list, lru) {
pgd_t *pgd;
pgd = (pgd_t *)page_address(page) + pgd_index(address);
@@ -972,7 +972,7 @@ void vmalloc_sync_all(void)
else
BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
}
- spin_unlock(&pgd_lock);
+ spin_unlock_irqrestore(&pgd_lock, flags);
set_bit(pgd_index(address), insync);
}
if (address == start)
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index f2f36f8dae5..d1bc04006d1 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -31,6 +31,7 @@
#include <linux/initrd.h>
#include <linux/cpumask.h>
+#include <asm/asm.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -718,10 +719,7 @@ static noinline int do_test_wp_bit(void)
"1: movb %1, %0 \n"
" xorl %2, %2 \n"
"2: \n"
- ".section __ex_table, \"a\"\n"
- " .align 4 \n"
- " .long 1b, 2b \n"
- ".previous \n"
+ _ASM_EXTABLE(1b,2b)
:"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
"=q" (tmp_reg),
"=r" (flag)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index eabcaed76c2..5fe880fc305 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -273,7 +273,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
int i = pmd_index(address);
for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
- unsigned long entry;
pmd_t *pmd = pmd_page + pmd_index(address);
if (address >= end) {
@@ -287,9 +286,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
if (pmd_val(*pmd))
continue;
- entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address;
- entry &= __supported_pte_mask;
- set_pmd(pmd, __pmd(entry));
+ set_pte((pte_t *)pmd,
+ pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
}
}
@@ -435,49 +433,6 @@ void __init paging_init(void)
#endif
/*
- * Unmap a kernel mapping if it exists. This is useful to avoid
- * prefetches from the CPU leading to inconsistent cache lines.
- * address and size must be aligned to 2MB boundaries.
- * Does nothing when the mapping doesn't exist.
- */
-void __init clear_kernel_mapping(unsigned long address, unsigned long size)
-{
- unsigned long end = address + size;
-
- BUG_ON(address & ~LARGE_PAGE_MASK);
- BUG_ON(size & ~LARGE_PAGE_MASK);
-
- for (; address < end; address += LARGE_PAGE_SIZE) {
- pgd_t *pgd = pgd_offset_k(address);
- pud_t *pud;
- pmd_t *pmd;
-
- if (pgd_none(*pgd))
- continue;
-
- pud = pud_offset(pgd, address);
- if (pud_none(*pud))
- continue;
-
- pmd = pmd_offset(pud, address);
- if (!pmd || pmd_none(*pmd))
- continue;
-
- if (!(pmd_val(*pmd) & _PAGE_PSE)) {
- /*
- * Could handle this, but it should not happen
- * currently:
- */
- printk(KERN_ERR "clear_kernel_mapping: "
- "mapping has been split. will leak memory\n");
- pmd_ERROR(*pmd);
- }
- set_pmd(pmd, __pmd(0));
- }
- __flush_tlb_all();
-}
-
-/*
* Memory hotplug specific functions
*/
void online_page(struct page *page)
@@ -636,10 +591,17 @@ void mark_rodata_ro(void)
if (end <= start)
return;
- set_memory_ro(start, (end - start) >> PAGE_SHIFT);
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
(end - start) >> 10);
+ set_memory_ro(start, (end - start) >> PAGE_SHIFT);
+
+ /*
+ * The rodata section (but not the kernel text!) should also be
+ * not-executable.
+ */
+ start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+ set_memory_nx(start, (end - start) >> PAGE_SHIFT);
rodata_test();
@@ -682,9 +644,9 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
/* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
- reserve_bootmem_node(NODE_DATA(nid), phys, len);
+ reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
#else
- reserve_bootmem(phys, len);
+ reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
#endif
if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
dma_reserve += len / PAGE_SIZE;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index c004d94608f..ee6648fe6b1 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -70,25 +70,12 @@ int page_is_ram(unsigned long pagenr)
* Fix up the linear direct mapping of the kernel to avoid cache attribute
* conflicts.
*/
-static int ioremap_change_attr(unsigned long paddr, unsigned long size,
+static int ioremap_change_attr(unsigned long vaddr, unsigned long size,
enum ioremap_mode mode)
{
- unsigned long vaddr = (unsigned long)__va(paddr);
unsigned long nrpages = size >> PAGE_SHIFT;
- unsigned int level;
int err;
- /* No change for pages after the last mapping */
- if ((paddr + size - 1) >= (max_pfn_mapped << PAGE_SHIFT))
- return 0;
-
- /*
- * If there is no identity map for this address,
- * change_page_attr_addr is unnecessary
- */
- if (!lookup_address(vaddr, &level))
- return 0;
-
switch (mode) {
case IOR_MODE_UNCACHED:
default:
@@ -114,9 +101,8 @@ static int ioremap_change_attr(unsigned long paddr, unsigned long size,
static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
enum ioremap_mode mode)
{
- void __iomem *addr;
+ unsigned long pfn, offset, last_addr, vaddr;
struct vm_struct *area;
- unsigned long offset, last_addr;
pgprot_t prot;
/* Don't allow wraparound or zero size */
@@ -133,9 +119,10 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
/*
* Don't allow anybody to remap normal RAM that we're using..
*/
- for (offset = phys_addr >> PAGE_SHIFT; offset < max_pfn_mapped &&
- (offset << PAGE_SHIFT) < last_addr; offset++) {
- if (page_is_ram(offset))
+ for (pfn = phys_addr >> PAGE_SHIFT; pfn < max_pfn_mapped &&
+ (pfn << PAGE_SHIFT) < last_addr; pfn++) {
+ if (page_is_ram(pfn) && pfn_valid(pfn) &&
+ !PageReserved(pfn_to_page(pfn)))
return NULL;
}
@@ -163,19 +150,18 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
if (!area)
return NULL;
area->phys_addr = phys_addr;
- addr = (void __iomem *) area->addr;
- if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
- phys_addr, prot)) {
- remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
+ vaddr = (unsigned long) area->addr;
+ if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
+ remove_vm_area((void *)(vaddr & PAGE_MASK));
return NULL;
}
- if (ioremap_change_attr(phys_addr, size, mode) < 0) {
- vunmap(addr);
+ if (ioremap_change_attr(vaddr, size, mode) < 0) {
+ vunmap(area->addr);
return NULL;
}
- return (void __iomem *) (offset + (char __iomem *)addr);
+ return (void __iomem *) (vaddr + offset);
}
/**
@@ -254,9 +240,6 @@ void iounmap(volatile void __iomem *addr)
return;
}
- /* Reset the direct mapping. Can block */
- ioremap_change_attr(p->phys_addr, p->size, IOR_MODE_CACHED);
-
/* Finally remove it */
o = remove_vm_area((void *)addr);
BUG_ON(p != o || o == NULL);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index a920d09b919..1aecc658cd7 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -202,6 +202,8 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
if (node_data[nodeid] == NULL)
return;
nodedata_phys = __pa(node_data[nodeid]);
+ printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
+ nodedata_phys + pgdat_size - 1);
memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
@@ -225,17 +227,21 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
return;
}
bootmap_start = __pa(bootmap);
- Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
bootmap_start >> PAGE_SHIFT,
start_pfn, end_pfn);
+ printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n",
+ bootmap_start, bootmap_start + bootmap_size - 1,
+ bootmap_pages);
+
free_bootmem_with_active_regions(nodeid, end);
- reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
+ reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size,
+ BOOTMEM_DEFAULT);
reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
- bootmap_pages<<PAGE_SHIFT);
+ bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
#ifdef CONFIG_ACPI_NUMA
srat_reserve_add_area(nodeid);
#endif
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 7573e786d2f..ed820160035 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -5,6 +5,7 @@
* and compares page tables forwards and afterwards.
*/
#include <linux/bootmem.h>
+#include <linux/kthread.h>
#include <linux/random.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -14,8 +15,13 @@
#include <asm/pgtable.h>
#include <asm/kdebug.h>
+/*
+ * Only print the results of the first pass:
+ */
+static __read_mostly int print = 1;
+
enum {
- NTEST = 4000,
+ NTEST = 400,
#ifdef CONFIG_X86_64
LPS = (1 << PMD_SHIFT),
#elif defined(CONFIG_X86_PAE)
@@ -31,7 +37,7 @@ struct split_state {
long min_exec, max_exec;
};
-static __init int print_split(struct split_state *s)
+static int print_split(struct split_state *s)
{
long i, expected, missed = 0;
int printed = 0;
@@ -82,10 +88,13 @@ static __init int print_split(struct split_state *s)
s->max_exec = addr;
}
}
- printk(KERN_INFO
- "CPA mapping 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n",
- s->spg, s->lpg, s->gpg, s->exec,
- s->min_exec != ~0UL ? s->min_exec : 0, s->max_exec, missed);
+ if (print) {
+ printk(KERN_INFO
+ " 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n",
+ s->spg, s->lpg, s->gpg, s->exec,
+ s->min_exec != ~0UL ? s->min_exec : 0,
+ s->max_exec, missed);
+ }
expected = (s->gpg*GPS + s->lpg*LPS)/PAGE_SIZE + s->spg + missed;
if (expected != i) {
@@ -96,11 +105,11 @@ static __init int print_split(struct split_state *s)
return err;
}
-static unsigned long __initdata addr[NTEST];
-static unsigned int __initdata len[NTEST];
+static unsigned long addr[NTEST];
+static unsigned int len[NTEST];
/* Change the global bit on random pages in the direct mapping */
-static __init int exercise_pageattr(void)
+static int pageattr_test(void)
{
struct split_state sa, sb, sc;
unsigned long *bm;
@@ -110,7 +119,8 @@ static __init int exercise_pageattr(void)
int i, k;
int err;
- printk(KERN_INFO "CPA exercising pageattr\n");
+ if (print)
+ printk(KERN_INFO "CPA self-test:\n");
bm = vmalloc((max_pfn_mapped + 7) / 8);
if (!bm) {
@@ -137,7 +147,8 @@ static __init int exercise_pageattr(void)
for (k = 0; k < len[i]; k++) {
pte = lookup_address(addr[i] + k*PAGE_SIZE, &level);
- if (!pte || pgprot_val(pte_pgprot(*pte)) == 0) {
+ if (!pte || pgprot_val(pte_pgprot(*pte)) == 0 ||
+ !(pte_val(*pte) & _PAGE_PRESENT)) {
addr[i] = 0;
break;
}
@@ -185,7 +196,6 @@ static __init int exercise_pageattr(void)
failed += print_split(&sb);
- printk(KERN_INFO "CPA reverting everything\n");
for (i = 0; i < NTEST; i++) {
if (!addr[i])
continue;
@@ -213,12 +223,40 @@ static __init int exercise_pageattr(void)
failed += print_split(&sc);
if (failed) {
- printk(KERN_ERR "CPA selftests NOT PASSED. Please report.\n");
+ printk(KERN_ERR "NOT PASSED. Please report.\n");
WARN_ON(1);
+ return -EINVAL;
} else {
- printk(KERN_INFO "CPA selftests PASSED\n");
+ if (print)
+ printk(KERN_INFO "ok.\n");
}
return 0;
}
-module_init(exercise_pageattr);
+
+static int do_pageattr_test(void *__unused)
+{
+ while (!kthread_should_stop()) {
+ schedule_timeout_interruptible(HZ*30);
+ if (pageattr_test() < 0)
+ break;
+ if (print)
+ print--;
+ }
+ return 0;
+}
+
+static int start_pageattr_test(void)
+{
+ struct task_struct *p;
+
+ p = kthread_create(do_pageattr_test, NULL, "pageattr-test");
+ if (!IS_ERR(p))
+ wake_up_process(p);
+ else
+ WARN_ON(1);
+
+ return 0;
+}
+
+module_init(start_pageattr_test);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e297bd65e51..8493c855582 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -16,6 +16,17 @@
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
+/*
+ * The current flushing context - we pass it instead of 5 arguments:
+ */
+struct cpa_data {
+ unsigned long vaddr;
+ pgprot_t mask_set;
+ pgprot_t mask_clr;
+ int numpages;
+ int flushtlb;
+};
+
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
@@ -52,21 +63,23 @@ void clflush_cache_range(void *vaddr, unsigned int size)
static void __cpa_flush_all(void *arg)
{
+ unsigned long cache = (unsigned long)arg;
+
/*
* Flush all to work around Errata in early athlons regarding
* large page flushing.
*/
__flush_tlb_all();
- if (boot_cpu_data.x86_model >= 4)
+ if (cache && boot_cpu_data.x86_model >= 4)
wbinvd();
}
-static void cpa_flush_all(void)
+static void cpa_flush_all(unsigned long cache)
{
BUG_ON(irqs_disabled());
- on_each_cpu(__cpa_flush_all, NULL, 1, 1);
+ on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
}
static void __cpa_flush_range(void *arg)
@@ -79,7 +92,7 @@ static void __cpa_flush_range(void *arg)
__flush_tlb_all();
}
-static void cpa_flush_range(unsigned long start, int numpages)
+static void cpa_flush_range(unsigned long start, int numpages, int cache)
{
unsigned int i, level;
unsigned long addr;
@@ -89,6 +102,9 @@ static void cpa_flush_range(unsigned long start, int numpages)
on_each_cpu(__cpa_flush_range, NULL, 1, 1);
+ if (!cache)
+ return;
+
/*
* We only need to flush on one CPU,
* clflush is a MESI-coherent instruction that
@@ -101,11 +117,27 @@ static void cpa_flush_range(unsigned long start, int numpages)
/*
* Only flush present addresses:
*/
- if (pte && pte_present(*pte))
+ if (pte && (pte_val(*pte) & _PAGE_PRESENT))
clflush_cache_range((void *) addr, PAGE_SIZE);
}
}
+#define HIGH_MAP_START __START_KERNEL_map
+#define HIGH_MAP_END (__START_KERNEL_map + KERNEL_TEXT_SIZE)
+
+
+/*
+ * Converts a virtual address to a X86-64 highmap address
+ */
+static unsigned long virt_to_highmap(void *address)
+{
+#ifdef CONFIG_X86_64
+ return __pa((unsigned long)address) + HIGH_MAP_START - phys_base;
+#else
+ return (unsigned long)address;
+#endif
+}
+
/*
* Certain areas of memory on x86 require very specific protection flags,
* for example the BIOS area or kernel text. Callers don't always get this
@@ -129,19 +161,36 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
*/
if (within(address, (unsigned long)_text, (unsigned long)_etext))
pgprot_val(forbidden) |= _PAGE_NX;
+ /*
+ * Do the same for the x86-64 high kernel mapping
+ */
+ if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext)))
+ pgprot_val(forbidden) |= _PAGE_NX;
-#ifdef CONFIG_DEBUG_RODATA
/* The .rodata section needs to be read-only */
if (within(address, (unsigned long)__start_rodata,
(unsigned long)__end_rodata))
pgprot_val(forbidden) |= _PAGE_RW;
-#endif
+ /*
+ * Do the same for the x86-64 high kernel mapping
+ */
+ if (within(address, virt_to_highmap(__start_rodata),
+ virt_to_highmap(__end_rodata)))
+ pgprot_val(forbidden) |= _PAGE_RW;
prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
return prot;
}
+/*
+ * Lookup the page table entry for a virtual address. Return a pointer
+ * to the entry and the level of the mapping.
+ *
+ * Note: We return pud and pmd either when the entry is marked large
+ * or when the present bit is not set. Otherwise we would return a
+ * pointer to a nonexisting mapping.
+ */
pte_t *lookup_address(unsigned long address, int *level)
{
pgd_t *pgd = pgd_offset_k(address);
@@ -152,21 +201,31 @@ pte_t *lookup_address(unsigned long address, int *level)
if (pgd_none(*pgd))
return NULL;
+
pud = pud_offset(pgd, address);
if (pud_none(*pud))
return NULL;
+
+ *level = PG_LEVEL_1G;
+ if (pud_large(*pud) || !pud_present(*pud))
+ return (pte_t *)pud;
+
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd))
return NULL;
*level = PG_LEVEL_2M;
- if (pmd_large(*pmd))
+ if (pmd_large(*pmd) || !pmd_present(*pmd))
return (pte_t *)pmd;
*level = PG_LEVEL_4K;
+
return pte_offset_kernel(pmd, address);
}
+/*
+ * Set the new pmd in all the pgds we know about:
+ */
static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
/* change init_mm */
@@ -189,18 +248,103 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
#endif
}
+static int
+try_preserve_large_page(pte_t *kpte, unsigned long address,
+ struct cpa_data *cpa)
+{
+ unsigned long nextpage_addr, numpages, pmask, psize, flags;
+ pte_t new_pte, old_pte, *tmp;
+ pgprot_t old_prot, new_prot;
+ int level, do_split = 1;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ /*
+ * Check for races, another CPU might have split this page
+ * up already:
+ */
+ tmp = lookup_address(address, &level);
+ if (tmp != kpte)
+ goto out_unlock;
+
+ switch (level) {
+ case PG_LEVEL_2M:
+ psize = PMD_PAGE_SIZE;
+ pmask = PMD_PAGE_MASK;
+ break;
+#ifdef CONFIG_X86_64
+ case PG_LEVEL_1G:
+ psize = PMD_PAGE_SIZE;
+ pmask = PMD_PAGE_MASK;
+ break;
+#endif
+ default:
+ do_split = -EINVAL;
+ goto out_unlock;
+ }
+
+ /*
+ * Calculate the number of pages, which fit into this large
+ * page starting at address:
+ */
+ nextpage_addr = (address + psize) & pmask;
+ numpages = (nextpage_addr - address) >> PAGE_SHIFT;
+ if (numpages < cpa->numpages)
+ cpa->numpages = numpages;
+
+ /*
+ * We are safe now. Check whether the new pgprot is the same:
+ */
+ old_pte = *kpte;
+ old_prot = new_prot = pte_pgprot(old_pte);
+
+ pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
+ pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
+ new_prot = static_protections(new_prot, address);
+
+ /*
+ * If there are no changes, return. maxpages has been updated
+ * above:
+ */
+ if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
+ do_split = 0;
+ goto out_unlock;
+ }
+
+ /*
+ * We need to change the attributes. Check, whether we can
+ * change the large page in one go. We request a split, when
+ * the address is not aligned and the number of pages is
+ * smaller than the number of pages in the large page. Note
+ * that we limited the number of possible pages already to
+ * the number of pages in the large page.
+ */
+ if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
+ /*
+ * The address is aligned and the number of pages
+ * covers the full page.
+ */
+ new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
+ __set_pmd_pte(kpte, address, new_pte);
+ cpa->flushtlb = 1;
+ do_split = 0;
+ }
+
+out_unlock:
+ spin_unlock_irqrestore(&pgd_lock, flags);
+
+ return do_split;
+}
+
static int split_large_page(pte_t *kpte, unsigned long address)
{
- pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+ unsigned long flags, pfn, pfninc = 1;
gfp_t gfp_flags = GFP_KERNEL;
- unsigned long flags;
- unsigned long addr;
+ unsigned int i, level;
pte_t *pbase, *tmp;
+ pgprot_t ref_prot;
struct page *base;
- unsigned int i, level;
#ifdef CONFIG_DEBUG_PAGEALLOC
- gfp_flags = __GFP_HIGH | __GFP_NOFAIL | __GFP_NOWARN;
gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
#endif
base = alloc_pages(gfp_flags, 0);
@@ -213,30 +357,41 @@ static int split_large_page(pte_t *kpte, unsigned long address)
* up for us already:
*/
tmp = lookup_address(address, &level);
- if (tmp != kpte) {
- WARN_ON_ONCE(1);
+ if (tmp != kpte)
goto out_unlock;
- }
- address = __pa(address);
- addr = address & LARGE_PAGE_MASK;
pbase = (pte_t *)page_address(base);
#ifdef CONFIG_X86_32
paravirt_alloc_pt(&init_mm, page_to_pfn(base));
#endif
+ ref_prot = pte_pgprot(pte_clrhuge(*kpte));
- pgprot_val(ref_prot) &= ~_PAGE_NX;
- for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
- set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
+#ifdef CONFIG_X86_64
+ if (level == PG_LEVEL_1G) {
+ pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
+ pgprot_val(ref_prot) |= _PAGE_PSE;
+ }
+#endif
+
+ /*
+ * Get the target pfn from the original entry:
+ */
+ pfn = pte_pfn(*kpte);
+ for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
+ set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
/*
- * Install the new, split up pagetable. Important detail here:
+ * Install the new, split up pagetable. Important details here:
*
* On Intel the NX bit of all levels must be cleared to make a
* page executable. See section 4.13.2 of Intel 64 and IA-32
* Architectures Software Developer's Manual).
+ *
+ * Mark the entry present. The current mapping might be
+ * set to not present, which we preserved above.
*/
ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
+ pgprot_val(ref_prot) |= _PAGE_PRESENT;
__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
base = NULL;
@@ -249,18 +404,12 @@ out_unlock:
return 0;
}
-static int
-__change_page_attr(unsigned long address, unsigned long pfn,
- pgprot_t mask_set, pgprot_t mask_clr)
+static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
{
+ int level, do_split, err;
struct page *kpte_page;
- int level, err = 0;
pte_t *kpte;
-#ifdef CONFIG_X86_32
- BUG_ON(pfn > max_low_pfn);
-#endif
-
repeat:
kpte = lookup_address(address, &level);
if (!kpte)
@@ -271,23 +420,62 @@ repeat:
BUG_ON(PageCompound(kpte_page));
if (level == PG_LEVEL_4K) {
- pgprot_t new_prot = pte_pgprot(*kpte);
pte_t new_pte, old_pte = *kpte;
+ pgprot_t new_prot = pte_pgprot(old_pte);
+
+ if(!pte_val(old_pte)) {
+ printk(KERN_WARNING "CPA: called for zero pte. "
+ "vaddr = %lx cpa->vaddr = %lx\n", address,
+ cpa->vaddr);
+ WARN_ON(1);
+ return -EINVAL;
+ }
- pgprot_val(new_prot) &= ~pgprot_val(mask_clr);
- pgprot_val(new_prot) |= pgprot_val(mask_set);
+ pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
+ pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
new_prot = static_protections(new_prot, address);
- new_pte = pfn_pte(pfn, canon_pgprot(new_prot));
- BUG_ON(pte_pfn(new_pte) != pte_pfn(old_pte));
+ /*
+ * We need to keep the pfn from the existing PTE,
+ * after all we're only going to change it's attributes
+ * not the memory it points to
+ */
+ new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
- set_pte_atomic(kpte, new_pte);
- } else {
- err = split_large_page(kpte, address);
- if (!err)
- goto repeat;
+ /*
+ * Do we really change anything ?
+ */
+ if (pte_val(old_pte) != pte_val(new_pte)) {
+ set_pte_atomic(kpte, new_pte);
+ cpa->flushtlb = 1;
+ }
+ cpa->numpages = 1;
+ return 0;
}
+
+ /*
+ * Check, whether we can keep the large page intact
+ * and just change the pte:
+ */
+ do_split = try_preserve_large_page(kpte, address, cpa);
+ /*
+ * When the range fits into the existing large page,
+ * return. cp->numpages and cpa->tlbflush have been updated in
+ * try_large_page:
+ */
+ if (do_split <= 0)
+ return do_split;
+
+ /*
+ * We have to split the large page:
+ */
+ err = split_large_page(kpte, address);
+ if (!err) {
+ cpa->flushtlb = 1;
+ goto repeat;
+ }
+
return err;
}
@@ -304,19 +492,14 @@ repeat:
*
* Modules and drivers should use the set_memory_* APIs instead.
*/
-
-#define HIGH_MAP_START __START_KERNEL_map
-#define HIGH_MAP_END (__START_KERNEL_map + KERNEL_TEXT_SIZE)
-
-static int
-change_page_attr_addr(unsigned long address, pgprot_t mask_set,
- pgprot_t mask_clr)
+static int change_page_attr_addr(struct cpa_data *cpa)
{
- unsigned long phys_addr = __pa(address);
- unsigned long pfn = phys_addr >> PAGE_SHIFT;
int err;
+ unsigned long address = cpa->vaddr;
#ifdef CONFIG_X86_64
+ unsigned long phys_addr = __pa(address);
+
/*
* If we are inside the high mapped kernel range, then we
* fixup the low mapping first. __va() returns the virtual
@@ -326,7 +509,7 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set,
address = (unsigned long) __va(phys_addr);
#endif
- err = __change_page_attr(address, pfn, mask_set, mask_clr);
+ err = __change_page_attr(address, cpa);
if (err)
return err;
@@ -339,42 +522,89 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set,
/*
* Calc the high mapping address. See __phys_addr()
* for the non obvious details.
+ *
+ * Note that NX and other required permissions are
+ * checked in static_protections().
*/
address = phys_addr + HIGH_MAP_START - phys_base;
- /* Make sure the kernel mappings stay executable */
- pgprot_val(mask_clr) |= _PAGE_NX;
/*
* Our high aliases are imprecise, because we check
* everything between 0 and KERNEL_TEXT_SIZE, so do
* not propagate lookup failures back to users:
*/
- __change_page_attr(address, pfn, mask_set, mask_clr);
+ __change_page_attr(address, cpa);
}
#endif
return err;
}
-static int __change_page_attr_set_clr(unsigned long addr, int numpages,
- pgprot_t mask_set, pgprot_t mask_clr)
+static int __change_page_attr_set_clr(struct cpa_data *cpa)
{
- unsigned int i;
- int ret;
+ int ret, numpages = cpa->numpages;
- for (i = 0; i < numpages ; i++, addr += PAGE_SIZE) {
- ret = change_page_attr_addr(addr, mask_set, mask_clr);
+ while (numpages) {
+ /*
+ * Store the remaining nr of pages for the large page
+ * preservation check.
+ */
+ cpa->numpages = numpages;
+ ret = change_page_attr_addr(cpa);
if (ret)
return ret;
- }
+ /*
+ * Adjust the number of pages with the result of the
+ * CPA operation. Either a large page has been
+ * preserved or a single page update happened.
+ */
+ BUG_ON(cpa->numpages > numpages);
+ numpages -= cpa->numpages;
+ cpa->vaddr += cpa->numpages * PAGE_SIZE;
+ }
return 0;
}
+static inline int cache_attr(pgprot_t attr)
+{
+ return pgprot_val(attr) &
+ (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
+}
+
static int change_page_attr_set_clr(unsigned long addr, int numpages,
pgprot_t mask_set, pgprot_t mask_clr)
{
- int ret = __change_page_attr_set_clr(addr, numpages, mask_set,
- mask_clr);
+ struct cpa_data cpa;
+ int ret, cache;
+
+ /*
+ * Check, if we are requested to change a not supported
+ * feature:
+ */
+ mask_set = canon_pgprot(mask_set);
+ mask_clr = canon_pgprot(mask_clr);
+ if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
+ return 0;
+
+ cpa.vaddr = addr;
+ cpa.numpages = numpages;
+ cpa.mask_set = mask_set;
+ cpa.mask_clr = mask_clr;
+ cpa.flushtlb = 0;
+
+ ret = __change_page_attr_set_clr(&cpa);
+
+ /*
+ * Check whether we really changed something:
+ */
+ if (!cpa.flushtlb)
+ return ret;
+
+ /*
+ * No need to flush, when we did not set any of the caching
+ * attributes:
+ */
+ cache = cache_attr(mask_set);
/*
* On success we use clflush, when the CPU supports it to
@@ -383,9 +613,9 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
* wbindv):
*/
if (!ret && cpu_has_clflush)
- cpa_flush_range(addr, numpages);
+ cpa_flush_range(addr, numpages, cache);
else
- cpa_flush_all();
+ cpa_flush_all(cache);
return ret;
}
@@ -489,37 +719,26 @@ int set_pages_rw(struct page *page, int numpages)
return set_memory_rw(addr, numpages);
}
-
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_CPA_DEBUG)
-static inline int __change_page_attr_set(unsigned long addr, int numpages,
- pgprot_t mask)
-{
- return __change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
-}
-
-static inline int __change_page_attr_clear(unsigned long addr, int numpages,
- pgprot_t mask)
-{
- return __change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
-}
-#endif
-
#ifdef CONFIG_DEBUG_PAGEALLOC
static int __set_pages_p(struct page *page, int numpages)
{
- unsigned long addr = (unsigned long)page_address(page);
+ struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+ .numpages = numpages,
+ .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+ .mask_clr = __pgprot(0)};
- return __change_page_attr_set(addr, numpages,
- __pgprot(_PAGE_PRESENT | _PAGE_RW));
+ return __change_page_attr_set_clr(&cpa);
}
static int __set_pages_np(struct page *page, int numpages)
{
- unsigned long addr = (unsigned long)page_address(page);
+ struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+ .numpages = numpages,
+ .mask_set = __pgprot(0),
+ .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
- return __change_page_attr_clear(addr, numpages,
- __pgprot(_PAGE_PRESENT));
+ return __change_page_attr_set_clr(&cpa);
}
void kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index cb3aa470249..6c1914622a8 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -219,50 +219,39 @@ static inline void pgd_list_del(pgd_t *pgd)
list_del(&page->lru);
}
+#define UNSHARED_PTRS_PER_PGD \
+ (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
-
-#if (PTRS_PER_PMD == 1)
-/* Non-PAE pgd constructor */
-static void pgd_ctor(void *pgd)
+static void pgd_ctor(void *p)
{
+ pgd_t *pgd = p;
unsigned long flags;
- /* !PAE, no pagetable sharing */
+ /* Clear usermode parts of PGD */
memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
spin_lock_irqsave(&pgd_lock, flags);
- /* must happen under lock */
- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
- swapper_pg_dir + USER_PTRS_PER_PGD,
- KERNEL_PGD_PTRS);
- paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
- __pa(swapper_pg_dir) >> PAGE_SHIFT,
- USER_PTRS_PER_PGD,
- KERNEL_PGD_PTRS);
- pgd_list_add(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
-}
-#else /* PTRS_PER_PMD > 1 */
-/* PAE pgd constructor */
-static void pgd_ctor(void *pgd)
-{
- /* PAE, kernel PMD may be shared */
-
- if (SHARED_KERNEL_PMD) {
- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+ /* If the pgd points to a shared pagetable level (either the
+ ptes in non-PAE, or shared PMD in PAE), then just copy the
+ references from swapper_pg_dir. */
+ if (PAGETABLE_LEVELS == 2 ||
+ (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
+ clone_pgd_range(pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
KERNEL_PGD_PTRS);
- } else {
- unsigned long flags;
+ paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
+ __pa(swapper_pg_dir) >> PAGE_SHIFT,
+ USER_PTRS_PER_PGD,
+ KERNEL_PGD_PTRS);
+ }
- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
- spin_lock_irqsave(&pgd_lock, flags);
+ /* list required to sync kernel mapping updates */
+ if (!SHARED_KERNEL_PMD)
pgd_list_add(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
- }
+
+ spin_unlock_irqrestore(&pgd_lock, flags);
}
-#endif /* PTRS_PER_PMD */
static void pgd_dtor(void *pgd)
{
@@ -276,9 +265,6 @@ static void pgd_dtor(void *pgd)
spin_unlock_irqrestore(&pgd_lock, flags);
}
-#define UNSHARED_PTRS_PER_PGD \
- (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
-
#ifdef CONFIG_X86_PAE
/*
* Mop up any pmd pages which may still be attached to the pgd.
@@ -286,7 +272,7 @@ static void pgd_dtor(void *pgd)
* preallocate which never got a corresponding vma will need to be
* freed manually.
*/
-static void pgd_mop_up_pmds(pgd_t *pgdp)
+static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
{
int i;
@@ -299,7 +285,7 @@ static void pgd_mop_up_pmds(pgd_t *pgdp)
pgdp[i] = native_make_pgd(0);
paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
- pmd_free(pmd);
+ pmd_free(mm, pmd);
}
}
}
@@ -327,7 +313,7 @@ static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
pmd_t *pmd = pmd_alloc_one(mm, addr);
if (!pmd) {
- pgd_mop_up_pmds(pgd);
+ pgd_mop_up_pmds(mm, pgd);
return 0;
}
@@ -347,7 +333,7 @@ static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
return 1;
}
-static void pgd_mop_up_pmds(pgd_t *pgd)
+static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
{
}
#endif /* CONFIG_X86_PAE */
@@ -366,9 +352,9 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
return pgd;
}
-void pgd_free(pgd_t *pgd)
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
- pgd_mop_up_pmds(pgd);
+ pgd_mop_up_pmds(mm, pgd);
quicklist_free(0, pgd_dtor, pgd);
}
@@ -387,13 +373,6 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
- /* This is called just after the pmd has been detached from
- the pgd, which requires a full tlb flush to be recognized
- by the CPU. Rather than incurring multiple tlb flushes
- while the address space is being pulled down, make the tlb
- gathering machinery do a full flush when we're done. */
- tlb->fullmm = 1;
-
paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
tlb_remove_page(tlb, virt_to_page(pmd));
}
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 65416f843e5..ecd91ea8a8a 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -488,7 +488,8 @@ void __init srat_reserve_add_area(int nodeid)
printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
"pre-allocated memory.\n", (unsigned long long)total_mb);
reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
- nodes_add[nodeid].end - nodes_add[nodeid].start);
+ nodes_add[nodeid].end - nodes_add[nodeid].start,
+ BOOTMEM_DEFAULT);
}
}
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 42ba0e2da1a..103b9dff121 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -72,7 +72,7 @@ pcibios_align_resource(void *data, struct resource *res,
}
}
}
-
+EXPORT_SYMBOL(pcibios_align_resource);
/*
* Handle resources of PCI devices. If the world were perfect, we could
diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c
index f5f165f69e0..55270c26237 100644
--- a/arch/x86/pci/numa.c
+++ b/arch/x86/pci/numa.c
@@ -5,36 +5,62 @@
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/nodemask.h>
+#include <mach_apic.h>
#include "pci.h"
+#define XQUAD_PORTIO_BASE 0xfe400000
+#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
+
#define BUS2QUAD(global) (mp_bus_id_to_node[global])
#define BUS2LOCAL(global) (mp_bus_id_to_local[global])
#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
+extern void *xquad_portio; /* Where the IO area was mapped */
+#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
+
#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \
(0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3))
+static void write_cf8(unsigned bus, unsigned devfn, unsigned reg)
+{
+ unsigned val = PCI_CONF1_MQ_ADDRESS(bus, devfn, reg);
+ if (xquad_portio)
+ writel(val, XQUAD_PORT_ADDR(0xcf8, BUS2QUAD(bus)));
+ else
+ outl(val, 0xCF8);
+}
+
static int pci_conf1_mq_read(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 *value)
{
unsigned long flags;
+ void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus));
if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255))
return -EINVAL;
spin_lock_irqsave(&pci_config_lock, flags);
- outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus));
+ write_cf8(bus, devfn, reg);
switch (len) {
case 1:
- *value = inb_quad(0xCFC + (reg & 3), BUS2QUAD(bus));
+ if (xquad_portio)
+ *value = readb(adr + (reg & 3));
+ else
+ *value = inb(0xCFC + (reg & 3));
break;
case 2:
- *value = inw_quad(0xCFC + (reg & 2), BUS2QUAD(bus));
+ if (xquad_portio)
+ *value = readw(adr + (reg & 2));
+ else
+ *value = inw(0xCFC + (reg & 2));
break;
case 4:
- *value = inl_quad(0xCFC, BUS2QUAD(bus));
+ if (xquad_portio)
+ *value = readl(adr);
+ else
+ *value = inl(0xCFC);
break;
}
@@ -47,23 +73,33 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 value)
{
unsigned long flags;
+ void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus));
if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255))
return -EINVAL;
spin_lock_irqsave(&pci_config_lock, flags);
- outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus));
+ write_cf8(bus, devfn, reg);
switch (len) {
case 1:
- outb_quad((u8)value, 0xCFC + (reg & 3), BUS2QUAD(bus));
+ if (xquad_portio)
+ writeb(value, adr + (reg & 3));
+ else
+ outb((u8)value, 0xCFC + (reg & 3));
break;
case 2:
- outw_quad((u16)value, 0xCFC + (reg & 2), BUS2QUAD(bus));
+ if (xquad_portio)
+ writew(value, adr + (reg & 2));
+ else
+ outw((u16)value, 0xCFC + (reg & 2));
break;
case 4:
- outl_quad((u32)value, 0xCFC, BUS2QUAD(bus));
+ if (xquad_portio)
+ writel(value, adr + reg);
+ else
+ outl((u32)value, 0xCFC);
break;
}