From f8dfd5ed149ae340451f25847b434297c20d4645 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 19 Apr 2008 19:19:54 +0200 Subject: x86: KGDB build fix Signed-off-by: Ingo Molnar --- arch/x86/kernel/kgdb.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 24362ecf5f9..f47f0eb886b 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -46,11 +46,7 @@ #include #include -#ifdef CONFIG_X86_32 -# include -#else -# include -#endif +#include /* * Put the error code here just in case the user cares: -- cgit v1.2.3 From 4a3575fd436aa98957184afd745e4ada8f1542d8 Mon Sep 17 00:00:00 2001 From: "Huang, Ying" Date: Mon, 25 Feb 2008 15:18:37 +0800 Subject: x86: EFI_PAGE_SHIFT fix Make x86 EFI code works when EFI_PAGE_SHIFT != PAGE_SHIFT. The memrage_efi_to_native() provided in this patch can be used on other EFI platform such as IA64 too. This patch has been tested on Intel x86_64 platform with EFI 64/32 firmware. Signed-off-by: Huang Ying Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/efi.c | 18 +++++++++++++----- arch/x86/kernel/efi_64.c | 12 ++++++------ 2 files changed, 19 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 759e02bec07..77d424cf68b 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -383,6 +383,7 @@ static void __init runtime_code_page_mkexec(void) { efi_memory_desc_t *md; void *p; + u64 addr, npages; /* Make EFI runtime service code area executable */ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { @@ -391,7 +392,10 @@ static void __init runtime_code_page_mkexec(void) if (md->type != EFI_RUNTIME_SERVICES_CODE) continue; - set_memory_x(md->virt_addr, md->num_pages); + addr = md->virt_addr; + npages = md->num_pages; + memrange_efi_to_native(&addr, &npages); + set_memory_x(addr, npages); } } @@ -408,7 +412,7 @@ void __init efi_enter_virtual_mode(void) efi_memory_desc_t *md; efi_status_t status; unsigned long size; - u64 end, systab; + u64 end, systab, addr, npages; void *p, *va; efi.systab = NULL; @@ -420,7 +424,7 @@ void __init efi_enter_virtual_mode(void) size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; - if ((end >> PAGE_SHIFT) <= max_pfn_mapped) + if (PFN_UP(end) <= max_pfn_mapped) va = __va(md->phys_addr); else va = efi_ioremap(md->phys_addr, size); @@ -433,8 +437,12 @@ void __init efi_enter_virtual_mode(void) continue; } - if (!(md->attribute & EFI_MEMORY_WB)) - set_memory_uc(md->virt_addr, md->num_pages); + if (!(md->attribute & EFI_MEMORY_WB)) { + addr = md->virt_addr; + npages = md->num_pages; + memrange_efi_to_native(&addr, &npages); + set_memory_uc(addr, npages); + } systab = (u64) (unsigned long) efi_phys.systab; if (md->phys_addr <= systab && systab < end) { diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index d143a1e76b3..d0060fdccca 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -105,14 +105,14 @@ void __init efi_reserve_bootmem(void) void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size) { - static unsigned pages_mapped; + static unsigned pages_mapped __initdata; unsigned i, pages; + unsigned long offset; - /* phys_addr and size must be page aligned */ - if ((phys_addr & ~PAGE_MASK) || (size & ~PAGE_MASK)) - return NULL; + pages = PFN_UP(phys_addr + size) - PFN_DOWN(phys_addr); + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; - pages = size >> PAGE_SHIFT; if (pages_mapped + pages > MAX_EFI_IO_PAGES) return NULL; @@ -124,5 +124,5 @@ void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size) } return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \ - (pages_mapped - pages)); + (pages_mapped - pages)) + offset; } -- cgit v1.2.3 From 2b8106a0a3d3c1e5b69f091192bc99019ff4d81d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 18 Mar 2008 12:51:22 -0700 Subject: x86_64: do not reserve ramdisk two times ramdisk is reserved via reserve_early in x86_64_start_kernel, later early_res_to_bootmem() will convert to reservation in bootmem. so don't need to reserve that again. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/head64.c | 2 ++ arch/x86/kernel/setup_64.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index d6d54faa84d..993c7677325 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -146,6 +146,7 @@ void __init x86_64_start_kernel(char * real_mode_data) reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); +#ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; @@ -153,6 +154,7 @@ void __init x86_64_start_kernel(char * real_mode_data) unsigned long ramdisk_end = ramdisk_image + ramdisk_size; reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); } +#endif reserve_ebda_region(); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 674ef3510cd..0aa291bff4e 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -420,11 +420,14 @@ void __init setup_arch(char **cmdline_p) unsigned long end_of_mem = end_pfn << PAGE_SHIFT; if (ramdisk_end <= end_of_mem) { - reserve_bootmem_generic(ramdisk_image, ramdisk_size); + /* + * don't need to reserve again, already reserved early + * in x86_64_start_kernel, and early_res_to_bootmem + * convert that to reserved in bootmem + */ initrd_start = ramdisk_image + PAGE_OFFSET; initrd_end = initrd_start+ramdisk_size; } else { - /* Assumes everything on node 0 */ free_bootmem(ramdisk_image, ramdisk_size); printk(KERN_ERR "initrd extends beyond end of memory " "(0x%08lx > 0x%08lx)\ndisabling initrd\n", -- cgit v1.2.3 From 8ce116e5993cf64729a4d2b3dc2c0f072852654b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 26 Feb 2008 08:52:16 +0100 Subject: x86: clean up cpu capabilities accesses, p4-clockmod.c Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 14791ec55cf..199e4e05e5d 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -289,8 +289,8 @@ static int __init cpufreq_p4_init(void) if (c->x86_vendor != X86_VENDOR_INTEL) return -ENODEV; - if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) || - !test_bit(X86_FEATURE_ACC, c->x86_capability)) + if (!test_cpu_cap(c, X86_FEATURE_ACPI) || + !test_cpu_cap(c, X86_FEATURE_ACC)) return -ENODEV; ret = cpufreq_register_driver(&p4clockmod_driver); -- cgit v1.2.3 From cf9b111c170733dde39139e8989b676ec8b81573 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sat, 8 Mar 2008 18:15:06 +0800 Subject: x86: remove pointless comments Remove old comments that include the old arch/i386 directory. Signed-off-by: WANG Cong Acked-by: H. Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/boot/a20.c | 2 -- arch/x86/boot/apm.c | 2 -- arch/x86/boot/bitops.h | 2 -- arch/x86/boot/boot.h | 2 -- arch/x86/boot/cmdline.c | 2 -- arch/x86/boot/copy.S | 2 -- arch/x86/boot/cpucheck.c | 2 -- arch/x86/boot/edd.c | 2 -- arch/x86/boot/install.sh | 2 -- arch/x86/boot/main.c | 2 -- arch/x86/boot/mca.c | 2 -- arch/x86/boot/memory.c | 2 -- arch/x86/boot/pm.c | 2 -- arch/x86/boot/pmjump.S | 2 -- arch/x86/boot/printf.c | 2 -- arch/x86/boot/string.c | 2 -- arch/x86/boot/tty.c | 2 -- arch/x86/boot/version.c | 2 -- arch/x86/boot/video-bios.c | 2 -- arch/x86/boot/video-vesa.c | 2 -- arch/x86/boot/video-vga.c | 2 -- arch/x86/boot/video.c | 2 -- arch/x86/boot/video.h | 2 -- arch/x86/boot/voyager.c | 2 -- arch/x86/kernel/acpi/cstate.c | 2 -- arch/x86/kernel/acpi/processor.c | 2 -- arch/x86/kernel/cpu/mcheck/therm_throt.c | 1 - arch/x86/kernel/entry_32.S | 1 - arch/x86/kernel/head_32.S | 1 - arch/x86/mach-visws/visws_apic.c | 2 -- arch/x86/mach-voyager/voyager_basic.c | 2 -- arch/x86/mach-voyager/voyager_cat.c | 2 -- arch/x86/mach-voyager/voyager_smp.c | 2 -- arch/x86/mach-voyager/voyager_thread.c | 2 -- arch/x86/mm/init_32.c | 1 - arch/x86/mm/pgtable_32.c | 4 ---- arch/x86/video/fbdev.c | 1 - 37 files changed, 71 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c index 31348d054fc..90943f83e84 100644 --- a/arch/x86/boot/a20.c +++ b/arch/x86/boot/a20.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/a20.c - * * Enable A20 gate (return -1 on failure) */ diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c index c117c7fb859..7aa6033001f 100644 --- a/arch/x86/boot/apm.c +++ b/arch/x86/boot/apm.c @@ -12,8 +12,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/apm.c - * * Get APM BIOS information */ diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h index 8dcc8dc7db8..878e4b9940d 100644 --- a/arch/x86/boot/bitops.h +++ b/arch/x86/boot/bitops.h @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/bitops.h - * * Very simple bitops for the boot code. */ diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 09578070bfb..a34b9982c7c 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/boot.h - * * Header file for the real-mode kernel code */ diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c index 680408a0f46..a1d35634bce 100644 --- a/arch/x86/boot/cmdline.c +++ b/arch/x86/boot/cmdline.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/cmdline.c - * * Simple command-line parser for early boot. */ diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S index ef127e56a3c..ef50c84e8b4 100644 --- a/arch/x86/boot/copy.S +++ b/arch/x86/boot/copy.S @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/copy.S - * * Memory copy routines */ diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 2462c88689e..7804389ee00 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/cpucheck.c - * * Check for obligatory CPU features and abort if the features are not * present. This code should be compilable as 16-, 32- or 64-bit * code, so be very careful with types and inline assembly. diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c index 8721dc46a0b..d84a48ece78 100644 --- a/arch/x86/boot/edd.c +++ b/arch/x86/boot/edd.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/edd.c - * * Get EDD BIOS disk information */ diff --git a/arch/x86/boot/install.sh b/arch/x86/boot/install.sh index 88d77761d01..8d60ee15dfd 100644 --- a/arch/x86/boot/install.sh +++ b/arch/x86/boot/install.sh @@ -1,7 +1,5 @@ #!/bin/sh # -# arch/i386/boot/install.sh -# # This file is subject to the terms and conditions of the GNU General Public # License. See the file "COPYING" in the main directory of this archive # for more details. diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 7828da5cfd0..77569a4a3be 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/main.c - * * Main module for the real-mode kernel code */ diff --git a/arch/x86/boot/mca.c b/arch/x86/boot/mca.c index 68222f2d4b6..911eaae5d69 100644 --- a/arch/x86/boot/mca.c +++ b/arch/x86/boot/mca.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/mca.c - * * Get the MCA system description table */ diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index e77d89f9e8a..acad32eb429 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/memory.c - * * Memory detection code */ diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c index a93cb8bded4..328956fdb59 100644 --- a/arch/x86/boot/pm.c +++ b/arch/x86/boot/pm.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/pm.c - * * Prepare the machine for transition to protected mode. */ diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S index f5402d51f7c..ab049d40a88 100644 --- a/arch/x86/boot/pmjump.S +++ b/arch/x86/boot/pmjump.S @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/pmjump.S - * * The actual transition into protected mode */ diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c index 7e7e890699b..c1d00c0274c 100644 --- a/arch/x86/boot/printf.c +++ b/arch/x86/boot/printf.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/printf.c - * * Oh, it's a waste of space, but oh-so-yummy for debugging. This * version of printf() does not include 64-bit support. "Live with * it." diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 481a2209778..f94b7a0c2ab 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/string.c - * * Very basic string functions */ diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c index f3f14bd2637..0be77b39328 100644 --- a/arch/x86/boot/tty.c +++ b/arch/x86/boot/tty.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/tty.c - * * Very simple screen I/O * XXX: Probably should add very simple serial I/O? */ diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c index c61462f7d9a..2723d9b5ce4 100644 --- a/arch/x86/boot/version.c +++ b/arch/x86/boot/version.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/version.c - * * Kernel version string */ diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c index 39e247e9617..49f26aaaebc 100644 --- a/arch/x86/boot/video-bios.c +++ b/arch/x86/boot/video-bios.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video-bios.c - * * Standard video BIOS modes * * We have two options for this; silent and scanned. diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 5d5a3f6e8b5..401ad998ad0 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video-vesa.c - * * VESA text modes */ diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index 330d6589a2a..40ecb8d7688 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video-vga.c - * * Common all-VGA modes */ diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index c1c47ba069e..83598b23093 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video.c - * * Select video mode */ diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h index d69347f79e8..ee63f5d1446 100644 --- a/arch/x86/boot/video.h +++ b/arch/x86/boot/video.h @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video.h - * * Header file for the real-mode video probing code */ diff --git a/arch/x86/boot/voyager.c b/arch/x86/boot/voyager.c index 6499e3239b4..433909d61e5 100644 --- a/arch/x86/boot/voyager.c +++ b/arch/x86/boot/voyager.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/voyager.c - * * Get the Voyager config information */ diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 8ca3557a6d5..9366fb68d8d 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -1,6 +1,4 @@ /* - * arch/i386/kernel/acpi/cstate.c - * * Copyright (C) 2005 Intel Corporation * Venkatesh Pallipadi * - Added _PDC for SMP C-states on Intel CPUs diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index 324eb0cab19..de2d2e4ebad 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -1,6 +1,4 @@ /* - * arch/i386/kernel/acpi/processor.c - * * Copyright (C) 2005 Intel Corporation * Venkatesh Pallipadi * - Added _PDC for platforms with Intel CPUs diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 9b7e01daa1c..1f4cc48c14c 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -1,5 +1,4 @@ /* - * linux/arch/i386/kernel/cpu/mcheck/therm_throt.c * * Thermal throttle event support code (such as syslog messaging and rate * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 9ba49a26dff..f0f8934fc30 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1,5 +1,4 @@ /* - * linux/arch/i386/entry.S * * Copyright (C) 1991, 1992 Linus Torvalds */ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 826988a6e96..90f038af3ad 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -1,5 +1,4 @@ /* - * linux/arch/i386/kernel/head.S -- the 32-bit startup code. * * Copyright (C) 1991, 1992 Linus Torvalds * diff --git a/arch/x86/mach-visws/visws_apic.c b/arch/x86/mach-visws/visws_apic.c index 710faf71a65..cef9cb1d15a 100644 --- a/arch/x86/mach-visws/visws_apic.c +++ b/arch/x86/mach-visws/visws_apic.c @@ -1,6 +1,4 @@ /* - * linux/arch/i386/mach-visws/visws_apic.c - * * Copyright (C) 1999 Bent Hagemark, Ingo Molnar * * SGI Visual Workstation interrupt controller diff --git a/arch/x86/mach-voyager/voyager_basic.c b/arch/x86/mach-voyager/voyager_basic.c index 6a949e4edde..46d6f806769 100644 --- a/arch/x86/mach-voyager/voyager_basic.c +++ b/arch/x86/mach-voyager/voyager_basic.c @@ -2,8 +2,6 @@ * * Author: J.E.J.Bottomley@HansenPartnership.com * - * linux/arch/i386/kernel/voyager.c - * * This file contains all the voyager specific routines for getting * initialisation of the architecture to function. For additional * features see: diff --git a/arch/x86/mach-voyager/voyager_cat.c b/arch/x86/mach-voyager/voyager_cat.c index 17a7904f75b..ecab9fff0fd 100644 --- a/arch/x86/mach-voyager/voyager_cat.c +++ b/arch/x86/mach-voyager/voyager_cat.c @@ -4,8 +4,6 @@ * * Author: J.E.J.Bottomley@HansenPartnership.com * - * linux/arch/i386/kernel/voyager_cat.c - * * This file contains all the logic for manipulating the CAT bus * in a level 5 machine. * diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index be7235bf105..96f60c7cd12 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -4,8 +4,6 @@ * * Author: J.E.J.Bottomley@HansenPartnership.com * - * linux/arch/i386/kernel/voyager_smp.c - * * This file provides all the same external entries as smp.c but uses * the voyager hal to provide the functionality */ diff --git a/arch/x86/mach-voyager/voyager_thread.c b/arch/x86/mach-voyager/voyager_thread.c index c69c931818e..15464a20fb3 100644 --- a/arch/x86/mach-voyager/voyager_thread.c +++ b/arch/x86/mach-voyager/voyager_thread.c @@ -4,8 +4,6 @@ * * Author: J.E.J.Bottomley@HansenPartnership.com * - * linux/arch/i386/kernel/voyager_thread.c - * * This module provides the machine status monitor thread for the * voyager architecture. This allows us to monitor the machine * environment (temp, voltage, fan function) and the front panel and diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 1500dc8d63e..9ec62da85fd 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -1,5 +1,4 @@ /* - * linux/arch/i386/mm/init.c * * Copyright (C) 1995 Linus Torvalds * diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 3165ec0672b..6fb9e7c6893 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -1,7 +1,3 @@ -/* - * linux/arch/i386/mm/pgtable.c - */ - #include #include #include diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c index 48fb38d7d2c..4db42bff8c6 100644 --- a/arch/x86/video/fbdev.c +++ b/arch/x86/video/fbdev.c @@ -1,5 +1,4 @@ /* - * arch/i386/video/fbdev.c - i386 Framebuffer * * Copyright (C) 2007 Antonino Daplas * -- cgit v1.2.3 From 120d5bf128906c790df810e159d2e1239d08fef1 Mon Sep 17 00:00:00 2001 From: Jacek Luczak Date: Wed, 9 Apr 2008 22:53:50 +0200 Subject: x86: remove vm86.h inclusion from process_32.c I've made a small investigation about vm86.h inclusion rules and it looks like everything is more or less ok. Files that rely on asm/vm86.h symbols are: - kprobes.c - process_32.c - signal_32.c - traps_32.c - vm86_32.c File process_32.c includes vm86.h explicitly. We can remove that include and it won't break anything. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_32.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 3903a8f2eb9..91e147b486d 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -45,7 +45,6 @@ #include #include #include -#include #ifdef CONFIG_MATH_EMULATION #include #endif -- cgit v1.2.3 From 1a7a34af78923f8807d054a15133a8fcf47e385e Mon Sep 17 00:00:00 2001 From: Jacek Luczak Date: Thu, 10 Apr 2008 13:40:57 +0200 Subject: x86: e820_64, fix section mismatch warning fix section mismatch warnings which occurs on my x86_64 box while compiling linux-next-20080410: Warning messages: WARNING: arch/x86/kernel/built-in.o(.text+0x7bc2): Section mismatch in reference from the function bad_addr() to the variable .init.data:early_res The function bad_addr() references the variable __initdata early_res. This is often because bad_addr lacks a __initdata annotation or the annotation of early_res is wrong. WARNING: arch/x86/kernel/built-in.o(.text+0x7c3b): Section mismatch in reference from the function bad_addr_size() to the variable .init.data:early_res The function bad_addr_size() references the variable __initdata early_res. This is often because bad_addr_size lacks a __initdata annotation or the annotation of early_res is wrong. Signed-off-by: Jacek Luczak Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/e820_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 7f6c0c85c8f..cbd42e51cb0 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -96,7 +96,7 @@ void __init early_res_to_bootmem(void) } /* Check for already reserved areas */ -static inline int +static inline int __init bad_addr(unsigned long *addrp, unsigned long size, unsigned long align) { int i; @@ -116,7 +116,7 @@ again: } /* Check for already reserved areas */ -static inline int +static inline int __init bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align) { int i; -- cgit v1.2.3 From 4c8337ac425b220594fec45ad6d3ac76d3ce2b90 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 10 Apr 2008 15:09:50 -0700 Subject: x86: fix arch/x86/mm/ioremap.c warning Fix printk formats in x86/mm/ioremap.c: next-20080410/arch/x86/mm/ioremap.c:137: warning: format '%llx' expects type 'long long unsigned int', but argument 2 has type 'resource_size_t' next-20080410/arch/x86/mm/ioremap.c:188: warning: format '%llx' expects type 'long long unsigned int', but argument 2 has type 'resource_size_t' next-20080410/arch/x86/mm/ioremap.c:188: warning: format '%llx' expects type 'long long unsigned int', but argument 3 has type 'long unsigned int' Signed-off-by: Randy Dunlap Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/ioremap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index c590fd200e2..3a4baf95e24 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -134,7 +134,7 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, if (!phys_addr_valid(phys_addr)) { printk(KERN_WARNING "ioremap: invalid physical address %llx\n", - phys_addr); + (unsigned long long)phys_addr); WARN_ON_ONCE(1); return NULL; } @@ -187,7 +187,8 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, new_prot_val == _PAGE_CACHE_WB)) { pr_debug( "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", - phys_addr, phys_addr + size, + (unsigned long long)phys_addr, + (unsigned long long)(phys_addr + size), prot_val, new_prot_val); free_memtype(phys_addr, phys_addr + size); return NULL; -- cgit v1.2.3 From 7c53976404e2f906c60b69cc5793add87ee49c6a Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 8 Apr 2008 12:54:30 +0200 Subject: x86: cleanup boot-heap usage The kernel decompressor wrapper uses memory located beyond the end of the image. This might lead to hard to debug problems, but even if it can be proven to be safe, it is at the very least unclean. I don't see any advantages either, unless you count it not being zeroed out as an advantage. This patch moves the boot-heap area to the bss segment. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/boot/compressed/head_32.S | 15 +++++++++------ arch/x86/boot/compressed/head_64.S | 22 +++++++++++++--------- arch/x86/boot/compressed/misc.c | 8 +------- 3 files changed, 23 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 036e635f18a..ba7736cf2ec 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -130,7 +130,7 @@ relocated: /* * Setup the stack for the decompressor */ - leal stack_end(%ebx), %esp + leal boot_stack_end(%ebx), %esp /* * Do the decompression, and jump to the new kernel.. @@ -142,8 +142,8 @@ relocated: pushl %eax # input_len leal input_data(%ebx), %eax pushl %eax # input_data - leal _end(%ebx), %eax - pushl %eax # end of the image as third argument + leal boot_heap(%ebx), %eax + pushl %eax # heap area as third argument pushl %esi # real mode pointer as second arg call decompress_kernel addl $20, %esp @@ -181,7 +181,10 @@ relocated: jmp *%ebp .bss +/* Stack and heap for uncompression */ .balign 4 -stack: - .fill 4096, 1, 0 -stack_end: +boot_heap: + .fill BOOT_HEAP_SIZE, 1, 0 +boot_stack: + .fill BOOT_STACK_SIZE, 1, 0 +boot_stack_end: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index e8657b98c90..7a212a62db3 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -62,7 +63,7 @@ startup_32: subl $1b, %ebp /* setup a stack and make sure cpu supports long mode. */ - movl $user_stack_end, %eax + movl $boot_stack_end, %eax addl %ebp, %eax movl %eax, %esp @@ -274,7 +275,7 @@ relocated: stosb /* Setup the stack */ - leaq user_stack_end(%rip), %rsp + leaq boot_stack_end(%rip), %rsp /* zero EFLAGS after setting rsp */ pushq $0 @@ -285,7 +286,7 @@ relocated: */ pushq %rsi # Save the real mode argument movq %rsi, %rdi # real mode address - leaq _heap(%rip), %rsi # _heap + leaq boot_heap(%rip), %rsi # malloc area for uncompression leaq input_data(%rip), %rdx # input_data movl input_len(%rip), %eax movq %rax, %rcx # input_len @@ -310,9 +311,12 @@ gdt: .quad 0x0080890000000000 /* TS descriptor */ .quad 0x0000000000000000 /* TS continued */ gdt_end: - .bss -/* Stack for uncompression */ - .balign 4 -user_stack: - .fill 4096,4,0 -user_stack_end: + +.bss +/* Stack and heap for uncompression */ +.balign 4 +boot_heap: + .fill BOOT_HEAP_SIZE, 1, 0 +boot_stack: + .fill BOOT_STACK_SIZE, 1, 0 +boot_stack_end: diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index dad4e699f5a..90456cee47c 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -217,12 +217,6 @@ static void putstr(const char *); static memptr free_mem_ptr; static memptr free_mem_end_ptr; -#ifdef CONFIG_X86_64 -#define HEAP_SIZE 0x7000 -#else -#define HEAP_SIZE 0x4000 -#endif - static char *vidmem; static int vidport; static int lines, cols; @@ -449,7 +443,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, window = output; /* Output buffer (Normally at 1M) */ free_mem_ptr = heap; /* Heap */ - free_mem_end_ptr = heap + HEAP_SIZE; + free_mem_end_ptr = heap + BOOT_HEAP_SIZE; inbuf = input_data; /* Input buffer */ insize = input_len; inptr = 0; -- cgit v1.2.3 From 4a9f54cfd21f313b9858f951783512d3f14e58a4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 10 Apr 2008 15:06:38 -0700 Subject: x86: cleanup: change _end to end_before_pgt cleanup: change the _end in compressed vmlinux_64.lds. also change _heap to _ebss that is not needed. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/boot/compressed/head_64.S | 8 ++++---- arch/x86/boot/compressed/vmlinux_64.lds | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 7a212a62db3..d8819efac81 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -244,9 +244,9 @@ ENTRY(startup_64) /* Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. */ - leaq _end(%rip), %r8 - leaq _end(%rbx), %r9 - movq $_end /* - $startup_32 */, %rcx + leaq _end_before_pgt(%rip), %r8 + leaq _end_before_pgt(%rbx), %r9 + movq $_end_before_pgt /* - $startup_32 */, %rcx 1: subq $8, %r8 subq $8, %r9 movq 0(%r8), %rax @@ -268,7 +268,7 @@ relocated: */ xorq %rax, %rax leaq _edata(%rbx), %rdi - leaq _end(%rbx), %rcx + leaq _end_before_pgt(%rbx), %rcx subq %rdi, %rcx cld rep diff --git a/arch/x86/boot/compressed/vmlinux_64.lds b/arch/x86/boot/compressed/vmlinux_64.lds index 7e5c7209f6c..bef1ac891bc 100644 --- a/arch/x86/boot/compressed/vmlinux_64.lds +++ b/arch/x86/boot/compressed/vmlinux_64.lds @@ -39,10 +39,10 @@ SECTIONS *(.bss.*) *(COMMON) . = ALIGN(8); - _end = . ; + _end_before_pgt = . ; . = ALIGN(4096); pgtable = . ; . = . + 4096 * 6; - _heap = .; + _ebss = .; } } -- cgit v1.2.3 From f5a1b191b37ac2609e2babeec1b21f411da93e4d Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Sat, 12 Apr 2008 10:28:25 +0200 Subject: x86: fix exec mappings comments - noexec32 is on by default for years already - add noexec32 to kernel-parameters and fix noexec typo in there Signed-off-by: Jiri Slaby Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/setup64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index 9042fb0e36f..aee0e820077 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -74,8 +74,8 @@ int force_personality32 = 0; Control non executable heap for 32bit processes. To control the stack too use noexec=off -on PROT_READ does not imply PROT_EXEC for 32bit processes -off PROT_READ implies PROT_EXEC (default) +on PROT_READ does not imply PROT_EXEC for 32bit processes (default) +off PROT_READ implies PROT_EXEC */ static int __init nonx32_setup(char *str) { -- cgit v1.2.3 From 4bd01600b214275a80a69b44393d7e81d43c2faa Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Tue, 19 Feb 2008 11:02:30 +0100 Subject: x86: clean up =0 initializations in arch/x86/kernel/tsc_32.c Signed-off-by: Pavel Machek Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc_32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 3d7e6e9fa6c..06af8cf8251 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -221,9 +221,9 @@ EXPORT_SYMBOL(recalibrate_cpu_khz); * if the CPU frequency is scaled, TSC-based delays will need a different * loops_per_jiffy value to function properly. */ -static unsigned int ref_freq = 0; -static unsigned long loops_per_jiffy_ref = 0; -static unsigned long cpu_khz_ref = 0; +static unsigned int ref_freq; +static unsigned long loops_per_jiffy_ref; +static unsigned long cpu_khz_ref; static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) @@ -283,7 +283,7 @@ core_initcall(cpufreq_tsc); /* clock source code */ -static unsigned long current_tsc_khz = 0; +static unsigned long current_tsc_khz; static cycle_t read_tsc(void) { -- cgit v1.2.3 From 5deb45e39b946901ae028ccd3a1d0b35fa387475 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 19 Apr 2008 19:19:55 +0200 Subject: ftrace: add notrace annotations for NMI routines This annotates NMI functions with notrace. Some tracers may be able to live with this, but some cannot. The safest is to turn it off, it's not particularly interesting anyway. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/nmi_32.c | 3 ++- arch/x86/kernel/nmi_64.c | 6 ++++-- arch/x86/kernel/traps_32.c | 12 ++++++------ arch/x86/kernel/traps_64.c | 11 ++++++----- 4 files changed, 18 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 8421d0ac6f2..11b14bbaa61 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -321,7 +321,8 @@ EXPORT_SYMBOL(touch_nmi_watchdog); extern void die_nmi(struct pt_regs *, const char *msg); -__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) +notrace __kprobes int +nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { /* diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c index 11f9130ac51..5a29ded994f 100644 --- a/arch/x86/kernel/nmi_64.c +++ b/arch/x86/kernel/nmi_64.c @@ -313,7 +313,8 @@ void touch_nmi_watchdog(void) } EXPORT_SYMBOL(touch_nmi_watchdog); -int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) +notrace __kprobes int +nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { int sum; int touched = 0; @@ -384,7 +385,8 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) static unsigned ignore_nmis; -asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) +asmlinkage notrace __kprobes void +do_nmi(struct pt_regs *regs, long error_code) { nmi_enter(); add_pda(__nmi_count,1); diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 65791ca2824..dc4273010f2 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -681,7 +681,7 @@ gp_in_kernel: } } -static __kprobes void +static notrace __kprobes void mem_parity_error(unsigned char reason, struct pt_regs *regs) { printk(KERN_EMERG @@ -707,7 +707,7 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs) clear_mem_error(reason); } -static __kprobes void +static notrace __kprobes void io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; @@ -727,7 +727,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) outb(reason, 0x61); } -static __kprobes void +static notrace __kprobes void unknown_nmi_error(unsigned char reason, struct pt_regs *regs) { if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) @@ -755,7 +755,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) static DEFINE_SPINLOCK(nmi_print_lock); -void __kprobes die_nmi(struct pt_regs *regs, const char *msg) +void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg) { if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) return; @@ -786,7 +786,7 @@ void __kprobes die_nmi(struct pt_regs *regs, const char *msg) do_exit(SIGSEGV); } -static __kprobes void default_do_nmi(struct pt_regs *regs) +static notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; @@ -828,7 +828,7 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) static int ignore_nmis; -__kprobes void do_nmi(struct pt_regs *regs, long error_code) +notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) { int cpu; diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 79aa6fc0815..6d883b13ef4 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -600,7 +600,8 @@ void die(const char * str, struct pt_regs * regs, long err) oops_end(flags, regs, SIGSEGV); } -void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) +notrace __kprobes void +die_nmi(char *str, struct pt_regs *regs, int do_panic) { unsigned long flags; @@ -772,7 +773,7 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, die("general protection fault", regs, error_code); } -static __kprobes void +static notrace __kprobes void mem_parity_error(unsigned char reason, struct pt_regs * regs) { printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", @@ -796,7 +797,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) outb(reason, 0x61); } -static __kprobes void +static notrace __kprobes void io_check_error(unsigned char reason, struct pt_regs * regs) { printk("NMI: IOCK error (debug interrupt?)\n"); @@ -810,7 +811,7 @@ io_check_error(unsigned char reason, struct pt_regs * regs) outb(reason, 0x61); } -static __kprobes void +static notrace __kprobes void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) { if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) @@ -827,7 +828,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs * regs) /* Runs on IST stack. This code must keep interrupts off all the time. Nested NMIs are prevented by the CPU. */ -asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs) +asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; int cpu; -- cgit v1.2.3 From 529e25f646e08901a6dad5768f681efffd77225e Mon Sep 17 00:00:00 2001 From: Erik Bosman Date: Mon, 14 Apr 2008 00:24:18 +0200 Subject: x86: implement prctl PR_GET_TSC and PR_SET_TSC This patch implements the PR_GET_TSC and PR_SET_TSC prctl() commands on the x86 platform (both 32 and 64 bit.) These commands control the ability to read the timestamp counter from userspace (the RDTSC instruction.) While the RDTSC instuction is a useful profiling tool, it is also the source of some non-determinism in ring-3. For deterministic replay applications it is useful to be able to trap and emulate (and record the outcome of) this instruction. This patch uses code earlier used to disable the timestamp counter for the SECCOMP framework. A side-effect of this patch is that the SECCOMP environment will now also disable the timestamp counter on x86_64 due to the addition of the TIF_NOTSC define on this platform. The code which enables/disables the RDTSC instruction during context switches is in the __switch_to_xtra function, which already handles other unusual conditions, so normal performance should not have to suffer from this change. Signed-off-by: Erik Bosman Acked-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_32.c | 43 +++++++++++++++++++++++++--- arch/x86/kernel/process_64.c | 68 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 91e147b486d..a3790a3f8a8 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -523,11 +524,11 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) } EXPORT_SYMBOL_GPL(start_thread); -#ifdef CONFIG_SECCOMP static void hard_disable_TSC(void) { write_cr4(read_cr4() | X86_CR4_TSD); } + void disable_TSC(void) { preempt_disable(); @@ -539,11 +540,47 @@ void disable_TSC(void) hard_disable_TSC(); preempt_enable(); } + static void hard_enable_TSC(void) { write_cr4(read_cr4() & ~X86_CR4_TSD); } -#endif /* CONFIG_SECCOMP */ + +void enable_TSC(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. + */ + hard_enable_TSC(); + preempt_enable(); +} + +int get_tsc_mode(unsigned long adr) +{ + unsigned int val; + + if (test_thread_flag(TIF_NOTSC)) + val = PR_TSC_SIGSEGV; + else + val = PR_TSC_ENABLE; + + return put_user(val, (unsigned int __user *)adr); +} + +int set_tsc_mode(unsigned int val) +{ + if (val == PR_TSC_SIGSEGV) + disable_TSC(); + else if (val == PR_TSC_ENABLE) + enable_TSC(); + else + return -EINVAL; + + return 0; +} static noinline void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, @@ -577,7 +614,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, set_debugreg(next->debugreg7, 7); } -#ifdef CONFIG_SECCOMP if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ test_tsk_thread_flag(next_p, TIF_NOTSC)) { /* prev and next are different */ @@ -586,7 +622,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, else hard_enable_TSC(); } -#endif #ifdef X86_BTS if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e75ccc8a2b8..4c13b1406c7 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -535,6 +536,64 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) } EXPORT_SYMBOL_GPL(start_thread); +static void hard_disable_TSC(void) +{ + write_cr4(read_cr4() | X86_CR4_TSD); +} + +void disable_TSC(void) +{ + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. + */ + hard_disable_TSC(); + preempt_enable(); +} + +static void hard_enable_TSC(void) +{ + write_cr4(read_cr4() & ~X86_CR4_TSD); +} + +void enable_TSC(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. + */ + hard_enable_TSC(); + preempt_enable(); +} + +int get_tsc_mode(unsigned long adr) +{ + unsigned int val; + + if (test_thread_flag(TIF_NOTSC)) + val = PR_TSC_SIGSEGV; + else + val = PR_TSC_ENABLE; + + return put_user(val, (unsigned int __user *)adr); +} + +int set_tsc_mode(unsigned int val) +{ + if (val == PR_TSC_SIGSEGV) + disable_TSC(); + else if (val == PR_TSC_ENABLE) + enable_TSC(); + else + return -EINVAL; + + return 0; +} + /* * This special macro can be used to load a debugging register */ @@ -572,6 +631,15 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, loaddebug(next, 7); } + if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ + test_tsk_thread_flag(next_p, TIF_NOTSC)) { + /* prev and next are different */ + if (test_tsk_thread_flag(next_p, TIF_NOTSC)) + hard_disable_TSC(); + else + hard_enable_TSC(); + } + if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { /* * Copy the relevant range of the IO bitmap. -- cgit v1.2.3 From d8bb6f4c1670c8324e4135c61ef07486f7f17379 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Apr 2008 19:45:18 +0200 Subject: x86: tsc prevent time going backwards We already catch most of the TSC problems by sanity checks, but there is a subtle bug which has been in the code forever. This can cause time jumps in the range of hours. This was reported in: http://lkml.org/lkml/2007/8/23/96 and http://lkml.org/lkml/2008/3/31/23 I was able to reproduce the problem with a gettimeofday loop test on a dual core and a quad core machine which both have sychronized TSCs. The TSCs seems not to be perfectly in sync though, but the kernel is not able to detect the slight delta in the sync check. Still there exists an extremly small window where this delta can be observed with a real big time jump. So far I was only able to reproduce this with the vsyscall gettimeofday implementation, but in theory this might be observable with the syscall based version as well. CPU 0 updates the clock source variables under xtime/vyscall lock and CPU1, where the TSC is slighty behind CPU0, is reading the time right after the seqlock was unlocked. The clocksource reference data was updated with the TSC from CPU0 and the value which is read from TSC on CPU1 is less than the reference data. This results in a huge delta value due to the unsigned subtraction of the TSC value and the reference value. This algorithm can not be changed due to the support of wrapping clock sources like pm timer. The huge delta is converted to nanoseconds and added to xtime, which is then observable by the caller. The next gettimeofday call on CPU1 will show the correct time again as now the TSC has advanced above the reference value. To prevent this TSC specific wreckage we need to compare the TSC value against the reference value and return the latter when it is larger than the actual TSC value. I pondered to mark the TSC unstable when the readout is smaller than the reference value, but this would render an otherwise good and fast clocksource unusable without a real good reason. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_32.c | 15 ++++++++++++++- arch/x86/kernel/tsc_64.c | 23 ++++++++++++++++++++--- 2 files changed, 34 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 06af8cf8251..e4790728b22 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -284,14 +284,27 @@ core_initcall(cpufreq_tsc); /* clock source code */ static unsigned long current_tsc_khz; +static struct clocksource clocksource_tsc; +/* + * We compare the TSC to the cycle_last value in the clocksource + * structure to avoid a nasty time-warp issue. This can be observed in + * a very small window right after one CPU updated cycle_last under + * xtime lock and the other CPU reads a TSC value which is smaller + * than the cycle_last reference value due to a TSC which is slighty + * behind. This delta is nowhere else observable, but in that case it + * results in a forward time jump in the range of hours due to the + * unsigned delta calculation of the time keeping core code, which is + * necessary to support wrapping clocksources like pm timer. + */ static cycle_t read_tsc(void) { cycle_t ret; rdtscll(ret); - return ret; + return ret >= clocksource_tsc.cycle_last ? + ret : clocksource_tsc.cycle_last; } static struct clocksource clocksource_tsc = { diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index ceeba01e7f4..fcc16e58609 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -11,6 +11,7 @@ #include #include #include +#include static int notsc __initdata = 0; @@ -287,18 +288,34 @@ int __init notsc_setup(char *s) __setup("notsc", notsc_setup); +static struct clocksource clocksource_tsc; -/* clock source code: */ +/* + * We compare the TSC to the cycle_last value in the clocksource + * structure to avoid a nasty time-warp. This can be observed in a + * very small window right after one CPU updated cycle_last under + * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which + * is smaller than the cycle_last reference value due to a TSC which + * is slighty behind. This delta is nowhere else observable, but in + * that case it results in a forward time jump in the range of hours + * due to the unsigned delta calculation of the time keeping core + * code, which is necessary to support wrapping clocksources like pm + * timer. + */ static cycle_t read_tsc(void) { cycle_t ret = (cycle_t)get_cycles(); - return ret; + + return ret >= clocksource_tsc.cycle_last ? + ret : clocksource_tsc.cycle_last; } static cycle_t __vsyscall_fn vread_tsc(void) { cycle_t ret = (cycle_t)vget_cycles(); - return ret; + + return ret >= __vsyscall_gtod_data.clock.cycle_last ? + ret : __vsyscall_gtod_data.clock.cycle_last; } static struct clocksource clocksource_tsc = { -- cgit v1.2.3 From fa5c4639419668cbb18ca3d20c1253559a3b43ae Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 16 Apr 2008 02:29:42 +0200 Subject: x86: rename find_max_pfn() to propagate_e820_map() this function doesnt just 'find' the max_pfn - it also has other side-effects such as registering sparse memory maps. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/e820_32.c | 4 ++-- arch/x86/kernel/setup_32.c | 4 ++-- arch/x86/mm/discontig_32.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index 0240cd77836..ed733e7cf4e 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -475,7 +475,7 @@ int __init copy_e820_map(struct e820entry *biosmap, int nr_map) /* * Find the highest page frame number we have available */ -void __init find_max_pfn(void) +void __init propagate_e820_map(void) { int i; @@ -704,7 +704,7 @@ static int __init parse_memmap(char *arg) * size before original memory map is * reset. */ - find_max_pfn(); + propagate_e820_map(); saved_max_pfn = max_pfn; #endif e820.nr_map = 0; diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 5b0bffb7fcc..1c4799e6871 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -812,10 +812,10 @@ void __init setup_arch(char **cmdline_p) efi_init(); /* update e820 for memory not covered by WB MTRRs */ - find_max_pfn(); + propagate_e820_map(); mtrr_bp_init(); if (mtrr_trim_uncached_memory(max_pfn)) - find_max_pfn(); + propagate_e820_map(); max_low_pfn = setup_memory(); diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index eba0bbede7a..18378850e25 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -120,7 +120,7 @@ int __init get_memcfg_numa_flat(void) printk("NUMA - single node, flat memory mode\n"); /* Run the memory configuration and find the top of memory. */ - find_max_pfn(); + propagate_e820_map(); node_start_pfn[0] = 0; node_end_pfn[0] = max_pfn; memory_present(0, 0, max_pfn); @@ -134,7 +134,7 @@ int __init get_memcfg_numa_flat(void) /* * Find the highest page frame number we have available for the node */ -static void __init find_max_pfn_node(int nid) +static void __init propagate_e820_map_node(int nid) { if (node_end_pfn[nid] > max_pfn) node_end_pfn[nid] = max_pfn; @@ -379,7 +379,7 @@ unsigned long __init setup_memory(void) printk("High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); for_each_online_node(nid) - find_max_pfn_node(nid); + propagate_e820_map_node(nid); memset(NODE_DATA(0), 0, sizeof(struct pglist_data)); NODE_DATA(0)->bdata = &node0_bdata; -- cgit v1.2.3 From 61c4628b538608c1a85211ed8438136adfeb9a95 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 10 Mar 2008 15:28:04 -0700 Subject: x86, fpu: split FPU state from task struct - v5 Split the FPU save area from the task struct. This allows easy migration of FPU context, and it's generally cleaner. It also allows the following two optimizations: 1) only allocate when the application actually uses FPU, so in the first lazy FPU trap. This could save memory for non-fpu using apps. Next patch does this lazy allocation. 2) allocate the right size for the actual cpu rather than 512 bytes always. Patches enabling xsave/xrstor support (coming shortly) will take advantage of this. Signed-off-by: Suresh Siddha Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/i387.c | 80 ++++++++++++++++++++++++------------------ arch/x86/kernel/process.c | 35 ++++++++++++++++++ arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/traps_32.c | 6 +--- arch/x86/kernel/traps_64.c | 6 +++- arch/x86/math-emu/fpu_entry.c | 4 +-- arch/x86/math-emu/fpu_system.h | 26 +++++++------- arch/x86/math-emu/reg_ld_str.c | 4 +-- 10 files changed, 107 insertions(+), 59 deletions(-) create mode 100644 arch/x86/kernel/process.c (limited to 'arch') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index c3920ea8ac5..7a2a2e93e84 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_X86_64) += pci-nommu_64.o bugs_64.o obj-y += tsc_$(BITS).o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o +obj-y += process.o obj-y += i387.o obj-y += ptrace.o obj-y += ds.o diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 8f8102d967b..baf632b221d 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -35,17 +36,18 @@ #endif static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; +unsigned int xstate_size; +static struct i387_fxsave_struct fx_scratch __cpuinitdata; -void mxcsr_feature_mask_init(void) +void __cpuinit mxcsr_feature_mask_init(void) { unsigned long mask = 0; clts(); if (cpu_has_fxsr) { - memset(¤t->thread.i387.fxsave, 0, - sizeof(struct i387_fxsave_struct)); - asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave)); - mask = current->thread.i387.fxsave.mxcsr_mask; + memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); + asm volatile("fxsave %0" : : "m" (fx_scratch)); + mask = fx_scratch.mxcsr_mask; if (mask == 0) mask = 0x0000ffbf; } @@ -53,6 +55,17 @@ void mxcsr_feature_mask_init(void) stts(); } +void __init init_thread_xstate(void) +{ + if (cpu_has_fxsr) + xstate_size = sizeof(struct i387_fxsave_struct); +#ifdef CONFIG_X86_32 + else + xstate_size = sizeof(struct i387_fsave_struct); +#endif + init_task.thread.xstate = alloc_bootmem(xstate_size); +} + #ifdef CONFIG_X86_64 /* * Called at bootup to set up the initial FPU state that is later cloned @@ -61,10 +74,6 @@ void mxcsr_feature_mask_init(void) void __cpuinit fpu_init(void) { unsigned long oldcr0 = read_cr0(); - extern void __bad_fxsave_alignment(void); - - if (offsetof(struct task_struct, thread.i387.fxsave) & 15) - __bad_fxsave_alignment(); set_in_cr4(X86_CR4_OSFXSR); set_in_cr4(X86_CR4_OSXMMEXCPT); @@ -93,18 +102,19 @@ void init_fpu(struct task_struct *tsk) } if (cpu_has_fxsr) { - memset(&tsk->thread.i387.fxsave, 0, - sizeof(struct i387_fxsave_struct)); - tsk->thread.i387.fxsave.cwd = 0x37f; + struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; + + memset(fx, 0, xstate_size); + fx->cwd = 0x37f; if (cpu_has_xmm) - tsk->thread.i387.fxsave.mxcsr = MXCSR_DEFAULT; + fx->mxcsr = MXCSR_DEFAULT; } else { - memset(&tsk->thread.i387.fsave, 0, - sizeof(struct i387_fsave_struct)); - tsk->thread.i387.fsave.cwd = 0xffff037fu; - tsk->thread.i387.fsave.swd = 0xffff0000u; - tsk->thread.i387.fsave.twd = 0xffffffffu; - tsk->thread.i387.fsave.fos = 0xffff0000u; + struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; + memset(fp, 0, xstate_size); + fp->cwd = 0xffff037fu; + fp->swd = 0xffff0000u; + fp->twd = 0xffffffffu; + fp->fos = 0xffff0000u; } /* * Only the device not available exception or ptrace can call init_fpu. @@ -132,7 +142,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, init_fpu(target); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.i387.fxsave, 0, -1); + &target->thread.xstate->fxsave, 0, -1); } int xfpregs_set(struct task_struct *target, const struct user_regset *regset, @@ -148,12 +158,12 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, set_stopped_child_used_math(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.i387.fxsave, 0, -1); + &target->thread.xstate->fxsave, 0, -1); /* * mxcsr reserved bits must be masked to zero for security reasons. */ - target->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; + target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; return ret; } @@ -233,7 +243,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) static void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) { - struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave; + struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; int i; @@ -273,7 +283,7 @@ static void convert_to_fxsr(struct task_struct *tsk, const struct user_i387_ia32_struct *env) { - struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave; + struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; int i; @@ -310,7 +320,8 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) { return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.i387.fsave, 0, -1); + &target->thread.xstate->fsave, 0, + -1); } if (kbuf && pos == 0 && count == sizeof(env)) { @@ -338,7 +349,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) { return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.i387.fsave, 0, -1); + &target->thread.xstate->fsave, 0, -1); } if (pos > 0 || count < sizeof(env)) @@ -358,11 +369,11 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) { struct task_struct *tsk = current; + struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; unlazy_fpu(tsk); - tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd; - if (__copy_to_user(buf, &tsk->thread.i387.fsave, - sizeof(struct i387_fsave_struct))) + fp->status = fp->swd; + if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) return -1; return 1; } @@ -370,6 +381,7 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) { struct task_struct *tsk = current; + struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; struct user_i387_ia32_struct env; int err = 0; @@ -379,12 +391,12 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) if (__copy_to_user(buf, &env, sizeof(env))) return -1; - err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status); + err |= __put_user(fx->swd, &buf->status); err |= __put_user(X86_FXSR_MAGIC, &buf->magic); if (err) return -1; - if (__copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave, + if (__copy_to_user(&buf->_fxsr_env[0], fx, sizeof(struct i387_fxsave_struct))) return -1; return 1; @@ -417,7 +429,7 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) struct task_struct *tsk = current; clear_fpu(tsk); - return __copy_from_user(&tsk->thread.i387.fsave, buf, + return __copy_from_user(&tsk->thread.xstate->fsave, buf, sizeof(struct i387_fsave_struct)); } @@ -428,10 +440,10 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) int err; clear_fpu(tsk); - err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0], + err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], sizeof(struct i387_fxsave_struct)); /* mxcsr reserved bits must be masked to zero for security reasons */ - tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; + tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; if (err || __copy_from_user(&env, buf, sizeof(env))) return 1; convert_to_fxsr(tsk, &env); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c new file mode 100644 index 00000000000..ead24efbcba --- /dev/null +++ b/arch/x86/kernel/process.c @@ -0,0 +1,35 @@ +#include +#include +#include +#include +#include +#include + +static struct kmem_cache *task_xstate_cachep; + +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) +{ + *dst = *src; + dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); + if (!dst->thread.xstate) + return -ENOMEM; + WARN_ON((unsigned long)dst->thread.xstate & 15); + memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); + return 0; +} + +void free_thread_info(struct thread_info *ti) +{ + kmem_cache_free(task_xstate_cachep, ti->task->thread.xstate); + ti->task->thread.xstate = NULL; + + free_pages((unsigned long)(ti), get_order(THREAD_SIZE)); +} + +void arch_task_cache_init(void) +{ + task_xstate_cachep = + kmem_cache_create("task_xstate", xstate_size, + __alignof__(union thread_xstate), + SLAB_PANIC, NULL); +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a3790a3f8a8..3890a5dd25f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -703,7 +703,7 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct /* we're going to use this soon, after a few expensive things */ if (next_p->fpu_counter > 5) - prefetch(&next->i387.fxsave); + prefetch(next->xstate); /* * Reload esp0. diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4c13b1406c7..b795e831afd 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -682,7 +682,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* we're going to use this soon, after a few expensive things */ if (next_p->fpu_counter>5) - prefetch(&next->i387.fxsave); + prefetch(next->xstate); /* * Reload esp0, LDT and the page table pointer: diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index dc4273010f2..8d136a73ce8 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -1208,11 +1208,6 @@ void __init trap_init(void) #endif set_trap_gate(19, &simd_coprocessor_error); - /* - * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. - * Generate a build-time error if the alignment is wrong. - */ - BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15); if (cpu_has_fxsr) { printk(KERN_INFO "Enabling fast FPU save and restore... "); set_in_cr4(X86_CR4_OSFXSR); @@ -1233,6 +1228,7 @@ void __init trap_init(void) set_bit(SYSCALL_VECTOR, used_vectors); + init_thread_xstate(); /* * Should be a barrier for any external CPU state: */ diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 6d883b13ef4..dc0cb497eec 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -1128,7 +1128,7 @@ asmlinkage void math_state_restore(void) if (!used_math()) init_fpu(me); - restore_fpu_checking(&me->thread.i387.fxsave); + restore_fpu_checking(&me->thread.xstate->fxsave); task_thread_info(me)->status |= TS_USEDFPU; me->fpu_counter++; } @@ -1163,6 +1163,10 @@ void __init trap_init(void) set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); #endif + /* + * initialize the per thread extended state: + */ + init_thread_xstate(); /* * Should be a barrier for any external CPU state. */ diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 4bab3b14539..6e38d877ea7 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -678,7 +678,7 @@ int fpregs_soft_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - struct i387_soft_struct *s387 = &target->thread.i387.soft; + struct i387_soft_struct *s387 = &target->thread.xstate->soft; void *space = s387->st_space; int ret; int offset, other, i, tags, regnr, tag, newtop; @@ -730,7 +730,7 @@ int fpregs_soft_get(struct task_struct *target, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - struct i387_soft_struct *s387 = &target->thread.i387.soft; + struct i387_soft_struct *s387 = &target->thread.xstate->soft; const void *space = s387->st_space; int ret; int offset = (S387->ftop & 7) * 10, other = 80 - offset; diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index a3ae28c49dd..13488fa153e 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -35,8 +35,8 @@ #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ == (1 << 10)) -#define I387 (current->thread.i387) -#define FPU_info (I387.soft.info) +#define I387 (current->thread.xstate) +#define FPU_info (I387->soft.info) #define FPU_CS (*(unsigned short *) &(FPU_info->___cs)) #define FPU_SS (*(unsigned short *) &(FPU_info->___ss)) @@ -46,25 +46,25 @@ #define FPU_EIP (FPU_info->___eip) #define FPU_ORIG_EIP (FPU_info->___orig_eip) -#define FPU_lookahead (I387.soft.lookahead) +#define FPU_lookahead (I387->soft.lookahead) /* nz if ip_offset and cs_selector are not to be set for the current instruction. */ -#define no_ip_update (*(u_char *)&(I387.soft.no_update)) -#define FPU_rm (*(u_char *)&(I387.soft.rm)) +#define no_ip_update (*(u_char *)&(I387->soft.no_update)) +#define FPU_rm (*(u_char *)&(I387->soft.rm)) /* Number of bytes of data which can be legally accessed by the current instruction. This only needs to hold a number <= 108, so a byte will do. */ -#define access_limit (*(u_char *)&(I387.soft.alimit)) +#define access_limit (*(u_char *)&(I387->soft.alimit)) -#define partial_status (I387.soft.swd) -#define control_word (I387.soft.cwd) -#define fpu_tag_word (I387.soft.twd) -#define registers (I387.soft.st_space) -#define top (I387.soft.ftop) +#define partial_status (I387->soft.swd) +#define control_word (I387->soft.cwd) +#define fpu_tag_word (I387->soft.twd) +#define registers (I387->soft.st_space) +#define top (I387->soft.ftop) -#define instruction_address (*(struct address *)&I387.soft.fip) -#define operand_address (*(struct address *)&I387.soft.foo) +#define instruction_address (*(struct address *)&I387->soft.fip) +#define operand_address (*(struct address *)&I387->soft.foo) #define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \ math_abort(FPU_info,SIGSEGV) diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c index 02af772a24d..d597fe7423c 100644 --- a/arch/x86/math-emu/reg_ld_str.c +++ b/arch/x86/math-emu/reg_ld_str.c @@ -1180,8 +1180,8 @@ u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d) control_word |= 0xffff0040; partial_status = status_word() | 0xffff0000; fpu_tag_word |= 0xffff0000; - I387.soft.fcs &= ~0xf8000000; - I387.soft.fos |= 0xffff0000; + I387->soft.fcs &= ~0xf8000000; + I387->soft.fos |= 0xffff0000; #endif /* PECULIAR_486 */ if (__copy_to_user(d, &control_word, 7 * 4)) FPU_abort; -- cgit v1.2.3 From aa283f49276e7d840a40fb01eee6de97eaa7e012 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 10 Mar 2008 15:28:05 -0700 Subject: x86, fpu: lazy allocation of FPU area - v5 Only allocate the FPU area when the application actually uses FPU, i.e., in the first lazy FPU trap. This could save memory for non-fpu using apps. for example: on my system after boot, there are around 300 processes, with only 17 using FPU. Signed-off-by: Suresh Siddha Cc: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/i387.c | 38 ++++++++++++++++++++++++++++++-------- arch/x86/kernel/process.c | 28 +++++++++++++++++++--------- arch/x86/kernel/process_32.c | 4 ++++ arch/x86/kernel/process_64.c | 4 ++++ arch/x86/kernel/traps_32.c | 17 +++++++++++++++-- arch/x86/kernel/traps_64.c | 19 ++++++++++++++++--- 6 files changed, 88 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index baf632b221d..db6839b5319 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -63,7 +62,6 @@ void __init init_thread_xstate(void) else xstate_size = sizeof(struct i387_fsave_struct); #endif - init_task.thread.xstate = alloc_bootmem(xstate_size); } #ifdef CONFIG_X86_64 @@ -93,12 +91,22 @@ void __cpuinit fpu_init(void) * value at reset if we support XMM instructions and then * remeber the current task has used the FPU. */ -void init_fpu(struct task_struct *tsk) +int init_fpu(struct task_struct *tsk) { if (tsk_used_math(tsk)) { if (tsk == current) unlazy_fpu(tsk); - return; + return 0; + } + + /* + * Memory allocation at the first usage of the FPU and other state. + */ + if (!tsk->thread.xstate) { + tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, + GFP_KERNEL); + if (!tsk->thread.xstate) + return -ENOMEM; } if (cpu_has_fxsr) { @@ -120,6 +128,7 @@ void init_fpu(struct task_struct *tsk) * Only the device not available exception or ptrace can call init_fpu. */ set_stopped_child_used_math(tsk); + return 0; } int fpregs_active(struct task_struct *target, const struct user_regset *regset) @@ -136,10 +145,14 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { + int ret; + if (!cpu_has_fxsr) return -ENODEV; - init_fpu(target); + ret = init_fpu(target); + if (ret) + return ret; return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.xstate->fxsave, 0, -1); @@ -154,7 +167,10 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return -ENODEV; - init_fpu(target); + ret = init_fpu(target); + if (ret) + return ret; + set_stopped_child_used_math(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, @@ -312,11 +328,14 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, void *kbuf, void __user *ubuf) { struct user_i387_ia32_struct env; + int ret; if (!HAVE_HWFP) return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); - init_fpu(target); + ret = init_fpu(target); + if (ret) + return ret; if (!cpu_has_fxsr) { return user_regset_copyout(&pos, &count, &kbuf, &ubuf, @@ -344,7 +363,10 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, if (!HAVE_HWFP) return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); - init_fpu(target); + ret = init_fpu(target); + if (ret) + return ret; + set_stopped_child_used_math(target); if (!cpu_has_fxsr) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ead24efbcba..0e613e7e7b5 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -5,24 +5,34 @@ #include #include -static struct kmem_cache *task_xstate_cachep; +struct kmem_cache *task_xstate_cachep; int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { *dst = *src; - dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); - if (!dst->thread.xstate) - return -ENOMEM; - WARN_ON((unsigned long)dst->thread.xstate & 15); - memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); + if (src->thread.xstate) { + dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, + GFP_KERNEL); + if (!dst->thread.xstate) + return -ENOMEM; + WARN_ON((unsigned long)dst->thread.xstate & 15); + memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); + } return 0; } -void free_thread_info(struct thread_info *ti) +void free_thread_xstate(struct task_struct *tsk) { - kmem_cache_free(task_xstate_cachep, ti->task->thread.xstate); - ti->task->thread.xstate = NULL; + if (tsk->thread.xstate) { + kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); + tsk->thread.xstate = NULL; + } +} + +void free_thread_info(struct thread_info *ti) +{ + free_thread_xstate(ti->task); free_pages((unsigned long)(ti), get_order(THREAD_SIZE)); } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 3890a5dd25f..7adad088e37 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -521,6 +521,10 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) regs->cs = __USER_CS; regs->ip = new_ip; regs->sp = new_sp; + /* + * Free the old FP and other extended state + */ + free_thread_xstate(current); } EXPORT_SYMBOL_GPL(start_thread); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b795e831afd..891af1a1b48 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -533,6 +533,10 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) regs->ss = __USER_DS; regs->flags = 0x200; set_fs(USER_DS); + /* + * Free the old FP and other extended state + */ + free_thread_xstate(current); } EXPORT_SYMBOL_GPL(start_thread); diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 8d136a73ce8..471e694d671 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -1148,9 +1148,22 @@ asmlinkage void math_state_restore(void) struct thread_info *thread = current_thread_info(); struct task_struct *tsk = thread->task; + if (!tsk_used_math(tsk)) { + local_irq_enable(); + /* + * does a slab alloc which can sleep + */ + if (init_fpu(tsk)) { + /* + * ran out of memory! + */ + do_group_exit(SIGKILL); + return; + } + local_irq_disable(); + } + clts(); /* Allow maths ops (or we recurse) */ - if (!tsk_used_math(tsk)) - init_fpu(tsk); restore_fpu(tsk); thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ tsk->fpu_counter++; diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index dc0cb497eec..adff76ea97c 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -1124,10 +1124,23 @@ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) asmlinkage void math_state_restore(void) { struct task_struct *me = current; - clts(); /* Allow maths ops (or we recurse) */ - if (!used_math()) - init_fpu(me); + if (!used_math()) { + local_irq_enable(); + /* + * does a slab alloc which can sleep + */ + if (init_fpu(me)) { + /* + * ran out of memory! + */ + do_group_exit(SIGKILL); + return; + } + local_irq_disable(); + } + + clts(); /* Allow maths ops (or we recurse) */ restore_fpu_checking(&me->thread.xstate->fxsave); task_thread_info(me)->status |= TS_USEDFPU; me->fpu_counter++; -- cgit v1.2.3 From 1679f2710ac58df580d3716fab1f42ae50a226eb Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 16 Apr 2008 10:27:53 +0200 Subject: x86: fpu xstate split cleanup Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0e613e7e7b5..3004d716539 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -29,11 +29,10 @@ void free_thread_xstate(struct task_struct *tsk) } } - void free_thread_info(struct thread_info *ti) { free_thread_xstate(ti->task); - free_pages((unsigned long)(ti), get_order(THREAD_SIZE)); + free_pages((unsigned long)ti, get_order(THREAD_SIZE)); } void arch_task_cache_init(void) -- cgit v1.2.3 From 8705a49c35be088a50b8d5fc5e1aa24d6711fd5b Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Mon, 14 Apr 2008 12:19:30 -0700 Subject: x86 vDSO: compile with -g, 64-bit The 64-bit vDSO's sources are compiled with -g0 for no good reason. Using -g when enabled lets their separate debug files be used at runtime via build ID matching, same as we can see 32-bit vDSO's assembly sources. Signed-off-by: Roland McGrath Acked-by: Sam Ravnborg Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/vdso/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 17a6b057856..b7ad9f89d21 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -37,7 +37,8 @@ $(obj)/%.so: OBJCOPYFLAGS := -S $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) -CFL := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m64 +CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ + $(filter -g%,$(KBUILD_CFLAGS)) $(vobjs): KBUILD_CFLAGS += $(CFL) -- cgit v1.2.3 From 6ec6e0d9f2fd7cb6ca6bc3bfab5ae7b5cdd8c36f Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 25 Mar 2008 10:14:35 -0700 Subject: srat, x86: add support for nodes spanning other nodes For example, If the physical address layout on a two node system with 8 GB memory is something like: node 0: 0-2GB, 4-6GB node 1: 2-4GB, 6-8GB Current kernels fail to boot/detect this NUMA topology. ACPI SRAT tables can expose such a topology which needs to be supported. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 9 +++++++++ arch/x86/mm/k8topology_64.c | 2 +- arch/x86/mm/numa_64.c | 16 +++++++++++----- arch/x86/mm/srat_64.c | 32 +++++++++++++++++++++----------- 4 files changed, 42 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2a59dbb2824..07cf7711356 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -903,6 +903,15 @@ config X86_64_ACPI_NUMA help Enable ACPI SRAT based node topology detection. +# Some NUMA nodes have memory ranges that span +# other nodes. Even though a pfn is valid and +# between a node's start and end pfns, it may not +# reside on that node. See memmap_init_zone() +# for details. +config NODES_SPAN_OTHER_NODES + def_bool y + depends on X86_64_ACPI_NUMA + config NUMA_EMU bool "NUMA emulation" depends on X86_64 && NUMA diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 7a2ebce87df..86808e666f9 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c @@ -164,7 +164,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) if (!found) return -1; - memnode_shift = compute_hash_shift(nodes, 8); + memnode_shift = compute_hash_shift(nodes, 8, NULL); if (memnode_shift < 0) { printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); return -1; diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 2ea56f48f29..cb317018635 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -60,7 +60,7 @@ unsigned long __initdata nodemap_size; * -1 if node overlap or lost ram (shift too big) */ static int __init populate_memnodemap(const struct bootnode *nodes, - int numnodes, int shift) + int numnodes, int shift, int *nodeids) { unsigned long addr, end; int i, res = -1; @@ -76,7 +76,12 @@ static int __init populate_memnodemap(const struct bootnode *nodes, do { if (memnodemap[addr >> shift] != NUMA_NO_NODE) return -1; - memnodemap[addr >> shift] = i; + + if (!nodeids) + memnodemap[addr >> shift] = i; + else + memnodemap[addr >> shift] = nodeids[i]; + addr += (1UL << shift); } while (addr < end); res = 1; @@ -139,7 +144,8 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes, return i; } -int __init compute_hash_shift(struct bootnode *nodes, int numnodes) +int __init compute_hash_shift(struct bootnode *nodes, int numnodes, + int *nodeids) { int shift; @@ -149,7 +155,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes) printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", shift); - if (populate_memnodemap(nodes, numnodes, shift) != 1) { + if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) { printk(KERN_INFO "Your memory is not aligned you need to " "rebuild your kernel with a bigger NODEMAPSIZE " "shift=%d\n", shift); @@ -462,7 +468,7 @@ done: } } out: - memnode_shift = compute_hash_shift(nodes, num_nodes); + memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); if (memnode_shift < 0) { memnode_shift = 0; printk(KERN_ERR "No NUMA hash function found. NUMA emulation " diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 1bae9c855ce..fb43d89f46f 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -32,6 +32,10 @@ static struct bootnode nodes_add[MAX_NUMNODES]; static int found_add_area __initdata; int hotadd_percent __initdata = 0; +static int num_node_memblks __initdata; +static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; +static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; + /* Too small nodes confuse the VM badly. Usually they result from BIOS bugs. */ #define NODE_MIN_SIZE (4*1024*1024) @@ -41,17 +45,17 @@ static __init int setup_node(int pxm) return acpi_map_pxm_to_node(pxm); } -static __init int conflicting_nodes(unsigned long start, unsigned long end) +static __init int conflicting_memblks(unsigned long start, unsigned long end) { int i; - for_each_node_mask(i, nodes_parsed) { - struct bootnode *nd = &nodes[i]; + for (i = 0; i < num_node_memblks; i++) { + struct bootnode *nd = &node_memblk_range[i]; if (nd->start == nd->end) continue; if (nd->end > start && nd->start < end) - return i; + return memblk_nodeid[i]; if (nd->end == end && nd->start == start) - return i; + return memblk_nodeid[i]; } return -1; } @@ -258,7 +262,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) bad_srat(); return; } - i = conflicting_nodes(start, end); + i = conflicting_memblks(start, end); if (i == node) { printk(KERN_WARNING "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n", @@ -283,10 +287,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) nd->end = end; } - printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, - nd->start, nd->end); - e820_register_active_regions(node, nd->start >> PAGE_SHIFT, - nd->end >> PAGE_SHIFT); + printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, + start, end); + e820_register_active_regions(node, start >> PAGE_SHIFT, + end >> PAGE_SHIFT); push_node_boundaries(node, nd->start >> PAGE_SHIFT, nd->end >> PAGE_SHIFT); @@ -298,6 +302,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) if ((nd->start | nd->end) == 0) node_clear(node, nodes_parsed); } + + node_memblk_range[num_node_memblks].start = start; + node_memblk_range[num_node_memblks].end = end; + memblk_nodeid[num_node_memblks] = node; + num_node_memblks++; } /* Sanity check to catch more bad SRATs (they are amazingly common). @@ -368,7 +377,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) return -1; } - memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES); + memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, + memblk_nodeid); if (memnode_shift < 0) { printk(KERN_ERR "SRAT: No NUMA node hash function found. Contact maintainer\n"); -- cgit v1.2.3 From 752bea4abbff5e3ffef36802b860e80d0b632990 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 7 Mar 2008 15:02:50 -0800 Subject: x86: reserve dma32 early for gart a system with 256 GB of RAM, when NUMA is disabled crashes the following way: Your BIOS doesn't leave a aperture memory hole Please enable the IOMMU option in the BIOS setup This costs you 64 MB of RAM Cannot allocate aperture memory hole (ffff8101c0000000,65536K) Kernel panic - not syncing: Not enough memory for aperture Pid: 0, comm: swapper Not tainted 2.6.25-rc4-x86-latest.git #33 Call Trace: [] panic+0xb2/0x190 [] ? release_console_sem+0x7c/0x250 [] ? __alloc_bootmem_nopanic+0x48/0x90 [] ? free_bootmem+0x29/0x50 [] gart_iommu_hole_init+0x5e7/0x680 [] ? alloc_large_system_hash+0x16b/0x310 [] ? _etext+0x0/0x1 [] pci_iommu_alloc+0x1c/0x40 [] mem_init+0x45/0x1a0 [] start_kernel+0x295/0x380 [] _sinittext+0x1c2/0x230 the root cause is : memmap PMD is too big, [ffffe200e0600000-ffffe200e07fffff] PMD ->ffff81383c000000 on node 0 almost near 4G..., and vmemmap_alloc_block will use up the ram under 4G. solution will be: 1. make memmap allocation get memory above 4G... 2. reserve some dma32 range early before we try to set up memmap for all. and release that before pci_iommu_alloc, so gart or swiotlb could get some range under 4g limit for sure. the patch is using method 2. because method1 may need more code to handle SPARSEMEM and SPASEMEM_VMEMMAP will get Your BIOS doesn't leave a aperture memory hole Please enable the IOMMU option in the BIOS setup This costs you 64 MB of RAM Mapping aperture over 65536 KB of RAM @ 4000000 Memory: 264245736k/268959744k available (8484k kernel code, 4187464k reserved, 4004k data, 724k init) Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_64.c | 49 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/setup_64.c | 2 ++ 2 files changed, 51 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index ada5a060499..e4fffaabe53 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include #include @@ -286,8 +288,55 @@ static __init int iommu_setup(char *p) } early_param("iommu", iommu_setup); +static __initdata void *dma32_bootmem_ptr; +static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); + +static int __init parse_dma32_size_opt(char *p) +{ + if (!p) + return -EINVAL; + dma32_bootmem_size = memparse(p, &p); + return 0; +} +early_param("dma32_size", parse_dma32_size_opt); + +void __init dma32_reserve_bootmem(void) +{ + unsigned long size, align; + if (end_pfn <= MAX_DMA32_PFN) + return; + + align = 64ULL<<20; + size = round_up(dma32_bootmem_size, align); + dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, + __pa(MAX_DMA_ADDRESS)); + if (dma32_bootmem_ptr) + dma32_bootmem_size = size; + else + dma32_bootmem_size = 0; +} +static void __init dma32_free_bootmem(void) +{ + int node; + + if (end_pfn <= MAX_DMA32_PFN) + return; + + if (!dma32_bootmem_ptr) + return; + + for_each_online_node(node) + free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr), + dma32_bootmem_size); + + dma32_bootmem_ptr = NULL; + dma32_bootmem_size = 0; +} + void __init pci_iommu_alloc(void) { + /* free the range so iommu could get some range less than 4G */ + dma32_free_bootmem(); /* * The order of these functions is important for * fall-back/fail-over reasons diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 0aa291bff4e..6b8e11f0c15 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -398,6 +398,8 @@ void __init setup_arch(char **cmdline_p) early_res_to_bootmem(); + dma32_reserve_bootmem(); + #ifdef CONFIG_ACPI_SLEEP /* * Reserve low memory region for sleep support. -- cgit v1.2.3 From 22456b97148be300e25e9cb97244656775972475 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:21 -0300 Subject: x86: implement dma_map_single through dma_ops That's already the name of the game for x86_64. For i386, we add a pci-base_32.c, that will hold the default operations. The function call itself goes through dma-mapping.h , the common header Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/pci-base_32.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 arch/x86/kernel/pci-base_32.c (limited to 'arch') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 7a2a2e93e84..edd5c54ffde 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -26,6 +26,7 @@ obj-y += pci-dma_$(BITS).o bootflag.o e820_$(BITS).o obj-y += quirks.o i8237.o topology.o kdebugfs.o obj-y += alternative.o i8253.o obj-$(CONFIG_X86_64) += pci-nommu_64.o bugs_64.o +obj-$(CONFIG_X86_32) += pci-base_32.o obj-y += tsc_$(BITS).o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c new file mode 100644 index 00000000000..b613d735f76 --- /dev/null +++ b/arch/x86/kernel/pci-base_32.c @@ -0,0 +1,20 @@ +#include +#include +#include +#include +#include + +static dma_addr_t pci32_map_single(struct device *dev, void *ptr, + size_t size, int direction) +{ + WARN_ON(size == 0); + flush_write_buffers(); + return virt_to_phys(ptr); +} + +static const struct dma_mapping_ops pci32_dma_ops = { + .map_single = pci32_map_single, +}; + +const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; +EXPORT_SYMBOL(dma_ops); -- cgit v1.2.3 From 0cb0ae68323657663e4e8c0c1ce82a5af6621bbb Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:22 -0300 Subject: x86: move dma_unmap_single to common header i386 base does not need it, so it gets an empty function. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index b613d735f76..a8a7c7f2d23 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -14,6 +14,7 @@ static dma_addr_t pci32_map_single(struct device *dev, void *ptr, static const struct dma_mapping_ops pci32_dma_ops = { .map_single = pci32_map_single, + .unmap_single = NULL, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From 16a3ce9bae667178f79a4951fc0ba8b515b5b733 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:23 -0300 Subject: x86: move dma_map_sg to common header the old i386 implementation is moved to pci-base_32.c Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index a8a7c7f2d23..24741525901 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -12,9 +12,28 @@ static dma_addr_t pci32_map_single(struct device *dev, void *ptr, return virt_to_phys(ptr); } +static int pci32_dma_map_sg(struct device *dev, struct scatterlist *sglist, + int nents, int direction) +{ + struct scatterlist *sg; + int i; + + WARN_ON(nents == 0 || sglist[0].length == 0); + + for_each_sg(sglist, sg, nents, i) { + BUG_ON(!sg_page(sg)); + + sg->dma_address = sg_phys(sg); + } + + flush_write_buffers(); + return nents; +} + static const struct dma_mapping_ops pci32_dma_ops = { .map_single = pci32_map_single, .unmap_single = NULL, + .map_sg = pci32_dma_map_sg, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From 72c784f82c378df1903676acd2efc5eeb5cac579 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:24 -0300 Subject: x86: move dma_unmap_sg to common header i386 gets an empty function. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index 24741525901..920530438d8 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -34,6 +34,7 @@ static const struct dma_mapping_ops pci32_dma_ops = { .map_single = pci32_map_single, .unmap_single = NULL, .map_sg = pci32_dma_map_sg, + .unmap_sg = NULL, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From c01dd8cf7d19b869af1668c80a34a955c871f607 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:25 -0300 Subject: x86: move dma_sync_single_for_cpu to common header i386 gets an empty function. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index 920530438d8..dce03c81bb2 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -35,6 +35,7 @@ static const struct dma_mapping_ops pci32_dma_ops = { .unmap_single = NULL, .map_sg = pci32_dma_map_sg, .unmap_sg = NULL, + .sync_single_for_cpu = NULL, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From 9231b269e09ed60910c159cf668f887623b7ac58 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:26 -0300 Subject: x86: move dma_sync_single_for_device to common header i386 gets an empty function. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index dce03c81bb2..36488245e36 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -36,6 +36,7 @@ static const struct dma_mapping_ops pci32_dma_ops = { .map_sg = pci32_dma_map_sg, .unmap_sg = NULL, .sync_single_for_cpu = NULL, + .sync_single_for_device = NULL, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From 627610fcb70164991ed0d11110a56c43b15b9312 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:27 -0300 Subject: x86: move dma_sync_single_range_for_cpu to common header i386 gets an empty function. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index 36488245e36..c501599a450 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -37,6 +37,7 @@ static const struct dma_mapping_ops pci32_dma_ops = { .unmap_sg = NULL, .sync_single_for_cpu = NULL, .sync_single_for_device = NULL, + .sync_single_range_for_cpu = NULL, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From 713623326c816b145105769f174ec237815e53f1 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:28 -0300 Subject: x86: move dma_sync_single_range_for_device to common header i386 gets an empty function. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index c501599a450..4512c307b60 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -38,6 +38,7 @@ static const struct dma_mapping_ops pci32_dma_ops = { .sync_single_for_cpu = NULL, .sync_single_for_device = NULL, .sync_single_range_for_cpu = NULL, + .sync_single_range_for_device = NULL, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From ed435dee9cb470082e4550edbfcbc7e81132e976 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:29 -0300 Subject: x86: move dma_sync_sg_for_cpu to common header i386 gets an empty function. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index 4512c307b60..d876600aaeb 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -39,6 +39,7 @@ static const struct dma_mapping_ops pci32_dma_ops = { .sync_single_for_device = NULL, .sync_single_range_for_cpu = NULL, .sync_single_range_for_device = NULL, + .sync_sg_for_cpu = NULL, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From e7f3a913f91b7bfef3a93dff27930f24bdfcd2c0 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:30 -0300 Subject: x86: move dma_sync_sg_for_device to common header i386 gets an empty function. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index d876600aaeb..033d94ec500 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -40,6 +40,7 @@ static const struct dma_mapping_ops pci32_dma_ops = { .sync_single_range_for_cpu = NULL, .sync_single_range_for_device = NULL, .sync_sg_for_cpu = NULL, + .sync_sg_for_device = NULL, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From 2be621498d461b63ca6124f86e3b9582e1a8e722 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 19 Apr 2008 19:19:56 +0200 Subject: x86: dma-ops on highmem fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 4 ++-- arch/x86/kernel/pci-calgary_64.c | 3 ++- arch/x86/kernel/pci-dma_64.c | 2 +- arch/x86/kernel/pci-gart_64.c | 15 +++++++-------- arch/x86/kernel/pci-nommu_64.c | 4 ++-- arch/x86/kernel/pci-swiotlb_64.c | 9 ++++++++- 6 files changed, 22 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index 033d94ec500..cf4bb28dfc6 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -4,12 +4,12 @@ #include #include -static dma_addr_t pci32_map_single(struct device *dev, void *ptr, +static dma_addr_t pci32_map_single(struct device *dev, phys_addr_t ptr, size_t size, int direction) { WARN_ON(size == 0); flush_write_buffers(); - return virt_to_phys(ptr); + return ptr; } static int pci32_dma_map_sg(struct device *dev, struct scatterlist *sglist, diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 1b5464c2434..adb91e4b62d 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -470,10 +470,11 @@ error: return 0; } -static dma_addr_t calgary_map_single(struct device *dev, void *vaddr, +static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, size_t size, int direction) { dma_addr_t dma_handle = bad_dma_address; + void *vaddr = phys_to_virt(paddr); unsigned long uaddr; unsigned int npages; struct iommu_table *tbl = find_iommu_table(dev); diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index e4fffaabe53..f97a08d0a8f 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -141,7 +141,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, } if (dma_ops->map_simple) { - *dma_handle = dma_ops->map_simple(dev, memory, + *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), size, PCI_DMA_BIDIRECTIONAL); if (*dma_handle != bad_dma_address) diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 700e4647dd3..c07455d1695 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -264,9 +264,9 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, } static dma_addr_t -gart_map_simple(struct device *dev, char *buf, size_t size, int dir) +gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir) { - dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); + dma_addr_t map = dma_map_area(dev, paddr, size, dir); flush_gart(); @@ -275,18 +275,17 @@ gart_map_simple(struct device *dev, char *buf, size_t size, int dir) /* Map a single area into the IOMMU */ static dma_addr_t -gart_map_single(struct device *dev, void *addr, size_t size, int dir) +gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) { - unsigned long phys_mem, bus; + unsigned long bus; if (!dev) dev = &fallback_dev; - phys_mem = virt_to_phys(addr); - if (!need_iommu(dev, phys_mem, size)) - return phys_mem; + if (!need_iommu(dev, paddr, size)) + return paddr; - bus = gart_map_simple(dev, addr, size, dir); + bus = gart_map_simple(dev, paddr, size, dir); return bus; } diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c index ab08e183222..6e330769d01 100644 --- a/arch/x86/kernel/pci-nommu_64.c +++ b/arch/x86/kernel/pci-nommu_64.c @@ -26,10 +26,10 @@ check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) } static dma_addr_t -nommu_map_single(struct device *hwdev, void *ptr, size_t size, +nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int direction) { - dma_addr_t bus = virt_to_bus(ptr); + dma_addr_t bus = paddr; if (!check_addr("map_single", hwdev, bus, size)) return bad_dma_address; return bus; diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 82a0a674a00..490da7f4b8d 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -11,11 +11,18 @@ int swiotlb __read_mostly; +static dma_addr_t +swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, + int direction) +{ + return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); +} + const struct dma_mapping_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, .alloc_coherent = swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, - .map_single = swiotlb_map_single, + .map_single = swiotlb_map_single_phys, .unmap_single = swiotlb_unmap_single, .sync_single_for_cpu = swiotlb_sync_single_for_cpu, .sync_single_for_device = swiotlb_sync_single_for_device, -- cgit v1.2.3 From 802c1f6648aeb3eea670b4ef8b10014169b65699 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:34 -0300 Subject: x86: move dma_supported and dma_set_mask to pci-dma_32.c This is the way x86_64 does, so this make them equal. They have to be extern now in the header, and the extern definition is moved to the common dma-mapping.h header. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 51330321a5d..453b4bda271 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -156,6 +156,39 @@ EXPORT_SYMBOL(dma_mark_declared_memory_occupied); int forbid_dac; EXPORT_SYMBOL(forbid_dac); +int +dma_supported(struct device *dev, u64 mask) +{ + /* + * we fall back to GFP_DMA when the mask isn't all 1s, + * so we can't guarantee allocations that must be + * within a tighter range than GFP_DMA.. + */ + if (mask < 0x00ffffff) + return 0; + + /* Work around chipset bugs */ + if (forbid_dac > 0 && mask > 0xffffffffULL) + return 0; + + if (dma_ops->dma_supported) + return dma_ops->dma_supported(dev, mask); + + return 1; +} + +int +dma_set_mask(struct device *dev, u64 mask) +{ + if (!dev->dma_mask || !dma_supported(dev, mask)) + return -EIO; + + *dev->dma_mask = mask; + + return 0; +} + + static __devinit void via_no_dac(struct pci_dev *dev) { if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { -- cgit v1.2.3 From 7c18341665917b493fa40eeb3c7ff6c1a5ac47db Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:36 -0300 Subject: x86: provide a bad_dma_address symbol for i386 It's initially 0, since we don't expect any DMA there. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 453b4bda271..55ab3c874d8 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -14,6 +14,10 @@ #include #include +/* For i386, we make it point to the NULL address */ +dma_addr_t bad_dma_address __read_mostly = 0x0; +EXPORT_SYMBOL(bad_dma_address); + struct dma_coherent_mem { void *virt_base; u32 device_base; -- cgit v1.2.3 From c786df08f6df2833e34e78cee5ef62558e3b5346 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 25 Mar 2008 18:36:37 -0300 Subject: x86: unify dma_mapping_error We provide a map_error function in pci-base_32.c to make sure i386 keeps with the same behaviour it used to. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index cf4bb28dfc6..7caf5c211f2 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -30,6 +30,12 @@ static int pci32_dma_map_sg(struct device *dev, struct scatterlist *sglist, return nents; } +/* Make sure we keep the same behaviour */ +static int pci32_map_error(dma_addr_t dma_addr) +{ + return 0; +} + static const struct dma_mapping_ops pci32_dma_ops = { .map_single = pci32_map_single, .unmap_single = NULL, @@ -41,6 +47,7 @@ static const struct dma_mapping_ops pci32_dma_ops = { .sync_single_range_for_device = NULL, .sync_sg_for_cpu = NULL, .sync_sg_for_device = NULL, + .mapping_error = pci32_map_error, }; const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -- cgit v1.2.3 From 19e395afb44746ce7422a9eabcf883d5eec2bb80 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 27 Mar 2008 11:03:15 +0000 Subject: x86: move dma_supported and dma_set_mask to pci-dma_32.c, fix ERROR: "dma_supported" [drivers/ssb/ssb.ko] undefined! ERROR: "dma_set_mask" [drivers/scsi/qla2xxx/qla2xxx.ko] undefined! ERROR: "dma_set_mask" [drivers/scsi/aic7xxx/aic7xxx.ko] undefined! ERROR: "dma_set_mask" [drivers/scsi/aic7xxx/aic79xx.ko] undefined! ERROR: "dma_supported" [drivers/net/pcnet32.ko] undefined! ERROR: "dma_supported" [drivers/media/video/saa7134/saa7134.ko] undefined! ERROR: "dma_set_mask" [drivers/media/video/meye.ko] undefined! ERROR: "dma_supported" [drivers/media/video/cx88/cx8802.ko] undefined! ERROR: "dma_supported" [drivers/media/video/cx88/cx8800.ko] undefined! ERROR: "dma_supported" [drivers/media/video/cx88/cx88-alsa.ko] undefined! ERROR: "dma_supported" [drivers/media/video/cx23885/cx23885.ko] undefined! They just need to be exported like on x86_64. dma_supported() and dma_set_mask() were previously inlined, but are now moved to pci-dma_32.c. Since they're used by various drivers, they need to be exported. Signed-off-by: Mark McLoughlin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 55ab3c874d8..be6b1f6aa1a 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -180,6 +180,7 @@ dma_supported(struct device *dev, u64 mask) return 1; } +EXPORT_SYMBOL(dma_supported); int dma_set_mask(struct device *dev, u64 mask) @@ -191,6 +192,7 @@ dma_set_mask(struct device *dev, u64 mask) return 0; } +EXPORT_SYMBOL(dma_set_mask); static __devinit void via_no_dac(struct pci_dev *dev) -- cgit v1.2.3 From 459121c9ec1e6c5d701f6520f4170719ac008951 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:43 -0300 Subject: x86: introduce pci-dma.c This patch introduces pci-dma.c, a common file for pci dma between i386 and x86_64. As a start, dma_set_mask() is the same between architectures, and is placed there. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/pci-dma.c | 14 ++++++++++++++ arch/x86/kernel/pci-dma_32.c | 12 ------------ arch/x86/kernel/pci-dma_64.c | 9 --------- 4 files changed, 15 insertions(+), 22 deletions(-) create mode 100644 arch/x86/kernel/pci-dma.c (limited to 'arch') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index edd5c54ffde..1799f76a6a9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -23,7 +23,7 @@ obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o obj-y += pci-dma_$(BITS).o bootflag.o e820_$(BITS).o -obj-y += quirks.o i8237.o topology.o kdebugfs.o +obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o obj-y += alternative.o i8253.o obj-$(CONFIG_X86_64) += pci-nommu_64.o bugs_64.o obj-$(CONFIG_X86_32) += pci-base_32.o diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c new file mode 100644 index 00000000000..f1c24d8e794 --- /dev/null +++ b/arch/x86/kernel/pci-dma.c @@ -0,0 +1,14 @@ +#include + +int dma_set_mask(struct device *dev, u64 mask) +{ + if (!dev->dma_mask || !dma_supported(dev, mask)) + return -EIO; + + *dev->dma_mask = mask; + + return 0; +} +EXPORT_SYMBOL(dma_set_mask); + + diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index be6b1f6aa1a..9e8297657c3 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -182,18 +182,6 @@ dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); -int -dma_set_mask(struct device *dev, u64 mask) -{ - if (!dev->dma_mask || !dma_supported(dev, mask)) - return -EIO; - - *dev->dma_mask = mask; - - return 0; -} -EXPORT_SYMBOL(dma_set_mask); - static __devinit void via_no_dac(struct pci_dev *dev) { diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index f97a08d0a8f..e697b865c1a 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -213,15 +213,6 @@ int dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); -int dma_set_mask(struct device *dev, u64 mask) -{ - if (!dev->dma_mask || !dma_supported(dev, mask)) - return -EIO; - *dev->dma_mask = mask; - return 0; -} -EXPORT_SYMBOL(dma_set_mask); - /* * See for the iommu kernel parameter * documentation. -- cgit v1.2.3 From d5df63f48a67400a26eba15624aa883897a4f4d1 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:44 -0300 Subject: x86: delete empty functions from pci-nommu_64.c This functions are now called conditionally on their existence in the struct. So just delete them, instead of keeping an empty implementation. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-nommu_64.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c index 6e330769d01..90a7c40aa98 100644 --- a/arch/x86/kernel/pci-nommu_64.c +++ b/arch/x86/kernel/pci-nommu_64.c @@ -35,10 +35,6 @@ nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, return bus; } -static void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size, - int direction) -{ -} /* Map a set of buffers described by scatterlist in streaming * mode for DMA. This is the scatter-gather version of the @@ -71,20 +67,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } -/* Unmap a set of streaming mode DMA translations. - * Again, cpu read rules concerning calls here are the same as for - * pci_unmap_single() above. - */ -static void nommu_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, int dir) -{ -} - const struct dma_mapping_ops nommu_dma_ops = { .map_single = nommu_map_single, - .unmap_single = nommu_unmap_single, .map_sg = nommu_map_sg, - .unmap_sg = nommu_unmap_sg, .is_phys = 1, }; -- cgit v1.2.3 From 9f9ab46d557c32b9cad49c31d094d659ec3b59c0 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:45 -0300 Subject: x86: implement mapping_error in pci-nommu_64.c This patch implements mapping_error for pci-nommu_64.c. It takes care to keep the same compatible behaviour it already had. Although this file is not (yet) used for i386, we introduce the i386 version here. Again, care is taken, even at the expense of an ifdef, to keep the same behaviour inconditionally. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-nommu_64.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c index 90a7c40aa98..a4e8ccfae4c 100644 --- a/arch/x86/kernel/pci-nommu_64.c +++ b/arch/x86/kernel/pci-nommu_64.c @@ -67,9 +67,21 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } +/* Make sure we keep the same behaviour */ +static int nommu_mapping_error(dma_addr_t dma_addr) +{ +#ifdef CONFIG_X86_32 + return 0; +#else + return (dma_addr == bad_dma_address); +#endif +} + + const struct dma_mapping_ops nommu_dma_ops = { .map_single = nommu_map_single, .map_sg = nommu_map_sg, + .mapping_error = nommu_mapping_error, .is_phys = 1, }; -- cgit v1.2.3 From e4dcdd6b4fa33efee94e89cccd75e871c570c510 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:46 -0300 Subject: x86: Add flush_write_buffers in nommu functions This patch adds flush_write_buffers() in some functions of pci-nommu_64.c They are added anywhere i386 would also have it. This is not a problem for x86_64, since flush_rite_buffers() an nop for it. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-nommu_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c index a4e8ccfae4c..1da9cf9be3a 100644 --- a/arch/x86/kernel/pci-nommu_64.c +++ b/arch/x86/kernel/pci-nommu_64.c @@ -32,6 +32,7 @@ nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, dma_addr_t bus = paddr; if (!check_addr("map_single", hwdev, bus, size)) return bad_dma_address; + flush_write_buffers(); return bus; } @@ -64,6 +65,7 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return 0; s->dma_length = s->length; } + flush_write_buffers(); return nents; } -- cgit v1.2.3 From 30db2cbf38d68f466fd34488f8312a151225c9ac Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:47 -0300 Subject: x86: use sg_phys in x86_64 To make the code usable in i386, where we have high memory mappings, we drop te virt_to_bus(sg_virt()) construction in favour of sg_phys. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-nommu_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c index 1da9cf9be3a..c6901e75177 100644 --- a/arch/x86/kernel/pci-nommu_64.c +++ b/arch/x86/kernel/pci-nommu_64.c @@ -60,7 +60,7 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, for_each_sg(sg, s, nents, i) { BUG_ON(!sg_page(s)); - s->dma_address = virt_to_bus(sg_virt(s)); + s->dma_address = sg_phys(s); if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) return 0; s->dma_length = s->length; -- cgit v1.2.3 From 5b3e5b7273435f8a7f83d3556a09adfd6f247e36 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:49 -0300 Subject: x86: use WARN_ON in mapping functions In the very same way i386 do, we use WARN_ON functions in map_simple and map_sg. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-nommu_64.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c index c6901e75177..8d036aee2a8 100644 --- a/arch/x86/kernel/pci-nommu_64.c +++ b/arch/x86/kernel/pci-nommu_64.c @@ -30,6 +30,7 @@ nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int direction) { dma_addr_t bus = paddr; + WARN_ON(size == 0); if (!check_addr("map_single", hwdev, bus, size)) return bad_dma_address; flush_write_buffers(); @@ -58,6 +59,8 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, struct scatterlist *s; int i; + WARN_ON(nents == 0 || sg[0].length == 0); + for_each_sg(sg, s, nents, i) { BUG_ON(!sg_page(s)); s->dma_address = sg_phys(s); -- cgit v1.2.3 From d741bde26dc3444eaeb269051d3f0b623b24de13 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:48 -0300 Subject: x86: use dma_length in i386 This is done to get the code closer to x86_64. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index 7caf5c211f2..837bbe91043 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -24,6 +24,7 @@ static int pci32_dma_map_sg(struct device *dev, struct scatterlist *sglist, BUG_ON(!sg_page(sg)); sg->dma_address = sg_phys(sg); + sg->dma_length = sg->length; } flush_write_buffers(); -- cgit v1.2.3 From 85c246ee16fe00bf7bf9e7ff09a5d17d9a83cf71 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:50 -0300 Subject: x86: move definition to pci-dma.c Move dma_ops structure definition to pci-dma.c, where it belongs. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-base_32.c | 11 ++++++++--- arch/x86/kernel/pci-dma.c | 3 +++ arch/x86/mm/init_64.c | 3 --- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c index 837bbe91043..b44ea517fcf 100644 --- a/arch/x86/kernel/pci-base_32.c +++ b/arch/x86/kernel/pci-base_32.c @@ -37,7 +37,7 @@ static int pci32_map_error(dma_addr_t dma_addr) return 0; } -static const struct dma_mapping_ops pci32_dma_ops = { +const struct dma_mapping_ops pci32_dma_ops = { .map_single = pci32_map_single, .unmap_single = NULL, .map_sg = pci32_dma_map_sg, @@ -51,5 +51,10 @@ static const struct dma_mapping_ops pci32_dma_ops = { .mapping_error = pci32_map_error, }; -const struct dma_mapping_ops *dma_ops = &pci32_dma_ops; -EXPORT_SYMBOL(dma_ops); +/* this is temporary */ +int __init no_iommu_init(void) +{ + dma_ops = &pci32_dma_ops; + return 0; +} +fs_initcall(no_iommu_init); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index f1c24d8e794..1323cd80387 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,5 +1,8 @@ #include +const struct dma_mapping_ops *dma_ops; +EXPORT_SYMBOL(dma_ops); + int dma_set_mask(struct device *dev, u64 mask) { if (!dev->dma_mask || !dma_supported(dev, mask)) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1076097dcab..1ff7906a9a4 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -47,9 +47,6 @@ #include #include -const struct dma_mapping_ops *dma_ops; -EXPORT_SYMBOL(dma_ops); - static unsigned long dma_reserve __initdata; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -- cgit v1.2.3 From f9c258de3494a5249a61fe110ece2082e5927468 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:52 -0300 Subject: x86: unify pci-nommu merge pci-base_32.c and pci-nommu_64.c into pci-nommu.c Their code were made the same, so now they can be merged. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 5 +-- arch/x86/kernel/pci-base_32.c | 60 ------------------------- arch/x86/kernel/pci-dma.c | 8 ++++ arch/x86/kernel/pci-dma_64.c | 8 ---- arch/x86/kernel/pci-nommu.c | 100 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/pci-nommu_64.c | 100 ----------------------------------------- 6 files changed, 110 insertions(+), 171 deletions(-) delete mode 100644 arch/x86/kernel/pci-base_32.c create mode 100644 arch/x86/kernel/pci-nommu.c delete mode 100644 arch/x86/kernel/pci-nommu_64.c (limited to 'arch') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 1799f76a6a9..307aee5e8c5 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -24,9 +24,8 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o obj-y += pci-dma_$(BITS).o bootflag.o e820_$(BITS).o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o -obj-y += alternative.o i8253.o -obj-$(CONFIG_X86_64) += pci-nommu_64.o bugs_64.o -obj-$(CONFIG_X86_32) += pci-base_32.o +obj-y += alternative.o i8253.o pci-nommu.o +obj-$(CONFIG_X86_64) += bugs_64.o obj-y += tsc_$(BITS).o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c deleted file mode 100644 index b44ea517fcf..00000000000 --- a/arch/x86/kernel/pci-base_32.c +++ /dev/null @@ -1,60 +0,0 @@ -#include -#include -#include -#include -#include - -static dma_addr_t pci32_map_single(struct device *dev, phys_addr_t ptr, - size_t size, int direction) -{ - WARN_ON(size == 0); - flush_write_buffers(); - return ptr; -} - -static int pci32_dma_map_sg(struct device *dev, struct scatterlist *sglist, - int nents, int direction) -{ - struct scatterlist *sg; - int i; - - WARN_ON(nents == 0 || sglist[0].length == 0); - - for_each_sg(sglist, sg, nents, i) { - BUG_ON(!sg_page(sg)); - - sg->dma_address = sg_phys(sg); - sg->dma_length = sg->length; - } - - flush_write_buffers(); - return nents; -} - -/* Make sure we keep the same behaviour */ -static int pci32_map_error(dma_addr_t dma_addr) -{ - return 0; -} - -const struct dma_mapping_ops pci32_dma_ops = { - .map_single = pci32_map_single, - .unmap_single = NULL, - .map_sg = pci32_dma_map_sg, - .unmap_sg = NULL, - .sync_single_for_cpu = NULL, - .sync_single_for_device = NULL, - .sync_single_range_for_cpu = NULL, - .sync_single_range_for_device = NULL, - .sync_sg_for_cpu = NULL, - .sync_sg_for_device = NULL, - .mapping_error = pci32_map_error, -}; - -/* this is temporary */ -int __init no_iommu_init(void) -{ - dma_ops = &pci32_dma_ops; - return 0; -} -fs_initcall(no_iommu_init); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1323cd80387..37a558a9615 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -3,6 +3,14 @@ const struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); +#ifdef CONFIG_IOMMU_DEBUG +int panic_on_overflow __read_mostly = 1; +int force_iommu __read_mostly = 1; +#else +int panic_on_overflow __read_mostly = 0; +int force_iommu __read_mostly = 0; +#endif + int dma_set_mask(struct device *dev, u64 mask) { if (!dev->dma_mask || !dma_supported(dev, mask)) diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index e697b865c1a..9ef18bfad2a 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -27,14 +27,6 @@ EXPORT_SYMBOL(iommu_bio_merge); static int iommu_sac_force __read_mostly = 0; int no_iommu __read_mostly; -#ifdef CONFIG_IOMMU_DEBUG -int panic_on_overflow __read_mostly = 1; -int force_iommu __read_mostly = 1; -#else -int panic_on_overflow __read_mostly = 0; -int force_iommu __read_mostly= 0; -#endif - /* Set this to 1 if there is a HW IOMMU in the system */ int iommu_detected __read_mostly = 0; diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c new file mode 100644 index 00000000000..aec43d56f49 --- /dev/null +++ b/arch/x86/kernel/pci-nommu.c @@ -0,0 +1,100 @@ +/* Fallback functions when the main IOMMU code is not compiled in. This + code is roughly equivalent to i386. */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int +check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) +{ + if (hwdev && bus + size > *hwdev->dma_mask) { + if (*hwdev->dma_mask >= DMA_32BIT_MASK) + printk(KERN_ERR + "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", + name, (long long)bus, size, + (long long)*hwdev->dma_mask); + return 0; + } + return 1; +} + +static dma_addr_t +nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, + int direction) +{ + dma_addr_t bus = paddr; + WARN_ON(size == 0); + if (!check_addr("map_single", hwdev, bus, size)) + return bad_dma_address; + flush_write_buffers(); + return bus; +} + + +/* Map a set of buffers described by scatterlist in streaming + * mode for DMA. This is the scatter-gather version of the + * above pci_map_single interface. Here the scatter gather list + * elements are each tagged with the appropriate dma address + * and length. They are obtained via sg_dma_{address,length}(SG). + * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for pci_map_single are + * the same here. + */ +static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, + int nents, int direction) +{ + struct scatterlist *s; + int i; + + WARN_ON(nents == 0 || sg[0].length == 0); + + for_each_sg(sg, s, nents, i) { + BUG_ON(!sg_page(s)); + s->dma_address = sg_phys(s); + if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) + return 0; + s->dma_length = s->length; + } + flush_write_buffers(); + return nents; +} + +/* Make sure we keep the same behaviour */ +static int nommu_mapping_error(dma_addr_t dma_addr) +{ +#ifdef CONFIG_X86_32 + return 0; +#else + return (dma_addr == bad_dma_address); +#endif +} + + +const struct dma_mapping_ops nommu_dma_ops = { + .map_single = nommu_map_single, + .map_sg = nommu_map_sg, + .mapping_error = nommu_mapping_error, + .is_phys = 1, +}; + +void __init no_iommu_init(void) +{ + if (dma_ops) + return; + + force_iommu = 0; /* no HW IOMMU */ + dma_ops = &nommu_dma_ops; +} diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c deleted file mode 100644 index 8d036aee2a8..00000000000 --- a/arch/x86/kernel/pci-nommu_64.c +++ /dev/null @@ -1,100 +0,0 @@ -/* Fallback functions when the main IOMMU code is not compiled in. This - code is roughly equivalent to i386. */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static int -check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) -{ - if (hwdev && bus + size > *hwdev->dma_mask) { - if (*hwdev->dma_mask >= DMA_32BIT_MASK) - printk(KERN_ERR - "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", - name, (long long)bus, size, - (long long)*hwdev->dma_mask); - return 0; - } - return 1; -} - -static dma_addr_t -nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, - int direction) -{ - dma_addr_t bus = paddr; - WARN_ON(size == 0); - if (!check_addr("map_single", hwdev, bus, size)) - return bad_dma_address; - flush_write_buffers(); - return bus; -} - - -/* Map a set of buffers described by scatterlist in streaming - * mode for DMA. This is the scatter-gather version of the - * above pci_map_single interface. Here the scatter gather list - * elements are each tagged with the appropriate dma address - * and length. They are obtained via sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for pci_map_single are - * the same here. - */ -static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, int direction) -{ - struct scatterlist *s; - int i; - - WARN_ON(nents == 0 || sg[0].length == 0); - - for_each_sg(sg, s, nents, i) { - BUG_ON(!sg_page(s)); - s->dma_address = sg_phys(s); - if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) - return 0; - s->dma_length = s->length; - } - flush_write_buffers(); - return nents; -} - -/* Make sure we keep the same behaviour */ -static int nommu_mapping_error(dma_addr_t dma_addr) -{ -#ifdef CONFIG_X86_32 - return 0; -#else - return (dma_addr == bad_dma_address); -#endif -} - - -const struct dma_mapping_ops nommu_dma_ops = { - .map_single = nommu_map_single, - .map_sg = nommu_map_sg, - .mapping_error = nommu_mapping_error, - .is_phys = 1, -}; - -void __init no_iommu_init(void) -{ - if (dma_ops) - return; - - force_iommu = 0; /* no HW IOMMU */ - dma_ops = &nommu_dma_ops; -} -- cgit v1.2.3 From cb5867a5d8ca20e16ddc3397c36ee9c2e4cba219 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:51 -0300 Subject: x86: move initialization functions to pci-dma.c initcalls that triggers the various possibiities for dma subsys are moved to pci-dma.c. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma.c | 25 +++++++++++++++++++++++++ arch/x86/kernel/pci-dma_64.c | 23 ----------------------- 2 files changed, 25 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 37a558a9615..6b77fd872a7 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,4 +1,8 @@ #include +#include + +#include +#include const struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); @@ -22,4 +26,25 @@ int dma_set_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_mask); +static int __init pci_iommu_init(void) +{ +#ifdef CONFIG_CALGARY_IOMMU + calgary_iommu_init(); +#endif + + intel_iommu_init(); + +#ifdef CONFIG_GART_IOMMU + gart_iommu_init(); +#endif + no_iommu_init(); + return 0; +} + +void pci_iommu_shutdown(void) +{ + gart_iommu_shutdown(); +} +/* Must execute after PCI subsystem */ +fs_initcall(pci_iommu_init); diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 9ef18bfad2a..42021300964 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -339,27 +339,6 @@ void __init pci_iommu_alloc(void) #endif } -static int __init pci_iommu_init(void) -{ -#ifdef CONFIG_CALGARY_IOMMU - calgary_iommu_init(); -#endif - - intel_iommu_init(); - -#ifdef CONFIG_GART_IOMMU - gart_iommu_init(); -#endif - - no_iommu_init(); - return 0; -} - -void pci_iommu_shutdown(void) -{ - gart_iommu_shutdown(); -} - #ifdef CONFIG_PCI /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ @@ -372,5 +351,3 @@ static __devinit void via_no_dac(struct pci_dev *dev) } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); #endif -/* Must execute after PCI subsystem */ -fs_initcall(pci_iommu_init); -- cgit v1.2.3 From 116890d556af38d539597655c564a73e6eef3d9e Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:54 -0300 Subject: x86: move x86_64-specific to common code. This patch moves the bootmem functions, that are largely x86_64-specific into pci-dma.c. The code goes inside an ifdef. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma.c | 73 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/pci-dma_64.c | 68 ----------------------------------------- 2 files changed, 73 insertions(+), 68 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 6b77fd872a7..91443361cb6 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,6 +1,9 @@ #include #include +#include +#include +#include #include #include @@ -26,6 +29,76 @@ int dma_set_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_mask); +#ifdef CONFIG_X86_64 +static __initdata void *dma32_bootmem_ptr; +static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); + +static int __init parse_dma32_size_opt(char *p) +{ + if (!p) + return -EINVAL; + dma32_bootmem_size = memparse(p, &p); + return 0; +} +early_param("dma32_size", parse_dma32_size_opt); + +void __init dma32_reserve_bootmem(void) +{ + unsigned long size, align; + if (end_pfn <= MAX_DMA32_PFN) + return; + + align = 64ULL<<20; + size = round_up(dma32_bootmem_size, align); + dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, + __pa(MAX_DMA_ADDRESS)); + if (dma32_bootmem_ptr) + dma32_bootmem_size = size; + else + dma32_bootmem_size = 0; +} +static void __init dma32_free_bootmem(void) +{ + int node; + + if (end_pfn <= MAX_DMA32_PFN) + return; + + if (!dma32_bootmem_ptr) + return; + + for_each_online_node(node) + free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr), + dma32_bootmem_size); + + dma32_bootmem_ptr = NULL; + dma32_bootmem_size = 0; +} + +void __init pci_iommu_alloc(void) +{ + /* free the range so iommu could get some range less than 4G */ + dma32_free_bootmem(); + /* + * The order of these functions is important for + * fall-back/fail-over reasons + */ +#ifdef CONFIG_GART_IOMMU + gart_iommu_hole_init(); +#endif + +#ifdef CONFIG_CALGARY_IOMMU + detect_calgary(); +#endif + + detect_intel_iommu(); + +#ifdef CONFIG_SWIOTLB + pci_swiotlb_init(); +#endif +} +#endif + static int __init pci_iommu_init(void) { #ifdef CONFIG_CALGARY_IOMMU diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 42021300964..6b204cc4289 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -271,74 +271,6 @@ static __init int iommu_setup(char *p) } early_param("iommu", iommu_setup); -static __initdata void *dma32_bootmem_ptr; -static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); - -static int __init parse_dma32_size_opt(char *p) -{ - if (!p) - return -EINVAL; - dma32_bootmem_size = memparse(p, &p); - return 0; -} -early_param("dma32_size", parse_dma32_size_opt); - -void __init dma32_reserve_bootmem(void) -{ - unsigned long size, align; - if (end_pfn <= MAX_DMA32_PFN) - return; - - align = 64ULL<<20; - size = round_up(dma32_bootmem_size, align); - dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, - __pa(MAX_DMA_ADDRESS)); - if (dma32_bootmem_ptr) - dma32_bootmem_size = size; - else - dma32_bootmem_size = 0; -} -static void __init dma32_free_bootmem(void) -{ - int node; - - if (end_pfn <= MAX_DMA32_PFN) - return; - - if (!dma32_bootmem_ptr) - return; - - for_each_online_node(node) - free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr), - dma32_bootmem_size); - - dma32_bootmem_ptr = NULL; - dma32_bootmem_size = 0; -} - -void __init pci_iommu_alloc(void) -{ - /* free the range so iommu could get some range less than 4G */ - dma32_free_bootmem(); - /* - * The order of these functions is important for - * fall-back/fail-over reasons - */ -#ifdef CONFIG_GART_IOMMU - gart_iommu_hole_init(); -#endif - -#ifdef CONFIG_CALGARY_IOMMU - detect_calgary(); -#endif - - detect_intel_iommu(); - -#ifdef CONFIG_SWIOTLB - pci_swiotlb_init(); -#endif -} - #ifdef CONFIG_PCI /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ -- cgit v1.2.3 From bca5c09663030bdd18ab1b3ccb6671f663c3345a Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:53 -0300 Subject: x86: move pci fixup to pci-dma.c via_no_dac provides a fixup that is the same for both architectures. Move it to pci-dma.c. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma.c | 18 ++++++++++++++++++ arch/x86/kernel/pci-dma_32.c | 13 ------------- arch/x86/kernel/pci-dma_64.c | 15 --------------- 3 files changed, 18 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 91443361cb6..48cccbe51aa 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,12 +1,16 @@ #include #include #include +#include #include #include #include #include +int forbid_dac __read_mostly; +EXPORT_SYMBOL(forbid_dac); + const struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); @@ -121,3 +125,17 @@ void pci_iommu_shutdown(void) } /* Must execute after PCI subsystem */ fs_initcall(pci_iommu_init); + +#ifdef CONFIG_PCI +/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ + +static __devinit void via_no_dac(struct pci_dev *dev) +{ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { + printk(KERN_INFO "PCI: VIA PCI bridge detected." + "Disabling DAC.\n"); + forbid_dac = 1; + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); +#endif diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 9e8297657c3..6543bb30b65 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -157,9 +157,6 @@ EXPORT_SYMBOL(dma_mark_declared_memory_occupied); #ifdef CONFIG_PCI /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ -int forbid_dac; -EXPORT_SYMBOL(forbid_dac); - int dma_supported(struct device *dev, u64 mask) { @@ -182,16 +179,6 @@ dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); - -static __devinit void via_no_dac(struct pci_dev *dev) -{ - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { - printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n"); - forbid_dac = 1; - } -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); - static int check_iommu(char *s) { if (!strcmp(s, "usedac")) { diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 6b204cc4289..7820675a688 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -161,8 +161,6 @@ void dma_free_coherent(struct device *dev, size_t size, } EXPORT_SYMBOL(dma_free_coherent); -static int forbid_dac __read_mostly; - int dma_supported(struct device *dev, u64 mask) { #ifdef CONFIG_PCI @@ -270,16 +268,3 @@ static __init int iommu_setup(char *p) return 0; } early_param("iommu", iommu_setup); - -#ifdef CONFIG_PCI -/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ - -static __devinit void via_no_dac(struct pci_dev *dev) -{ - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { - printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n"); - forbid_dac = 1; - } -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); -#endif -- cgit v1.2.3 From 8e0c379718ef32967deea55937895bfc9b493dd8 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:55 -0300 Subject: x86: merge dma_supported The code for both arches are very similar, so this patch merge them. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/pci-dma_32.c | 24 ------------------------ arch/x86/kernel/pci-dma_64.c | 44 +------------------------------------------- 3 files changed, 45 insertions(+), 67 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 48cccbe51aa..7d3bd652c36 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -14,6 +14,8 @@ EXPORT_SYMBOL(forbid_dac); const struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); +int iommu_sac_force __read_mostly = 0; + #ifdef CONFIG_IOMMU_DEBUG int panic_on_overflow __read_mostly = 1; int force_iommu __read_mostly = 1; @@ -103,6 +105,48 @@ void __init pci_iommu_alloc(void) } #endif +int dma_supported(struct device *dev, u64 mask) +{ +#ifdef CONFIG_PCI + if (mask > 0xffffffff && forbid_dac > 0) { + printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", + dev->bus_id); + return 0; + } +#endif + + if (dma_ops->dma_supported) + return dma_ops->dma_supported(dev, mask); + + /* Copied from i386. Doesn't make much sense, because it will + only work for pci_alloc_coherent. + The caller just has to use GFP_DMA in this case. */ + if (mask < DMA_24BIT_MASK) + return 0; + + /* Tell the device to use SAC when IOMMU force is on. This + allows the driver to use cheaper accesses in some cases. + + Problem with this is that if we overflow the IOMMU area and + return DAC as fallback address the device may not handle it + correctly. + + As a special case some controllers have a 39bit address + mode that is as efficient as 32bit (aic79xx). Don't force + SAC for these. Assume all masks <= 40 bits are of this + type. Normally this doesn't make any difference, but gives + more gentle handling of IOMMU overflow. */ + if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { + printk(KERN_INFO "%s: Force SAC with mask %Lx\n", + dev->bus_id, mask); + return 0; + } + + return 1; +} +EXPORT_SYMBOL(dma_supported); + + static int __init pci_iommu_init(void) { #ifdef CONFIG_CALGARY_IOMMU diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 6543bb30b65..1d4091af441 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -155,30 +155,6 @@ void *dma_mark_declared_memory_occupied(struct device *dev, EXPORT_SYMBOL(dma_mark_declared_memory_occupied); #ifdef CONFIG_PCI -/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ - -int -dma_supported(struct device *dev, u64 mask) -{ - /* - * we fall back to GFP_DMA when the mask isn't all 1s, - * so we can't guarantee allocations that must be - * within a tighter range than GFP_DMA.. - */ - if (mask < 0x00ffffff) - return 0; - - /* Work around chipset bugs */ - if (forbid_dac > 0 && mask > 0xffffffffULL) - return 0; - - if (dma_ops->dma_supported) - return dma_ops->dma_supported(dev, mask); - - return 1; -} -EXPORT_SYMBOL(dma_supported); - static int check_iommu(char *s) { if (!strcmp(s, "usedac")) { diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 7820675a688..c80da76e7e6 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -24,7 +24,7 @@ EXPORT_SYMBOL(bad_dma_address); int iommu_bio_merge __read_mostly = 0; EXPORT_SYMBOL(iommu_bio_merge); -static int iommu_sac_force __read_mostly = 0; +extern int iommu_sac_force; int no_iommu __read_mostly; /* Set this to 1 if there is a HW IOMMU in the system */ @@ -161,48 +161,6 @@ void dma_free_coherent(struct device *dev, size_t size, } EXPORT_SYMBOL(dma_free_coherent); -int dma_supported(struct device *dev, u64 mask) -{ -#ifdef CONFIG_PCI - if (mask > 0xffffffff && forbid_dac > 0) { - - - - printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", dev->bus_id); - return 0; - } -#endif - - if (dma_ops->dma_supported) - return dma_ops->dma_supported(dev, mask); - - /* Copied from i386. Doesn't make much sense, because it will - only work for pci_alloc_coherent. - The caller just has to use GFP_DMA in this case. */ - if (mask < DMA_24BIT_MASK) - return 0; - - /* Tell the device to use SAC when IOMMU force is on. This - allows the driver to use cheaper accesses in some cases. - - Problem with this is that if we overflow the IOMMU area and - return DAC as fallback address the device may not handle it - correctly. - - As a special case some controllers have a 39bit address - mode that is as efficient as 32bit (aic79xx). Don't force - SAC for these. Assume all masks <= 40 bits are of this - type. Normally this doesn't make any difference, but gives - more gentle handling of IOMMU overflow. */ - if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { - printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask); - return 0; - } - - return 1; -} -EXPORT_SYMBOL(dma_supported); - /* * See for the iommu kernel parameter * documentation. -- cgit v1.2.3 From fae9a0d8ca68a14da8d2351ad3e0bf42f3b29899 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:56 -0300 Subject: x86: merge iommu initialization parameters we merge the iommu initialization parameters in pci-dma.c Nice thing, that both architectures at least recognize the same parameters. usedac i386 parameter is marked for deprecation Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma.c | 81 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/pci-dma_32.c | 12 ------- arch/x86/kernel/pci-dma_64.c | 79 ------------------------------------------ 3 files changed, 81 insertions(+), 91 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 7d3bd652c36..48ab52d052b 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -24,6 +24,18 @@ int panic_on_overflow __read_mostly = 0; int force_iommu __read_mostly = 0; #endif +int iommu_merge __read_mostly = 0; + +int no_iommu __read_mostly; +/* Set this to 1 if there is a HW IOMMU in the system */ +int iommu_detected __read_mostly = 0; + +/* This tells the BIO block layer to assume merging. Default to off + because we cannot guarantee merging later. */ +int iommu_bio_merge __read_mostly = 0; +EXPORT_SYMBOL(iommu_bio_merge); + + int dma_set_mask(struct device *dev, u64 mask) { if (!dev->dma_mask || !dma_supported(dev, mask)) @@ -105,6 +117,75 @@ void __init pci_iommu_alloc(void) } #endif +/* + * See for the iommu kernel parameter + * documentation. + */ +static __init int iommu_setup(char *p) +{ + iommu_merge = 1; + + if (!p) + return -EINVAL; + + while (*p) { + if (!strncmp(p, "off", 3)) + no_iommu = 1; + /* gart_parse_options has more force support */ + if (!strncmp(p, "force", 5)) + force_iommu = 1; + if (!strncmp(p, "noforce", 7)) { + iommu_merge = 0; + force_iommu = 0; + } + + if (!strncmp(p, "biomerge", 8)) { + iommu_bio_merge = 4096; + iommu_merge = 1; + force_iommu = 1; + } + if (!strncmp(p, "panic", 5)) + panic_on_overflow = 1; + if (!strncmp(p, "nopanic", 7)) + panic_on_overflow = 0; + if (!strncmp(p, "merge", 5)) { + iommu_merge = 1; + force_iommu = 1; + } + if (!strncmp(p, "nomerge", 7)) + iommu_merge = 0; + if (!strncmp(p, "forcesac", 8)) + iommu_sac_force = 1; + if (!strncmp(p, "allowdac", 8)) + forbid_dac = 0; + if (!strncmp(p, "nodac", 5)) + forbid_dac = -1; + if (!strncmp(p, "usedac", 6)) { + forbid_dac = -1; + return 1; + } +#ifdef CONFIG_SWIOTLB + if (!strncmp(p, "soft", 4)) + swiotlb = 1; +#endif + +#ifdef CONFIG_GART_IOMMU + gart_parse_options(p); +#endif + +#ifdef CONFIG_CALGARY_IOMMU + if (!strncmp(p, "calgary", 7)) + use_calgary = 1; +#endif /* CONFIG_CALGARY_IOMMU */ + + p += strcspn(p, ","); + if (*p == ',') + ++p; + } + return 0; +} +early_param("iommu", iommu_setup); + int dma_supported(struct device *dev, u64 mask) { #ifdef CONFIG_PCI diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 1d4091af441..eea52df68a3 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -153,15 +153,3 @@ void *dma_mark_declared_memory_occupied(struct device *dev, return mem->virt_base + (pos << PAGE_SHIFT); } EXPORT_SYMBOL(dma_mark_declared_memory_occupied); - -#ifdef CONFIG_PCI -static int check_iommu(char *s) -{ - if (!strcmp(s, "usedac")) { - forbid_dac = -1; - return 1; - } - return 0; -} -__setup("iommu=", check_iommu); -#endif diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index c80da76e7e6..e7d45cf8225 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -14,22 +14,9 @@ #include #include -int iommu_merge __read_mostly = 0; - dma_addr_t bad_dma_address __read_mostly; EXPORT_SYMBOL(bad_dma_address); -/* This tells the BIO block layer to assume merging. Default to off - because we cannot guarantee merging later. */ -int iommu_bio_merge __read_mostly = 0; -EXPORT_SYMBOL(iommu_bio_merge); - -extern int iommu_sac_force; - -int no_iommu __read_mostly; -/* Set this to 1 if there is a HW IOMMU in the system */ -int iommu_detected __read_mostly = 0; - /* Dummy device used for NULL arguments (normally ISA). Better would be probably a smaller DMA mask, but this is bug-to-bug compatible to i386. */ @@ -160,69 +147,3 @@ void dma_free_coherent(struct device *dev, size_t size, free_pages((unsigned long)vaddr, get_order(size)); } EXPORT_SYMBOL(dma_free_coherent); - -/* - * See for the iommu kernel parameter - * documentation. - */ -static __init int iommu_setup(char *p) -{ - iommu_merge = 1; - - if (!p) - return -EINVAL; - - while (*p) { - if (!strncmp(p, "off", 3)) - no_iommu = 1; - /* gart_parse_options has more force support */ - if (!strncmp(p, "force", 5)) - force_iommu = 1; - if (!strncmp(p, "noforce", 7)) { - iommu_merge = 0; - force_iommu = 0; - } - - if (!strncmp(p, "biomerge", 8)) { - iommu_bio_merge = 4096; - iommu_merge = 1; - force_iommu = 1; - } - if (!strncmp(p, "panic", 5)) - panic_on_overflow = 1; - if (!strncmp(p, "nopanic", 7)) - panic_on_overflow = 0; - if (!strncmp(p, "merge", 5)) { - iommu_merge = 1; - force_iommu = 1; - } - if (!strncmp(p, "nomerge", 7)) - iommu_merge = 0; - if (!strncmp(p, "forcesac", 8)) - iommu_sac_force = 1; - if (!strncmp(p, "allowdac", 8)) - forbid_dac = 0; - if (!strncmp(p, "nodac", 5)) - forbid_dac = -1; - -#ifdef CONFIG_SWIOTLB - if (!strncmp(p, "soft", 4)) - swiotlb = 1; -#endif - -#ifdef CONFIG_GART_IOMMU - gart_parse_options(p); -#endif - -#ifdef CONFIG_CALGARY_IOMMU - if (!strncmp(p, "calgary", 7)) - use_calgary = 1; -#endif /* CONFIG_CALGARY_IOMMU */ - - p += strcspn(p, ","); - if (*p == ',') - ++p; - } - return 0; -} -early_param("iommu", iommu_setup); -- cgit v1.2.3 From 8e8edc6401205da3000cc3dfa76f3fd28a21d73c Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:57 -0300 Subject: x86: move dma_coherent functions to pci-dma.c They are placed in an ifdef, since they are i386 specific the structure definition goes to dma-mapping.h. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma.c | 81 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/pci-dma_32.c | 85 -------------------------------------------- 2 files changed, 81 insertions(+), 85 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 48ab52d052b..967dfcfa2ad 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -186,6 +186,87 @@ static __init int iommu_setup(char *p) } early_param("iommu", iommu_setup); +#ifdef CONFIG_X86_32 +int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, + dma_addr_t device_addr, size_t size, int flags) +{ + void __iomem *mem_base = NULL; + int pages = size >> PAGE_SHIFT; + int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); + + if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) + goto out; + if (!size) + goto out; + if (dev->dma_mem) + goto out; + + /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ + + mem_base = ioremap(bus_addr, size); + if (!mem_base) + goto out; + + dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); + if (!dev->dma_mem) + goto out; + dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!dev->dma_mem->bitmap) + goto free1_out; + + dev->dma_mem->virt_base = mem_base; + dev->dma_mem->device_base = device_addr; + dev->dma_mem->size = pages; + dev->dma_mem->flags = flags; + + if (flags & DMA_MEMORY_MAP) + return DMA_MEMORY_MAP; + + return DMA_MEMORY_IO; + + free1_out: + kfree(dev->dma_mem); + out: + if (mem_base) + iounmap(mem_base); + return 0; +} +EXPORT_SYMBOL(dma_declare_coherent_memory); + +void dma_release_declared_memory(struct device *dev) +{ + struct dma_coherent_mem *mem = dev->dma_mem; + + if (!mem) + return; + dev->dma_mem = NULL; + iounmap(mem->virt_base); + kfree(mem->bitmap); + kfree(mem); +} +EXPORT_SYMBOL(dma_release_declared_memory); + +void *dma_mark_declared_memory_occupied(struct device *dev, + dma_addr_t device_addr, size_t size) +{ + struct dma_coherent_mem *mem = dev->dma_mem; + int pos, err; + int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1); + + pages >>= PAGE_SHIFT; + + if (!mem) + return ERR_PTR(-EINVAL); + + pos = (device_addr - mem->device_base) >> PAGE_SHIFT; + err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); + if (err != 0) + return ERR_PTR(err); + return mem->virt_base + (pos << PAGE_SHIFT); +} +EXPORT_SYMBOL(dma_mark_declared_memory_occupied); +#endif /* CONFIG_X86_32 */ + int dma_supported(struct device *dev, u64 mask) { #ifdef CONFIG_PCI diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index eea52df68a3..818d95efc3c 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -18,14 +18,6 @@ dma_addr_t bad_dma_address __read_mostly = 0x0; EXPORT_SYMBOL(bad_dma_address); -struct dma_coherent_mem { - void *virt_base; - u32 device_base; - int size; - int flags; - unsigned long *bitmap; -}; - void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { @@ -76,80 +68,3 @@ void dma_free_coherent(struct device *dev, size_t size, free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL(dma_free_coherent); - -int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags) -{ - void __iomem *mem_base = NULL; - int pages = size >> PAGE_SHIFT; - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); - - if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) - goto out; - if (!size) - goto out; - if (dev->dma_mem) - goto out; - - /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ - - mem_base = ioremap(bus_addr, size); - if (!mem_base) - goto out; - - dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); - if (!dev->dma_mem) - goto out; - dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!dev->dma_mem->bitmap) - goto free1_out; - - dev->dma_mem->virt_base = mem_base; - dev->dma_mem->device_base = device_addr; - dev->dma_mem->size = pages; - dev->dma_mem->flags = flags; - - if (flags & DMA_MEMORY_MAP) - return DMA_MEMORY_MAP; - - return DMA_MEMORY_IO; - - free1_out: - kfree(dev->dma_mem); - out: - if (mem_base) - iounmap(mem_base); - return 0; -} -EXPORT_SYMBOL(dma_declare_coherent_memory); - -void dma_release_declared_memory(struct device *dev) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - - if(!mem) - return; - dev->dma_mem = NULL; - iounmap(mem->virt_base); - kfree(mem->bitmap); - kfree(mem); -} -EXPORT_SYMBOL(dma_release_declared_memory); - -void *dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; - int pos, err; - - if (!mem) - return ERR_PTR(-EINVAL); - - pos = (device_addr - mem->device_base) >> PAGE_SHIFT; - err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); - if (err != 0) - return ERR_PTR(err); - return mem->virt_base + (pos << PAGE_SHIFT); -} -EXPORT_SYMBOL(dma_mark_declared_memory_occupied); -- cgit v1.2.3 From d09d815c1b1d437a3ea89ecd92c91179266d1243 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:58 -0300 Subject: x86: isolate coherent mapping functions i386 implements the declare coherent memory API, and x86_64 does not it is reflected in pieces of dma_alloc_coherent and dma_free_coherent. Those pieces are isolated in separate functions, that are declared as empty macros in x86_64. This way we can make the code the same. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 51 ++++++++++++++++++++++++++++++-------------- arch/x86/kernel/pci-dma_64.c | 11 +++++++++- 2 files changed, 45 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 818d95efc3c..78c7640252a 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -18,27 +18,50 @@ dma_addr_t bad_dma_address __read_mostly = 0x0; EXPORT_SYMBOL(bad_dma_address); -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) +static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, + dma_addr_t *dma_handle, void **ret) { - void *ret; struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; int order = get_order(size); - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); if (mem) { int page = bitmap_find_free_region(mem->bitmap, mem->size, order); if (page >= 0) { *dma_handle = mem->device_base + (page << PAGE_SHIFT); - ret = mem->virt_base + (page << PAGE_SHIFT); - memset(ret, 0, size); - return ret; + *ret = mem->virt_base + (page << PAGE_SHIFT); + memset(*ret, 0, size); } if (mem->flags & DMA_MEMORY_EXCLUSIVE) - return NULL; + *ret = NULL; + } + return (mem != NULL); +} + +static int dma_release_coherent(struct device *dev, int order, void *vaddr) +{ + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; + + if (mem && vaddr >= mem->virt_base && vaddr < + (mem->virt_base + (mem->size << PAGE_SHIFT))) { + int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; + + bitmap_release_region(mem->bitmap, page, order); + return 1; } + return 0; +} + +void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + void *ret = NULL; + int order = get_order(size); + /* ignore region specifiers */ + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); + + if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &ret)) + return ret; if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) gfp |= GFP_DMA; @@ -56,15 +79,11 @@ EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; int order = get_order(size); WARN_ON(irqs_disabled()); /* for portability */ - if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; - - bitmap_release_region(mem->bitmap, page, order); - } else - free_pages((unsigned long)vaddr, order); + if (dma_release_coherent(dev, order, vaddr)) + return; + free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL(dma_free_coherent); diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index e7d45cf8225..6eacd58e451 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -39,6 +39,8 @@ dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) return page ? page_address(page) : NULL; } +#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) +#define dma_release_coherent(dev, order, vaddr) (0) /* * Allocate memory for a coherent mapping. */ @@ -50,6 +52,10 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, unsigned long dma_mask = 0; u64 bus; + + if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) + return memory; + if (!dev) dev = &fallback_dev; dma_mask = dev->coherent_dma_mask; @@ -141,9 +147,12 @@ EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t bus) { + int order = get_order(size); WARN_ON(irqs_disabled()); /* for portability */ + if (dma_release_coherent(dev, order, vaddr)) + return; if (dma_ops->unmap_single) dma_ops->unmap_single(dev, bus, size, 0); - free_pages((unsigned long)vaddr, get_order(size)); + free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL(dma_free_coherent); -- cgit v1.2.3 From cac67877d268f21da74d879a355247e4e25b5b5f Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:21:00 -0300 Subject: x86: move bad_dma_address It goes to pci-dma.c, and is removed from the arch-specific files. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma.c | 2 ++ arch/x86/kernel/pci-dma_32.c | 4 ---- arch/x86/kernel/pci-dma_64.c | 2 -- 3 files changed, 2 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 967dfcfa2ad..00527e74e49 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -35,6 +35,8 @@ int iommu_detected __read_mostly = 0; int iommu_bio_merge __read_mostly = 0; EXPORT_SYMBOL(iommu_bio_merge); +dma_addr_t bad_dma_address __read_mostly = 0; +EXPORT_SYMBOL(bad_dma_address); int dma_set_mask(struct device *dev, u64 mask) { diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 78c7640252a..9199bccb921 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -14,10 +14,6 @@ #include #include -/* For i386, we make it point to the NULL address */ -dma_addr_t bad_dma_address __read_mostly = 0x0; -EXPORT_SYMBOL(bad_dma_address); - static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, dma_addr_t *dma_handle, void **ret) { diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 6eacd58e451..5f03e417421 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -14,8 +14,6 @@ #include #include -dma_addr_t bad_dma_address __read_mostly; -EXPORT_SYMBOL(bad_dma_address); /* Dummy device used for NULL arguments (normally ISA). Better would be probably a smaller DMA mask, but this is bug-to-bug compatible -- cgit v1.2.3 From 2e33e361188617628e47b4bc47e87e84feaf556f Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:20:59 -0300 Subject: x86: adjust dma_free_coherent for i386 We call unmap_single, if available. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 9199bccb921..5ae3470113c 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -80,6 +80,8 @@ void dma_free_coherent(struct device *dev, size_t size, WARN_ON(irqs_disabled()); /* for portability */ if (dma_release_coherent(dev, order, vaddr)) return; + if (dma_ops->unmap_single) + dma_ops->unmap_single(dev, dma_handle, size, 0); free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL(dma_free_coherent); -- cgit v1.2.3 From 71848d687e2a477cb7c68a854d8fdeaa5dff0ffc Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:21:01 -0300 Subject: x86: remove virt_to_bus in pci-dma_64.c virt_to_bus() is deprecated according to the docs, and moreover, won't return the right thing in i386 if we're dealing with high memory mappings. So we make our allocation function return a page, and then use page_address() (for virtual addr) and page_to_phys() (for physical addr) instead. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_64.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 5f03e417421..13a31a4a4c1 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -28,13 +28,11 @@ struct device fallback_dev = { noinline static void * dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) { - struct page *page; int node; node = dev_to_node(dev); - page = alloc_pages_node(node, gfp, order); - return page ? page_address(page) : NULL; + return alloc_pages_node(node, gfp, order); } #define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) @@ -47,6 +45,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { void *memory; + struct page *page; unsigned long dma_mask = 0; u64 bus; @@ -79,13 +78,14 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp |= GFP_DMA32; again: - memory = dma_alloc_pages(dev, gfp, get_order(size)); - if (memory == NULL) + page = dma_alloc_pages(dev, gfp, get_order(size)); + if (page == NULL) return NULL; { int high, mmu; - bus = virt_to_bus(memory); + bus = page_to_phys(page); + memory = page_address(page); high = (bus + size) >= dma_mask; mmu = high; if (force_iommu && !(gfp & GFP_DMA)) @@ -112,7 +112,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, memset(memory, 0, size); if (!mmu) { - *dma_handle = virt_to_bus(memory); + *dma_handle = bus; return memory; } } -- cgit v1.2.3 From d1a079029036881375110f78df47d352e7c28a77 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:21:02 -0300 Subject: x86: use numa allocation function in i386 We can do it here to, in the same way x86_64 does. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 5ae3470113c..0d630ae3d91 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -48,10 +48,23 @@ static int dma_release_coherent(struct device *dev, int order, void *vaddr) return 0; } +/* Allocate DMA memory on node near device */ +noinline struct page * +dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) +{ + int node; + + node = dev_to_node(dev); + + return alloc_pages_node(node, gfp, order); +} + void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { void *ret = NULL; + struct page *page; + dma_addr_t bus; int order = get_order(size); /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); @@ -62,12 +75,16 @@ void *dma_alloc_coherent(struct device *dev, size_t size, if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) gfp |= GFP_DMA; - ret = (void *)__get_free_pages(gfp, order); + page = dma_alloc_pages(dev, gfp, order); + if (page == NULL) + return NULL; + + ret = page_address(page); + bus = page_to_phys(page); + + memset(ret, 0, size); + *dma_handle = bus; - if (ret != NULL) { - memset(ret, 0, size); - *dma_handle = virt_to_phys(ret); - } return ret; } EXPORT_SYMBOL(dma_alloc_coherent); -- cgit v1.2.3 From 45a07e774950ef479f8996c0e2c5550dd6440453 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:21:04 -0300 Subject: x86: use a fallback dev for i386 We can use a fallback dev for cases of a NULL device being passed (mostly ISA) This comes from x86_64 implementation. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 0d630ae3d91..0600a37ba83 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -14,6 +14,16 @@ #include #include +/* Dummy device used for NULL arguments (normally ISA). Better would + be probably a smaller DMA mask, but this is bug-to-bug compatible + to i386. */ +struct device fallback_dev = { + .bus_id = "fallback device", + .coherent_dma_mask = DMA_32BIT_MASK, + .dma_mask = &fallback_dev.coherent_dma_mask, +}; + + static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, dma_addr_t *dma_handle, void **ret) { @@ -75,6 +85,9 @@ void *dma_alloc_coherent(struct device *dev, size_t size, if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) gfp |= GFP_DMA; + if (!dev) + dev = &fallback_dev; + page = dma_alloc_pages(dev, gfp, order); if (page == NULL) return NULL; -- cgit v1.2.3 From 8779f2fc3b84ebb6c5181fb13d702e9944c16069 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 8 Apr 2008 13:21:05 -0300 Subject: x86: don't try to allocate from DMA zone at first If we fail, we'll loop into the allocation again, and then allocate in the DMA zone. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 0600a37ba83..debe9119b72 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -82,9 +82,6 @@ void *dma_alloc_coherent(struct device *dev, size_t size, if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &ret)) return ret; - if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) - gfp |= GFP_DMA; - if (!dev) dev = &fallback_dev; -- cgit v1.2.3 From 5fa78ca75d8e67063948a01b51594a0904af5710 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 9 Apr 2008 13:18:05 -0300 Subject: x86: retry allocation if failed This patch puts in the code to retry allocation in case it fails. By its own, it does not make much sense but making the code look like x86_64. But later patches in this series will make we try to allocate from zones other than DMA first, which will possibly fail. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index debe9119b72..11f100a5f03 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -76,6 +76,8 @@ void *dma_alloc_coherent(struct device *dev, size_t size, struct page *page; dma_addr_t bus; int order = get_order(size); + unsigned long dma_mask = 0; + /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); @@ -85,15 +87,37 @@ void *dma_alloc_coherent(struct device *dev, size_t size, if (!dev) dev = &fallback_dev; + dma_mask = dev->coherent_dma_mask; + if (dma_mask == 0) + dma_mask = DMA_32BIT_MASK; + + again: page = dma_alloc_pages(dev, gfp, order); if (page == NULL) return NULL; - ret = page_address(page); - bus = page_to_phys(page); - - memset(ret, 0, size); - *dma_handle = bus; + { + int high, mmu; + bus = page_to_phys(page); + ret = page_address(page); + high = (bus + size) >= dma_mask; + mmu = high; + if (force_iommu && !(gfp & GFP_DMA)) + mmu = 1; + else if (high) { + free_pages((unsigned long)ret, + get_order(size)); + + /* Don't use the 16MB ZONE_DMA unless absolutely + needed. It's better to use remapping first. */ + if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { + gfp = (gfp & ~GFP_DMA32) | GFP_DMA; + goto again; + } + } + memset(ret, 0, size); + *dma_handle = bus; + } return ret; } -- cgit v1.2.3 From 8f19ca1341a6d89bd96e2e69e6e10f46d3258089 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 9 Apr 2008 13:18:06 -0300 Subject: x86: unify gfp masks Use the same gfp masks for x86_64 and i386. It involves using HIGHMEM or DMA32 where necessary, for the sake of code compatibility, (no real effect), and using the NORETRY mask for i386. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 6 ++++-- arch/x86/kernel/pci-dma_64.c | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 11f100a5f03..5450bd142cb 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -79,7 +79,7 @@ void *dma_alloc_coherent(struct device *dev, size_t size, unsigned long dma_mask = 0; /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &ret)) return ret; @@ -91,7 +91,9 @@ void *dma_alloc_coherent(struct device *dev, size_t size, if (dma_mask == 0) dma_mask = DMA_32BIT_MASK; - again: + /* Don't invoke OOM killer */ + gfp |= __GFP_NORETRY; +again: page = dma_alloc_pages(dev, gfp, order); if (page == NULL) return NULL; diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 13a31a4a4c1..b956f5945d6 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -49,6 +49,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, unsigned long dma_mask = 0; u64 bus; + /* ignore region specifiers */ + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) return memory; -- cgit v1.2.3 From aa99b16faadcc9a5b6bd9550fda117a8e9e46d26 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 9 Apr 2008 13:18:07 -0300 Subject: x86: remove kludge from x86_64 The claim is that i386 does it. Just it does not. So remove it. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_64.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index b956f5945d6..596c8c88f36 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -68,10 +68,6 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, /* Don't invoke OOM killer */ gfp |= __GFP_NORETRY; - /* Kludge to make it bug-to-bug compatible with i386. i386 - uses the normal dma_mask for alloc_coherent. */ - dma_mask &= *dev->dma_mask; - /* Why <=? Even when the mask is smaller than 4GB it is often larger than 16MB and in this case we have a chance of finding fitting memory in the next higher zone first. If -- cgit v1.2.3 From da60cab4dd922cd933e82bace490f6155a32a90e Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 9 Apr 2008 13:18:08 -0300 Subject: x86: return conditional to mmu Just return our allocation if we don't have an mmu. For i386, where this patch is being applied, we never have. So our goal is just to have the code to look like x86_64's. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 5450bd142cb..f134de3833a 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -116,12 +116,42 @@ again: gfp = (gfp & ~GFP_DMA32) | GFP_DMA; goto again; } + + /* Let low level make its own zone decisions */ + gfp &= ~(GFP_DMA32|GFP_DMA); + + if (dma_ops->alloc_coherent) + return dma_ops->alloc_coherent(dev, size, + dma_handle, gfp); + return NULL; + } memset(ret, 0, size); - *dma_handle = bus; + if (!mmu) { + *dma_handle = bus; + return ret; + } + } + + if (dma_ops->alloc_coherent) { + free_pages((unsigned long)ret, get_order(size)); + gfp &= ~(GFP_DMA|GFP_DMA32); + return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); + } + + if (dma_ops->map_simple) { + *dma_handle = dma_ops->map_simple(dev, virt_to_phys(ret), + size, + PCI_DMA_BIDIRECTIONAL); + if (*dma_handle != bad_dma_address) + return ret; } - return ret; + if (panic_on_overflow) + panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", + (unsigned long)size); + free_pages((unsigned long)ret, get_order(size)); + return NULL; } EXPORT_SYMBOL(dma_alloc_coherent); -- cgit v1.2.3 From bb8ada95a7c11adf3dad4e8d5c55ef1650560592 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 9 Apr 2008 13:18:09 -0300 Subject: x86: don't do dma if mask is NULL. if the device hasn't provided a mask, abort allocation. Note that we're using a fallback device now, so it does not cover the case of a NULL device: just drivers passing NULL masks around. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_32.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index f134de3833a..d2f70744a93 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -91,6 +91,9 @@ void *dma_alloc_coherent(struct device *dev, size_t size, if (dma_mask == 0) dma_mask = DMA_32BIT_MASK; + if (dev->dma_mask == NULL) + return NULL; + /* Don't invoke OOM killer */ gfp |= __GFP_NORETRY; again: -- cgit v1.2.3 From 098cb7f27ed69276e4db560a444b94b982e4bb8f Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 9 Apr 2008 13:18:10 -0300 Subject: x86: integrate pci-dma.c The code in pci-dma_{32,64}.c are now sufficiently close to each other. We merge them in pci-dma.c. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/pci-dma.c | 175 +++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/pci-dma_32.c | 173 ------------------------------------------ arch/x86/kernel/pci-dma_64.c | 154 ------------------------------------- 4 files changed, 176 insertions(+), 328 deletions(-) delete mode 100644 arch/x86/kernel/pci-dma_32.c delete mode 100644 arch/x86/kernel/pci-dma_64.c (limited to 'arch') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 307aee5e8c5..90e092d0af0 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -22,7 +22,7 @@ obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o -obj-y += pci-dma_$(BITS).o bootflag.o e820_$(BITS).o +obj-y += bootflag.o e820_$(BITS).o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o obj-y += alternative.o i8253.o pci-nommu.o obj-$(CONFIG_X86_64) += bugs_64.o diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 00527e74e49..388b113a7d8 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -38,6 +38,15 @@ EXPORT_SYMBOL(iommu_bio_merge); dma_addr_t bad_dma_address __read_mostly = 0; EXPORT_SYMBOL(bad_dma_address); +/* Dummy device used for NULL arguments (normally ISA). Better would + be probably a smaller DMA mask, but this is bug-to-bug compatible + to older i386. */ +struct device fallback_dev = { + .bus_id = "fallback device", + .coherent_dma_mask = DMA_32BIT_MASK, + .dma_mask = &fallback_dev.coherent_dma_mask, +}; + int dma_set_mask(struct device *dev, u64 mask) { if (!dev->dma_mask || !dma_supported(dev, mask)) @@ -267,6 +276,43 @@ void *dma_mark_declared_memory_occupied(struct device *dev, return mem->virt_base + (pos << PAGE_SHIFT); } EXPORT_SYMBOL(dma_mark_declared_memory_occupied); + +static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, + dma_addr_t *dma_handle, void **ret) +{ + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; + int order = get_order(size); + + if (mem) { + int page = bitmap_find_free_region(mem->bitmap, mem->size, + order); + if (page >= 0) { + *dma_handle = mem->device_base + (page << PAGE_SHIFT); + *ret = mem->virt_base + (page << PAGE_SHIFT); + memset(*ret, 0, size); + } + if (mem->flags & DMA_MEMORY_EXCLUSIVE) + *ret = NULL; + } + return (mem != NULL); +} + +static int dma_release_coherent(struct device *dev, int order, void *vaddr) +{ + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; + + if (mem && vaddr >= mem->virt_base && vaddr < + (mem->virt_base + (mem->size << PAGE_SHIFT))) { + int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; + + bitmap_release_region(mem->bitmap, page, order); + return 1; + } + return 0; +} +#else +#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) +#define dma_release_coherent(dev, order, vaddr) (0) #endif /* CONFIG_X86_32 */ int dma_supported(struct device *dev, u64 mask) @@ -310,6 +356,135 @@ int dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); +/* Allocate DMA memory on node near device */ +noinline struct page * +dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) +{ + int node; + + node = dev_to_node(dev); + + return alloc_pages_node(node, gfp, order); +} + +/* + * Allocate memory for a coherent mapping. + */ +void * +dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp) +{ + void *memory = NULL; + struct page *page; + unsigned long dma_mask = 0; + dma_addr_t bus; + + /* ignore region specifiers */ + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + + if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) + return memory; + + if (!dev) + dev = &fallback_dev; + dma_mask = dev->coherent_dma_mask; + if (dma_mask == 0) + dma_mask = DMA_32BIT_MASK; + + /* Device not DMA able */ + if (dev->dma_mask == NULL) + return NULL; + + /* Don't invoke OOM killer */ + gfp |= __GFP_NORETRY; + +#ifdef CONFIG_X86_64 + /* Why <=? Even when the mask is smaller than 4GB it is often + larger than 16MB and in this case we have a chance of + finding fitting memory in the next higher zone first. If + not retry with true GFP_DMA. -AK */ + if (dma_mask <= DMA_32BIT_MASK) + gfp |= GFP_DMA32; +#endif + + again: + page = dma_alloc_pages(dev, gfp, get_order(size)); + if (page == NULL) + return NULL; + + { + int high, mmu; + bus = page_to_phys(page); + memory = page_address(page); + high = (bus + size) >= dma_mask; + mmu = high; + if (force_iommu && !(gfp & GFP_DMA)) + mmu = 1; + else if (high) { + free_pages((unsigned long)memory, + get_order(size)); + + /* Don't use the 16MB ZONE_DMA unless absolutely + needed. It's better to use remapping first. */ + if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { + gfp = (gfp & ~GFP_DMA32) | GFP_DMA; + goto again; + } + + /* Let low level make its own zone decisions */ + gfp &= ~(GFP_DMA32|GFP_DMA); + + if (dma_ops->alloc_coherent) + return dma_ops->alloc_coherent(dev, size, + dma_handle, gfp); + return NULL; + } + + memset(memory, 0, size); + if (!mmu) { + *dma_handle = bus; + return memory; + } + } + + if (dma_ops->alloc_coherent) { + free_pages((unsigned long)memory, get_order(size)); + gfp &= ~(GFP_DMA|GFP_DMA32); + return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); + } + + if (dma_ops->map_simple) { + *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), + size, + PCI_DMA_BIDIRECTIONAL); + if (*dma_handle != bad_dma_address) + return memory; + } + + if (panic_on_overflow) + panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", + (unsigned long)size); + free_pages((unsigned long)memory, get_order(size)); + return NULL; +} +EXPORT_SYMBOL(dma_alloc_coherent); + +/* + * Unmap coherent memory. + * The caller must ensure that the device has finished accessing the mapping. + */ +void dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t bus) +{ + int order = get_order(size); + WARN_ON(irqs_disabled()); /* for portability */ + if (dma_release_coherent(dev, order, vaddr)) + return; + if (dma_ops->unmap_single) + dma_ops->unmap_single(dev, bus, size, 0); + free_pages((unsigned long)vaddr, order); +} +EXPORT_SYMBOL(dma_free_coherent); static int __init pci_iommu_init(void) { diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c deleted file mode 100644 index d2f70744a93..00000000000 --- a/arch/x86/kernel/pci-dma_32.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Dynamic DMA mapping support. - * - * On i386 there is no hardware dynamic DMA address translation, - * so consistent alloc/free are merely page allocation/freeing. - * The rest of the dynamic DMA mapping interface is implemented - * in asm/pci.h. - */ - -#include -#include -#include -#include -#include -#include - -/* Dummy device used for NULL arguments (normally ISA). Better would - be probably a smaller DMA mask, but this is bug-to-bug compatible - to i386. */ -struct device fallback_dev = { - .bus_id = "fallback device", - .coherent_dma_mask = DMA_32BIT_MASK, - .dma_mask = &fallback_dev.coherent_dma_mask, -}; - - -static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, - dma_addr_t *dma_handle, void **ret) -{ - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - int order = get_order(size); - - if (mem) { - int page = bitmap_find_free_region(mem->bitmap, mem->size, - order); - if (page >= 0) { - *dma_handle = mem->device_base + (page << PAGE_SHIFT); - *ret = mem->virt_base + (page << PAGE_SHIFT); - memset(*ret, 0, size); - } - if (mem->flags & DMA_MEMORY_EXCLUSIVE) - *ret = NULL; - } - return (mem != NULL); -} - -static int dma_release_coherent(struct device *dev, int order, void *vaddr) -{ - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - - if (mem && vaddr >= mem->virt_base && vaddr < - (mem->virt_base + (mem->size << PAGE_SHIFT))) { - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; - - bitmap_release_region(mem->bitmap, page, order); - return 1; - } - return 0; -} - -/* Allocate DMA memory on node near device */ -noinline struct page * -dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) -{ - int node; - - node = dev_to_node(dev); - - return alloc_pages_node(node, gfp, order); -} - -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) -{ - void *ret = NULL; - struct page *page; - dma_addr_t bus; - int order = get_order(size); - unsigned long dma_mask = 0; - - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - - if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &ret)) - return ret; - - if (!dev) - dev = &fallback_dev; - - dma_mask = dev->coherent_dma_mask; - if (dma_mask == 0) - dma_mask = DMA_32BIT_MASK; - - if (dev->dma_mask == NULL) - return NULL; - - /* Don't invoke OOM killer */ - gfp |= __GFP_NORETRY; -again: - page = dma_alloc_pages(dev, gfp, order); - if (page == NULL) - return NULL; - - { - int high, mmu; - bus = page_to_phys(page); - ret = page_address(page); - high = (bus + size) >= dma_mask; - mmu = high; - if (force_iommu && !(gfp & GFP_DMA)) - mmu = 1; - else if (high) { - free_pages((unsigned long)ret, - get_order(size)); - - /* Don't use the 16MB ZONE_DMA unless absolutely - needed. It's better to use remapping first. */ - if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { - gfp = (gfp & ~GFP_DMA32) | GFP_DMA; - goto again; - } - - /* Let low level make its own zone decisions */ - gfp &= ~(GFP_DMA32|GFP_DMA); - - if (dma_ops->alloc_coherent) - return dma_ops->alloc_coherent(dev, size, - dma_handle, gfp); - return NULL; - - } - memset(ret, 0, size); - if (!mmu) { - *dma_handle = bus; - return ret; - } - } - - if (dma_ops->alloc_coherent) { - free_pages((unsigned long)ret, get_order(size)); - gfp &= ~(GFP_DMA|GFP_DMA32); - return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); - } - - if (dma_ops->map_simple) { - *dma_handle = dma_ops->map_simple(dev, virt_to_phys(ret), - size, - PCI_DMA_BIDIRECTIONAL); - if (*dma_handle != bad_dma_address) - return ret; - } - - if (panic_on_overflow) - panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", - (unsigned long)size); - free_pages((unsigned long)ret, get_order(size)); - return NULL; -} -EXPORT_SYMBOL(dma_alloc_coherent); - -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle) -{ - int order = get_order(size); - - WARN_ON(irqs_disabled()); /* for portability */ - if (dma_release_coherent(dev, order, vaddr)) - return; - if (dma_ops->unmap_single) - dma_ops->unmap_single(dev, dma_handle, size, 0); - free_pages((unsigned long)vaddr, order); -} -EXPORT_SYMBOL(dma_free_coherent); diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c deleted file mode 100644 index 596c8c88f36..00000000000 --- a/arch/x86/kernel/pci-dma_64.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Dynamic DMA mapping support. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -/* Dummy device used for NULL arguments (normally ISA). Better would - be probably a smaller DMA mask, but this is bug-to-bug compatible - to i386. */ -struct device fallback_dev = { - .bus_id = "fallback device", - .coherent_dma_mask = DMA_32BIT_MASK, - .dma_mask = &fallback_dev.coherent_dma_mask, -}; - -/* Allocate DMA memory on node near device */ -noinline static void * -dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) -{ - int node; - - node = dev_to_node(dev); - - return alloc_pages_node(node, gfp, order); -} - -#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) -#define dma_release_coherent(dev, order, vaddr) (0) -/* - * Allocate memory for a coherent mapping. - */ -void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp) -{ - void *memory; - struct page *page; - unsigned long dma_mask = 0; - u64 bus; - - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - - if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) - return memory; - - if (!dev) - dev = &fallback_dev; - dma_mask = dev->coherent_dma_mask; - if (dma_mask == 0) - dma_mask = DMA_32BIT_MASK; - - /* Device not DMA able */ - if (dev->dma_mask == NULL) - return NULL; - - /* Don't invoke OOM killer */ - gfp |= __GFP_NORETRY; - - /* Why <=? Even when the mask is smaller than 4GB it is often - larger than 16MB and in this case we have a chance of - finding fitting memory in the next higher zone first. If - not retry with true GFP_DMA. -AK */ - if (dma_mask <= DMA_32BIT_MASK) - gfp |= GFP_DMA32; - - again: - page = dma_alloc_pages(dev, gfp, get_order(size)); - if (page == NULL) - return NULL; - - { - int high, mmu; - bus = page_to_phys(page); - memory = page_address(page); - high = (bus + size) >= dma_mask; - mmu = high; - if (force_iommu && !(gfp & GFP_DMA)) - mmu = 1; - else if (high) { - free_pages((unsigned long)memory, - get_order(size)); - - /* Don't use the 16MB ZONE_DMA unless absolutely - needed. It's better to use remapping first. */ - if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { - gfp = (gfp & ~GFP_DMA32) | GFP_DMA; - goto again; - } - - /* Let low level make its own zone decisions */ - gfp &= ~(GFP_DMA32|GFP_DMA); - - if (dma_ops->alloc_coherent) - return dma_ops->alloc_coherent(dev, size, - dma_handle, gfp); - return NULL; - } - - memset(memory, 0, size); - if (!mmu) { - *dma_handle = bus; - return memory; - } - } - - if (dma_ops->alloc_coherent) { - free_pages((unsigned long)memory, get_order(size)); - gfp &= ~(GFP_DMA|GFP_DMA32); - return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); - } - - if (dma_ops->map_simple) { - *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), - size, - PCI_DMA_BIDIRECTIONAL); - if (*dma_handle != bad_dma_address) - return memory; - } - - if (panic_on_overflow) - panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",size); - free_pages((unsigned long)memory, get_order(size)); - return NULL; -} -EXPORT_SYMBOL(dma_alloc_coherent); - -/* - * Unmap coherent memory. - * The caller must ensure that the device has finished accessing the mapping. - */ -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus) -{ - int order = get_order(size); - WARN_ON(irqs_disabled()); /* for portability */ - if (dma_release_coherent(dev, order, vaddr)) - return; - if (dma_ops->unmap_single) - dma_ops->unmap_single(dev, bus, size, 0); - free_pages((unsigned long)vaddr, order); -} -EXPORT_SYMBOL(dma_free_coherent); -- cgit v1.2.3 From 34d0559178393547505ec9492321255405f4e441 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Wed, 16 Apr 2008 11:45:15 -0500 Subject: x86: UV startup of slave cpus This patch changes smpboot.c so that it can start slave cpus running in UV non-unique apicid mode. The SIPI must be sent using a UV-specific mechanism. Signed-off-by: Jack Steiner Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 17 +++++++++++------ arch/x86/kernel/smpboot.c | 29 ++++++++++++++++++++--------- 2 files changed, 31 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 5d77c9cd8e1..ebf13908a74 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -61,26 +61,31 @@ int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | - (6 << UVH_IPI_INT_DELIVERY_MODE_SHFT); + APIC_DM_INIT; + uv_write_global_mmr64(nasid, UVH_IPI_INT, val); + mdelay(10); + + val = (1UL << UVH_IPI_INT_SEND_SHFT) | + (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | + (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | + APIC_DM_STARTUP; uv_write_global_mmr64(nasid, UVH_IPI_INT, val); return 0; } static void uv_send_IPI_one(int cpu, int vector) { - unsigned long val, apicid; + unsigned long val, apicid, lapicid; int nasid; apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */ + lapicid = apicid & 0x3f; /* ZZZ macro needed */ nasid = uv_apicid_to_nasid(apicid); val = - (1UL << UVH_IPI_INT_SEND_SHFT) | (apicid << + (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid << UVH_IPI_INT_APIC_ID_SHFT) | (vector << UVH_IPI_INT_VECTOR_SHFT); uv_write_global_mmr64(nasid, UVH_IPI_INT, val); - printk(KERN_DEBUG - "UV: IPI to cpu %d, apicid 0x%lx, vec %d, nasid%d, val 0x%lx\n", - cpu, apicid, vector, nasid, val); } static void uv_send_IPI_mask(cpumask_t mask, int vector) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e6abe8a49b1..6a925394bc7 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include @@ -677,6 +678,12 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) unsigned long send_status, accept_status = 0; int maxlvt, num_starts, j; + if (get_uv_system_type() == UV_NON_UNIQUE_APIC) { + send_status = uv_wakeup_secondary(phys_apicid, start_eip); + atomic_set(&init_deasserted, 1); + return send_status; + } + /* * Be paranoid about clearing APIC errors. */ @@ -918,16 +925,19 @@ do_rest: atomic_set(&init_deasserted, 0); - Dprintk("Setting warm reset code and vector.\n"); + if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { - store_NMI_vector(&nmi_high, &nmi_low); + Dprintk("Setting warm reset code and vector.\n"); - smpboot_setup_warm_reset_vector(start_ip); - /* - * Be paranoid about clearing APIC errors. - */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); + store_NMI_vector(&nmi_high, &nmi_low); + + smpboot_setup_warm_reset_vector(start_ip); + /* + * Be paranoid about clearing APIC errors. + */ + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } /* * Starting actual IPI sequence... @@ -966,7 +976,8 @@ do_rest: else /* trampoline code not run */ printk(KERN_ERR "Not responding.\n"); - inquire_remote_apic(apicid); + if (get_uv_system_type() != UV_NON_UNIQUE_APIC) + inquire_remote_apic(apicid); } } -- cgit v1.2.3