From 88dff4936c0a5fa53080cca68dc963a8a2a674b0 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 22 May 2009 11:35:50 +0800 Subject: x86: DMI match for the Sony VGN-Z540N as it needs BIOS reboot x86: DMI match for the Sony VGN-Z540N as it needs BIOS reboot, see: http://bugzilla.kernel.org/show_bug.cgi?id=12901 [ Impact: fix hung reboot on certain systems ] Signed-off-by: Zhang Rui Cc: Len Brown LKML-Reference: <1242963350.32574.53.camel@rzhang-dt> Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 1340dad417f..667188e0b5a 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -232,6 +232,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Dell DXP061"), }, }, + { /* Handle problems with rebooting on Sony VGN-Z540N */ + .callback = set_bios_reboot, + .ident = "Sony VGN-Z540N", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"), + }, + }, { } }; -- cgit v1.2.3 From bca23dba760d6705c013f89113c46570378fb626 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 21 May 2009 11:46:16 -0700 Subject: x86, setup: revert ACPI 3 E820 extended attributes support Remove ACPI 3 E820 extended memory attributes support. At least one vendor actively set all the flags to zero, but left ECX on return at 24. This bug may be present in other BIOSes. The breakage functionally means the ACPI 3 flags are probably completely useless, and that no OS any time soon is going to rely on their existence. Therefore, drop support completely. We may want to revisit this question in the future, if we find ourselves actually needing the flags. This reverts all or part of the following checkins: cd670599b7b00d9263f6f11a05c0edeb9cbedaf3 c549e71d073a6e9a4847497344db28a784061455 However, retain the part from the latter commit that copies e820 into a temporary buffer; that is an unrelated BIOS workaround. Put in a comment to explain that part. See https://bugzilla.redhat.com/show_bug.cgi?id=499396 for some additional information. [ Impact: detect all memory on affected machines ] Reported-by: Thomas J. Baker Signed-off-by: H. Peter Anvin Acked-by: Len Brown Cc: Chuck Ebbert Cc: Kyle McMartin Cc: Matt Domsch --- arch/x86/boot/memory.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index 5054c2ddd1a..74b3d2ba84e 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c @@ -17,11 +17,6 @@ #define SMAP 0x534d4150 /* ASCII "SMAP" */ -struct e820_ext_entry { - struct e820entry std; - u32 ext_flags; -} __attribute__((packed)); - static int detect_memory_e820(void) { int count = 0; @@ -29,13 +24,21 @@ static int detect_memory_e820(void) u32 size, id, edi; u8 err; struct e820entry *desc = boot_params.e820_map; - static struct e820_ext_entry buf; /* static so it is zeroed */ + static struct e820entry buf; /* static so it is zeroed */ /* - * Set this here so that if the BIOS doesn't change this field - * but still doesn't change %ecx, we're still okay... + * Note: at least one BIOS is known which assumes that the + * buffer pointed to by one e820 call is the same one as + * the previous call, and only changes modified fields. Therefore, + * we use a temporary buffer and copy the results entry by entry. + * + * This routine deliberately does not try to account for + * ACPI 3+ extended attributes. This is because there are + * BIOSes in the field which report zero for the valid bit for + * all ranges, and we don't currently make any use of the + * other attribute bits. Revisit this if we see the extended + * attribute bits deployed in a meaningful way in the future. */ - buf.ext_flags = 1; do { size = sizeof buf; @@ -66,13 +69,7 @@ static int detect_memory_e820(void) break; } - /* ACPI 3.0 added the extended flags support. If bit 0 - in the extended flags is zero, we're supposed to simply - ignore the entry -- a backwards incompatible change! */ - if (size > 20 && !(buf.ext_flags & 1)) - continue; - - *desc++ = buf.std; + *desc++ = buf; count++; } while (next && count < ARRAY_SIZE(boot_params.e820_map)); -- cgit v1.2.3 From 0c752a93353d9b17dbe148312d732fbe06d235e1 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 22 May 2009 12:17:45 -0700 Subject: x86: introduce noxsave boot parameter Introduce "noxsave" boot parameter which will disable the cpu's xsave/xrstor capabilities. Useful for debugging and working around xsave related issues. [ Impact: make it possible to debug problems in the field ] Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c1caefc82e6..77848d9fca6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -114,6 +114,13 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +static int __init x86_xsave_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_XSAVE); + return 1; +} +__setup("noxsave", x86_xsave_setup); + #ifdef CONFIG_X86_32 static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; -- cgit v1.2.3 From 0b827537e339c084ac9384df588969d400be9e0d Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 22 May 2009 13:23:37 -0700 Subject: x86: bugfix wbinvd() model check instead of family check wbinvd is supported on all CPUs 486 or later. But, pageattr.c is checking x86_model >= 4 before wbinvd(), which looks like an oversight bug. It was first introduced at one place by changeset d7c8f21a8cad0228c7c5ce2bb6dbd95d1ee49d13 and got copied over to second place in the same file later. [ Impact: fix missing cache flush on early-model CPUs, potential data corruption ] Signed-off-by: Venkatesh Pallipadi Signed-off-by: H. Peter Anvin --- arch/x86/mm/pageattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 797f9f107cb..2cc019a3f71 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -153,7 +153,7 @@ static void __cpa_flush_all(void *arg) */ __flush_tlb_all(); - if (cache && boot_cpu_data.x86_model >= 4) + if (cache && boot_cpu_data.x86 >= 4) wbinvd(); } @@ -218,7 +218,7 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache, /* 4M threshold */ if (numpages >= 1024) { - if (boot_cpu_data.x86_model >= 4) + if (boot_cpu_data.x86 >= 4) wbinvd(); return; } -- cgit v1.2.3 From 0af48f42df15b97080b450d24219dd95db7b929a Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 22 May 2009 13:23:38 -0700 Subject: x86: cpa_flush_array wbinvd should be done on all CPUs cpa_flush_array seems to prefer wbinvd() over clflush at 4M threshold. clflush needs to be done on only one CPU as per instruction definition. wbinvd() however, should be done on all CPUs. [ Impact: fix missing flush which could cause data corruption ] Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/mm/pageattr.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 2cc019a3f71..0f9052bcec4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -204,6 +204,11 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache) } } +static void wbinvd_local(void *unused) +{ + wbinvd(); +} + static void cpa_flush_array(unsigned long *start, int numpages, int cache, int in_flags, struct page **pages) { @@ -219,7 +224,8 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache, /* 4M threshold */ if (numpages >= 1024) { if (boot_cpu_data.x86 >= 4) - wbinvd(); + on_each_cpu(wbinvd_local, NULL, 1); + return; } /* -- cgit v1.2.3 From 71c9d8b68b299bef614afc7907393564a9f1476f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 May 2009 12:01:59 +0900 Subject: x86: Remove remap percpu allocator for the time being Remap percpu allocator has subtle bug when combined with page attribute changing. Remap percpu allocator aliases PMD pages for the first chunk and as pageattr doesn't know about the alias it ends up updating page attributes of the original mapping thus leaving the alises in inconsistent state which might lead to subtle data corruption. Please read the following threads for more information: http://thread.gmane.org/gmane.linux.kernel/835783 The following is the proposed fix which teaches pageattr about percpu aliases. http://thread.gmane.org/gmane.linux.kernel/837157 However, the above changes are deemed too pervasive for upstream inclusion for 2.6.30 release, so this patch essentially disables the remap allocator for the time being. Signed-off-by: Tejun Heo LKML-Reference: <4A1A0A27.4050301@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 3a97a4cf187..8f0e13be36b 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -160,8 +160,10 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) /* * If large page isn't supported, there's no benefit in doing * this. Also, on non-NUMA, embedding is better. + * + * NOTE: disabled for now. */ - if (!cpu_has_pse || !pcpu_need_numa()) + if (true || !cpu_has_pse || !pcpu_need_numa()) return -EINVAL; /* -- cgit v1.2.3 From a8cd0244e9cebcf9b358d24c7e7410062f3665cb Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 24 May 2009 22:15:25 +0300 Subject: KVM: Make paravirt tlb flush also reload the PAE PDPTRs The paravirt tlb flush may be used not only to flush TLBs, but also to reload the four page-directory-pointer-table entries, as it is used as a replacement for reloading CR3. Change the code to do the entire CR3 reloading dance instead of simply flushing the TLB. Cc: stable@kernel.org Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b6caf1329b1..32cf11e5728 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2897,8 +2897,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) { - kvm_x86_ops->tlb_flush(vcpu); - set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); + kvm_set_cr3(vcpu, vcpu->arch.cr3); return 1; } -- cgit v1.2.3 From a2edf57f510cce6a389cc14e58c6ad0a4296d6f9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 24 May 2009 22:19:00 +0300 Subject: KVM: Fix PDPTR reloading on CR4 writes The processor is documented to reload the PDPTRs while in PAE mode if any of the CR4 bits PSE, PGE, or PAE change. Linux relies on this behaviour when zapping the low mappings of PAE kernels during boot. The code already handled changes to CR4.PAE; augment it to also notice changes to PSE and PGE. This triggered while booting an F11 PAE kernel; the futex initialization code runs before any CR3 reloads and writes to a NULL pointer; the futex subsystem ended up uninitialized, killing PI futexes and pulseaudio which uses them. Cc: stable@kernel.org Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 49079a46687..3944e917e79 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -338,6 +338,9 @@ EXPORT_SYMBOL_GPL(kvm_lmsw); void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { + unsigned long old_cr4 = vcpu->arch.cr4; + unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; + if (cr4 & CR4_RESERVED_BITS) { printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); kvm_inject_gp(vcpu, 0); @@ -351,7 +354,8 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) kvm_inject_gp(vcpu, 0); return; } - } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE) + } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) + && ((cr4 ^ old_cr4) & pdptr_bits) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); kvm_inject_gp(vcpu, 0); -- cgit v1.2.3 From 8e35961b57da14cb64cb0e4e1b7e3aabda6396fe Mon Sep 17 00:00:00 2001 From: Hideo Saito Date: Sun, 24 May 2009 15:33:34 +0000 Subject: powerpc/mm: Fix broken MMU PID stealing on !SMP The recent rework of the MMU PID handling for non-hash CPUs has a subtle bug in the !SMP "optimized" variant of the PID stealing function. It clears the PID in the mm context before it calls local_flush_tlb_mm(). However, the later will not flush anything if the PID in the context is clear... Signed-off-by: Hideo Saito Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/mmu_context_nohash.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index a70e311bd45..030d0005b4d 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -127,12 +127,12 @@ static unsigned int steal_context_up(unsigned int id) pr_debug("[%d] steal context %d from mm @%p\n", cpu, id, mm); - /* Mark this mm has having no context anymore */ - mm->context.id = MMU_NO_CONTEXT; - /* Flush the TLB for that context */ local_flush_tlb_mm(mm); + /* Mark this mm has having no context anymore */ + mm->context.id = MMU_NO_CONTEXT; + /* XXX This clear should ultimately be part of local_flush_tlb_mm */ __clear_bit(id, stale_map[cpu]); -- cgit v1.2.3 From 46176b4f6bac19454b7b5c35f68594b85850a600 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 26 May 2009 14:42:40 +0900 Subject: x86, relocs: ignore R_386_NONE in kernel relocation entries For relocatable 32bit kernels, boot/compressed/relocs.c processes relocation entries in the kernel image and appends it to the kernel image such that boot/compressed/head_32.S can relocate the kernel. The kernel image is one statically linked object and only uses two relocation types - R_386_PC32 and R_386_32, of the two only the latter needs massaging during kernel relocation and thus handled by relocs. R_386_PC32 is ignored and all other relocation types are considered error. When the target of a relocation resides in a discarded section, binutils doesn't throw away the relocation record but nullifies it by changing it to R_386_NONE, which unfortunately makes relocs fail. The problem was triggered by yet out-of-tree x86 stack unwind patches but given the binutils behavior, ignoring R_386_NONE is the right thing to do. The problem has been tracked down to binutils behavior by Jan Beulich. [ Impact: fix build with certain binutils by ignoring R_386_NONE ] Signed-off-by: Tejun Heo Cc: Jan Beulich Cc: Ingo Molnar LKML-Reference: <4A1B8150.40702@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/relocs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c index 857e492c571..bbeb0c3fbd9 100644 --- a/arch/x86/boot/compressed/relocs.c +++ b/arch/x86/boot/compressed/relocs.c @@ -504,8 +504,11 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) if (sym->st_shndx == SHN_ABS) { continue; } - if (r_type == R_386_PC32) { - /* PC relative relocations don't need to be adjusted */ + if (r_type == R_386_NONE || r_type == R_386_PC32) { + /* + * NONE can be ignored and and PC relative + * relocations don't need to be adjusted. + */ } else if (r_type == R_386_32) { /* Visit relocations that need to be adjusted */ -- cgit v1.2.3 From 4319503779060120fa5de9b8fde056603bb6e0fd Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Fri, 6 Mar 2009 20:24:57 +0000 Subject: [CPUFREQ] add atom family to p4-clockmod Some atom procs don't do freq scaling (such as the atom 330 on my own littlefalls2 board). By adding the atom family here, we at least get the benefit of passive cooling in a thermal emergency. Not sure how to see that its actually helping any, but the driver does bind and claim its functioning on my atom 330. Signed-off-by: Jarod Wilson Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 6ac55bd341a..86961519372 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -168,6 +168,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) case 0x0E: /* Core */ case 0x0F: /* Core Duo */ case 0x16: /* Celeron Core */ + case 0x1C: /* Atom */ p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE); case 0x0D: /* Pentium M (Dothan) */ -- cgit v1.2.3 From d38e73e8dad454a5916f446b0d3523c1161ae95a Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 23 Apr 2009 13:36:12 -0400 Subject: [CPUFREQ] powernow-k7 build fix when ACPI=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/x86/kernel/cpu/cpufreq/powernow-k7.c:172: warning: 'invalidate_entry' defined but not used Reported-by: Toralf Förster Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 3c28ccd4974..a8363e5be4e 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -168,10 +168,12 @@ static int check_powernow(void) return 1; } +#ifdef CONFIG_X86_POWERNOW_K7_ACPI static void invalidate_entry(unsigned int entry) { powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; } +#endif static int get_ranges(unsigned char *pst) { -- cgit v1.2.3 From df1829770db415dc5a5ed5ada3bd70176c6f0a01 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Wed, 22 Apr 2009 13:48:32 +0200 Subject: [CPUFREQ] powernow-k8 cleanup msg if BIOS does not export ACPI _PSS cpufreq data - Make the message shorter and easier to grep for - Use printk_once instead of WARN_ONCE (functionality of these was mixed) Signed-off-by: Thomas Renninger Cc: Langsdorf, Mark Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 4709ead2db5..feef10c085a 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1215,13 +1215,16 @@ static int powernowk8_verify(struct cpufreq_policy *pol) return cpufreq_frequency_table_verify(pol, data->powernow_table); } +static const char ACPI_PSS_BIOS_BUG_MSG[] = + KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" + KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; + /* per CPU init entry point to the driver */ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; cpumask_t oldmask; int rc; - static int print_once; if (!cpu_online(pol->cpu)) return -ENODEV; @@ -1244,19 +1247,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) * an UP version, and is deprecated by AMD. */ if (num_online_cpus() != 1) { - /* - * Replace this one with print_once as soon as such a - * thing gets introduced - */ - if (!print_once) { - WARN_ONCE(1, KERN_ERR FW_BUG PFX "Your BIOS " - "does not provide ACPI _PSS objects " - "in a way that Linux understands. " - "Please report this to the Linux ACPI" - " maintainers and complain to your " - "BIOS vendor.\n"); - print_once++; - } + printk_once(ACPI_PSS_BIOS_BUG_MSG); goto err_out; } if (pol->cpu != 0) { -- cgit v1.2.3 From ca446d06351992e4f1a7c1e5e99870ab4ec5188f Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 22 Apr 2009 13:48:33 +0200 Subject: [CPUFREQ] powernow-k8: determine exact CPU frequency for HW Pstates Slightly modified by trenn@suse.de -> only do this on fam 10h and fam 11h. Currently powernow-k8 determines CPU frequency from ACPI PSS objects, but according to AMD family 11h BKDG this frequency is just a rounded value: "CoreFreq (MHz) = The CPU COF specified by MSRC001_00[6B:64][CpuFid] rounded to the nearest 100 Mhz." As a consequnce powernow-k8 reports wrong CPU frequency on some systems, e.g. on Turion X2 Ultra: powernow-k8: Found 1 AMD Turion(tm)X2 Ultra DualCore Mobile ZM-82 processors (2 cpu cores) (version 2.20.00) powernow-k8: 0 : pstate 0 (2200 MHz) powernow-k8: 1 : pstate 1 (1100 MHz) powernow-k8: 2 : pstate 2 (600 MHz) But this is wrong as frequency for Pstate2 is 550 MHz. x86info reports it correctly: #x86info -a |grep Pstate ... Pstate-0: fid=e, did=0, vid=24 (2200MHz) Pstate-1: fid=e, did=1, vid=30 (1100MHz) Pstate-2: fid=e, did=2, vid=3c (550MHz) (current) Solution is to determine the frequency directly from Pstate MSRs instead of using rounded values from ACPI table. Signed-off-by: Andreas Herrmann Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index feef10c085a..f6b32d11235 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -649,6 +649,20 @@ static void print_basics(struct powernow_k8_data *data) data->batps); } +static u32 freq_from_fid_did(u32 fid, u32 did) +{ + u32 mhz = 0; + + if (boot_cpu_data.x86 == 0x10) + mhz = (100 * (fid + 0x10)) >> did; + else if (boot_cpu_data.x86 == 0x11) + mhz = (100 * (fid + 8)) >> did; + else + BUG(); + + return mhz * 1000; +} + static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) { @@ -923,8 +937,13 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, powernow_table[i].index = index; - powernow_table[i].frequency = - data->acpi_data.states[i].core_frequency * 1000; + /* Frequency may be rounded for these */ + if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) { + powernow_table[i].frequency = + freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7); + } else + powernow_table[i].frequency = + data->acpi_data.states[i].core_frequency * 1000; } return 0; } -- cgit v1.2.3 From 2171787be2e71ff71159857bfeb21398b61eb615 Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Tue, 26 May 2009 10:33:35 -0700 Subject: x86: avoid back to back on_each_cpu in cpa_flush_array Cleanup cpa_flush_array() to avoid back to back on_each_cpu() calls. [ Impact: optimizes fix 0af48f42df15b97080b450d24219dd95db7b929a ] Signed-off-by: Venkatesh Pallipadi Signed-off-by: H. Peter Anvin --- arch/x86/mm/pageattr.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 0f9052bcec4..e17efed088c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -204,30 +204,19 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache) } } -static void wbinvd_local(void *unused) -{ - wbinvd(); -} - static void cpa_flush_array(unsigned long *start, int numpages, int cache, int in_flags, struct page **pages) { unsigned int i, level; + unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */ BUG_ON(irqs_disabled()); - on_each_cpu(__cpa_flush_range, NULL, 1); + on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1); - if (!cache) + if (!cache || do_wbinvd) return; - /* 4M threshold */ - if (numpages >= 1024) { - if (boot_cpu_data.x86 >= 4) - on_each_cpu(wbinvd_local, NULL, 1); - - return; - } /* * We only need to flush on one CPU, * clflush is a MESI-coherent instruction that -- cgit v1.2.3 From 84532a0fc3d5811dca8e3726fe4d372ea87bd7c6 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 27 May 2009 13:33:14 +1000 Subject: Revert "powerpc: Rework dma-noncoherent to use generic vmalloc layer" This reverts commit 33f00dcedb0e22cdb156a23632814fc580fcfcf8. While it was a good idea to try to use the mm/vmalloc.c allocator instead of our own (in fact, ours is itself a dup on an old variant of the vmalloc one), unfortunately, the approach is terminally busted since dma_alloc_coherent() can be called at interrupt time or in atomic contexts and there's little chances we'll make the code in mm/vmalloc.c cope with\ that :-( Until we can get the generic code to forbid that idiocy and fix all drivers abusing it, we pretty much have no choice but revert to our custom virtual space allocator. There's also a problem with SMP safety since freeing such mapping would require an IPI which cannot be done at interrupt time. However, right now, I don't think we support any platform that is both SMP and has non-coherent DMA (don't laugh, I know such things do exist !) so we can sort that out later. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig | 25 +++ arch/powerpc/lib/dma-noncoherent.c | 303 ++++++++++++++++++++++++++++++------- 2 files changed, 271 insertions(+), 57 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a0d1146a057..3bb43adce44 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -868,6 +868,31 @@ config TASK_SIZE default "0x80000000" if PPC_PREP || PPC_8xx default "0xc0000000" +config CONSISTENT_START_BOOL + bool "Set custom consistent memory pool address" + depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE + help + This option allows you to set the base virtual address + of the consistent memory pool. This pool of virtual + memory is used to make consistent memory allocations. + +config CONSISTENT_START + hex "Base virtual address of consistent memory pool" if CONSISTENT_START_BOOL + default "0xfd000000" if (NOT_COHERENT_CACHE && 8xx) + default "0xff100000" if NOT_COHERENT_CACHE + +config CONSISTENT_SIZE_BOOL + bool "Set custom consistent memory pool size" + depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE + help + This option allows you to set the size of the + consistent memory pool. This pool of virtual memory + is used to make consistent memory allocations. + +config CONSISTENT_SIZE + hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL + default "0x00200000" if NOT_COHERENT_CACHE + config PIN_TLB bool "Pinned Kernel TLBs (860 ONLY)" depends on ADVANCED_OPTIONS && 8xx diff --git a/arch/powerpc/lib/dma-noncoherent.c b/arch/powerpc/lib/dma-noncoherent.c index 005a28d380a..b7dc4c19f58 100644 --- a/arch/powerpc/lib/dma-noncoherent.c +++ b/arch/powerpc/lib/dma-noncoherent.c @@ -29,10 +29,120 @@ #include #include #include -#include #include +/* + * This address range defaults to a value that is safe for all + * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It + * can be further configured for specific applications under + * the "Advanced Setup" menu. -Matt + */ +#define CONSISTENT_BASE (CONFIG_CONSISTENT_START) +#define CONSISTENT_END (CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE) +#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT) + +/* + * This is the page table (2MB) covering uncached, DMA consistent allocations + */ +static pte_t *consistent_pte; +static DEFINE_SPINLOCK(consistent_lock); + +/* + * VM region handling support. + * + * This should become something generic, handling VM region allocations for + * vmalloc and similar (ioremap, module space, etc). + * + * I envisage vmalloc()'s supporting vm_struct becoming: + * + * struct vm_struct { + * struct vm_region region; + * unsigned long flags; + * struct page **pages; + * unsigned int nr_pages; + * unsigned long phys_addr; + * }; + * + * get_vm_area() would then call vm_region_alloc with an appropriate + * struct vm_region head (eg): + * + * struct vm_region vmalloc_head = { + * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), + * .vm_start = VMALLOC_START, + * .vm_end = VMALLOC_END, + * }; + * + * However, vmalloc_head.vm_start is variable (typically, it is dependent on + * the amount of RAM found at boot time.) I would imagine that get_vm_area() + * would have to initialise this each time prior to calling vm_region_alloc(). + */ +struct ppc_vm_region { + struct list_head vm_list; + unsigned long vm_start; + unsigned long vm_end; +}; + +static struct ppc_vm_region consistent_head = { + .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), + .vm_start = CONSISTENT_BASE, + .vm_end = CONSISTENT_END, +}; + +static struct ppc_vm_region * +ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp) +{ + unsigned long addr = head->vm_start, end = head->vm_end - size; + unsigned long flags; + struct ppc_vm_region *c, *new; + + new = kmalloc(sizeof(struct ppc_vm_region), gfp); + if (!new) + goto out; + + spin_lock_irqsave(&consistent_lock, flags); + + list_for_each_entry(c, &head->vm_list, vm_list) { + if ((addr + size) < addr) + goto nospc; + if ((addr + size) <= c->vm_start) + goto found; + addr = c->vm_end; + if (addr > end) + goto nospc; + } + + found: + /* + * Insert this entry _before_ the one we found. + */ + list_add_tail(&new->vm_list, &c->vm_list); + new->vm_start = addr; + new->vm_end = addr + size; + + spin_unlock_irqrestore(&consistent_lock, flags); + return new; + + nospc: + spin_unlock_irqrestore(&consistent_lock, flags); + kfree(new); + out: + return NULL; +} + +static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr) +{ + struct ppc_vm_region *c; + + list_for_each_entry(c, &head->vm_list, vm_list) { + if (c->vm_start == addr) + goto out; + } + c = NULL; + out: + return c; +} + /* * Allocate DMA-coherent memory space and return both the kernel remapped * virtual and bus address for that space. @@ -41,21 +151,21 @@ void * __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) { struct page *page; + struct ppc_vm_region *c; unsigned long order; - int i; - unsigned int nr_pages = PAGE_ALIGN(size)>>PAGE_SHIFT; - unsigned int array_size = nr_pages * sizeof(struct page *); - struct page **pages; - struct page *end; u64 mask = 0x00ffffff, limit; /* ISA default */ - struct vm_struct *area; - BUG_ON(!mem_init_done); + if (!consistent_pte) { + printk(KERN_ERR "%s: not initialised\n", __func__); + dump_stack(); + return NULL; + } + size = PAGE_ALIGN(size); limit = (mask + 1) & ~mask; - if (limit && size >= limit) { - printk(KERN_WARNING "coherent allocation too big (requested " - "%#x mask %#Lx)\n", size, mask); + if ((limit && size >= limit) || size >= (CONSISTENT_END - CONSISTENT_BASE)) { + printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n", + size, mask); return NULL; } @@ -68,8 +178,6 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) if (!page) goto no_page; - end = page + (1 << order); - /* * Invalidate any data that might be lurking in the * kernel direct-mapped region for device DMA. @@ -80,59 +188,48 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) flush_dcache_range(kaddr, kaddr + size); } - split_page(page, order); - /* - * Set the "dma handle" + * Allocate a virtual address in the consistent mapping region. */ - *handle = page_to_phys(page); - - area = get_vm_area_caller(size, VM_IOREMAP, - __builtin_return_address(1)); - if (!area) - goto out_free_pages; - - if (array_size > PAGE_SIZE) { - pages = vmalloc(array_size); - area->flags |= VM_VPAGES; - } else { - pages = kmalloc(array_size, GFP_KERNEL); - } - if (!pages) - goto out_free_area; + c = ppc_vm_region_alloc(&consistent_head, size, + gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); + if (c) { + unsigned long vaddr = c->vm_start; + pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr); + struct page *end = page + (1 << order); - area->pages = pages; - area->nr_pages = nr_pages; + split_page(page, order); - for (i = 0; i < nr_pages; i++) - pages[i] = page + i; + /* + * Set the "dma handle" + */ + *handle = page_to_phys(page); - if (map_vm_area(area, pgprot_noncached(PAGE_KERNEL), &pages)) - goto out_unmap; + do { + BUG_ON(!pte_none(*pte)); - /* - * Free the otherwise unused pages. - */ - page += nr_pages; - while (page < end) { - __free_page(page); - page++; + SetPageReserved(page); + set_pte_at(&init_mm, vaddr, + pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL))); + page++; + pte++; + vaddr += PAGE_SIZE; + } while (size -= PAGE_SIZE); + + /* + * Free the otherwise unused pages. + */ + while (page < end) { + __free_page(page); + page++; + } + + return (void *)c->vm_start; } - return area->addr; -out_unmap: - vunmap(area->addr); - if (array_size > PAGE_SIZE) - vfree(pages); - else - kfree(pages); - goto out_free_pages; -out_free_area: - free_vm_area(area); -out_free_pages: if (page) __free_pages(page, order); -no_page: + no_page: return NULL; } EXPORT_SYMBOL(__dma_alloc_coherent); @@ -142,11 +239,103 @@ EXPORT_SYMBOL(__dma_alloc_coherent); */ void __dma_free_coherent(size_t size, void *vaddr) { - vfree(vaddr); + struct ppc_vm_region *c; + unsigned long flags, addr; + pte_t *ptep; + + size = PAGE_ALIGN(size); + + spin_lock_irqsave(&consistent_lock, flags); + + c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr); + if (!c) + goto no_area; + + if ((c->vm_end - c->vm_start) != size) { + printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", + __func__, c->vm_end - c->vm_start, size); + dump_stack(); + size = c->vm_end - c->vm_start; + } + + ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start); + addr = c->vm_start; + do { + pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); + unsigned long pfn; + + ptep++; + addr += PAGE_SIZE; + if (!pte_none(pte) && pte_present(pte)) { + pfn = pte_pfn(pte); + + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + ClearPageReserved(page); + + __free_page(page); + continue; + } + } + + printk(KERN_CRIT "%s: bad page in kernel page table\n", + __func__); + } while (size -= PAGE_SIZE); + + flush_tlb_kernel_range(c->vm_start, c->vm_end); + + list_del(&c->vm_list); + + spin_unlock_irqrestore(&consistent_lock, flags); + + kfree(c); + return; + + no_area: + spin_unlock_irqrestore(&consistent_lock, flags); + printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", + __func__, vaddr); + dump_stack(); } EXPORT_SYMBOL(__dma_free_coherent); +/* + * Initialise the consistent memory allocation. + */ +static int __init dma_alloc_init(void) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int ret = 0; + + do { + pgd = pgd_offset(&init_mm, CONSISTENT_BASE); + pud = pud_alloc(&init_mm, pgd, CONSISTENT_BASE); + pmd = pmd_alloc(&init_mm, pud, CONSISTENT_BASE); + if (!pmd) { + printk(KERN_ERR "%s: no pmd tables\n", __func__); + ret = -ENOMEM; + break; + } + + pte = pte_alloc_kernel(pmd, CONSISTENT_BASE); + if (!pte) { + printk(KERN_ERR "%s: no pte tables\n", __func__); + ret = -ENOMEM; + break; + } + + consistent_pte = pte; + } while (0); + + return ret; +} + +core_initcall(dma_alloc_init); + /* * make an area consistent. */ -- cgit v1.2.3 From 7a1450fdf4c69961f3926352fd8bc4ea19676756 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 26 May 2009 04:55:38 -0400 Subject: Blackfin: hook up preadv/pwritev syscalls Signed-off-by: Mike Frysinger --- arch/blackfin/include/asm/unistd.h | 4 +++- arch/blackfin/mach-common/entry.S | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h index 1e57b636e0b..cf5066d3efd 100644 --- a/arch/blackfin/include/asm/unistd.h +++ b/arch/blackfin/include/asm/unistd.h @@ -378,8 +378,10 @@ #define __NR_dup3 363 #define __NR_pipe2 364 #define __NR_inotify_init1 365 +#define __NR_preadv 366 +#define __NR_pwritev 367 -#define __NR_syscall 366 +#define __NR_syscall 368 #define NR_syscalls __NR_syscall /* Old optional stuff no one actually uses */ diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index 21e65a339a2..a063a434f7e 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -1581,6 +1581,8 @@ ENTRY(_sys_call_table) .long _sys_dup3 .long _sys_pipe2 .long _sys_inotify_init1 /* 365 */ + .long _sys_preadv + .long _sys_pwritev .rept NR_syscalls-(.-_sys_call_table)/4 .long _sys_ni_syscall -- cgit v1.2.3 From 6b50520b2fd9bf521f9c947b5f6999bad273a51d Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 19 May 2009 10:03:22 -0400 Subject: Blackfin: ignore generated vmlinux.lds Signed-off-by: Mike Frysinger --- arch/blackfin/kernel/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 arch/blackfin/kernel/.gitignore (limited to 'arch') diff --git a/arch/blackfin/kernel/.gitignore b/arch/blackfin/kernel/.gitignore new file mode 100644 index 00000000000..c5f676c3c22 --- /dev/null +++ b/arch/blackfin/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds -- cgit v1.2.3 From 2ec10ea91bf3688013b00638f29df4f8f6b5c18b Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 20 May 2009 19:45:39 -0400 Subject: Blackfin: drop unneeded asm/.gitignore We don't create a include/asm/mach/ symlink anymore, so we don't need the .gitignore for it. Signed-off-by: Mike Frysinger --- arch/blackfin/include/asm/.gitignore | 1 - 1 file changed, 1 deletion(-) delete mode 100644 arch/blackfin/include/asm/.gitignore (limited to 'arch') diff --git a/arch/blackfin/include/asm/.gitignore b/arch/blackfin/include/asm/.gitignore deleted file mode 100644 index 7858564a446..00000000000 --- a/arch/blackfin/include/asm/.gitignore +++ /dev/null @@ -1 +0,0 @@ -+mach -- cgit v1.2.3 From add8a5050a52f1bd1be6b97be86fdd1cfbea2d1d Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 26 May 2009 05:03:52 -0400 Subject: Blackfin: fix strncmp.o build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix some more fallout of the string changes: CC arch/blackfin/lib/strncmp.o In file included from include/linux/bitmap.h:9, from include/linux/nodemask.h:90, from include/linux/mmzone.h:17, from include/linux/gfp.h:5, from include/linux/kmod.h:23, from include/linux/module.h:14, from arch/blackfin/lib/strncmp.c:14: include/linux/string.h: In function ‘strstarts’: include/linux/string.h:132: error: implicit declaration of function ‘strncmp’ make[1]: *** [arch/blackfin/lib/strncmp.o] Error 1 Signed-off-by: Mike Frysinger CC: Rusty Russell --- arch/blackfin/lib/strncmp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/blackfin/lib/strncmp.c b/arch/blackfin/lib/strncmp.c index 2aaae78a68e..46518b1d298 100644 --- a/arch/blackfin/lib/strncmp.c +++ b/arch/blackfin/lib/strncmp.c @@ -8,9 +8,8 @@ #define strncmp __inline_strncmp #include -#undef strncmp - #include +#undef strncmp int strncmp(const char *cs, const char *ct, size_t count) { -- cgit v1.2.3 From b16e7766d6436835f473ba823ad04fbdfe5e9cbd Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 27 May 2009 13:36:10 +1000 Subject: powerpc: Move dma-noncoherent.c from arch/powerpc/lib to arch/powerpc/mm (pre-requisite to make the next patches more palatable) Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/lib/Makefile | 1 - arch/powerpc/lib/dma-noncoherent.c | 426 ------------------------------------- arch/powerpc/mm/Makefile | 1 + arch/powerpc/mm/dma-noncoherent.c | 426 +++++++++++++++++++++++++++++++++++++ 4 files changed, 427 insertions(+), 427 deletions(-) delete mode 100644 arch/powerpc/lib/dma-noncoherent.c create mode 100644 arch/powerpc/mm/dma-noncoherent.c (limited to 'arch') diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 8db35278a4b..29b742b90f1 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -18,7 +18,6 @@ obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \ memcpy_64.o usercopy_64.o mem_64.o string.o obj-$(CONFIG_XMON) += sstep.o obj-$(CONFIG_KPROBES) += sstep.o -obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o ifeq ($(CONFIG_PPC64),y) obj-$(CONFIG_SMP) += locks.o diff --git a/arch/powerpc/lib/dma-noncoherent.c b/arch/powerpc/lib/dma-noncoherent.c deleted file mode 100644 index b7dc4c19f58..00000000000 --- a/arch/powerpc/lib/dma-noncoherent.c +++ /dev/null @@ -1,426 +0,0 @@ -/* - * PowerPC version derived from arch/arm/mm/consistent.c - * Copyright (C) 2001 Dan Malek (dmalek@jlc.net) - * - * Copyright (C) 2000 Russell King - * - * Consistent memory allocators. Used for DMA devices that want to - * share uncached memory with the processor core. The function return - * is the virtual address and 'dma_handle' is the physical address. - * Mostly stolen from the ARM port, with some changes for PowerPC. - * -- Dan - * - * Reorganized to get rid of the arch-specific consistent_* functions - * and provide non-coherent implementations for the DMA API. -Matt - * - * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent() - * implementation. This is pulled straight from ARM and barely - * modified. -Matt - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include - -/* - * This address range defaults to a value that is safe for all - * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It - * can be further configured for specific applications under - * the "Advanced Setup" menu. -Matt - */ -#define CONSISTENT_BASE (CONFIG_CONSISTENT_START) -#define CONSISTENT_END (CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE) -#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT) - -/* - * This is the page table (2MB) covering uncached, DMA consistent allocations - */ -static pte_t *consistent_pte; -static DEFINE_SPINLOCK(consistent_lock); - -/* - * VM region handling support. - * - * This should become something generic, handling VM region allocations for - * vmalloc and similar (ioremap, module space, etc). - * - * I envisage vmalloc()'s supporting vm_struct becoming: - * - * struct vm_struct { - * struct vm_region region; - * unsigned long flags; - * struct page **pages; - * unsigned int nr_pages; - * unsigned long phys_addr; - * }; - * - * get_vm_area() would then call vm_region_alloc with an appropriate - * struct vm_region head (eg): - * - * struct vm_region vmalloc_head = { - * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), - * .vm_start = VMALLOC_START, - * .vm_end = VMALLOC_END, - * }; - * - * However, vmalloc_head.vm_start is variable (typically, it is dependent on - * the amount of RAM found at boot time.) I would imagine that get_vm_area() - * would have to initialise this each time prior to calling vm_region_alloc(). - */ -struct ppc_vm_region { - struct list_head vm_list; - unsigned long vm_start; - unsigned long vm_end; -}; - -static struct ppc_vm_region consistent_head = { - .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), - .vm_start = CONSISTENT_BASE, - .vm_end = CONSISTENT_END, -}; - -static struct ppc_vm_region * -ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp) -{ - unsigned long addr = head->vm_start, end = head->vm_end - size; - unsigned long flags; - struct ppc_vm_region *c, *new; - - new = kmalloc(sizeof(struct ppc_vm_region), gfp); - if (!new) - goto out; - - spin_lock_irqsave(&consistent_lock, flags); - - list_for_each_entry(c, &head->vm_list, vm_list) { - if ((addr + size) < addr) - goto nospc; - if ((addr + size) <= c->vm_start) - goto found; - addr = c->vm_end; - if (addr > end) - goto nospc; - } - - found: - /* - * Insert this entry _before_ the one we found. - */ - list_add_tail(&new->vm_list, &c->vm_list); - new->vm_start = addr; - new->vm_end = addr + size; - - spin_unlock_irqrestore(&consistent_lock, flags); - return new; - - nospc: - spin_unlock_irqrestore(&consistent_lock, flags); - kfree(new); - out: - return NULL; -} - -static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr) -{ - struct ppc_vm_region *c; - - list_for_each_entry(c, &head->vm_list, vm_list) { - if (c->vm_start == addr) - goto out; - } - c = NULL; - out: - return c; -} - -/* - * Allocate DMA-coherent memory space and return both the kernel remapped - * virtual and bus address for that space. - */ -void * -__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) -{ - struct page *page; - struct ppc_vm_region *c; - unsigned long order; - u64 mask = 0x00ffffff, limit; /* ISA default */ - - if (!consistent_pte) { - printk(KERN_ERR "%s: not initialised\n", __func__); - dump_stack(); - return NULL; - } - - size = PAGE_ALIGN(size); - limit = (mask + 1) & ~mask; - if ((limit && size >= limit) || size >= (CONSISTENT_END - CONSISTENT_BASE)) { - printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n", - size, mask); - return NULL; - } - - order = get_order(size); - - if (mask != 0xffffffff) - gfp |= GFP_DMA; - - page = alloc_pages(gfp, order); - if (!page) - goto no_page; - - /* - * Invalidate any data that might be lurking in the - * kernel direct-mapped region for device DMA. - */ - { - unsigned long kaddr = (unsigned long)page_address(page); - memset(page_address(page), 0, size); - flush_dcache_range(kaddr, kaddr + size); - } - - /* - * Allocate a virtual address in the consistent mapping region. - */ - c = ppc_vm_region_alloc(&consistent_head, size, - gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); - if (c) { - unsigned long vaddr = c->vm_start; - pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr); - struct page *end = page + (1 << order); - - split_page(page, order); - - /* - * Set the "dma handle" - */ - *handle = page_to_phys(page); - - do { - BUG_ON(!pte_none(*pte)); - - SetPageReserved(page); - set_pte_at(&init_mm, vaddr, - pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL))); - page++; - pte++; - vaddr += PAGE_SIZE; - } while (size -= PAGE_SIZE); - - /* - * Free the otherwise unused pages. - */ - while (page < end) { - __free_page(page); - page++; - } - - return (void *)c->vm_start; - } - - if (page) - __free_pages(page, order); - no_page: - return NULL; -} -EXPORT_SYMBOL(__dma_alloc_coherent); - -/* - * free a page as defined by the above mapping. - */ -void __dma_free_coherent(size_t size, void *vaddr) -{ - struct ppc_vm_region *c; - unsigned long flags, addr; - pte_t *ptep; - - size = PAGE_ALIGN(size); - - spin_lock_irqsave(&consistent_lock, flags); - - c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr); - if (!c) - goto no_area; - - if ((c->vm_end - c->vm_start) != size) { - printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", - __func__, c->vm_end - c->vm_start, size); - dump_stack(); - size = c->vm_end - c->vm_start; - } - - ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start); - addr = c->vm_start; - do { - pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); - unsigned long pfn; - - ptep++; - addr += PAGE_SIZE; - - if (!pte_none(pte) && pte_present(pte)) { - pfn = pte_pfn(pte); - - if (pfn_valid(pfn)) { - struct page *page = pfn_to_page(pfn); - ClearPageReserved(page); - - __free_page(page); - continue; - } - } - - printk(KERN_CRIT "%s: bad page in kernel page table\n", - __func__); - } while (size -= PAGE_SIZE); - - flush_tlb_kernel_range(c->vm_start, c->vm_end); - - list_del(&c->vm_list); - - spin_unlock_irqrestore(&consistent_lock, flags); - - kfree(c); - return; - - no_area: - spin_unlock_irqrestore(&consistent_lock, flags); - printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", - __func__, vaddr); - dump_stack(); -} -EXPORT_SYMBOL(__dma_free_coherent); - -/* - * Initialise the consistent memory allocation. - */ -static int __init dma_alloc_init(void) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - int ret = 0; - - do { - pgd = pgd_offset(&init_mm, CONSISTENT_BASE); - pud = pud_alloc(&init_mm, pgd, CONSISTENT_BASE); - pmd = pmd_alloc(&init_mm, pud, CONSISTENT_BASE); - if (!pmd) { - printk(KERN_ERR "%s: no pmd tables\n", __func__); - ret = -ENOMEM; - break; - } - - pte = pte_alloc_kernel(pmd, CONSISTENT_BASE); - if (!pte) { - printk(KERN_ERR "%s: no pte tables\n", __func__); - ret = -ENOMEM; - break; - } - - consistent_pte = pte; - } while (0); - - return ret; -} - -core_initcall(dma_alloc_init); - -/* - * make an area consistent. - */ -void __dma_sync(void *vaddr, size_t size, int direction) -{ - unsigned long start = (unsigned long)vaddr; - unsigned long end = start + size; - - switch (direction) { - case DMA_NONE: - BUG(); - case DMA_FROM_DEVICE: - /* - * invalidate only when cache-line aligned otherwise there is - * the potential for discarding uncommitted data from the cache - */ - if ((start & (L1_CACHE_BYTES - 1)) || (size & (L1_CACHE_BYTES - 1))) - flush_dcache_range(start, end); - else - invalidate_dcache_range(start, end); - break; - case DMA_TO_DEVICE: /* writeback only */ - clean_dcache_range(start, end); - break; - case DMA_BIDIRECTIONAL: /* writeback and invalidate */ - flush_dcache_range(start, end); - break; - } -} -EXPORT_SYMBOL(__dma_sync); - -#ifdef CONFIG_HIGHMEM -/* - * __dma_sync_page() implementation for systems using highmem. - * In this case, each page of a buffer must be kmapped/kunmapped - * in order to have a virtual address for __dma_sync(). This must - * not sleep so kmap_atomic()/kunmap_atomic() are used. - * - * Note: yes, it is possible and correct to have a buffer extend - * beyond the first page. - */ -static inline void __dma_sync_page_highmem(struct page *page, - unsigned long offset, size_t size, int direction) -{ - size_t seg_size = min((size_t)(PAGE_SIZE - offset), size); - size_t cur_size = seg_size; - unsigned long flags, start, seg_offset = offset; - int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE; - int seg_nr = 0; - - local_irq_save(flags); - - do { - start = (unsigned long)kmap_atomic(page + seg_nr, - KM_PPC_SYNC_PAGE) + seg_offset; - - /* Sync this buffer segment */ - __dma_sync((void *)start, seg_size, direction); - kunmap_atomic((void *)start, KM_PPC_SYNC_PAGE); - seg_nr++; - - /* Calculate next buffer segment size */ - seg_size = min((size_t)PAGE_SIZE, size - cur_size); - - /* Add the segment size to our running total */ - cur_size += seg_size; - seg_offset = 0; - } while (seg_nr < nr_segs); - - local_irq_restore(flags); -} -#endif /* CONFIG_HIGHMEM */ - -/* - * __dma_sync_page makes memory consistent. identical to __dma_sync, but - * takes a struct page instead of a virtual address - */ -void __dma_sync_page(struct page *page, unsigned long offset, - size_t size, int direction) -{ -#ifdef CONFIG_HIGHMEM - __dma_sync_page_highmem(page, offset, size, direction); -#else - unsigned long start = (unsigned long)page_address(page) + offset; - __dma_sync((void *)start, size, direction); -#endif -} -EXPORT_SYMBOL(__dma_sync_page); diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 17290bcedc5..b746f4ca420 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -26,3 +26,4 @@ obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o +obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c new file mode 100644 index 00000000000..b7dc4c19f58 --- /dev/null +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -0,0 +1,426 @@ +/* + * PowerPC version derived from arch/arm/mm/consistent.c + * Copyright (C) 2001 Dan Malek (dmalek@jlc.net) + * + * Copyright (C) 2000 Russell King + * + * Consistent memory allocators. Used for DMA devices that want to + * share uncached memory with the processor core. The function return + * is the virtual address and 'dma_handle' is the physical address. + * Mostly stolen from the ARM port, with some changes for PowerPC. + * -- Dan + * + * Reorganized to get rid of the arch-specific consistent_* functions + * and provide non-coherent implementations for the DMA API. -Matt + * + * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent() + * implementation. This is pulled straight from ARM and barely + * modified. -Matt + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * This address range defaults to a value that is safe for all + * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It + * can be further configured for specific applications under + * the "Advanced Setup" menu. -Matt + */ +#define CONSISTENT_BASE (CONFIG_CONSISTENT_START) +#define CONSISTENT_END (CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE) +#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT) + +/* + * This is the page table (2MB) covering uncached, DMA consistent allocations + */ +static pte_t *consistent_pte; +static DEFINE_SPINLOCK(consistent_lock); + +/* + * VM region handling support. + * + * This should become something generic, handling VM region allocations for + * vmalloc and similar (ioremap, module space, etc). + * + * I envisage vmalloc()'s supporting vm_struct becoming: + * + * struct vm_struct { + * struct vm_region region; + * unsigned long flags; + * struct page **pages; + * unsigned int nr_pages; + * unsigned long phys_addr; + * }; + * + * get_vm_area() would then call vm_region_alloc with an appropriate + * struct vm_region head (eg): + * + * struct vm_region vmalloc_head = { + * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), + * .vm_start = VMALLOC_START, + * .vm_end = VMALLOC_END, + * }; + * + * However, vmalloc_head.vm_start is variable (typically, it is dependent on + * the amount of RAM found at boot time.) I would imagine that get_vm_area() + * would have to initialise this each time prior to calling vm_region_alloc(). + */ +struct ppc_vm_region { + struct list_head vm_list; + unsigned long vm_start; + unsigned long vm_end; +}; + +static struct ppc_vm_region consistent_head = { + .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), + .vm_start = CONSISTENT_BASE, + .vm_end = CONSISTENT_END, +}; + +static struct ppc_vm_region * +ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp) +{ + unsigned long addr = head->vm_start, end = head->vm_end - size; + unsigned long flags; + struct ppc_vm_region *c, *new; + + new = kmalloc(sizeof(struct ppc_vm_region), gfp); + if (!new) + goto out; + + spin_lock_irqsave(&consistent_lock, flags); + + list_for_each_entry(c, &head->vm_list, vm_list) { + if ((addr + size) < addr) + goto nospc; + if ((addr + size) <= c->vm_start) + goto found; + addr = c->vm_end; + if (addr > end) + goto nospc; + } + + found: + /* + * Insert this entry _before_ the one we found. + */ + list_add_tail(&new->vm_list, &c->vm_list); + new->vm_start = addr; + new->vm_end = addr + size; + + spin_unlock_irqrestore(&consistent_lock, flags); + return new; + + nospc: + spin_unlock_irqrestore(&consistent_lock, flags); + kfree(new); + out: + return NULL; +} + +static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr) +{ + struct ppc_vm_region *c; + + list_for_each_entry(c, &head->vm_list, vm_list) { + if (c->vm_start == addr) + goto out; + } + c = NULL; + out: + return c; +} + +/* + * Allocate DMA-coherent memory space and return both the kernel remapped + * virtual and bus address for that space. + */ +void * +__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) +{ + struct page *page; + struct ppc_vm_region *c; + unsigned long order; + u64 mask = 0x00ffffff, limit; /* ISA default */ + + if (!consistent_pte) { + printk(KERN_ERR "%s: not initialised\n", __func__); + dump_stack(); + return NULL; + } + + size = PAGE_ALIGN(size); + limit = (mask + 1) & ~mask; + if ((limit && size >= limit) || size >= (CONSISTENT_END - CONSISTENT_BASE)) { + printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n", + size, mask); + return NULL; + } + + order = get_order(size); + + if (mask != 0xffffffff) + gfp |= GFP_DMA; + + page = alloc_pages(gfp, order); + if (!page) + goto no_page; + + /* + * Invalidate any data that might be lurking in the + * kernel direct-mapped region for device DMA. + */ + { + unsigned long kaddr = (unsigned long)page_address(page); + memset(page_address(page), 0, size); + flush_dcache_range(kaddr, kaddr + size); + } + + /* + * Allocate a virtual address in the consistent mapping region. + */ + c = ppc_vm_region_alloc(&consistent_head, size, + gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); + if (c) { + unsigned long vaddr = c->vm_start; + pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr); + struct page *end = page + (1 << order); + + split_page(page, order); + + /* + * Set the "dma handle" + */ + *handle = page_to_phys(page); + + do { + BUG_ON(!pte_none(*pte)); + + SetPageReserved(page); + set_pte_at(&init_mm, vaddr, + pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL))); + page++; + pte++; + vaddr += PAGE_SIZE; + } while (size -= PAGE_SIZE); + + /* + * Free the otherwise unused pages. + */ + while (page < end) { + __free_page(page); + page++; + } + + return (void *)c->vm_start; + } + + if (page) + __free_pages(page, order); + no_page: + return NULL; +} +EXPORT_SYMBOL(__dma_alloc_coherent); + +/* + * free a page as defined by the above mapping. + */ +void __dma_free_coherent(size_t size, void *vaddr) +{ + struct ppc_vm_region *c; + unsigned long flags, addr; + pte_t *ptep; + + size = PAGE_ALIGN(size); + + spin_lock_irqsave(&consistent_lock, flags); + + c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr); + if (!c) + goto no_area; + + if ((c->vm_end - c->vm_start) != size) { + printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", + __func__, c->vm_end - c->vm_start, size); + dump_stack(); + size = c->vm_end - c->vm_start; + } + + ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start); + addr = c->vm_start; + do { + pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); + unsigned long pfn; + + ptep++; + addr += PAGE_SIZE; + + if (!pte_none(pte) && pte_present(pte)) { + pfn = pte_pfn(pte); + + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + ClearPageReserved(page); + + __free_page(page); + continue; + } + } + + printk(KERN_CRIT "%s: bad page in kernel page table\n", + __func__); + } while (size -= PAGE_SIZE); + + flush_tlb_kernel_range(c->vm_start, c->vm_end); + + list_del(&c->vm_list); + + spin_unlock_irqrestore(&consistent_lock, flags); + + kfree(c); + return; + + no_area: + spin_unlock_irqrestore(&consistent_lock, flags); + printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", + __func__, vaddr); + dump_stack(); +} +EXPORT_SYMBOL(__dma_free_coherent); + +/* + * Initialise the consistent memory allocation. + */ +static int __init dma_alloc_init(void) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int ret = 0; + + do { + pgd = pgd_offset(&init_mm, CONSISTENT_BASE); + pud = pud_alloc(&init_mm, pgd, CONSISTENT_BASE); + pmd = pmd_alloc(&init_mm, pud, CONSISTENT_BASE); + if (!pmd) { + printk(KERN_ERR "%s: no pmd tables\n", __func__); + ret = -ENOMEM; + break; + } + + pte = pte_alloc_kernel(pmd, CONSISTENT_BASE); + if (!pte) { + printk(KERN_ERR "%s: no pte tables\n", __func__); + ret = -ENOMEM; + break; + } + + consistent_pte = pte; + } while (0); + + return ret; +} + +core_initcall(dma_alloc_init); + +/* + * make an area consistent. + */ +void __dma_sync(void *vaddr, size_t size, int direction) +{ + unsigned long start = (unsigned long)vaddr; + unsigned long end = start + size; + + switch (direction) { + case DMA_NONE: + BUG(); + case DMA_FROM_DEVICE: + /* + * invalidate only when cache-line aligned otherwise there is + * the potential for discarding uncommitted data from the cache + */ + if ((start & (L1_CACHE_BYTES - 1)) || (size & (L1_CACHE_BYTES - 1))) + flush_dcache_range(start, end); + else + invalidate_dcache_range(start, end); + break; + case DMA_TO_DEVICE: /* writeback only */ + clean_dcache_range(start, end); + break; + case DMA_BIDIRECTIONAL: /* writeback and invalidate */ + flush_dcache_range(start, end); + break; + } +} +EXPORT_SYMBOL(__dma_sync); + +#ifdef CONFIG_HIGHMEM +/* + * __dma_sync_page() implementation for systems using highmem. + * In this case, each page of a buffer must be kmapped/kunmapped + * in order to have a virtual address for __dma_sync(). This must + * not sleep so kmap_atomic()/kunmap_atomic() are used. + * + * Note: yes, it is possible and correct to have a buffer extend + * beyond the first page. + */ +static inline void __dma_sync_page_highmem(struct page *page, + unsigned long offset, size_t size, int direction) +{ + size_t seg_size = min((size_t)(PAGE_SIZE - offset), size); + size_t cur_size = seg_size; + unsigned long flags, start, seg_offset = offset; + int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE; + int seg_nr = 0; + + local_irq_save(flags); + + do { + start = (unsigned long)kmap_atomic(page + seg_nr, + KM_PPC_SYNC_PAGE) + seg_offset; + + /* Sync this buffer segment */ + __dma_sync((void *)start, seg_size, direction); + kunmap_atomic((void *)start, KM_PPC_SYNC_PAGE); + seg_nr++; + + /* Calculate next buffer segment size */ + seg_size = min((size_t)PAGE_SIZE, size - cur_size); + + /* Add the segment size to our running total */ + cur_size += seg_size; + seg_offset = 0; + } while (seg_nr < nr_segs); + + local_irq_restore(flags); +} +#endif /* CONFIG_HIGHMEM */ + +/* + * __dma_sync_page makes memory consistent. identical to __dma_sync, but + * takes a struct page instead of a virtual address + */ +void __dma_sync_page(struct page *page, unsigned long offset, + size_t size, int direction) +{ +#ifdef CONFIG_HIGHMEM + __dma_sync_page_highmem(page, offset, size, direction); +#else + unsigned long start = (unsigned long)page_address(page) + offset; + __dma_sync((void *)start, size, direction); +#endif +} +EXPORT_SYMBOL(__dma_sync_page); -- cgit v1.2.3 From f637a49e507c88354ab32b5d914e06acfb7ee00d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 27 May 2009 13:44:50 +1000 Subject: powerpc: Minor cleanups of kernel virt address space definitions Make FIXADDR_TOP a compile time constant and cleanup a couple of definitions relative to the layout of the kernel address space on ppc32. We also print out that layout at boot time for debugging purposes. This is a pre-requisite for properly fixing non-coherent DMA allocactions. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/fixmap.h | 4 ++-- arch/powerpc/include/asm/pgtable-ppc32.h | 22 ++++++++++++++++++++-- arch/powerpc/mm/init_32.c | 8 ++------ arch/powerpc/mm/mem.c | 13 +++++++++++++ arch/powerpc/mm/pgtable_32.c | 2 -- 5 files changed, 37 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index d60fd18f428..f1f4e23a84e 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -14,8 +14,6 @@ #ifndef _ASM_FIXMAP_H #define _ASM_FIXMAP_H -extern unsigned long FIXADDR_TOP; - #ifndef __ASSEMBLY__ #include #include @@ -24,6 +22,8 @@ extern unsigned long FIXADDR_TOP; #include #endif +#define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE)) + /* * Here we define all the compile-time 'special' virtual * addresses. The point is to have a constant address at diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index ba45c997830..28fe9d4bae3 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -10,7 +10,7 @@ extern unsigned long va_to_phys(unsigned long address); extern pte_t *va_to_pte(unsigned long address); -extern unsigned long ioremap_bot, ioremap_base; +extern unsigned long ioremap_bot; #ifdef CONFIG_44x extern int icache_44x_need_flush; @@ -55,9 +55,27 @@ extern int icache_44x_need_flush; #define pgd_ERROR(e) \ printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) +/* + * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary + * value (for now) on others, from where we can start layout kernel + * virtual space that goes below PKMAP and FIXMAP + */ +#ifdef CONFIG_HIGHMEM +#define KVIRT_TOP PKMAP_BASE +#else +#define KVIRT_TOP (0xfe000000UL) /* for now, could be FIXMAP_BASE ? */ +#endif + +/* + * ioremap_bot starts at that address. Early ioremaps move down from there, + * until mem_init() at which point this becomes the top of the vmalloc + * and ioremap space + */ +#define IOREMAP_TOP KVIRT_TOP + /* * Just any arbitrary offset to the start of the vmalloc VM area: the - * current 64MB value just means that there will be a 64MB "hole" after the + * current 16MB value just means that there will be a 64MB "hole" after the * physical memory until the kernel virtual memory starts. That means that * any out-of-bounds memory accesses will hopefully be caught. * The vmalloc() routines leaves a hole of 4kB between each vmalloced diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 666a5e8a5be..3de6a0d9382 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -168,12 +168,8 @@ void __init MMU_init(void) ppc_md.progress("MMU:mapin", 0x301); mapin_ram(); -#ifdef CONFIG_HIGHMEM - ioremap_base = PKMAP_BASE; -#else - ioremap_base = 0xfe000000UL; /* for now, could be 0xfffff000 */ -#endif /* CONFIG_HIGHMEM */ - ioremap_bot = ioremap_base; + /* Initialize early top-down ioremap allocator */ + ioremap_bot = IOREMAP_TOP; /* Map in I/O resources */ if (ppc_md.progress) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d0602a76bf7..d3a4e67561f 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -380,6 +380,19 @@ void __init mem_init(void) bsssize >> 10, initsize >> 10); +#ifdef CONFIG_PPC32 + pr_info("Kernel virtual memory layout:\n"); + pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP); +#ifdef CONFIG_HIGHMEM + pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n", + PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP)); +#endif /* CONFIG_HIGHMEM */ + pr_info(" * 0x%08lx..0x%08lx : early ioremap\n", + ioremap_bot, IOREMAP_TOP); + pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n", + VMALLOC_START, VMALLOC_END); +#endif /* CONFIG_PPC32 */ + mem_init_done = 1; } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 430d0908fa5..5422169626b 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -399,8 +399,6 @@ void kernel_map_pages(struct page *page, int numpages, int enable) #endif /* CONFIG_DEBUG_PAGEALLOC */ static int fixmaps; -unsigned long FIXADDR_TOP = (-PAGE_SIZE); -EXPORT_SYMBOL(FIXADDR_TOP); void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { -- cgit v1.2.3 From 8b31e49d1d75729c1da9009664ba52abd1adc628 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 27 May 2009 13:50:33 +1000 Subject: powerpc: Fix up dma_alloc_coherent() on platforms without cache coherency. The implementation we just revived has issues, such as using a Kconfig-defined virtual address area in kernel space that nothing actually carves out (and thus will overlap whatever is there), or having some dependencies on being self contained in a single PTE page which adds unnecessary constraints on the kernel virtual address space. This fixes it by using more classic PTE accessors and automatically locating the area for consistent memory, carving an appropriate hole in the kernel virtual address space, leaving only the size of that area as a Kconfig option. It also brings some dma-mask related fixes from the ARM implementation which was almost identical initially but grew its own fixes. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig | 13 ---- arch/powerpc/include/asm/dma-mapping.h | 6 +- arch/powerpc/include/asm/pgtable-ppc32.h | 4 ++ arch/powerpc/kernel/dma.c | 2 +- arch/powerpc/mm/dma-noncoherent.c | 108 ++++++++++++------------------- arch/powerpc/mm/mem.c | 4 ++ 6 files changed, 54 insertions(+), 83 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3bb43adce44..cdc9a6ff4be 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -868,19 +868,6 @@ config TASK_SIZE default "0x80000000" if PPC_PREP || PPC_8xx default "0xc0000000" -config CONSISTENT_START_BOOL - bool "Set custom consistent memory pool address" - depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE - help - This option allows you to set the base virtual address - of the consistent memory pool. This pool of virtual - memory is used to make consistent memory allocations. - -config CONSISTENT_START - hex "Base virtual address of consistent memory pool" if CONSISTENT_START_BOOL - default "0xfd000000" if (NOT_COHERENT_CACHE && 8xx) - default "0xff100000" if NOT_COHERENT_CACHE - config CONSISTENT_SIZE_BOOL bool "Set custom consistent memory pool size" depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index c69f2b5f0cc..cb448d68452 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -26,7 +26,9 @@ * allocate the space "normally" and use the cache management functions * to ensure it is consistent. */ -extern void *__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp); +struct device; +extern void *__dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp); extern void __dma_free_coherent(size_t size, void *vaddr); extern void __dma_sync(void *vaddr, size_t size, int direction); extern void __dma_sync_page(struct page *page, unsigned long offset, @@ -37,7 +39,7 @@ extern void __dma_sync_page(struct page *page, unsigned long offset, * Cache coherent cores. */ -#define __dma_alloc_coherent(gfp, size, handle) NULL +#define __dma_alloc_coherent(dev, gfp, size, handle) NULL #define __dma_free_coherent(size, addr) ((void)0) #define __dma_sync(addr, size, rw) ((void)0) #define __dma_sync_page(pg, off, sz, rw) ((void)0) diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 28fe9d4bae3..c9ff9d75990 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -71,7 +71,11 @@ extern int icache_44x_need_flush; * until mem_init() at which point this becomes the top of the vmalloc * and ioremap space */ +#ifdef CONFIG_NOT_COHERENT_CACHE +#define IOREMAP_TOP ((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK) +#else #define IOREMAP_TOP KVIRT_TOP +#endif /* * Just any arbitrary offset to the start of the vmalloc VM area: the diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 53c7788cba7..6b02793dc75 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -32,7 +32,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size, { void *ret; #ifdef CONFIG_NOT_COHERENT_CACHE - ret = __dma_alloc_coherent(size, dma_handle, flag); + ret = __dma_alloc_coherent(dev, size, dma_handle, flag); if (ret == NULL) return NULL; *dma_handle += get_dma_direct_offset(dev); diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index b7dc4c19f58..36692f5c9a7 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -32,20 +32,21 @@ #include +#include "mmu_decl.h" + /* * This address range defaults to a value that is safe for all * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It * can be further configured for specific applications under * the "Advanced Setup" menu. -Matt */ -#define CONSISTENT_BASE (CONFIG_CONSISTENT_START) -#define CONSISTENT_END (CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE) +#define CONSISTENT_BASE (IOREMAP_TOP) +#define CONSISTENT_END (CONSISTENT_BASE + CONFIG_CONSISTENT_SIZE) #define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT) /* * This is the page table (2MB) covering uncached, DMA consistent allocations */ -static pte_t *consistent_pte; static DEFINE_SPINLOCK(consistent_lock); /* @@ -148,22 +149,38 @@ static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsi * virtual and bus address for that space. */ void * -__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) +__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) { struct page *page; struct ppc_vm_region *c; unsigned long order; - u64 mask = 0x00ffffff, limit; /* ISA default */ + u64 mask = ISA_DMA_THRESHOLD, limit; - if (!consistent_pte) { - printk(KERN_ERR "%s: not initialised\n", __func__); - dump_stack(); - return NULL; + if (dev) { + mask = dev->coherent_dma_mask; + + /* + * Sanity check the DMA mask - it must be non-zero, and + * must be able to be satisfied by a DMA allocation. + */ + if (mask == 0) { + dev_warn(dev, "coherent DMA mask is unset\n"); + goto no_page; + } + + if ((~mask) & ISA_DMA_THRESHOLD) { + dev_warn(dev, "coherent DMA mask %#llx is smaller " + "than system GFP_DMA mask %#llx\n", + mask, (unsigned long long)ISA_DMA_THRESHOLD); + goto no_page; + } } + size = PAGE_ALIGN(size); limit = (mask + 1) & ~mask; - if ((limit && size >= limit) || size >= (CONSISTENT_END - CONSISTENT_BASE)) { + if ((limit && size >= limit) || + size >= (CONSISTENT_END - CONSISTENT_BASE)) { printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n", size, mask); return NULL; @@ -171,6 +188,7 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) order = get_order(size); + /* Might be useful if we ever have a real legacy DMA zone... */ if (mask != 0xffffffff) gfp |= GFP_DMA; @@ -195,7 +213,6 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); if (c) { unsigned long vaddr = c->vm_start; - pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr); struct page *end = page + (1 << order); split_page(page, order); @@ -206,13 +223,10 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) *handle = page_to_phys(page); do { - BUG_ON(!pte_none(*pte)); - SetPageReserved(page); - set_pte_at(&init_mm, vaddr, - pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL))); + map_page(vaddr, page_to_phys(page), + pgprot_noncached(PAGE_KERNEL)); page++; - pte++; vaddr += PAGE_SIZE; } while (size -= PAGE_SIZE); @@ -241,8 +255,7 @@ void __dma_free_coherent(size_t size, void *vaddr) { struct ppc_vm_region *c; unsigned long flags, addr; - pte_t *ptep; - + size = PAGE_ALIGN(size); spin_lock_irqsave(&consistent_lock, flags); @@ -258,29 +271,26 @@ void __dma_free_coherent(size_t size, void *vaddr) size = c->vm_end - c->vm_start; } - ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start); addr = c->vm_start; do { - pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); + pte_t *ptep; unsigned long pfn; - ptep++; - addr += PAGE_SIZE; - - if (!pte_none(pte) && pte_present(pte)) { - pfn = pte_pfn(pte); - + ptep = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(addr), + addr), + addr), + addr); + if (!pte_none(*ptep) && pte_present(*ptep)) { + pfn = pte_pfn(*ptep); + pte_clear(&init_mm, addr, ptep); if (pfn_valid(pfn)) { struct page *page = pfn_to_page(pfn); - ClearPageReserved(page); + ClearPageReserved(page); __free_page(page); - continue; } } - - printk(KERN_CRIT "%s: bad page in kernel page table\n", - __func__); + addr += PAGE_SIZE; } while (size -= PAGE_SIZE); flush_tlb_kernel_range(c->vm_start, c->vm_end); @@ -300,42 +310,6 @@ void __dma_free_coherent(size_t size, void *vaddr) } EXPORT_SYMBOL(__dma_free_coherent); -/* - * Initialise the consistent memory allocation. - */ -static int __init dma_alloc_init(void) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - int ret = 0; - - do { - pgd = pgd_offset(&init_mm, CONSISTENT_BASE); - pud = pud_alloc(&init_mm, pgd, CONSISTENT_BASE); - pmd = pmd_alloc(&init_mm, pud, CONSISTENT_BASE); - if (!pmd) { - printk(KERN_ERR "%s: no pmd tables\n", __func__); - ret = -ENOMEM; - break; - } - - pte = pte_alloc_kernel(pmd, CONSISTENT_BASE); - if (!pte) { - printk(KERN_ERR "%s: no pte tables\n", __func__); - ret = -ENOMEM; - break; - } - - consistent_pte = pte; - } while (0); - - return ret; -} - -core_initcall(dma_alloc_init); - /* * make an area consistent. */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d3a4e67561f..579382c163a 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -387,6 +387,10 @@ void __init mem_init(void) pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n", PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP)); #endif /* CONFIG_HIGHMEM */ +#ifdef CONFIG_NOT_COHERENT_CACHE + pr_info(" * 0x%08lx..0x%08lx : consistent mem\n", + IOREMAP_TOP, IOREMAP_TOP + CONFIG_CONSISTENT_SIZE); +#endif /* CONFIG_NOT_COHERENT_CACHE */ pr_info(" * 0x%08lx..0x%08lx : early ioremap\n", ioremap_bot, IOREMAP_TOP); pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n", -- cgit v1.2.3 From 67a433ce278b98f47272726a22537fab7fd99de9 Mon Sep 17 00:00:00 2001 From: Paulius Zaleckas Date: Thu, 28 May 2009 16:42:25 +0300 Subject: Gemini: Fix SRAM/ROM location after memory swap Signed-off-by: Paulius Zaleckas --- arch/arm/mach-gemini/include/mach/hardware.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-gemini/include/mach/hardware.h b/arch/arm/mach-gemini/include/mach/hardware.h index de6752674c0..213a4fcfeb1 100644 --- a/arch/arm/mach-gemini/include/mach/hardware.h +++ b/arch/arm/mach-gemini/include/mach/hardware.h @@ -15,10 +15,9 @@ /* * Memory Map definitions */ -/* FIXME: Does it really swap SRAM like this? */ #ifdef CONFIG_GEMINI_MEM_SWAP # define GEMINI_DRAM_BASE 0x00000000 -# define GEMINI_SRAM_BASE 0x20000000 +# define GEMINI_SRAM_BASE 0x70000000 #else # define GEMINI_SRAM_BASE 0x00000000 # define GEMINI_DRAM_BASE 0x10000000 -- cgit v1.2.3