From 409001948d9f221c94a61c3ee96de112755fc04d Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 22 Oct 2008 05:53:45 +0000 Subject: powerpc: Update page-in counter for CMM A new field has been added to the VPA as a method for the client OS to communicate to firmware the number of page-ins it is performing when running collaborative memory overcommit. The hypervisor will use this information to better determine if a partition is experiencing memory pressure and needs more memory allocated to it. Signed-off-by: Brian King Signed-off-by: Paul Mackerras --- arch/powerpc/mm/fault.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 565b7a237c8..b18bc0f023c 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -318,9 +319,16 @@ good_area: goto do_sigbus; BUG(); } - if (ret & VM_FAULT_MAJOR) + if (ret & VM_FAULT_MAJOR) { current->maj_flt++; - else +#ifdef CONFIG_PPC_SMLPAR + if (firmware_has_feature(FW_FEATURE_CMO)) { + preempt_disable(); + get_lppaca()->page_ins++; + preempt_enable(); + } +#endif + } else current->min_flt++; up_read(&mm->mmap_sem); return 0; -- cgit v1.2.3 From 7d4320f3d5ace5758111f2beac931376737f80f5 Mon Sep 17 00:00:00 2001 From: Jon Tollefson Date: Thu, 30 Oct 2008 12:03:57 +0000 Subject: powerpc: Hugetlb pgtable cache access cleanup Andrew Morton suggested that using a macro that makes an array reference look like a function call makes it harder to understand the code. This therefore removes the huge_pgtable_cache(psize) macro and replaces its uses with pgtable_cache[HUGE_PGTABLE_INDEX(psize)]. Signed-off-by: Jon Tollefson Cc: Nick Piggin Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Acked-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hugetlbpage.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index a117024ab8c..c2231358adb 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -53,8 +53,7 @@ unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */ /* Subtract one from array size because we don't need a cache for 4K since * is not a huge page size */ -#define huge_pgtable_cache(psize) (pgtable_cache[HUGEPTE_CACHE_NUM \ - + psize-1]) +#define HUGE_PGTABLE_INDEX(psize) (HUGEPTE_CACHE_NUM + psize - 1) #define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize]) static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = { @@ -113,7 +112,7 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, unsigned long address, unsigned int psize) { - pte_t *new = kmem_cache_zalloc(huge_pgtable_cache(psize), + pte_t *new = kmem_cache_zalloc(pgtable_cache[HUGE_PGTABLE_INDEX(psize)], GFP_KERNEL|__GFP_REPEAT); if (! new) @@ -121,7 +120,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, spin_lock(&mm->page_table_lock); if (!hugepd_none(*hpdp)) - kmem_cache_free(huge_pgtable_cache(psize), new); + kmem_cache_free(pgtable_cache[HUGE_PGTABLE_INDEX(psize)], new); else hpdp->pd = (unsigned long)new | HUGEPD_OK; spin_unlock(&mm->page_table_lock); @@ -760,13 +759,14 @@ static int __init hugetlbpage_init(void) for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { if (mmu_huge_psizes[psize]) { - huge_pgtable_cache(psize) = kmem_cache_create( - HUGEPTE_CACHE_NAME(psize), - HUGEPTE_TABLE_SIZE(psize), - HUGEPTE_TABLE_SIZE(psize), - 0, - NULL); - if (!huge_pgtable_cache(psize)) + pgtable_cache[HUGE_PGTABLE_INDEX(psize)] = + kmem_cache_create( + HUGEPTE_CACHE_NAME(psize), + HUGEPTE_TABLE_SIZE(psize), + HUGEPTE_TABLE_SIZE(psize), + 0, + NULL); + if (!pgtable_cache[HUGE_PGTABLE_INDEX(psize)]) panic("hugetlbpage_init(): could not create %s"\ "\n", HUGEPTE_CACHE_NAME(psize)); } -- cgit v1.2.3 From a6326e98a28d8a57f693369c82559543c6950f09 Mon Sep 17 00:00:00 2001 From: Robert Jennings Date: Fri, 14 Nov 2008 12:07:34 +0000 Subject: powerpc: Correct page-in counter for CMM with 64k pages Linux will report the number of page-ins so that the hypervisor can better determine partition memory pressure. The hardware page size and the OS page size can be different. In the case where the hardware page size is 4k and the OS is running with 64k pages the code in commit 409001948d9f221c94a61c3ee96de112755fc04d ("powerpc: Update page-in counter for CMM") would under-report the number of pages. This corrects the reporting to the hypervisor by incrementing the page_in count by 1 << PAGE_FACTOR each time. Reported-by: Andrew Theurer Signed-off-by: Robert Jennings Signed-off-by: Paul Mackerras --- arch/powerpc/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index b18bc0f023c..7df0409107a 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -324,7 +324,7 @@ good_area: #ifdef CONFIG_PPC_SMLPAR if (firmware_has_feature(FW_FEATURE_CMO)) { preempt_disable(); - get_lppaca()->page_ins++; + get_lppaca()->page_ins += (1 << PAGE_FACTOR); preempt_enable(); } #endif -- cgit v1.2.3 From f4f3a1261ad70988ad45614ebc87e553143a332b Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Wed, 19 Nov 2008 05:53:04 +0000 Subject: powerpc: hash_page_sync should only be used on SMP & STD_MMU_32 Clean up the ifdefs so we only use hash_page_sync if we have CONFIG_SMP && CONFIG_PPC_STD_MMU_32. Signed-off-by: Kumar Gala Acked-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/mm/pgtable_32.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index c31d6d26f0b..44fbc81c9b2 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -48,7 +48,7 @@ EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ extern char etext[], _stext[]; -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_32) extern void hash_page_sync(void); #endif @@ -127,7 +127,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_32) hash_page_sync(); #endif free_page((unsigned long)pte); @@ -135,7 +135,7 @@ void pte_free_kernel(struct mm_struct *mm, pte_t *pte) void pte_free(struct mm_struct *mm, pgtable_t ptepage) { -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_32) hash_page_sync(); #endif pgtable_page_dtor(ptepage); -- cgit v1.2.3 From 0186f47e703fb7aa14b54459d642ef5374b3a685 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Wed, 19 Nov 2008 12:50:04 +0000 Subject: powerpc: Use RCU based pte freeing mechanism for all powerpc Refactor the RCU based pte free code that was used on ppc64 to be used on all powerpc. Additionally refactor pte_free() & pte_free_kernel() into common code between ppc32 & ppc64. Signed-off-by: Kumar Gala Signed-off-by: Paul Mackerras --- arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/hash_low_32.S | 30 ----------- arch/powerpc/mm/pgtable.c | 117 ++++++++++++++++++++++++++++++++++++++++++ arch/powerpc/mm/pgtable_32.c | 21 -------- arch/powerpc/mm/tlb_64.c | 86 ------------------------------- 5 files changed, 118 insertions(+), 138 deletions(-) create mode 100644 arch/powerpc/mm/pgtable.c (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index e7392b45a5e..86e657bcfa7 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -6,7 +6,7 @@ ifeq ($(CONFIG_PPC64),y) EXTRA_CFLAGS += -mno-minimal-toc endif -obj-y := fault.o mem.o \ +obj-y := fault.o mem.o pgtable.o \ init_$(CONFIG_WORD_SIZE).o \ pgtable_$(CONFIG_WORD_SIZE).o \ mmu_context_$(CONFIG_WORD_SIZE).o diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index 7bffb70b9fe..c5536b8b37a 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -35,36 +35,6 @@ mmu_hash_lock: .space 4 #endif /* CONFIG_SMP */ -/* - * Sync CPUs with hash_page taking & releasing the hash - * table lock - */ -#ifdef CONFIG_SMP - .text -_GLOBAL(hash_page_sync) - mfmsr r10 - rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ - mtmsr r0 - lis r8,mmu_hash_lock@h - ori r8,r8,mmu_hash_lock@l - lis r0,0x0fff - b 10f -11: lwz r6,0(r8) - cmpwi 0,r6,0 - bne 11b -10: lwarx r6,0,r8 - cmpwi 0,r6,0 - bne- 11b - stwcx. r0,0,r8 - bne- 10b - isync - eieio - li r0,0 - stw r0,0(r8) - mtmsr r10 - blr -#endif /* CONFIG_SMP */ - /* * Load a PTE into the hash table, if possible. * The address is in r4, and r3 contains an access flag: diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c new file mode 100644 index 00000000000..6d94116fdea --- /dev/null +++ b/arch/powerpc/mm/pgtable.c @@ -0,0 +1,117 @@ +/* + * This file contains common routines for dealing with free of page tables + * + * Derived from arch/powerpc/mm/tlb_64.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); +static unsigned long pte_freelist_forced_free; + +struct pte_freelist_batch +{ + struct rcu_head rcu; + unsigned int index; + pgtable_free_t tables[0]; +}; + +#define PTE_FREELIST_SIZE \ + ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ + / sizeof(pgtable_free_t)) + +static void pte_free_smp_sync(void *arg) +{ + /* Do nothing, just ensure we sync with all CPUs */ +} + +/* This is only called when we are critically out of memory + * (and fail to get a page in pte_free_tlb). + */ +static void pgtable_free_now(pgtable_free_t pgf) +{ + pte_freelist_forced_free++; + + smp_call_function(pte_free_smp_sync, NULL, 1); + + pgtable_free(pgf); +} + +static void pte_free_rcu_callback(struct rcu_head *head) +{ + struct pte_freelist_batch *batch = + container_of(head, struct pte_freelist_batch, rcu); + unsigned int i; + + for (i = 0; i < batch->index; i++) + pgtable_free(batch->tables[i]); + + free_page((unsigned long)batch); +} + +static void pte_free_submit(struct pte_freelist_batch *batch) +{ + INIT_RCU_HEAD(&batch->rcu); + call_rcu(&batch->rcu, pte_free_rcu_callback); +} + +void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) +{ + /* This is safe since tlb_gather_mmu has disabled preemption */ + cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id()); + struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); + + if (atomic_read(&tlb->mm->mm_users) < 2 || + cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) { + pgtable_free(pgf); + return; + } + + if (*batchp == NULL) { + *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); + if (*batchp == NULL) { + pgtable_free_now(pgf); + return; + } + (*batchp)->index = 0; + } + (*batchp)->tables[(*batchp)->index++] = pgf; + if ((*batchp)->index == PTE_FREELIST_SIZE) { + pte_free_submit(*batchp); + *batchp = NULL; + } +} + +void pte_free_finish(void) +{ + /* This is safe since tlb_gather_mmu has disabled preemption */ + struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); + + if (*batchp == NULL) + return; + pte_free_submit(*batchp); + *batchp = NULL; +} diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 44fbc81c9b2..c7b755cba26 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -48,10 +48,6 @@ EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ extern char etext[], _stext[]; -#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_32) -extern void hash_page_sync(void); -#endif - #ifdef HAVE_BATS extern phys_addr_t v_mapped_by_bats(unsigned long va); extern unsigned long p_mapped_by_bats(phys_addr_t pa); @@ -125,23 +121,6 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) return ptepage; } -void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ -#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_32) - hash_page_sync(); -#endif - free_page((unsigned long)pte); -} - -void pte_free(struct mm_struct *mm, pgtable_t ptepage) -{ -#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_32) - hash_page_sync(); -#endif - pgtable_page_dtor(ptepage); - __free_page(ptepage); -} - void __iomem * ioremap(phys_addr_t addr, unsigned long size) { diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c index be7dd422c0f..c931bc7d107 100644 --- a/arch/powerpc/mm/tlb_64.c +++ b/arch/powerpc/mm/tlb_64.c @@ -37,81 +37,6 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); * arch/powerpc/include/asm/tlb.h file -- tgall */ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); -static unsigned long pte_freelist_forced_free; - -struct pte_freelist_batch -{ - struct rcu_head rcu; - unsigned int index; - pgtable_free_t tables[0]; -}; - -#define PTE_FREELIST_SIZE \ - ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ - / sizeof(pgtable_free_t)) - -static void pte_free_smp_sync(void *arg) -{ - /* Do nothing, just ensure we sync with all CPUs */ -} - -/* This is only called when we are critically out of memory - * (and fail to get a page in pte_free_tlb). - */ -static void pgtable_free_now(pgtable_free_t pgf) -{ - pte_freelist_forced_free++; - - smp_call_function(pte_free_smp_sync, NULL, 1); - - pgtable_free(pgf); -} - -static void pte_free_rcu_callback(struct rcu_head *head) -{ - struct pte_freelist_batch *batch = - container_of(head, struct pte_freelist_batch, rcu); - unsigned int i; - - for (i = 0; i < batch->index; i++) - pgtable_free(batch->tables[i]); - - free_page((unsigned long)batch); -} - -static void pte_free_submit(struct pte_freelist_batch *batch) -{ - INIT_RCU_HEAD(&batch->rcu); - call_rcu(&batch->rcu, pte_free_rcu_callback); -} - -void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) -{ - /* This is safe since tlb_gather_mmu has disabled preemption */ - cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id()); - struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); - - if (atomic_read(&tlb->mm->mm_users) < 2 || - cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) { - pgtable_free(pgf); - return; - } - - if (*batchp == NULL) { - *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); - if (*batchp == NULL) { - pgtable_free_now(pgf); - return; - } - (*batchp)->index = 0; - } - (*batchp)->tables[(*batchp)->index++] = pgf; - if ((*batchp)->index == PTE_FREELIST_SIZE) { - pte_free_submit(*batchp); - *batchp = NULL; - } -} /* * A linux PTE was changed and the corresponding hash table entry @@ -229,17 +154,6 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch) batch->index = 0; } -void pte_free_finish(void) -{ - /* This is safe since tlb_gather_mmu has disabled preemption */ - struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); - - if (*batchp == NULL) - return; - pte_free_submit(*batchp); - *batchp = NULL; -} - /** * __flush_hash_table_range - Flush all HPTEs for a given address range * from the hash table (and the TLB). But keeps -- cgit v1.2.3 From e41e811a79a4e328005be2744c3076ebde455088 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sun, 14 Dec 2008 19:44:39 +0000 Subject: powerpc/mm: Rename tlb_32.c and tlb_64.c to tlb_hash32.c and tlb_hash64.c This renames the files to clarify the fact that they are used by the hash based family of CPUs (the 603 being an exception in that family but is still handled by that code). This paves the way for the new tlb_nohash.c coming via a subsequent commit. Signed-off-by: Benjamin Herrenschmidt Acked-by: Kumar Gala Signed-off-by: Paul Mackerras --- arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/tlb_32.c | 190 -------------------------------------- arch/powerpc/mm/tlb_64.c | 211 ------------------------------------------- arch/powerpc/mm/tlb_hash32.c | 190 ++++++++++++++++++++++++++++++++++++++ arch/powerpc/mm/tlb_hash64.c | 211 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 402 insertions(+), 402 deletions(-) delete mode 100644 arch/powerpc/mm/tlb_32.c delete mode 100644 arch/powerpc/mm/tlb_64.c create mode 100644 arch/powerpc/mm/tlb_hash32.c create mode 100644 arch/powerpc/mm/tlb_hash64.c (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 86e657bcfa7..148de35c9ee 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -16,7 +16,7 @@ obj-$(CONFIG_PPC64) += hash_utils_64.o \ gup.o mmap.o $(hash-y) obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ - tlb_$(CONFIG_WORD_SIZE).o + tlb_hash$(CONFIG_WORD_SIZE).o obj-$(CONFIG_40x) += 40x_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o diff --git a/arch/powerpc/mm/tlb_32.c b/arch/powerpc/mm/tlb_32.c deleted file mode 100644 index f9a47fee392..00000000000 --- a/arch/powerpc/mm/tlb_32.c +++ /dev/null @@ -1,190 +0,0 @@ -/* - * This file contains the routines for TLB flushing. - * On machines where the MMU uses a hash table to store virtual to - * physical translations, these routines flush entries from the - * hash table also. - * -- paulus - * - * Derived from arch/ppc/mm/init.c: - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include - -#include -#include - -#include "mmu_decl.h" - -/* - * Called when unmapping pages to flush entries from the TLB/hash table. - */ -void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) -{ - unsigned long ptephys; - - if (Hash != 0) { - ptephys = __pa(ptep) & PAGE_MASK; - flush_hash_pages(mm->context.id, addr, ptephys, 1); - } -} -EXPORT_SYMBOL(flush_hash_entry); - -/* - * Called by ptep_set_access_flags, must flush on CPUs for which the - * DSI handler can't just "fixup" the TLB on a write fault - */ -void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr) -{ - if (Hash != 0) - return; - _tlbie(addr); -} - -/* - * Called at the end of a mmu_gather operation to make sure the - * TLB flush is completely done. - */ -void tlb_flush(struct mmu_gather *tlb) -{ - if (Hash == 0) { - /* - * 603 needs to flush the whole TLB here since - * it doesn't use a hash table. - */ - _tlbia(); - } -} - -/* - * TLB flushing: - * - * - flush_tlb_mm(mm) flushes the specified mm context TLB's - * - flush_tlb_page(vma, vmaddr) flushes one page - * - flush_tlb_range(vma, start, end) flushes a range of pages - * - flush_tlb_kernel_range(start, end) flushes kernel pages - * - * since the hardware hash table functions as an extension of the - * tlb as far as the linux tables are concerned, flush it too. - * -- Cort - */ - -/* - * 750 SMP is a Bad Idea because the 750 doesn't broadcast all - * the cache operations on the bus. Hence we need to use an IPI - * to get the other CPU(s) to invalidate their TLBs. - */ -#ifdef CONFIG_SMP_750 -#define FINISH_FLUSH smp_send_tlb_invalidate(0) -#else -#define FINISH_FLUSH do { } while (0) -#endif - -static void flush_range(struct mm_struct *mm, unsigned long start, - unsigned long end) -{ - pmd_t *pmd; - unsigned long pmd_end; - int count; - unsigned int ctx = mm->context.id; - - if (Hash == 0) { - _tlbia(); - return; - } - start &= PAGE_MASK; - if (start >= end) - return; - end = (end - 1) | ~PAGE_MASK; - pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start); - for (;;) { - pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; - if (pmd_end > end) - pmd_end = end; - if (!pmd_none(*pmd)) { - count = ((pmd_end - start) >> PAGE_SHIFT) + 1; - flush_hash_pages(ctx, start, pmd_val(*pmd), count); - } - if (pmd_end == end) - break; - start = pmd_end + 1; - ++pmd; - } -} - -/* - * Flush kernel TLB entries in the given range - */ -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - flush_range(&init_mm, start, end); - FINISH_FLUSH; -} - -/* - * Flush all the (user) entries for the address space described by mm. - */ -void flush_tlb_mm(struct mm_struct *mm) -{ - struct vm_area_struct *mp; - - if (Hash == 0) { - _tlbia(); - return; - } - - /* - * It is safe to go down the mm's list of vmas when called - * from dup_mmap, holding mmap_sem. It would also be safe from - * unmap_region or exit_mmap, but not from vmtruncate on SMP - - * but it seems dup_mmap is the only SMP case which gets here. - */ - for (mp = mm->mmap; mp != NULL; mp = mp->vm_next) - flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); - FINISH_FLUSH; -} - -void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) -{ - struct mm_struct *mm; - pmd_t *pmd; - - if (Hash == 0) { - _tlbie(vmaddr); - return; - } - mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; - pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr); - if (!pmd_none(*pmd)) - flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); - FINISH_FLUSH; -} - -/* - * For each address in the range, find the pte for the address - * and check _PAGE_HASHPTE bit; if it is set, find and destroy - * the corresponding HPTE. - */ -void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - flush_range(vma->vm_mm, start, end); - FINISH_FLUSH; -} diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c deleted file mode 100644 index c931bc7d107..00000000000 --- a/arch/powerpc/mm/tlb_64.c +++ /dev/null @@ -1,211 +0,0 @@ -/* - * This file contains the routines for flushing entries from the - * TLB and MMU hash table. - * - * Derived from arch/ppc64/mm/init.c: - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * Dave Engebretsen - * Rework for PPC64 port. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); - -/* This is declared as we are using the more or less generic - * arch/powerpc/include/asm/tlb.h file -- tgall - */ -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - -/* - * A linux PTE was changed and the corresponding hash table entry - * neesd to be flushed. This function will either perform the flush - * immediately or will batch it up if the current CPU has an active - * batch on it. - * - * Must be called from within some kind of spinlock/non-preempt region... - */ -void hpte_need_flush(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, unsigned long pte, int huge) -{ - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); - unsigned long vsid, vaddr; - unsigned int psize; - int ssize; - real_pte_t rpte; - int i; - - i = batch->index; - - /* We mask the address for the base page size. Huge pages will - * have applied their own masking already - */ - addr &= PAGE_MASK; - - /* Get page size (maybe move back to caller). - * - * NOTE: when using special 64K mappings in 4K environment like - * for SPEs, we obtain the page size from the slice, which thus - * must still exist (and thus the VMA not reused) at the time - * of this call - */ - if (huge) { -#ifdef CONFIG_HUGETLB_PAGE - psize = get_slice_psize(mm, addr);; -#else - BUG(); - psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ -#endif - } else - psize = pte_pagesize_index(mm, addr, pte); - - /* Build full vaddr */ - if (!is_kernel_addr(addr)) { - ssize = user_segment_size(addr); - vsid = get_vsid(mm->context.id, addr, ssize); - WARN_ON(vsid == 0); - } else { - vsid = get_kernel_vsid(addr, mmu_kernel_ssize); - ssize = mmu_kernel_ssize; - } - vaddr = hpt_va(addr, vsid, ssize); - rpte = __real_pte(__pte(pte), ptep); - - /* - * Check if we have an active batch on this CPU. If not, just - * flush now and return. For now, we don global invalidates - * in that case, might be worth testing the mm cpu mask though - * and decide to use local invalidates instead... - */ - if (!batch->active) { - flush_hash_page(vaddr, rpte, psize, ssize, 0); - return; - } - - /* - * This can happen when we are in the middle of a TLB batch and - * we encounter memory pressure (eg copy_page_range when it tries - * to allocate a new pte). If we have to reclaim memory and end - * up scanning and resetting referenced bits then our batch context - * will change mid stream. - * - * We also need to ensure only one page size is present in a given - * batch - */ - if (i != 0 && (mm != batch->mm || batch->psize != psize || - batch->ssize != ssize)) { - __flush_tlb_pending(batch); - i = 0; - } - if (i == 0) { - batch->mm = mm; - batch->psize = psize; - batch->ssize = ssize; - } - batch->pte[i] = rpte; - batch->vaddr[i] = vaddr; - batch->index = ++i; - if (i >= PPC64_TLB_BATCH_NR) - __flush_tlb_pending(batch); -} - -/* - * This function is called when terminating an mmu batch or when a batch - * is full. It will perform the flush of all the entries currently stored - * in a batch. - * - * Must be called from within some kind of spinlock/non-preempt region... - */ -void __flush_tlb_pending(struct ppc64_tlb_batch *batch) -{ - cpumask_t tmp; - int i, local = 0; - - i = batch->index; - tmp = cpumask_of_cpu(smp_processor_id()); - if (cpus_equal(batch->mm->cpu_vm_mask, tmp)) - local = 1; - if (i == 1) - flush_hash_page(batch->vaddr[0], batch->pte[0], - batch->psize, batch->ssize, local); - else - flush_hash_range(i, local); - batch->index = 0; -} - -/** - * __flush_hash_table_range - Flush all HPTEs for a given address range - * from the hash table (and the TLB). But keeps - * the linux PTEs intact. - * - * @mm : mm_struct of the target address space (generally init_mm) - * @start : starting address - * @end : ending address (not included in the flush) - * - * This function is mostly to be used by some IO hotplug code in order - * to remove all hash entries from a given address range used to map IO - * space on a removed PCI-PCI bidge without tearing down the full mapping - * since 64K pages may overlap with other bridges when using 64K pages - * with 4K HW pages on IO space. - * - * Because of that usage pattern, it's only available with CONFIG_HOTPLUG - * and is implemented for small size rather than speed. - */ -#ifdef CONFIG_HOTPLUG - -void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, - unsigned long end) -{ - unsigned long flags; - - start = _ALIGN_DOWN(start, PAGE_SIZE); - end = _ALIGN_UP(end, PAGE_SIZE); - - BUG_ON(!mm->pgd); - - /* Note: Normally, we should only ever use a batch within a - * PTE locked section. This violates the rule, but will work - * since we don't actually modify the PTEs, we just flush the - * hash while leaving the PTEs intact (including their reference - * to being hashed). This is not the most performance oriented - * way to do things but is fine for our needs here. - */ - local_irq_save(flags); - arch_enter_lazy_mmu_mode(); - for (; start < end; start += PAGE_SIZE) { - pte_t *ptep = find_linux_pte(mm->pgd, start); - unsigned long pte; - - if (ptep == NULL) - continue; - pte = pte_val(*ptep); - if (!(pte & _PAGE_HASHPTE)) - continue; - hpte_need_flush(mm, start, ptep, pte, 0); - } - arch_leave_lazy_mmu_mode(); - local_irq_restore(flags); -} - -#endif /* CONFIG_HOTPLUG */ diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c new file mode 100644 index 00000000000..f9a47fee392 --- /dev/null +++ b/arch/powerpc/mm/tlb_hash32.c @@ -0,0 +1,190 @@ +/* + * This file contains the routines for TLB flushing. + * On machines where the MMU uses a hash table to store virtual to + * physical translations, these routines flush entries from the + * hash table also. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "mmu_decl.h" + +/* + * Called when unmapping pages to flush entries from the TLB/hash table. + */ +void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) +{ + unsigned long ptephys; + + if (Hash != 0) { + ptephys = __pa(ptep) & PAGE_MASK; + flush_hash_pages(mm->context.id, addr, ptephys, 1); + } +} +EXPORT_SYMBOL(flush_hash_entry); + +/* + * Called by ptep_set_access_flags, must flush on CPUs for which the + * DSI handler can't just "fixup" the TLB on a write fault + */ +void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr) +{ + if (Hash != 0) + return; + _tlbie(addr); +} + +/* + * Called at the end of a mmu_gather operation to make sure the + * TLB flush is completely done. + */ +void tlb_flush(struct mmu_gather *tlb) +{ + if (Hash == 0) { + /* + * 603 needs to flush the whole TLB here since + * it doesn't use a hash table. + */ + _tlbia(); + } +} + +/* + * TLB flushing: + * + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes kernel pages + * + * since the hardware hash table functions as an extension of the + * tlb as far as the linux tables are concerned, flush it too. + * -- Cort + */ + +/* + * 750 SMP is a Bad Idea because the 750 doesn't broadcast all + * the cache operations on the bus. Hence we need to use an IPI + * to get the other CPU(s) to invalidate their TLBs. + */ +#ifdef CONFIG_SMP_750 +#define FINISH_FLUSH smp_send_tlb_invalidate(0) +#else +#define FINISH_FLUSH do { } while (0) +#endif + +static void flush_range(struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + pmd_t *pmd; + unsigned long pmd_end; + int count; + unsigned int ctx = mm->context.id; + + if (Hash == 0) { + _tlbia(); + return; + } + start &= PAGE_MASK; + if (start >= end) + return; + end = (end - 1) | ~PAGE_MASK; + pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start); + for (;;) { + pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; + if (pmd_end > end) + pmd_end = end; + if (!pmd_none(*pmd)) { + count = ((pmd_end - start) >> PAGE_SHIFT) + 1; + flush_hash_pages(ctx, start, pmd_val(*pmd), count); + } + if (pmd_end == end) + break; + start = pmd_end + 1; + ++pmd; + } +} + +/* + * Flush kernel TLB entries in the given range + */ +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + flush_range(&init_mm, start, end); + FINISH_FLUSH; +} + +/* + * Flush all the (user) entries for the address space described by mm. + */ +void flush_tlb_mm(struct mm_struct *mm) +{ + struct vm_area_struct *mp; + + if (Hash == 0) { + _tlbia(); + return; + } + + /* + * It is safe to go down the mm's list of vmas when called + * from dup_mmap, holding mmap_sem. It would also be safe from + * unmap_region or exit_mmap, but not from vmtruncate on SMP - + * but it seems dup_mmap is the only SMP case which gets here. + */ + for (mp = mm->mmap; mp != NULL; mp = mp->vm_next) + flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); + FINISH_FLUSH; +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + struct mm_struct *mm; + pmd_t *pmd; + + if (Hash == 0) { + _tlbie(vmaddr); + return; + } + mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; + pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr); + if (!pmd_none(*pmd)) + flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); + FINISH_FLUSH; +} + +/* + * For each address in the range, find the pte for the address + * and check _PAGE_HASHPTE bit; if it is set, find and destroy + * the corresponding HPTE. + */ +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + flush_range(vma->vm_mm, start, end); + FINISH_FLUSH; +} diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c new file mode 100644 index 00000000000..c931bc7d107 --- /dev/null +++ b/arch/powerpc/mm/tlb_hash64.c @@ -0,0 +1,211 @@ +/* + * This file contains the routines for flushing entries from the + * TLB and MMU hash table. + * + * Derived from arch/ppc64/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); + +/* This is declared as we are using the more or less generic + * arch/powerpc/include/asm/tlb.h file -- tgall + */ +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + +/* + * A linux PTE was changed and the corresponding hash table entry + * neesd to be flushed. This function will either perform the flush + * immediately or will batch it up if the current CPU has an active + * batch on it. + * + * Must be called from within some kind of spinlock/non-preempt region... + */ +void hpte_need_flush(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long pte, int huge) +{ + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + unsigned long vsid, vaddr; + unsigned int psize; + int ssize; + real_pte_t rpte; + int i; + + i = batch->index; + + /* We mask the address for the base page size. Huge pages will + * have applied their own masking already + */ + addr &= PAGE_MASK; + + /* Get page size (maybe move back to caller). + * + * NOTE: when using special 64K mappings in 4K environment like + * for SPEs, we obtain the page size from the slice, which thus + * must still exist (and thus the VMA not reused) at the time + * of this call + */ + if (huge) { +#ifdef CONFIG_HUGETLB_PAGE + psize = get_slice_psize(mm, addr);; +#else + BUG(); + psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ +#endif + } else + psize = pte_pagesize_index(mm, addr, pte); + + /* Build full vaddr */ + if (!is_kernel_addr(addr)) { + ssize = user_segment_size(addr); + vsid = get_vsid(mm->context.id, addr, ssize); + WARN_ON(vsid == 0); + } else { + vsid = get_kernel_vsid(addr, mmu_kernel_ssize); + ssize = mmu_kernel_ssize; + } + vaddr = hpt_va(addr, vsid, ssize); + rpte = __real_pte(__pte(pte), ptep); + + /* + * Check if we have an active batch on this CPU. If not, just + * flush now and return. For now, we don global invalidates + * in that case, might be worth testing the mm cpu mask though + * and decide to use local invalidates instead... + */ + if (!batch->active) { + flush_hash_page(vaddr, rpte, psize, ssize, 0); + return; + } + + /* + * This can happen when we are in the middle of a TLB batch and + * we encounter memory pressure (eg copy_page_range when it tries + * to allocate a new pte). If we have to reclaim memory and end + * up scanning and resetting referenced bits then our batch context + * will change mid stream. + * + * We also need to ensure only one page size is present in a given + * batch + */ + if (i != 0 && (mm != batch->mm || batch->psize != psize || + batch->ssize != ssize)) { + __flush_tlb_pending(batch); + i = 0; + } + if (i == 0) { + batch->mm = mm; + batch->psize = psize; + batch->ssize = ssize; + } + batch->pte[i] = rpte; + batch->vaddr[i] = vaddr; + batch->index = ++i; + if (i >= PPC64_TLB_BATCH_NR) + __flush_tlb_pending(batch); +} + +/* + * This function is called when terminating an mmu batch or when a batch + * is full. It will perform the flush of all the entries currently stored + * in a batch. + * + * Must be called from within some kind of spinlock/non-preempt region... + */ +void __flush_tlb_pending(struct ppc64_tlb_batch *batch) +{ + cpumask_t tmp; + int i, local = 0; + + i = batch->index; + tmp = cpumask_of_cpu(smp_processor_id()); + if (cpus_equal(batch->mm->cpu_vm_mask, tmp)) + local = 1; + if (i == 1) + flush_hash_page(batch->vaddr[0], batch->pte[0], + batch->psize, batch->ssize, local); + else + flush_hash_range(i, local); + batch->index = 0; +} + +/** + * __flush_hash_table_range - Flush all HPTEs for a given address range + * from the hash table (and the TLB). But keeps + * the linux PTEs intact. + * + * @mm : mm_struct of the target address space (generally init_mm) + * @start : starting address + * @end : ending address (not included in the flush) + * + * This function is mostly to be used by some IO hotplug code in order + * to remove all hash entries from a given address range used to map IO + * space on a removed PCI-PCI bidge without tearing down the full mapping + * since 64K pages may overlap with other bridges when using 64K pages + * with 4K HW pages on IO space. + * + * Because of that usage pattern, it's only available with CONFIG_HOTPLUG + * and is implemented for small size rather than speed. + */ +#ifdef CONFIG_HOTPLUG + +void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + unsigned long flags; + + start = _ALIGN_DOWN(start, PAGE_SIZE); + end = _ALIGN_UP(end, PAGE_SIZE); + + BUG_ON(!mm->pgd); + + /* Note: Normally, we should only ever use a batch within a + * PTE locked section. This violates the rule, but will work + * since we don't actually modify the PTEs, we just flush the + * hash while leaving the PTEs intact (including their reference + * to being hashed). This is not the most performance oriented + * way to do things but is fine for our needs here. + */ + local_irq_save(flags); + arch_enter_lazy_mmu_mode(); + for (; start < end; start += PAGE_SIZE) { + pte_t *ptep = find_linux_pte(mm->pgd, start); + unsigned long pte; + + if (ptep == NULL) + continue; + pte = pte_val(*ptep); + if (!(pte & _PAGE_HASHPTE)) + continue; + hpte_need_flush(mm, start, ptep, pte, 0); + } + arch_leave_lazy_mmu_mode(); + local_irq_restore(flags); +} + +#endif /* CONFIG_HOTPLUG */ -- cgit v1.2.3 From f63837f0581fe580168ae1a7d178ded935411747 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sun, 14 Dec 2008 19:44:51 +0000 Subject: powerpc/mm: Remove flush_HPTE() The function flush_HPTE() is used in only one place, the implementation of DEBUG_PAGEALLOC on ppc32. It's actually a dup of flush_tlb_page() though it's -slightly- more efficient on hash based processors. We remove it and replace it by a direct call to the hash flush code on those processors and to flush_tlb_page() for everybody else. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/mm/mmu_decl.h | 17 ----------------- arch/powerpc/mm/pgtable_32.c | 6 +++++- 2 files changed, 5 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index fab3cfad409..b4344fd30f2 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -58,17 +58,14 @@ extern phys_addr_t lowmem_end_addr; * architectures. -- Dan */ #if defined(CONFIG_8xx) -#define flush_HPTE(X, va, pg) _tlbie(va, 0 /* 8xx doesn't care about PID */) #define MMU_init_hw() do { } while(0) #define mmu_mapin_ram() (0UL) #elif defined(CONFIG_4xx) -#define flush_HPTE(pid, va, pg) _tlbie(va, pid) extern void MMU_init_hw(void); extern unsigned long mmu_mapin_ram(void); #elif defined(CONFIG_FSL_BOOKE) -#define flush_HPTE(pid, va, pg) _tlbie(va, pid) extern void MMU_init_hw(void); extern unsigned long mmu_mapin_ram(void); extern void adjust_total_lowmem(void); @@ -77,18 +74,4 @@ extern void adjust_total_lowmem(void); /* anything 32-bit except 4xx or 8xx */ extern void MMU_init_hw(void); extern unsigned long mmu_mapin_ram(void); - -/* Be careful....this needs to be updated if we ever encounter 603 SMPs, - * which includes all new 82xx processors. We need tlbie/tlbsync here - * in that case (I think). -- Dan. - */ -static inline void flush_HPTE(unsigned context, unsigned long va, - unsigned long pdval) -{ - if ((Hash != 0) && - cpu_has_feature(CPU_FTR_HPTE_TABLE)) - flush_hash_pages(0, va, pdval, 1); - else - _tlbie(va); -} #endif diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index c7b755cba26..34147244013 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -342,7 +342,11 @@ static int __change_page_attr(struct page *page, pgprot_t prot) return -EINVAL; set_pte_at(&init_mm, address, kpte, mk_pte(page, prot)); wmb(); - flush_HPTE(0, address, pmd_val(*kpmd)); +#ifdef CONFIG_PPC_STD_MMU + flush_hash_pages(0, address, pmd_val(*kpmd), 1); +#else + flush_tlb_page(NULL, address); +#endif pte_unmap(kpte); return 0; -- cgit v1.2.3 From 5e696617c425eb97bd943d781f3941fb1e8f0e5b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 18 Dec 2008 19:13:24 +0000 Subject: powerpc/mm: Split mmu_context handling This splits the mmu_context handling between 32-bit hash based processors, 64-bit hash based processors and everybody else. This is preliminary work for adding SMP support for BookE processors. Signed-off-by: Benjamin Herrenschmidt Acked-by: Kumar Gala Signed-off-by: Paul Mackerras --- arch/powerpc/mm/Makefile | 7 +- arch/powerpc/mm/mmu_context_32.c | 84 ------------------ arch/powerpc/mm/mmu_context_64.c | 70 --------------- arch/powerpc/mm/mmu_context_hash32.c | 103 ++++++++++++++++++++++ arch/powerpc/mm/mmu_context_hash64.c | 78 +++++++++++++++++ arch/powerpc/mm/mmu_context_nohash.c | 162 +++++++++++++++++++++++++++++++++++ 6 files changed, 347 insertions(+), 157 deletions(-) delete mode 100644 arch/powerpc/mm/mmu_context_32.c delete mode 100644 arch/powerpc/mm/mmu_context_64.c create mode 100644 arch/powerpc/mm/mmu_context_hash32.c create mode 100644 arch/powerpc/mm/mmu_context_hash64.c create mode 100644 arch/powerpc/mm/mmu_context_nohash.c (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 148de35c9ee..923bd3fa7d6 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -8,15 +8,16 @@ endif obj-y := fault.o mem.o pgtable.o \ init_$(CONFIG_WORD_SIZE).o \ - pgtable_$(CONFIG_WORD_SIZE).o \ - mmu_context_$(CONFIG_WORD_SIZE).o + pgtable_$(CONFIG_WORD_SIZE).o +obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o hash-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC64) += hash_utils_64.o \ slb_low.o slb.o stab.o \ gup.o mmap.o $(hash-y) obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ - tlb_hash$(CONFIG_WORD_SIZE).o + tlb_hash$(CONFIG_WORD_SIZE).o \ + mmu_context_hash$(CONFIG_WORD_SIZE).o obj-$(CONFIG_40x) += 40x_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o diff --git a/arch/powerpc/mm/mmu_context_32.c b/arch/powerpc/mm/mmu_context_32.c deleted file mode 100644 index cc32ba41d90..00000000000 --- a/arch/powerpc/mm/mmu_context_32.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * This file contains the routines for handling the MMU on those - * PowerPC implementations where the MMU substantially follows the - * architecture specification. This includes the 6xx, 7xx, 7xxx, - * 8260, and POWER3 implementations but excludes the 8xx and 4xx. - * -- paulus - * - * Derived from arch/ppc/mm/init.c: - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include - -#include -#include - -unsigned long next_mmu_context; -unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; -#ifdef FEW_CONTEXTS -atomic_t nr_free_contexts; -struct mm_struct *context_mm[LAST_CONTEXT+1]; -void steal_context(void); -#endif /* FEW_CONTEXTS */ - -/* - * Initialize the context management stuff. - */ -void __init -mmu_context_init(void) -{ - /* - * Some processors have too few contexts to reserve one for - * init_mm, and require using context 0 for a normal task. - * Other processors reserve the use of context zero for the kernel. - * This code assumes FIRST_CONTEXT < 32. - */ - context_map[0] = (1 << FIRST_CONTEXT) - 1; - next_mmu_context = FIRST_CONTEXT; -#ifdef FEW_CONTEXTS - atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1); -#endif /* FEW_CONTEXTS */ -} - -#ifdef FEW_CONTEXTS -/* - * Steal a context from a task that has one at the moment. - * This is only used on 8xx and 4xx and we presently assume that - * they don't do SMP. If they do then this will have to check - * whether the MM we steal is in use. - * We also assume that this is only used on systems that don't - * use an MMU hash table - this is true for 8xx and 4xx. - * This isn't an LRU system, it just frees up each context in - * turn (sort-of pseudo-random replacement :). This would be the - * place to implement an LRU scheme if anyone was motivated to do it. - * -- paulus - */ -void -steal_context(void) -{ - struct mm_struct *mm; - - /* free up context `next_mmu_context' */ - /* if we shouldn't free context 0, don't... */ - if (next_mmu_context < FIRST_CONTEXT) - next_mmu_context = FIRST_CONTEXT; - mm = context_mm[next_mmu_context]; - flush_tlb_mm(mm); - destroy_context(mm); -} -#endif /* FEW_CONTEXTS */ diff --git a/arch/powerpc/mm/mmu_context_64.c b/arch/powerpc/mm/mmu_context_64.c deleted file mode 100644 index 1db38ba1f54..00000000000 --- a/arch/powerpc/mm/mmu_context_64.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * MMU context allocation for 64-bit kernels. - * - * Copyright (C) 2004 Anton Blanchard, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -static DEFINE_SPINLOCK(mmu_context_lock); -static DEFINE_IDR(mmu_context_idr); - -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - int index; - int err; - -again: - if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) - return -ENOMEM; - - spin_lock(&mmu_context_lock); - err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index); - spin_unlock(&mmu_context_lock); - - if (err == -EAGAIN) - goto again; - else if (err) - return err; - - if (index > MAX_CONTEXT) { - spin_lock(&mmu_context_lock); - idr_remove(&mmu_context_idr, index); - spin_unlock(&mmu_context_lock); - return -ENOMEM; - } - - /* The old code would re-promote on fork, we don't do that - * when using slices as it could cause problem promoting slices - * that have been forced down to 4K - */ - if (slice_mm_new_context(mm)) - slice_set_user_psize(mm, mmu_virtual_psize); - mm->context.id = index; - - return 0; -} - -void destroy_context(struct mm_struct *mm) -{ - spin_lock(&mmu_context_lock); - idr_remove(&mmu_context_idr, mm->context.id); - spin_unlock(&mmu_context_lock); - - mm->context.id = NO_CONTEXT; -} diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c new file mode 100644 index 00000000000..0dfba2bf7f3 --- /dev/null +++ b/arch/powerpc/mm/mmu_context_hash32.c @@ -0,0 +1,103 @@ +/* + * This file contains the routines for handling the MMU on those + * PowerPC implementations where the MMU substantially follows the + * architecture specification. This includes the 6xx, 7xx, 7xxx, + * 8260, and POWER3 implementations but excludes the 8xx and 4xx. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include + +#include +#include + +/* + * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs + * (virtual segment identifiers) for each context. Although the + * hardware supports 24-bit VSIDs, and thus >1 million contexts, + * we only use 32,768 of them. That is ample, since there can be + * at most around 30,000 tasks in the system anyway, and it means + * that we can use a bitmap to indicate which contexts are in use. + * Using a bitmap means that we entirely avoid all of the problems + * that we used to have when the context number overflowed, + * particularly on SMP systems. + * -- paulus. + */ +#define NO_CONTEXT ((unsigned long) -1) +#define LAST_CONTEXT 32767 +#define FIRST_CONTEXT 1 + +/* + * This function defines the mapping from contexts to VSIDs (virtual + * segment IDs). We use a skew on both the context and the high 4 bits + * of the 32-bit virtual address (the "effective segment ID") in order + * to spread out the entries in the MMU hash table. Note, if this + * function is changed then arch/ppc/mm/hashtable.S will have to be + * changed to correspond. + * + * + * CTX_TO_VSID(ctx, va) (((ctx) * (897 * 16) + ((va) >> 28) * 0x111) \ + * & 0xffffff) + */ + +static unsigned long next_mmu_context; +static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; + + +/* + * Set up the context for a new address space. + */ +int init_new_context(struct task_struct *t, struct mm_struct *mm) +{ + unsigned long ctx = next_mmu_context; + + while (test_and_set_bit(ctx, context_map)) { + ctx = find_next_zero_bit(context_map, LAST_CONTEXT+1, ctx); + if (ctx > LAST_CONTEXT) + ctx = 0; + } + next_mmu_context = (ctx + 1) & LAST_CONTEXT; + mm->context.id = ctx; + + return 0; +} + +/* + * We're finished using the context for an address space. + */ +void destroy_context(struct mm_struct *mm) +{ + preempt_disable(); + if (mm->context.id != NO_CONTEXT) { + clear_bit(mm->context.id, context_map); + mm->context.id = NO_CONTEXT; + } + preempt_enable(); +} + +/* + * Initialize the context management stuff. + */ +void __init mmu_context_init(void) +{ + /* Reserve context 0 for kernel use */ + context_map[0] = (1 << FIRST_CONTEXT) - 1; + next_mmu_context = FIRST_CONTEXT; +} diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c new file mode 100644 index 00000000000..dbeb86ac90c --- /dev/null +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -0,0 +1,78 @@ +/* + * MMU context allocation for 64-bit kernels. + * + * Copyright (C) 2004 Anton Blanchard, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static DEFINE_SPINLOCK(mmu_context_lock); +static DEFINE_IDR(mmu_context_idr); + +/* + * The proto-VSID space has 2^35 - 1 segments available for user mappings. + * Each segment contains 2^28 bytes. Each context maps 2^44 bytes, + * so we can support 2^19-1 contexts (19 == 35 + 28 - 44). + */ +#define NO_CONTEXT 0 +#define MAX_CONTEXT ((1UL << 19) - 1) + +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + int index; + int err; + +again: + if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(&mmu_context_lock); + err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index); + spin_unlock(&mmu_context_lock); + + if (err == -EAGAIN) + goto again; + else if (err) + return err; + + if (index > MAX_CONTEXT) { + spin_lock(&mmu_context_lock); + idr_remove(&mmu_context_idr, index); + spin_unlock(&mmu_context_lock); + return -ENOMEM; + } + + /* The old code would re-promote on fork, we don't do that + * when using slices as it could cause problem promoting slices + * that have been forced down to 4K + */ + if (slice_mm_new_context(mm)) + slice_set_user_psize(mm, mmu_virtual_psize); + mm->context.id = index; + + return 0; +} + +void destroy_context(struct mm_struct *mm) +{ + spin_lock(&mmu_context_lock); + idr_remove(&mmu_context_idr, mm->context.id); + spin_unlock(&mmu_context_lock); + + mm->context.id = NO_CONTEXT; +} diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c new file mode 100644 index 00000000000..00e02150abe --- /dev/null +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -0,0 +1,162 @@ +/* + * This file contains the routines for handling the MMU on those + * PowerPC implementations where the MMU is not using the hash + * table, such as 8xx, 4xx, BookE's etc... + * + * Copyright 2008 Ben Herrenschmidt + * IBM Corp. + * + * Derived from previous arch/powerpc/mm/mmu_context.c + * and arch/powerpc/include/asm/mmu_context.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include + +#include +#include + +/* + * The MPC8xx has only 16 contexts. We rotate through them on each + * task switch. A better way would be to keep track of tasks that + * own contexts, and implement an LRU usage. That way very active + * tasks don't always have to pay the TLB reload overhead. The + * kernel pages are mapped shared, so the kernel can run on behalf + * of any task that makes a kernel entry. Shared does not mean they + * are not protected, just that the ASID comparison is not performed. + * -- Dan + * + * The IBM4xx has 256 contexts, so we can just rotate through these + * as a way of "switching" contexts. If the TID of the TLB is zero, + * the PID/TID comparison is disabled, so we can use a TID of zero + * to represent all kernel pages as shared among all contexts. + * -- Dan + */ + +#ifdef CONFIG_8xx +#define NO_CONTEXT 16 +#define LAST_CONTEXT 15 +#define FIRST_CONTEXT 0 + +#elif defined(CONFIG_4xx) +#define NO_CONTEXT 256 +#define LAST_CONTEXT 255 +#define FIRST_CONTEXT 1 + +#elif defined(CONFIG_E200) || defined(CONFIG_E500) +#define NO_CONTEXT 256 +#define LAST_CONTEXT 255 +#define FIRST_CONTEXT 1 + +#else +#error Unsupported processor type +#endif + +static unsigned long next_mmu_context; +static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; +static atomic_t nr_free_contexts; +static struct mm_struct *context_mm[LAST_CONTEXT+1]; +static void steal_context(void); + +/* Steal a context from a task that has one at the moment. + * This is only used on 8xx and 4xx and we presently assume that + * they don't do SMP. If they do then this will have to check + * whether the MM we steal is in use. + * We also assume that this is only used on systems that don't + * use an MMU hash table - this is true for 8xx and 4xx. + * This isn't an LRU system, it just frees up each context in + * turn (sort-of pseudo-random replacement :). This would be the + * place to implement an LRU scheme if anyone was motivated to do it. + * -- paulus + */ +static void steal_context(void) +{ + struct mm_struct *mm; + + /* free up context `next_mmu_context' */ + /* if we shouldn't free context 0, don't... */ + if (next_mmu_context < FIRST_CONTEXT) + next_mmu_context = FIRST_CONTEXT; + mm = context_mm[next_mmu_context]; + flush_tlb_mm(mm); + destroy_context(mm); +} + + +/* + * Get a new mmu context for the address space described by `mm'. + */ +static inline void get_mmu_context(struct mm_struct *mm) +{ + unsigned long ctx; + + if (mm->context.id != NO_CONTEXT) + return; + + while (atomic_dec_if_positive(&nr_free_contexts) < 0) + steal_context(); + + ctx = next_mmu_context; + while (test_and_set_bit(ctx, context_map)) { + ctx = find_next_zero_bit(context_map, LAST_CONTEXT+1, ctx); + if (ctx > LAST_CONTEXT) + ctx = 0; + } + next_mmu_context = (ctx + 1) & LAST_CONTEXT; + mm->context.id = ctx; + context_mm[ctx] = mm; +} + +void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) +{ + get_mmu_context(next); + + set_context(next->context.id, next->pgd); +} + +/* + * Set up the context for a new address space. + */ +int init_new_context(struct task_struct *t, struct mm_struct *mm) +{ + mm->context.id = NO_CONTEXT; + return 0; +} + +/* + * We're finished using the context for an address space. + */ +void destroy_context(struct mm_struct *mm) +{ + preempt_disable(); + if (mm->context.id != NO_CONTEXT) { + clear_bit(mm->context.id, context_map); + mm->context.id = NO_CONTEXT; + atomic_inc(&nr_free_contexts); + } + preempt_enable(); +} + + +/* + * Initialize the context management stuff. + */ +void __init mmu_context_init(void) +{ + /* + * Some processors have too few contexts to reserve one for + * init_mm, and require using context 0 for a normal task. + * Other processors reserve the use of context zero for the kernel. + * This code assumes FIRST_CONTEXT < 32. + */ + context_map[0] = (1 << FIRST_CONTEXT) - 1; + next_mmu_context = FIRST_CONTEXT; + atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1); +} + -- cgit v1.2.3 From 2ca8cf738907180e7fbda90f25f32b86feda609f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 18 Dec 2008 19:13:29 +0000 Subject: powerpc/mm: Rework context management for CPUs with no hash table This reworks the context management code used by 4xx,8xx and freescale BookE. It adds support for SMP by implementing a concept of stale context map to lazily flush the TLB on processors where a context may have been invalidated. This also contains the ground work for generalizing such lazy TLB flushing by just picking up a new PID and marking the old one stale. This will be implemented later. This is a first implementation that uses a global spinlock. Ideally, we should try to get at least the fast path (context ID already assigned) lockless or limited to a per context lock, but for now this will do. I tried to keep the UP case reasonably simple to avoid adding too much overhead to 8xx which does a lot of context stealing since it effectively has only 16 PIDs available. Signed-off-by: Benjamin Herrenschmidt Acked-by: Kumar Gala Signed-off-by: Paul Mackerras --- arch/powerpc/mm/mmu_context_nohash.c | 268 +++++++++++++++++++++++++++++------ 1 file changed, 221 insertions(+), 47 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 00e02150abe..8b5de52de0a 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -14,13 +14,28 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * + * TODO: + * + * - The global context lock will not scale very well + * - The maps should be dynamically allocated to allow for processors + * that support more PID bits at runtime + * - Implement flush_tlb_mm() by making the context stale and picking + * a new one + * - More aggressively clear stale map bits and maybe find some way to + * also clear mm->cpu_vm_mask bits when processes are migrated */ +#undef DEBUG +#define DEBUG_STEAL_ONLY +#undef DEBUG_MAP_CONSISTENCY + +#include #include #include #include #include +#include /* * The MPC8xx has only 16 contexts. We rotate through them on each @@ -40,17 +55,14 @@ */ #ifdef CONFIG_8xx -#define NO_CONTEXT 16 #define LAST_CONTEXT 15 #define FIRST_CONTEXT 0 #elif defined(CONFIG_4xx) -#define NO_CONTEXT 256 #define LAST_CONTEXT 255 #define FIRST_CONTEXT 1 #elif defined(CONFIG_E200) || defined(CONFIG_E500) -#define NO_CONTEXT 256 #define LAST_CONTEXT 255 #define FIRST_CONTEXT 1 @@ -58,66 +70,208 @@ #error Unsupported processor type #endif -static unsigned long next_mmu_context; +static unsigned int next_context, nr_free_contexts; static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; -static atomic_t nr_free_contexts; +static unsigned long stale_map[NR_CPUS][LAST_CONTEXT / BITS_PER_LONG + 1]; static struct mm_struct *context_mm[LAST_CONTEXT+1]; -static void steal_context(void); +static spinlock_t context_lock = SPIN_LOCK_UNLOCKED; /* Steal a context from a task that has one at the moment. - * This is only used on 8xx and 4xx and we presently assume that - * they don't do SMP. If they do then this will have to check - * whether the MM we steal is in use. - * We also assume that this is only used on systems that don't - * use an MMU hash table - this is true for 8xx and 4xx. + * + * This is used when we are running out of available PID numbers + * on the processors. + * * This isn't an LRU system, it just frees up each context in * turn (sort-of pseudo-random replacement :). This would be the * place to implement an LRU scheme if anyone was motivated to do it. * -- paulus + * + * For context stealing, we use a slightly different approach for + * SMP and UP. Basically, the UP one is simpler and doesn't use + * the stale map as we can just flush the local CPU + * -- benh */ -static void steal_context(void) +#ifdef CONFIG_SMP +static unsigned int steal_context_smp(unsigned int id) { struct mm_struct *mm; + unsigned int cpu, max; - /* free up context `next_mmu_context' */ - /* if we shouldn't free context 0, don't... */ - if (next_mmu_context < FIRST_CONTEXT) - next_mmu_context = FIRST_CONTEXT; - mm = context_mm[next_mmu_context]; - flush_tlb_mm(mm); - destroy_context(mm); -} + again: + max = LAST_CONTEXT - FIRST_CONTEXT; + /* Attempt to free next_context first and then loop until we manage */ + while (max--) { + /* Pick up the victim mm */ + mm = context_mm[id]; -/* - * Get a new mmu context for the address space described by `mm'. + /* We have a candidate victim, check if it's active, on SMP + * we cannot steal active contexts + */ + if (mm->context.active) { + id++; + if (id > LAST_CONTEXT) + id = FIRST_CONTEXT; + continue; + } + pr_debug("[%d] steal context %d from mm @%p\n", + smp_processor_id(), id, mm); + + /* Mark this mm has having no context anymore */ + mm->context.id = MMU_NO_CONTEXT; + + /* Mark it stale on all CPUs that used this mm */ + for_each_cpu_mask_nr(cpu, mm->cpu_vm_mask) + __set_bit(id, stale_map[cpu]); + return id; + } + + /* This will happen if you have more CPUs than available contexts, + * all we can do here is wait a bit and try again + */ + spin_unlock(&context_lock); + cpu_relax(); + spin_lock(&context_lock); + goto again; +} +#endif /* CONFIG_SMP */ + +/* Note that this will also be called on SMP if all other CPUs are + * offlined, which means that it may be called for cpu != 0. For + * this to work, we somewhat assume that CPUs that are onlined + * come up with a fully clean TLB (or are cleaned when offlined) */ -static inline void get_mmu_context(struct mm_struct *mm) +static unsigned int steal_context_up(unsigned int id) { - unsigned long ctx; + struct mm_struct *mm; + int cpu = smp_processor_id(); - if (mm->context.id != NO_CONTEXT) - return; + /* Pick up the victim mm */ + mm = context_mm[id]; + + pr_debug("[%d] steal context %d from mm @%p\n", cpu, id, mm); - while (atomic_dec_if_positive(&nr_free_contexts) < 0) - steal_context(); + /* Mark this mm has having no context anymore */ + mm->context.id = MMU_NO_CONTEXT; - ctx = next_mmu_context; - while (test_and_set_bit(ctx, context_map)) { - ctx = find_next_zero_bit(context_map, LAST_CONTEXT+1, ctx); - if (ctx > LAST_CONTEXT) - ctx = 0; + /* Flush the TLB for that context */ + local_flush_tlb_mm(mm); + + /* XXX This clear should ultimately be part of local_flush_tlb_mm */ + __clear_bit(id, stale_map[cpu]); + + return id; +} + +#ifdef DEBUG_MAP_CONSISTENCY +static void context_check_map(void) +{ + unsigned int id, nrf, nact; + + nrf = nact = 0; + for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) { + int used = test_bit(id, context_map); + if (!used) + nrf++; + if (used != (context_mm[id] != NULL)) + pr_err("MMU: Context %d is %s and MM is %p !\n", + id, used ? "used" : "free", context_mm[id]); + if (context_mm[id] != NULL) + nact += context_mm[id]->context.active; } - next_mmu_context = (ctx + 1) & LAST_CONTEXT; - mm->context.id = ctx; - context_mm[ctx] = mm; + if (nrf != nr_free_contexts) { + pr_err("MMU: Free context count out of sync ! (%d vs %d)\n", + nr_free_contexts, nrf); + nr_free_contexts = nrf; + } + if (nact > num_online_cpus()) + pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n", + nact, num_online_cpus()); } +#else +static void context_check_map(void) { } +#endif void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) { - get_mmu_context(next); + unsigned int id, cpu = smp_processor_id(); + unsigned long *map; - set_context(next->context.id, next->pgd); + /* No lockless fast path .. yet */ + spin_lock(&context_lock); + +#ifndef DEBUG_STEAL_ONLY + pr_debug("[%d] activating context for mm @%p, active=%d, id=%d\n", + cpu, next, next->context.active, next->context.id); +#endif + +#ifdef CONFIG_SMP + /* Mark us active and the previous one not anymore */ + next->context.active++; + if (prev) { + WARN_ON(prev->context.active < 1); + prev->context.active--; + } +#endif /* CONFIG_SMP */ + + /* If we already have a valid assigned context, skip all that */ + id = next->context.id; + if (likely(id != MMU_NO_CONTEXT)) + goto ctxt_ok; + + /* We really don't have a context, let's try to acquire one */ + id = next_context; + if (id > LAST_CONTEXT) + id = FIRST_CONTEXT; + map = context_map; + + /* No more free contexts, let's try to steal one */ + if (nr_free_contexts == 0) { +#ifdef CONFIG_SMP + if (num_online_cpus() > 1) { + id = steal_context_smp(id); + goto stolen; + } +#endif /* CONFIG_SMP */ + id = steal_context_up(id); + goto stolen; + } + nr_free_contexts--; + + /* We know there's at least one free context, try to find it */ + while (__test_and_set_bit(id, map)) { + id = find_next_zero_bit(map, LAST_CONTEXT+1, id); + if (id > LAST_CONTEXT) + id = FIRST_CONTEXT; + } + stolen: + next_context = id + 1; + context_mm[id] = next; + next->context.id = id; + +#ifndef DEBUG_STEAL_ONLY + pr_debug("[%d] picked up new id %d, nrf is now %d\n", + cpu, id, nr_free_contexts); +#endif + + context_check_map(); + ctxt_ok: + + /* If that context got marked stale on this CPU, then flush the + * local TLB for it and unmark it before we use it + */ + if (test_bit(id, stale_map[cpu])) { + pr_debug("[%d] flushing stale context %d for mm @%p !\n", + cpu, id, next); + local_flush_tlb_mm(next); + + /* XXX This clear should ultimately be part of local_flush_tlb_mm */ + __clear_bit(id, stale_map[cpu]); + } + + /* Flick the MMU and release lock */ + set_context(id, next->pgd); + spin_unlock(&context_lock); } /* @@ -125,7 +279,9 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) */ int init_new_context(struct task_struct *t, struct mm_struct *mm) { - mm->context.id = NO_CONTEXT; + mm->context.id = MMU_NO_CONTEXT; + mm->context.active = 0; + return 0; } @@ -134,13 +290,25 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) */ void destroy_context(struct mm_struct *mm) { - preempt_disable(); - if (mm->context.id != NO_CONTEXT) { - clear_bit(mm->context.id, context_map); - mm->context.id = NO_CONTEXT; - atomic_inc(&nr_free_contexts); + unsigned int id; + + if (mm->context.id == MMU_NO_CONTEXT) + return; + + WARN_ON(mm->context.active != 0); + + spin_lock(&context_lock); + id = mm->context.id; + if (id != MMU_NO_CONTEXT) { + __clear_bit(id, context_map); + mm->context.id = MMU_NO_CONTEXT; +#ifdef DEBUG_MAP_CONSISTENCY + mm->context.active = 0; + context_mm[id] = NULL; +#endif + nr_free_contexts++; } - preempt_enable(); + spin_unlock(&context_lock); } @@ -149,6 +317,12 @@ void destroy_context(struct mm_struct *mm) */ void __init mmu_context_init(void) { + /* Mark init_mm as being active on all possible CPUs since + * we'll get called with prev == init_mm the first time + * we schedule on a given CPU + */ + init_mm.context.active = NR_CPUS; + /* * Some processors have too few contexts to reserve one for * init_mm, and require using context 0 for a normal task. @@ -156,7 +330,7 @@ void __init mmu_context_init(void) * This code assumes FIRST_CONTEXT < 32. */ context_map[0] = (1 << FIRST_CONTEXT) - 1; - next_mmu_context = FIRST_CONTEXT; - atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1); + next_context = FIRST_CONTEXT; + nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1; } -- cgit v1.2.3 From 7c03d653cd257793dc40520c94e229b5fd0578e7 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 18 Dec 2008 19:13:32 +0000 Subject: powerpc/mm: Introduce MMU features We're soon running out of CPU features and I need to add some new ones for various MMU related bits, so this patch separates the MMU features from the CPU features. I moved over the 32-bit MMU related ones, added base features for MMU type families, but didn't move over any 64-bit only feature yet. Signed-off-by: Benjamin Herrenschmidt Acked-by: Kumar Gala Signed-off-by: Paul Mackerras --- arch/powerpc/mm/ppc_mmu_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 6aa12081377..9d97db7b7cf 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -192,7 +192,7 @@ void __init MMU_init_hw(void) extern unsigned int hash_page[]; extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[]; - if (!cpu_has_feature(CPU_FTR_HPTE_TABLE)) { + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) { /* * Put a blr (procedure return) instruction at the * start of hash_page, since we can still get DSI -- cgit v1.2.3 From f048aace29e007f2b642097e2da8231e0e9cce2d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 18 Dec 2008 19:13:38 +0000 Subject: powerpc/mm: Add SMP support to no-hash TLB handling This commit moves the whole no-hash TLB handling out of line into a new tlb_nohash.c file, and implements some basic SMP support using IPIs and/or broadcast tlbivax instructions. Note that I'm using local invalidations for D->I cache coherency. At worst, if another processor is trying to execute the same and has the old entry in its TLB, it will just take a fault and re-do the TLB flush locally (it won't re-do the cache flush in any case). Signed-off-by: Benjamin Herrenschmidt Acked-by: Kumar Gala Signed-off-by: Paul Mackerras --- arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/fault.c | 2 +- arch/powerpc/mm/mem.c | 2 +- arch/powerpc/mm/tlb_hash32.c | 4 + arch/powerpc/mm/tlb_nohash.c | 209 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 216 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/mm/tlb_nohash.c (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 923bd3fa7d6..af987df8d5a 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -9,7 +9,7 @@ endif obj-y := fault.o mem.o pgtable.o \ init_$(CONFIG_WORD_SIZE).o \ pgtable_$(CONFIG_WORD_SIZE).o -obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o +obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o hash-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC64) += hash_utils_64.o \ slb_low.o slb.o stab.o \ diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 7df0409107a..87f1f955dea 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -284,7 +284,7 @@ good_area: } pte_update(ptep, 0, _PAGE_HWEXEC | _PAGE_ACCESSED); - _tlbie(address, mm->context.id); + local_flush_tlb_page(vma, address); pte_unmap_unlock(ptep, ptl); up_read(&mm->mmap_sem); return 0; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index b9e1a1da6e5..8fee696fb79 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -488,7 +488,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, * we invalidate the TLB here, thus avoiding dcbst * misbehaviour. */ - _tlbie(address, 0 /* 8xx doesn't care about PID */); + _tlbil_va(address, 0 /* 8xx doesn't care about PID */); #endif /* The _PAGE_USER test should really be _PAGE_EXEC, but * older glibc versions execute some code from no-exec diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c index f9a47fee392..65190587a36 100644 --- a/arch/powerpc/mm/tlb_hash32.c +++ b/arch/powerpc/mm/tlb_hash32.c @@ -137,6 +137,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) flush_range(&init_mm, start, end); FINISH_FLUSH; } +EXPORT_SYMBOL(flush_tlb_kernel_range); /* * Flush all the (user) entries for the address space described by mm. @@ -160,6 +161,7 @@ void flush_tlb_mm(struct mm_struct *mm) flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); FINISH_FLUSH; } +EXPORT_SYMBOL(flush_tlb_mm); void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { @@ -176,6 +178,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); FINISH_FLUSH; } +EXPORT_SYMBOL(flush_tlb_page); /* * For each address in the range, find the pte for the address @@ -188,3 +191,4 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, flush_range(vma->vm_mm, start, end); FINISH_FLUSH; } +EXPORT_SYMBOL(flush_tlb_range); diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c new file mode 100644 index 00000000000..803a64c02b0 --- /dev/null +++ b/arch/powerpc/mm/tlb_nohash.c @@ -0,0 +1,209 @@ +/* + * This file contains the routines for TLB flushing. + * On machines where the MMU does not use a hash table to store virtual to + * physical translations (ie, SW loaded TLBs or Book3E compilant processors, + * this does -not- include 603 however which shares the implementation with + * hash based processors) + * + * -- BenH + * + * Copyright 2008 Ben Herrenschmidt + * IBM Corp. + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "mmu_decl.h" + +/* + * Base TLB flushing operations: + * + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes kernel pages + * + * - local_* variants of page and mm only apply to the current + * processor + */ + +/* + * These are the base non-SMP variants of page and mm flushing + */ +void local_flush_tlb_mm(struct mm_struct *mm) +{ + unsigned int pid; + + preempt_disable(); + pid = mm->context.id; + if (pid != MMU_NO_CONTEXT) + _tlbil_pid(pid); + preempt_enable(); +} +EXPORT_SYMBOL(local_flush_tlb_mm); + +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + unsigned int pid; + + preempt_disable(); + pid = vma ? vma->vm_mm->context.id : 0; + if (pid != MMU_NO_CONTEXT) + _tlbil_va(vmaddr, pid); + preempt_enable(); +} +EXPORT_SYMBOL(local_flush_tlb_page); + + +/* + * And here are the SMP non-local implementations + */ +#ifdef CONFIG_SMP + +static DEFINE_SPINLOCK(tlbivax_lock); + +struct tlb_flush_param { + unsigned long addr; + unsigned int pid; +}; + +static void do_flush_tlb_mm_ipi(void *param) +{ + struct tlb_flush_param *p = param; + + _tlbil_pid(p ? p->pid : 0); +} + +static void do_flush_tlb_page_ipi(void *param) +{ + struct tlb_flush_param *p = param; + + _tlbil_va(p->addr, p->pid); +} + + +/* Note on invalidations and PID: + * + * We snapshot the PID with preempt disabled. At this point, it can still + * change either because: + * - our context is being stolen (PID -> NO_CONTEXT) on another CPU + * - we are invaliating some target that isn't currently running here + * and is concurrently acquiring a new PID on another CPU + * - some other CPU is re-acquiring a lost PID for this mm + * etc... + * + * However, this shouldn't be a problem as we only guarantee + * invalidation of TLB entries present prior to this call, so we + * don't care about the PID changing, and invalidating a stale PID + * is generally harmless. + */ + +void flush_tlb_mm(struct mm_struct *mm) +{ + cpumask_t cpu_mask; + unsigned int pid; + + preempt_disable(); + pid = mm->context.id; + if (unlikely(pid == MMU_NO_CONTEXT)) + goto no_context; + cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + if (!cpus_empty(cpu_mask)) { + struct tlb_flush_param p = { .pid = pid }; + smp_call_function_mask(cpu_mask, do_flush_tlb_mm_ipi, &p, 1); + } + _tlbil_pid(pid); + no_context: + preempt_enable(); +} +EXPORT_SYMBOL(flush_tlb_mm); + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + cpumask_t cpu_mask; + unsigned int pid; + + preempt_disable(); + pid = vma ? vma->vm_mm->context.id : 0; + if (unlikely(pid == MMU_NO_CONTEXT)) + goto bail; + cpu_mask = vma->vm_mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + if (!cpus_empty(cpu_mask)) { + /* If broadcast tlbivax is supported, use it */ + if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) { + int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL); + if (lock) + spin_lock(&tlbivax_lock); + _tlbivax_bcast(vmaddr, pid); + if (lock) + spin_unlock(&tlbivax_lock); + goto bail; + } else { + struct tlb_flush_param p = { .pid = pid, .addr = vmaddr }; + smp_call_function_mask(cpu_mask, + do_flush_tlb_page_ipi, &p, 1); + } + } + _tlbil_va(vmaddr, pid); + bail: + preempt_enable(); +} +EXPORT_SYMBOL(flush_tlb_page); + +#endif /* CONFIG_SMP */ + +/* + * Flush kernel TLB entries in the given range + */ +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ +#ifdef CONFIG_SMP + preempt_disable(); + smp_call_function(do_flush_tlb_mm_ipi, NULL, 1); + _tlbil_pid(0); + preempt_enable(); +#endif + _tlbil_pid(0); +} +EXPORT_SYMBOL(flush_tlb_kernel_range); + +/* + * Currently, for range flushing, we just do a full mm flush. This should + * be optimized based on a threshold on the size of the range, since + * some implementation can stack multiple tlbivax before a tlbsync but + * for now, we keep it that way + */ +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) + +{ + flush_tlb_mm(vma->vm_mm); +} +EXPORT_SYMBOL(flush_tlb_range); -- cgit v1.2.3 From 2a4aca1144394653269720ffbb5a325a77abd5fa Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 18 Dec 2008 19:13:42 +0000 Subject: powerpc/mm: Split low level tlb invalidate for nohash processors Currently, the various forms of low level TLB invalidations are all implemented in misc_32.S for 32-bit processors, in a fairly scary mess of #ifdef's and with interesting duplication such as a whole bunch of code for FSL _tlbie and _tlbia which are no longer used. This moves things around such that _tlbie is now defined in hash_low_32.S and is only used by the 32-bit hash code, and all nohash CPUs use the various _tlbil_* forms that are now moved to a new file, tlb_nohash_low.S. I moved all the definitions for that stuff out of include/asm/tlbflush.h as they are really internal mm stuff, into mm/mmu_decl.h The code should have no functional changes. I kept some variants inline for trivial forms on things like 40x and 8xx. Signed-off-by: Benjamin Herrenschmidt Acked-by: Kumar Gala Signed-off-by: Paul Mackerras --- arch/powerpc/mm/Makefile | 3 +- arch/powerpc/mm/hash_low_32.S | 76 ++++++++++++++++++ arch/powerpc/mm/mmu_decl.h | 48 ++++++++++++ arch/powerpc/mm/tlb_nohash_low.S | 165 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 291 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/mm/tlb_nohash_low.S (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index af987df8d5a..953cc4a1cde 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -9,7 +9,8 @@ endif obj-y := fault.o mem.o pgtable.o \ init_$(CONFIG_WORD_SIZE).o \ pgtable_$(CONFIG_WORD_SIZE).o -obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o +obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ + tlb_nohash_low.o hash-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC64) += hash_utils_64.o \ slb_low.o slb.o stab.o \ diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index c5536b8b37a..c8eac22a8f0 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -633,3 +633,79 @@ _GLOBAL(flush_hash_patch_B) SYNC_601 isync blr + +/* + * Flush an entry from the TLB + */ +_GLOBAL(_tlbie) +#ifdef CONFIG_SMP + rlwinm r8,r1,0,0,(31-THREAD_SHIFT) + lwz r8,TI_CPU(r8) + oris r8,r8,11 + mfmsr r10 + SYNC + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear DR */ + mtmsr r0 + SYNC_601 + isync + lis r9,mmu_hash_lock@h + ori r9,r9,mmu_hash_lock@l + tophys(r9,r9) +10: lwarx r7,0,r9 + cmpwi 0,r7,0 + bne- 10b + stwcx. r8,0,r9 + bne- 10b + eieio + tlbie r3 + sync + TLBSYNC + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ + mtmsr r10 + SYNC_601 + isync +#else /* CONFIG_SMP */ + tlbie r3 + sync +#endif /* CONFIG_SMP */ + blr + +/* + * Flush the entire TLB. 603/603e only + */ +_GLOBAL(_tlbia) +#if defined(CONFIG_SMP) + rlwinm r8,r1,0,0,(31-THREAD_SHIFT) + lwz r8,TI_CPU(r8) + oris r8,r8,10 + mfmsr r10 + SYNC + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear DR */ + mtmsr r0 + SYNC_601 + isync + lis r9,mmu_hash_lock@h + ori r9,r9,mmu_hash_lock@l + tophys(r9,r9) +10: lwarx r7,0,r9 + cmpwi 0,r7,0 + bne- 10b + stwcx. r8,0,r9 + bne- 10b + sync + tlbia + sync + TLBSYNC + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ + mtmsr r10 + SYNC_601 + isync +#else /* CONFIG_SMP */ + sync + tlbia + sync +#endif /* CONFIG_SMP */ diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index b4344fd30f2..4314b39b6fa 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -22,10 +22,58 @@ #include #include +#ifdef CONFIG_PPC_MMU_NOHASH + +/* + * On 40x and 8xx, we directly inline tlbia and tlbivax + */ +#if defined(CONFIG_40x) || defined(CONFIG_8xx) +static inline void _tlbil_all(void) +{ + asm volatile ("sync; tlbia; isync" : : : "memory") +} +static inline void _tlbil_pid(unsigned int pid) +{ + asm volatile ("sync; tlbia; isync" : : : "memory") +} +#else /* CONFIG_40x || CONFIG_8xx */ +extern void _tlbil_all(void); +extern void _tlbil_pid(unsigned int pid); +#endif /* !(CONFIG_40x || CONFIG_8xx) */ + +/* + * On 8xx, we directly inline tlbie, on others, it's extern + */ +#ifdef CONFIG_8xx +static inline void _tlbil_va(unsigned long address, unsigned int pid) +{ + asm volatile ("tlbie %0; sync" : : "r" (address) : "memory") +} +#else /* CONFIG_8xx */ +extern void _tlbil_va(unsigned long address, unsigned int pid); +#endif /* CONIFG_8xx */ + +/* + * As of today, we don't support tlbivax broadcast on any + * implementation. When that becomes the case, this will be + * an extern. + */ +static inline void _tlbivax_bcast(unsigned long address, unsigned int pid) +{ + BUG(); +} + +#else /* CONFIG_PPC_MMU_NOHASH */ + extern void hash_preload(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap); +extern void _tlbie(unsigned long address); +extern void _tlbia(void); + +#endif /* CONFIG_PPC_MMU_NOHASH */ + #ifdef CONFIG_PPC32 extern void mapin_ram(void); extern int map_page(unsigned long va, phys_addr_t pa, int flags); diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S new file mode 100644 index 00000000000..763c59fe007 --- /dev/null +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -0,0 +1,165 @@ +/* + * This file contains low-level functions for performing various + * types of TLB invalidations on various processors with no hash + * table. + * + * This file implements the following functions for all no-hash + * processors. Some aren't implemented for some variants. Some + * are inline in tlbflush.h + * + * - tlbil_va + * - tlbil_pid + * - tlbil_all + * - tlbivax_bcast (not yet) + * + * Code mostly moved over from misc_32.S + * + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Partially rewritten by Cort Dougan (cort@cs.nmt.edu) + * Paul Mackerras, Kumar Gala and Benjamin Herrenschmidt. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#if defined(CONFIG_40x) + +/* + * 40x implementation needs only tlbil_va + */ +_GLOBAL(_tlbil_va) + /* We run the search with interrupts disabled because we have to change + * the PID and I don't want to preempt when that happens. + */ + mfmsr r5 + mfspr r6,SPRN_PID + wrteei 0 + mtspr SPRN_PID,r4 + tlbsx. r3, 0, r3 + mtspr SPRN_PID,r6 + wrtee r5 + bne 1f + sync + /* There are only 64 TLB entries, so r3 < 64, which means bit 25 is + * clear. Since 25 is the V bit in the TLB_TAG, loading this value + * will invalidate the TLB entry. */ + tlbwe r3, r3, TLB_TAG + isync +1: blr + +#elif defined(CONFIG_8xx) + +/* + * Nothing to do for 8xx, everything is inline + */ + +#elif defined(CONFIG_44x) + +/* + * 440 implementation uses tlbsx/we for tlbil_va and a full sweep + * of the TLB for everything else. + */ +_GLOBAL(_tlbil_va) + mfspr r5,SPRN_MMUCR + rlwimi r5,r4,0,24,31 /* Set TID */ + + /* We have to run the search with interrupts disabled, even critical + * and debug interrupts (in fact the only critical exceptions we have + * are debug and machine check). Otherwise an interrupt which causes + * a TLB miss can clobber the MMUCR between the mtspr and the tlbsx. */ + mfmsr r4 + lis r6,(MSR_EE|MSR_CE|MSR_ME|MSR_DE)@ha + addi r6,r6,(MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l + andc r6,r4,r6 + mtmsr r6 + mtspr SPRN_MMUCR,r5 + tlbsx. r3, 0, r3 + mtmsr r4 + bne 1f + sync + /* There are only 64 TLB entries, so r3 < 64, + * which means bit 22, is clear. Since 22 is + * the V bit in the TLB_PAGEID, loading this + * value will invalidate the TLB entry. + */ + tlbwe r3, r3, PPC44x_TLB_PAGEID + isync +1: blr + +_GLOBAL(_tlbil_all) +_GLOBAL(_tlbil_pid) + li r3,0 + sync + + /* Load high watermark */ + lis r4,tlb_44x_hwater@ha + lwz r5,tlb_44x_hwater@l(r4) + +1: tlbwe r3,r3,PPC44x_TLB_PAGEID + addi r3,r3,1 + cmpw 0,r3,r5 + ble 1b + + isync + blr + +#elif defined(CONFIG_FSL_BOOKE) +/* + * FSL BookE implementations. Currently _pid and _all are the + * same. This will change when tlbilx is actually supported and + * performs invalidate-by-PID. This change will be driven by + * mmu_features conditional + */ + +/* + * Flush MMU TLB on the local processor + */ +_GLOBAL(_tlbil_pid) +_GLOBAL(_tlbil_all) +#define MMUCSR0_TLBFI (MMUCSR0_TLB0FI | MMUCSR0_TLB1FI | \ + MMUCSR0_TLB2FI | MMUCSR0_TLB3FI) + li r3,(MMUCSR0_TLBFI)@l + mtspr SPRN_MMUCSR0, r3 +1: + mfspr r3,SPRN_MMUCSR0 + andi. r3,r3,MMUCSR0_TLBFI@l + bne 1b + msync + isync + blr + +/* + * Flush MMU TLB for a particular address, but only on the local processor + * (no broadcast) + */ +_GLOBAL(_tlbil_va) + mfmsr r10 + wrteei 0 + slwi r4,r4,16 + mtspr SPRN_MAS6,r4 /* assume AS=0 for now */ + tlbsx 0,r3 + mfspr r4,SPRN_MAS1 /* check valid */ + andis. r3,r4,MAS1_VALID@h + beq 1f + rlwinm r4,r4,0,1,31 + mtspr SPRN_MAS1,r4 + tlbwe + msync + isync +1: wrtee r10 + blr +#elif +#error Unsupported processor type ! +#endif -- cgit v1.2.3 From 760ec0e02d8a13d0ed60d99f47879d4aa8ef1910 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 18 Dec 2008 19:13:46 +0000 Subject: powerpc/44x: No need to mask MSR:CE, ME or DE in _tlbil_va on 440 The handlers for Critical, Machine Check or Debug interrupts will save and restore MMUCR nowadays, thus we only need to disable normal interrupts when invalidating TLB entries. Signed-off-by: Benjamin Herrenschmidt Acked-by: Kumar Gala Acked-by: Josh Boyer Signed-off-by: Paul Mackerras --- arch/powerpc/mm/tlb_nohash_low.S | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index 763c59fe007..f900a39e6ec 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -75,18 +75,19 @@ _GLOBAL(_tlbil_va) mfspr r5,SPRN_MMUCR rlwimi r5,r4,0,24,31 /* Set TID */ - /* We have to run the search with interrupts disabled, even critical - * and debug interrupts (in fact the only critical exceptions we have - * are debug and machine check). Otherwise an interrupt which causes - * a TLB miss can clobber the MMUCR between the mtspr and the tlbsx. */ + /* We have to run the search with interrupts disabled, otherwise + * an interrupt which causes a TLB miss can clobber the MMUCR + * between the mtspr and the tlbsx. + * + * Critical and Machine Check interrupts take care of saving + * and restoring MMUCR, so only normal interrupts have to be + * taken care of. + */ mfmsr r4 - lis r6,(MSR_EE|MSR_CE|MSR_ME|MSR_DE)@ha - addi r6,r6,(MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l - andc r6,r4,r6 - mtmsr r6 + wrteei 0 mtspr SPRN_MMUCR,r5 tlbsx. r3, 0, r3 - mtmsr r4 + wrtee r4 bne 1f sync /* There are only 64 TLB entries, so r3 < 64, -- cgit v1.2.3 From 77520351805cc19ba37394ae33f862ef6d3c2a23 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 18 Dec 2008 19:13:48 +0000 Subject: powerpc/mm: Runtime allocation of mmu context maps for nohash CPUs This makes the MMU context code used for CPUs with no hash table (except 603) dynamically allocate the various maps used to track the state of contexts. Only the main free map and CPU 0 stale map are allocated at boot time. Other CPU maps are allocated when those CPUs are brought up and freed if they are unplugged. This also moves the initialization of the MMU context management slightly later during the boot process, which should be fine as it's really only needed when userland if first started anyways. Signed-off-by: Benjamin Herrenschmidt Acked-by: Kumar Gala Signed-off-by: Paul Mackerras --- arch/powerpc/mm/init_32.c | 4 - arch/powerpc/mm/mmu_context_nohash.c | 161 ++++++++++++++++++++++++----------- 2 files changed, 111 insertions(+), 54 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 388ceda632f..578294c3b1c 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -180,9 +179,6 @@ void __init MMU_init(void) if (ppc_md.progress) ppc_md.progress("MMU:setio", 0x302); - /* Initialize the context management stuff */ - mmu_context_init(); - if (ppc_md.progress) ppc_md.progress("MMU:exit", 0x211); diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 8b5de52de0a..52a0cfc38b6 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -28,54 +28,30 @@ #undef DEBUG #define DEBUG_STEAL_ONLY #undef DEBUG_MAP_CONSISTENCY +/*#define DEBUG_CLAMP_LAST_CONTEXT 15 */ #include #include #include +#include +#include +#include +#include #include #include -#include - -/* - * The MPC8xx has only 16 contexts. We rotate through them on each - * task switch. A better way would be to keep track of tasks that - * own contexts, and implement an LRU usage. That way very active - * tasks don't always have to pay the TLB reload overhead. The - * kernel pages are mapped shared, so the kernel can run on behalf - * of any task that makes a kernel entry. Shared does not mean they - * are not protected, just that the ASID comparison is not performed. - * -- Dan - * - * The IBM4xx has 256 contexts, so we can just rotate through these - * as a way of "switching" contexts. If the TID of the TLB is zero, - * the PID/TID comparison is disabled, so we can use a TID of zero - * to represent all kernel pages as shared among all contexts. - * -- Dan - */ - -#ifdef CONFIG_8xx -#define LAST_CONTEXT 15 -#define FIRST_CONTEXT 0 - -#elif defined(CONFIG_4xx) -#define LAST_CONTEXT 255 -#define FIRST_CONTEXT 1 - -#elif defined(CONFIG_E200) || defined(CONFIG_E500) -#define LAST_CONTEXT 255 -#define FIRST_CONTEXT 1 - -#else -#error Unsupported processor type -#endif +static unsigned int first_context, last_context; static unsigned int next_context, nr_free_contexts; -static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; -static unsigned long stale_map[NR_CPUS][LAST_CONTEXT / BITS_PER_LONG + 1]; -static struct mm_struct *context_mm[LAST_CONTEXT+1]; +static unsigned long *context_map; +static unsigned long *stale_map[NR_CPUS]; +static struct mm_struct **context_mm; static spinlock_t context_lock = SPIN_LOCK_UNLOCKED; +#define CTX_MAP_SIZE \ + (sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1)) + + /* Steal a context from a task that has one at the moment. * * This is used when we are running out of available PID numbers @@ -98,7 +74,7 @@ static unsigned int steal_context_smp(unsigned int id) unsigned int cpu, max; again: - max = LAST_CONTEXT - FIRST_CONTEXT; + max = last_context - first_context; /* Attempt to free next_context first and then loop until we manage */ while (max--) { @@ -110,8 +86,8 @@ static unsigned int steal_context_smp(unsigned int id) */ if (mm->context.active) { id++; - if (id > LAST_CONTEXT) - id = FIRST_CONTEXT; + if (id > last_context) + id = first_context; continue; } pr_debug("[%d] steal context %d from mm @%p\n", @@ -169,7 +145,7 @@ static void context_check_map(void) unsigned int id, nrf, nact; nrf = nact = 0; - for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) { + for (id = first_context; id <= last_context; id++) { int used = test_bit(id, context_map); if (!used) nrf++; @@ -187,6 +163,8 @@ static void context_check_map(void) if (nact > num_online_cpus()) pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n", nact, num_online_cpus()); + if (first_context > 0 && !test_bit(0, context_map)) + pr_err("MMU: Context 0 has been freed !!!\n"); } #else static void context_check_map(void) { } @@ -209,6 +187,10 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) /* Mark us active and the previous one not anymore */ next->context.active++; if (prev) { +#ifndef DEBUG_STEAL_ONLY + pr_debug(" old context %p active was: %d\n", + prev, prev->context.active); +#endif WARN_ON(prev->context.active < 1); prev->context.active--; } @@ -221,8 +203,8 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) /* We really don't have a context, let's try to acquire one */ id = next_context; - if (id > LAST_CONTEXT) - id = FIRST_CONTEXT; + if (id > last_context) + id = first_context; map = context_map; /* No more free contexts, let's try to steal one */ @@ -240,9 +222,9 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) /* We know there's at least one free context, try to find it */ while (__test_and_set_bit(id, map)) { - id = find_next_zero_bit(map, LAST_CONTEXT+1, id); - if (id > LAST_CONTEXT) - id = FIRST_CONTEXT; + id = find_next_zero_bit(map, last_context+1, id); + if (id > last_context) + id = first_context; } stolen: next_context = id + 1; @@ -311,6 +293,42 @@ void destroy_context(struct mm_struct *mm) spin_unlock(&context_lock); } +#ifdef CONFIG_SMP + +static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned int)(long)hcpu; + + /* We don't touch CPU 0 map, it's allocated at aboot and kept + * around forever + */ + if (cpu == 0) + return NOTIFY_OK; + + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + pr_debug("MMU: Allocating stale context map for CPU %d\n", cpu); + stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL); + break; +#ifdef CONFIG_HOTPLUG_CPU + case CPU_DEAD: + case CPU_DEAD_FROZEN: + pr_debug("MMU: Freeing stale context map for CPU %d\n", cpu); + kfree(stale_map[cpu]); + stale_map[cpu] = NULL; + break; +#endif + } + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata mmu_context_cpu_nb = { + .notifier_call = mmu_context_cpu_notify, +}; + +#endif /* CONFIG_SMP */ /* * Initialize the context management stuff. @@ -323,14 +341,57 @@ void __init mmu_context_init(void) */ init_mm.context.active = NR_CPUS; + /* + * The MPC8xx has only 16 contexts. We rotate through them on each + * task switch. A better way would be to keep track of tasks that + * own contexts, and implement an LRU usage. That way very active + * tasks don't always have to pay the TLB reload overhead. The + * kernel pages are mapped shared, so the kernel can run on behalf + * of any task that makes a kernel entry. Shared does not mean they + * are not protected, just that the ASID comparison is not performed. + * -- Dan + * + * The IBM4xx has 256 contexts, so we can just rotate through these + * as a way of "switching" contexts. If the TID of the TLB is zero, + * the PID/TID comparison is disabled, so we can use a TID of zero + * to represent all kernel pages as shared among all contexts. + * -- Dan + */ + if (mmu_has_feature(MMU_FTR_TYPE_8xx)) { + first_context = 0; + last_context = 15; + } else { + first_context = 1; + last_context = 255; + } + +#ifdef DEBUG_CLAMP_LAST_CONTEXT + last_context = DEBUG_CLAMP_LAST_CONTEXT; +#endif + /* + * Allocate the maps used by context management + */ + context_map = alloc_bootmem(CTX_MAP_SIZE); + context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1)); + stale_map[0] = alloc_bootmem(CTX_MAP_SIZE); + +#ifdef CONFIG_SMP + register_cpu_notifier(&mmu_context_cpu_nb); +#endif + + printk(KERN_INFO + "MMU: Allocated %d bytes of context maps for %d contexts\n", + 2 * CTX_MAP_SIZE + (sizeof(void *) * (last_context + 1)), + last_context - first_context + 1); + /* * Some processors have too few contexts to reserve one for * init_mm, and require using context 0 for a normal task. * Other processors reserve the use of context zero for the kernel. - * This code assumes FIRST_CONTEXT < 32. + * This code assumes first_context < 32. */ - context_map[0] = (1 << FIRST_CONTEXT) - 1; - next_context = FIRST_CONTEXT; - nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1; + context_map[0] = (1 << first_context) - 1; + next_context = first_context; + nr_free_contexts = last_context - first_context + 1; } -- cgit v1.2.3 From 64b3d0e8122b422e879b23d42f9e0e8efbbf9744 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 18 Dec 2008 19:13:51 +0000 Subject: powerpc/mm: Rework usage of _PAGE_COHERENT/NO_CACHE/GUARDED Currently, we never set _PAGE_COHERENT in the PTEs, we just OR it in in the hash code based on some CPU feature bit. We also manipulate _PAGE_NO_CACHE and _PAGE_GUARDED by hand in all sorts of places. This changes the logic so that instead, the PTE now contains _PAGE_COHERENT for all normal RAM pages thay have I = 0 on platforms that need it. The hash code clears it if the feature bit is not set. It also adds some clean accessors to setup various valid combinations of access flags and change various bits of code to use them instead. This should help having the PTE actually containing the bit combinations that we really want. I also removed _PAGE_GUARDED from _PAGE_BASE on 44x and instead set it explicitely from the TLB miss. I will ultimately remove it completely as it appears that it might not be needed after all but in the meantime, having it in the TLB miss makes things a lot easier. Signed-off-by: Benjamin Herrenschmidt Acked-by: Kumar Gala Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_low_32.S | 4 ++-- arch/powerpc/mm/mem.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index c8eac22a8f0..28845604a10 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -323,8 +323,8 @@ _GLOBAL(create_hpte) ori r8,r8,0xe14 /* clear out reserved bits and M */ andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */ BEGIN_FTR_SECTION - ori r8,r8,_PAGE_COHERENT /* set M (coherence required) */ -END_FTR_SECTION_IFSET(CPU_FTR_NEED_COHERENT) + rlwinm r8,r8,0,~_PAGE_COHERENT /* clear M (coherence not required) */ +END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) #ifdef CONFIG_PTE_64BIT /* Put the XPN bits into the PTE */ rlwimi r8,r10,8,20,22 diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 8fee696fb79..53b06ebb3f2 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -102,8 +102,8 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, return ppc_md.phys_mem_access_prot(file, pfn, size, vma_prot); if (!page_is_ram(pfn)) - vma_prot = __pgprot(pgprot_val(vma_prot) - | _PAGE_GUARDED | _PAGE_NO_CACHE); + vma_prot = pgprot_noncached(vma_prot); + return vma_prot; } EXPORT_SYMBOL(phys_mem_access_prot); -- cgit v1.2.3 From a14953597b771f793ce32529d7b8b04fdedca3ef Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sun, 21 Dec 2008 02:54:25 -0700 Subject: powerpc: Fix missing 'blr' in _tlbia() Rework to MMU code dropped a much missed 'blr' instruction. Brown-Paper-Bag-Worn-By: Benjamin Herrenschmidt Signed-off-by: Grant Likely --- arch/powerpc/mm/hash_low_32.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index 28845604a10..67850ec9feb 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -709,3 +709,4 @@ _GLOBAL(_tlbia) tlbia sync #endif /* CONFIG_SMP */ + blr -- cgit v1.2.3 From 01695a9687e5a8d78589605037cc7828a5b67ac9 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 17 Dec 2008 10:09:10 +0000 Subject: powerpc/32: Allow __ioremap on RAM addresses for kdump kernel While for debugging it is good to catch bogus users of ioremap, though for kdump support it is more convenient to use __ioremap for copy_oldmem_page() (exactly as we do for PPC64 currently). Note that copy_oldmem_page() calls __ioremap with flags set to '0', so it should be safe with the regard to the caches. The other option is to use kmap_atomic_pfn()[1], but it will not work for kernels compiled without HIGHMEM. That is, on a board with 256MB RAM and crashkernel=64M@32M case, the !HIGHMEM capturing kernel maps 0-96M range, which does not include all the memory needed to capture the dump. And, obviously, accessing anything upper than 96M will cause faults. [1] http://ozlabs.org/pipermail/linuxppc-dev/2007-November/046747.html Signed-off-by: Anton Vorontsov Signed-off-by: Paul Mackerras --- arch/powerpc/mm/pgtable_32.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 34147244013..cd5609759d4 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -173,6 +173,7 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) if (p < 16*1024*1024) p += _ISA_MEM_BASE; +#ifndef CONFIG_CRASH_DUMP /* * Don't allow anybody to remap normal RAM that we're using. * mem_init() sets high_memory so only do the check after that. @@ -182,6 +183,7 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) (unsigned long long)p, __builtin_return_address(0)); return NULL; } +#endif if (size == 0) return NULL; -- cgit v1.2.3 From ccdcef72c249c289898b164eada89a61855b9287 Mon Sep 17 00:00:00 2001 From: Dale Farnsworth Date: Wed, 17 Dec 2008 10:09:13 +0000 Subject: powerpc/32: Add the ability for a classic ppc kernel to be loaded at 32M Add the ability for a classic ppc kernel to be loaded at an address of 32MB. This done by fixing a few places that assume we are loaded at address 0, and by changing several uses of KERNELBASE to use PAGE_OFFSET, instead. Signed-off-by: Dale Farnsworth Signed-off-by: Anton Vorontsov Signed-off-by: Paul Mackerras --- arch/powerpc/mm/init_32.c | 2 +- arch/powerpc/mm/pgtable_32.c | 4 ++-- arch/powerpc/mm/ppc_mmu_32.c | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 578294c3b1c..666a5e8a5be 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -48,7 +48,7 @@ #if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL) /* The ammount of lowmem must be within 0xF0000000 - KERNELBASE. */ -#if (CONFIG_LOWMEM_SIZE > (0xF0000000 - KERNELBASE)) +#if (CONFIG_LOWMEM_SIZE > (0xF0000000 - PAGE_OFFSET)) #error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL" #endif #endif diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index cd5609759d4..8cba46fc9e3 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -269,7 +269,7 @@ int map_page(unsigned long va, phys_addr_t pa, int flags) } /* - * Map in a big chunk of physical memory starting at KERNELBASE. + * Map in a big chunk of physical memory starting at PAGE_OFFSET. */ void __init mapin_ram(void) { @@ -278,7 +278,7 @@ void __init mapin_ram(void) int ktext; s = mmu_mapin_ram(); - v = KERNELBASE + s; + v = PAGE_OFFSET + s; p = memstart_addr + s; for (; s < total_lowmem; s += PAGE_SIZE) { ktext = ((char *) v >= _stext && (char *) v < etext); diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 9d97db7b7cf..45d925360b8 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -95,16 +95,16 @@ unsigned long __init mmu_mapin_ram(void) break; } - setbat(2, KERNELBASE, 0, bl, _PAGE_RAM); - done = (unsigned long)bat_addrs[2].limit - KERNELBASE + 1; + setbat(2, PAGE_OFFSET, 0, bl, _PAGE_RAM); + done = (unsigned long)bat_addrs[2].limit - PAGE_OFFSET + 1; if ((done < tot) && !bat_addrs[3].limit) { /* use BAT3 to cover a bit more */ tot -= done; for (bl = 128<<10; bl < max_size; bl <<= 1) if (bl * 2 > tot) break; - setbat(3, KERNELBASE+done, done, bl, _PAGE_RAM); - done = (unsigned long)bat_addrs[3].limit - KERNELBASE + 1; + setbat(3, PAGE_OFFSET+done, done, bl, _PAGE_RAM); + done = (unsigned long)bat_addrs[3].limit - PAGE_OFFSET + 1; } return done; -- cgit v1.2.3 From ca9153a3a2a7556d091dfe080e42b0e67881fff6 Mon Sep 17 00:00:00 2001 From: Ilya Yanok Date: Thu, 11 Dec 2008 04:55:41 +0300 Subject: powerpc/44x: Support 16K/64K base page sizes on 44x This adds support for 16k and 64k page sizes on PowerPC 44x processors. The PGDIR table is much smaller than a page when using 16k or 64k pages (512 and 32 bytes respectively) so we allocate the PGDIR with kzalloc() instead of __get_free_pages(). One PTE table covers rather a large memory area when using 16k or 64k pages (32MB or 512MB respectively), so we can easily put FIXMAP and PKMAP in the area covered by one PTE table. Signed-off-by: Yuri Tikhonov Signed-off-by: Vladimir Panfilov Signed-off-by: Ilya Yanok Acked-by: Josh Boyer Signed-off-by: Paul Mackerras --- arch/powerpc/mm/pgtable_32.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 8cba46fc9e3..38ff35f2142 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -68,24 +68,29 @@ extern unsigned long p_mapped_by_tlbcam(unsigned long pa); #define p_mapped_by_tlbcam(x) (0UL) #endif /* HAVE_TLBCAM */ -#ifdef CONFIG_PTE_64BIT -/* Some processors use an 8kB pgdir because they have 8-byte Linux PTEs. */ -#define PGDIR_ORDER 1 -#else -#define PGDIR_ORDER 0 -#endif +#define PGDIR_ORDER (32 + PGD_T_LOG2 - PGDIR_SHIFT) pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *ret; - ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER); + /* pgdir take page or two with 4K pages and a page fraction otherwise */ +#ifndef CONFIG_PPC_4K_PAGES + ret = (pgd_t *)kzalloc(1 << PGDIR_ORDER, GFP_KERNEL); +#else + ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, + PGDIR_ORDER - PAGE_SHIFT); +#endif return ret; } void pgd_free(struct mm_struct *mm, pgd_t *pgd) { - free_pages((unsigned long)pgd, PGDIR_ORDER); +#ifndef CONFIG_PPC_4K_PAGES + kfree((void *)pgd); +#else + free_pages((unsigned long)pgd, PGDIR_ORDER - PAGE_SHIFT); +#endif } __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) @@ -385,7 +390,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) #endif /* CONFIG_DEBUG_PAGEALLOC */ static int fixmaps; -unsigned long FIXADDR_TOP = 0xfffff000; +unsigned long FIXADDR_TOP = (-PAGE_SIZE); EXPORT_SYMBOL(FIXADDR_TOP); void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) -- cgit v1.2.3