From f430c02b13f00146106fedcace810e61b4493d8c Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 12 Apr 2006 15:21:06 -0500 Subject: [PATCH] powerpc: Quiet page order output No need to always print page orders. Signed-off-by: Olof Johansson Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_utils_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index c006d903963..b43ed92ef47 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -319,7 +319,7 @@ static void __init htab_init_page_sizes(void) mmu_virtual_psize = MMU_PAGE_64K; #endif - printk(KERN_INFO "Page orders: linear mapping = %d, others = %d\n", + printk(KERN_DEBUG "Page orders: linear mapping = %d, others = %d\n", mmu_psize_defs[mmu_linear_psize].shift, mmu_psize_defs[mmu_virtual_psize].shift); -- cgit v1.2.3 From e110b281dc93e3b4587a3d0440bb7ae38daddfde Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 12 Apr 2006 15:25:01 -0500 Subject: [PATCH] powerpc: Less verbose mem configuration output Quieten some of the debug RAM config output. We already print out available memory at KERN_INFO level. Signed-off-by: Olof Johansson Signed-off-by: Paul Mackerras --- arch/powerpc/mm/mem.c | 6 +++--- arch/powerpc/mm/numa.c | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 741dd8802d4..69f3b9a20be 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -299,9 +299,9 @@ void __init paging_init(void) kmap_prot = PAGE_KERNEL; #endif /* CONFIG_HIGHMEM */ - printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", + printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", top_of_ram, total_ram); - printk(KERN_INFO "Memory hole size: %ldMB\n", + printk(KERN_DEBUG "Memory hole size: %ldMB\n", (top_of_ram - total_ram) >> 20); /* * All pages are DMA-able so we put them all in the DMA zone. @@ -380,7 +380,7 @@ void __init mem_init(void) totalhigh_pages++; } totalram_pages += totalhigh_pages; - printk(KERN_INFO "High memory: %luk\n", + printk(KERN_DEBUG "High memory: %luk\n", totalhigh_pages << (PAGE_SHIFT-10)); } #endif /* CONFIG_HIGHMEM */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 0a335f34974..ea816c618a7 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -465,9 +465,9 @@ static void __init setup_nonnuma(void) unsigned long total_ram = lmb_phys_mem_size(); unsigned int i; - printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", + printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", top_of_ram, total_ram); - printk(KERN_INFO "Memory hole size: %ldMB\n", + printk(KERN_DEBUG "Memory hole size: %ldMB\n", (top_of_ram - total_ram) >> 20); for (i = 0; i < lmb.memory.cnt; ++i) @@ -485,7 +485,7 @@ void __init dump_numa_cpu_topology(void) return; for_each_online_node(node) { - printk(KERN_INFO "Node %d CPUs:", node); + printk(KERN_DEBUG "Node %d CPUs:", node); count = 0; /* @@ -521,7 +521,7 @@ static void __init dump_numa_memory_topology(void) for_each_online_node(node) { unsigned long i; - printk(KERN_INFO "Node %d Memory:", node); + printk(KERN_DEBUG "Node %d Memory:", node); count = 0; -- cgit v1.2.3 From 2babf5c2ec2f2d5de3e38d20f7df7fd815fd10c9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 17 May 2006 18:00:46 +1000 Subject: [PATCH] powerpc: Unify mem= handling We currently do mem= handling in three separate places. 
And, as benh pointed out, I wrote two of them. Now that we parse command line parameters earlier, we can clean this mess up. Moving the parsing out of prom_init means the device tree might be allocated above the memory limit. If that happens, we'd have to move it. As it happens we already have logic to do that for kdump, so just genericise it. This also means we might have reserved regions above the memory limit; if we do, the bootmem allocator will blow up, so we have to modify lmb_enforce_memory_limit() to truncate the reserves as well. Tested on P5 LPAR, iSeries, F50, 44p. Tested moving device tree on P5 and 44p and F50. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/mm/lmb.c | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c index 417d5851855..8b6f522655a 100644 --- a/arch/powerpc/mm/lmb.c +++ b/arch/powerpc/mm/lmb.c @@ -89,20 +89,25 @@ static long __init lmb_regions_adjacent(struct lmb_region *rgn, return lmb_addrs_adjacent(base1, size1, base2, size2); } -/* Assumption: base addr of region 1 < base addr of region 2 */ -static void __init lmb_coalesce_regions(struct lmb_region *rgn, - unsigned long r1, unsigned long r2) +static void __init lmb_remove_region(struct lmb_region *rgn, unsigned long r) { unsigned long i; - rgn->region[r1].size += rgn->region[r2].size; - for (i=r2; i < rgn->cnt-1; i++) { - rgn->region[i].base = rgn->region[i+1].base; - rgn->region[i].size = rgn->region[i+1].size; + for (i = r; i < rgn->cnt - 1; i++) { + rgn->region[i].base = rgn->region[i + 1].base; + rgn->region[i].size = rgn->region[i + 1].size; } rgn->cnt--; } +/* Assumption: base addr of region 1 < base addr of region 2 */ +static void __init lmb_coalesce_regions(struct lmb_region *rgn, + unsigned long r1, unsigned long r2) +{ + rgn->region[r1].size += rgn->region[r2].size; + lmb_remove_region(rgn, r2); +} + /* This routine called with relocation disabled. */ void __init lmb_init(void) { @@ -294,17 +299,16 @@ unsigned long __init lmb_end_of_DRAM(void) return (lmb.memory.region[idx].base + lmb.memory.region[idx].size); } -/* - * Truncate the lmb list to memory_limit if it's set - * You must call lmb_analyze() after this. - */ +/* You must call lmb_analyze() after this. */ void __init lmb_enforce_memory_limit(unsigned long memory_limit) { unsigned long i, limit; + struct lmb_property *p; if (! memory_limit) return; + /* Truncate the lmb regions to satisfy the memory limit. */ limit = memory_limit; for (i = 0; i < lmb.memory.cnt; i++) { if (limit > lmb.memory.region[i].size) { @@ -316,4 +320,21 @@ void __init lmb_enforce_memory_limit(unsigned long memory_limit) lmb.memory.cnt = i + 1; break; } + + lmb.rmo_size = lmb.memory.region[0].size; + + /* And truncate any reserves above the limit also. */ + for (i = 0; i < lmb.reserved.cnt; i++) { + p = &lmb.reserved.region[i]; + + if (p->base > memory_limit) + p->size = 0; + else if ((p->base + p->size) > memory_limit) + p->size = memory_limit - p->base; + + if (p->size == 0) { + lmb_remove_region(&lmb.reserved, i); + i--; + } + } } -- cgit v1.2.3 From c5cf0e30bf3d8cb56758abb612827647c0a821cf Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 30 May 2006 14:14:19 +1000 Subject: [PATCH] powerpc: Fix buglet with MMU hash management Our MMU hash management code would not set the "C" bit (changed bit) in the hardware PTE when updating a RO PTE into a RW PTE. 
That would cause the hardware to possibly do a write back to the hash table to set it on the first store access, which, in addition to being a performance issue, might also hit a bug when running with native hash management (non-HV) as our code is specifically optimized for the case where no write back happens. Thus there is a very small theoretical window where a hash PTE can become corrupted if that HPTE has just been upgraded to read-write, a store access happens on it, and that races with another processor evicting that same slot. Since eviction (caused by an almost full hash) is extremely rare, the bug is fortunately very unlikely to happen. This fixes it by allowing the update of the protection bits in the native hash handling to also set (but not clear) the "C" bit, and, in order to also improve performance in the general case, by always setting that bit on newly inserted hash PTEs so that writeback really never happens. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_low_64.S | 3 +++ arch/powerpc/mm/hash_native_64.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index e0d02c4a261..106fba39198 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -136,6 +136,7 @@ _GLOBAL(__hash_page_4K) and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ andc r0,r30,r0 /* r0 = pte & ~r0 */ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ + ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */ /* We eventually do the icache sync here (maybe inline that * code rather than call a C function...) @@ -400,6 +401,7 @@ _GLOBAL(__hash_page_4K) and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ andc r0,r30,r0 /* r0 = pte & ~r0 */ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ + ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */ /* We eventually do the icache sync here (maybe inline that * code rather than call a C function...) @@ -671,6 +673,7 @@ _GLOBAL(__hash_page_64K) and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ andc r0,r30,r0 /* r0 = pte & ~r0 */ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ + ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */ /* We eventually do the icache sync here (maybe inline that * code rather than call a C function...) diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 33654d1b1b4..3b8205033f1 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -238,7 +238,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" -> hit\n"); /* Update the HPTE */ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N)); + (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)); native_unlock_hpte(hptep); } -- cgit v1.2.3 From 6218a761bbc27acc65248c80024875bcc06d52b1 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sun, 11 Jun 2006 14:15:17 +1000 Subject: powerpc: add context.vdso_base for 32-bit too This adds a vdso_base element to the mm_context_t for 32-bit compiles (both for ARCH=powerpc and ARCH=ppc). This fixes the compile errors that have been reported in arch/powerpc/kernel/signal_32.c. 
Signed-off-by: Paul Mackerras --- arch/powerpc/mm/mmu_context_32.c | 2 +- arch/powerpc/mm/ppc_mmu_32.c | 2 +- arch/powerpc/mm/tlb_32.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mmu_context_32.c b/arch/powerpc/mm/mmu_context_32.c index a8816e0f6a8..e326e4249e1 100644 --- a/arch/powerpc/mm/mmu_context_32.c +++ b/arch/powerpc/mm/mmu_context_32.c @@ -30,7 +30,7 @@ #include #include -mm_context_t next_mmu_context; +unsigned long next_mmu_context; unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; #ifdef FEW_CONTEXTS atomic_t nr_free_contexts; diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index ed7fcfe5fd3..1df731e42b5 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -190,7 +190,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, return; pmd = pmd_offset(pgd_offset(mm, ea), ea); if (!pmd_none(*pmd)) - add_hash_page(mm->context, ea, pmd_val(*pmd)); + add_hash_page(mm->context.id, ea, pmd_val(*pmd)); } /* diff --git a/arch/powerpc/mm/tlb_32.c b/arch/powerpc/mm/tlb_32.c index ad580f3742e..02eb23e036d 100644 --- a/arch/powerpc/mm/tlb_32.c +++ b/arch/powerpc/mm/tlb_32.c @@ -42,7 +42,7 @@ void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) if (Hash != 0) { ptephys = __pa(ptep) & PAGE_MASK; - flush_hash_pages(mm->context, addr, ptephys, 1); + flush_hash_pages(mm->context.id, addr, ptephys, 1); } } @@ -102,7 +102,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start, pmd_t *pmd; unsigned long pmd_end; int count; - unsigned int ctx = mm->context; + unsigned int ctx = mm->context.id; if (Hash == 0) { _tlbia(); @@ -172,7 +172,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr); if (!pmd_none(*pmd)) - flush_hash_pages(mm->context, vmaddr, pmd_val(*pmd), 1); + flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); FINISH_FLUSH; } -- cgit v1.2.3 From 430644312810645a6e05855db50a978df9ba3ad3 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 12 Jun 2006 18:38:21 +1000 Subject: powerpc: Remove unused paca->pgdir field The pgdir field in the paca was a leftover from the dynamic VSIDs patch, and is not used in the current kernel code. This removes it. Signed-off-by: Paul Mackerras --- arch/powerpc/mm/slb.c | 3 --- arch/powerpc/mm/stab.c | 4 ---- 2 files changed, 7 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index ffc8ed4de62..2cc61736fee 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -122,9 +122,6 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) get_paca()->slb_cache_ptr = 0; get_paca()->context = mm->context; -#ifdef CONFIG_PPC_64K_PAGES - get_paca()->pgdir = mm->pgd; -#endif /* CONFIG_PPC_64K_PAGES */ /* * preload some userspace segments into the SLB. 
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 4a9291d9fef..691320c90b7 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -200,10 +200,6 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) __get_cpu_var(stab_cache_ptr) = 0; -#ifdef CONFIG_PPC_64K_PAGES - get_paca()->pgdir = mm->pgd; -#endif /* CONFIG_PPC_64K_PAGES */ - /* Now preload some entries for the new task */ if (test_tsk_thread_flag(tsk, TIF_32BIT)) unmapped_base = TASK_UNMAPPED_BASE_USER32; -- cgit v1.2.3 From bf72aeba2ffef599d1d386425c9e46b82be657cd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 15 Jun 2006 10:45:18 +1000 Subject: powerpc: Use 64k pages without needing cache-inhibited large pages Some POWER5+ machines can do 64k hardware pages for normal memory but not for cache-inhibited pages. This patch lets us use 64k hardware pages for most user processes on such machines (assuming the kernel has been configured with CONFIG_PPC_64K_PAGES=y). User processes start out using 64k pages and get switched to 4k pages if they use any non-cacheable mappings. With this, we use 64k pages for the vmalloc region and 4k pages for the imalloc region. If anything creates a non-cacheable mapping in the vmalloc region, the vmalloc region will get switched to 4k pages. I don't know of any driver other than the DRM that would do this, though, and these machines don't have AGP. When a region gets switched from 64k pages to 4k pages, we do not have to clear out all the 64k HPTEs from the hash table immediately. We use the _PAGE_COMBO bit in the Linux PTE to indicate whether the page was hashed in as a 64k page or a set of 4k pages. If hash_page is trying to insert a 4k page for a Linux PTE and it sees that it has already been inserted as a 64k page, it first invalidates the 64k HPTE before inserting the 4k HPTE. The hash invalidation routines also use the _PAGE_COMBO bit, to determine whether to look for a 64k HPTE or a set of 4k HPTEs to remove. With those two changes, we can tolerate a mix of 4k and 64k HPTEs in the hash table, and they will all get removed when the address space is torn down. Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_low_64.S | 28 ++++++++++++++ arch/powerpc/mm/hash_utils_64.c | 84 +++++++++++++++++++++++++++++++++++----- arch/powerpc/mm/mmu_context_64.c | 3 ++ arch/powerpc/mm/slb.c | 29 +++++++------- arch/powerpc/mm/slb_low.S | 17 +++++--- arch/powerpc/mm/tlb_64.c | 5 ++- 6 files changed, 135 insertions(+), 31 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 106fba39198..52e91423895 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -369,6 +369,7 @@ _GLOBAL(__hash_page_4K) rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */ or r30,r30,r31 ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE + oris r30,r30,_PAGE_COMBO@h /* Write the linux PTE atomically (setting busy) */ stdcx. r30,0,r6 bne- 1b @@ -428,6 +429,14 @@ END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) andi. r0,r31,_PAGE_HASHPTE li r26,0 /* Default hidx */ beq htab_insert_pte + + /* + * Check if the pte was already inserted into the hash table + * as a 64k HW page, and invalidate the 64k HPTE if so. + */ + andis. 
r0,r31,_PAGE_COMBO@h + beq htab_inval_old_hpte + ld r6,STK_PARM(r6)(r1) ori r26,r6,0x8000 /* Load the hidx mask */ ld r26,0(r26) @@ -498,6 +507,19 @@ _GLOBAL(htab_call_hpte_remove) /* Try all again */ b htab_insert_pte + /* + * Call out to C code to invalidate an 64k HW HPTE that is + * useless now that the segment has been switched to 4k pages. + */ +htab_inval_old_hpte: + mr r3,r29 /* virtual addr */ + mr r4,r31 /* PTE.pte */ + li r5,0 /* PTE.hidx */ + li r6,MMU_PAGE_64K /* psize */ + ld r7,STK_PARM(r8)(r1) /* local */ + bl .flush_hash_page + b htab_insert_pte + htab_bail_ok: li r3,0 b htab_bail @@ -638,6 +660,12 @@ _GLOBAL(__hash_page_64K) * is changing this PTE anyway and might hash it. */ bne- ht64_bail_ok +BEGIN_FTR_SECTION + /* Check if PTE has the cache-inhibit bit set */ + andi. r0,r31,_PAGE_NO_CACHE + /* If so, bail out and refault as a 4k page */ + bne- ht64_bail_ok +END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE) /* Prepare new PTE value (turn access RW into DIRTY, then * add BUSY,HASHPTE and ACCESSED) */ diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b43ed92ef47..d03fd2b4445 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -92,10 +92,15 @@ unsigned long htab_size_bytes; unsigned long htab_hash_mask; int mmu_linear_psize = MMU_PAGE_4K; int mmu_virtual_psize = MMU_PAGE_4K; +int mmu_vmalloc_psize = MMU_PAGE_4K; +int mmu_io_psize = MMU_PAGE_4K; #ifdef CONFIG_HUGETLB_PAGE int mmu_huge_psize = MMU_PAGE_16M; unsigned int HPAGE_SHIFT; #endif +#ifdef CONFIG_PPC_64K_PAGES +int mmu_ci_restrictions; +#endif /* There are definitions of page sizes arrays to be used when none * is provided by the firmware. @@ -308,20 +313,31 @@ static void __init htab_init_page_sizes(void) else if (mmu_psize_defs[MMU_PAGE_1M].shift) mmu_linear_psize = MMU_PAGE_1M; +#ifdef CONFIG_PPC_64K_PAGES /* * Pick a size for the ordinary pages. Default is 4K, we support - * 64K if cache inhibited large pages are supported by the - * processor + * 64K for user mappings and vmalloc if supported by the processor. + * We only use 64k for ioremap if the processor + * (and firmware) support cache-inhibited large pages. + * If not, we use 4k and set mmu_ci_restrictions so that + * hash_page knows to switch processes that use cache-inhibited + * mappings to 4k pages. */ -#ifdef CONFIG_PPC_64K_PAGES - if (mmu_psize_defs[MMU_PAGE_64K].shift && - cpu_has_feature(CPU_FTR_CI_LARGE_PAGE)) + if (mmu_psize_defs[MMU_PAGE_64K].shift) { mmu_virtual_psize = MMU_PAGE_64K; + mmu_vmalloc_psize = MMU_PAGE_64K; + if (cpu_has_feature(CPU_FTR_CI_LARGE_PAGE)) + mmu_io_psize = MMU_PAGE_64K; + else + mmu_ci_restrictions = 1; + } #endif - printk(KERN_DEBUG "Page orders: linear mapping = %d, others = %d\n", + printk(KERN_DEBUG "Page orders: linear mapping = %d, " + "virtual = %d, io = %d\n", mmu_psize_defs[mmu_linear_psize].shift, - mmu_psize_defs[mmu_virtual_psize].shift); + mmu_psize_defs[mmu_virtual_psize].shift, + mmu_psize_defs[mmu_io_psize].shift); #ifdef CONFIG_HUGETLB_PAGE /* Init large page size. 
Currently, we pick 16M or 1M depending @@ -556,6 +572,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) pte_t *ptep; cpumask_t tmp; int rc, user_region = 0, local = 0; + int psize; DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", ea, access, trap); @@ -575,10 +592,15 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) return 1; } vsid = get_vsid(mm->context.id, ea); + psize = mm->context.user_psize; break; case VMALLOC_REGION_ID: mm = &init_mm; vsid = get_kernel_vsid(ea); + if (ea < VMALLOC_END) + psize = mmu_vmalloc_psize; + else + psize = mmu_io_psize; break; default: /* Not a valid range @@ -629,7 +651,40 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) #ifndef CONFIG_PPC_64K_PAGES rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); #else - if (mmu_virtual_psize == MMU_PAGE_64K) + if (mmu_ci_restrictions) { + /* If this PTE is non-cacheable, switch to 4k */ + if (psize == MMU_PAGE_64K && + (pte_val(*ptep) & _PAGE_NO_CACHE)) { + if (user_region) { + psize = MMU_PAGE_4K; + mm->context.user_psize = MMU_PAGE_4K; + mm->context.sllp = SLB_VSID_USER | + mmu_psize_defs[MMU_PAGE_4K].sllp; + } else if (ea < VMALLOC_END) { + /* + * some driver did a non-cacheable mapping + * in vmalloc space, so switch vmalloc + * to 4k pages + */ + printk(KERN_ALERT "Reducing vmalloc segment " + "to 4kB pages because of " + "non-cacheable mapping\n"); + psize = mmu_vmalloc_psize = MMU_PAGE_4K; + } + } + if (user_region) { + if (psize != get_paca()->context.user_psize) { + get_paca()->context = mm->context; + slb_flush_and_rebolt(); + } + } else if (get_paca()->vmalloc_sllp != + mmu_psize_defs[mmu_vmalloc_psize].sllp) { + get_paca()->vmalloc_sllp = + mmu_psize_defs[mmu_vmalloc_psize].sllp; + slb_flush_and_rebolt(); + } + } + if (psize == MMU_PAGE_64K) rc = __hash_page_64K(ea, access, vsid, ptep, trap, local); else rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); @@ -681,7 +736,18 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, #ifndef CONFIG_PPC_64K_PAGES __hash_page_4K(ea, access, vsid, ptep, trap, local); #else - if (mmu_virtual_psize == MMU_PAGE_64K) + if (mmu_ci_restrictions) { + /* If this PTE is non-cacheable, switch to 4k */ + if (mm->context.user_psize == MMU_PAGE_64K && + (pte_val(*ptep) & _PAGE_NO_CACHE)) { + mm->context.user_psize = MMU_PAGE_4K; + mm->context.sllp = SLB_VSID_USER | + mmu_psize_defs[MMU_PAGE_4K].sllp; + get_paca()->context = mm->context; + slb_flush_and_rebolt(); + } + } + if (mm->context.user_psize == MMU_PAGE_64K) __hash_page_64K(ea, access, vsid, ptep, trap, local); else __hash_page_4K(ea, access, vsid, ptep, trap, local); diff --git a/arch/powerpc/mm/mmu_context_64.c b/arch/powerpc/mm/mmu_context_64.c index 714a84dd8d5..65d18dca266 100644 --- a/arch/powerpc/mm/mmu_context_64.c +++ b/arch/powerpc/mm/mmu_context_64.c @@ -49,6 +49,9 @@ again: } mm->context.id = index; + mm->context.user_psize = mmu_virtual_psize; + mm->context.sllp = SLB_VSID_USER | + mmu_psize_defs[mmu_virtual_psize].sllp; return 0; } diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 2cc61736fee..6a8bf6c6000 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -60,19 +60,19 @@ static inline void create_slbe(unsigned long ea, unsigned long flags, : "memory" ); } -static void slb_flush_and_rebolt(void) +void slb_flush_and_rebolt(void) { /* If you change this make sure you change SLB_NUM_BOLTED * appropriately too. 
*/ - unsigned long linear_llp, virtual_llp, lflags, vflags; + unsigned long linear_llp, vmalloc_llp, lflags, vflags; unsigned long ksp_esid_data; WARN_ON(!irqs_disabled()); linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; - virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp; + vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp; lflags = SLB_VSID_KERNEL | linear_llp; - vflags = SLB_VSID_KERNEL | virtual_llp; + vflags = SLB_VSID_KERNEL | vmalloc_llp; ksp_esid_data = mk_esid_data(get_paca()->kstack, 2); if ((ksp_esid_data & ESID_MASK) == PAGE_OFFSET) @@ -164,11 +164,10 @@ static inline void patch_slb_encoding(unsigned int *insn_addr, void slb_initialize(void) { - unsigned long linear_llp, virtual_llp; + unsigned long linear_llp, vmalloc_llp, io_llp; static int slb_encoding_inited; extern unsigned int *slb_miss_kernel_load_linear; - extern unsigned int *slb_miss_kernel_load_virtual; - extern unsigned int *slb_miss_user_load_normal; + extern unsigned int *slb_miss_kernel_load_io; #ifdef CONFIG_HUGETLB_PAGE extern unsigned int *slb_miss_user_load_huge; unsigned long huge_llp; @@ -178,18 +177,19 @@ void slb_initialize(void) /* Prepare our SLB miss handler based on our page size */ linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; - virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp; + io_llp = mmu_psize_defs[mmu_io_psize].sllp; + vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp; + get_paca()->vmalloc_sllp = SLB_VSID_KERNEL | vmalloc_llp; + if (!slb_encoding_inited) { slb_encoding_inited = 1; patch_slb_encoding(slb_miss_kernel_load_linear, SLB_VSID_KERNEL | linear_llp); - patch_slb_encoding(slb_miss_kernel_load_virtual, - SLB_VSID_KERNEL | virtual_llp); - patch_slb_encoding(slb_miss_user_load_normal, - SLB_VSID_USER | virtual_llp); + patch_slb_encoding(slb_miss_kernel_load_io, + SLB_VSID_KERNEL | io_llp); DBG("SLB: linear LLP = %04x\n", linear_llp); - DBG("SLB: virtual LLP = %04x\n", virtual_llp); + DBG("SLB: io LLP = %04x\n", io_llp); #ifdef CONFIG_HUGETLB_PAGE patch_slb_encoding(slb_miss_user_load_huge, SLB_VSID_USER | huge_llp); @@ -204,7 +204,7 @@ void slb_initialize(void) unsigned long lflags, vflags; lflags = SLB_VSID_KERNEL | linear_llp; - vflags = SLB_VSID_KERNEL | virtual_llp; + vflags = SLB_VSID_KERNEL | vmalloc_llp; /* Invalidate the entire SLB (even slot 0) & all the ERATS */ asm volatile("isync":::"memory"); @@ -212,7 +212,6 @@ void slb_initialize(void) asm volatile("isync; slbia; isync":::"memory"); create_slbe(PAGE_OFFSET, lflags, 0); - /* VMALLOC space has 4K pages always for now */ create_slbe(VMALLOC_START, vflags, 1); /* We don't bolt the stack for the time being - we're in boot, diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index abfaabf667b..8548dcf8ef8 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -59,10 +59,19 @@ _GLOBAL(slb_miss_kernel_load_linear) li r11,0 b slb_finish_load -1: /* vmalloc/ioremap mapping encoding bits, the "li" instruction below +1: /* vmalloc/ioremap mapping encoding bits, the "li" instructions below * will be patched by the kernel at boot */ -_GLOBAL(slb_miss_kernel_load_virtual) +BEGIN_FTR_SECTION + /* check whether this is in vmalloc or ioremap space */ + clrldi r11,r10,48 + cmpldi r11,(VMALLOC_SIZE >> 28) - 1 + bgt 5f + lhz r11,PACAVMALLOCSLLP(r13) + b slb_finish_load +5: +END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE) +_GLOBAL(slb_miss_kernel_load_io) li r11,0 b slb_finish_load @@ -96,9 +105,7 @@ _GLOBAL(slb_miss_user_load_huge) 1: #endif /* CONFIG_HUGETLB_PAGE */ 
-_GLOBAL(slb_miss_user_load_normal) - li r11,0 - + lhz r11,PACACONTEXTSLLP(r13) 2: ld r9,PACACONTEXTID(r13) rldimi r10,r9,USER_ESID_BITS,0 diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c index f734b11566c..e7449b068c8 100644 --- a/arch/powerpc/mm/tlb_64.c +++ b/arch/powerpc/mm/tlb_64.c @@ -131,7 +131,7 @@ void hpte_update(struct mm_struct *mm, unsigned long addr, { struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); unsigned long vsid; - unsigned int psize = mmu_virtual_psize; + unsigned int psize; int i; i = batch->index; @@ -148,7 +148,8 @@ void hpte_update(struct mm_struct *mm, unsigned long addr, #else BUG(); #endif - } + } else + psize = pte_pagesize_index(pte); /* * This can happen when we are in the middle of a TLB batch and -- cgit v1.2.3 From 227318bbde6c8309b1d20ab46532ec2b737e1fee Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sat, 10 Jun 2006 20:32:01 +1000 Subject: [POWERPC] Remove stale 64bit on 32bit kernel code Remove some stale POWER3/POWER4/970 on 32bit kernel support. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_low_32.S | 34 ---------------------------------- arch/powerpc/mm/ppc_mmu_32.c | 10 ---------- 2 files changed, 44 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index ea469eefa14..94255beeecd 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -74,12 +74,6 @@ _GLOBAL(hash_page_sync) */ .text _GLOBAL(hash_page) -#ifdef CONFIG_PPC64BRIDGE - mfmsr r0 - clrldi r0,r0,1 /* make sure it's in 32-bit mode */ - MTMSRD(r0) - isync -#endif tophys(r7,0) /* gets -KERNELBASE into r7 */ #ifdef CONFIG_SMP addis r8,r7,mmu_hash_lock@h @@ -285,7 +279,6 @@ Hash_base = 0xc0180000 Hash_bits = 12 /* e.g. 256kB hash table */ Hash_msk = (((1 << Hash_bits) - 1) * 64) -#ifndef CONFIG_PPC64BRIDGE /* defines for the PTE format for 32-bit PPCs */ #define PTE_SIZE 8 #define PTEG_SIZE 64 @@ -299,21 +292,6 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64) #define SET_V(r) oris r,r,PTE_V@h #define CLR_V(r,t) rlwinm r,r,0,1,31 -#else -/* defines for the PTE format for 64-bit PPCs */ -#define PTE_SIZE 16 -#define PTEG_SIZE 128 -#define LG_PTEG_SIZE 7 -#define LDPTEu ldu -#define STPTE std -#define CMPPTE cmpd -#define PTE_H 2 -#define PTE_V 1 -#define TST_V(r) andi. 
r,r,PTE_V -#define SET_V(r) ori r,r,PTE_V -#define CLR_V(r,t) li t,PTE_V; andc r,r,t -#endif /* CONFIG_PPC64BRIDGE */ - #define HASH_LEFT 31-(LG_PTEG_SIZE+Hash_bits-1) #define HASH_RIGHT 31-LG_PTEG_SIZE @@ -331,14 +309,8 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_NEED_COHERENT) /* Construct the high word of the PPC-style PTE (r5) */ -#ifndef CONFIG_PPC64BRIDGE rlwinm r5,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ rlwimi r5,r4,10,26,31 /* put in API (abbrev page index) */ -#else /* CONFIG_PPC64BRIDGE */ - clrlwi r3,r3,8 /* reduce vsid to 24 bits */ - sldi r5,r3,12 /* shift vsid into position */ - rlwimi r5,r4,16,20,24 /* put in API (abbrev page index) */ -#endif /* CONFIG_PPC64BRIDGE */ SET_V(r5) /* set V (valid) bit */ /* Get the address of the primary PTE group in the hash table (r3) */ @@ -516,14 +488,8 @@ _GLOBAL(flush_hash_pages) add r3,r3,r0 /* note code below trims to 24 bits */ /* Construct the high word of the PPC-style PTE (r11) */ -#ifndef CONFIG_PPC64BRIDGE rlwinm r11,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ rlwimi r11,r4,10,26,31 /* put in API (abbrev page index) */ -#else /* CONFIG_PPC64BRIDGE */ - clrlwi r3,r3,8 /* reduce vsid to 24 bits */ - sldi r11,r3,12 /* shift vsid into position */ - rlwimi r11,r4,16,20,24 /* put in API (abbrev page index) */ -#endif /* CONFIG_PPC64BRIDGE */ SET_V(r11) /* set V (valid) bit */ #ifdef CONFIG_SMP diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 1df731e42b5..ab5cd724b12 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -42,11 +42,7 @@ unsigned long _SDR1; union ubat { /* BAT register values to be loaded */ BAT bat; -#ifdef CONFIG_PPC64BRIDGE - u64 word[2]; -#else u32 word[2]; -#endif } BATS[4][2]; /* 4 pairs of IBAT, DBAT */ struct batrange { /* stores address ranges mapped by BATs */ @@ -220,15 +216,9 @@ void __init MMU_init_hw(void) if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105); -#ifdef CONFIG_PPC64BRIDGE -#define LG_HPTEG_SIZE 7 /* 128 bytes per HPTEG */ -#define SDR1_LOW_BITS (lg_n_hpteg - 11) -#define MIN_N_HPTEG 2048 /* min 256kB hash table */ -#else #define LG_HPTEG_SIZE 6 /* 64 bytes per HPTEG */ #define SDR1_LOW_BITS ((n_hpteg - 1) >> 10) #define MIN_N_HPTEG 1024 /* min 64kB hash table */ -#endif /* * Allow 1 HPTE (1/8 HPTEG) for each page of memory. -- cgit v1.2.3 From ee0339f205d60375c5ce1653c0dc318c6ec72668 Mon Sep 17 00:00:00 2001 From: Jon Loeliger Date: Sat, 17 Jun 2006 17:52:44 -0500 Subject: [POWERPC] Add starting of secondary 86xx CPUs. Clear the high BATS during load_up_mmu if FTR_HAS_HIGH_BATS. Allow just a bit more time for secondary CPUs to phone home. Signed-off-by: Wei Zhang Signed-off-by: Haiying Wang Signed-off-by: Jon Loeliger Signed-off-by: Paul Mackerras --- arch/powerpc/mm/ppc_mmu_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index ab5cd724b12..2ed43a493b3 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -43,13 +43,13 @@ unsigned long _SDR1; union ubat { /* BAT register values to be loaded */ BAT bat; u32 word[2]; -} BATS[4][2]; /* 4 pairs of IBAT, DBAT */ +} BATS[8][2]; /* 8 pairs of IBAT, DBAT */ struct batrange { /* stores address ranges mapped by BATs */ unsigned long start; unsigned long limit; unsigned long phys; -} bat_addrs[4]; +} bat_addrs[8]; /* * Return PA for this VA if it is mapped by a BAT, or 0 -- cgit v1.2.3