aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/powerpc/Kconfig24
-rw-r--r--arch/powerpc/kernel/asm-offsets.c16
-rw-r--r--arch/powerpc/kernel/lparmap.c3
-rw-r--r--arch/powerpc/mm/Makefile1
-rw-r--r--arch/powerpc/mm/hash_low_64.S5
-rw-r--r--arch/powerpc/mm/hash_utils_64.c142
-rw-r--r--arch/powerpc/mm/hugetlbpage.c548
-rw-r--r--arch/powerpc/mm/init_64.c17
-rw-r--r--arch/powerpc/mm/mem.c25
-rw-r--r--arch/powerpc/mm/mmu_context_64.c10
-rw-r--r--arch/powerpc/mm/ppc_mmu_32.c2
-rw-r--r--arch/powerpc/mm/slb.c11
-rw-r--r--arch/powerpc/mm/slb_low.S52
-rw-r--r--arch/powerpc/mm/slice.c633
-rw-r--r--arch/powerpc/mm/tlb_32.c4
-rw-r--r--arch/powerpc/mm/tlb_64.c12
-rw-r--r--arch/powerpc/platforms/86xx/mpc86xx_smp.c2
-rw-r--r--arch/powerpc/platforms/cell/Kconfig15
-rw-r--r--arch/powerpc/platforms/cell/spu_base.c9
-rw-r--r--arch/powerpc/platforms/cell/spufs/Makefile2
-rw-r--r--arch/powerpc/platforms/cell/spufs/context.c4
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c80
-rw-r--r--arch/powerpc/platforms/cell/spufs/lscsa_alloc.c181
-rw-r--r--arch/powerpc/platforms/cell/spufs/switch.c28
-rw-r--r--arch/powerpc/platforms/iseries/Kconfig4
-rw-r--r--arch/powerpc/platforms/pseries/eeh.c87
-rw-r--r--arch/powerpc/platforms/pseries/eeh_driver.c14
-rw-r--r--arch/powerpc/sysdev/fsl_soc.c2
-rw-r--r--drivers/char/Kconfig3
-rw-r--r--include/asm-powerpc/mmu-hash64.h11
-rw-r--r--include/asm-powerpc/paca.h2
-rw-r--r--include/asm-powerpc/page_64.h86
-rw-r--r--include/asm-powerpc/pgalloc-64.h31
-rw-r--r--include/asm-powerpc/pgtable-4k.h6
-rw-r--r--include/asm-powerpc/pgtable-64k.h7
-rw-r--r--include/asm-powerpc/spu_csa.h10
-rw-r--r--include/linux/suspend.h11
-rw-r--r--kernel/power/snapshot.c12
38 files changed, 1323 insertions, 789 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 808d2ef80e2..ccc5410af99 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -120,19 +120,6 @@ config GENERIC_BUG
config SYS_SUPPORTS_APM_EMULATION
bool
-#
-# Powerpc uses the slab allocator to manage its ptes and the
-# page structs of ptes are used for splitting the page table
-# lock for configurations supporting more than SPLIT_PTLOCK_CPUS.
-#
-# In that special configuration the page structs of slabs are modified.
-# This setting disables the selection of SLUB as a slab allocator.
-#
-config ARCH_USES_SLAB_PAGE_STRUCT
- bool
- default y
- depends on SPLIT_PTLOCK_CPUS <= NR_CPUS
-
config DEFAULT_UIMAGE
bool
help
@@ -352,6 +339,11 @@ config PPC_STD_MMU_32
def_bool y
depends on PPC_STD_MMU && PPC32
+config PPC_MM_SLICES
+ bool
+ default y if HUGETLB_PAGE
+ default n
+
config VIRT_CPU_ACCOUNTING
bool "Deterministic task and CPU time accounting"
depends on PPC64
@@ -541,9 +533,15 @@ config NODES_SPAN_OTHER_NODES
def_bool y
depends on NEED_MULTIPLE_NODES
+config PPC_HAS_HASH_64K
+ bool
+ depends on PPC64
+ default n
+
config PPC_64K_PAGES
bool "64k page size"
depends on PPC64
+ select PPC_HAS_HASH_64K
help
This option changes the kernel logical page size to 64k. On machines
without processor support for 64k pages, the kernel will simulate
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 37bc35e69db..2cb1d948779 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -122,12 +122,18 @@ int main(void)
DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
- DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp));
DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp));
-#ifdef CONFIG_HUGETLB_PAGE
- DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
- DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
-#endif /* CONFIG_HUGETLB_PAGE */
+#ifdef CONFIG_PPC_MM_SLICES
+ DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
+ context.low_slices_psize));
+ DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct,
+ context.high_slices_psize));
+ DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
+ DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp));
+#else
+ DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp));
+
+#endif /* CONFIG_PPC_MM_SLICES */
DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb));
diff --git a/arch/powerpc/kernel/lparmap.c b/arch/powerpc/kernel/lparmap.c
index 584d1e3c013..af11285ffbd 100644
--- a/arch/powerpc/kernel/lparmap.c
+++ b/arch/powerpc/kernel/lparmap.c
@@ -10,7 +10,8 @@
#include <asm/pgtable.h>
#include <asm/iseries/lpar_map.h>
-const struct LparMap __attribute__((__section__(".text"))) xLparMap = {
+/* The # is to stop gcc trying to make .text nonexecutable */
+const struct LparMap __attribute__((__section__(".text #"))) xLparMap = {
.xNumberEsids = HvEsidsToMap,
.xNumberRanges = HvRangesToMap,
.xSegmentTableOffs = STAB0_PAGE,
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 38a81967ca0..4f839c6a976 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -18,4 +18,5 @@ obj-$(CONFIG_40x) += 4xx_mmu.o
obj-$(CONFIG_44x) += 44x_mmu.o
obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o
obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
+obj-$(CONFIG_PPC_MM_SLICES) += slice.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index e64ce3eec36..4762ff7c14d 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -615,6 +615,9 @@ htab_pte_insert_failure:
li r3,-1
b htab_bail
+#endif /* CONFIG_PPC_64K_PAGES */
+
+#ifdef CONFIG_PPC_HAS_HASH_64K
/*****************************************************************************
* *
@@ -870,7 +873,7 @@ ht64_pte_insert_failure:
b ht64_bail
-#endif /* CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC_HAS_HASH_64K */
/*****************************************************************************
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 9b226fa7006..028ba4ed03d 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -51,6 +51,7 @@
#include <asm/cputable.h>
#include <asm/abs_addr.h>
#include <asm/sections.h>
+#include <asm/spu.h>
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -419,7 +420,7 @@ static void __init htab_finish_init(void)
extern unsigned int *htab_call_hpte_remove;
extern unsigned int *htab_call_hpte_updatepp;
-#ifdef CONFIG_PPC_64K_PAGES
+#ifdef CONFIG_PPC_HAS_HASH_64K
extern unsigned int *ht64_call_hpte_insert1;
extern unsigned int *ht64_call_hpte_insert2;
extern unsigned int *ht64_call_hpte_remove;
@@ -596,22 +597,23 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
* Demote a segment to using 4k pages.
* For now this makes the whole process use 4k pages.
*/
-void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
-{
#ifdef CONFIG_PPC_64K_PAGES
+static void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
+{
if (mm->context.user_psize == MMU_PAGE_4K)
return;
+#ifdef CONFIG_PPC_MM_SLICES
+ slice_set_user_psize(mm, MMU_PAGE_4K);
+#else /* CONFIG_PPC_MM_SLICES */
mm->context.user_psize = MMU_PAGE_4K;
mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[MMU_PAGE_4K].sllp;
- get_paca()->context = mm->context;
- slb_flush_and_rebolt();
+#endif /* CONFIG_PPC_MM_SLICES */
+
#ifdef CONFIG_SPE_BASE
spu_flush_all_slbs(mm);
#endif
-#endif
}
-
-EXPORT_SYMBOL_GPL(demote_segment_4k);
+#endif /* CONFIG_PPC_64K_PAGES */
/* Result code is:
* 0 - handled
@@ -646,7 +648,11 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
return 1;
}
vsid = get_vsid(mm->context.id, ea);
+#ifdef CONFIG_PPC_MM_SLICES
+ psize = get_slice_psize(mm, ea);
+#else
psize = mm->context.user_psize;
+#endif
break;
case VMALLOC_REGION_ID:
mm = &init_mm;
@@ -674,11 +680,22 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
local = 1;
+#ifdef CONFIG_HUGETLB_PAGE
/* Handle hugepage regions */
- if (unlikely(in_hugepage_area(mm->context, ea))) {
+ if (HPAGE_SHIFT && psize == mmu_huge_psize) {
DBG_LOW(" -> huge page !\n");
return hash_huge_page(mm, access, ea, vsid, local, trap);
}
+#endif /* CONFIG_HUGETLB_PAGE */
+
+#ifndef CONFIG_PPC_64K_PAGES
+ /* If we use 4K pages and our psize is not 4K, then we are hitting
+ * a special driver mapping, we need to align the address before
+ * we fetch the PTE
+ */
+ if (psize != MMU_PAGE_4K)
+ ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+#endif /* CONFIG_PPC_64K_PAGES */
/* Get PTE and page size from page tables */
ptep = find_linux_pte(pgdir, ea);
@@ -702,54 +719,56 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
}
/* Do actual hashing */
-#ifndef CONFIG_PPC_64K_PAGES
- rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
-#else
+#ifdef CONFIG_PPC_64K_PAGES
/* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
if (pte_val(*ptep) & _PAGE_4K_PFN) {
demote_segment_4k(mm, ea);
psize = MMU_PAGE_4K;
}
- if (mmu_ci_restrictions) {
- /* If this PTE is non-cacheable, switch to 4k */
- if (psize == MMU_PAGE_64K &&
- (pte_val(*ptep) & _PAGE_NO_CACHE)) {
- if (user_region) {
- demote_segment_4k(mm, ea);
- psize = MMU_PAGE_4K;
- } else if (ea < VMALLOC_END) {
- /*
- * some driver did a non-cacheable mapping
- * in vmalloc space, so switch vmalloc
- * to 4k pages
- */
- printk(KERN_ALERT "Reducing vmalloc segment "
- "to 4kB pages because of "
- "non-cacheable mapping\n");
- psize = mmu_vmalloc_psize = MMU_PAGE_4K;
- }
+ /* If this PTE is non-cacheable and we have restrictions on
+ * using non cacheable large pages, then we switch to 4k
+ */
+ if (mmu_ci_restrictions && psize == MMU_PAGE_64K &&
+ (pte_val(*ptep) & _PAGE_NO_CACHE)) {
+ if (user_region) {
+ demote_segment_4k(mm, ea);
+ psize = MMU_PAGE_4K;
+ } else if (ea < VMALLOC_END) {
+ /*
+ * some driver did a non-cacheable mapping
+ * in vmalloc space, so switch vmalloc
+ * to 4k pages
+ */
+ printk(KERN_ALERT "Reducing vmalloc segment "
+ "to 4kB pages because of "
+ "non-cacheable mapping\n");
+ psize = mmu_vmalloc_psize = MMU_PAGE_4K;
#ifdef CONFIG_SPE_BASE
spu_flush_all_slbs(mm);
#endif
}
- if (user_region) {
- if (psize != get_paca()->context.user_psize) {
- get_paca()->context = mm->context;
- slb_flush_and_rebolt();
- }
- } else if (get_paca()->vmalloc_sllp !=
- mmu_psize_defs[mmu_vmalloc_psize].sllp) {
- get_paca()->vmalloc_sllp =
- mmu_psize_defs[mmu_vmalloc_psize].sllp;
+ }
+ if (user_region) {
+ if (psize != get_paca()->context.user_psize) {
+ get_paca()->context.user_psize =
+ mm->context.user_psize;
slb_flush_and_rebolt();
}
+ } else if (get_paca()->vmalloc_sllp !=
+ mmu_psize_defs[mmu_vmalloc_psize].sllp) {
+ get_paca()->vmalloc_sllp =
+ mmu_psize_defs[mmu_vmalloc_psize].sllp;
+ slb_flush_and_rebolt();
}
+#endif /* CONFIG_PPC_64K_PAGES */
+
+#ifdef CONFIG_PPC_HAS_HASH_64K
if (psize == MMU_PAGE_64K)
rc = __hash_page_64K(ea, access, vsid, ptep, trap, local);
else
+#endif /* CONFIG_PPC_HAS_HASH_64K */
rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
-#endif /* CONFIG_PPC_64K_PAGES */
#ifndef CONFIG_PPC_64K_PAGES
DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
@@ -772,42 +791,55 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
unsigned long flags;
int local = 0;
- /* We don't want huge pages prefaulted for now
- */
- if (unlikely(in_hugepage_area(mm->context, ea)))
+ BUG_ON(REGION_ID(ea) != USER_REGION_ID);
+
+#ifdef CONFIG_PPC_MM_SLICES
+ /* We only prefault standard pages for now */
+ if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize));
return;
+#endif
DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
" trap=%lx\n", mm, mm->pgd, ea, access, trap);
- /* Get PTE, VSID, access mask */
+ /* Get Linux PTE if available */
pgdir = mm->pgd;
if (pgdir == NULL)
return;
ptep = find_linux_pte(pgdir, ea);
if (!ptep)
return;
+
+#ifdef CONFIG_PPC_64K_PAGES
+ /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on
+ * a 64K kernel), then we don't preload, hash_page() will take
+ * care of it once we actually try to access the page.
+ * That way we don't have to duplicate all of the logic for segment
+ * page size demotion here
+ */
+ if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE))
+ return;
+#endif /* CONFIG_PPC_64K_PAGES */
+
+ /* Get VSID */
vsid = get_vsid(mm->context.id, ea);
- /* Hash it in */
+ /* Hash doesn't like irqs */
local_irq_save(flags);
+
+ /* Is that local to this CPU ? */
mask = cpumask_of_cpu(smp_processor_id());
if (cpus_equal(mm->cpu_vm_mask, mask))
local = 1;
-#ifndef CONFIG_PPC_64K_PAGES
- __hash_page_4K(ea, access, vsid, ptep, trap, local);
-#else
- if (mmu_ci_restrictions) {
- /* If this PTE is non-cacheable, switch to 4k */
- if (mm->context.user_psize == MMU_PAGE_64K &&
- (pte_val(*ptep) & _PAGE_NO_CACHE))
- demote_segment_4k(mm, ea);
- }
+
+ /* Hash it in */
+#ifdef CONFIG_PPC_HAS_HASH_64K
if (mm->context.user_psize == MMU_PAGE_64K)
__hash_page_64K(ea, access, vsid, ptep, trap, local);
else
- __hash_page_4K(ea, access, vsid, ptep, trap, local);
#endif /* CONFIG_PPC_64K_PAGES */
+ __hash_page_4K(ea, access, vsid, ptep, trap, local);
+
local_irq_restore(flags);
}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index fb959264c10..92a1b16fb7e 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -91,7 +91,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
pgd_t *pg;
pud_t *pu;
- BUG_ON(! in_hugepage_area(mm->context, addr));
+ BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize);
addr &= HPAGE_MASK;
@@ -119,7 +119,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
pud_t *pu;
hugepd_t *hpdp = NULL;
- BUG_ON(! in_hugepage_area(mm->context, addr));
+ BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize);
addr &= HPAGE_MASK;
@@ -302,7 +302,7 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb,
start = addr;
pgd = pgd_offset((*tlb)->mm, addr);
do {
- BUG_ON(! in_hugepage_area((*tlb)->mm->context, addr));
+ BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize);
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
continue;
@@ -331,203 +331,13 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
return __pte(old);
}
-struct slb_flush_info {
- struct mm_struct *mm;
- u16 newareas;
-};
-
-static void flush_low_segments(void *parm)
-{
- struct slb_flush_info *fi = parm;
- unsigned long i;
-
- BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS);
-
- if (current->active_mm != fi->mm)
- return;
-
- /* Only need to do anything if this CPU is working in the same
- * mm as the one which has changed */
-
- /* update the paca copy of the context struct */
- get_paca()->context = current->active_mm->context;
-
- asm volatile("isync" : : : "memory");
- for (i = 0; i < NUM_LOW_AREAS; i++) {
- if (! (fi->newareas & (1U << i)))
- continue;
- asm volatile("slbie %0"
- : : "r" ((i << SID_SHIFT) | SLBIE_C));
- }
- asm volatile("isync" : : : "memory");
-}
-
-static void flush_high_segments(void *parm)
-{
- struct slb_flush_info *fi = parm;
- unsigned long i, j;
-
-
- BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS);
-
- if (current->active_mm != fi->mm)
- return;
-
- /* Only need to do anything if this CPU is working in the same
- * mm as the one which has changed */
-
- /* update the paca copy of the context struct */
- get_paca()->context = current->active_mm->context;
-
- asm volatile("isync" : : : "memory");
- for (i = 0; i < NUM_HIGH_AREAS; i++) {
- if (! (fi->newareas & (1U << i)))
- continue;
- for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
- asm volatile("slbie %0"
- :: "r" (((i << HTLB_AREA_SHIFT)
- + (j << SID_SHIFT)) | SLBIE_C));
- }
- asm volatile("isync" : : : "memory");
-}
-
-static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
-{
- unsigned long start = area << SID_SHIFT;
- unsigned long end = (area+1) << SID_SHIFT;
- struct vm_area_struct *vma;
-
- BUG_ON(area >= NUM_LOW_AREAS);
-
- /* Check no VMAs are in the region */
- vma = find_vma(mm, start);
- if (vma && (vma->vm_start < end))
- return -EBUSY;
-
- return 0;
-}
-
-static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
-{
- unsigned long start = area << HTLB_AREA_SHIFT;
- unsigned long end = (area+1) << HTLB_AREA_SHIFT;
- struct vm_area_struct *vma;
-
- BUG_ON(area >= NUM_HIGH_AREAS);
-
- /* Hack, so that each addresses is controlled by exactly one
- * of the high or low area bitmaps, the first high area starts
- * at 4GB, not 0 */
- if (start == 0)
- start = 0x100000000UL;
-
- /* Check no VMAs are in the region */
- vma = find_vma(mm, start);
- if (vma && (vma->vm_start < end))
- return -EBUSY;
-
- return 0;
-}
-
-static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
-{
- unsigned long i;
- struct slb_flush_info fi;
-
- BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
- BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);
-
- newareas &= ~(mm->context.low_htlb_areas);
- if (! newareas)
- return 0; /* The segments we want are already open */
-
- for (i = 0; i < NUM_LOW_AREAS; i++)
- if ((1 << i) & newareas)
- if (prepare_low_area_for_htlb(mm, i) != 0)
- return -EBUSY;
-
- mm->context.low_htlb_areas |= newareas;
-
- /* the context change must make it to memory before the flush,
- * so that further SLB misses do the right thing. */
- mb();
-
- fi.mm = mm;
- fi.newareas = newareas;
- on_each_cpu(flush_low_segments, &fi, 0, 1);
-
- return 0;
-}
-
-static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
-{
- struct slb_flush_info fi;
- unsigned long i;
-
- BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
- BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8)
- != NUM_HIGH_AREAS);
-
- newareas &= ~(mm->context.high_htlb_areas);
- if (! newareas)
- return 0; /* The areas we want are already open */
-
- for (i = 0; i < NUM_HIGH_AREAS; i++)
- if ((1 << i) & newareas)
- if (prepare_high_area_for_htlb(mm, i) != 0)
- return -EBUSY;
-
- mm->context.high_htlb_areas |= newareas;
-
- /* the context change must make it to memory before the flush,
- * so that further SLB misses do the right thing. */
- mb();
-
- fi.mm = mm;
- fi.newareas = newareas;
- on_each_cpu(flush_high_segments, &fi, 0, 1);
-
- return 0;
-}
-
-int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff)
-{
- int err = 0;
-
- if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT))
- return -EINVAL;
- if (len & ~HPAGE_MASK)
- return -EINVAL;
- if (addr & ~HPAGE_MASK)
- return -EINVAL;
-
- if (addr < 0x100000000UL)
- err = open_low_hpage_areas(current->mm,
- LOW_ESID_MASK(addr, len));
- if ((addr + len) > 0x100000000UL)
- err = open_high_hpage_areas(current->mm,
- HTLB_AREA_MASK(addr, len));
-#ifdef CONFIG_SPE_BASE
- spu_flush_all_slbs(current->mm);
-#endif
- if (err) {
- printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
- " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
- addr, len,
- LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
- return err;
- }
-
- return 0;
-}
-
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
pte_t *ptep;
struct page *page;
- if (! in_hugepage_area(mm->context, address))
+ if (get_slice_psize(mm, address) != mmu_huge_psize)
return ERR_PTR(-EINVAL);
ptep = huge_pte_offset(mm, address);
@@ -551,359 +361,13 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
return NULL;
}
-/* Because we have an exclusive hugepage region which lies within the
- * normal user address space, we have to take special measures to make
- * non-huge mmap()s evade the hugepage reserved regions. */
-unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff,
- unsigned long flags)
-{
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long start_addr;
-
- if (len > TASK_SIZE)
- return -ENOMEM;
-
- /* handle fixed mapping: prevent overlap with huge pages */
- if (flags & MAP_FIXED) {
- if (is_hugepage_only_range(mm, addr, len))
- return -EINVAL;
- return addr;
- }
-
- if (addr) {
- addr = PAGE_ALIGN(addr);
- vma = find_vma(mm, addr);
- if (((TASK_SIZE - len) >= addr)
- && (!vma || (addr+len) <= vma->vm_start)
- && !is_hugepage_only_range(mm, addr,len))
- return addr;
- }
- if (len > mm->cached_hole_size) {
- start_addr = addr = mm->free_area_cache;
- } else {
- start_addr = addr = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = 0;
- }
-
-full_search:
- vma = find_vma(mm, addr);
- while (TASK_SIZE - len >= addr) {
- BUG_ON(vma && (addr >= vma->vm_end));
-
- if (touches_hugepage_low_range(mm, addr, len)) {
- addr = ALIGN(addr+1, 1<<SID_SHIFT);
- vma = find_vma(mm, addr);
- continue;
- }
- if (touches_hugepage_high_range(mm, addr, len)) {
- addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
- vma = find_vma(mm, addr);
- continue;
- }
- if (!vma || addr + len <= vma->vm_start) {
- /*
- * Remember the place where we stopped the search:
- */
- mm->free_area_cache = addr + len;
- return addr;
- }
- if (addr + mm->cached_hole_size < vma->vm_start)
- mm->cached_hole_size = vma->vm_start - addr;
- addr = vma->vm_end;
- vma = vma->vm_next;
- }
-
- /* Make sure we didn't miss any holes */
- if (start_addr != TASK_UNMAPPED_BASE) {
- start_addr = addr = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = 0;
- goto full_search;
- }
- return -ENOMEM;
-}
-
-/*
- * This mmap-allocator allocates new areas top-down from below the
- * stack's low limit (the base):
- *
- * Because we have an exclusive hugepage region which lies within the
- * normal user address space, we have to take special measures to make
- * non-huge mmap()s evade the hugepage reserved regions.
- */
-unsigned long
-arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
- const unsigned long len, const unsigned long pgoff,
- const unsigned long flags)
-{
- struct vm_area_struct *vma, *prev_vma;
- struct mm_struct *mm = current->mm;
- unsigned long base = mm->mmap_base, addr = addr0;
- unsigned long largest_hole = mm->cached_hole_size;
- int first_time = 1;
-
- /* requested length too big for entire address space */
- if (len > TASK_SIZE)
- return -ENOMEM;
-
- /* handle fixed mapping: prevent overlap with huge pages */
- if (flags & MAP_FIXED) {
- if (is_hugepage_only_range(mm, addr, len))
- return -EINVAL;
- return addr;
- }
-
- /* dont allow allocations above current base */
- if (mm->free_area_cache > base)
- mm->free_area_cache = base;
-
- /* requesting a specific address */
- if (addr) {
- addr = PAGE_ALIGN(addr);
- vma = find_vma(mm, addr);
- if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start)
- && !is_hugepage_only_range(mm, addr,len))
- return addr;
- }
-
- if (len <= largest_hole) {
- largest_hole = 0;
- mm->free_area_cache = base;
- }
-try_again:
- /* make sure it can fit in the remaining address space */
- if (mm->free_area_cache < len)
- goto fail;
-
- /* either no address requested or cant fit in requested address hole */
- addr = (mm->free_area_cache - len) & PAGE_MASK;
- do {
-hugepage_recheck:
- if (touches_hugepage_low_range(mm, addr, len)) {
- addr = (addr & ((~0) << SID_SHIFT)) - len;
- goto hugepage_recheck;
- } else if (touches_hugepage_high_range(mm, addr, len)) {
- addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len;
- goto hugepage_recheck;
- }
-
- /*
- * Lookup failure means no vma is above this address,
- * i.e. return with success:
- */
- if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
- return addr;
-
- /*
- * new region fits between prev_vma->vm_end and
- * vma->vm_start, use it:
- */
- if (addr+len <= vma->vm_start &&
- (!prev_vma || (addr >= prev_vma->vm_end))) {
- /* remember the address as a hint for next time */
- mm->cached_hole_size = largest_hole;
- return (mm->free_area_cache = addr);
- } else {
- /* pull free_area_cache down to the first hole */
- if (mm->free_area_cache == vma->vm_end) {
- mm->free_area_cache = vma->vm_start;
- mm->cached_hole_size = largest_hole;
- }
- }
-
- /* remember the largest hole we saw so far */
- if (addr + largest_hole < vma->vm_start)
- largest_hole = vma->vm_start - addr;
-
- /* try just below the current vma->vm_start */
- addr = vma->vm_start-len;
- } while (len <= vma->vm_start);
-
-fail:
- /*
- * if hint left us with no space for the requested
- * mapping then try again:
- */
- if (first_time) {
- mm->free_area_cache = base;
- largest_hole = 0;
- first_time = 0;
- goto try_again;
- }
- /*
- * A failed mmap() very likely causes application failure,
- * so fall back to the bottom-up function here. This scenario
- * can happen with large stack limits and large mmap()
- * allocations.
- */
- mm->free_area_cache = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = ~0UL;
- addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
- /*
- * Restore the topdown base:
- */
- mm->free_area_cache = base;
- mm->cached_hole_size = ~0UL;
-
- return addr;
-}
-
-static int htlb_check_hinted_area(unsigned long addr, unsigned long len)
-{
- struct vm_area_struct *vma;
-
- vma = find_vma(current->mm, addr);
- if (TASK_SIZE - len >= addr &&
- (!vma || ((addr + len) <= vma->vm_start)))
- return 0;
-
- return -ENOMEM;
-}
-
-static unsigned long htlb_get_low_area(unsigned long len, u16 segmask)
-{
- unsigned long addr = 0;
- struct vm_area_struct *vma;
-
- vma = find_vma(current->mm, addr);
- while (addr + len <= 0x100000000UL) {
- BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */
-
- if (! __within_hugepage_low_range(addr, len, segmask)) {
- addr = ALIGN(addr+1, 1<<SID_SHIFT);
- vma = find_vma(current->mm, addr);
- continue;
- }
-
- if (!vma || (addr + len) <= vma->vm_start)
- return addr;
- addr = ALIGN(vma->vm_end, HPAGE_SIZE);
- /* Depending on segmask this might not be a confirmed
- * hugepage region, so the ALIGN could have skipped
- * some VMAs */
- vma = find_vma(current->mm, addr);
- }
-
- return -ENOMEM;
-}
-
-static unsigned long htlb_get_high_area(unsigned long len, u16 areamask)
-{
- unsigned long addr = 0x100000000UL;
- struct vm_area_struct *vma;
-
- vma = find_vma(current->mm, addr);
- while (addr + len <= TASK_SIZE_USER64) {
- BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */
-
- if (! __within_hugepage_high_range(addr, len, areamask)) {
- addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
- vma = find_vma(current->mm, addr);
- continue;
- }
-
- if (!vma || (addr + len) <= vma->vm_start)
- return addr;
- addr = ALIGN(vma->vm_end, HPAGE_SIZE);
- /* Depending on segmask this might not be a confirmed
- * hugepage region, so the ALIGN could have skipped
- * some VMAs */
- vma = find_vma(current->mm, addr);
- }
-
- return -ENOMEM;
-}
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
{
- int lastshift;
- u16 areamask, curareas;
-
- if (HPAGE_SHIFT == 0)
- return -EINVAL;
- if (len & ~HPAGE_MASK)
- return -EINVAL;
- if (len > TASK_SIZE)
- return -ENOMEM;
-
- if (!cpu_has_feature(CPU_FTR_16M_PAGE))
- return -EINVAL;
-
- /* Paranoia, caller should have dealt with this */
- BUG_ON((addr + len) < addr);
-
- /* Handle MAP_FIXED */
- if (flags & MAP_FIXED) {
- if (prepare_hugepage_range(addr, len, pgoff))
- return -EINVAL;
- return addr;
- }
-
- if (test_thread_flag(TIF_32BIT)) {
- curareas = current->mm->context.low_htlb_areas;
-
- /* First see if we can use the hint address */
- if (addr && (htlb_check_hinted_area(addr, len) == 0)) {
- areamask = LOW_ESID_MASK(addr, len);
- if (open_low_hpage_areas(current->mm, areamask) == 0)
- return addr;
- }
-
- /* Next see if we can map in the existing low areas */
- addr = htlb_get_low_area(len, curareas);
- if (addr != -ENOMEM)
- return addr;
-
- /* Finally go looking for areas to open */
- lastshift = 0;
- for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
- ! lastshift; areamask >>=1) {
- if (areamask & 1)
- lastshift = 1;
-
- addr = htlb_get_low_area(len, curareas | areamask);
- if ((addr != -ENOMEM)
- && open_low_hpage_areas(current->mm, areamask) == 0)
- return addr;
- }
- } else {
- curareas = current->mm->context.high_htlb_areas;
-
- /* First see if we can use the hint address */
- /* We discourage 64-bit processes from doing hugepage
- * mappings below 4GB (must use MAP_FIXED) */
- if ((addr >= 0x100000000UL)
- && (htlb_check_hinted_area(addr, len) == 0)) {
- areamask = HTLB_AREA_MASK(addr, len);
- if (open_high_hpage_areas(current->mm, areamask) == 0)
- return addr;
- }
-
- /* Next see if we can map in the existing high areas */
- addr = htlb_get_high_area(len, curareas);
- if (addr != -ENOMEM)
- return addr;
-
- /* Finally go looking for areas to open */
- lastshift = 0;
- for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
- ! lastshift; areamask >>=1) {
- if (areamask & 1)
- lastshift = 1;
-
- addr = htlb_get_high_area(len, curareas | areamask);
- if ((addr != -ENOMEM)
- && open_high_hpage_areas(current->mm, areamask) == 0)
- return addr;
- }
- }
- printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
- " enough areas\n");
- return -ENOMEM;
+ return slice_get_unmapped_area(addr, len, flags,
+ mmu_huge_psize, 1, 0);
}
/*
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index fe1fe852181..7312a265545 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -146,21 +146,16 @@ static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags)
memset(addr, 0, kmem_cache_size(cache));
}
-#ifdef CONFIG_PPC_64K_PAGES
-static const unsigned int pgtable_cache_size[3] = {
- PTE_TABLE_SIZE, PMD_TABLE_SIZE, PGD_TABLE_SIZE
-};
-static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
- "pte_pmd_cache", "pmd_cache", "pgd_cache",
-};
-#else
static const unsigned int pgtable_cache_size[2] = {
- PTE_TABLE_SIZE, PMD_TABLE_SIZE
+ PGD_TABLE_SIZE, PMD_TABLE_SIZE
};
static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
- "pgd_pte_cache", "pud_pmd_cache",
-};
+#ifdef CONFIG_PPC_64K_PAGES
+ "pgd_cache", "pmd_cache",
+#else
+ "pgd_cache", "pud_pmd_cache",
#endif /* CONFIG_PPC_64K_PAGES */
+};
#ifdef CONFIG_HUGETLB_PAGE
/* Hugepages need one extra cache, initialized in hugetlbpage.c. We
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 1a6e08f3298..246eeea40ec 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -31,6 +31,7 @@
#include <linux/highmem.h>
#include <linux/initrd.h>
#include <linux/pagemap.h>
+#include <linux/suspend.h>
#include <asm/pgalloc.h>
#include <asm/prom.h>
@@ -276,6 +277,28 @@ void __init do_init_bootmem(void)
init_bootmem_done = 1;
}
+/* mark pages that don't exist as nosave */
+static int __init mark_nonram_nosave(void)
+{
+ unsigned long lmb_next_region_start_pfn,
+ lmb_region_max_pfn;
+ int i;
+
+ for (i = 0; i < lmb.memory.cnt - 1; i++) {
+ lmb_region_max_pfn =
+ (lmb.memory.region[i].base >> PAGE_SHIFT) +
+ (lmb.memory.region[i].size >> PAGE_SHIFT);
+ lmb_next_region_start_pfn =
+ lmb.memory.region[i+1].base >> PAGE_SHIFT;
+
+ if (lmb_region_max_pfn < lmb_next_region_start_pfn)
+ register_nosave_region(lmb_region_max_pfn,
+ lmb_next_region_start_pfn);
+ }
+
+ return 0;
+}
+
/*
* paging_init() sets up the page tables - in fact we've already done this.
*/
@@ -307,6 +330,8 @@ void __init paging_init(void)
max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
#endif
free_area_init_nodes(max_zone_pfns);
+
+ mark_nonram_nosave();
}
#endif /* ! CONFIG_NEED_MULTIPLE_NODES */
diff --git a/arch/powerpc/mm/mmu_context_64.c b/arch/powerpc/mm/mmu_context_64.c
index 90a06ac02d5..7a78cdc0515 100644
--- a/arch/powerpc/mm/mmu_context_64.c
+++ b/arch/powerpc/mm/mmu_context_64.c
@@ -28,6 +28,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
int index;
int err;
+ int new_context = (mm->context.id == 0);
again:
if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
@@ -50,9 +51,18 @@ again:
}
mm->context.id = index;
+#ifdef CONFIG_PPC_MM_SLICES
+ /* The old code would re-promote on fork, we don't do that
+ * when using slices as it could cause problem promoting slices
+ * that have been forced down to 4K
+ */
+ if (new_context)
+ slice_set_user_psize(mm, mmu_virtual_psize);
+#else
mm->context.user_psize = mmu_virtual_psize;
mm->context.sllp = SLB_VSID_USER |
mmu_psize_defs[mmu_virtual_psize].sllp;
+#endif
return 0;
}
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index 05066674a7a..ec1421a20aa 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -185,7 +185,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
if (Hash == 0)
return;
- pmd = pmd_offset(pgd_offset(mm, ea), ea);
+ pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea);
if (!pmd_none(*pmd))
add_hash_page(mm->context.id, ea, pmd_val(*pmd));
}
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 224e960650a..304375a7357 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -198,12 +198,6 @@ void slb_initialize(void)
static int slb_encoding_inited;
extern unsigned int *slb_miss_kernel_load_linear;
extern unsigned int *slb_miss_kernel_load_io;
-#ifdef CONFIG_HUGETLB_PAGE
- extern unsigned int *slb_miss_user_load_huge;
- unsigned long huge_llp;
-
- huge_llp = mmu_psize_defs[mmu_huge_psize].sllp;
-#endif
/* Prepare our SLB miss handler based on our page size */
linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
@@ -220,11 +214,6 @@ void slb_initialize(void)
DBG("SLB: linear LLP = %04x\n", linear_llp);
DBG("SLB: io LLP = %04x\n", io_llp);
-#ifdef CONFIG_HUGETLB_PAGE
- patch_slb_encoding(slb_miss_user_load_huge,
- SLB_VSID_USER | huge_llp);
- DBG("SLB: huge LLP = %04x\n", huge_llp);
-#endif
}
get_paca()->stab_rr = SLB_NUM_BOLTED;
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index b10e4707d7c..cd1a93d4948 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -82,31 +82,45 @@ _GLOBAL(slb_miss_kernel_load_io)
srdi. r9,r10,USER_ESID_BITS
bne- 8f /* invalid ea bits set */
- /* Figure out if the segment contains huge pages */
-#ifdef CONFIG_HUGETLB_PAGE
-BEGIN_FTR_SECTION
- b 1f
-END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
+
+ /* when using slices, we extract the psize off the slice bitmaps
+ * and then we need to get the sllp encoding off the mmu_psize_defs
+ * array.
+ *
+ * XXX This is a bit inefficient especially for the normal case,
+ * so we should try to implement a fast path for the standard page
+ * size using the old sllp value so we avoid the array. We cannot
+ * really do dynamic patching unfortunately as processes might flip
+ * between 4k and 64k standard page size
+ */
+#ifdef CONFIG_PPC_MM_SLICES
cmpldi r10,16
- lhz r9,PACALOWHTLBAREAS(r13)
- mr r11,r10
+ /* Get the slice index * 4 in r11 and matching slice size mask in r9 */
+ ld r9,PACALOWSLICESPSIZE(r13)
+ sldi r11,r10,2
blt 5f
+ ld r9,PACAHIGHSLICEPSIZE(r13)
+ srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2)
+ andi. r11,r11,0x3c
- lhz r9,PACAHIGHHTLBAREAS(r13)
- srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT)
-
-5: srd r9,r9,r11
- andi. r9,r9,1
- beq 1f
-_GLOBAL(slb_miss_user_load_huge)
- li r11,0
- b 2f
-1:
-#endif /* CONFIG_HUGETLB_PAGE */
+5: /* Extract the psize and multiply to get an array offset */
+ srd r9,r9,r11
+ andi. r9,r9,0xf
+ mulli r9,r9,MMUPSIZEDEFSIZE
+ /* Now get to the array and obtain the sllp
+ */
+ ld r11,PACATOC(r13)
+ ld r11,mmu_psize_defs@got(r11)
+ add r11,r11,r9
+ ld r11,MMUPSIZESLLP(r11)
+ ori r11,r11,SLB_VSID_USER
+#else
+ /* paca context sllp already contains the SLB_VSID_USER bits */
lhz r11,PACACONTEXTSLLP(r13)
-2:
+#endif /* CONFIG_PPC_MM_SLICES */
+
ld r9,PACACONTEXTID(r13)
rldimi r10,r9,USER_ESID_BITS,0
b slb_finish_load
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
new file mode 100644
index 00000000000..f833dba2a02
--- /dev/null
+++ b/arch/powerpc/mm/slice.c
@@ -0,0 +1,633 @@
+/*
+ * address space "slices" (meta-segments) support
+ *
+ * Copyright (C) 2007 Benjamin Herrenschmidt, IBM Corporation.
+ *
+ * Based on hugetlb implementation
+ *
+ * Copyright (C) 2003 David Gibson, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <asm/mman.h>
+#include <asm/mmu.h>
+#include <asm/spu.h>
+
+static spinlock_t slice_convert_lock = SPIN_LOCK_UNLOCKED;
+
+
+#ifdef DEBUG
+int _slice_debug = 1;
+
+static void slice_print_mask(const char *label, struct slice_mask mask)
+{
+ char *p, buf[16 + 3 + 16 + 1];
+ int i;
+
+ if (!_slice_debug)
+ return;
+ p = buf;
+ for (i = 0; i < SLICE_NUM_LOW; i++)
+ *(p++) = (mask.low_slices & (1 << i)) ? '1' : '0';
+ *(p++) = ' ';
+ *(p++) = '-';
+ *(p++) = ' ';
+ for (i = 0; i < SLICE_NUM_HIGH; i++)
+ *(p++) = (mask.high_slices & (1 << i)) ? '1' : '0';
+ *(p++) = 0;
+
+ printk(KERN_DEBUG "%s:%s\n", label, buf);
+}
+
+#define slice_dbg(fmt...) do { if (_slice_debug) pr_debug(fmt); } while(0)
+
+#else
+
+static void slice_print_mask(const char *label, struct slice_mask mask) {}
+#define slice_dbg(fmt...)
+
+#endif
+
+static struct slice_mask slice_range_to_mask(unsigned long start,
+ unsigned long len)
+{
+ unsigned long end = start + len - 1;
+ struct slice_mask ret = { 0, 0 };
+
+ if (start < SLICE_LOW_TOP) {
+ unsigned long mend = min(end, SLICE_LOW_TOP);
+ unsigned long mstart = min(start, SLICE_LOW_TOP);
+
+ ret.low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
+ - (1u << GET_LOW_SLICE_INDEX(mstart));
+ }
+
+ if ((start + len) > SLICE_LOW_TOP)
+ ret.high_slices = (1u << (GET_HIGH_SLICE_INDEX(end) + 1))
+ - (1u << GET_HIGH_SLICE_INDEX(start));
+
+ return ret;
+}
+
+static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
+ unsigned long len)
+{
+ struct vm_area_struct *vma;
+
+ if ((mm->task_size - len) < addr)
+ return 0;
+ vma = find_vma(mm, addr);
+ return (!vma || (addr + len) <= vma->vm_start);
+}
+
+static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
+{
+ return !slice_area_is_free(mm, slice << SLICE_LOW_SHIFT,
+ 1ul << SLICE_LOW_SHIFT);
+}
+
+static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
+{
+ unsigned long start = slice << SLICE_HIGH_SHIFT;
+ unsigned long end = start + (1ul << SLICE_HIGH_SHIFT);
+
+ /* Hack, so that each addresses is controlled by exactly one
+ * of the high or low area bitmaps, the first high area starts
+ * at 4GB, not 0 */
+ if (start == 0)
+ start = SLICE_LOW_TOP;
+
+ return !slice_area_is_free(mm, start, end - start);
+}
+
+static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
+{
+ struct slice_mask ret = { 0, 0 };
+ unsigned long i;
+
+ for (i = 0; i < SLICE_NUM_LOW; i++)
+ if (!slice_low_has_vma(mm, i))
+ ret.low_slices |= 1u << i;
+
+ if (mm->task_size <= SLICE_LOW_TOP)
+ return ret;
+
+ for (i = 0; i < SLICE_NUM_HIGH; i++)
+ if (!slice_high_has_vma(mm, i))
+ ret.high_slices |= 1u << i;
+
+ return ret;
+}
+
+static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
+{
+ struct slice_mask ret = { 0, 0 };
+ unsigned long i;
+ u64 psizes;
+
+ psizes = mm->context.low_slices_psize;
+ for (i = 0; i < SLICE_NUM_LOW; i++)
+ if (((psizes >> (i * 4)) & 0xf) == psize)
+ ret.low_slices |= 1u << i;
+
+ psizes = mm->context.high_slices_psize;
+ for (i = 0; i < SLICE_NUM_HIGH; i++)
+ if (((psizes >> (i * 4)) & 0xf) == psize)
+ ret.high_slices |= 1u << i;
+
+ return ret;
+}
+
+static int slice_check_fit(struct slice_mask mask, struct slice_mask available)
+{
+ return (mask.low_slices & available.low_slices) == mask.low_slices &&
+ (mask.high_slices & available.high_slices) == mask.high_slices;
+}
+
+static void slice_flush_segments(void *parm)
+{
+ struct mm_struct *mm = parm;
+ unsigned long flags;
+
+ if (mm != current->active_mm)
+ return;
+
+ /* update the paca copy of the context struct */
+ get_paca()->context = current->active_mm->context;
+
+ local_irq_save(flags);
+ slb_flush_and_rebolt();
+ local_irq_restore(flags);
+}
+
+static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
+{
+ /* Write the new slice psize bits */
+ u64 lpsizes, hpsizes;
+ unsigned long i, flags;
+
+ slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
+ slice_print_mask(" mask", mask);
+
+ /* We need to use a spinlock here to protect against
+ * concurrent 64k -> 4k demotion ...
+ */
+ spin_lock_irqsave(&slice_convert_lock, flags);
+
+ lpsizes = mm->context.low_slices_psize;
+ for (i = 0; i < SLICE_NUM_LOW; i++)
+ if (mask.low_slices & (1u << i))
+ lpsizes = (lpsizes & ~(0xful << (i * 4))) |
+ (((unsigned long)psize) << (i * 4));
+
+ hpsizes = mm->context.high_slices_psize;
+ for (i = 0; i < SLICE_NUM_HIGH; i++)
+ if (mask.high_slices & (1u << i))
+ hpsizes = (hpsizes & ~(0xful << (i * 4))) |
+ (((unsigned long)psize) << (i * 4));
+
+ mm->context.low_slices_psize = lpsizes;
+ mm->context.high_slices_psize = hpsizes;
+
+ slice_dbg(" lsps=%lx, hsps=%lx\n",
+ mm->context.low_slices_psize,
+ mm->context.high_slices_psize);
+
+ spin_unlock_irqrestore(&slice_convert_lock, flags);
+ mb();
+
+ /* XXX this is sub-optimal but will do for now */
+ on_each_cpu(slice_flush_segments, mm, 0, 1);
+#ifdef CONFIG_SPU_BASE
+ spu_flush_all_slbs(mm);
+#endif
+}
+
+static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
+ unsigned long len,
+ struct slice_mask available,
+ int psize, int use_cache)
+{
+ struct vm_area_struct *vma;
+ unsigned long start_addr, addr;
+ struct slice_mask mask;
+ int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+
+ if (use_cache) {
+ if (len <= mm->cached_hole_size) {
+ start_addr = addr = TASK_UNMAPPED_BASE;
+ mm->cached_hole_size = 0;
+ } else
+ start_addr = addr = mm->free_area_cache;
+ } else
+ start_addr = addr = TASK_UNMAPPED_BASE;
+
+full_search:
+ for (;;) {
+ addr = _ALIGN_UP(addr, 1ul << pshift);
+ if ((TASK_SIZE - len) < addr)
+ break;
+ vma = find_vma(mm, addr);
+ BUG_ON(vma && (addr >= vma->vm_end));
+
+ mask = slice_range_to_mask(addr, len);
+ if (!slice_check_fit(mask, available)) {
+ if (addr < SLICE_LOW_TOP)
+ addr = _ALIGN_UP(addr + 1, 1ul << SLICE_LOW_SHIFT);
+ else
+ addr = _ALIGN_UP(addr + 1, 1ul << SLICE_HIGH_SHIFT);
+ continue;
+ }
+ if (!vma || addr + len <= vma->vm_start) {
+ /*
+ * Remember the place where we stopped the search:
+ */
+ if (use_cache)
+ mm->free_area_cache = addr + len;
+ return addr;
+ }
+ if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
+ mm->cached_hole_size = vma->vm_start - addr;
+ addr = vma->vm_end;
+ }
+
+ /* Make sure we didn't miss any holes */
+ if (use_cache && start_addr != TASK_UNMAPPED_BASE) {
+ start_addr = addr = TASK_UNMAPPED_BASE;
+ mm->cached_hole_size = 0;
+ goto full_search;
+ }
+ return -ENOMEM;
+}
+
+static unsigned long slice_find_area_topdown(struct mm_struct *mm,
+ unsigned long len,
+ struct slice_mask available,
+ int psize, int use_cache)
+{
+ struct vm_area_struct *vma;
+ unsigned long addr;
+ struct slice_mask mask;
+ int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+
+ /* check if free_area_cache is useful for us */
+ if (use_cache) {
+ if (len <= mm->cached_hole_size) {
+ mm->cached_hole_size = 0;
+ mm->free_area_cache = mm->mmap_base;
+ }
+
+ /* either no address requested or can't fit in requested
+ * address hole
+ */
+ addr = mm->free_area_cache;
+
+ /* make sure it can fit in the remaining address space */
+ if (addr > len) {
+ addr = _ALIGN_DOWN(addr - len, 1ul << pshift);
+ mask = slice_range_to_mask(addr, len);
+ if (slice_check_fit(mask, available) &&
+ slice_area_is_free(mm, addr, len))
+ /* remember the address as a hint for
+ * next time
+ */
+ return (mm->free_area_cache = addr);
+ }
+ }
+
+ addr = mm->mmap_base;
+ while (addr > len) {
+ /* Go down by chunk size */
+ addr = _ALIGN_DOWN(addr - len, 1ul << pshift);
+
+ /* Check for hit with different page size */
+ mask = slice_range_to_mask(addr, len);
+ if (!slice_check_fit(mask, available)) {
+ if (addr < SLICE_LOW_TOP)
+ addr = _ALIGN_DOWN(addr, 1ul << SLICE_LOW_SHIFT);
+ else if (addr < (1ul << SLICE_HIGH_SHIFT))
+ addr = SLICE_LOW_TOP;
+ else
+ addr = _ALIGN_DOWN(addr, 1ul << SLICE_HIGH_SHIFT);
+ continue;
+ }
+
+ /*
+ * Lookup failure means no vma is above this address,
+ * else if new region fits below vma->vm_start,
+ * return with success:
+ */
+ vma = find_vma(mm, addr);
+ if (!vma || (addr + len) <= vma->vm_start) {
+ /* remember the address as a hint for next time */
+ if (use_cache)
+ mm->free_area_cache = addr;
+ return addr;
+ }
+
+ /* remember the largest hole we saw so far */
+ if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
+ mm->cached_hole_size = vma->vm_start - addr;
+
+ /* try just below the current vma->vm_start */
+ addr = vma->vm_start;
+ }
+
+ /*
+ * A failed mmap() very likely causes application failure,
+ * so fall back to the bottom-up function here. This scenario
+ * can happen with large stack limits and large mmap()
+ * allocations.
+ */
+ addr = slice_find_area_bottomup(mm, len, available, psize, 0);
+
+ /*
+ * Restore the topdown base:
+ */
+ if (use_cache) {
+ mm->free_area_cache = mm->mmap_base;
+ mm->cached_hole_size = ~0UL;
+ }
+
+ return addr;
+}
+
+
+static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
+ struct slice_mask mask, int psize,
+ int topdown, int use_cache)
+{
+ if (topdown)
+ return slice_find_area_topdown(mm, len, mask, psize, use_cache);
+ else
+ return slice_find_area_bottomup(mm, len, mask, psize, use_cache);
+}
+
+unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
+ unsigned long flags, unsigned int psize,
+ int topdown, int use_cache)
+{
+ struct slice_mask mask;
+ struct slice_mask good_mask;
+ struct slice_mask potential_mask = {0,0} /* silence stupid warning */;
+ int pmask_set = 0;
+ int fixed = (flags & MAP_FIXED);
+ int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+ struct mm_struct *mm = current->mm;
+
+ /* Sanity checks */
+ BUG_ON(mm->task_size == 0);
+
+ slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
+ slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d, use_cache=%d\n",
+ addr, len, flags, topdown, use_cache);
+
+ if (len > mm->task_size)
+ return -ENOMEM;
+ if (fixed && (addr & ((1ul << pshift) - 1)))
+ return -EINVAL;
+ if (fixed && addr > (mm->task_size - len))
+ return -EINVAL;
+
+ /* If hint, make sure it matches our alignment restrictions */
+ if (!fixed && addr) {
+ addr = _ALIGN_UP(addr, 1ul << pshift);
+ slice_dbg(" aligned addr=%lx\n", addr);
+ }
+
+ /* First makeup a "good" mask of slices that have the right size
+ * already
+ */
+ good_mask = slice_mask_for_size(mm, psize);
+ slice_print_mask(" good_mask", good_mask);
+
+ /* First check hint if it's valid or if we have MAP_FIXED */
+ if ((addr != 0 || fixed) && (mm->task_size - len) >= addr) {
+
+ /* Don't bother with hint if it overlaps a VMA */
+ if (!fixed && !slice_area_is_free(mm, addr, len))
+ goto search;
+
+ /* Build a mask for the requested range */
+ mask = slice_range_to_mask(addr, len);
+ slice_print_mask(" mask", mask);
+
+ /* Check if we fit in the good mask. If we do, we just return,
+ * nothing else to do
+ */
+ if (slice_check_fit(mask, good_mask)) {
+ slice_dbg(" fits good !\n");
+ return addr;
+ }
+
+ /* We don't fit in the good mask, check what other slices are
+ * empty and thus can be converted
+ */
+ potential_mask = slice_mask_for_free(mm);
+ potential_mask.low_slices |= good_mask.low_slices;
+ potential_mask.high_slices |= good_mask.high_slices;
+ pmask_set = 1;
+ slice_print_mask(" potential", potential_mask);
+ if (slice_check_fit(mask, potential_mask)) {
+ slice_dbg(" fits potential !\n");
+ goto convert;
+ }
+ }
+
+ /* If we have MAP_FIXED and failed the above step, then error out */
+ if (fixed)
+ return -EBUSY;
+
+ search:
+ slice_dbg(" search...\n");
+
+ /* Now let's see if we can find something in the existing slices
+ * for that size
+ */
+ addr = slice_find_area(mm, len, good_mask, psize, topdown, use_cache);
+ if (addr != -ENOMEM) {
+ /* Found within the good mask, we don't have to setup,
+ * we thus return directly
+ */
+ slice_dbg(" found area at 0x%lx\n", addr);
+ return addr;
+ }
+
+ /* Won't fit, check what can be converted */
+ if (!pmask_set) {
+ potential_mask = slice_mask_for_free(mm);
+ potential_mask.low_slices |= good_mask.low_slices;
+ potential_mask.high_slices |= good_mask.high_slices;
+ pmask_set = 1;
+ slice_print_mask(" potential", potential_mask);
+ }
+
+ /* Now let's see if we can find something in the existing slices
+ * for that size
+ */
+ addr = slice_find_area(mm, len, potential_mask, psize, topdown,
+ use_cache);
+ if (addr == -ENOMEM)
+ return -ENOMEM;
+
+ mask = slice_range_to_mask(addr, len);
+ slice_dbg(" found potential area at 0x%lx\n", addr);
+ slice_print_mask(" mask", mask);
+
+ convert:
+ slice_convert(mm, mask, psize);
+ return addr;
+
+}
+EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
+
+unsigned long arch_get_unmapped_area(struct file *filp,
+ unsigned long addr,
+ unsigned long len,
+ unsigned long pgoff,
+ unsigned long flags)
+{
+ return slice_get_unmapped_area(addr, len, flags,
+ current->mm->context.user_psize,
+ 0, 1);
+}
+
+unsigned long arch_get_unmapped_area_topdown(struct file *filp,
+ const unsigned long addr0,
+ const unsigned long len,
+ const unsigned long pgoff,
+ const unsigned long flags)
+{
+ return slice_get_unmapped_area(addr0, len, flags,
+ current->mm->context.user_psize,
+ 1, 1);
+}
+
+unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
+{
+ u64 psizes;
+ int index;
+
+ if (addr < SLICE_LOW_TOP) {
+ psizes = mm->context.low_slices_psize;
+ index = GET_LOW_SLICE_INDEX(addr);
+ } else {
+ psizes = mm->context.high_slices_psize;
+ index = GET_HIGH_SLICE_INDEX(addr);
+ }
+
+ return (psizes >> (index * 4)) & 0xf;
+}
+EXPORT_SYMBOL_GPL(get_slice_psize);
+
+/*
+ * This is called by hash_page when it needs to do a lazy conversion of
+ * an address space from real 64K pages to combo 4K pages (typically
+ * when hitting a non cacheable mapping on a processor or hypervisor
+ * that won't allow them for 64K pages).
+ *
+ * This is also called in init_new_context() to change back the user
+ * psize from whatever the parent context had it set to
+ *
+ * This function will only change the content of the {low,high)_slice_psize
+ * masks, it will not flush SLBs as this shall be handled lazily by the
+ * caller.
+ */
+void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
+{
+ unsigned long flags, lpsizes, hpsizes;
+ unsigned int old_psize;
+ int i;
+
+ slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize);
+
+ spin_lock_irqsave(&slice_convert_lock, flags);
+
+ old_psize = mm->context.user_psize;
+ slice_dbg(" old_psize=%d\n", old_psize);
+ if (old_psize == psize)
+ goto bail;
+
+ mm->context.user_psize = psize;
+ wmb();
+
+ lpsizes = mm->context.low_slices_psize;
+ for (i = 0; i < SLICE_NUM_LOW; i++)
+ if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
+ lpsizes = (lpsizes & ~(0xful << (i * 4))) |
+ (((unsigned long)psize) << (i * 4));
+
+ hpsizes = mm->context.high_slices_psize;
+ for (i = 0; i < SLICE_NUM_HIGH; i++)
+ if (((hpsizes >> (i * 4)) & 0xf) == old_psize)
+ hpsizes = (hpsizes & ~(0xful << (i * 4))) |
+ (((unsigned long)psize) << (i * 4));
+
+ mm->context.low_slices_psize = lpsizes;
+ mm->context.high_slices_psize = hpsizes;
+
+ slice_dbg(" lsps=%lx, hsps=%lx\n",
+ mm->context.low_slices_psize,
+ mm->context.high_slices_psize);
+
+ bail:
+ spin_unlock_irqrestore(&slice_convert_lock, flags);
+}
+
+/*
+ * is_hugepage_only_range() is used by generic code to verify wether
+ * a normal mmap mapping (non hugetlbfs) is valid on a given area.
+ *
+ * until the generic code provides a more generic hook and/or starts
+ * calling arch get_unmapped_area for MAP_FIXED (which our implementation
+ * here knows how to deal with), we hijack it to keep standard mappings
+ * away from us.
+ *
+ * because of that generic code limitation, MAP_FIXED mapping cannot
+ * "convert" back a slice with no VMAs to the standard page size, only
+ * get_unmapped_area() can. It would be possible to fix it here but I
+ * prefer working on fixing the generic code instead.
+ *
+ * WARNING: This will not work if hugetlbfs isn't enabled since the
+ * generic code will redefine that function as 0 in that. This is ok
+ * for now as we only use slices with hugetlbfs enabled. This should
+ * be fixed as the generic code gets fixed.
+ */
+int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
+ unsigned long len)
+{
+ struct slice_mask mask, available;
+
+ mask = slice_range_to_mask(addr, len);
+ available = slice_mask_for_size(mm, mm->context.user_psize);
+
+#if 0 /* too verbose */
+ slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n",
+ mm, addr, len);
+ slice_print_mask(" mask", mask);
+ slice_print_mask(" available", available);
+#endif
+ return !slice_check_fit(mask, available);
+}
+
diff --git a/arch/powerpc/mm/tlb_32.c b/arch/powerpc/mm/tlb_32.c
index 925ff70be8b..6a69417cbc0 100644
--- a/arch/powerpc/mm/tlb_32.c
+++ b/arch/powerpc/mm/tlb_32.c
@@ -111,7 +111,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start,
if (start >= end)
return;
end = (end - 1) | ~PAGE_MASK;
- pmd = pmd_offset(pgd_offset(mm, start), start);
+ pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start);
for (;;) {
pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1;
if (pmd_end > end)
@@ -169,7 +169,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
return;
}
mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm;
- pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr);
+ pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr);
if (!pmd_none(*pmd))
flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1);
FINISH_FLUSH;
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index fd8d08c325e..2bfc4d7e1aa 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -143,16 +143,22 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
*/
addr &= PAGE_MASK;
- /* Get page size (maybe move back to caller) */
+ /* Get page size (maybe move back to caller).
+ *
+ * NOTE: when using special 64K mappings in 4K environment like
+ * for SPEs, we obtain the page size from the slice, which thus
+ * must still exist (and thus the VMA not reused) at the time
+ * of this call
+ */
if (huge) {
#ifdef CONFIG_HUGETLB_PAGE
psize = mmu_huge_psize;
#else
BUG();
- psize = pte_pagesize_index(pte); /* shutup gcc */
+ psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
#endif
} else
- psize = pte_pagesize_index(pte);
+ psize = pte_pagesize_index(mm, addr, pte);
/* Build full vaddr */
if (!is_kernel_addr(addr)) {
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
index 7ef0c685479..ba55b0ff0f7 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
@@ -15,8 +15,8 @@
#include <linux/init.h>
#include <linux/delay.h>
-#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/pgtable.h>
#include <asm/pci-bridge.h>
#include <asm-powerpc/mpic.h>
#include <asm/mpc86xx.h>
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 82551770917..9b2b386ccf4 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -35,6 +35,21 @@ config SPU_FS
Units on machines implementing the Broadband Processor
Architecture.
+config SPU_FS_64K_LS
+ bool "Use 64K pages to map SPE local store"
+ # we depend on PPC_MM_SLICES for now rather than selecting
+ # it because we depend on hugetlbfs hooks being present. We
+ # will fix that when the generic code has been improved to
+ # not require hijacking hugetlbfs hooks.
+ depends on SPU_FS && PPC_MM_SLICES && !PPC_64K_PAGES
+ default y
+ select PPC_HAS_HASH_64K
+ help
+ This option causes SPE local stores to be mapped in process
+ address spaces using 64K pages while the rest of the kernel
+ uses 4K pages. This can improve performances of applications
+ using multiple SPEs by lowering the TLB pressure on them.
+
config SPU_BASE
bool
default n
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index fec51525252..a7f5a7653c6 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -144,12 +144,11 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
switch(REGION_ID(ea)) {
case USER_REGION_ID:
-#ifdef CONFIG_HUGETLB_PAGE
- if (in_hugepage_area(mm->context, ea))
- psize = mmu_huge_psize;
- else
+#ifdef CONFIG_PPC_MM_SLICES
+ psize = get_slice_psize(mm, ea);
+#else
+ psize = mm->context.user_psize;
#endif
- psize = mm->context.user_psize;
vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) |
SLB_VSID_USER;
break;
diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile
index 2cd89c11af5..328afcf8950 100644
--- a/arch/powerpc/platforms/cell/spufs/Makefile
+++ b/arch/powerpc/platforms/cell/spufs/Makefile
@@ -1,4 +1,4 @@
-obj-y += switch.o fault.o
+obj-y += switch.o fault.o lscsa_alloc.o
obj-$(CONFIG_SPU_FS) += spufs.o
spufs-y += inode.o file.o context.o syscalls.o coredump.o
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c
index a87d9ca3dba..8654749e317 100644
--- a/arch/powerpc/platforms/cell/spufs/context.c
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@@ -36,10 +36,8 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang)
/* Binding to physical processor deferred
* until spu_activate().
*/
- spu_init_csa(&ctx->csa);
- if (!ctx->csa.lscsa) {
+ if (spu_init_csa(&ctx->csa))
goto out_free;
- }
spin_lock_init(&ctx->mmio_lock);
spin_lock_init(&ctx->mapping_lock);
kref_init(&ctx->kref);
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index d010b2464a9..45614c73c78 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -118,14 +118,32 @@ spufs_mem_write(struct file *file, const char __user *buffer,
static unsigned long spufs_mem_mmap_nopfn(struct vm_area_struct *vma,
unsigned long address)
{
- struct spu_context *ctx = vma->vm_file->private_data;
- unsigned long pfn, offset = address - vma->vm_start;
-
- offset += vma->vm_pgoff << PAGE_SHIFT;
+ struct spu_context *ctx = vma->vm_file->private_data;
+ unsigned long pfn, offset, addr0 = address;
+#ifdef CONFIG_SPU_FS_64K_LS
+ struct spu_state *csa = &ctx->csa;
+ int psize;
+
+ /* Check what page size we are using */
+ psize = get_slice_psize(vma->vm_mm, address);
+
+ /* Some sanity checking */
+ BUG_ON(csa->use_big_pages != (psize == MMU_PAGE_64K));
+
+ /* Wow, 64K, cool, we need to align the address though */
+ if (csa->use_big_pages) {
+ BUG_ON(vma->vm_start & 0xffff);
+ address &= ~0xfffful;
+ }
+#endif /* CONFIG_SPU_FS_64K_LS */
+ offset = (address - vma->vm_start) + (vma->vm_pgoff << PAGE_SHIFT);
if (offset >= LS_SIZE)
return NOPFN_SIGBUS;
+ pr_debug("spufs_mem_mmap_nopfn address=0x%lx -> 0x%lx, offset=0x%lx\n",
+ addr0, address, offset);
+
spu_acquire(ctx);
if (ctx->state == SPU_STATE_SAVED) {
@@ -149,9 +167,24 @@ static struct vm_operations_struct spufs_mem_mmap_vmops = {
.nopfn = spufs_mem_mmap_nopfn,
};
-static int
-spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
-{
+static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+#ifdef CONFIG_SPU_FS_64K_LS
+ struct spu_context *ctx = file->private_data;
+ struct spu_state *csa = &ctx->csa;
+
+ /* Sanity check VMA alignment */
+ if (csa->use_big_pages) {
+ pr_debug("spufs_mem_mmap 64K, start=0x%lx, end=0x%lx,"
+ " pgoff=0x%lx\n", vma->vm_start, vma->vm_end,
+ vma->vm_pgoff);
+ if (vma->vm_start & 0xffff)
+ return -EINVAL;
+ if (vma->vm_pgoff & 0xf)
+ return -EINVAL;
+ }
+#endif /* CONFIG_SPU_FS_64K_LS */
+
if (!(vma->vm_flags & VM_SHARED))
return -EINVAL;
@@ -163,13 +196,34 @@ spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
+#ifdef CONFIG_SPU_FS_64K_LS
+unsigned long spufs_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ struct spu_context *ctx = file->private_data;
+ struct spu_state *csa = &ctx->csa;
+
+ /* If not using big pages, fallback to normal MM g_u_a */
+ if (!csa->use_big_pages)
+ return current->mm->get_unmapped_area(file, addr, len,
+ pgoff, flags);
+
+ /* Else, try to obtain a 64K pages slice */
+ return slice_get_unmapped_area(addr, len, flags,
+ MMU_PAGE_64K, 1, 0);
+}
+#endif /* CONFIG_SPU_FS_64K_LS */
+
static const struct file_operations spufs_mem_fops = {
- .open = spufs_mem_open,
- .release = spufs_mem_release,
- .read = spufs_mem_read,
- .write = spufs_mem_write,
- .llseek = generic_file_llseek,
- .mmap = spufs_mem_mmap,
+ .open = spufs_mem_open,
+ .read = spufs_mem_read,
+ .write = spufs_mem_write,
+ .llseek = generic_file_llseek,
+ .mmap = spufs_mem_mmap,
+#ifdef CONFIG_SPU_FS_64K_LS
+ .get_unmapped_area = spufs_get_unmapped_area,
+#endif
};
static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,
diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
new file mode 100644
index 00000000000..f4b3c052dab
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
@@ -0,0 +1,181 @@
+/*
+ * SPU local store allocation routines
+ *
+ * Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu.h>
+
+static int spu_alloc_lscsa_std(struct spu_state *csa)
+{
+ struct spu_lscsa *lscsa;
+ unsigned char *p;
+
+ lscsa = vmalloc(sizeof(struct spu_lscsa));
+ if (!lscsa)
+ return -ENOMEM;
+ memset(lscsa, 0, sizeof(struct spu_lscsa));
+ csa->lscsa = lscsa;
+
+ /* Set LS pages reserved to allow for user-space mapping. */
+ for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+ SetPageReserved(vmalloc_to_page(p));
+
+ return 0;
+}
+
+static void spu_free_lscsa_std(struct spu_state *csa)
+{
+ /* Clear reserved bit before vfree. */
+ unsigned char *p;
+
+ if (csa->lscsa == NULL)
+ return;
+
+ for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+ ClearPageReserved(vmalloc_to_page(p));
+
+ vfree(csa->lscsa);
+}
+
+#ifdef CONFIG_SPU_FS_64K_LS
+
+#define SPU_64K_PAGE_SHIFT 16
+#define SPU_64K_PAGE_ORDER (SPU_64K_PAGE_SHIFT - PAGE_SHIFT)
+#define SPU_64K_PAGE_COUNT (1ul << SPU_64K_PAGE_ORDER)
+
+int spu_alloc_lscsa(struct spu_state *csa)
+{
+ struct page **pgarray;
+ unsigned char *p;
+ int i, j, n_4k;
+
+ /* Check availability of 64K pages */
+ if (mmu_psize_defs[MMU_PAGE_64K].shift == 0)
+ goto fail;
+
+ csa->use_big_pages = 1;
+
+ pr_debug("spu_alloc_lscsa(csa=0x%p), trying to allocate 64K pages\n",
+ csa);
+
+ /* First try to allocate our 64K pages. We need 5 of them
+ * with the current implementation. In the future, we should try
+ * to separate the lscsa with the actual local store image, thus
+ * allowing us to require only 4 64K pages per context
+ */
+ for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) {
+ /* XXX This is likely to fail, we should use a special pool
+ * similiar to what hugetlbfs does.
+ */
+ csa->lscsa_pages[i] = alloc_pages(GFP_KERNEL,
+ SPU_64K_PAGE_ORDER);
+ if (csa->lscsa_pages[i] == NULL)
+ goto fail;
+ }
+
+ pr_debug(" success ! creating vmap...\n");
+
+ /* Now we need to create a vmalloc mapping of these for the kernel
+ * and SPU context switch code to use. Currently, we stick to a
+ * normal kernel vmalloc mapping, which in our case will be 4K
+ */
+ n_4k = SPU_64K_PAGE_COUNT * SPU_LSCSA_NUM_BIG_PAGES;
+ pgarray = kmalloc(sizeof(struct page *) * n_4k, GFP_KERNEL);
+ if (pgarray == NULL)
+ goto fail;
+ for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++)
+ for (j = 0; j < SPU_64K_PAGE_COUNT; j++)
+ /* We assume all the struct page's are contiguous
+ * which should be hopefully the case for an order 4
+ * allocation..
+ */
+ pgarray[i * SPU_64K_PAGE_COUNT + j] =
+ csa->lscsa_pages[i] + j;
+ csa->lscsa = vmap(pgarray, n_4k, VM_USERMAP, PAGE_KERNEL);
+ kfree(pgarray);
+ if (csa->lscsa == NULL)
+ goto fail;
+
+ memset(csa->lscsa, 0, sizeof(struct spu_lscsa));
+
+ /* Set LS pages reserved to allow for user-space mapping.
+ *
+ * XXX isn't that a bit obsolete ? I think we should just
+ * make sure the page count is high enough. Anyway, won't harm
+ * for now
+ */
+ for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+ SetPageReserved(vmalloc_to_page(p));
+
+ pr_debug(" all good !\n");
+
+ return 0;
+fail:
+ pr_debug("spufs: failed to allocate lscsa 64K pages, falling back\n");
+ spu_free_lscsa(csa);
+ return spu_alloc_lscsa_std(csa);
+}
+
+void spu_free_lscsa(struct spu_state *csa)
+{
+ unsigned char *p;
+ int i;
+
+ if (!csa->use_big_pages) {
+ spu_free_lscsa_std(csa);
+ return;
+ }
+ csa->use_big_pages = 0;
+
+ if (csa->lscsa == NULL)
+ goto free_pages;
+
+ for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+ ClearPageReserved(vmalloc_to_page(p));
+
+ vunmap(csa->lscsa);
+ csa->lscsa = NULL;
+
+ free_pages:
+
+ for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++)
+ if (csa->lscsa_pages[i])
+ __free_pages(csa->lscsa_pages[i], SPU_64K_PAGE_ORDER);
+}
+
+#else /* CONFIG_SPU_FS_64K_LS */
+
+int spu_alloc_lscsa(struct spu_state *csa)
+{
+ return spu_alloc_lscsa_std(csa);
+}
+
+void spu_free_lscsa(struct spu_state *csa)
+{
+ spu_free_lscsa_std(csa);
+}
+
+#endif /* !defined(CONFIG_SPU_FS_64K_LS) */
diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c
index 29dc59cefc3..71a0b41adb8 100644
--- a/arch/powerpc/platforms/cell/spufs/switch.c
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@@ -2188,40 +2188,30 @@ static void init_priv2(struct spu_state *csa)
* as it is by far the largest of the context save regions,
* and may need to be pinned or otherwise specially aligned.
*/
-void spu_init_csa(struct spu_state *csa)
+int spu_init_csa(struct spu_state *csa)
{
- struct spu_lscsa *lscsa;
- unsigned char *p;
+ int rc;
if (!csa)
- return;
+ return -EINVAL;
memset(csa, 0, sizeof(struct spu_state));
- lscsa = vmalloc(sizeof(struct spu_lscsa));
- if (!lscsa)
- return;
+ rc = spu_alloc_lscsa(csa);
+ if (rc)
+ return rc;
- memset(lscsa, 0, sizeof(struct spu_lscsa));
- csa->lscsa = lscsa;
spin_lock_init(&csa->register_lock);
- /* Set LS pages reserved to allow for user-space mapping. */
- for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE)
- SetPageReserved(vmalloc_to_page(p));
-
init_prob(csa);
init_priv1(csa);
init_priv2(csa);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(spu_init_csa);
void spu_fini_csa(struct spu_state *csa)
{
- /* Clear reserved bit before vfree. */
- unsigned char *p;
- for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
- ClearPageReserved(vmalloc_to_page(p));
-
- vfree(csa->lscsa);
+ spu_free_lscsa(csa);
}
EXPORT_SYMBOL_GPL(spu_fini_csa);
diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig
index 46c3a8e7c3a..761d9e971fc 100644
--- a/arch/powerpc/platforms/iseries/Kconfig
+++ b/arch/powerpc/platforms/iseries/Kconfig
@@ -7,7 +7,9 @@ menu "iSeries device drivers"
depends on PPC_ISERIES
config VIOCONS
- tristate "iSeries Virtual Console Support (Obsolete)"
+ bool "iSeries Virtual Console Support (Obsolete)"
+ depends on !HVC_ISERIES
+ default n
help
This is the old virtual console driver for legacy iSeries.
You should use the iSeries Hypervisor Virtual Console
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 63e23062e98..093438b93bd 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -100,6 +100,9 @@ static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(slot_errbuf_lock);
static int eeh_error_buf_size;
+#define EEH_PCI_REGS_LOG_LEN 4096
+static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
+
/* System monitoring statistics */
static unsigned long no_device;
static unsigned long no_dn;
@@ -115,7 +118,8 @@ static unsigned long slot_resets;
/* --------------------------------------------------------------- */
/* Below lies the EEH event infrastructure */
-void eeh_slot_error_detail (struct pci_dn *pdn, int severity)
+static void rtas_slot_error_detail(struct pci_dn *pdn, int severity,
+ char *driver_log, size_t loglen)
{
int config_addr;
unsigned long flags;
@@ -133,7 +137,8 @@ void eeh_slot_error_detail (struct pci_dn *pdn, int severity)
rc = rtas_call(ibm_slot_error_detail,
8, 1, NULL, config_addr,
BUID_HI(pdn->phb->buid),
- BUID_LO(pdn->phb->buid), NULL, 0,
+ BUID_LO(pdn->phb->buid),
+ virt_to_phys(driver_log), loglen,
virt_to_phys(slot_errbuf),
eeh_error_buf_size,
severity);
@@ -144,6 +149,84 @@ void eeh_slot_error_detail (struct pci_dn *pdn, int severity)
}
/**
+ * gather_pci_data - copy assorted PCI config space registers to buff
+ * @pdn: device to report data for
+ * @buf: point to buffer in which to log
+ * @len: amount of room in buffer
+ *
+ * This routine captures assorted PCI configuration space data,
+ * and puts them into a buffer for RTAS error logging.
+ */
+static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
+{
+ u32 cfg;
+ int cap, i;
+ int n = 0;
+
+ n += scnprintf(buf+n, len-n, "%s\n", pdn->node->full_name);
+ printk(KERN_WARNING "EEH: of node=%s\n", pdn->node->full_name);
+
+ rtas_read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
+ printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
+
+ rtas_read_config(pdn, PCI_COMMAND, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
+ printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
+
+ /* Dump out the PCI-X command and status regs */
+ cap = pci_find_capability(pdn->pcidev, PCI_CAP_ID_PCIX);
+ if (cap) {
+ rtas_read_config(pdn, cap, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
+ printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
+
+ rtas_read_config(pdn, cap+4, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
+ printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
+ }
+
+ /* If PCI-E capable, dump PCI-E cap 10, and the AER */
+ cap = pci_find_capability(pdn->pcidev, PCI_CAP_ID_EXP);
+ if (cap) {
+ n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
+ printk(KERN_WARNING
+ "EEH: PCI-E capabilities and status follow:\n");
+
+ for (i=0; i<=8; i++) {
+ rtas_read_config(pdn, cap+4*i, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
+ printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
+ }
+
+ cap = pci_find_ext_capability(pdn->pcidev,PCI_EXT_CAP_ID_ERR);
+ if (cap) {
+ n += scnprintf(buf+n, len-n, "pci-e AER:\n");
+ printk(KERN_WARNING
+ "EEH: PCI-E AER capability register set follows:\n");
+
+ for (i=0; i<14; i++) {
+ rtas_read_config(pdn, cap+4*i, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
+ printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
+ }
+ }
+ }
+ return n;
+}
+
+void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
+{
+ size_t loglen = 0;
+ memset(pci_regs_buf, 0, EEH_PCI_REGS_LOG_LEN);
+
+ rtas_pci_enable(pdn, EEH_THAW_MMIO);
+ loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
+
+ rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
+}
+
+/**
* read_slot_reset_state - Read the reset state of a device node's slot
* @dn: device node to read
* @rets: array to return results in
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 3170e003f76..f07d849cfc8 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -361,11 +361,12 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
goto hard_fail;
}
- eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */);
printk(KERN_WARNING
- "EEH: This PCI device has failed %d times since last reboot: "
- "location=%s driver=%s pci addr=%s\n",
- frozen_pdn->eeh_freeze_count, location, drv_str, pci_str);
+ "EEH: This PCI device has failed %d times in the last hour:\n",
+ frozen_pdn->eeh_freeze_count);
+ printk(KERN_WARNING
+ "EEH: location=%s driver=%s pci addr=%s\n",
+ location, drv_str, pci_str);
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
@@ -375,6 +376,11 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
*/
pci_walk_bus(frozen_bus, eeh_report_error, &result);
+ /* Since rtas may enable MMIO when posting the error log,
+ * don't post the error log until after all dev drivers
+ * have been informed. */
+ eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */);
+
/* If all device drivers were EEH-unaware, then shut
* down all of the device drivers, and hope they
* go down willingly, without panicing the system.
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index 8a123c71449..cad17572435 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -907,7 +907,7 @@ static int __init fs_enet_of_init(void)
struct fs_platform_info fs_enet_data;
const unsigned int *id;
const unsigned int *phy_addr;
- void *mac_addr;
+ const void *mac_addr;
const phandle *ph;
const char *model;
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 1e32fb834eb..2df42fdcdc9 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -631,7 +631,8 @@ config HVC_CONSOLE
config HVC_ISERIES
bool "iSeries Hypervisor Virtual Console support"
- depends on PPC_ISERIES && !VIOCONS
+ depends on PPC_ISERIES
+ default y
select HVC_DRIVER
help
iSeries machines support a hypervisor virtual console.
diff --git a/include/asm-powerpc/mmu-hash64.h b/include/asm-powerpc/mmu-hash64.h
index 6739457d8bc..e2ca55bcfe0 100644
--- a/include/asm-powerpc/mmu-hash64.h
+++ b/include/asm-powerpc/mmu-hash64.h
@@ -350,10 +350,13 @@ typedef unsigned long mm_context_id_t;
typedef struct {
mm_context_id_t id;
- u16 user_psize; /* page size index */
- u16 sllp; /* SLB entry page size encoding */
-#ifdef CONFIG_HUGETLB_PAGE
- u16 low_htlb_areas, high_htlb_areas;
+ u16 user_psize; /* page size index */
+
+#ifdef CONFIG_PPC_MM_SLICES
+ u64 low_slices_psize; /* SLB page size encodings */
+ u64 high_slices_psize; /* 4 bits per slice for now */
+#else
+ u16 sllp; /* SLB page size encoding */
#endif
unsigned long vdso_base;
} mm_context_t;
diff --git a/include/asm-powerpc/paca.h b/include/asm-powerpc/paca.h
index cf95274f735..c6a5b173566 100644
--- a/include/asm-powerpc/paca.h
+++ b/include/asm-powerpc/paca.h
@@ -83,8 +83,8 @@ struct paca_struct {
mm_context_t context;
u16 vmalloc_sllp;
- u16 slb_cache[SLB_CACHE_ENTRIES];
u16 slb_cache_ptr;
+ u16 slb_cache[SLB_CACHE_ENTRIES];
/*
* then miscellaneous read-write fields
diff --git a/include/asm-powerpc/page_64.h b/include/asm-powerpc/page_64.h
index eab779c2199..3448a3d4bc6 100644
--- a/include/asm-powerpc/page_64.h
+++ b/include/asm-powerpc/page_64.h
@@ -88,57 +88,55 @@ extern unsigned int HPAGE_SHIFT;
#endif /* __ASSEMBLY__ */
-#ifdef CONFIG_HUGETLB_PAGE
+#ifdef CONFIG_PPC_MM_SLICES
-#define HTLB_AREA_SHIFT 40
-#define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT)
-#define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT)
+#define SLICE_LOW_SHIFT 28
+#define SLICE_HIGH_SHIFT 40
-#define LOW_ESID_MASK(addr, len) \
- (((1U << (GET_ESID(min((addr)+(len)-1, 0x100000000UL))+1)) \
- - (1U << GET_ESID(min((addr), 0x100000000UL)))) & 0xffff)
-#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \
- - (1U << GET_HTLB_AREA(addr))) & 0xffff)
+#define SLICE_LOW_TOP (0x100000000ul)
+#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
+#define SLICE_NUM_HIGH (PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
-#define ARCH_HAS_HUGEPAGE_ONLY_RANGE
-#define ARCH_HAS_HUGETLB_FREE_PGD_RANGE
-#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
-#define ARCH_HAS_SETCLEAR_HUGE_PTE
+#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
+#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
-#define touches_hugepage_low_range(mm, addr, len) \
- (((addr) < 0x100000000UL) \
- && (LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas))
-#define touches_hugepage_high_range(mm, addr, len) \
- ((((addr) + (len)) > 0x100000000UL) \
- && (HTLB_AREA_MASK((addr), (len)) & (mm)->context.high_htlb_areas))
-
-#define __within_hugepage_low_range(addr, len, segmask) \
- ( (((addr)+(len)) <= 0x100000000UL) \
- && ((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask)))
-#define within_hugepage_low_range(addr, len) \
- __within_hugepage_low_range((addr), (len), \
- current->mm->context.low_htlb_areas)
-#define __within_hugepage_high_range(addr, len, zonemask) \
- ( ((addr) >= 0x100000000UL) \
- && ((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask)))
-#define within_hugepage_high_range(addr, len) \
- __within_hugepage_high_range((addr), (len), \
- current->mm->context.high_htlb_areas)
-
-#define is_hugepage_only_range(mm, addr, len) \
- (touches_hugepage_high_range((mm), (addr), (len)) || \
- touches_hugepage_low_range((mm), (addr), (len)))
-#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#ifndef __ASSEMBLY__
+
+struct slice_mask {
+ u16 low_slices;
+ u16 high_slices;
+};
+
+struct mm_struct;
-#define in_hugepage_area(context, addr) \
- (cpu_has_feature(CPU_FTR_16M_PAGE) && \
- ( ( (addr) >= 0x100000000UL) \
- ? ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) \
- : ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) )
+extern unsigned long slice_get_unmapped_area(unsigned long addr,
+ unsigned long len,
+ unsigned long flags,
+ unsigned int psize,
+ int topdown,
+ int use_cache);
-#else /* !CONFIG_HUGETLB_PAGE */
+extern unsigned int get_slice_psize(struct mm_struct *mm,
+ unsigned long addr);
-#define in_hugepage_area(mm, addr) 0
+extern void slice_init_context(struct mm_struct *mm, unsigned int psize);
+extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
+
+#define ARCH_HAS_HUGEPAGE_ONLY_RANGE
+extern int is_hugepage_only_range(struct mm_struct *m,
+ unsigned long addr,
+ unsigned long len);
+
+#endif /* __ASSEMBLY__ */
+#else
+#define slice_init()
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#ifdef CONFIG_HUGETLB_PAGE
+
+#define ARCH_HAS_HUGETLB_FREE_PGD_RANGE
+#define ARCH_HAS_SETCLEAR_HUGE_PTE
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#endif /* !CONFIG_HUGETLB_PAGE */
diff --git a/include/asm-powerpc/pgalloc-64.h b/include/asm-powerpc/pgalloc-64.h
index 30b50cf56e2..d9a3a8ca58a 100644
--- a/include/asm-powerpc/pgalloc-64.h
+++ b/include/asm-powerpc/pgalloc-64.h
@@ -14,18 +14,11 @@
extern struct kmem_cache *pgtable_cache[];
-#ifdef CONFIG_PPC_64K_PAGES
-#define PTE_CACHE_NUM 0
-#define PMD_CACHE_NUM 1
-#define PGD_CACHE_NUM 2
-#define HUGEPTE_CACHE_NUM 3
-#else
-#define PTE_CACHE_NUM 0
-#define PMD_CACHE_NUM 1
-#define PUD_CACHE_NUM 1
-#define PGD_CACHE_NUM 0
-#define HUGEPTE_CACHE_NUM 2
-#endif
+#define PGD_CACHE_NUM 0
+#define PUD_CACHE_NUM 1
+#define PMD_CACHE_NUM 1
+#define HUGEPTE_CACHE_NUM 2
+#define PTE_NONCACHE_NUM 3 /* from GFP rather than kmem_cache */
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
@@ -91,8 +84,7 @@ static inline void pmd_free(pmd_t *pmd)
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
unsigned long address)
{
- return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM],
- GFP_KERNEL|__GFP_REPEAT);
+ return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
}
static inline struct page *pte_alloc_one(struct mm_struct *mm,
@@ -103,12 +95,12 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
static inline void pte_free_kernel(pte_t *pte)
{
- kmem_cache_free(pgtable_cache[PTE_CACHE_NUM], pte);
+ free_page((unsigned long)pte);
}
static inline void pte_free(struct page *ptepage)
{
- pte_free_kernel(page_address(ptepage));
+ __free_page(ptepage);
}
#define PGF_CACHENUM_MASK 0x3
@@ -130,14 +122,17 @@ static inline void pgtable_free(pgtable_free_t pgf)
void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK);
int cachenum = pgf.val & PGF_CACHENUM_MASK;
- kmem_cache_free(pgtable_cache[cachenum], p);
+ if (cachenum == PTE_NONCACHE_NUM)
+ free_page((unsigned long)p);
+ else
+ kmem_cache_free(pgtable_cache[cachenum], p);
}
extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
#define __pte_free_tlb(tlb, ptepage) \
pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \
- PTE_CACHE_NUM, PTE_TABLE_SIZE-1))
+ PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1))
#define __pmd_free_tlb(tlb, pmd) \
pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
diff --git a/include/asm-powerpc/pgtable-4k.h b/include/asm-powerpc/pgtable-4k.h
index 1744d6ac12a..add5481fd7c 100644
--- a/include/asm-powerpc/pgtable-4k.h
+++ b/include/asm-powerpc/pgtable-4k.h
@@ -80,7 +80,11 @@
#define pte_iterate_hashed_end() } while(0)
-#define pte_pagesize_index(pte) MMU_PAGE_4K
+#ifdef CONFIG_PPC_HAS_HASH_64K
+#define pte_pagesize_index(mm, addr, pte) get_slice_psize(mm, addr)
+#else
+#define pte_pagesize_index(mm, addr, pte) MMU_PAGE_4K
+#endif
/*
* 4-level page tables related bits
diff --git a/include/asm-powerpc/pgtable-64k.h b/include/asm-powerpc/pgtable-64k.h
index 16ef4978520..31cbd3d7fce 100644
--- a/include/asm-powerpc/pgtable-64k.h
+++ b/include/asm-powerpc/pgtable-64k.h
@@ -35,6 +35,11 @@
#define _PAGE_HPTE_SUB0 0x08000000 /* combo only: first sub page */
#define _PAGE_COMBO 0x10000000 /* this is a combo 4k page */
#define _PAGE_4K_PFN 0x20000000 /* PFN is for a single 4k page */
+
+/* Note the full page bits must be in the same location as for normal
+ * 4k pages as the same asssembly will be used to insert 64K pages
+ * wether the kernel has CONFIG_PPC_64K_PAGES or not
+ */
#define _PAGE_F_SECOND 0x00008000 /* full page: hidx bits */
#define _PAGE_F_GIX 0x00007000 /* full page: hidx bits */
@@ -88,7 +93,7 @@
#define pte_iterate_hashed_end() } while(0); } } while(0)
-#define pte_pagesize_index(pte) \
+#define pte_pagesize_index(mm, addr, pte) \
(((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)
#define remap_4k_pfn(vma, addr, pfn, prot) \
diff --git a/include/asm-powerpc/spu_csa.h b/include/asm-powerpc/spu_csa.h
index 02e56a6685a..c48ae185c87 100644
--- a/include/asm-powerpc/spu_csa.h
+++ b/include/asm-powerpc/spu_csa.h
@@ -235,6 +235,12 @@ struct spu_priv2_collapsed {
*/
struct spu_state {
struct spu_lscsa *lscsa;
+#ifdef CONFIG_SPU_FS_64K_LS
+ int use_big_pages;
+ /* One struct page per 64k page */
+#define SPU_LSCSA_NUM_BIG_PAGES (sizeof(struct spu_lscsa) / 0x10000)
+ struct page *lscsa_pages[SPU_LSCSA_NUM_BIG_PAGES];
+#endif
struct spu_problem_collapsed prob;
struct spu_priv1_collapsed priv1;
struct spu_priv2_collapsed priv2;
@@ -247,12 +253,14 @@ struct spu_state {
spinlock_t register_lock;
};
-extern void spu_init_csa(struct spu_state *csa);
+extern int spu_init_csa(struct spu_state *csa);
extern void spu_fini_csa(struct spu_state *csa);
extern int spu_save(struct spu_state *prev, struct spu *spu);
extern int spu_restore(struct spu_state *new, struct spu *spu);
extern int spu_switch(struct spu_state *prev, struct spu_state *new,
struct spu *spu);
+extern int spu_alloc_lscsa(struct spu_state *csa);
+extern void spu_free_lscsa(struct spu_state *csa);
#endif /* !__SPU__ */
#endif /* __KERNEL__ */
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index d74da9122b6..9c7cb643066 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -52,7 +52,15 @@ struct hibernation_ops {
#if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND)
/* kernel/power/snapshot.c */
-extern void __init register_nosave_region(unsigned long, unsigned long);
+extern void __register_nosave_region(unsigned long b, unsigned long e, int km);
+static inline void register_nosave_region(unsigned long b, unsigned long e)
+{
+ __register_nosave_region(b, e, 0);
+}
+static inline void register_nosave_region_late(unsigned long b, unsigned long e)
+{
+ __register_nosave_region(b, e, 1);
+}
extern int swsusp_page_is_forbidden(struct page *);
extern void swsusp_set_page_free(struct page *);
extern void swsusp_unset_page_free(struct page *);
@@ -62,6 +70,7 @@ extern void hibernation_set_ops(struct hibernation_ops *ops);
extern int hibernate(void);
#else
static inline void register_nosave_region(unsigned long b, unsigned long e) {}
+static inline void register_nosave_region_late(unsigned long b, unsigned long e) {}
static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
static inline void swsusp_set_page_free(struct page *p) {}
static inline void swsusp_unset_page_free(struct page *p) {}
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 48383ea7229..a3b7854b8f7 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -607,7 +607,8 @@ static LIST_HEAD(nosave_regions);
*/
void __init
-register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
+__register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
+ int use_kmalloc)
{
struct nosave_region *region;
@@ -623,8 +624,13 @@ register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
goto Report;
}
}
- /* This allocation cannot fail */
- region = alloc_bootmem_low(sizeof(struct nosave_region));
+ if (use_kmalloc) {
+ /* during init, this shouldn't fail */
+ region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
+ BUG_ON(!region);
+ } else
+ /* This allocation cannot fail */
+ region = alloc_bootmem_low(sizeof(struct nosave_region));
region->start_pfn = start_pfn;
region->end_pfn = end_pfn;
list_add_tail(&region->list, &nosave_regions);