aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/Makefile3
-rw-r--r--arch/powerpc/mm/gup.c280
-rw-r--r--arch/powerpc/mm/hash_utils_64.c63
-rw-r--r--arch/powerpc/mm/init_64.c9
-rw-r--r--arch/powerpc/mm/mem.c2
-rw-r--r--arch/powerpc/mm/ppc_mmu_32.c4
-rw-r--r--arch/powerpc/mm/tlb_64.c2
7 files changed, 328 insertions, 35 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 1c00e0196f6..e7392b45a5e 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -12,7 +12,8 @@ obj-y := fault.o mem.o \
mmu_context_$(CONFIG_WORD_SIZE).o
hash-$(CONFIG_PPC_NATIVE) := hash_native_64.o
obj-$(CONFIG_PPC64) += hash_utils_64.o \
- slb_low.o slb.o stab.o mmap.o $(hash-y)
+ slb_low.o slb.o stab.o \
+ gup.o mmap.o $(hash-y)
obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o
obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \
tlb_$(CONFIG_WORD_SIZE).o
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
new file mode 100644
index 00000000000..9fdf4d6335e
--- /dev/null
+++ b/arch/powerpc/mm/gup.c
@@ -0,0 +1,280 @@
+/*
+ * Lockless get_user_pages_fast for powerpc
+ *
+ * Copyright (C) 2008 Nick Piggin
+ * Copyright (C) 2008 Novell Inc.
+ */
+#undef DEBUG
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/vmstat.h>
+#include <linux/pagemap.h>
+#include <linux/rwsem.h>
+#include <asm/pgtable.h>
+
+/*
+ * The performance critical leaf functions are made noinline otherwise gcc
+ * inlines everything into a single function which results in too much
+ * register pressure.
+ */
+static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ unsigned long mask, result;
+ pte_t *ptep;
+
+ result = _PAGE_PRESENT|_PAGE_USER;
+ if (write)
+ result |= _PAGE_RW;
+ mask = result | _PAGE_SPECIAL;
+
+ ptep = pte_offset_kernel(&pmd, addr);
+ do {
+ pte_t pte = *ptep;
+ struct page *page;
+
+ if ((pte_val(pte) & mask) != result)
+ return 0;
+ VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+ page = pte_page(pte);
+ if (!page_cache_get_speculative(page))
+ return 0;
+ if (unlikely(pte != *ptep)) {
+ put_page(page);
+ return 0;
+ }
+ pages[*nr] = page;
+ (*nr)++;
+
+ } while (ptep++, addr += PAGE_SIZE, addr != end);
+
+ return 1;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static noinline int gup_huge_pte(pte_t *ptep, struct hstate *hstate,
+ unsigned long *addr, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ unsigned long mask;
+ unsigned long pte_end;
+ struct page *head, *page;
+ pte_t pte;
+ int refs;
+
+ pte_end = (*addr + huge_page_size(hstate)) & huge_page_mask(hstate);
+ if (pte_end < end)
+ end = pte_end;
+
+ pte = *ptep;
+ mask = _PAGE_PRESENT|_PAGE_USER;
+ if (write)
+ mask |= _PAGE_RW;
+ if ((pte_val(pte) & mask) != mask)
+ return 0;
+ /* hugepages are never "special" */
+ VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+ refs = 0;
+ head = pte_page(pte);
+ page = head + ((*addr & ~huge_page_mask(hstate)) >> PAGE_SHIFT);
+ do {
+ VM_BUG_ON(compound_head(page) != head);
+ pages[*nr] = page;
+ (*nr)++;
+ page++;
+ refs++;
+ } while (*addr += PAGE_SIZE, *addr != end);
+
+ if (!page_cache_add_speculative(head, refs)) {
+ *nr -= refs;
+ return 0;
+ }
+ if (unlikely(pte != *ptep)) {
+ /* Could be optimized better */
+ while (*nr) {
+ put_page(page);
+ (*nr)--;
+ }
+ }
+
+ return 1;
+}
+#endif /* CONFIG_HUGETLB_PAGE */
+
+static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ pmd_t *pmdp;
+
+ pmdp = pmd_offset(&pud, addr);
+ do {
+ pmd_t pmd = *pmdp;
+
+ next = pmd_addr_end(addr, end);
+ if (pmd_none(pmd))
+ return 0;
+ if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+ return 0;
+ } while (pmdp++, addr = next, addr != end);
+
+ return 1;
+}
+
+static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ pud_t *pudp;
+
+ pudp = pud_offset(&pgd, addr);
+ do {
+ pud_t pud = *pudp;
+
+ next = pud_addr_end(addr, end);
+ if (pud_none(pud))
+ return 0;
+ if (!gup_pmd_range(pud, addr, next, write, pages, nr))
+ return 0;
+ } while (pudp++, addr = next, addr != end);
+
+ return 1;
+}
+
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+ struct page **pages)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long addr, len, end;
+ unsigned long next;
+ pgd_t *pgdp;
+ int psize, nr = 0;
+ unsigned int shift;
+
+ pr_debug("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");
+
+ start &= PAGE_MASK;
+ addr = start;
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+
+ if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+ start, len)))
+ goto slow_irqon;
+
+ pr_debug(" aligned: %lx .. %lx\n", start, end);
+
+#ifdef CONFIG_HUGETLB_PAGE
+ /* We bail out on slice boundary crossing when hugetlb is
+ * enabled in order to not have to deal with two different
+ * page table formats
+ */
+ if (addr < SLICE_LOW_TOP) {
+ if (end > SLICE_LOW_TOP)
+ goto slow_irqon;
+
+ if (unlikely(GET_LOW_SLICE_INDEX(addr) !=
+ GET_LOW_SLICE_INDEX(end - 1)))
+ goto slow_irqon;
+ } else {
+ if (unlikely(GET_HIGH_SLICE_INDEX(addr) !=
+ GET_HIGH_SLICE_INDEX(end - 1)))
+ goto slow_irqon;
+ }
+#endif /* CONFIG_HUGETLB_PAGE */
+
+ /*
+ * XXX: batch / limit 'nr', to avoid large irq off latency
+ * needs some instrumenting to determine the common sizes used by
+ * important workloads (eg. DB2), and whether limiting the batch size
+ * will decrease performance.
+ *
+ * It seems like we're in the clear for the moment. Direct-IO is
+ * the main guy that batches up lots of get_user_pages, and even
+ * they are limited to 64-at-a-time which is not so many.
+ */
+ /*
+ * This doesn't prevent pagetable teardown, but does prevent
+ * the pagetables from being freed on powerpc.
+ *
+ * So long as we atomically load page table pointers versus teardown,
+ * we can follow the address down to the the page and take a ref on it.
+ */
+ local_irq_disable();
+
+ psize = get_slice_psize(mm, addr);
+ shift = mmu_psize_defs[psize].shift;
+
+#ifdef CONFIG_HUGETLB_PAGE
+ if (unlikely(mmu_huge_psizes[psize])) {
+ pte_t *ptep;
+ unsigned long a = addr;
+ unsigned long sz = ((1UL) << shift);
+ struct hstate *hstate = size_to_hstate(sz);
+
+ BUG_ON(!hstate);
+ /*
+ * XXX: could be optimized to avoid hstate
+ * lookup entirely (just use shift)
+ */
+
+ do {
+ VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, a)].shift);
+ ptep = huge_pte_offset(mm, a);
+ pr_debug(" %016lx: huge ptep %p\n", a, ptep);
+ if (!ptep || !gup_huge_pte(ptep, hstate, &a, end, write, pages,
+ &nr))
+ goto slow;
+ } while (a != end);
+ } else
+#endif /* CONFIG_HUGETLB_PAGE */
+ {
+ pgdp = pgd_offset(mm, addr);
+ do {
+ pgd_t pgd = *pgdp;
+
+ VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, addr)].shift);
+ pr_debug(" %016lx: normal pgd %p\n", addr, (void *)pgd);
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(pgd))
+ goto slow;
+ if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+ goto slow;
+ } while (pgdp++, addr = next, addr != end);
+ }
+ local_irq_enable();
+
+ VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
+ return nr;
+
+ {
+ int ret;
+
+slow:
+ local_irq_enable();
+slow_irqon:
+ pr_debug(" slow path ! nr = %d\n", nr);
+
+ /* Try to get the remaining pages with get_user_pages */
+ start += nr << PAGE_SHIFT;
+ pages += nr;
+
+ down_read(&mm->mmap_sem);
+ ret = get_user_pages(current, mm, start,
+ (end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
+ up_read(&mm->mmap_sem);
+
+ /* Have to be a bit careful with return values */
+ if (nr > 0) {
+ if (ret < 0)
+ ret = nr;
+ else
+ ret += nr;
+ }
+
+ return ret;
+ }
+}
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 5ce5a4dcd00..14be408dfc9 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -151,39 +151,53 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = {
},
};
+static unsigned long htab_convert_pte_flags(unsigned long pteflags)
+{
+ unsigned long rflags = pteflags & 0x1fa;
+
+ /* _PAGE_EXEC -> NOEXEC */
+ if ((pteflags & _PAGE_EXEC) == 0)
+ rflags |= HPTE_R_N;
+
+ /* PP bits. PAGE_USER is already PP bit 0x2, so we only
+ * need to add in 0x1 if it's a read-only user page
+ */
+ if ((pteflags & _PAGE_USER) && !((pteflags & _PAGE_RW) &&
+ (pteflags & _PAGE_DIRTY)))
+ rflags |= 1;
+
+ /* Always add C */
+ return rflags | HPTE_R_C;
+}
int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
- unsigned long pstart, unsigned long mode,
+ unsigned long pstart, unsigned long prot,
int psize, int ssize)
{
unsigned long vaddr, paddr;
unsigned int step, shift;
- unsigned long tmp_mode;
int ret = 0;
shift = mmu_psize_defs[psize].shift;
step = 1 << shift;
+ prot = htab_convert_pte_flags(prot);
+
+ DBG("htab_bolt_mapping(%lx..%lx -> %lx (%lx,%d,%d)\n",
+ vstart, vend, pstart, prot, psize, ssize);
+
for (vaddr = vstart, paddr = pstart; vaddr < vend;
vaddr += step, paddr += step) {
unsigned long hash, hpteg;
unsigned long vsid = get_kernel_vsid(vaddr, ssize);
unsigned long va = hpt_va(vaddr, vsid, ssize);
- tmp_mode = mode;
-
- /* Make non-kernel text non-executable */
- if (!in_kernel_text(vaddr))
- tmp_mode = mode | HPTE_R_N;
-
hash = hpt_hash(va, shift, ssize);
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
- DBG("htab_bolt_mapping: calling %p\n", ppc_md.hpte_insert);
-
BUG_ON(!ppc_md.hpte_insert);
- ret = ppc_md.hpte_insert(hpteg, va, paddr,
- tmp_mode, HPTE_V_BOLTED, psize, ssize);
+ ret = ppc_md.hpte_insert(hpteg, va, paddr, prot,
+ HPTE_V_BOLTED, psize, ssize);
if (ret < 0)
break;
@@ -519,9 +533,9 @@ static unsigned long __init htab_get_table_size(void)
#ifdef CONFIG_MEMORY_HOTPLUG
void create_section_mapping(unsigned long start, unsigned long end)
{
- BUG_ON(htab_bolt_mapping(start, end, __pa(start),
- _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX,
- mmu_linear_psize, mmu_kernel_ssize));
+ BUG_ON(htab_bolt_mapping(start, end, __pa(start),
+ PAGE_KERNEL, mmu_linear_psize,
+ mmu_kernel_ssize));
}
int remove_section_mapping(unsigned long start, unsigned long end)
@@ -570,7 +584,7 @@ void __init htab_initialize(void)
{
unsigned long table;
unsigned long pteg_count;
- unsigned long mode_rw;
+ unsigned long prot, tprot;
unsigned long base = 0, size = 0, limit;
int i;
@@ -628,7 +642,7 @@ void __init htab_initialize(void)
mtspr(SPRN_SDR1, _SDR1);
}
- mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX;
+ prot = PAGE_KERNEL;
#ifdef CONFIG_DEBUG_PAGEALLOC
linear_map_hash_count = lmb_end_of_DRAM() >> PAGE_SHIFT;
@@ -646,8 +660,10 @@ void __init htab_initialize(void)
for (i=0; i < lmb.memory.cnt; i++) {
base = (unsigned long)__va(lmb.memory.region[i].base);
size = lmb.memory.region[i].size;
+ tprot = prot | (in_kernel_text(base) ? _PAGE_EXEC : 0);
- DBG("creating mapping for region: %lx : %lx\n", base, size);
+ DBG("creating mapping for region: %lx..%lx (prot: %x)\n",
+ base, size, tprot);
#ifdef CONFIG_U3_DART
/* Do not map the DART space. Fortunately, it will be aligned
@@ -664,21 +680,21 @@ void __init htab_initialize(void)
unsigned long dart_table_end = dart_tablebase + 16 * MB;
if (base != dart_tablebase)
BUG_ON(htab_bolt_mapping(base, dart_tablebase,
- __pa(base), mode_rw,
+ __pa(base), tprot,
mmu_linear_psize,
mmu_kernel_ssize));
if ((base + size) > dart_table_end)
BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
base + size,
__pa(dart_table_end),
- mode_rw,
+ tprot,
mmu_linear_psize,
mmu_kernel_ssize));
continue;
}
#endif /* CONFIG_U3_DART */
BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
- mode_rw, mmu_linear_psize, mmu_kernel_ssize));
+ tprot, mmu_linear_psize, mmu_kernel_ssize));
}
/*
@@ -696,7 +712,7 @@ void __init htab_initialize(void)
tce_alloc_start = base + size + 1;
BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
- __pa(tce_alloc_start), mode_rw,
+ __pa(tce_alloc_start), prot,
mmu_linear_psize, mmu_kernel_ssize));
}
@@ -1117,8 +1133,7 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
unsigned long hash, hpteg;
unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
unsigned long va = hpt_va(vaddr, vsid, mmu_kernel_ssize);
- unsigned long mode = _PAGE_ACCESSED | _PAGE_DIRTY |
- _PAGE_COHERENT | PP_RWXX | HPTE_R_N;
+ unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL);
int ret;
hash = hpt_hash(va, PAGE_SHIFT, mmu_kernel_ssize);
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 4f7df85129d..036fe2f10c7 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -206,13 +206,10 @@ static int __meminit vmemmap_populated(unsigned long start, int page_size)
int __meminit vmemmap_populate(struct page *start_page,
unsigned long nr_pages, int node)
{
- unsigned long mode_rw;
unsigned long start = (unsigned long)start_page;
unsigned long end = (unsigned long)(start_page + nr_pages);
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
- mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX;
-
/* Align to the page size of the linear mapping. */
start = _ALIGN_DOWN(start, page_size);
@@ -230,9 +227,9 @@ int __meminit vmemmap_populate(struct page *start_page,
pr_debug("vmemmap %08lx allocated at %p, physical %08lx.\n",
start, p, __pa(p));
- mapped = htab_bolt_mapping(start, start + page_size,
- __pa(p), mode_rw, mmu_vmemmap_psize,
- mmu_kernel_ssize);
+ mapped = htab_bolt_mapping(start, start + page_size, __pa(p),
+ PAGE_KERNEL, mmu_vmemmap_psize,
+ mmu_kernel_ssize);
BUG_ON(mapped < 0);
}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 702691cb9e8..1c93c255873 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -311,7 +311,7 @@ void __init paging_init(void)
#endif /* CONFIG_HIGHMEM */
printk(KERN_DEBUG "Top of RAM: 0x%llx, Total RAM: 0x%lx\n",
- (u64)top_of_ram, total_ram);
+ (unsigned long long)top_of_ram, total_ram);
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
(long int)((top_of_ram - total_ram) >> 20));
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index c53145f6194..6aa12081377 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -236,8 +236,8 @@ void __init MMU_init_hw(void)
Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size);
- printk("Total memory = %ldMB; using %ldkB for hash table (at %p)\n",
- total_memory >> 20, Hash_size >> 10, Hash);
+ printk("Total memory = %lldMB; using %ldkB for hash table (at %p)\n",
+ (unsigned long long)(total_memory >> 20), Hash_size >> 10, Hash);
/*
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index 409fcc7b63c..be7dd422c0f 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -34,7 +34,7 @@
DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
/* This is declared as we are using the more or less generic
- * include/asm-powerpc/tlb.h file -- tgall
+ * arch/powerpc/include/asm/tlb.h file -- tgall
*/
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);