diff options
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/Makefile | 4 | ||||
-rw-r--r-- | arch/powerpc/mm/fault.c | 46 | ||||
-rw-r--r-- | arch/powerpc/mm/fsl_booke_mmu.c | 95 | ||||
-rw-r--r-- | arch/powerpc/mm/gup.c | 16 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 5 | ||||
-rw-r--r-- | arch/powerpc/mm/mem.c | 33 | ||||
-rw-r--r-- | arch/powerpc/mm/mmap.c | 64 | ||||
-rw-r--r-- | arch/powerpc/mm/numa.c | 174 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable.c | 131 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_32.c | 14 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_64.c | 25 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_nohash_low.S | 44 |
12 files changed, 400 insertions, 251 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 953cc4a1cde..6d2838fc879 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -6,7 +6,7 @@ ifeq ($(CONFIG_PPC64),y) EXTRA_CFLAGS += -mno-minimal-toc endif -obj-y := fault.o mem.o pgtable.o \ +obj-y := fault.o mem.o pgtable.o gup.o \ init_$(CONFIG_WORD_SIZE).o \ pgtable_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ @@ -14,7 +14,7 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ hash-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC64) += hash_utils_64.o \ slb_low.o slb.o stab.o \ - gup.o mmap.o $(hash-y) + mmap.o $(hash-y) obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ tlb_hash$(CONFIG_WORD_SIZE).o \ diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 91c7b8636b8..76993941cac 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -253,45 +253,33 @@ good_area: #endif /* CONFIG_8xx */ if (is_exec) { -#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) - /* protection fault */ +#ifdef CONFIG_PPC_STD_MMU + /* Protection fault on exec go straight to failure on + * Hash based MMUs as they either don't support per-page + * execute permission, or if they do, it's handled already + * at the hash level. This test would probably have to + * be removed if we change the way this works to make hash + * processors use the same I/D cache coherency mechanism + * as embedded. + */ if (error_code & DSISR_PROTFAULT) goto bad_area; +#endif /* CONFIG_PPC_STD_MMU */ + /* * Allow execution from readable areas if the MMU does not * provide separate controls over reading and executing. + * + * Note: That code used to not be enabled for 4xx/BookE. + * It is now as I/D cache coherency for these is done at + * set_pte_at() time and I see no reason why the test + * below wouldn't be valid on those processors. This -may- + * break programs compiled with a really old ABI though. */ if (!(vma->vm_flags & VM_EXEC) && (cpu_has_feature(CPU_FTR_NOEXECUTE) || !(vma->vm_flags & (VM_READ | VM_WRITE)))) goto bad_area; -#else - pte_t *ptep; - pmd_t *pmdp; - - /* Since 4xx/Book-E supports per-page execute permission, - * we lazily flush dcache to icache. */ - ptep = NULL; - if (get_pteptr(mm, address, &ptep, &pmdp)) { - spinlock_t *ptl = pte_lockptr(mm, pmdp); - spin_lock(ptl); - if (pte_present(*ptep)) { - struct page *page = pte_page(*ptep); - - if (!test_bit(PG_arch_1, &page->flags)) { - flush_dcache_icache_page(page); - set_bit(PG_arch_1, &page->flags); - } - pte_update(ptep, 0, _PAGE_HWEXEC | - _PAGE_ACCESSED); - local_flush_tlb_page(vma, address); - pte_unmap_unlock(ptep, ptl); - up_read(&mm->mmap_sem); - return 0; - } - pte_unmap_unlock(ptep, ptl); - } -#endif /* a write */ } else if (is_write) { if (!(vma->vm_flags & VM_WRITE)) diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index ea6e41e39d9..985b6c361ab 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -56,10 +56,14 @@ extern void loadcam_entry(unsigned int index); unsigned int tlbcam_index; -static unsigned long __cam0, __cam1, __cam2; +static unsigned long cam[CONFIG_LOWMEM_CAM_NUM]; #define NUM_TLBCAMS (16) +#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS) +#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS" +#endif + struct tlbcam TLBCAM[NUM_TLBCAMS]; struct tlbcamrange { @@ -107,7 +111,7 @@ void settlbcam(int index, unsigned long virt, phys_addr_t phys, unsigned int tsize, lz; asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size)); - tsize = (21 - lz) / 2; + tsize = 21 - lz; #ifdef CONFIG_SMP if ((flags & _PAGE_NO_CACHE) == 0) @@ -152,19 +156,19 @@ void invalidate_tlbcam_entry(int index) loadcam_entry(index); } -void __init cam_mapin_ram(unsigned long cam0, unsigned long cam1, - unsigned long cam2) +unsigned long __init mmu_mapin_ram(void) { - settlbcam(0, PAGE_OFFSET, memstart_addr, cam0, _PAGE_KERNEL, 0); - tlbcam_index++; - if (cam1) { - tlbcam_index++; - settlbcam(1, PAGE_OFFSET+cam0, memstart_addr+cam0, cam1, _PAGE_KERNEL, 0); - } - if (cam2) { + unsigned long virt = PAGE_OFFSET; + phys_addr_t phys = memstart_addr; + + while (cam[tlbcam_index] && tlbcam_index < ARRAY_SIZE(cam)) { + settlbcam(tlbcam_index, virt, phys, cam[tlbcam_index], _PAGE_KERNEL, 0); + virt += cam[tlbcam_index]; + phys += cam[tlbcam_index]; tlbcam_index++; - settlbcam(2, PAGE_OFFSET+cam0+cam1, memstart_addr+cam0+cam1, cam2, _PAGE_KERNEL, 0); } + + return virt - PAGE_OFFSET; } /* @@ -175,51 +179,46 @@ void __init MMU_init_hw(void) flush_instruction_cache(); } -unsigned long __init mmu_mapin_ram(void) -{ - cam_mapin_ram(__cam0, __cam1, __cam2); - - return __cam0 + __cam1 + __cam2; -} - - void __init adjust_total_lowmem(void) { - phys_addr_t max_lowmem_size = __max_low_memory; - phys_addr_t cam_max_size = 0x10000000; phys_addr_t ram; + unsigned int max_cam = (mfspr(SPRN_TLB1CFG) >> 16) & 0xff; + char buf[ARRAY_SIZE(cam) * 5 + 1], *p = buf; + int i; + unsigned long virt = PAGE_OFFSET & 0xffffffffUL; + unsigned long phys = memstart_addr & 0xffffffffUL; - /* adjust CAM size to max_lowmem_size */ - if (max_lowmem_size < cam_max_size) - cam_max_size = max_lowmem_size; + /* Convert (4^max) kB to (2^max) bytes */ + max_cam = max_cam * 2 + 10; - /* adjust lowmem size to max_lowmem_size */ - ram = min(max_lowmem_size, total_lowmem); + /* adjust lowmem size to __max_low_memory */ + ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem); /* Calculate CAM values */ - __cam0 = 1UL << 2 * (__ilog2(ram) / 2); - if (__cam0 > cam_max_size) - __cam0 = cam_max_size; - ram -= __cam0; - if (ram) { - __cam1 = 1UL << 2 * (__ilog2(ram) / 2); - if (__cam1 > cam_max_size) - __cam1 = cam_max_size; - ram -= __cam1; - } - if (ram) { - __cam2 = 1UL << 2 * (__ilog2(ram) / 2); - if (__cam2 > cam_max_size) - __cam2 = cam_max_size; - ram -= __cam2; + __max_low_memory = 0; + for (i = 0; ram && i < ARRAY_SIZE(cam); i++) { + unsigned int camsize = __ilog2(ram) & ~1U; + unsigned int align = __ffs(virt | phys) & ~1U; + + if (camsize > align) + camsize = align; + if (camsize > max_cam) + camsize = max_cam; + + cam[i] = 1UL << camsize; + ram -= cam[i]; + __max_low_memory += cam[i]; + virt += cam[i]; + phys += cam[i]; + + p += sprintf(p, "%lu/", cam[i] >> 20); } + for (; i < ARRAY_SIZE(cam); i++) + p += sprintf(p, "0/"); + p[-1] = '\0'; - printk(KERN_INFO "Memory CAM mapping: CAM0=%ldMb, CAM1=%ldMb," - " CAM2=%ldMb residual: %ldMb\n", - __cam0 >> 20, __cam1 >> 20, __cam2 >> 20, - (long int)((total_lowmem - __cam0 - __cam1 - __cam2) - >> 20)); - __max_low_memory = __cam0 + __cam1 + __cam2; + pr_info("Memory CAM mapping: %s Mb, residual: %dMb\n", buf, + (unsigned int)((total_lowmem - __max_low_memory) >> 20)); __initial_memory_limit_addr = memstart_addr + __max_low_memory; } diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c index 28a114db3ba..bc400c78c97 100644 --- a/arch/powerpc/mm/gup.c +++ b/arch/powerpc/mm/gup.c @@ -14,6 +14,8 @@ #include <linux/rwsem.h> #include <asm/pgtable.h> +#ifdef __HAVE_ARCH_PTE_SPECIAL + /* * The performance critical leaf functions are made noinline otherwise gcc * inlines everything into a single function which results in too much @@ -151,8 +153,11 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, unsigned long addr, len, end; unsigned long next; pgd_t *pgdp; - int psize, nr = 0; + int nr = 0; +#ifdef CONFIG_PPC64 unsigned int shift; + int psize; +#endif pr_debug("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read"); @@ -205,8 +210,13 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, */ local_irq_disable(); +#ifdef CONFIG_PPC64 + /* Those bits are related to hugetlbfs implementation and only exist + * on 64-bit for now + */ psize = get_slice_psize(mm, addr); shift = mmu_psize_defs[psize].shift; +#endif /* CONFIG_PPC64 */ #ifdef CONFIG_HUGETLB_PAGE if (unlikely(mmu_huge_psizes[psize])) { @@ -236,7 +246,9 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, do { pgd_t pgd = *pgdp; +#ifdef CONFIG_PPC64 VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, addr)].shift); +#endif pr_debug(" %016lx: normal pgd %p\n", addr, (void *)pgd_val(pgd)); next = pgd_addr_end(addr, end); @@ -279,3 +291,5 @@ slow_irqon: return ret; } } + +#endif /* __HAVE_ARCH_PTE_SPECIAL */ diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 8d5b4758c13..f5bc1b213f2 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -516,7 +516,7 @@ static int __init htab_dt_scan_pftsize(unsigned long node, static unsigned long __init htab_get_table_size(void) { - unsigned long mem_size, rnd_mem_size, pteg_count; + unsigned long mem_size, rnd_mem_size, pteg_count, psize; /* If hash size isn't already provided by the platform, we try to * retrieve it from the device-tree. If it's not there neither, we @@ -534,7 +534,8 @@ static unsigned long __init htab_get_table_size(void) rnd_mem_size <<= 1; /* # pages / 2 */ - pteg_count = max(rnd_mem_size >> (12 + 1), 1UL << 11); + psize = mmu_psize_defs[mmu_virtual_psize].shift; + pteg_count = max(rnd_mem_size >> (psize + 1), 1UL << 11); return pteg_count << 7; } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index f00f09a77f1..f668fa9ba80 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -472,40 +472,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, { #ifdef CONFIG_PPC_STD_MMU unsigned long access = 0, trap; -#endif - unsigned long pfn = pte_pfn(pte); - - /* handle i-cache coherency */ - if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && - !cpu_has_feature(CPU_FTR_NOEXECUTE) && - pfn_valid(pfn)) { - struct page *page = pfn_to_page(pfn); -#ifdef CONFIG_8xx - /* On 8xx, cache control instructions (particularly - * "dcbst" from flush_dcache_icache) fault as write - * operation if there is an unpopulated TLB entry - * for the address in question. To workaround that, - * we invalidate the TLB here, thus avoiding dcbst - * misbehaviour. - */ - _tlbil_va(address, 0 /* 8xx doesn't care about PID */); -#endif - /* The _PAGE_USER test should really be _PAGE_EXEC, but - * older glibc versions execute some code from no-exec - * pages, which for now we are supporting. If exec-only - * pages are ever implemented, this will have to change. - */ - if (!PageReserved(page) && (pte_val(pte) & _PAGE_USER) - && !test_bit(PG_arch_1, &page->flags)) { - if (vma->vm_mm == current->active_mm) { - __flush_dcache_icache((void *) address); - } else - flush_dcache_icache_page(page); - set_bit(PG_arch_1, &page->flags); - } - } -#ifdef CONFIG_PPC_STD_MMU /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ if (!pte_young(pte) || address >= TASK_SIZE) return; diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index 86010fc7d3b..0d957a4c70f 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -24,36 +24,26 @@ #include <linux/personality.h> #include <linux/mm.h> +#include <linux/random.h> #include <linux/sched.h> /* * Top of mmap area (just below the process stack). * - * Leave an at least ~128 MB hole. + * Leave at least a ~128 MB hole on 32bit applications. + * + * On 64bit applications we randomise the stack by 1GB so we need to + * space our mmap start address by a further 1GB, otherwise there is a + * chance the mmap area will end up closer to the stack than our ulimit + * requires. */ -#define MIN_GAP (128*1024*1024) +#define MIN_GAP32 (128*1024*1024) +#define MIN_GAP64 ((128 + 1024)*1024*1024UL) +#define MIN_GAP ((is_32bit_task()) ? MIN_GAP32 : MIN_GAP64) #define MAX_GAP (TASK_SIZE/6*5) -static inline unsigned long mmap_base(void) -{ - unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; - - if (gap < MIN_GAP) - gap = MIN_GAP; - else if (gap > MAX_GAP) - gap = MAX_GAP; - - return TASK_SIZE - (gap & PAGE_MASK); -} - static inline int mmap_is_legacy(void) { - /* - * Force standard allocation for 64 bit programs. - */ - if (!test_thread_flag(TIF_32BIT)) - return 1; - if (current->personality & ADDR_COMPAT_LAYOUT) return 1; @@ -64,6 +54,40 @@ static inline int mmap_is_legacy(void) } /* + * Since get_random_int() returns the same value within a 1 jiffy window, + * we will almost always get the same randomisation for the stack and mmap + * region. This will mean the relative distance between stack and mmap will + * be the same. + * + * To avoid this we can shift the randomness by 1 bit. + */ +static unsigned long mmap_rnd(void) +{ + unsigned long rnd = 0; + + if (current->flags & PF_RANDOMIZE) { + /* 8MB for 32bit, 1GB for 64bit */ + if (is_32bit_task()) + rnd = (long)(get_random_int() % (1<<(22-PAGE_SHIFT))); + else + rnd = (long)(get_random_int() % (1<<(29-PAGE_SHIFT))); + } + return (rnd << PAGE_SHIFT) * 2; +} + +static inline unsigned long mmap_base(void) +{ + unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + + return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd()); +} + +/* * This function, called very early during the creation of a new * process VM image, sets up which VM layout function to use: */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 5ac08b8ab65..9047145095a 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -158,35 +158,6 @@ static void unmap_cpu_from_node(unsigned long cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static struct device_node * __cpuinit find_cpu_node(unsigned int cpu) -{ - unsigned int hw_cpuid = get_hard_smp_processor_id(cpu); - struct device_node *cpu_node = NULL; - const unsigned int *interrupt_server, *reg; - int len; - - while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) { - /* Try interrupt server first */ - interrupt_server = of_get_property(cpu_node, - "ibm,ppc-interrupt-server#s", &len); - - len = len / sizeof(u32); - - if (interrupt_server && (len > 0)) { - while (len--) { - if (interrupt_server[len] == hw_cpuid) - return cpu_node; - } - } else { - reg = of_get_property(cpu_node, "reg", &len); - if (reg && (len > 0) && (reg[0] == hw_cpuid)) - return cpu_node; - } - } - - return NULL; -} - /* must hold reference to node during call */ static const int *of_get_associativity(struct device_node *dev) { @@ -290,7 +261,7 @@ static int __init find_min_common_depth(void) ref_points = of_get_property(rtas_root, "ibm,associativity-reference-points", &len); - if ((len >= 1) && ref_points) { + if ((len >= 2 * sizeof(unsigned int)) && ref_points) { depth = ref_points[1]; } else { dbg("NUMA: ibm,associativity-reference-points not found.\n"); @@ -470,7 +441,7 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, static int __cpuinit numa_setup_cpu(unsigned long lcpu) { int nid = 0; - struct device_node *cpu = find_cpu_node(lcpu); + struct device_node *cpu = of_get_cpu_node(lcpu, NULL); if (!cpu) { WARN_ON(1); @@ -652,7 +623,7 @@ static int __init parse_numa_properties(void) for_each_present_cpu(i) { int nid; - cpu = find_cpu_node(i); + cpu = of_get_cpu_node(i, NULL); BUG_ON(!cpu); nid = of_node_to_nid_single(cpu); of_node_put(cpu); @@ -1041,57 +1012,32 @@ early_param("numa", early_numa); #ifdef CONFIG_MEMORY_HOTPLUG /* - * Validate the node associated with the memory section we are - * trying to add. - */ -int valid_hot_add_scn(int *nid, unsigned long start, u32 lmb_size, - unsigned long scn_addr) -{ - nodemask_t nodes; - - if (*nid < 0 || !node_online(*nid)) - *nid = any_online_node(NODE_MASK_ALL); - - if ((scn_addr >= start) && (scn_addr < (start + lmb_size))) { - nodes_setall(nodes); - while (NODE_DATA(*nid)->node_spanned_pages == 0) { - node_clear(*nid, nodes); - *nid = any_online_node(nodes); - } - - return 1; - } - - return 0; -} - -/* - * Find the node associated with a hot added memory section represented - * by the ibm,dynamic-reconfiguration-memory node. + * Find the node associated with a hot added memory section for + * memory represented in the device tree by the property + * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory. */ static int hot_add_drconf_scn_to_nid(struct device_node *memory, unsigned long scn_addr) { const u32 *dm; - unsigned int n, rc; + unsigned int drconf_cell_cnt, rc; unsigned long lmb_size; - int default_nid = any_online_node(NODE_MASK_ALL); - int nid; struct assoc_arrays aa; + int nid = -1; - n = of_get_drconf_memory(memory, &dm); - if (!n) - return default_nid;; + drconf_cell_cnt = of_get_drconf_memory(memory, &dm); + if (!drconf_cell_cnt) + return -1; lmb_size = of_get_lmb_size(memory); if (!lmb_size) - return default_nid; + return -1; rc = of_get_assoc_arrays(memory, &aa); if (rc) - return default_nid; + return -1; - for (; n != 0; --n) { + for (; drconf_cell_cnt != 0; --drconf_cell_cnt) { struct of_drconf_cell drmem; read_drconf_cell(&drmem, &dm); @@ -1102,15 +1048,57 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, || !(drmem.flags & DRCONF_MEM_ASSIGNED)) continue; + if ((scn_addr < drmem.base_addr) + || (scn_addr >= (drmem.base_addr + lmb_size))) + continue; + nid = of_drconf_to_nid_single(&drmem, &aa); + break; + } + + return nid; +} - if (valid_hot_add_scn(&nid, drmem.base_addr, lmb_size, - scn_addr)) - return nid; +/* + * Find the node associated with a hot added memory section for memory + * represented in the device tree as a node (i.e. memory@XXXX) for + * each lmb. + */ +int hot_add_node_scn_to_nid(unsigned long scn_addr) +{ + struct device_node *memory = NULL; + int nid = -1; + + while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { + unsigned long start, size; + int ranges; + const unsigned int *memcell_buf; + unsigned int len; + + memcell_buf = of_get_property(memory, "reg", &len); + if (!memcell_buf || len <= 0) + continue; + + /* ranges in cell */ + ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); + + while (ranges--) { + start = read_n_cells(n_mem_addr_cells, &memcell_buf); + size = read_n_cells(n_mem_size_cells, &memcell_buf); + + if ((scn_addr < start) || (scn_addr >= (start + size))) + continue; + + nid = of_node_to_nid_single(memory); + break; + } + + of_node_put(memory); + if (nid >= 0) + break; } - BUG(); /* section address should be found above */ - return 0; + return nid; } /* @@ -1121,7 +1109,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, int hot_add_scn_to_nid(unsigned long scn_addr) { struct device_node *memory = NULL; - int nid; + int nid, found = 0; if (!numa_enabled || (min_common_depth < 0)) return any_online_node(NODE_MASK_ALL); @@ -1130,35 +1118,25 @@ int hot_add_scn_to_nid(unsigned long scn_addr) if (memory) { nid = hot_add_drconf_scn_to_nid(memory, scn_addr); of_node_put(memory); - return nid; + } else { + nid = hot_add_node_scn_to_nid(scn_addr); } - while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { - unsigned long start, size; - int ranges; - const unsigned int *memcell_buf; - unsigned int len; - - memcell_buf = of_get_property(memory, "reg", &len); - if (!memcell_buf || len <= 0) - continue; + if (nid < 0 || !node_online(nid)) + nid = any_online_node(NODE_MASK_ALL); - /* ranges in cell */ - ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); -ha_new_range: - start = read_n_cells(n_mem_addr_cells, &memcell_buf); - size = read_n_cells(n_mem_size_cells, &memcell_buf); - nid = of_node_to_nid_single(memory); + if (NODE_DATA(nid)->node_spanned_pages) + return nid; - if (valid_hot_add_scn(&nid, start, size, scn_addr)) { - of_node_put(memory); - return nid; + for_each_online_node(nid) { + if (NODE_DATA(nid)->node_spanned_pages) { + found = 1; + break; } - - if (--ranges) /* process all ranges in cell */ - goto ha_new_range; } - BUG(); /* section address should be found above */ - return 0; + + BUG_ON(!found); + return nid; } + #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 6d94116fdea..a27ded3adac 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -1,5 +1,6 @@ /* * This file contains common routines for dealing with free of page tables + * Along with common page table handling code * * Derived from arch/powerpc/mm/tlb_64.c: * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) @@ -115,3 +116,133 @@ void pte_free_finish(void) pte_free_submit(*batchp); *batchp = NULL; } + +/* + * Handle i/d cache flushing, called from set_pte_at() or ptep_set_access_flags() + */ +static pte_t do_dcache_icache_coherency(pte_t pte) +{ + unsigned long pfn = pte_pfn(pte); + struct page *page; + + if (unlikely(!pfn_valid(pfn))) + return pte; + page = pfn_to_page(pfn); + + if (!PageReserved(page) && !test_bit(PG_arch_1, &page->flags)) { + pr_debug("do_dcache_icache_coherency... flushing\n"); + flush_dcache_icache_page(page); + set_bit(PG_arch_1, &page->flags); + } + else + pr_debug("do_dcache_icache_coherency... already clean\n"); + return __pte(pte_val(pte) | _PAGE_HWEXEC); +} + +static inline int is_exec_fault(void) +{ + return current->thread.regs && TRAP(current->thread.regs) == 0x400; +} + +/* We only try to do i/d cache coherency on stuff that looks like + * reasonably "normal" PTEs. We currently require a PTE to be present + * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE + */ +static inline int pte_looks_normal(pte_t pte) +{ + return (pte_val(pte) & + (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) == + (_PAGE_PRESENT); +} + +#if defined(CONFIG_PPC_STD_MMU) +/* Server-style MMU handles coherency when hashing if HW exec permission + * is supposed per page (currently 64-bit only). Else, we always flush + * valid PTEs in set_pte. + */ +static inline int pte_need_exec_flush(pte_t pte, int set_pte) +{ + return set_pte && pte_looks_normal(pte) && + !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || + cpu_has_feature(CPU_FTR_NOEXECUTE)); +} +#elif _PAGE_HWEXEC == 0 +/* Embedded type MMU without HW exec support (8xx only so far), we flush + * the cache for any present PTE + */ +static inline int pte_need_exec_flush(pte_t pte, int set_pte) +{ + return set_pte && pte_looks_normal(pte); +} +#else +/* Other embedded CPUs with HW exec support per-page, we flush on exec + * fault if HWEXEC is not set + */ +static inline int pte_need_exec_flush(pte_t pte, int set_pte) +{ + return pte_looks_normal(pte) && is_exec_fault() && + !(pte_val(pte) & _PAGE_HWEXEC); +} +#endif + +/* + * set_pte stores a linux PTE into the linux page table. + */ +void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +{ +#ifdef CONFIG_DEBUG_VM + WARN_ON(pte_present(*ptep)); +#endif + /* Note: mm->context.id might not yet have been assigned as + * this context might not have been activated yet when this + * is called. + */ + pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); + if (pte_need_exec_flush(pte, 1)) + pte = do_dcache_icache_coherency(pte); + + /* Perform the setting of the PTE */ + __set_pte_at(mm, addr, ptep, pte, 0); +} + +/* + * This is called when relaxing access to a PTE. It's also called in the page + * fault path when we don't hit any of the major fault cases, ie, a minor + * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have + * handled those two for us, we additionally deal with missing execute + * permission here on some processors + */ +int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep, pte_t entry, int dirty) +{ + int changed; + if (!dirty && pte_need_exec_flush(entry, 0)) + entry = do_dcache_icache_coherency(entry); + changed = !pte_same(*(ptep), entry); + if (changed) { + assert_pte_locked(vma->vm_mm, address); + __ptep_set_access_flags(ptep, entry); + flush_tlb_page_nohash(vma, address); + } + return changed; +} + +#ifdef CONFIG_DEBUG_VM +void assert_pte_locked(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + if (mm == &init_mm) + return; + pgd = mm->pgd + pgd_index(addr); + BUG_ON(pgd_none(*pgd)); + pud = pud_offset(pgd, addr); + BUG_ON(pud_none(*pud)); + pmd = pmd_offset(pud, addr); + BUG_ON(!pmd_present(*pmd)); + BUG_ON(!spin_is_locked(pte_lockptr(mm, pmd))); +} +#endif /* CONFIG_DEBUG_VM */ + diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 58bcaeba728..0f8c4371dfa 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -129,7 +129,8 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) void __iomem * ioremap(phys_addr_t addr, unsigned long size) { - return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED); + return __ioremap_caller(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED, + __builtin_return_address(0)); } EXPORT_SYMBOL(ioremap); @@ -143,13 +144,20 @@ ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags) /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ flags &= ~(_PAGE_USER | _PAGE_EXEC | _PAGE_HWEXEC); - return __ioremap(addr, size, flags); + return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); } EXPORT_SYMBOL(ioremap_flags); void __iomem * __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) { + return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); +} + +void __iomem * +__ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags, + void *caller) +{ unsigned long v, i; phys_addr_t p; int err; @@ -212,7 +220,7 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) if (mem_init_done) { struct vm_struct *area; - area = get_vm_area(size, VM_IOREMAP); + area = get_vm_area_caller(size, VM_IOREMAP, caller); if (area == 0) return NULL; v = (unsigned long) area->addr; diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 365e61ae5db..bfa7db6b2fd 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -144,8 +144,8 @@ void __iounmap_at(void *ea, unsigned long size) unmap_kernel_range((unsigned long)ea, size); } -void __iomem * __ioremap(phys_addr_t addr, unsigned long size, - unsigned long flags) +void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size, + unsigned long flags, void *caller) { phys_addr_t paligned; void __iomem *ret; @@ -168,8 +168,9 @@ void __iomem * __ioremap(phys_addr_t addr, unsigned long size, if (mem_init_done) { struct vm_struct *area; - area = __get_vm_area(size, VM_IOREMAP, - ioremap_bot, IOREMAP_END); + area = __get_vm_area_caller(size, VM_IOREMAP, + ioremap_bot, IOREMAP_END, + caller); if (area == NULL) return NULL; ret = __ioremap_at(paligned, area->addr, size, flags); @@ -186,19 +187,27 @@ void __iomem * __ioremap(phys_addr_t addr, unsigned long size, return ret; } +void __iomem * __ioremap(phys_addr_t addr, unsigned long size, + unsigned long flags) +{ + return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); +} void __iomem * ioremap(phys_addr_t addr, unsigned long size) { unsigned long flags = _PAGE_NO_CACHE | _PAGE_GUARDED; + void *caller = __builtin_return_address(0); if (ppc_md.ioremap) - return ppc_md.ioremap(addr, size, flags); - return __ioremap(addr, size, flags); + return ppc_md.ioremap(addr, size, flags, caller); + return __ioremap_caller(addr, size, flags, caller); } void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags) { + void *caller = __builtin_return_address(0); + /* writeable implies dirty for kernel addresses */ if (flags & _PAGE_RW) flags |= _PAGE_DIRTY; @@ -207,8 +216,8 @@ void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size, flags &= ~(_PAGE_USER | _PAGE_EXEC); if (ppc_md.ioremap) - return ppc_md.ioremap(addr, size, flags); - return __ioremap(addr, size, flags); + return ppc_md.ioremap(addr, size, flags, caller); + return __ioremap_caller(addr, size, flags, caller); } diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index f900a39e6ec..788b87c36f7 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -118,25 +118,50 @@ _GLOBAL(_tlbil_pid) #elif defined(CONFIG_FSL_BOOKE) /* - * FSL BookE implementations. Currently _pid and _all are the - * same. This will change when tlbilx is actually supported and - * performs invalidate-by-PID. This change will be driven by - * mmu_features conditional + * FSL BookE implementations. + * + * Since feature sections are using _SECTION_ELSE we need + * to have the larger code path before the _SECTION_ELSE */ +#define MMUCSR0_TLBFI (MMUCSR0_TLB0FI | MMUCSR0_TLB1FI | \ + MMUCSR0_TLB2FI | MMUCSR0_TLB3FI) /* * Flush MMU TLB on the local processor */ -_GLOBAL(_tlbil_pid) _GLOBAL(_tlbil_all) -#define MMUCSR0_TLBFI (MMUCSR0_TLB0FI | MMUCSR0_TLB1FI | \ - MMUCSR0_TLB2FI | MMUCSR0_TLB3FI) +BEGIN_MMU_FTR_SECTION + li r3,(MMUCSR0_TLBFI)@l + mtspr SPRN_MMUCSR0, r3 +1: + mfspr r3,SPRN_MMUCSR0 + andi. r3,r3,MMUCSR0_TLBFI@l + bne 1b +MMU_FTR_SECTION_ELSE + PPC_TLBILX_ALL(0,0) +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX) + msync + isync + blr + +_GLOBAL(_tlbil_pid) +BEGIN_MMU_FTR_SECTION + slwi r3,r3,16 + mfmsr r10 + wrteei 0 + mfspr r4,SPRN_MAS6 /* save MAS6 */ + mtspr SPRN_MAS6,r3 + PPC_TLBILX_PID(0,0) + mtspr SPRN_MAS6,r4 /* restore MAS6 */ + wrtee r10 +MMU_FTR_SECTION_ELSE li r3,(MMUCSR0_TLBFI)@l mtspr SPRN_MMUCSR0, r3 1: mfspr r3,SPRN_MMUCSR0 andi. r3,r3,MMUCSR0_TLBFI@l bne 1b +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBILX) msync isync blr @@ -149,7 +174,9 @@ _GLOBAL(_tlbil_va) mfmsr r10 wrteei 0 slwi r4,r4,16 + ori r4,r4,(MAS6_ISIZE(BOOK3E_PAGESZ_4K))@l mtspr SPRN_MAS6,r4 /* assume AS=0 for now */ +BEGIN_MMU_FTR_SECTION tlbsx 0,r3 mfspr r4,SPRN_MAS1 /* check valid */ andis. r3,r4,MAS1_VALID@h @@ -157,6 +184,9 @@ _GLOBAL(_tlbil_va) rlwinm r4,r4,0,1,31 mtspr SPRN_MAS1,r4 tlbwe +MMU_FTR_SECTION_ELSE + PPC_TLBILX_VA(0,r3) +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX) msync isync 1: wrtee r10 |