aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/Makefile4
-rw-r--r--arch/powerpc/mm/fault.c46
-rw-r--r--arch/powerpc/mm/fsl_booke_mmu.c95
-rw-r--r--arch/powerpc/mm/gup.c16
-rw-r--r--arch/powerpc/mm/hash_utils_64.c5
-rw-r--r--arch/powerpc/mm/mem.c33
-rw-r--r--arch/powerpc/mm/mmap.c64
-rw-r--r--arch/powerpc/mm/numa.c174
-rw-r--r--arch/powerpc/mm/pgtable.c131
-rw-r--r--arch/powerpc/mm/pgtable_32.c14
-rw-r--r--arch/powerpc/mm/pgtable_64.c25
-rw-r--r--arch/powerpc/mm/tlb_nohash_low.S44
12 files changed, 400 insertions, 251 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 953cc4a1cde..6d2838fc879 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -6,7 +6,7 @@ ifeq ($(CONFIG_PPC64),y)
EXTRA_CFLAGS += -mno-minimal-toc
endif
-obj-y := fault.o mem.o pgtable.o \
+obj-y := fault.o mem.o pgtable.o gup.o \
init_$(CONFIG_WORD_SIZE).o \
pgtable_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
@@ -14,7 +14,7 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
hash-$(CONFIG_PPC_NATIVE) := hash_native_64.o
obj-$(CONFIG_PPC64) += hash_utils_64.o \
slb_low.o slb.o stab.o \
- gup.o mmap.o $(hash-y)
+ mmap.o $(hash-y)
obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o
obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \
tlb_hash$(CONFIG_WORD_SIZE).o \
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 91c7b8636b8..76993941cac 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -253,45 +253,33 @@ good_area:
#endif /* CONFIG_8xx */
if (is_exec) {
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
- /* protection fault */
+#ifdef CONFIG_PPC_STD_MMU
+ /* Protection fault on exec go straight to failure on
+ * Hash based MMUs as they either don't support per-page
+ * execute permission, or if they do, it's handled already
+ * at the hash level. This test would probably have to
+ * be removed if we change the way this works to make hash
+ * processors use the same I/D cache coherency mechanism
+ * as embedded.
+ */
if (error_code & DSISR_PROTFAULT)
goto bad_area;
+#endif /* CONFIG_PPC_STD_MMU */
+
/*
* Allow execution from readable areas if the MMU does not
* provide separate controls over reading and executing.
+ *
+ * Note: That code used to not be enabled for 4xx/BookE.
+ * It is now as I/D cache coherency for these is done at
+ * set_pte_at() time and I see no reason why the test
+ * below wouldn't be valid on those processors. This -may-
+ * break programs compiled with a really old ABI though.
*/
if (!(vma->vm_flags & VM_EXEC) &&
(cpu_has_feature(CPU_FTR_NOEXECUTE) ||
!(vma->vm_flags & (VM_READ | VM_WRITE))))
goto bad_area;
-#else
- pte_t *ptep;
- pmd_t *pmdp;
-
- /* Since 4xx/Book-E supports per-page execute permission,
- * we lazily flush dcache to icache. */
- ptep = NULL;
- if (get_pteptr(mm, address, &ptep, &pmdp)) {
- spinlock_t *ptl = pte_lockptr(mm, pmdp);
- spin_lock(ptl);
- if (pte_present(*ptep)) {
- struct page *page = pte_page(*ptep);
-
- if (!test_bit(PG_arch_1, &page->flags)) {
- flush_dcache_icache_page(page);
- set_bit(PG_arch_1, &page->flags);
- }
- pte_update(ptep, 0, _PAGE_HWEXEC |
- _PAGE_ACCESSED);
- local_flush_tlb_page(vma, address);
- pte_unmap_unlock(ptep, ptl);
- up_read(&mm->mmap_sem);
- return 0;
- }
- pte_unmap_unlock(ptep, ptl);
- }
-#endif
/* a write */
} else if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index ea6e41e39d9..985b6c361ab 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -56,10 +56,14 @@
extern void loadcam_entry(unsigned int index);
unsigned int tlbcam_index;
-static unsigned long __cam0, __cam1, __cam2;
+static unsigned long cam[CONFIG_LOWMEM_CAM_NUM];
#define NUM_TLBCAMS (16)
+#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS)
+#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS"
+#endif
+
struct tlbcam TLBCAM[NUM_TLBCAMS];
struct tlbcamrange {
@@ -107,7 +111,7 @@ void settlbcam(int index, unsigned long virt, phys_addr_t phys,
unsigned int tsize, lz;
asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size));
- tsize = (21 - lz) / 2;
+ tsize = 21 - lz;
#ifdef CONFIG_SMP
if ((flags & _PAGE_NO_CACHE) == 0)
@@ -152,19 +156,19 @@ void invalidate_tlbcam_entry(int index)
loadcam_entry(index);
}
-void __init cam_mapin_ram(unsigned long cam0, unsigned long cam1,
- unsigned long cam2)
+unsigned long __init mmu_mapin_ram(void)
{
- settlbcam(0, PAGE_OFFSET, memstart_addr, cam0, _PAGE_KERNEL, 0);
- tlbcam_index++;
- if (cam1) {
- tlbcam_index++;
- settlbcam(1, PAGE_OFFSET+cam0, memstart_addr+cam0, cam1, _PAGE_KERNEL, 0);
- }
- if (cam2) {
+ unsigned long virt = PAGE_OFFSET;
+ phys_addr_t phys = memstart_addr;
+
+ while (cam[tlbcam_index] && tlbcam_index < ARRAY_SIZE(cam)) {
+ settlbcam(tlbcam_index, virt, phys, cam[tlbcam_index], _PAGE_KERNEL, 0);
+ virt += cam[tlbcam_index];
+ phys += cam[tlbcam_index];
tlbcam_index++;
- settlbcam(2, PAGE_OFFSET+cam0+cam1, memstart_addr+cam0+cam1, cam2, _PAGE_KERNEL, 0);
}
+
+ return virt - PAGE_OFFSET;
}
/*
@@ -175,51 +179,46 @@ void __init MMU_init_hw(void)
flush_instruction_cache();
}
-unsigned long __init mmu_mapin_ram(void)
-{
- cam_mapin_ram(__cam0, __cam1, __cam2);
-
- return __cam0 + __cam1 + __cam2;
-}
-
-
void __init
adjust_total_lowmem(void)
{
- phys_addr_t max_lowmem_size = __max_low_memory;
- phys_addr_t cam_max_size = 0x10000000;
phys_addr_t ram;
+ unsigned int max_cam = (mfspr(SPRN_TLB1CFG) >> 16) & 0xff;
+ char buf[ARRAY_SIZE(cam) * 5 + 1], *p = buf;
+ int i;
+ unsigned long virt = PAGE_OFFSET & 0xffffffffUL;
+ unsigned long phys = memstart_addr & 0xffffffffUL;
- /* adjust CAM size to max_lowmem_size */
- if (max_lowmem_size < cam_max_size)
- cam_max_size = max_lowmem_size;
+ /* Convert (4^max) kB to (2^max) bytes */
+ max_cam = max_cam * 2 + 10;
- /* adjust lowmem size to max_lowmem_size */
- ram = min(max_lowmem_size, total_lowmem);
+ /* adjust lowmem size to __max_low_memory */
+ ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem);
/* Calculate CAM values */
- __cam0 = 1UL << 2 * (__ilog2(ram) / 2);
- if (__cam0 > cam_max_size)
- __cam0 = cam_max_size;
- ram -= __cam0;
- if (ram) {
- __cam1 = 1UL << 2 * (__ilog2(ram) / 2);
- if (__cam1 > cam_max_size)
- __cam1 = cam_max_size;
- ram -= __cam1;
- }
- if (ram) {
- __cam2 = 1UL << 2 * (__ilog2(ram) / 2);
- if (__cam2 > cam_max_size)
- __cam2 = cam_max_size;
- ram -= __cam2;
+ __max_low_memory = 0;
+ for (i = 0; ram && i < ARRAY_SIZE(cam); i++) {
+ unsigned int camsize = __ilog2(ram) & ~1U;
+ unsigned int align = __ffs(virt | phys) & ~1U;
+
+ if (camsize > align)
+ camsize = align;
+ if (camsize > max_cam)
+ camsize = max_cam;
+
+ cam[i] = 1UL << camsize;
+ ram -= cam[i];
+ __max_low_memory += cam[i];
+ virt += cam[i];
+ phys += cam[i];
+
+ p += sprintf(p, "%lu/", cam[i] >> 20);
}
+ for (; i < ARRAY_SIZE(cam); i++)
+ p += sprintf(p, "0/");
+ p[-1] = '\0';
- printk(KERN_INFO "Memory CAM mapping: CAM0=%ldMb, CAM1=%ldMb,"
- " CAM2=%ldMb residual: %ldMb\n",
- __cam0 >> 20, __cam1 >> 20, __cam2 >> 20,
- (long int)((total_lowmem - __cam0 - __cam1 - __cam2)
- >> 20));
- __max_low_memory = __cam0 + __cam1 + __cam2;
+ pr_info("Memory CAM mapping: %s Mb, residual: %dMb\n", buf,
+ (unsigned int)((total_lowmem - __max_low_memory) >> 20));
__initial_memory_limit_addr = memstart_addr + __max_low_memory;
}
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
index 28a114db3ba..bc400c78c97 100644
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -14,6 +14,8 @@
#include <linux/rwsem.h>
#include <asm/pgtable.h>
+#ifdef __HAVE_ARCH_PTE_SPECIAL
+
/*
* The performance critical leaf functions are made noinline otherwise gcc
* inlines everything into a single function which results in too much
@@ -151,8 +153,11 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
unsigned long addr, len, end;
unsigned long next;
pgd_t *pgdp;
- int psize, nr = 0;
+ int nr = 0;
+#ifdef CONFIG_PPC64
unsigned int shift;
+ int psize;
+#endif
pr_debug("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");
@@ -205,8 +210,13 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
*/
local_irq_disable();
+#ifdef CONFIG_PPC64
+ /* Those bits are related to hugetlbfs implementation and only exist
+ * on 64-bit for now
+ */
psize = get_slice_psize(mm, addr);
shift = mmu_psize_defs[psize].shift;
+#endif /* CONFIG_PPC64 */
#ifdef CONFIG_HUGETLB_PAGE
if (unlikely(mmu_huge_psizes[psize])) {
@@ -236,7 +246,9 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
do {
pgd_t pgd = *pgdp;
+#ifdef CONFIG_PPC64
VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, addr)].shift);
+#endif
pr_debug(" %016lx: normal pgd %p\n", addr,
(void *)pgd_val(pgd));
next = pgd_addr_end(addr, end);
@@ -279,3 +291,5 @@ slow_irqon:
return ret;
}
}
+
+#endif /* __HAVE_ARCH_PTE_SPECIAL */
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 8d5b4758c13..f5bc1b213f2 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -516,7 +516,7 @@ static int __init htab_dt_scan_pftsize(unsigned long node,
static unsigned long __init htab_get_table_size(void)
{
- unsigned long mem_size, rnd_mem_size, pteg_count;
+ unsigned long mem_size, rnd_mem_size, pteg_count, psize;
/* If hash size isn't already provided by the platform, we try to
* retrieve it from the device-tree. If it's not there neither, we
@@ -534,7 +534,8 @@ static unsigned long __init htab_get_table_size(void)
rnd_mem_size <<= 1;
/* # pages / 2 */
- pteg_count = max(rnd_mem_size >> (12 + 1), 1UL << 11);
+ psize = mmu_psize_defs[mmu_virtual_psize].shift;
+ pteg_count = max(rnd_mem_size >> (psize + 1), 1UL << 11);
return pteg_count << 7;
}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index f00f09a77f1..f668fa9ba80 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -472,40 +472,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
{
#ifdef CONFIG_PPC_STD_MMU
unsigned long access = 0, trap;
-#endif
- unsigned long pfn = pte_pfn(pte);
-
- /* handle i-cache coherency */
- if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
- !cpu_has_feature(CPU_FTR_NOEXECUTE) &&
- pfn_valid(pfn)) {
- struct page *page = pfn_to_page(pfn);
-#ifdef CONFIG_8xx
- /* On 8xx, cache control instructions (particularly
- * "dcbst" from flush_dcache_icache) fault as write
- * operation if there is an unpopulated TLB entry
- * for the address in question. To workaround that,
- * we invalidate the TLB here, thus avoiding dcbst
- * misbehaviour.
- */
- _tlbil_va(address, 0 /* 8xx doesn't care about PID */);
-#endif
- /* The _PAGE_USER test should really be _PAGE_EXEC, but
- * older glibc versions execute some code from no-exec
- * pages, which for now we are supporting. If exec-only
- * pages are ever implemented, this will have to change.
- */
- if (!PageReserved(page) && (pte_val(pte) & _PAGE_USER)
- && !test_bit(PG_arch_1, &page->flags)) {
- if (vma->vm_mm == current->active_mm) {
- __flush_dcache_icache((void *) address);
- } else
- flush_dcache_icache_page(page);
- set_bit(PG_arch_1, &page->flags);
- }
- }
-#ifdef CONFIG_PPC_STD_MMU
/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
if (!pte_young(pte) || address >= TASK_SIZE)
return;
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 86010fc7d3b..0d957a4c70f 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -24,36 +24,26 @@
#include <linux/personality.h>
#include <linux/mm.h>
+#include <linux/random.h>
#include <linux/sched.h>
/*
* Top of mmap area (just below the process stack).
*
- * Leave an at least ~128 MB hole.
+ * Leave at least a ~128 MB hole on 32bit applications.
+ *
+ * On 64bit applications we randomise the stack by 1GB so we need to
+ * space our mmap start address by a further 1GB, otherwise there is a
+ * chance the mmap area will end up closer to the stack than our ulimit
+ * requires.
*/
-#define MIN_GAP (128*1024*1024)
+#define MIN_GAP32 (128*1024*1024)
+#define MIN_GAP64 ((128 + 1024)*1024*1024UL)
+#define MIN_GAP ((is_32bit_task()) ? MIN_GAP32 : MIN_GAP64)
#define MAX_GAP (TASK_SIZE/6*5)
-static inline unsigned long mmap_base(void)
-{
- unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
-
- if (gap < MIN_GAP)
- gap = MIN_GAP;
- else if (gap > MAX_GAP)
- gap = MAX_GAP;
-
- return TASK_SIZE - (gap & PAGE_MASK);
-}
-
static inline int mmap_is_legacy(void)
{
- /*
- * Force standard allocation for 64 bit programs.
- */
- if (!test_thread_flag(TIF_32BIT))
- return 1;
-
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
@@ -64,6 +54,40 @@ static inline int mmap_is_legacy(void)
}
/*
+ * Since get_random_int() returns the same value within a 1 jiffy window,
+ * we will almost always get the same randomisation for the stack and mmap
+ * region. This will mean the relative distance between stack and mmap will
+ * be the same.
+ *
+ * To avoid this we can shift the randomness by 1 bit.
+ */
+static unsigned long mmap_rnd(void)
+{
+ unsigned long rnd = 0;
+
+ if (current->flags & PF_RANDOMIZE) {
+ /* 8MB for 32bit, 1GB for 64bit */
+ if (is_32bit_task())
+ rnd = (long)(get_random_int() % (1<<(22-PAGE_SHIFT)));
+ else
+ rnd = (long)(get_random_int() % (1<<(29-PAGE_SHIFT)));
+ }
+ return (rnd << PAGE_SHIFT) * 2;
+}
+
+static inline unsigned long mmap_base(void)
+{
+ unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+
+ if (gap < MIN_GAP)
+ gap = MIN_GAP;
+ else if (gap > MAX_GAP)
+ gap = MAX_GAP;
+
+ return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd());
+}
+
+/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
*/
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 5ac08b8ab65..9047145095a 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -158,35 +158,6 @@ static void unmap_cpu_from_node(unsigned long cpu)
}
#endif /* CONFIG_HOTPLUG_CPU */
-static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
-{
- unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
- struct device_node *cpu_node = NULL;
- const unsigned int *interrupt_server, *reg;
- int len;
-
- while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) {
- /* Try interrupt server first */
- interrupt_server = of_get_property(cpu_node,
- "ibm,ppc-interrupt-server#s", &len);
-
- len = len / sizeof(u32);
-
- if (interrupt_server && (len > 0)) {
- while (len--) {
- if (interrupt_server[len] == hw_cpuid)
- return cpu_node;
- }
- } else {
- reg = of_get_property(cpu_node, "reg", &len);
- if (reg && (len > 0) && (reg[0] == hw_cpuid))
- return cpu_node;
- }
- }
-
- return NULL;
-}
-
/* must hold reference to node during call */
static const int *of_get_associativity(struct device_node *dev)
{
@@ -290,7 +261,7 @@ static int __init find_min_common_depth(void)
ref_points = of_get_property(rtas_root,
"ibm,associativity-reference-points", &len);
- if ((len >= 1) && ref_points) {
+ if ((len >= 2 * sizeof(unsigned int)) && ref_points) {
depth = ref_points[1];
} else {
dbg("NUMA: ibm,associativity-reference-points not found.\n");
@@ -470,7 +441,7 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
static int __cpuinit numa_setup_cpu(unsigned long lcpu)
{
int nid = 0;
- struct device_node *cpu = find_cpu_node(lcpu);
+ struct device_node *cpu = of_get_cpu_node(lcpu, NULL);
if (!cpu) {
WARN_ON(1);
@@ -652,7 +623,7 @@ static int __init parse_numa_properties(void)
for_each_present_cpu(i) {
int nid;
- cpu = find_cpu_node(i);
+ cpu = of_get_cpu_node(i, NULL);
BUG_ON(!cpu);
nid = of_node_to_nid_single(cpu);
of_node_put(cpu);
@@ -1041,57 +1012,32 @@ early_param("numa", early_numa);
#ifdef CONFIG_MEMORY_HOTPLUG
/*
- * Validate the node associated with the memory section we are
- * trying to add.
- */
-int valid_hot_add_scn(int *nid, unsigned long start, u32 lmb_size,
- unsigned long scn_addr)
-{
- nodemask_t nodes;
-
- if (*nid < 0 || !node_online(*nid))
- *nid = any_online_node(NODE_MASK_ALL);
-
- if ((scn_addr >= start) && (scn_addr < (start + lmb_size))) {
- nodes_setall(nodes);
- while (NODE_DATA(*nid)->node_spanned_pages == 0) {
- node_clear(*nid, nodes);
- *nid = any_online_node(nodes);
- }
-
- return 1;
- }
-
- return 0;
-}
-
-/*
- * Find the node associated with a hot added memory section represented
- * by the ibm,dynamic-reconfiguration-memory node.
+ * Find the node associated with a hot added memory section for
+ * memory represented in the device tree by the property
+ * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory.
*/
static int hot_add_drconf_scn_to_nid(struct device_node *memory,
unsigned long scn_addr)
{
const u32 *dm;
- unsigned int n, rc;
+ unsigned int drconf_cell_cnt, rc;
unsigned long lmb_size;
- int default_nid = any_online_node(NODE_MASK_ALL);
- int nid;
struct assoc_arrays aa;
+ int nid = -1;
- n = of_get_drconf_memory(memory, &dm);
- if (!n)
- return default_nid;;
+ drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
+ if (!drconf_cell_cnt)
+ return -1;
lmb_size = of_get_lmb_size(memory);
if (!lmb_size)
- return default_nid;
+ return -1;
rc = of_get_assoc_arrays(memory, &aa);
if (rc)
- return default_nid;
+ return -1;
- for (; n != 0; --n) {
+ for (; drconf_cell_cnt != 0; --drconf_cell_cnt) {
struct of_drconf_cell drmem;
read_drconf_cell(&drmem, &dm);
@@ -1102,15 +1048,57 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory,
|| !(drmem.flags & DRCONF_MEM_ASSIGNED))
continue;
+ if ((scn_addr < drmem.base_addr)
+ || (scn_addr >= (drmem.base_addr + lmb_size)))
+ continue;
+
nid = of_drconf_to_nid_single(&drmem, &aa);
+ break;
+ }
+
+ return nid;
+}
- if (valid_hot_add_scn(&nid, drmem.base_addr, lmb_size,
- scn_addr))
- return nid;
+/*
+ * Find the node associated with a hot added memory section for memory
+ * represented in the device tree as a node (i.e. memory@XXXX) for
+ * each lmb.
+ */
+int hot_add_node_scn_to_nid(unsigned long scn_addr)
+{
+ struct device_node *memory = NULL;
+ int nid = -1;
+
+ while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
+ unsigned long start, size;
+ int ranges;
+ const unsigned int *memcell_buf;
+ unsigned int len;
+
+ memcell_buf = of_get_property(memory, "reg", &len);
+ if (!memcell_buf || len <= 0)
+ continue;
+
+ /* ranges in cell */
+ ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
+
+ while (ranges--) {
+ start = read_n_cells(n_mem_addr_cells, &memcell_buf);
+ size = read_n_cells(n_mem_size_cells, &memcell_buf);
+
+ if ((scn_addr < start) || (scn_addr >= (start + size)))
+ continue;
+
+ nid = of_node_to_nid_single(memory);
+ break;
+ }
+
+ of_node_put(memory);
+ if (nid >= 0)
+ break;
}
- BUG(); /* section address should be found above */
- return 0;
+ return nid;
}
/*
@@ -1121,7 +1109,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory,
int hot_add_scn_to_nid(unsigned long scn_addr)
{
struct device_node *memory = NULL;
- int nid;
+ int nid, found = 0;
if (!numa_enabled || (min_common_depth < 0))
return any_online_node(NODE_MASK_ALL);
@@ -1130,35 +1118,25 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
if (memory) {
nid = hot_add_drconf_scn_to_nid(memory, scn_addr);
of_node_put(memory);
- return nid;
+ } else {
+ nid = hot_add_node_scn_to_nid(scn_addr);
}
- while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
- unsigned long start, size;
- int ranges;
- const unsigned int *memcell_buf;
- unsigned int len;
-
- memcell_buf = of_get_property(memory, "reg", &len);
- if (!memcell_buf || len <= 0)
- continue;
+ if (nid < 0 || !node_online(nid))
+ nid = any_online_node(NODE_MASK_ALL);
- /* ranges in cell */
- ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
-ha_new_range:
- start = read_n_cells(n_mem_addr_cells, &memcell_buf);
- size = read_n_cells(n_mem_size_cells, &memcell_buf);
- nid = of_node_to_nid_single(memory);
+ if (NODE_DATA(nid)->node_spanned_pages)
+ return nid;
- if (valid_hot_add_scn(&nid, start, size, scn_addr)) {
- of_node_put(memory);
- return nid;
+ for_each_online_node(nid) {
+ if (NODE_DATA(nid)->node_spanned_pages) {
+ found = 1;
+ break;
}
-
- if (--ranges) /* process all ranges in cell */
- goto ha_new_range;
}
- BUG(); /* section address should be found above */
- return 0;
+
+ BUG_ON(!found);
+ return nid;
}
+
#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 6d94116fdea..a27ded3adac 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -1,5 +1,6 @@
/*
* This file contains common routines for dealing with free of page tables
+ * Along with common page table handling code
*
* Derived from arch/powerpc/mm/tlb_64.c:
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -115,3 +116,133 @@ void pte_free_finish(void)
pte_free_submit(*batchp);
*batchp = NULL;
}
+
+/*
+ * Handle i/d cache flushing, called from set_pte_at() or ptep_set_access_flags()
+ */
+static pte_t do_dcache_icache_coherency(pte_t pte)
+{
+ unsigned long pfn = pte_pfn(pte);
+ struct page *page;
+
+ if (unlikely(!pfn_valid(pfn)))
+ return pte;
+ page = pfn_to_page(pfn);
+
+ if (!PageReserved(page) && !test_bit(PG_arch_1, &page->flags)) {
+ pr_debug("do_dcache_icache_coherency... flushing\n");
+ flush_dcache_icache_page(page);
+ set_bit(PG_arch_1, &page->flags);
+ }
+ else
+ pr_debug("do_dcache_icache_coherency... already clean\n");
+ return __pte(pte_val(pte) | _PAGE_HWEXEC);
+}
+
+static inline int is_exec_fault(void)
+{
+ return current->thread.regs && TRAP(current->thread.regs) == 0x400;
+}
+
+/* We only try to do i/d cache coherency on stuff that looks like
+ * reasonably "normal" PTEs. We currently require a PTE to be present
+ * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE
+ */
+static inline int pte_looks_normal(pte_t pte)
+{
+ return (pte_val(pte) &
+ (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) ==
+ (_PAGE_PRESENT);
+}
+
+#if defined(CONFIG_PPC_STD_MMU)
+/* Server-style MMU handles coherency when hashing if HW exec permission
+ * is supposed per page (currently 64-bit only). Else, we always flush
+ * valid PTEs in set_pte.
+ */
+static inline int pte_need_exec_flush(pte_t pte, int set_pte)
+{
+ return set_pte && pte_looks_normal(pte) &&
+ !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
+ cpu_has_feature(CPU_FTR_NOEXECUTE));
+}
+#elif _PAGE_HWEXEC == 0
+/* Embedded type MMU without HW exec support (8xx only so far), we flush
+ * the cache for any present PTE
+ */
+static inline int pte_need_exec_flush(pte_t pte, int set_pte)
+{
+ return set_pte && pte_looks_normal(pte);
+}
+#else
+/* Other embedded CPUs with HW exec support per-page, we flush on exec
+ * fault if HWEXEC is not set
+ */
+static inline int pte_need_exec_flush(pte_t pte, int set_pte)
+{
+ return pte_looks_normal(pte) && is_exec_fault() &&
+ !(pte_val(pte) & _PAGE_HWEXEC);
+}
+#endif
+
+/*
+ * set_pte stores a linux PTE into the linux page table.
+ */
+void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
+{
+#ifdef CONFIG_DEBUG_VM
+ WARN_ON(pte_present(*ptep));
+#endif
+ /* Note: mm->context.id might not yet have been assigned as
+ * this context might not have been activated yet when this
+ * is called.
+ */
+ pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
+ if (pte_need_exec_flush(pte, 1))
+ pte = do_dcache_icache_coherency(pte);
+
+ /* Perform the setting of the PTE */
+ __set_pte_at(mm, addr, ptep, pte, 0);
+}
+
+/*
+ * This is called when relaxing access to a PTE. It's also called in the page
+ * fault path when we don't hit any of the major fault cases, ie, a minor
+ * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
+ * handled those two for us, we additionally deal with missing execute
+ * permission here on some processors
+ */
+int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+ pte_t *ptep, pte_t entry, int dirty)
+{
+ int changed;
+ if (!dirty && pte_need_exec_flush(entry, 0))
+ entry = do_dcache_icache_coherency(entry);
+ changed = !pte_same(*(ptep), entry);
+ if (changed) {
+ assert_pte_locked(vma->vm_mm, address);
+ __ptep_set_access_flags(ptep, entry);
+ flush_tlb_page_nohash(vma, address);
+ }
+ return changed;
+}
+
+#ifdef CONFIG_DEBUG_VM
+void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ if (mm == &init_mm)
+ return;
+ pgd = mm->pgd + pgd_index(addr);
+ BUG_ON(pgd_none(*pgd));
+ pud = pud_offset(pgd, addr);
+ BUG_ON(pud_none(*pud));
+ pmd = pmd_offset(pud, addr);
+ BUG_ON(!pmd_present(*pmd));
+ BUG_ON(!spin_is_locked(pte_lockptr(mm, pmd)));
+}
+#endif /* CONFIG_DEBUG_VM */
+
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 58bcaeba728..0f8c4371dfa 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -129,7 +129,8 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
void __iomem *
ioremap(phys_addr_t addr, unsigned long size)
{
- return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED);
+ return __ioremap_caller(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED,
+ __builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap);
@@ -143,13 +144,20 @@ ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags)
/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
flags &= ~(_PAGE_USER | _PAGE_EXEC | _PAGE_HWEXEC);
- return __ioremap(addr, size, flags);
+ return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_flags);
void __iomem *
__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
{
+ return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
+}
+
+void __iomem *
+__ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
+ void *caller)
+{
unsigned long v, i;
phys_addr_t p;
int err;
@@ -212,7 +220,7 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
if (mem_init_done) {
struct vm_struct *area;
- area = get_vm_area(size, VM_IOREMAP);
+ area = get_vm_area_caller(size, VM_IOREMAP, caller);
if (area == 0)
return NULL;
v = (unsigned long) area->addr;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 365e61ae5db..bfa7db6b2fd 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -144,8 +144,8 @@ void __iounmap_at(void *ea, unsigned long size)
unmap_kernel_range((unsigned long)ea, size);
}
-void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
- unsigned long flags)
+void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
+ unsigned long flags, void *caller)
{
phys_addr_t paligned;
void __iomem *ret;
@@ -168,8 +168,9 @@ void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
if (mem_init_done) {
struct vm_struct *area;
- area = __get_vm_area(size, VM_IOREMAP,
- ioremap_bot, IOREMAP_END);
+ area = __get_vm_area_caller(size, VM_IOREMAP,
+ ioremap_bot, IOREMAP_END,
+ caller);
if (area == NULL)
return NULL;
ret = __ioremap_at(paligned, area->addr, size, flags);
@@ -186,19 +187,27 @@ void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
return ret;
}
+void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
+ unsigned long flags)
+{
+ return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
+}
void __iomem * ioremap(phys_addr_t addr, unsigned long size)
{
unsigned long flags = _PAGE_NO_CACHE | _PAGE_GUARDED;
+ void *caller = __builtin_return_address(0);
if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, flags);
- return __ioremap(addr, size, flags);
+ return ppc_md.ioremap(addr, size, flags, caller);
+ return __ioremap_caller(addr, size, flags, caller);
}
void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size,
unsigned long flags)
{
+ void *caller = __builtin_return_address(0);
+
/* writeable implies dirty for kernel addresses */
if (flags & _PAGE_RW)
flags |= _PAGE_DIRTY;
@@ -207,8 +216,8 @@ void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size,
flags &= ~(_PAGE_USER | _PAGE_EXEC);
if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, flags);
- return __ioremap(addr, size, flags);
+ return ppc_md.ioremap(addr, size, flags, caller);
+ return __ioremap_caller(addr, size, flags, caller);
}
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S
index f900a39e6ec..788b87c36f7 100644
--- a/arch/powerpc/mm/tlb_nohash_low.S
+++ b/arch/powerpc/mm/tlb_nohash_low.S
@@ -118,25 +118,50 @@ _GLOBAL(_tlbil_pid)
#elif defined(CONFIG_FSL_BOOKE)
/*
- * FSL BookE implementations. Currently _pid and _all are the
- * same. This will change when tlbilx is actually supported and
- * performs invalidate-by-PID. This change will be driven by
- * mmu_features conditional
+ * FSL BookE implementations.
+ *
+ * Since feature sections are using _SECTION_ELSE we need
+ * to have the larger code path before the _SECTION_ELSE
*/
+#define MMUCSR0_TLBFI (MMUCSR0_TLB0FI | MMUCSR0_TLB1FI | \
+ MMUCSR0_TLB2FI | MMUCSR0_TLB3FI)
/*
* Flush MMU TLB on the local processor
*/
-_GLOBAL(_tlbil_pid)
_GLOBAL(_tlbil_all)
-#define MMUCSR0_TLBFI (MMUCSR0_TLB0FI | MMUCSR0_TLB1FI | \
- MMUCSR0_TLB2FI | MMUCSR0_TLB3FI)
+BEGIN_MMU_FTR_SECTION
+ li r3,(MMUCSR0_TLBFI)@l
+ mtspr SPRN_MMUCSR0, r3
+1:
+ mfspr r3,SPRN_MMUCSR0
+ andi. r3,r3,MMUCSR0_TLBFI@l
+ bne 1b
+MMU_FTR_SECTION_ELSE
+ PPC_TLBILX_ALL(0,0)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX)
+ msync
+ isync
+ blr
+
+_GLOBAL(_tlbil_pid)
+BEGIN_MMU_FTR_SECTION
+ slwi r3,r3,16
+ mfmsr r10
+ wrteei 0
+ mfspr r4,SPRN_MAS6 /* save MAS6 */
+ mtspr SPRN_MAS6,r3
+ PPC_TLBILX_PID(0,0)
+ mtspr SPRN_MAS6,r4 /* restore MAS6 */
+ wrtee r10
+MMU_FTR_SECTION_ELSE
li r3,(MMUCSR0_TLBFI)@l
mtspr SPRN_MMUCSR0, r3
1:
mfspr r3,SPRN_MMUCSR0
andi. r3,r3,MMUCSR0_TLBFI@l
bne 1b
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBILX)
msync
isync
blr
@@ -149,7 +174,9 @@ _GLOBAL(_tlbil_va)
mfmsr r10
wrteei 0
slwi r4,r4,16
+ ori r4,r4,(MAS6_ISIZE(BOOK3E_PAGESZ_4K))@l
mtspr SPRN_MAS6,r4 /* assume AS=0 for now */
+BEGIN_MMU_FTR_SECTION
tlbsx 0,r3
mfspr r4,SPRN_MAS1 /* check valid */
andis. r3,r4,MAS1_VALID@h
@@ -157,6 +184,9 @@ _GLOBAL(_tlbil_va)
rlwinm r4,r4,0,1,31
mtspr SPRN_MAS1,r4
tlbwe
+MMU_FTR_SECTION_ELSE
+ PPC_TLBILX_VA(0,r3)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX)
msync
isync
1: wrtee r10