From d7a5b2ffa1352f0310630934a56aecbdfb617b72 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jan 2006 21:31:28 +1300 Subject: [PATCH] powerpc: Always panic if lmb_alloc() fails Currently most callers of lmb_alloc() don't check if it worked or not, if it ever does weird bad things will probably happen. The few callers who do check just panic or BUG_ON. So make lmb_alloc() panic internally, to catch bugs at the source. The few callers who did check the result no longer need to. The only caller that did anything interesting with the return result was careful_allocation(). For it we create __lmb_alloc_base() which _doesn't_ panic automatically, a little messy, but passable. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_utils_64.c | 1 - arch/powerpc/mm/lmb.c | 14 ++++++++++++++ arch/powerpc/mm/mem.c | 1 - arch/powerpc/mm/numa.c | 4 ++-- arch/powerpc/mm/stab.c | 4 ---- 5 files changed, 16 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 149351a84b9..95b4cd6b65e 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -430,7 +430,6 @@ void __init htab_initialize(void) * the absolute address space. */ table = lmb_alloc(htab_size_bytes, htab_size_bytes); - BUG_ON(table == 0); DBG("Hash table allocated at %lx, size: %lx\n", table, htab_size_bytes); diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c index bbe3eac918e..d9c76ce5fa8 100644 --- a/arch/powerpc/mm/lmb.c +++ b/arch/powerpc/mm/lmb.c @@ -225,6 +225,20 @@ unsigned long __init lmb_alloc(unsigned long size, unsigned long align) unsigned long __init lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) +{ + unsigned long alloc; + + alloc = __lmb_alloc_base(size, align, max_addr); + + if (alloc < 0) + panic("ERROR: Failed to allocate 0x%lx bytes below 0x%lx.\n", + size, max_addr); + + return alloc; +} + +unsigned long __init __lmb_alloc_base(unsigned long size, unsigned long align, + unsigned long max_addr) { long i, j; unsigned long base = 0; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 550517c2dd4..6809cdba6e9 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -249,7 +249,6 @@ void __init do_init_bootmem(void) bootmap_pages = bootmem_bootmap_pages(total_pages); start = lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE); - BUG_ON(!start); boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 2863a912bcd..da5280f8cf4 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -570,11 +570,11 @@ static void __init *careful_allocation(int nid, unsigned long size, unsigned long end_pfn) { int new_nid; - unsigned long ret = lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT); + unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT); /* retry over all memory */ if (!ret) - ret = lmb_alloc_base(size, align, lmb_end_of_DRAM()); + ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM()); if (!ret) panic("numa.c: cannot allocate %lu bytes on node %d", diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 82e4951826b..91d25fb27f8 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -247,10 +247,6 @@ void stabs_alloc(void) newstab = lmb_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE, 1< Date: Wed, 25 Jan 2006 21:31:30 +1300 Subject: [PATCH] powerpc: Move LMB_ALLOC_ANYWHERE out of lmb.h LMB_ALLOC_ANYWHERE doesn't need to be part of the API, it's only used in lmb.c - so move it out of the header file. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/mm/lmb.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c index d9c76ce5fa8..874cd103ce6 100644 --- a/arch/powerpc/mm/lmb.c +++ b/arch/powerpc/mm/lmb.c @@ -31,6 +31,8 @@ #define DBG(fmt...) #endif +#define LMB_ALLOC_ANYWHERE 0 + struct lmb lmb; void lmb_dump_all(void) -- cgit v1.2.3 From 2ef9481e666b4654159ac9f847e6963809e3c470 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Mon, 23 Jan 2006 10:58:20 -0600 Subject: [PATCH] powerpc: trivial: modify comments to refer to new location of files This patch removes all self references and fixes references to files in the now defunct arch/ppc64 tree. I think this accomplises everything wanted, though there might be a few references I missed. Signed-off-by: Jon Mason Signed-off-by: Paul Mackerras --- arch/powerpc/mm/fault.c | 2 -- arch/powerpc/mm/hash_low_32.S | 2 -- arch/powerpc/mm/mmap.c | 2 -- arch/powerpc/mm/slb_low.S | 2 -- arch/powerpc/mm/tlb_64.c | 2 +- 5 files changed, 1 insertion(+), 9 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index a4815d31672..ec4adcb4bc2 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -1,6 +1,4 @@ /* - * arch/ppc/mm/fault.c - * * PowerPC version * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) * diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index 12ccd7155ba..ea469eefa14 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -1,6 +1,4 @@ /* - * arch/ppc/kernel/hashtable.S - * * $Id: hashtable.S,v 1.6 1999/10/08 01:56:15 paulus Exp $ * * PowerPC version diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index fe65f522aff..972a8e884b9 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -1,6 +1,4 @@ /* - * linux/arch/ppc64/mm/mmap.c - * * flexible mmap layout support * * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index d1acee38f16..abfaabf667b 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -1,6 +1,4 @@ /* - * arch/ppc64/mm/slb_low.S - * * Low-level SLB routines * * Copyright (C) 2004 David Gibson , IBM diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c index bb3afb6e631..f734b11566c 100644 --- a/arch/powerpc/mm/tlb_64.c +++ b/arch/powerpc/mm/tlb_64.c @@ -36,7 +36,7 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); /* This is declared as we are using the more or less generic - * include/asm-ppc64/tlb.h file -- tgall + * include/asm-powerpc/tlb.h file -- tgall */ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); -- cgit v1.2.3 From 2c276603c3e5ebf38155a9d1fbbda656d52d138e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 16 Mar 2006 14:47:20 +1100 Subject: [PATCH] powerpc: Fix bug in bug fix for bug in lmb_alloc() My patch (d7a5b2ffa1352f0310630934a56aecbdfb617b72) to always panic if lmb_alloc() fails is broken because it checks alloc < 0, but should be checking alloc == 0. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/mm/lmb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c index 874cd103ce6..417d5851855 100644 --- a/arch/powerpc/mm/lmb.c +++ b/arch/powerpc/mm/lmb.c @@ -232,7 +232,7 @@ unsigned long __init lmb_alloc_base(unsigned long size, unsigned long align, alloc = __lmb_alloc_base(size, align, max_addr); - if (alloc < 0) + if (alloc == 0) panic("ERROR: Failed to allocate 0x%lx bytes below 0x%lx.\n", size, max_addr); -- cgit v1.2.3 From c08888cf3c80fe07bfd176113c390ca31d3ba5c2 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 20 Mar 2006 18:34:15 -0600 Subject: [PATCH] powerpc numa: fix boot_cpuid always assigned to node 0 At boot, the numa code is assigning boot_cpuid to node 0 unconditionally. Basically, numa_setup_cpu is being stupid about it, but this is the minimal fix -- just call numa_setup_cpu(boot_cpuid) later, after all nodes have been set online. Signed-off-by: Nathan Lynch Signed-off-by: Paul Mackerras --- arch/powerpc/mm/numa.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index da5280f8cf4..dc6392ce253 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -375,7 +375,7 @@ static int __init parse_numa_properties(void) { struct device_node *cpu = NULL; struct device_node *memory = NULL; - int max_domain; + int max_domain = 0; unsigned long i; if (numa_enabled == 0) { @@ -389,8 +389,6 @@ static int __init parse_numa_properties(void) if (min_common_depth < 0) return min_common_depth; - max_domain = numa_setup_cpu(boot_cpuid); - /* * Even though we connect cpus to numa domains later in SMP init, * we need to know the maximum node id now. This is because each @@ -469,6 +467,8 @@ new_range: for (i = 0; i <= max_domain; i++) node_set_online(i); + max_domain = numa_setup_cpu(boot_cpuid); + return 0; } -- cgit v1.2.3 From bf4b85b0e4bab42b3e8d8b0acc6851bb85e2050b Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 20 Mar 2006 18:34:45 -0600 Subject: [PATCH] powerpc numa: Minor debugging code changes Add debug statement for map_cpu_to_node; it's useful for cpu hotplug. Clarify debug statement about not finding the numa reference points property. Don't print a meaningless associativity depth (-1) on non-numa systems. Signed-off-by: Nathan Lynch Signed-off-by: Paul Mackerras --- arch/powerpc/mm/numa.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index dc6392ce253..2ae491d9404 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -133,6 +133,8 @@ static inline void map_cpu_to_node(int cpu, int node) { numa_cpu_lookup_table[cpu] = node; + dbg("adding cpu %d to node %d\n", cpu, node); + if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) cpu_set(cpu, numa_cpumask_lookup_table[node]); } @@ -246,8 +248,7 @@ static int __init find_min_common_depth(void) if ((len >= 1) && ref_points) { depth = ref_points[1]; } else { - dbg("WARNING: could not find NUMA " - "associativity reference point\n"); + dbg("NUMA: ibm,associativity-reference-points not found.\n"); depth = -1; } of_node_put(rtas_root); @@ -385,10 +386,11 @@ static int __init parse_numa_properties(void) min_common_depth = find_min_common_depth(); - dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); if (min_common_depth < 0) return min_common_depth; + dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); + /* * Even though we connect cpus to numa domains later in SMP init, * we need to know the maximum node id now. This is because each -- cgit v1.2.3 From 2e5ce39d6703836b583c43131c365201a76285a5 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 20 Mar 2006 18:35:15 -0600 Subject: [PATCH] powerpc numa: Minor cpu hotplug-related cleanups map_cpu_to_node does not need to be inline, it is never called in a hot path. map_cpu_to_node, numa_setup_cpu, and find_cpu_node can be marked __cpuinit, as they are never used after boot if CONFIG_HOTPLUG_CPU=n. Signed-off-by: Nathan Lynch Signed-off-by: Paul Mackerras --- arch/powerpc/mm/numa.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 2ae491d9404..1fb11bbe1ac 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -129,7 +129,7 @@ void __init get_region(unsigned int nid, unsigned long *start_pfn, *start_pfn = 0; } -static inline void map_cpu_to_node(int cpu, int node) +static void __cpuinit map_cpu_to_node(int cpu, int node) { numa_cpu_lookup_table[cpu] = node; @@ -155,7 +155,7 @@ static void unmap_cpu_from_node(unsigned long cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static struct device_node *find_cpu_node(unsigned int cpu) +static struct device_node * __cpuinit find_cpu_node(unsigned int cpu) { unsigned int hw_cpuid = get_hard_smp_processor_id(cpu); struct device_node *cpu_node = NULL; @@ -284,7 +284,7 @@ static unsigned long __devinit read_n_cells(int n, unsigned int **buf) * Figure out to which domain a cpu belongs and stick it there. * Return the id of the domain used. */ -static int numa_setup_cpu(unsigned long lcpu) +static int __cpuinit numa_setup_cpu(unsigned long lcpu) { int numa_domain = 0; struct device_node *cpu = find_cpu_node(lcpu); -- cgit v1.2.3 From cf950b7af0e51935e559c54262214423e2b6c88a Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 20 Mar 2006 18:35:45 -0600 Subject: [PATCH] powerpc numa: Get rid of "numa domain" terminology Since we effectively treat the domain ids given to us by firmare as logical node ids, make this explicit (basically s/numa_domain/nid/). No functional changes, only variable and function names are modified. Signed-off-by: Nathan Lynch Signed-off-by: Paul Mackerras --- arch/powerpc/mm/numa.c | 78 +++++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 39 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 1fb11bbe1ac..a5286a68760 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -191,9 +191,9 @@ static int *of_get_associativity(struct device_node *dev) return (unsigned int *)get_property(dev, "ibm,associativity", NULL); } -static int of_node_numa_domain(struct device_node *device) +static int of_node_to_nid(struct device_node *device) { - int numa_domain; + int nid; unsigned int *tmp; if (min_common_depth == -1) @@ -201,13 +201,13 @@ static int of_node_numa_domain(struct device_node *device) tmp = of_get_associativity(device); if (tmp && (tmp[0] >= min_common_depth)) { - numa_domain = tmp[min_common_depth]; + nid = tmp[min_common_depth]; } else { dbg("WARNING: no NUMA information for %s\n", device->full_name); - numa_domain = 0; + nid = 0; } - return numa_domain; + return nid; } /* @@ -286,7 +286,7 @@ static unsigned long __devinit read_n_cells(int n, unsigned int **buf) */ static int __cpuinit numa_setup_cpu(unsigned long lcpu) { - int numa_domain = 0; + int nid = 0; struct device_node *cpu = find_cpu_node(lcpu); if (!cpu) { @@ -294,27 +294,27 @@ static int __cpuinit numa_setup_cpu(unsigned long lcpu) goto out; } - numa_domain = of_node_numa_domain(cpu); + nid = of_node_to_nid(cpu); - if (numa_domain >= num_online_nodes()) { + if (nid >= num_online_nodes()) { /* * POWER4 LPAR uses 0xffff as invalid node, * dont warn in this case. */ - if (numa_domain != 0xffff) + if (nid != 0xffff) printk(KERN_ERR "WARNING: cpu %ld " "maps to invalid NUMA node %d\n", - lcpu, numa_domain); - numa_domain = 0; + lcpu, nid); + nid = 0; } out: - node_set_online(numa_domain); + node_set_online(nid); - map_cpu_to_node(lcpu, numa_domain); + map_cpu_to_node(lcpu, nid); of_node_put(cpu); - return numa_domain; + return nid; } static int cpu_numa_callback(struct notifier_block *nfb, @@ -399,17 +399,17 @@ static int __init parse_numa_properties(void) * with larger node ids. In that case we force the cpu into node 0. */ for_each_cpu(i) { - int numa_domain; + int nid; cpu = find_cpu_node(i); if (cpu) { - numa_domain = of_node_numa_domain(cpu); + nid = of_node_to_nid(cpu); of_node_put(cpu); - if (numa_domain < MAX_NUMNODES && - max_domain < numa_domain) - max_domain = numa_domain; + if (nid < MAX_NUMNODES && + max_domain < nid) + max_domain = nid; } } @@ -418,7 +418,7 @@ static int __init parse_numa_properties(void) while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { unsigned long start; unsigned long size; - int numa_domain; + int nid; int ranges; unsigned int *memcell_buf; unsigned int len; @@ -439,18 +439,18 @@ new_range: start = read_n_cells(n_mem_addr_cells, &memcell_buf); size = read_n_cells(n_mem_size_cells, &memcell_buf); - numa_domain = of_node_numa_domain(memory); + nid = of_node_to_nid(memory); - if (numa_domain >= MAX_NUMNODES) { - if (numa_domain != 0xffff) + if (nid >= MAX_NUMNODES) { + if (nid != 0xffff) printk(KERN_ERR "WARNING: memory at %lx maps " "to invalid NUMA node %d\n", start, - numa_domain); - numa_domain = 0; + nid); + nid = 0; } - if (max_domain < numa_domain) - max_domain = numa_domain; + if (max_domain < nid) + max_domain = nid; if (!(size = numa_enforce_memory_limit(start, size))) { if (--ranges) @@ -459,7 +459,7 @@ new_range: continue; } - add_region(numa_domain, start >> PAGE_SHIFT, + add_region(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT); if (--ranges) @@ -769,10 +769,10 @@ int hot_add_scn_to_nid(unsigned long scn_addr) { struct device_node *memory = NULL; nodemask_t nodes; - int numa_domain = 0; + int nid = 0; if (!numa_enabled || (min_common_depth < 0)) - return numa_domain; + return nid; while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { unsigned long start, size; @@ -789,15 +789,15 @@ int hot_add_scn_to_nid(unsigned long scn_addr) ha_new_range: start = read_n_cells(n_mem_addr_cells, &memcell_buf); size = read_n_cells(n_mem_size_cells, &memcell_buf); - numa_domain = of_node_numa_domain(memory); + nid = of_node_to_nid(memory); /* Domains not present at boot default to 0 */ - if (!node_online(numa_domain)) - numa_domain = any_online_node(NODE_MASK_ALL); + if (!node_online(nid)) + nid = any_online_node(NODE_MASK_ALL); if ((scn_addr >= start) && (scn_addr < (start + size))) { of_node_put(memory); - goto got_numa_domain; + goto got_nid; } if (--ranges) /* process all ranges in cell */ @@ -806,12 +806,12 @@ ha_new_range: BUG(); /* section address should be found above */ /* Temporary code to ensure that returned node is not empty */ -got_numa_domain: +got_nid: nodes_setall(nodes); - while (NODE_DATA(numa_domain)->node_spanned_pages == 0) { - node_clear(numa_domain, nodes); - numa_domain = any_online_node(nodes); + while (NODE_DATA(nid)->node_spanned_pages == 0) { + node_clear(nid, nodes); + nid = any_online_node(nodes); } - return numa_domain; + return nid; } #endif /* CONFIG_MEMORY_HOTPLUG */ -- cgit v1.2.3 From bc16a75926941094db6b42d76014abb5e8d3a910 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 20 Mar 2006 18:36:15 -0600 Subject: [PATCH] powerpc numa: Consolidate handling of Power4 special case Code to handle Power4's invalid node id (0xffff) is duplicated for cpu and memory. Better to handle this case in one place -- of_node_to_nid. Overall behavior should be unchanged. Signed-off-by: Nathan Lynch Signed-off-by: Paul Mackerras --- arch/powerpc/mm/numa.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index a5286a68760..dd611ef8df7 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -207,6 +207,11 @@ static int of_node_to_nid(struct device_node *device) device->full_name); nid = 0; } + + /* POWER4 LPAR uses 0xffff as invalid node */ + if (nid == 0xffff) + nid = 0; + return nid; } @@ -297,14 +302,9 @@ static int __cpuinit numa_setup_cpu(unsigned long lcpu) nid = of_node_to_nid(cpu); if (nid >= num_online_nodes()) { - /* - * POWER4 LPAR uses 0xffff as invalid node, - * dont warn in this case. - */ - if (nid != 0xffff) - printk(KERN_ERR "WARNING: cpu %ld " - "maps to invalid NUMA node %d\n", - lcpu, nid); + printk(KERN_ERR "WARNING: cpu %ld " + "maps to invalid NUMA node %d\n", + lcpu, nid); nid = 0; } out: @@ -442,10 +442,9 @@ new_range: nid = of_node_to_nid(memory); if (nid >= MAX_NUMNODES) { - if (nid != 0xffff) - printk(KERN_ERR "WARNING: memory at %lx maps " - "to invalid NUMA node %d\n", start, - nid); + printk(KERN_ERR "WARNING: memory at %lx maps " + "to invalid NUMA node %d\n", start, + nid); nid = 0; } -- cgit v1.2.3 From 482ec7c403d239bb4f1732faf9a14988094ce08b Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 20 Mar 2006 18:36:45 -0600 Subject: [PATCH] powerpc numa: Support sparse online node map The powerpc numa code unconditionally onlines all nodes from 0 to the highest node id found, regardless of whether cpus or memory are present in the nodes. This wastes 8K per node and complicates some cpu and memory hotplug situations, such as adding a resource that doesn't map to one of the nodes discovered at boot. Set nodes online as resources are scanned. Fall back to node 0 only when we're sure this isn't a NUMA machine. Instead of defaulting to node 0 for cases of hot-adding a resource which doesn't belong to any initialized node, assign it to the first online node. Signed-off-by: Nathan Lynch Signed-off-by: Paul Mackerras --- arch/powerpc/mm/numa.c | 95 +++++++++++++++++++++++--------------------------- 1 file changed, 43 insertions(+), 52 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index dd611ef8df7..7d6ebe3c3b9 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -191,27 +191,28 @@ static int *of_get_associativity(struct device_node *dev) return (unsigned int *)get_property(dev, "ibm,associativity", NULL); } +/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa + * info is found. + */ static int of_node_to_nid(struct device_node *device) { - int nid; + int nid = -1; unsigned int *tmp; if (min_common_depth == -1) - return 0; + goto out; tmp = of_get_associativity(device); - if (tmp && (tmp[0] >= min_common_depth)) { + if (!tmp) + goto out; + + if (tmp[0] >= min_common_depth) nid = tmp[min_common_depth]; - } else { - dbg("WARNING: no NUMA information for %s\n", - device->full_name); - nid = 0; - } /* POWER4 LPAR uses 0xffff as invalid node */ - if (nid == 0xffff) - nid = 0; - + if (nid == 0xffff || nid >= MAX_NUMNODES) + nid = -1; +out: return nid; } @@ -301,15 +302,9 @@ static int __cpuinit numa_setup_cpu(unsigned long lcpu) nid = of_node_to_nid(cpu); - if (nid >= num_online_nodes()) { - printk(KERN_ERR "WARNING: cpu %ld " - "maps to invalid NUMA node %d\n", - lcpu, nid); - nid = 0; - } + if (nid < 0 || !node_online(nid)) + nid = any_online_node(NODE_MASK_ALL); out: - node_set_online(nid); - map_cpu_to_node(lcpu, nid); of_node_put(cpu); @@ -376,7 +371,7 @@ static int __init parse_numa_properties(void) { struct device_node *cpu = NULL; struct device_node *memory = NULL; - int max_domain = 0; + int default_nid = 0; unsigned long i; if (numa_enabled == 0) { @@ -392,25 +387,26 @@ static int __init parse_numa_properties(void) dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); /* - * Even though we connect cpus to numa domains later in SMP init, - * we need to know the maximum node id now. This is because each - * node id must have NODE_DATA etc backing it. - * As a result of hotplug we could still have cpus appear later on - * with larger node ids. In that case we force the cpu into node 0. + * Even though we connect cpus to numa domains later in SMP + * init, we need to know the node ids now. This is because + * each node to be onlined must have NODE_DATA etc backing it. */ - for_each_cpu(i) { + for_each_present_cpu(i) { int nid; cpu = find_cpu_node(i); + BUG_ON(!cpu); + nid = of_node_to_nid(cpu); + of_node_put(cpu); - if (cpu) { - nid = of_node_to_nid(cpu); - of_node_put(cpu); - - if (nid < MAX_NUMNODES && - max_domain < nid) - max_domain = nid; - } + /* + * Don't fall back to default_nid yet -- we will plug + * cpus into nodes once the memory scan has discovered + * the topology. + */ + if (nid < 0) + continue; + node_set_online(nid); } get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); @@ -439,17 +435,15 @@ new_range: start = read_n_cells(n_mem_addr_cells, &memcell_buf); size = read_n_cells(n_mem_size_cells, &memcell_buf); + /* + * Assumption: either all memory nodes or none will + * have associativity properties. If none, then + * everything goes to default_nid. + */ nid = of_node_to_nid(memory); - - if (nid >= MAX_NUMNODES) { - printk(KERN_ERR "WARNING: memory at %lx maps " - "to invalid NUMA node %d\n", start, - nid); - nid = 0; - } - - if (max_domain < nid) - max_domain = nid; + if (nid < 0) + nid = default_nid; + node_set_online(nid); if (!(size = numa_enforce_memory_limit(start, size))) { if (--ranges) @@ -465,10 +459,7 @@ new_range: goto new_range; } - for (i = 0; i <= max_domain; i++) - node_set_online(i); - - max_domain = numa_setup_cpu(boot_cpuid); + numa_setup_cpu(boot_cpuid); return 0; } @@ -768,10 +759,10 @@ int hot_add_scn_to_nid(unsigned long scn_addr) { struct device_node *memory = NULL; nodemask_t nodes; - int nid = 0; + int default_nid = any_online_node(NODE_MASK_ALL); if (!numa_enabled || (min_common_depth < 0)) - return nid; + return default_nid; while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { unsigned long start, size; @@ -791,8 +782,8 @@ ha_new_range: nid = of_node_to_nid(memory); /* Domains not present at boot default to 0 */ - if (!node_online(nid)) - nid = any_online_node(NODE_MASK_ALL); + if (nid < 0 || !node_online(nid)) + nid = default_nid; if ((scn_addr >= start) && (scn_addr < (start + size))) { of_node_put(memory); -- cgit v1.2.3 From 2b2612272c77288b2bd53d5831df737cd669cd93 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 20 Mar 2006 18:37:15 -0600 Subject: [PATCH] powerpc numa: Consolidate assignment of cpus to nodes We can plug the boot cpu into its node independently of whether numa topology is detected. And numa_setup_cpu does the right thing for all cases now, so remove special-casing for non-numa from the cpu hotplug callback. Signed-off-by: Nathan Lynch Signed-off-by: Paul Mackerras --- arch/powerpc/mm/numa.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 7d6ebe3c3b9..e89b22aa539 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -321,10 +321,7 @@ static int cpu_numa_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: - if (min_common_depth == -1 || !numa_enabled) - map_cpu_to_node(lcpu, 0); - else - numa_setup_cpu(lcpu); + numa_setup_cpu(lcpu); ret = NOTIFY_OK; break; #ifdef CONFIG_HOTPLUG_CPU @@ -459,8 +456,6 @@ new_range: goto new_range; } - numa_setup_cpu(boot_cpuid); - return 0; } @@ -475,7 +470,6 @@ static void __init setup_nonnuma(void) printk(KERN_INFO "Memory hole size: %ldMB\n", (top_of_ram - total_ram) >> 20); - map_cpu_to_node(boot_cpuid, 0); for (i = 0; i < lmb.memory.cnt; ++i) add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT, lmb_size_pages(&lmb.memory, i)); @@ -612,6 +606,8 @@ void __init do_init_bootmem(void) dump_numa_memory_topology(); register_cpu_notifier(&ppc64_numa_nb); + cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, + (void *)(unsigned long)boot_cpuid); for_each_online_node(nid) { unsigned long start_pfn, end_pfn, pages_present; -- cgit v1.2.3 From caf80e579b5fc0048681a47c5a55487116e56a88 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 21 Mar 2006 20:45:51 +1100 Subject: [PATCH] powerpc: Unconfuse htab_bolt_mapping() callers htab_bolt_mapping() takes a vstart and pstart parameter, but all but one of its callers actually pass it vstart and vstart. Luckily before it passes paddr (calculated from paddr) to the hpte_insert routines it calls virt_to_abs() (aka. __pa()) on the address, so there isn't actually a bug. map_io_page() however does pass pstart properly, so currently it's broken AFAICT because we're calling __pa(paddr) which will get us something very large. Presumably no one's calling map_io_page() in the right context. Anyway, change htab_bolt_mapping() callers to properly pass pstart, and then use it properly in htab_bolt_mapping(), ie. don't call __pa() on it again. Booted on p5 LPAR, iSeries and Power3. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_utils_64.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 7d9ce9a4821..7b4eccffd00 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -169,7 +169,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, #ifdef CONFIG_PPC_ISERIES if (_machine == PLATFORM_ISERIES_LPAR) ret = iSeries_hpte_insert(hpteg, va, - __pa(vaddr), + paddr, tmp_mode, HPTE_V_BOLTED, psize); @@ -178,7 +178,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, #ifdef CONFIG_PPC_PSERIES if (_machine & PLATFORM_LPAR) ret = pSeries_lpar_hpte_insert(hpteg, va, - virt_to_abs(paddr), + paddr, tmp_mode, HPTE_V_BOLTED, psize); @@ -186,7 +186,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, #endif #ifdef CONFIG_PPC_MULTIPLATFORM ret = native_hpte_insert(hpteg, va, - virt_to_abs(paddr), + paddr, tmp_mode, HPTE_V_BOLTED, psize); #endif @@ -392,7 +392,7 @@ static unsigned long __init htab_get_table_size(void) #ifdef CONFIG_MEMORY_HOTPLUG void create_section_mapping(unsigned long start, unsigned long end) { - BUG_ON(htab_bolt_mapping(start, end, start, + BUG_ON(htab_bolt_mapping(start, end, __pa(start), _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX, mmu_linear_psize)); } @@ -473,21 +473,22 @@ void __init htab_initialize(void) if (dart_tablebase != 0 && dart_tablebase >= base && dart_tablebase < (base + size)) { + unsigned long dart_table_end = dart_tablebase + 16 * MB; if (base != dart_tablebase) BUG_ON(htab_bolt_mapping(base, dart_tablebase, - base, mode_rw, - mmu_linear_psize)); - if ((base + size) > (dart_tablebase + 16*MB)) + __pa(base), mode_rw, + mmu_linear_psize)); + if ((base + size) > dart_table_end) BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB, - base + size, - dart_tablebase+16*MB, + base + size, + __pa(dart_table_end), mode_rw, mmu_linear_psize)); continue; } #endif /* CONFIG_U3_DART */ - BUG_ON(htab_bolt_mapping(base, base + size, base, - mode_rw, mmu_linear_psize)); + BUG_ON(htab_bolt_mapping(base, base + size, __pa(base), + mode_rw, mmu_linear_psize)); } /* @@ -504,8 +505,8 @@ void __init htab_initialize(void) if (base + size >= tce_alloc_start) tce_alloc_start = base + size + 1; - BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end, - tce_alloc_start, mode_rw, + BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end, + __pa(tce_alloc_start), mode_rw, mmu_linear_psize)); } -- cgit v1.2.3 From 57cfb814f698d30894bc28e22125550193ebe549 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 21 Mar 2006 20:45:59 +1100 Subject: [PATCH] powerpc: Replace platform_is_lpar() with a firmware feature It has been decreed that platform numbers are evil, so as a step in that direction, replace platform_is_lpar() with a FW_FEATURE_LPAR bit. Currently FW_FEATURE_LPAR really means i/pSeries LPAR, in the future we might have to clean that up if we need to be more specific about what LPAR actually means. But that's another patch ... Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_utils_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 7b4eccffd00..89b35c18131 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -422,7 +422,7 @@ void __init htab_initialize(void) htab_hash_mask = pteg_count - 1; - if (platform_is_lpar()) { + if (firmware_has_feature(FW_FEATURE_LPAR)) { /* Using a hypervisor which owns the htab */ htab_address = NULL; _SDR1 = 0; @@ -517,7 +517,7 @@ void __init htab_initialize(void) void htab_initialize_secondary(void) { - if (!platform_is_lpar()) + if (!firmware_has_feature(FW_FEATURE_LPAR)) mtspr(SPRN_SDR1, _SDR1); } -- cgit v1.2.3 From f8642ebee8e46d054d83828a4048fba4ebcd8f68 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 21 Mar 2006 20:46:11 +1100 Subject: [PATCH] powerpc: Remove calculation of io hole In mm_init_ppc64() we calculate the location of the "IO hole", but then no one ever looks at the value. So don't bother. That's actually all mm_init_ppc64() does, so get rid of it too. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/mm/init_64.c | 48 ----------------------------------------------- 1 file changed, 48 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 81cfb0c2ec5..5d4733d6180 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -84,54 +84,6 @@ /* max amount of RAM to use */ unsigned long __max_memory; -/* info on what we think the IO hole is */ -unsigned long io_hole_start; -unsigned long io_hole_size; - -/* - * Do very early mm setup. - */ -void __init mm_init_ppc64(void) -{ -#ifndef CONFIG_PPC_ISERIES - unsigned long i; -#endif - - ppc64_boot_msg(0x100, "MM Init"); - - /* This is the story of the IO hole... please, keep seated, - * unfortunately, we are out of oxygen masks at the moment. - * So we need some rough way to tell where your big IO hole - * is. On pmac, it's between 2G and 4G, on POWER3, it's around - * that area as well, on POWER4 we don't have one, etc... - * We need that as a "hint" when sizing the TCE table on POWER3 - * So far, the simplest way that seem work well enough for us it - * to just assume that the first discontinuity in our physical - * RAM layout is the IO hole. That may not be correct in the future - * (and isn't on iSeries but then we don't care ;) - */ - -#ifndef CONFIG_PPC_ISERIES - for (i = 1; i < lmb.memory.cnt; i++) { - unsigned long base, prevbase, prevsize; - - prevbase = lmb.memory.region[i-1].base; - prevsize = lmb.memory.region[i-1].size; - base = lmb.memory.region[i].base; - if (base > (prevbase + prevsize)) { - io_hole_start = prevbase + prevsize; - io_hole_size = base - (prevbase + prevsize); - break; - } - } -#endif /* CONFIG_PPC_ISERIES */ - if (io_hole_start) - printk("IO Hole assumed to be %lx -> %lx\n", - io_hole_start, io_hole_start + io_hole_size - 1); - - ppc64_boot_msg(0x100, "MM Init Done"); -} - void free_initmem(void) { unsigned long addr; -- cgit v1.2.3 From 2d0eee14b23f20c501a6c2536edf68f93c56edcd Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 21 Mar 2006 23:00:05 -0800 Subject: [PATCH] powerpc: Fix warning in add_memory arch/powerpc/mm/mem.c: In function `add_memory': arch/powerpc/mm/mem.c:128: warning: assignment makes integer from pointer without a cast Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Paul Mackerras --- arch/powerpc/mm/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 6809cdba6e9..80550b28450 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -125,7 +125,7 @@ int __devinit add_memory(u64 start, u64 size) nid = hot_add_scn_to_nid(start); pgdata = NODE_DATA(nid); - start = __va(start); + start = (unsigned long)__va(start); create_section_mapping(start, start + size); /* this should work for most non-highmem platforms */ -- cgit v1.2.3