From ea0854170c95245a258b386c7a9314399c949fe0 Mon Sep 17 00:00:00 2001 From: Shaohui Zheng Date: Tue, 2 Feb 2010 13:44:16 -0800 Subject: memory hotplug: fix a bug on /dev/mem for 64-bit kernels Newly added memory can not be accessed via /dev/mem, because we do not update the variables high_memory, max_pfn and max_low_pfn. Add a function update_end_of_memory_vars() to update these variables for 64-bit kernels. [akpm@linux-foundation.org: simplify comment] Signed-off-by: Shaohui Zheng Cc: Andi Kleen Cc: Li Haicheng Reviewed-by: Wu Fengguang Reviewed-by: KAMEZAWA Hiroyuki Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_64.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5198b9bb34e..69ddfbd9113 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -49,6 +49,7 @@ #include #include #include +#include static unsigned long dma_reserve __initdata; @@ -615,6 +616,21 @@ void __init paging_init(void) * Memory hotplug specific functions */ #ifdef CONFIG_MEMORY_HOTPLUG +/* + * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need + * updating. + */ +static void update_end_of_memory_vars(u64 start, u64 size) +{ + unsigned long end_pfn = PFN_UP(start + size); + + if (end_pfn > max_pfn) { + max_pfn = end_pfn; + max_low_pfn = end_pfn; + high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; + } +} + /* * Memory is added always to NORMAL zone. This means you will never get * additional DMA/DMA32 memory. @@ -634,6 +650,9 @@ int arch_add_memory(int nid, u64 start, u64 size) ret = __add_pages(nid, zone, start_pfn, nr_pages); WARN_ON_ONCE(ret); + /* update max_pfn, max_low_pfn and high_memory */ + update_end_of_memory_vars(start, size); + return ret; } EXPORT_SYMBOL_GPL(arch_add_memory); -- cgit v1.2.3 From 1842f90cc98625d4d9bf8f8b927f17705ceb4e9c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:15 -0800 Subject: x86: Call early_res_to_bootmem one time Simplify setup_node_mem: don't use bootmem from other node, instead just find_e820_area in early_node_mem. This keeps the boundary between early_res and boot mem more clear, and lets us only call early_res_to_bootmem() one time instead of for all nodes. Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-12-git-send-email-yinghai@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/mm/init_64.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 69ddfbd9113..a15abaae5ba 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -579,13 +579,12 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, PAGE_SIZE); if (bootmap == -1L) panic("Cannot find bootmem map of size %ld\n", bootmap_size); + reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); /* don't touch min_low_pfn */ bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, 0, end_pfn); e820_register_active_regions(0, start_pfn, end_pfn); free_bootmem_with_active_regions(0, end_pfn); - early_res_to_bootmem(0, end_pfn< Date: Wed, 10 Feb 2010 01:20:20 -0800 Subject: x86: Make 64 bit use early_res instead of bootmem before slab Finally we can use early_res to replace bootmem for x86_64 now. Still can use CONFIG_NO_BOOTMEM to enable it or not. -v2: fix 32bit compiling about MAX_DMA32_PFN -v3: folded bug fix from LKML message below Signed-off-by: Yinghai Lu LKML-Reference: <4B747239.4070907@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/mm/init_64.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a15abaae5ba..53158b7e5d4 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -572,6 +572,7 @@ kernel_physical_mapping_init(unsigned long start, void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, int acpi, int k8) { +#ifndef CONFIG_NO_BOOTMEM unsigned long bootmap_size, bootmap; bootmap_size = bootmem_bootmap_pages(end_pfn)< Date: Wed, 10 Feb 2010 01:20:22 -0800 Subject: sparsemem: Put mem map for one node together. Add vmemmap_alloc_block_buf for mem map only. It will fallback to the old way if it cannot get a block that big. Before this patch, when a node have 128g ram installed, memmap are split into two parts or more. [ 0.000000] [ffffea0000000000-ffffea003fffffff] PMD -> [ffff880100600000-ffff88013e9fffff] on node 1 [ 0.000000] [ffffea0040000000-ffffea006fffffff] PMD -> [ffff88013ec00000-ffff88016ebfffff] on node 1 [ 0.000000] [ffffea0070000000-ffffea007fffffff] PMD -> [ffff882000600000-ffff8820105fffff] on node 0 [ 0.000000] [ffffea0080000000-ffffea00bfffffff] PMD -> [ffff882010800000-ffff8820507fffff] on node 0 [ 0.000000] [ffffea00c0000000-ffffea00dfffffff] PMD -> [ffff882050a00000-ffff8820709fffff] on node 0 [ 0.000000] [ffffea00e0000000-ffffea00ffffffff] PMD -> [ffff884000600000-ffff8840205fffff] on node 2 [ 0.000000] [ffffea0100000000-ffffea013fffffff] PMD -> [ffff884020800000-ffff8840607fffff] on node 2 [ 0.000000] [ffffea0140000000-ffffea014fffffff] PMD -> [ffff884060a00000-ffff8840709fffff] on node 2 [ 0.000000] [ffffea0150000000-ffffea017fffffff] PMD -> [ffff886000600000-ffff8860305fffff] on node 3 [ 0.000000] [ffffea0180000000-ffffea01bfffffff] PMD -> [ffff886030800000-ffff8860707fffff] on node 3 [ 0.000000] [ffffea01c0000000-ffffea01ffffffff] PMD -> [ffff888000600000-ffff8880405fffff] on node 4 [ 0.000000] [ffffea0200000000-ffffea022fffffff] PMD -> [ffff888040800000-ffff8880707fffff] on node 4 [ 0.000000] [ffffea0230000000-ffffea023fffffff] PMD -> [ffff88a000600000-ffff88a0105fffff] on node 5 [ 0.000000] [ffffea0240000000-ffffea027fffffff] PMD -> [ffff88a010800000-ffff88a0507fffff] on node 5 [ 0.000000] [ffffea0280000000-ffffea029fffffff] PMD -> [ffff88a050a00000-ffff88a0709fffff] on node 5 [ 0.000000] [ffffea02a0000000-ffffea02bfffffff] PMD -> [ffff88c000600000-ffff88c0205fffff] on node 6 [ 0.000000] [ffffea02c0000000-ffffea02ffffffff] PMD -> [ffff88c020800000-ffff88c0607fffff] on node 6 [ 0.000000] [ffffea0300000000-ffffea030fffffff] PMD -> [ffff88c060a00000-ffff88c0709fffff] on node 6 [ 0.000000] [ffffea0310000000-ffffea033fffffff] PMD -> [ffff88e000600000-ffff88e0305fffff] on node 7 [ 0.000000] [ffffea0340000000-ffffea037fffffff] PMD -> [ffff88e030800000-ffff88e0707fffff] on node 7 after patch will get [ 0.000000] [ffffea0000000000-ffffea006fffffff] PMD -> [ffff880100200000-ffff88016e5fffff] on node 0 [ 0.000000] [ffffea0070000000-ffffea00dfffffff] PMD -> [ffff882000200000-ffff8820701fffff] on node 1 [ 0.000000] [ffffea00e0000000-ffffea014fffffff] PMD -> [ffff884000200000-ffff8840701fffff] on node 2 [ 0.000000] [ffffea0150000000-ffffea01bfffffff] PMD -> [ffff886000200000-ffff8860701fffff] on node 3 [ 0.000000] [ffffea01c0000000-ffffea022fffffff] PMD -> [ffff888000200000-ffff8880701fffff] on node 4 [ 0.000000] [ffffea0230000000-ffffea029fffffff] PMD -> [ffff88a000200000-ffff88a0701fffff] on node 5 [ 0.000000] [ffffea02a0000000-ffffea030fffffff] PMD -> [ffff88c000200000-ffff88c0701fffff] on node 6 [ 0.000000] [ffffea0310000000-ffffea037fffffff] PMD -> [ffff88e000200000-ffff88e0701fffff] on node 7 -v2: change buf to vmemmap_buf instead according to Ingo also add CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER according to Ingo -v3: according to Andrew, use sizeof(name) instead of hard coded 15 Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-19-git-send-email-yinghai@kernel.org> Cc: Christoph Lameter Acked-by: Christoph Lameter Signed-off-by: H. Peter Anvin --- arch/x86/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 53158b7e5d4..e9b040e1cde 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -977,7 +977,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) if (pmd_none(*pmd)) { pte_t entry; - p = vmemmap_alloc_block(PMD_SIZE, node); + p = vmemmap_alloc_block_buf(PMD_SIZE, node); if (!p) return -ENOMEM; -- cgit v1.2.3