From 2b72394e4089643f11669d9610907a1442fe044a Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 28 Apr 2009 16:00:49 +0300 Subject: x86: move max_pfn_mapped and max_low_pfn_mapped to setup.c This patch moves the max_pfn_mapped and max_low_pfn_mapped global variables to kernel/setup.c where they're initialized. [ Impact: cleanup ] Signed-off-by: Pekka Enberg LKML-Reference: <1240923649.1982.21.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b4158439bf6..0d77e56e821 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -112,6 +112,14 @@ #define ARCH_SETUP #endif +/* + * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. + * The direct mapping extends to max_pfn_mapped, so that we can directly access + * apertures, ACPI and other tables without having to play with fixmaps. + */ +unsigned long max_low_pfn_mapped; +unsigned long max_pfn_mapped; + RESERVE_BRK(dmi_alloc, 65536); unsigned int boot_cpu_id __read_mostly; -- cgit v1.2.3 From 45fbe3ee01b8e463b28c2751b5dcc0cbdc142d90 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 6 May 2009 08:06:44 -0700 Subject: x86, e820, pci: reserve extra free space near end of RAM The point is to take all RAM resources we have, and _after_ we've added all the resources we've seen in the E820 tree, we then _also_ try to add fake reserved entries for any "round up to X" at the end of the RAM resources. [ Impact: improve PCI mem-resource allocation robustness, protect "stolen RAM" ] Reported-by: Yannick Roehlly Acked-by: Jesse Barnes Signed-off-by: Yinghai Lu Cc: Ivan Kokshaysky Cc: Andrew Morton Cc: yannick.roehlly@free.fr LKML-Reference: <4A01A784.2050407@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 00628130292..a2335d9de05 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1371,6 +1371,23 @@ void __init e820_reserve_resources(void) } } +/* How much should we pad RAM ending depending on where it is? */ +static unsigned long ram_alignment(resource_size_t pos) +{ + unsigned long mb = pos >> 20; + + /* To 64kB in the first megabyte */ + if (!mb) + return 64*1024; + + /* To 1MB in the first 16MB */ + if (mb < 16) + return 1024*1024; + + /* To 32MB for anything above that */ + return 32*1024*1024; +} + void __init e820_reserve_resources_late(void) { int i; @@ -1382,6 +1399,24 @@ void __init e820_reserve_resources_late(void) insert_resource_expand_to_fit(&iomem_resource, res); res++; } + + /* + * Try to bump up RAM regions to reasonable boundaries to + * avoid stolen RAM: + */ + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *entry = &e820_saved.map[i]; + resource_size_t start, end; + + if (entry->type != E820_RAM) + continue; + start = entry->addr + entry->size; + end = round_up(start, ram_alignment(start)); + if (start == end) + continue; + reserve_region_with_split(&iomem_resource, start, + end - 1, "RAM buffer"); + } } char *__init default_machine_specific_memory_setup(void) -- cgit v1.2.3 From 5d423ccd7ba4285f1084e91b26805e1d0ae978ed Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 6 May 2009 08:07:52 -0700 Subject: x86/pci: remove rounding quirk from e820_setup_gap() Now that the e820 code explicitly reserves 'potentially dangerous' free physical memory address space to protect ACPI stolen RAM, there's no need for the rounding quirk in the PCI allocator anymore. Also, this quirk was open-ended iteration that could end up reserving a lot of free space and potentially breaking drivers - such as the one reported by Yannick Roehlly where there's a PCI device with a large memory resource. So remove it. [ Impact: make more of the PCI hole available for assigning pci devices ] Reported-by: Yannick Roehlly Signed-off-by: Yinghai Lu Acked-by: Jesse Barnes Cc: Ivan Kokshaysky Cc: Linus Torvalds Cc: Andrew Morton LKML-Reference: <4A01A7C8.5090701@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a2335d9de05..7271fa33d79 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -617,7 +617,7 @@ __init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, */ __init void e820_setup_gap(void) { - unsigned long gapstart, gapsize, round; + unsigned long gapstart, gapsize; int found; gapstart = 0x10000000; @@ -635,14 +635,9 @@ __init void e820_setup_gap(void) #endif /* - * See how much we want to round up: start off with - * rounding to the next 1MB area. + * e820_reserve_resources_late protect stolen RAM already */ - round = 0x100000; - while ((gapsize >> 4) > round) - round += round; - /* Fun with two's complement */ - pci_mem_start = (gapstart + round) & -round; + pci_mem_start = gapstart; printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", -- cgit v1.2.3 From 80989ce0643c1034822f3e339ed8d790b649abe1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 9 May 2009 23:47:42 -0700 Subject: x86: clean up and and print out initial max_pfn_mapped Do this so we can check the range that is mapped before init_memory_mapping(). To be able to print out meaningful info, we first have to fix 64-bit to have max_pfn_mapped assigned before that call. This also unifies the code-path a bit. [ Impact: print more debug info, cleanup ] Signed-off-by: Yinghai Lu LKML-Reference: <49BF0978.40605@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0d77e56e821..4031d6cb3ff 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -862,12 +862,16 @@ void __init setup_arch(char **cmdline_p) max_low_pfn = max_pfn; high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; + max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; #endif #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION setup_bios_corruption_check(); #endif + printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", + max_pfn_mapped< Date: Mon, 11 May 2009 22:05:28 -0400 Subject: x86: merge process.c a bit Merge arch_align_stack() and arch_randomize_brk(), since they are the same. Tested on x86_64. [ Impact: cleanup ] Signed-off-by: Amerigo Wang Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 14 ++++++++++++++ arch/x86/kernel/process_32.c | 13 ------------- arch/x86/kernel/process_64.c | 13 ------------- 3 files changed, 14 insertions(+), 26 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ca989158e84..2b9a8d0fb47 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -613,3 +614,16 @@ static int __init idle_setup(char *str) } early_param("idle", idle_setup); +unsigned long arch_align_stack(unsigned long sp) +{ + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + sp -= get_random_int() % 8192; + return sp & ~0xf; +} + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + unsigned long range_end = mm->brk + 0x02000000; + return randomize_range(mm->brk, range_end, 0) ? : mm->brk; +} + diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 76f8f84043a..a3bb049ad08 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -497,15 +496,3 @@ unsigned long get_wchan(struct task_struct *p) return 0; } -unsigned long arch_align_stack(unsigned long sp) -{ - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() % 8192; - return sp & ~0xf; -} - -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - unsigned long range_end = mm->brk + 0x02000000; - return randomize_range(mm->brk, range_end, 0) ? : mm->brk; -} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b751a41392b..34386f72bdc 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -660,15 +659,3 @@ long sys_arch_prctl(int code, unsigned long addr) return do_arch_prctl(current, code, addr); } -unsigned long arch_align_stack(unsigned long sp) -{ - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() % 8192; - return sp & ~0xf; -} - -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - unsigned long range_end = mm->brk + 0x02000000; - return randomize_range(mm->brk, range_end, 0) ? : mm->brk; -} -- cgit v1.2.3 From bf78ad69cd351798b9447a269c6bd41ce1f111f4 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 11 May 2009 23:29:09 -0400 Subject: x86: process.c, remove useless headers is not needed by these files, remove them. [ Impact: cleanup ] Signed-off-by: WANG Cong Cc: akpm@linux-foundation.org LKML-Reference: <20090512032956.5040.77055.sendpatchset@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 2 -- arch/x86/kernel/process_64.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a3bb049ad08..56d50b7d71d 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -9,8 +9,6 @@ * This file handles the architecture-dependent parts of process handling.. */ -#include - #include #include #include diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 34386f72bdc..9d6b20e6cd8 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -14,8 +14,6 @@ * This file handles the architecture-dependent parts of process handling.. */ -#include - #include #include #include -- cgit v1.2.3 From 35d5a9a61490bf39d2e48d7f499c8c801a39ebe9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 15 May 2009 13:59:37 -0700 Subject: x86: fix system without memory on node0 Jack found a boot crash on a system which doesn't have memory on node0. It turns out with recent per_cpu changes, node_number for BSP will always be 0, and it is not consistent to cpu_to_node() that might set it to a different (nearer) node already. aka when numa_set_node() for node0 is called early before per_cpu area is setup: two places touched that per_cpu(node_number,): 1. in cpu/common.c::cpu_init() and it is not for BP | #ifdef CONFIG_NUMA | if (cpu != 0 && percpu_read(node_number) == 0 && | cpu_to_node(cpu) != NUMA_NO_NODE) | percpu_write(node_number, cpu_to_node(cpu)); | #endif for BP: traps_init ==> cpu_init for AP: start_secondary ==> cpu_init 2. cpu/intel.c or amd.c::srat_detect_node via numa_set_node() for BP: check_bugs ==> identify_boot_cpu ==> identify_cpu() that is rather later before numa_node_id() is used for BP... for AP: start_secondary => smp_callin => smp_store_cpu_info() => => identify_secondary_cpu => identify_cpu() so try to set that for BP earlier in setup_per_cpu_areas(), and don't bother to set that for APs there (it will be updated later and will be used later) (and don't mess the 0 before the copying BP per_cpu data to APs) [ Impact: fix boot crash on memoryless node-0 ] Reported-and-tested-by: Jack Steiner Cc: Tejun Heo Signed-off-by: Yinghai Lu LKML-Reference: <4A0C4A02.7050401@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 3a97a4cf187..3b5f3271e73 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -423,6 +423,14 @@ void __init setup_per_cpu_areas(void) early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; #endif +#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) + /* + * make sure boot cpu node_number is right, when boot cpu is on the + * node that doesn't have mem installed + */ + per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id); +#endif + /* Setup node to cpumask map */ setup_node_to_cpumask_map(); -- cgit v1.2.3