From e81703724a966120ace6504c993bda9e084cbf3e Mon Sep 17 00:00:00 2001
From: Jon Tollefson
Date: Thu, 16 Oct 2008 18:59:43 +0000
Subject: powerpc/numa: Make memory reserve code more robust

For reserves that span multiple nodes, adjust the amount left to
reserve by what has already been reserved on previous nodes. Also
check whether the node's active range is empty before passing the
reserve to bootmem; in practice the range should never be empty, but
check to be safe.

Signed-off-by: Jon Tollefson
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/mm/numa.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 6cf5c71c431..195bfcd0895 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -116,6 +116,7 @@ static int __init get_active_region_work_fn(unsigned long start_pfn,
 
 /*
  * get_node_active_region - Return active region containing start_pfn
+ * Active range returned is empty if none found.
  * @start_pfn: The page to return the region for.
  * @node_ar: Returned set to the active region containing start_pfn
  */
@@ -126,6 +127,7 @@ static void __init get_node_active_region(unsigned long start_pfn,
 
 	node_ar->nid = nid;
 	node_ar->start_pfn = start_pfn;
+	node_ar->end_pfn = start_pfn;
 	work_with_active_regions(nid, get_active_region_work_fn, node_ar);
 }
 
@@ -933,18 +935,20 @@ void __init do_init_bootmem(void)
 		struct node_active_region node_ar;
 
 		get_node_active_region(start_pfn, &node_ar);
-		while (start_pfn < end_pfn) {
+		while (start_pfn < end_pfn &&
+		       node_ar.start_pfn < node_ar.end_pfn) {
+			unsigned long reserve_size = size;
 			/*
 			 * if reserved region extends past active region
 			 * then trim size to active region
 			 */
 			if (end_pfn > node_ar.end_pfn)
-				size = (node_ar.end_pfn << PAGE_SHIFT)
+				reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
 					- (start_pfn << PAGE_SHIFT);
-			dbg("reserve_bootmem %lx %lx nid=%d\n", physbase, size,
-				node_ar.nid);
+			dbg("reserve_bootmem %lx %lx nid=%d\n", physbase,
+				reserve_size, node_ar.nid);
 			reserve_bootmem_node(NODE_DATA(node_ar.nid), physbase,
-						size, BOOTMEM_DEFAULT);
+						reserve_size, BOOTMEM_DEFAULT);
 			/*
 			 * if reserved region is contained in the active region
 			 * then done.
 			 */
@@ -959,6 +963,7 @@ void __init do_init_bootmem(void)
 			 */
 			start_pfn = node_ar.end_pfn;
 			physbase = start_pfn << PAGE_SHIFT;
+			size = size - reserve_size;
 			get_node_active_region(start_pfn, &node_ar);
 		}
--
cgit v1.2.3
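
The walk that this patch introduces can be exercised in isolation. The
following is a minimal userspace sketch, not kernel code: struct region,
find_active_region(), and the three-node layout are invented stand-ins
for the kernel's node_active_region machinery, and only the
trim-and-carry-forward loop mirrors the patch.

#include <stdio.h>

#define PAGE_SHIFT 12

struct region {
	int nid;
	unsigned long start_pfn;
	unsigned long end_pfn;
};

/* Hypothetical three-node layout, in page frame numbers. */
static const struct region nodes[] = {
	{ 0, 0x000, 0x400 },
	{ 1, 0x400, 0x800 },
	{ 2, 0x800, 0xc00 },
};

/*
 * Stand-in for get_node_active_region(): returns an empty range
 * (start_pfn == end_pfn) when no node covers start_pfn, which is
 * exactly the case the patch guards against.
 */
static struct region find_active_region(unsigned long start_pfn)
{
	struct region r = { -1, start_pfn, start_pfn };
	unsigned int i;

	for (i = 0; i < sizeof(nodes) / sizeof(nodes[0]); i++)
		if (start_pfn >= nodes[i].start_pfn &&
		    start_pfn < nodes[i].end_pfn)
			return nodes[i];
	return r;	/* empty range: caller's loop terminates */
}

static void reserve(unsigned long physbase, unsigned long size)
{
	unsigned long start_pfn = physbase >> PAGE_SHIFT;
	unsigned long end_pfn = (physbase + size) >> PAGE_SHIFT;
	struct region node_ar = find_active_region(start_pfn);

	while (start_pfn < end_pfn &&
	       node_ar.start_pfn < node_ar.end_pfn) {
		unsigned long reserve_size = size;

		/* Trim the chunk to the current node's active region. */
		if (end_pfn > node_ar.end_pfn)
			reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
					- (start_pfn << PAGE_SHIFT);

		printf("reserve %#lx size %#lx on nid=%d\n",
		       physbase, reserve_size, node_ar.nid);

		/* Carry the remainder forward into the next node. */
		start_pfn = node_ar.end_pfn;
		physbase = start_pfn << PAGE_SHIFT;
		size -= reserve_size;
		node_ar = find_active_region(start_pfn);
	}
}

int main(void)
{
	/* A reserve of 0x200 pages at pfn 0x300 spans nodes 0 and 1. */
	reserve(0x300UL << PAGE_SHIFT, 0x200UL << PAGE_SHIFT);
	return 0;
}

Running this prints a 0x100000-byte chunk reserved on nid=0 followed by
the 0x100000-byte remainder on nid=1; before the fix, the size trimmed
for the first node was reused for the second instead of the remainder.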
From fe55249d17f7979cf9bbc58e38e9ceaf1918b415 Mon Sep 17 00:00:00 2001
From: Milton Miller
Date: Mon, 20 Oct 2008 15:37:04 +0000
Subject: powerpc: Always trim numa memory to lmb_end_of_DRAM()

numa_enforce_memory_limit() tried to be smart and call lmb_end_of_DRAM()
only when a memory limit had been set via mem= on the command line.
However, the early boot code also limits the memory added to the lmb
system when iommu=off is specified. When that happens, the page
allocator is handed pages that are not in the linear mapping, which
results in a fatal data reference to the unmapped page.

Signed-off-by: Milton Miller
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/mm/numa.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 195bfcd0895..eb505ad34a8 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -528,12 +528,10 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start,
 	/*
 	 * We use lmb_end_of_DRAM() in here instead of memory_limit because
 	 * we've already adjusted it for the limit and it takes care of
-	 * having memory holes below the limit.
+	 * having memory holes below the limit. Also, in the case of
+	 * iommu_is_off, memory_limit is not set but is implicitly enforced.
 	 */
-	if (! memory_limit)
-		return size;
-
 	if (start + size <= lmb_end_of_DRAM())
 		return size;
--
cgit v1.2.3

From 4792adbac9eb41cea77a45ab76258ea10d411173 Mon Sep 17 00:00:00 2001
From: Jon Tollefson
Date: Tue, 21 Oct 2008 15:27:36 +0000
Subject: powerpc: Don't use a 16G page if beyond mem= limits

If mem= is used on the boot command line to limit memory, then the
memory block where a 16G page resides may not be available.

Thanks to Michael Ellerman for finding the problem.

Signed-off-by: Jon Tollefson
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/mm/hash_utils_64.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 5c64af17475..8d5b4758c13 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -382,8 +382,10 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
 	printk(KERN_INFO "Huge page(16GB) memory: "
 			"addr = 0x%lX size = 0x%lX pages = %d\n",
 			phys_addr, block_size, expected_pages);
-	lmb_reserve(phys_addr, block_size * expected_pages);
-	add_gpage(phys_addr, block_size, expected_pages);
+	if (phys_addr + (16 * GB) <= lmb_end_of_DRAM()) {
+		lmb_reserve(phys_addr, block_size * expected_pages);
+		add_gpage(phys_addr, block_size, expected_pages);
+	}
 	return 0;
 }
 #endif /* CONFIG_HUGETLB_PAGE */
--
cgit v1.2.3
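
The last two patches enforce the same invariant from opposite ends:
nothing beyond lmb_end_of_DRAM() is handed to the allocators, whether
the limit comes from mem= or from iommu=off. Below is a minimal
userspace sketch of that clamp. end_of_dram, the sample ranges, and the
straddle/drop cases of enforce_memory_limit() are invented for
illustration (the hunk above only shows the start of the real
function), and a 64-bit unsigned long is assumed, as on ppc64.

#include <stdio.h>

#define GB	(1024UL * 1024 * 1024)

/* Pretend DRAM ends at 4 GB, e.g. after a mem= or iommu=off trim. */
static unsigned long end_of_dram = 4 * GB;

/* Trim a [start, start + size) range to usable DRAM. */
static unsigned long enforce_memory_limit(unsigned long start,
					  unsigned long size)
{
	if (start + size <= end_of_dram)
		return size;			/* fully below the limit */
	if (start >= end_of_dram)
		return 0;			/* entirely above: drop it */
	return end_of_dram - start;		/* straddles: trim */
}

/* A 16G page is only usable if the whole block fits below the limit. */
static int gigantic_page_usable(unsigned long phys_addr)
{
	return phys_addr + 16 * GB <= end_of_dram;
}

int main(void)
{
	/* A 2 GB range starting at 3 GB straddles the limit: 1 GB survives. */
	printf("trimmed size: %#lx\n", enforce_memory_limit(3 * GB, 2 * GB));
	/* No 16 GB page fits below a 4 GB limit. */
	printf("16G page at 0 usable: %d\n", gigantic_page_usable(0UL));
	return 0;
}

The key design point in both patches is to test against the final,
already-trimmed end of DRAM rather than against the memory_limit
variable, so every source of trimming is covered by one check.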