From 927604c7592473742891dc13e1da09febc06e01b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 23:34:17 -0700 Subject: x86: rename discontig_32.c to numa_32.c name it in line with its purpose. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/Makefile | 6 +- arch/x86/mm/discontig_32.c | 444 --------------------------------------------- arch/x86/mm/numa_32.c | 444 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 445 insertions(+), 449 deletions(-) delete mode 100644 arch/x86/mm/discontig_32.c create mode 100644 arch/x86/mm/numa_32.c (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index dfb932dcf13..59f89b434b4 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -13,12 +13,8 @@ obj-$(CONFIG_MMIOTRACE) += mmiotrace.o mmiotrace-y := pf_in.o mmio-mod.o obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o -ifeq ($(CONFIG_X86_32),y) -obj-$(CONFIG_NUMA) += discontig_32.o -else -obj-$(CONFIG_NUMA) += numa_64.o +obj-$(CONFIG_NUMA) += numa_$(BITS).o obj-$(CONFIG_K8_NUMA) += k8topology_64.o -endif obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o obj-$(CONFIG_MEMTEST) += memtest.o diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c deleted file mode 100644 index 847c164725f..00000000000 --- a/arch/x86/mm/discontig_32.c +++ /dev/null @@ -1,444 +0,0 @@ -/* - * Written by: Patricia Gaughen , IBM Corporation - * August 2002: added remote node KVA remap - Martin J. Bligh - * - * Copyright (C) 2002, IBM Corp. - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; -EXPORT_SYMBOL(node_data); - -/* - * numa interface - we expect the numa architecture specific code to have - * populated the following initialisation. - * - * 1) node_online_map - the map of all nodes configured (online) in the system - * 2) node_start_pfn - the starting page frame number for a node - * 3) node_end_pfn - the ending page fram number for a node - */ -unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly; -unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly; - - -#ifdef CONFIG_DISCONTIGMEM -/* - * 4) physnode_map - the mapping between a pfn and owning node - * physnode_map keeps track of the physical memory layout of a generic - * numa node on a 64Mb break (each element of the array will - * represent 64Mb of memory and will be marked by the node id. so, - * if the first gig is on node 0, and the second gig is on node 1 - * physnode_map will contain: - * - * physnode_map[0-15] = 0; - * physnode_map[16-31] = 1; - * physnode_map[32- ] = -1; - */ -s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; -EXPORT_SYMBOL(physnode_map); - -void memory_present(int nid, unsigned long start, unsigned long end) -{ - unsigned long pfn; - - printk(KERN_INFO "Node: %d, start_pfn: %lx, end_pfn: %lx\n", - nid, start, end); - printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); - printk(KERN_DEBUG " "); - for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { - physnode_map[pfn / PAGES_PER_ELEMENT] = nid; - printk(KERN_CONT "%lx ", pfn); - } - printk(KERN_CONT "\n"); -} - -unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, - unsigned long end_pfn) -{ - unsigned long nr_pages = end_pfn - start_pfn; - - if (!nr_pages) - return 0; - - return (nr_pages + 1) * sizeof(struct page); -} -#endif - -extern unsigned long find_max_low_pfn(void); -extern unsigned long highend_pfn, highstart_pfn; - -#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) - -unsigned long node_remap_size[MAX_NUMNODES]; -static void *node_remap_start_vaddr[MAX_NUMNODES]; -void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); - -static unsigned long kva_start_pfn; -static unsigned long kva_pages; -/* - * FLAT - support for basic PC memory model with discontig enabled, essentially - * a single node with all available processors in it with a flat - * memory map. - */ -int __init get_memcfg_numa_flat(void) -{ - printk(KERN_DEBUG "NUMA - single node, flat memory mode\n"); - - node_start_pfn[0] = 0; - node_end_pfn[0] = max_pfn; - e820_register_active_regions(0, 0, max_pfn); - memory_present(0, 0, max_pfn); - node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn); - - /* Indicate there is one node available. */ - nodes_clear(node_online_map); - node_set_online(0); - return 1; -} - -/* - * Find the highest page frame number we have available for the node - */ -static void __init propagate_e820_map_node(int nid) -{ - if (node_end_pfn[nid] > max_pfn) - node_end_pfn[nid] = max_pfn; - /* - * if a user has given mem=XXXX, then we need to make sure - * that the node _starts_ before that, too, not just ends - */ - if (node_start_pfn[nid] > max_pfn) - node_start_pfn[nid] = max_pfn; - BUG_ON(node_start_pfn[nid] > node_end_pfn[nid]); -} - -/* - * Allocate memory for the pg_data_t for this node via a crude pre-bootmem - * method. For node zero take this from the bottom of memory, for - * subsequent nodes place them at node_remap_start_vaddr which contains - * node local data in physically node local memory. See setup_memory() - * for details. - */ -static void __init allocate_pgdat(int nid) -{ - char buf[16]; - - if (node_has_online_mem(nid) && node_remap_start_vaddr[nid]) - NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; - else { - unsigned long pgdat_phys; - pgdat_phys = find_e820_area(min_low_pfn<>PAGE_SHIFT)); - memset(buf, 0, sizeof(buf)); - sprintf(buf, "NODE_DATA %d", nid); - reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf); - } - printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n", - nid, (unsigned long)NODE_DATA(nid)); -} - -/* - * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel - * virtual address space (KVA) is reserved and portions of nodes are mapped - * using it. This is to allow node-local memory to be allocated for - * structures that would normally require ZONE_NORMAL. The memory is - * allocated with alloc_remap() and callers should be prepared to allocate - * from the bootmem allocator instead. - */ -static unsigned long node_remap_start_pfn[MAX_NUMNODES]; -static void *node_remap_end_vaddr[MAX_NUMNODES]; -static void *node_remap_alloc_vaddr[MAX_NUMNODES]; -static unsigned long node_remap_offset[MAX_NUMNODES]; - -void *alloc_remap(int nid, unsigned long size) -{ - void *allocation = node_remap_alloc_vaddr[nid]; - - size = ALIGN(size, L1_CACHE_BYTES); - - if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) - return 0; - - node_remap_alloc_vaddr[nid] += size; - memset(allocation, 0, size); - - return allocation; -} - -static void __init remap_numa_kva(void) -{ - void *vaddr; - unsigned long pfn; - int node; - - for_each_online_node(node) { - printk(KERN_DEBUG "remap_numa_kva: node %d\n", node); - for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { - vaddr = node_remap_start_vaddr[node]+(pfn< max_pfn) - continue; - if (!node_end_pfn[nid]) - continue; - if (node_end_pfn[nid] > max_pfn) - node_end_pfn[nid] = max_pfn; - - /* ensure the remap includes space for the pgdat. */ - size = node_remap_size[nid] + sizeof(pg_data_t); - - /* convert size to large (pmd size) pages, rounding up */ - size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES; - /* now the roundup is correct, convert to PAGE_SIZE pages */ - size = size * PTRS_PER_PTE; - - node_kva_target = round_down(node_end_pfn[nid] - size, - PTRS_PER_PTE); - node_kva_target <<= PAGE_SHIFT; - do { - node_kva_final = find_e820_area(node_kva_target, - ((u64)node_end_pfn[nid])<>PAGE_SHIFT) > (node_start_pfn[nid])); - - if (node_kva_final == -1ULL) - panic("Can not get kva ram\n"); - - node_remap_size[nid] = size; - node_remap_offset[nid] = reserve_pages; - reserve_pages += size; - printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of" - " node %d at %llx\n", - size, nid, node_kva_final>>PAGE_SHIFT); - - /* - * prevent kva address below max_low_pfn want it on system - * with less memory later. - * layout will be: KVA address , KVA RAM - * - * we are supposed to only record the one less then max_low_pfn - * but we could have some hole in high memory, and it will only - * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide - * to use it as free. - * So reserve_early here, hope we don't run out of that array - */ - reserve_early(node_kva_final, - node_kva_final+(((u64)size)<>PAGE_SHIFT; - remove_active_range(nid, node_remap_start_pfn[nid], - node_remap_start_pfn[nid] + size); - } - printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n", - reserve_pages); - return reserve_pages; -} - -static void init_remap_allocator(int nid) -{ - node_remap_start_vaddr[nid] = pfn_to_kaddr( - kva_start_pfn + node_remap_offset[nid]); - node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + - (node_remap_size[nid] * PAGE_SIZE); - node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + - ALIGN(sizeof(pg_data_t), PAGE_SIZE); - - printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid, - (ulong) node_remap_start_vaddr[nid], - (ulong) node_remap_end_vaddr[nid]); -} - -void __init initmem_init(unsigned long start_pfn, - unsigned long end_pfn) -{ - int nid; - long kva_target_pfn; - - /* - * When mapping a NUMA machine we allocate the node_mem_map arrays - * from node local memory. They are then mapped directly into KVA - * between zone normal and vmalloc space. Calculate the size of - * this space and use it to adjust the boundary between ZONE_NORMAL - * and ZONE_HIGHMEM. - */ - - get_memcfg_numa(); - - kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE); - - kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE); - do { - kva_start_pfn = find_e820_area(kva_target_pfn<> PAGE_SHIFT; - kva_target_pfn -= PTRS_PER_PTE; - } while (kva_start_pfn == -1UL && kva_target_pfn > min_low_pfn); - - if (kva_start_pfn == -1UL) - panic("Can not get kva space\n"); - - printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n", - kva_start_pfn, max_low_pfn); - printk(KERN_INFO "max_pfn = %lx\n", max_pfn); - - /* avoid clash with initrd */ - reserve_early(kva_start_pfn< max_low_pfn) - highstart_pfn = max_low_pfn; - printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", - pages_to_mb(highend_pfn - highstart_pfn)); - num_physpages = highend_pfn; - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; -#else - num_physpages = max_low_pfn; - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; -#endif - printk(KERN_NOTICE "%ldMB LOWMEM available.\n", - pages_to_mb(max_low_pfn)); - printk(KERN_DEBUG "max_low_pfn = %lx, highstart_pfn = %lx\n", - max_low_pfn, highstart_pfn); - - printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n", - (ulong) pfn_to_kaddr(max_low_pfn)); - for_each_online_node(nid) { - init_remap_allocator(nid); - - allocate_pgdat(nid); - } - remap_numa_kva(); - - printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", - (ulong) pfn_to_kaddr(highstart_pfn)); - for_each_online_node(nid) - propagate_e820_map_node(nid); - - for_each_online_node(nid) - memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); - - NODE_DATA(0)->bdata = &bootmem_node_data[0]; - setup_bootmem_allocator(); -} - -void __init set_highmem_pages_init(void) -{ -#ifdef CONFIG_HIGHMEM - struct zone *zone; - int nid; - - for_each_zone(zone) { - unsigned long zone_start_pfn, zone_end_pfn; - - if (!is_highmem(zone)) - continue; - - zone_start_pfn = zone->zone_start_pfn; - zone_end_pfn = zone_start_pfn + zone->spanned_pages; - - nid = zone_to_nid(zone); - printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n", - zone->name, nid, zone_start_pfn, zone_end_pfn); - - add_highpages_with_active_regions(nid, zone_start_pfn, - zone_end_pfn); - } - totalram_pages += totalhigh_pages; -#endif -} - -#ifdef CONFIG_MEMORY_HOTPLUG -static int paddr_to_nid(u64 addr) -{ - int nid; - unsigned long pfn = PFN_DOWN(addr); - - for_each_node(nid) - if (node_start_pfn[nid] <= pfn && - pfn < node_end_pfn[nid]) - return nid; - - return -1; -} - -/* - * This function is used to ask node id BEFORE memmap and mem_section's - * initialization (pfn_to_nid() can't be used yet). - * If _PXM is not defined on ACPI's DSDT, node id must be found by this. - */ -int memory_add_physaddr_to_nid(u64 addr) -{ - int nid = paddr_to_nid(addr); - return (nid >= 0) ? nid : 0; -} - -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); -#endif - diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c new file mode 100644 index 00000000000..847c164725f --- /dev/null +++ b/arch/x86/mm/numa_32.c @@ -0,0 +1,444 @@ +/* + * Written by: Patricia Gaughen , IBM Corporation + * August 2002: added remote node KVA remap - Martin J. Bligh + * + * Copyright (C) 2002, IBM Corp. + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; +EXPORT_SYMBOL(node_data); + +/* + * numa interface - we expect the numa architecture specific code to have + * populated the following initialisation. + * + * 1) node_online_map - the map of all nodes configured (online) in the system + * 2) node_start_pfn - the starting page frame number for a node + * 3) node_end_pfn - the ending page fram number for a node + */ +unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly; +unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly; + + +#ifdef CONFIG_DISCONTIGMEM +/* + * 4) physnode_map - the mapping between a pfn and owning node + * physnode_map keeps track of the physical memory layout of a generic + * numa node on a 64Mb break (each element of the array will + * represent 64Mb of memory and will be marked by the node id. so, + * if the first gig is on node 0, and the second gig is on node 1 + * physnode_map will contain: + * + * physnode_map[0-15] = 0; + * physnode_map[16-31] = 1; + * physnode_map[32- ] = -1; + */ +s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; +EXPORT_SYMBOL(physnode_map); + +void memory_present(int nid, unsigned long start, unsigned long end) +{ + unsigned long pfn; + + printk(KERN_INFO "Node: %d, start_pfn: %lx, end_pfn: %lx\n", + nid, start, end); + printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); + printk(KERN_DEBUG " "); + for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { + physnode_map[pfn / PAGES_PER_ELEMENT] = nid; + printk(KERN_CONT "%lx ", pfn); + } + printk(KERN_CONT "\n"); +} + +unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, + unsigned long end_pfn) +{ + unsigned long nr_pages = end_pfn - start_pfn; + + if (!nr_pages) + return 0; + + return (nr_pages + 1) * sizeof(struct page); +} +#endif + +extern unsigned long find_max_low_pfn(void); +extern unsigned long highend_pfn, highstart_pfn; + +#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) + +unsigned long node_remap_size[MAX_NUMNODES]; +static void *node_remap_start_vaddr[MAX_NUMNODES]; +void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); + +static unsigned long kva_start_pfn; +static unsigned long kva_pages; +/* + * FLAT - support for basic PC memory model with discontig enabled, essentially + * a single node with all available processors in it with a flat + * memory map. + */ +int __init get_memcfg_numa_flat(void) +{ + printk(KERN_DEBUG "NUMA - single node, flat memory mode\n"); + + node_start_pfn[0] = 0; + node_end_pfn[0] = max_pfn; + e820_register_active_regions(0, 0, max_pfn); + memory_present(0, 0, max_pfn); + node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn); + + /* Indicate there is one node available. */ + nodes_clear(node_online_map); + node_set_online(0); + return 1; +} + +/* + * Find the highest page frame number we have available for the node + */ +static void __init propagate_e820_map_node(int nid) +{ + if (node_end_pfn[nid] > max_pfn) + node_end_pfn[nid] = max_pfn; + /* + * if a user has given mem=XXXX, then we need to make sure + * that the node _starts_ before that, too, not just ends + */ + if (node_start_pfn[nid] > max_pfn) + node_start_pfn[nid] = max_pfn; + BUG_ON(node_start_pfn[nid] > node_end_pfn[nid]); +} + +/* + * Allocate memory for the pg_data_t for this node via a crude pre-bootmem + * method. For node zero take this from the bottom of memory, for + * subsequent nodes place them at node_remap_start_vaddr which contains + * node local data in physically node local memory. See setup_memory() + * for details. + */ +static void __init allocate_pgdat(int nid) +{ + char buf[16]; + + if (node_has_online_mem(nid) && node_remap_start_vaddr[nid]) + NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; + else { + unsigned long pgdat_phys; + pgdat_phys = find_e820_area(min_low_pfn<>PAGE_SHIFT)); + memset(buf, 0, sizeof(buf)); + sprintf(buf, "NODE_DATA %d", nid); + reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf); + } + printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n", + nid, (unsigned long)NODE_DATA(nid)); +} + +/* + * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel + * virtual address space (KVA) is reserved and portions of nodes are mapped + * using it. This is to allow node-local memory to be allocated for + * structures that would normally require ZONE_NORMAL. The memory is + * allocated with alloc_remap() and callers should be prepared to allocate + * from the bootmem allocator instead. + */ +static unsigned long node_remap_start_pfn[MAX_NUMNODES]; +static void *node_remap_end_vaddr[MAX_NUMNODES]; +static void *node_remap_alloc_vaddr[MAX_NUMNODES]; +static unsigned long node_remap_offset[MAX_NUMNODES]; + +void *alloc_remap(int nid, unsigned long size) +{ + void *allocation = node_remap_alloc_vaddr[nid]; + + size = ALIGN(size, L1_CACHE_BYTES); + + if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) + return 0; + + node_remap_alloc_vaddr[nid] += size; + memset(allocation, 0, size); + + return allocation; +} + +static void __init remap_numa_kva(void) +{ + void *vaddr; + unsigned long pfn; + int node; + + for_each_online_node(node) { + printk(KERN_DEBUG "remap_numa_kva: node %d\n", node); + for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { + vaddr = node_remap_start_vaddr[node]+(pfn< max_pfn) + continue; + if (!node_end_pfn[nid]) + continue; + if (node_end_pfn[nid] > max_pfn) + node_end_pfn[nid] = max_pfn; + + /* ensure the remap includes space for the pgdat. */ + size = node_remap_size[nid] + sizeof(pg_data_t); + + /* convert size to large (pmd size) pages, rounding up */ + size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES; + /* now the roundup is correct, convert to PAGE_SIZE pages */ + size = size * PTRS_PER_PTE; + + node_kva_target = round_down(node_end_pfn[nid] - size, + PTRS_PER_PTE); + node_kva_target <<= PAGE_SHIFT; + do { + node_kva_final = find_e820_area(node_kva_target, + ((u64)node_end_pfn[nid])<>PAGE_SHIFT) > (node_start_pfn[nid])); + + if (node_kva_final == -1ULL) + panic("Can not get kva ram\n"); + + node_remap_size[nid] = size; + node_remap_offset[nid] = reserve_pages; + reserve_pages += size; + printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of" + " node %d at %llx\n", + size, nid, node_kva_final>>PAGE_SHIFT); + + /* + * prevent kva address below max_low_pfn want it on system + * with less memory later. + * layout will be: KVA address , KVA RAM + * + * we are supposed to only record the one less then max_low_pfn + * but we could have some hole in high memory, and it will only + * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide + * to use it as free. + * So reserve_early here, hope we don't run out of that array + */ + reserve_early(node_kva_final, + node_kva_final+(((u64)size)<>PAGE_SHIFT; + remove_active_range(nid, node_remap_start_pfn[nid], + node_remap_start_pfn[nid] + size); + } + printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n", + reserve_pages); + return reserve_pages; +} + +static void init_remap_allocator(int nid) +{ + node_remap_start_vaddr[nid] = pfn_to_kaddr( + kva_start_pfn + node_remap_offset[nid]); + node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + + (node_remap_size[nid] * PAGE_SIZE); + node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + + ALIGN(sizeof(pg_data_t), PAGE_SIZE); + + printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid, + (ulong) node_remap_start_vaddr[nid], + (ulong) node_remap_end_vaddr[nid]); +} + +void __init initmem_init(unsigned long start_pfn, + unsigned long end_pfn) +{ + int nid; + long kva_target_pfn; + + /* + * When mapping a NUMA machine we allocate the node_mem_map arrays + * from node local memory. They are then mapped directly into KVA + * between zone normal and vmalloc space. Calculate the size of + * this space and use it to adjust the boundary between ZONE_NORMAL + * and ZONE_HIGHMEM. + */ + + get_memcfg_numa(); + + kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE); + + kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE); + do { + kva_start_pfn = find_e820_area(kva_target_pfn<> PAGE_SHIFT; + kva_target_pfn -= PTRS_PER_PTE; + } while (kva_start_pfn == -1UL && kva_target_pfn > min_low_pfn); + + if (kva_start_pfn == -1UL) + panic("Can not get kva space\n"); + + printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n", + kva_start_pfn, max_low_pfn); + printk(KERN_INFO "max_pfn = %lx\n", max_pfn); + + /* avoid clash with initrd */ + reserve_early(kva_start_pfn< max_low_pfn) + highstart_pfn = max_low_pfn; + printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", + pages_to_mb(highend_pfn - highstart_pfn)); + num_physpages = highend_pfn; + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; +#else + num_physpages = max_low_pfn; + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; +#endif + printk(KERN_NOTICE "%ldMB LOWMEM available.\n", + pages_to_mb(max_low_pfn)); + printk(KERN_DEBUG "max_low_pfn = %lx, highstart_pfn = %lx\n", + max_low_pfn, highstart_pfn); + + printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n", + (ulong) pfn_to_kaddr(max_low_pfn)); + for_each_online_node(nid) { + init_remap_allocator(nid); + + allocate_pgdat(nid); + } + remap_numa_kva(); + + printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", + (ulong) pfn_to_kaddr(highstart_pfn)); + for_each_online_node(nid) + propagate_e820_map_node(nid); + + for_each_online_node(nid) + memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); + + NODE_DATA(0)->bdata = &bootmem_node_data[0]; + setup_bootmem_allocator(); +} + +void __init set_highmem_pages_init(void) +{ +#ifdef CONFIG_HIGHMEM + struct zone *zone; + int nid; + + for_each_zone(zone) { + unsigned long zone_start_pfn, zone_end_pfn; + + if (!is_highmem(zone)) + continue; + + zone_start_pfn = zone->zone_start_pfn; + zone_end_pfn = zone_start_pfn + zone->spanned_pages; + + nid = zone_to_nid(zone); + printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n", + zone->name, nid, zone_start_pfn, zone_end_pfn); + + add_highpages_with_active_regions(nid, zone_start_pfn, + zone_end_pfn); + } + totalram_pages += totalhigh_pages; +#endif +} + +#ifdef CONFIG_MEMORY_HOTPLUG +static int paddr_to_nid(u64 addr) +{ + int nid; + unsigned long pfn = PFN_DOWN(addr); + + for_each_node(nid) + if (node_start_pfn[nid] <= pfn && + pfn < node_end_pfn[nid]) + return nid; + + return -1; +} + +/* + * This function is used to ask node id BEFORE memmap and mem_section's + * initialization (pfn_to_nid() can't be used yet). + * If _PXM is not defined on ACPI's DSDT, node id must be found by this. + */ +int memory_add_physaddr_to_nid(u64 addr) +{ + int nid = paddr_to_nid(addr); + return (nid >= 0) ? nid : 0; +} + +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); +#endif + -- cgit v1.2.3 From be43d72835ba610e4af274f2d123b26f66f4f7ed Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 7 Sep 2008 15:21:13 -0700 Subject: x86: add _PAGE_IOMAP pte flag for IO mappings Use one of the software-defined PTE bits to indicate that a mapping is intended for an IO address. On native hardware this is irrelevent, since a physical address is a physical address. But in a virtual environment, physical addresses are also virtualized, so there needs to be some way to distinguish between pseudo-physical addresses and actual hardware addresses; _PAGE_IOMAP indicates this intent. By default, __supported_pte_mask masks out _PAGE_IOMAP, so it doesn't even appear in the final pagetable. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 2 +- arch/x86/mm/init_64.c | 2 +- arch/x86/mm/ioremap.c | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index bbe044dbe01..8396868e82c 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -558,7 +558,7 @@ void zap_low_mappings(void) int nx_enabled; -pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL); +pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); EXPORT_SYMBOL_GPL(__supported_pte_mask); #ifdef CONFIG_X86_PAE diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3e10054c573..dec5c775e92 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -89,7 +89,7 @@ early_param("gbpages", parse_direct_gbpages_on); int after_bootmem; -unsigned long __supported_pte_mask __read_mostly = ~0UL; +pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; EXPORT_SYMBOL_GPL(__supported_pte_mask); static int do_not_nx __cpuinitdata; diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 8cbeda15cd2..43c3b6896cd 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -242,16 +242,16 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, switch (prot_val) { case _PAGE_CACHE_UC: default: - prot = PAGE_KERNEL_NOCACHE; + prot = PAGE_KERNEL_IO_NOCACHE; break; case _PAGE_CACHE_UC_MINUS: - prot = PAGE_KERNEL_UC_MINUS; + prot = PAGE_KERNEL_IO_UC_MINUS; break; case _PAGE_CACHE_WC: - prot = PAGE_KERNEL_WC; + prot = PAGE_KERNEL_IO_WC; break; case _PAGE_CACHE_WB: - prot = PAGE_KERNEL; + prot = PAGE_KERNEL_IO; break; } -- cgit v1.2.3 From 1494177942b23b7094ca291d37e6f6263fa60fdd Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 7 Sep 2008 15:21:15 -0700 Subject: x86: add early_memremap() early_ioremap() is also used to map normal memory when constructing the linear memory mapping. However, since we sometimes need to be able to distinguish between actual IO mappings and normal memory mappings, add a early_memremap() call, which maps with PAGE_KERNEL (as opposed to PAGE_KERNEL_IO for early_ioremap()), and use it when constructing pagetables. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 2 +- arch/x86/mm/ioremap.c | 22 +++++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index dec5c775e92..5beb8968345 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -313,7 +313,7 @@ static __ref void *alloc_low_page(unsigned long *phys) if (pfn >= table_top) panic("alloc_low_page: ran out of memory"); - adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE); + adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); memset(adr, 0, PAGE_SIZE); *phys = pfn * PAGE_SIZE; return adr; diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 43c3b6896cd..7fb737c6b54 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -568,12 +568,12 @@ static void __init __early_set_fixmap(enum fixed_addresses idx, } static inline void __init early_set_fixmap(enum fixed_addresses idx, - unsigned long phys) + unsigned long phys, pgprot_t prot) { if (after_paging_init) - set_fixmap(idx, phys); + __set_fixmap(idx, phys, prot); else - __early_set_fixmap(idx, phys, PAGE_KERNEL); + __early_set_fixmap(idx, phys, prot); } static inline void __init early_clear_fixmap(enum fixed_addresses idx) @@ -601,7 +601,7 @@ static int __init check_early_ioremap_leak(void) } late_initcall(check_early_ioremap_leak); -void __init *early_ioremap(unsigned long phys_addr, unsigned long size) +static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) { unsigned long offset, last_addr; unsigned int nrpages, nesting; @@ -650,7 +650,7 @@ void __init *early_ioremap(unsigned long phys_addr, unsigned long size) idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting; idx = idx0; while (nrpages > 0) { - early_set_fixmap(idx, phys_addr); + early_set_fixmap(idx, phys_addr, prot); phys_addr += PAGE_SIZE; --idx; --nrpages; @@ -661,6 +661,18 @@ void __init *early_ioremap(unsigned long phys_addr, unsigned long size) return (void *) (offset + fix_to_virt(idx0)); } +/* Remap an IO device */ +void __init *early_ioremap(unsigned long phys_addr, unsigned long size) +{ + return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO); +} + +/* Remap memory */ +void __init *early_memremap(unsigned long phys_addr, unsigned long size) +{ + return __early_ioremap(phys_addr, size, PAGE_KERNEL); +} + void __init early_iounmap(void *addr, unsigned long size) { unsigned long virt_addr; -- cgit v1.2.3 From a32ad4626776f09b30ef98a872a5f6fb64fe6607 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 7 Sep 2008 15:21:17 -0700 Subject: x86-64: don't check for map replacement The check prevents flags on mappings from being changed, which is not desireable. There's no need to check for replacing a mapping, and x86-32 does not do this check. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5beb8968345..7c8bb46e83e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -196,9 +196,6 @@ set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) } pte = pte_offset_kernel(pmd, vaddr); - if (!pte_none(*pte) && pte_val(new_pte) && - pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask)) - pte_ERROR(*pte); set_pte(pte, new_pte); /* -- cgit v1.2.3 From 5e72d9e4850c91b6a0f06fa803f7393b55a38aa8 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 12 Sep 2008 15:43:04 +0100 Subject: x86-64: fix combining of regions in init_memory_mapping() When nr_range gets decremented, the same slot must be considered for coalescing with its new successor again. The issue is apparently pretty benign to native code, but surfaces as a boot time crash in our forward ported Xen tree (where the page table setup overall works differently than in native). Signed-off-by: Jan Beulich Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 7c8bb46e83e..b8e461d4941 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -746,7 +746,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, old_start = mr[i].start; memmove(&mr[i], &mr[i+1], (nr_range - 1 - i) * sizeof (struct map_range)); - mr[i].start = old_start; + mr[i--].start = old_start; nr_range--; } -- cgit v1.2.3 From 606ee44dbb72fd48beb47f171d7b9cecf6ade6dd Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 17 Sep 2008 16:48:17 +0100 Subject: x86: make mm/gup.c more virtualization friendly Since pte_flags() is much cheaper than pte_val() in some virtualized environments (namely, Xen), use the former whereever possible. Signed-off-by: Jan Beulich Cc: "Nick Piggin" Signed-off-by: Ingo Molnar --- arch/x86/mm/gup.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 007bb06c750..4ba373c5b8c 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -82,7 +82,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, pte_t pte = gup_get_pte(ptep); struct page *page; - if ((pte_val(pte) & (mask | _PAGE_SPECIAL)) != mask) { + if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) { pte_unmap(ptep); return 0; } @@ -116,10 +116,10 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, mask = _PAGE_PRESENT|_PAGE_USER; if (write) mask |= _PAGE_RW; - if ((pte_val(pte) & mask) != mask) + if ((pte_flags(pte) & mask) != mask) return 0; /* hugepages are never "special" */ - VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL); + VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL); VM_BUG_ON(!pfn_valid(pte_pfn(pte))); refs = 0; @@ -173,10 +173,10 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr, mask = _PAGE_PRESENT|_PAGE_USER; if (write) mask |= _PAGE_RW; - if ((pte_val(pte) & mask) != mask) + if ((pte_flags(pte) & mask) != mask) return 0; /* hugepages are never "special" */ - VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL); + VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL); VM_BUG_ON(!pfn_valid(pte_pfn(pte))); refs = 0; -- cgit v1.2.3 From 2e42060c19cb79adacc48beb5e9ec5361df976a2 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Tue, 23 Sep 2008 15:37:13 -0500 Subject: x86, uv: add early detection of UV system types Portions of the ACPI code needs to know if a system is a UV system prior to genapic initialization. This patch adds a call early_acpi_boot_init() so that the apic type is discovered earlier. V2 of the patch adding fixes from Yinghai Lu. Much cleaner and smaller. Signed-off-by: Jack Steiner Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/srat_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 1b4763e26ea..51c0a2fc14f 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -138,7 +138,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) return; } - if (is_uv_system()) + if (get_uv_system_type() >= UV_X2APIC) apic_id = (pa->apic_id << 8) | pa->local_sapic_eid; else apic_id = pa->apic_id; -- cgit v1.2.3 From 69c89b5bf7f253756f3056e84b8603abe1c50f5b Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 26 Sep 2008 14:03:07 +0200 Subject: traps: x86: remove trace_hardirqs_fixup from pagefault handler The last use of trace_hardirqs_fixup is unnecessary, because the trap is taken with interrupt off on i386 as well as x86_64, and the irq-tracer is notified of this from the assembly code. trace_hardirqs_fixup and trace_hardirqs_fixup_flags are removed from include/asm-x86/irqflags.h as they are no longer used. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a742d753d5b..3f2b8962cbd 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -592,11 +592,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) unsigned long flags; #endif - /* - * We can fault from pretty much anywhere, with unknown IRQ state. - */ - trace_hardirqs_fixup(); - tsk = current; mm = tsk->mm; prefetchw(&mm->mmap_sem); -- cgit v1.2.3 From af5c2bd16ac2e5688c3bf46ea1f95112d696d294 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 3 Oct 2008 17:54:25 +0200 Subject: x86: fix virt_addr_valid() with CONFIG_DEBUG_VIRTUAL=y, v2 virt_addr_valid() calls __pa(), which calls __phys_addr(). With CONFIG_DEBUG_VIRTUAL=y, __phys_addr() will kill the kernel if the address *isn't* valid. That's clearly wrong for virt_addr_valid(). We also incorporate the debugging checks into virt_addr_valid(). Signed-off-by: Vegard Nossum Acked-by: Jiri Slaby Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 7fb737c6b54..8876220d906 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -45,6 +45,27 @@ unsigned long __phys_addr(unsigned long x) } EXPORT_SYMBOL(__phys_addr); +bool __virt_addr_valid(unsigned long x) +{ + if (x >= __START_KERNEL_map) { + x -= __START_KERNEL_map; + if (x >= KERNEL_IMAGE_SIZE) + return false; + x += phys_base; + } else { + if (x < PAGE_OFFSET) + return false; + x -= PAGE_OFFSET; + if (system_state == SYSTEM_BOOTING ? + x > MAXMEM : !phys_addr_valid(x)) { + return false; + } + } + + return pfn_valid(x >> PAGE_SHIFT); +} +EXPORT_SYMBOL(__virt_addr_valid); + #else static inline int phys_addr_valid(unsigned long addr) @@ -56,13 +77,24 @@ static inline int phys_addr_valid(unsigned long addr) unsigned long __phys_addr(unsigned long x) { /* VMALLOC_* aren't constants; not available at the boot time */ - VIRTUAL_BUG_ON(x < PAGE_OFFSET || (system_state != SYSTEM_BOOTING && - is_vmalloc_addr((void *)x))); + VIRTUAL_BUG_ON(x < PAGE_OFFSET); + VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING && + is_vmalloc_addr((void *) x)); return x - PAGE_OFFSET; } EXPORT_SYMBOL(__phys_addr); #endif +bool __virt_addr_valid(unsigned long x) +{ + if (x < PAGE_OFFSET) + return false; + if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x)) + return false; + return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); +} +EXPORT_SYMBOL(__virt_addr_valid); + #endif int page_is_ram(unsigned long pagenr) -- cgit v1.2.3 From c1a2f4b10852ce68e70f7e4c53600c36cc63ea45 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 14 Sep 2008 02:33:12 -0700 Subject: x86: change early_ioremap to use slots instead of nesting so we could remove the requirement that one needs to call early_iounmap() in exactly reverse order of early_ioremap(). Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 77 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 57 insertions(+), 20 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 8876220d906..e4c43ec71b2 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -616,16 +616,22 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx) __early_set_fixmap(idx, 0, __pgprot(0)); } - -static int __initdata early_ioremap_nested; - +static void *prev_map[FIX_BTMAPS_SLOTS] __initdata; +static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; static int __init check_early_ioremap_leak(void) { - if (!early_ioremap_nested) + int count = 0; + int i; + + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) + if (prev_map[i]) + count++; + + if (!count) return 0; WARN(1, KERN_WARNING "Debug warning: early ioremap leak of %d areas detected.\n", - early_ioremap_nested); + count); printk(KERN_WARNING "please boot with early_ioremap_debug and report the dmesg.\n"); @@ -636,15 +642,30 @@ late_initcall(check_early_ioremap_leak); static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) { unsigned long offset, last_addr; - unsigned int nrpages, nesting; + unsigned int nrpages; enum fixed_addresses idx0, idx; + int i, slot; WARN_ON(system_state != SYSTEM_BOOTING); - nesting = early_ioremap_nested; + slot = -1; + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { + if (!prev_map[i]) { + slot = i; + break; + } + } + + if (slot < 0) { + printk(KERN_INFO "early_iomap(%08lx, %08lx) not found slot\n", + phys_addr, size); + WARN_ON(1); + return NULL; + } + if (early_ioremap_debug) { printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ", - phys_addr, size, nesting); + phys_addr, size, slot); dump_stack(); } @@ -655,11 +676,7 @@ static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size, return NULL; } - if (nesting >= FIX_BTMAPS_NESTING) { - WARN_ON(1); - return NULL; - } - early_ioremap_nested++; + prev_size[slot] = size; /* * Mappings have to be page-aligned */ @@ -679,7 +696,7 @@ static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size, /* * Ok, go for it.. */ - idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting; + idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; idx = idx0; while (nrpages > 0) { early_set_fixmap(idx, phys_addr, prot); @@ -690,7 +707,8 @@ static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size, if (early_ioremap_debug) printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); - return (void *) (offset + fix_to_virt(idx0)); + prev_map[slot] = (void *) (offset + fix_to_virt(idx0)); + return prev_map[slot]; } /* Remap an IO device */ @@ -711,15 +729,33 @@ void __init early_iounmap(void *addr, unsigned long size) unsigned long offset; unsigned int nrpages; enum fixed_addresses idx; - int nesting; + int i, slot; + + slot = -1; + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { + if (prev_map[i] == addr) { + slot = i; + break; + } + } - nesting = --early_ioremap_nested; - if (WARN_ON(nesting < 0)) + if (slot < 0) { + printk(KERN_INFO "early_iounmap(%p, %08lx) not found slot\n", + addr, size); + WARN_ON(1); + return; + } + + if (prev_size[slot] != size) { + printk(KERN_INFO "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n", + addr, size, slot, prev_size[slot]); + WARN_ON(1); return; + } if (early_ioremap_debug) { printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr, - size, nesting); + size, slot); dump_stack(); } @@ -731,12 +767,13 @@ void __init early_iounmap(void *addr, unsigned long size) offset = virt_addr & ~PAGE_MASK; nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT; - idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting; + idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; while (nrpages > 0) { early_clear_fixmap(idx); --idx; --nrpages; } + prev_map[slot] = 0; } void __this_fixmap_does_not_exist(void) -- cgit v1.2.3